330 lines
No EOL
11 KiB
Matlab
330 lines
No EOL
11 KiB
Matlab
function mutationPlot(tietue)
|
|
|
|
global COUNTS; global PARTITION; global SUMCOUNTS;
|
|
clearGlobalVars;
|
|
|
|
h0 = findobj('Tag','load_menu');
|
|
c = get(h0,'UserData');
|
|
h0 = findobj('Tag','filename1_text');
|
|
filename1 = get(h0,'String');
|
|
disp('---------------------------------------------------');
|
|
disp('Viewing the mutation plot.');
|
|
disp(['Load the mixture result from: ',[filename1],'...']);
|
|
|
|
if (~isstruct(tietue))
|
|
% [filename, pathname] = uigetfile('*.mat', 'Load mixture result file');
|
|
% if (filename==0 & pathname==0) return;
|
|
% else
|
|
% %display('---------------------------------------------------');
|
|
% %display(['Reading mixture result from: ',[pathname filename],'...']);
|
|
% end
|
|
% pause(0.0001);
|
|
% h0 = findobj('Tag','filename1_text');
|
|
% set(h0,'String',filename); clear h0;
|
|
%
|
|
% struct_array = load([pathname filename]);
|
|
% if isfield(struct_array,'c') %Matlab versio
|
|
% c = struct_array.c;
|
|
% if ~isfield(c,'PARTITION') | ~isfield(c,'rowsFromInd')
|
|
% disp('Incorrect file format');
|
|
% return
|
|
% end
|
|
% else
|
|
% disp('Incorrect file format');
|
|
% return;
|
|
% end
|
|
PARTITION = c.PARTITION; COUNTS = c.COUNTS; SUMCOUNTS = c.SUMCOUNTS;
|
|
alleleCodes = c.alleleCodes; adjprior = c.adjprior; popnames = c.popnames;
|
|
rowsFromInd = c.rowsFromInd; data = c.data; npops = c.npops; noalle = c.noalle;
|
|
else
|
|
PARTITION = tietue.PARTITION;
|
|
COUNTS = tietue.COUNTS;
|
|
SUMCOUNTS = tietue.SUMCOUNTS;
|
|
alleleCodes = tietue.alleleCodes;
|
|
adjprior = tietue.adjprior;
|
|
popnames = tietue.popnames;
|
|
rowsFromInd = tietue.rowsFromInd;
|
|
data = double(tietue.data);
|
|
npops = tietue.npops;
|
|
noalle = tietue.noalle;
|
|
end
|
|
|
|
nloci = size(COUNTS,2);
|
|
ninds = size(data,1)/rowsFromInd;
|
|
|
|
if isfield(c,'CQ_COUNTS')
|
|
% Linked data
|
|
if isfield(c,'gene_lengths')
|
|
gene_lengths = c.gene_lengths;
|
|
else
|
|
[filename, pathname] = uigetfile('*.txt', 'Load file with gene lengths.');
|
|
if (filename==0 & pathname==0)
|
|
return
|
|
end
|
|
gene_lengths = load([pathname filename]);
|
|
end
|
|
|
|
component_mat = zeros(length(gene_lengths), max(gene_lengths));
|
|
cum_length = cumsum(gene_lengths);
|
|
component_mat(1,1:gene_lengths(1))=1:gene_lengths(1);
|
|
for i = 2:length(gene_lengths)
|
|
component_mat(i,1:gene_lengths(i)) = cum_length(i-1)+1:cum_length(i);
|
|
end
|
|
else
|
|
component_mat = 1:nloci;
|
|
gene_lengths = nloci;
|
|
end
|
|
|
|
if isfield(c,'CQ_COUNTS')
|
|
answers = inputdlg({'Index of the individual of interest',...
|
|
'BF limit (log)',...
|
|
'Genes to analyze'},...
|
|
'INPUT',[1; 1; 1],...
|
|
{' ','2.3',['1:' num2str(length(gene_lengths))]});
|
|
ind = str2num(answers{1});
|
|
BF = str2num(answers{2});
|
|
genes = str2num(answers{3});
|
|
%ind = input('Input individual: ');
|
|
%BF = input('Input BF: ');
|
|
%genes = input('Input genes to analyze: ');
|
|
all_right = check_inputs(ind,BF,genes, ninds, gene_lengths);
|
|
if all_right==0
|
|
return
|
|
end
|
|
n_genes = length(genes);
|
|
nameText = ['ind: ' num2str(ind) ', genes: ' num2str(genes)];
|
|
else
|
|
answers = inputdlg({'Index of the individual of interest',...
|
|
'BF limit (log)'},'INPUT',[1; 1],{' ','2.3'});
|
|
ind = str2num(answers{1});
|
|
BF = str2num(answers{2});
|
|
%ind = input('Input individual: ');
|
|
%BF = input('Input BF: ');
|
|
all_right = check_inputs(ind,BF,1, ninds, gene_lengths);
|
|
if all_right==0
|
|
return
|
|
end
|
|
genes = 1;
|
|
n_genes = 1;
|
|
nameText = ['ind: ' num2str(ind)];
|
|
end
|
|
|
|
origin = PARTITION(ind);
|
|
varit = giveColors(npops);
|
|
|
|
figure('NumberTitle','off','Name',nameText);
|
|
|
|
mutationList = repmat(...
|
|
struct('gene',[],'site',[],...
|
|
'row',[],'pops',[],'bf_vals',[]), [10 1]);
|
|
n_mutations = 0;
|
|
|
|
for i = 1:n_genes
|
|
% Loop over different genes
|
|
gene = genes(i);
|
|
|
|
for j = 1:rowsFromInd
|
|
% Loop over different "haplotypes"
|
|
left = 0.05;
|
|
bottom = 1 - (1/n_genes)*i + (1/n_genes)/(rowsFromInd*2+1)*(2*j-1);
|
|
width = 0.9;
|
|
height = 1/n_genes/(rowsFromInd*2+1);
|
|
axes('Position',[left bottom width height]);
|
|
set(gca, 'Xlim', [-.5 , gene_lengths(gene)+.5], 'YLim', [0,1], ...
|
|
'XTick', [], 'XTickLabel', [], 'YTick', [], 'YTickLabel', []);
|
|
|
|
for k=1:gene_lengths(gene)
|
|
% Loop over different sites
|
|
site = component_mat(gene,k);
|
|
allele = data((ind-1)*rowsFromInd+j, site);
|
|
%disp(num2str((ind-1)*rowsFromInd+j));
|
|
counts = squeeze(COUNTS(:,site,:));
|
|
|
|
logml = zeros(npops,1);
|
|
if allele>0
|
|
for m=1:npops
|
|
% Calculate the predictive probabilities of different
|
|
% origins
|
|
counts(allele,origin) = counts(allele,origin)-1;
|
|
counts(allele,m) = counts(allele,m)+1;
|
|
logml(m) = calculateLogml(counts, noalle(site));
|
|
counts(allele,m) = counts(allele,m)-1;
|
|
counts(allele,origin) = counts(allele,origin)+1;
|
|
end
|
|
end
|
|
|
|
logml = logml-max(logml);
|
|
probs = exp(logml) ./ sum(exp(logml));
|
|
|
|
cumprobs = cumsum(probs);
|
|
|
|
if log(max(probs)/probs(origin)) >= BF
|
|
h0=patch([k-1 k k k-1], [0 0 cumprobs(1) cumprobs(1)], varit(1,:));
|
|
set(h0,'EdgeColor','none');
|
|
for m=2:npops
|
|
h0=patch([k-1 k k k-1], [cumprobs(m-1) cumprobs(m-1) cumprobs(m) cumprobs(m)], varit(m,:));
|
|
set(h0,'EdgeColor','none');
|
|
end
|
|
n_mutations = n_mutations+1;
|
|
if n_mutations>length(mutationList)
|
|
mutationList = [mutationList; repmat(...
|
|
struct('gene',[],'site',[],...
|
|
'row',[],'pops',[],'bf_vals',[]), [length(mutationList) 1])];
|
|
end
|
|
mutationList(n_mutations).gene = gene;
|
|
mutationList(n_mutations).site = k; % The location in gene i!
|
|
mutationList(n_mutations).row = j;
|
|
aux = log(probs ./ probs(origin));
|
|
mutationList(n_mutations).pops = find(aux>=BF);
|
|
mutationList(n_mutations).bf_vals = aux(find(aux>=BF));
|
|
end
|
|
end
|
|
end
|
|
end
|
|
mlst_data = isfield(c,'CQ_COUNTS');
|
|
writeResults(mutationList, n_mutations, ind, origin, mlst_data);
|
|
|
|
|
|
%--------------------------------------------------------------------
|
|
|
|
|
|
function all_right = check_inputs(ind,BF,genes, ninds, gene_lengths)
|
|
|
|
all_right = 0;
|
|
if length(ind)~=1
|
|
disp('ERROR: index of one individual must be given.');
|
|
return;
|
|
end
|
|
if ind<=0 | ind >ninds
|
|
disp('ERROR: Index of the given individual is out of range.');
|
|
return
|
|
end
|
|
if length(BF)~=1
|
|
disp('ERROR: one BF value must be given.');
|
|
return
|
|
end
|
|
if BF<0
|
|
disp('ERROR: BF must be positive.');
|
|
return
|
|
end
|
|
if length(genes)<1 | length(genes)>length(gene_lengths)
|
|
disp('ERROR: input for the genes was incorrect.');
|
|
return
|
|
end
|
|
if any(genes<1) | any(genes>length(gene_lengths))
|
|
disp('ERROR: input for the genes was incorrect.');
|
|
return
|
|
end
|
|
all_right = 1;
|
|
|
|
%------------------------------------------------------------------
|
|
|
|
function writeResults(mutationList, n_mutations, ind, home, mlst_data)
|
|
|
|
h0 = findobj('Tag','filename2_text');
|
|
outf = get(h0,'String'); clear h0;
|
|
|
|
if length(outf)>0
|
|
fid = fopen(outf,'a');
|
|
else
|
|
fid = -1;
|
|
end
|
|
|
|
disp(' ');
|
|
disp('--------------------------');
|
|
disp(['Individual: ' num2str(ind) '.']);
|
|
disp(['Origin of the individual ' num2str(home) '.']);
|
|
|
|
if fid ~= -1
|
|
fprintf(fid, '\n');
|
|
fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n');
|
|
fprintf(fid,'%s \n', ['Individual: ' num2str(ind) '.']); fprintf(fid, '\n');
|
|
fprintf(fid,'%s \n', ['Origin of the individual ' num2str(home) '.']); fprintf(fid, '\n');
|
|
end
|
|
|
|
|
|
if n_mutations==0
|
|
disp('No putative locations for mutations detected');
|
|
if fid ~= -1
|
|
fprintf(fid,'%s \n', 'No putative locations for mutations detected'); fprintf(fid, '\n');
|
|
end
|
|
return
|
|
end
|
|
|
|
disp(' ');
|
|
if fid ~= -1
|
|
fprintf(fid,'%s \n', [' ']); fprintf(fid, '\n');
|
|
end
|
|
|
|
if mlst_data==1
|
|
disp('Putative locations for mutations:');
|
|
disp('gene, site, possible origins');
|
|
if fid ~= -1
|
|
fprintf(fid,'%s \n', 'Putative locations for mutations:'); fprintf(fid, '\n');
|
|
fprintf(fid,'%s \n', 'gene, site, possible origins'); fprintf(fid, '\n');
|
|
end
|
|
for i=1:n_mutations
|
|
gene = mutationList(i).gene;
|
|
site = mutationList(i).site;
|
|
text_line = [num2str(gene) blanks(6-floor(log10(gene))) num2str(site) blanks(7-floor(log10(site)))];
|
|
pops = mutationList(i).pops;
|
|
bf_vals = mutationList(i).bf_vals;
|
|
for j=1:length(pops)
|
|
text_line = [text_line num2str(pops(j)) '(' num2str(bf_vals(j)) ') '];
|
|
end
|
|
disp(text_line);
|
|
if fid ~= -1
|
|
fprintf(fid,'%s \n', [text_line]); fprintf(fid, '\n');
|
|
end
|
|
end
|
|
else
|
|
disp('Putative locations for mutations:');
|
|
disp('locus, haplotype, possible origins');
|
|
if fid ~= -1
|
|
fprintf(fid,'%s \n', 'Putative locations for mutations:'); fprintf(fid, '\n');
|
|
fprintf(fid,'%s \n', 'locus, haplotype, possible origins'); fprintf(fid, '\n');
|
|
end
|
|
for i=1:n_mutations
|
|
locus = mutationList(i).site;
|
|
row = mutationList(i).row;
|
|
text_line = [num2str(locus) blanks(7-floor(log10(locus))) num2str(row) blanks(10-floor(log10(row)))];
|
|
pops = mutationList(i).pops;
|
|
bf_vals = mutationList(i).bf_vals;
|
|
for j=1:length(pops)
|
|
text_line = [text_line num2str(pops(j)) '(' num2str(bf_vals(j)) ') '];
|
|
end
|
|
disp(text_line);
|
|
if fid ~= -1
|
|
fprintf(fid,'%s \n', [text_line]); fprintf(fid, '\n');
|
|
end
|
|
end
|
|
end
|
|
|
|
if fid ~= -1
|
|
fclose(fid);
|
|
end
|
|
|
|
%------------------------------------------------------------------
|
|
|
|
function val = calculateLogml(counts, noalle)
|
|
% counts corresponds to counts of a SINGLE locus. It is two-dimensional
|
|
% with as many columns as there are populations.
|
|
|
|
npops = size(counts,2);
|
|
|
|
prior = ones(noalle,npops)./noalle;
|
|
counts = counts(1:noalle,:);
|
|
|
|
val = sum(gammaln(sum(prior,1)),2) ...
|
|
- sum(gammaln(sum(counts+prior,1)),2) ...
|
|
+ sum(sum(gammaln(counts+prior))) ...
|
|
- sum(sum(gammaln(prior)));
|
|
|
|
|
|
%---------------------------------------------------------------
|
|
|
|
function clearGlobalVars
|
|
|
|
global COUNTS; COUNTS = [];
|
|
global SUMCOUNTS; SUMCOUNTS = [];
|
|
global PARTITION; PARTITION = []; |