476 lines
18 KiB
Matlab
476 lines
18 KiB
Matlab
function view_loglikelihood
|
||
%waitALittle;
|
||
% [filename1, pathname1] = uigetfile('*.mat', 'Load mixture results.');
|
||
% if (sum(filename1)==0) || (sum(pathname1)==0)
|
||
% return;
|
||
% end
|
||
% struct_array = load([pathname1 filename1]);
|
||
|
||
h0 = findobj('Tag','load_menu');
|
||
c = get(h0,'UserData');
|
||
h0 = findobj('Tag','filename1_text');
|
||
filename1 = get(h0,'String');
|
||
disp('---------------------------------------------------');
|
||
disp('Viewing the change of log likelihood.');
|
||
disp(['Load the mixture result from: ',[filename1],'...']);
|
||
|
||
if isfield(c,'c') %Matlab versio
|
||
c = c.c;
|
||
if ~isfield(c,'mixtureType')
|
||
disp('*** ERROR: Incorrect file format');
|
||
return
|
||
end
|
||
%else
|
||
% disp('*** ERROR: Incorrect file format');
|
||
% return;
|
||
end
|
||
if ~isfield(c,'changesInLogml')
|
||
disp('*** WARNING: Old mixture format detected.');
|
||
if 0 % Disabled 12/06/07, Jukka Siren
|
||
switch c.mixtureType
|
||
case 'linkage_mix'
|
||
%waitALittle;
|
||
[filename, pathname] = uigetfile('*.mat', 'Load the corresponding preprocessed data.');
|
||
if (sum(filename)==0) || (sum(pathname)==0)
|
||
return;
|
||
end
|
||
struct_array = load([pathname filename]);
|
||
disp('The corresponding preprocessed data is needed.');
|
||
disp(['Load the preprocessed data from: ',[pathname filename], '...']);
|
||
if isfield(struct_array,'c')
|
||
c_raw = struct_array.c;
|
||
if ~all(size(c_raw.adjprior)==size(c.adjprior)) ||...
|
||
~all(size(c_raw.data)==size(c.data))
|
||
disp('Mixture result and preprocessed data do not match.');
|
||
return;
|
||
end
|
||
|
||
if ~isfield(c_raw,'linkage_model')
|
||
model_entry = input('Specify the linkage model(1-linear 2-codon):');
|
||
switch model_entry,
|
||
case 1
|
||
linkage_model = 'linear';
|
||
case 2
|
||
linkage_model = 'codon';
|
||
end
|
||
else
|
||
linkage_model = c_raw.linkage_model;
|
||
end
|
||
else
|
||
disp('*** ERROR: Incorrect file format');
|
||
return;
|
||
end
|
||
disp('Start calculating log likelihood. Please wait...');
|
||
npops = c.npops;
|
||
partition = c.PARTITION;
|
||
cq_counts = c.CQ_COUNTS;
|
||
cq_sumcounts = c.CQ_SUMCOUNTS;
|
||
sp_counts = c.SP_COUNTS;
|
||
sp_sumcounts = c.SP_SUMCOUNTS;
|
||
|
||
% Shrink the raw data
|
||
noalle_cq = size(c.adjprior_cq,1);
|
||
informative_cq = logical(sum(c.adjprior_cq)~=noalle_cq);
|
||
noalle_sp = size(c.adjprior_sp,1);
|
||
informative_sp = logical(sum(c.adjprior_sp)~=noalle_sp);
|
||
cq_counts = uint16(cq_counts(:,informative_cq,:));
|
||
cq_sumcounts = uint16(cq_sumcounts(:,informative_cq));
|
||
sp_counts = uint16(sp_counts(:,informative_sp,:));
|
||
sp_sumcounts = uint16(sp_sumcounts(:,informative_sp));
|
||
|
||
data = c_raw.data;
|
||
ninds = size(data,1);
|
||
component_mat = c_raw.component_mat;
|
||
clear c_raw;
|
||
index = data(:,end);
|
||
[data_clique, data_separator, noalle_clique, noalle_separator] = ...
|
||
transform4(data, component_mat, linkage_model);
|
||
|
||
if length(noalle_clique)~=length(find(informative_cq))
|
||
disp('*** ERROR: The linkage model is not consistent with the data.');
|
||
return
|
||
end
|
||
|
||
data_clique = [data_clique index];
|
||
data_separator = [data_separator index];
|
||
|
||
[counts_cq, nalleles_cq, prior_cq, adjprior_cq]...
|
||
= allfreqsnew2(data_clique, double(noalle_clique));
|
||
clear data_clique;
|
||
[counts_sp, nalleles_sp, prior_sp, adjprior_sp]...
|
||
= allfreqsnew2(data_separator, double(noalle_separator));
|
||
clear data_separator;
|
||
counts_cq = uint8(counts_cq);
|
||
counts_sp = uint8(counts_sp);
|
||
|
||
pop_logml = computePopulationLogml(double(cq_counts), double(cq_sumcounts),...
|
||
double(sp_counts), double(sp_sumcounts),...
|
||
[1:npops], adjprior_cq, adjprior_sp);
|
||
|
||
changesInLogml = zeros(npops,ninds);
|
||
for ind = 1:ninds
|
||
indCqCounts = uint16(counts_cq(:,:,ind));
|
||
indSpCounts = uint16(counts_sp(:,:,ind));
|
||
changesInLogml(:,ind) = computeChanges(cq_counts, cq_sumcounts,...
|
||
sp_counts, sp_sumcounts,...
|
||
partition, pop_logml,...
|
||
ind, adjprior_cq, ...
|
||
adjprior_sp, indCqCounts, indSpCounts);
|
||
end
|
||
case 'mix' % Independence model.
|
||
%waitALittle;
|
||
[filename, pathname] = uigetfile('*.mat', 'Load the corresponding preprocessed data.');
|
||
if (sum(filename)==0) || (sum(pathname)==0)
|
||
return;
|
||
end
|
||
struct_array = load([pathname filename]);
|
||
disp('The corresponding preprocessed data is needed.');
|
||
disp(['Load the preprocessed data from: ',[pathname filename], '...']);
|
||
if isfield(struct_array,'c')
|
||
c_raw = struct_array.c;
|
||
if ~all(size(c_raw.adjprior)==size(c.adjprior)) ||...
|
||
~all(size(c_raw.data(:,[1:end-1]))==size(c.data))
|
||
disp('Mixture result and preprocessed data do not match.');
|
||
return;
|
||
end
|
||
else
|
||
disp('*** ERROR: Incorrect file format');
|
||
return;
|
||
end
|
||
|
||
|
||
npops = c.npops;
|
||
pops = 1:npops;
|
||
rowsFromInd = c.rowsFromInd;
|
||
data = c.data;
|
||
ninds = size(data,1)/rowsFromInd;
|
||
adjprior = c.adjprior;
|
||
priorTerm = c_raw.priorTerm;
|
||
COUNTS = c.COUNTS;
|
||
SUMCOUNTS = c.SUMCOUNTS;
|
||
PARTITION = c.PARTITION;
|
||
POP_LOGML = computePopulationLogml2(pops, adjprior, priorTerm, ...
|
||
COUNTS, SUMCOUNTS);
|
||
changesInLogml = zeros(npops,ninds);
|
||
for ind = 1:ninds
|
||
[muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, data, ...
|
||
adjprior, priorTerm, COUNTS, SUMCOUNTS, PARTITION, POP_LOGML);
|
||
changesInLogml(:,ind) = muutokset;
|
||
end
|
||
otherwise
|
||
disp('This model is under construction.');
|
||
return
|
||
end
|
||
|
||
|
||
c.changesInLogml = changesInLogml;
|
||
|
||
fprintf(1,'Saving the result...')
|
||
%waitALittle;
|
||
save_preproc = questdlg('Do you wish to save the updated mixture result?',...
|
||
'Save mixture results?',...
|
||
'Yes','No','Yes');
|
||
if isequal(save_preproc,'Yes');
|
||
%waitALittle;
|
||
[filename, pathname] = uiputfile('*.mat','Save mixture result as');
|
||
if isempty(filename) && isempty(pathname)
|
||
return;
|
||
else
|
||
kokonimi = [pathname filename];
|
||
% save(kokonimi,'c');
|
||
save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012
|
||
filename1 = filename;
|
||
end
|
||
|
||
end;
|
||
fprintf(1,'Finished.\n');
|
||
end
|
||
return
|
||
end
|
||
|
||
if isequal(c.mixtureType,'spatialPop') || isequal(c.mixtureType,'popMix')
|
||
% Group level clustering, using the correct partition
|
||
view_density(c.changesInLogml, c.groupPartition, filename1);
|
||
else
|
||
view_density(c.changesInLogml, c.PARTITION, filename1);
|
||
end
|
||
|
||
% -------------------------------------------------------------------------
|
||
function view_all_density(changesInLogml, partition, filename)
|
||
npops = size(changesInLogml,1);
|
||
groupnames = cell(1,npops);
|
||
for i=1:npops
|
||
groupnames{i} = sprintf('Cluster %d',i);
|
||
end
|
||
nrows = npops*(npops-1);
|
||
f = zeros(nrows,100);
|
||
xi = zeros(nrows,100);
|
||
map = giveColors(npops);
|
||
h = zeros(npops,1);
|
||
|
||
k = 1;
|
||
for i = 1:npops
|
||
inds = logical(partition==i);
|
||
% ninds = sum(inds); % individuals in the source cluster
|
||
target_pop = find(logical((1:npops)~=i));
|
||
for j = target_pop
|
||
[f(k,:),xi(k,:)] = ksdensity_myown(changesInLogml(j,inds)');
|
||
k = k+1;
|
||
end
|
||
h(i) = figure('NumberTitle', 'off', 'menubar','none','toolbar','figure'); % density plot;
|
||
set(h(i),'Tag','density_plot');
|
||
set(h(i),'Name',sprintf('%d Density of log likelihood changes - %s',i, filename));
|
||
set(gca,'ColorOrder',map(target_pop,:));
|
||
hold on;
|
||
plot(xi([(i-1)*(npops-1)+1:i*(npops-1)],:)',f([(i-1)*(npops-1)+1:i*(npops-1)],:)');
|
||
|
||
legend(groupnames(target_pop));
|
||
xlabel('Change of log likelihood');
|
||
ylabel('Estimated density');
|
||
title(sprintf('Cluster %d',i));
|
||
end
|
||
|
||
x_min = min(min(xi));
|
||
x_max = max(max(xi));
|
||
for i = 1:npops
|
||
set(get(h(i),'CurrentAxes'),'xlim',[x_min, x_max]);
|
||
end
|
||
|
||
% -------------------------------------------------------------------------
|
||
function view_density(changesInLogml, partition, filename)
|
||
npops = size(changesInLogml,1);
|
||
groupnames = cell(1,npops+1);
|
||
for i=1:npops
|
||
groupnames{i} = sprintf('Population %d',i);
|
||
end
|
||
groupnames{npops+1} = sprintf('All clusters');
|
||
%waitALittle;
|
||
[s1,v1] = listdlg('PromptString','Select one source cluster:',...
|
||
'SelectionMode','single',...
|
||
'Name','Select source cluster',...
|
||
'ListString',groupnames);
|
||
if isempty(s1) || ~v1
|
||
disp('*** WARNING: Viewing loglikelihood cancelled.');
|
||
return
|
||
elseif s1==npops+1
|
||
view_all_density(changesInLogml, partition, filename);
|
||
else
|
||
remain_pop = logical((1:npops)~=s1);
|
||
%waitALittle;
|
||
[s2,v2] = listdlg('PromptString', 'Select target clusters:',...
|
||
'SelectionMode','multiple',...
|
||
'Name','Select target cluster',...
|
||
'ListString',groupnames(remain_pop));
|
||
if isempty(s2) || ~v2
|
||
disp('*** WARNING: Viewing loglikelihood cancelled.');
|
||
return
|
||
end
|
||
inds = logical(partition==s1);
|
||
ninds = sum(inds); % individuals in the source cluster
|
||
remain = find(remain_pop);
|
||
fprintf('Source cluster: %d\n', s1);
|
||
fprintf('Number of strains: %d\n',ninds);
|
||
fprintf('Target cluster(s): %s\n', num2str(remain(s2)));
|
||
|
||
f = zeros(length(s2),100);
|
||
xi = zeros(length(s2),100);
|
||
for i = 1:length(s2)
|
||
[f(i,:),xi(i,:)] = ksdensity_myown(-changesInLogml(remain(s2(i)),inds)');
|
||
end
|
||
|
||
map = giveColors(npops);
|
||
h0 = figure('NumberTitle', 'off'); % density plot;
|
||
set(h0,'Tag','density_plot');
|
||
set(h0,'Name',['Density of log likelihood changes - ' filename]);
|
||
set(gca,'ColorOrder',map(remain(s2),:));
|
||
hold on;
|
||
plot(xi',f');
|
||
set(gca,'xlim',[min(min(xi)),max(max(xi))]);
|
||
legend(groupnames(remain(s2)));
|
||
xlabel('Change of log likelihood');
|
||
ylabel('Estimated density');
|
||
title(sprintf('Population %d',s1));
|
||
|
||
% h1 = figure('NumberTitle', 'off'); % histogram plot;
|
||
% set(h1,'Tag','histogram_plot');
|
||
% set(h1,'Name',['Histogram of log likelihood changes - ' filename]);
|
||
% hist(changesInLogml(remain(s2),inds)');
|
||
% colormap(map(remain(s2),:));
|
||
% % histfit(changesInLogml(remain(s2),inds)'); % need statistical toolbox
|
||
% legend(groupnames(remain(s2)));
|
||
% xlabel('Change of log likelihood');
|
||
% ylabel('Frequency');
|
||
% title(sprintf('Cluster %d',s1));
|
||
% rose(changesInLogml(remain(s2),inds));
|
||
end
|
||
|
||
|
||
%--------------------------------------%
|
||
%%% functions for linkage model %%%
|
||
%---------------------------------------
|
||
|
||
%--------------------------------------------------------------------------
|
||
|
||
function changes = computeChanges(cq_counts, cq_sumcounts,...
|
||
sp_counts, sp_sumcounts,...
|
||
partition, pop_logml,...
|
||
ind, adjprior_cq, adjprior_sp, ...
|
||
indCqCounts, indSpCounts)
|
||
% Computes changes in log-marginal likelihood if individual ind is
|
||
% moved to another population
|
||
%
|
||
% Input:
|
||
% ind - the individual to be moved
|
||
% adjprior_cq & _sp - adjpriors for cliques and separators
|
||
% indCqCounts, indSpCounts - counts for individual ind
|
||
%
|
||
% Output:
|
||
% changes - table of size 1*npops. changes(i) = difference in logml if
|
||
% ind is move to population i.
|
||
|
||
npops = size(cq_counts,3);
|
||
changes = zeros(npops,1);
|
||
|
||
i1 = partition(ind);
|
||
i1_logml = pop_logml(i1);
|
||
sumCq = uint16(sum(indCqCounts,1));
|
||
sumSp = uint16(sum(indSpCounts,1));
|
||
|
||
cq_counts(:,:,i1) = cq_counts(:,:,i1)-indCqCounts;
|
||
cq_sumcounts(i1,:) = cq_sumcounts(i1,:)-sumCq;
|
||
sp_counts(:,:,i1) = sp_counts(:,:,i1)-indSpCounts;
|
||
sp_sumcounts(i1,:) = sp_sumcounts(i1,:)-sumSp;
|
||
|
||
new_i1_logml = computePopulationLogml(double(cq_counts), double(cq_sumcounts),...
|
||
double(sp_counts), double(sp_sumcounts),...
|
||
i1, adjprior_cq, adjprior_sp);
|
||
|
||
cq_counts(:,:,i1) = cq_counts(:,:,i1)+indCqCounts;
|
||
cq_sumcounts(i1,:) = cq_sumcounts(i1,:)+sumCq;
|
||
sp_counts(:,:,i1) = sp_counts(:,:,i1)+indSpCounts;
|
||
sp_sumcounts(i1,:) = sp_sumcounts(i1,:)+sumSp;
|
||
|
||
|
||
i2 = [1:i1-1 , i1+1:npops];
|
||
i2_logml = pop_logml(i2);
|
||
|
||
cq_counts(:,:,i2) = cq_counts(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]);
|
||
cq_sumcounts(i2,:) = cq_sumcounts(i2,:)+repmat(sumCq,[npops-1 1]);
|
||
sp_counts(:,:,i2) = sp_counts(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]);
|
||
sp_sumcounts(i2,:) = sp_sumcounts(i2,:) + repmat(sumSp,[npops-1 1]);
|
||
|
||
new_i2_logml = computePopulationLogml(double(cq_counts), double(cq_sumcounts),...
|
||
double(sp_counts), double(sp_sumcounts),...
|
||
i2, adjprior_cq, adjprior_sp);
|
||
|
||
changes(i2) = new_i1_logml - i1_logml ...
|
||
+ new_i2_logml - i2_logml;
|
||
|
||
|
||
%--------------------------------------------------------------------------
|
||
|
||
function popLogml = computePopulationLogml(cq_counts, cq_sumcounts,...
|
||
sp_counts, sp_sumcounts,...
|
||
pops, adjprior_cq, adjprior_sp)
|
||
% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset
|
||
% logml:t koreille, jotka on m<><6D>ritelty pops-muuttujalla.
|
||
|
||
|
||
nall_cq = size(cq_counts,1);
|
||
nall_sp = size(sp_counts, 1);
|
||
ncliq = size(cq_counts,2);
|
||
nsep = size(sp_counts, 2);
|
||
|
||
z = length(pops);
|
||
|
||
popLogml_cq = ...
|
||
squeeze(sum(sum(reshape(...
|
||
gammaln(repmat(adjprior_cq,[1 1 length(pops)]) + cq_counts(:,:,pops)) ...
|
||
,[nall_cq ncliq z]),1),2)) - sum(gammaln(1+cq_sumcounts(pops,:)),2) - ...
|
||
sum(sum(gammaln(adjprior_cq)));
|
||
|
||
popLogml_sp = ...
|
||
squeeze(sum(sum(reshape(...
|
||
gammaln(repmat(adjprior_sp,[1 1 length(pops)]) + sp_counts(:,:,pops)) ...
|
||
,[nall_sp nsep z]),1),2)) - sum(gammaln(1+sp_sumcounts(pops,:)),2) - ...
|
||
sum(sum(gammaln(adjprior_sp)));
|
||
|
||
popLogml = popLogml_cq - popLogml_sp;
|
||
clear cq_counts cq_sumcounts sp_counts sp_sumcounts;
|
||
|
||
%--------------------------------------------------------------------------
|
||
|
||
|
||
%--------------------------------------%
|
||
%%% functions for independence model %%%
|
||
%---------------------------------------
|
||
%--------------------------------------------------------------------------
|
||
function [muutokset, diffInCounts] = ...
|
||
laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm, ...
|
||
COUNTS, SUMCOUNTS, PARTITION, POP_LOGML)
|
||
% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
|
||
% muutos logml:ss? mik<69>li yksil?ind siirret<65><74>n koriin i.
|
||
% diffInCounts on poistettava COUNTS:in siivusta i1 ja lis<69>tt<74>v?
|
||
% COUNTS:in siivuun i2, mik<69>li muutos toteutetaan.
|
||
|
||
npops = size(COUNTS,3);
|
||
muutokset = zeros(npops,1);
|
||
|
||
i1 = PARTITION(ind);
|
||
i1_logml = POP_LOGML(i1);
|
||
|
||
rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
|
||
diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
||
diffInSumCounts = sum(diffInCounts);
|
||
|
||
COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
|
||
SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
|
||
new_i1_logml = computePopulationLogml2(i1, adjprior, priorTerm,COUNTS, SUMCOUNTS);
|
||
COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
||
SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
||
|
||
i2 = [1:i1-1 , i1+1:npops];
|
||
i2_logml = POP_LOGML(i2);
|
||
|
||
COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
||
SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
||
new_i2_logml = computePopulationLogml2(i2, adjprior, priorTerm,COUNTS, SUMCOUNTS);
|
||
COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
|
||
SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
|
||
|
||
muutokset(i2) = new_i1_logml - i1_logml ...
|
||
+ new_i2_logml - i2_logml;
|
||
|
||
%--------------------------------------------------------------------------
|
||
function popLogml = computePopulationLogml2(pops, adjprior, priorTerm, ...
|
||
COUNTS, SUMCOUNTS)
|
||
% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset
|
||
% logml:t koreille, jotka on m<><6D>ritelty pops-muuttujalla.
|
||
|
||
x = size(COUNTS,1);
|
||
y = size(COUNTS,2);
|
||
z = length(pops);
|
||
|
||
popLogml = ...
|
||
squeeze(sum(sum(reshape(...
|
||
gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ...
|
||
,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm;
|
||
|
||
|
||
%--------------------------------------------------------------------------
|
||
function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
|
||
% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
|
||
% lukum<75><6D>r<EFBFBD>t (vastaavasti kuin COUNTS:issa), jotka ovat data:n
|
||
% riveill?rows.
|
||
|
||
diffInCounts = zeros(max_noalle, nloci);
|
||
for i=rows
|
||
row = data(i,:);
|
||
notEmpty = find(row>=0);
|
||
|
||
if length(notEmpty)>0
|
||
diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ...
|
||
diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1;
|
||
end
|
||
end
|
||
|