function linkage_parallel(options) % LINKAGE_PARALLEL is the command line version of the baps partition with % linkage models. % Input: options is a struct generated by parallel.m %-------------------------------------------------------------------------- %- Syntax check out %-------------------------------------------------------------------------- outp = [options.outputMat '.txt']; inp = options.dataFile; if strcmp(options.fixedK, 'yes') fixedK = 1; else fixedK = 0; end switch options.mixtureType case 'linear_mix' linkage_model = 'linear'; case 'codon_mix' linkage_model = 'codon'; case '' error('*** ERROR: ''modeltype'' not specified.'); otherwise error('*** ERROR: unknown modeltype.'); end %-------------------------------------------------------------------------- %- Get data file location %-------------------------------------------------------------------------- switch options.dataType case {'numeric', 'sequence'} if isempty(options.linkageMap) error('*** ERROR: ''linkagemap'' not specified.'); end if strcmp(options.dataType, 'numeric') try data = load(options.dataFile); catch disp('*** ERROR: Incorrect BAPS numerical data.'); return end else %[data, filename] = readbaps(inp); [data, filename] = silentReadBaps(inp); % modified by Lu Cheng, 29.06.2010 if isempty(data) return end end %------------------------------------------------------------------ %- Get name and index file location %------------------------------------------------------------------ if ~isempty(options.nameFile) & ~isempty(options.indexFile) popnames = initPopNames(options.nameFile{1}, options.indexFile{1}); else popnames(:,1) = num2cell(unique(data(:,end))); popnames(:,2) = popnames(:,1); ninds = max(data(:,end)); popnames = fixPopnames(popnames, ninds); end if isempty(data) error('*** ERROR: Failed in loading the data'); end data = uint16(data); % Check that the data is rational: isRational = isTheLoadedNewDataRational(data); if isRational == 0 error('*** ERROR: failed in loading the data.'); else display(['# of haplotypes: ', num2str(size(data,1))]); display(['# of loci: ', num2str(size(data,2)-1)]); % display('Finished.'); end; % Check if the data is discrete or continuous if any(any(fix(data)~=data)) disp('Found decimal numbers. Continuous model will be used.'); error('** ERROR: continuous model is under construction.'); end display('---------------------------------------------------'); % Load the linkage map display(['Reading linkage map from: ',options.linkageMap,'...']); try component_mat = load(options.linkageMap); catch disp('*** ERROR: Incorrect linkage map.'); return; end % Check if the linkage map matches the data if (size(data,2)-1) ~= max(component_mat(:)) disp('*** ERROR: Incosistent linkage map.'); return; else display(['# of linkage groups: ', num2str(size(component_mat,1))]); end; display('---------------------------------------------------'); fprintf(1,'Preprocessing the data ...\n'); % Make the missing data complete data = makecomplete(data); if isempty(data) display('*** ERROR: Failed in completing the missing data'); return; end [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... handleData(data); % Distance between individuals is computed as if the loci are % independent. fprintf(1, 'Caculating the distances ...'); [Z,dist] = newGetDistances(data,rowsFromInd); fprintf(1,'Finished.\n'); c.data = uint16(data); c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; c.dist = dist; c.popnames = popnames; c.Z = Z; c.component_mat = component_mat; % Data transformation % display('---------------------------------------------------'); fprintf(1,'Transforming the data ...'); index = data(:,end); [data_clique, data_separator, noalle_clique, noalle_separator] = ... transform4(data, component_mat, linkage_model); data_clique = [data_clique index]; data_separator = [data_separator index]; [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... = allfreqsnew2(data_clique, double(noalle_clique)); clear data_clique; [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... = allfreqsnew2(data_separator, double(noalle_separator)); clear data_separator; counts_cq = uint8(counts_cq); counts_sp = uint8(counts_sp); fprintf(1,'Finished.\n'); clear c; case 'matlab' struct_array = load(options.dataFile); if isfield(struct_array,'c') %Matlab versio c = struct_array.c; if ~isfield(c,'dist') display('*** ERROR: Incorrect file format'); return end clear struct_array; elseif isfield(struct_array,'dist') %Mideva versio c = struct_array; clear struct_array; else display('*** ERROR: Incorrect file format'); return; end % The following are the same as in the basic clustering data = c.data; popnames = c.popnames; Z = c.Z; noalle = c.noalle; adjprior = c.adjprior; rowsFromInd = c.rowsFromInd; alleleCodes = c.alleleCodes; dist = c.dist; priorTerm = c.priorTerm; if ~isfield(c,'component_mat') display('*** ERROR: Incorrect linkage data'); return end % This is new component_mat = c.component_mat; data = uint16(data); display(['# of haplotypes: ', num2str(size(data,1))]); display(['# of loci: ', num2str(size(data,2)-1)]); display(['# of linkage groups: ', num2str(size(component_mat,1))]); if strcmpi(options.mixtureType, 'codon_mix') linkage_model = 'codon'; else linkage_model = 'linear'; end if ~isfield(c, 'mixtureType') clear c; % save the memory usage % Data transformation fprintf(1,'Transforming the data ...'); index = data(:,end); % [data_clique, data_separator, noalle_clique, noalle_separator] = ... % transform2(data, component_mat, linkage_model); [data_clique, data_separator, noalle_clique, noalle_separator] = ... transform4(data, component_mat, linkage_model); data_clique = [data_clique index]; data_separator = [data_separator index]; [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... = allfreqsnew2(data_clique, double(noalle_clique)); clear data_clique; [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... = allfreqsnew2(data_separator, double(noalle_separator)); clear data_separator; counts_cq = uint8(counts_cq); counts_sp = uint8(counts_sp); fprintf(1,'Finished.\n'); else if ~strcmpi(c.mixtureType, options.mixtureType) error('*** ERROR: incorrect mixture type'); end %Linkage model is specified in the preprocessed file. counts_cq = c.counts_cq; adjprior_cq = c.adjprior_cq; counts_sp = c.counts_sp; adjprior_sp = c.adjprior_sp; linkage_model = c.linkage_model; counts_cq = uint8(counts_cq); counts_sp = uint8(counts_sp); clear c; display(['linkage model: ', linkage_model]); end case 'excel' display('---------------------------------------------------'); display(['Reading sequence profile from: ', options.dataFile,'...']); [data, component_mat, popnames] = processxls(options.dataFile); if isempty(data) display('*** ERROR: Failed in loading the data'); return; end display('---------------------------------------------------'); fprintf(1,'Preprocessing the data ...\n'); % Make the missing data complete % missing values are denoted as -999 data = uint16(data); % data = uint8(data); data = makecomplete(data); if isempty(data) display('*** ERROR: Failed in completing the missing data'); return; end isRational = isTheLoadedNewDataRational(data); if isRational == 0 return; else display(['# of haplotypes: ', num2str(size(data,1))]); display(['# of loci: ', num2str(size(data,2)-1)]); end [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... handleData(data); % Distance between individuals is computed as if the loci are % independent. [Z,dist] = newGetDistances(data,rowsFromInd); fprintf(1,'Finished.\n'); ninds = max(data(:,end)); popnames = fixPopnames(popnames, ninds); c.data = uint16(data); c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; c.popnames = popnames; c.component_mat = component_mat; c.dist = dist; c.Z = Z; % Data transformation display('---------------------------------------------------'); fprintf(1,'Transforming the data ...'); index = data(:,end); [data_clique, data_separator, noalle_clique, noalle_separator] = ... transform4(data, component_mat, linkage_model); data_clique = [data_clique index]; data_separator = [data_separator index]; [counts_cq, nalleles_cq, prior_cq, adjprior_cq, genotypes_cq]... = allfreqsnew2(data_clique, double(noalle_clique)); clear data_clique; [counts_sp, nalleles_sp, prior_sp, adjprior_sp, genotypes_sp]... = allfreqsnew2(data_separator, double(noalle_separator)); clear data_separator; counts_cq = uint8(counts_cq); counts_sp = uint8(counts_sp); fprintf(1,'Finished.\n'); clear c; otherwise error('*** ERROR: data type is not specified or unknown.'); end global POP_LOGML; global PARTITION; global CQ_COUNTS; global SP_COUNTS; %These counts are for populations global CQ_SUMCOUNTS; global SP_SUMCOUNTS; %not for individuals clearGlobalVars; npopstext = []; npopstextExtra = options.initialK; if length(npopstextExtra)>=255 npopstextExtra = npopstextExtra(1:255); npopstext = [npopstext ' ' npopstextExtra]; teksti = 'The input field length limit (255 characters) was reached. Input more values: '; else % ----------------------------------------------------- % Set the limit of the input value. % Modified by Jing Tang, 30.12.2005 if max(npopstextExtra) > size(data,1) error('Values larger than the sample size are not accepted. '); else npopstext = [npopstext ' ' num2str(npopstextExtra)]; end end clear ready; clear teksti; if isempty(npopstext) || length(npopstext)==1 return else npopsTable = str2num(npopstext); % ykkoset = find(npopsTable==1); npopsTable(logical(npopsTable==1)) = []; if isempty(npopsTable) return end % clear ykkoset; end c.noalle = noalle; c.adjprior = adjprior; %priorTerm = c.priorTerm; c.rowsFromInd = rowsFromInd; c.counts_cq = counts_cq; c.adjprior_cq = adjprior_cq; c.counts_sp = counts_sp; c.adjprior_sp = adjprior_sp; c.dist = dist; c.Z = Z; if fixedK % Only the first value of npopsTaulu is used npops = npopsTable(1); nruns = length(npopsTable); % [logml, npops, partitionSummary]=linkageMix_fixK(c,npops,nruns,1); display('*** ERROR: fixed K for linkage module is not available.'); return else [logml, npops, partitionSummary]=linkageMix(c,npopsTable); end if logml==1 return end data = noIndex(data,noalle); %This is basically the same as in BAPS 3. changesInLogml = writeMixtureInfo(logml, counts_cq, counts_sp, adjprior_cq, ... adjprior_sp, outp, inp, partitionSummary, popnames, linkage_model, ... fixedK); if exist('baps4_output.baps','file') copyfile('baps4_output.baps',outp) delete('baps4_output.baps') end [sumcounts, counts] = indLociCounts(PARTITION, data, npops, noalle); % NB! Index column is removed in data matrix. c.PARTITION = PARTITION; c.CQ_COUNTS = CQ_COUNTS; c.CQ_SUMCOUNTS = CQ_SUMCOUNTS; c.SP_COUNTS = SP_COUNTS; c.SP_SUMCOUNTS = SP_SUMCOUNTS; c.alleleCodes = alleleCodes; c.adjprior_cq = adjprior_cq; c.adjprior_sp = adjprior_sp; c.popnames = popnames; c.rowsFromInd = rowsFromInd; c.data = uint16(data); c.npops = npops; % c.nalleles_cq = nalleles_cq; c.nalleles_sp = nalleles_sp; if strcmp(linkage_model,'linear') % Added on 03.11.06 c.mixtureType = 'linear_mix'; elseif strcmp(linkage_model,'codon') c.mixtureType = 'codon_mix'; end c.changesInLogml = changesInLogml; % this variable stores the change of likelihoods. % [ncluster ninds] % -Added on 02.11.2006 % The next ones are for the admixture input c.COUNTS = counts; c.SUMCOUNTS = sumcounts; c.adjprior = adjprior; c.rowsFromInd = rowsFromInd; c.noalle = noalle; c.alleleCodes = alleleCodes; % The two variables below are for the new linkage admixture model c.linkage_model = linkage_model; c.gene_lengths = calcGeneLengths(component_mat); % The logml is saved for parallel computing c.logml = logml; fprintf(1,'Saving the result...') try % save(options.outputMat, 'c'); save(options.outputMat, 'c','-v7.3'); % added by Lu Cheng, 08.06.2012 fprintf(1,'Finished.\n'); catch display('*** ERROR in saving the result.'); end %-------------------------------------------------------------------------- % The next three functions are for computing the initial partition % according to the distance between the individuals function initial_partition=admixture_initialization(nclusters,Z) T=cluster_own(Z,nclusters); initial_partition=T; %-------------------------------------------------------------------------- function T = cluster_own(Z,nclust) % true=logical(1); % false=logical(0); maxclust = nclust; % Start of algorithm m = size(Z,1)+1; T = zeros(m,1); % maximum number of clusters based on inconsistency if m <= maxclust T = (1:m)'; elseif maxclust==1 T = ones(m,1); else clsnum = 1; for k = (m-maxclust+1):(m-1) i = Z(k,1); % left tree if i <= m % original node, no leafs T(i) = clsnum; clsnum = clsnum + 1; elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree T = clusternum(Z, T, i-m, clsnum); clsnum = clsnum + 1; end i = Z(k,2); % right tree if i <= m % original node, no leafs T(i) = clsnum; clsnum = clsnum + 1; elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree T = clusternum(Z, T, i-m, clsnum); clsnum = clsnum + 1; end end end function T = clusternum(X, T, k, c) m = size(X,1)+1; while(~isempty(k)) % Get the children of nodes at this level children = X(k,1:2); children = children(:); % Assign this node number to leaf children t = (children<=m); T(children(t)) = c; % Move to next level k = children(~t) - m; end %-------------------------------------------------------------------------- function dist2 = laskeOsaDist(inds2, dist, ninds) % Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 % väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? ninds2 = length(inds2); apu = zeros(nchoosek(ninds2,2),2); rivi = 1; for i=1:ninds2-1 for j=i+1:ninds2 apu(rivi, 1) = inds2(i); apu(rivi, 2) = inds2(j); rivi = rivi+1; end end apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); dist2 = dist(apu); %-------------------------------------------------------------------------- function Z = computeLinkage(Y, method) [k, n] = size(Y); m = (1+sqrt(1+8*n))/2; if k ~= 1 || m ~= fix(m) error('The first input has to match the output of the PDIST function in size.'); end if nargin == 1 % set default switch to be 'co' method = 'co'; end method = lower(method(1:2)); % simplify the switch string. % monotonic = 1; Z = zeros(m-1,3); % allocate the output matrix. N = zeros(1,2*m-1); N(1:m) = 1; n = m; % since m is changing, we need to save m in n. R = 1:n; for s = 1:(n-1) X = Y; [v, k] = min(X); i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); j = k - (i-1)*(m-i/2)+i; Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. U = [I1 I2 I3]; I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; switch method case 'si' %single linkage Y(I) = min(Y(I),Y(J)); case 'av' % average linkage Y(I) = Y(I) + Y(J); case 'co' %complete linkage Y(I) = max(Y(I),Y(J)); case 'ce' % centroid linkage K = N(R(i))+N(R(j)); Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; case 'wa' Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); end J = [J i*(m-(i+1)/2)-m+j]; Y(J) = []; % no need for the cluster information about j. % update m, N, R m = m-1; N(n+s) = N(R(i)) + N(R(j)); R(i) = n+s; R(j:(n-1))=R((j+1):n); end %-------------------------------------------------------------------------- function changes = computeChanges(ind, adjprior_cq, adjprior_sp, ... indCqCounts, indSpCounts) % Computes changes in log-marginal likelihood if individual ind is % moved to another population % % Input: % ind - the individual to be moved % adjprior_cq & _sp - adjpriors for cliques and separators % indCqCounts, indSpCounts - counts for individual ind % % Output: % changes - table of size 1*npops. changes(i) = difference in logml if % ind is move to population i. global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; global PARTITION; global POP_LOGML; npops = size(CQ_COUNTS,3); changes = zeros(npops,1); i1 = PARTITION(ind); i1_logml = POP_LOGML(i1); sumCq = uint16(sum(indCqCounts,1)); sumSp = uint16(sum(indSpCounts,1)); CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; i2 = [1:i1-1 , i1+1:npops]; i2_logml = POP_LOGML(i2); CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) + repmat(sumSp,[npops-1 1]); new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:) - repmat(sumSp,[npops-1 1]); % a = repmat(sumSp,[npops-1 1]); changes(i2) = new_i1_logml - i1_logml ... + new_i2_logml - i2_logml; %------------------------------------------------------------------------------------ function changes = computeChanges2(i1, adjprior_cq, adjprior_sp) % Computes changes in log marginal likelihood if population i1 is combined % with another population % % Input: % i1 - the population to be combined % adjprior_cq & _sp - adjpriors for cliques and separators % % Output: % changes - table of size 1*npops. changes(i) = difference in logml if % i1 is combined with population i. global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; global POP_LOGML; npops = size(CQ_COUNTS,3); changes = zeros(npops,1); i1_logml = POP_LOGML(i1); indCqCounts = CQ_COUNTS(:,:,i1); indSpCounts = SP_COUNTS(:,:,i1); sumCq = uint16(sum(indCqCounts,1)); sumSp = uint16(sum(indSpCounts,1)); new_i1_logml = 0; i2 = [1:i1-1 , i1+1:npops]; i2_logml = POP_LOGML(i2); CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); % a = repmat(sumSp,[npops-1 1]); % if ~any(sumSp) % a(:,[1:size(a,2)])=[]; % end % SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ a ; new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp); CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); changes(i2) = new_i1_logml - i1_logml ... + new_i2_logml - i2_logml; %------------------------------------------------------------------------------------ function changes = computeChanges3(T2, inds2, i1, counts_cq, counts_sp, ... adjprior_cq, adjprior_sp) % Computes changes in log marginal likelihood if subpopulation of i2 is % moved to another population % % Input: % T2 - partition of inds2 to subpopulations % inds2 - individuals in population i1 % i2 % counts_cq, counts_sp - counts for individuals % % Output: % changes - table of size length(unique(T2))*npops. % changes(i,j) = difference in logml if subpopulation inds2(find(T2==i)) of % i2 is moved to population j global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; global POP_LOGML; npops = size(CQ_COUNTS,3); npops2 = length(unique(T2)); changes = zeros(npops2,npops); %cq_counts = CQ_COUNTS; %sp_counts = SP_COUNTS; %cq_sumcounts = CQ_SUMCOUNTS; %sp_sumcounts = SP_SUMCOUNTS; i1_logml = POP_LOGML(i1); for pop2 = 1:npops2 % inds = inds2(find(T2==pop2)); inds = inds2(logical(T2==pop2)); ninds = length(inds); if ninds>0 indCqCounts = uint16(sum(counts_cq(:,:,inds),3)); indSpCounts = uint16(sum(counts_sp(:,:,inds),3)); sumCq = uint16(sum(indCqCounts,1)); sumSp = uint16(sum(indSpCounts,1)); CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; new_i1_logml = computePopulationLogml(i1, adjprior_cq, adjprior_sp); CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)+indCqCounts; CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)+sumCq; SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)+indSpCounts; SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)+sumSp; i2 = [1:i1-1 , i1+1:npops]; i2_logml = POP_LOGML(i2)'; CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+repmat(indCqCounts, [1 1 npops-1]); CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+repmat(sumCq,[npops-1 1]); SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+repmat(indSpCounts, [1 1 npops-1]); SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+ repmat(sumSp,[npops-1 1]); new_i2_logml = computePopulationLogml(i2, adjprior_cq, adjprior_sp)'; CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)-repmat(indCqCounts, [1 1 npops-1]); CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)-repmat(sumCq,[npops-1 1]); SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)-repmat(indSpCounts, [1 1 npops-1]); SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)- repmat(sumSp,[npops-1 1]); changes(pop2,i2) = new_i1_logml - i1_logml ... + new_i2_logml - i2_logml; end end %-------------------------------------------------------------------------- function changes = computeChanges5(inds, i1, i2, counts_cq, counts_sp, ... adjprior_cq, adjprior_sp) % Computes change in logml if individual of inds is moved between % populations i1 and i2 global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; global POP_LOGML; global PARTITION; ninds = length(inds); changes = zeros(ninds,1); i1_logml = POP_LOGML(i1); i2_logml = POP_LOGML(i2); for i = 1:ninds ind = inds(i); if PARTITION(ind)==i1 pop1 = i1; %from pop2 = i2; %to else pop1 = i2; pop2 = i1; end indCqCounts = uint16(counts_cq(:,:,ind)); indSpCounts = uint16(counts_sp(:,:,ind)); sumCq = uint16(sum(indCqCounts,1)); sumSp = uint16(sum(indSpCounts,1)); CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)-indCqCounts; CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)-sumCq; SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)-indSpCounts; SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:) - sumSp; CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)+indCqCounts; CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)+sumCq; SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)+indSpCounts; SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:) + sumSp; new_logmls = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); changes(i) = sum(new_logmls); CQ_COUNTS(:,:,pop1) = CQ_COUNTS(:,:,pop1)+indCqCounts; CQ_SUMCOUNTS(pop1,:) = CQ_SUMCOUNTS(pop1,:)+sumCq; SP_COUNTS(:,:,pop1) = SP_COUNTS(:,:,pop1)+indSpCounts; SP_SUMCOUNTS(pop1,:) = SP_SUMCOUNTS(pop1,:)+sumSp; CQ_COUNTS(:,:,pop2) = CQ_COUNTS(:,:,pop2)-indCqCounts; CQ_SUMCOUNTS(pop2,:) = CQ_SUMCOUNTS(pop2,:)-sumCq; SP_COUNTS(:,:,pop2) = SP_COUNTS(:,:,pop2)-indSpCounts; SP_SUMCOUNTS(pop2,:) = SP_SUMCOUNTS(pop2,:)-sumSp; end changes = changes - i1_logml - i2_logml; %------------------------------------------------------------------------------------- function updateGlobalVariables(ind, i2, indCqCounts, indSpCounts, ... adjprior_cq, adjprior_sp) % Updates global variables when individual ind is moved to population i2 global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; global PARTITION; global POP_LOGML; i1 = PARTITION(ind); PARTITION(ind)=i2; sumCq = uint16(sum(indCqCounts,1)); sumSp = uint16(sum(indSpCounts,1)); CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); %--------------------------------------------------------------------------------- function updateGlobalVariables2(i1, i2, adjprior_cq, adjprior_sp) % Updates global variables when all individuals from population i1 are moved % to population i2 global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; global PARTITION; global POP_LOGML; % inds = find(PARTITION==i1); % PARTITION(inds) = i2; PARTITION(logical(PARTITION==i1)) = i2; CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+CQ_COUNTS(:,:,i1); CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+CQ_SUMCOUNTS(i1,:); SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+SP_COUNTS(:,:,i1); SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+SP_SUMCOUNTS(i1,:); CQ_COUNTS(:,:,i1) = 0; CQ_SUMCOUNTS(i1,:) = 0; SP_COUNTS(:,:,i1) = 0; SP_SUMCOUNTS(i1,:) = 0; POP_LOGML(i1) = 0; POP_LOGML(i2) = computePopulationLogml(i2, adjprior_cq, adjprior_sp); %------------------------------------------------------------------------------------ function updateGlobalVariables3(muuttuvat, i2, indCqCounts, indSpCounts, ... adjprior_cq, adjprior_sp) % Updates global variables when individuals muuttuvat are moved to % population i2 global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; global PARTITION; global POP_LOGML; i1 = PARTITION(muuttuvat(1)); PARTITION(muuttuvat) = i2; sumCq = uint16(sum(indCqCounts,1)); sumSp = uint16(sum(indSpCounts,1)); CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1)-indCqCounts; CQ_SUMCOUNTS(i1,:) = CQ_SUMCOUNTS(i1,:)-sumCq; SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1)-indSpCounts; SP_SUMCOUNTS(i1,:) = SP_SUMCOUNTS(i1,:)-sumSp; CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2)+indCqCounts; CQ_SUMCOUNTS(i2,:) = CQ_SUMCOUNTS(i2,:)+sumCq; SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2)+indSpCounts; SP_SUMCOUNTS(i2,:) = SP_SUMCOUNTS(i2,:)+sumSp; POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior_cq, adjprior_sp); %---------------------------------------------------------------------- function inds = returnInOrder(inds, pop, counts_cq, counts_sp, ... adjprior_cq, adjprior_sp) % Returns individuals inds in order according to the change in the logml if % they are moved out of the population pop global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; ninds = length(inds); apuTaulu = [inds, zeros(ninds,1)]; for i=1:ninds ind = inds(i); indCqCounts = uint16(counts_cq(:,:,ind)); indSpCounts = uint16(counts_sp(:,:,ind)); sumCq = uint16(sum(indCqCounts,1)); sumSp = uint16(sum(indSpCounts,1)); CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)-indCqCounts; CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)-sumCq; SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)-indSpCounts; SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)-sumSp; apuTaulu(i, 2) = computePopulationLogml(pop, adjprior_cq, adjprior_sp); CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop)+indCqCounts; CQ_SUMCOUNTS(pop,:) = CQ_SUMCOUNTS(pop,:)+sumCq; SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop)+indSpCounts; SP_SUMCOUNTS(pop,:) = SP_SUMCOUNTS(pop,:)+sumSp; end apuTaulu = sortrows(apuTaulu,2); inds = apuTaulu(ninds:-1:1,1); %------------------------------------------------------------------------- function [Z, dist] = newGetDistances(data, rowsFromInd) ninds = max(data(:,end)); nloci = size(data,2)-1; riviLkm = nchoosek(double(ninds),2); % empties = find(data<0); % data(empties)=0; data(logical(data<0)) = 0; data = uint16(data); pariTaulu = zeros(riviLkm,2); aPointer=1; for a=1:ninds-1 pariTaulu(aPointer:aPointer+double(ninds-1-a),1) = ones(ninds-a,1,'uint16')*a; pariTaulu(aPointer:aPointer+double(ninds-1-a),2) = uint16((a+1:ninds)'); aPointer = aPointer+double(ninds-a); end eka = pariTaulu(:,ones(1,rowsFromInd)); eka = eka * rowsFromInd; miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); eka = eka - miinus; toka = pariTaulu(:,ones(1,rowsFromInd)*2); toka = toka * rowsFromInd; toka = toka - miinus; eka = uint16(eka); toka = uint16(toka); clear pariTaulu; clear miinus; summa = uint16(zeros(riviLkm,1)); vertailuja = uint16(zeros(riviLkm,1)); x = zeros(size(eka)); x = uint16(x); y = zeros(size(toka)); y = uint16(y); % fprintf(1,'%%10'); for j=1:nloci; for k=1:rowsFromInd x(:,k) = data(eka(:,k),j); y(:,k) = data(toka(:,k),j); end for a=1:rowsFromInd for b=1:rowsFromInd vertailutNyt = uint16(x(:,a)>0 & y(:,b)>0); vertailuja = vertailuja + vertailutNyt; lisays = (x(:,a)~=y(:,b) & vertailutNyt); summa = summa + uint16(lisays); end end % fprintf(1,'\b\b'); % fprintf(1,'%d',floor(10+80*j/nloci)); end clear x; clear y; clear vertailutNyt; dist = zeros(length(vertailuja),1); % nollat = find(vertailuja==0); % dist(nollat) = 1; dist(logical(vertailuja==0)) = 1; muut = find(vertailuja>0); dist(muut) = double(summa(muut))./double(vertailuja(muut)); clear summa; clear vertailuja; Z = computeLinkage(dist'); % fprintf(1,'\b\b'); % fprintf(1,'%d\n',100); %-------------------------------------------------------------------------- function clearGlobalVars global CQ_COUNTS; CQ_COUNTS = []; global CQ_SUMCOUNTS; CQ_SUMCOUNTS = []; global SP_COUNTS; SP_COUNTS = []; global SP_SUMCOUNTS; SP_SUMCOUNTS = []; global PARTITION; PARTITION = []; global POP_LOGML; POP_LOGML = []; %-------------------------------------------------------------------------- function npops = removeEmptyPops % Removes empty pops from all global COUNTS variables. % Updates PARTITION and npops global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; global PARTITION; notEmpty = find(any(CQ_SUMCOUNTS,2)); CQ_COUNTS = CQ_COUNTS(:,:,notEmpty); CQ_SUMCOUNTS = CQ_SUMCOUNTS(notEmpty,:); SP_COUNTS = SP_COUNTS(:,:,notEmpty); SP_SUMCOUNTS = SP_SUMCOUNTS(notEmpty,:); for n=1:length(notEmpty) % apu = find(PARTITION==notEmpty(n)); % PARTITION(apu)=n; PARTITION(logical(PARTITION==notEmpty(n))) = n; end npops = length(notEmpty); %-------------------------------------------------------------------------- function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) % Tiedetään, ett?annettu logml on isompi kuin huonoin arvo % partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole % annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja % nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. global PARTITION; apu = isempty(find(abs(partitionSummary(:,2)-logml)<1e-5,1)); if apu % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. npops = length(unique(PARTITION)); partitionSummary(worstIndex,1) = npops; partitionSummary(worstIndex,2) = logml; added = 1; else added = 0; end %-------------------------------------------------------------------------- function [counts, sumcounts] = initialCounts(ind_counts) global PARTITION; pops = unique(PARTITION); npops = max(pops); counts = zeros(size(ind_counts,1), size(ind_counts,2), npops,'uint16'); sumcounts = zeros(npops, size(ind_counts,2),'uint16'); for i = 1:npops inds = find(PARTITION == i); counts(:,:,i) = sum(ind_counts(:,:,inds), 3); sumcounts(i,:) = sum(counts(:,:,i),1); end %-------------------------------------------------------------------------- function logml = computeLogml(adjprior_cq, adjprior_sp) global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; cq_counts = double(CQ_COUNTS); cq_sumcounts = double(CQ_SUMCOUNTS); sp_counts = double(SP_COUNTS); sp_sumcounts = double(SP_SUMCOUNTS); npops = size(CQ_COUNTS, 3); cq_logml = sum(sum(sum(gammaln(cq_counts+repmat(adjprior_cq,[1 1 npops]))))) ... - npops*sum(sum(gammaln(adjprior_cq))) - ... sum(sum(gammaln(1+cq_sumcounts))); sp_logml = sum(sum(sum(gammaln(sp_counts+repmat(adjprior_sp,[1 1 npops]))))) ... - npops*sum(sum(gammaln(adjprior_sp))) - ... sum(sum(gammaln(1+sp_sumcounts))); logml = cq_logml - sp_logml; clear cq_counts cq_sumcounts sp_counts sp_sumcounts; %-------------------------------------------------------------------------- function popLogml = computePopulationLogml(pops, adjprior_cq, adjprior_sp) % Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset % logml:t koreille, jotka on määritelty pops-muuttujalla. global CQ_COUNTS; global CQ_SUMCOUNTS; global SP_COUNTS; global SP_SUMCOUNTS; cq_counts = double(CQ_COUNTS); cq_sumcounts = double(CQ_SUMCOUNTS); sp_counts = double(SP_COUNTS); sp_sumcounts = double(SP_SUMCOUNTS); nall_cq = size(CQ_COUNTS,1); nall_sp = size(SP_COUNTS, 1); ncliq = size(CQ_COUNTS,2); nsep = size(SP_COUNTS, 2); z = length(pops); popLogml_cq = ... squeeze(sum(sum(reshape(... gammaln(repmat(adjprior_cq,[1 1 z]) + cq_counts(:,:,pops)) ... ,[nall_cq ncliq z]),1),2)) - sum(gammaln(1+cq_sumcounts(pops,:)),2) - ... sum(sum(gammaln(adjprior_cq))); popLogml_sp = ... squeeze(sum(sum(reshape(... gammaln(repmat(adjprior_sp,[1 1 z]) + sp_counts(:,:,pops)) ... ,[nall_sp nsep z]),1),2)) - sum(gammaln(1+sp_sumcounts(pops,:)),2) - ... sum(sum(gammaln(adjprior_sp))); popLogml = popLogml_cq - popLogml_sp; clear cq_counts cq_sumcounts sp_counts sp_sumcounts; %------------------------------------------------------------------- function changesInLogml = writeMixtureInfo(logml, counts_cq, counts_sp, adjprior_cq, ... adjprior_sp, outPutFile, inputFile, partitionSummary, popnames, linkage_model, ... fixedK) global PARTITION; global CQ_COUNTS; global LOGDIFF; %global CQ_SUMCOUNTS; %global SP_COUNTS; global SP_SUMCOUNTS; ninds = length(PARTITION); npops = size(CQ_COUNTS,3); names = (size(popnames,1) == ninds); %Tarkistetaan ett?nimet viittaavat yksilöihin if length(outPutFile)>0 fid = fopen(outPutFile,'w'); else fid = -1; diary('baps4_output.baps'); % save in text anyway. end dispLine; disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); disp(['Data file/ Linkage map: ' inputFile]); disp(['Model: ' linkage_model]); disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); disp(' '); if (fid ~= -1) fprintf(fid,'%s \n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); fprintf(fid,'%s \n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); fprintf(fid,'%s \n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); end cluster_count = length(unique(PARTITION)); disp('Best Partition: '); if (fid ~= -1) fprintf(fid,'%s \n','Best Partition: '); fprintf(fid,'\n'); end for m=1:cluster_count indsInM = find(PARTITION==m); length_of_beginning = 11 + floor(log10(m)); cluster_size = length(indsInM); if names text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; for k = 2:cluster_size text = [text ', ' char(popnames{indsInM(k)})]; end; else text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; for k = 2:cluster_size text = [text ', ' num2str(indsInM(k))]; end; end text = [text '}']; while length(text)>58 %Take one line and display it. new_line = takeLine(text,58); text = text(length(new_line)+1:end); disp(new_line); if (fid ~= -1) fprintf(fid,'%s \n',new_line); fprintf(fid,'\n'); end if length(text)>0 text = [blanks(length_of_beginning) text]; else text = []; end; end; if ~isempty(text) disp(text); if (fid ~= -1) fprintf(fid,'%s \n',text); fprintf(fid,'\n'); end end; end if npops == 1 changesInLogml = []; else disp(' '); disp(' '); disp('Changes in log(marginal likelihood) if indvidual i is moved to group j:'); if (fid ~= -1) fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); fprintf(fid, '%s \n', 'Changes in log(marginal likelihood) if indvidual i is moved to group j:'); fprintf(fid, '\n'); end if names nameSizes = zeros(ninds,1); for i = 1:ninds nimi = char(popnames{i}); nameSizes(i) = length(nimi); end maxSize = max(nameSizes); maxSize = max(maxSize, 5); erotus = maxSize - 5; alku = blanks(erotus); ekarivi = [alku ' ind' blanks(6+erotus)]; else ekarivi = ' ind '; end for i = 1:cluster_count ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; end disp(ekarivi); if (fid ~= -1) fprintf(fid, '%s \n', ekarivi); fprintf(fid, '\n'); end %ninds = size(data,1)/rowsFromInd; changesInLogml = LOGDIFF'; for ind = 1:ninds indCqCounts = uint16(counts_cq(:,:,ind)); indSpCounts = uint16(counts_sp(:,:,ind)); %changesInLogml(:,ind) = computeChanges(ind, adjprior_cq, ... % adjprior_sp, indCqCounts, indSpCounts); if names nimi = char(popnames{ind}); rivi = [blanks(maxSize - length(nimi)) nimi ':']; else rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; end for j = 1:npops rivi = [rivi ' ' logml2String(omaRound(changesInLogml(j,ind)))]; end disp(rivi); if (fid ~= -1) fprintf(fid, '%s \n', rivi); fprintf(fid, '\n'); end end % % KL-divergence has to be calculated otherwise... % % { % disp(' '); disp(' '); % disp('KL-divergence matrix:'); % % if (fid ~= -1) % fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); % fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); % fprintf(fid, '%s \n', ['KL-divergence matrix:']); fprintf(fid, '\n'); % end % % maxnoalle = size(COUNTS,1); % nloci = size(COUNTS,2); % d = zeros(maxnoalle, nloci, npops); % prior = adjprior; % prior(find(prior==1))=0; % nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. % prior(1,nollia)=1; % for pop1 = 1:npops % d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); % dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); % end % ekarivi = blanks(7); % for pop = 1:npops % ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; % end % disp(ekarivi); % if (fid ~= -1) % fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); % end % % for pop1 = 1:npops % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; % for pop2 = 1:pop1-1 % dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); % div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; % div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; % div = (div12+div21)/2; % rivi = [rivi kldiv2str(div) ' ']; % end % disp(rivi); % if (fid ~= -1) % fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); % end % end % % } end disp(' '); disp(' '); disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); if (fid ~= -1) fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n'); end partitionSummary = sortrows(partitionSummary,2); partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); % partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); partitionSummary = partitionSummary(logical(partitionSummary(:,2)>-1e49),:); if size(partitionSummary,1)>10 vikaPartitio = 10; else vikaPartitio = size(partitionSummary,1); end for part = 1:vikaPartitio line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; disp(line); if (fid ~= -1) fprintf(fid, '%s \n', line); fprintf(fid, '\n'); end end if ~fixedK disp(' '); disp(' '); disp('Probabilities for number of clusters'); if (fid ~= -1) fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); fprintf(fid, '%s \n', ' '); fprintf(fid, '\n'); fprintf(fid, '%s \n', 'Probabilities for number of clusters'); fprintf(fid, '\n'); end npopsTaulu = unique(partitionSummary(:,1)); len = length(npopsTaulu); probs = zeros(len,1); partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); sumtn = sum(exp(partitionSummary(:,2))); for i=1:len % npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); npopstn = sum(exp(partitionSummary(logical(partitionSummary(:,1)==npopsTaulu(i)),2))); probs(i) = npopstn / sumtn; end for i=1:len if probs(i)>1e-5 line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; disp(line); if (fid ~= -1) fprintf(fid, '%s \n', line); fprintf(fid, '\n'); end end end end if (fid ~= -1) fclose(fid); else diary off end %-------------------------------------------------------------- function newline = takeLine(description,width) %Returns one line from the description: line ends to the first %space after width:th mark. % newLine = description(1:width); n = width+1; while ~isspace(description(n)) && n=0 digit = rem(num, 10^(yks+1)); digit = floor(digit/(10^yks)); else digit = num*10; digit = floor(rem(digit,10)); end digit = num2str(digit); %------------------------------------------------------------------------- function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... handleData(raw_data) % Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? % kyseinen rivi on peräisin. Funktio tutkii ensin, ett?montako % rivi?maksimissaan on peräisin yhdelt?yksilölt? jolloin saadaan % tietää onko kyseess?haploidi, diploidi jne... Tämän jälkeen funktio % lisää tyhji?rivej?niille yksilöille, joilta on peräisin vähemmän % rivej?kuin maksimimäär? % Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin % koodi pienimmäksi koodiksi, joka isompi kuin mikään käytöss?oleva koodi. % Tämän jälkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j % koodit saavat arvoja välill?1,...,noalle(j). data = raw_data; nloci=size(raw_data,2)-1; dataApu = data(:,1:nloci); nollat = find(dataApu==0); if ~isempty(nollat) isoinAlleeli = max(max(dataApu)); dataApu(nollat) = isoinAlleeli+1; data(:,1:nloci) = dataApu; end % dataApu = []; % nollat = []; % isoinAlleeli = []; noalle=zeros(1,nloci); alleelitLokuksessa = cell(nloci,1); for i=1:nloci alleelitLokuksessaI = unique(data(:,i)); %alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); alleelitLokuksessa{i,1} = alleelitLokuksessaI(logical(alleelitLokuksessaI>=0)); noalle(i) = length(alleelitLokuksessa{i,1}); end alleleCodes = zeros(max(noalle),nloci); for i=1:nloci alleelitLokuksessaI = alleelitLokuksessa{i,1}; puuttuvia = max(noalle)-length(alleelitLokuksessaI); alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; end for loc = 1:nloci for all = 1:noalle(loc) % data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; data(logical(data(:,loc)==alleleCodes(all,loc)), loc)=all; end; end; nind = max(data(:,end)); nrows = size(data,1); ncols = size(data,2); rowsFromInd = zeros(nind,1); for i=1:nind rowsFromInd(i) = length(find(data(:,end)==i)); end maxRowsFromInd = max(rowsFromInd); a = -999; emptyRow = repmat(a, 1, ncols); lessThanMax = find(rowsFromInd < maxRowsFromInd); missingRows = maxRowsFromInd*nind - nrows; data = [data; zeros(missingRows, ncols)]; pointer = 1; for ind=lessThanMax' %Käy läpi ne yksilöt, joilta puuttuu rivej? miss = maxRowsFromInd-rowsFromInd(ind); % Tält?yksilölt?puuttuvien lkm. for j=1:miss rowToBeAdded = emptyRow; rowToBeAdded(end) = ind; data(nrows+pointer, :) = rowToBeAdded; pointer = pointer+1; end end data = sortrows(data, ncols); % Sorttaa yksilöiden mukaisesti newData = data; rowsFromInd = maxRowsFromInd; adjprior = zeros(max(noalle),nloci); priorTerm = 0; for j=1:nloci adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); end %-------------------------------------------------------------------------- function [emptyPop, pops] = findEmptyPop(npops) % Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? % populaatioita ei ole, palauttaa -1:n. global PARTITION; pops = unique(PARTITION)'; if (length(pops) ==npops) emptyPop = -1; else popDiff = diff([0 pops npops+1]); emptyPop = min(find(popDiff > 1)); end %-------------------------------------------------------------------------- function popnames = fixPopnames(popnames, ninds) if length(popnames) == ninds for i=1:ninds if isnumeric(popnames{i}) popnames{i} = num2str(popnames{i}); % popnames(i) = num2str(popnames{i}); end popnames{i} = cellstr(popnames{i}); % popnames(i) = cellstr(popnames{i}); end end %-------------------------------------------------------------------------- function isRational = isTheLoadedNewDataRational(data) % The last column of the data must include numbers 1-npops % If so, isRational = 1, otherwise isRational = 0. % The row numbers must be larger than 1. if size(data,1) == 1 isRational = 0; display('*** ERROR: Sample size must be larger than one'); return; end last_column = data(:,end); last_column = sort(last_column); current = 1; if last_column(1) ~= current isRational = 0; display('*** ERROR: Wrong Indexes in the data'); return; end; lengthcol = length(last_column); for n = 2:lengthcol if ~(last_column(n) == current || last_column(n) == current + 1) %Some population is missing from the last column isRational = 0; display('*** ERROR: Missing indexes in the data'); return; end; current = last_column(n); end; isRational = 1; % %------------------------------------------------------------------------- % function isRational = isTheLoadedNewLinkageRational(linkage_data) % % Each positive element must be unique. % % If so, isRational = 1, otherwise isRational = 0; % nonzero = find(linkage_data~=0); % dif = diff(linkage_data(nonzero)); % if ~all(dif) % isRational = 0; return; % end; % isRational = 1; %-------------------------------------------------------------------------- function [sumcounts, counts] = ... indLociCounts(partition, data, npops, noalle) nloci=size(data,2)-1; % ninds = size(data,1); counts = zeros(max(noalle),nloci,npops); sumcounts = zeros(npops,nloci); for i=1:npops for j=1:nloci % havainnotLokuksessa = find(partition==i & data(:,j)>=0); havainnotLokuksessa = find(ismember(data(:,end),find(partition==i))); sumcounts(i,j) = length(havainnotLokuksessa); for k=1:noalle(j) alleleCode = k; N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); counts(k,j,i) = N_ijk; end end end %----------------------------------------------------------------------------------- function popnames = initPopNames(nameFile, indexFile) %Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto % eivät olleet yht?pitki? popnames = []; indices = load(indexFile); fid = fopen(nameFile); if fid == -1 %File didn't exist msgbox('Loading of the population names was unsuccessful', ... 'Error', 'error'); return; end; line = fgetl(fid); counter = 1; while (line ~= -1) && ~isempty(line) names{counter} = line; line = fgetl(fid); counter = counter + 1; end; fclose(fid); if length(names) ~= length(indices) disp('The number of population names must be equal to the number of '); disp('entries in the file specifying indices of the first individuals of '); disp('each population.'); return; end popnames = cell(length(names), 2); for i = 1:length(names) popnames{i,1} = names(i); popnames{i,2} = indices(i); end