diff --git a/matlab/independent/greedyPopMix.m b/matlab/independent/greedyPopMix.m index e2cd225..4618135 100644 --- a/matlab/independent/greedyPopMix.m +++ b/matlab/independent/greedyPopMix.m @@ -312,17 +312,6 @@ for j=1:nloci end -%---------------------------------------------------------------- - - -function clearGlobalVars - -global COUNTS; COUNTS = []; -global SUMCOUNTS; SUMCOUNTS = []; -global PARTITION; PARTITION = []; -global POP_LOGML; POP_LOGML = []; - - %-------------------------------------------------------------------- function [Z,distances] = getPopDistancesByKL(adjprior) @@ -356,180 +345,6 @@ for pop1 = 1:npops-1 end Z=linkage(distances'); -%-------------------------------------------------------------------------- - - -function Z = linkage(Y, method) -[k, n] = size(Y); -m = (1+sqrt(1+8*n))/2; -if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); -end -if nargin == 1 % set default switch to be 'co' - method = 'co'; -end -method = lower(method(1:2)); % simplify the switch string. -monotonic = 1; -Z = zeros(m-1,3); % allocate the output matrix. -N = zeros(1,2*m-1); -N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. -R = 1:n; -for s = 1:(n-1) - X = Y; - [v, k] = min(X); - i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); - j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A - I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. - U = [I1 I2 I3]; - I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; - J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - - switch method - case 'si' %single linkage - Y(I) = min(Y(I),Y(J)); - case 'av' % average linkage - Y(I) = Y(I) + Y(J); - case 'co' %complete linkage - Y(I) = max(Y(I),Y(J)); - case 'ce' % centroid linkage - K = N(R(i))+N(R(j)); - Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; - case 'wa' - Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... - N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); - end - J = [J i*(m-(i+1)/2)-m+j]; - Y(J) = []; % no need for the cluster information about j. - - % update m, N, R - m = m-1; - N(n+s) = N(R(i)) + N(R(j)); - R(i) = n+s; - R(j:(n-1))=R((j+1):n); -end - - -%----------------------------------------------------------------------- - - -function [sumcounts, counts, logml] = ... - initialPopCounts(data, npops, rows, noalle, adjprior) - -nloci=size(data,2); -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); - -for i=1:npops - for j=1:nloci - i_rivit = rows(i,1):rows(i,2); - havainnotLokuksessa = find(data(i_rivit,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(i_rivit,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - -logml = laskeLoggis(counts, sumcounts, adjprior); - - -%----------------------------------------------------------------------- - - -function loggis = laskeLoggis(counts, sumcounts, adjprior) -npops = size(counts,3); - -logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ... - - npops*sum(sum(gammaln(adjprior))) - ... - sum(sum(gammaln(1+sumcounts))); -loggis = logml2; - - -%-------------------------------------------------------------------- - - -function kunnossa = testaaGenePopData(tiedostonNimi) -% kunnossa == 0, jos data ei ole kelvollinen genePop data. -% Muussa tapauksessa kunnossa == 1. - -kunnossa = 0; -fid = fopen(tiedostonNimi); -line1 = fgetl(fid); %ensimmäinen rivi -line2 = fgetl(fid); %toinen rivi -line3 = fgetl(fid); %kolmas - -if (isequal(line1,-1) | isequal(line2,-1) | isequal(line3,-1)) - disp('Incorrect file format'); fclose(fid); - return -end -if (testaaPop(line1)==1 | testaaPop(line2)==1) - disp('Incorrect file format'); fclose(fid); - return -end -if testaaPop(line3)==1 - %2 rivi tällöin lokusrivi - nloci = rivinSisaltamienMjonojenLkm(line2); - line4 = fgetl(fid); - if isequal(line4,-1) - disp('Incorrect file format'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin nelj?täytyy sisältää pilkku. - disp('Incorrect file format'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy - pointer = pointer+1; - end - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format'); fclose(fid); - return - end -else - line = fgetl(fid); - lineNumb = 4; - while (testaaPop(line)~=1 & ~isequal(line,-1)) - line = fgetl(fid); - lineNumb = lineNumb+1; - end - if isequal(line,-1) - disp('Incorrect file format'); fclose(fid); - return - end - nloci = lineNumb-2; - line4 = fgetl(fid); %Eka rivi pop sanan jälkeen - if isequal(line4,-1) - disp('Incorrect file format'); fclose(fid); - return - end - if ~any(line4==',') - % Rivin täytyy sisältää pilkku. - disp('Incorrect file format'); fclose(fid); - return - end - pointer = 1; - while ~isequal(line4(pointer),',') %Tiedetään, ett?pysähtyy. - pointer = pointer+1; - end - - line4 = line4(pointer+1:end); %pilkun jälkeinen osa - nloci2 = rivinSisaltamienMjonojenLkm(line4); - if (nloci2~=nloci) - disp('Incorrect file format'); fclose(fid); - return - end -end -kunnossa = 1; -fclose(fid); - %-------------------------------------------------------------------- @@ -610,558 +425,6 @@ for pop = 1:npops end end -%------------------------------------------------------- - -function nimi = lueNimi(line) -%Palauttaa line:n alusta sen osan, joka on ennen pilkkua. -n = 1; -merkki = line(n); -nimi = ''; -while ~isequal(merkki,',') - nimi = [nimi merkki]; - n = n+1; - merkki = line(n); -end - -%------------------------------------------------------- - -function df = selvitaDigitFormat(line) -% line on ensimmäinen pop-sanan jälkeinen rivi -% Genepop-formaatissa olevasta datasta. funktio selvittää -% rivin muodon perusteella, ovatko datan alleelit annettu -% 2 vai 3 numeron avulla. - -n = 1; -merkki = line(n); -while ~isequal(merkki,',') - n = n+1; - merkki = line(n); -end - -while ~any(merkki == '0123456789'); - n = n+1; - merkki = line(n); -end -numeroja = 0; -while any(merkki == '0123456789'); - numeroja = numeroja+1; - n = n+1; - merkki = line(n); -end - -df = numeroja/2; - - -%------------------------------------------------------ - - -function count = rivinSisaltamienMjonojenLkm(line) -% Palauttaa line:n sisältämien mjonojen lukumäärän. -% Mjonojen väliss?täytyy olla välilyönti. -count = 0; -pit = length(line); -tila = 0; %0, jos odotetaan välilyöntej? 1 jos odotetaan muita merkkej? -for i=1:pit - merkki = line(i); - if (isspace(merkki) & tila==0) - %Ei tehd?mitään. - elseif (isspace(merkki) & tila==1) - tila = 0; - elseif (~isspace(merkki) & tila==0) - tila = 1; - count = count+1; - elseif (~isspace(merkki) & tila==1) - %Ei tehd?mitään - end -end - -%------------------------------------------------------- - -function pal = testaaPop(rivi) -% pal=1, mikäli rivi alkaa jollain seuraavista -% kirjainyhdistelmist? Pop, pop, POP. Kaikissa muissa -% tapauksissa pal=0. - -if length(rivi)<3 - pal = 0; - return -end -if (all(rivi(1:3)=='Pop') | ... - all(rivi(1:3)=='pop') | ... - all(rivi(1:3)=='POP')) - pal = 1; - return -else - pal = 0; - return -end - - -%-------------------------------------------------------- - - -function data = addAlleles(data, ind, line, divider) -% Lisaa BAPS-formaatissa olevaan datataulukkoon -% yksilöä ind vastaavat rivit. Yksilön alleelit -% luetaan genepop-formaatissa olevasta rivist? -% line. Jos data on 3 digit formaatissa on divider=1000. -% Jos data on 2 digit formaatissa on divider=100. - -nloci = size(data,2)-1; -if size(data,1) < 2*ind - data = [data; zeros(100,nloci+1)]; -end - -k=1; -merkki=line(k); -while ~isequal(merkki,',') - k=k+1; - merkki=line(k); -end -line = line(k+1:end); -clear k; clear merkki; - -alleeliTaulu = sscanf(line,'%d'); - -if length(alleeliTaulu)~=nloci - disp('Incorrect data format.'); -end - -for j=1:nloci - ekaAlleeli = floor(alleeliTaulu(j)/divider); - if ekaAlleeli==0 ekaAlleeli=-999; end; - tokaAlleeli = rem(alleeliTaulu(j),divider); - if tokaAlleeli==0 tokaAlleeli=-999; end - - data(2*ind-1,j) = ekaAlleeli; - data(2*ind,j) = tokaAlleeli; -end - -data(2*ind-1,end) = ind; -data(2*ind,end) = ind; - -%------------------------------------------------------------------------------------ - - -function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -% logml:t koreille, jotka on määritelty pops-muuttujalla. - -global COUNTS; -global SUMCOUNTS; -x = size(COUNTS,1); -y = size(COUNTS,2); -z = length(pops); - -popLogml = ... - squeeze(sum(sum(reshape(... - gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... - ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - - -%-------------------------------------------------------------------------- - - -function [muutokset, diffInCounts] = ... - laskeMuutokset(ind, globalRows, data, adjprior, priorTerm) -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?ind siirretään koriin i. -% diffInCounts on poistettava COUNTS:in siivusta i1 ja lisättäv? -% COUNTS:in siivuun i2, mikäli muutos toteutetaan. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1 = PARTITION(ind); -i1_logml = POP_LOGML(i1); - -rows = globalRows(ind,1):globalRows(ind,2); -diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - -%---------------------------------------------------------------------- - - -function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n -% riveill?rows. rows pitää olla vaakavektori. - -diffInCounts = zeros(max_noalle, nloci); -for i=rows - row = data(i,:); - notEmpty = find(row>=0); - - if length(notEmpty)>0 - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... - diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; - end -end - -%------------------------------------------------------------------------ - - -%------------------------------------------------------------------------------------- - - -function updateGlobalVariables(ind, i2, diffInCounts, ... - adjprior, priorTerm) -% Suorittaa globaalien muuttujien muutokset, kun yksil?ind -% on siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -i1 = PARTITION(ind); -PARTITION(ind)=i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%-------------------------------------------------------------------------- -%-- - -%------------------------------------------------------------------------------------ - - -function [muutokset, diffInCounts] = laskeMuutokset2( ... - i1, globalRows, data, adjprior, priorTerm); -% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli korin i1 kaikki yksilöt siirretään -% koriin i. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -muutokset = zeros(npops,1); - -i1_logml = POP_LOGML(i1); - -inds = find(PARTITION==i1); -ninds = length(inds); - -if ninds==0 - diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); - return; -end - -rows = []; -for i = 1:ninds - ind = inds(i); - lisa = globalRows(ind,1):globalRows(ind,2); - rows = [rows; lisa']; - %rows = [rows; globalRows{ind}']; -end - -diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); -diffInSumCounts = sum(diffInCounts); - -COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -i2 = [1:i1-1 , i1+1:npops]; -i2_logml = POP_LOGML(i2); - -COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -muutokset(i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - - -%--------------------------------------------------------------------------------- - - -function updateGlobalVariables2( ... - i1, i2, diffInCounts, adjprior, priorTerm); -% Suorittaa globaalien muuttujien muutokset, kun kaikki -% korissa i1 olevat yksilöt siirretään koriin i2. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -inds = find(PARTITION==i1); -PARTITION(inds) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML(i1) = 0; -POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm); - - -%-------------------------------------------------------------------------- -%---- - -function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... - data, adjprior, priorTerm, i1) -% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -% kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -% inds2(find(T2==i)) siirretään koriin j. - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; -npops = size(COUNTS,3); -npops2 = length(unique(T2)); -muutokset = zeros(npops2, npops); - -i1_logml = POP_LOGML(i1); -for pop2 = 1:npops2 - inds = inds2(find(T2==pop2)); - ninds = length(inds); - if ninds>0 - rows = []; - for i = 1:ninds - ind = inds(i); - lisa = globalRows(ind,1):globalRows(ind,2); - rows = [rows; lisa']; - %rows = [rows; globalRows{ind}']; - end - diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); - COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - - i2 = [1:i1-1 , i1+1:npops]; - i2_logml = POP_LOGML(i2)'; - - COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); - new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; - COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); - SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - - muutokset(pop2,i2) = new_i1_logml - i1_logml ... - + new_i2_logml - i2_logml; - end -end - -%------------------------------------------------------------------------------------ - -function muutokset = laskeMuutokset5(inds, globalRows, data, adjprior, ... - priorTerm, i1, i2) - -% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -% muutos logml:ss? mikäli yksil?i vaihtaisi koria i1:n ja i2:n välill? - -global COUNTS; global SUMCOUNTS; -global PARTITION; global POP_LOGML; - -ninds = length(inds); -muutokset = zeros(ninds,1); - -i1_logml = POP_LOGML(i1); -i2_logml = POP_LOGML(i2); - -for i = 1:ninds - ind = inds(i); - if PARTITION(ind)==i1 - pop1 = i1; %mist? - pop2 = i2; %mihin - else - pop1 = i2; - pop2 = i1; - end - rows = globalRows(ind,1):globalRows(ind,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - - new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); - muutokset(i) = sum(new_logmls); - - COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; -end - -muutokset = muutokset - i1_logml - i2_logml; - -%------------------------------------------------------------------------------------ - - -function updateGlobalVariables3(muuttuvat, diffInCounts, ... - adjprior, priorTerm, i2); -% Suorittaa globaalien muuttujien päivitykset, kun yksilöt 'muuttuvat' -% siirretään koriin i2. Ennen siirtoa yksilöiden on kuuluttava samaan -% koriin. - -global PARTITION; -global COUNTS; -global SUMCOUNTS; -global POP_LOGML; - -i1 = PARTITION(muuttuvat(1)); -PARTITION(muuttuvat) = i2; - -COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -%---------------------------------------------------------------------------- - - -function dist2 = laskeOsaDist(inds2, dist, ninds) -% Muodostaa dist vektorista osavektorin, joka sisältää yksilöiden inds2 -% väliset etäisyydet. ninds=kaikkien yksilöiden lukumäär? - -ninds2 = length(inds2); -apu = zeros(nchoosek(ninds2,2),2); -rivi = 1; -for i=1:ninds2-1 - for j=i+1:ninds2 - apu(rivi, 1) = inds2(i); - apu(rivi, 2) = inds2(j); - rivi = rivi+1; - end -end -apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -dist2 = dist(apu); - - -%----------------------------------------------------------------------------------- - - -function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja -% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. - -global COUNTS; -global SUMCOUNTS; -global PARTITION; - -notEmpty = find(any(SUMCOUNTS,2)); -COUNTS = COUNTS(:,:,notEmpty); -SUMCOUNTS = SUMCOUNTS(notEmpty,:); - -for n=1:length(notEmpty) - apu = find(PARTITION==notEmpty(n)); - PARTITION(apu)=n; -end -npops = length(notEmpty); - - -%----------------------------------------------------------------------------------- - - -function popnames = initPopNames(nameFile) - -fid = fopen(nameFile); -if fid == -1 - %File didn't exist - msgbox('Loading of the population names was unsuccessful', ... - 'Error', 'error'); - return; -end; -line = fgetl(fid); -counter = 1; -while (line ~= -1) & ~isempty(line) - names{counter} = line; - line = fgetl(fid); - counter = counter + 1; -end; -fclose(fid); - -popnames = cell(length(names), 2); -for i = 1:length(names) - popnames{i,1} = names(i); - popnames{i,2} = 0; -end - - -%------------------------------------------------------------------------- - - -function [popnames2, rowsFromInd] = findOutRowsFromInd(popnames, rows) - -ploidisuus = questdlg('Specify the type of individuals in the data: ',... - 'Individual type?', 'Haploid', 'Diploid', 'Tetraploid', ... - 'Diploid'); - -switch ploidisuus -case 'Haploid' - rowsFromInd = 1; -case 'Diploid' - rowsFromInd = 2; -case 'Tetraploid' - rowsFromInd = 4; -end - -if ~isempty(popnames) - for i = 1:size(rows,1) - popnames2{i,1} = popnames{i,1}; - rivi = rows(i,1):rows(i,2); - popnames2{i,2} = (rivi(rowsFromInd))/rowsFromInd; - end -else - popnames2 = []; -end - -%-------------------------------------------------------------------- - - -function newline = takeLine(description,width) -%Returns one line from the description: line ends to the first -%space after width:th mark. -newLine = description(1:width); -n = width+1; -while ~isspace(description(n)) & n=0 - digit = rem(num, 10^(yks+1)); - digit = floor(digit/(10^yks)); -else - digit = num*10; - digit = floor(rem(digit,10)); -end -digit = num2str(digit); - - -function mjono = kldiv2str(div) -mjono = ' '; -if abs(div)<100 - %Ei tarvita e-muotoa - mjono(6) = num2str(rem(floor(div*1000),10)); - mjono(5) = num2str(rem(floor(div*100),10)); - mjono(4) = num2str(rem(floor(div*10),10)); - mjono(3) = '.'; - mjono(2) = num2str(rem(floor(div),10)); - arvo = rem(floor(div/10),10); - if arvo>0 - mjono(1) = num2str(arvo); - end - -else - suurinYks = floor(log10(div)); - mjono(6) = num2str(suurinYks); - mjono(5) = 'e'; - mjono(4) = palautaYks(abs(div),suurinYks-1); - mjono(3) = '.'; - mjono(2) = palautaYks(abs(div),suurinYks); -end - - -%----------------------------------------------- - - -function ninds = testaaOnkoKunnollinenBapsData(data) -%Tarkastaa onko viimeisess?sarakkeessa kaikki -%luvut 1,2,...,n johonkin n:ään asti. -%Tarkastaa lisäksi, ett?on vähintään 2 saraketta. -if size(data,1)<2 - ninds = 0; return; -end -lastCol = data(:,end); -ninds = max(lastCol); -if ~isequal((1:ninds)',unique(lastCol)) - ninds = 0; return; -end - -%-------------------------------------------------------------------------- -%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -function initial_partition=admixture_initialization(data_matrix,nclusters, Z) - -size_data=size(data_matrix); -nloci=size_data(2)-1; -n=max(data_matrix(:,end)); -T=cluster_own(Z,nclusters); -initial_partition=zeros(size_data(1),1); -for i=1:n - kori=T(i); - here=find(data_matrix(:,end)==i); - for j=1:length(here) - initial_partition(here(j),1)=kori; - end -end - -function T = cluster_own(Z,nclust) -true=logical(1); -false=logical(0); -maxclust = nclust; -% Start of algorithm -m = size(Z,1)+1; -T = zeros(m,1); - % maximum number of clusters based on inconsistency - if m <= maxclust - T = (1:m)'; - elseif maxclust==1 - T = ones(m,1); - else - clsnum = 1; - for k = (m-maxclust+1):(m-1) - i = Z(k,1); % left tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - i = Z(k,2); % right tree - if i <= m % original node, no leafs - T(i) = clsnum; - clsnum = clsnum + 1; - elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree - T = clusternum(Z, T, i-m, clsnum); - clsnum = clsnum + 1; - end - end - end - -function T = clusternum(X, T, k, c) -m = size(X,1)+1; -while(~isempty(k)) - % Get the children of nodes at this level - children = X(k,1:2); - children = children(:); - - % Assign this node number to leaf children - t = (children<=m); - T(children(t)) = c; - - % Move to next level - k = children(~t) - m; -end - -%-------------------------------------------------------------------------- -function [logml] = ... - initialCounts(partition, data, npops, rows, noalle, adjprior) - -nloci=size(data,2); -ninds = size(rows, 1); - -%koot = rows(:,1) - rows(:,2) + 1; -%maxSize = max(koot); - -counts = zeros(max(noalle),nloci,npops); -sumcounts = zeros(npops,nloci); -for i=1:npops - for j=1:nloci - - havainnotLokuksessa = find(partition==i & data(:,j)>=0); - sumcounts(i,j) = length(havainnotLokuksessa); - for k=1:noalle(j) - alleleCode = k; - N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); - counts(k,j,i) = N_ijk; - end - end -end - - -%initializeGammaln(ninds, maxSize, max(noalle)); - -logml = laskeLoggis(counts, sumcounts, adjprior); - - -%-------------------------------------------------------------------------- - - -function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -% Tiedetään, ett?annettu logml on isompi kuin huonoin arvo -% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -% annettua logml arvoa, niin lisätään worstIndex:in kohtaan uusi logml ja -% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mitään. - -apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -if isempty(apu) - % Nyt löydetty partitio ei ole viel?kirjattuna summaryyn. - global PARTITION; - npops = length(unique(PARTITION)); - partitionSummary(worstIndex,1) = npops; - partitionSummary(worstIndex,2) = logml; - added = 1; -else - added = 0; -end - -%-------------------------------------------------------------------------- - -function inds = returnInOrder(inds, pop, globalRows, data, ... - adjprior, priorTerm) -% Palauttaa yksilöt järjestyksess?siten, ett?ensimmäisen?on -% se, jonka poistaminen populaatiosta pop nostaisi logml:n -% arvoa eniten. - -global COUNTS; global SUMCOUNTS; -ninds = length(inds); -apuTaulu = [inds, zeros(ninds,1)]; - -for i=1:ninds - ind =inds(i); - rows = globalRows(i,1):globalRows(i,2); - diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - diffInSumCounts = sum(diffInCounts); - - COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; - apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); - COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; - SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -end -apuTaulu = sortrows(apuTaulu,2); -inds = apuTaulu(ninds:-1:1,1); - -%-------------------------------------------------------------------------- - -function [emptyPop, pops] = findEmptyPop(npops) -% Palauttaa ensimmäisen tyhjän populaation indeksin. Jos tyhji? -% populaatioita ei ole, palauttaa -1:n. - -global PARTITION; -pops = unique(PARTITION)'; -if (length(pops) ==npops) - emptyPop = -1; -else - popDiff = diff([0 pops npops+1]); - emptyPop = min(find(popDiff > 1)); -end