Translated handlePopData (#2)

This commit is contained in:
Waldir Leoncio 2022-04-12 14:31:54 +02:00
parent 9224c44cb6
commit 03db7f4124
4 changed files with 86 additions and 94 deletions

View file

@ -251,100 +251,6 @@ else
end
end
%--------------------------------------------------------------------------
function [newData, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(raw_data)
% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
% kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
% siten, ett?yhden lokuksen j koodit saavat arvoja
% välill?1,...,noalle(j). Ennen tät?muutosta alleeli, jonka
% koodi on nolla muutetaan.
data = raw_data;
nloci=size(raw_data,2)-1;
dataApu = data(:,1:nloci);
nollat = find(dataApu==0);
if ~isempty(nollat)
isoinAlleeli = max(max(dataApu));
dataApu(nollat) = isoinAlleeli+1;
data(:,1:nloci) = dataApu;
end
dataApu = []; nollat = []; isoinAlleeli = [];
noalle=zeros(1,nloci);
alleelitLokuksessa = cell(nloci,1);
for i=1:nloci
alleelitLokuksessaI = unique(data(:,i));
alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0));
noalle(i) = length(alleelitLokuksessa{i,1});
end
alleleCodes = zeros(max(noalle),nloci);
for i=1:nloci
alleelitLokuksessaI = alleelitLokuksessa{i,1};
puuttuvia = max(noalle)-length(alleelitLokuksessaI);
alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)];
end
for loc = 1:nloci
for all = 1:noalle(loc)
data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all;
end;
end;
nind = max(data(:,end));
%rows = cell(nind,1);
rows = zeros(nind,2);
for i=1:nind
rivit = find(data(:,end)==i)';
rows(i,1) = min(rivit);
rows(i,2) = max(rivit);
end
newData = data;
adjprior = zeros(max(noalle),nloci);
priorTerm = 0;
for j=1:nloci
adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)];
priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j));
end
%--------------------------------------------------------------------
function [Z,distances] = getPopDistancesByKL(adjprior)
% Laskee populaatioille etäisyydet
% käyttäen KL-divergenssi?
global COUNTS;
maxnoalle = size(COUNTS,1);
nloci = size(COUNTS,2);
npops = size(COUNTS,3);
distances = zeros(nchoosek(npops,2),1);
d = zeros(maxnoalle, nloci, npops);
prior = adjprior;
prior(find(prior==1))=0;
nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia.
prior(1,nollia)=1;
for pop1 = 1:npops
d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1);
%dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1);
end
pointer = 1;
for pop1 = 1:npops-1
for pop2 = pop1+1:npops
dist1 = d(:,:,pop1); dist2 = d(:,:,pop2);
div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci;
div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci;
div = (div12+div21)/2;
distances(pointer) = div;
pointer = pointer+1;
end
end
Z=linkage(distances');
%--------------------------------------------------------------------