Translated handlePopData (#2)

This commit is contained in:
Waldir Leoncio 2022-04-12 14:31:54 +02:00
parent 9224c44cb6
commit 03db7f4124
4 changed files with 86 additions and 94 deletions

View file

@ -12,6 +12,7 @@ export(fgetl)
export(fopen)
export(greedyMix)
export(handleData)
export(handlePopData)
export(initPopNames)
export(laskeMuutokset4)
export(learn_partition_modified)

71
R/handlePopData.R Normal file
View file

@ -0,0 +1,71 @@
#' @title Handle Pop data
#' @description The last column of the original data tells you which
#' <missing translation> that line is from. The function changes the allele
#' codes so that the codes for one locus have values between 1 and noalle[j].
#' Before this change, an allele whose code is zero is changed.
#' @param raw_data raw data
#' @export
handlePopData <- function(raw_data) {
# Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
# kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
# siten, ett?yhden lokuksen j koodit saavat arvoja
# välill?1, ,noalle(j). Ennen tät?muutosta alleeli, jonka
# koodi on nolla muutetaan.
data <- raw_data
nloci <- size(raw_data, 2) - 1
dataApu <- data[, 1:nloci]
nollat <- find(dataApu == 0)
if (length(nollat) > 0) {
isoinAlleeli <- max(max(dataApu)$maxs)$maxs
dataApu[nollat] <- isoinAlleeli + 1
data[, 1:nloci] <- dataApu
}
noalle <- zeros(1, nloci)
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
for (i in 1:nloci) {
alleelitLokuksessaI <- sort(unique(data[, i]))
alleelitLokuksessa[[i]] <- alleelitLokuksessaI[find(alleelitLokuksessaI >= 0)]
noalle[i] <- length(alleelitLokuksessa[[i]])
}
alleleCodes <- zeros(unique(max(noalle)$maxs), nloci)
for (i in 1:nloci) {
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
puuttuvia <- unique(max(noalle)$maxs) - length(alleelitLokuksessaI)
alleleCodes[, i] = c(alleelitLokuksessaI, zeros(puuttuvia, 1))
}
for (loc in 1:nloci) {
for (all in 1:noalle[loc]) {
data[find(data[ , loc] == alleleCodes[all, loc]), loc] <- all
}
}
nind <- max(data[, ncol(data)])$maxs
rows <- zeros(nind, 2)
for (i in 1:nind) {
rivit <- t(find(data[, ncol(data)] == i))
rows[i, 1] <- min(rivit)$mins
rows[i, 2] <- max(rivit)$maxs
}
newData <- data
adjprior <- zeros(unique(max(noalle)$maxs), nloci)
priorTerm <- 0
for (j in 1:nloci) {
adjprior[, j] <- c(repmat(1 / noalle[j], c(noalle[j], 1)), ones(unique(max(noalle)$maxs) - noalle[j], 1))
priorTerm <- priorTerm + noalle[j] * log(gamma(1 / noalle[j]))
}
return(
list(
newData = newData,
rows = rows,
alleleCodes = alleleCodes,
noalle = noalle,
adjprior = adjprior,
priorTerm = priorTerm
)
)
}

14
man/handlePopData.Rd Normal file
View file

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/handlePopData.R
\name{handlePopData}
\alias{handlePopData}
\title{Handle Pop data}
\usage{
handlePopData(raw_data)
}
\arguments{
\item{raw_data}{raw data}
}
\description{
The last column of the original data tells you which? that line is from. The function changes the allele codes so that the codes for one locus have values between 1,, Noah (j). Before this change, an allele whose code is zero is changed.
}

View file

@ -251,100 +251,6 @@ else
end
end
%--------------------------------------------------------------------------
function [newData, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(raw_data)
% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
% kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
% siten, ett?yhden lokuksen j koodit saavat arvoja
% välill?1,...,noalle(j). Ennen tät?muutosta alleeli, jonka
% koodi on nolla muutetaan.
data = raw_data;
nloci=size(raw_data,2)-1;
dataApu = data(:,1:nloci);
nollat = find(dataApu==0);
if ~isempty(nollat)
isoinAlleeli = max(max(dataApu));
dataApu(nollat) = isoinAlleeli+1;
data(:,1:nloci) = dataApu;
end
dataApu = []; nollat = []; isoinAlleeli = [];
noalle=zeros(1,nloci);
alleelitLokuksessa = cell(nloci,1);
for i=1:nloci
alleelitLokuksessaI = unique(data(:,i));
alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0));
noalle(i) = length(alleelitLokuksessa{i,1});
end
alleleCodes = zeros(max(noalle),nloci);
for i=1:nloci
alleelitLokuksessaI = alleelitLokuksessa{i,1};
puuttuvia = max(noalle)-length(alleelitLokuksessaI);
alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)];
end
for loc = 1:nloci
for all = 1:noalle(loc)
data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all;
end;
end;
nind = max(data(:,end));
%rows = cell(nind,1);
rows = zeros(nind,2);
for i=1:nind
rivit = find(data(:,end)==i)';
rows(i,1) = min(rivit);
rows(i,2) = max(rivit);
end
newData = data;
adjprior = zeros(max(noalle),nloci);
priorTerm = 0;
for j=1:nloci
adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)];
priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j));
end
%--------------------------------------------------------------------
function [Z,distances] = getPopDistancesByKL(adjprior)
% Laskee populaatioille etäisyydet
% käyttäen KL-divergenssi?
global COUNTS;
maxnoalle = size(COUNTS,1);
nloci = size(COUNTS,2);
npops = size(COUNTS,3);
distances = zeros(nchoosek(npops,2),1);
d = zeros(maxnoalle, nloci, npops);
prior = adjprior;
prior(find(prior==1))=0;
nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia.
prior(1,nollia)=1;
for pop1 = 1:npops
d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1);
%dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1);
end
pointer = 1;
for pop1 = 1:npops-1
for pop2 = pop1+1:npops
dist1 = d(:,:,pop1); dist2 = d(:,:,pop2);
div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci;
div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci;
div = (div12+div21)/2;
distances(pointer) = div;
pointer = pointer+1;
end
end
Z=linkage(distances');
%--------------------------------------------------------------------