Translated writeMixtureInfoPop() (#2)

This commit is contained in:
Waldir Leoncio 2022-05-19 10:46:01 +02:00
parent 26e2a48c97
commit f0d4858149
3 changed files with 269 additions and 258 deletions

View file

@ -38,6 +38,7 @@ export(takeLine)
export(testaaOnkoKunnollinenBapsData) export(testaaOnkoKunnollinenBapsData)
export(testaaPop) export(testaaPop)
export(writeMixtureInfo) export(writeMixtureInfo)
export(writeMixtureInfoPop)
importFrom(Rsamtools,scanBam) importFrom(Rsamtools,scanBam)
importFrom(adegenet,.readExt) importFrom(adegenet,.readExt)
importFrom(adegenet,read.genepop) importFrom(adegenet,read.genepop)

View file

@ -1,258 +1,225 @@
function changesInLogml = writeMixtureInfoPop(logml, rows, data, adjprior, ... #' @title Write Mixture Info Pop
priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK) #' @description Writes information about the pop mixture
#' @param logml logml
global PARTITION; #' @param rows rows
global COUNTS; #' @param data data
global SUMCOUNTS; #' @param adjprior adjprior
global LOGDIFF; #' @param priorTerm priorTerm
ninds = size(rows,1); #' @param outPutFile outPutFile
npops = size(COUNTS,3); #' @param inputFile inputFile
names = (size(popnames,1) == ninds); %Tarkistetaan ett?nimet viittaavat yksilöihin #' @param partitionSummary partitionSummary
changesInLogml = []; #' @param popnames popnames
if length(outPutFile)>0 #' @param fixedK fixedK
fid = fopen(outPutFile,'a'); #' @export
else writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
fid = -1; outPutFile, inputFile, partitionSummary,
diary('baps4_output.baps'); % save in text anyway. popnames, fixedK) {
end ninds <- size(rows, 1)
npops <- size(COUNTS, 3)
dispLine; names <- size(popnames, 1) == ninds # Tarkistetaan ett?nimet viittaavat yksilöihin
disp('RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:'); changesInLogml <- vector()
disp(['Data file: ' inputFile]); if (!missing(outPutFile)) {
disp(['Number of clustered groups: ' ownNum2Str(ninds)]); fid <- vector()
disp(['Number of clusters in optimal partition: ' ownNum2Str(npops)]); }
disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); cat("RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:\n")
disp(' '); cat("Data file:", inputFile, "\n")
if (fid ~= -1) cat("Number of clustered groups:", ownNum2Str(ninds), "\n")
fprintf(fid,'%s \n', ['RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); cat("Number of clusters in optimal partition:", ownNum2Str(npops), "\n")
fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); cat("Log(marginal likelihood) of optimal partition:", ownNum2Str(logml), "\n")
fprintf(fid,'%s \n', ['Number of clustered groups: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); if (exists("fid")) {
fprintf(fid,'%s \n', ['Number of clusters in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); append(fid, "RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:\n")
fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); append(fid, c("Data file:", inputFile, "\n"))
fprintf(fid,'\n'); append(fid, c("Number of clustered groups:", ownNum2Str(ninds), "\n"))
end append(fid, c("Number of clusters in optimal partition:", ownNum2Str(npops), "\n"))
append(fid, c("Log(marginal likelihood) of optimal partition:", ownNum2Str(logml), "\n\n"))
cluster_count = length(unique(PARTITION)); }
disp(['Best Partition: ']); cluster_count <- length(unique(PARTITION))
if (fid ~= -1) cat("Best Partition:\n")
fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n'); if (exists("fid")) {
end append(fid, c("Best partition:\n"))
for m=1:cluster_count }
indsInM = find(PARTITION==m); for (m in 1:cluster_count) {
length_of_beginning = 11 + floor(log10(m)); indsInM <- find(PARTITION == m)
cluster_size = length(indsInM); length_of_beginning <- 11 + floor(log10(m))
cluster_size <- length(indsInM)
if names if (names) {
text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; text <- c("Cluster ", as.character(m), ": {", as.character(popnames[indsInM[1]]))
for k = 2:cluster_size for (k in 2:cluster_size) {
text = [text ', ' char(popnames{indsInM(k)})]; text <- c(text, ", ", as.character(popnames[[indsInM[k]]]))
end; }
else } else {
text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; text <- c("Cluster ", as.character(m), ": {", as.character(indsInM[1]))
for k = 2:cluster_size for (k in 2:cluster_size) {
text = [text ', ' num2str(indsInM(k))]; text <- c(text, ", ", as.character(indsInM[k]))
end; }
end }
text = [text '}']; text <- c(text, "}")
while length(text)>58 while (length(text) > 58) {
%Take one line and display it. # Take one line and display it.
new_line = takeLine(text,58); new_line <- takeLine(text, 58)
text = text(length(new_line)+1:end); text <- text[(length(new_line) + 1):length(text)]
disp(new_line); cat(new_line, "\n")
if (fid ~= -1) if (exists("fid")) {
fprintf(fid,'%s \n',[new_line]); append(fid, c(new_line, "\n"))
fprintf(fid,'\n'); }
end if (length(text) > 0) {
if length(text)>0 text <- c(blanks(length_of_beginning), text)
text = [blanks(length_of_beginning) text]; } else {
else text <- vector()
text = []; }
end; }
end; if (!is.null(text)) {
if ~isempty(text) cat(text, "\n")
disp(text); if (exists(fid)) {
if (fid ~= -1) append(fid, c("\n", text, "\n"))
fprintf(fid,'%s \n',[text]); }
fprintf(fid,'\n'); }
end }
end; if (npops > 1) {
end cat("\n\nChanges in log(marginal likelihood) if (group i is moved to cluster j:")
if (exists("fid")) {
if npops > 1 append(fid, " \n \n", )
disp(' '); append(fid, "Changes in log(marginal likelihood) if (group i is moved to cluster j:")
disp(' '); }
disp('Changes in log(marginal likelihood) if group i is moved to cluster j:'); if (names) {
if (fid ~= -1) nameSizes <- zeros(ninds, 1)
fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); for (i in 1:ninds) {
fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); nimi <- as.character(popnames[i])
fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if group i is moved to cluster j:']); %fprintf(fid, '\n'); nameSizes[i] <- length(nimi)
end }
maxSize <- max(nameSizes)
if names maxSize <- max(maxSize, 5)
nameSizes = zeros(ninds,1); erotus <- maxSize - 5
for i = 1:ninds alku <- blanks(erotus)
nimi = char(popnames{i}); ekarivi <- c(alku, "group", blanks(6 + erotus))
nameSizes(i) = length(nimi); } else {
end ekarivi <- "group "
maxSize = max(nameSizes); }
maxSize = max(maxSize, 5); for (i in 1:cluster_count) {
erotus = maxSize - 5; ekarivi <- c(ekarivi, ownNum2Str(i), blanks(8 - floor(log10(i))))
alku = blanks(erotus); }
ekarivi = [alku 'group' blanks(6+erotus)]; cat(ekarivi, "\n")
else if (exists("fid")) {
ekarivi = 'group '; append(fid, c(ekarivi, "\n"))
end }
for i = 1:cluster_count changesInLogml <- t(LOGDIFF)
ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; for (ind in 1:ninds) {
end muutokset <- changesInLogml[, ind]
disp(ekarivi); if (names) {
if (fid ~= -1) nimi <- as.character(popnames[ind])
fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); rivi <- c(blanks(maxSize - length(nimi)), nimi, ":")
end } else {
rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":")
changesInLogml = LOGDIFF'; }
for ind = 1:ninds for (j in 1:npops) {
%[muutokset, diffInCounts] = laskeMuutokset(ind, rows, data, ... rivi <- c(rivi, " ", logml2String(omaRound(muutokset(j))))
% adjprior, priorTerm); }
%changesInLogml(:,ind) = muutokset; cat(rivi, "\n")
muutokset = changesInLogml(:,ind); if (exists("fid")) {
if names append(fid, c(rivi, "\n"))
nimi = char(popnames{ind}); }
rivi = [blanks(maxSize - length(nimi)) nimi ':']; }
else cat(" ")
rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; cat("KL-divergence matrix in PHYLIP format:")
end dist_mat <- zeros(npops, npops)
for j = 1:npops if (exists("fid")) {
rivi = [rivi ' ' logml2String(omaRound(muutokset(j)))]; append(fid, " \n")
end append(fid, " \n")
disp(rivi); append(fid, "KL - divergence matrix in PHYLIP format:\n")
if (fid ~= -1) }
fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); maxnoalle <- size(COUNTS, 1)
end nloci <- size(COUNTS, 2)
end d <- zeros(maxnoalle, nloci, npops)
prior <- adjprior
disp(' '); disp(' '); prior[find[prior == 1]] <- 0
disp('KL-divergence matrix in PHYLIP format:'); nollia <- find(all(prior == 0)) # Lokukset, joissa oli havaittu vain yht?alleelia.
dist_mat = zeros(npops, npops); prior[1, nollia] <- 1
if (fid ~= -1) for (pop1 in 1:npops) {
fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); }
fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n'); ekarivi <- as.character(npops)
end cat(ekarivi, "\n")
if (exists("fid")) {
maxnoalle = size(COUNTS,1); append(fid, c(ekarivi, "\n"))
nloci = size(COUNTS,2); }
d = zeros(maxnoalle, nloci, npops); for (pop1 in 1:npops) {
prior = adjprior; rivi <- c(blanks(2 - floor(log10(pop1))), as.character(pop1), " ")
prior(find(prior==1))=0; for (pop2 in 1:(pop1 - 1)) {
nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. dist1 <- d[, , pop1]
prior(1,nollia)=1; dist2 <- d[, , pop2]
for pop1 = 1:npops div12 <- sum(sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10)))) / nloci
d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); div21 <- sum(sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10)))) / nloci
%dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); div <- (div12 + div21) / 2
end dist_mat[pop1, pop2] <- div
% ekarivi = blanks(7); }
% for pop = 1:npops }
% ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; dist_mat <- dist_mat + t(dist_mat) # make it symmetric
% end for (pop1 in 1:npops) {
ekarivi = num2str(npops); rivi <- c("Cluster_", as.character(pop1), " ")
disp(ekarivi); for (pop2 in 1:npops) {
rivi <- c(rivi, kldiv2str(dist_mat(pop1, pop2)), " ")
if (fid ~= -1) }
fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); cat(rivi)
end if (exists("fid")) {
append(fid, c(rivi, "\n"))
for pop1 = 1:npops }
rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; }
for pop2 = 1:pop1-1 }
dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); cat(" \n \n \n")
div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; cat("List of sizes of 10 best visited partitions and corresponding log(ml) values\n")
div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; if (exists("fid")) {
div = (div12+div21)/2; append(fid, " \n\n")
% rivi = [rivi kldiv2str(div) ' ']; append(fid, " \n\n")
dist_mat(pop1,pop2) = div; append(fid, " \n\n")
end append(fid, " \n\n")
% disp(rivi); append(fid, "List of sizes of 10 best visited partitions and corresponding log(ml) values\n")
% if (fid ~= -1) }
% fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); partitionSummary <- sortrows(partitionSummary, 2)
% end partitionSummary <- partitionSummary[size(partitionSummary, 1):-1, ]
end partitionSummary <- partitionSummary[find(partitionSummary[, 2] > -1e49), ]
if (size(partitionSummary, 1) > 10) {
vikaPartitio <- 10
} else {
dist_mat = dist_mat + dist_mat'; % make it symmetric vikaPartitio <- size(partitionSummary, 1)
for pop1 = 1:npops }
rivi = ['Cluster_' num2str(pop1) ' ']; for (part in 1:vikaPartitio) {
for pop2 = 1:npops line <- c(as.character(partitionSummary[part, 1]), " ", as.character(partitionSummary[part, 2]))
rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; cat(line, "\n")
end if (exists("fid")) {
disp(rivi); append(fid, c(line, "\n"))
if (fid ~= -1) }
fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); }
end if (!fixedK) {
end cat(" \n")
cat(" \n")
end cat("Probabilities for number of clusters\n")
if (exists("fid")) {
disp(' '); append(fid, " \n\n")
disp(' '); append(fid, " \n\n")
disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); append(fid, "Probabilities for number of clusters\n")
}
if (fid ~= -1) npopsTaulu <- unique(partitionSummary[, 1])
fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); len <- length(npopsTaulu)
fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); probs <- zeros(len, 1)
fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values']); fprintf(fid, '\n'); partitionSummary[, 2] <- partitionSummary[, 2] - max(partitionSummary[, 2])
end sumtn <- sum(exp(partitionSummary[, 2]))
for (i in 1:len) {
partitionSummary = sortrows(partitionSummary,2); npopstn <- sum(exp(partitionSummary(find(partitionSummary[, 1] == npopsTaulu(i)), 2)))
partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); probs[i] <- npopstn / sumtn
partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); }
if size(partitionSummary,1)>10 for (i in 1:len) {
vikaPartitio = 10; if (probs(i) > 1e-5) {
else line <- c(as.character(npopsTaulu(i)), " ", as.character(probs(i)))
vikaPartitio = size(partitionSummary,1); cat(line)
end if (exists("fid")) {
for part = 1:vikaPartitio append(fid, line)
line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; append(fid, "\n")
disp(line); }
if (fid ~= -1) }
fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); }
end }
end if (exists("fid")) {
save(fid, file = outPutFile)
if ~fixedK }
disp(' '); return(changesInLogml)
disp(' '); }
disp('Probabilities for number of clusters');
if (fid ~= -1)
fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n');
fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n');
fprintf(fid, '%s \n', ['Probabilities for number of clusters']); fprintf(fid, '\n');
end
npopsTaulu = unique(partitionSummary(:,1));
len = length(npopsTaulu);
probs = zeros(len,1);
partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2));
sumtn = sum(exp(partitionSummary(:,2)));
for i=1:len
npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2)));
probs(i) = npopstn / sumtn;
end
for i=1:len
if probs(i)>1e-5
line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))];
disp(line);
if (fid ~= -1)
fprintf(fid, '%s \n', [line]); fprintf(fid, '\n');
end
end
end
end
if (fid ~= -1)
fclose(fid);
else
diary off
end

View file

@ -0,0 +1,43 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/writeMixtureInfoPop.R
\name{writeMixtureInfoPop}
\alias{writeMixtureInfoPop}
\title{Write Mixture Info Pop}
\usage{
writeMixtureInfoPop(
logml,
rows,
data,
adjprior,
priorTerm,
outPutFile,
inputFile,
partitionSummary,
popnames,
fixedK
)
}
\arguments{
\item{logml}{logml}
\item{rows}{rows}
\item{data}{data}
\item{adjprior}{adjprior}
\item{priorTerm}{priorTerm}
\item{outPutFile}{outPutFile}
\item{inputFile}{inputFile}
\item{partitionSummary}{partitionSummary}
\item{popnames}{popnames}
\item{fixedK}{fixedK}
}
\description{
Writes information about the pop mixture
}