Translated writeMixtureInfoPop() (#2)

2022-05-19 10:46:01 +02:00 · 2022-05-19 10:46:01 +02:00 · f0d4858149
commit f0d4858149
parent 26e2a48c97
3 changed files with 269 additions and 258 deletions
--- a/1
+++ b/1
@ -38,6 +38,7 @@ export(takeLine)
 export(testaaOnkoKunnollinenBapsData)
 export(testaaPop)
 export(writeMixtureInfo)
 export(writeMixtureInfoPop)
 importFrom(Rsamtools,scanBam)
 importFrom(adegenet,.readExt)
 importFrom(adegenet,read.genepop)
--- a/R/writeMixtureInfoPop.R
+++ b/R/writeMixtureInfoPop.R
@ -1,258 +1,225 @@
-function changesInLogml = writeMixtureInfoPop(logml, rows, data, adjprior, ...
+#' @title Write Mixture Info Pop
-  priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK)
+#' @description Writes information about the pop mixture
-
+#' @param logml logml
-global PARTITION;
+#' @param rows rows
-global COUNTS;
+#' @param data data
-global SUMCOUNTS;
+#' @param adjprior adjprior
-global LOGDIFF;
+#' @param priorTerm priorTerm
-ninds = size(rows,1);
+#' @param outPutFile outPutFile
-npops =  size(COUNTS,3);
+#' @param inputFile inputFile
-names = (size(popnames,1) == ninds);    %Tarkistetaan ett?nimet viittaavat yksilöihin
+#' @param partitionSummary partitionSummary
-changesInLogml = [];
+#' @param popnames popnames
-if length(outPutFile)>0
+#' @param fixedK fixedK
-  fid = fopen(outPutFile,'a');
+#' @export
-else
+writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
-  fid = -1;
+                                outPutFile, inputFile, partitionSummary,
-  diary('baps4_output.baps'); % save in text anyway.
+                                popnames, fixedK) {
-end
+  ninds <- size(rows, 1)
-
+  npops <- size(COUNTS, 3)
-dispLine;
+  names <- size(popnames, 1) == ninds # Tarkistetaan ett?nimet viittaavat yksilöihin
-disp('RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:');
+  changesInLogml <- vector()
-disp(['Data file: ' inputFile]);
+  if (!missing(outPutFile)) {
-disp(['Number of clustered groups: ' ownNum2Str(ninds)]);
+    fid <- vector()
-disp(['Number of clusters in optimal partition: ' ownNum2Str(npops)]);
+  }
-disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]);
+  cat("RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:\n")
-disp(' ');
+  cat("Data file:", inputFile, "\n")
-if (fid ~= -1)
+  cat("Number of clustered groups:", ownNum2Str(ninds), "\n")
-  fprintf(fid,'%s \n', ['RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n');
+  cat("Number of clusters in optimal partition:", ownNum2Str(npops), "\n")
-  fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n');
+  cat("Log(marginal likelihood) of optimal partition:", ownNum2Str(logml), "\n")
-  fprintf(fid,'%s \n', ['Number of clustered groups: ' ownNum2Str(ninds)]); fprintf(fid,'\n');
+  if (exists("fid")) {
-  fprintf(fid,'%s \n', ['Number of clusters in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n');
+    append(fid, "RESULTS OF GROUP LEVEL MIXTURE ANALYSIS:\n")
-  fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n');
+    append(fid, c("Data file:", inputFile, "\n"))
-  fprintf(fid,'\n');
+    append(fid, c("Number of clustered groups:", ownNum2Str(ninds), "\n"))
-end
+    append(fid, c("Number of clusters in optimal partition:", ownNum2Str(npops), "\n"))
-
+    append(fid, c("Log(marginal likelihood) of optimal partition:", ownNum2Str(logml), "\n\n"))
-cluster_count = length(unique(PARTITION));
+  }
-disp(['Best Partition: ']);
+  cluster_count <- length(unique(PARTITION))
-if (fid ~= -1)
+  cat("Best Partition:\n")
-  fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n');
+  if (exists("fid")) {
-end
+    append(fid, c("Best partition:\n"))
-for m=1:cluster_count
+  }
-  indsInM = find(PARTITION==m);
+  for (m in 1:cluster_count) {
-  length_of_beginning = 11 + floor(log10(m));
+    indsInM <- find(PARTITION == m)
-  cluster_size = length(indsInM);
+    length_of_beginning <- 11 + floor(log10(m))
-
+    cluster_size <- length(indsInM)
-  if names
+    if (names) {
-      text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})];
+      text <- c("Cluster ", as.character(m), ": {", as.character(popnames[indsInM[1]]))
-      for k = 2:cluster_size
+      for (k in 2:cluster_size) {
-          text = [text ', ' char(popnames{indsInM(k)})];
+        text <- c(text, ", ", as.character(popnames[[indsInM[k]]]))
-      end;
+      }
-  else
+    } else {
-      text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))];
+      text <- c("Cluster ", as.character(m), ": {", as.character(indsInM[1]))
-      for k = 2:cluster_size
+      for (k in 2:cluster_size) {
-          text = [text ', ' num2str(indsInM(k))];
+        text <- c(text, ", ", as.character(indsInM[k]))
-      end;
+      }
-  end
+    }
-  text = [text '}'];
+    text <- c(text, "}")
-  while length(text)>58
+    while (length(text) > 58) {
-      %Take one line and display it.
+      # Take one line and display it.
-      new_line = takeLine(text,58);
+      new_line <- takeLine(text, 58)
-      text = text(length(new_line)+1:end);
+      text <- text[(length(new_line) + 1):length(text)]
-      disp(new_line);
+      cat(new_line, "\n")
-      if (fid ~= -1)
+      if (exists("fid")) {
-          fprintf(fid,'%s \n',[new_line]);
+        append(fid, c(new_line, "\n"))
-          fprintf(fid,'\n');
+      }
-      end
+      if (length(text) > 0) {
-      if length(text)>0
+        text <- c(blanks(length_of_beginning), text)
-          text = [blanks(length_of_beginning) text];
+      } else {
-      else
+        text <- vector()
-          text = [];
+      }
-      end;
+    }
-  end;
+    if (!is.null(text)) {
-  if ~isempty(text)
+      cat(text, "\n")
-      disp(text);
+      if (exists(fid)) {
-      if (fid ~= -1)
+        append(fid, c("\n", text, "\n"))
-          fprintf(fid,'%s \n',[text]);
+      }
-          fprintf(fid,'\n');
+    }
-      end
+  }
-  end;
+  if (npops > 1) {
-end
+    cat("\n\nChanges in log(marginal likelihood) if (group i is moved to cluster j:")
-
+    if (exists("fid")) {
-if npops > 1
+      append(fid, " \n \n", )
-  disp(' ');
+      append(fid, "Changes in log(marginal likelihood) if (group i is moved to cluster j:")
-  disp(' ');
+    }
-  disp('Changes in log(marginal likelihood) if group i is moved to cluster j:');
+    if (names) {
-  if (fid ~= -1)
+      nameSizes <- zeros(ninds, 1)
-      fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n');
+      for (i in 1:ninds) {
-      fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n');
+        nimi <- as.character(popnames[i])
-      fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if group i is moved to cluster j:']); %fprintf(fid, '\n');
+        nameSizes[i] <- length(nimi)
-  end
+      }
-
+      maxSize <- max(nameSizes)
-  if names
+      maxSize <- max(maxSize, 5)
-      nameSizes = zeros(ninds,1);
+      erotus <- maxSize - 5
-      for i = 1:ninds
+      alku <- blanks(erotus)
-          nimi = char(popnames{i});
+      ekarivi <- c(alku, "group", blanks(6 + erotus))
-          nameSizes(i) = length(nimi);
+    } else {
-      end
+      ekarivi <- "group      "
-      maxSize = max(nameSizes);
+    }
-      maxSize = max(maxSize, 5);
+    for (i in 1:cluster_count) {
-      erotus = maxSize - 5;
+      ekarivi <- c(ekarivi, ownNum2Str(i), blanks(8 - floor(log10(i))))
-      alku = blanks(erotus);
+    }
-      ekarivi = [alku 'group' blanks(6+erotus)];
+    cat(ekarivi, "\n")
-  else
+    if (exists("fid")) {
-      ekarivi = 'group      ';
+      append(fid, c(ekarivi, "\n"))
-  end
+    }
-  for i = 1:cluster_count
+    changesInLogml <- t(LOGDIFF)
-      ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))];
+    for (ind in 1:ninds) {
-  end
+      muutokset <- changesInLogml[, ind]
-  disp(ekarivi);
+      if (names) {
-  if (fid ~= -1)
+        nimi <- as.character(popnames[ind])
-      fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n');
+        rivi <- c(blanks(maxSize - length(nimi)), nimi, ":")
-  end
+      } else {
-
+        rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":")
-  changesInLogml = LOGDIFF';
+      }
-  for ind = 1:ninds
+      for (j in 1:npops) {
-      %[muutokset, diffInCounts] = laskeMuutokset(ind, rows, data, ...
+        rivi <- c(rivi, "  ", logml2String(omaRound(muutokset(j))))
-      %    adjprior, priorTerm);
+      }
-      %changesInLogml(:,ind) = muutokset;
+      cat(rivi, "\n")
-      muutokset = changesInLogml(:,ind);
+      if (exists("fid")) {
-      if names
+        append(fid, c(rivi, "\n"))
-          nimi = char(popnames{ind});
+      }
-          rivi = [blanks(maxSize - length(nimi)) nimi ':'];
+    }
-      else
+    cat("  ")
-          rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':'];
+    cat("KL-divergence matrix in PHYLIP format:")
-      end
+    dist_mat <- zeros(npops, npops)
-      for j = 1:npops
+    if (exists("fid")) {
-          rivi = [rivi '  ' logml2String(omaRound(muutokset(j)))];
+      append(fid, " \n")
-      end
+      append(fid, " \n")
-      disp(rivi);
+      append(fid, "KL - divergence matrix in PHYLIP format:\n")
-      if (fid ~= -1)
+    }
-          fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n');
+    maxnoalle <- size(COUNTS, 1)
-      end
+    nloci <- size(COUNTS, 2)
-  end
+    d <- zeros(maxnoalle, nloci, npops)
-
+    prior <- adjprior
-  disp(' '); disp(' ');
+    prior[find[prior == 1]] <- 0
-  disp('KL-divergence matrix in PHYLIP format:');
+    nollia <- find(all(prior == 0)) # Lokukset, joissa oli havaittu vain yht?alleelia.
-  dist_mat = zeros(npops, npops);
+    prior[1, nollia] <- 1
-  if (fid ~= -1)
+    for (pop1 in 1:npops) {
-      fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n');
+      d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
-      fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n');
+    }
-      fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n');
+    ekarivi <- as.character(npops)
-  end
+    cat(ekarivi, "\n")
-
+    if (exists("fid")) {
-  maxnoalle = size(COUNTS,1);
+      append(fid, c(ekarivi, "\n"))
-  nloci = size(COUNTS,2);
+    }
-  d = zeros(maxnoalle, nloci, npops);
+    for (pop1 in 1:npops) {
-  prior = adjprior;
+      rivi <- c(blanks(2 - floor(log10(pop1))), as.character(pop1), "  ")
-  prior(find(prior==1))=0;
+      for (pop2 in 1:(pop1 - 1)) {
-  nollia = find(all(prior==0));  %Lokukset, joissa oli havaittu vain yht?alleelia.
+        dist1 <- d[, , pop1]
-  prior(1,nollia)=1;
+        dist2 <- d[, , pop2]
-  for pop1 = 1:npops
+        div12 <- sum(sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10)))) / nloci
-      d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1);
+        div21 <- sum(sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10)))) / nloci
-      %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1);
+        div <- (div12 + div21) / 2
-  end
+        dist_mat[pop1, pop2] <- div
-%     ekarivi = blanks(7);
+      }
-%     for pop = 1:npops
+    }
-%         ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))];
+    dist_mat <- dist_mat + t(dist_mat) # make it symmetric
-%     end
+    for (pop1 in 1:npops) {
-  ekarivi = num2str(npops);
+      rivi <- c("Cluster_", as.character(pop1), " ")
-  disp(ekarivi);
+      for (pop2 in 1:npops) {
-
+        rivi <- c(rivi, kldiv2str(dist_mat(pop1, pop2)), " ")
-  if (fid ~= -1)
+      }
-      fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n');
+      cat(rivi)
-  end
+      if (exists("fid")) {
-
+        append(fid, c(rivi, "\n"))
-  for pop1 = 1:npops
+      }
-      rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) '  '];
+    }
-      for pop2 = 1:pop1-1
+  }
-          dist1 = d(:,:,pop1); dist2 = d(:,:,pop2);
+  cat(" \n \n \n")
-          div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci;
+  cat("List of sizes of 10 best visited partitions and corresponding log(ml) values\n")
-          div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci;
+  if (exists("fid")) {
-          div = (div12+div21)/2;
+    append(fid, " \n\n")
-          % rivi = [rivi kldiv2str(div) '  '];
+    append(fid, " \n\n")
-          dist_mat(pop1,pop2) = div;
+    append(fid, " \n\n")
-      end
+    append(fid, " \n\n")
-%         disp(rivi);
+    append(fid, "List of sizes of 10 best visited partitions and corresponding log(ml) values\n")
-%         if (fid ~= -1)
+  }
-%             fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n');
+  partitionSummary <- sortrows(partitionSummary, 2)
-%         end
+  partitionSummary <- partitionSummary[size(partitionSummary, 1):-1, ]
-  end
+  partitionSummary <- partitionSummary[find(partitionSummary[, 2] > -1e49), ]
-
+  if (size(partitionSummary, 1) > 10) {
-
+    vikaPartitio <- 10
-
+  } else {
-  dist_mat = dist_mat + dist_mat'; % make it symmetric
+    vikaPartitio <- size(partitionSummary, 1)
-  for pop1 = 1:npops
+  }
-      rivi = ['Cluster_' num2str(pop1) ' '];
+  for (part in 1:vikaPartitio) {
-      for pop2 = 1:npops
+    line <- c(as.character(partitionSummary[part, 1]), "    ", as.character(partitionSummary[part, 2]))
-          rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' '];
+    cat(line, "\n")
-      end
+    if (exists("fid")) {
-      disp(rivi);
+      append(fid, c(line, "\n"))
-      if (fid ~= -1)
+    }
-          fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n');
+  }
-      end
+  if (!fixedK) {
-  end
+    cat(" \n")
-
+    cat(" \n")
-end
+    cat("Probabilities for number of clusters\n")
-
+    if (exists("fid")) {
-disp(' ');
+      append(fid, " \n\n")
-disp(' ');
+      append(fid, " \n\n")
-disp('List of sizes of 10 best visited partitions and corresponding log(ml) values');
+      append(fid, "Probabilities for number of clusters\n")
-
+    }
-if (fid ~= -1)
+    npopsTaulu <- unique(partitionSummary[, 1])
-  fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n');
+    len <- length(npopsTaulu)
-  fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n');
+    probs <- zeros(len, 1)
-  fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values']); fprintf(fid, '\n');
+    partitionSummary[, 2] <- partitionSummary[, 2] - max(partitionSummary[, 2])
-end
+    sumtn <- sum(exp(partitionSummary[, 2]))
-
+    for (i in 1:len) {
-partitionSummary = sortrows(partitionSummary,2);
+      npopstn <- sum(exp(partitionSummary(find(partitionSummary[, 1] == npopsTaulu(i)), 2)))
-partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :);
+      probs[i] <- npopstn / sumtn
-partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:);
+    }
-if size(partitionSummary,1)>10
+    for (i in 1:len) {
-  vikaPartitio = 10;
+      if (probs(i) > 1e-5) {
-else
+        line <- c(as.character(npopsTaulu(i)), "   ", as.character(probs(i)))
-  vikaPartitio = size(partitionSummary,1);
+        cat(line)
-end
+        if (exists("fid")) {
-for part = 1:vikaPartitio
+          append(fid, line)
-  line = [num2str(partitionSummary(part,1)) '    ' num2str(partitionSummary(part,2))];
+          append(fid, "\n")
-  disp(line);
+        }
-  if (fid ~= -1)
+      }
-      fprintf(fid, '%s \n', [line]); fprintf(fid, '\n');
+    }
-  end
+  }
-end
+  if (exists("fid")) {
-
+    save(fid, file = outPutFile)
-if ~fixedK
+  }
-  disp(' ');
+  return(changesInLogml)
-  disp(' ');
+}
  disp('Probabilities for number of clusters');
  if (fid ~= -1)
      fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n');
      fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n');
      fprintf(fid, '%s \n', ['Probabilities for number of clusters']); fprintf(fid, '\n');
  end
  npopsTaulu = unique(partitionSummary(:,1));
  len = length(npopsTaulu);
  probs = zeros(len,1);
  partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2));
  sumtn = sum(exp(partitionSummary(:,2)));
  for i=1:len
      npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2)));
      probs(i) = npopstn / sumtn;
  end
  for i=1:len
      if probs(i)>1e-5
          line = [num2str(npopsTaulu(i)) '   ' num2str(probs(i))];
          disp(line);
          if (fid ~= -1)
              fprintf(fid, '%s \n', [line]); fprintf(fid, '\n');
          end
      end
  end
 end
 if (fid ~= -1)
  fclose(fid);
 else
  diary off
 end
--- a/man/writeMixtureInfoPop.Rd
+++ b/man/writeMixtureInfoPop.Rd
@ -0,0 +1,43 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/writeMixtureInfoPop.R
 \name{writeMixtureInfoPop}
 \alias{writeMixtureInfoPop}
 \title{Write Mixture Info Pop}
 \usage{
 writeMixtureInfoPop(
  logml,
  rows,
  data,
  adjprior,
  priorTerm,
  outPutFile,
  inputFile,
  partitionSummary,
  popnames,
  fixedK
 )
 }
 \arguments{
 \item{logml}{logml}
 \item{rows}{rows}
 \item{data}{data}
 \item{adjprior}{adjprior}
 \item{priorTerm}{priorTerm}
 \item{outPutFile}{outPutFile}
 \item{inputFile}{inputFile}
 \item{partitionSummary}{partitionSummary}
 \item{popnames}{popnames}
 \item{fixedK}{fixedK}
 }
 \description{
 Writes information about the pop mixture
 }