Merge issue-2-translate-greedyPopMix into develop

2022-04-13 09:17:46 +02:00 · 2022-04-13 09:17:46 +02:00 · 2ac99025b4
commit 2ac99025b4
parent 9224c44cb6 ae57dd4536
6 changed files with 106 additions and 96 deletions
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 Package: rBAPS
 Title: Bayesian Analysis of Population Structure
-Version: 0.0.0.9005
+Version: 0.0.0.9006
 Date: 2020-11-09
 Authors@R:
    c(
--- a/1
+++ b/1
@ -12,6 +12,7 @@ export(fgetl)
 export(fopen)
 export(greedyMix)
 export(handleData)
+export(handlePopData)
 export(initPopNames)
 export(laskeMuutokset4)
 export(learn_partition_modified)
--- a/R/handlePopData.R
+++ b/R/handlePopData.R
@ -0,0 +1,71 @@
+#' @title Handle Pop data
+#' @description The last column of the original data tells you which
+#' <missing translation> that line is from. The function changes the allele
+#' codes so that the codes for one locus have values between 1 and noalle[j].
+#' Before this change, an allele whose code is zero is changed.
+#' @param raw_data raw data
+#' @export
+handlePopData <- function(raw_data) {
+  # Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
+  # kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
+  # siten, ett?yhden lokuksen j koodit saavat arvoja
+  # välill?1, ,noalle(j). Ennen tät?muutosta alleeli, jonka
+  # koodi on nolla muutetaan.
+
+  data <- raw_data
+  nloci <- size(raw_data, 2) - 1
+
+  dataApu <- data[, 1:nloci]
+  nollat <- find(dataApu == 0)
+  if (length(nollat) > 0) {
+     isoinAlleeli <- max(max(dataApu)$maxs)$maxs
+     dataApu[nollat] <- isoinAlleeli + 1
+     data[, 1:nloci] <- dataApu
+  }
+
+  noalle <- zeros(1, nloci)
+  alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
+  for (i in 1:nloci) {
+      alleelitLokuksessaI <- sort(unique(data[, i]))
+      alleelitLokuksessa[[i]]  <- alleelitLokuksessaI[find(alleelitLokuksessaI >= 0)]
+      noalle[i] <- length(alleelitLokuksessa[[i]])
+  }
+  alleleCodes <- zeros(unique(max(noalle)$maxs), nloci)
+  for (i in 1:nloci) {
+      alleelitLokuksessaI <- alleelitLokuksessa[[i]]
+      puuttuvia <- unique(max(noalle)$maxs) - length(alleelitLokuksessaI)
+      alleleCodes[, i] = c(alleelitLokuksessaI, zeros(puuttuvia, 1))
+  }
+
+  for (loc in 1:nloci) {
+      for (all in 1:noalle[loc]) {
+          data[find(data[ , loc] == alleleCodes[all, loc]), loc] <- all
+      }
+  }
+
+  nind <- max(data[, ncol(data)])$maxs
+  rows <- zeros(nind, 2)
+  for (i in 1:nind) {
+      rivit <- t(find(data[, ncol(data)] == i))
+      rows[i, 1] <- min(rivit)$mins
+      rows[i, 2] <- max(rivit)$maxs
+  }
+  newData <- data
+
+  adjprior <- zeros(unique(max(noalle)$maxs), nloci)
+  priorTerm <- 0
+  for (j in 1:nloci) {
+      adjprior[, j]  <- c(repmat(1 / noalle[j], c(noalle[j], 1)), ones(unique(max(noalle)$maxs) - noalle[j], 1))
+      priorTerm <- priorTerm + noalle[j] * log(gamma(1 / noalle[j]))
+  }
+	return(
+        list(
+            newData = newData,
+            rows = rows,
+            alleleCodes = alleleCodes,
+            noalle = noalle,
+            adjprior = adjprior,
+            priorTerm = priorTerm
+        )
+    )
+}
--- a/man/handlePopData.Rd
+++ b/man/handlePopData.Rd
@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/handlePopData.R
+\name{handlePopData}
+\alias{handlePopData}
+\title{Handle Pop data}
+\usage{
+handlePopData(raw_data)
+}
+\arguments{
+\item{raw_data}{raw data}
+}
+\description{
+The last column of the original data tells you which
+<missing translation> that line is from. The function changes the allele
+codes so that the codes for one locus have values between 1 and noalle[j].
+Before this change, an allele whose code is zero is changed.
+}
--- a/matlab/independent/greedyPopMix.m
+++ b/matlab/independent/greedyPopMix.m
@ -251,100 +251,6 @@ else
    end
 end

-%--------------------------------------------------------------------------
-
-
-function [newData, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(raw_data)
-% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
-% kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
-% siten, ett?yhden lokuksen j koodit saavat arvoja
-% välill?1,...,noalle(j). Ennen tät?muutosta alleeli, jonka
-% koodi on nolla muutetaan.
-
-
-data = raw_data;
-nloci=size(raw_data,2)-1;
-
-dataApu = data(:,1:nloci);
-nollat = find(dataApu==0);
-if ~isempty(nollat)
-   isoinAlleeli = max(max(dataApu));
-   dataApu(nollat) = isoinAlleeli+1;
-   data(:,1:nloci) = dataApu;
-end
-dataApu = []; nollat = []; isoinAlleeli = [];
-
-noalle=zeros(1,nloci);
-alleelitLokuksessa = cell(nloci,1);
-for i=1:nloci
-    alleelitLokuksessaI = unique(data(:,i));
-    alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0));
-    noalle(i) = length(alleelitLokuksessa{i,1});
-end
-alleleCodes = zeros(max(noalle),nloci);
-for i=1:nloci
-    alleelitLokuksessaI = alleelitLokuksessa{i,1};
-    puuttuvia = max(noalle)-length(alleelitLokuksessaI);
-    alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)];
-end
-
-for loc = 1:nloci
-    for all = 1:noalle(loc)
-        data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all;
-    end;
-end;
-
-nind = max(data(:,end));
-%rows = cell(nind,1);
-rows = zeros(nind,2);
-for i=1:nind
-    rivit = find(data(:,end)==i)';
-    rows(i,1) = min(rivit);
-    rows(i,2) = max(rivit);
-end
-newData = data;
-
-adjprior = zeros(max(noalle),nloci);
-priorTerm = 0;
-for j=1:nloci
-    adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)];
-    priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j));
-end
-
-
-%--------------------------------------------------------------------
-
-function [Z,distances] = getPopDistancesByKL(adjprior)
-% Laskee populaatioille etäisyydet
-% käyttäen KL-divergenssi?
-global COUNTS;
-maxnoalle = size(COUNTS,1);
-nloci = size(COUNTS,2);
-npops = size(COUNTS,3);
-distances = zeros(nchoosek(npops,2),1);
-
-d = zeros(maxnoalle, nloci, npops);
-prior = adjprior;
-prior(find(prior==1))=0;
-nollia = find(all(prior==0));  %Lokukset, joissa oli havaittu vain yht?alleelia.
-prior(1,nollia)=1;
-for pop1 = 1:npops
-    d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1);
-    %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1);
-end
-pointer = 1;
-for pop1 = 1:npops-1
-    for pop2 = pop1+1:npops
-        dist1 = d(:,:,pop1); dist2 = d(:,:,pop2);
-        div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci;
-        div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci;
-        div = (div12+div21)/2;
-        distances(pointer) = div;
-        pointer = pointer+1;
-    end
-end
-Z=linkage(distances');
-
 %--------------------------------------------------------------------


--- a/tests/testthat/test-greedyPopMix.R
+++ b/tests/testthat/test-greedyPopMix.R
@ -1,14 +1,17 @@
 context("greedyPopMix functions")

 test_that("Auxiliary functions work properly", {
+  # Testing findOutRowsFromInd -------------------------------------------------
  x <- matrix(11:16, 3)
  y <- matrix(2:7, 3)
  z <- list(
    popnames2 = matrix(c(11:13, seq(1.5, 2.5, 0.5)), 3),
    rowsFromInd = 2
  )
-  x2 <- matrix(seq(4, 14, 2), 3)
  expect_equal(findOutRowsFromInd(x, y, "Diploid"), z)
+
+  # Testing getPopDistancesByKL ------------------------------------------------
+  x2 <- matrix(seq(4, 14, 2), 3)
  expect_equal(
    getPopDistancesByKL(x2),
    list(
@ -16,4 +19,16 @@ test_that("Auxiliary functions work properly", {
      distances = as.matrix(rep(0, 4950))
    )
  )
+
+  # Testing handlePopData ------------------------------------------------------
+  x3 <- matrix(c(9, 4, 1, 8, 5, 2, 1, 2, 3), 3)
+  y3 <- list(
+    newData     = matrix(c(3: 1, 3: 1, 1: 3), 3),
+    rows        = matrix(c(1: 3, 1: 3), 3),
+    alleleCodes = matrix(c(1, 4, 9, 2, 5, 8), 3),
+    noalle      = matrix(c(3, 3), 1),
+    adjprior    = matrix(rep(3/9, 6), 3),
+    priorTerm   = 5.9125
+  )
+  expect_equal(handlePopData(x3), y3, tol = 1e-4)
 })