From 03db7f41242eac0559303b19ce9958e0dd83d012 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 12 Apr 2022 14:31:54 +0200 Subject: [PATCH 1/6] Translated handlePopData (#2) --- NAMESPACE | 1 + R/handlePopData.R | 71 +++++++++++++++++++++++ man/handlePopData.Rd | 14 +++++ matlab/independent/greedyPopMix.m | 94 ------------------------------- 4 files changed, 86 insertions(+), 94 deletions(-) create mode 100644 R/handlePopData.R create mode 100644 man/handlePopData.Rd diff --git a/NAMESPACE b/NAMESPACE index 9c77f97..8b6dded 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(fgetl) export(fopen) export(greedyMix) export(handleData) +export(handlePopData) export(initPopNames) export(laskeMuutokset4) export(learn_partition_modified) diff --git a/R/handlePopData.R b/R/handlePopData.R new file mode 100644 index 0000000..56a7356 --- /dev/null +++ b/R/handlePopData.R @@ -0,0 +1,71 @@ +#' @title Handle Pop data +#' @description The last column of the original data tells you which +#' that line is from. The function changes the allele +#' codes so that the codes for one locus have values between 1 and noalle[j]. +#' Before this change, an allele whose code is zero is changed. +#' @param raw_data raw data +#' @export +handlePopData <- function(raw_data) { + # Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? + # kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit + # siten, ett?yhden lokuksen j koodit saavat arvoja + # välill?1, ,noalle(j). Ennen tät?muutosta alleeli, jonka + # koodi on nolla muutetaan. + + data <- raw_data + nloci <- size(raw_data, 2) - 1 + + dataApu <- data[, 1:nloci] + nollat <- find(dataApu == 0) + if (length(nollat) > 0) { + isoinAlleeli <- max(max(dataApu)$maxs)$maxs + dataApu[nollat] <- isoinAlleeli + 1 + data[, 1:nloci] <- dataApu + } + + noalle <- zeros(1, nloci) + alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE) + for (i in 1:nloci) { + alleelitLokuksessaI <- sort(unique(data[, i])) + alleelitLokuksessa[[i]] <- alleelitLokuksessaI[find(alleelitLokuksessaI >= 0)] + noalle[i] <- length(alleelitLokuksessa[[i]]) + } + alleleCodes <- zeros(unique(max(noalle)$maxs), nloci) + for (i in 1:nloci) { + alleelitLokuksessaI <- alleelitLokuksessa[[i]] + puuttuvia <- unique(max(noalle)$maxs) - length(alleelitLokuksessaI) + alleleCodes[, i] = c(alleelitLokuksessaI, zeros(puuttuvia, 1)) + } + + for (loc in 1:nloci) { + for (all in 1:noalle[loc]) { + data[find(data[ , loc] == alleleCodes[all, loc]), loc] <- all + } + } + + nind <- max(data[, ncol(data)])$maxs + rows <- zeros(nind, 2) + for (i in 1:nind) { + rivit <- t(find(data[, ncol(data)] == i)) + rows[i, 1] <- min(rivit)$mins + rows[i, 2] <- max(rivit)$maxs + } + newData <- data + + adjprior <- zeros(unique(max(noalle)$maxs), nloci) + priorTerm <- 0 + for (j in 1:nloci) { + adjprior[, j] <- c(repmat(1 / noalle[j], c(noalle[j], 1)), ones(unique(max(noalle)$maxs) - noalle[j], 1)) + priorTerm <- priorTerm + noalle[j] * log(gamma(1 / noalle[j])) + } + return( + list( + newData = newData, + rows = rows, + alleleCodes = alleleCodes, + noalle = noalle, + adjprior = adjprior, + priorTerm = priorTerm + ) + ) +} diff --git a/man/handlePopData.Rd b/man/handlePopData.Rd new file mode 100644 index 0000000..9ce3be6 --- /dev/null +++ b/man/handlePopData.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/handlePopData.R +\name{handlePopData} +\alias{handlePopData} +\title{Handle Pop data} +\usage{ +handlePopData(raw_data) +} +\arguments{ +\item{raw_data}{raw data} +} +\description{ +The last column of the original data tells you which? that line is from. The function changes the allele codes so that the codes for one locus have values between 1,, Noah (j). Before this change, an allele whose code is zero is changed. +} diff --git a/matlab/independent/greedyPopMix.m b/matlab/independent/greedyPopMix.m index 4618135..17d63ae 100644 --- a/matlab/independent/greedyPopMix.m +++ b/matlab/independent/greedyPopMix.m @@ -251,100 +251,6 @@ else end end -%-------------------------------------------------------------------------- - - -function [newData, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit -% siten, ett?yhden lokuksen j koodit saavat arvoja -% välill?1,...,noalle(j). Ennen tät?muutosta alleeli, jonka -% koodi on nolla muutetaan. - - -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -%rows = cell(nind,1); -rows = zeros(nind,2); -for i=1:nind - rivit = find(data(:,end)==i)'; - rows(i,1) = min(rivit); - rows(i,2) = max(rivit); -end -newData = data; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - - -%-------------------------------------------------------------------- - -function [Z,distances] = getPopDistancesByKL(adjprior) -% Laskee populaatioille etäisyydet -% käyttäen KL-divergenssi? -global COUNTS; -maxnoalle = size(COUNTS,1); -nloci = size(COUNTS,2); -npops = size(COUNTS,3); -distances = zeros(nchoosek(npops,2),1); - -d = zeros(maxnoalle, nloci, npops); -prior = adjprior; -prior(find(prior==1))=0; -nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. -prior(1,nollia)=1; -for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -end -pointer = 1; -for pop1 = 1:npops-1 - for pop2 = pop1+1:npops - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - distances(pointer) = div; - pointer = pointer+1; - end -end -Z=linkage(distances'); - %-------------------------------------------------------------------- From 40f2b3f07ad5c5416947bd96502b263ecba35d33 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 12 Apr 2022 15:47:22 +0200 Subject: [PATCH 2/6] Added test unit for handlePopData() --- tests/testthat/test-greedyPopMix.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testthat/test-greedyPopMix.R b/tests/testthat/test-greedyPopMix.R index e3d79b2..5378a70 100644 --- a/tests/testthat/test-greedyPopMix.R +++ b/tests/testthat/test-greedyPopMix.R @@ -16,4 +16,14 @@ test_that("Auxiliary functions work properly", { distances = as.matrix(rep(0, 4950)) ) ) + x3 <- matrix(c(9, 4, 1, 8, 5, 2, 1, 2, 3), 3) + y3 <- list( + newData = matrix(c(3:1, 3:1, 1:3), 3), + rows = matrix(c(1:3, 1:3), 3), + alleleCodes = matrix(c(1, 4, 9, 2, 5, 8), 3), + noalle = c(3, 3), + adjprior = matrix(rep(3/9, 6), 3), + priorTerm = 5.9125 + ) + expect_equal(handlePopData(x3), y3) }) From f81a07836539505409c613d97e9bee928921d809 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 12 Apr 2022 15:50:16 +0200 Subject: [PATCH 3/6] Reorganizing tests --- tests/testthat/test-greedyPopMix.R | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-greedyPopMix.R b/tests/testthat/test-greedyPopMix.R index 5378a70..cfe441c 100644 --- a/tests/testthat/test-greedyPopMix.R +++ b/tests/testthat/test-greedyPopMix.R @@ -1,14 +1,17 @@ context("greedyPopMix functions") test_that("Auxiliary functions work properly", { + # Testing findOutRowsFromInd ------------------------------------------------- x <- matrix(11:16, 3) y <- matrix(2:7, 3) z <- list( popnames2 = matrix(c(11:13, seq(1.5, 2.5, 0.5)), 3), rowsFromInd = 2 ) - x2 <- matrix(seq(4, 14, 2), 3) expect_equal(findOutRowsFromInd(x, y, "Diploid"), z) + + # Testing getPopDistancesByKL ------------------------------------------------ + x2 <- matrix(seq(4, 14, 2), 3) expect_equal( getPopDistancesByKL(x2), list( @@ -16,14 +19,16 @@ test_that("Auxiliary functions work properly", { distances = as.matrix(rep(0, 4950)) ) ) + + # Testing handlePopData ------------------------------------------------------ x3 <- matrix(c(9, 4, 1, 8, 5, 2, 1, 2, 3), 3) y3 <- list( - newData = matrix(c(3:1, 3:1, 1:3), 3), - rows = matrix(c(1:3, 1:3), 3), + newData = matrix(c(3: 1, 3: 1, 1: 3), 3), + rows = matrix(c(1: 3, 1: 3), 3), alleleCodes = matrix(c(1, 4, 9, 2, 5, 8), 3), - noalle = c(3, 3), - adjprior = matrix(rep(3/9, 6), 3), - priorTerm = 5.9125 + noalle = c(3, 3), + adjprior = matrix(rep(3/9, 6), 3), + priorTerm = 5.9125 ) expect_equal(handlePopData(x3), y3) }) From 9b1cf810a3528f79772faa64ec68be68db5064f7 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 13 Apr 2022 08:59:07 +0200 Subject: [PATCH 4/6] Fixing test expectation --- tests/testthat/test-greedyPopMix.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-greedyPopMix.R b/tests/testthat/test-greedyPopMix.R index cfe441c..873ae1e 100644 --- a/tests/testthat/test-greedyPopMix.R +++ b/tests/testthat/test-greedyPopMix.R @@ -26,9 +26,9 @@ test_that("Auxiliary functions work properly", { newData = matrix(c(3: 1, 3: 1, 1: 3), 3), rows = matrix(c(1: 3, 1: 3), 3), alleleCodes = matrix(c(1, 4, 9, 2, 5, 8), 3), - noalle = c(3, 3), + noalle = matrix(c(3, 3), 1), adjprior = matrix(rep(3/9, 6), 3), priorTerm = 5.9125 ) - expect_equal(handlePopData(x3), y3) + expect_equal(handlePopData(x3), y3, tol = 1e-4) }) From c0f4072525fbd5e230b8c604dd31c818bb011a7e Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 13 Apr 2022 09:13:25 +0200 Subject: [PATCH 5/6] Bumped build version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7379aac..4ce1fa8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rBAPS Title: Bayesian Analysis of Population Structure -Version: 0.0.0.9005 +Version: 0.0.0.9006 Date: 2020-11-09 Authors@R: c( From ae57dd453606ac8c93afa596441039bdafb6c8b1 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 13 Apr 2022 09:17:40 +0200 Subject: [PATCH 6/6] Updated docs --- man/handlePopData.Rd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/man/handlePopData.Rd b/man/handlePopData.Rd index 9ce3be6..6fb9d48 100644 --- a/man/handlePopData.Rd +++ b/man/handlePopData.Rd @@ -10,5 +10,8 @@ handlePopData(raw_data) \item{raw_data}{raw data} } \description{ -The last column of the original data tells you which? that line is from. The function changes the allele codes so that the codes for one locus have values between 1,, Noah (j). Before this change, an allele whose code is zero is changed. +The last column of the original data tells you which + that line is from. The function changes the allele +codes so that the codes for one locus have values between 1 and noalle[j]. +Before this change, an allele whose code is zero is changed. }