diff --git a/DESCRIPTION b/DESCRIPTION index 7379aac..4ce1fa8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rBAPS Title: Bayesian Analysis of Population Structure -Version: 0.0.0.9005 +Version: 0.0.0.9006 Date: 2020-11-09 Authors@R: c( diff --git a/NAMESPACE b/NAMESPACE index 9c77f97..8b6dded 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(fgetl) export(fopen) export(greedyMix) export(handleData) +export(handlePopData) export(initPopNames) export(laskeMuutokset4) export(learn_partition_modified) diff --git a/R/handlePopData.R b/R/handlePopData.R new file mode 100644 index 0000000..56a7356 --- /dev/null +++ b/R/handlePopData.R @@ -0,0 +1,71 @@ +#' @title Handle Pop data +#' @description The last column of the original data tells you which +#' that line is from. The function changes the allele +#' codes so that the codes for one locus have values between 1 and noalle[j]. +#' Before this change, an allele whose code is zero is changed. +#' @param raw_data raw data +#' @export +handlePopData <- function(raw_data) { + # Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? + # kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit + # siten, ett?yhden lokuksen j koodit saavat arvoja + # välill?1, ,noalle(j). Ennen tät?muutosta alleeli, jonka + # koodi on nolla muutetaan. + + data <- raw_data + nloci <- size(raw_data, 2) - 1 + + dataApu <- data[, 1:nloci] + nollat <- find(dataApu == 0) + if (length(nollat) > 0) { + isoinAlleeli <- max(max(dataApu)$maxs)$maxs + dataApu[nollat] <- isoinAlleeli + 1 + data[, 1:nloci] <- dataApu + } + + noalle <- zeros(1, nloci) + alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE) + for (i in 1:nloci) { + alleelitLokuksessaI <- sort(unique(data[, i])) + alleelitLokuksessa[[i]] <- alleelitLokuksessaI[find(alleelitLokuksessaI >= 0)] + noalle[i] <- length(alleelitLokuksessa[[i]]) + } + alleleCodes <- zeros(unique(max(noalle)$maxs), nloci) + for (i in 1:nloci) { + alleelitLokuksessaI <- alleelitLokuksessa[[i]] + puuttuvia <- unique(max(noalle)$maxs) - length(alleelitLokuksessaI) + alleleCodes[, i] = c(alleelitLokuksessaI, zeros(puuttuvia, 1)) + } + + for (loc in 1:nloci) { + for (all in 1:noalle[loc]) { + data[find(data[ , loc] == alleleCodes[all, loc]), loc] <- all + } + } + + nind <- max(data[, ncol(data)])$maxs + rows <- zeros(nind, 2) + for (i in 1:nind) { + rivit <- t(find(data[, ncol(data)] == i)) + rows[i, 1] <- min(rivit)$mins + rows[i, 2] <- max(rivit)$maxs + } + newData <- data + + adjprior <- zeros(unique(max(noalle)$maxs), nloci) + priorTerm <- 0 + for (j in 1:nloci) { + adjprior[, j] <- c(repmat(1 / noalle[j], c(noalle[j], 1)), ones(unique(max(noalle)$maxs) - noalle[j], 1)) + priorTerm <- priorTerm + noalle[j] * log(gamma(1 / noalle[j])) + } + return( + list( + newData = newData, + rows = rows, + alleleCodes = alleleCodes, + noalle = noalle, + adjprior = adjprior, + priorTerm = priorTerm + ) + ) +} diff --git a/man/handlePopData.Rd b/man/handlePopData.Rd new file mode 100644 index 0000000..6fb9d48 --- /dev/null +++ b/man/handlePopData.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/handlePopData.R +\name{handlePopData} +\alias{handlePopData} +\title{Handle Pop data} +\usage{ +handlePopData(raw_data) +} +\arguments{ +\item{raw_data}{raw data} +} +\description{ +The last column of the original data tells you which + that line is from. The function changes the allele +codes so that the codes for one locus have values between 1 and noalle[j]. +Before this change, an allele whose code is zero is changed. +} diff --git a/matlab/independent/greedyPopMix.m b/matlab/independent/greedyPopMix.m index 4618135..17d63ae 100644 --- a/matlab/independent/greedyPopMix.m +++ b/matlab/independent/greedyPopMix.m @@ -251,100 +251,6 @@ else end end -%-------------------------------------------------------------------------- - - -function [newData, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(raw_data) -% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? -% kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit -% siten, ett?yhden lokuksen j koodit saavat arvoja -% välill?1,...,noalle(j). Ennen tät?muutosta alleeli, jonka -% koodi on nolla muutetaan. - - -data = raw_data; -nloci=size(raw_data,2)-1; - -dataApu = data(:,1:nloci); -nollat = find(dataApu==0); -if ~isempty(nollat) - isoinAlleeli = max(max(dataApu)); - dataApu(nollat) = isoinAlleeli+1; - data(:,1:nloci) = dataApu; -end -dataApu = []; nollat = []; isoinAlleeli = []; - -noalle=zeros(1,nloci); -alleelitLokuksessa = cell(nloci,1); -for i=1:nloci - alleelitLokuksessaI = unique(data(:,i)); - alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); - noalle(i) = length(alleelitLokuksessa{i,1}); -end -alleleCodes = zeros(max(noalle),nloci); -for i=1:nloci - alleelitLokuksessaI = alleelitLokuksessa{i,1}; - puuttuvia = max(noalle)-length(alleelitLokuksessaI); - alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -end - -for loc = 1:nloci - for all = 1:noalle(loc) - data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; - end; -end; - -nind = max(data(:,end)); -%rows = cell(nind,1); -rows = zeros(nind,2); -for i=1:nind - rivit = find(data(:,end)==i)'; - rows(i,1) = min(rivit); - rows(i,2) = max(rivit); -end -newData = data; - -adjprior = zeros(max(noalle),nloci); -priorTerm = 0; -for j=1:nloci - adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)]; - priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j)); -end - - -%-------------------------------------------------------------------- - -function [Z,distances] = getPopDistancesByKL(adjprior) -% Laskee populaatioille etäisyydet -% käyttäen KL-divergenssi? -global COUNTS; -maxnoalle = size(COUNTS,1); -nloci = size(COUNTS,2); -npops = size(COUNTS,3); -distances = zeros(nchoosek(npops,2),1); - -d = zeros(maxnoalle, nloci, npops); -prior = adjprior; -prior(find(prior==1))=0; -nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. -prior(1,nollia)=1; -for pop1 = 1:npops - d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); - %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -end -pointer = 1; -for pop1 = 1:npops-1 - for pop2 = pop1+1:npops - dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); - div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; - div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; - div = (div12+div21)/2; - distances(pointer) = div; - pointer = pointer+1; - end -end -Z=linkage(distances'); - %-------------------------------------------------------------------- diff --git a/tests/testthat/test-greedyPopMix.R b/tests/testthat/test-greedyPopMix.R index e3d79b2..873ae1e 100644 --- a/tests/testthat/test-greedyPopMix.R +++ b/tests/testthat/test-greedyPopMix.R @@ -1,14 +1,17 @@ context("greedyPopMix functions") test_that("Auxiliary functions work properly", { + # Testing findOutRowsFromInd ------------------------------------------------- x <- matrix(11:16, 3) y <- matrix(2:7, 3) z <- list( popnames2 = matrix(c(11:13, seq(1.5, 2.5, 0.5)), 3), rowsFromInd = 2 ) - x2 <- matrix(seq(4, 14, 2), 3) expect_equal(findOutRowsFromInd(x, y, "Diploid"), z) + + # Testing getPopDistancesByKL ------------------------------------------------ + x2 <- matrix(seq(4, 14, 2), 3) expect_equal( getPopDistancesByKL(x2), list( @@ -16,4 +19,16 @@ test_that("Auxiliary functions work properly", { distances = as.matrix(rep(0, 4950)) ) ) + + # Testing handlePopData ------------------------------------------------------ + x3 <- matrix(c(9, 4, 1, 8, 5, 2, 1, 2, 3), 3) + y3 <- list( + newData = matrix(c(3: 1, 3: 1, 1: 3), 3), + rows = matrix(c(1: 3, 1: 3), 3), + alleleCodes = matrix(c(1, 4, 9, 2, 5, 8), 3), + noalle = matrix(c(3, 3), 1), + adjprior = matrix(rep(3/9, 6), 3), + priorTerm = 5.9125 + ) + expect_equal(handlePopData(x3), y3, tol = 1e-4) })