Merge issue-2-translate-greedyPopMix into develop
This commit is contained in:
commit
2ac99025b4
6 changed files with 106 additions and 96 deletions
|
|
@ -1,6 +1,6 @@
|
|||
Package: rBAPS
|
||||
Title: Bayesian Analysis of Population Structure
|
||||
Version: 0.0.0.9005
|
||||
Version: 0.0.0.9006
|
||||
Date: 2020-11-09
|
||||
Authors@R:
|
||||
c(
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ export(fgetl)
|
|||
export(fopen)
|
||||
export(greedyMix)
|
||||
export(handleData)
|
||||
export(handlePopData)
|
||||
export(initPopNames)
|
||||
export(laskeMuutokset4)
|
||||
export(learn_partition_modified)
|
||||
|
|
|
|||
71
R/handlePopData.R
Normal file
71
R/handlePopData.R
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
#' @title Handle Pop data
|
||||
#' @description The last column of the original data tells you which
|
||||
#' <missing translation> that line is from. The function changes the allele
|
||||
#' codes so that the codes for one locus have values between 1 and noalle[j].
|
||||
#' Before this change, an allele whose code is zero is changed.
|
||||
#' @param raw_data raw data
|
||||
#' @export
|
||||
handlePopData <- function(raw_data) {
|
||||
# Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
|
||||
# kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
|
||||
# siten, ett?yhden lokuksen j koodit saavat arvoja
|
||||
# välill?1, ,noalle(j). Ennen tät?muutosta alleeli, jonka
|
||||
# koodi on nolla muutetaan.
|
||||
|
||||
data <- raw_data
|
||||
nloci <- size(raw_data, 2) - 1
|
||||
|
||||
dataApu <- data[, 1:nloci]
|
||||
nollat <- find(dataApu == 0)
|
||||
if (length(nollat) > 0) {
|
||||
isoinAlleeli <- max(max(dataApu)$maxs)$maxs
|
||||
dataApu[nollat] <- isoinAlleeli + 1
|
||||
data[, 1:nloci] <- dataApu
|
||||
}
|
||||
|
||||
noalle <- zeros(1, nloci)
|
||||
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
|
||||
for (i in 1:nloci) {
|
||||
alleelitLokuksessaI <- sort(unique(data[, i]))
|
||||
alleelitLokuksessa[[i]] <- alleelitLokuksessaI[find(alleelitLokuksessaI >= 0)]
|
||||
noalle[i] <- length(alleelitLokuksessa[[i]])
|
||||
}
|
||||
alleleCodes <- zeros(unique(max(noalle)$maxs), nloci)
|
||||
for (i in 1:nloci) {
|
||||
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
|
||||
puuttuvia <- unique(max(noalle)$maxs) - length(alleelitLokuksessaI)
|
||||
alleleCodes[, i] = c(alleelitLokuksessaI, zeros(puuttuvia, 1))
|
||||
}
|
||||
|
||||
for (loc in 1:nloci) {
|
||||
for (all in 1:noalle[loc]) {
|
||||
data[find(data[ , loc] == alleleCodes[all, loc]), loc] <- all
|
||||
}
|
||||
}
|
||||
|
||||
nind <- max(data[, ncol(data)])$maxs
|
||||
rows <- zeros(nind, 2)
|
||||
for (i in 1:nind) {
|
||||
rivit <- t(find(data[, ncol(data)] == i))
|
||||
rows[i, 1] <- min(rivit)$mins
|
||||
rows[i, 2] <- max(rivit)$maxs
|
||||
}
|
||||
newData <- data
|
||||
|
||||
adjprior <- zeros(unique(max(noalle)$maxs), nloci)
|
||||
priorTerm <- 0
|
||||
for (j in 1:nloci) {
|
||||
adjprior[, j] <- c(repmat(1 / noalle[j], c(noalle[j], 1)), ones(unique(max(noalle)$maxs) - noalle[j], 1))
|
||||
priorTerm <- priorTerm + noalle[j] * log(gamma(1 / noalle[j]))
|
||||
}
|
||||
return(
|
||||
list(
|
||||
newData = newData,
|
||||
rows = rows,
|
||||
alleleCodes = alleleCodes,
|
||||
noalle = noalle,
|
||||
adjprior = adjprior,
|
||||
priorTerm = priorTerm
|
||||
)
|
||||
)
|
||||
}
|
||||
17
man/handlePopData.Rd
Normal file
17
man/handlePopData.Rd
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/handlePopData.R
|
||||
\name{handlePopData}
|
||||
\alias{handlePopData}
|
||||
\title{Handle Pop data}
|
||||
\usage{
|
||||
handlePopData(raw_data)
|
||||
}
|
||||
\arguments{
|
||||
\item{raw_data}{raw data}
|
||||
}
|
||||
\description{
|
||||
The last column of the original data tells you which
|
||||
<missing translation> that line is from. The function changes the allele
|
||||
codes so that the codes for one locus have values between 1 and noalle[j].
|
||||
Before this change, an allele whose code is zero is changed.
|
||||
}
|
||||
|
|
@ -251,100 +251,6 @@ else
|
|||
end
|
||||
end
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
|
||||
function [newData, rows, alleleCodes, noalle, adjprior, priorTerm] = handlePopData(raw_data)
|
||||
% Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
|
||||
% kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
|
||||
% siten, ett?yhden lokuksen j koodit saavat arvoja
|
||||
% välill?1,...,noalle(j). Ennen tät?muutosta alleeli, jonka
|
||||
% koodi on nolla muutetaan.
|
||||
|
||||
|
||||
data = raw_data;
|
||||
nloci=size(raw_data,2)-1;
|
||||
|
||||
dataApu = data(:,1:nloci);
|
||||
nollat = find(dataApu==0);
|
||||
if ~isempty(nollat)
|
||||
isoinAlleeli = max(max(dataApu));
|
||||
dataApu(nollat) = isoinAlleeli+1;
|
||||
data(:,1:nloci) = dataApu;
|
||||
end
|
||||
dataApu = []; nollat = []; isoinAlleeli = [];
|
||||
|
||||
noalle=zeros(1,nloci);
|
||||
alleelitLokuksessa = cell(nloci,1);
|
||||
for i=1:nloci
|
||||
alleelitLokuksessaI = unique(data(:,i));
|
||||
alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0));
|
||||
noalle(i) = length(alleelitLokuksessa{i,1});
|
||||
end
|
||||
alleleCodes = zeros(max(noalle),nloci);
|
||||
for i=1:nloci
|
||||
alleelitLokuksessaI = alleelitLokuksessa{i,1};
|
||||
puuttuvia = max(noalle)-length(alleelitLokuksessaI);
|
||||
alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)];
|
||||
end
|
||||
|
||||
for loc = 1:nloci
|
||||
for all = 1:noalle(loc)
|
||||
data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all;
|
||||
end;
|
||||
end;
|
||||
|
||||
nind = max(data(:,end));
|
||||
%rows = cell(nind,1);
|
||||
rows = zeros(nind,2);
|
||||
for i=1:nind
|
||||
rivit = find(data(:,end)==i)';
|
||||
rows(i,1) = min(rivit);
|
||||
rows(i,2) = max(rivit);
|
||||
end
|
||||
newData = data;
|
||||
|
||||
adjprior = zeros(max(noalle),nloci);
|
||||
priorTerm = 0;
|
||||
for j=1:nloci
|
||||
adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)];
|
||||
priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j));
|
||||
end
|
||||
|
||||
|
||||
%--------------------------------------------------------------------
|
||||
|
||||
function [Z,distances] = getPopDistancesByKL(adjprior)
|
||||
% Laskee populaatioille etäisyydet
|
||||
% käyttäen KL-divergenssi?
|
||||
global COUNTS;
|
||||
maxnoalle = size(COUNTS,1);
|
||||
nloci = size(COUNTS,2);
|
||||
npops = size(COUNTS,3);
|
||||
distances = zeros(nchoosek(npops,2),1);
|
||||
|
||||
d = zeros(maxnoalle, nloci, npops);
|
||||
prior = adjprior;
|
||||
prior(find(prior==1))=0;
|
||||
nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia.
|
||||
prior(1,nollia)=1;
|
||||
for pop1 = 1:npops
|
||||
d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1);
|
||||
%dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1);
|
||||
end
|
||||
pointer = 1;
|
||||
for pop1 = 1:npops-1
|
||||
for pop2 = pop1+1:npops
|
||||
dist1 = d(:,:,pop1); dist2 = d(:,:,pop2);
|
||||
div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci;
|
||||
div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci;
|
||||
div = (div12+div21)/2;
|
||||
distances(pointer) = div;
|
||||
pointer = pointer+1;
|
||||
end
|
||||
end
|
||||
Z=linkage(distances');
|
||||
|
||||
%--------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,17 @@
|
|||
context("greedyPopMix functions")
|
||||
|
||||
test_that("Auxiliary functions work properly", {
|
||||
# Testing findOutRowsFromInd -------------------------------------------------
|
||||
x <- matrix(11:16, 3)
|
||||
y <- matrix(2:7, 3)
|
||||
z <- list(
|
||||
popnames2 = matrix(c(11:13, seq(1.5, 2.5, 0.5)), 3),
|
||||
rowsFromInd = 2
|
||||
)
|
||||
x2 <- matrix(seq(4, 14, 2), 3)
|
||||
expect_equal(findOutRowsFromInd(x, y, "Diploid"), z)
|
||||
|
||||
# Testing getPopDistancesByKL ------------------------------------------------
|
||||
x2 <- matrix(seq(4, 14, 2), 3)
|
||||
expect_equal(
|
||||
getPopDistancesByKL(x2),
|
||||
list(
|
||||
|
|
@ -16,4 +19,16 @@ test_that("Auxiliary functions work properly", {
|
|||
distances = as.matrix(rep(0, 4950))
|
||||
)
|
||||
)
|
||||
|
||||
# Testing handlePopData ------------------------------------------------------
|
||||
x3 <- matrix(c(9, 4, 1, 8, 5, 2, 1, 2, 3), 3)
|
||||
y3 <- list(
|
||||
newData = matrix(c(3: 1, 3: 1, 1: 3), 3),
|
||||
rows = matrix(c(1: 3, 1: 3), 3),
|
||||
alleleCodes = matrix(c(1, 4, 9, 2, 5, 8), 3),
|
||||
noalle = matrix(c(3, 3), 1),
|
||||
adjprior = matrix(rep(3/9, 6), 3),
|
||||
priorTerm = 5.9125
|
||||
)
|
||||
expect_equal(handlePopData(x3), y3, tol = 1e-4)
|
||||
})
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue