Merge branch 'issue-24' into develop

* issue-24:
  Increment version number to 0.0.0.9026
  Added (commented) test for BAPS data (#24)
  Renamed global environment, fixed assignments (#24)
  Fixes to `laskeMuutokset()` (#24)
  Syntax fix to `indMix()` (#24)
  Retranslated `computeDiffInCounts()` (#24)
  Changed default `npops` to 3 (#24)
  `laskeMuutokset()` reads `npop` from parent functions (#24)
  Using a dedicated environment for globals (#24)
  Retranslated `computePopulationLogml()` (#24)
  Added/renamed BAPS example data (#24)
This commit is contained in:
Waldir Leoncio 2024-04-11 09:49:00 +02:00
commit 2d5d93adbc
30 changed files with 194 additions and 183 deletions

View file

@ -1,6 +1,6 @@
Package: rBAPS
Title: Bayesian Analysis of Population Structure
Version: 0.0.0.9025
Version: 0.0.0.9026
Date: 2020-11-09
Authors@R:
c(

View file

@ -7,7 +7,7 @@ addToSummary <- function(logml, partitionSummary, worstIndex) {
apu <- matlab2r::find(abs(partitionSummary[, 2] - logml) < 1e-5)
if (isempty(apu)) {
# Nyt l<>ydetty partitio ei ole viel<65> kirjattuna summaryyn.
npops <- length(unique(PARTITION))
npops <- length(unique(globals$PARTITION))
partitionSummary[worstIndex, 1] <- npops
partitionSummary[worstIndex, 2] <- logml
added <- 1

View file

@ -5,12 +5,12 @@ checkLogml <- function(priorTerm, adjprior, cliques, separators) {
# global SEPCOUNTS
# global PARTITION
npops <- length(unique(PARTITION))
npops <- length(unique(globals$PARTITION))
cliqcounts <- computeCounts(cliques, separators, npops)$cliqcounts
sepcounts <- computeCounts(cliques, separators, npops)$sepcounts
CLIQCOUNTS <- cliqcounts
SEPCOUNTS <- sepcounts
globals$CLIQCOUNTS <- cliqcounts
globals$SEPCOUNTS <- sepcounts
logml <- computeLogml(adjprior, priorTerm)$logml
spatialPrior <- computeLogml(adjprior, priorTerm)$spatialPrior

View file

@ -3,12 +3,12 @@
#' j 1/noalle(j) verran.
#' @param noalle noalle
computeAllFreqs2 <- function(noalle) {
COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS)
SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS)
max_noalle <- size(COUNTS, 1)
nloci <- size(COUNTS, 2)
npops <- size(COUNTS, 3)
sumCounts <- SUMCOUNTS + ones(size(SUMCOUNTS))
globals$COUNTS <- ifelse(isGlobalEmpty(globals$COUNTS), vector(), globals$COUNTS)
globals$COUNTS <- ifelse(isGlobalEmpty(globals$COUNTS), vector(), globals$COUNTS)
max_noalle <- size(globals$COUNTS, 1)
nloci <- size(globals$COUNTS, 2)
npops <- size(globals$COUNTS, 3)
sumCounts <- globals$COUNTS + ones(size(globals$COUNTS))
sumCounts <- reshape(t(sumCounts), c(1, nloci, npops))
sumCounts <- repmat(sumCounts, c(max_noalle, 1, 1))
@ -20,9 +20,9 @@ computeAllFreqs2 <- function(noalle) {
}
prioriAlleelit <- repmat(prioriAlleelit, c(1, 1, npops))
counts <- ifelse(
test = isGlobalEmpty(COUNTS),
test = isGlobalEmpty(globals$COUNTS),
yes = prioriAlleelit,
no = COUNTS + prioriAlleelit
no = globals$COUNTS + prioriAlleelit
)
allFreqs <- counts / drop(sumCounts)
return(allFreqs)

View file

@ -4,20 +4,14 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) {
# % riveill<6C> rows. rows pit<69><74> olla vaakavektori.
diffInCounts <- zeros(max_noalle, nloci)
for (i in seq_len(nrow(data))) {
for (i in rows) { # yep, just one iteration
row <- data[i, ]
notEmpty <- as.matrix(matlab2r::find(row >= 0))
if (length(notEmpty) > 0) {
diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] <-
diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] + 1
element <- row[notEmpty] + (notEmpty - 1) * max_noalle
diffInCounts[element] <- diffInCounts[element] + 1
}
}
diffInCounts <- matrix(
data = diffInCounts[!is.na(diffInCounts)],
nrow = max_noalle,
ncol = nloci,
byrow = TRUE
)
return(diffInCounts)
}

View file

@ -14,14 +14,14 @@ computeLogml <- function(counts, sumcounts, noalle, data, rowsFromInd) {
logml <- sum(
sum(
sum(
GAMMA_LN[
globals$GAMMA_LN[
counts + 1 +
repmat(rowsInG * (adjnoalle - 1), c(1, 1, npops))
]
)
)
) -
npops * sum(sum(GAMMA_LN[1, adjnoalle])) -
sum(sum(GAMMA_LN[sumcounts + 1, 1]))
npops * sum(sum(globals$GAMMA_LN[1, adjnoalle])) -
sum(sum(globals$GAMMA_LN[sumcounts + 1, 1]))
return(logml)
}

View file

@ -8,11 +8,11 @@
#' @param allFreqs allFreqs
#' @param rowsFromInd rowsFromInd
computePersonalAllFreqs <- function(ind, data, allFreqs, rowsFromInd) {
if (isGlobalEmpty(COUNTS)) {
if (isGlobalEmpty(globals$COUNTS)) {
nloci <- npops <- 1
} else {
nloci <- ifelse(is.na(dim(COUNTS)[2]), 1, dim(COUNTS)[2])
npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
nloci <- ifelse(is.na(dim(globals$COUNTS)[2]), 1, dim(globals$COUNTS)[2])
npops <- ifelse(is.na(dim(globals$COUNTS)[3]), 1, dim(globals$COUNTS)[3])
}
rows <- as.matrix(t(data))[computeRows(rowsFromInd, ind, 1), , drop = FALSE]

View file

@ -1,43 +1,21 @@
computePopulationLogml <- function(pops, adjprior, priorTerm) {
# Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset
# ======================================================== #
# Limiting COUNTS size #
# ======================================================== #
if (!is.null(adjprior)) {
nr <- seq_len(nrow(adjprior))
nc <- seq_len(ncol(adjprior))
COUNTS <- COUNTS[nr, nc, pops, drop = FALSE]
} else {
COUNTS <- NA
}
nr <- seq_len(nrow(adjprior))
nc <- seq_len(ncol(adjprior))
x <- size(COUNTS, 1)
y <- size(COUNTS, 2)
x <- size(globals$COUNTS, 1)
y <- size(globals$COUNTS, 2)
z <- length(pops)
# ======================================================== #
# Computation #
# ======================================================== #
term1 <- NULL
if (!is.null(adjprior)) {
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
term1 <- squeeze(
sum(
sum(
reshape(
lgamma(
repmat(adjprior, c(1, 1, length(pops))) + COUNTS[nr, nc, pops, drop = !isarray]
),
c(x, y, z)
),
1
),
2
)
)
}
if (is.null(priorTerm)) priorTerm <- 0
popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
return(popLogml)
rep_adj <- repmat(adjprior, c(1, 1, z))
gamma_rep_counts <- matlab2r::gammaln(rep_adj + globals$COUNTS[, , pops])
gamma_sum_counts <- rowSums(matlab2r::gammaln(1 + globals$SUMCOUNTS[pops, , drop = FALSE]))
gamma_rep_counts_sum <- colSums(colSums(reshape(gamma_rep_counts, c(x, y, z))))
gamma_rep_counts_reshaped <- squeeze(gamma_rep_counts_sum)
popLogml <- gamma_rep_counts_reshaped - gamma_sum_counts - priorTerm
return(popLogml[, , drop = FALSE])
}

View file

@ -10,7 +10,7 @@ fiksaaPartitioYksiloTasolle <- function(rows, rowsFromInd) {
kaikkiRivit <- rows[ind, 1]:rows[ind, 2]
for (riviNumero in seq(rowsFromInd, length(kaikkiRivit), rowsFromInd)) {
rivi <- kaikkiRivit[riviNumero]
partitio2[rivi / rowsFromInd] <- PARTITION[ind]
partitio2[rivi / rowsFromInd] <- globals$PARTITION[ind]
}
}
global_env <- as.environment(1L)

View file

@ -1,7 +1,7 @@
findEmptyPop <- function(npops) {
# % Palauttaa ensimm<6D>isen tyhj<68>n populaation indeksin. Jos tyhji<6A>
# % populaatioita ei ole, palauttaa -1:n.
pops <- t(unique(PARTITION))
pops <- t(unique(globals$PARTITION))
if (length(pops) == npops) {
emptyPop <- -1
} else {

View file

@ -1,10 +1,10 @@
getPopDistancesByKL <- function(adjprior) {
# Laskee populaatioille etהisyydet
# kהyttהen KL-divergenssi?
COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), ]
maxnoalle <- size(COUNTS, 1)
nloci <- size(COUNTS, 2)
npops <- size(COUNTS, 3)
globals$COUNTS <- globals$COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), ]
maxnoalle <- size(globals$COUNTS, 1)
nloci <- size(globals$COUNTS, 2)
npops <- size(globals$COUNTS, 3)
distances <- zeros(choose(npops, 2), 1)
d <- zeros(maxnoalle, nloci, npops)
@ -16,8 +16,8 @@ getPopDistancesByKL <- function(adjprior) {
prior[1, nollia] <- 1
for (pop1 in 1:npops) {
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat(
sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, ncol(prior))
d[, , pop1] <- (squeeze(globals$COUNTS[, , pop1]) + prior) / repmat(
sum(squeeze(globals$COUNTS[, , pop1]) + prior), c(maxnoalle, ncol(prior))
)
}
pointer <- 1

View file

@ -1,11 +1,13 @@
COUNTS <- array(0, dim = c(100, 100, 100))
SUMCOUNTS <- array(0, dim = c(100, 100))
PARTITION <- array(1, dim = 100)
POP_LOGML <- array(1, dim = 100)
LOGDIFF <- array(1, dim = c(100, 100))
# If handling globas break, try other ideas from
# https://stackoverflow.com/a/65252740/1169233
globals <- new.env(parent = emptyenv())
utils::globalVariables(
c("PARTITION", "COUNTS", "SUMCOUNTS", "LOGDIFF", "POP_LOGML", "GAMMA_LN")
)
assign("COUNTS", array(0, dim = c(0, 0, 0)), envir = globals)
assign("SUMCOUNTS", array(0, dim = c(0, 0)), envir = globals)
assign("PARTITION", array(1, dim = 0), envir = globals)
assign("POP_LOGML", array(1, dim = 0), envir = globals)
assign("LOGDIFF", array(1, dim = c(0, 0)), envir = globals)
assign("CLOQCOUNTS", array(0, dim = c(0, 0)), envir = globals)
assign("SEPCOUNTS", array(0, dim = c(0, 0)), envir = globals)
assign("GAMMA_LN", array(0, dim = c(0, 0)), envir = globals)
# If handling globas break, try other ideas from
# https://stackoverflow.com/a/65252740/1169233 and
# https://stackoverflow.com/questions/12598242/

View file

@ -24,7 +24,7 @@
#' greedyMix(data, "baps")
#' } # TEMP: unwrap once #24 is resolved
greedyMix <- function(
data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 1L,
data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 3L,
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
) {

View file

@ -89,11 +89,11 @@ greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
rows <- popnames_rowsFromInd$rows
rm(popnames_rowsFromInd)
}
groupPartition <- PARTITION
groupPartition <- globals$PARTITION
fiksaaPartitioYksiloTasolle(rows, rowsFromInd)
c$PARTITION <- PARTITION
c$COUNTS <- COUNTS
c$SUMCOUNTS <- SUMCOUNTS
c$PARTITION <- globals$PARTITION
c$COUNTS <- globals$COUNTS
c$SUMCOUNTS <- globals$SUMCOUNTS
c$alleleCodes <- alleleCodes
c$adjprior <- adjprior
c$rowsFromInd <- rowsFromInd

View file

@ -4,7 +4,6 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
# Input npops is not used if called by greedyMix or greedyPopMix.
logml <- 1
clearGlobalVars()
noalle <- c$noalle
rows <- c$rows
@ -94,16 +93,16 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
counts <- sumcounts_counts_logml$counts
logml <- sumcounts_counts_logml$logml
PARTITION <- zeros(ninds, 1)
assign("PARTITION", zeros(ninds, 1), globals)
for (i in seq_len(ninds)) {
apu <- rows[i]
PARTITION[i] <- initialPartition[apu[1]]
globals$PARTITION[i] <- initialPartition[apu[1]]
}
COUNTS <- counts
SUMCOUNTS <- sumcounts
POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
LOGDIFF <- repmat(-Inf, c(ninds, npops))
assign("COUNTS", counts, globals)
assign("SUMCOUNTS", sumcounts, globals)
assign("POP_LOGML", computePopulationLogml(seq_len(npops), adjprior, priorTerm), globals)
assign("LOGDIFF", matrix(-Inf, nrow = ninds, ncol = npops), globals)
# PARHAAN MIXTURE-PARTITION ETSIMINEN
nRoundTypes <- 7
@ -147,10 +146,10 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
muutosNyt <- 0
for (ind in inds) {
i1 <- PARTITION[ind]
i1 <- globals$PARTITION[ind]
muutokset_diffInCounts <- greedyMix_muutokset$new()
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset(
ind, rows, data, adjprior, priorTerm
ind, rows, data, adjprior, priorTerm, npops
)
muutokset <- muutokset_diffInCounts$muutokset
diffInCounts <- muutokset_diffInCounts$diffInCounts
@ -196,7 +195,9 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
} else if (round == 2) { # Populaation yhdist<73>minen toiseen.
maxMuutos <- 0
for (pop in seq_len(npops)) {
muutokset_diffInCounts <- greedyMix_muutokset$new()
muutokset_diffInCounts <- greedyMix_muutokset$new
# FIXME: wrong input
browser() # TEMP. Tip: browserText()
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset2(
pop, rows, data, adjprior, priorTerm
)
@ -278,7 +279,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
diffInCounts <- computeDiffInCounts(
t(rivit), size(COUNTS, 1), size(COUNTS, 2), data
)
i1 <- PARTITION(muuttuvat[1])
i1 <- PARTITION[muuttuvat[1]]
updateGlobalVariables3(
muuttuvat, diffInCounts, adjprior, priorTerm, i2
)
@ -513,7 +514,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
}
if (muutoksia == 0) {
if (vaihe <= 4) {
vaihe <= vaihe + 1
vaihe <- vaihe + 1
} else if (vaihe == 5) {
ready <- 1
}

View file

@ -6,5 +6,5 @@
#' @importFrom stats sd
#' @author Waldir Leoncio
isGlobalEmpty <- function(g) {
return(sum(g) == 0 & sd(g) == 0)
return(sum(g) == 0 && is.na(sd(g)))
}

View file

@ -291,10 +291,10 @@ admix1_muutokset <- R6Class(
#' @param omaFreqs own Freqs?
#' @param logml log maximum likelihood
laskeMuutokset4 = function(osuus, osuusTaulu, omaFreqs, logml) {
if (isGlobalEmpty(COUNTS)) {
if (isGlobalEmpty(globals$COUNTS)) {
npops <- 1
} else {
npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
npops <- ifelse(is.na(dim(globals$COUNTS)[3]), 1, dim(globals$COUNTS)[3])
}
notEmpty <- which(osuusTaulu > 0.005)
muutokset <- zeros(npops)
@ -341,12 +341,11 @@ greedyMix_muutokset <- R6Class(
#' @param data data
#' @param adjprior adjprior
#' @param priorTerm priorTerm
laskeMuutokset = function(ind, globalRows, data, adjprior, priorTerm) {
npops <- size(COUNTS, 3)
muutokset <- LOGDIFF[ind, ]
laskeMuutokset = function(ind, globalRows, data, adjprior, priorTerm, npops) {
muutokset <- globals$LOGDIFF[ind, ]
i1 <- PARTITION[ind]
i1_logml <- POP_LOGML[i1]
i1 <- globals$PARTITION[ind]
i1_logml <- globals$POP_LOGML[i1]
muutokset[i1] <- 0
if (is.null(dim(globalRows))) {
@ -354,30 +353,31 @@ greedyMix_muutokset <- R6Class(
} else {
rows <- globalRows[ind, 1]:globalRows[ind, 2]
}
diffInCounts <- computeDiffInCounts(
rows, size(COUNTS, 1), size(COUNTS, 2), data
rows, size(globals$COUNTS, 1), size(globals$COUNTS, 2), data
)
diffInSumCounts <- colSums(diffInCounts)
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
globals$COUNTS[, , i1] <- globals$COUNTS[, , i1] - diffInCounts
globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] - diffInSumCounts
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts
globals$COUNTS[, , i1] <- globals$COUNTS[, , i1] + diffInCounts
globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] + diffInSumCounts
i2 <- matlab2r::find(muutokset == -Inf) # Etsit<69><74>n populaatiot jotka muuttuneet viime kerran j<>lkeen. (Searching for populations that have changed since the last time)
i2 <- setdiff(i2, i1)
i2_logml <- POP_LOGML[i2]
i2_logml <- globals$POP_LOGML[i2]
ni2 <- length(i2)
COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2))
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1))
globals$COUNTS[, , i2] <- globals$COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2))
globals$SUMCOUNTS[i2, ] <- globals$SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1))
new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm)
COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2))
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1))
globals$COUNTS[, , i2] <- globals$COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2))
globals$SUMCOUNTS[i2, ] <- globals$SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1))
muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
LOGDIFF[ind, ] <- muutokset
muutokset[i2] <- new_i1_logml[, ] - i1_logml + new_i2_logml[, ] - i2_logml
globals$LOGDIFF[ind, ] <- muutokset
return(list(muutokset = muutokset, diffInCounts = diffInCounts))
},
#' @param i1 i1
@ -390,45 +390,45 @@ greedyMix_muutokset <- R6Class(
# % muutos logml:ss<73>, mik<69>li korin i1 kaikki yksil<69>t siirret<65><74>n
# % koriin i.
npops <- size(COUNTS, 3)
npops <- size(globals$COUNTS, 3)
muutokset <- zeros(npops, 1)
i1_logml <- POP_LOGML[i1]
i1_logml <- globals$POP_LOGML[i1]
inds <- matlab2r::find(PARTITION == i1)
inds <- matlab2r::find(globals$PARTITION == i1)
ninds <- length(inds)
if (ninds == 0) {
diffInCounts <- zeros(size(COUNTS, 1), size(COUNTS, 2))
diffInCounts <- zeros(size(globals$COUNTS, 1), size(globals$COUNTS, 2))
return()
}
rows <- list()
for (i in 1:ninds) {
ind <- inds(i)
lisa <- globalRows(ind, 1):globalRows(ind, 2)
ind <- inds[i]
lisa <- globalRows[ind, 1]:globalRows[ind, 2]
rows <- c(rows, t(lisa))
}
diffInCounts <- computeDiffInCounts(
t(rows), size(COUNTS, 1), size(COUNTS, 2), data
t(rows), size(globals$COUNTS, 1), size(globals$COUNTS, 2), data
)
diffInSumCounts <- sum(diffInCounts)
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
globals$COUNTS[, , i1] <- globals$COUNTS[, , i1] - diffInCounts
globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] - diffInSumCounts
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts
globals$COUNTS[, , i1] <- globals$COUNTS[, , i1] + diffInCounts
globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] + diffInSumCounts
i2 <- c(1:i1 - 1, i1 + 1:npops)
i2_logml <- POP_LOGML[i2]
i2_logml <- globals$POP_LOGML[i2]
COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1))
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1))
globals$COUNTS[, , i2] <- globals$COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1))
globals$SUMCOUNTS[i2, ] <- globals$SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1))
new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm)
COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1))
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1))
globals$COUNTS[, , i2] <- globals$COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1))
globals$SUMCOUNTS[i2, ] <- globals$SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1))
muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
return(list(muutokset = muutokset, diffInCounts = diffInCounts))

View file

@ -9,7 +9,7 @@ poistaLiianPienet <- function(npops, rowsFromInd, alaraja) {
popSize <- zeros(1, npops)
if (npops > 0) {
for (i in 1:npops) {
popSize[i] <- length(which(PARTITION == i))
popSize[i] <- length(which(globals$PARTITION == i))
}
}
miniPops <- which(popSize < alaraja)
@ -20,19 +20,19 @@ poistaLiianPienet <- function(npops, rowsFromInd, alaraja) {
outliers <- matrix(NA, 0, 0)
for (pop in miniPops) {
inds <- which(PARTITION == pop)
inds <- which(globals$PARTITION == pop)
cat("Removed individuals: ")
cat(as.character(inds))
outliers <- matrix(c(outliers, inds), ncol = 1)
}
ninds <- length(PARTITION)
PARTITION[outliers] <- 0
korit <- unique(PARTITION(which(PARTITION > 0)))
ninds <- length(globals$PARTITION)
globals$PARTITION[outliers] <- 0
korit <- unique(globals$PARTITION(which(globals$PARTITION > 0)))
for (n in 1:length(korit)) {
kori <- korit[n]
yksilot <- which(PARTITION == kori)
PARTITION[yksilot] == n
yksilot <- which(globals$PARTITION == kori)
globals$PARTITION[yksilot] == n
}
# TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <-

View file

@ -3,14 +3,14 @@
#' Add each allele to each locus in each population by j 1 / noalle(j). The Dirichlet distributions corresponding to the counts thus obtained simulate values for the allele frequencies of the populations.
#' @param noalle noalle
simulateAllFreqs <- function(noalle) {
if (isGlobalEmpty(COUNTS)) {
if (isGlobalEmpty(globals$COUNTS)) {
max_noalle <- 0
nloci <- 0
npops <- 1
} else {
max_noalle <- size(COUNTS, 1)
nloci <- size(COUNTS, 2)
npops <- size(COUNTS, 3)
max_noalle <- size(globals$COUNTS, 1)
nloci <- size(globals$COUNTS, 2)
npops <- size(globals$COUNTS, 3)
}
prioriAlleelit <- zeros(max_noalle, nloci)
@ -21,9 +21,9 @@ simulateAllFreqs <- function(noalle) {
}
prioriAlleelit <- repmat(prioriAlleelit, matrix(c(1, 1, npops), 1))
counts <- ifelse(
test = isGlobalEmpty(COUNTS),
test = isGlobalEmpty(globals$COUNTS),
yes = prioriAlleelit,
no = COUNTS + prioriAlleelit
no = globals$COUNTS + prioriAlleelit
)
allfreqs <- zeros(size(counts))

View file

@ -4,10 +4,10 @@
#' @param osuus percentage?
#' @param indeksi index
suoritaMuutos <- function(osuusTaulu, osuus, indeksi) {
if (isGlobalEmpty(COUNTS)) {
if (isGlobalEmpty(globals$COUNTS)) {
npops <- 1
} else {
npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
npops <- ifelse(is.na(dim(globals$COUNTS)[3]), 1, dim(globals$COUNTS)[3])
}
i1 <- indeksi %% npops

View file

@ -1,21 +1,21 @@
updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) {
# % Suorittaa globaalien muuttujien muutokset, kun yksil<69> ind
# % on siirret<65><74>n koriin i2.
i1 <- PARTITION[ind]
PARTITION[ind] <- i2
i1 <- globals$PARTITION[ind]
globals$PARTITION[ind] <- i2
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts)
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts)
globals$COUNTS[, , i1] <- globals$COUNTS[, , i1] - diffInCounts
globals$COUNTS[, , i2] <- globals$COUNTS[, , i2] + diffInCounts
globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] - colSums(diffInCounts)
globals$SUMCOUNTS[i2, ] <- globals$SUMCOUNTS[i2, ] + colSums(diffInCounts)
POP_LOGML[c(i1, i2)] <- computePopulationLogml(
globals$POP_LOGML[c(i1, i2)] <- computePopulationLogml(
c(i1, i2), adjprior, priorTerm
)
LOGDIFF[, c(i1, i2)] <- -Inf
inx <- c(matlab2r::find(PARTITION == i1), matlab2r::find(PARTITION == i2))
LOGDIFF[inx, ] <- -Inf
globals$LOGDIFF[, c(i1, i2)] <- -Inf
inx <- c(matlab2r::find(globals$PARTITION == i1), matlab2r::find(globals$PARTITION == i2))
globals$LOGDIFF[inx, ] <- -Inf
}
updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) {

View file

@ -1,5 +1,5 @@
viewPartition <- function(osuudet, popnames) {
npops <- size(COUNTS, 3)
npops <- size(globals$COUNTS, 3)
nind <- size(osuudet, 1)
# TODO: translate if necessary. Remove if this function won't be used

View file

@ -16,7 +16,7 @@ writeMixtureInfo <- function(
partitionSummary, popnames, fixedK, verbose
) {
ninds <- size(data, 1) / rowsFromInd
npops <- size(COUNTS, 3)
npops <- size(globals$COUNTS, 3)
# Check that the names refer to individuals
# Tarkistetaan ett?nimet viittaavat yksil<69>ihin
@ -64,13 +64,13 @@ writeMixtureInfo <- function(
)
}
cluster_count <- length(unique(PARTITION))
cluster_count <- length(unique(globals$PARTITION))
if (verbose) cat("Best Partition: ")
if (fid != -1) {
append(fid, c("Best Partition: ", "\n"))
}
for (m in 1:cluster_count) {
indsInM <- matlab2r::find(PARTITION == m)
indsInM <- matlab2r::find(globals$PARTITION == m)
length_of_beginning <- 11 + floor(log10(m))
cluster_size <- length(indsInM)
@ -158,7 +158,7 @@ writeMixtureInfo <- function(
}
# %ninds = size(data,1)/rowsFromInd;
changesInLogml <- t(LOGDIFF)
changesInLogml <- t(globals$LOGDIFF)
for (ind in 1:ninds) {
muutokset <- changesInLogml[, ind]
if (names) {
@ -183,9 +183,9 @@ writeMixtureInfo <- function(
append(fid, " KL-divergence matrix in PHYLIP format: ")
}
COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE]
maxnoalle <- size(COUNTS, 1)
nloci <- size(COUNTS, 2)
globals$COUNTS <- globals$COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE]
maxnoalle <- size(globals$COUNTS, 1)
nloci <- size(globals$COUNTS, 2)
d <- zeros(maxnoalle, nloci, npops)
prior <- adjprior
prior[matlab2r::find(prior == 1)] <- 0
@ -195,7 +195,7 @@ writeMixtureInfo <- function(
prior[1, nollia] <- 1
for (pop1 in 1:npops) {
squeezed_COUNTS_prior <- squeeze(COUNTS[, , pop1]) + prior
squeezed_COUNTS_prior <- squeeze(globals$COUNTS[, , pop1]) + prior
d[, , pop1] <- squeezed_COUNTS_prior / sum(squeezed_COUNTS_prior)
}
ekarivi <- as.character(npops)

View file

@ -14,7 +14,7 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
outPutFile, inputFile, partitionSummary,
popnames, fixedK) {
ninds <- size(rows, 1)
npops <- size(COUNTS, 3)
npops <- size(globals$COUNTS, 3)
names <- size(popnames, 1) == ninds # Tarkistetaan ett?nimet viittaavat yksilöihin
changesInLogml <- vector()
if (!missing(outPutFile)) {
@ -32,13 +32,13 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
append(fid, c("Number of clusters in optimal partition:", ownNum2Str(npops), "\n"))
append(fid, c("Log(marginal likelihood) of optimal partition:", ownNum2Str(logml), "\n\n"))
}
cluster_count <- length(unique(PARTITION))
cluster_count <- length(unique(globals$PARTITION))
cat("Best Partition:\n")
if (exists("fid")) {
append(fid, c("Best partition:\n"))
}
for (m in 1:cluster_count) {
indsInM <- find(PARTITION == m)
indsInM <- find(globals$PARTITION == m)
length_of_beginning <- 11 + floor(log10(m))
cluster_size <- length(indsInM)
if (names) {
@ -101,7 +101,7 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
if (exists("fid")) {
append(fid, c(ekarivi, "\n"))
}
changesInLogml <- t(LOGDIFF)
changesInLogml <- t(globals$LOGDIFF)
for (ind in 1:ninds) {
muutokset <- changesInLogml[, ind]
if (names) {
@ -126,16 +126,16 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
append(fid, " \n")
append(fid, "KL - divergence matrix in PHYLIP format:\n")
}
maxnoalle <- size(COUNTS, 1)
nloci <- size(COUNTS, 2)
maxnoalle <- size(globals$COUNTS, 1)
nloci <- size(globals$COUNTS, 2)
d <- zeros(maxnoalle, nloci, npops)
prior <- adjprior
prior[find[prior == 1]] <- 0
nollia <- find(all(prior == 0)) # Lokukset, joissa oli havaittu vain yht?alleelia.
prior[1, nollia] <- 1
for (pop1 in 1:npops) {
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) /
repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
d[, , pop1] <- (squeeze(globals$COUNTS[, , pop1]) + prior) /
repmat(sum(squeeze(globals$COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
}
ekarivi <- as.character(npops)
cat(ekarivi, "\n")

View file

@ -0,0 +1,5 @@
88 102 56 80 100 90 118 -9 88 104 1
80 102 54 82 102 92 116 90 86 104 2
88 104 56 84 102 -9 120 90 88 100 3
86 102 56 80 102 90 116 90 86 100 4
88 102 -9 80 102 90 116 92 86 100 5

View file

@ -8,7 +8,7 @@ greedyMix(
data,
format = gsub("^.*\\\\.", "", data),
partitionCompare = NULL,
npops = 1L,
npops = 3L,
counts = NULL,
sumcounts = NULL,
max_iter = 100L,

View file

@ -28,7 +28,14 @@ logml:ss<73> siirrett<74>ess<73> yksil<69>it<69> toisiin populaatioihin.
\if{latex}{\out{\hypertarget{method-greedyMix_muutokset-laskeMuutokset}{}}}
\subsection{Method \code{laskeMuutokset()}}{
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{greedyMix_muutokset$laskeMuutokset(ind, globalRows, data, adjprior, priorTerm)}\if{html}{\out{</div>}}
\if{html}{\out{<div class="r">}}\preformatted{greedyMix_muutokset$laskeMuutokset(
ind,
globalRows,
data,
adjprior,
priorTerm,
npops
)}\if{html}{\out{</div>}}
}
\subsection{Arguments}{

View file

@ -5,7 +5,7 @@ path_inst <- system.file("extdata", "", package = "rBAPS")
# Reading datasets -------------------------------------------------------------
baps_diploid <- read.delim(
file = file.path(path_inst, "BAPS_format_clustering_diploid.txt"),
file = file.path(path_inst, "BAPS_clustering_diploid.txt"),
sep = " ",
header = FALSE
)
@ -46,6 +46,11 @@ raw_bam <- importFile(
data = file.path(path_inst, "bam_example.bam"),
format = "BAM",
)
# TODO: uncomment for testing #24
# raw_baps <- importFile(
# data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
# format = "FASTA"
# )
test_that("Files are imported correctly", {
expect_equal(dim(raw_fasta), c(5, 99))

View file

@ -11,14 +11,33 @@ test_that("Auxiliary functions work properly", {
expect_equal(findOutRowsFromInd(x, y, "Diploid"), z)
# Testing getPopDistancesByKL ------------------------------------------------
set.seed(5562143)
globals$COUNTS <- array(rpois(10 ^ 3, lambda = 10), dim = c(10, 10, 10))
x2 <- matrix(seq(4, 14, 2), 3)
expect_equal(
getPopDistancesByKL(x2),
list(
Z = matrix(c(c(1, 101:198), 2:100, rep(0, 99)), nrow = 99, ncol = 3),
distances = as.matrix(rep(0, 4950))
)
Z <- matrix(
c(
5, 8, 0.004410383, 11, 10, 0.009386554, 2, 7, 0.010046641,
3, 12, 0.011518562, 1, 4, 0.013682110, 14, 6, 0.027109229,
15, 13, 0.032833011, 16, 9, 0.045394132, 17, 18, 0.086729590
),
nrow = 9, ncol = 3, byrow = TRUE
)
distances <- matrix(
c(
0.015767099, 0.020110943, 0.013682110, 0.010214691, 0.050375802,
0.019692440, 0.010294099, 0.053740306, 0.028410233, 0.034496635,
0.025356695, 0.015267764, 0.045409885, 0.010046641, 0.012797950,
0.065436956, 0.026308651, 0.019896390, 0.006193117, 0.011735103,
0.036077638, 0.011518562, 0.029233839, 0.010798344, 0.013262098,
0.034671122, 0.032833011, 0.024732472, 0.033976345, 0.040243817,
0.020374790, 0.024770297, 0.004410383, 0.024329842, 0.009386554,
0.052217370, 0.027109229, 0.038127778, 0.016888110, 0.024061302,
0.086729590, 0.038787638, 0.045394132, 0.005206076, 0.042606915
),
nrow = 45, ncol = 1,
byrow = TRUE
)
expect_equal(getPopDistancesByKL(x2), list(Z = Z, distances = distances))
# Testing handlePopData ------------------------------------------------------
x3 <- matrix(c(9, 4, 1, 8, 5, 2, 1, 2, 3), 3)