diff --git a/DESCRIPTION b/DESCRIPTION index 7ed5904..a7bdb2e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rBAPS Title: Bayesian Analysis of Population Structure -Version: 0.0.0.9025 +Version: 0.0.0.9026 Date: 2020-11-09 Authors@R: c( diff --git a/R/addToSummary.R b/R/addToSummary.R index b1ee36e..a524b4b 100644 --- a/R/addToSummary.R +++ b/R/addToSummary.R @@ -7,7 +7,7 @@ addToSummary <- function(logml, partitionSummary, worstIndex) { apu <- matlab2r::find(abs(partitionSummary[, 2] - logml) < 1e-5) if (isempty(apu)) { # Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn. - npops <- length(unique(PARTITION)) + npops <- length(unique(globals$PARTITION)) partitionSummary[worstIndex, 1] <- npops partitionSummary[worstIndex, 2] <- logml added <- 1 diff --git a/R/checkLogml.R b/R/checkLogml.R index ff7435c..b52b618 100644 --- a/R/checkLogml.R +++ b/R/checkLogml.R @@ -5,12 +5,12 @@ checkLogml <- function(priorTerm, adjprior, cliques, separators) { # global SEPCOUNTS # global PARTITION - npops <- length(unique(PARTITION)) + npops <- length(unique(globals$PARTITION)) cliqcounts <- computeCounts(cliques, separators, npops)$cliqcounts sepcounts <- computeCounts(cliques, separators, npops)$sepcounts - CLIQCOUNTS <- cliqcounts - SEPCOUNTS <- sepcounts + globals$CLIQCOUNTS <- cliqcounts + globals$SEPCOUNTS <- sepcounts logml <- computeLogml(adjprior, priorTerm)$logml spatialPrior <- computeLogml(adjprior, priorTerm)$spatialPrior diff --git a/R/computeAllFreqs2.R b/R/computeAllFreqs2.R index bb89bd2..8af71a7 100644 --- a/R/computeAllFreqs2.R +++ b/R/computeAllFreqs2.R @@ -3,12 +3,12 @@ #' j 1/noalle(j) verran. #' @param noalle noalle computeAllFreqs2 <- function(noalle) { - COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS) - SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS) - max_noalle <- size(COUNTS, 1) - nloci <- size(COUNTS, 2) - npops <- size(COUNTS, 3) - sumCounts <- SUMCOUNTS + ones(size(SUMCOUNTS)) + globals$COUNTS <- ifelse(isGlobalEmpty(globals$COUNTS), vector(), globals$COUNTS) + globals$COUNTS <- ifelse(isGlobalEmpty(globals$COUNTS), vector(), globals$COUNTS) + max_noalle <- size(globals$COUNTS, 1) + nloci <- size(globals$COUNTS, 2) + npops <- size(globals$COUNTS, 3) + sumCounts <- globals$COUNTS + ones(size(globals$COUNTS)) sumCounts <- reshape(t(sumCounts), c(1, nloci, npops)) sumCounts <- repmat(sumCounts, c(max_noalle, 1, 1)) @@ -20,9 +20,9 @@ computeAllFreqs2 <- function(noalle) { } prioriAlleelit <- repmat(prioriAlleelit, c(1, 1, npops)) counts <- ifelse( - test = isGlobalEmpty(COUNTS), + test = isGlobalEmpty(globals$COUNTS), yes = prioriAlleelit, - no = COUNTS + prioriAlleelit + no = globals$COUNTS + prioriAlleelit ) allFreqs <- counts / drop(sumCounts) return(allFreqs) diff --git a/R/computeDiffInCounts.R b/R/computeDiffInCounts.R index c8abbfa..10a3beb 100644 --- a/R/computeDiffInCounts.R +++ b/R/computeDiffInCounts.R @@ -4,20 +4,14 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) { # % riveill� rows. rows pit�� olla vaakavektori. diffInCounts <- zeros(max_noalle, nloci) - for (i in seq_len(nrow(data))) { + for (i in rows) { # yep, just one iteration row <- data[i, ] notEmpty <- as.matrix(matlab2r::find(row >= 0)) if (length(notEmpty) > 0) { - diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] <- - diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] + 1 + element <- row[notEmpty] + (notEmpty - 1) * max_noalle + diffInCounts[element] <- diffInCounts[element] + 1 } } - diffInCounts <- matrix( - data = diffInCounts[!is.na(diffInCounts)], - nrow = max_noalle, - ncol = nloci, - byrow = TRUE - ) return(diffInCounts) } diff --git a/R/computeLogml.R b/R/computeLogml.R index a6dc228..9662e3e 100644 --- a/R/computeLogml.R +++ b/R/computeLogml.R @@ -14,14 +14,14 @@ computeLogml <- function(counts, sumcounts, noalle, data, rowsFromInd) { logml <- sum( sum( sum( - GAMMA_LN[ + globals$GAMMA_LN[ counts + 1 + repmat(rowsInG * (adjnoalle - 1), c(1, 1, npops)) ] ) ) ) - - npops * sum(sum(GAMMA_LN[1, adjnoalle])) - - sum(sum(GAMMA_LN[sumcounts + 1, 1])) + npops * sum(sum(globals$GAMMA_LN[1, adjnoalle])) - + sum(sum(globals$GAMMA_LN[sumcounts + 1, 1])) return(logml) } diff --git a/R/computePersonalAllFreqs.R b/R/computePersonalAllFreqs.R index 142bffe..f01576c 100644 --- a/R/computePersonalAllFreqs.R +++ b/R/computePersonalAllFreqs.R @@ -8,11 +8,11 @@ #' @param allFreqs allFreqs #' @param rowsFromInd rowsFromInd computePersonalAllFreqs <- function(ind, data, allFreqs, rowsFromInd) { - if (isGlobalEmpty(COUNTS)) { + if (isGlobalEmpty(globals$COUNTS)) { nloci <- npops <- 1 } else { - nloci <- ifelse(is.na(dim(COUNTS)[2]), 1, dim(COUNTS)[2]) - npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) + nloci <- ifelse(is.na(dim(globals$COUNTS)[2]), 1, dim(globals$COUNTS)[2]) + npops <- ifelse(is.na(dim(globals$COUNTS)[3]), 1, dim(globals$COUNTS)[3]) } rows <- as.matrix(t(data))[computeRows(rowsFromInd, ind, 1), , drop = FALSE] diff --git a/R/computePopulationLogml.R b/R/computePopulationLogml.R index 9114a87..bb3fc6d 100644 --- a/R/computePopulationLogml.R +++ b/R/computePopulationLogml.R @@ -1,43 +1,21 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) { # Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset - # ======================================================== # - # Limiting COUNTS size # - # ======================================================== # - if (!is.null(adjprior)) { - nr <- seq_len(nrow(adjprior)) - nc <- seq_len(ncol(adjprior)) - COUNTS <- COUNTS[nr, nc, pops, drop = FALSE] - } else { - COUNTS <- NA - } + nr <- seq_len(nrow(adjprior)) + nc <- seq_len(ncol(adjprior)) - x <- size(COUNTS, 1) - y <- size(COUNTS, 2) + x <- size(globals$COUNTS, 1) + y <- size(globals$COUNTS, 2) z <- length(pops) # ======================================================== # # Computation # # ======================================================== # - term1 <- NULL - if (!is.null(adjprior)) { - isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2 - term1 <- squeeze( - sum( - sum( - reshape( - lgamma( - repmat(adjprior, c(1, 1, length(pops))) + COUNTS[nr, nc, pops, drop = !isarray] - ), - c(x, y, z) - ), - 1 - ), - 2 - ) - ) - } - if (is.null(priorTerm)) priorTerm <- 0 - popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm - return(popLogml) + rep_adj <- repmat(adjprior, c(1, 1, z)) + gamma_rep_counts <- matlab2r::gammaln(rep_adj + globals$COUNTS[, , pops]) + gamma_sum_counts <- rowSums(matlab2r::gammaln(1 + globals$SUMCOUNTS[pops, , drop = FALSE])) + gamma_rep_counts_sum <- colSums(colSums(reshape(gamma_rep_counts, c(x, y, z)))) + gamma_rep_counts_reshaped <- squeeze(gamma_rep_counts_sum) + popLogml <- gamma_rep_counts_reshaped - gamma_sum_counts - priorTerm + return(popLogml[, , drop = FALSE]) } diff --git a/R/fiksaaPartitioYksiloTasolle.R b/R/fiksaaPartitioYksiloTasolle.R index be571bb..bd00ea6 100644 --- a/R/fiksaaPartitioYksiloTasolle.R +++ b/R/fiksaaPartitioYksiloTasolle.R @@ -10,7 +10,7 @@ fiksaaPartitioYksiloTasolle <- function(rows, rowsFromInd) { kaikkiRivit <- rows[ind, 1]:rows[ind, 2] for (riviNumero in seq(rowsFromInd, length(kaikkiRivit), rowsFromInd)) { rivi <- kaikkiRivit[riviNumero] - partitio2[rivi / rowsFromInd] <- PARTITION[ind] + partitio2[rivi / rowsFromInd] <- globals$PARTITION[ind] } } global_env <- as.environment(1L) diff --git a/R/findEmptyPop.R b/R/findEmptyPop.R index f9d8e48..f177346 100644 --- a/R/findEmptyPop.R +++ b/R/findEmptyPop.R @@ -1,7 +1,7 @@ findEmptyPop <- function(npops) { # % Palauttaa ensimm�isen tyhj�n populaation indeksin. Jos tyhji� # % populaatioita ei ole, palauttaa -1:n. - pops <- t(unique(PARTITION)) + pops <- t(unique(globals$PARTITION)) if (length(pops) == npops) { emptyPop <- -1 } else { diff --git a/R/getPopDistancesByKL.R b/R/getPopDistancesByKL.R index 21aae37..00e0881 100644 --- a/R/getPopDistancesByKL.R +++ b/R/getPopDistancesByKL.R @@ -1,10 +1,10 @@ getPopDistancesByKL <- function(adjprior) { # Laskee populaatioille etהisyydet # kהyttהen KL-divergenssi? - COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), ] - maxnoalle <- size(COUNTS, 1) - nloci <- size(COUNTS, 2) - npops <- size(COUNTS, 3) + globals$COUNTS <- globals$COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), ] + maxnoalle <- size(globals$COUNTS, 1) + nloci <- size(globals$COUNTS, 2) + npops <- size(globals$COUNTS, 3) distances <- zeros(choose(npops, 2), 1) d <- zeros(maxnoalle, nloci, npops) @@ -16,8 +16,8 @@ getPopDistancesByKL <- function(adjprior) { prior[1, nollia] <- 1 for (pop1 in 1:npops) { - d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat( - sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, ncol(prior)) + d[, , pop1] <- (squeeze(globals$COUNTS[, , pop1]) + prior) / repmat( + sum(squeeze(globals$COUNTS[, , pop1]) + prior), c(maxnoalle, ncol(prior)) ) } pointer <- 1 diff --git a/R/globals.R b/R/globals.R index 85a3580..592b448 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,11 +1,13 @@ -COUNTS <- array(0, dim = c(100, 100, 100)) -SUMCOUNTS <- array(0, dim = c(100, 100)) -PARTITION <- array(1, dim = 100) -POP_LOGML <- array(1, dim = 100) -LOGDIFF <- array(1, dim = c(100, 100)) -# If handling globas break, try other ideas from -# https://stackoverflow.com/a/65252740/1169233 +globals <- new.env(parent = emptyenv()) -utils::globalVariables( - c("PARTITION", "COUNTS", "SUMCOUNTS", "LOGDIFF", "POP_LOGML", "GAMMA_LN") -) +assign("COUNTS", array(0, dim = c(0, 0, 0)), envir = globals) +assign("SUMCOUNTS", array(0, dim = c(0, 0)), envir = globals) +assign("PARTITION", array(1, dim = 0), envir = globals) +assign("POP_LOGML", array(1, dim = 0), envir = globals) +assign("LOGDIFF", array(1, dim = c(0, 0)), envir = globals) +assign("CLOQCOUNTS", array(0, dim = c(0, 0)), envir = globals) +assign("SEPCOUNTS", array(0, dim = c(0, 0)), envir = globals) +assign("GAMMA_LN", array(0, dim = c(0, 0)), envir = globals) +# If handling globas break, try other ideas from +# https://stackoverflow.com/a/65252740/1169233 and +# https://stackoverflow.com/questions/12598242/ diff --git a/R/greedyMix.R b/R/greedyMix.R index 13e9a6d..3bff02b 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -24,7 +24,7 @@ #' greedyMix(data, "baps") #' } # TEMP: unwrap once #24 is resolved greedyMix <- function( - data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 1L, + data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 3L, counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL, inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE ) { diff --git a/R/greedyPopMix.R b/R/greedyPopMix.R index d2114c7..6911956 100644 --- a/R/greedyPopMix.R +++ b/R/greedyPopMix.R @@ -89,11 +89,11 @@ greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE rows <- popnames_rowsFromInd$rows rm(popnames_rowsFromInd) } - groupPartition <- PARTITION + groupPartition <- globals$PARTITION fiksaaPartitioYksiloTasolle(rows, rowsFromInd) - c$PARTITION <- PARTITION - c$COUNTS <- COUNTS - c$SUMCOUNTS <- SUMCOUNTS + c$PARTITION <- globals$PARTITION + c$COUNTS <- globals$COUNTS + c$SUMCOUNTS <- globals$SUMCOUNTS c$alleleCodes <- alleleCodes c$adjprior <- adjprior c$rowsFromInd <- rowsFromInd diff --git a/R/indMix.R b/R/indMix.R index f30a9e9..323c8d0 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -4,7 +4,6 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d # Input npops is not used if called by greedyMix or greedyPopMix. logml <- 1 - clearGlobalVars() noalle <- c$noalle rows <- c$rows @@ -94,16 +93,16 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d counts <- sumcounts_counts_logml$counts logml <- sumcounts_counts_logml$logml - PARTITION <- zeros(ninds, 1) + assign("PARTITION", zeros(ninds, 1), globals) for (i in seq_len(ninds)) { apu <- rows[i] - PARTITION[i] <- initialPartition[apu[1]] + globals$PARTITION[i] <- initialPartition[apu[1]] } - COUNTS <- counts - SUMCOUNTS <- sumcounts - POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm) - LOGDIFF <- repmat(-Inf, c(ninds, npops)) + assign("COUNTS", counts, globals) + assign("SUMCOUNTS", sumcounts, globals) + assign("POP_LOGML", computePopulationLogml(seq_len(npops), adjprior, priorTerm), globals) + assign("LOGDIFF", matrix(-Inf, nrow = ninds, ncol = npops), globals) # PARHAAN MIXTURE-PARTITION ETSIMINEN nRoundTypes <- 7 @@ -147,10 +146,10 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d muutosNyt <- 0 for (ind in inds) { - i1 <- PARTITION[ind] + i1 <- globals$PARTITION[ind] muutokset_diffInCounts <- greedyMix_muutokset$new() muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset( - ind, rows, data, adjprior, priorTerm + ind, rows, data, adjprior, priorTerm, npops ) muutokset <- muutokset_diffInCounts$muutokset diffInCounts <- muutokset_diffInCounts$diffInCounts @@ -196,7 +195,9 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d } else if (round == 2) { # Populaation yhdist�minen toiseen. maxMuutos <- 0 for (pop in seq_len(npops)) { - muutokset_diffInCounts <- greedyMix_muutokset$new() + muutokset_diffInCounts <- greedyMix_muutokset$new + # FIXME: wrong input + browser() # TEMP. Tip: browserText() muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset2( pop, rows, data, adjprior, priorTerm ) @@ -278,7 +279,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d diffInCounts <- computeDiffInCounts( t(rivit), size(COUNTS, 1), size(COUNTS, 2), data ) - i1 <- PARTITION(muuttuvat[1]) + i1 <- PARTITION[muuttuvat[1]] updateGlobalVariables3( muuttuvat, diffInCounts, adjprior, priorTerm, i2 ) @@ -513,7 +514,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d } if (muutoksia == 0) { if (vaihe <= 4) { - vaihe <= vaihe + 1 + vaihe <- vaihe + 1 } else if (vaihe == 5) { ready <- 1 } diff --git a/R/isGlobalEmpty.R b/R/isGlobalEmpty.R index 1bd9995..c2f97b7 100644 --- a/R/isGlobalEmpty.R +++ b/R/isGlobalEmpty.R @@ -6,5 +6,5 @@ #' @importFrom stats sd #' @author Waldir Leoncio isGlobalEmpty <- function(g) { - return(sum(g) == 0 & sd(g) == 0) + return(sum(g) == 0 && is.na(sd(g))) } diff --git a/R/laskeMuutokset12345.R b/R/laskeMuutokset12345.R index 78d2276..14cc5bf 100644 --- a/R/laskeMuutokset12345.R +++ b/R/laskeMuutokset12345.R @@ -291,10 +291,10 @@ admix1_muutokset <- R6Class( #' @param omaFreqs own Freqs? #' @param logml log maximum likelihood laskeMuutokset4 = function(osuus, osuusTaulu, omaFreqs, logml) { - if (isGlobalEmpty(COUNTS)) { + if (isGlobalEmpty(globals$COUNTS)) { npops <- 1 } else { - npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) + npops <- ifelse(is.na(dim(globals$COUNTS)[3]), 1, dim(globals$COUNTS)[3]) } notEmpty <- which(osuusTaulu > 0.005) muutokset <- zeros(npops) @@ -341,12 +341,11 @@ greedyMix_muutokset <- R6Class( #' @param data data #' @param adjprior adjprior #' @param priorTerm priorTerm - laskeMuutokset = function(ind, globalRows, data, adjprior, priorTerm) { - npops <- size(COUNTS, 3) - muutokset <- LOGDIFF[ind, ] + laskeMuutokset = function(ind, globalRows, data, adjprior, priorTerm, npops) { + muutokset <- globals$LOGDIFF[ind, ] - i1 <- PARTITION[ind] - i1_logml <- POP_LOGML[i1] + i1 <- globals$PARTITION[ind] + i1_logml <- globals$POP_LOGML[i1] muutokset[i1] <- 0 if (is.null(dim(globalRows))) { @@ -354,30 +353,31 @@ greedyMix_muutokset <- R6Class( } else { rows <- globalRows[ind, 1]:globalRows[ind, 2] } + diffInCounts <- computeDiffInCounts( - rows, size(COUNTS, 1), size(COUNTS, 2), data + rows, size(globals$COUNTS, 1), size(globals$COUNTS, 2), data ) diffInSumCounts <- colSums(diffInCounts) - COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + globals$COUNTS[, , i1] <- globals$COUNTS[, , i1] - diffInCounts + globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] - diffInSumCounts new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) - COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + globals$COUNTS[, , i1] <- globals$COUNTS[, , i1] + diffInCounts + globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] + diffInSumCounts i2 <- matlab2r::find(muutokset == -Inf) # Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. (Searching for populations that have changed since the last time) i2 <- setdiff(i2, i1) - i2_logml <- POP_LOGML[i2] + i2_logml <- globals$POP_LOGML[i2] ni2 <- length(i2) - COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2)) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1)) + globals$COUNTS[, , i2] <- globals$COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2)) + globals$SUMCOUNTS[i2, ] <- globals$SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1)) new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) - COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2)) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1)) + globals$COUNTS[, , i2] <- globals$COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2)) + globals$SUMCOUNTS[i2, ] <- globals$SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1)) - muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml - LOGDIFF[ind, ] <- muutokset + muutokset[i2] <- new_i1_logml[, ] - i1_logml + new_i2_logml[, ] - i2_logml + globals$LOGDIFF[ind, ] <- muutokset return(list(muutokset = muutokset, diffInCounts = diffInCounts)) }, #' @param i1 i1 @@ -390,45 +390,45 @@ greedyMix_muutokset <- R6Class( # % muutos logml:ss�, mik�li korin i1 kaikki yksil�t siirret��n # % koriin i. - npops <- size(COUNTS, 3) + npops <- size(globals$COUNTS, 3) muutokset <- zeros(npops, 1) - i1_logml <- POP_LOGML[i1] + i1_logml <- globals$POP_LOGML[i1] - inds <- matlab2r::find(PARTITION == i1) + inds <- matlab2r::find(globals$PARTITION == i1) ninds <- length(inds) if (ninds == 0) { - diffInCounts <- zeros(size(COUNTS, 1), size(COUNTS, 2)) + diffInCounts <- zeros(size(globals$COUNTS, 1), size(globals$COUNTS, 2)) return() } rows <- list() for (i in 1:ninds) { - ind <- inds(i) - lisa <- globalRows(ind, 1):globalRows(ind, 2) + ind <- inds[i] + lisa <- globalRows[ind, 1]:globalRows[ind, 2] rows <- c(rows, t(lisa)) } diffInCounts <- computeDiffInCounts( - t(rows), size(COUNTS, 1), size(COUNTS, 2), data + t(rows), size(globals$COUNTS, 1), size(globals$COUNTS, 2), data ) diffInSumCounts <- sum(diffInCounts) - COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + globals$COUNTS[, , i1] <- globals$COUNTS[, , i1] - diffInCounts + globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] - diffInSumCounts new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) - COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + globals$COUNTS[, , i1] <- globals$COUNTS[, , i1] + diffInCounts + globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] + diffInSumCounts i2 <- c(1:i1 - 1, i1 + 1:npops) - i2_logml <- POP_LOGML[i2] + i2_logml <- globals$POP_LOGML[i2] - COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) + globals$COUNTS[, , i2] <- globals$COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) + globals$SUMCOUNTS[i2, ] <- globals$SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) - COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) + globals$COUNTS[, , i2] <- globals$COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) + globals$SUMCOUNTS[i2, ] <- globals$SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml return(list(muutokset = muutokset, diffInCounts = diffInCounts)) diff --git a/R/poistaLiianPienet.R b/R/poistaLiianPienet.R index a063cdc..b4f3c7d 100644 --- a/R/poistaLiianPienet.R +++ b/R/poistaLiianPienet.R @@ -9,7 +9,7 @@ poistaLiianPienet <- function(npops, rowsFromInd, alaraja) { popSize <- zeros(1, npops) if (npops > 0) { for (i in 1:npops) { - popSize[i] <- length(which(PARTITION == i)) + popSize[i] <- length(which(globals$PARTITION == i)) } } miniPops <- which(popSize < alaraja) @@ -20,19 +20,19 @@ poistaLiianPienet <- function(npops, rowsFromInd, alaraja) { outliers <- matrix(NA, 0, 0) for (pop in miniPops) { - inds <- which(PARTITION == pop) + inds <- which(globals$PARTITION == pop) cat("Removed individuals: ") cat(as.character(inds)) outliers <- matrix(c(outliers, inds), ncol = 1) } - ninds <- length(PARTITION) - PARTITION[outliers] <- 0 - korit <- unique(PARTITION(which(PARTITION > 0))) + ninds <- length(globals$PARTITION) + globals$PARTITION[outliers] <- 0 + korit <- unique(globals$PARTITION(which(globals$PARTITION > 0))) for (n in 1:length(korit)) { kori <- korit[n] - yksilot <- which(PARTITION == kori) - PARTITION[yksilot] == n + yksilot <- which(globals$PARTITION == kori) + globals$PARTITION[yksilot] == n } # TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <- diff --git a/R/simulateAllFreqs.R b/R/simulateAllFreqs.R index 6e518f2..e5d39dd 100644 --- a/R/simulateAllFreqs.R +++ b/R/simulateAllFreqs.R @@ -3,14 +3,14 @@ #' Add each allele to each locus in each population by j 1 / noalle(j). The Dirichlet distributions corresponding to the counts thus obtained simulate values for the allele frequencies of the populations. #' @param noalle noalle simulateAllFreqs <- function(noalle) { - if (isGlobalEmpty(COUNTS)) { + if (isGlobalEmpty(globals$COUNTS)) { max_noalle <- 0 nloci <- 0 npops <- 1 } else { - max_noalle <- size(COUNTS, 1) - nloci <- size(COUNTS, 2) - npops <- size(COUNTS, 3) + max_noalle <- size(globals$COUNTS, 1) + nloci <- size(globals$COUNTS, 2) + npops <- size(globals$COUNTS, 3) } prioriAlleelit <- zeros(max_noalle, nloci) @@ -21,9 +21,9 @@ simulateAllFreqs <- function(noalle) { } prioriAlleelit <- repmat(prioriAlleelit, matrix(c(1, 1, npops), 1)) counts <- ifelse( - test = isGlobalEmpty(COUNTS), + test = isGlobalEmpty(globals$COUNTS), yes = prioriAlleelit, - no = COUNTS + prioriAlleelit + no = globals$COUNTS + prioriAlleelit ) allfreqs <- zeros(size(counts)) diff --git a/R/suoritaMuutos.R b/R/suoritaMuutos.R index 09368ef..bc45c48 100644 --- a/R/suoritaMuutos.R +++ b/R/suoritaMuutos.R @@ -4,10 +4,10 @@ #' @param osuus percentage? #' @param indeksi index suoritaMuutos <- function(osuusTaulu, osuus, indeksi) { - if (isGlobalEmpty(COUNTS)) { + if (isGlobalEmpty(globals$COUNTS)) { npops <- 1 } else { - npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) + npops <- ifelse(is.na(dim(globals$COUNTS)[3]), 1, dim(globals$COUNTS)[3]) } i1 <- indeksi %% npops diff --git a/R/updateGlobalVariables.R b/R/updateGlobalVariables.R index 3eada6d..9a4d844 100644 --- a/R/updateGlobalVariables.R +++ b/R/updateGlobalVariables.R @@ -1,21 +1,21 @@ updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) { # % Suorittaa globaalien muuttujien muutokset, kun yksil� ind # % on siirret��n koriin i2. - i1 <- PARTITION[ind] - PARTITION[ind] <- i2 + i1 <- globals$PARTITION[ind] + globals$PARTITION[ind] <- i2 - COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts - COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts - SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts) - SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts) + globals$COUNTS[, , i1] <- globals$COUNTS[, , i1] - diffInCounts + globals$COUNTS[, , i2] <- globals$COUNTS[, , i2] + diffInCounts + globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] - colSums(diffInCounts) + globals$SUMCOUNTS[i2, ] <- globals$SUMCOUNTS[i2, ] + colSums(diffInCounts) - POP_LOGML[c(i1, i2)] <- computePopulationLogml( + globals$POP_LOGML[c(i1, i2)] <- computePopulationLogml( c(i1, i2), adjprior, priorTerm ) - LOGDIFF[, c(i1, i2)] <- -Inf - inx <- c(matlab2r::find(PARTITION == i1), matlab2r::find(PARTITION == i2)) - LOGDIFF[inx, ] <- -Inf + globals$LOGDIFF[, c(i1, i2)] <- -Inf + inx <- c(matlab2r::find(globals$PARTITION == i1), matlab2r::find(globals$PARTITION == i2)) + globals$LOGDIFF[inx, ] <- -Inf } updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) { diff --git a/R/viewPartition.R b/R/viewPartition.R index 6dfb14f..8124edc 100644 --- a/R/viewPartition.R +++ b/R/viewPartition.R @@ -1,5 +1,5 @@ viewPartition <- function(osuudet, popnames) { - npops <- size(COUNTS, 3) + npops <- size(globals$COUNTS, 3) nind <- size(osuudet, 1) # TODO: translate if necessary. Remove if this function won't be used diff --git a/R/writeMixtureInfo.R b/R/writeMixtureInfo.R index 2a1144b..65bdbee 100644 --- a/R/writeMixtureInfo.R +++ b/R/writeMixtureInfo.R @@ -16,7 +16,7 @@ writeMixtureInfo <- function( partitionSummary, popnames, fixedK, verbose ) { ninds <- size(data, 1) / rowsFromInd - npops <- size(COUNTS, 3) + npops <- size(globals$COUNTS, 3) # Check that the names refer to individuals # Tarkistetaan ett?nimet viittaavat yksil�ihin @@ -64,13 +64,13 @@ writeMixtureInfo <- function( ) } - cluster_count <- length(unique(PARTITION)) + cluster_count <- length(unique(globals$PARTITION)) if (verbose) cat("Best Partition: ") if (fid != -1) { append(fid, c("Best Partition: ", "\n")) } for (m in 1:cluster_count) { - indsInM <- matlab2r::find(PARTITION == m) + indsInM <- matlab2r::find(globals$PARTITION == m) length_of_beginning <- 11 + floor(log10(m)) cluster_size <- length(indsInM) @@ -158,7 +158,7 @@ writeMixtureInfo <- function( } # %ninds = size(data,1)/rowsFromInd; - changesInLogml <- t(LOGDIFF) + changesInLogml <- t(globals$LOGDIFF) for (ind in 1:ninds) { muutokset <- changesInLogml[, ind] if (names) { @@ -183,9 +183,9 @@ writeMixtureInfo <- function( append(fid, " KL-divergence matrix in PHYLIP format: ") } - COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE] - maxnoalle <- size(COUNTS, 1) - nloci <- size(COUNTS, 2) + globals$COUNTS <- globals$COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE] + maxnoalle <- size(globals$COUNTS, 1) + nloci <- size(globals$COUNTS, 2) d <- zeros(maxnoalle, nloci, npops) prior <- adjprior prior[matlab2r::find(prior == 1)] <- 0 @@ -195,7 +195,7 @@ writeMixtureInfo <- function( prior[1, nollia] <- 1 for (pop1 in 1:npops) { - squeezed_COUNTS_prior <- squeeze(COUNTS[, , pop1]) + prior + squeezed_COUNTS_prior <- squeeze(globals$COUNTS[, , pop1]) + prior d[, , pop1] <- squeezed_COUNTS_prior / sum(squeezed_COUNTS_prior) } ekarivi <- as.character(npops) diff --git a/R/writeMixtureInfoPop.R b/R/writeMixtureInfoPop.R index 41ffe8e..f0c293f 100644 --- a/R/writeMixtureInfoPop.R +++ b/R/writeMixtureInfoPop.R @@ -14,7 +14,7 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK) { ninds <- size(rows, 1) - npops <- size(COUNTS, 3) + npops <- size(globals$COUNTS, 3) names <- size(popnames, 1) == ninds # Tarkistetaan ett?nimet viittaavat yksilöihin changesInLogml <- vector() if (!missing(outPutFile)) { @@ -32,13 +32,13 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm, append(fid, c("Number of clusters in optimal partition:", ownNum2Str(npops), "\n")) append(fid, c("Log(marginal likelihood) of optimal partition:", ownNum2Str(logml), "\n\n")) } - cluster_count <- length(unique(PARTITION)) + cluster_count <- length(unique(globals$PARTITION)) cat("Best Partition:\n") if (exists("fid")) { append(fid, c("Best partition:\n")) } for (m in 1:cluster_count) { - indsInM <- find(PARTITION == m) + indsInM <- find(globals$PARTITION == m) length_of_beginning <- 11 + floor(log10(m)) cluster_size <- length(indsInM) if (names) { @@ -101,7 +101,7 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm, if (exists("fid")) { append(fid, c(ekarivi, "\n")) } - changesInLogml <- t(LOGDIFF) + changesInLogml <- t(globals$LOGDIFF) for (ind in 1:ninds) { muutokset <- changesInLogml[, ind] if (names) { @@ -126,16 +126,16 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm, append(fid, " \n") append(fid, "KL - divergence matrix in PHYLIP format:\n") } - maxnoalle <- size(COUNTS, 1) - nloci <- size(COUNTS, 2) + maxnoalle <- size(globals$COUNTS, 1) + nloci <- size(globals$COUNTS, 2) d <- zeros(maxnoalle, nloci, npops) prior <- adjprior prior[find[prior == 1]] <- 0 nollia <- find(all(prior == 0)) # Lokukset, joissa oli havaittu vain yht?alleelia. prior[1, nollia] <- 1 for (pop1 in 1:npops) { - d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / - repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1)) + d[, , pop1] <- (squeeze(globals$COUNTS[, , pop1]) + prior) / + repmat(sum(squeeze(globals$COUNTS[, , pop1]) + prior), c(maxnoalle, 1)) } ekarivi <- as.character(npops) cat(ekarivi, "\n") diff --git a/inst/extdata/BAPS_format_clustering_diploid.txt b/inst/extdata/BAPS_clustering_diploid.txt similarity index 100% rename from inst/extdata/BAPS_format_clustering_diploid.txt rename to inst/extdata/BAPS_clustering_diploid.txt diff --git a/inst/extdata/BAPS_clustering_haploid.txt b/inst/extdata/BAPS_clustering_haploid.txt new file mode 100644 index 0000000..4623678 --- /dev/null +++ b/inst/extdata/BAPS_clustering_haploid.txt @@ -0,0 +1,5 @@ +88 102 56 80 100 90 118 -9 88 104 1 +80 102 54 82 102 92 116 90 86 104 2 +88 104 56 84 102 -9 120 90 88 100 3 +86 102 56 80 102 90 116 90 86 100 4 +88 102 -9 80 102 90 116 92 86 100 5 diff --git a/man/greedyMix.Rd b/man/greedyMix.Rd index f9c58a6..2d1a0e4 100644 --- a/man/greedyMix.Rd +++ b/man/greedyMix.Rd @@ -8,7 +8,7 @@ greedyMix( data, format = gsub("^.*\\\\.", "", data), partitionCompare = NULL, - npops = 1L, + npops = 3L, counts = NULL, sumcounts = NULL, max_iter = 100L, diff --git a/man/greedyMix_muutokset.Rd b/man/greedyMix_muutokset.Rd index b720e58..3868f55 100644 --- a/man/greedyMix_muutokset.Rd +++ b/man/greedyMix_muutokset.Rd @@ -28,7 +28,14 @@ logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. \if{latex}{\out{\hypertarget{method-greedyMix_muutokset-laskeMuutokset}{}}} \subsection{Method \code{laskeMuutokset()}}{ \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{greedyMix_muutokset$laskeMuutokset(ind, globalRows, data, adjprior, priorTerm)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{greedyMix_muutokset$laskeMuutokset( + ind, + globalRows, + data, + adjprior, + priorTerm, + npops +)}\if{html}{\out{
}} } \subsection{Arguments}{ diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index 20380af..6a4f2f2 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -5,7 +5,7 @@ path_inst <- system.file("extdata", "", package = "rBAPS") # Reading datasets ------------------------------------------------------------- baps_diploid <- read.delim( - file = file.path(path_inst, "BAPS_format_clustering_diploid.txt"), + file = file.path(path_inst, "BAPS_clustering_diploid.txt"), sep = " ", header = FALSE ) @@ -46,6 +46,11 @@ raw_bam <- importFile( data = file.path(path_inst, "bam_example.bam"), format = "BAM", ) +# TODO: uncomment for testing #24 +# raw_baps <- importFile( +# data = file.path(path_inst, "FASTA_clustering_haploid.fasta"), +# format = "FASTA" +# ) test_that("Files are imported correctly", { expect_equal(dim(raw_fasta), c(5, 99)) diff --git a/tests/testthat/test-greedyPopMix.R b/tests/testthat/test-greedyPopMix.R index a3d1e0b..98db735 100644 --- a/tests/testthat/test-greedyPopMix.R +++ b/tests/testthat/test-greedyPopMix.R @@ -11,14 +11,33 @@ test_that("Auxiliary functions work properly", { expect_equal(findOutRowsFromInd(x, y, "Diploid"), z) # Testing getPopDistancesByKL ------------------------------------------------ + set.seed(5562143) + globals$COUNTS <- array(rpois(10 ^ 3, lambda = 10), dim = c(10, 10, 10)) x2 <- matrix(seq(4, 14, 2), 3) - expect_equal( - getPopDistancesByKL(x2), - list( - Z = matrix(c(c(1, 101:198), 2:100, rep(0, 99)), nrow = 99, ncol = 3), - distances = as.matrix(rep(0, 4950)) - ) + Z <- matrix( + c( + 5, 8, 0.004410383, 11, 10, 0.009386554, 2, 7, 0.010046641, + 3, 12, 0.011518562, 1, 4, 0.013682110, 14, 6, 0.027109229, + 15, 13, 0.032833011, 16, 9, 0.045394132, 17, 18, 0.086729590 + ), + nrow = 9, ncol = 3, byrow = TRUE ) + distances <- matrix( + c( + 0.015767099, 0.020110943, 0.013682110, 0.010214691, 0.050375802, + 0.019692440, 0.010294099, 0.053740306, 0.028410233, 0.034496635, + 0.025356695, 0.015267764, 0.045409885, 0.010046641, 0.012797950, + 0.065436956, 0.026308651, 0.019896390, 0.006193117, 0.011735103, + 0.036077638, 0.011518562, 0.029233839, 0.010798344, 0.013262098, + 0.034671122, 0.032833011, 0.024732472, 0.033976345, 0.040243817, + 0.020374790, 0.024770297, 0.004410383, 0.024329842, 0.009386554, + 0.052217370, 0.027109229, 0.038127778, 0.016888110, 0.024061302, + 0.086729590, 0.038787638, 0.045394132, 0.005206076, 0.042606915 + ), + nrow = 45, ncol = 1, + byrow = TRUE + ) + expect_equal(getPopDistancesByKL(x2), list(Z = Z, distances = distances)) # Testing handlePopData ------------------------------------------------------ x3 <- matrix(c(9, 4, 1, 8, 5, 2, 1, 2, 3), 3)