diff --git a/.Rbuildignore b/.Rbuildignore index 9238dee..548f11c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,4 +4,4 @@ matlab CHANGELOG.md CITATION.cff .travis.yml -data/ExamplesDataFormatting \ No newline at end of file +inst/ext/ExamplesDataFormatting \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index e7e0081..aa9e85a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rBAPS Title: Bayesian Analysis of Population Structure -Version: 0.0.0.9000 -Date: 2020-01-14 +Version: 0.0.0.9001 +Date: 2020-11-09 Authors@R: c( person( @@ -30,14 +30,14 @@ Description: Partial R implementation of the BAPS software Corander et al. 2008b ; Tang et al. 2009 ; Cheng et al. 2011 , - available at ), provides a - computationally-efficient method for the identification of admixture events + available at ), provides a + computationally-efficient method for the identification of admixture events in genetic population history. License: GPL-3 Encoding: UTF-8 LazyData: true RoxygenNote: 7.1.1 -Suggests: +Suggests: testthat (>= 2.1.0) Imports: methods diff --git a/NAMESPACE b/NAMESPACE index 2dd1c5e..42b0063 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,7 +23,6 @@ export(linkage) export(logml2String) export(lueGenePopData) export(lueNimi) -export(min_MATLAB) export(noIndex) export(ownNum2Str) export(poistaLiianPienet) diff --git a/R/addAlleles.R b/R/addAlleles.R index 33d0f38..6dd1c08 100644 --- a/R/addAlleles.R +++ b/R/addAlleles.R @@ -20,8 +20,8 @@ addAlleles <- function(data, ind, line, divider) { k <- 1 merkki <- substring(line, k, k) while (merkki != ',') { - k <- k + 1 - merkki <- substring(line, k, k) + k <- k + 1 + merkki <- substring(line, k, k) } line <- substring(line, k + 1) # clear k; clear merkki; diff --git a/R/addToSummary.R b/R/addToSummary.R new file mode 100644 index 0000000..4affbe9 --- /dev/null +++ b/R/addToSummary.R @@ -0,0 +1,18 @@ +addToSummary <- function(logml, partitionSummary, worstIndex) { + # Tiedet��n, ett� annettu logml on isompi kuin huonoin arvo + # partitionSummary taulukossa. Jos partitionSummary:ss� ei viel� ole + # annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja + # nykyist� partitiota vastaava nclusters:in arvo. Muutoin ei tehd� mit��n. + + apu <- find(abs(partitionSummary[, 2] - logml) < 1e-5) + if (isempty(apu)) { + # Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn. + npops <- length(unique(PARTITION)) + partitionSummary[worstIndex, 1] <- npops + partitionSummary[worstIndex, 2] <- logml + added <- 1 + } else { + added <- 0 + } + return(list(partitionSummary = partitionSummary, added = added)) +} \ No newline at end of file diff --git a/R/admixture_initialization.R b/R/admixture_initialization.R new file mode 100644 index 0000000..4d9f6c8 --- /dev/null +++ b/R/admixture_initialization.R @@ -0,0 +1,20 @@ +#' @title Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. +#' @param data_matrix data_matrix +#' @param nclusters ncluster +#' @param Z Z + +admixture_initialization <- function (data_matrix, nclusters, Z) { + size_data <- size(data_matrix) + nloci <- size_data[2] - 1 + n <- max(data_matrix[, end]) + T <- cluster_own(Z, nclusters) + initial_partition <- zeros(size_data[1], 1) + for (i in 1:n) { + kori <- T[i] + here <- find(data_matrix[,end] == i) + for (j in 1:length(here)) { + initial_partition[here[j], 1] <- kori + } + } + return(initial_partition) +} \ No newline at end of file diff --git a/R/clearGlobalVars.R b/R/clearGlobalVars.R index 1d77d9c..abc59fb 100644 --- a/R/clearGlobalVars.R +++ b/R/clearGlobalVars.R @@ -1,7 +1,8 @@ clearGlobalVars <- function() { - COUNTS <- SUMCOUNTS <- PARTITION <- POP_LOGML <- vector() # placeholders + # COUNTS <- SUMCOUNTS <- PARTITION <- POP_LOGML <- vector() # placeholders COUNTS <<- vector() SUMCOUNTS <<- vector() PARTITION <<- vector() POP_LOGML <<- vector() + LOGDIFF <<- vector() } \ No newline at end of file diff --git a/R/cluster_own.R b/R/cluster_own.R new file mode 100644 index 0000000..b94b7b8 --- /dev/null +++ b/R/cluster_own.R @@ -0,0 +1,51 @@ +cluster_own <- function(Z, nclust) { + true <- logical(1) + false <- logical(0) + maxclust <- nclust + # % Start of algorithm + m <- size(Z, 1) + 1 + T <- zeros(m, 1) + # % maximum number of clusters based on inconsistency + if (m <= maxclust) { + T = t((1:m)) + } else if (maxclust == 1) { + T <- ones(m, 1) + } else { + clsnum <- 1 + for (k in (m - maxclust + 1):(m - 1)) { + i = Z(k, 1) # left tree + if (i <= m) { # original node, no leafs + T(i) = clsnum + clsnum = clsnum + 1 + } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree + T <- clusternum(Z, T, i - m, clsnum) + clsnum <- clsnum + 1 + } + i <- Z(k, 2) # right tree + if (i <= m) { # original node, no leafs + T[i] <- clsnum + clsnum <- clsnum + 1 + } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree + T <- clusternum(Z, T, i - m, clsnum) + clsnum <- clsnum + 1 + } + } + } + return(T) +} + +clusternum <- function(X, T, k, c) { + m <- size(X, 1) + 1 + while (!isempty(k)) { + # Get the children of nodes at this level + children <- X[k, 1:2] + children <- children + + # Assign this node number to leaf children + t <- (children <= m) + T[children[t]] <- c + # Move to next level + k <- children(!t) - m + } + return(T) +} \ No newline at end of file diff --git a/R/computeDiffInCounts.R b/R/computeDiffInCounts.R new file mode 100644 index 0000000..de19e30 --- /dev/null +++ b/R/computeDiffInCounts.R @@ -0,0 +1,17 @@ +computeDiffInCounts <- function(rows, max_noalle, nloci, data) { + # % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien + # % lukum��r�t (vastaavasti kuin COUNTS:issa), jotka ovat data:n + # % riveill� rows. rows pit�� olla vaakavektori. + + diffInCounts <- zeros(max_noalle, nloci) + for (i in rows) { + row <- data[i, ] + notEmpty <- find(row>=0) + + if (length(notEmpty) > 0) { + diffInCounts(row(notEmpty) + (notEmpty - 1) * max_noalle) <- + diffInCounts(row(notEmpty) + (notEmpty - 1) * max_noalle) + 1 + } + } + return(diffInCounts) +} diff --git a/R/computePopulationLogml.R b/R/computePopulationLogml.R new file mode 100644 index 0000000..bf9782d --- /dev/null +++ b/R/computePopulationLogml.R @@ -0,0 +1,24 @@ +computePopulationLogml <- function(pops, adjprior, priorTerm) { + # Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset + + x <- size(COUNTS, 1) + y <- size(COUNTS, 2) + z <- length(pops) + + popLogml <- squeeze( + sum( + sum( + reshape( + lgamma( + repmat(adjprior, c(1, 1, length(pops))) + + COUNTS[, , pops] + ), + c(x, y, z) + ), + 1 + ), + 2 + ) + ) - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm + return(popLogml) +} \ No newline at end of file diff --git a/R/findEmptyPop.R b/R/findEmptyPop.R new file mode 100644 index 0000000..e62b5e3 --- /dev/null +++ b/R/findEmptyPop.R @@ -0,0 +1,12 @@ +findEmptyPop <- function(npops) { + # % Palauttaa ensimm�isen tyhj�n populaation indeksin. Jos tyhji� + # % populaatioita ei ole, palauttaa -1:n. + pops <- t(unique(PARTITION)) + if (length(pops) == npops) { + emptyPop <- -1 + } else { + popDiff <- diff(c(0, pops, npops + 1)) + emptyPop <- min(find(popDiff > 1)) + } + return(list(emptyPop = emptyPop, pops = pops)) +} diff --git a/R/greedyMix.R b/R/greedyMix.R index ef91b17..7b0004e 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -11,10 +11,11 @@ greedyMix <- function( savePreProcessed = NULL, filePreProcessed = NULL ) { - # ASK: graphical components. Remove? + # ASK: Unclear when fixedk == TRUE. Remove? # check whether fixed k mode is selected # h0 <- findobj('Tag','fixk_menu') # fixedK = get(h0, 'userdata'); + fixedK <- FALSE # if fixedK # if ~(fixKWarning == 1) % call function fixKWarning @@ -22,9 +23,11 @@ greedyMix <- function( # end # end + # ASK: ditto # % check whether partition compare mode is selected # h1 = findobj('Tag','partitioncompare_menu'); # partitionCompare = get(h1, 'userdata'); + partitionCompare <- FALSE if (is(tietue, "list") | is(tietue, "character")) { # ---------------------------------------------------------------------- @@ -244,13 +247,15 @@ greedyMix <- function( } # ========================================================================== - # Declaring global variables + # Declaring global variables and changing environment of children functions # ========================================================================== PARTITION <- vector() COUNTS <- vector() SUMCOUNTS <- vector() POP_LOGML <- vector() - clearGlobalVars <- vector() + clearGlobalVars() + + environment(writeMixtureInfo) <- environment() # ========================================================================== c <- list() c$data <- data @@ -265,6 +270,7 @@ greedyMix <- function( ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd) c$rows <- c(ekat, ekat + rowsFromInd - 1) + # ASK remove? # partition compare # if (!is.null(partitionCompare)) { # nsamplingunits <- size(c$rows, 1) @@ -291,17 +297,22 @@ greedyMix <- function( # } # # return the logml result # partitionCompare$logmls <- partitionLogml - # # set(h1, 'userdata', partitionCompare) # ASK remove? + # # set(h1, 'userdata', partitionCompare) # return() # } - # ASK remove (graphical part)? - # if (fixedK) { - # #logml_npops_partitionSummary <- indMix_fixK(c) # ASK translate? - # } else { - # #logml_npops_partitionSummary <- indMix(c) # ASK translate? - # } - # if (logml_npops_partitionSummary$logml == 1) return() + if (fixedK) { + # logml_npops_partitionSummary <- indMix_fixK(c) # TODO: translate + # logml <- logml_npops_partitionSummary$logml + # npops <- logml_npops_partitionSummary$npops + # partitionSummary <- logml_npops_partitionSummary$partitionSummary + } else { + logml_npops_partitionSummary <- indMix(c) # TODO: translate + logml <- logml_npops_partitionSummary$logml + npops <- logml_npops_partitionSummary$npops + partitionSummary <- logml_npops_partitionSummary$partitionSummary + } + if (logml_npops_partitionSummary$logml == 1) return() data <- data[, seq_len(ncol(data) - 1)] @@ -310,8 +321,9 @@ greedyMix <- function( # inp = get(h0,'String'); # h0 = findobj('Tag','filename2_text') # outp = get(h0,'String'); + inp <- vector() + outp <- vector() - browser() # TEMP changesInLogml <- writeMixtureInfo( logml, rowsFromInd, data, adjprior, priorTerm, outp, inp, popnames, fixedK diff --git a/R/indMix.R b/R/indMix.R new file mode 100644 index 0000000..6b11bf9 --- /dev/null +++ b/R/indMix.R @@ -0,0 +1,567 @@ +indMix <- function(c, npops, dispText) { + # Greedy search algorithm with unknown number of classes for regular + # clustering. + # Input npops is not used if called by greedyMix or greedyPopMix. + + logml <- 1 + clearGlobalVars() + + noalle <- c$noalle + rows <- c$rows + data <- c$data + + adjprior <- c$adjprior + priorTerm <- c$priorTerm + rowsFromInd <- c$rowsFromInd + + if (isfield(c, 'dist')) { + dist <- c$dist + Z <- c$Z + } + + rm(c) + nargin <- length(as.list(match.call())) - 1 + + if (nargin < 2) { + dispText <- 1 + npopstext <- matrix() + ready <- FALSE + teksti <- 'Input upper bound to the number of populations (possibly multiple values)' + while (!ready) { + npopstextExtra <- inputdlg( + teksti, + 1, + '20' + ) + if (isempty(npopstextExtra)) { # Painettu Cancel:ia + return() + } + npopstextExtra <- npopstextExtra[1] + if (length(npopstextExtra)>=255) { + npopstextExtra <- npopstextExtra[1:255] + npopstext <- c(npopstext, ' ', npopstextExtra) + teksti <- 'The input field length limit (255 characters) was reached. Input more values: ' + } else { + npopstext <- c(npopstext, ' ', npopstextExtra) + ready <- TRUE + } + } + rm(ready, teksti) + if (isempty(npopstext) | length(npopstext) == 1) { + return() + } else { + npopsTaulu <- as.numeric(npopstext) + ykkoset <- find(npopsTaulu == 1) + npopsTaulu(ykkoset) <- list() # Mik�li ykk�si� annettu yl�rajaksi, ne poistetaan. + if (isempty(npopsTaulu)) { + logml <- 1 + partitionSummary <- 1 + npops <- 1 + return() + } + rm(ykkoset) + } + } else { + npopsTaulu <- npops + } + + nruns <- length(npopsTaulu) + + initData <- data + data <- data[,1:(end - 1)] + + logmlBest <- -1e50 + partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml) + partitionSummary[,1] <- zeros(30, 1) + worstLogml <- -1e50 + worstIndex <- 1 + for (run in 1:nruns) { + npops <- npopsTaulu(run) + if (dispText) { + dispLine() + print( + paste0( + 'Run ', num2str(run), '/', num2str(nruns), + ', maximum number of populations ', num2str(npops), '.' + ) + ) + } + ninds <- size(rows, 1) + + initialPartition <- admixture_initialization(initData, npops, Z) # TODO: translate + sumcounts_counts_logml = initialCounts( + initialPartition, data, npops, rows, noalle, adjprior + ) # TODO: translate + sumcounts <- sumcounts_counts_logml$sumcounts + counts <- sumcounts_counts_logml$counts + logml <- sumcounts_counts_logml$logml + + PARTITION <- zeros(ninds, 1) + for (i in 1:ninds) { + apu <- rows[i] + PARTITION[i] <- initialPartition(apu[1]) + } + + COUNTS <- counts + SUMCOUNTS <- sumcounts + POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) # TODO: translate + LOGDIFF <- repmat(-Inf, c(ninds, npops)) + rm(initialPartition, counts, sumcounts) + + # PARHAAN MIXTURE-PARTITION ETSIMINEN + nRoundTypes <- 7 + kokeiltu <- zeros(nRoundTypes, 1) + roundTypes <- c(1, 1) # Ykk�svaiheen sykli kahteen kertaan. + ready <- 0 + vaihe <- 1 + + if (dispText) { + print(' ') + print( + paste0( + 'Mixture analysis started with initial', + num2str(npops), + 'populations.' + ) + ) + } + + while (ready != 1) { + muutoksia <- 0 + + if (dispText) { + print(paste('Performing steps:', num2str(roundTypes))) + } + + for (n in 1:length(roundTypes)) { + + round <- roundTypes[n] + kivaluku <- 0 + + if (kokeiltu(round) == 1) { #Askelta kokeiltu viime muutoksen j�lkeen + + } else if (round == 0 | round == 1) { #Yksil�n siirt�minen toiseen populaatioon. + inds <- 1:ninds + aputaulu <- c(t(inds), rand(ninds, 1)) + aputaulu <- sortrows(aputaulu, 2) + inds <- t(aputaulu[, 1]) + muutosNyt <- 0 + + for (ind in inds) { + i1 <- PARTITION[ind] + muutokset_diffInCounts = laskeMuutokset( + ind, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + + if (round == 1) { + maxMuutos <- max_MATLAB(muutokset)[[1]] + i2 <- max_MATLAB(muutokset)[[2]] + } + + if (i1 != i2 & maxMuutos > 1e-5) { + # Tapahtui muutos + muutoksia <- 1 + if (muutosNyt == 0) { + muutosNyt <- 1 + if (dispText) { + print('Action 1') + } + } + kokeiltu <- zeros(nRoundTypes, 1) + kivaluku <- kivaluku + 1 + updateGlobalVariables( + ind, i2, diffInCounts, adjprior, priorTerm + ) + logml <- logml+maxMuutos + if (logml > worstLogml) { + partitionSummary_added = addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary_added <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added == 1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } + } + + if (muutosNyt == 0) { + kokeiltu[round] <- 1 + } + + } else if (round == 2) { # Populaation yhdist�minen toiseen. + maxMuutos <- 0 + for (pop in 1:npops) { + muutokset_diffInCounts <- laskeMuutokset2( + pop, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + isoin <- max_MATLAB(muutokset)[[1]] + indeksi <- max_MATLAB(muutokset)[[2]] + if (isoin > maxMuutos) { + maxMuutos <- isoin + i1 <- pop + i2 <- indeksi + diffInCountsBest <- diffInCounts + } + } + + if (maxMuutos > 1e-5) { + muutoksia <- 1 + kokeiltu <- zeros(nRoundTypes, 1) + updateGlobalVariables2( + i1, i2, diffInCountsBest, adjprior, priorTerm + ) + logml <- logml + maxMuutos + if (dispText) { + print('Action 2') + } + if (logml > worstLogml) { + partitionSummary_added <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added==1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } else { + kokeiltu[round] <- 1 + } + + + } else if (round == 3 || round == 4) { #Populaation jakaminen osiin. + maxMuutos <- 0 + ninds <- size(rows, 1) + for (pop in 1:npops) { + inds2 <- find(PARTITION == pop) + ninds2 <- length(inds2) + if (ninds2 > 2) { + dist2 <- laskeOsaDist(inds2, dist, ninds) + Z2 <- linkage(t(dist2)) + if (round == 3) { + npops2 <- max(min(20, floor(ninds2 / 5)), 2) + } else if (round == 4) { + npops2 <- 2 # Moneenko osaan jaetaan + } + T2 <- cluster_own(Z2, npops2) + muutokset <- laskeMuutokset3( + T2, inds2, rows, data, adjprior, priorTerm, pop + ) + isoin <- max_MATLAB(muutokset)[[1]] + indeksi <- max_MATLAB(muutokset)[[2]] + if (isoin > maxMuutos) { + maxMuutos <- isoin + muuttuvaPop2 <- indeksi %% npops2 + if (muuttuvaPop2==0) muuttuvaPop2 <- npops2 + muuttuvat <- inds2[find(T2 == muuttuvaPop2)] + i2 <- ceiling(indeksi / npops2) + } + } + } + if (maxMuutos > 1e-5) { + muutoksia <- 1 + kokeiltu <- zeros(nRoundTypes, 1) + rivit <- list() + for (i in 1:length(muuttuvat)) { + ind <- muuttuvat[i] + lisa <- rows[ind, 1]:rows[ind, 2] + rivit <- rbind(rivit, t(lisa)) + } + diffInCounts <- computeDiffInCounts( + t(rivit), size(COUNTS, 1), size(COUNTS, 2), data + ) + i1 <- PARTITION(muuttuvat[1]) + updateGlobalVariables3( + muuttuvat, diffInCounts, adjprior, priorTerm, i2 + ) + logml <- logml + maxMuutos + if (dispText) { + if (round == 3) { + print('Action 3') + } else { + print('Action 4') + } + } + if (logml > worstLogml) { + partitionSummary_added <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added==1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + + } else { + kokeiltu[round] <- 1 + } + } else if (round == 5 || round == 6) { + j <- 0 + muutettu <- 0 + poplogml <- POP_LOGML + partition <- PARTITION + counts <- COUNTS + sumcounts <- SUMCOUNTS + logdiff <- LOGDIFF + + pops <- sample(npops) + while (j < npops & muutettu == 0) { + j <- j + 1 + pop <- pops[j] + totalMuutos <- 0 + inds <- find(PARTITION == pop) + if (round == 5) { + aputaulu <- c(inds, rand(length(inds), 1)) + aputaulu <- sortrows(aputaulu, 2) + inds <- t(aputaulu[, 1]) + } else if (round == 6) { + inds <- returnInOrder( + inds, pop, rows, data, adjprior, priorTerm + ) + } + + i <- 0 + + while (length(inds) > 0 & i < length(inds)) { + i <- i + 1 + ind <- inds[i] + + muutokset_diffInCounts <- laskeMuutokset( + ind, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + + muutokset[pop] <- -1e50 # Varmasti ei suurin!!! + maxMuutos <- max_MATLAB(muutokset)[[1]] + i2 <- max_MATLAB(muutokset)[[2]] + updateGlobalVariables( + ind, i2, diffInCounts, adjprior, priorTerm + ) + + totalMuutos <- totalMuutos+maxMuutos + logml <- logml + maxMuutos + if (round == 6) { + # Lopetetaan heti kun muutos on positiivinen. + if (totalMuutos > 1e-5) { + i <- length(inds) + } + } + } + + if (totalMuutos > 1e-5) { + kokeiltu <- zeros(nRoundTypes, 1) + muutettu <- 1 + if (muutoksia == 0) { + muutoksia <- 1 # Ulompi kirjanpito. + if (dispText) { + if (round == 5) { + print('Action 5') + } else { + print('Action 6') + } + } + } + if (logml > worstLogml) { + partitionSummary_added <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added==1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } else { + # Miss��n vaiheessa tila ei parantunut. + # Perutaan kaikki muutokset. + PARTITION <- partition + SUMCOUNTS <- sumcounts + POP_LOGML <- poplogml + COUNTS <- counts + logml <- logml - totalMuutos + LOGDIFF <- logdiff + kokeiltu[round] <- 1 + } + } + rm(partition, sumcounts, counts, poplogml) + + } else if (round == 7) { + emptyPop <- findEmptyPop(npops) + j <- 0 + pops <- sample(npops) + muutoksiaNyt <- 0 + if (emptyPop == -1) { + j <- npops + } + while (j < npops) { + j <- j + 1 + pop <- pops[j] + inds2 <- find(PARTITION == pop) + ninds2 <- length(inds2) + if (ninds2 > 5) { + partition <- PARTITION + sumcounts <- SUMCOUNTS + counts <- COUNTS + poplogml <- POP_LOGML + logdiff <- LOGDIFF + + dist2 <- laskeOsaDist(inds2, dist, ninds); + Z2 <- linkage(t(dist2)) + T2 <- cluster_own(Z2, 2) + muuttuvat <- inds2[find(T2 == 1)] + + muutokset <- laskeMuutokset3( + T2, inds2, rows, data, adjprior, priorTerm, pop + ) + totalMuutos <- muutokset(1, emptyPop) + + rivit <- list() + for (i in 1:length(muuttuvat)) { + ind <- muuttuvat[i] + lisa <- rows[ind, 1]:rows[ind, 2] + rivit <- c(rivit, lisa) + } + diffInCounts <- computeDiffInCounts( + rivit, size(COUNTS, 1), size(COUNTS, 2), data + ) + + updateGlobalVariables3( + muuttuvat, diffInCounts, adjprior, priorTerm, + emptyPop + ) + + muutettu <- 1 + while (muutettu == 1) { + muutettu <- 0 + # Siirret��n yksil�it� populaatioiden v�lill� + muutokset <- laskeMuutokset5( + inds2, rows, data, adjprior, priorTerm, + pop, emptyPop + ) + + maxMuutos <- indeksi <- max_MATLAB(muutokset) + + muuttuva <- inds2(indeksi) + if (PARTITION(muuttuva) == pop) { + i2 <- emptyPop + } else { + i2 <- pop + } + + if (maxMuutos > 1e-5) { + rivit <- rows[muuttuva, 1]:rows[muuttuva, 2] + diffInCounts <- computeDiffInCounts( + rivit, size(COUNTS, 1), size(COUNTS, 2), + data + ) + updateGlobalVariables3( + muuttuva,diffInCounts, adjprior, + priorTerm, i2 + ) + muutettu <- 1 + totalMuutos <- totalMuutos + maxMuutos + } + } + if (totalMuutos > 1e-5) { + muutoksia <- 1 + logml <- logml + totalMuutos + if (logml > worstLogml) { + partitionSummary_added = addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary_added <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added == 1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + if (muutoksiaNyt == 0) { + if (dispText) { + print('Action 7') + } + muutoksiaNyt <- 1 + } + kokeiltu <- zeros(nRoundTypes, 1) + j <- npops + } else { + # palutetaan vanhat arvot + PARTITION <- partition + SUMCOUNTS <- sumcounts + COUNTS <- counts + POP_LOGML <- poplogml + LOGDIFF <- logdiff + } + } + + } + + if (muutoksiaNyt == 0) { + kokeiltu[round] <- 1 + } + + } + } + + if (muutoksia == 0) { + if (vaihe <= 4) { + vaihe <= vaihe + 1 + } else if (vaihe == 5) { + ready <- 1 + } + } else { + muutoksia <- 0 + } + + if (ready == 0) { + if (vaihe == 1) { + roundTypes <- c(1) + } else if (vaihe == 2) { + roundTypes <- c(2, 1) + } else if (vaihe == 3) { + roundTypes <- c(5, 5, 7) + } else if (vaihe == 4) { + roundTypes = c(4, 3, 1) + } else if (vaihe == 5) { + roundTypes <- c(6, 7, 2, 3, 4, 1) + } + } + } + + # TALLENNETAAN + + npops <- poistaTyhjatPopulaatiot(npops) + POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) + if (dispText) { + print(paste('Found partition with', num2str(npops), 'populations.')) + print(paste('Log(ml) =', as.character(logml))) + print(' ') + } + + if (logml > logmlBest) { + # P�ivitet��n parasta l�ydetty� partitiota. + logmlBest <- logml + npopsBest <- npops + partitionBest <- PARTITION + countsBest <- COUNTS + sumCountsBest <- SUMCOUNTS + pop_logmlBest <- POP_LOGML + logdiffbest <- LOGDIFF + } + } + return( + list(logml = logml, npops = npops, partitionSummary = partitionSummary) + ) +} diff --git a/R/initialPopCounts.R b/R/initialPopCounts.R new file mode 100644 index 0000000..2cb4806 --- /dev/null +++ b/R/initialPopCounts.R @@ -0,0 +1,20 @@ +initialPopCounts <- function(data, npops, rows, noalle, adjprior) { + nloci <- size(data, 2) + counts <- zeros(max(noalle), nloci, npops) + sumcounts <- zeros(npops, nloci) + + for (i in 1:npops) { + for (j in 1:nloci) { + i_rivit <- rows(i, 1):rows(i, 2) + havainnotLokuksessa <- find(data[i_rivit, j] >= 0) + sumcounts(i, j) <- length(havainnotLokuksessa) + for (k in 1:noalle[j]) { + alleleCode <- k + N_ijk <- length(find(data[i_rivit, j] == alleleCode)) + counts(k, j, i) <- N_ijk + } + } + } + logml <- laskeLoggis(counts, sumcounts, adjprior) + return(sumcounts = sumcounts, counts = counts, logml = logml) +} diff --git a/R/laskeMuutokset12345.R b/R/laskeMuutokset12345.R new file mode 100644 index 0000000..bfb4d59 --- /dev/null +++ b/R/laskeMuutokset12345.R @@ -0,0 +1,226 @@ +#' @title Calculate changes (?) +#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on +#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran +#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään +#' siirrettävää, on vastaavassa kohdassa rivi nollia. +#' @param osuus Percentages? +#' @param omaFreqs own Freqs? +#' @param osuusTaulu Percentage table? +#' @param logml log maximum likelihood +#' @param COUNTS COUNTS +#' @export +laskeMuutokset4 <- function ( + osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0) +) { + npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) + notEmpty <- which(osuusTaulu > 0.005) + muutokset <- zeros(npops) + empties <- !notEmpty + + for (i1 in notEmpty) { + osuusTaulu[i1] <- osuusTaulu[i1] - osuus + for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) { + osuusTaulu[i2] <- osuusTaulu[i2] + osuus + loggis <- computeIndLogml(omaFreqs, osuusTaulu) + + # Work around Matlab OOB bug + if (i1 > nrow(muutokset)) { + muutokset <- rbind(muutokset, muutokset * 0) + } + if (i2 > ncol(muutokset)) { + muutokset <- cbind(muutokset, muutokset * 0) + } + + muutokset[i1, i2] <- loggis - logml + osuusTaulu[i2] <- osuusTaulu[i2] - osuus + } + osuusTaulu[i1] <- osuusTaulu[i1] + osuus + } + return (muutokset) +} + +# Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi +# muutos logml:ss�, mik�li yksil� ind siirret��n koriin i. +# diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v� +# COUNTS:in siivuun i2, mik�li muutos toteutetaan. +# +# Lis�ys 25.9.2007: +# Otettu k�ytt��n globaali muuttuja LOGDIFF, johon on tallennettu muutokset +# logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. +laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) { + npops <- size(COUNTS, 3) + muutokset <- LOGDIFF[ind, ] + + i1 <- PARTITION[ind] + i1_logml <- POP_LOGML[i1] + muutokset[i1] <- 0 + + rows <- globalRows[ind, 1]:globalRows[ind, 2] + diffInCounts <- computeDiffInCounts( + rows, size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) + + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + + i2 <- find(muutokset == -Inf) # Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. + i2 <- setdiff(i2, i1) + i2_logml <- POP_LOGML[i2] + + ni2 <- length(i2) + + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1)) + new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1)) + + muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + LOGDIFF[ind, ] = muutokset + return(list(muutokset = muutokset, diffInCounts = diffInCounts)) +} + +laskeMuutokset2 <- function(i1, globalRows, data, adjprior, priorTerm) { + # % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi + # % muutos logml:ss�, mik�li korin i1 kaikki yksil�t siirret��n + # % koriin i. + + npops <- size(COUNTS, 3) + muutokset <- zeros(npops, 1) + + i1_logml <- POP_LOGML[i1] + + inds <- find(PARTITION == i1) + ninds <- length(inds) + + if (ninds == 0) { + diffInCounts <- zeros(size(COUNTS, 1), size(COUNTS, 2)) + return() + } + + rows = list() + for (i in 1:ninds) { + ind <- inds(i) + lisa <- globalRows(ind, 1):globalRows(ind, 2) + rows <- c(rows, t(lisa)) + } + + diffInCounts <- computeDiffInCounts( + t(rows), size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) + + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + + i2 <- c(1:i1-1, i1+1:npops) + i2_logml <- POP_LOGML[i2] + + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) + new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) + + muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + return(list(muutokset = muutokset, diffInCounts = diffInCounts)) +} + + +laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1) { + # Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio + # kertoo, mik� olisi muutos logml:ss�, jos populaation i1 osapopulaatio + # inds2(find(T2==i)) siirret��n koriin j. + + npops <- size(COUNTS, 3) + npops2 <- length(unique(T2)) + muutokset <- zeros(npops2, npops) + + i1_logml = POP_LOGML[i1] + for (pop2 in 1:npops2) { + inds <- inds2[find(T2==pop2)] + ninds <- length(inds); + if (ninds > 0) { + rows <- list() + for (i in 1:ninds) { + ind <- inds[i] + lisa <- globalRows[ind, 1]:globalRows[ind, 2] + rows <- c(rows, t(lisa)) + } + diffInCounts <- computeDiffInCounts( + t(rows), size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) + + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + + i2 <- c(1:i1-1, i1+1:npops) + i2_logml <- t(POP_LOGML[i2]) + + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) + new_i2_logml <- t(computePopulationLogml(i2, adjprior, priorTerm)) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) + + muutokset[pop2, i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + } + } + return(muutokset) +} + +laskeMuutokset5 <- function(inds, globalRows, data, adjprior, priorTerm, i1, i2) { + # Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik� olisi + # muutos logml:ss�, mik�li yksil� i vaihtaisi koria i1:n ja i2:n v�lill�. + + ninds <- length(inds) + muutokset <- zeros(ninds, 1) + + i1_logml <- POP_LOGML[i1] + i2_logml <- POP_LOGML[i2] + + for (i in 1:ninds) { + ind <- inds[i] + if (PARTITION[ind] == i1) { + pop1 <- i1 #mist� + pop2 <- i2 #mihin + } else { + pop1 <- i2 + pop2 <- i1 + } + rows <- globalRows[ind, 1]:globalRows[ind, 2] + diffInCounts <- computeDiffInCounts( + rows, size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) + + + + COUNTS[, , pop1] <- COUNTS[, , pop1] - diffInCounts + SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] - diffInSumCounts + COUNTS[, , pop2] <- COUNTS[, , pop2] + diffInCounts + SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] + diffInSumCounts + + new_logmls <- computePopulationLogml(c(i1, i2), adjprior, priorTerm) + muutokset[i] <- sum(new_logmls) + + COUNTS[, , pop1] <- COUNTS[, , pop1] + diffInCounts + SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] + diffInSumCounts + COUNTS[, , pop2] <- COUNTS[, , pop2] - diffInCounts + SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] - diffInSumCounts + } + + muutokset <- muutokset - i1_logml - i2_logml + return(muutokset) +} \ No newline at end of file diff --git a/R/laskeMuutokset4.R b/R/laskeMuutokset4.R deleted file mode 100644 index 56bcef3..0000000 --- a/R/laskeMuutokset4.R +++ /dev/null @@ -1,39 +0,0 @@ -#' @title Calculate changes? -#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on -#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran -#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään -#' siirrettävää, on vastaavassa kohdassa rivi nollia. -#' @param osuus Percentages? -#' @param omaFreqs own Freqs? -#' @param osuusTaulu Percentage table? -#' @param logml log maximum likelihood -#' @param COUNTS COUNTS -#' @export -laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml, - COUNTS = matrix(0)) { - npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) - notEmpty <- which(osuusTaulu > 0.005) - muutokset <- zeros(npops) - empties <- !notEmpty - - for (i1 in notEmpty) { - osuusTaulu[i1] <- osuusTaulu[i1] - osuus - for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) { - osuusTaulu[i2] <- osuusTaulu[i2] + osuus - loggis <- computeIndLogml(omaFreqs, osuusTaulu) - - # Work around Matlab OOB bug - if (i1 > nrow(muutokset)) { - muutokset <- rbind(muutokset, muutokset * 0) - } - if (i2 > ncol(muutokset)) { - muutokset <- cbind(muutokset, muutokset * 0) - } - - muutokset[i1, i2] <- loggis - logml - osuusTaulu[i2] <- osuusTaulu[i2] - osuus - } - osuusTaulu[i1] <- osuusTaulu[i1] + osuus - } - return (muutokset) -} \ No newline at end of file diff --git a/R/laskeOsaDist.R b/R/laskeOsaDist.R new file mode 100644 index 0000000..1ea9495 --- /dev/null +++ b/R/laskeOsaDist.R @@ -0,0 +1,19 @@ +laskeOsaDist <- function(inds2, dist, ninds) { + # % Muodostaa dist vektorista osavektorin, joka sis�lt�� yksil�iden inds2 + # % v�liset et�isyydet. ninds=kaikkien yksil�iden lukum��r�. + + ninds2 <- length(inds2) + apu <- zeros(nchoosek(ninds2, 2), 2) + rivi <- 1 + for (i in 1:ninds2-1) { + for (j in i+1:ninds2) { + apu[rivi, 1] <- inds2[i] + apu[rivi, 2] <- inds2[j] + rivi <- rivi + 1 + } + } + apu <- (apu[, 1]-1) * ninds - apu[, 1] / 2 * + (apu[, 1]-1) + (apu[, 2] - apu[, 1]) + dist2 <- dist(apu) + return(dist2) +} \ No newline at end of file diff --git a/R/matlab2r.R b/R/matlab2r.R new file mode 100644 index 0000000..e041634 --- /dev/null +++ b/R/matlab2r.R @@ -0,0 +1,49 @@ +#' @title Convert Matlab function to R +#' @description Performs basic syntax conversion from Matlab to R +#' @param filename name of the file +#' @param saveOutput if `TRUE`, `filename` is overwritten. Defaults to `FALSE` +#' @return text converted to R, printed to screen or replacing input file +#' @author Waldir Leoncio +#' @export +matlab2r <- function(filename, saveOutput = FALSE) { + # Verification + if (!file.exists(filename)) stop("File not found") + # Reading file into R + txt <- readLines(filename) + # Replacing text + txt <- gsub( + pattern = "function (.+)\\s+=\\s*(.+)\\((.+)\\)", + replacement = "\\2 <- function(\\3) { return(\\1)", + x = txt + ) + # txt <- gsub("\\%\\s*(\\w+)", "# \\1", txt) + txt <- gsub(";", "", txt) + txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt) + txt <- gsub("end", "}", txt) + txt <- gsub("(.),(\\S)", "\\1, \\2", txt) + # TODO: replace forms like (:,:) with [, ] + # TODO: add argument to skip some of these rules + txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after { + txt <- gsub("else$", "} else {", txt) + txt <- gsub("elseif", "} else if", txt) + txt <- gsub("\\(~", "(!", txt) + txt <- gsub("while (.+)", "while \\1 {", txt) + ## Math operators + txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt) + txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt) + txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt) + # Returning converted code + if (!saveOutput) { + return(cat(txt, sep="\n")) + } else { + return( + write.table( + x = txt, + file = filename, + quote = FALSE, + row.names = FALSE, + col.names = FALSE + ) + ) + } +} diff --git a/R/min.R b/R/min.R deleted file mode 100644 index 2d8d617..0000000 --- a/R/min.R +++ /dev/null @@ -1,16 +0,0 @@ -#' @title Minimum (MATLAB version) -#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead -#' @param X matrix -#' @param indices return indices? -#' @return Either a list or a vector -#' @author Waldir Leoncio -#' @export -min_MATLAB <- function(X, indices = TRUE) { - mins <- apply(X, 2, min) - idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) - if (indices) { - return(list(mins = mins, idx = idx)) - } else { - return(mins) - } -} \ No newline at end of file diff --git a/R/min_max_MATLAB.R b/R/min_max_MATLAB.R new file mode 100644 index 0000000..7ad9d0e --- /dev/null +++ b/R/min_max_MATLAB.R @@ -0,0 +1,31 @@ +#' @title Minimum (MATLAB version) +#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead +#' @param X matrix +#' @param indices return indices? +#' @return Either a list or a vector +#' @author Waldir Leoncio +min_MATLAB <- function(X, indices = TRUE) { + mins <- apply(X, 2, min) + idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) + if (indices) { + return(list(mins = mins, idx = idx)) + } else { + return(mins) + } +} + +#' @title Maximum (MATLAB version) +#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead +#' @param X matrix +#' @param indices return indices? +#' @return Either a list or a vector +#' @author Waldir Leoncio +max_MATLAB <- function(X, indices = TRUE) { + maxs <- apply(X, 2, max) + idx <- sapply(seq_len(ncol(X)), function(x) match(maxs[x], X[, x])) + if (indices) { + return(list(maxs = maxs, idx = idx)) + } else { + return(maxs) + } +} \ No newline at end of file diff --git a/R/nargin.R b/R/nargin.R new file mode 100644 index 0000000..6ff20cc --- /dev/null +++ b/R/nargin.R @@ -0,0 +1,10 @@ +#' @title Number of function input arguments +#' @description Returns the number of arguments passed to the parent function +#' @return An integer +#' @author Waldir Leoncio +#' @note This function only makes sense inside another function +#' @references https://stackoverflow.com/q/64422780/1169233 +nargin <- function() { + if(sys.nframe() < 2) stop("must be called from inside a function") + length(as.list(sys.call(-1))) - 1 +} diff --git a/R/poistaTyhjatPopulaatiot.R b/R/poistaTyhjatPopulaatiot.R new file mode 100644 index 0000000..b811586 --- /dev/null +++ b/R/poistaTyhjatPopulaatiot.R @@ -0,0 +1,15 @@ +poistaTyhjatPopulaatiot <- function(npops) { + # % Poistaa tyhjentyneet populaatiot COUNTS:ista ja + # % SUMCOUNTS:ista. P�ivitt�� npops:in ja PARTITION:in. + notEmpty <- find(any(SUMCOUNTS, 2)) + COUNTS <<- COUNTS[, , notEmpty] + SUMCOUNTS <<- SUMCOUNTS[notEmpty, ] + LOGDIFF <<- LOGDIFF[, notEmpty] + + for (n in 1:length(notEmpty)) { + apu <- find(PARTITION == notEmpty(n)) + PARTITION(apu) <<- n + } + npops <- length(notEmpty) + return(npops) +} \ No newline at end of file diff --git a/R/returnInOrder.R b/R/returnInOrder.R new file mode 100644 index 0000000..c6a48d8 --- /dev/null +++ b/R/returnInOrder.R @@ -0,0 +1,26 @@ +returnInOrder <- function(inds, pop, globalRows, data, adjprior, priorTerm) { + # % Palauttaa yksil�t j�rjestyksess� siten, ett� ensimm�isen� on + # % se, jonka poistaminen populaatiosta pop nostaisi logml:n + # % arvoa eniten. + + ninds <- length(inds) + apuTaulu <- c(inds, zeros(ninds, 1)) + + for (i in 1:ninds) { + ind <- inds[i] + rows <- globalRows[i, 1]:globalRows[i, 2] + diffInCounts <- computeDiffInCounts( + rows, size[COUNTS, 1], size[COUNTS, 2], data + ) + diffInSumCounts <- sum(diffInCounts) + + COUNTS[ , ,pop] <- COUNTS[ , ,pop] - diffInCounts + SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] - diffInSumCounts + apuTaulu[i, 2] <- computePopulationLogml(pop, adjprior, priorTerm) + COUNTS[ , ,pop] <- COUNTS[ , ,pop] + diffInCounts + SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] + diffInSumCounts + } + apuTaulu <- sortrows(apuTaulu, 2) + inds <- apuTaulu[ninds:1, 1] + return(inds) +} \ No newline at end of file diff --git a/R/setdiff_MATLAB.R b/R/setdiff_MATLAB.R new file mode 100644 index 0000000..3c2324b --- /dev/null +++ b/R/setdiff_MATLAB.R @@ -0,0 +1,16 @@ +#' @title Set differences of two arrays +#' @description Loosely replicates the behavior of the homonym Matlab function +#' @param A first array +#' @param B second array +#' @param legacy if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported) +#' @author Waldir Leoncio +setdiff_MATLAB <- function(A, B, legacy = FALSE) { + if (legacy) message("legacy=TRUE not supported. Ignoring.") + if (is(A, "numeric") & is(B, "numeric")) { + values <- sort(unique(A[is.na(match(A, B))])) + } else if (is(A, "data.frame") & is(B, "data.frame")) { + stop("Not implemented for data frames") + } + # TODO: add support for indices (if necessary) + return(values) +} \ No newline at end of file diff --git a/R/updateGlobalVariables.R b/R/updateGlobalVariables.R new file mode 100644 index 0000000..4b17469 --- /dev/null +++ b/R/updateGlobalVariables.R @@ -0,0 +1,64 @@ +updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) { + # % Suorittaa globaalien muuttujien muutokset, kun yksil� ind + # % on siirret��n koriin i2. + + i1 <- PARTITION(ind) + PARTITION(ind) <<- i2 + + COUNTS[, , i1] <<- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <<- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <<- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <<- SUMCOUNTS[i2, ] + sum[diffInCounts] + + POP_LOGML[c(i1, i2)] <<- computePopulationLogml( + c(i1, i2), adjprior, priorTerm + ) + + LOGDIFF[, c(i1, i2)] <<- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION==i2)) + LOGDIFF[inx, ] <<- -Inf +} + +updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) { + # % Suorittaa globaalien muuttujien muutokset, kun kaikki + # % korissa i1 olevat yksil�t siirret��n koriin i2. + + inds <- find(PARTITION == i1) + PARTITION(inds) <<- i2 + + COUNTS[, , i1] <<- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <<- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <<- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <<- SUMCOUNTS[i2, ] + sum[diffInCounts] + + POP_LOGML[i1] <- 0 + POP_LOGML[i2] <- computePopulationLogml(i2, adjprior, priorTerm) + + LOGDIFF[, c(i1, i2)] <- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION == i2)) + LOGDIFF[inx, ] <- -Inf +} + +updateGlobalVariables3 <- function( + muuttuvat, diffInCounts, adjprior, priorTerm, i2 +) { + # % Suorittaa globaalien muuttujien p�ivitykset, kun yksil�t 'muuttuvat' + # % siirret��n koriin i2. Ennen siirtoa yksil�iden on kuuluttava samaan + # % koriin. + + i1 <- PARTITION[muuttuvat(1)] + PARTITION[muuttuvat] <<- i2 + + COUNTS[, , i1] <<- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <<- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <<- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <<- SUMCOUNTS[i2, ] + sum[diffInCounts] + + POP_LOGML[c(i1, i2)] <<- computePopulationLogml( + c(i1, i2), adjprior, priorTerm + ) + + LOGDIFF[, c(i1, i2)] <<- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION == i2)) + LOGDIFF[inx, ] <<- -Inf +} diff --git a/R/writeMixtureInfo.R b/R/writeMixtureInfo.R index ca8efea..ec5120b 100644 --- a/R/writeMixtureInfo.R +++ b/R/writeMixtureInfo.R @@ -10,16 +10,10 @@ #' @param partitionSummary partitionSummary #' @param popnames popnames #' @param fixedK fixedK -#' @param PARTITION PARTITION -#' @param COUNTS COUNTS -#' @param SUMCOUNTS SUMCOUNTS -#' @param LOGDIFF LOGDIFF #' @export writeMixtureInfo <- function( - logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS, - LOGDIFF + logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK ) { - changesInLogml <- list() ninds <- size(data, 1) / rowsFromInd npops <- size(COUNTS, 3) @@ -30,7 +24,6 @@ writeMixtureInfo <- function( fid <- load(outPutFile) } else { fid <- -1 - message('Diverting output to baps4_output.baps') # TODO: replace sink with option that will record input and output sink('baps4_output.baps', split=TRUE) # save in text anyway. } diff --git a/data/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls b/inst/ext/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls similarity index 100% rename from data/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls rename to inst/ext/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls diff --git a/data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt b/inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt rename to inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt diff --git a/data/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt b/inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt similarity index 100% rename from data/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt rename to inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt diff --git a/data/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt b/inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt rename to inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta b/inst/ext/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta similarity index 100% rename from data/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta rename to inst/ext/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta diff --git a/data/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt b/inst/ext/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt rename to inst/ext/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt diff --git a/data/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt b/inst/ext/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt rename to inst/ext/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt diff --git a/data/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt b/inst/ext/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt rename to inst/ext/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt diff --git a/data/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt b/inst/ext/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt rename to inst/ext/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt diff --git a/data/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt b/inst/ext/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt rename to inst/ext/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt diff --git a/data/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt b/inst/ext/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt rename to inst/ext/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt diff --git a/data/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt b/inst/ext/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt rename to inst/ext/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt diff --git a/data/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa b/inst/ext/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa similarity index 100% rename from data/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa rename to inst/ext/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa diff --git a/data/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls b/inst/ext/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls similarity index 100% rename from data/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls rename to inst/ext/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls diff --git a/data/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt b/inst/ext/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt rename to inst/ext/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt diff --git a/data/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt b/inst/ext/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt rename to inst/ext/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt diff --git a/data/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt b/inst/ext/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt rename to inst/ext/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt diff --git a/data/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt b/inst/ext/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt rename to inst/ext/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt diff --git a/data/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls b/inst/ext/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls similarity index 100% rename from data/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls rename to inst/ext/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls diff --git a/data/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls b/inst/ext/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls similarity index 100% rename from data/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls rename to inst/ext/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls diff --git a/data/ExamplesDataFormatting/Notes.md b/inst/ext/ExamplesDataFormatting/Notes.md similarity index 100% rename from data/ExamplesDataFormatting/Notes.md rename to inst/ext/ExamplesDataFormatting/Notes.md diff --git a/man/admixture_initialization.Rd b/man/admixture_initialization.Rd new file mode 100644 index 0000000..c10e4f8 --- /dev/null +++ b/man/admixture_initialization.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/admixture_initialization.R +\name{admixture_initialization} +\alias{admixture_initialization} +\title{Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.} +\usage{ +admixture_initialization(data_matrix, nclusters, Z) +} +\arguments{ +\item{data_matrix}{data_matrix} + +\item{nclusters}{ncluster} + +\item{Z}{Z} +} +\description{ +Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. +} diff --git a/man/laskeMuutokset4.Rd b/man/laskeMuutokset4.Rd index 7c16a19..e3382de 100644 --- a/man/laskeMuutokset4.Rd +++ b/man/laskeMuutokset4.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/laskeMuutokset4.R +% Please edit documentation in R/laskeMuutokset12345.R \name{laskeMuutokset4} \alias{laskeMuutokset4} -\title{Calculate changes?} +\title{Calculate changes (?)} \usage{ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)) } @@ -20,6 +20,6 @@ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)) \description{ Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran -todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään +todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään siirrettävää, on vastaavassa kohdassa rivi nollia. } diff --git a/man/max_MATLAB.Rd b/man/max_MATLAB.Rd new file mode 100644 index 0000000..d7fbdf7 --- /dev/null +++ b/man/max_MATLAB.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/min_max_MATLAB.R +\name{max_MATLAB} +\alias{max_MATLAB} +\title{Maximum (MATLAB version)} +\usage{ +max_MATLAB(X, indices = TRUE) +} +\arguments{ +\item{X}{matrix} + +\item{indices}{return indices?} +} +\value{ +Either a list or a vector +} +\description{ +Finds the minimum value for each column of a matrix, potentially returning the indices instead +} +\author{ +Waldir Leoncio +} diff --git a/man/min_MATLAB.Rd b/man/min_MATLAB.Rd index 9376fed..9825e90 100644 --- a/man/min_MATLAB.Rd +++ b/man/min_MATLAB.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/min.R +% Please edit documentation in R/min_max_MATLAB.R \name{min_MATLAB} \alias{min_MATLAB} \title{Minimum (MATLAB version)} diff --git a/man/nargin.Rd b/man/nargin.Rd new file mode 100644 index 0000000..a7922a6 --- /dev/null +++ b/man/nargin.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nargin.R +\name{nargin} +\alias{nargin} +\title{Number of function input arguments} +\usage{ +nargin() +} +\value{ +An integer +} +\description{ +Returns the number of arguments passed to the parent function +} +\note{ +This function only makes sense inside another function +} +\references{ +https://stackoverflow.com/q/64422780/1169233 +} +\author{ +Waldir Leoncio +} diff --git a/man/setdiff_MATLAB.Rd b/man/setdiff_MATLAB.Rd new file mode 100644 index 0000000..dcc6c00 --- /dev/null +++ b/man/setdiff_MATLAB.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setdiff_MATLAB.R +\name{setdiff_MATLAB} +\alias{setdiff_MATLAB} +\title{Set differences of two arrays} +\usage{ +setdiff_MATLAB(A, B, legacy = FALSE) +} +\arguments{ +\item{A}{first array} + +\item{B}{second array} + +\item{legacy}{if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported)} +} +\description{ +Loosely replicates the behavior of the homonym Matlab function +} +\author{ +Waldir Leoncio +} diff --git a/man/writeMixtureInfo.Rd b/man/writeMixtureInfo.Rd index e973db9..f30bde0 100644 --- a/man/writeMixtureInfo.Rd +++ b/man/writeMixtureInfo.Rd @@ -14,11 +14,7 @@ writeMixtureInfo( inputFile, partitionSummary, popnames, - fixedK, - PARTITION, - COUNTS, - SUMCOUNTS, - LOGDIFF + fixedK ) } \arguments{ @@ -41,14 +37,6 @@ writeMixtureInfo( \item{popnames}{popnames} \item{fixedK}{fixedK} - -\item{PARTITION}{PARTITION} - -\item{COUNTS}{COUNTS} - -\item{SUMCOUNTS}{SUMCOUNTS} - -\item{LOGDIFF}{LOGDIFF} } \description{ Writes information about the mixture diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index bfc3fe8..52d14f8 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -158,18 +158,18 @@ test_that("find works as expected", { }) test_that("sortrows works as expected", { - mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4) - expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4)) - expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4)) - expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) + mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4) + expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4)) + expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4)) + expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) }) test_that("cell works as expected", { - expect_equal(cell(0), array(dim = c(0, 0))) - expect_equal(cell(1), array(dim = c(1, 1))) - expect_equal(cell(2), array(dim = c(2, 2))) - expect_equal(cell(3, 4), array(dim = c(3, 4))) - expect_equal(cell(5, 7, 6), array(dim = c(5, 7, 6))) + expect_equivalent(cell(0), array(0, dim = c(0, 0))) + expect_equivalent(cell(1), array(0, dim = c(1, 1))) + expect_equivalent(cell(2), array(0, dim = c(2, 2))) + expect_equivalent(cell(3, 4), array(0, dim = c(3, 4))) + expect_equivalent(cell(5, 7, 6), array(0, dim = c(5, 7, 6))) }) test_that("blanks works as expected", { @@ -201,4 +201,44 @@ test_that("isspace works as expected", { X <- '\t a b\tcde f' expect_identical(isspace(chr), c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0)) expect_identical(isspace(X), c(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0)) +}) + +test_that("nargin works correctly", { + addme <- function(a, b) { + if (nargin() == 2) { + c <- a + b + } else if (nargin() == 1) { + c <- a + a + } else { + c <- 0 + } + return(c) + } + expect_equal(addme(13, 42), 55) + expect_equal(addme(13), 26) + expect_equal(addme(), 0) +}) + +test_that("setdiff works as expected", { + A <- c(3, 6, 2, 1, 5, 1, 1) + B <- c(2, 4, 6) + C <- c(1, 3, 5) + # expect_equal(setdiff_MATLAB(A, B), C) # TODO: export setdiff_MATLAB + A <- data.frame( + Var1 = 1:5, + Var2 = LETTERS[1:5], + Var3 = c(FALSE, TRUE, FALSE, TRUE, FALSE) + ) + B <- data.frame( + Var1 = seq(1, 9, by = 2), + Var2 = LETTERS[seq(1, 9, by = 2)], + Var3 = rep(FALSE, 5) + ) + C <- data.frame( + Var1 = c(2, 4), + Var2 = c('B', 'D'), + Var3 = c(TRUE, TRUE) + ) + # expect_equal(setdiff_MATLAB(A, B), C) # TODO: implement for data frames + # TODO: add more examples from https://se.mathworks.com/help/matlab/ref/double.setdiff.html;jsessionid=0d8d42582d4d299b8224403899f1 }) \ No newline at end of file diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index 9a1c05a..f1faf53 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -1,14 +1,10 @@ -# library(devtools)#TEMP -library(testthat)#TEMP -# library(rBAPS)#TEMP - context("Opening files on greedyMix") -greedyMix( - tietue = "data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt", - format = "GenePop", - savePreProcessed = FALSE -) +# greedyMix( +# tietue = "inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt", +# format = "GenePop", +# savePreProcessed = FALSE +# ) context("Linkage")