Merge work on greedyMix into develop

2021-03-31 10:40:29 +02:00 · 2021-03-31 10:40:29 +02:00 · 3fd1cac27b
commit 3fd1cac27b
parent bdf512fca7 b35053fd3a
17 changed files with 171 additions and 96 deletions
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 Package: rBAPS
 Title: Bayesian Analysis of Population Structure
-Version: 0.0.0.9001
+Version: 0.0.0.9002
 Date: 2020-11-09
 Authors@R:
    c(
--- a/R/cell.R
+++ b/R/cell.R
@ -2,9 +2,14 @@
 #' @description Creates an array of zeros
 #' @param n a the first dimension (or both, if sz is not passed)
 #' @param sz the second dimension (or 1st and 2nd, if not passed)
+#' @param expandable if TRUE, output is a list (so it can take different
+#' lengths)
 #' @param ... Other dimensions
 #' @return An array of zeroes with the dimensions passed on call
-cell <- function(n, sz = c(n, n), ...) {
+cell <- function(n, sz = c(n, n), expandable=FALSE, ...) {
+	if (expandable) {
+		return(vector("list", length = n))
+	}
 	if (length(sz) == 1 & missing(...)) {
 		return(array(0, dim = c(n, sz)))
 	} else if (length(sz) == 2) {
--- a/R/computeDiffInCounts.R
+++ b/R/computeDiffInCounts.R
@ -4,9 +4,9 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) {
 	# % riveill<6C> rows. rows pit<69><74> olla vaakavektori.

 	diffInCounts <- zeros(max_noalle, nloci)
-	for (i in rows) {
+	for (i in seq_len(nrow(data)) ) {
 		row <- data[i, ]
-		notEmpty <- find(row>=0)
+		notEmpty <- as.matrix(find(row>=0))

 		if (length(notEmpty) > 0) {
 			diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] <-
--- a/R/computePopulationLogml.R
+++ b/R/computePopulationLogml.R
@ -1,18 +1,26 @@
 computePopulationLogml <- function(pops, adjprior, priorTerm) {
 	# Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset

+	# ======================================================== #
+	# Limiting COUNTS size                                     #
+	# ======================================================== #
+	COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop=FALSE]
+
 	x <- size(COUNTS, 1)
 	y <- size(COUNTS, 2)
 	z <- length(pops)

-	popLogml <- squeeze(
-		# FIXME: assumes COUNTS has 3 dims. Where does this come from?
+	# ======================================================== #
+	# Computation                                              #
+	# ======================================================== #
+	isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
+	term1 <- squeeze(
 		sum(
 			sum(
 				reshape(
 					lgamma(
 						repmat(adjprior, c(1, 1, length(pops))) +
-							COUNTS[, , pops]
+							COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop=!isarray]
 					),
 					c(x, y, z)
 				),
@ -20,6 +28,8 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
 			),
 			2
 		)
-	) - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
+	)
+	if (is.null(priorTerm)) priorTerm <- 0
+	popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
 	return(popLogml)
 }
--- a/R/find.R
+++ b/R/find.R
@ -1,10 +1,15 @@
 #' @title Find indices and values of nonzero elements
 #' @description Emulates behavior of `find`
 #' @param x object or logic operation on an object
-find <- function(x) {
+#' @param sort sort output?
+find <- function(x, sort=TRUE) {
 	if (is.logical(x)) {
-		return(which(x))
+		out <- which(x)
 	} else {
-		return(which(x > 0))
+		out <- which(x > 0)
 	}
+	if (sort) {
+		out <- sort(out)
+	}
+	return(out)
 }
--- a/R/globals.R
+++ b/R/globals.R
@ -1,8 +1,8 @@
 COUNTS <- array(0, dim=c(100, 100, 100))
 SUMCOUNTS <- array(0, dim=c(100, 100))
-PARTITION <- vector()
-POP_LOGML <- vector()
-LOGDIFF <- vector()
+PARTITION <- array(1, dim=c(100))
+POP_LOGML <-  array(1, dim=c(100))
+LOGDIFF <-  array(1, dim=c(100, 100))
 # If handling globas break, try other ideas from https://stackoverflow.com/a/65252740/1169233


--- a/R/greedyMix.R
+++ b/R/greedyMix.R
@ -68,19 +68,20 @@ greedyMix <- function(
 			# 	fprintf(1,'Data: %s\n',[pathname filename]);
 			# end

-			data <- read.delim(pathname_filename) # TODO: discover delimiter
+			data <- read.delim(pathname_filename, header = FALSE, sep = " ")
+			data <- as.matrix(data)
 			ninds <- testaaOnkoKunnollinenBapsData(data)  # testing
 			if (ninds == 0) stop('Incorrect Data-file')

 			# ASK: remove?
 			# h0 = findobj('Tag','filename1_text');
 			# set(h0,'String',filename); clear h0;
-			cat(
+			message(
 				'When using data which are in BAPS-format,',
 				'you can specify the sampling populations of the',
 				'individuals by giving two additional files:',
 				'one containing the names of the populations,',
-				'the other containing the indices of the first',
+				'the other containing the indices of the first ',
 				'individuals of the populations.'
 			)
 			input_pops <- inputdlg(
@ -104,8 +105,17 @@ greedyMix <- function(
 				popnames <- ""
 			}

-			# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO: translate this function
-			# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: translate
+			temp_handleData <- handleData(data)
+			data <- temp_handleData$newData
+			rowsFromInd <- temp_handleData$rowsFromInd
+			alleleCodes <- temp_handleData$alleleCodes
+			noalle <- temp_handleData$noalle
+			adjprior <- temp_handleData$adjprior
+			priorTerm <- temp_handleData$priorTerm
+			Z_dist <- newGetDistances(data,rowsFromInd)
+			Z <- Z_dist$Z
+			dist <- Z_dist$dist
+			rm(temp_handleData, Z_dist)
 			if (is.null(savePreProcessed)) {
 				save_preproc <- questdlg(
 					quest = 'Do you wish to save pre-processed data?',
@ -307,7 +317,7 @@ greedyMix <- function(
 		# npops <- logml_npops_partitionSummary$npops
 		# partitionSummary <- logml_npops_partitionSummary$partitionSummary
 	} else {
-		logml_npops_partitionSummary <- indMix(c) # TODO: translate
+		logml_npops_partitionSummary <- indMix(c)
 		logml <- logml_npops_partitionSummary$logml
 		npops <- logml_npops_partitionSummary$npops
 		partitionSummary <- logml_npops_partitionSummary$partitionSummary
--- a/R/handleData.R
+++ b/R/handleData.R
@ -20,7 +20,7 @@ handleData <- function(raw_data) {
 	# koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi.
 	# T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
 	# koodit saavat arvoja v?lill?1,...,noalle(j).
-	data <- raw_data
+	data <- as.matrix(raw_data)
 	nloci <- size(raw_data, 2) - 1

 	dataApu <- data[, 1:nloci]
@ -35,26 +35,31 @@ handleData <- function(raw_data) {
 	# isoinAlleeli <- []

 	noalle <- zeros(1, nloci)
-	alleelitLokuksessa <- cell(nloci, 1)
+	alleelitLokuksessa <- cell(nloci, 1, expandable=TRUE)
 	for (i in 1:nloci) {
 		alleelitLokuksessaI <- unique(data[, i])
-		alleelitLokuksessaI_pos <- find(alleelitLokuksessaI >= 0)
-		alleelitLokuksessa[i, 1] <- ifelse(
-			test = length(alleelitLokuksessaI_pos) > 0,
-			yes  = alleelitLokuksessaI[alleelitLokuksessaI_pos],
-			no   = 0
+		alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
+			find(
+				alleelitLokuksessaI >= 0
 			)
-		noalle[i] <- length(alleelitLokuksessa[i, 1])
+		])
+		noalle[i] <- length(alleelitLokuksessa[[i]])
 	}
 	alleleCodes <- zeros(max(noalle), nloci)
 	for (i in 1:nloci) {
-		alleelitLokuksessaI <- alleelitLokuksessa[i, 1]
+		alleelitLokuksessaI <- alleelitLokuksessa[[i]]
 		puuttuvia <- max(noalle) - length(alleelitLokuksessaI)
 		alleleCodes[, i] <- as.matrix(
 			c(alleelitLokuksessaI, zeros(puuttuvia, 1))
 		)
 	}

+	for (loc in seq_len(nloci)) {
+		for (all in seq_len(noalle[loc])) {
+			data[find(data[, loc] == alleleCodes[all, loc]), loc] <- all
+		}
+	}
+
 	nind <- max(data[, ncol(data)])
 	nrows <- size(data, 1)
 	ncols <- size(data, 2)
--- a/R/indMix.R
+++ b/R/indMix.R
@ -21,18 +21,13 @@ indMix <- function(c, npops, dispText) {

 	rm(c)
 	nargin <- length(as.list(match.call())) - 1
-
 	if (nargin < 2) {
 		dispText <- 1
 		npopstext <- matrix()
 		ready <- FALSE
 		teksti <- 'Input upper bound to the number of populations (possibly multiple values)' # TODO: add "likely ncol(Z) values"?
 		while (!ready) {
-			npopstextExtra <- inputdlg(
-				teksti,
-				1,
-				'20'
-			)
+			npopstextExtra <- inputdlg(teksti, 1, '20')
 			if (isempty(npopstextExtra)) { # Painettu Cancel:ia
 				return()
 			}
@ -52,7 +47,7 @@ indMix <- function(c, npops, dispText) {
 		} else {
 			npopsTaulu <- as.numeric(npopstext)
 			ykkoset <- find(npopsTaulu == 1)
-			npopsTaulu[ykkoset] <- list() # Mik<69>li ykk<6B>si<73> annettu yl<79>rajaksi, ne poistetaan.
+			npopsTaulu[ykkoset] <- NA # Mik<69>li ykk<6B>si<73> annettu yl<79>rajaksi, ne poistetaan (if ones are given as an upper limit, they are deleted)
 			if (isempty(npopsTaulu)) {
 				logml <- 1
 				partitionSummary <- 1
@ -79,10 +74,11 @@ indMix <- function(c, npops, dispText) {
 		npops <- npopsTaulu[[run]]
 		if (dispText) {
 			dispLine()
-			print(
+			cat(
 				paste0(
 					'Run ', as.character(run), '/', as.character(nruns),
-					', maximum number of populations ', as.character(npops), '.'
+					', maximum number of populations ', as.character(npops),
+					'.\n'
 				)
 			)
 		}
@ -115,12 +111,11 @@ indMix <- function(c, npops, dispText) {
 		vaihe <- 1

 		if (dispText) {
-			print(' ')
-			print(
+			message(
 				paste0(
-					'Mixture analysis started with initial',
+					'\nMixture analysis started with initial ',
 					as.character(npops),
-					'populations.'
+					' populations.'
 				)
 			)
 		}
@ -129,7 +124,7 @@ indMix <- function(c, npops, dispText) {
 			muutoksia <- 0

 			if (dispText) {
-				print(paste('Performing steps:', as.character(roundTypes)))
+				message(paste('\nPerforming steps:', as.character(roundTypes)))
 			}

 			for (n in 1:length(roundTypes)) {
@ -137,26 +132,26 @@ indMix <- function(c, npops, dispText) {
 				round <- roundTypes[n]
 				kivaluku <- 0

-				if (kokeiltu(round) == 1) { #Askelta kokeiltu viime muutoksen j<>lkeen
+				if (kokeiltu[round] == 1) { #Askelta kokeiltu viime muutoksen j<>lkeen

 				} else if (round == 0 | round == 1) { #Yksil<69>n siirt<72>minen toiseen populaatioon.
 					inds <- 1:ninds
-					aputaulu <- c(t(inds), rand(ninds, 1))
+					aputaulu <- cbind(inds, rand(ninds, 1))
 					aputaulu <- sortrows(aputaulu, 2)
 					inds <- t(aputaulu[, 1])
 					muutosNyt <- 0

 					for (ind in inds) {
 						i1 <- PARTITION[ind]
-						muutokset_diffInCounts = laskeMuutokset(
+						muutokset_diffInCounts <- laskeMuutokset(
 							ind, rows, data, adjprior, priorTerm
 						)
 						muutokset <- muutokset_diffInCounts$muutokset
 						diffInCounts <- muutokset_diffInCounts$diffInCounts

 						if (round == 1) {
-							maxMuutos <- max_MATLAB(muutokset)[[1]]
-							i2 <- max_MATLAB(muutokset)[[2]]
+							maxMuutos <- max_MATLAB(muutokset)$max
+							i2 <- max_MATLAB(muutokset)$idx
 						}

 						if (i1 != i2 & maxMuutos > 1e-5) {
@ -164,25 +159,26 @@ indMix <- function(c, npops, dispText) {
 							muutoksia <- 1
 							if (muutosNyt == 0) {
 								muutosNyt <- 1
-								if (dispText) {
-									print('Action 1')
-								}
+								if (dispText) message('Action 1')
 							}
 							kokeiltu <- zeros(nRoundTypes, 1)
 							kivaluku <- kivaluku + 1
 							updateGlobalVariables(
 								ind, i2, diffInCounts, adjprior, priorTerm
 							)
-							logml <- logml+maxMuutos
+							logml <- logml + maxMuutos
 							if (logml > worstLogml) {
-								partitionSummary_added = addToSummary(
+								temp_addToSum <- addToSummary(
 									logml, partitionSummary, worstIndex
 								)
-								partitionSummary_added <- partitionSummary_added$partitionSummary
-								added <- partitionSummary_added$added
+								partitionSummary <- temp_addToSum$partitionSummary
+								added <- temp_addToSum$added
 								if (added == 1) {
-									worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
-									worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
+									temp_minMATLAB <- min_MATLAB(
+										partitionSummary[, 2]
+									)
+									worstLogml <- temp_minMATLAB[[1]]
+									worstIndex <- temp_minMATLAB[[2]]
 								}
 							}
 						}
@ -191,7 +187,6 @@ indMix <- function(c, npops, dispText) {
 					if (muutosNyt == 0) {
 						kokeiltu[round] <- 1
 					}
-
 				} else if (round == 2) { # Populaation yhdist<73>minen toiseen.
 					maxMuutos <- 0
 					for (pop in 1:npops) {
@ -218,14 +213,14 @@ indMix <- function(c, npops, dispText) {
 						)
 						logml <- logml + maxMuutos
 						if (dispText) {
-							print('Action 2')
+							cat('Action 2')
 						}
 						if (logml > worstLogml) {
-							partitionSummary_added <- addToSummary(
+							temp_addToSum <- addToSummary(
 								logml, partitionSummary, worstIndex
 							)
-							partitionSummary <- partitionSummary_added$partitionSummary
-							added <- partitionSummary_added$added
+							partitionSummary <- temp_addToSum$partitionSummary
+							added <- temp_addToSum$added
 							if (added==1) {
 								worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
 								worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
@ -284,17 +279,17 @@ indMix <- function(c, npops, dispText) {
 						logml <- logml + maxMuutos
 						if (dispText) {
 							if (round == 3) {
-								print('Action 3')
+								cat('Action 3')
 							} else {
-								print('Action 4')
+								cat('Action 4')
 							}
 						}
 						if (logml > worstLogml) {
-							partitionSummary_added <- addToSummary(
+							temp_addToSum <- addToSummary(
 								logml, partitionSummary, worstIndex
 							)
-							partitionSummary <- partitionSummary_added$partitionSummary
-							added <- partitionSummary_added$added
+							partitionSummary <- temp_addToSum$partitionSummary
+							added <- temp_addToSum$added
 							if (added==1) {
 								worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
 								worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
@ -365,18 +360,18 @@ indMix <- function(c, npops, dispText) {
 								muutoksia <- 1  # Ulompi kirjanpito.
 								if (dispText) {
 									if (round == 5) {
-										print('Action 5')
+										cat('Action 5')
 									} else {
-										print('Action 6')
+										cat('Action 6')
 									}
 								}
 							}
 							if (logml > worstLogml) {
-								partitionSummary_added <- addToSummary(
+								temp_addToSum <- addToSummary(
 									logml, partitionSummary, worstIndex
 								)
-								partitionSummary <- partitionSummary_added$partitionSummary
-								added <- partitionSummary_added$added
+								partitionSummary <- temp_addToSum$partitionSummary
+								added <- temp_addToSum$added
 								if (added==1) {
 									worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
 									worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
@ -477,11 +472,11 @@ indMix <- function(c, npops, dispText) {
 								muutoksia <- 1
 								logml <- logml + totalMuutos
 								if (logml > worstLogml) {
-									partitionSummary_added = addToSummary(
+									temp_addToSum <- addToSummary(
 										logml, partitionSummary, worstIndex
 									)
-									partitionSummary_added <- partitionSummary_added$partitionSummary
-									added <- partitionSummary_added$added
+									partitionSummary <- temp_addToSum$partitionSummary
+									added <- temp_addToSum$added
 									if (added == 1) {
 										worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
 										worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
@ -489,7 +484,7 @@ indMix <- function(c, npops, dispText) {
 								}
 								if (muutoksiaNyt == 0) {
 									if (dispText) {
-										print('Action 7')
+										cat('Action 7')
 									}
 									muutoksiaNyt <- 1
 								}
@ -513,7 +508,8 @@ indMix <- function(c, npops, dispText) {

 				}
 			}
-
+			# FIXME: muutoksia is never 0, so vaihe never equals 5 and ready 1
+			print(paste("i1 =", i1, "i2 =", i2, "maxMuutos =", maxMuutos))#TEMP
 			if (muutoksia == 0) {
 				if (vaihe <= 4) {
 					vaihe <= vaihe + 1
@ -532,7 +528,7 @@ indMix <- function(c, npops, dispText) {
 				} else if (vaihe == 3) {
 					roundTypes <- c(5, 5, 7)
 				} else if (vaihe == 4) {
-					roundTypes = c(4, 3, 1)
+					roundTypes <- c(4, 3, 1)
 				} else if (vaihe == 5) {
 					roundTypes <- c(6, 7, 2, 3, 4, 1)
 				}
--- a/R/laskeMuutokset12345.R
+++ b/R/laskeMuutokset12345.R
@ -60,7 +60,7 @@ laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) {
 	diffInCounts <- computeDiffInCounts(
 		rows, size(COUNTS, 1), size(COUNTS, 2), data
 	)
-	diffInSumCounts <- sum(diffInCounts)
+	diffInSumCounts <- colSums(diffInCounts)

 	COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
 	SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
@ -68,7 +68,7 @@ laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) {
 	COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
 	SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts

-	i2 <- find(muutokset == -Inf) # Etsit<69><74>n populaatiot jotka muuttuneet viime kerran j<>lkeen.
+	i2 <- find(muutokset == -Inf) # Etsit<69><74>n populaatiot jotka muuttuneet viime kerran j<>lkeen. (Searching for populations that have changed since the last time)
 	i2 <- setdiff(i2, i1)
 	i2_logml <- POP_LOGML[i2]

@ -81,7 +81,7 @@ laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) {
 	SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1))

 	muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
-	LOGDIFF[ind, ] = muutokset
+	LOGDIFF[ind, ] <- muutokset
 	return(list(muutokset = muutokset, diffInCounts = diffInCounts))
 }

--- a/R/min_max_MATLAB.R
+++ b/R/min_max_MATLAB.R
@ -5,6 +5,7 @@
 #' @return Either a list or a vector
 #' @author Waldir Leoncio
 min_MATLAB <- function(X, indices = TRUE) {
+	if (!is(X, "matrix")) X <- as.matrix(X)
 	mins <- apply(X, 2, min)
 	idx  <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x]))
 	if (indices) {
@ -21,6 +22,7 @@ min_MATLAB <- function(X, indices = TRUE) {
 #' @return Either a list or a vector
 #' @author Waldir Leoncio
 max_MATLAB <- function(X, indices = TRUE) {
+	if (!is(X, "matrix")) X <- as.matrix(X)
 	maxs <- apply(X, 2, max)
 	idx  <- sapply(seq_len(ncol(X)), function(x) match(maxs[x], X[, x]))
 	if (indices) {
--- a/R/repmat.R
+++ b/R/repmat.R
@ -15,6 +15,12 @@ repmat <- function (mx, n) {
    if (length(n) > 3) warning("Extra dimensions of n ignored")
    if (!is(mx, "matrix")) mx <- t(as.matrix(mx))
    if (length(n) == 1) n <- rep(n, 2)
+    if (any(n == 0)) {
+        n_zero <- which(n == 0)
+        out_dim <- dim(mx)
+        out_dim[n_zero] <- 0
+        return(array(dim=out_dim))
+    }

    # Replicating cols
    out <- mx_col <- matrix(rep(mx, n[2]), nrow(mx))
--- a/R/testaaOnkoKunnollinenBapsData.R
+++ b/R/testaaOnkoKunnollinenBapsData.R
@ -7,13 +7,13 @@ testaaOnkoKunnollinenBapsData <- function(data) {
 	# Tarkastaa onko viimeisess?sarakkeessa kaikki
 	# luvut 1,2,...,n johonkin n:<3A><>n asti.
 	# Tarkastaa lis<69>ksi, ett?on v<>hint<6E><74>n 2 saraketta.
-	if (size[data, 1] < 2) {
+	if (size(data, 1) < 2) {
 		ninds <- 0
 		return(ninds)
 	}
 	lastCol <- data[, ncol(data)]
 	ninds <- max(lastCol)
-	if (t(1:ninds) != unique(lastCol)) {
+	if (any(1:ninds != unique(lastCol))) {
 		ninds <- 0
 		return(ninds)
 	}
--- a/R/updateGlobalVariables.R
+++ b/R/updateGlobalVariables.R
@ -1,14 +1,13 @@
 updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) {
 	# % Suorittaa globaalien muuttujien muutokset, kun yksil<69> ind
 	# % on siirret<65><74>n koriin i2.
-
 	i1 <- PARTITION[ind]
 	PARTITION[ind] <- i2

 	COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
 	COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
-	SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts]
-	SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts]
+	SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts)
+	SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts)

 	POP_LOGML[c(i1, i2)] <- computePopulationLogml(
 		c(i1, i2), adjprior, priorTerm
@ -28,8 +27,8 @@ updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) {

 	COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
 	COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
-	SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts]
-	SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts]
+	SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts)
+	SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts)

 	POP_LOGML[i1] <- 0
 	POP_LOGML[i2] <- computePopulationLogml(i2, adjprior, priorTerm)
@ -51,8 +50,8 @@ updateGlobalVariables3 <- function(

 	COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
 	COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
-	SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts]
-	SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts]
+	SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - colSums(diffInCounts)
+	SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + colSums(diffInCounts)

 	POP_LOGML[c(i1, i2)] <- computePopulationLogml(
 		c(i1, i2), adjprior, priorTerm
--- a/man/cell.Rd
+++ b/man/cell.Rd
@ -4,13 +4,16 @@
 \alias{cell}
 \title{Cell array}
 \usage{
-cell(n, sz = c(n, n), ...)
+cell(n, sz = c(n, n), expandable = FALSE, ...)
 }
 \arguments{
 \item{n}{a the first dimension (or both, if sz is not passed)}

 \item{sz}{the second dimension (or 1st and 2nd, if not passed)}

+\item{expandable}{if TRUE, output is a list (so it can take different
+lengths)}
+
 \item{...}{Other dimensions}
 }
 \value{
--- a/man/find.Rd
+++ b/man/find.Rd
@ -4,10 +4,12 @@
 \alias{find}
 \title{Find indices and values of nonzero elements}
 \usage{
-find(x)
+find(x, sort = TRUE)
 }
 \arguments{
 \item{x}{object or logic operation on an object}
+
+\item{sort}{sort output?}
 }
 \description{
 Emulates behavior of `find`
--- a/tests/testthat/test-greedyMix.R
+++ b/tests/testthat/test-greedyMix.R
@ -1,10 +1,42 @@
+context("Auxiliary functions to greedyMix")
+
+baps_diploid <- read.delim(
+	"inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt",
+	sep = " ",
+	header = FALSE
+)
+
+handleData(baps_diploid)$newData
+
+test_that("handleData works as expected", {
+	data_obs <- handleData(baps_diploid)$newData
+	data_exp <- matrix(
+		c(
+			-9, 1, 2, 1, 1, 1, 2, 1, 2, 2, 1,
+			-9, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1,
+			3, 2, 2, 3, 2, -9, 3, 1, 2, 1, 2,
+			2, 1, 2, 1, 2, -9, 1, 1, 1, 1, 2,
+			3, 1, 1, 1, 2, 1, 1, 2, -9, 1, 3,
+			3, 1, 2, 1, 1, 1, 2, 1, -9, 2, 3,
+			1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 4,
+			3, 2, 2, 3, 2, 2, 3, 1, 2, 1, 4,
+			2, 1, 2, 1, -9, 1, 1, 1, 1, 1, 5,
+			3, 1, 1, 1, -9, 1, 1, 2, 1, 1, 5
+		),
+		nrow = 10, byrow = TRUE
+	)
+	colnames(data_exp) <- colnames(data_obs)
+	expect_equal(data_obs, data_exp)
+})
+
 context("Opening files on greedyMix")

+# TODO: needs #12 to be fixed before this can be done without user intervention
 # greedyMix(
-# 	tietue = "inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt",
-# 	format = "GenePop",
+# 	tietue = "inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt",
+# 	format = "BAPS",
 # 	savePreProcessed = FALSE
-# )
+# ) # Upper bounds 100 100

 context("Linkage")