From cc9704489cbd7c8513343e6a2d5f2ef009473d3f Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 13 Jul 2020 13:23:31 +0200 Subject: [PATCH 01/13] Translated takeLine() --- NAMESPACE | 1 + R/greedyMix.R | 13 ------------- R/takeLine.R | 17 +++++++++++++++++ man/takeLine.Rd | 19 +++++++++++++++++++ 4 files changed, 37 insertions(+), 13 deletions(-) create mode 100644 R/takeLine.R create mode 100644 man/takeLine.Rd diff --git a/NAMESPACE b/NAMESPACE index 81c1201..fac9bb5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,6 +32,7 @@ export(simuloiAlleeli) export(size) export(strcmp) export(suoritaMuutos) +export(takeLine) export(testaaOnkoKunnollinenBapsData) export(testaaPop) export(times) diff --git a/R/greedyMix.R b/R/greedyMix.R index 8ea16e0..928bcc1 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -1533,19 +1533,6 @@ greedyMix <- function( # %-------------------------------------------------------------------- - -# function newline = takeLine(description,width) -# %Returns one line from the description: line ends to the first -# %space after width:th mark. -# newLine = description(1:width); -# n = width+1; -# while ~isspace(description(n)) & n Date: Mon, 13 Jul 2020 13:23:57 +0200 Subject: [PATCH 02/13] ' --- DESCRIPTION | 2 +- R/find.R | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index eff2fcf..e7e0081 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,7 @@ Description: Partial R implementation of the BAPS software License: GPL-3 Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.0 +RoxygenNote: 7.1.1 Suggests: testthat (>= 2.1.0) Imports: diff --git a/R/find.R b/R/find.R index 45559c2..788b6ec 100644 --- a/R/find.R +++ b/R/find.R @@ -2,9 +2,9 @@ #' @description Emulates behavior of `find` #' @param x object or logic operation on an object find <- function(x) { - if (is.logical(x)) { - return(which(x)) - } else { - return(which(x > 0)) - } + if (is.logical(x)) { + return(which(x)) + } else { + return(which(x > 0)) + } } \ No newline at end of file From 32e5020d62da57e005eae2b150ea044e558abb30 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 13 Jul 2020 13:59:35 +0200 Subject: [PATCH 03/13] Translated basic Matlab function blanks() --- NAMESPACE | 1 + R/blanks.R | 14 ++++++++++++ man/blanks.Rd | 23 ++++++++++++++++++++ tests/testthat/test-convertedBaseFunctions.R | 8 +++++++ 4 files changed, 46 insertions(+) create mode 100644 R/blanks.R create mode 100644 man/blanks.Rd diff --git a/NAMESPACE b/NAMESPACE index fac9bb5..caa653b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(addAlleles) export(admix1) +export(blanks) export(calculatePopLogml) export(colon) export(computeAllFreqs2) diff --git a/R/blanks.R b/R/blanks.R new file mode 100644 index 0000000..bd1409c --- /dev/null +++ b/R/blanks.R @@ -0,0 +1,14 @@ +#' @title Blanks +#' @description Create character vector of blanks +#' @details This function emulates the behavior of a homonimous function from Matlab +#' @param n length of vector +#' @return Vector of n blanks +#' @author Waldir Leoncio +#' @export +blanks <- function(n) { + if (n < 0) { + warning("Negative n passed. Treating as n = 0") + n <- 0 + } + paste(rep(" ", n), collapse="") +} \ No newline at end of file diff --git a/man/blanks.Rd b/man/blanks.Rd new file mode 100644 index 0000000..87471f4 --- /dev/null +++ b/man/blanks.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/blanks.R +\name{blanks} +\alias{blanks} +\title{Blanks} +\usage{ +blanks(n) +} +\arguments{ +\item{n}{length of vector} +} +\value{ +Vector of n blanks +} +\description{ +Create character vector of blanks +} +\details{ +This function emulates the behavior of a homonimous function from Matlab +} +\author{ +Waldir Leoncio +} diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index ca945c3..66c2ac9 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -168,4 +168,12 @@ test_that("cell works as expected", { expect_equal(cell(2), array(dim = c(2, 2))) expect_equal(cell(3, 4), array(dim = c(3, 4))) expect_equal(cell(5, 7, 6), array(dim = c(5, 7, 6))) +}) + +test_that("blanks works as expected", { + expect_warning(blanks(-1)) + expect_equal(suppressWarnings(blanks(-1)), "") + expect_equal(blanks(0), "") + expect_equal(blanks(1), " ") + expect_equal(blanks(10), " ") }) \ No newline at end of file From a632a54a1d8cf0b7f6530d131454e97e3b271ab4 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 13 Jul 2020 13:59:45 +0200 Subject: [PATCH 04/13] Renamed test file --- tests/testthat/{test.greedyMix.R => test-greedyMix.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/testthat/{test.greedyMix.R => test-greedyMix.R} (100%) diff --git a/tests/testthat/test.greedyMix.R b/tests/testthat/test-greedyMix.R similarity index 100% rename from tests/testthat/test.greedyMix.R rename to tests/testthat/test-greedyMix.R From a7e2049007bf419c7d89ef6d16b10b064e3f025e Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 13 Jul 2020 15:05:18 +0200 Subject: [PATCH 05/13] Translated palautaYks --- R/palautaYks.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 R/palautaYks.R diff --git a/R/palautaYks.R b/R/palautaYks.R new file mode 100644 index 0000000..e64cde7 --- /dev/null +++ b/R/palautaYks.R @@ -0,0 +1,17 @@ +palautaYks <- function(num, yks) { + # palauttaa luvun num 10^yks termin kertoimen + # string:in? + # yks t�ytyy olla kokonaisluku, joka on + # v�hint��n -1:n suuruinen. Pienemmill? + # luvuilla tapahtuu jokin py�ristysvirhe. + + if (yks >= 0) { + digit <- num %% 10 ^ (yks + 1) + digit <- floor(digit / (10 ^ yks)) + } else { + digit <- num * 10 + digit <- floor(digit %% 10) + } + digit <- as.character(digit) + return(digit) +} \ No newline at end of file From 6389a4104c66b4bd06513e5ba1e5e52b295c724a Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 13 Jul 2020 15:13:36 +0200 Subject: [PATCH 06/13] Translated logml2String --- NAMESPACE | 1 + R/logml2String.R | 57 +++++++++++++++++++++++++++++++++++++++++++++ man/logml2String.Rd | 17 ++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 R/logml2String.R create mode 100644 man/logml2String.Rd diff --git a/NAMESPACE b/NAMESPACE index caa653b..0caf33f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(inputdlg) export(isfield) export(laskeMuutokset4) export(learn_simple_partition) +export(logml2String) export(lueGenePopData) export(lueNimi) export(noIndex) diff --git a/R/logml2String.R b/R/logml2String.R new file mode 100644 index 0000000..7e2c1dc --- /dev/null +++ b/R/logml2String.R @@ -0,0 +1,57 @@ +#' @title Logml to string +#' @description Returns a string representation of a logml +#' @param logml input Logml +#' @return String version of logml +#' @export +logml2String <- function(logml) { + # Palauttaa logml:n string-esityksen. + mjono = ' ' + + if (logml == -Inf) { + mjono[7] <- '-' + return(mjono) + } + + if (abs(logml) < 10000) { + # Ei tarvita e-muotoa + mjono[7] <- palautaYks(abs(logml), -1) + mjono[6] <- '.' + mjono[5] <- palautaYks(abs(logml), 0) + mjono[4] <- palautaYks(abs(logml), 1) + mjono[3] <- palautaYks(abs(logml), 2) + mjono[2] <- palautaYks(abs(logml), 3) + pointer <- 2 + while (mjono[pointer] == '0' & pointer < 7) { + mjono[pointer] <- ' ' + pointer <- pointer + 1 + } + if (logml < 0) { + mjono[pointer - 1] <- '-' + } + } else { + suurinYks <- 4 + while (abs(logml) / (10 ^ (suurinYks + 1)) >= 1) { + suurinYks <- suurinYks + 1 + } + if (suurinYks < 10) { + mjono[7] <- as.character(suurinYks) + mjono[6] <- 'e' + mjono[5] <- palautaYks(abs(logml), suurinYks - 1) + mjono[4] <- '.' + mjono[3] <- palautaYks(abs(logml), suurinYks) + if (logml < 0) { + mjono[2] <- '-' + } + } else if (suurinYks >= 10) { + mjono[6:7] <- as.character(suurinYks) + mjono[5] <- 'e' + mjono[4] <- palautaYks(abs(logml), suurinYks - 1) + mjono[3] <- '.' + mjono[2] <- palautaYks(abs(logml), suurinYks) + if (logml < 0) { + mjono[1] <- '-' + } + } + } + return(mjono) +} \ No newline at end of file diff --git a/man/logml2String.Rd b/man/logml2String.Rd new file mode 100644 index 0000000..d8b81ce --- /dev/null +++ b/man/logml2String.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/logml2String.R +\name{logml2String} +\alias{logml2String} +\title{Logml to string} +\usage{ +logml2String(logml) +} +\arguments{ +\item{logml}{input Logml} +} +\value{ +String version of logml +} +\description{ +Returns a string representation of a logml +} From 9c5beeda435c0ba02ed82c36fe5f89e92594904b Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jul 2020 09:40:46 +0200 Subject: [PATCH 07/13] Translated squeeze --- R/squeeze.R | 22 ++++++++++++++++++++++ man/squeeze.Rd | 29 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 R/squeeze.R create mode 100644 man/squeeze.Rd diff --git a/R/squeeze.R b/R/squeeze.R new file mode 100644 index 0000000..f13afa8 --- /dev/null +++ b/R/squeeze.R @@ -0,0 +1,22 @@ +#' @title Squeeze +#' @description Remove dimensions of length 1 +#' @details This function implements the behavior of the homonimous function on +#' Matlab. `B = squeeze(A)` returns an array with the same elements as the +#' input array A, but with dimensions of length 1 removed. For example, if A is +#' a 3-by-1-by-1-by-2 array, then squeeze(A) returns a 3-by-2 matrix. If A is a +#' row vector, column vector, scalar, or an array with no dimensions of length +#' 1, then squeeze returns the input A. +#' @param A input or array matrix +#' @return An array with the same elements as the input array, but with +#' dimensions of length 1 removed. +#' @author Waldir Leoncio +squeeze <- function(A) { + A <- as.array(A) + dim_1 <- which(dim(A) == 1) + B <- array(A, dim = dim(A)[-dim_1]) + + # Workaround to match Matlab behavior + if (length(dim(B)) == 1) B <- as.matrix(B) + + return(B) +} \ No newline at end of file diff --git a/man/squeeze.Rd b/man/squeeze.Rd new file mode 100644 index 0000000..6e495e2 --- /dev/null +++ b/man/squeeze.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/squeeze.R +\name{squeeze} +\alias{squeeze} +\title{Squeeze} +\usage{ +squeeze(A) +} +\arguments{ +\item{A}{input or array matrix} +} +\value{ +An array with the same elements as the input array, but with +dimensions of length 1 removed. +} +\description{ +Remove dimensions of length 1 +} +\details{ +This function implements the behavior of the homonimous function on +Matlab. `B = squeeze(A)` returns an array with the same elements as the +input array A, but with dimensions of length 1 removed. For example, if A is +a 3-by-1-by-1-by-2 array, then squeeze(A) returns a 3-by-2 matrix. If A is a +row vector, column vector, scalar, or an array with no dimensions of length +1, then squeeze returns the input A. +} +\author{ +Waldir Leoncio +} From d05fd94cfb6301912686a28769cebb4ac98fcfd7 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jul 2020 10:10:26 +0200 Subject: [PATCH 08/13] Translated kldiv2str --- R/kldiv2str.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 R/kldiv2str.R diff --git a/R/kldiv2str.R b/R/kldiv2str.R new file mode 100644 index 0000000..9e1e31e --- /dev/null +++ b/R/kldiv2str.R @@ -0,0 +1,24 @@ +kldiv2str <- function(div) { + mjono <- ' ' + if (abs(div) < 100) { + # Ei tarvita e-muotoa + mjono[6] <- as.character((floor(div * 1000)) %% 10) + mjono[5] <- as.character((floor(div * 100)) %% 10) + mjono[4] <- as.character((floor(div * 10)) %% 10) + mjono[3] <- '.' + mjono[2] <- as.character((floor(div)) %% 10) + arvo <- (floor(div / 10)) %% 10 + if (arvo > 0) { + mjono[1] <- as.character(arvo) + } + + } else { + suurinYks <- floor(log10(div)) + mjono[6] <- as.character(suurinYks) + mjono[5] <- 'e' + mjono[4] <- palautaYks(abs(div), suurinYks - 1) + mjono[3] <- '.' + mjono[2] <- palautaYks(abs(div), suurinYks) + } + return(mjono) +} \ No newline at end of file From e89c51de57537103a6c8b95abdbe979562004095 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jul 2020 11:16:03 +0200 Subject: [PATCH 09/13] Translated writeMixtureInfo() --- NAMESPACE | 1 + R/writeMixtureInfo.R | 337 ++++++++++++++++++++++++++++++++++++++++ man/writeMixtureInfo.Rd | 58 +++++++ 3 files changed, 396 insertions(+) create mode 100644 R/writeMixtureInfo.R create mode 100644 man/writeMixtureInfo.Rd diff --git a/NAMESPACE b/NAMESPACE index 0caf33f..8516a73 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(testaaPop) export(times) export(uigetfile) export(uiputfile) +export(writeMixtureInfo) importFrom(methods,is) importFrom(stats,runif) importFrom(utils,read.delim) diff --git a/R/writeMixtureInfo.R b/R/writeMixtureInfo.R new file mode 100644 index 0000000..174781c --- /dev/null +++ b/R/writeMixtureInfo.R @@ -0,0 +1,337 @@ +#' @title Write Mixture Info +#' @description Writes information about the mixture +#' @param logml logml +#' @param rowsFromInd rowsFromInd +#' @param data data +#' @param adjprior adjprior +#' @param priorTerm priorTerm +#' @param outPutFile outPutFile +#' @param inputFile inputFile +#' @param partitionSummary partitionSummary +#' @param popnames popnames +#' @param fixedK fixedK +#' @param PARTITION PARTITION +#' @param COUNTS COUNTS +#' @param SUMCOUNTS SUMCOUNTS +#' @param LOGDIFF LOGDIFF +#' @return changesInLogml +#' @export +writeMixtureInfo <- function( + logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS, + LOGDIFF +) { + + changesInLogml <- list() + ninds <- size(data, 1) / rowsFromInd + npops <- size(COUNTS, 3) + # Check that the names refer to individuals + names <- (size(popnames, 1) == ninds) #Tarkistetaan ett?nimet viittaavat yksil�ihin + + if (length(outPutFile) > 0) { + fid <- load(outPutFile) + } else { + fid <- -1 + message('Diverting output to baps4_output.baps') + # TODO: replace sink with option that will record input and output + sink('baps4_output.baps', split=TRUE) # save in text anyway. + } + + dispLine() + cat('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:') + cat(c('Data file: ', inputFile)) + cat('Model: independent') + cat(c('Number of clustered individuals: ', ownNum2Str(ninds))) + cat(c('Number of groups in optimal partition: ', ownNum2Str(npops))) + cat(c('Log(marginal likelihood) of optimal partition: ', ownNum2Str(logml))) + cat(' ') + if (fid != -1) { + append(fid, 'RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n') + append(fid, c('Data file: ', inputFile, '\n')) + append( + fid, + c('Number of clustered individuals: ', ownNum2Str(ninds), '\n') + ) + append( + fid, + c( + 'Number of groups in optimal partition: ', + ownNum2Str(npops), '\n' + ) + ) + append( + fid, + c( + 'Log(marginal likelihood) of optimal partition: ', + ownNum2Str(logml), + '\n' + ) + ) + } + + cluster_count <- length(unique(PARTITION)) + cat('Best Partition: ') + if (fid != -1) { + append(fid, c('Best Partition: ', '\n')) + } + for (m in 1:cluster_count) { + indsInM <- find(PARTITION == m) + length_of_beginning <- 11 + floor(log10(m)) + cluster_size <- length(indsInM) + + if (names) { + text <- c( + 'Cluster ', + as.character(m), + ': {', + as.character(popnames[[indsInM[1]]]) + ) + for (k in 2:cluster_size) { + text <- c(text, ', ', as.character(popnames[[indsInM[k]]])) + } + } else { + text <- c( + 'Cluster ', as.character(m), ': {', as.character(indsInM[1]) + ) + for (k in 2:cluster_size) { + text <- c(text, ', ', as.character(indsInM[k])) + } + } + text <- c(text, '}') + while (length(text) > 58) { + # Take one line and display it. + new_line <- takeLine(text, 58) + text <- (length(new_line) + 1):end + cat(new_line) + if (fid != -1) { + append(fid, new_line) + append(fid,'\n') + } + if (length(text) > 0) { + text <- c(blanks(length_of_beginning), text) + } else { + text <- "" + } + } + if (text != "") { + cat(text) + if (fid != -1) { + append(fid, text) + append(fid,'\n') + } + } + } + + if (npops > 1) { + cat(' ') + cat(' ') + cat( + 'Changes in log(marginal likelihood)', + ' if indvidual i is moved to group j:' + ) + if (fid != -1) { + append(fid, ' ') + append(fid, '\n') + append(fid, ' ') + append(fid, '\n') + append( + fid, + c( + 'Changes in log(marginal likelihood)', + 'if indvidual i is moved to group j:' + ) + ) + append(fid, '\n') + } + + if (names) { + nameSizes <- zeros(ninds, 1) + for (i in 1:ninds) { + nimi <- as.character(popnames[i]) + nameSizes[i] <- length(nimi) + } + maxSize <- max(nameSizes) + maxSize <- max(maxSize, 5) + erotus <- maxSize - 5 + alku <- blanks(erotus) + ekarivi <- c(alku, ' ind', blanks(6 + erotus)) + } else { + ekarivi <- ' ind ' + } + for (i in 1:cluster_count) { + ekarivi <- c(ekarivi, ownNum2Str(i), blanks(8 - floor(log10(i)))) + } + cat(ekarivi) + if (fid != -1) { + append(fid, ekarivi) + append(fid, '\n') + } + + # %ninds = size(data,1)/rowsFromInd; + changesInLogml <- t(LOGDIFF) + for (ind in 1:ninds) { + muutokset <- changesInLogml[, ind] + + if (names) { + nimi <- as.character(popnames[ind]) + rivi <- c(blanks(maxSize - length(nimi)), nimi, ':') + } else { + rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ':') + } + for (j in 1:npops) { + rivi <- c(rivi, ' ', logml2String(omaRound(muutokset[j]))) + } + cat(rivi) + if (fid != -1) { + append(fid, rivi) + append(fid, '\n') + } + } + + cat(' ') + cat(' ') + cat('KL-divergence matrix in PHYLIP format:') + + dist_mat <- zeros(npops, npops) + if (fid != -1) { + append(fid, ' ') + append(fid, ' ') + append(fid, c('KL-divergence matrix in PHYLIP format:')) + append(fid, '\n') + } + + maxnoalle <- size(COUNTS, 1) + nloci <- size(COUNTS, 2) + d <- zeros(maxnoalle, nloci, npops) + prior <- adjprior + prior[find(prior == 1)] <- 0 + nollia <- find(all(prior == 0)) # Loci in which only one allele was detected. + prior[1, nollia] <- 1 + for (pop1 in 1:npops) { + d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / + repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1)) + } + ekarivi <- as.character(npops) + cat(ekarivi) + if (fid != -1) { + append(fid, ekarivi) + append(fid, '\n') + } + + for (pop1 in 1:npops) { + for (pop2 in 1:(pop1 - 1)) { + dist1 <- d[, , pop1] + dist2 <- d[, , pop2] + div12 <- sum( + sum(dist1 * log2((dist1 + 10 ^ -10) / (dist2 + 10 ^ -10))) + ) / nloci + div21 <- sum( + sum(dist2 * log2((dist2 + 10 ^ -10) / (dist1 + 10 ^ -10))) + ) / nloci + div <- (div12 + div21) / 2 + dist_mat(pop1, pop2) <- div + } + } + + + dist_mat <- dist_mat + t(dist_mat) # make it symmetric + for (pop1 in 1:npops) { + rivi <- c('Cluster_', as.character(pop1), ' ') + for (pop2 in 1:npops) { + rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]), ' ') + } + cat(rivi) + if (fid != -1) { + append(fid, rivi) + append(fid, '\n') + } + } + } + + cat(' ') + cat(' '); + cat( + 'List of sizes of 10 best visited partitions', + 'and corresponding log(ml) values' + ) + + if (fid != -1) { + append(fid, ' ') + append(fid, '\n') + append(fid, ' ') + append(fid, '\n') + append( + fid, + c( + 'List of sizes of 10 best visited partitions', + 'and corresponding log(ml) values' + ) + ) + append(fid, '\n') + } + + partitionSummary <- sortrows(partitionSummary, 2) + partitionSummary <- partitionSummary[size(partitionSummary, 1):1, ] + partitionSummary <- partitionSummary[find(partitionSummary[, 2] > -1e49), ] + if (size(partitionSummary, 1) > 10) { + vikaPartitio <- 10 + } else { + vikaPartitio <- size(partitionSummary, 1) + } + for (part in 1:vikaPartitio) { + line <- c( + as.character(partitionSummary[part, 1]), + ' ', + as.character(partitionSummary(part, 2)) + ) + cat(line) + if (fid != -1) { + append(fid, line) + append(fid, '\n') + } + } + + if (!fixedK) { + cat(' ') + cat(' ') + cat('Probabilities for number of clusters') + + if (fid != -1) { + append(fid, ' ') + append(fid, '\n') + append(fid, ' ') + append(fid, '\n') + append(fid, c('Probabilities for number of clusters')) + append(fid, '\n') + } + + npopsTaulu <- unique(partitionSummary[, 1]) + len <- length(npopsTaulu) + probs <- zeros(len, 1) + partitionSummary[, 2] <- partitionSummary[, 2] - + max(partitionSummary[, 2]) + sumtn <- sum(exp(partitionSummary[, 2])) + for (i in 1:len) { + npopstn <- sum( + exp( + partitionSummary[find( + partitionSummary[, 1] == npopsTaulu[i] + ), 2] + ) + ) + probs[i] <- npopstn / sumtn + } + for (i in 1:len) { + if (probs[i] > 1e-5) { + line <- c( + as.character(npopsTaulu[i]), ' ', as.character(probs[i]) + ) + cat(line) + if (fid != -1) { + append(fid, line) + append(fid, '\n') + } + } + } + } + return(changesInLogml) +} \ No newline at end of file diff --git a/man/writeMixtureInfo.Rd b/man/writeMixtureInfo.Rd new file mode 100644 index 0000000..f9a1682 --- /dev/null +++ b/man/writeMixtureInfo.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/writeMixtureInfo.R +\name{writeMixtureInfo} +\alias{writeMixtureInfo} +\title{Write Mixture Info} +\usage{ +writeMixtureInfo( + logml, + rowsFromInd, + data, + adjprior, + priorTerm, + outPutFile, + inputFile, + partitionSummary, + popnames, + fixedK, + PARTITION, + COUNTS, + SUMCOUNTS, + LOGDIFF +) +} +\arguments{ +\item{logml}{logml} + +\item{rowsFromInd}{rowsFromInd} + +\item{data}{data} + +\item{adjprior}{adjprior} + +\item{priorTerm}{priorTerm} + +\item{outPutFile}{outPutFile} + +\item{inputFile}{inputFile} + +\item{partitionSummary}{partitionSummary} + +\item{popnames}{popnames} + +\item{fixedK}{fixedK} + +\item{PARTITION}{PARTITION} + +\item{COUNTS}{COUNTS} + +\item{SUMCOUNTS}{SUMCOUNTS} + +\item{LOGDIFF}{LOGDIFF} +} +\value{ +changesInLogml +} +\description{ +Writes information about the mixture +} From bb44d0cfd47b3cc8112640548bcd0c04fe12d7aa Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jul 2020 11:17:25 +0200 Subject: [PATCH 10/13] Improved exception handling --- R/computeAllFreqs2.R | 2 +- R/repmat.R | 2 +- tests/testthat/test-convertedBaseFunctions.R | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/computeAllFreqs2.R b/R/computeAllFreqs2.R index e3f68f8..4b87fea 100644 --- a/R/computeAllFreqs2.R +++ b/R/computeAllFreqs2.R @@ -24,6 +24,6 @@ computeAllFreqs2 <- function (noalle, COUNTS = matrix(NA, 0, 0), } prioriAlleelit <- repmat(prioriAlleelit, c(1, 1, npops)) counts <- COUNTS + prioriAlleelit - allFreqs <- counts / sumCounts + allFreqs <- counts / drop(sumCounts) return(allFreqs) } \ No newline at end of file diff --git a/R/repmat.R b/R/repmat.R index 630053f..642c3cd 100644 --- a/R/repmat.R +++ b/R/repmat.R @@ -13,8 +13,8 @@ repmat <- function (mx, n) { # Validation if (length(n) > 3) warning("Extra dimensions of n ignored") + if (!is(mx, "matrix")) mx <- t(as.matrix(mx)) if (length(n) == 1) n <- rep(n, 2) - if (!is(mx, "matrix")) mx <- as.matrix(mx) # Replicating cols out <- mx_col <- matrix(rep(mx, n[2]), nrow(mx)) diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 66c2ac9..715fe97 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -11,10 +11,10 @@ test_that("repmat works properly", { mx1 <- matrix(5:8) mx2 <- matrix(0:-3, 2) expect_error(repmat(mx0)) - expect_equal(repmat(mx0, 1), as.matrix(mx0)) + expect_equal(repmat(mx0, 1), t(as.matrix(mx0))) expect_equal( object = repmat(mx0, 2), - expected = unname(t(cbind(rbind(mx0, mx0), rbind(mx0, mx0)))) + expected = unname(cbind(rbind(mx0, mx0), rbind(mx0, mx0))) ) expect_equal( object = repmat(mx1, 2), @@ -32,6 +32,8 @@ test_that("repmat works properly", { object = repmat(mx2, c(1, 1, 2)), expected = array(mx2, c(2, 2, 2)) ) + expect_equal(repmat(1:2, 3), matrix(rep(1:2, 9), 3, 6, byrow=TRUE)) + expect_equal(repmat(10, c(3, 2)), matrix(10, 3, 2)) }) test_that("zeros and ones work as expected", { From a27f303f4a4a2f56ef58792aa2a4e694ae7a176d Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jul 2020 11:18:01 +0200 Subject: [PATCH 11/13] Optimized squeeze() --- R/squeeze.R | 13 +++---------- man/squeeze.Rd | 4 ++++ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/R/squeeze.R b/R/squeeze.R index f13afa8..8030894 100644 --- a/R/squeeze.R +++ b/R/squeeze.R @@ -6,17 +6,10 @@ #' a 3-by-1-by-1-by-2 array, then squeeze(A) returns a 3-by-2 matrix. If A is a #' row vector, column vector, scalar, or an array with no dimensions of length #' 1, then squeeze returns the input A. +#' @note This is basically a wrapper of drop() with a minor adjustment to adapt +#' the output to what happens on Matlab #' @param A input or array matrix #' @return An array with the same elements as the input array, but with #' dimensions of length 1 removed. #' @author Waldir Leoncio -squeeze <- function(A) { - A <- as.array(A) - dim_1 <- which(dim(A) == 1) - B <- array(A, dim = dim(A)[-dim_1]) - - # Workaround to match Matlab behavior - if (length(dim(B)) == 1) B <- as.matrix(B) - - return(B) -} \ No newline at end of file +squeeze <- function(A) as.matrix(drop(A)) \ No newline at end of file diff --git a/man/squeeze.Rd b/man/squeeze.Rd index 6e495e2..6191ee4 100644 --- a/man/squeeze.Rd +++ b/man/squeeze.Rd @@ -24,6 +24,10 @@ a 3-by-1-by-1-by-2 array, then squeeze(A) returns a 3-by-2 matrix. If A is a row vector, column vector, scalar, or an array with no dimensions of length 1, then squeeze returns the input A. } +\note{ +This is basically a wrapper of drop() with a minor adjustment to adapt +the output to what happens on Matlab +} \author{ Waldir Leoncio } From c2bbe701bec0c9eb05c28164b82cbab7d9aa6103 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jul 2020 11:18:11 +0200 Subject: [PATCH 12/13] Added tests for squeeze --- tests/testthat/test-convertedBaseFunctions.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 715fe97..705955e 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -178,4 +178,14 @@ test_that("blanks works as expected", { expect_equal(blanks(0), "") expect_equal(blanks(1), " ") expect_equal(blanks(10), " ") +}) + +test_that("squeeze works as expected", { + A <- array(dim = c(2, 1, 2)) + A[, , 1] <- c(1, 2) + A[, , 2] <- c(3, 4) + expect_equal(squeeze(A), matrix(1:4, 2)) + A <- array(0, dim = c(1, 1, 3)) + A[, , 1:3] <- 1:3 + expect_equal(squeeze(A), matrix(1:3, 3)) }) \ No newline at end of file From a78a9c8a7d807b2bf32fd81b5e919ce292f5f782 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jul 2020 11:18:20 +0200 Subject: [PATCH 13/13] Code cleanup --- R/greedyMix.R | 323 -------------------------------------------------- 1 file changed, 323 deletions(-) diff --git a/R/greedyMix.R b/R/greedyMix.R index 928bcc1..7b83b77 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -366,22 +366,6 @@ greedyMix <- function( # global PARTITION; PARTITION = []; # global POP_LOGML; POP_LOGML = []; - -# %------------------------------------------------------------------------------------- - - -# function rows = computeRows(rowsFromInd, inds, ninds) -# % On annettu yksil�t inds. Funktio palauttaa vektorin, joka -# % sis�lt�� niiden rivien numerot, jotka sis�lt�v�t yksil�iden -# % dataa. - -# rows = inds(:, ones(1,rowsFromInd)); -# rows = rows*rowsFromInd; -# miinus = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]); -# rows = rows - miinus; -# rows = reshape(rows', [1,rowsFromInd*ninds]); - - # %-------------------------------------------------------------------------- @@ -1210,270 +1194,6 @@ greedyMix <- function( # end # end -# %------------------------------------------------------------------- - - -# function changesInLogml = writeMixtureInfo(logml, rowsFromInd, data, adjprior, ... -# priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK) - -# global PARTITION; -# global COUNTS; -# global SUMCOUNTS; -# global LOGDIFF; -# changesInLogml = []; -# ninds = size(data,1)/rowsFromInd; -# npops = size(COUNTS,3); -# names = (size(popnames,1) == ninds); %Tarkistetaan ett?nimet viittaavat yksil�ihin - -# if length(outPutFile)>0 -# fid = fopen(outPutFile,'a'); -# else -# fid = -1; -# diary('baps4_output.baps'); % save in text anyway. -# end - -# dispLine; -# disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:'); -# disp(['Data file: ' inputFile]); -# disp(['Model: independent']); -# disp(['Number of clustered individuals: ' ownNum2Str(ninds)]); -# disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]); -# disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); -# disp(' '); -# if (fid ~= -1) -# fprintf(fid,'%s \n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']); fprintf(fid,'\n'); -# fprintf(fid,'%s \n', ['Data file: ' inputFile]); fprintf(fid,'\n'); -# fprintf(fid,'%s \n', ['Number of clustered individuals: ' ownNum2Str(ninds)]); fprintf(fid,'\n'); -# fprintf(fid,'%s \n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]); fprintf(fid,'\n'); -# fprintf(fid,'%s \n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]); fprintf(fid,'\n'); -# end - -# cluster_count = length(unique(PARTITION)); -# disp(['Best Partition: ']); -# if (fid ~= -1) -# fprintf(fid,'%s \n',['Best Partition: ']); fprintf(fid,'\n'); -# end -# for m=1:cluster_count -# indsInM = find(PARTITION==m); -# length_of_beginning = 11 + floor(log10(m)); -# cluster_size = length(indsInM); - -# if names -# text = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})]; -# for k = 2:cluster_size -# text = [text ', ' char(popnames{indsInM(k)})]; -# end; -# else -# text = ['Cluster ' num2str(m) ': {' num2str(indsInM(1))]; -# for k = 2:cluster_size -# text = [text ', ' num2str(indsInM(k))]; -# end; -# end -# text = [text '}']; -# while length(text)>58 -# %Take one line and display it. -# new_line = takeLine(text,58); -# text = text(length(new_line)+1:end); -# disp(new_line); -# if (fid ~= -1) -# fprintf(fid,'%s \n',[new_line]); -# fprintf(fid,'\n'); -# end -# if length(text)>0 -# text = [blanks(length_of_beginning) text]; -# else -# text = []; -# end; -# end; -# if ~isempty(text) -# disp(text); -# if (fid ~= -1) -# fprintf(fid,'%s \n',[text]); -# fprintf(fid,'\n'); -# end -# end; -# end - -# if npops > 1 -# disp(' '); -# disp(' '); -# disp('Changes in log(marginal likelihood) if indvidual i is moved to group j:'); -# if (fid ~= -1) -# fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); -# fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); -# fprintf(fid, '%s \n', ['Changes in log(marginal likelihood) if indvidual i is moved to group j:']); fprintf(fid, '\n'); -# end - -# if names -# nameSizes = zeros(ninds,1); -# for i = 1:ninds -# nimi = char(popnames{i}); -# nameSizes(i) = length(nimi); -# end -# maxSize = max(nameSizes); -# maxSize = max(maxSize, 5); -# erotus = maxSize - 5; -# alku = blanks(erotus); -# ekarivi = [alku ' ind' blanks(6+erotus)]; -# else -# ekarivi = ' ind '; -# end -# for i = 1:cluster_count -# ekarivi = [ekarivi ownNum2Str(i) blanks(8-floor(log10(i)))]; -# end -# disp(ekarivi); -# if (fid ~= -1) -# fprintf(fid, '%s \n', [ekarivi]); fprintf(fid, '\n'); -# end - -# %ninds = size(data,1)/rowsFromInd; -# changesInLogml = LOGDIFF'; -# for ind = 1:ninds -# %[muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, data, ... -# % adjprior, priorTerm); -# %changesInLogml(:,ind) = muutokset; -# muutokset = changesInLogml(:,ind); - -# if names -# nimi = char(popnames{ind}); -# rivi = [blanks(maxSize - length(nimi)) nimi ':']; -# else -# rivi = [blanks(4-floor(log10(ind))) ownNum2Str(ind) ':']; -# end -# for j = 1:npops -# rivi = [rivi ' ' logml2String(omaRound(muutokset(j)))]; -# end -# disp(rivi); -# if (fid ~= -1) -# fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); -# end -# end - -# disp(' '); disp(' '); -# disp('KL-divergence matrix in PHYLIP format:'); - -# dist_mat = zeros(npops, npops); -# if (fid ~= -1) -# fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); -# fprintf(fid, '%s \n', [' ']); %fprintf(fid, '\n'); -# fprintf(fid, '%s \n', ['KL-divergence matrix in PHYLIP format:']); %fprintf(fid, '\n'); -# end - -# maxnoalle = size(COUNTS,1); -# nloci = size(COUNTS,2); -# d = zeros(maxnoalle, nloci, npops); -# prior = adjprior; -# prior(find(prior==1))=0; -# nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yht?alleelia. -# prior(1,nollia)=1; -# for pop1 = 1:npops -# d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); -# %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); -# end -# % ekarivi = blanks(7); -# % for pop = 1:npops -# % ekarivi = [ekarivi num2str(pop) blanks(7-floor(log10(pop)))]; -# % end -# ekarivi = num2str(npops); -# disp(ekarivi); -# if (fid ~= -1) -# fprintf(fid, '%s \n', [ekarivi]); %fprintf(fid, '\n'); -# end - -# for pop1 = 1:npops -# % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; -# for pop2 = 1:pop1-1 -# dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); -# div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; -# div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; -# div = (div12+div21)/2; -# % rivi = [rivi kldiv2str(div) ' ']; -# dist_mat(pop1,pop2) = div; -# end -# % disp(rivi); -# % if (fid ~= -1) -# % fprintf(fid, '%s \n', [rivi]); fprintf(fid, '\n'); -# % end -# end - - -# dist_mat = dist_mat + dist_mat'; % make it symmetric -# for pop1 = 1:npops -# rivi = ['Cluster_' num2str(pop1) ' ']; -# for pop2 = 1:npops -# rivi = [rivi kldiv2str(dist_mat(pop1,pop2)) ' ']; -# end -# disp(rivi); -# if (fid ~= -1) -# fprintf(fid, '%s \n', [rivi]); %fprintf(fid, '\n'); -# end -# end -# end - -# disp(' '); -# disp(' '); -# disp('List of sizes of 10 best visited partitions and corresponding log(ml) values'); - -# if (fid ~= -1) -# fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); -# fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); -# fprintf(fid, '%s \n', ['List of sizes of 10 best visited partitions and corresponding log(ml) values']); fprintf(fid, '\n'); -# end - -# partitionSummary = sortrows(partitionSummary,2); -# partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :); -# partitionSummary = partitionSummary(find(partitionSummary(:,2)>-1e49),:); -# if size(partitionSummary,1)>10 -# vikaPartitio = 10; -# else -# vikaPartitio = size(partitionSummary,1); -# end -# for part = 1:vikaPartitio -# line = [num2str(partitionSummary(part,1)) ' ' num2str(partitionSummary(part,2))]; -# disp(line); -# if (fid ~= -1) -# fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); -# end -# end - -# if ~fixedK -# disp(' '); -# disp(' '); -# disp('Probabilities for number of clusters'); - -# if (fid ~= -1) -# fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); -# fprintf(fid, '%s \n', [' ']); fprintf(fid, '\n'); -# fprintf(fid, '%s \n', ['Probabilities for number of clusters']); fprintf(fid, '\n'); -# end - -# npopsTaulu = unique(partitionSummary(:,1)); -# len = length(npopsTaulu); -# probs = zeros(len,1); -# partitionSummary(:,2) = partitionSummary(:,2)-max(partitionSummary(:,2)); -# sumtn = sum(exp(partitionSummary(:,2))); -# for i=1:len -# npopstn = sum(exp(partitionSummary(find(partitionSummary(:,1)==npopsTaulu(i)),2))); -# probs(i) = npopstn / sumtn; -# end -# for i=1:len -# if probs(i)>1e-5 -# line = [num2str(npopsTaulu(i)) ' ' num2str(probs(i))]; -# disp(line); -# if (fid ~= -1) -# fprintf(fid, '%s \n', [line]); fprintf(fid, '\n'); -# end -# end -# end -# end - -# if (fid ~= -1) -# fclose(fid); -# else -# diary off -# end - - # %--------------------------------------------------------------- @@ -1488,49 +1208,6 @@ greedyMix <- function( # num = round(num); # num2 = num/10; -# %--------------------------------------------------------- - - -# function digit = palautaYks(num,yks) -# % palauttaa luvun num 10^yks termin kertoimen -# % string:in? -# % yks t�ytyy olla kokonaisluku, joka on -# % v�hint��n -1:n suuruinen. Pienemmill? -# % luvuilla tapahtuu jokin py�ristysvirhe. - -# if yks>=0 -# digit = rem(num, 10^(yks+1)); -# digit = floor(digit/(10^yks)); -# else -# digit = num*10; -# digit = floor(rem(digit,10)); -# end -# digit = num2str(digit); - - -# function mjono = kldiv2str(div) -# mjono = ' '; -# if abs(div)<100 -# %Ei tarvita e-muotoa -# mjono(6) = num2str(rem(floor(div*1000),10)); -# mjono(5) = num2str(rem(floor(div*100),10)); -# mjono(4) = num2str(rem(floor(div*10),10)); -# mjono(3) = '.'; -# mjono(2) = num2str(rem(floor(div),10)); -# arvo = rem(floor(div/10),10); -# if arvo>0 -# mjono(1) = num2str(arvo); -# end - -# else -# suurinYks = floor(log10(div)); -# mjono(6) = num2str(suurinYks); -# mjono(5) = 'e'; -# mjono(4) = palautaYks(abs(div),suurinYks-1); -# mjono(3) = '.'; -# mjono(2) = palautaYks(abs(div),suurinYks); -# end - # %-------------------------------------------------------------------- # function dist2 = laskeOsaDist(inds2, dist, ninds)