From 06eb8e54338a47149f2b2ff4b27d3b5047b5f7d4 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jan 2020 16:36:00 +0100 Subject: [PATCH 01/14] Implemented matlab base function in R --- R/times.R | 37 +++++++++++++++++++++++++++++++++++++ man/times.Rd | 22 ++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 R/times.R create mode 100644 man/times.Rd diff --git a/R/times.R b/R/times.R new file mode 100644 index 0000000..b270331 --- /dev/null +++ b/R/times.R @@ -0,0 +1,37 @@ +#' @title Element-wise matrix multiplication +#' @description Emulates the `times()` and `.*` operators from Matlab. +#' @details This function basically handles elements of different length better than the `*` operator in R, at least as far as behavior from a Matlab user is expecting. +#' @param a first factor of the multiplication +#' @param b second factor of the multiplication +#' @export +#' @returns matrix with dimensions equal to the larger of the two factors +times <- function(a, b) { + # Converting everything to matrix because Matlab looooooves the matrix + a <- as.matrix(a) + b <- as.matrix(b) + + dominant_mx <- NULL + if (!all(dim(a) == dim(b))) { + if (all(dim(a) >= dim(b))) { + dominant_mx <- a + dominated_mx <- b + } else if (all(dim(b) >= dim(a))) { + dominant_mx <- b + dominated_mx <- a + } + } + + if (is.null(dominant_mx)) { + return(a * b) + } else { + # Expanding dominated matrix + dominated_mx <- repmat( + mx = dominated_mx, + n = c( + nrow(dominant_mx) - nrow(dominated_mx) + 1, + ncol(dominant_mx) - ncol(dominated_mx) + 1 + ) + ) + return(dominant_mx * dominated_mx) + } +} \ No newline at end of file diff --git a/man/times.Rd b/man/times.Rd new file mode 100644 index 0000000..a52d264 --- /dev/null +++ b/man/times.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/times.R +\name{times} +\alias{times} +\title{Element-wise matrix multiplication} +\usage{ +times(a, b) +} +\arguments{ +\item{a}{first factor of the multiplication} + +\item{b}{second factor of the multiplication} +} +\value{ +matrix with dimensions equal to the larger of the two factors +} +\description{ +Emulates the `times()` and `.*` operators from Matlab. +} +\details{ +This function basically handles elements of different length better than the `*` operator in R, at least as far as behavior from a Matlab user is expecting. +} From 47eb3f4e9039974fff78be86a78c4a42edc022c6 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jan 2020 16:37:00 +0100 Subject: [PATCH 02/14] Improved documentation --- R/rand.R | 1 + R/repmat.R | 2 +- TODO.md | 12 ++++++------ man/computeIndLogml.Rd | 4 ++-- man/laskeMuutokset4.Rd | 8 ++++---- man/repmat.Rd | 2 +- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/R/rand.R b/R/rand.R index 9e23039..917f35a 100644 --- a/R/rand.R +++ b/R/rand.R @@ -4,6 +4,7 @@ #' @param c number of columns of output matrix #' @return \eqn{r \times c} matrix with random trials from a standard uniform distribution. #' @importFrom stats runif +#' @export rand <- function(r = 1, c = 1) { matrix(runif(r * c), r, c) } \ No newline at end of file diff --git a/R/repmat.R b/R/repmat.R index 45c23cd..fcc041f 100644 --- a/R/repmat.R +++ b/R/repmat.R @@ -3,7 +3,7 @@ #' @details This function was created to replicate the behavior of a homonymous #' function on Matlab #' @param mx matrix -#' @param n either a scalat with the number of replications in both rows and columns or a 2-length vector with individual repetitions. +#' @param n either a scalar with the number of replications in both rows and columns or a 2-length vector with individual repetitions. #' @return matrix replicated over `ncol(mx) * n` columns and `nrow(mx) * n` rows #' @note The Matlab implementation of this function accepts `n` with length > 2. #' @export diff --git a/TODO.md b/TODO.md index 1ba5df4..7476bbe 100644 --- a/TODO.md +++ b/TODO.md @@ -25,12 +25,12 @@ The list below contains non-essential but nice-to-have tasks for the next stable The following behavioral differences have been detected between the Matlab functions and their R counterparts. In order to save time, these differences will not be addressed, since they could require extensive reworking of a function. However, such differences may very well cause unexpected problems in some situations, which is why compiling this list is so important. The list below might provide a good starting point for identifying and fixing bugs: -Function | Argument | Value | Matlab output | R output ----------|----------|-------|---------------|--------- -`ownNum2Str` | `number` | `NaN` | `'NAN'` | error -`ownNum2Str` | `number` | `` | `''` | `''` + warning -`repmat` | `length(n)` | `> 2` | > 2D matrix | 2D matrix -`computeIndLogml` | only one of the arguments is negative | complex number | `NaN` +| Function | Argument | Value | Matlab output | R output | +| ----------------- | ------------------ | -------------- | --------------------- | ------------------------------- | +| `ownNum2Str` | `number` | `NaN` | `'NAN'` | error | +| `ownNum2Str` | `number` | `` | `''` | `''` + warning | +| `repmat` | `length(n)` | `> 2` | > 2D matrix | 2D matrix | +| `computeIndLogml` | some arguments < 0 | complex number | `NaN` | As general remarks, one should keep in mind that: diff --git a/man/computeIndLogml.Rd b/man/computeIndLogml.Rd index 6457c2b..c1d3f6e 100644 --- a/man/computeIndLogml.Rd +++ b/man/computeIndLogml.Rd @@ -7,9 +7,9 @@ computeIndLogml(omaFreqs, osuusTaulu) } \arguments{ -\item{omaFreqs}{omaFreqs} +\item{omaFreqs}{own Freqs?} -\item{osuusTaulu}{osuusTaulu} +\item{osuusTaulu}{Percentage table?} } \description{ Palauttaa yksilön logml:n, kun oletetaan yksilön alkuperät diff --git a/man/laskeMuutokset4.Rd b/man/laskeMuutokset4.Rd index f458d9e..bf7c6f4 100644 --- a/man/laskeMuutokset4.Rd +++ b/man/laskeMuutokset4.Rd @@ -7,13 +7,13 @@ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)) } \arguments{ -\item{osuus}{osuus} +\item{osuus}{Percentages?} -\item{osuusTaulu}{osuusTaulu} +\item{osuusTaulu}{Percentage table?} -\item{omaFreqs}{omaFreqs} +\item{omaFreqs}{own Freqs?} -\item{logml}{logml} +\item{logml}{log maximum likelihood} \item{COUNTS}{COUNTS} } diff --git a/man/repmat.Rd b/man/repmat.Rd index b17bddc..f05885b 100644 --- a/man/repmat.Rd +++ b/man/repmat.Rd @@ -9,7 +9,7 @@ repmat(mx, n) \arguments{ \item{mx}{matrix} -\item{n}{either a scalat with the number of replications in both rows and columns or a 2-length vector with individual repetitions.} +\item{n}{either a scalar with the number of replications in both rows and columns or a 2-length vector with individual repetitions.} } \value{ matrix replicated over `ncol(mx) * n` columns and `nrow(mx) * n` rows From 93872f259742238572221646c2b9b8b863611ed8 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jan 2020 16:37:16 +0100 Subject: [PATCH 03/14] Added test for converted basic Matlab functions --- tests/testthat/test.convertedBaseFunctions.R | 45 ++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tests/testthat/test.convertedBaseFunctions.R diff --git a/tests/testthat/test.convertedBaseFunctions.R b/tests/testthat/test.convertedBaseFunctions.R new file mode 100644 index 0000000..364284f --- /dev/null +++ b/tests/testthat/test.convertedBaseFunctions.R @@ -0,0 +1,45 @@ +context("Basic Matlab functions") + +test_that("rand works properly", { + expect_equal(dim(rand()), c(1, 1)) + expect_equal(dim(rand(1, 2)), c(1, 2)) + expect_equal(dim(rand(3, 2)), c(3, 2)) +}) + +test_that("repmat works properly", { + mx <- matrix(1:4) + expect_equal(repmat(1:4, 1), mx) + expect_equal(repmat(1:4, 2), t(cbind(rbind(mx, mx), rbind(mx, mx)))) + expect_equal( + object = repmat(1:4, c(2, 3)), + expected = t(cbind(rbind(mx, mx), rbind(mx, mx), rbind(mx, mx))) + ) + expect_equal( + object = repmat(1:4, c(4, 1)), + expected = rbind(mx, mx, mx, mx) + ) +}) + +test_that("zeros and ones work as expected", { + expect_equal(zeros(1), matrix(0, 1)) + expect_equal(zeros(2), matrix(0, 2, 2)) + expect_equal(zeros(2, 1), matrix(0, 2, 1)) + expect_equal(zeros(1, 10), matrix(0, 1, 10)) + expect_equal(ones(8), matrix(1, 8, 8)) + expect_equal(ones(5, 2), matrix(1, 5, 2)) + expect_equal(ones(2, 100), matrix(1, 2, 100)) +}) + +test_that("times works as expected", { + expect_equal(times(9, 6), as.matrix(54)) + expect_equal(times(9, c(.8, 9)), as.matrix(c(7.2, 81))) + expect_equal(times(c(.8, 9), 5), as.matrix(c(4, 45))) + expect_equal(times(matrix(1:4, 2), 5), matrix(c(5, 10, 15, 20), 2)) + expect_equal(times(5, matrix(1:4, 2)), matrix(c(5, 10, 15, 20), 2)) + expect_equal(times(matrix(1:4, 2), c(10, 3)), matrix(c(10, 20, 9, 12), 2)) + expect_equal(times(c(10, 3), matrix(1:4, 2)), matrix(c(10, 20, 9, 12), 2)) + expect_equal( + object = times(matrix(c(10, -5, 3, 9), 2), matrix(1:4, 2)), + expected = matrix(c(10, -10, 9, 36), 2) + ) +}) \ No newline at end of file From ff8bc8cce695abdfb20c1656c4a3762f5058fa54 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jan 2020 17:36:14 +0100 Subject: [PATCH 04/14] Improved docs, formatting and handling of globals --- R/computeIndLogml.R | 6 +++--- R/laskeMuutokset4.R | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/computeIndLogml.R b/R/computeIndLogml.R index ff15d90..872c932 100644 --- a/R/computeIndLogml.R +++ b/R/computeIndLogml.R @@ -1,8 +1,8 @@ #' @title computeIndLogml #' @description Palauttaa yksilön logml:n, kun oletetaan yksilön alkuperät #' määritellyiksi kuten osuusTaulu:ssa. -#' @param omaFreqs omaFreqs -#' @param osuusTaulu osuusTaulu +#' @param omaFreqs own Freqs? +#' @param osuusTaulu Percentage table? #' @export computeIndLogml <- function (omaFreqs, osuusTaulu) { @@ -14,7 +14,7 @@ computeIndLogml <- function (omaFreqs, osuusTaulu) { apu <- sum(apu) } - apu = log(apu) + apu <- log(apu) loggis <- sum(apu) return (loggis) diff --git a/R/laskeMuutokset4.R b/R/laskeMuutokset4.R index a098800..730b5e0 100644 --- a/R/laskeMuutokset4.R +++ b/R/laskeMuutokset4.R @@ -3,15 +3,15 @@ #' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran #' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään #' siirrettävää, on vastaavassa kohdassa rivi nollia. -#' @param osuus osuus -#' @param osuusTaulu osuusTaulu -#' @param omaFreqs omaFreqs -#' @param logml logml +#' @param osuus Percentages? +#' @param omaFreqs own Freqs? +#' @param osuusTaulu Percentage table? +#' @param logml log maximum likelihood #' @param COUNTS COUNTS -#' +#' @export laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)) { - npops <- dim(COUNTS)[3] + npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) notEmpty <- osuusTaulu > 0.005 muutokset <- zeros(npops) empties <- !notEmpty From aaa4b9302c70abc094331a7b2a0d93786c3b9542 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 15 Jan 2020 10:55:50 +0100 Subject: [PATCH 05/14] Fixed repmat --- R/repmat.R | 16 +++++++++++++--- man/repmat.Rd | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/R/repmat.R b/R/repmat.R index fcc041f..be72325 100644 --- a/R/repmat.R +++ b/R/repmat.R @@ -6,13 +6,23 @@ #' @param n either a scalar with the number of replications in both rows and columns or a 2-length vector with individual repetitions. #' @return matrix replicated over `ncol(mx) * n` columns and `nrow(mx) * n` rows #' @note The Matlab implementation of this function accepts `n` with length > 2. +#' +#' It should also be noted that a concatenated vector in R, e.g. `c(5, 2)`, becomes a column vector when coerced to matrix, even though it may look like a row vector at first glance. This is important to keep in mind when considering the expected output of this function. Vectors in R make sense to be seen as column vectors, given R's Statistics-oriented paradigm where variables are usually disposed as columns in a dataset. #' @export repmat <- function (mx, n) { + # Validation if (length(n) > 2) warning("Extra dimensions of n ignored") if (length(n) == 1) n <- rep(n, 2) - out <- mx_cols <- rep(mx, n[1]) - if (n[2] > 1) { - for (i in seq(n[2] - 1)) out <- rbind(out, mx_cols) + if (class(mx) != "matrix") mx <- as.matrix(mx) + + # Replicating cols + out <- mx_col <- matrix(rep(mx, n[2]), nrow(mx)) + + # Replicating rows + if (n[1] > 1) { + for (i in seq(n[1] - 1)) out <- rbind(out, mx_col) } + + # Output return(unname(as.matrix(out))) } \ No newline at end of file diff --git a/man/repmat.Rd b/man/repmat.Rd index f05885b..3ec7750 100644 --- a/man/repmat.Rd +++ b/man/repmat.Rd @@ -23,4 +23,6 @@ function on Matlab } \note{ The Matlab implementation of this function accepts `n` with length > 2. + +It should also be noted that a concatenated vector in R, e.g. `c(5, 2)`, becomes a column vector when coerced to matrix, even though it may look like a row vector at first glance. This is important to keep in mind when considering the expected output of this function. Vectors in R make sense to be seen as column vectors, given R's Statistics-oriented paradigm where variables are usually disposed as columns in a dataset. } From 8065883471604c4597c8cf8ff3796ce679fae9a3 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 15 Jan 2020 11:14:55 +0100 Subject: [PATCH 06/14] Fixed tests for base functions --- tests/testthat/test.convertedBaseFunctions.R | 32 ++++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test.convertedBaseFunctions.R b/tests/testthat/test.convertedBaseFunctions.R index 364284f..1b53265 100644 --- a/tests/testthat/test.convertedBaseFunctions.R +++ b/tests/testthat/test.convertedBaseFunctions.R @@ -7,16 +7,26 @@ test_that("rand works properly", { }) test_that("repmat works properly", { - mx <- matrix(1:4) - expect_equal(repmat(1:4, 1), mx) - expect_equal(repmat(1:4, 2), t(cbind(rbind(mx, mx), rbind(mx, mx)))) + mx0 <- c(1:4) # when converted to matrix, results in a column vector + mx1 <- matrix(5:8) + mx2 <- matrix(0:-3, 2) + expect_error(repmat(mx0)) + expect_equal(repmat(mx0, 1), as.matrix(mx0)) expect_equal( - object = repmat(1:4, c(2, 3)), - expected = t(cbind(rbind(mx, mx), rbind(mx, mx), rbind(mx, mx))) + object = repmat(mx0, 2), + expected = unname(t(cbind(rbind(mx0, mx0), rbind(mx0, mx0)))) ) expect_equal( - object = repmat(1:4, c(4, 1)), - expected = rbind(mx, mx, mx, mx) + object = repmat(mx1, 2), + expected = unname(cbind(rbind(mx1, mx1), rbind(mx1, mx1))) + ) + expect_equal( + object = repmat(mx2, c(2, 3)), + expected = cbind(rbind(mx2, mx2), rbind(mx2, mx2), rbind(mx2, mx2)) + ) + expect_equal( + object = repmat(mx2, c(4, 1)), + expected = rbind(mx2, mx2, mx2, mx2) ) }) @@ -36,8 +46,12 @@ test_that("times works as expected", { expect_equal(times(c(.8, 9), 5), as.matrix(c(4, 45))) expect_equal(times(matrix(1:4, 2), 5), matrix(c(5, 10, 15, 20), 2)) expect_equal(times(5, matrix(1:4, 2)), matrix(c(5, 10, 15, 20), 2)) - expect_equal(times(matrix(1:4, 2), c(10, 3)), matrix(c(10, 20, 9, 12), 2)) - expect_equal(times(c(10, 3), matrix(1:4, 2)), matrix(c(10, 20, 9, 12), 2)) + expect_equal(times(matrix(1:4, 2), c(10, 3)), matrix(c(10, 6, 30, 12), 2)) + expect_equal( + object = times(matrix(1:4, 2), matrix(c(10, 3), 1)), + expected = matrix(c(10, 20, 9, 12), 2) + ) + expect_equal(times(c(10, 3), matrix(1:4, 2)), matrix(c(10, 6, 30, 12), 2)) expect_equal( object = times(matrix(c(10, -5, 3, 9), 2), matrix(1:4, 2)), expected = matrix(c(10, -10, 9, 36), 2) From 32b0e3e9c0c9ce64d585634396be161a40073fef Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 15 Jan 2020 11:15:50 +0100 Subject: [PATCH 07/14] Refactoring --- NAMESPACE | 4 ++++ ...convertedBaseFunctions.R => test-convertedBaseFunctions.R} | 0 2 files changed, 4 insertions(+) rename tests/testthat/{test.convertedBaseFunctions.R => test-convertedBaseFunctions.R} (100%) diff --git a/NAMESPACE b/NAMESPACE index e97e44b..6642668 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,10 +4,14 @@ export(admix1) export(calculatePopLogml) export(computeIndLogml) export(computeRows) +export(etsiParas) +export(laskeMuutokset4) export(learn_simple_partition) export(ownNum2Str) export(proportion2str) +export(rand) export(randdir) export(repmat) export(suoritaMuutos) +export(times) importFrom(stats,runif) diff --git a/tests/testthat/test.convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R similarity index 100% rename from tests/testthat/test.convertedBaseFunctions.R rename to tests/testthat/test-convertedBaseFunctions.R From 168f58899eaeea0a65b97ddf697380e33d2a547c Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 15 Jan 2020 13:20:23 +0100 Subject: [PATCH 08/14] Fixed relative domination case --- R/times.R | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/R/times.R b/R/times.R index b270331..c887d3d 100644 --- a/R/times.R +++ b/R/times.R @@ -18,11 +18,24 @@ times <- function(a, b) { } else if (all(dim(b) >= dim(a))) { dominant_mx <- b dominated_mx <- a + } else { + dominant_mx <- "neither" + dominant_dim <- c(max(nrow(b), nrow(a)), max(ncol(b), ncol(a))) } } if (is.null(dominant_mx)) { - return(a * b) + out <- a * b + } else if (dominant_mx == "neither") { + a <- repmat( + mx = a, + n = c(dominant_dim[1] - nrow(a) + 1, dominant_dim[2] - ncol(a) + 1) + ) + b <- repmat( + mx = b, + n = c(dominant_dim[1] - nrow(b) + 1, dominant_dim[2] - ncol(b) + 1) + ) + out <- a * b } else { # Expanding dominated matrix dominated_mx <- repmat( @@ -32,6 +45,7 @@ times <- function(a, b) { ncol(dominant_mx) - ncol(dominated_mx) + 1 ) ) - return(dominant_mx * dominated_mx) + out <- dominant_mx * dominated_mx } + return(out) } \ No newline at end of file From f8f65f176cf792187854d3e9c2b85f8a35e8c2ee Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 15 Jan 2020 13:29:47 +0100 Subject: [PATCH 09/14] Fixed bugs on old functions --- R/computeIndLogml.R | 8 +++++++- R/computeRows.R | 2 +- R/times.R | 2 +- tests/testthat/test-admix1.R | 15 +++++++++++++++ tests/testthat/test-convertedBaseFunctions.R | 4 ++++ 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/R/computeIndLogml.R b/R/computeIndLogml.R index 872c932..0e6d08c 100644 --- a/R/computeIndLogml.R +++ b/R/computeIndLogml.R @@ -5,15 +5,21 @@ #' @param osuusTaulu Percentage table? #' @export computeIndLogml <- function (omaFreqs, osuusTaulu) { + omaFreqs <- as.matrix(omaFreqs) + osuusTaulu <- as.matrix(osuusTaulu) apu <- repmat(t(osuusTaulu), c(1, dim(omaFreqs)[2])) - apu <- c(apu) * omaFreqs # c() avoids deprecation error re. matrix ops + apu <- times(apu, omaFreqs) # c() avoids deprecation error re. matrix ops if (length(apu) > 1) { apu <- colSums(as.matrix(apu)) } else { apu <- sum(apu) } + if (any(apu < 0)) { + # Workaround for log of a negative number + apu <- as.complex(apu) + } apu <- log(apu) loggis <- sum(apu) diff --git a/R/computeRows.R b/R/computeRows.R index 447e5ef..c4a69c3 100644 --- a/R/computeRows.R +++ b/R/computeRows.R @@ -14,7 +14,7 @@ computeRows <- function(rowsFromInd, inds, ninds) { } rows <- inds[, rep(1, rowsFromInd)] rows <- rows * rowsFromInd - miinus <- repmat((rowsFromInd - 1):0, c(1, ninds)) + miinus <- repmat(t((rowsFromInd - 1):0), c(ninds, 1)) rows <- rows - miinus rows <- matrix(t(rows), c(1, rowsFromInd * ninds)) return(t(rows)) diff --git a/R/times.R b/R/times.R index c887d3d..e935cfc 100644 --- a/R/times.R +++ b/R/times.R @@ -26,7 +26,7 @@ times <- function(a, b) { if (is.null(dominant_mx)) { out <- a * b - } else if (dominant_mx == "neither") { + } else if (dominant_mx[1] == "neither") { a <- repmat( mx = a, n = c(dominant_dim[1] - nrow(a) + 1, dominant_dim[2] - ncol(a) + 1) diff --git a/tests/testthat/test-admix1.R b/tests/testthat/test-admix1.R index 1c427eb..876ec49 100644 --- a/tests/testthat/test-admix1.R +++ b/tests/testthat/test-admix1.R @@ -117,6 +117,21 @@ test_that("computeIndLogml works like on Matlab", { expect_equivalent(computeIndLogml(-pi, -8), 3.2242, tol = .0001) expect_equivalent(computeIndLogml(2:3, 2), 2.3026, tol = .0001) expect_equivalent(computeIndLogml(matrix(8:5, 2), 100), 14.316, tol = .001) + expect_equivalent( + object = computeIndLogml(matrix(8:5, 2), matrix(c(1, 3), 1)), + expected = 6.4118, + tol = .001 + ) + expect_equivalent( + object = computeIndLogml(matrix(8:5, 1), matrix(c(1, 3), 1)), + expected = 12.9717, + tol = .001 + ) + expect_equivalent( + object = computeIndLogml(c(8, 1), c(-1.6, 5)), + expected = complex(real = 6.4739, imaginary = pi), + tol = .001 + ) }) test_that("suoritaMuutos works like on Matlab", { diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 1b53265..3480053 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -56,4 +56,8 @@ test_that("times works as expected", { object = times(matrix(c(10, -5, 3, 9), 2), matrix(1:4, 2)), expected = matrix(c(10, -10, 9, 36), 2) ) + expect_equal( + object = times(matrix(c(-1.6, 5), 1), c(8, 1)), + expected = matrix(c(-12.8, -1.6, 40, 5), 2) + ) }) \ No newline at end of file From 1b79529e4b152d8013eebccc17e4c0d0034a33e1 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 15 Jan 2020 15:18:49 +0100 Subject: [PATCH 10/14] Added basic colon function --- R/colon.R | 12 ++++++++++++ man/colon.Rd | 16 ++++++++++++++++ tests/testthat/test-convertedBaseFunctions.R | 5 +++++ 3 files changed, 33 insertions(+) create mode 100644 R/colon.R create mode 100644 man/colon.Rd diff --git a/R/colon.R b/R/colon.R new file mode 100644 index 0000000..abe7a12 --- /dev/null +++ b/R/colon.R @@ -0,0 +1,12 @@ +#' @title Vector creation +#' @description Simulates the function `colon()` and its equivalent `:` operator from Matlab, which have a similar but not quite equivalent behavior when compared to `seq()` and `:` in R. +#' @param a initial number +#' @param b final number +#' @export +colon <- function(a, b) { + if (a <= b) { + return(a:b) + } else { + return(vector(mode = "numeric")) + } +} \ No newline at end of file diff --git a/man/colon.Rd b/man/colon.Rd new file mode 100644 index 0000000..4ae3857 --- /dev/null +++ b/man/colon.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/colon.R +\name{colon} +\alias{colon} +\title{Vector creation} +\usage{ +colon(a, b) +} +\arguments{ +\item{a}{initial number} + +\item{b}{final number} +} +\description{ +Simulates the function `colon()` and its equivalent `:` operator from Matlab, which have a similar but not quite equivalent behavior when compared to `seq()` and `:` in R. +} diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 3480053..9a9c9c4 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -60,4 +60,9 @@ test_that("times works as expected", { object = times(matrix(c(-1.6, 5), 1), c(8, 1)), expected = matrix(c(-12.8, -1.6, 40, 5), 2) ) +}) + +test_that("colon works as expected (hee hee)", { + expect_equal(colon(1, 4), 1:4) + expect_length(colon(4, 1), 0) }) \ No newline at end of file From f9a344ad88cda40e3880acc6428cad0c71661078 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 15 Jan 2020 15:30:22 +0100 Subject: [PATCH 11/14] Improved documentation --- NAMESPACE | 1 + TODO.md | 5 ++--- man/etsiParas.Rd | 20 ++++++++++++++++++++ man/laskeMuutokset4.Rd | 2 +- 4 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 man/etsiParas.Rd diff --git a/NAMESPACE b/NAMESPACE index 6642668..c77eb3f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(admix1) export(calculatePopLogml) +export(colon) export(computeIndLogml) export(computeRows) export(etsiParas) diff --git a/TODO.md b/TODO.md index 7476bbe..795b825 100644 --- a/TODO.md +++ b/TODO.md @@ -16,8 +16,8 @@ Note to contributors: as tasks get finished, please update this file with an `x` The list below contains non-essential but nice-to-have tasks for the next stable release. -- [ ] Uniformize capitalization of function names -- [ ] Uniformize capitalization of function arguments +- [ ] Standardize capitalization of function names +- [ ] Standardize capitalization of function arguments - [ ] Implement optimizations from `fastbaps` - [ ] Replace redundant functions (ex.: `randga`) @@ -30,7 +30,6 @@ The following behavioral differences have been detected between the Matlab funct | `ownNum2Str` | `number` | `NaN` | `'NAN'` | error | | `ownNum2Str` | `number` | `` | `''` | `''` + warning | | `repmat` | `length(n)` | `> 2` | > 2D matrix | 2D matrix | -| `computeIndLogml` | some arguments < 0 | complex number | `NaN` | As general remarks, one should keep in mind that: diff --git a/man/etsiParas.Rd b/man/etsiParas.Rd new file mode 100644 index 0000000..bafaf9c --- /dev/null +++ b/man/etsiParas.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/etsiParas.R +\name{etsiParas} +\alias{etsiParas} +\title{Etsi Paras} +\usage{ +etsiParas(osuus, osuusTaulu, omaFreqs, logml) +} +\arguments{ +\item{osuus}{Percentages?} + +\item{osuusTaulu}{Percentage table?} + +\item{omaFreqs}{own Freqs?} + +\item{logml}{log maximum likelihood} +} +\description{ +Search for the best? +} diff --git a/man/laskeMuutokset4.Rd b/man/laskeMuutokset4.Rd index bf7c6f4..7c16a19 100644 --- a/man/laskeMuutokset4.Rd +++ b/man/laskeMuutokset4.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/laskeMuutokset4.R \name{laskeMuutokset4} \alias{laskeMuutokset4} -\title{laskeMuutokset4} +\title{Calculate changes?} \usage{ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)) } From 1ad661618796f9e88092e70937a3273cbd279200 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 15 Jan 2020 16:22:21 +0100 Subject: [PATCH 12/14] Worked around Matlab OOB bug --- R/laskeMuutokset4.R | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/R/laskeMuutokset4.R b/R/laskeMuutokset4.R index 730b5e0..56bcef3 100644 --- a/R/laskeMuutokset4.R +++ b/R/laskeMuutokset4.R @@ -1,4 +1,4 @@ -#' @title laskeMuutokset4 +#' @title Calculate changes? #' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on #' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran #' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään @@ -12,15 +12,24 @@ laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)) { npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) - notEmpty <- osuusTaulu > 0.005 + notEmpty <- which(osuusTaulu > 0.005) muutokset <- zeros(npops) empties <- !notEmpty for (i1 in notEmpty) { osuusTaulu[i1] <- osuusTaulu[i1] - osuus - for (i2 in c(1:(i1 - 1), (i1 + 1):npops)) { + for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) { osuusTaulu[i2] <- osuusTaulu[i2] + osuus loggis <- computeIndLogml(omaFreqs, osuusTaulu) + + # Work around Matlab OOB bug + if (i1 > nrow(muutokset)) { + muutokset <- rbind(muutokset, muutokset * 0) + } + if (i2 > ncol(muutokset)) { + muutokset <- cbind(muutokset, muutokset * 0) + } + muutokset[i1, i2] <- loggis - logml osuusTaulu[i2] <- osuusTaulu[i2] - osuus } From 6534d4fa4add77e70a58ed4f7f0534ed6971e046 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 15 Jan 2020 17:05:36 +0100 Subject: [PATCH 13/14] Finished proper implementation of etsiParas --- R/etsiParas.R | 24 ++++++++++++++++++++---- tests/testthat/test-admix1.R | 27 ++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/R/etsiParas.R b/R/etsiParas.R index 22eda5e..98606e7 100644 --- a/R/etsiParas.R +++ b/R/etsiParas.R @@ -1,9 +1,25 @@ -etsiParas <- function = (osuus, osuusTaulu, omaFreqs, logml) { - ready <- 0; +#' @export +#' @title Etsi Paras +#' @description Search for the best? +#' @param osuus Percentages? +#' @param omaFreqs own Freqs? +#' @param osuusTaulu Percentage table? +#' @param logml log maximum likelihood +etsiParas <- function (osuus, osuusTaulu, omaFreqs, logml) { + ready <- 0 while (ready != 1) { muutokset <- laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml) - [maxMuutos, indeksi] = max(muutokset[1:end]) # TODO: how does this work on Matlab? - if (maxMuutos > 0) { + + # Work around R's max() limitation on complex numbers + if (any(sapply(muutokset, class) == "complex")) { + maxRe <- max(Re(as.vector(muutokset))) + maxIm <- max(Im(as.vector(muutokset))) + maxMuutos <- complex(real = maxRe, imaginary = maxIm) + } else { + maxMuutos <- max(as.vector(muutokset)) + } + indeksi <- which(muutokset == maxMuutos) + if (Re(maxMuutos) > 0) { osuusTaulu <- suoritaMuutos(osuusTaulu, osuus, indeksi) logml <- logml + maxMuutos } else { diff --git a/tests/testthat/test-admix1.R b/tests/testthat/test-admix1.R index 876ec49..5221db0 100644 --- a/tests/testthat/test-admix1.R +++ b/tests/testthat/test-admix1.R @@ -1,6 +1,5 @@ context("Admixture analysis") - test_that("learn*partition behaves like on Matlab", { # Test data p1 <- c(0, .5, 1, 1.5) @@ -146,4 +145,30 @@ test_that("suoritaMuutos works like on Matlab", { expect_equal(suoritaMuutos(mx2, 0, 5), mx2) expect_equal(suoritaMuutos(mx2, 0, 5), mx2) expect_equal(suoritaMuutos(mx2, -3, 6), matrix(c(13, 9, 5, 8, 8, -10), 2)) +}) + +test_that("laskeMuutokset4 works like on Matlab", { + # TODO: build these tests based on problems found in etsiParas + mx1 <- t(c(.4, 7)) + expect_equivalent( + object = laskeMuutokset4(2, mx1, c(8, 2), 3), + expected = t(c(0, .3742)), + tol = .0001 + ) +}) + +test_that("etsiParas works like on Matlab", { + mx1 <- t(c(.4, 7)) + expect_equal(etsiParas(2, mx1, c(8, 1), 8), c(.4, 7, 8)) + expect_equivalent(etsiParas(2, mx1, c(8, 1), 1), c(-1.6, 9, 3.1864), .0001) + expect_equivalent( + object = etsiParas(5, mx1, c(8, 1), -pi), + expected = c(-4.6, 12, 3.8111), + tol = .001 + ) + expect_equivalent( + object = etsiParas(-.5, mx1, c(-1, 0), -10), + expected = c(7.4, 0, complex(real = 1.8563, imaginary = 3.1416)), + tol = .0001 + ) }) \ No newline at end of file From ab19ebd6719f9c68dc33cef7232ebc19d72c8e6e Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 15 Jan 2020 17:05:45 +0100 Subject: [PATCH 14/14] Updated documentation --- TODO.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/TODO.md b/TODO.md index 795b825..5b3568c 100644 --- a/TODO.md +++ b/TODO.md @@ -23,7 +23,15 @@ The list below contains non-essential but nice-to-have tasks for the next stable # Known pitfalls -The following behavioral differences have been detected between the Matlab functions and their R counterparts. In order to save time, these differences will not be addressed, since they could require extensive reworking of a function. However, such differences may very well cause unexpected problems in some situations, which is why compiling this list is so important. The list below might provide a good starting point for identifying and fixing bugs: +The following behavioral differences have been detected between the Matlab functions and their R counterparts. In order to save time, these differences will not be addressed, since they could require extensive reworking of a function. However, such differences may very well cause unexpected problems in some situations, which is why compiling this list is so important. The tables below might provide a good starting point for identifying and fixing bugs. + +As general remarks, one should keep in mind that: + +- For compliance with IEC 60559, the `round` in base R rounds .5 to the nearest even integer, whereas the homonym function in Matlab rounds up (or down, if negative). +- Some clobal variables have been added as a new (last) argument to the function they appear in. +- When a function is defined on Matlab with multiple outputs, as in `[y1, y2] = f(x)`, it will output only `y1` if called without assignment, as in `f(3)`, or if called with a one-length assignment, as in `a = f(3)`. In order to receive the full output, one must assign two variables to the left side of the assignment operator, as in `[a, b] = f(3)`. rBAPS Functions that might be affected by this behavior include `etsiParas`. + +## Functional differences in rBAPS functions | Function | Argument | Value | Matlab output | R output | | ----------------- | ------------------ | -------------- | --------------------- | ------------------------------- | @@ -31,7 +39,8 @@ The following behavioral differences have been detected between the Matlab funct | `ownNum2Str` | `number` | `` | `''` | `''` + warning | | `repmat` | `length(n)` | `> 2` | > 2D matrix | 2D matrix | -As general remarks, one should keep in mind that: +## Functional differences in base Matlab functions -- For compliance with IEC 60559, the `round` in base R rounds .5 to the nearest even integer, whereas the homonym function in Matlab rounds up (or down, if negative). -- Some clobal variables have been added as a new (last) argument to the function they appear in. \ No newline at end of file +Function | Matlab output | R output +-------- | ------------- | -------- +`max` | Can handle complex numbers | Cannot handle complex numbers \ No newline at end of file