Merge branch 'etsiParas' into dev

2020-01-15 17:10:07 +01:00 · 2020-01-15 17:10:07 +01:00 · cf4c491c52
commit cf4c491c52
parent 50e1791cee ab19ebd671
18 changed files with 338 additions and 38 deletions
--- a/5
+++ b/5
@ -2,12 +2,17 @@

 export(admix1)
 export(calculatePopLogml)
+export(colon)
 export(computeIndLogml)
 export(computeRows)
+export(etsiParas)
+export(laskeMuutokset4)
 export(learn_simple_partition)
 export(ownNum2Str)
 export(proportion2str)
+export(rand)
 export(randdir)
 export(repmat)
 export(suoritaMuutos)
+export(times)
 importFrom(stats,runif)
--- a/R/colon.R
+++ b/R/colon.R
@ -0,0 +1,12 @@
+#' @title Vector creation
+#' @description Simulates the function `colon()` and its equivalent `:` operator from Matlab, which have a similar but not quite equivalent behavior when compared to `seq()` and `:` in R.
+#' @param a initial number
+#' @param b final number
+#' @export
+colon <- function(a, b) {
+    if (a <= b) {
+        return(a:b)
+    } else {
+        return(vector(mode = "numeric"))
+    }
+}
--- a/R/computeIndLogml.R
+++ b/R/computeIndLogml.R
@ -1,20 +1,26 @@
 #' @title computeIndLogml
 #' @description Palauttaa yksilön logml:n, kun oletetaan yksilön alkuperät
 #' määritellyiksi kuten osuusTaulu:ssa.
-#' @param omaFreqs omaFreqs
-#' @param osuusTaulu osuusTaulu
+#' @param omaFreqs own Freqs?
+#' @param osuusTaulu Percentage table?
 #' @export
 computeIndLogml <- function (omaFreqs, osuusTaulu) {
+    omaFreqs <- as.matrix(omaFreqs)
+    osuusTaulu <- as.matrix(osuusTaulu)

    apu <- repmat(t(osuusTaulu), c(1, dim(omaFreqs)[2]))
-    apu <- c(apu) * omaFreqs # c() avoids deprecation error re. matrix ops
+    apu <- times(apu, omaFreqs) # c() avoids deprecation error re. matrix ops
    if (length(apu) > 1) {
        apu <- colSums(as.matrix(apu))
    } else {
        apu <- sum(apu)
    }

-    apu = log(apu)
+    if (any(apu < 0)) {
+        # Workaround for log of a negative number
+       apu <- as.complex(apu)
+    }
+    apu <- log(apu)

    loggis <- sum(apu)
    return (loggis)
--- a/R/computeRows.R
+++ b/R/computeRows.R
@ -14,7 +14,7 @@ computeRows <- function(rowsFromInd, inds, ninds) {
    }
    rows <- inds[, rep(1, rowsFromInd)]
    rows <- rows * rowsFromInd
-    miinus <- repmat((rowsFromInd - 1):0, c(1, ninds))
+    miinus <- repmat(t((rowsFromInd - 1):0), c(ninds, 1))
    rows <- rows - miinus
    rows <- matrix(t(rows), c(1, rowsFromInd * ninds))
    return(t(rows))
--- a/R/etsiParas.R
+++ b/R/etsiParas.R
@ -0,0 +1,30 @@
+#' @export
+#' @title Etsi Paras
+#' @description Search for the best?
+#' @param osuus Percentages?
+#' @param omaFreqs own Freqs?
+#' @param osuusTaulu Percentage table?
+#' @param logml log maximum likelihood
+etsiParas <- function (osuus, osuusTaulu, omaFreqs, logml) {
+    ready <- 0
+    while (ready != 1) {
+        muutokset <- laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml)
+
+        # Work around R's max() limitation on complex numbers
+        if (any(sapply(muutokset, class) == "complex")) {
+            maxRe <- max(Re(as.vector(muutokset)))
+            maxIm <- max(Im(as.vector(muutokset)))
+            maxMuutos <- complex(real = maxRe, imaginary = maxIm)
+        } else {
+            maxMuutos <- max(as.vector(muutokset))
+        }
+        indeksi <- which(muutokset == maxMuutos)
+        if (Re(maxMuutos) > 0) {
+            osuusTaulu <- suoritaMuutos(osuusTaulu, osuus, indeksi)
+            logml <- logml + maxMuutos
+        } else {
+            ready <- 1
+        }
+    }
+    return (c(osuusTaulu, logml))
+}
--- a/R/laskeMuutokset4.R
+++ b/R/laskeMuutokset4.R
@ -1,26 +1,35 @@
-#' @title laskeMuutokset4
+#' @title Calculate changes?
 #' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
 #' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran
 #' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään 
 #' siirrettävää, on vastaavassa kohdassa rivi nollia.
-#' @param osuus osuus
-#' @param osuusTaulu osuusTaulu
-#' @param omaFreqs omaFreqs
-#' @param logml logml
+#' @param osuus Percentages?
+#' @param omaFreqs own Freqs?
+#' @param osuusTaulu Percentage table?
+#' @param logml log maximum likelihood
 #' @param COUNTS COUNTS
-#' 
+#' @export
 laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml,
                             COUNTS = matrix(0)) {
-    npops <- dim(COUNTS)[3]
-    notEmpty <- osuusTaulu > 0.005
+    npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
+    notEmpty <- which(osuusTaulu > 0.005)
    muutokset <- zeros(npops)
    empties <- !notEmpty

    for (i1 in notEmpty) {
        osuusTaulu[i1] <- osuusTaulu[i1] - osuus
-        for (i2 in c(1:(i1 - 1), (i1 + 1):npops)) {
+        for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) {
            osuusTaulu[i2] <- osuusTaulu[i2] + osuus
            loggis <- computeIndLogml(omaFreqs, osuusTaulu)
+            
+            # Work around Matlab OOB bug
+            if (i1 > nrow(muutokset)) {
+                muutokset <- rbind(muutokset, muutokset * 0)
+            }
+            if (i2 > ncol(muutokset)) {
+                muutokset <- cbind(muutokset, muutokset * 0)
+            }
+
            muutokset[i1, i2] <- loggis - logml
            osuusTaulu[i2] <- osuusTaulu[i2] - osuus
        } 
--- a/R/rand.R
+++ b/R/rand.R
@ -4,6 +4,7 @@
 #' @param c number of columns of output matrix
 #' @return \eqn{r \times c} matrix with random trials from a standard uniform distribution.
 #' @importFrom stats runif
+#' @export
 rand <- function(r = 1, c = 1) {
    matrix(runif(r * c), r, c)
 }
--- a/R/repmat.R
+++ b/R/repmat.R
@ -3,16 +3,26 @@
 #' @details This function was created to replicate the behavior of a homonymous
 #' function on Matlab
 #' @param mx matrix
-#' @param n either a scalat with the number of replications in both rows and columns or a 2-length vector with individual repetitions.
+#' @param n either a scalar with the number of replications in both rows and columns or a 2-length vector with individual repetitions.
 #' @return matrix replicated over `ncol(mx) * n` columns and `nrow(mx) * n` rows
 #' @note The Matlab implementation of this function accepts `n` with length > 2.
+#' 
+#' It should also be noted that a concatenated vector in R, e.g. `c(5, 2)`, becomes a column vector when coerced to matrix, even though it may look like a row vector at first glance. This is important to keep in mind when considering the expected output of this function. Vectors in R make sense to be seen as column vectors, given R's Statistics-oriented paradigm where variables are usually disposed as columns in a dataset.
 #' @export
 repmat <- function (mx, n) {
+    # Validation
    if (length(n) > 2) warning("Extra dimensions of n ignored")
    if (length(n) == 1) n <- rep(n, 2)
-    out <- mx_cols <- rep(mx, n[1])
-    if (n[2] > 1) {
-        for (i in seq(n[2] - 1)) out <- rbind(out, mx_cols)
+    if (class(mx) != "matrix") mx <- as.matrix(mx)
+
+    # Replicating cols
+    out <- mx_col <- matrix(rep(mx, n[2]), nrow(mx))
+
+    # Replicating rows
+    if (n[1] > 1) {
+        for (i in seq(n[1] - 1)) out <- rbind(out, mx_col)
    }
+
+    # Output
    return(unname(as.matrix(out)))
 }
--- a/R/times.R
+++ b/R/times.R
@ -0,0 +1,51 @@
+#' @title Element-wise matrix multiplication
+#' @description Emulates the `times()` and `.*` operators from Matlab.
+#' @details This function basically handles elements of different length better than the `*` operator in R, at least as far as behavior from a Matlab user is expecting.
+#' @param a first factor of the multiplication
+#' @param b second factor of the multiplication
+#' @export
+#' @returns matrix with dimensions equal to the larger of the two factors
+times <- function(a, b) {
+    # Converting everything to matrix because Matlab looooooves the matrix
+    a <- as.matrix(a)
+    b <- as.matrix(b)
+
+    dominant_mx <- NULL
+    if (!all(dim(a) == dim(b))) {
+        if (all(dim(a) >= dim(b))) {
+            dominant_mx <- a
+            dominated_mx <- b
+        }  else if (all(dim(b) >= dim(a))) {
+            dominant_mx <- b
+            dominated_mx <- a
+        } else {
+            dominant_mx <- "neither"
+            dominant_dim <- c(max(nrow(b), nrow(a)), max(ncol(b), ncol(a)))
+        }
+    }
+
+    if (is.null(dominant_mx)) {
+        out <- a * b
+    } else if (dominant_mx[1] == "neither") {
+        a <- repmat(
+            mx = a,
+            n = c(dominant_dim[1] - nrow(a) + 1, dominant_dim[2] - ncol(a) + 1)
+        )
+        b <- repmat(
+            mx = b,
+            n = c(dominant_dim[1] - nrow(b) + 1, dominant_dim[2] - ncol(b) + 1)
+        )
+        out <- a * b
+    } else {
+        # Expanding dominated matrix
+        dominated_mx <- repmat(
+            mx = dominated_mx,
+            n = c(
+                nrow(dominant_mx) - nrow(dominated_mx) + 1,
+                ncol(dominant_mx) - ncol(dominated_mx) + 1
+            )
+        )
+        out <- dominant_mx * dominated_mx
+    }
+    return(out)
+}
--- a/TODO.md
+++ b/TODO.md
@ -16,23 +16,31 @@ Note to contributors: as tasks get finished, please update this file with an `x`

 The list below contains non-essential but nice-to-have tasks for the next stable release.

- [ ] Uniformize capitalization of function names
- [ ] Uniformize capitalization of function arguments
+- [ ] Standardize capitalization of function names
+- [ ] Standardize capitalization of function arguments
 - [ ] Implement optimizations from `fastbaps`
 - [ ] Replace redundant functions (ex.: `randga`)

 # Known pitfalls

-The following behavioral differences have been detected between the Matlab functions and their R counterparts. In order to save time, these differences will not be addressed, since they could require extensive reworking of a function. However, such differences may very well cause unexpected problems in some situations, which is why compiling this list is so important. The list below might provide a good starting point for identifying and fixing bugs:
-
-Function | Argument | Value | Matlab output | R output
---------|----------|-------|---------------|---------
-`ownNum2Str` | `number` | `NaN` | `'NAN'` | error
-`ownNum2Str` | `number` | `<vector>` | `'<vector elements>'` | `'<vector elements>'` + warning
-`repmat` | `length(n)` | `> 2` | > 2D matrix | 2D matrix
-`computeIndLogml` | only one of the arguments is negative | complex number | `NaN`
+The following behavioral differences have been detected between the Matlab functions and their R counterparts. In order to save time, these differences will not be addressed, since they could require extensive reworking of a function. However, such differences may very well cause unexpected problems in some situations, which is why compiling this list is so important. The tables below might provide a good starting point for identifying and fixing bugs.

 As general remarks, one should keep in mind that:

 - For compliance with IEC 60559, the `round` in base R rounds .5 to the nearest even integer, whereas the homonym function in Matlab rounds up (or down, if negative).
- Some clobal variables have been added as a new (last) argument to the function they appear in.
+- Some clobal variables have been added as a new (last) argument to the function they appear in.
+- When a function is defined on Matlab with multiple outputs, as in `[y1, y2] = f(x)`, it will output only `y1` if called without assignment, as in `f(3)`, or if called with a one-length assignment, as in `a = f(3)`. In order to receive the full output, one must assign two variables to the left side of the assignment operator, as in `[a, b] = f(3)`. rBAPS Functions that might be affected by this behavior include `etsiParas`.
+
+## Functional differences in rBAPS functions
+
+| Function          | Argument           | Value          | Matlab output         | R output                        |
+| ----------------- | ------------------ | -------------- | --------------------- | ------------------------------- |
+| `ownNum2Str`      | `number`           | `NaN`          | `'NAN'`               | error                           |
+| `ownNum2Str`      | `number`           | `<vector>`     | `'<vector elements>'` | `'<vector elements>'` + warning |
+| `repmat`          | `length(n)`        | `> 2`          | > 2D matrix           | 2D matrix                       |
+
+## Functional differences in base Matlab functions
+
+Function | Matlab output | R output
+-------- | ------------- | --------
+`max` | Can handle complex numbers | Cannot handle complex numbers
--- a/man/colon.Rd
+++ b/man/colon.Rd
@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/colon.R
+\name{colon}
+\alias{colon}
+\title{Vector creation}
+\usage{
+colon(a, b)
+}
+\arguments{
+\item{a}{initial number}
+
+\item{b}{final number}
+}
+\description{
+Simulates the function `colon()` and its equivalent `:` operator from Matlab, which have a similar but not quite equivalent behavior when compared to `seq()` and `:` in R.
+}
--- a/man/computeIndLogml.Rd
+++ b/man/computeIndLogml.Rd
@ -7,9 +7,9 @@
 computeIndLogml(omaFreqs, osuusTaulu)
 }
 \arguments{
-\item{omaFreqs}{omaFreqs}
+\item{omaFreqs}{own Freqs?}

-\item{osuusTaulu}{osuusTaulu}
+\item{osuusTaulu}{Percentage table?}
 }
 \description{
 Palauttaa yksilön logml:n, kun oletetaan yksilön alkuperät
--- a/man/etsiParas.Rd
+++ b/man/etsiParas.Rd
@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/etsiParas.R
+\name{etsiParas}
+\alias{etsiParas}
+\title{Etsi Paras}
+\usage{
+etsiParas(osuus, osuusTaulu, omaFreqs, logml)
+}
+\arguments{
+\item{osuus}{Percentages?}
+
+\item{osuusTaulu}{Percentage table?}
+
+\item{omaFreqs}{own Freqs?}
+
+\item{logml}{log maximum likelihood}
+}
+\description{
+Search for the best?
+}
--- a/man/laskeMuutokset4.Rd
+++ b/man/laskeMuutokset4.Rd
@ -2,18 +2,18 @@
 % Please edit documentation in R/laskeMuutokset4.R
 \name{laskeMuutokset4}
 \alias{laskeMuutokset4}
-\title{laskeMuutokset4}
+\title{Calculate changes?}
 \usage{
 laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0))
 }
 \arguments{
-\item{osuus}{osuus}
+\item{osuus}{Percentages?}

-\item{osuusTaulu}{osuusTaulu}
+\item{osuusTaulu}{Percentage table?}

-\item{omaFreqs}{omaFreqs}
+\item{omaFreqs}{own Freqs?}

-\item{logml}{logml}
+\item{logml}{log maximum likelihood}

 \item{COUNTS}{COUNTS}
 }
--- a/man/repmat.Rd
+++ b/man/repmat.Rd
@ -9,7 +9,7 @@ repmat(mx, n)
 \arguments{
 \item{mx}{matrix}

-\item{n}{either a scalat with the number of replications in both rows and columns or a 2-length vector with individual repetitions.}
+\item{n}{either a scalar with the number of replications in both rows and columns or a 2-length vector with individual repetitions.}
 }
 \value{
 matrix replicated over `ncol(mx) * n` columns and `nrow(mx) * n` rows
@ -23,4 +23,6 @@ function on Matlab
 }
 \note{
 The Matlab implementation of this function accepts `n` with length > 2.
+
+It should also be noted that a concatenated vector in R, e.g. `c(5, 2)`, becomes a column vector when coerced to matrix, even though it may look like a row vector at first glance. This is important to keep in mind when considering the expected output of this function. Vectors in R make sense to be seen as column vectors, given R's Statistics-oriented paradigm where variables are usually disposed as columns in a dataset.
 }
--- a/man/times.Rd
+++ b/man/times.Rd
@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/times.R
+\name{times}
+\alias{times}
+\title{Element-wise matrix multiplication}
+\usage{
+times(a, b)
+}
+\arguments{
+\item{a}{first factor of the multiplication}
+
+\item{b}{second factor of the multiplication}
+}
+\value{
+matrix with dimensions equal to the larger of the two factors
+}
+\description{
+Emulates the `times()` and `.*` operators from Matlab.
+}
+\details{
+This function basically handles elements of different length better than the `*` operator in R, at least as far as behavior from a Matlab user is expecting.
+}
--- a/tests/testthat/test-admix1.R
+++ b/tests/testthat/test-admix1.R
@ -1,6 +1,5 @@
 context("Admixture analysis")

-
 test_that("learn*partition behaves like on Matlab", {
    # Test data
    p1 <- c(0, .5, 1, 1.5)
@ -117,6 +116,21 @@ test_that("computeIndLogml works like on Matlab", {
    expect_equivalent(computeIndLogml(-pi, -8), 3.2242, tol = .0001)
    expect_equivalent(computeIndLogml(2:3, 2), 2.3026, tol = .0001)    
    expect_equivalent(computeIndLogml(matrix(8:5, 2), 100), 14.316, tol = .001)
+    expect_equivalent(
+        object    = computeIndLogml(matrix(8:5, 2), matrix(c(1, 3), 1)),
+        expected  = 6.4118,
+        tol       = .001
+    )
+    expect_equivalent(
+        object    = computeIndLogml(matrix(8:5, 1), matrix(c(1, 3), 1)),
+        expected  = 12.9717,
+        tol       = .001
+    )
+    expect_equivalent(
+        object = computeIndLogml(c(8, 1), c(-1.6, 5)),
+        expected = complex(real = 6.4739, imaginary = pi),
+        tol = .001
+    )
 })

 test_that("suoritaMuutos works like on Matlab", {
@ -131,4 +145,30 @@ test_that("suoritaMuutos works like on Matlab", {
    expect_equal(suoritaMuutos(mx2, 0, 5), mx2)
    expect_equal(suoritaMuutos(mx2, 0, 5), mx2)
    expect_equal(suoritaMuutos(mx2, -3, 6), matrix(c(13, 9, 5, 8, 8, -10), 2))
+})
+
+test_that("laskeMuutokset4 works like on Matlab", {
+    # TODO: build these tests based on problems found in etsiParas
+    mx1 <- t(c(.4, 7))
+    expect_equivalent(
+        object = laskeMuutokset4(2, mx1, c(8, 2), 3),
+        expected = t(c(0, .3742)),
+        tol = .0001
+    )
+})
+
+test_that("etsiParas works like on Matlab", {
+    mx1 <- t(c(.4, 7))
+    expect_equal(etsiParas(2, mx1, c(8, 1), 8), c(.4, 7, 8))
+    expect_equivalent(etsiParas(2, mx1, c(8, 1), 1), c(-1.6, 9, 3.1864), .0001)
+    expect_equivalent(
+        object = etsiParas(5, mx1, c(8, 1), -pi),
+        expected = c(-4.6, 12, 3.8111),
+        tol = .001
+    )
+    expect_equivalent(
+        object = etsiParas(-.5, mx1, c(-1, 0), -10),
+        expected = c(7.4, 0, complex(real = 1.8563, imaginary = 3.1416)),
+        tol = .0001
+    )
 })
--- a/tests/testthat/test-convertedBaseFunctions.R
+++ b/tests/testthat/test-convertedBaseFunctions.R
@ -0,0 +1,68 @@
+context("Basic Matlab functions")
+
+test_that("rand works properly", {
+    expect_equal(dim(rand()), c(1, 1))
+    expect_equal(dim(rand(1, 2)), c(1, 2))
+    expect_equal(dim(rand(3, 2)), c(3, 2))
+})
+
+test_that("repmat works properly", {
+    mx0 <- c(1:4) # when converted to matrix, results in a column vector
+    mx1 <- matrix(5:8)
+    mx2 <- matrix(0:-3, 2)
+    expect_error(repmat(mx0))
+    expect_equal(repmat(mx0, 1), as.matrix(mx0))
+    expect_equal(
+        object = repmat(mx0, 2),
+        expected = unname(t(cbind(rbind(mx0, mx0), rbind(mx0, mx0))))
+    )
+    expect_equal(
+        object = repmat(mx1, 2),
+        expected = unname(cbind(rbind(mx1, mx1), rbind(mx1, mx1)))
+    )
+    expect_equal(
+        object = repmat(mx2, c(2, 3)),
+        expected = cbind(rbind(mx2, mx2), rbind(mx2, mx2), rbind(mx2, mx2))
+    )
+    expect_equal(
+        object = repmat(mx2, c(4, 1)),
+        expected = rbind(mx2, mx2, mx2, mx2)
+    )
+})
+
+test_that("zeros and ones work as expected", {
+    expect_equal(zeros(1), matrix(0, 1))
+    expect_equal(zeros(2), matrix(0, 2, 2))
+    expect_equal(zeros(2, 1), matrix(0, 2, 1))
+    expect_equal(zeros(1, 10), matrix(0, 1, 10))
+    expect_equal(ones(8), matrix(1, 8, 8))
+    expect_equal(ones(5, 2), matrix(1, 5, 2))
+    expect_equal(ones(2, 100), matrix(1, 2, 100))
+})
+
+test_that("times works as expected", {
+    expect_equal(times(9, 6), as.matrix(54))
+    expect_equal(times(9, c(.8, 9)), as.matrix(c(7.2, 81)))
+    expect_equal(times(c(.8, 9), 5), as.matrix(c(4, 45)))
+    expect_equal(times(matrix(1:4, 2), 5), matrix(c(5, 10, 15, 20), 2))
+    expect_equal(times(5, matrix(1:4, 2)), matrix(c(5, 10, 15, 20), 2))
+    expect_equal(times(matrix(1:4, 2), c(10, 3)), matrix(c(10, 6, 30, 12), 2))
+    expect_equal(
+        object = times(matrix(1:4, 2), matrix(c(10, 3), 1)),
+        expected = matrix(c(10, 20, 9, 12), 2)
+    )
+    expect_equal(times(c(10, 3), matrix(1:4, 2)), matrix(c(10, 6, 30, 12), 2))
+    expect_equal(
+        object = times(matrix(c(10, -5, 3, 9), 2), matrix(1:4, 2)),
+        expected = matrix(c(10, -10, 9, 36), 2)
+    )
+    expect_equal(
+        object = times(matrix(c(-1.6, 5), 1), c(8, 1)),
+        expected = matrix(c(-12.8, -1.6, 40, 5), 2)
+    )
+})
+
+test_that("colon works as expected (hee hee)", {
+    expect_equal(colon(1, 4), 1:4)
+    expect_length(colon(4, 1), 0)
+})