Merge branch 'etsiParas' into dev

This commit is contained in:
Waldir Leoncio 2020-01-15 17:10:07 +01:00
commit cf4c491c52
18 changed files with 338 additions and 38 deletions

View file

@ -2,12 +2,17 @@
export(admix1)
export(calculatePopLogml)
export(colon)
export(computeIndLogml)
export(computeRows)
export(etsiParas)
export(laskeMuutokset4)
export(learn_simple_partition)
export(ownNum2Str)
export(proportion2str)
export(rand)
export(randdir)
export(repmat)
export(suoritaMuutos)
export(times)
importFrom(stats,runif)

12
R/colon.R Normal file
View file

@ -0,0 +1,12 @@
#' @title Vector creation
#' @description Simulates the function `colon()` and its equivalent `:` operator from Matlab, which have a similar but not quite equivalent behavior when compared to `seq()` and `:` in R.
#' @param a initial number
#' @param b final number
#' @export
colon <- function(a, b) {
if (a <= b) {
return(a:b)
} else {
return(vector(mode = "numeric"))
}
}

View file

@ -1,20 +1,26 @@
#' @title computeIndLogml
#' @description Palauttaa yksilön logml:n, kun oletetaan yksilön alkuperät
#' määritellyiksi kuten osuusTaulu:ssa.
#' @param omaFreqs omaFreqs
#' @param osuusTaulu osuusTaulu
#' @param omaFreqs own Freqs?
#' @param osuusTaulu Percentage table?
#' @export
computeIndLogml <- function (omaFreqs, osuusTaulu) {
omaFreqs <- as.matrix(omaFreqs)
osuusTaulu <- as.matrix(osuusTaulu)
apu <- repmat(t(osuusTaulu), c(1, dim(omaFreqs)[2]))
apu <- c(apu) * omaFreqs # c() avoids deprecation error re. matrix ops
apu <- times(apu, omaFreqs) # c() avoids deprecation error re. matrix ops
if (length(apu) > 1) {
apu <- colSums(as.matrix(apu))
} else {
apu <- sum(apu)
}
apu = log(apu)
if (any(apu < 0)) {
# Workaround for log of a negative number
apu <- as.complex(apu)
}
apu <- log(apu)
loggis <- sum(apu)
return (loggis)

View file

@ -14,7 +14,7 @@ computeRows <- function(rowsFromInd, inds, ninds) {
}
rows <- inds[, rep(1, rowsFromInd)]
rows <- rows * rowsFromInd
miinus <- repmat((rowsFromInd - 1):0, c(1, ninds))
miinus <- repmat(t((rowsFromInd - 1):0), c(ninds, 1))
rows <- rows - miinus
rows <- matrix(t(rows), c(1, rowsFromInd * ninds))
return(t(rows))

30
R/etsiParas.R Normal file
View file

@ -0,0 +1,30 @@
#' @export
#' @title Etsi Paras
#' @description Search for the best?
#' @param osuus Percentages?
#' @param omaFreqs own Freqs?
#' @param osuusTaulu Percentage table?
#' @param logml log maximum likelihood
etsiParas <- function (osuus, osuusTaulu, omaFreqs, logml) {
ready <- 0
while (ready != 1) {
muutokset <- laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml)
# Work around R's max() limitation on complex numbers
if (any(sapply(muutokset, class) == "complex")) {
maxRe <- max(Re(as.vector(muutokset)))
maxIm <- max(Im(as.vector(muutokset)))
maxMuutos <- complex(real = maxRe, imaginary = maxIm)
} else {
maxMuutos <- max(as.vector(muutokset))
}
indeksi <- which(muutokset == maxMuutos)
if (Re(maxMuutos) > 0) {
osuusTaulu <- suoritaMuutos(osuusTaulu, osuus, indeksi)
logml <- logml + maxMuutos
} else {
ready <- 1
}
}
return (c(osuusTaulu, logml))
}

View file

@ -1,26 +1,35 @@
#' @title laskeMuutokset4
#' @title Calculate changes?
#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran
#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
#' siirrettävää, on vastaavassa kohdassa rivi nollia.
#' @param osuus osuus
#' @param osuusTaulu osuusTaulu
#' @param omaFreqs omaFreqs
#' @param logml logml
#' @param osuus Percentages?
#' @param omaFreqs own Freqs?
#' @param osuusTaulu Percentage table?
#' @param logml log maximum likelihood
#' @param COUNTS COUNTS
#'
#' @export
laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml,
COUNTS = matrix(0)) {
npops <- dim(COUNTS)[3]
notEmpty <- osuusTaulu > 0.005
npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
notEmpty <- which(osuusTaulu > 0.005)
muutokset <- zeros(npops)
empties <- !notEmpty
for (i1 in notEmpty) {
osuusTaulu[i1] <- osuusTaulu[i1] - osuus
for (i2 in c(1:(i1 - 1), (i1 + 1):npops)) {
for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) {
osuusTaulu[i2] <- osuusTaulu[i2] + osuus
loggis <- computeIndLogml(omaFreqs, osuusTaulu)
# Work around Matlab OOB bug
if (i1 > nrow(muutokset)) {
muutokset <- rbind(muutokset, muutokset * 0)
}
if (i2 > ncol(muutokset)) {
muutokset <- cbind(muutokset, muutokset * 0)
}
muutokset[i1, i2] <- loggis - logml
osuusTaulu[i2] <- osuusTaulu[i2] - osuus
}

View file

@ -4,6 +4,7 @@
#' @param c number of columns of output matrix
#' @return \eqn{r \times c} matrix with random trials from a standard uniform distribution.
#' @importFrom stats runif
#' @export
rand <- function(r = 1, c = 1) {
matrix(runif(r * c), r, c)
}

View file

@ -3,16 +3,26 @@
#' @details This function was created to replicate the behavior of a homonymous
#' function on Matlab
#' @param mx matrix
#' @param n either a scalat with the number of replications in both rows and columns or a 2-length vector with individual repetitions.
#' @param n either a scalar with the number of replications in both rows and columns or a 2-length vector with individual repetitions.
#' @return matrix replicated over `ncol(mx) * n` columns and `nrow(mx) * n` rows
#' @note The Matlab implementation of this function accepts `n` with length > 2.
#'
#' It should also be noted that a concatenated vector in R, e.g. `c(5, 2)`, becomes a column vector when coerced to matrix, even though it may look like a row vector at first glance. This is important to keep in mind when considering the expected output of this function. Vectors in R make sense to be seen as column vectors, given R's Statistics-oriented paradigm where variables are usually disposed as columns in a dataset.
#' @export
repmat <- function (mx, n) {
# Validation
if (length(n) > 2) warning("Extra dimensions of n ignored")
if (length(n) == 1) n <- rep(n, 2)
out <- mx_cols <- rep(mx, n[1])
if (n[2] > 1) {
for (i in seq(n[2] - 1)) out <- rbind(out, mx_cols)
if (class(mx) != "matrix") mx <- as.matrix(mx)
# Replicating cols
out <- mx_col <- matrix(rep(mx, n[2]), nrow(mx))
# Replicating rows
if (n[1] > 1) {
for (i in seq(n[1] - 1)) out <- rbind(out, mx_col)
}
# Output
return(unname(as.matrix(out)))
}

51
R/times.R Normal file
View file

@ -0,0 +1,51 @@
#' @title Element-wise matrix multiplication
#' @description Emulates the `times()` and `.*` operators from Matlab.
#' @details This function basically handles elements of different length better than the `*` operator in R, at least as far as behavior from a Matlab user is expecting.
#' @param a first factor of the multiplication
#' @param b second factor of the multiplication
#' @export
#' @returns matrix with dimensions equal to the larger of the two factors
times <- function(a, b) {
# Converting everything to matrix because Matlab looooooves the matrix
a <- as.matrix(a)
b <- as.matrix(b)
dominant_mx <- NULL
if (!all(dim(a) == dim(b))) {
if (all(dim(a) >= dim(b))) {
dominant_mx <- a
dominated_mx <- b
} else if (all(dim(b) >= dim(a))) {
dominant_mx <- b
dominated_mx <- a
} else {
dominant_mx <- "neither"
dominant_dim <- c(max(nrow(b), nrow(a)), max(ncol(b), ncol(a)))
}
}
if (is.null(dominant_mx)) {
out <- a * b
} else if (dominant_mx[1] == "neither") {
a <- repmat(
mx = a,
n = c(dominant_dim[1] - nrow(a) + 1, dominant_dim[2] - ncol(a) + 1)
)
b <- repmat(
mx = b,
n = c(dominant_dim[1] - nrow(b) + 1, dominant_dim[2] - ncol(b) + 1)
)
out <- a * b
} else {
# Expanding dominated matrix
dominated_mx <- repmat(
mx = dominated_mx,
n = c(
nrow(dominant_mx) - nrow(dominated_mx) + 1,
ncol(dominant_mx) - ncol(dominated_mx) + 1
)
)
out <- dominant_mx * dominated_mx
}
return(out)
}

30
TODO.md
View file

@ -16,23 +16,31 @@ Note to contributors: as tasks get finished, please update this file with an `x`
The list below contains non-essential but nice-to-have tasks for the next stable release.
- [ ] Uniformize capitalization of function names
- [ ] Uniformize capitalization of function arguments
- [ ] Standardize capitalization of function names
- [ ] Standardize capitalization of function arguments
- [ ] Implement optimizations from `fastbaps`
- [ ] Replace redundant functions (ex.: `randga`)
# Known pitfalls
The following behavioral differences have been detected between the Matlab functions and their R counterparts. In order to save time, these differences will not be addressed, since they could require extensive reworking of a function. However, such differences may very well cause unexpected problems in some situations, which is why compiling this list is so important. The list below might provide a good starting point for identifying and fixing bugs:
Function | Argument | Value | Matlab output | R output
---------|----------|-------|---------------|---------
`ownNum2Str` | `number` | `NaN` | `'NAN'` | error
`ownNum2Str` | `number` | `<vector>` | `'<vector elements>'` | `'<vector elements>'` + warning
`repmat` | `length(n)` | `> 2` | > 2D matrix | 2D matrix
`computeIndLogml` | only one of the arguments is negative | complex number | `NaN`
The following behavioral differences have been detected between the Matlab functions and their R counterparts. In order to save time, these differences will not be addressed, since they could require extensive reworking of a function. However, such differences may very well cause unexpected problems in some situations, which is why compiling this list is so important. The tables below might provide a good starting point for identifying and fixing bugs.
As general remarks, one should keep in mind that:
- For compliance with IEC 60559, the `round` in base R rounds .5 to the nearest even integer, whereas the homonym function in Matlab rounds up (or down, if negative).
- Some clobal variables have been added as a new (last) argument to the function they appear in.
- Some clobal variables have been added as a new (last) argument to the function they appear in.
- When a function is defined on Matlab with multiple outputs, as in `[y1, y2] = f(x)`, it will output only `y1` if called without assignment, as in `f(3)`, or if called with a one-length assignment, as in `a = f(3)`. In order to receive the full output, one must assign two variables to the left side of the assignment operator, as in `[a, b] = f(3)`. rBAPS Functions that might be affected by this behavior include `etsiParas`.
## Functional differences in rBAPS functions
| Function | Argument | Value | Matlab output | R output |
| ----------------- | ------------------ | -------------- | --------------------- | ------------------------------- |
| `ownNum2Str` | `number` | `NaN` | `'NAN'` | error |
| `ownNum2Str` | `number` | `<vector>` | `'<vector elements>'` | `'<vector elements>'` + warning |
| `repmat` | `length(n)` | `> 2` | > 2D matrix | 2D matrix |
## Functional differences in base Matlab functions
Function | Matlab output | R output
-------- | ------------- | --------
`max` | Can handle complex numbers | Cannot handle complex numbers

16
man/colon.Rd Normal file
View file

@ -0,0 +1,16 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/colon.R
\name{colon}
\alias{colon}
\title{Vector creation}
\usage{
colon(a, b)
}
\arguments{
\item{a}{initial number}
\item{b}{final number}
}
\description{
Simulates the function `colon()` and its equivalent `:` operator from Matlab, which have a similar but not quite equivalent behavior when compared to `seq()` and `:` in R.
}

View file

@ -7,9 +7,9 @@
computeIndLogml(omaFreqs, osuusTaulu)
}
\arguments{
\item{omaFreqs}{omaFreqs}
\item{omaFreqs}{own Freqs?}
\item{osuusTaulu}{osuusTaulu}
\item{osuusTaulu}{Percentage table?}
}
\description{
Palauttaa yksilön logml:n, kun oletetaan yksilön alkuperät

20
man/etsiParas.Rd Normal file
View file

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/etsiParas.R
\name{etsiParas}
\alias{etsiParas}
\title{Etsi Paras}
\usage{
etsiParas(osuus, osuusTaulu, omaFreqs, logml)
}
\arguments{
\item{osuus}{Percentages?}
\item{osuusTaulu}{Percentage table?}
\item{omaFreqs}{own Freqs?}
\item{logml}{log maximum likelihood}
}
\description{
Search for the best?
}

View file

@ -2,18 +2,18 @@
% Please edit documentation in R/laskeMuutokset4.R
\name{laskeMuutokset4}
\alias{laskeMuutokset4}
\title{laskeMuutokset4}
\title{Calculate changes?}
\usage{
laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0))
}
\arguments{
\item{osuus}{osuus}
\item{osuus}{Percentages?}
\item{osuusTaulu}{osuusTaulu}
\item{osuusTaulu}{Percentage table?}
\item{omaFreqs}{omaFreqs}
\item{omaFreqs}{own Freqs?}
\item{logml}{logml}
\item{logml}{log maximum likelihood}
\item{COUNTS}{COUNTS}
}

View file

@ -9,7 +9,7 @@ repmat(mx, n)
\arguments{
\item{mx}{matrix}
\item{n}{either a scalat with the number of replications in both rows and columns or a 2-length vector with individual repetitions.}
\item{n}{either a scalar with the number of replications in both rows and columns or a 2-length vector with individual repetitions.}
}
\value{
matrix replicated over `ncol(mx) * n` columns and `nrow(mx) * n` rows
@ -23,4 +23,6 @@ function on Matlab
}
\note{
The Matlab implementation of this function accepts `n` with length > 2.
It should also be noted that a concatenated vector in R, e.g. `c(5, 2)`, becomes a column vector when coerced to matrix, even though it may look like a row vector at first glance. This is important to keep in mind when considering the expected output of this function. Vectors in R make sense to be seen as column vectors, given R's Statistics-oriented paradigm where variables are usually disposed as columns in a dataset.
}

22
man/times.Rd Normal file
View file

@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/times.R
\name{times}
\alias{times}
\title{Element-wise matrix multiplication}
\usage{
times(a, b)
}
\arguments{
\item{a}{first factor of the multiplication}
\item{b}{second factor of the multiplication}
}
\value{
matrix with dimensions equal to the larger of the two factors
}
\description{
Emulates the `times()` and `.*` operators from Matlab.
}
\details{
This function basically handles elements of different length better than the `*` operator in R, at least as far as behavior from a Matlab user is expecting.
}

View file

@ -1,6 +1,5 @@
context("Admixture analysis")
test_that("learn*partition behaves like on Matlab", {
# Test data
p1 <- c(0, .5, 1, 1.5)
@ -117,6 +116,21 @@ test_that("computeIndLogml works like on Matlab", {
expect_equivalent(computeIndLogml(-pi, -8), 3.2242, tol = .0001)
expect_equivalent(computeIndLogml(2:3, 2), 2.3026, tol = .0001)
expect_equivalent(computeIndLogml(matrix(8:5, 2), 100), 14.316, tol = .001)
expect_equivalent(
object = computeIndLogml(matrix(8:5, 2), matrix(c(1, 3), 1)),
expected = 6.4118,
tol = .001
)
expect_equivalent(
object = computeIndLogml(matrix(8:5, 1), matrix(c(1, 3), 1)),
expected = 12.9717,
tol = .001
)
expect_equivalent(
object = computeIndLogml(c(8, 1), c(-1.6, 5)),
expected = complex(real = 6.4739, imaginary = pi),
tol = .001
)
})
test_that("suoritaMuutos works like on Matlab", {
@ -131,4 +145,30 @@ test_that("suoritaMuutos works like on Matlab", {
expect_equal(suoritaMuutos(mx2, 0, 5), mx2)
expect_equal(suoritaMuutos(mx2, 0, 5), mx2)
expect_equal(suoritaMuutos(mx2, -3, 6), matrix(c(13, 9, 5, 8, 8, -10), 2))
})
test_that("laskeMuutokset4 works like on Matlab", {
# TODO: build these tests based on problems found in etsiParas
mx1 <- t(c(.4, 7))
expect_equivalent(
object = laskeMuutokset4(2, mx1, c(8, 2), 3),
expected = t(c(0, .3742)),
tol = .0001
)
})
test_that("etsiParas works like on Matlab", {
mx1 <- t(c(.4, 7))
expect_equal(etsiParas(2, mx1, c(8, 1), 8), c(.4, 7, 8))
expect_equivalent(etsiParas(2, mx1, c(8, 1), 1), c(-1.6, 9, 3.1864), .0001)
expect_equivalent(
object = etsiParas(5, mx1, c(8, 1), -pi),
expected = c(-4.6, 12, 3.8111),
tol = .001
)
expect_equivalent(
object = etsiParas(-.5, mx1, c(-1, 0), -10),
expected = c(7.4, 0, complex(real = 1.8563, imaginary = 3.1416)),
tol = .0001
)
})

View file

@ -0,0 +1,68 @@
context("Basic Matlab functions")
test_that("rand works properly", {
expect_equal(dim(rand()), c(1, 1))
expect_equal(dim(rand(1, 2)), c(1, 2))
expect_equal(dim(rand(3, 2)), c(3, 2))
})
test_that("repmat works properly", {
mx0 <- c(1:4) # when converted to matrix, results in a column vector
mx1 <- matrix(5:8)
mx2 <- matrix(0:-3, 2)
expect_error(repmat(mx0))
expect_equal(repmat(mx0, 1), as.matrix(mx0))
expect_equal(
object = repmat(mx0, 2),
expected = unname(t(cbind(rbind(mx0, mx0), rbind(mx0, mx0))))
)
expect_equal(
object = repmat(mx1, 2),
expected = unname(cbind(rbind(mx1, mx1), rbind(mx1, mx1)))
)
expect_equal(
object = repmat(mx2, c(2, 3)),
expected = cbind(rbind(mx2, mx2), rbind(mx2, mx2), rbind(mx2, mx2))
)
expect_equal(
object = repmat(mx2, c(4, 1)),
expected = rbind(mx2, mx2, mx2, mx2)
)
})
test_that("zeros and ones work as expected", {
expect_equal(zeros(1), matrix(0, 1))
expect_equal(zeros(2), matrix(0, 2, 2))
expect_equal(zeros(2, 1), matrix(0, 2, 1))
expect_equal(zeros(1, 10), matrix(0, 1, 10))
expect_equal(ones(8), matrix(1, 8, 8))
expect_equal(ones(5, 2), matrix(1, 5, 2))
expect_equal(ones(2, 100), matrix(1, 2, 100))
})
test_that("times works as expected", {
expect_equal(times(9, 6), as.matrix(54))
expect_equal(times(9, c(.8, 9)), as.matrix(c(7.2, 81)))
expect_equal(times(c(.8, 9), 5), as.matrix(c(4, 45)))
expect_equal(times(matrix(1:4, 2), 5), matrix(c(5, 10, 15, 20), 2))
expect_equal(times(5, matrix(1:4, 2)), matrix(c(5, 10, 15, 20), 2))
expect_equal(times(matrix(1:4, 2), c(10, 3)), matrix(c(10, 6, 30, 12), 2))
expect_equal(
object = times(matrix(1:4, 2), matrix(c(10, 3), 1)),
expected = matrix(c(10, 20, 9, 12), 2)
)
expect_equal(times(c(10, 3), matrix(1:4, 2)), matrix(c(10, 6, 30, 12), 2))
expect_equal(
object = times(matrix(c(10, -5, 3, 9), 2), matrix(1:4, 2)),
expected = matrix(c(10, -10, 9, 36), 2)
)
expect_equal(
object = times(matrix(c(-1.6, 5), 1), c(8, 1)),
expected = matrix(c(-12.8, -1.6, 40, 5), 2)
)
})
test_that("colon works as expected (hee hee)", {
expect_equal(colon(1, 4), 1:4)
expect_length(colon(4, 1), 0)
})