Merge branch 'issue-24' into develop

* issue-24:
  Updated NEWS.md (#24)
  Increment version number to 0.0.0.9030
  Added logml tests for #24
  Fixed calculation of distances
  Fixed calculation of logml
  Adjusted unit tests
  Fixed output
  Syntax fixes
This commit is contained in:
Waldir Leoncio 2024-09-27 13:26:49 +02:00
commit 8e51878f50
9 changed files with 37 additions and 27 deletions

View file

@ -1,6 +1,6 @@
Package: rBAPS Package: rBAPS
Title: Bayesian Analysis of Population Structure Title: Bayesian Analysis of Population Structure
Version: 0.0.0.9029 Version: 0.0.0.9030
Date: 2020-11-09 Date: 2020-11-09
Authors@R: Authors@R:
c( c(

View file

@ -1,5 +1,7 @@
# rBAPS (development version) # rBAPS (development version)
* Fixed `greedyMix()` for BAPS and Genepop files.
# rBAPS 0.0.0.9021 # rBAPS 0.0.0.9021
* Added a `NEWS.md` file to track changes to the package. * Added a `NEWS.md` file to track changes to the package.

View file

@ -80,7 +80,7 @@ greedyMix <- function(
# Generating partition summary =============================================== # Generating partition summary ===============================================
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]]) ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L) c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA, GenePop logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA
logml <- logml_npops_partitionSummary[["logml"]] logml <- logml_npops_partitionSummary[["logml"]]
npops <- logml_npops_partitionSummary[["npops"]] npops <- logml_npops_partitionSummary[["npops"]]
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]] partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]

View file

@ -20,5 +20,5 @@ kldiv2str <- function(div, max_chars = 6L) {
mjono[3] <- "." mjono[3] <- "."
mjono[2] <- palautaYks(abs(div), suurinYks) mjono[2] <- palautaYks(abs(div), suurinYks)
} }
return(mjono) paste(mjono, collapse = "")
} }

View file

@ -2,8 +2,6 @@ laskeLoggis <- function(counts, sumcounts, adjprior) {
npops <- size(counts, 3) npops <- size(counts, 3)
replicated_adjprior <- array(adjprior, c(nrow(adjprior), ncol(adjprior), npops)) replicated_adjprior <- array(adjprior, c(nrow(adjprior), ncol(adjprior), npops))
sum1 <- sum(sum(sum(lgamma(counts + replicated_adjprior)))) sum1 <- sum(sum(sum(lgamma(counts + replicated_adjprior))))
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts))) sum2 <- npops * sum(sum(lgamma(adjprior))) + sum(sum(lgamma(1 + sumcounts)))
logml2 <- sum1 - npops * sum3 sum1 - sum2
loggis <- logml2
return(loggis)
} }

View file

@ -423,7 +423,7 @@ greedyMix_muutokset <- R6Class(
globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] + diffInSumCounts globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] + diffInSumCounts
if (i1 < npops) { if (i1 < npops) {
i2 <- c(1:(i1 - 1), (i1 + 1):npops) i2 <- c(seq_len(i1 - 1), (i1 + 1):npops)
} else { } else {
i2 <- 1:(i1 - 1) i2 <- 1:(i1 - 1)
} }
@ -458,12 +458,12 @@ greedyMix_muutokset <- R6Class(
muutokset <- zeros(npops2, npops) muutokset <- zeros(npops2, npops)
i1_logml <- globals$POP_LOGML[i1] i1_logml <- globals$POP_LOGML[i1]
for (pop2 in 1:npops2) { for (pop2 in seq_len(npops2)) {
inds <- inds2[matlab2r::find(T2 == pop2)] inds <- inds2[matlab2r::find(T2 == pop2)]
ninds <- length(inds) ninds <- length(inds)
if (ninds > 0) { if (ninds > 0) {
rows <- list() rows <- list()
for (i in 1:ninds) { for (i in seq_len(ninds)) {
ind <- inds[i] ind <- inds[i]
lisa <- globalRows[ind, 1]:globalRows[ind, 2] lisa <- globalRows[ind, 1]:globalRows[ind, 2]
rows <- c(rows, t(lisa)) rows <- c(rows, t(lisa))
@ -480,7 +480,7 @@ greedyMix_muutokset <- R6Class(
globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] + diffInSumCounts globals$SUMCOUNTS[i1, ] <- globals$SUMCOUNTS[i1, ] + diffInSumCounts
if (i1 < npops) { if (i1 < npops) {
i2 <- c(1:(i1 - 1), (i1 + 1):npops) i2 <- c(seq_len(i1 - 1), (i1 + 1):npops)
} else { } else {
i2 <- 1:(i1 - 1) i2 <- 1:(i1 - 1)
} }

View file

@ -11,7 +11,7 @@ laskeOsaDist <- function(inds2, dist, ninds) {
ninds2 <- length(inds2) ninds2 <- length(inds2)
apu <- zeros(choose(ninds2, 2), 2) apu <- zeros(choose(ninds2, 2), 2)
rivi <- 1 rivi <- 1
for (i in 1:(ninds2 - 1)) { for (i in seq_len(ninds2 - 1)) {
for (j in (i + 1):ninds2) { for (j in (i + 1):ninds2) {
apu[rivi, 1] <- inds2[i] apu[rivi, 1] <- inds2[i]
apu[rivi, 2] <- inds2[j] apu[rivi, 2] <- inds2[j]
@ -20,6 +20,6 @@ laskeOsaDist <- function(inds2, dist, ninds) {
} }
apu <- (apu[, 1] - 1) * ninds - apu[, 1] / 2 * apu <- (apu[, 1] - 1) * ninds - apu[, 1] / 2 *
(apu[, 1] - 1) + (apu[, 2] - apu[, 1]) (apu[, 1] - 1) + (apu[, 2] - apu[, 1])
dist2 <- dist(apu) dist2 <- dist[apu]
return(dist2) return(dist2)
} }

View file

@ -65,9 +65,9 @@ writeMixtureInfo <- function(
} }
cluster_count <- length(unique(globals$PARTITION)) cluster_count <- length(unique(globals$PARTITION))
if (verbose) cat("Best Partition: ") if (verbose) cat("\nBest Partition:\n")
if (fid != -1) { if (fid != -1) {
append(fid, c("Best Partition: ", "\n")) append(fid, c("\nBest Partition:", "\n"))
} }
for (m in 1:cluster_count) { for (m in 1:cluster_count) {
indsInM <- matlab2r::find(globals$PARTITION == m) indsInM <- matlab2r::find(globals$PARTITION == m)
@ -116,7 +116,7 @@ writeMixtureInfo <- function(
cat("\n") cat("\n")
cat( cat(
"Changes in log(marginal likelihood)", "Changes in log(marginal likelihood)",
" if indvidual i is moved to group j:\n" "if indvidual i is moved to group j:\n"
) )
} }
if (fid != -1) { if (fid != -1) {
@ -163,12 +163,12 @@ writeMixtureInfo <- function(
muutokset <- changesInLogml[, ind] muutokset <- changesInLogml[, ind]
if (names) { if (names) {
nimi <- as.character(popnames[ind]) nimi <- as.character(popnames[ind])
rivi <- c(blanks(maxSize - length(nimi)), nimi, ":\n") rivi <- c(blanks(maxSize - length(nimi)), nimi, ":")
} else { } else {
rivi <- c("\n", blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":\n") rivi <- c("\n", blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":")
} }
for (j in 1:npops) { for (j in 1:npops) {
rivi <- c(rivi, logml2String(omaRound(muutokset[j]), "")) rivi <- c(rivi, " ", logml2String(omaRound(muutokset[j])))
} }
if (verbose) cat(rivi, sep = "") if (verbose) cat(rivi, sep = "")
if (fid != -1) { if (fid != -1) {
@ -176,11 +176,11 @@ writeMixtureInfo <- function(
append(fid, "\n") append(fid, "\n")
} }
} }
if (verbose) cat("\n\nKL-divergence matrix in PHYLIP format: ") if (verbose) cat("\n\nKL-divergence matrix in PHYLIP format:\n")
dist_mat <- zeros(npops, npops) dist_mat <- zeros(npops, npops)
if (fid != -1) { if (fid != -1) {
append(fid, " KL-divergence matrix in PHYLIP format: ") append(fid, " KL-divergence matrix in PHYLIP format:\n")
} }
globals$COUNTS <- globals$COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE] globals$COUNTS <- globals$COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE]
@ -196,7 +196,10 @@ writeMixtureInfo <- function(
prior[1, nollia] <- 1 prior[1, nollia] <- 1
for (pop1 in 1:npops) { for (pop1 in 1:npops) {
squeezed_COUNTS_prior <- squeeze(globals$COUNTS[, , pop1]) + prior squeezed_COUNTS_prior <- squeeze(globals$COUNTS[, , pop1]) + prior
d[, , pop1] <- squeezed_COUNTS_prior / sum(squeezed_COUNTS_prior) repmat_squeezed_COUNTS_prior <- matlab2r::repmat(
colSums(squeezed_COUNTS_prior), c(maxnoalle, 1L)
)
d[, , pop1] <- squeezed_COUNTS_prior / repmat_squeezed_COUNTS_prior
} }
ekarivi <- as.character(npops) ekarivi <- as.character(npops)
if (verbose) cat(ekarivi) if (verbose) cat(ekarivi)
@ -223,9 +226,9 @@ writeMixtureInfo <- function(
dist_mat <- dist_mat + t(dist_mat) # make it symmetric dist_mat <- dist_mat + t(dist_mat) # make it symmetric
for (pop1 in 1:npops) { for (pop1 in 1:npops) {
rivi <- c("\nCluster_", as.character(pop1), "\n") rivi <- c("\nCluster_", as.character(pop1), " ")
for (pop2 in 1:npops) { for (pop2 in 1:npops) {
rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2], 4L)) rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]))
} }
if (verbose) cat(rivi, sep = "") if (verbose) cat(rivi, sep = "")
if (fid != -1) { if (fid != -1) {

View file

@ -65,13 +65,20 @@ test_that("greedyMix() fails successfully", {
}) })
test_that("greedyMix() works when it should", { test_that("greedyMix() works when it should", {
tol <- 1e-4
baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt") baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt")
genepop_file <- file.path(path_inst, "GenePop.txt")
fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta")
greedy_baps <- greedyMix(baps_file, "BAPS") greedy_baps <- greedyMix(baps_file, "BAPS")
expect_error(greedy_genepop <- greedyMix(genepop_file, "GenePop")) # TEMP: fails
expect_type(greedy_baps, "list") expect_type(greedy_baps, "list")
expect_length(greedy_baps, 10L) expect_length(greedy_baps, 10L)
expect_equal(greedy_baps[['logml']], -78.10151, tolerance = tol)
genepop_file <- file.path(path_inst, "GenePop.txt")
greedy_genepop <- greedyMix(genepop_file, "GenePop")
expect_type(greedy_genepop, "list")
expect_length(greedy_genepop, 10L)
expect_equal(greedy_genepop[['logml']], -10.76224, tolerance = tol)
fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta")
}) })
context("Linkage") context("Linkage")