Fixed several linting issues

This commit is contained in:
Waldir Leoncio 2022-07-28 15:47:36 +02:00
parent 66d0f0c730
commit 835ac7e6b9
28 changed files with 180 additions and 175 deletions

View file

@ -21,8 +21,6 @@ admix1 <- function(tietue) {
cat("---------------------------------------------------\n") cat("---------------------------------------------------\n")
message("Reading mixture result from: ", pathname_filename, "...") message("Reading mixture result from: ", pathname_filename, "...")
} }
Sys.sleep(0.0001) # TODO: remove
# ASK: what is this supposed to do? What do graphic obj have to do here? # ASK: what is this supposed to do? What do graphic obj have to do here?
# h0 = findobj('Tag','filename1_text'); # h0 = findobj('Tag','filename1_text');
# set(h0,'String',filename); clear h0; # set(h0,'String',filename); clear h0;
@ -42,12 +40,10 @@ admix1 <- function(tietue) {
if (isfield(c, "gene_lengths") & if (isfield(c, "gene_lengths") &
strcmp(c$mixtureType, "linear_mix") | strcmp(c$mixtureType, "linear_mix") |
strcmp(c$mixtureType, "codon_mix")) { # if the mixture is from a linkage model strcmp(c$mixtureType, "codon_mix")) {
# Redirect the call to the linkage admixture function. # if the mixture is from a linkage model Redirect the call to the linkage
# call function noindex to remove the index column # admixture function. call function noindex to remove the index column
c$data <- noIndex(c$data, c$noalle) c$data <- noIndex(c$data, c$noalle)
# linkage_admix(c) # TODO: obsolete. remove.
# return
stop("linkage_admix not implemented") stop("linkage_admix not implemented")
} }
PARTITION <- c$PARTITION PARTITION <- c$PARTITION
@ -148,7 +144,8 @@ admix1 <- function(tietue) {
} }
} }
# Analyze further only individuals who have log-likelihood ratio larger than 3: # Analyze further only individuals who have log-likelihood ratio larger than
# 3
to_investigate <- t(matlab2r::find(likelihood > 3)) to_investigate <- t(matlab2r::find(likelihood > 3))
cat("Possibly admixed individuals:\n") cat("Possibly admixed individuals:\n")
for (i in 1:length(to_investigate)) { for (i in 1:length(to_investigate)) {
@ -229,9 +226,15 @@ admix1 <- function(tietue) {
# Initialize the data structures, which are required in taking the missing # Initialize the data structures, which are required in taking the missing
# data into account: # data into account:
n_missing_levels <- zeros(npops, 1) # number of different levels of "missingness" in each pop (max 3).
missing_levels <- zeros(npops, 3) # the mean values for different levels. # number of different levels of "missingness" in each pop (max 3).
missing_level_partition <- zeros(ninds, 1) # level of each individual (one of the levels of its population). n_missing_levels <- zeros(npops, 1)
# the mean values for different levels.
missing_levels <- zeros(npops, 3)
# level of each individual (one of the levels of its population).
missing_level_partition <- zeros(ninds, 1)
for (i in 1:npops) { for (i in 1:npops) {
inds <- matlab2r::find(PARTITION == i) inds <- matlab2r::find(PARTITION == i)
# Proportions of non-missing data for the individuals: # Proportions of non-missing data for the individuals:
@ -239,7 +242,9 @@ admix1 <- function(tietue) {
for (j in 1:length(inds)) { for (j in 1:length(inds)) {
ind <- inds[j] ind <- inds[j]
non_missing_data[j] <- length( non_missing_data[j] <- length(
matlab2r::find(data[(ind - 1) * rowsFromInd + 1:ind * rowsFromInd, ] > 0) matlab2r::find(
data[(ind - 1) * rowsFromInd + 1:ind * rowsFromInd, ] > 0
)
) / (rowsFromInd * nloci) ) / (rowsFromInd * nloci)
} }
if (all(non_missing_data > 0.9)) { if (all(non_missing_data > 0.9)) {
@ -247,8 +252,6 @@ admix1 <- function(tietue) {
missing_levels[i, 1] <- mean(non_missing_data) missing_levels[i, 1] <- mean(non_missing_data)
missing_level_partition[inds] <- 1 missing_level_partition[inds] <- 1
} else { } else {
# TODO: fix syntax
# [ordered, ordering] = sort(non_missing_data);
ordered <- ordering <- sort(non_missing_data) ordered <- ordering <- sort(non_missing_data)
# part = learn_simple_partition(ordered, 0.05); # part = learn_simple_partition(ordered, 0.05);
part <- learn_partition_modified(ordered) part <- learn_partition_modified(ordered)
@ -258,7 +261,9 @@ admix1 <- function(tietue) {
n_levels <- length(unique(part)) n_levels <- length(unique(part))
n_missing_levels[i] <- n_levels n_missing_levels[i] <- n_levels
for (j in 1:n_levels) { for (j in 1:n_levels) {
missing_levels[i, j] <- mean(non_missing_data[matlab2r::find(part == j)]) missing_levels[i, j] <- mean(
non_missing_data[matlab2r::find(part == j)]
)
} }
} }
} }
@ -369,8 +374,8 @@ admix1 <- function(tietue) {
} }
} }
tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount) # TODO: textual outputs. probably not necessary. translate nonetheless tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount)
viewPartition(proportionsIt, popnames) # TODO: adapt viewPartition(proportionsIt, popnames)
talle <- inputdlg("Do you want to save the admixture results? [Y/n]", "y") talle <- inputdlg("Do you want to save the admixture results? [Y/n]", "y")
if (talle %in% c("y", "Y", "yes", "Yes")) { if (talle %in% c("y", "Y", "yes", "Yes")) {

View file

@ -17,7 +17,8 @@ cluster_own <- function(Z, nclust) {
if (i <= m) { # original node, no leafs if (i <= m) { # original node, no leafs
T[i] <- clsnum T[i] <- clsnum
clsnum <- clsnum + 1 clsnum <- clsnum + 1
} else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree } else if (i < (2 * m - maxclust + 1)) {
# created before cutoff, search down the tree
T <- clusternum(Z, T, i - m, clsnum) T <- clusternum(Z, T, i - m, clsnum)
clsnum <- clsnum + 1 clsnum <- clsnum + 1
} }
@ -25,7 +26,8 @@ cluster_own <- function(Z, nclust) {
if (i <= m) { # original node, no leafs if (i <= m) { # original node, no leafs
T[i] <- clsnum T[i] <- clsnum
clsnum <- clsnum + 1 clsnum <- clsnum + 1
} else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree } else if (i < (2 * m - maxclust + 1)) {
# created before cutoff, search down the tree
T <- clusternum(Z, T, i - m, clsnum) T <- clusternum(Z, T, i - m, clsnum)
clsnum <- clsnum + 1 clsnum <- clsnum + 1
} }

View file

@ -4,7 +4,9 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
# ======================================================== # # ======================================================== #
# Limiting COUNTS size # # Limiting COUNTS size #
# ======================================================== # # ======================================================== #
COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = FALSE] COUNTS <- COUNTS[
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = FALSE
]
x <- size(COUNTS, 1) x <- size(COUNTS, 1)
y <- size(COUNTS, 2) y <- size(COUNTS, 2)
@ -14,14 +16,16 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
# Computation # # Computation #
# ======================================================== # # ======================================================== #
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2 isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
# FIXME: 3rd dimension of COUNTS getting dropped
term1 <- squeeze( term1 <- squeeze(
sum( sum(
sum( sum(
reshape( reshape(
lgamma( lgamma(
repmat(adjprior, c(1, 1, length(pops))) + repmat(adjprior, c(1, 1, length(pops))) +
COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = !isarray] COUNTS[
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops,
drop = !isarray
]
), ),
c(x, y, z) c(x, y, z)
), ),

View file

@ -11,7 +11,7 @@ etsiParas <- function(osuus, osuusTaulu, omaFreqs, logml) {
muutokset <- laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml) muutokset <- laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml)
# Work around R's base::max() limitation on complex numbers # Work around R's base::max() limitation on complex numbers
if (any(sapply(muutokset, class) == "complex")) { if (any(vapply(muutokset, class, vector("character", 1)) == "complex")) {
maxRe <- base::max(Re(as.vector(muutokset))) maxRe <- base::max(Re(as.vector(muutokset)))
maxIm <- base::max(Im(as.vector(muutokset))) maxIm <- base::max(Im(as.vector(muutokset)))
maxMuutos <- complex(real = maxRe, imaginary = maxIm) maxMuutos <- complex(real = maxRe, imaginary = maxIm)

View file

@ -1,7 +1,9 @@
#' @title Read line from file, removing newline characters #' @title Read line from file, removing newline characters
#' @description Equivalent function to its homonymous Matlab equivalent. #' @description Equivalent function to its homonymous Matlab equivalent.
#' @param file character vector to be read, usually an output of `fopen()` #' @param file character vector to be read, usually an output of `fopen()`
#' @return If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1. #' @return If the file is nonempty, then fgetl returns tline as a character
#' vector. If the file is empty and contains only the end-of-file marker, then
#' fgetl returns tline as a numeric value -1.
#' @author Waldir Leoncio #' @author Waldir Leoncio
#' @seealso fopen #' @seealso fopen
#' @export #' @export

View file

@ -13,5 +13,6 @@ fiksaaPartitioYksiloTasolle <- function(rows, rowsFromInd) {
partitio2[rivi / rowsFromInd] <- PARTITION[ind] partitio2[rivi / rowsFromInd] <- PARTITION[ind]
} }
} }
PARTITION <<- partitio2 global_env <- as.environment(1L)
assign("PARTITION", partitio2, envir = global_env)
} }

View file

@ -1,17 +1,17 @@
findOutRowsFromInd <- function(popnames, rows, ploidisuus = NULL) { findOutRowsFromInd <- function(popnames, rows, ploidisuus = NULL) {
if (is.null(ploidisuus)) { if (is.null(ploidisuus)) {
ploidisuus <- questdlg( ploidisuus <- questdlg(
quest = 'Specify the type of individuals in the data', quest = "Specify the type of individuals in the data",
dlgtitle = 'Individual type?', dlgtitle = "Individual type?",
btn = c('Haploid', 'Diploid', 'Tetraploid'), btn = c("Haploid", "Diploid", "Tetraploid"),
defbtn = 'Diploid' defbtn = "Diploid"
) )
} }
rowsFromInd <- switch(ploidisuus, rowsFromInd <- switch(ploidisuus,
'Haploid' = 1, "Haploid" = 1,
'Diploid' = 2, "Diploid" = 2,
'Tetraploid' = 4 "Tetraploid" = 4
) )
popnames2 <- popnames * NA popnames2 <- popnames * NA
@ -22,5 +22,5 @@ findOutRowsFromInd <- function(popnames, rows, ploidisuus = NULL) {
popnames2[i, 2] <- rivi[rowsFromInd] / rowsFromInd popnames2[i, 2] <- rivi[rowsFromInd] / rowsFromInd
} }
} }
return(list(popnames2 = popnames2, rowsFromInd = rowsFromInd)) return(list(popnames2 = popnames2, rowsFromInd = rowsFromInd))
} }

View file

@ -1,11 +1,12 @@
getDistances <- function(data_matrix, nclusters) { getDistances <- function(data_matrix, nclusters) {
# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance # finds initial admixture clustering solution with nclusters clusters, uses
# %gives partition in 8 - bit format # simple mean Hamming distance; gives partition in 8 - bit format; allocates
# %allocates all alleles of a single individual into the same basket # all alleles of a single individual into the same basket; data_matrix
# %data_matrix contains #Loci + 1 columns, last column indicate whose alleles are placed in each row, # contains #Loci + 1 columns, last column indicate whose alleles are placed in
# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row # each row, i.e. ranges from 1 to #individuals. For diploids there are 2 rows
# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix. # per individual, for haploids only a single row; missing values are indicated
# by zeros in the partition and by negative integers in the data_matrix.
size_data <- size(data_matrix) size_data <- size(data_matrix)
nloci <- size_data[2] - 1 nloci <- size_data[2] - 1

View file

@ -10,18 +10,25 @@ getPopDistancesByKL <- function(adjprior) {
d <- zeros(maxnoalle, nloci, npops) d <- zeros(maxnoalle, nloci, npops)
prior <- adjprior prior <- adjprior
prior[find(prior == 1)] <- 0 prior[find(prior == 1)] <- 0
nollia <- find(all(prior == 0)) # Lokukset, joissa oli havaittu vain yht?alleelia.
# Lokukset, joissa oli havaittu vain yht?alleelia.
nollia <- find(all(prior == 0))
prior[1, nollia] <- 1 prior[1, nollia] <- 1
for (pop1 in 1:npops) { for (pop1 in 1:npops) {
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, ncol(prior))) d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat(
sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, ncol(prior))
)
} }
pointer <- 1 pointer <- 1
for (pop1 in 1:(npops - 1)) { for (pop1 in 1:(npops - 1)) {
for (pop2 in (pop1 + 1):npops) { for (pop2 in (pop1 + 1):npops) {
dist1 <- d[, , pop1] dist1 <- d[, , pop1]
dist2 <- d[, , pop2] dist2 <- d[, , pop2]
div12 <- sum(sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10)))) / nloci div12 <- sum(sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10)))) /
div21 <- sum(sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10)))) / nloci nloci
div21 <- sum(sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10)))) /
nloci
div <- (div12 + div21) / 2 div <- (div12 + div21) / 2
distances[pointer] <- div distances[pointer] <- div
pointer <- pointer + 1 pointer <- pointer + 1

View file

@ -46,5 +46,4 @@ greedyMix <- function(data, format, verbose = TRUE) {
stop("Format not supported.") stop("Format not supported.")
} }
return(out) return(out)
# TODO: add handleData(out) or some other post-processing of data
} }

View file

@ -12,7 +12,8 @@
#' @references Samtools: a suite of programs for interacting #' @references Samtools: a suite of programs for interacting
#' with high-throughput sequencing data. <http://www.htslib.org/> #' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export #' @export
greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE) { greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
) {
# Replacing original file reading code with greedyMix() # Replacing original file reading code with greedyMix()
rawdata <- greedyMix(data, format, verbose) rawdata <- greedyMix(data, format, verbose)
@ -26,12 +27,12 @@ greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE)
priorTerm <- data_greedyMix_handle$priorTerm priorTerm <- data_greedyMix_handle$priorTerm
rm(data_greedyMix_handle) rm(data_greedyMix_handle)
Z_dist <- getPopDistancesByKL(adjprior) Z_dist <- getPopDistancesByKL(adjprior)
Z_dist$Z -> Z Z <- Z_dist$Z
Z_dist$dist -> dist dist <- Z_dist$dist
rm(Z_dist) rm(Z_dist)
a_data <- data[, 1:(ncol(data) - 1)] a_data <- data[, 1:(ncol(data) - 1)]
sumcounts_counts_logml <- initialPopCounts(a_data, npops, rows, noalle, adjprior) sumcounts_counts_logml <- initialPopCounts(a_data, npops, rows, noalle, adjprior)
sumcounts_counts_logml$logml -> logml logml <- sumcounts_counts_logml$logml
rm(sumcounts_counts_logml) rm(sumcounts_counts_logml)
c <- list() c <- list()
c$data <- data c$data <- data
@ -76,17 +77,17 @@ greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE)
NULL, NULL, partitionSummary, popnames, fixedK = FALSE NULL, NULL, partitionSummary, popnames, fixedK = FALSE
) )
talle <- questdlg( talle <- questdlg(
'Do you want to save the mixture populations so that you can use them later in admixture analysis?', "Do you want to save the mixture populations so that you can use them later in admixture analysis?",
'Save results?', c('Yes', 'No'), 'Yes' "Save results?", c("Yes", "No"), "Yes"
) )
if (tolower(talle) == 'yes') { if (tolower(talle) == "yes") {
waitALittle() waitALittle()
filename_pathname <- uiputfile() filename_pathname <- uiputfile()
if (rowsFromInd == 0) { if (rowsFromInd == 0) {
# BAPS format was used, rowsFromInd is not known. # BAPS format was used, rowsFromInd is not known.
popnames_rowsFromInd <- findOutRowsFromInd(popnames, rows) popnames_rowsFromInd <- findOutRowsFromInd(popnames, rows)
popnames_rowsFromInd$popnames -> popnames popnames <- popnames_rowsFromInd$popnames
popnames_rowsFromInd$rows -> rows rows <- popnames_rowsFromInd$rows
rm(popnames_rowsFromInd) rm(popnames_rowsFromInd)
} }
groupPartition <- PARTITION groupPartition <- PARTITION
@ -101,7 +102,7 @@ greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE)
c$data <- data c$data <- data
c$npops <- npops c$npops <- npops
c$noalle <- noalle c$noalle <- noalle
c$mixtureType = 'popMix' c$mixtureType <- "popMix"
c$groupPartition <- groupPartition c$groupPartition <- groupPartition
c$rows <- rows c$rows <- rows
c$logml <- logml c$logml <- logml

View file

@ -18,54 +18,57 @@ handlePopData <- function(raw_data) {
dataApu <- data[, 1:nloci] dataApu <- data[, 1:nloci]
nollat <- find(dataApu == 0) nollat <- find(dataApu == 0)
if (length(nollat) > 0) { if (length(nollat) > 0) {
isoinAlleeli <- max(max(dataApu)$maxs)$maxs isoinAlleeli <- max(max(dataApu)$maxs)$maxs
dataApu[nollat] <- isoinAlleeli + 1 dataApu[nollat] <- isoinAlleeli + 1
data[, 1:nloci] <- dataApu data[, 1:nloci] <- dataApu
} }
noalle <- zeros(1, nloci) noalle <- zeros(1, nloci)
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE) alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
for (i in 1:nloci) { for (i in 1:nloci) {
alleelitLokuksessaI <- sort(unique(data[, i])) alleelitLokuksessaI <- sort(unique(data[, i]))
alleelitLokuksessa[[i]] <- alleelitLokuksessaI[find(alleelitLokuksessaI >= 0)] alleelitLokuksessa[[i]] <- alleelitLokuksessaI[find(alleelitLokuksessaI >= 0)]
noalle[i] <- length(alleelitLokuksessa[[i]]) noalle[i] <- length(alleelitLokuksessa[[i]])
} }
alleleCodes <- zeros(unique(max(noalle)$maxs), nloci) alleleCodes <- zeros(unique(max(noalle)$maxs), nloci)
for (i in 1:nloci) { for (i in 1:nloci) {
alleelitLokuksessaI <- alleelitLokuksessa[[i]] alleelitLokuksessaI <- alleelitLokuksessa[[i]]
puuttuvia <- unique(max(noalle)$maxs) - length(alleelitLokuksessaI) puuttuvia <- unique(max(noalle)$maxs) - length(alleelitLokuksessaI)
alleleCodes[, i] = c(alleelitLokuksessaI, zeros(puuttuvia, 1)) alleleCodes[, i] <- c(alleelitLokuksessaI, zeros(puuttuvia, 1))
} }
for (loc in 1:nloci) { for (loc in 1:nloci) {
for (all in 1:noalle[loc]) { for (all in 1:noalle[loc]) {
data[find(data[ , loc] == alleleCodes[all, loc]), loc] <- all data[find(data[, loc] == alleleCodes[all, loc]), loc] <- all
} }
} }
nind <- max(data[, ncol(data)])$maxs nind <- max(data[, ncol(data)])$maxs
rows <- zeros(nind, 2) rows <- zeros(nind, 2)
for (i in 1:nind) { for (i in 1:nind) {
rivit <- t(find(data[, ncol(data)] == i)) rivit <- t(find(data[, ncol(data)] == i))
rows[i, 1] <- min(rivit)$mins rows[i, 1] <- min(rivit)$mins
rows[i, 2] <- max(rivit)$maxs rows[i, 2] <- max(rivit)$maxs
} }
newData <- data newData <- data
adjprior <- zeros(unique(max(noalle)$maxs), nloci) adjprior <- zeros(unique(max(noalle)$maxs), nloci)
priorTerm <- 0 priorTerm <- 0
for (j in 1:nloci) { for (j in 1:nloci) {
adjprior[, j] <- c(repmat(1 / noalle[j], c(noalle[j], 1)), ones(unique(max(noalle)$maxs) - noalle[j], 1)) adjprior[, j] <- c(
priorTerm <- priorTerm + noalle[j] * log(gamma(1 / noalle[j])) repmat(1 / noalle[j], c(noalle[j], 1)),
} ones(unique(max(noalle)$maxs) - noalle[j], 1)
return(
list(
newData = newData,
rows = rows,
alleleCodes = alleleCodes,
noalle = noalle,
adjprior = adjprior,
priorTerm = priorTerm
)
) )
priorTerm <- priorTerm + noalle[j] * log(gamma(1 / noalle[j]))
}
return(
list(
newData = newData,
rows = rows,
alleleCodes = alleleCodes,
noalle = noalle,
adjprior = adjprior,
priorTerm = priorTerm
)
)
} }

View file

@ -135,7 +135,8 @@ laskeMuutokset2 <- function(i1, globalRows, data, adjprior, priorTerm) {
} }
laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1) { laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1
) {
# Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio # Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio
# kertoo, mik<69> olisi muutos logml:ss<73>, jos populaation i1 osapopulaatio # kertoo, mik<69> olisi muutos logml:ss<73>, jos populaation i1 osapopulaatio
# inds2(matlab2r::find(T2==i)) siirret<65><74>n koriin j. # inds2(matlab2r::find(T2==i)) siirret<65><74>n koriin j.

View file

@ -31,7 +31,7 @@ lueGenePopDataPop <- function(tiedostonNimi) {
nimienLkm <- 0 nimienLkm <- 0
ninds <- 0 ninds <- 0
poimiNimi <- 1 poimiNimi <- 1
digitFormat = -1 digitFormat <- -1
while (lokusRiveja < length(fid) - 2) { while (lokusRiveja < length(fid) - 2) {
lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along
line <- fid[lokusRiveja + 2] line <- fid[lokusRiveja + 2]

View file

@ -1,9 +1,12 @@
#' @title Generates random number from a Gamma distribution #' @title Generates random number from a Gamma distribution
#' @description Generates one random number from shape parameter a and rate parameter b #' @description Generates one random number from shape parameter a and rate
#' parameter b
#' @param a shape #' @param a shape
#' @param b rate #' @param b rate
#' @return One realization of Gamma(a, b) #' @return One realization of Gamma(a, b)
#' @details The generated random variable has mean a / b. It will be positively-skewed for small values, but converges to a symmetric distribution for very large numbers of a and b. #' @details The generated random variable has mean a / b. It will be
#' positively-skewed for small values, but converges to a symmetric distribution
#' for very large numbers of a and b.
randga <- function(a, b) { randga <- function(a, b) {
flag <- 0 flag <- 0
if (a > 1) { if (a > 1) {

View file

@ -1,6 +1,7 @@
#' @title simuloiAlleeli #' @title simuloiAlleeli
#' @description Simuloi populaation pop lokukseen loc alleelin. #' @description Simuloi populaation pop lokukseen loc alleelin.
#' @note This function is (only?) called by `simulateIndividuals()`. Therefore, exporting it is probably unnecessary. #' @note This function is (only?) called by `simulateIndividuals()`. Therefore,
#' exporting it is probably unnecessary.
#' @param allfreqs allfreqa #' @param allfreqs allfreqa
#' @param pop pop #' @param pop pop
#' @param loc loc #' @param loc loc

View file

@ -1,7 +1,9 @@
#' @title Tests GenePop data #' @title Tests GenePop data
#' @param tiedostonNimi Filename #' @param tiedostonNimi Filename
#' @return kunnossa (binary "ok" condition value) == 0 if the data is not valid genePop data. Otherwise, kunnossa == 1. #' @return kunnossa (binary "ok" condition value) == 0 if the data is not valid
#' @details GenePop data are textfiles that follow the GenePop format. This function checks if such file is properly formatted as GenePop. #' genePop data. Otherwise, kunnossa == 1.
#' @details GenePop data are textfiles that follow the GenePop format. This
#' function checks if such file is properly formatted as GenePop.
testaaGenePopData <- function(tiedostonNimi) { testaaGenePopData <- function(tiedostonNimi) {
# kunnossa == 0, jos data ei ole kelvollinen genePop data. # kunnossa == 0, jos data ei ole kelvollinen genePop data.
# Muussa tapauksessa kunnossa == 1. # Muussa tapauksessa kunnossa == 1.
@ -36,7 +38,10 @@ testaaGenePopData <- function(tiedostonNimi) {
# Tiedet<65><74>n, ett?pys<79>htyy # Tiedet<65><74>n, ett?pys<79>htyy
pointer <- pointer + 1 pointer <- pointer + 1
} }
line4 <- substring(line4, pointer + 1) # pilkun j<>lkeinen osa (the part after the comma)
# pilkun j<>lkeinen osa (the part after the comma)
line4 <- substring(line4, pointer + 1)
nloci2 <- rivinSisaltamienMjonojenLkm(line4) nloci2 <- rivinSisaltamienMjonojenLkm(line4)
if (nloci2 != nloci) stop("Incorrect file format 1195") if (nloci2 != nloci) stop("Incorrect file format 1195")
} else { } else {
@ -59,7 +64,10 @@ testaaGenePopData <- function(tiedostonNimi) {
# Tiedet<65><74>n, ett?pys<79>htyy # Tiedet<65><74>n, ett?pys<79>htyy
pointer <- pointer + 1 pointer <- pointer + 1
} }
line4 <- substring(line4, pointer + 1) # pilkun j<>lkeinen osa (the part after the comma)
# pilkun j<>lkeinen osa (the part after the comma)
line4 <- substring(line4, pointer + 1)
nloci2 <- rivinSisaltamienMjonojenLkm(line4) nloci2 <- rivinSisaltamienMjonojenLkm(line4)
if (nloci2 != nloci) stop("Incorrect file format 1228") if (nloci2 != nloci) stop("Incorrect file format 1228")
} }

View file

@ -1,60 +1,3 @@
tulostaAdmixtureTiedot <- function(proportions, uskottavuus, alaRaja, niter) { tulostaAdmixtureTiedot <- function(proportions, uskottavuus, alaRaja, niter) {
# ASK: what does this function does. Plotting? Get examples? warning("tulostaAdmixtureTiedot() not implemented" )
# h0 <- findobj('Tag','filename1_text')
# inputf = get(h0,'String');
# h0 = findobj('Tag','filename2_text');
# outf = get(h0,'String'); clear h0;
# if length(outf)>0
# fid = fopen(outf,'a');
# else
# fid = -1;
# diary('baps4_output.baps'); % save in text anyway.
# end
# ninds = length(uskottavuus);
# npops = size(proportions,2);
# disp(' ');
# dispLine;
# disp('RESULTS OF ADMIXTURE ANALYSIS BASED');
# disp('ON MIXTURE CLUSTERING OF INDIVIDUALS');
# disp(['Data file: ' inputf]);
# disp(['Number of individuals: ' num2str(ninds)]);
# disp(['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']);
# disp(' ');
# if fid ~= -1
# fprintf(fid, '\n');
# fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n');
# fprintf(fid,'%s \n', ['RESULTS OF ADMIXTURE ANALYSIS BASED']); fprintf(fid, '\n');
# fprintf(fid,'%s \n', ['ON MIXTURE CLUSTERING OF INDIVIDUALS']); fprintf(fid, '\n');
# fprintf(fid,'%s \n', ['Data file: ' inputf]); fprintf(fid, '\n');
# fprintf(fid,'%s \n', ['Number of individuals: ' num2str(ninds)]); fprintf(fid, '\n');
# fprintf(fid,'%s \n', ['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); fprintf(fid, '\n');
# fprintf(fid, '\n');
# end
# ekaRivi = blanks(6);
# for pop = 1:npops
# ekaRivi = [ekaRivi blanks(3-floor(log10(pop))) num2str(pop) blanks(2)];
# end
# ekaRivi = [ekaRivi blanks(1) 'p']; % Added on 29.08.06
# disp(ekaRivi);
# for ind = 1:ninds
# rivi = [num2str(ind) ':' blanks(4-floor(log10(ind)))];
# if any(proportions(ind,:)>0)
# for pop = 1:npops-1
# rivi = [rivi proportion2str(proportions(ind,pop)) blanks(2)];
# end
# rivi = [rivi proportion2str(proportions(ind,npops)) ': '];
# rivi = [rivi ownNum2Str(uskottavuus(ind))];
# end
# disp(rivi);
# if fid ~= -1
# fprintf(fid,'%s \n',[rivi]); fprintf(fid,'\n');
# end
# end
# if fid ~= -1
# fclose(fid);
# else
# diary off
} }

View file

@ -11,12 +11,17 @@
#' @param popnames popnames #' @param popnames popnames
#' @param fixedK fixedK #' @param fixedK fixedK
#' @export #' @export
writeMixtureInfo <- function(logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK) { writeMixtureInfo <- function(
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile,
partitionSummary, popnames, fixedK
) {
changesInLogml <- list() changesInLogml <- list()
ninds <- size(data, 1) / rowsFromInd ninds <- size(data, 1) / rowsFromInd
npops <- size(COUNTS, 3) npops <- size(COUNTS, 3)
# Check that the names refer to individuals # Check that the names refer to individuals
names <- (size(popnames, 1) == ninds) # Tarkistetaan ett?nimet viittaavat yksil<69>ihin
# Tarkistetaan ett?nimet viittaavat yksil<69>ihin
names <- (size(popnames, 1) == ninds)
if (length(outPutFile) > 0) { if (length(outPutFile) > 0) {
fid <- load(outPutFile) fid <- load(outPutFile)
@ -194,7 +199,10 @@ writeMixtureInfo <- function(logml, rowsFromInd, data, adjprior, priorTerm, outP
d <- zeros(maxnoalle, nloci, npops) d <- zeros(maxnoalle, nloci, npops)
prior <- adjprior prior <- adjprior
prior[matlab2r::find(prior == 1)] <- 0 prior[matlab2r::find(prior == 1)] <- 0
nollia <- matlab2r::find(all(prior == 0)) # Loci in which only one allele was detected.
# Loci in which only one allele was detected.
nollia <- matlab2r::find(all(prior == 0))
prior[1, nollia] <- 1 prior[1, nollia] <- 1
for (pop1 in 1:npops) { for (pop1 in 1:npops) {
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) /

View file

@ -135,7 +135,8 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
nollia <- find(all(prior == 0)) # Lokukset, joissa oli havaittu vain yht?alleelia. nollia <- find(all(prior == 0)) # Lokukset, joissa oli havaittu vain yht?alleelia.
prior[1, nollia] <- 1 prior[1, nollia] <- 1
for (pop1 in 1:npops) { for (pop1 in 1:npops) {
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1)) d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) /
repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
} }
ekarivi <- as.character(npops) ekarivi <- as.character(npops)
cat(ekarivi, "\n") cat(ekarivi, "\n")
@ -166,13 +167,23 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
} }
} }
cat(" \n \n \n") cat(" \n \n \n")
cat("List of sizes of 10 best visited partitions and corresponding log(ml) values\n") cat(
"List of sizes of 10 best visited partitions and corresponding",
"log(ml) values\n"
)
if (exists("fid")) { if (exists("fid")) {
append(fid, " \n\n") append(fid, " \n\n")
append(fid, " \n\n") append(fid, " \n\n")
append(fid, " \n\n") append(fid, " \n\n")
append(fid, " \n\n") append(fid, " \n\n")
append(fid, "List of sizes of 10 best visited partitions and corresponding log(ml) values\n") append(
fid,
cat(
"List of sizes of 10 best visited partitions and corresponding",
"log(ml) values\n"
)
)
} }
partitionSummary <- sortrows(partitionSummary, 2) partitionSummary <- sortrows(partitionSummary, 2)
partitionSummary <- partitionSummary[size(partitionSummary, 1):-1, ] partitionSummary <- partitionSummary[size(partitionSummary, 1):-1, ]
@ -183,7 +194,11 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
vikaPartitio <- size(partitionSummary, 1) vikaPartitio <- size(partitionSummary, 1)
} }
for (part in 1:vikaPartitio) { for (part in 1:vikaPartitio) {
line <- c(as.character(partitionSummary[part, 1]), " ", as.character(partitionSummary[part, 2])) line <- c(
as.character(partitionSummary[part, 1]),
" ",
as.character(partitionSummary[part, 2])
)
cat(line, "\n") cat(line, "\n")
if (exists("fid")) { if (exists("fid")) {
append(fid, c(line, "\n")) append(fid, c(line, "\n"))
@ -204,7 +219,9 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
partitionSummary[, 2] <- partitionSummary[, 2] - max(partitionSummary[, 2]) partitionSummary[, 2] <- partitionSummary[, 2] - max(partitionSummary[, 2])
sumtn <- sum(exp(partitionSummary[, 2])) sumtn <- sum(exp(partitionSummary[, 2]))
for (i in 1:len) { for (i in 1:len) {
npopstn <- sum(exp(partitionSummary(find(partitionSummary[, 1] == npopsTaulu(i)), 2))) npopstn <- sum(
exp(partitionSummary(find(partitionSummary[, 1] == npopsTaulu(i)), 2))
)
probs[i] <- npopstn / sumtn probs[i] <- npopstn / sumtn
} }
for (i in 1:len) { for (i in 1:len) {

View file

@ -10,7 +10,9 @@ fgetl(file)
\item{file}{character vector to be read, usually an output of `fopen()`} \item{file}{character vector to be read, usually an output of `fopen()`}
} }
\value{ \value{
If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1. If the file is nonempty, then fgetl returns tline as a character
vector. If the file is empty and contains only the end-of-file marker, then
fgetl returns tline as a numeric value -1.
} }
\description{ \description{
Equivalent function to its homonymous Matlab equivalent. Equivalent function to its homonymous Matlab equivalent.

View file

@ -15,8 +15,11 @@ randga(a, b)
One realization of Gamma(a, b) One realization of Gamma(a, b)
} }
\description{ \description{
Generates one random number from shape parameter a and rate parameter b Generates one random number from shape parameter a and rate
parameter b
} }
\details{ \details{
The generated random variable has mean a / b. It will be positively-skewed for small values, but converges to a symmetric distribution for very large numbers of a and b. The generated random variable has mean a / b. It will be
positively-skewed for small values, but converges to a symmetric distribution
for very large numbers of a and b.
} }

View file

@ -17,5 +17,6 @@ simuloiAlleeli(allfreqs, pop, loc)
Simuloi populaation pop lokukseen loc alleelin. Simuloi populaation pop lokukseen loc alleelin.
} }
\note{ \note{
This function is (only?) called by `simulateIndividuals()`. Therefore, exporting it is probably unnecessary. This function is (only?) called by `simulateIndividuals()`. Therefore,
exporting it is probably unnecessary.
} }

View file

@ -10,11 +10,13 @@ testaaGenePopData(tiedostonNimi)
\item{tiedostonNimi}{Filename} \item{tiedostonNimi}{Filename}
} }
\value{ \value{
kunnossa (binary "ok" condition value) == 0 if the data is not valid genePop data. Otherwise, kunnossa == 1. kunnossa (binary "ok" condition value) == 0 if the data is not valid
genePop data. Otherwise, kunnossa == 1.
} }
\description{ \description{
Tests GenePop data Tests GenePop data
} }
\details{ \details{
GenePop data are textfiles that follow the GenePop format. This function checks if such file is properly formatted as GenePop. GenePop data are textfiles that follow the GenePop format. This
function checks if such file is properly formatted as GenePop.
} }

View file

@ -1,4 +1 @@
library(testthat) testthat::test_check("rBAPS")
library(rBAPS)
test_check("rBAPS")

View file

@ -46,7 +46,6 @@ test_that("type convertions behave like on Matlab", {
expect_equal(proportion2str(0.4), "0.40") expect_equal(proportion2str(0.4), "0.40")
expect_equal(proportion2str(0.89), "0.89") expect_equal(proportion2str(0.89), "0.89")
expect_equal(proportion2str(-0.4), "0.0-40") # also bugged in original expect_equal(proportion2str(-0.4), "0.0-40") # also bugged in original
# TODO: fix after release, as long as it doesn't break anything else
}) })
test_that("computeRows behaves like on Matlab", { test_that("computeRows behaves like on Matlab", {

View file

@ -1,11 +1,7 @@
context("Auxiliary functions to greedyMix") context("Auxiliary functions to greedyMix")
# Defining the relative path to current inst ----------------------------------- # Defining the relative path to current inst -----------------------------------
if (interactive()) { path_inst <- system.file("ext", "", package = "rBAPS")
path_inst <- "../../inst/ext"
} else {
path_inst <- system.file("ext", "", package = "rBAPS")
}
# Reading datasets ------------------------------------------------------------- # Reading datasets -------------------------------------------------------------
baps_diploid <- read.delim( baps_diploid <- read.delim(
@ -50,7 +46,6 @@ df_bam <- greedyMix(
data = file.path(path_inst, "bam_example.bam"), data = file.path(path_inst, "bam_example.bam"),
format = "BAM", format = "BAM",
) )
# TODO #19: add example reading Genpop
test_that("Files are imported correctly", { test_that("Files are imported correctly", {
expect_equal(dim(df_fasta), c(5, 99)) expect_equal(dim(df_fasta), c(5, 99))
expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3)) expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))

View file

@ -15,7 +15,7 @@ test_that("Auxiliary functions work properly", {
expect_equal( expect_equal(
getPopDistancesByKL(x2), getPopDistancesByKL(x2),
list( list(
Z = matrix(c(c(1, 101:198), c(2:100), rep(0, 99)), nrow = 99, ncol = 3), Z = matrix(c(c(1, 101:198), 2:100, rep(0, 99)), nrow = 99, ncol = 3),
distances = as.matrix(rep(0, 4950)) distances = as.matrix(rep(0, 4950))
) )
) )
@ -27,7 +27,7 @@ test_that("Auxiliary functions work properly", {
rows = matrix(c(1: 3, 1: 3), 3), rows = matrix(c(1: 3, 1: 3), 3),
alleleCodes = matrix(c(1, 4, 9, 2, 5, 8), 3), alleleCodes = matrix(c(1, 4, 9, 2, 5, 8), 3),
noalle = matrix(c(3, 3), 1), noalle = matrix(c(3, 3), 1),
adjprior = matrix(rep(3/9, 6), 3), adjprior = matrix(rep(3 / 9, 6), 3),
priorTerm = 5.9125 priorTerm = 5.9125
) )
expect_equal(handlePopData(x3), y3, tol = 1e-4) expect_equal(handlePopData(x3), y3, tol = 1e-4)