Merge branch 'fix-linter' into develop
This commit is contained in:
commit
ad4bcc6696
29 changed files with 190 additions and 185 deletions
20
.github/workflows/linter.yml
vendored
20
.github/workflows/linter.yml
vendored
|
|
@ -40,16 +40,16 @@ jobs:
|
|||
run: |
|
||||
library(lintr)
|
||||
style_rules <- list(
|
||||
assignment_linter, closed_curly_linter, commas_linter,
|
||||
todo_comment_linter, equals_na_linter,
|
||||
function_left_parentheses_linter, infix_spaces_linter,
|
||||
line_length_linter, no_tab_linter, open_curly_linter,
|
||||
paren_brace_linter, absolute_path_linter, nonportable_path_linter,
|
||||
pipe_continuation_linter, semicolon_terminator_linter,
|
||||
single_quotes_linter, spaces_inside_linter,
|
||||
trailing_blank_lines_linter, trailing_whitespace_linter,
|
||||
undesirable_function_linter, undesirable_operator_linter,
|
||||
unneeded_concatenation_linter
|
||||
assignment_linter(), brace_linter(), commas_linter(),
|
||||
todo_comment_linter(), equals_na_linter(),
|
||||
function_left_parentheses_linter(), infix_spaces_linter(),
|
||||
line_length_linter(), no_tab_linter(), brace_linter(),
|
||||
brace_linter(), absolute_path_linter(), nonportable_path_linter(),
|
||||
pipe_continuation_linter(), semicolon_linter(),
|
||||
single_quotes_linter(), spaces_inside_linter(),
|
||||
trailing_blank_lines_linter(), trailing_whitespace_linter(),
|
||||
undesirable_function_linter(), undesirable_operator_linter(),
|
||||
unneeded_concatenation_linter()
|
||||
) # TODO: expand style rules as package matures
|
||||
lint_package(linters = style_rules)
|
||||
shell: Rscript {0}
|
||||
|
|
|
|||
39
R/admix1.R
39
R/admix1.R
|
|
@ -21,8 +21,6 @@ admix1 <- function(tietue) {
|
|||
cat("---------------------------------------------------\n")
|
||||
message("Reading mixture result from: ", pathname_filename, "...")
|
||||
}
|
||||
Sys.sleep(0.0001) # TODO: remove
|
||||
|
||||
# ASK: what is this supposed to do? What do graphic obj have to do here?
|
||||
# h0 = findobj('Tag','filename1_text');
|
||||
# set(h0,'String',filename); clear h0;
|
||||
|
|
@ -42,12 +40,10 @@ admix1 <- function(tietue) {
|
|||
|
||||
if (isfield(c, "gene_lengths") &
|
||||
strcmp(c$mixtureType, "linear_mix") |
|
||||
strcmp(c$mixtureType, "codon_mix")) { # if the mixture is from a linkage model
|
||||
# Redirect the call to the linkage admixture function.
|
||||
# call function noindex to remove the index column
|
||||
strcmp(c$mixtureType, "codon_mix")) {
|
||||
# if the mixture is from a linkage model Redirect the call to the linkage
|
||||
# admixture function. call function noindex to remove the index column
|
||||
c$data <- noIndex(c$data, c$noalle)
|
||||
# linkage_admix(c) # TODO: obsolete. remove.
|
||||
# return
|
||||
stop("linkage_admix not implemented")
|
||||
}
|
||||
PARTITION <- c$PARTITION
|
||||
|
|
@ -148,7 +144,8 @@ admix1 <- function(tietue) {
|
|||
}
|
||||
}
|
||||
|
||||
# Analyze further only individuals who have log-likelihood ratio larger than 3:
|
||||
# Analyze further only individuals who have log-likelihood ratio larger than
|
||||
# 3
|
||||
to_investigate <- t(matlab2r::find(likelihood > 3))
|
||||
cat("Possibly admixed individuals:\n")
|
||||
for (i in 1:length(to_investigate)) {
|
||||
|
|
@ -229,9 +226,15 @@ admix1 <- function(tietue) {
|
|||
|
||||
# Initialize the data structures, which are required in taking the missing
|
||||
# data into account:
|
||||
n_missing_levels <- zeros(npops, 1) # number of different levels of "missingness" in each pop (max 3).
|
||||
missing_levels <- zeros(npops, 3) # the mean values for different levels.
|
||||
missing_level_partition <- zeros(ninds, 1) # level of each individual (one of the levels of its population).
|
||||
|
||||
# number of different levels of "missingness" in each pop (max 3).
|
||||
n_missing_levels <- zeros(npops, 1)
|
||||
|
||||
# the mean values for different levels.
|
||||
missing_levels <- zeros(npops, 3)
|
||||
|
||||
# level of each individual (one of the levels of its population).
|
||||
missing_level_partition <- zeros(ninds, 1)
|
||||
for (i in 1:npops) {
|
||||
inds <- matlab2r::find(PARTITION == i)
|
||||
# Proportions of non-missing data for the individuals:
|
||||
|
|
@ -239,7 +242,9 @@ admix1 <- function(tietue) {
|
|||
for (j in 1:length(inds)) {
|
||||
ind <- inds[j]
|
||||
non_missing_data[j] <- length(
|
||||
matlab2r::find(data[(ind - 1) * rowsFromInd + 1:ind * rowsFromInd, ] > 0)
|
||||
matlab2r::find(
|
||||
data[(ind - 1) * rowsFromInd + 1:ind * rowsFromInd, ] > 0
|
||||
)
|
||||
) / (rowsFromInd * nloci)
|
||||
}
|
||||
if (all(non_missing_data > 0.9)) {
|
||||
|
|
@ -247,8 +252,6 @@ admix1 <- function(tietue) {
|
|||
missing_levels[i, 1] <- mean(non_missing_data)
|
||||
missing_level_partition[inds] <- 1
|
||||
} else {
|
||||
# TODO: fix syntax
|
||||
# [ordered, ordering] = sort(non_missing_data);
|
||||
ordered <- ordering <- sort(non_missing_data)
|
||||
# part = learn_simple_partition(ordered, 0.05);
|
||||
part <- learn_partition_modified(ordered)
|
||||
|
|
@ -258,7 +261,9 @@ admix1 <- function(tietue) {
|
|||
n_levels <- length(unique(part))
|
||||
n_missing_levels[i] <- n_levels
|
||||
for (j in 1:n_levels) {
|
||||
missing_levels[i, j] <- mean(non_missing_data[matlab2r::find(part == j)])
|
||||
missing_levels[i, j] <- mean(
|
||||
non_missing_data[matlab2r::find(part == j)]
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -369,8 +374,8 @@ admix1 <- function(tietue) {
|
|||
}
|
||||
}
|
||||
|
||||
tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount) # TODO: textual outputs. probably not necessary. translate nonetheless
|
||||
viewPartition(proportionsIt, popnames) # TODO: adapt
|
||||
tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount)
|
||||
viewPartition(proportionsIt, popnames)
|
||||
|
||||
talle <- inputdlg("Do you want to save the admixture results? [Y/n]", "y")
|
||||
if (talle %in% c("y", "Y", "yes", "Yes")) {
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@ cluster_own <- function(Z, nclust) {
|
|||
if (i <= m) { # original node, no leafs
|
||||
T[i] <- clsnum
|
||||
clsnum <- clsnum + 1
|
||||
} else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree
|
||||
} else if (i < (2 * m - maxclust + 1)) {
|
||||
# created before cutoff, search down the tree
|
||||
T <- clusternum(Z, T, i - m, clsnum)
|
||||
clsnum <- clsnum + 1
|
||||
}
|
||||
|
|
@ -25,7 +26,8 @@ cluster_own <- function(Z, nclust) {
|
|||
if (i <= m) { # original node, no leafs
|
||||
T[i] <- clsnum
|
||||
clsnum <- clsnum + 1
|
||||
} else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree
|
||||
} else if (i < (2 * m - maxclust + 1)) {
|
||||
# created before cutoff, search down the tree
|
||||
T <- clusternum(Z, T, i - m, clsnum)
|
||||
clsnum <- clsnum + 1
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,9 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
|
|||
# ======================================================== #
|
||||
# Limiting COUNTS size #
|
||||
# ======================================================== #
|
||||
COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = FALSE]
|
||||
COUNTS <- COUNTS[
|
||||
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = FALSE
|
||||
]
|
||||
|
||||
x <- size(COUNTS, 1)
|
||||
y <- size(COUNTS, 2)
|
||||
|
|
@ -14,14 +16,16 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
|
|||
# Computation #
|
||||
# ======================================================== #
|
||||
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
|
||||
# FIXME: 3rd dimension of COUNTS getting dropped
|
||||
term1 <- squeeze(
|
||||
sum(
|
||||
sum(
|
||||
reshape(
|
||||
lgamma(
|
||||
repmat(adjprior, c(1, 1, length(pops))) +
|
||||
COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = !isarray]
|
||||
COUNTS[
|
||||
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops,
|
||||
drop = !isarray
|
||||
]
|
||||
),
|
||||
c(x, y, z)
|
||||
),
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ etsiParas <- function(osuus, osuusTaulu, omaFreqs, logml) {
|
|||
muutokset <- laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml)
|
||||
|
||||
# Work around R's base::max() limitation on complex numbers
|
||||
if (any(sapply(muutokset, class) == "complex")) {
|
||||
if (any(vapply(muutokset, class, vector("character", 1)) == "complex")) {
|
||||
maxRe <- base::max(Re(as.vector(muutokset)))
|
||||
maxIm <- base::max(Im(as.vector(muutokset)))
|
||||
maxMuutos <- complex(real = maxRe, imaginary = maxIm)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
#' @title Read line from file, removing newline characters
|
||||
#' @description Equivalent function to its homonymous Matlab equivalent.
|
||||
#' @param file character vector to be read, usually an output of `fopen()`
|
||||
#' @return If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1.
|
||||
#' @return If the file is nonempty, then fgetl returns tline as a character
|
||||
#' vector. If the file is empty and contains only the end-of-file marker, then
|
||||
#' fgetl returns tline as a numeric value -1.
|
||||
#' @author Waldir Leoncio
|
||||
#' @seealso fopen
|
||||
#' @export
|
||||
|
|
|
|||
|
|
@ -13,5 +13,6 @@ fiksaaPartitioYksiloTasolle <- function(rows, rowsFromInd) {
|
|||
partitio2[rivi / rowsFromInd] <- PARTITION[ind]
|
||||
}
|
||||
}
|
||||
PARTITION <<- partitio2
|
||||
global_env <- as.environment(1L)
|
||||
assign("PARTITION", partitio2, envir = global_env)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,17 +1,17 @@
|
|||
findOutRowsFromInd <- function(popnames, rows, ploidisuus = NULL) {
|
||||
if (is.null(ploidisuus)) {
|
||||
ploidisuus <- questdlg(
|
||||
quest = 'Specify the type of individuals in the data',
|
||||
dlgtitle = 'Individual type?',
|
||||
btn = c('Haploid', 'Diploid', 'Tetraploid'),
|
||||
defbtn = 'Diploid'
|
||||
quest = "Specify the type of individuals in the data",
|
||||
dlgtitle = "Individual type?",
|
||||
btn = c("Haploid", "Diploid", "Tetraploid"),
|
||||
defbtn = "Diploid"
|
||||
)
|
||||
}
|
||||
|
||||
rowsFromInd <- switch(ploidisuus,
|
||||
'Haploid' = 1,
|
||||
'Diploid' = 2,
|
||||
'Tetraploid' = 4
|
||||
"Haploid" = 1,
|
||||
"Diploid" = 2,
|
||||
"Tetraploid" = 4
|
||||
)
|
||||
|
||||
popnames2 <- popnames * NA
|
||||
|
|
@ -22,5 +22,5 @@ findOutRowsFromInd <- function(popnames, rows, ploidisuus = NULL) {
|
|||
popnames2[i, 2] <- rivi[rowsFromInd] / rowsFromInd
|
||||
}
|
||||
}
|
||||
return(list(popnames2 = popnames2, rowsFromInd = rowsFromInd))
|
||||
return(list(popnames2 = popnames2, rowsFromInd = rowsFromInd))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,12 @@
|
|||
getDistances <- function(data_matrix, nclusters) {
|
||||
|
||||
# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance
|
||||
# %gives partition in 8 - bit format
|
||||
# %allocates all alleles of a single individual into the same basket
|
||||
# %data_matrix contains #Loci + 1 columns, last column indicate whose alleles are placed in each row,
|
||||
# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row
|
||||
# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix.
|
||||
# finds initial admixture clustering solution with nclusters clusters, uses
|
||||
# simple mean Hamming distance; gives partition in 8 - bit format; allocates
|
||||
# all alleles of a single individual into the same basket; data_matrix
|
||||
# contains #Loci + 1 columns, last column indicate whose alleles are placed in
|
||||
# each row, i.e. ranges from 1 to #individuals. For diploids there are 2 rows
|
||||
# per individual, for haploids only a single row; missing values are indicated
|
||||
# by zeros in the partition and by negative integers in the data_matrix.
|
||||
|
||||
size_data <- size(data_matrix)
|
||||
nloci <- size_data[2] - 1
|
||||
|
|
|
|||
|
|
@ -10,18 +10,25 @@ getPopDistancesByKL <- function(adjprior) {
|
|||
d <- zeros(maxnoalle, nloci, npops)
|
||||
prior <- adjprior
|
||||
prior[find(prior == 1)] <- 0
|
||||
nollia <- find(all(prior == 0)) # Lokukset, joissa oli havaittu vain yht?alleelia.
|
||||
|
||||
# Lokukset, joissa oli havaittu vain yht?alleelia.
|
||||
nollia <- find(all(prior == 0))
|
||||
|
||||
prior[1, nollia] <- 1
|
||||
for (pop1 in 1:npops) {
|
||||
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, ncol(prior)))
|
||||
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat(
|
||||
sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, ncol(prior))
|
||||
)
|
||||
}
|
||||
pointer <- 1
|
||||
for (pop1 in 1:(npops - 1)) {
|
||||
for (pop2 in (pop1 + 1):npops) {
|
||||
dist1 <- d[, , pop1]
|
||||
dist2 <- d[, , pop2]
|
||||
div12 <- sum(sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10)))) / nloci
|
||||
div21 <- sum(sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10)))) / nloci
|
||||
div12 <- sum(sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10)))) /
|
||||
nloci
|
||||
div21 <- sum(sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10)))) /
|
||||
nloci
|
||||
div <- (div12 + div21) / 2
|
||||
distances[pointer] <- div
|
||||
pointer <- pointer + 1
|
||||
|
|
|
|||
|
|
@ -46,5 +46,4 @@ greedyMix <- function(data, format, verbose = TRUE) {
|
|||
stop("Format not supported.")
|
||||
}
|
||||
return(out)
|
||||
# TODO: add handleData(out) or some other post-processing of data
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,8 @@
|
|||
#' @references Samtools: a suite of programs for interacting
|
||||
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
||||
#' @export
|
||||
greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE) {
|
||||
greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
|
||||
) {
|
||||
# Replacing original file reading code with greedyMix()
|
||||
rawdata <- greedyMix(data, format, verbose)
|
||||
|
||||
|
|
@ -26,12 +27,12 @@ greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE)
|
|||
priorTerm <- data_greedyMix_handle$priorTerm
|
||||
rm(data_greedyMix_handle)
|
||||
Z_dist <- getPopDistancesByKL(adjprior)
|
||||
Z_dist$Z -> Z
|
||||
Z_dist$dist -> dist
|
||||
Z <- Z_dist$Z
|
||||
dist <- Z_dist$dist
|
||||
rm(Z_dist)
|
||||
a_data <- data[, 1:(ncol(data) - 1)]
|
||||
sumcounts_counts_logml <- initialPopCounts(a_data, npops, rows, noalle, adjprior)
|
||||
sumcounts_counts_logml$logml -> logml
|
||||
logml <- sumcounts_counts_logml$logml
|
||||
rm(sumcounts_counts_logml)
|
||||
c <- list()
|
||||
c$data <- data
|
||||
|
|
@ -76,17 +77,17 @@ greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE)
|
|||
NULL, NULL, partitionSummary, popnames, fixedK = FALSE
|
||||
)
|
||||
talle <- questdlg(
|
||||
'Do you want to save the mixture populations so that you can use them later in admixture analysis?',
|
||||
'Save results?', c('Yes', 'No'), 'Yes'
|
||||
"Do you want to save the mixture populations so that you can use them later in admixture analysis?",
|
||||
"Save results?", c("Yes", "No"), "Yes"
|
||||
)
|
||||
if (tolower(talle) == 'yes') {
|
||||
if (tolower(talle) == "yes") {
|
||||
waitALittle()
|
||||
filename_pathname <- uiputfile()
|
||||
if (rowsFromInd == 0) {
|
||||
# BAPS format was used, rowsFromInd is not known.
|
||||
popnames_rowsFromInd <- findOutRowsFromInd(popnames, rows)
|
||||
popnames_rowsFromInd$popnames -> popnames
|
||||
popnames_rowsFromInd$rows -> rows
|
||||
popnames <- popnames_rowsFromInd$popnames
|
||||
rows <- popnames_rowsFromInd$rows
|
||||
rm(popnames_rowsFromInd)
|
||||
}
|
||||
groupPartition <- PARTITION
|
||||
|
|
@ -101,7 +102,7 @@ greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE)
|
|||
c$data <- data
|
||||
c$npops <- npops
|
||||
c$noalle <- noalle
|
||||
c$mixtureType = 'popMix'
|
||||
c$mixtureType <- "popMix"
|
||||
c$groupPartition <- groupPartition
|
||||
c$rows <- rows
|
||||
c$logml <- logml
|
||||
|
|
|
|||
|
|
@ -18,54 +18,57 @@ handlePopData <- function(raw_data) {
|
|||
dataApu <- data[, 1:nloci]
|
||||
nollat <- find(dataApu == 0)
|
||||
if (length(nollat) > 0) {
|
||||
isoinAlleeli <- max(max(dataApu)$maxs)$maxs
|
||||
dataApu[nollat] <- isoinAlleeli + 1
|
||||
data[, 1:nloci] <- dataApu
|
||||
isoinAlleeli <- max(max(dataApu)$maxs)$maxs
|
||||
dataApu[nollat] <- isoinAlleeli + 1
|
||||
data[, 1:nloci] <- dataApu
|
||||
}
|
||||
|
||||
noalle <- zeros(1, nloci)
|
||||
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
|
||||
for (i in 1:nloci) {
|
||||
alleelitLokuksessaI <- sort(unique(data[, i]))
|
||||
alleelitLokuksessa[[i]] <- alleelitLokuksessaI[find(alleelitLokuksessaI >= 0)]
|
||||
noalle[i] <- length(alleelitLokuksessa[[i]])
|
||||
alleelitLokuksessaI <- sort(unique(data[, i]))
|
||||
alleelitLokuksessa[[i]] <- alleelitLokuksessaI[find(alleelitLokuksessaI >= 0)]
|
||||
noalle[i] <- length(alleelitLokuksessa[[i]])
|
||||
}
|
||||
alleleCodes <- zeros(unique(max(noalle)$maxs), nloci)
|
||||
for (i in 1:nloci) {
|
||||
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
|
||||
puuttuvia <- unique(max(noalle)$maxs) - length(alleelitLokuksessaI)
|
||||
alleleCodes[, i] = c(alleelitLokuksessaI, zeros(puuttuvia, 1))
|
||||
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
|
||||
puuttuvia <- unique(max(noalle)$maxs) - length(alleelitLokuksessaI)
|
||||
alleleCodes[, i] <- c(alleelitLokuksessaI, zeros(puuttuvia, 1))
|
||||
}
|
||||
|
||||
for (loc in 1:nloci) {
|
||||
for (all in 1:noalle[loc]) {
|
||||
data[find(data[ , loc] == alleleCodes[all, loc]), loc] <- all
|
||||
}
|
||||
for (all in 1:noalle[loc]) {
|
||||
data[find(data[, loc] == alleleCodes[all, loc]), loc] <- all
|
||||
}
|
||||
}
|
||||
|
||||
nind <- max(data[, ncol(data)])$maxs
|
||||
rows <- zeros(nind, 2)
|
||||
for (i in 1:nind) {
|
||||
rivit <- t(find(data[, ncol(data)] == i))
|
||||
rows[i, 1] <- min(rivit)$mins
|
||||
rows[i, 2] <- max(rivit)$maxs
|
||||
rivit <- t(find(data[, ncol(data)] == i))
|
||||
rows[i, 1] <- min(rivit)$mins
|
||||
rows[i, 2] <- max(rivit)$maxs
|
||||
}
|
||||
newData <- data
|
||||
|
||||
adjprior <- zeros(unique(max(noalle)$maxs), nloci)
|
||||
priorTerm <- 0
|
||||
for (j in 1:nloci) {
|
||||
adjprior[, j] <- c(repmat(1 / noalle[j], c(noalle[j], 1)), ones(unique(max(noalle)$maxs) - noalle[j], 1))
|
||||
priorTerm <- priorTerm + noalle[j] * log(gamma(1 / noalle[j]))
|
||||
}
|
||||
return(
|
||||
list(
|
||||
newData = newData,
|
||||
rows = rows,
|
||||
alleleCodes = alleleCodes,
|
||||
noalle = noalle,
|
||||
adjprior = adjprior,
|
||||
priorTerm = priorTerm
|
||||
)
|
||||
adjprior[, j] <- c(
|
||||
repmat(1 / noalle[j], c(noalle[j], 1)),
|
||||
ones(unique(max(noalle)$maxs) - noalle[j], 1)
|
||||
)
|
||||
priorTerm <- priorTerm + noalle[j] * log(gamma(1 / noalle[j]))
|
||||
}
|
||||
return(
|
||||
list(
|
||||
newData = newData,
|
||||
rows = rows,
|
||||
alleleCodes = alleleCodes,
|
||||
noalle = noalle,
|
||||
adjprior = adjprior,
|
||||
priorTerm = priorTerm
|
||||
)
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -135,7 +135,8 @@ laskeMuutokset2 <- function(i1, globalRows, data, adjprior, priorTerm) {
|
|||
}
|
||||
|
||||
|
||||
laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1) {
|
||||
laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1
|
||||
) {
|
||||
# Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio
|
||||
# kertoo, mik<69> olisi muutos logml:ss<73>, jos populaation i1 osapopulaatio
|
||||
# inds2(matlab2r::find(T2==i)) siirret<65><74>n koriin j.
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ lueGenePopDataPop <- function(tiedostonNimi) {
|
|||
nimienLkm <- 0
|
||||
ninds <- 0
|
||||
poimiNimi <- 1
|
||||
digitFormat = -1
|
||||
digitFormat <- -1
|
||||
while (lokusRiveja < length(fid) - 2) {
|
||||
lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along
|
||||
line <- fid[lokusRiveja + 2]
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
#' @title Generates random number from a Gamma distribution
|
||||
#' @description Generates one random number from shape parameter a and rate parameter b
|
||||
#' @description Generates one random number from shape parameter a and rate
|
||||
#' parameter b
|
||||
#' @param a shape
|
||||
#' @param b rate
|
||||
#' @return One realization of Gamma(a, b)
|
||||
#' @details The generated random variable has mean a / b. It will be positively-skewed for small values, but converges to a symmetric distribution for very large numbers of a and b.
|
||||
#' @details The generated random variable has mean a / b. It will be
|
||||
#' positively-skewed for small values, but converges to a symmetric distribution
|
||||
#' for very large numbers of a and b.
|
||||
randga <- function(a, b) {
|
||||
flag <- 0
|
||||
if (a > 1) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#' @title simuloiAlleeli
|
||||
#' @description Simuloi populaation pop lokukseen loc alleelin.
|
||||
#' @note This function is (only?) called by `simulateIndividuals()`. Therefore, exporting it is probably unnecessary.
|
||||
#' @note This function is (only?) called by `simulateIndividuals()`. Therefore,
|
||||
#' exporting it is probably unnecessary.
|
||||
#' @param allfreqs allfreqa
|
||||
#' @param pop pop
|
||||
#' @param loc loc
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
#' @title Tests GenePop data
|
||||
#' @param tiedostonNimi Filename
|
||||
#' @return kunnossa (binary "ok" condition value) == 0 if the data is not valid genePop data. Otherwise, kunnossa == 1.
|
||||
#' @details GenePop data are textfiles that follow the GenePop format. This function checks if such file is properly formatted as GenePop.
|
||||
#' @return kunnossa (binary "ok" condition value) == 0 if the data is not valid
|
||||
#' genePop data. Otherwise, kunnossa == 1.
|
||||
#' @details GenePop data are textfiles that follow the GenePop format. This
|
||||
#' function checks if such file is properly formatted as GenePop.
|
||||
testaaGenePopData <- function(tiedostonNimi) {
|
||||
# kunnossa == 0, jos data ei ole kelvollinen genePop data.
|
||||
# Muussa tapauksessa kunnossa == 1.
|
||||
|
|
@ -36,7 +38,10 @@ testaaGenePopData <- function(tiedostonNimi) {
|
|||
# Tiedet<65><74>n, ett?pys<79>htyy
|
||||
pointer <- pointer + 1
|
||||
}
|
||||
line4 <- substring(line4, pointer + 1) # pilkun j<>lkeinen osa (the part after the comma)
|
||||
|
||||
# pilkun j<>lkeinen osa (the part after the comma)
|
||||
line4 <- substring(line4, pointer + 1)
|
||||
|
||||
nloci2 <- rivinSisaltamienMjonojenLkm(line4)
|
||||
if (nloci2 != nloci) stop("Incorrect file format 1195")
|
||||
} else {
|
||||
|
|
@ -59,7 +64,10 @@ testaaGenePopData <- function(tiedostonNimi) {
|
|||
# Tiedet<65><74>n, ett?pys<79>htyy
|
||||
pointer <- pointer + 1
|
||||
}
|
||||
line4 <- substring(line4, pointer + 1) # pilkun j<>lkeinen osa (the part after the comma)
|
||||
|
||||
# pilkun j<>lkeinen osa (the part after the comma)
|
||||
line4 <- substring(line4, pointer + 1)
|
||||
|
||||
nloci2 <- rivinSisaltamienMjonojenLkm(line4)
|
||||
if (nloci2 != nloci) stop("Incorrect file format 1228")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,60 +1,3 @@
|
|||
tulostaAdmixtureTiedot <- function(proportions, uskottavuus, alaRaja, niter) {
|
||||
# ASK: what does this function does. Plotting? Get examples?
|
||||
# h0 <- findobj('Tag','filename1_text')
|
||||
# inputf = get(h0,'String');
|
||||
# h0 = findobj('Tag','filename2_text');
|
||||
# outf = get(h0,'String'); clear h0;
|
||||
|
||||
# if length(outf)>0
|
||||
# fid = fopen(outf,'a');
|
||||
# else
|
||||
# fid = -1;
|
||||
# diary('baps4_output.baps'); % save in text anyway.
|
||||
# end
|
||||
|
||||
# ninds = length(uskottavuus);
|
||||
# npops = size(proportions,2);
|
||||
# disp(' ');
|
||||
# dispLine;
|
||||
# disp('RESULTS OF ADMIXTURE ANALYSIS BASED');
|
||||
# disp('ON MIXTURE CLUSTERING OF INDIVIDUALS');
|
||||
# disp(['Data file: ' inputf]);
|
||||
# disp(['Number of individuals: ' num2str(ninds)]);
|
||||
# disp(['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']);
|
||||
# disp(' ');
|
||||
# if fid ~= -1
|
||||
# fprintf(fid, '\n');
|
||||
# fprintf(fid,'%s \n', ['--------------------------------------------']); fprintf(fid, '\n');
|
||||
# fprintf(fid,'%s \n', ['RESULTS OF ADMIXTURE ANALYSIS BASED']); fprintf(fid, '\n');
|
||||
# fprintf(fid,'%s \n', ['ON MIXTURE CLUSTERING OF INDIVIDUALS']); fprintf(fid, '\n');
|
||||
# fprintf(fid,'%s \n', ['Data file: ' inputf]); fprintf(fid, '\n');
|
||||
# fprintf(fid,'%s \n', ['Number of individuals: ' num2str(ninds)]); fprintf(fid, '\n');
|
||||
# fprintf(fid,'%s \n', ['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']); fprintf(fid, '\n');
|
||||
# fprintf(fid, '\n');
|
||||
# end
|
||||
|
||||
# ekaRivi = blanks(6);
|
||||
# for pop = 1:npops
|
||||
# ekaRivi = [ekaRivi blanks(3-floor(log10(pop))) num2str(pop) blanks(2)];
|
||||
# end
|
||||
# ekaRivi = [ekaRivi blanks(1) 'p']; % Added on 29.08.06
|
||||
# disp(ekaRivi);
|
||||
# for ind = 1:ninds
|
||||
# rivi = [num2str(ind) ':' blanks(4-floor(log10(ind)))];
|
||||
# if any(proportions(ind,:)>0)
|
||||
# for pop = 1:npops-1
|
||||
# rivi = [rivi proportion2str(proportions(ind,pop)) blanks(2)];
|
||||
# end
|
||||
# rivi = [rivi proportion2str(proportions(ind,npops)) ': '];
|
||||
# rivi = [rivi ownNum2Str(uskottavuus(ind))];
|
||||
# end
|
||||
# disp(rivi);
|
||||
# if fid ~= -1
|
||||
# fprintf(fid,'%s \n',[rivi]); fprintf(fid,'\n');
|
||||
# end
|
||||
# end
|
||||
# if fid ~= -1
|
||||
# fclose(fid);
|
||||
# else
|
||||
# diary off
|
||||
warning("tulostaAdmixtureTiedot() not implemented" )
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,12 +11,17 @@
|
|||
#' @param popnames popnames
|
||||
#' @param fixedK fixedK
|
||||
#' @export
|
||||
writeMixtureInfo <- function(logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK) {
|
||||
writeMixtureInfo <- function(
|
||||
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile,
|
||||
partitionSummary, popnames, fixedK
|
||||
) {
|
||||
changesInLogml <- list()
|
||||
ninds <- size(data, 1) / rowsFromInd
|
||||
npops <- size(COUNTS, 3)
|
||||
# Check that the names refer to individuals
|
||||
names <- (size(popnames, 1) == ninds) # Tarkistetaan ett?nimet viittaavat yksil<69>ihin
|
||||
|
||||
# Tarkistetaan ett?nimet viittaavat yksil<69>ihin
|
||||
names <- (size(popnames, 1) == ninds)
|
||||
|
||||
if (length(outPutFile) > 0) {
|
||||
fid <- load(outPutFile)
|
||||
|
|
@ -194,7 +199,10 @@ writeMixtureInfo <- function(logml, rowsFromInd, data, adjprior, priorTerm, outP
|
|||
d <- zeros(maxnoalle, nloci, npops)
|
||||
prior <- adjprior
|
||||
prior[matlab2r::find(prior == 1)] <- 0
|
||||
nollia <- matlab2r::find(all(prior == 0)) # Loci in which only one allele was detected.
|
||||
|
||||
# Loci in which only one allele was detected.
|
||||
nollia <- matlab2r::find(all(prior == 0))
|
||||
|
||||
prior[1, nollia] <- 1
|
||||
for (pop1 in 1:npops) {
|
||||
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) /
|
||||
|
|
|
|||
|
|
@ -135,7 +135,8 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
|
|||
nollia <- find(all(prior == 0)) # Lokukset, joissa oli havaittu vain yht?alleelia.
|
||||
prior[1, nollia] <- 1
|
||||
for (pop1 in 1:npops) {
|
||||
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
|
||||
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) /
|
||||
repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
|
||||
}
|
||||
ekarivi <- as.character(npops)
|
||||
cat(ekarivi, "\n")
|
||||
|
|
@ -166,13 +167,23 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
|
|||
}
|
||||
}
|
||||
cat(" \n \n \n")
|
||||
cat("List of sizes of 10 best visited partitions and corresponding log(ml) values\n")
|
||||
cat(
|
||||
"List of sizes of 10 best visited partitions and corresponding",
|
||||
"log(ml) values\n"
|
||||
)
|
||||
if (exists("fid")) {
|
||||
append(fid, " \n\n")
|
||||
append(fid, " \n\n")
|
||||
append(fid, " \n\n")
|
||||
append(fid, " \n\n")
|
||||
append(fid, "List of sizes of 10 best visited partitions and corresponding log(ml) values\n")
|
||||
append(
|
||||
fid,
|
||||
cat(
|
||||
"List of sizes of 10 best visited partitions and corresponding",
|
||||
"log(ml) values\n"
|
||||
)
|
||||
)
|
||||
|
||||
}
|
||||
partitionSummary <- sortrows(partitionSummary, 2)
|
||||
partitionSummary <- partitionSummary[size(partitionSummary, 1):-1, ]
|
||||
|
|
@ -183,7 +194,11 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
|
|||
vikaPartitio <- size(partitionSummary, 1)
|
||||
}
|
||||
for (part in 1:vikaPartitio) {
|
||||
line <- c(as.character(partitionSummary[part, 1]), " ", as.character(partitionSummary[part, 2]))
|
||||
line <- c(
|
||||
as.character(partitionSummary[part, 1]),
|
||||
" ",
|
||||
as.character(partitionSummary[part, 2])
|
||||
)
|
||||
cat(line, "\n")
|
||||
if (exists("fid")) {
|
||||
append(fid, c(line, "\n"))
|
||||
|
|
@ -204,7 +219,9 @@ writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
|
|||
partitionSummary[, 2] <- partitionSummary[, 2] - max(partitionSummary[, 2])
|
||||
sumtn <- sum(exp(partitionSummary[, 2]))
|
||||
for (i in 1:len) {
|
||||
npopstn <- sum(exp(partitionSummary(find(partitionSummary[, 1] == npopsTaulu(i)), 2)))
|
||||
npopstn <- sum(
|
||||
exp(partitionSummary(find(partitionSummary[, 1] == npopsTaulu(i)), 2))
|
||||
)
|
||||
probs[i] <- npopstn / sumtn
|
||||
}
|
||||
for (i in 1:len) {
|
||||
|
|
|
|||
|
|
@ -10,7 +10,9 @@ fgetl(file)
|
|||
\item{file}{character vector to be read, usually an output of `fopen()`}
|
||||
}
|
||||
\value{
|
||||
If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1.
|
||||
If the file is nonempty, then fgetl returns tline as a character
|
||||
vector. If the file is empty and contains only the end-of-file marker, then
|
||||
fgetl returns tline as a numeric value -1.
|
||||
}
|
||||
\description{
|
||||
Equivalent function to its homonymous Matlab equivalent.
|
||||
|
|
|
|||
|
|
@ -15,8 +15,11 @@ randga(a, b)
|
|||
One realization of Gamma(a, b)
|
||||
}
|
||||
\description{
|
||||
Generates one random number from shape parameter a and rate parameter b
|
||||
Generates one random number from shape parameter a and rate
|
||||
parameter b
|
||||
}
|
||||
\details{
|
||||
The generated random variable has mean a / b. It will be positively-skewed for small values, but converges to a symmetric distribution for very large numbers of a and b.
|
||||
The generated random variable has mean a / b. It will be
|
||||
positively-skewed for small values, but converges to a symmetric distribution
|
||||
for very large numbers of a and b.
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,5 +17,6 @@ simuloiAlleeli(allfreqs, pop, loc)
|
|||
Simuloi populaation pop lokukseen loc alleelin.
|
||||
}
|
||||
\note{
|
||||
This function is (only?) called by `simulateIndividuals()`. Therefore, exporting it is probably unnecessary.
|
||||
This function is (only?) called by `simulateIndividuals()`. Therefore,
|
||||
exporting it is probably unnecessary.
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,11 +10,13 @@ testaaGenePopData(tiedostonNimi)
|
|||
\item{tiedostonNimi}{Filename}
|
||||
}
|
||||
\value{
|
||||
kunnossa (binary "ok" condition value) == 0 if the data is not valid genePop data. Otherwise, kunnossa == 1.
|
||||
kunnossa (binary "ok" condition value) == 0 if the data is not valid
|
||||
genePop data. Otherwise, kunnossa == 1.
|
||||
}
|
||||
\description{
|
||||
Tests GenePop data
|
||||
}
|
||||
\details{
|
||||
GenePop data are textfiles that follow the GenePop format. This function checks if such file is properly formatted as GenePop.
|
||||
GenePop data are textfiles that follow the GenePop format. This
|
||||
function checks if such file is properly formatted as GenePop.
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1 @@
|
|||
library(testthat)
|
||||
library(rBAPS)
|
||||
|
||||
test_check("rBAPS")
|
||||
testthat::test_check("rBAPS")
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@ test_that("type convertions behave like on Matlab", {
|
|||
expect_equal(proportion2str(0.4), "0.40")
|
||||
expect_equal(proportion2str(0.89), "0.89")
|
||||
expect_equal(proportion2str(-0.4), "0.0-40") # also bugged in original
|
||||
# TODO: fix after release, as long as it doesn't break anything else
|
||||
})
|
||||
|
||||
test_that("computeRows behaves like on Matlab", {
|
||||
|
|
|
|||
|
|
@ -1,11 +1,7 @@
|
|||
context("Auxiliary functions to greedyMix")
|
||||
|
||||
# Defining the relative path to current inst -----------------------------------
|
||||
if (interactive()) {
|
||||
path_inst <- "../../inst/ext"
|
||||
} else {
|
||||
path_inst <- system.file("ext", "", package = "rBAPS")
|
||||
}
|
||||
path_inst <- system.file("ext", "", package = "rBAPS")
|
||||
|
||||
# Reading datasets -------------------------------------------------------------
|
||||
baps_diploid <- read.delim(
|
||||
|
|
@ -50,7 +46,6 @@ df_bam <- greedyMix(
|
|||
data = file.path(path_inst, "bam_example.bam"),
|
||||
format = "BAM",
|
||||
)
|
||||
# TODO #19: add example reading Genpop
|
||||
test_that("Files are imported correctly", {
|
||||
expect_equal(dim(df_fasta), c(5, 99))
|
||||
expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ test_that("Auxiliary functions work properly", {
|
|||
expect_equal(
|
||||
getPopDistancesByKL(x2),
|
||||
list(
|
||||
Z = matrix(c(c(1, 101:198), c(2:100), rep(0, 99)), nrow = 99, ncol = 3),
|
||||
Z = matrix(c(c(1, 101:198), 2:100, rep(0, 99)), nrow = 99, ncol = 3),
|
||||
distances = as.matrix(rep(0, 4950))
|
||||
)
|
||||
)
|
||||
|
|
@ -27,7 +27,7 @@ test_that("Auxiliary functions work properly", {
|
|||
rows = matrix(c(1: 3, 1: 3), 3),
|
||||
alleleCodes = matrix(c(1, 4, 9, 2, 5, 8), 3),
|
||||
noalle = matrix(c(3, 3), 1),
|
||||
adjprior = matrix(rep(3/9, 6), 3),
|
||||
adjprior = matrix(rep(3 / 9, 6), 3),
|
||||
priorTerm = 5.9125
|
||||
)
|
||||
expect_equal(handlePopData(x3), y3, tol = 1e-4)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue