Merge branch 'process-genepop' into issue-24

This commit is contained in:
Waldir Leoncio 2024-09-13 13:53:12 +02:00
commit 822521af9b
4 changed files with 40 additions and 15 deletions

View file

@ -11,7 +11,7 @@ addAlleles <- function(data, ind, line, divider) {
# line. Jos data on 3 digit formaatissa on divider=1000.
# Jos data on 2 digit formaatissa on divider=100.
nloci <- size(data, 2) # added 1 from original code
nloci <- size(data, 2) - 1L
if (size(data, 1) < (2 * ind)) {
data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code
}
@ -22,8 +22,7 @@ addAlleles <- function(data, ind, line, divider) {
k <- k + 1
merkki <- substring(line, k, k)
}
line <- substring(line, k + 1)
# clear k; clear merkki;
line <- trimws(substring(line, k + 1))
if (grepl(" ", line)) {
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])

View file

@ -27,6 +27,7 @@ greedyMix <- function(
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
) {
# Importing and handling data ================================================
# TODO: use format as class and make handling data a generic
if (tolower(format) %in% "fasta") {
data <- convert_FASTA_to_BAPS(data)
format <- "baps"
@ -42,6 +43,17 @@ greedyMix <- function(
Z = data[["Z"]],
dist = data[["dist"]]
)
} else if (tolower(format) %in% "genepop") {
data <- process_GenePop_data(data)
c <- list(
noalle = data[["noalle"]],
data = data[["data"]],
adjprior = data[["adjprior"]],
priorTerm = data[["priorTerm"]],
rowsFromInd = data[["rowsFromInd"]],
Z = data[["Z"]],
dist = data[["dist"]]
)
} else {
data <- importFile(data, format, verbose)
data <- handleData(data, tolower(format))
@ -68,7 +80,7 @@ greedyMix <- function(
# Generating partition summary ===============================================
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA, GenePop
logml <- logml_npops_partitionSummary[["logml"]]
npops <- logml_npops_partitionSummary[["npops"]]
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]

View file

@ -1,6 +1,5 @@
#' @title Import data file
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM,
#' Genepop).
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM).
#' @param data raw dataset
#' @param format data format (guesses from extension if not provided)
#' @param verbose if \code{TRUE}, prints extra output information
@ -33,15 +32,6 @@ importFile <- function(data, format, verbose) {
)
} else if (format == "bam") {
out <- Rsamtools::scanBam(data)
} else if (format == "genepop") {
if (toupper(adegenet::.readExt(data)) == "TXT") {
message("Creating a copy of the file with the .gen extension")
dataGen <- gsub("txt", "gen", data)
file.copy(data, dataGen)
out <- adegenet::read.genepop(dataGen)
} else {
out <- adegenet::read.genepop(data)
}
} else {
stop("Format not supported.")
}

24
R/process_GenePop_data.R Normal file
View file

@ -0,0 +1,24 @@
process_GenePop_data <- function(filename) {
# Pre-processing GenePop data
kunnossa <- testaaGenePopData(filename)
data_popnames <- lueGenePopData(filename)
data <- data_popnames$data
popnames <- data_popnames$popnames
# Processing GenePop data
c <- handleData(data, "genepop")
Z_dist <- newGetDistances(c$newData, c$rowsFromInd)
# Forming and returning pre-processed data
list(
"data" = c$newData,
"rowsFromInd" = c$rowsFromInd,
"alleleCodes" = c$alleleCodes,
"noalle" = c$noalle,
"adjprior" = c$adjprior,
"priorTerm" = c$priorTerm,
"dist" = Z_dist$dist,
"popnames" = popnames,
"Z" = Z_dist$Z
)
}