Merge branch 'process-genepop' into issue-24
This commit is contained in:
commit
822521af9b
4 changed files with 40 additions and 15 deletions
|
|
@ -11,7 +11,7 @@ addAlleles <- function(data, ind, line, divider) {
|
|||
# line. Jos data on 3 digit formaatissa on divider=1000.
|
||||
# Jos data on 2 digit formaatissa on divider=100.
|
||||
|
||||
nloci <- size(data, 2) # added 1 from original code
|
||||
nloci <- size(data, 2) - 1L
|
||||
if (size(data, 1) < (2 * ind)) {
|
||||
data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code
|
||||
}
|
||||
|
|
@ -22,8 +22,7 @@ addAlleles <- function(data, ind, line, divider) {
|
|||
k <- k + 1
|
||||
merkki <- substring(line, k, k)
|
||||
}
|
||||
line <- substring(line, k + 1)
|
||||
# clear k; clear merkki;
|
||||
line <- trimws(substring(line, k + 1))
|
||||
|
||||
if (grepl(" ", line)) {
|
||||
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ greedyMix <- function(
|
|||
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
|
||||
) {
|
||||
# Importing and handling data ================================================
|
||||
# TODO: use format as class and make handling data a generic
|
||||
if (tolower(format) %in% "fasta") {
|
||||
data <- convert_FASTA_to_BAPS(data)
|
||||
format <- "baps"
|
||||
|
|
@ -42,6 +43,17 @@ greedyMix <- function(
|
|||
Z = data[["Z"]],
|
||||
dist = data[["dist"]]
|
||||
)
|
||||
} else if (tolower(format) %in% "genepop") {
|
||||
data <- process_GenePop_data(data)
|
||||
c <- list(
|
||||
noalle = data[["noalle"]],
|
||||
data = data[["data"]],
|
||||
adjprior = data[["adjprior"]],
|
||||
priorTerm = data[["priorTerm"]],
|
||||
rowsFromInd = data[["rowsFromInd"]],
|
||||
Z = data[["Z"]],
|
||||
dist = data[["dist"]]
|
||||
)
|
||||
} else {
|
||||
data <- importFile(data, format, verbose)
|
||||
data <- handleData(data, tolower(format))
|
||||
|
|
@ -68,7 +80,7 @@ greedyMix <- function(
|
|||
# Generating partition summary ===============================================
|
||||
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
|
||||
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
|
||||
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data
|
||||
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA, GenePop
|
||||
logml <- logml_npops_partitionSummary[["logml"]]
|
||||
npops <- logml_npops_partitionSummary[["npops"]]
|
||||
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
#' @title Import data file
|
||||
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM,
|
||||
#' Genepop).
|
||||
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM).
|
||||
#' @param data raw dataset
|
||||
#' @param format data format (guesses from extension if not provided)
|
||||
#' @param verbose if \code{TRUE}, prints extra output information
|
||||
|
|
@ -33,15 +32,6 @@ importFile <- function(data, format, verbose) {
|
|||
)
|
||||
} else if (format == "bam") {
|
||||
out <- Rsamtools::scanBam(data)
|
||||
} else if (format == "genepop") {
|
||||
if (toupper(adegenet::.readExt(data)) == "TXT") {
|
||||
message("Creating a copy of the file with the .gen extension")
|
||||
dataGen <- gsub("txt", "gen", data)
|
||||
file.copy(data, dataGen)
|
||||
out <- adegenet::read.genepop(dataGen)
|
||||
} else {
|
||||
out <- adegenet::read.genepop(data)
|
||||
}
|
||||
} else {
|
||||
stop("Format not supported.")
|
||||
}
|
||||
|
|
|
|||
24
R/process_GenePop_data.R
Normal file
24
R/process_GenePop_data.R
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
process_GenePop_data <- function(filename) {
|
||||
# Pre-processing GenePop data
|
||||
kunnossa <- testaaGenePopData(filename)
|
||||
data_popnames <- lueGenePopData(filename)
|
||||
data <- data_popnames$data
|
||||
popnames <- data_popnames$popnames
|
||||
|
||||
# Processing GenePop data
|
||||
c <- handleData(data, "genepop")
|
||||
Z_dist <- newGetDistances(c$newData, c$rowsFromInd)
|
||||
|
||||
# Forming and returning pre-processed data
|
||||
list(
|
||||
"data" = c$newData,
|
||||
"rowsFromInd" = c$rowsFromInd,
|
||||
"alleleCodes" = c$alleleCodes,
|
||||
"noalle" = c$noalle,
|
||||
"adjprior" = c$adjprior,
|
||||
"priorTerm" = c$priorTerm,
|
||||
"dist" = Z_dist$dist,
|
||||
"popnames" = popnames,
|
||||
"Z" = Z_dist$Z
|
||||
)
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue