Merge branch 'process-genepop' into issue-24
This commit is contained in:
commit
822521af9b
4 changed files with 40 additions and 15 deletions
|
|
@ -11,7 +11,7 @@ addAlleles <- function(data, ind, line, divider) {
|
||||||
# line. Jos data on 3 digit formaatissa on divider=1000.
|
# line. Jos data on 3 digit formaatissa on divider=1000.
|
||||||
# Jos data on 2 digit formaatissa on divider=100.
|
# Jos data on 2 digit formaatissa on divider=100.
|
||||||
|
|
||||||
nloci <- size(data, 2) # added 1 from original code
|
nloci <- size(data, 2) - 1L
|
||||||
if (size(data, 1) < (2 * ind)) {
|
if (size(data, 1) < (2 * ind)) {
|
||||||
data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code
|
data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code
|
||||||
}
|
}
|
||||||
|
|
@ -22,8 +22,7 @@ addAlleles <- function(data, ind, line, divider) {
|
||||||
k <- k + 1
|
k <- k + 1
|
||||||
merkki <- substring(line, k, k)
|
merkki <- substring(line, k, k)
|
||||||
}
|
}
|
||||||
line <- substring(line, k + 1)
|
line <- trimws(substring(line, k + 1))
|
||||||
# clear k; clear merkki;
|
|
||||||
|
|
||||||
if (grepl(" ", line)) {
|
if (grepl(" ", line)) {
|
||||||
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
|
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ greedyMix <- function(
|
||||||
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
|
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
|
||||||
) {
|
) {
|
||||||
# Importing and handling data ================================================
|
# Importing and handling data ================================================
|
||||||
|
# TODO: use format as class and make handling data a generic
|
||||||
if (tolower(format) %in% "fasta") {
|
if (tolower(format) %in% "fasta") {
|
||||||
data <- convert_FASTA_to_BAPS(data)
|
data <- convert_FASTA_to_BAPS(data)
|
||||||
format <- "baps"
|
format <- "baps"
|
||||||
|
|
@ -42,6 +43,17 @@ greedyMix <- function(
|
||||||
Z = data[["Z"]],
|
Z = data[["Z"]],
|
||||||
dist = data[["dist"]]
|
dist = data[["dist"]]
|
||||||
)
|
)
|
||||||
|
} else if (tolower(format) %in% "genepop") {
|
||||||
|
data <- process_GenePop_data(data)
|
||||||
|
c <- list(
|
||||||
|
noalle = data[["noalle"]],
|
||||||
|
data = data[["data"]],
|
||||||
|
adjprior = data[["adjprior"]],
|
||||||
|
priorTerm = data[["priorTerm"]],
|
||||||
|
rowsFromInd = data[["rowsFromInd"]],
|
||||||
|
Z = data[["Z"]],
|
||||||
|
dist = data[["dist"]]
|
||||||
|
)
|
||||||
} else {
|
} else {
|
||||||
data <- importFile(data, format, verbose)
|
data <- importFile(data, format, verbose)
|
||||||
data <- handleData(data, tolower(format))
|
data <- handleData(data, tolower(format))
|
||||||
|
|
@ -68,7 +80,7 @@ greedyMix <- function(
|
||||||
# Generating partition summary ===============================================
|
# Generating partition summary ===============================================
|
||||||
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
|
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
|
||||||
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
|
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
|
||||||
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data
|
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA, GenePop
|
||||||
logml <- logml_npops_partitionSummary[["logml"]]
|
logml <- logml_npops_partitionSummary[["logml"]]
|
||||||
npops <- logml_npops_partitionSummary[["npops"]]
|
npops <- logml_npops_partitionSummary[["npops"]]
|
||||||
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
#' @title Import data file
|
#' @title Import data file
|
||||||
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM,
|
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM).
|
||||||
#' Genepop).
|
|
||||||
#' @param data raw dataset
|
#' @param data raw dataset
|
||||||
#' @param format data format (guesses from extension if not provided)
|
#' @param format data format (guesses from extension if not provided)
|
||||||
#' @param verbose if \code{TRUE}, prints extra output information
|
#' @param verbose if \code{TRUE}, prints extra output information
|
||||||
|
|
@ -33,15 +32,6 @@ importFile <- function(data, format, verbose) {
|
||||||
)
|
)
|
||||||
} else if (format == "bam") {
|
} else if (format == "bam") {
|
||||||
out <- Rsamtools::scanBam(data)
|
out <- Rsamtools::scanBam(data)
|
||||||
} else if (format == "genepop") {
|
|
||||||
if (toupper(adegenet::.readExt(data)) == "TXT") {
|
|
||||||
message("Creating a copy of the file with the .gen extension")
|
|
||||||
dataGen <- gsub("txt", "gen", data)
|
|
||||||
file.copy(data, dataGen)
|
|
||||||
out <- adegenet::read.genepop(dataGen)
|
|
||||||
} else {
|
|
||||||
out <- adegenet::read.genepop(data)
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
stop("Format not supported.")
|
stop("Format not supported.")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
24
R/process_GenePop_data.R
Normal file
24
R/process_GenePop_data.R
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
process_GenePop_data <- function(filename) {
|
||||||
|
# Pre-processing GenePop data
|
||||||
|
kunnossa <- testaaGenePopData(filename)
|
||||||
|
data_popnames <- lueGenePopData(filename)
|
||||||
|
data <- data_popnames$data
|
||||||
|
popnames <- data_popnames$popnames
|
||||||
|
|
||||||
|
# Processing GenePop data
|
||||||
|
c <- handleData(data, "genepop")
|
||||||
|
Z_dist <- newGetDistances(c$newData, c$rowsFromInd)
|
||||||
|
|
||||||
|
# Forming and returning pre-processed data
|
||||||
|
list(
|
||||||
|
"data" = c$newData,
|
||||||
|
"rowsFromInd" = c$rowsFromInd,
|
||||||
|
"alleleCodes" = c$alleleCodes,
|
||||||
|
"noalle" = c$noalle,
|
||||||
|
"adjprior" = c$adjprior,
|
||||||
|
"priorTerm" = c$priorTerm,
|
||||||
|
"dist" = Z_dist$dist,
|
||||||
|
"popnames" = popnames,
|
||||||
|
"Z" = Z_dist$Z
|
||||||
|
)
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue