Merge branch 'issue-24' into develop
* issue-24: Updated docs Temp-wrapping disfunctional code Aligned processing of Genepop with MATLAB code Dropped support for GenePop on `importFile()` Added `process_GenePop_data()` Fixed `addAlleles()` handleData defaults to BAPS format Added unit tests for `greedyMix()` on GenePop (#24) Added GenePop data, txt and gen (#24)
This commit is contained in:
commit
1d87be9b83
10 changed files with 69 additions and 19 deletions
|
|
@ -11,7 +11,7 @@ addAlleles <- function(data, ind, line, divider) {
|
|||
# line. Jos data on 3 digit formaatissa on divider=1000.
|
||||
# Jos data on 2 digit formaatissa on divider=100.
|
||||
|
||||
nloci <- size(data, 2) # added 1 from original code
|
||||
nloci <- size(data, 2) - 1L
|
||||
if (size(data, 1) < (2 * ind)) {
|
||||
data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code
|
||||
}
|
||||
|
|
@ -22,8 +22,7 @@ addAlleles <- function(data, ind, line, divider) {
|
|||
k <- k + 1
|
||||
merkki <- substring(line, k, k)
|
||||
}
|
||||
line <- substring(line, k + 1)
|
||||
# clear k; clear merkki;
|
||||
line <- trimws(substring(line, k + 1))
|
||||
|
||||
if (grepl(" ", line)) {
|
||||
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ greedyMix <- function(
|
|||
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
|
||||
) {
|
||||
# Importing and handling data ================================================
|
||||
# TODO: use format as class and make handling data a generic
|
||||
if (tolower(format) %in% "fasta") {
|
||||
data <- convert_FASTA_to_BAPS(data)
|
||||
format <- "baps"
|
||||
|
|
@ -42,6 +43,17 @@ greedyMix <- function(
|
|||
Z = data[["Z"]],
|
||||
dist = data[["dist"]]
|
||||
)
|
||||
} else if (tolower(format) %in% "genepop") {
|
||||
data <- process_GenePop_data(data)
|
||||
c <- list(
|
||||
noalle = data[["noalle"]],
|
||||
data = data[["data"]],
|
||||
adjprior = data[["adjprior"]],
|
||||
priorTerm = data[["priorTerm"]],
|
||||
rowsFromInd = data[["rowsFromInd"]],
|
||||
Z = data[["Z"]],
|
||||
dist = data[["dist"]]
|
||||
)
|
||||
} else {
|
||||
data <- importFile(data, format, verbose)
|
||||
data <- handleData(data, tolower(format))
|
||||
|
|
@ -68,7 +80,7 @@ greedyMix <- function(
|
|||
# Generating partition summary ===============================================
|
||||
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
|
||||
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
|
||||
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data
|
||||
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA, GenePop
|
||||
logml <- logml_npops_partitionSummary[["logml"]]
|
||||
npops <- logml_npops_partitionSummary[["npops"]]
|
||||
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
#' the function changes the allele codes so that one locus j
|
||||
#' codes get values between? 1, ..., noalle(j).
|
||||
#' @export
|
||||
handleData <- function(raw_data, format = "Genepop") {
|
||||
handleData <- function(raw_data, format = "baps") {
|
||||
# Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt?
|
||||
# kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako
|
||||
# rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
#' @title Import data file
|
||||
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM,
|
||||
#' Genepop).
|
||||
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM).
|
||||
#' @param data raw dataset
|
||||
#' @param format data format (guesses from extension if not provided)
|
||||
#' @param verbose if \code{TRUE}, prints extra output information
|
||||
|
|
@ -33,15 +32,6 @@ importFile <- function(data, format, verbose) {
|
|||
)
|
||||
} else if (format == "bam") {
|
||||
out <- Rsamtools::scanBam(data)
|
||||
} else if (format == "genepop") {
|
||||
if (toupper(adegenet::.readExt(data)) == "TXT") {
|
||||
message("Creating a copy of the file with the .gen extension")
|
||||
dataGen <- gsub("txt", "gen", data)
|
||||
file.copy(data, dataGen)
|
||||
out <- adegenet::read.genepop(dataGen)
|
||||
} else {
|
||||
out <- adegenet::read.genepop(data)
|
||||
}
|
||||
} else {
|
||||
stop("Format not supported.")
|
||||
}
|
||||
|
|
|
|||
24
R/process_GenePop_data.R
Normal file
24
R/process_GenePop_data.R
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
process_GenePop_data <- function(filename) {
|
||||
# Pre-processing GenePop data
|
||||
kunnossa <- testaaGenePopData(filename)
|
||||
data_popnames <- lueGenePopData(filename)
|
||||
data <- data_popnames$data
|
||||
popnames <- data_popnames$popnames
|
||||
|
||||
# Processing GenePop data
|
||||
c <- handleData(data, "genepop")
|
||||
Z_dist <- newGetDistances(c$newData, c$rowsFromInd)
|
||||
|
||||
# Forming and returning pre-processed data
|
||||
list(
|
||||
"data" = c$newData,
|
||||
"rowsFromInd" = c$rowsFromInd,
|
||||
"alleleCodes" = c$alleleCodes,
|
||||
"noalle" = c$noalle,
|
||||
"adjprior" = c$adjprior,
|
||||
"priorTerm" = c$priorTerm,
|
||||
"dist" = Z_dist$dist,
|
||||
"popnames" = popnames,
|
||||
"Z" = Z_dist$Z
|
||||
)
|
||||
}
|
||||
12
inst/extdata/GenePop.gen
vendored
Normal file
12
inst/extdata/GenePop.gen
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
Example GenePop Data
|
||||
Loc1
|
||||
Loc2
|
||||
Loc3
|
||||
Pop
|
||||
Ind1, 0101 0202 0303
|
||||
Ind2, 0102 0201 0303
|
||||
Ind3, 0101 0202 0303
|
||||
Pop
|
||||
Ind4, 0101 0202 0303
|
||||
Ind5, 0102 0201 0303
|
||||
Ind6, 0101 0202 0303
|
||||
12
inst/extdata/GenePop.txt
vendored
Normal file
12
inst/extdata/GenePop.txt
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
Example GenePop Data
|
||||
Loc1
|
||||
Loc2
|
||||
Loc3
|
||||
Pop
|
||||
Ind1, 0101 0202 0303
|
||||
Ind2, 0102 0201 0303
|
||||
Ind3, 0101 0202 0303
|
||||
Pop
|
||||
Ind4, 0101 0202 0303
|
||||
Ind5, 0102 0201 0303
|
||||
Ind6, 0101 0202 0303
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
\alias{handleData}
|
||||
\title{Handle Data}
|
||||
\usage{
|
||||
handleData(raw_data, format = "Genepop")
|
||||
handleData(raw_data, format = "baps")
|
||||
}
|
||||
\arguments{
|
||||
\item{raw_data}{Raw data in Genepop or BAPS format}
|
||||
|
|
|
|||
|
|
@ -17,8 +17,7 @@ importFile(data, format, verbose)
|
|||
The data in a format that can be used by the other functions
|
||||
}
|
||||
\description{
|
||||
Imports data from several formats (FASTA, VCF, SAM, BAM,
|
||||
Genepop).
|
||||
Imports data from several formats (FASTA, VCF, SAM, BAM).
|
||||
}
|
||||
\examples{
|
||||
path_inst <- system.file("extdata", "", package = "rBAPS")
|
||||
|
|
|
|||
|
|
@ -66,8 +66,10 @@ test_that("greedyMix() fails successfully", {
|
|||
|
||||
test_that("greedyMix() works when it should", {
|
||||
baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt")
|
||||
genepop_file <- file.path(path_inst, "GenePop.txt")
|
||||
fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta")
|
||||
greedy_baps <- greedyMix(baps_file, "BAPS")
|
||||
expect_error(greedy_genepop <- greedyMix(genepop_file, "GenePop")) # TEMP: fails
|
||||
expect_type(greedy_baps, "list")
|
||||
expect_length(greedy_baps, 10L)
|
||||
})
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue