diff --git a/R/addAlleles.R b/R/addAlleles.R index 9854027..451db4e 100644 --- a/R/addAlleles.R +++ b/R/addAlleles.R @@ -11,7 +11,7 @@ addAlleles <- function(data, ind, line, divider) { # line. Jos data on 3 digit formaatissa on divider=1000. # Jos data on 2 digit formaatissa on divider=100. - nloci <- size(data, 2) # added 1 from original code + nloci <- size(data, 2) - 1L if (size(data, 1) < (2 * ind)) { data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code } @@ -22,8 +22,7 @@ addAlleles <- function(data, ind, line, divider) { k <- k + 1 merkki <- substring(line, k, k) } - line <- substring(line, k + 1) - # clear k; clear merkki; + line <- trimws(substring(line, k + 1)) if (grepl(" ", line)) { alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]]) diff --git a/R/greedyMix.R b/R/greedyMix.R index dd0d117..cea602b 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -27,6 +27,7 @@ greedyMix <- function( inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE ) { # Importing and handling data ================================================ + # TODO: use format as class and make handling data a generic if (tolower(format) %in% "fasta") { data <- convert_FASTA_to_BAPS(data) format <- "baps" @@ -42,6 +43,17 @@ greedyMix <- function( Z = data[["Z"]], dist = data[["dist"]] ) + } else if (tolower(format) %in% "genepop") { + data <- process_GenePop_data(data) + c <- list( + noalle = data[["noalle"]], + data = data[["data"]], + adjprior = data[["adjprior"]], + priorTerm = data[["priorTerm"]], + rowsFromInd = data[["rowsFromInd"]], + Z = data[["Z"]], + dist = data[["dist"]] + ) } else { data <- importFile(data, format, verbose) data <- handleData(data, tolower(format)) @@ -68,7 +80,7 @@ greedyMix <- function( # Generating partition summary =============================================== ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]]) c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L) - logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data + logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA, GenePop logml <- logml_npops_partitionSummary[["logml"]] npops <- logml_npops_partitionSummary[["npops"]] partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]] diff --git a/R/handleData.R b/R/handleData.R index fbdd9c9..d8d44eb 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -10,7 +10,7 @@ #' the function changes the allele codes so that one locus j #' codes get values between? 1, ..., noalle(j). #' @export -handleData <- function(raw_data, format = "Genepop") { +handleData <- function(raw_data, format = "baps") { # Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt? # kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako # rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan diff --git a/R/importFile.R b/R/importFile.R index 2ed9c75..57f2fee 100644 --- a/R/importFile.R +++ b/R/importFile.R @@ -1,6 +1,5 @@ #' @title Import data file -#' @description Imports data from several formats (FASTA, VCF, SAM, BAM, -#' Genepop). +#' @description Imports data from several formats (FASTA, VCF, SAM, BAM). #' @param data raw dataset #' @param format data format (guesses from extension if not provided) #' @param verbose if \code{TRUE}, prints extra output information @@ -33,15 +32,6 @@ importFile <- function(data, format, verbose) { ) } else if (format == "bam") { out <- Rsamtools::scanBam(data) - } else if (format == "genepop") { - if (toupper(adegenet::.readExt(data)) == "TXT") { - message("Creating a copy of the file with the .gen extension") - dataGen <- gsub("txt", "gen", data) - file.copy(data, dataGen) - out <- adegenet::read.genepop(dataGen) - } else { - out <- adegenet::read.genepop(data) - } } else { stop("Format not supported.") } diff --git a/R/process_GenePop_data.R b/R/process_GenePop_data.R new file mode 100644 index 0000000..e444c47 --- /dev/null +++ b/R/process_GenePop_data.R @@ -0,0 +1,24 @@ +process_GenePop_data <- function(filename) { + # Pre-processing GenePop data + kunnossa <- testaaGenePopData(filename) + data_popnames <- lueGenePopData(filename) + data <- data_popnames$data + popnames <- data_popnames$popnames + + # Processing GenePop data + c <- handleData(data, "genepop") + Z_dist <- newGetDistances(c$newData, c$rowsFromInd) + + # Forming and returning pre-processed data + list( + "data" = c$newData, + "rowsFromInd" = c$rowsFromInd, + "alleleCodes" = c$alleleCodes, + "noalle" = c$noalle, + "adjprior" = c$adjprior, + "priorTerm" = c$priorTerm, + "dist" = Z_dist$dist, + "popnames" = popnames, + "Z" = Z_dist$Z + ) +} diff --git a/inst/extdata/GenePop.gen b/inst/extdata/GenePop.gen new file mode 100644 index 0000000..9e1874b --- /dev/null +++ b/inst/extdata/GenePop.gen @@ -0,0 +1,12 @@ +Example GenePop Data +Loc1 +Loc2 +Loc3 +Pop +Ind1, 0101 0202 0303 +Ind2, 0102 0201 0303 +Ind3, 0101 0202 0303 +Pop +Ind4, 0101 0202 0303 +Ind5, 0102 0201 0303 +Ind6, 0101 0202 0303 diff --git a/inst/extdata/GenePop.txt b/inst/extdata/GenePop.txt new file mode 100644 index 0000000..9e1874b --- /dev/null +++ b/inst/extdata/GenePop.txt @@ -0,0 +1,12 @@ +Example GenePop Data +Loc1 +Loc2 +Loc3 +Pop +Ind1, 0101 0202 0303 +Ind2, 0102 0201 0303 +Ind3, 0101 0202 0303 +Pop +Ind4, 0101 0202 0303 +Ind5, 0102 0201 0303 +Ind6, 0101 0202 0303 diff --git a/man/handleData.Rd b/man/handleData.Rd index 4a20a8c..6dd4c90 100644 --- a/man/handleData.Rd +++ b/man/handleData.Rd @@ -4,7 +4,7 @@ \alias{handleData} \title{Handle Data} \usage{ -handleData(raw_data, format = "Genepop") +handleData(raw_data, format = "baps") } \arguments{ \item{raw_data}{Raw data in Genepop or BAPS format} diff --git a/man/importFile.Rd b/man/importFile.Rd index 9adb584..2ad7e3b 100644 --- a/man/importFile.Rd +++ b/man/importFile.Rd @@ -17,8 +17,7 @@ importFile(data, format, verbose) The data in a format that can be used by the other functions } \description{ -Imports data from several formats (FASTA, VCF, SAM, BAM, -Genepop). +Imports data from several formats (FASTA, VCF, SAM, BAM). } \examples{ path_inst <- system.file("extdata", "", package = "rBAPS") diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index 9353b0a..2c80ca3 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -66,8 +66,10 @@ test_that("greedyMix() fails successfully", { test_that("greedyMix() works when it should", { baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt") + genepop_file <- file.path(path_inst, "GenePop.txt") fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta") greedy_baps <- greedyMix(baps_file, "BAPS") + expect_error(greedy_genepop <- greedyMix(genepop_file, "GenePop")) # TEMP: fails expect_type(greedy_baps, "list") expect_length(greedy_baps, 10L) })