Merge branch 'issue-24' into develop
* issue-24: Increment version number to 0.0.0.9029 Disabled FASTA tests on greedyMix() (#24) Added synthetic FASTA file (#24) Ensure diffInCounts returns as.matrix (#24) Adapted tests to FASTA on greedyMix() (#24) Removed baps file pointing to fasta Improved conversion from FASTA to BAPS (#24) Improved handling of FASTA data (#24) `process_BAPS_data()` can handle file being an R object Added function to convert from FASTA to BAPS (#24) Unwrapped `greedyMix()` example (#24)
This commit is contained in:
commit
010f18dd19
11 changed files with 109 additions and 27 deletions
|
|
@ -1,6 +1,6 @@
|
|||
Package: rBAPS
|
||||
Title: Bayesian Analysis of Population Structure
|
||||
Version: 0.0.0.9028
|
||||
Version: 0.0.0.9029
|
||||
Date: 2020-11-09
|
||||
Authors@R:
|
||||
c(
|
||||
|
|
@ -36,7 +36,7 @@ Description: Partial R implementation of the BAPS software
|
|||
License: GPL-3
|
||||
BugReports: https://github.com/ocbe-uio/rBAPS/issues
|
||||
Encoding: UTF-8
|
||||
RoxygenNote: 7.3.1
|
||||
RoxygenNote: 7.3.2
|
||||
Suggests:
|
||||
testthat (>= 2.1.0)
|
||||
Imports:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
export(convert_FASTA_to_BAPS)
|
||||
export(greedyMix)
|
||||
export(handleData)
|
||||
export(importFile)
|
||||
|
|
|
|||
|
|
@ -13,5 +13,5 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) {
|
|||
diffInCounts[element] <- diffInCounts[element] + 1
|
||||
}
|
||||
}
|
||||
return(diffInCounts)
|
||||
return(as.matrix(diffInCounts))
|
||||
}
|
||||
|
|
|
|||
16
R/convert_FASTA_to_BAPS.R
Normal file
16
R/convert_FASTA_to_BAPS.R
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#' @title Convert from FASTA to BAPS
|
||||
#' @description Converts a file (not an R object) from FASTA to BAPS format
|
||||
#' @param file filename of FASTA file
|
||||
#' @return `data` in BAPS format
|
||||
#' @author Waldir Leoncio
|
||||
#' @export
|
||||
#' @examples
|
||||
#' file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
|
||||
#' convert_FASTA_to_BAPS(file)
|
||||
convert_FASTA_to_BAPS <- function(file) {
  # Read the FASTA file; load_fasta() is defined elsewhere in the package.
  # NOTE(review): presumably it returns a numeric matrix with one row per
  # sequence -- confirm against load_fasta().
  fasta <- load_fasta(file)

  # Append a sequential individual ID as the last column.
  n_ind <- nrow(fasta)
  baps <- cbind(fasta, seq_len(n_ind))

  # BAPS codes missing values as -9, whereas they arrive here as zeros.
  baps <- replace(baps, baps == 0, -9)

  # Name the columns V1, V2, ..., one per column (ID column included).
  colnames(baps) <- paste0("V", seq_len(ncol(baps)))
  baps
}
|
||||
|
|
@ -19,10 +19,8 @@
|
|||
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
||||
#' @export
|
||||
#' @examples
|
||||
#' \dontrun{ # TEMP: unwrap once #24 is resolved
|
||||
#' data <- system.file("extdata", "BAPS_format_clustering_diploid.txt", package = "rBAPS")
|
||||
#' data <- system.file("extdata", "BAPS_clustering_diploid.txt", package = "rBAPS")
|
||||
#' greedyMix(data, "baps")
|
||||
#' } # TEMP: unwrap once #24 is resolved
|
||||
greedyMix <- function(
|
||||
data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 3L,
|
||||
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
|
||||
|
|
@ -30,7 +28,8 @@ greedyMix <- function(
|
|||
) {
|
||||
# Importing and handling data ================================================
|
||||
if (tolower(format) %in% "fasta") {
|
||||
stop("FASTA format not yet supported on greedyMix")
|
||||
data <- convert_FASTA_to_BAPS(data)
|
||||
format <- "baps"
|
||||
}
|
||||
if (tolower(format) %in% "baps") {
|
||||
data <- process_BAPS_data(data, NULL)
|
||||
|
|
@ -69,7 +68,7 @@ greedyMix <- function(
|
|||
# Generating partition summary ===============================================
|
||||
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
|
||||
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
|
||||
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose)
|
||||
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data
|
||||
logml <- logml_npops_partitionSummary[["logml"]]
|
||||
npops <- logml_npops_partitionSummary[["npops"]]
|
||||
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ handleData <- function(raw_data, format = "Genepop") {
|
|||
"bam" = stop("BAM format not supported for processing yet")
|
||||
)
|
||||
data <- as.matrix(raw_data)
|
||||
|
||||
dataApu <- data[, seq_len(nloci)]
|
||||
nollat <- matlab2r::find(dataApu == 0)
|
||||
if (!isempty(nollat)) {
|
||||
|
|
@ -54,11 +55,16 @@ handleData <- function(raw_data, format = "Genepop") {
|
|||
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
|
||||
}
|
||||
|
||||
# This is where data gets converted to {1, 2, 3, 4} for {A, C, G, T}
|
||||
codes <- unique(as.vector(data[, -ncol(data)]))
|
||||
skip_conversion <- base::min(codes) == -9 && base::max(codes) == 4
|
||||
if (!skip_conversion) {
|
||||
for (loc in seq_len(nloci)) {
|
||||
for (all in seq_len(noalle[loc])) {
|
||||
data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nind <- as.integer(base::max(data[, ncol(data)]))
|
||||
nrows <- size(data, 1)
|
||||
|
|
|
|||
|
|
@ -2,14 +2,23 @@ process_BAPS_data <- function(file, partitionCompare) {
|
|||
if (!is.null(partitionCompare)) {
|
||||
cat('Data:', file, '\n')
|
||||
}
|
||||
|
||||
# Importing data
|
||||
if (is.character(file)) {
|
||||
data <- read.table(file)
|
||||
ninds <- testaaOnkoKunnollinenBapsData(data) # for testing purposes?
|
||||
} else {
|
||||
data <- file
|
||||
}
|
||||
|
||||
ninds <- testaaOnkoKunnollinenBapsData(data) # Checks if last column is ID
|
||||
if (ninds == 0) {
|
||||
warning('Incorrect Data-file.')
|
||||
return(NULL)
|
||||
}
|
||||
|
||||
popnames <- NULL # Dropped specification of population names (from BAPS 6)
|
||||
|
||||
# Processing data
|
||||
result <- handleData(data, format = "BAPS")
|
||||
data <- result$newData
|
||||
rowsFromInd <- result$rowsFromInd
|
||||
|
|
|
|||
39
inst/extdata/FASTA_clustering_haploid_ext.fasta
vendored
Normal file
39
inst/extdata/FASTA_clustering_haploid_ext.fasta
vendored
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
>1
|
||||
AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT
|
||||
TAGTAACATGGTCAGCTGCTCATCCGTATT
|
||||
|
||||
>2
|
||||
ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG
|
||||
AATGCACCACAAACATACCCCACAGGCTCG
|
||||
|
||||
>3
|
||||
GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT
|
||||
GTCTGAGTATGGATGGTGGAGGCGGAGCCG
|
||||
|
||||
>4
|
||||
ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG
|
||||
CAAGGATAAAGTTCACCCATCACGTTGACC
|
||||
|
||||
>5
|
||||
ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG
|
||||
GTAACATAGCTACCAGGTCCCATTCCAGGA
|
||||
|
||||
>6
|
||||
TCCCCCCAGTGGACACGGCTCGGGTAATGCAGCTTACCTCAACGCTAACGCATTTGACAGTAGTGAATCA
|
||||
CGGGCAACGCTGGGTGATTGCAAGTTTTGT
|
||||
|
||||
>7
|
||||
GCAACCACTGGTCGCCTGGAGCATTGATCAGGAACATGTCTGCAAGGGGGGCCGTTGCGGGTTTCAGTCA
|
||||
TCGTATTGCGCTGCAAATCCTCGGAGCCTC
|
||||
|
||||
>8
|
||||
CACCCGTAAAGCACGAGTAGGTTTCACCGCGACTTATATATTCCACCATACGGTTAACAAGGCAACACTT
|
||||
ATTCGTCGTCCAATGATCGTCCCTCTCCAG
|
||||
|
||||
>9
|
||||
CGAATCCATTCGGGATAAAGTTAATACGTAAGTCGAACGGGGTTTAGGAAGAGCTCTGCTGTTAAGCGCG
|
||||
CTTATCATCTTATATGTGTCAGTTGTGTAC
|
||||
|
||||
>10
|
||||
CGTTCGCATTTATAGGATATCCCCTAAACTAATTGGTAGTGATGGTATACCAGCGGTGCATTGTCCTCGC
|
||||
CTGTAGTTTAAGTCAACCTCTGCCTTAATC
|
||||
24
man/convert_FASTA_to_BAPS.Rd
Normal file
24
man/convert_FASTA_to_BAPS.Rd
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/convert_FASTA_to_BAPS.R
|
||||
\name{convert_FASTA_to_BAPS}
|
||||
\alias{convert_FASTA_to_BAPS}
|
||||
\title{Convert from FASTA to BAPS}
|
||||
\usage{
|
||||
convert_FASTA_to_BAPS(file)
|
||||
}
|
||||
\arguments{
|
||||
\item{file}{filename of FASTA file}
|
||||
}
|
||||
\value{
|
||||
`data` in BAPS format
|
||||
}
|
||||
\description{
|
||||
Converts a file (not an R object) from FASTA to BAPS format
|
||||
}
|
||||
\examples{
|
||||
file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
|
||||
convert_FASTA_to_BAPS(file)
|
||||
}
|
||||
\author{
|
||||
Waldir Leoncio
|
||||
}
|
||||
|
|
@ -48,10 +48,8 @@ greedyMix(
|
|||
Clustering of individuals
|
||||
}
|
||||
\examples{
|
||||
\dontrun{ # TEMP: unwrap once #24 is resolved
|
||||
data <- system.file("extdata", "BAPS_format_clustering_diploid.txt", package = "rBAPS")
|
||||
data <- system.file("extdata", "BAPS_clustering_diploid.txt", package = "rBAPS")
|
||||
greedyMix(data, "baps")
|
||||
} # TEMP: unwrap once #24 is resolved
|
||||
}
|
||||
\references{
|
||||
Samtools: a suite of programs for interacting
|
||||
|
|
|
|||
|
|
@ -46,10 +46,6 @@ raw_bam <- importFile(
|
|||
data = file.path(path_inst, "bam_example.bam"),
|
||||
format = "BAM",
|
||||
)
|
||||
raw_baps <- importFile(
|
||||
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
|
||||
format = "FASTA"
|
||||
)
|
||||
|
||||
test_that("Files are imported correctly", {
|
||||
expect_equal(dim(raw_fasta), c(5, 99))
|
||||
|
|
@ -61,13 +57,6 @@ test_that("Files are imported correctly", {
|
|||
)
|
||||
)
|
||||
expect_equal(length(raw_bam[[1]]), 13)
|
||||
expect_error(
|
||||
greedyMix(
|
||||
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
|
||||
format = "FASTA"
|
||||
),
|
||||
"FASTA format not yet supported on greedyMix"
|
||||
)
|
||||
})
|
||||
|
||||
test_that("greedyMix() fails successfully", {
|
||||
|
|
@ -77,6 +66,7 @@ test_that("greedyMix() fails successfully", {
|
|||
|
||||
test_that("greedyMix() works when it should", {
|
||||
baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt")
|
||||
fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta")
|
||||
greedy_baps <- greedyMix(baps_file, "BAPS")
|
||||
expect_type(greedy_baps, "list")
|
||||
expect_length(greedy_baps, 10L)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue