Merge branch 'issue-24' into develop
* issue-24: Increment version number to 0.0.0.9029 Disabled FASTA tests on greedyMix() (#24) Added synthetic FASTA file (#24) Ensure diffInCounts returns as.matrix (#24) Adapted tests to FASTA on greedyMix() (#24) Removed baps file pointing to fasta Improved conversion from FASTA to BAPS (#24) Improved handling of FASTA data (#24) `process_BAPS_data()` can handle file being an R object Added function to convert from FASTA to BAPS (#24) Unwrapped `greedyMix() example (#24)
This commit is contained in:
commit
010f18dd19
11 changed files with 109 additions and 27 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
Package: rBAPS
|
Package: rBAPS
|
||||||
Title: Bayesian Analysis of Population Structure
|
Title: Bayesian Analysis of Population Structure
|
||||||
Version: 0.0.0.9028
|
Version: 0.0.0.9029
|
||||||
Date: 2020-11-09
|
Date: 2020-11-09
|
||||||
Authors@R:
|
Authors@R:
|
||||||
c(
|
c(
|
||||||
|
|
@ -36,7 +36,7 @@ Description: Partial R implementation of the BAPS software
|
||||||
License: GPL-3
|
License: GPL-3
|
||||||
BugReports: https://github.com/ocbe-uio/rBAPS/issues
|
BugReports: https://github.com/ocbe-uio/rBAPS/issues
|
||||||
Encoding: UTF-8
|
Encoding: UTF-8
|
||||||
RoxygenNote: 7.3.1
|
RoxygenNote: 7.3.2
|
||||||
Suggests:
|
Suggests:
|
||||||
testthat (>= 2.1.0)
|
testthat (>= 2.1.0)
|
||||||
Imports:
|
Imports:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
# Generated by roxygen2: do not edit by hand
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
|
export(convert_FASTA_to_BAPS)
|
||||||
export(greedyMix)
|
export(greedyMix)
|
||||||
export(handleData)
|
export(handleData)
|
||||||
export(importFile)
|
export(importFile)
|
||||||
|
|
|
||||||
|
|
@ -13,5 +13,5 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) {
|
||||||
diffInCounts[element] <- diffInCounts[element] + 1
|
diffInCounts[element] <- diffInCounts[element] + 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return(diffInCounts)
|
return(as.matrix(diffInCounts))
|
||||||
}
|
}
|
||||||
|
|
|
||||||
16
R/convert_FASTA_to_BAPS.R
Normal file
16
R/convert_FASTA_to_BAPS.R
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
#' @title Convert from FASTA to BAPS
|
||||||
|
#' @description Converts a file (not an R object) from FASTA to BAPS format
|
||||||
|
#' @param file filename of FASTA file
|
||||||
|
#' @return `data` in BAPS format
|
||||||
|
#' @author Waldir Leoncio
|
||||||
|
#' @export
|
||||||
|
#' @examples
|
||||||
|
#' file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
|
||||||
|
#' convert_FASTA_to_BAPS(file)
|
||||||
|
convert_FASTA_to_BAPS <- function(file) {
|
||||||
|
data <- load_fasta(file) # Processing data
|
||||||
|
data <- cbind(data, seq_len(nrow(data))) # Add IDs of individuals (sequential)
|
||||||
|
data[data == 0] <- -9 # Because zeros (missing) in BAPS are coded as -9
|
||||||
|
colnames(data) <- paste("V", seq_len(ncol(data)), sep = "")
|
||||||
|
return(data)
|
||||||
|
}
|
||||||
|
|
@ -19,10 +19,8 @@
|
||||||
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
||||||
#' @export
|
#' @export
|
||||||
#' @examples
|
#' @examples
|
||||||
#' \dontrun{ # TEMP: unwrap once #24 is resolved
|
#' data <- system.file("extdata", "BAPS_clustering_diploid.txt", package = "rBAPS")
|
||||||
#' data <- system.file("extdata", "BAPS_format_clustering_diploid.txt", package = "rBAPS")
|
|
||||||
#' greedyMix(data, "baps")
|
#' greedyMix(data, "baps")
|
||||||
#' } # TEMP: unwrap once #24 is resolved
|
|
||||||
greedyMix <- function(
|
greedyMix <- function(
|
||||||
data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 3L,
|
data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 3L,
|
||||||
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
|
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
|
||||||
|
|
@ -30,7 +28,8 @@ greedyMix <- function(
|
||||||
) {
|
) {
|
||||||
# Importing and handling data ================================================
|
# Importing and handling data ================================================
|
||||||
if (tolower(format) %in% "fasta") {
|
if (tolower(format) %in% "fasta") {
|
||||||
stop("FASTA format not yet supported on greedyMix")
|
data <- convert_FASTA_to_BAPS(data)
|
||||||
|
format <- "baps"
|
||||||
}
|
}
|
||||||
if (tolower(format) %in% "baps") {
|
if (tolower(format) %in% "baps") {
|
||||||
data <- process_BAPS_data(data, NULL)
|
data <- process_BAPS_data(data, NULL)
|
||||||
|
|
@ -69,7 +68,7 @@ greedyMix <- function(
|
||||||
# Generating partition summary ===============================================
|
# Generating partition summary ===============================================
|
||||||
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
|
ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]])
|
||||||
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
|
c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L)
|
||||||
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose)
|
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data
|
||||||
logml <- logml_npops_partitionSummary[["logml"]]
|
logml <- logml_npops_partitionSummary[["logml"]]
|
||||||
npops <- logml_npops_partitionSummary[["npops"]]
|
npops <- logml_npops_partitionSummary[["npops"]]
|
||||||
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ handleData <- function(raw_data, format = "Genepop") {
|
||||||
"bam" = stop("BAM format not supported for processing yet")
|
"bam" = stop("BAM format not supported for processing yet")
|
||||||
)
|
)
|
||||||
data <- as.matrix(raw_data)
|
data <- as.matrix(raw_data)
|
||||||
|
|
||||||
dataApu <- data[, seq_len(nloci)]
|
dataApu <- data[, seq_len(nloci)]
|
||||||
nollat <- matlab2r::find(dataApu == 0)
|
nollat <- matlab2r::find(dataApu == 0)
|
||||||
if (!isempty(nollat)) {
|
if (!isempty(nollat)) {
|
||||||
|
|
@ -54,9 +55,14 @@ handleData <- function(raw_data, format = "Genepop") {
|
||||||
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
|
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
|
||||||
}
|
}
|
||||||
|
|
||||||
for (loc in seq_len(nloci)) {
|
# This is where data gets converted to {1, 2, 3, 4} for {A, C, G, T}
|
||||||
for (all in seq_len(noalle[loc])) {
|
codes <- unique(as.vector(data[, -ncol(data)]))
|
||||||
data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all
|
skip_conversion <- base::min(codes) == -9 && base::max(codes) == 4
|
||||||
|
if (!skip_conversion) {
|
||||||
|
for (loc in seq_len(nloci)) {
|
||||||
|
for (all in seq_len(noalle[loc])) {
|
||||||
|
data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,14 +2,23 @@ process_BAPS_data <- function(file, partitionCompare) {
|
||||||
if (!is.null(partitionCompare)) {
|
if (!is.null(partitionCompare)) {
|
||||||
cat('Data:', file, '\n')
|
cat('Data:', file, '\n')
|
||||||
}
|
}
|
||||||
data <- read.table(file)
|
|
||||||
ninds <- testaaOnkoKunnollinenBapsData(data) # for testing purposes?
|
# Importing data
|
||||||
|
if (is.character(file)) {
|
||||||
|
data <- read.table(file)
|
||||||
|
} else {
|
||||||
|
data <- file
|
||||||
|
}
|
||||||
|
|
||||||
|
ninds <- testaaOnkoKunnollinenBapsData(data) # Checks if last column is ID
|
||||||
if (ninds == 0) {
|
if (ninds == 0) {
|
||||||
warning('Incorrect Data-file.')
|
warning('Incorrect Data-file.')
|
||||||
return(NULL)
|
return(NULL)
|
||||||
}
|
}
|
||||||
|
|
||||||
popnames <- NULL # Dropped specification of population names (from BAPS 6)
|
popnames <- NULL # Dropped specification of population names (from BAPS 6)
|
||||||
|
|
||||||
|
# Processing data
|
||||||
result <- handleData(data, format = "BAPS")
|
result <- handleData(data, format = "BAPS")
|
||||||
data <- result$newData
|
data <- result$newData
|
||||||
rowsFromInd <- result$rowsFromInd
|
rowsFromInd <- result$rowsFromInd
|
||||||
|
|
|
||||||
39
inst/extdata/FASTA_clustering_haploid_ext.fasta
vendored
Normal file
39
inst/extdata/FASTA_clustering_haploid_ext.fasta
vendored
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
>1
|
||||||
|
AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT
|
||||||
|
TAGTAACATGGTCAGCTGCTCATCCGTATT
|
||||||
|
|
||||||
|
>2
|
||||||
|
ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG
|
||||||
|
AATGCACCACAAACATACCCCACAGGCTCG
|
||||||
|
|
||||||
|
>3
|
||||||
|
GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT
|
||||||
|
GTCTGAGTATGGATGGTGGAGGCGGAGCCG
|
||||||
|
|
||||||
|
>4
|
||||||
|
ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG
|
||||||
|
CAAGGATAAAGTTCACCCATCACGTTGACC
|
||||||
|
|
||||||
|
>5
|
||||||
|
ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG
|
||||||
|
GTAACATAGCTACCAGGTCCCATTCCAGGA
|
||||||
|
|
||||||
|
>6
|
||||||
|
TCCCCCCAGTGGACACGGCTCGGGTAATGCAGCTTACCTCAACGCTAACGCATTTGACAGTAGTGAATCA
|
||||||
|
CGGGCAACGCTGGGTGATTGCAAGTTTTGT
|
||||||
|
|
||||||
|
>7
|
||||||
|
GCAACCACTGGTCGCCTGGAGCATTGATCAGGAACATGTCTGCAAGGGGGGCCGTTGCGGGTTTCAGTCA
|
||||||
|
TCGTATTGCGCTGCAAATCCTCGGAGCCTC
|
||||||
|
|
||||||
|
>8
|
||||||
|
CACCCGTAAAGCACGAGTAGGTTTCACCGCGACTTATATATTCCACCATACGGTTAACAAGGCAACACTT
|
||||||
|
ATTCGTCGTCCAATGATCGTCCCTCTCCAG
|
||||||
|
|
||||||
|
>9
|
||||||
|
CGAATCCATTCGGGATAAAGTTAATACGTAAGTCGAACGGGGTTTAGGAAGAGCTCTGCTGTTAAGCGCG
|
||||||
|
CTTATCATCTTATATGTGTCAGTTGTGTAC
|
||||||
|
|
||||||
|
>10
|
||||||
|
CGTTCGCATTTATAGGATATCCCCTAAACTAATTGGTAGTGATGGTATACCAGCGGTGCATTGTCCTCGC
|
||||||
|
CTGTAGTTTAAGTCAACCTCTGCCTTAATC
|
||||||
24
man/convert_FASTA_to_BAPS.Rd
Normal file
24
man/convert_FASTA_to_BAPS.Rd
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/convert_FASTA_to_BAPS.R
|
||||||
|
\name{convert_FASTA_to_BAPS}
|
||||||
|
\alias{convert_FASTA_to_BAPS}
|
||||||
|
\title{Convert from FASTA to BAPS}
|
||||||
|
\usage{
|
||||||
|
convert_FASTA_to_BAPS(file)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{file}{filename of FASTA file}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
`data` in BAPS format
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Converts a file (not an R object) from FASTA to BAPS format
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
|
||||||
|
convert_FASTA_to_BAPS(file)
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Waldir Leoncio
|
||||||
|
}
|
||||||
|
|
@ -48,10 +48,8 @@ greedyMix(
|
||||||
Clustering of individuals
|
Clustering of individuals
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
\dontrun{ # TEMP: unwrap once #24 is resolved
|
data <- system.file("extdata", "BAPS_clustering_diploid.txt", package = "rBAPS")
|
||||||
data <- system.file("extdata", "BAPS_format_clustering_diploid.txt", package = "rBAPS")
|
|
||||||
greedyMix(data, "baps")
|
greedyMix(data, "baps")
|
||||||
} # TEMP: unwrap once #24 is resolved
|
|
||||||
}
|
}
|
||||||
\references{
|
\references{
|
||||||
Samtools: a suite of programs for interacting
|
Samtools: a suite of programs for interacting
|
||||||
|
|
|
||||||
|
|
@ -46,10 +46,6 @@ raw_bam <- importFile(
|
||||||
data = file.path(path_inst, "bam_example.bam"),
|
data = file.path(path_inst, "bam_example.bam"),
|
||||||
format = "BAM",
|
format = "BAM",
|
||||||
)
|
)
|
||||||
raw_baps <- importFile(
|
|
||||||
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
|
|
||||||
format = "FASTA"
|
|
||||||
)
|
|
||||||
|
|
||||||
test_that("Files are imported correctly", {
|
test_that("Files are imported correctly", {
|
||||||
expect_equal(dim(raw_fasta), c(5, 99))
|
expect_equal(dim(raw_fasta), c(5, 99))
|
||||||
|
|
@ -61,13 +57,6 @@ test_that("Files are imported correctly", {
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
expect_equal(length(raw_bam[[1]]), 13)
|
expect_equal(length(raw_bam[[1]]), 13)
|
||||||
expect_error(
|
|
||||||
greedyMix(
|
|
||||||
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
|
|
||||||
format = "FASTA"
|
|
||||||
),
|
|
||||||
"FASTA format not yet supported on greedyMix"
|
|
||||||
)
|
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("greedyMix() fails successfully", {
|
test_that("greedyMix() fails successfully", {
|
||||||
|
|
@ -77,6 +66,7 @@ test_that("greedyMix() fails successfully", {
|
||||||
|
|
||||||
test_that("greedyMix() works when it should", {
|
test_that("greedyMix() works when it should", {
|
||||||
baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt")
|
baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt")
|
||||||
|
fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta")
|
||||||
greedy_baps <- greedyMix(baps_file, "BAPS")
|
greedy_baps <- greedyMix(baps_file, "BAPS")
|
||||||
expect_type(greedy_baps, "list")
|
expect_type(greedy_baps, "list")
|
||||||
expect_length(greedy_baps, 10L)
|
expect_length(greedy_baps, 10L)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue