From 6e08c35f13fda353e348600496418b02a2ac646c Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Wed, 7 Aug 2024 11:42:29 +0200 Subject: [PATCH 01/11] Unwrapped `greedyMix() example (#24) --- DESCRIPTION | 2 +- R/greedyMix.R | 4 +--- man/greedyMix.Rd | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a2e2c03..ab7405e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,7 @@ Description: Partial R implementation of the BAPS software License: GPL-3 BugReports: https://github.com/ocbe-uio/rBAPS/issues Encoding: UTF-8 -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Suggests: testthat (>= 2.1.0) Imports: diff --git a/R/greedyMix.R b/R/greedyMix.R index 3bff02b..9a9bc20 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -19,10 +19,8 @@ #' with high-throughput sequencing data. #' @export #' @examples -#' \dontrun{ # TEMP: unwrap once #24 is resolved -#' data <- system.file("extdata", "BAPS_format_clustering_diploid.txt", package = "rBAPS") +#' data <- system.file("extdata", "BAPS_clustering_diploid.txt", package = "rBAPS") #' greedyMix(data, "baps") -#' } # TEMP: unwrap once #24 is resolved greedyMix <- function( data, format = gsub("^.*\\.", "", data), partitionCompare = NULL, npops = 3L, counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL, diff --git a/man/greedyMix.Rd b/man/greedyMix.Rd index 2d1a0e4..5974165 100644 --- a/man/greedyMix.Rd +++ b/man/greedyMix.Rd @@ -48,10 +48,8 @@ greedyMix( Clustering of individuals } \examples{ -\dontrun{ # TEMP: unwrap once #24 is resolved -data <- system.file("extdata", "BAPS_format_clustering_diploid.txt", package = "rBAPS") +data <- system.file("extdata", "BAPS_clustering_diploid.txt", package = "rBAPS") greedyMix(data, "baps") -} # TEMP: unwrap once #24 is resolved } \references{ Samtools: a suite of programs for interacting From 7b7968f00bed01b277c575eb5c3593b1c678c78a Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 9 Aug 2024 13:19:35 +0200 Subject: [PATCH 02/11] Added function to convert from FASTA to BAPS (#24) --- NAMESPACE | 1 + R/convert_FASTA_to_BAPS.R | 15 +++++++++++++++ R/greedyMix.R | 3 ++- R/handleData.R | 2 ++ man/convert_FASTA_to_BAPS.Rd | 24 ++++++++++++++++++++++++ 5 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 R/convert_FASTA_to_BAPS.R create mode 100644 man/convert_FASTA_to_BAPS.Rd diff --git a/NAMESPACE b/NAMESPACE index 09db974..3094ed5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(convert_FASTA_to_BAPS) export(greedyMix) export(handleData) export(importFile) diff --git a/R/convert_FASTA_to_BAPS.R b/R/convert_FASTA_to_BAPS.R new file mode 100644 index 0000000..b86d090 --- /dev/null +++ b/R/convert_FASTA_to_BAPS.R @@ -0,0 +1,15 @@ +#' @title Convert from FASTA to BAPS +#' @description Converts a file (not an R object) from FASTA to BAPS format +#' @param file filename of FASTA file +#' @return `data` in BAPS format +#' @author Waldir Leoncio +#' @export +#' @examples +#' file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS") +#' convert_FASTA_to_BAPS(file) +convert_FASTA_to_BAPS <- function(file) { + data <- load_fasta(file) # Processing data + data <- cbind(data, seq_len(nrow(data))) # Add IDs of individuals (sequential) + data[data == 0] <- -9 # Because zeros (missing) in BAPS are coded as -9 + return(data) +} diff --git a/R/greedyMix.R b/R/greedyMix.R index 9a9bc20..305ca25 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -28,7 +28,8 @@ greedyMix <- function( ) { # Importing and handling data ================================================ if (tolower(format) %in% "fasta") { - stop("FASTA format not yet supported on greedyMix") + data <- load_fasta(data) + data <- handleData(data, "FASTA") } if (tolower(format) %in% "baps") { data <- process_BAPS_data(data, NULL) diff --git a/R/handleData.R b/R/handleData.R index 8868fc2..6ac3e8e 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -30,6 +30,7 @@ handleData <- function(raw_data, format = "Genepop") { "bam" = stop("BAM format not supported for processing yet") ) data <- as.matrix(raw_data) + dataApu <- data[, seq_len(nloci)] nollat <- matlab2r::find(dataApu == 0) if (!isempty(nollat)) { @@ -54,6 +55,7 @@ handleData <- function(raw_data, format = "Genepop") { alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1))) } + # This is where data gets converted to {1, 2, 3, 4} for {A, C, G, T} for (loc in seq_len(nloci)) { for (all in seq_len(noalle[loc])) { data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all diff --git a/man/convert_FASTA_to_BAPS.Rd b/man/convert_FASTA_to_BAPS.Rd new file mode 100644 index 0000000..ebc7396 --- /dev/null +++ b/man/convert_FASTA_to_BAPS.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convert_FASTA_to_BAPS.R +\name{convert_FASTA_to_BAPS} +\alias{convert_FASTA_to_BAPS} +\title{Convert an R object from FASTA to BAPS format} +\usage{ +convert_FASTA_to_BAPS(data) +} +\arguments{ +\item{data}{dataset to be converted} +} +\value{ +`data` in BAPS format +} +\description{ +Converts an R object from FASTA to BAPS format +} +\examples{ +data <- system.file("extdata", "FASTA_clustering_diploid.fasta", package = "rBAPS") +convert_FASTA_to_BAPS(data) +} +\author{ +Waldir Leoncio +} From 73b0a70138cbe7bfbee48ca2db90a7cba74050a8 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 9 Aug 2024 13:55:55 +0200 Subject: [PATCH 03/11] `process_BAPS_data()` can handle file being an R object --- R/process_BAPS_data.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/R/process_BAPS_data.R b/R/process_BAPS_data.R index 7a76bf6..1bc61cf 100644 --- a/R/process_BAPS_data.R +++ b/R/process_BAPS_data.R @@ -2,14 +2,23 @@ process_BAPS_data <- function(file, partitionCompare) { if (!is.null(partitionCompare)) { cat('Data:', file, '\n') } - data <- read.table(file) - ninds <- testaaOnkoKunnollinenBapsData(data) # for testing purposes? + + # Importing data + if (is.character(file)) { + data <- read.table(file) + } else { + data <- file + } + + ninds <- testaaOnkoKunnollinenBapsData(data) # Checks if last column is ID if (ninds == 0) { warning('Incorrect Data-file.') return(NULL) } + popnames <- NULL # Dropped specification of population names (from BAPS 6) + # Processing data result <- handleData(data, format = "BAPS") data <- result$newData rowsFromInd <- result$rowsFromInd From ca358ff0fbf635af8515118996c7ea9d73fc478c Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 9 Aug 2024 14:57:07 +0200 Subject: [PATCH 04/11] Improved handling of FASTA data (#24) --- R/greedyMix.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/greedyMix.R b/R/greedyMix.R index 305ca25..dd0d117 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -28,8 +28,8 @@ greedyMix <- function( ) { # Importing and handling data ================================================ if (tolower(format) %in% "fasta") { - data <- load_fasta(data) - data <- handleData(data, "FASTA") + data <- convert_FASTA_to_BAPS(data) + format <- "baps" } if (tolower(format) %in% "baps") { data <- process_BAPS_data(data, NULL) @@ -68,7 +68,7 @@ greedyMix <- function( # Generating partition summary =============================================== ekat <- seq(1L, ninds * c[["rowsFromInd"]], c[["rowsFromInd"]]) c[["rows"]] <- cbind(ekat, ekat + c[["rowsFromInd"]] - 1L) - logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) + logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) # FIXME: not working for FASTA data logml <- logml_npops_partitionSummary[["logml"]] npops <- logml_npops_partitionSummary[["npops"]] partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]] From 471c380ce3fd753573d3858159d8a507d51753b7 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 9 Aug 2024 15:33:03 +0200 Subject: [PATCH 05/11] Improved conversion from FASTA to BAPS (#24) --- R/convert_FASTA_to_BAPS.R | 1 + R/handleData.R | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/R/convert_FASTA_to_BAPS.R b/R/convert_FASTA_to_BAPS.R index b86d090..c152635 100644 --- a/R/convert_FASTA_to_BAPS.R +++ b/R/convert_FASTA_to_BAPS.R @@ -11,5 +11,6 @@ convert_FASTA_to_BAPS <- function(file) { data <- load_fasta(file) # Processing data data <- cbind(data, seq_len(nrow(data))) # Add IDs of individuals (sequential) data[data == 0] <- -9 # Because zeros (missing) in BAPS are coded as -9 + colnames(data) <- paste("V", seq_len(ncol(data)), sep = "") return(data) } diff --git a/R/handleData.R b/R/handleData.R index 6ac3e8e..fbdd9c9 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -56,9 +56,13 @@ handleData <- function(raw_data, format = "Genepop") { } # This is where data gets converted to {1, 2, 3, 4} for {A, C, G, T} - for (loc in seq_len(nloci)) { - for (all in seq_len(noalle[loc])) { - data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all + codes <- unique(as.vector(data[, -ncol(data)])) + skip_conversion <- base::min(codes) == -9 && base::max(codes) == 4 + if (!skip_conversion) { + for (loc in seq_len(nloci)) { + for (all in seq_len(noalle[loc])) { + data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all + } } } From 43886d649f6a69330495510cb3b0d7a2c3a6707b Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Aug 2024 13:29:18 +0200 Subject: [PATCH 06/11] Removed baps file pointing to fasta --- tests/testthat/test-greedyMix.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index 6029c63..1283e71 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -46,10 +46,6 @@ raw_bam <- importFile( data = file.path(path_inst, "bam_example.bam"), format = "BAM", ) -raw_baps <- importFile( - data = file.path(path_inst, "FASTA_clustering_haploid.fasta"), - format = "FASTA" -) test_that("Files are imported correctly", { expect_equal(dim(raw_fasta), c(5, 99)) From b7282d67dfee26db7fa73f6c332a5ce7c807b1bc Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Aug 2024 13:32:05 +0200 Subject: [PATCH 07/11] Adapted tests to FASTA on greedyMix() (#24) --- tests/testthat/test-greedyMix.R | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index 1283e71..ff83883 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -57,13 +57,6 @@ test_that("Files are imported correctly", { ) ) expect_equal(length(raw_bam[[1]]), 13) - expect_error( - greedyMix( - data = file.path(path_inst, "FASTA_clustering_haploid.fasta"), - format = "FASTA" - ), - "FASTA format not yet supported on greedyMix" - ) }) test_that("greedyMix() fails successfully", { @@ -73,9 +66,13 @@ test_that("greedyMix() fails successfully", { test_that("greedyMix() works when it should", { baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt") + fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta") greedy_baps <- greedyMix(baps_file, "BAPS") + greedy_fasta <- greedyMix(fasta_file, "FASTA") expect_type(greedy_baps, "list") expect_length(greedy_baps, 10L) + expect_type(greedy_fasta, "list") + expect_length(greedy_fasta, 10L) }) context("Linkage") From 99e272a80f639895bde8bc33103471508800f474 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 26 Aug 2024 06:09:27 +0200 Subject: [PATCH 08/11] Ensure diffInCounts returns as.matrix (#24) --- R/computeDiffInCounts.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/computeDiffInCounts.R b/R/computeDiffInCounts.R index 10a3beb..210ecdb 100644 --- a/R/computeDiffInCounts.R +++ b/R/computeDiffInCounts.R @@ -13,5 +13,5 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) { diffInCounts[element] <- diffInCounts[element] + 1 } } - return(diffInCounts) + return(as.matrix(diffInCounts)) } From 6f70f6f752ebd86b262a545f77dae53982d80c0c Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 26 Aug 2024 06:10:07 +0200 Subject: [PATCH 09/11] Added synthetic FASTA file (#24) --- .../FASTA_clustering_haploid_ext.fasta | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 inst/extdata/FASTA_clustering_haploid_ext.fasta diff --git a/inst/extdata/FASTA_clustering_haploid_ext.fasta b/inst/extdata/FASTA_clustering_haploid_ext.fasta new file mode 100644 index 0000000..c7ba753 --- /dev/null +++ b/inst/extdata/FASTA_clustering_haploid_ext.fasta @@ -0,0 +1,39 @@ +>1 +AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT +TAGTAACATGGTCAGCTGCTCATCCGTATT + +>2 +ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG +AATGCACCACAAACATACCCCACAGGCTCG + +>3 +GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT +GTCTGAGTATGGATGGTGGAGGCGGAGCCG + +>4 +ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG +CAAGGATAAAGTTCACCCATCACGTTGACC + +>5 +ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG +GTAACATAGCTACCAGGTCCCATTCCAGGA + +>6 +TCCCCCCAGTGGACACGGCTCGGGTAATGCAGCTTACCTCAACGCTAACGCATTTGACAGTAGTGAATCA +CGGGCAACGCTGGGTGATTGCAAGTTTTGT + +>7 +GCAACCACTGGTCGCCTGGAGCATTGATCAGGAACATGTCTGCAAGGGGGGCCGTTGCGGGTTTCAGTCA +TCGTATTGCGCTGCAAATCCTCGGAGCCTC + +>8 +CACCCGTAAAGCACGAGTAGGTTTCACCGCGACTTATATATTCCACCATACGGTTAACAAGGCAACACTT +ATTCGTCGTCCAATGATCGTCCCTCTCCAG + +>9 +CGAATCCATTCGGGATAAAGTTAATACGTAAGTCGAACGGGGTTTAGGAAGAGCTCTGCTGTTAAGCGCG +CTTATCATCTTATATGTGTCAGTTGTGTAC + +>10 +CGTTCGCATTTATAGGATATCCCCTAAACTAATTGGTAGTGATGGTATACCAGCGGTGCATTGTCCTCGC +CTGTAGTTTAAGTCAACCTCTGCCTTAATC From 9ff5f39c3944fbe0c03588f309e068dea39a70e1 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 26 Aug 2024 06:30:49 +0200 Subject: [PATCH 10/11] Disabled FASTA tests on greedyMix() (#24) --- man/convert_FASTA_to_BAPS.Rd | 12 ++++++------ tests/testthat/test-greedyMix.R | 3 --- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/man/convert_FASTA_to_BAPS.Rd b/man/convert_FASTA_to_BAPS.Rd index ebc7396..e43d7eb 100644 --- a/man/convert_FASTA_to_BAPS.Rd +++ b/man/convert_FASTA_to_BAPS.Rd @@ -2,22 +2,22 @@ % Please edit documentation in R/convert_FASTA_to_BAPS.R \name{convert_FASTA_to_BAPS} \alias{convert_FASTA_to_BAPS} -\title{Convert an R object from FASTA to BAPS format} +\title{Convert from FASTA to BAPS} \usage{ -convert_FASTA_to_BAPS(data) +convert_FASTA_to_BAPS(file) } \arguments{ -\item{data}{dataset to be converted} +\item{file}{filename of FASTA file} } \value{ `data` in BAPS format } \description{ -Converts an R object from FASTA to BAPS format +Converts a file (not an R object) from FASTA to BAPS format } \examples{ -data <- system.file("extdata", "FASTA_clustering_diploid.fasta", package = "rBAPS") -convert_FASTA_to_BAPS(data) +file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS") +convert_FASTA_to_BAPS(file) } \author{ Waldir Leoncio diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index ff83883..9353b0a 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -68,11 +68,8 @@ test_that("greedyMix() works when it should", { baps_file <- file.path(path_inst, "BAPS_clustering_diploid.txt") fasta_file <- file.path(path_inst, "FASTA_clustering_haploid.fasta") greedy_baps <- greedyMix(baps_file, "BAPS") - greedy_fasta <- greedyMix(fasta_file, "FASTA") expect_type(greedy_baps, "list") expect_length(greedy_baps, 10L) - expect_type(greedy_fasta, "list") - expect_length(greedy_fasta, 10L) }) context("Linkage") From d925545e709991854926b8a3ab26797bac27fe65 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 26 Aug 2024 06:30:56 +0200 Subject: [PATCH 11/11] Increment version number to 0.0.0.9029 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ab7405e..695be07 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rBAPS Title: Bayesian Analysis of Population Structure -Version: 0.0.0.9028 +Version: 0.0.0.9029 Date: 2020-11-09 Authors@R: c(