From 12edd330b9c3125d6a927d66a967866baa75bd2d Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 3 Sep 2021 09:09:09 +0200 Subject: [PATCH 1/2] Improved load_fasta validation --- R/load_fasta.R | 9 ++++++--- man/load_fasta.Rd | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/R/load_fasta.R b/R/load_fasta.R index 8d6a547..209a7c3 100644 --- a/R/load_fasta.R +++ b/R/load_fasta.R @@ -12,7 +12,7 @@ #' msa <- system.file("ext", "seqs.fa", package="rBAPS") #' snp.matrix <- load_fasta(msa) #' -#' @author Gerry Tonkin-Hill +#' @author Gerry Tonkin-Hill, Waldir Leoncio #' @seealso rhierbaps::load_fasta #' @importFrom ape read.FASTA as.DNAbin #' @export @@ -37,8 +37,11 @@ load_fasta <- function(msa, keep.singletons=FALSE) { rownames(seqs) <- seq_names seqs[is.na(seqs)] <- "-" - if (nrow(seqs)<3) stop("Less than 3 sequences!") - warning("Characters not in acgtnACGTN- will be treated as missing (-)...") + # Validation ----------------------------------------------------------------- + if (nrow(seqs) < 3) stop("Less than 3 sequences!") + if (any(!(as.vector(tolower(seqs)) %in% c("a", "c", "g", "t", "n", "-")))) { + warning("Characters not in acgtnACGTN- will be treated as missing (-)...") + } #Remove conserved columns conserved <- colSums(t(t(seqs)==seqs[1,]))==nrow(seqs) diff --git a/man/load_fasta.Rd b/man/load_fasta.Rd index 171bfb3..9b4c84c 100644 --- a/man/load_fasta.Rd +++ b/man/load_fasta.Rd @@ -27,5 +27,5 @@ snp.matrix <- load_fasta(msa) rhierbaps::load_fasta } \author{ -Gerry Tonkin-Hill +Gerry Tonkin-Hill, Waldir Leoncio } From 682555f8e36080c19fcc3b4b0d9738017e8887b5 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 3 Sep 2021 09:10:21 +0200 Subject: [PATCH 2/2] Adapted greedyMix examples (closes #15) --- inst/ext/FASTA_clustering_haploid.fasta | 20 +++++++++++++++ tests/testthat/test-greedyMix.R | 33 ++++++++++++++----------- 2 files changed, 38 insertions(+), 15 deletions(-) create mode 100644 inst/ext/FASTA_clustering_haploid.fasta diff --git a/inst/ext/FASTA_clustering_haploid.fasta b/inst/ext/FASTA_clustering_haploid.fasta new file mode 100644 index 0000000..9501482 --- /dev/null +++ b/inst/ext/FASTA_clustering_haploid.fasta @@ -0,0 +1,20 @@ +>1 +AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT +TAGTAACATGGTCAGCTGCTCATCCGTATT + +>2 +ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG +AATGCACCACAAACATACCCCACAGGCTCG + +>3 +GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT +GTCTGAGTATGGATGGTGGAGGCGGAGCCG + +>4 +ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG +CAAGGATAAAGTTCACCCATCACGTTGACC + +>5 +ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG +GTAACATAGCTACCAGGTCCCATTCCAGGA + diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index eaedf24..dde5e54 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -1,15 +1,15 @@ context("Auxiliary functions to greedyMix") -# Defining the relative path to current inst ------------- # +# Defining the relative path to current inst ----------------------------------- if (interactive()) { - path_inst <- "../../inst/ext/BAPS_format_clustering_diploid.txt" + path_inst <- "../../inst/ext" } else { - path_inst <- system.file( - "ext", "BAPS_format_clustering_diploid.txt", package="rBAPS" - ) + path_inst <- system.file("ext", "", package="rBAPS") } + +# Reading datasets ------------------------------------------------------------- baps_diploid <- read.delim( - file = path_inst, + file = paste(path_inst, "BAPS_format_clustering_diploid.txt", sep="/"), sep = " ", header = FALSE ) @@ -35,15 +35,18 @@ test_that("handleData works as expected", { expect_equal(data_obs, data_exp) }) -context("Opening files on greedyMix/load_fasta") +context("Opening files on greedyMix") -# # TODO: needs #12 to be fixed before this can be done without user intervention -# greedyMix( -# tietue = "inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt", -# format = "BAPS", -# savePreProcessed = FALSE -# ) # Upper bounds 100 100 -# TODO #16: replace with load_fasta() +df_fasta <- greedyMix( + data = paste(path_inst, "FASTA_clustering_haploid.fasta", sep="/"), + format ="fasta" +) +# TODO: add example reading VCF +# TODO: add example reading SAM +# TODO: add example reading Genpop +test_that("Files are imported correctly", { + expect_equal(dim(df_fasta), c(5, 99)) +}) context("Linkage") @@ -53,4 +56,4 @@ test_that("Linkages are properly calculated", { object = linkage(Y), expected = matrix(c(2, 1, 7, 8, 4, 3, 5, 6, .2, .3, .3, .6), ncol=3) ) -}) \ No newline at end of file +})