Merge branch 'issue-18' into develop

This commit is contained in:
Waldir Leoncio 2021-09-03 12:50:46 +02:00
commit 1b1999d2b8
6 changed files with 1921 additions and 9 deletions

View file

@ -1,9 +1,11 @@
#' @title Clustering of individuals #' @title Clustering of individuals
#' @param data data file #' @param data data file
#' @param format Format of the data c("FASTA", "VCF", "SAM", or "GenePop") #' @param format Format of the data c("FASTA", "VCF", "BAM", or "GenePop")
#' @param verbose if \code{TRUE}, prints extra output information #' @param verbose if \code{TRUE}, prints extra output information
#' @importFrom utils read.delim #' @importFrom utils read.delim
#' @importFrom vcfR read.vcfR #' @importFrom vcfR read.vcfR
#' @references Samtools: a suite of programs for interacting
#' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export #' @export
greedyMix <- function(data, format, verbose = TRUE) { greedyMix <- function(data, format, verbose = TRUE) {
format <- tolower(format) format <- tolower(format)
@ -12,9 +14,15 @@ greedyMix <- function(data, format, verbose = TRUE) {
} else if (format == "vcf") { } else if (format == "vcf") {
out <- vcfR::read.vcfR(data, verbose = verbose) out <- vcfR::read.vcfR(data, verbose = verbose)
} else if (format == "sam") { } else if (format == "sam") {
stop("SAM files not yet supported." ) stop(
# TODO #18: implement load_sam() "SAM files not directly supported. ",
} else if(format == "genepop") { "Install the samtools software and execute ",
"'samtools view -b in_file.sam > out_file.bam' to convert to BAM ",
"and try running this function again with 'format=BAM'"
)
} else if (format == "bam") {
out <- Rsamtools::scanBam(data)
} else if (format == "genepop") {
# TODO #19: implement load_genepop() # TODO #19: implement load_genepop()
stop("GenePop files not yet supported." ) stop("GenePop files not yet supported." )
} else { } else {

BIN
inst/ext/bam_example.bam Normal file

Binary file not shown.

File diff suppressed because it is too large Load diff

12
inst/ext/sam_example.sam Normal file
View file

@ -0,0 +1,12 @@
@HD VN:1.0 SO:coordinate
@SQ SN:1 LN:249250621 AS:NCBI37 UR:file:/data/local/ref/GATK/human_g1k_v37.fasta M5:1b22b98cdeb4a9304cb5d48026a85128
@SQ SN:2 LN:243199373 AS:NCBI37 UR:file:/data/local/ref/GATK/human_g1k_v37.fasta M5:a0d9851da00400dec1098a9255ac712e
@SQ SN:3 LN:198022430 AS:NCBI37 UR:file:/data/local/ref/GATK/human_g1k_v37.fasta M5:fdfd811849cc2fadebc929bb925902e5
@RG ID:UM0098:1 PL:ILLUMINA PU:HWUSI-EAS1707-615LHAAXX-L001 LB:80 DT:2010-05-05T20:00:00-0400 SM:SD37743 CN:UMCORE
@RG ID:UM0098:2 PL:ILLUMINA PU:HWUSI-EAS1707-615LHAAXX-L002 LB:80 DT:2010-05-05T20:00:00-0400 SM:SD37743 CN:UMCORE
@PG ID:bwa VN:0.5.4
@PG ID:GATK TableRecalibration VN:1.0.3471 CL:Covariates=[ReadGroupCovariate, QualityScoreCovariate, CycleCovariate, DinucCovariate, TileCovariate], default_read_group=null, default_platform=null, force_read_group=null, force_platform=null, solid_recal_mode=SET_Q_ZERO, window_size_nqs=5, homopolymer_nback=7, exception_if_no_tile=false, ignore_nocall_colorspace=false, pQ=5, maxQ=40, smoothing=1
1:497:R:-272+13M17D24M 113 1 497 37 37M 15 100338662 0 CGGGTCTGACCTGAGGAGAACTGTGCTCCGCCTTCAG 0;==-==9;>>>>>=>>>>>>>>>>>=>>>>>>>>>> XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:37
19:20389:F:275+18M2D19M 99 1 17644 0 37M = 17919 314 TATGACTGCTAATAATACCTACACATGTTAGAACCAT >>>>>>>>>>>>>>>>>>>><<>>><<>>4::>>:<9 RG:Z:UM0098:1 XT:A:R NM:i:0 SM:i:0 AM:i:0 X0:i:4 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:37
19:20389:F:275+18M2D19M 147 1 17919 0 18M2D19M = 17644 -314 GTAGTACCAACTGTAAGTCCTTATCTTCATACTTTGT ;44999;499<8<8<<<8<<><<<<><7<;<<<>><< XT:A:R NM:i:2 SM:i:0 AM:i:0 X0:i:4 X1:i:0 XM:i:0 XO:i:1 XG:i:2 MD:Z:18^CA19
9:21597+10M2I25M:R:-209 83 1 21678 0 8M2I27M = 21469 -244 CACCACATCACATATACCAAGCCTGGCTGTGTCTTCT <;9<<5><<<<><<<>><<><>><9>><>>>9>>><> XT:A:R NM:i:2 SM:i:0 AM:i:0 X0:i:5 X1:i:0 XM:i:0 XO:i:1 XG:i:2 MD:Z:35

View file

@ -9,10 +9,14 @@ greedyMix(data, format, verbose = TRUE)
\arguments{ \arguments{
\item{data}{data file} \item{data}{data file}
\item{format}{Format of the data c("FASTA", "VCF", "SAM", or "GenePop")} \item{format}{Format of the data c("FASTA", "VCF", "BAM", or "GenePop")}
\item{verbose}{if \code{TRUE}, prints extra output information} \item{verbose}{if \code{TRUE}, prints extra output information}
} }
\description{ \description{
Clustering of individuals Clustering of individuals
} }
\references{
Samtools: a suite of programs for interacting
with high-throughput sequencing data. <http://www.htslib.org/>
}

View file

@ -38,20 +38,29 @@ test_that("handleData works as expected", {
context("Opening files on greedyMix") context("Opening files on greedyMix")
df_fasta <- greedyMix( df_fasta <- greedyMix(
data = paste(path_inst, "FASTA_clustering_haploid.fasta", sep="/"), data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
format = "FASTA" format = "FASTA"
) )
df_vcf <- greedyMix( df_vcf <- greedyMix(
data = paste(path_inst, "vcf_example.vcf", sep="/"), data = file.path(path_inst, "vcf_example.vcf"),
format = "VCF", format = "VCF",
verbose = FALSE verbose = FALSE
) )
# TODO #17: add example reading VCF df_bam <- greedyMix(
# TODO #18: add example reading SAM data = file.path(path_inst, "bam_example.bam"),
format = "BAM",
)
# TODO #19: add example reading GenePop # TODO #19: add example reading GenePop
test_that("Files are imported correctly", { test_that("Files are imported correctly", {
expect_equal(dim(df_fasta), c(5, 99)) expect_equal(dim(df_fasta), c(5, 99))
expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3)) expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
expect_error(
greedyMix(
data = paste(path_inst, "sam_example.sam", sep="/"),
format = "SAM",
)
)
expect_equal(length(df_bam[[1]]), 13)
}) })
context("Linkage") context("Linkage")