Merge branch 'issue-18' into develop
This commit is contained in:
commit
1b1999d2b8
6 changed files with 1921 additions and 9 deletions
|
|
@ -1,9 +1,11 @@
|
||||||
#' @title Clustering of individuals
|
#' @title Clustering of individuals
|
||||||
#' @param data data file
|
#' @param data data file
|
||||||
#' @param format Format of the data c("FASTA", "VCF" ,"SAM", or "GenePop")
|
#' @param format Format of the data c("FASTA", "VCF", "BAM", or "GenePop")
|
||||||
#' @param verbose if \code{TRUE}, prints extra output information
|
#' @param verbose if \code{TRUE}, prints extra output information
|
||||||
#' @importFrom utils read.delim
|
#' @importFrom utils read.delim
|
||||||
#' @importFrom vcfR read.vcfR
|
#' @importFrom vcfR read.vcfR
|
||||||
|
#' @references Samtools: a suite of programs for interacting
|
||||||
|
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
||||||
#' @export
|
#' @export
|
||||||
greedyMix <- function(data, format, verbose = TRUE) {
|
greedyMix <- function(data, format, verbose = TRUE) {
|
||||||
format <- tolower(format)
|
format <- tolower(format)
|
||||||
|
|
@ -12,8 +14,14 @@ greedyMix <- function(data, format, verbose = TRUE) {
|
||||||
} else if (format == "vcf") {
|
} else if (format == "vcf") {
|
||||||
out <- vcfR::read.vcfR(data, verbose = verbose)
|
out <- vcfR::read.vcfR(data, verbose = verbose)
|
||||||
} else if (format == "sam") {
|
} else if (format == "sam") {
|
||||||
stop("SAM files not yet supported." )
|
stop(
|
||||||
# TODO #18: implement load_sam()
|
"SAM files not directly supported. ",
|
||||||
|
"Install the samtools software and execute ",
|
||||||
|
"'samtools view -b in_file.sam > out_file.bam' to convert to BAM ",
|
||||||
|
"and try running this function again with 'format=BAM'"
|
||||||
|
)
|
||||||
|
} else if (format == "bam") {
|
||||||
|
out <- Rsamtools::scanBam(data)
|
||||||
} else if (format == "genepop") {
|
} else if (format == "genepop") {
|
||||||
# TODO #19: implement load_genepop()
|
# TODO #19: implement load_genepop()
|
||||||
stop("GenePop files not yet supported." )
|
stop("GenePop files not yet supported." )
|
||||||
|
|
|
||||||
BIN
inst/ext/bam_example.bam
Normal file
BIN
inst/ext/bam_example.bam
Normal file
Binary file not shown.
1879
inst/ext/example_drosophila.Aligned.out.sam
Normal file
1879
inst/ext/example_drosophila.Aligned.out.sam
Normal file
File diff suppressed because it is too large
Load diff
12
inst/ext/sam_example.sam
Normal file
12
inst/ext/sam_example.sam
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
@HD VN:1.0 SO:coordinate
|
||||||
|
@SQ SN:1 LN:249250621 AS:NCBI37 UR:file:/data/local/ref/GATK/human_g1k_v37.fasta M5:1b22b98cdeb4a9304cb5d48026a85128
|
||||||
|
@SQ SN:2 LN:243199373 AS:NCBI37 UR:file:/data/local/ref/GATK/human_g1k_v37.fasta M5:a0d9851da00400dec1098a9255ac712e
|
||||||
|
@SQ SN:3 LN:198022430 AS:NCBI37 UR:file:/data/local/ref/GATK/human_g1k_v37.fasta M5:fdfd811849cc2fadebc929bb925902e5
|
||||||
|
@RG ID:UM0098:1 PL:ILLUMINA PU:HWUSI-EAS1707-615LHAAXX-L001 LB:80 DT:2010-05-05T20:00:00-0400 SM:SD37743 CN:UMCORE
|
||||||
|
@RG ID:UM0098:2 PL:ILLUMINA PU:HWUSI-EAS1707-615LHAAXX-L002 LB:80 DT:2010-05-05T20:00:00-0400 SM:SD37743 CN:UMCORE
|
||||||
|
@PG ID:bwa VN:0.5.4
|
||||||
|
@PG ID:GATK TableRecalibration VN:1.0.3471 CL:Covariates=[ReadGroupCovariate, QualityScoreCovariate, CycleCovariate, DinucCovariate, TileCovariate], default_read_group=null, default_platform=null, force_read_group=null, force_platform=null, solid_recal_mode=SET_Q_ZERO, window_size_nqs=5, homopolymer_nback=7, exception_if_no_tile=false, ignore_nocall_colorspace=false, pQ=5, maxQ=40, smoothing=1
|
||||||
|
1:497:R:-272+13M17D24M 113 1 497 37 37M 15 100338662 0 CGGGTCTGACCTGAGGAGAACTGTGCTCCGCCTTCAG 0;==-==9;>>>>>=>>>>>>>>>>>=>>>>>>>>>> XT:A:U NM:i:0 SM:i:37 AM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:37
|
||||||
|
19:20389:F:275+18M2D19M 99 1 17644 0 37M = 17919 314 TATGACTGCTAATAATACCTACACATGTTAGAACCAT >>>>>>>>>>>>>>>>>>>><<>>><<>>4::>>:<9 RG:Z:UM0098:1 XT:A:R NM:i:0 SM:i:0 AM:i:0 X0:i:4 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:37
|
||||||
|
19:20389:F:275+18M2D19M 147 1 17919 0 18M2D19M = 17644 -314 GTAGTACCAACTGTAAGTCCTTATCTTCATACTTTGT ;44999;499<8<8<<<8<<><<<<><7<;<<<>><< XT:A:R NM:i:2 SM:i:0 AM:i:0 X0:i:4 X1:i:0 XM:i:0 XO:i:1 XG:i:2 MD:Z:18^CA19
|
||||||
|
9:21597+10M2I25M:R:-209 83 1 21678 0 8M2I27M = 21469 -244 CACCACATCACATATACCAAGCCTGGCTGTGTCTTCT <;9<<5><<<<><<<>><<><>><9>><>>>9>>><> XT:A:R NM:i:2 SM:i:0 AM:i:0 X0:i:5 X1:i:0 XM:i:0 XO:i:1 XG:i:2 MD:Z:35
|
||||||
|
|
@ -9,10 +9,14 @@ greedyMix(data, format, verbose = TRUE)
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{data}{data file}
|
\item{data}{data file}
|
||||||
|
|
||||||
\item{format}{Format of the data c("FASTA", "VCF" ,"SAM", or "GenePop")}
|
\item{format}{Format of the data c("FASTA", "VCF", "BAM", or "GenePop")}
|
||||||
|
|
||||||
\item{verbose}{if \code{TRUE}, prints extra output information}
|
\item{verbose}{if \code{TRUE}, prints extra output information}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Clustering of individuals
|
Clustering of individuals
|
||||||
}
|
}
|
||||||
|
\references{
|
||||||
|
Samtools: a suite of programs for interacting
|
||||||
|
with high-throughput sequencing data. <http://www.htslib.org/>
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,20 +38,29 @@ test_that("handleData works as expected", {
|
||||||
context("Opening files on greedyMix")
|
context("Opening files on greedyMix")
|
||||||
|
|
||||||
df_fasta <- greedyMix(
|
df_fasta <- greedyMix(
|
||||||
data = paste(path_inst, "FASTA_clustering_haploid.fasta", sep="/"),
|
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
|
||||||
format = "FASTA"
|
format = "FASTA"
|
||||||
)
|
)
|
||||||
df_vcf <- greedyMix(
|
df_vcf <- greedyMix(
|
||||||
data = paste(path_inst, "vcf_example.vcf", sep="/"),
|
data = file.path(path_inst, "vcf_example.vcf"),
|
||||||
format = "VCF",
|
format = "VCF",
|
||||||
verbose = FALSE
|
verbose = FALSE
|
||||||
)
|
)
|
||||||
# TODO #17: add example reading VCF
|
df_bam <- greedyMix(
|
||||||
# TODO #18: add example reading SAM
|
data = file.path(path_inst, "bam_example.bam"),
|
||||||
|
format = "BAM",
|
||||||
|
)
|
||||||
# TODO #19: add example reading GenePop
|
# TODO #19: add example reading GenePop
|
||||||
test_that("Files are imported correctly", {
|
test_that("Files are imported correctly", {
|
||||||
expect_equal(dim(df_fasta), c(5, 99))
|
expect_equal(dim(df_fasta), c(5, 99))
|
||||||
expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
|
expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
|
||||||
|
expect_error(
|
||||||
|
greedyMix(
|
||||||
|
data = paste(path_inst, "sam_example.sam", sep="/"),
|
||||||
|
format = "SAM",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
expect_equal(length(df_bam[[1]]), 13)
|
||||||
})
|
})
|
||||||
|
|
||||||
context("Linkage")
|
context("Linkage")
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue