Added greedyMix support for VCF (closes #17)

This commit is contained in:
Waldir Leoncio 2021-09-03 11:10:06 +02:00
parent f44a71d97b
commit 3fd15086af
3 changed files with 18 additions and 3 deletions

View file

@ -1,15 +1,16 @@
#' @title Clustering of individuals #' @title Clustering of individuals
#' @param data data file #' @param data data file
#' @param format Format of the data: "FASTA", "VCF", "SAM", or "GenePop" #' @param format Format of the data: "FASTA", "VCF", "SAM", or "GenePop"
#' @param verbose if \code{TRUE}, prints extra output information
#' @importFrom utils read.delim #' @importFrom utils read.delim
#' @export #' @export
greedyMix <- function(data, format) { greedyMix <- function(data, format, verbose = TRUE) {
format <- tolower(format) format <- tolower(format)
if (format == "fasta") { if (format == "fasta") {
out <- load_fasta(data) out <- load_fasta(data)
} else if (format == "vcf") { } else if (format == "vcf") {
stop("VCF files not yet supported." )
# TODO #17: implement load_vcf() # TODO #17: implement load_vcf()
out <- vcfR::read.vcfR(data, verbose = verbose)
} else if (format == "sam") { } else if (format == "sam") {
stop("SAM files not yet supported." ) stop("SAM files not yet supported." )
# TODO #18: implement load_sam() # TODO #18: implement load_sam()

8
inst/ext/vcf_example.vcf Normal file
View file

@ -0,0 +1,8 @@
##fileformat=VCFv4.2
##FORMAT=<ID=GT,Number=1,Type=Integer,Description="Genotype">
##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype Probabilities">
##FORMAT=<ID=PL,Number=G,Type=Float,Description="Phred-scaled Genotype Likelihoods">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP001 SAMP002
20 1291018 rs11449 G A . PASS . GT 0/0 0/1
20 2300608 rs84825 C T . PASS . GT:GP 0/1:. 0/1:0.03,0.97,0
20 2301308 rs84823 T G . PASS . GT:PL ./.:. 1/1:10,5,0

View file

@ -39,13 +39,19 @@ context("Opening files on greedyMix")
df_fasta <- greedyMix( df_fasta <- greedyMix(
data = paste(path_inst, "FASTA_clustering_haploid.fasta", sep="/"), data = paste(path_inst, "FASTA_clustering_haploid.fasta", sep="/"),
format ="fasta" format = "FASTA"
)
df_vcf <- greedyMix(
data = paste(path_inst, "vcf_example.vcf", sep="/"),
format = "VCF",
verbose = FALSE
) )
# TODO #17: add example reading VCF # TODO #17: add example reading VCF
# TODO #18: add example reading SAM # TODO #18: add example reading SAM
# TODO #19: add example reading GenePop # TODO #19: add example reading GenePop
test_that("Files are imported correctly", { test_that("Files are imported correctly", {
expect_equal(dim(df_fasta), c(5, 99)) expect_equal(dim(df_fasta), c(5, 99))
expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
}) })
context("Linkage") context("Linkage")