Improved load_fasta validation

This commit is contained in:
Waldir Leoncio 2021-09-03 09:09:09 +02:00
parent 26dab21cca
commit 12edd330b9
2 changed files with 7 additions and 4 deletions

View file

@ -12,7 +12,7 @@
#' msa <- system.file("ext", "seqs.fa", package="rBAPS") #' msa <- system.file("ext", "seqs.fa", package="rBAPS")
#' snp.matrix <- load_fasta(msa) #' snp.matrix <- load_fasta(msa)
#' #'
#' @author Gerry Tonkin-Hill #' @author Gerry Tonkin-Hill, Waldir Leoncio
#' @seealso rhierbaps::load_fasta #' @seealso rhierbaps::load_fasta
#' @importFrom ape read.FASTA as.DNAbin #' @importFrom ape read.FASTA as.DNAbin
#' @export #' @export
@ -37,8 +37,11 @@ load_fasta <- function(msa, keep.singletons=FALSE) {
rownames(seqs) <- seq_names rownames(seqs) <- seq_names
seqs[is.na(seqs)] <- "-" seqs[is.na(seqs)] <- "-"
if (nrow(seqs)<3) stop("Less than 3 sequences!") # Validation -----------------------------------------------------------------
warning("Characters not in acgtnACGTN- will be treated as missing (-)...") if (nrow(seqs) < 3) stop("Less than 3 sequences!")
if (any(!(as.vector(tolower(seqs)) %in% c("a", "c", "g", "t", "n", "-")))) {
warning("Characters not in acgtnACGTN- will be treated as missing (-)...")
}
#Remove conserved columns #Remove conserved columns
conserved <- colSums(t(t(seqs)==seqs[1,]))==nrow(seqs) conserved <- colSums(t(t(seqs)==seqs[1,]))==nrow(seqs)

View file

@ -27,5 +27,5 @@ snp.matrix <- load_fasta(msa)
rhierbaps::load_fasta rhierbaps::load_fasta
} }
\author{ \author{
Gerry Tonkin-Hill Gerry Tonkin-Hill, Waldir Leoncio
} }