Added numeric output option to load_fasta() (#25)
This commit is contained in:
parent
a51816d5c0
commit
95d9d658cb
2 changed files with 15 additions and 7 deletions
|
|
@ -4,7 +4,7 @@
|
|||
#' running the hierBAPS algorithm.
|
||||
#'
|
||||
#' @param msa Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered
|
||||
#' @param keep.singletons A logical indicating whether to consider singleton mutations in calculating the clusters
|
||||
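#' @param keep_singletons A logical indicating whether to consider singleton mutations in calculating the clusters
#' @param output_numbers A logical indicating whether to return the alignment as a numeric matrix (a = 1, c = 2, g = 3, t = 4, anything else = 0) instead of a character matrix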
|
||||
#'
|
||||
#' @return A numeric matrix with filtered SNP data, or a character matrix if `output_numbers = FALSE`
|
||||
#'
|
||||
|
|
@ -15,7 +15,7 @@
|
|||
#' @seealso rhierbaps::load_fasta
|
||||
#' @importFrom ape read.FASTA as.DNAbin
|
||||
#' @export
|
||||
load_fasta <- function(msa, keep.singletons = FALSE) {
|
||||
load_fasta <- function(msa, keep_singletons = FALSE, output_numbers = TRUE) {
|
||||
|
||||
# Check inputs
|
||||
if (is(msa, "character")) {
|
||||
|
|
@ -28,7 +28,9 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
|||
} else {
|
||||
stop("incorrect input for msa!")
|
||||
}
|
||||
if (!is.logical(keep.singletons)) stop("Invalid keep.singletons! Must be on of TRUE/FALSE.")
|
||||
if (!is.logical(keep_singletons)) {
|
||||
stop("Invalid keep_singletons! Must be one of TRUE/FALSE.")
|
||||
}
|
||||
|
||||
# Load sequences using ape. This does a lot of the checking for us.
|
||||
seq_names <- labels(seqs)
|
||||
|
|
@ -46,8 +48,8 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
|||
conserved <- colSums(t(t(seqs) == seqs[1, ])) == nrow(seqs)
|
||||
seqs <- seqs[, !conserved]
|
||||
|
||||
if (!keep.singletons) {
|
||||
# remove singletons as they are uninformative in the algorithm
|
||||
if (!keep_singletons) {
|
||||
# remove singletons as they are uninformative in the algorithm
|
||||
is_singleton <- apply(seqs, 2, function(x) {
|
||||
tab <- table(x)
|
||||
return(x %in% names(tab)[tab == 1])
|
||||
|
|
@ -58,5 +60,11 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
|||
# Convert gaps and unknowns to same symbol
|
||||
seqs[seqs == "n"] <- "-"
|
||||
|
||||
# Replace letters with numbers (a = 1, c = 2, g = 3, t = 4); gaps and any other symbol become 0
|
||||
if (output_numbers) {
|
||||
seqs <- matrix(match(seqs, c("a", "c", "g", "t")), nrow(seqs))
|
||||
seqs[is.na(seqs)] <- 0
|
||||
}
|
||||
|
||||
return(seqs)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,12 +4,12 @@
|
|||
\alias{load_fasta}
|
||||
\title{load_fasta}
|
||||
\usage{
|
||||
load_fasta(msa, keep.singletons = FALSE)
|
||||
load_fasta(msa, keep_singletons = FALSE, output_numbers = TRUE)
|
||||
}
|
||||
\arguments{
|
||||
\item{msa}{Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered}
|
||||
|
||||
\item{keep.singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}
|
||||
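\item{keep_singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}

\item{output_numbers}{A logical indicating whether to return the alignment as a numeric matrix (a = 1, c = 2, g = 3, t = 4, anything else = 0) instead of a character matrix}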
|
||||
}
|
||||
\value{
|
||||
A numeric matrix with filtered SNP data, or a character matrix if \code{output_numbers = FALSE}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue