Added numeric output option to load_fasta() (#25)
This commit is contained in:
parent
a51816d5c0
commit
95d9d658cb
2 changed files with 15 additions and 7 deletions
|
|
@ -4,7 +4,7 @@
|
||||||
#' running the hierBAPS algorithm.
|
#' running the hierBAPS algorithm.
|
||||||
#'
|
#'
|
||||||
#' @param msa Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered
|
#' @param msa Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered
|
||||||
#' @param keep.singletons A logical indicating whether to consider singleton mutations in calculating the clusters
|
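#' @param keep_singletons A logical indicating whether to consider singleton mutations in calculating the clusters
#' @param output_numbers A logical indicating whether to return the alignment as numbers (a = 1, c = 2, g = 3, t = 4, anything else 0) instead of characters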
|
||||||
#'
|
#'
|
||||||
#' @return A character matrix with filtered SNP data
|
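#' @return A matrix with filtered SNP data: numeric (a = 1, c = 2, g = 3, t = 4, anything else 0) when output_numbers is TRUE, otherwise a character matrix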
|
||||||
#'
|
#'
|
||||||
|
|
@ -15,7 +15,7 @@
|
||||||
#' @seealso rhierbaps::load_fasta
|
#' @seealso rhierbaps::load_fasta
|
||||||
#' @importFrom ape read.FASTA as.DNAbin
|
#' @importFrom ape read.FASTA as.DNAbin
|
||||||
#' @export
|
#' @export
|
||||||
load_fasta <- function(msa, keep.singletons = FALSE) {
|
load_fasta <- function(msa, keep_singletons = FALSE, output_numbers = TRUE) {
|
||||||
|
|
||||||
# Check inputs
|
# Check inputs
|
||||||
if (is(msa, "character")) {
|
if (is(msa, "character")) {
|
||||||
|
|
@ -28,7 +28,9 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
||||||
} else {
|
} else {
|
||||||
stop("incorrect input for msa!")
|
stop("incorrect input for msa!")
|
||||||
}
|
}
|
||||||
if (!is.logical(keep.singletons)) stop("Invalid keep.singletons! Must be on of TRUE/FALSE.")
|
if (!is.logical(keep_singletons)) {
|
||||||
|
stop("Invalid keep_singletons! Must be one of TRUE/FALSE.")
|
||||||
|
}
|
||||||
|
|
||||||
# Load sequences using ape. This does a lot of the checking for us.
|
# Load sequences using ape. This does a lot of the checking for us.
|
||||||
seq_names <- labels(seqs)
|
seq_names <- labels(seqs)
|
||||||
|
|
@ -46,8 +48,8 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
||||||
conserved <- colSums(t(t(seqs) == seqs[1, ])) == nrow(seqs)
|
conserved <- colSums(t(t(seqs) == seqs[1, ])) == nrow(seqs)
|
||||||
seqs <- seqs[, !conserved]
|
seqs <- seqs[, !conserved]
|
||||||
|
|
||||||
if (!keep.singletons) {
|
if (!keep_singletons) {
|
||||||
# remove singletons as they are uninformative in the algorithm
|
# remove singletons as they are uninformative in the algorithm
|
||||||
is_singleton <- apply(seqs, 2, function(x) {
|
is_singleton <- apply(seqs, 2, function(x) {
|
||||||
tab <- table(x)
|
tab <- table(x)
|
||||||
return(x %in% names(tab)[tab == 1])
|
return(x %in% names(tab)[tab == 1])
|
||||||
|
|
@ -58,5 +60,11 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
||||||
# Convert gaps and unknowns to same symbol
|
# Convert gaps and unknowns to same symbol
|
||||||
seqs[seqs == "n"] <- "-"
|
seqs[seqs == "n"] <- "-"
|
||||||
|
|
||||||
|
# Replace letters with numbers, dashes with zeros
|
||||||
|
if (output_numbers) {
|
||||||
|
seqs <- matrix(match(seqs, c("a", "c", "g", "t")), nrow(seqs))
|
||||||
|
seqs[is.na(seqs)] <- 0
|
||||||
|
}
|
||||||
|
|
||||||
return(seqs)
|
return(seqs)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,12 +4,12 @@
|
||||||
\alias{load_fasta}
|
\alias{load_fasta}
|
||||||
\title{load_fasta}
|
\title{load_fasta}
|
||||||
\usage{
|
\usage{
|
||||||
load_fasta(msa, keep.singletons = FALSE)
|
load_fasta(msa, keep_singletons = FALSE, output_numbers = TRUE)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{msa}{Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered}
|
\item{msa}{Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered}
|
||||||
|
|
||||||
\item{keep.singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}
|
\item{keep_singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
A character matrix with filtered SNP data
|
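A matrix with filtered SNP data: numeric (a = 1, c = 2, g = 3, t = 4, anything else 0) when output_numbers is TRUE, otherwise a character matrix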
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue