diff --git a/NAMESPACE b/NAMESPACE
index 09db974..3094ed5 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(convert_FASTA_to_BAPS)
 export(greedyMix)
 export(handleData)
 export(importFile)
diff --git a/R/convert_FASTA_to_BAPS.R b/R/convert_FASTA_to_BAPS.R
new file mode 100644
index 0000000..b86d090
--- /dev/null
+++ b/R/convert_FASTA_to_BAPS.R
@@ -0,0 +1,15 @@
+#' @title Convert from FASTA to BAPS
+#' @description Converts a file (not an R object) from FASTA to BAPS format
+#' @param file filename of FASTA file
+#' @return `data` in BAPS format
+#' @author Waldir Leoncio
+#' @export
+#' @examples
+#' file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
+#' convert_FASTA_to_BAPS(file)
+convert_FASTA_to_BAPS <- function(file) {
+  data <- load_fasta(file) # Processing data
+  data <- cbind(data, seq_len(nrow(data))) # Add IDs of individuals (sequential)
+  data[data == 0] <- -9 # Because zeros (missing) in BAPS are coded as -9
+  return(data)
+}
diff --git a/R/greedyMix.R b/R/greedyMix.R
index 9a9bc20..305ca25 100644
--- a/R/greedyMix.R
+++ b/R/greedyMix.R
@@ -28,7 +28,8 @@ greedyMix <- function(
 ) {
   # Importing and handling data ================================================
   if (tolower(format) %in% "fasta") {
-    stop("FASTA format not yet supported on greedyMix")
+    data <- load_fasta(data)
+    data <- handleData(data, "FASTA")
   }
   if (tolower(format) %in% "baps") {
     data <- process_BAPS_data(data, NULL)
diff --git a/R/handleData.R b/R/handleData.R
index 8868fc2..6ac3e8e 100644
--- a/R/handleData.R
+++ b/R/handleData.R
@@ -30,6 +30,7 @@ handleData <- function(raw_data, format = "Genepop") {
     "bam" = stop("BAM format not supported for processing yet")
   )
   data <- as.matrix(raw_data)
+
   dataApu <- data[, seq_len(nloci)]
   nollat <- matlab2r::find(dataApu == 0)
   if (!isempty(nollat)) {
@@ -54,6 +55,7 @@ handleData <- function(raw_data, format = "Genepop") {
     alleleCodes[, i] <-
       as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
   }
+  # This is where data gets converted to {1, 2, 3, 4} for {A, C, G, T}
   for (loc in seq_len(nloci)) {
     for (all in seq_len(noalle[loc])) {
       data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all
diff --git a/man/convert_FASTA_to_BAPS.Rd b/man/convert_FASTA_to_BAPS.Rd
new file mode 100644
index 0000000..ebc7396
--- /dev/null
+++ b/man/convert_FASTA_to_BAPS.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/convert_FASTA_to_BAPS.R
+\name{convert_FASTA_to_BAPS}
+\alias{convert_FASTA_to_BAPS}
+\title{Convert from FASTA to BAPS}
+\usage{
+convert_FASTA_to_BAPS(file)
+}
+\arguments{
+\item{file}{filename of FASTA file}
+}
+\value{
+`data` in BAPS format
+}
+\description{
+Converts a file (not an R object) from FASTA to BAPS format
+}
+\examples{
+file <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
+convert_FASTA_to_BAPS(file)
+}
+\author{
+Waldir Leoncio
+}