From 471c380ce3fd753573d3858159d8a507d51753b7 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 9 Aug 2024 15:33:03 +0200 Subject: [PATCH] Improved conversion from FASTA to BAPS (#24) --- R/convert_FASTA_to_BAPS.R | 1 + R/handleData.R | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/R/convert_FASTA_to_BAPS.R b/R/convert_FASTA_to_BAPS.R index b86d090..c152635 100644 --- a/R/convert_FASTA_to_BAPS.R +++ b/R/convert_FASTA_to_BAPS.R @@ -11,5 +11,6 @@ convert_FASTA_to_BAPS <- function(file) { data <- load_fasta(file) # Processing data data <- cbind(data, seq_len(nrow(data))) # Add IDs of individuals (sequential) data[data == 0] <- -9 # Because zeros (missing) in BAPS are coded as -9 + colnames(data) <- paste("V", seq_len(ncol(data)), sep = "") return(data) } diff --git a/R/handleData.R b/R/handleData.R index 6ac3e8e..fbdd9c9 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -56,9 +56,13 @@ handleData <- function(raw_data, format = "Genepop") { } # This is where data gets converted to {1, 2, 3, 4} for {A, C, G, T} - for (loc in seq_len(nloci)) { - for (all in seq_len(noalle[loc])) { - data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all + codes <- unique(as.vector(data[, -ncol(data)])) + skip_conversion <- base::min(codes) == -9 && base::max(codes) == 4 + if (!skip_conversion) { + for (loc in seq_len(nloci)) { + for (all in seq_len(noalle[loc])) { + data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all + } } }