Improved conversion from FASTA to BAPS (#24)

This commit is contained in:
Waldir Leoncio 2024-08-09 15:33:03 +02:00
parent ca358ff0fb
commit 471c380ce3
2 changed files with 8 additions and 3 deletions

View file

@@ -11,5 +11,6 @@ convert_FASTA_to_BAPS <- function(file) {
data <- load_fasta(file) # Processing data data <- load_fasta(file) # Processing data
data <- cbind(data, seq_len(nrow(data))) # Add IDs of individuals (sequential) data <- cbind(data, seq_len(nrow(data))) # Add IDs of individuals (sequential)
data[data == 0] <- -9 # Because zeros (missing) in BAPS are coded as -9 data[data == 0] <- -9 # Because zeros (missing) in BAPS are coded as -9
colnames(data) <- paste("V", seq_len(ncol(data)), sep = "")
return(data) return(data)
} }

View file

@@ -56,11 +56,15 @@ handleData <- function(raw_data, format = "Genepop") {
} }
# This is where data gets converted to {1, 2, 3, 4} for {A, C, G, T} # This is where data gets converted to {1, 2, 3, 4} for {A, C, G, T}
codes <- unique(as.vector(data[, -ncol(data)]))
skip_conversion <- base::min(codes) == -9 && base::max(codes) == 4
if (!skip_conversion) {
for (loc in seq_len(nloci)) { for (loc in seq_len(nloci)) {
for (all in seq_len(noalle[loc])) { for (all in seq_len(noalle[loc])) {
data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all data[matlab2r::find(data[, loc] == alleleCodes[all, loc]), loc] <- all
} }
} }
}
nind <- as.integer(base::max(data[, ncol(data)])) nind <- as.integer(base::max(data[, ncol(data)]))
nrows <- size(data, 1) nrows <- size(data, 1)