From 530442487f771fa74867b31444bdfc71144851b3 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 25 Mar 2024 12:35:31 +0100 Subject: [PATCH] Added processing of BAPS format To be eventually dropped, but useful for comparing output with MATLAB code. --- R/handleData.R | 10 +++++----- R/importFile.R | 2 ++ R/newGetDistances.R | 2 +- R/process_BAPS_data.R | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 R/process_BAPS_data.R diff --git a/R/handleData.R b/R/handleData.R index 6b4a209..8868fc2 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -30,17 +30,17 @@ handleData <- function(raw_data, format = "Genepop") { "bam" = stop("BAM format not supported for processing yet") ) data <- as.matrix(raw_data) - dataApu <- data[, 1:nloci] + dataApu <- data[, seq_len(nloci)] nollat <- matlab2r::find(dataApu == 0) if (!isempty(nollat)) { isoinAlleeli <- base::max(base::max(dataApu)) dataApu[nollat] <- isoinAlleeli + 1 - data[, 1:nloci] <- dataApu + data[, seq_len(nloci)] <- dataApu } noalle <- zeros(1, nloci) alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE) - for (i in 1:nloci) { + for (i in seq_len(nloci)) { alleelitLokuksessaI <- unique(data[, i]) alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[ matlab2r::find(alleelitLokuksessaI >= 0) @@ -48,7 +48,7 @@ handleData <- function(raw_data, format = "Genepop") { noalle[i] <- length(alleelitLokuksessa[[i]]) } alleleCodes <- zeros(base::max(noalle), nloci) - for (i in 1:nloci) { + for (i in seq_len(nloci)) { alleelitLokuksessaI <- alleelitLokuksessa[[i]] puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI) alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1))) @@ -83,7 +83,7 @@ handleData <- function(raw_data, format = "Genepop") { adjprior <- zeros(base::max(noalle), nloci) priorTerm <- 0 - for (j in 1:nloci) { + for (j in seq_len(nloci)) { adjprior[, j] <- as.matrix(c( repmat(1 / noalle[j], c(noalle[j], 1)), ones(base::max(noalle) - noalle[j], 1) diff --git a/R/importFile.R b/R/importFile.R index 2ed9c75..494a6a6 100644 --- a/R/importFile.R +++ b/R/importFile.R @@ -42,6 +42,8 @@ importFile <- function(data, format, verbose) { } else { out <- adegenet::read.genepop(data) } + } else if (format == "baps") { + out <- process_BAPS_data(data, NULL)$data } else { stop("Format not supported.") } diff --git a/R/newGetDistances.R b/R/newGetDistances.R index cf62722..ef12041 100644 --- a/R/newGetDistances.R +++ b/R/newGetDistances.R @@ -35,7 +35,7 @@ newGetDistances <- function(data, rowsFromInd) { y <- zeros(size(toka)) y <- apply(y, 2, as.integer) - for (j in 1:nloci) { + for (j in seq_len(nloci)) { for (k in 1:rowsFromInd) { x[, k] <- data[eka[, k], j] y[, k] <- data[toka[, k], j] diff --git a/R/process_BAPS_data.R b/R/process_BAPS_data.R new file mode 100644 index 0000000..7a76bf6 --- /dev/null +++ b/R/process_BAPS_data.R @@ -0,0 +1,37 @@ +process_BAPS_data <- function(file, partitionCompare) { + if (!is.null(partitionCompare)) { + cat('Data:', file, '\n') + } + data <- read.table(file) + ninds <- testaaOnkoKunnollinenBapsData(data) # for testing purposes? + if (ninds == 0) { + warning('Incorrect Data-file.') + return(NULL) + } + popnames <- NULL # Dropped specification of population names (from BAPS 6) + + result <- handleData(data, format = "BAPS") + data <- result$newData + rowsFromInd <- result$rowsFromInd + alleleCodes <- result$alleleCodes + noalle <- result$noalle + adjprior <- result$adjprior + priorTerm <- result$priorTerm + result <- newGetDistances(data, rowsFromInd) + Z <- result$Z + dist <- result$dist + + # Forming and saving pre-processed data + processed_data <- list( + data = data, + rowsFromInd = rowsFromInd, + alleleCodes = alleleCodes, + noalle = noalle, + adjprior = adjprior, + priorTerm = priorTerm, + dist = dist, + popnames = popnames, + Z = Z + ) + return(processed_data) +}