Added processing of BAPS format

To be eventually dropped, but useful for comparing output with MATLAB code.
This commit is contained in:
Waldir Leoncio 2024-03-25 12:35:31 +01:00
parent d2ec5894b3
commit 530442487f
4 changed files with 45 additions and 6 deletions

View file

@ -30,17 +30,17 @@ handleData <- function(raw_data, format = "Genepop") {
"bam" = stop("BAM format not supported for processing yet")
)
data <- as.matrix(raw_data)
dataApu <- data[, 1:nloci]
dataApu <- data[, seq_len(nloci)]
nollat <- matlab2r::find(dataApu == 0)
if (!isempty(nollat)) {
isoinAlleeli <- base::max(base::max(dataApu))
dataApu[nollat] <- isoinAlleeli + 1
data[, 1:nloci] <- dataApu
data[, seq_len(nloci)] <- dataApu
}
noalle <- zeros(1, nloci)
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
for (i in 1:nloci) {
for (i in seq_len(nloci)) {
alleelitLokuksessaI <- unique(data[, i])
alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
matlab2r::find(alleelitLokuksessaI >= 0)
@ -48,7 +48,7 @@ handleData <- function(raw_data, format = "Genepop") {
noalle[i] <- length(alleelitLokuksessa[[i]])
}
alleleCodes <- zeros(base::max(noalle), nloci)
for (i in 1:nloci) {
for (i in seq_len(nloci)) {
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
@ -83,7 +83,7 @@ handleData <- function(raw_data, format = "Genepop") {
adjprior <- zeros(base::max(noalle), nloci)
priorTerm <- 0
for (j in 1:nloci) {
for (j in seq_len(nloci)) {
adjprior[, j] <- as.matrix(c(
repmat(1 / noalle[j], c(noalle[j], 1)),
ones(base::max(noalle) - noalle[j], 1)

View file

@ -42,6 +42,8 @@ importFile <- function(data, format, verbose) {
} else {
out <- adegenet::read.genepop(data)
}
} else if (format == "baps") {
out <- process_BAPS_data(data, NULL)$data
} else {
stop("Format not supported.")
}

View file

@ -35,7 +35,7 @@ newGetDistances <- function(data, rowsFromInd) {
y <- zeros(size(toka))
y <- apply(y, 2, as.integer)
for (j in 1:nloci) {
for (j in seq_len(nloci)) {
for (k in 1:rowsFromInd) {
x[, k] <- data[eka[, k], j]
y[, k] <- data[toka[, k], j]

37
R/process_BAPS_data.R Normal file
View file

@ -0,0 +1,37 @@
process_BAPS_data <- function(file, partitionCompare) {
if (!is.null(partitionCompare)) {
cat('Data:', file, '\n')
}
data <- read.table(file)
ninds <- testaaOnkoKunnollinenBapsData(data) # for testing purposes?
if (ninds == 0) {
warning('Incorrect Data-file.')
return(NULL)
}
popnames <- NULL # Dropped specification of population names (from BAPS 6)
result <- handleData(data, format = "BAPS")
data <- result$newData
rowsFromInd <- result$rowsFromInd
alleleCodes <- result$alleleCodes
noalle <- result$noalle
adjprior <- result$adjprior
priorTerm <- result$priorTerm
result <- newGetDistances(data, rowsFromInd)
Z <- result$Z
dist <- result$dist
# Forming and saving pre-processed data
processed_data <- list(
data = data,
rowsFromInd = rowsFromInd,
alleleCodes = alleleCodes,
noalle = noalle,
adjprior = adjprior,
priorTerm = priorTerm,
dist = dist,
popnames = popnames,
Z = Z
)
return(processed_data)
}