Added processing of BAPS format
To be eventually dropped, but useful for comparing output with MATLAB code.
This commit is contained in:
parent
d2ec5894b3
commit
530442487f
4 changed files with 45 additions and 6 deletions
|
|
@ -30,17 +30,17 @@ handleData <- function(raw_data, format = "Genepop") {
|
||||||
"bam" = stop("BAM format not supported for processing yet")
|
"bam" = stop("BAM format not supported for processing yet")
|
||||||
)
|
)
|
||||||
data <- as.matrix(raw_data)
|
data <- as.matrix(raw_data)
|
||||||
dataApu <- data[, 1:nloci]
|
dataApu <- data[, seq_len(nloci)]
|
||||||
nollat <- matlab2r::find(dataApu == 0)
|
nollat <- matlab2r::find(dataApu == 0)
|
||||||
if (!isempty(nollat)) {
|
if (!isempty(nollat)) {
|
||||||
isoinAlleeli <- base::max(base::max(dataApu))
|
isoinAlleeli <- base::max(base::max(dataApu))
|
||||||
dataApu[nollat] <- isoinAlleeli + 1
|
dataApu[nollat] <- isoinAlleeli + 1
|
||||||
data[, 1:nloci] <- dataApu
|
data[, seq_len(nloci)] <- dataApu
|
||||||
}
|
}
|
||||||
|
|
||||||
noalle <- zeros(1, nloci)
|
noalle <- zeros(1, nloci)
|
||||||
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
|
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
|
||||||
for (i in 1:nloci) {
|
for (i in seq_len(nloci)) {
|
||||||
alleelitLokuksessaI <- unique(data[, i])
|
alleelitLokuksessaI <- unique(data[, i])
|
||||||
alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
|
alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
|
||||||
matlab2r::find(alleelitLokuksessaI >= 0)
|
matlab2r::find(alleelitLokuksessaI >= 0)
|
||||||
|
|
@ -48,7 +48,7 @@ handleData <- function(raw_data, format = "Genepop") {
|
||||||
noalle[i] <- length(alleelitLokuksessa[[i]])
|
noalle[i] <- length(alleelitLokuksessa[[i]])
|
||||||
}
|
}
|
||||||
alleleCodes <- zeros(base::max(noalle), nloci)
|
alleleCodes <- zeros(base::max(noalle), nloci)
|
||||||
for (i in 1:nloci) {
|
for (i in seq_len(nloci)) {
|
||||||
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
|
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
|
||||||
puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
|
puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
|
||||||
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
|
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
|
||||||
|
|
@ -83,7 +83,7 @@ handleData <- function(raw_data, format = "Genepop") {
|
||||||
|
|
||||||
adjprior <- zeros(base::max(noalle), nloci)
|
adjprior <- zeros(base::max(noalle), nloci)
|
||||||
priorTerm <- 0
|
priorTerm <- 0
|
||||||
for (j in 1:nloci) {
|
for (j in seq_len(nloci)) {
|
||||||
adjprior[, j] <- as.matrix(c(
|
adjprior[, j] <- as.matrix(c(
|
||||||
repmat(1 / noalle[j], c(noalle[j], 1)),
|
repmat(1 / noalle[j], c(noalle[j], 1)),
|
||||||
ones(base::max(noalle) - noalle[j], 1)
|
ones(base::max(noalle) - noalle[j], 1)
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,8 @@ importFile <- function(data, format, verbose) {
|
||||||
} else {
|
} else {
|
||||||
out <- adegenet::read.genepop(data)
|
out <- adegenet::read.genepop(data)
|
||||||
}
|
}
|
||||||
|
} else if (format == "baps") {
|
||||||
|
out <- process_BAPS_data(data, NULL)$data
|
||||||
} else {
|
} else {
|
||||||
stop("Format not supported.")
|
stop("Format not supported.")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ newGetDistances <- function(data, rowsFromInd) {
|
||||||
y <- zeros(size(toka))
|
y <- zeros(size(toka))
|
||||||
y <- apply(y, 2, as.integer)
|
y <- apply(y, 2, as.integer)
|
||||||
|
|
||||||
for (j in 1:nloci) {
|
for (j in seq_len(nloci)) {
|
||||||
for (k in 1:rowsFromInd) {
|
for (k in 1:rowsFromInd) {
|
||||||
x[, k] <- data[eka[, k], j]
|
x[, k] <- data[eka[, k], j]
|
||||||
y[, k] <- data[toka[, k], j]
|
y[, k] <- data[toka[, k], j]
|
||||||
|
|
|
||||||
37
R/process_BAPS_data.R
Normal file
37
R/process_BAPS_data.R
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
process_BAPS_data <- function(file, partitionCompare) {
|
||||||
|
if (!is.null(partitionCompare)) {
|
||||||
|
cat('Data:', file, '\n')
|
||||||
|
}
|
||||||
|
data <- read.table(file)
|
||||||
|
ninds <- testaaOnkoKunnollinenBapsData(data) # for testing purposes?
|
||||||
|
if (ninds == 0) {
|
||||||
|
warning('Incorrect Data-file.')
|
||||||
|
return(NULL)
|
||||||
|
}
|
||||||
|
popnames <- NULL # Dropped specification of population names (from BAPS 6)
|
||||||
|
|
||||||
|
result <- handleData(data, format = "BAPS")
|
||||||
|
data <- result$newData
|
||||||
|
rowsFromInd <- result$rowsFromInd
|
||||||
|
alleleCodes <- result$alleleCodes
|
||||||
|
noalle <- result$noalle
|
||||||
|
adjprior <- result$adjprior
|
||||||
|
priorTerm <- result$priorTerm
|
||||||
|
result <- newGetDistances(data, rowsFromInd)
|
||||||
|
Z <- result$Z
|
||||||
|
dist <- result$dist
|
||||||
|
|
||||||
|
# Forming and saving pre-processed data
|
||||||
|
processed_data <- list(
|
||||||
|
data = data,
|
||||||
|
rowsFromInd = rowsFromInd,
|
||||||
|
alleleCodes = alleleCodes,
|
||||||
|
noalle = noalle,
|
||||||
|
adjprior = adjprior,
|
||||||
|
priorTerm = priorTerm,
|
||||||
|
dist = dist,
|
||||||
|
popnames = popnames,
|
||||||
|
Z = Z
|
||||||
|
)
|
||||||
|
return(processed_data)
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue