Improved handleData() to handle FASTA (#25)
This commit is contained in:
parent
95d9d658cb
commit
f47e13d3a9
2 changed files with 16 additions and 13 deletions
|
|
@ -1,5 +1,6 @@
|
||||||
#' @title Handle Data
|
#' @title Handle Data
|
||||||
#' @param raw_data Raw data in Genepop or BAPS format
|
#' @param raw_data Raw data in Genepop or BAPS format
|
||||||
|
#' @param format data format
|
||||||
#' @details The last column of the original data tells you from which
|
#' @details The last column of the original data tells you from which
|
||||||
#' individual that line is from. The function first examines the maximum
|
#' individual that line is from. The function first examines the maximum
|
||||||
#' number of lines per individual, which reveals if it is haploid, diploid, etc.
|
#' number of lines per individual, which reveals if it is haploid, diploid, etc.
|
||||||
|
|
@ -8,7 +9,7 @@
|
||||||
#' code to the smallest code that is larger than any code in use. After this,
|
#' code to the smallest code that is larger than any code in use. After this,
|
||||||
#' the function changes the allele codes so that the codes of one locus j
|
#' the function changes the allele codes so that the codes of one locus j
|
||||||
#' take values in 1, ..., noalle(j).
|
#' take values in 1, ..., noalle(j).
|
||||||
handleData <- function(raw_data) {
|
handleData <- function(raw_data, format = "Genepop") {
|
||||||
# Alkuperäisen datan viimeinen sarake kertoo, miltä yksilöltä
|
# Alkuperäisen datan viimeinen sarake kertoo, miltä yksilöltä
|
||||||
# kyseinen rivi on peräisin. Funktio tutkii ensin, että montako
|
# kyseinen rivi on peräisin. Funktio tutkii ensin, että montako
|
||||||
# riviä maksimissaan on peräisin yhdeltä yksilöltä, jolloin saadaan
|
# riviä maksimissaan on peräisin yhdeltä yksilöltä, jolloin saadaan
|
||||||
|
|
@ -20,12 +21,16 @@ handleData <- function(raw_data) {
|
||||||
# Tämän jälkeen funktio muuttaa alleelikoodit siten, että yhden lokuksen j
|
# Tämän jälkeen funktio muuttaa alleelikoodit siten, että yhden lokuksen j
|
||||||
# koodit saavat arvoja välillä 1,...,noalle(j).
|
# koodit saavat arvoja välillä 1,...,noalle(j).
|
||||||
data <- as.matrix(raw_data)
|
data <- as.matrix(raw_data)
|
||||||
nloci <- size(raw_data, 2) - 1
|
if (format %in% c("genepop", "baps")) {
|
||||||
|
nloci <- size(raw_data, 2) - 1
|
||||||
|
} else {
|
||||||
|
nloci <- size(raw_data, 2)
|
||||||
|
}
|
||||||
|
|
||||||
dataApu <- data[, 1:nloci]
|
dataApu <- data[, 1:nloci]
|
||||||
nollat <- matlab2r::find(dataApu == 0)
|
nollat <- matlab2r::find(dataApu == 0)
|
||||||
if (!isempty(nollat)) {
|
if (!isempty(nollat)) {
|
||||||
isoinAlleeli <- base::max(max(dataApu))
|
isoinAlleeli <- base::max(base::max(dataApu))
|
||||||
dataApu[nollat] <- isoinAlleeli + 1
|
dataApu[nollat] <- isoinAlleeli + 1
|
||||||
data[, 1:nloci] <- dataApu
|
data[, 1:nloci] <- dataApu
|
||||||
}
|
}
|
||||||
|
|
@ -35,9 +40,7 @@ handleData <- function(raw_data) {
|
||||||
for (i in 1:nloci) {
|
for (i in 1:nloci) {
|
||||||
alleelitLokuksessaI <- unique(data[, i])
|
alleelitLokuksessaI <- unique(data[, i])
|
||||||
alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
|
alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
|
||||||
matlab2r::find(
|
matlab2r::find(alleelitLokuksessaI >= 0)
|
||||||
alleelitLokuksessaI >= 0
|
|
||||||
)
|
|
||||||
])
|
])
|
||||||
noalle[i] <- length(alleelitLokuksessa[[i]])
|
noalle[i] <- length(alleelitLokuksessa[[i]])
|
||||||
}
|
}
|
||||||
|
|
@ -45,9 +48,7 @@ handleData <- function(raw_data) {
|
||||||
for (i in 1:nloci) {
|
for (i in 1:nloci) {
|
||||||
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
|
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
|
||||||
puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
|
puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
|
||||||
alleleCodes[, i] <- as.matrix(
|
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
|
||||||
c(alleelitLokuksessaI, zeros(puuttuvia, 1))
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (loc in seq_len(nloci)) {
|
for (loc in seq_len(nloci)) {
|
||||||
|
|
@ -56,7 +57,7 @@ handleData <- function(raw_data) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
nind <- base::max(data[, ncol(data)])
|
nind <- as.integer(base::max(data[, ncol(data)]))
|
||||||
nrows <- size(data, 1)
|
nrows <- size(data, 1)
|
||||||
ncols <- size(data, 2)
|
ncols <- size(data, 2)
|
||||||
rowsFromInd <- zeros(nind, 1)
|
rowsFromInd <- zeros(nind, 1)
|
||||||
|
|
@ -67,11 +68,11 @@ handleData <- function(raw_data) {
|
||||||
a <- -999
|
a <- -999
|
||||||
emptyRow <- repmat(a, c(1, ncols))
|
emptyRow <- repmat(a, c(1, ncols))
|
||||||
lessThanMax <- matlab2r::find(rowsFromInd < maxRowsFromInd)
|
lessThanMax <- matlab2r::find(rowsFromInd < maxRowsFromInd)
|
||||||
missingRows <- maxRowsFromInd * nind - nrows
|
missingRows <- max(maxRowsFromInd * nind - nrows, 0L)
|
||||||
data <- rbind(data, zeros(missingRows, ncols))
|
data <- rbind(data, zeros(missingRows, ncols))
|
||||||
pointer <- 1
|
pointer <- 1
|
||||||
for (ind in t(lessThanMax)) { # K?y l?pi ne yksil?t, joilta puuttuu rivej?
|
for (ind in t(lessThanMax)) { # K?y l?pi ne yksil?t, joilta puuttuu rivej?
|
||||||
miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm.
|
miss <- maxRowsFromInd - rowsFromInd[ind] # T?lt?yksil?lt?puuttuvien lkm.
|
||||||
}
|
}
|
||||||
data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
|
data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
|
||||||
newData <- data
|
newData <- data
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,12 @@
|
||||||
\alias{handleData}
|
\alias{handleData}
|
||||||
\title{Handle Data}
|
\title{Handle Data}
|
||||||
\usage{
|
\usage{
|
||||||
handleData(raw_data)
|
handleData(raw_data, format = "Genepop")
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{raw_data}{Raw data in Genepop or BAPS format}
|
\item{raw_data}{Raw data in Genepop or BAPS format}
|
||||||
|
|
||||||
|
\item{format}{data format}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Handle Data
|
Handle Data
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue