Translated handleData

This commit is contained in:
Waldir Leoncio 2020-07-14 12:25:30 +02:00
parent bb7cfe4b8d
commit e265f738e8
4 changed files with 120 additions and 83 deletions

View file

@ -11,6 +11,7 @@ export(computePersonalAllFreqs)
export(computeRows)
export(etsiParas)
export(greedyMix)
export(handleData)
export(inputdlg)
export(isfield)
export(laskeMuutokset4)

View file

@ -835,89 +835,6 @@ greedyMix <- function(
# %---------------------------------------------------------------------------------------
# function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ...
# handleData(raw_data)
# % Alkuperäisen datan viimeinen sarake kertoo, miltä yksilöltä
# % kyseinen rivi on peräisin. Funktio tutkii ensin, että montako
# % riviä maksimissaan on peräisin yhdeltä yksilöltä, jolloin saadaan
# % tietää onko kyseessä haploidi, diploidi jne... Tämän jälkeen funktio
# % lisää tyhjiä rivejä niille yksilöille, joilta on peräisin vähemmän
# % rivejä kuin maksimimäärä.
# % Mikäli jonkin alleelin koodi on =0, funktio muuttaa tämän alleelin
# % koodi pienimmäksi koodiksi, joka isompi kuin mikään käytössä oleva koodi.
# % Tämän jälkeen funktio muuttaa alleelikoodit siten, että yhden lokuksen j
# % koodit saavat arvoja välillä 1,...,noalle(j).
# data = raw_data;
# nloci=size(raw_data,2)-1;
# dataApu = data(:,1:nloci);
# nollat = find(dataApu==0);
# if ~isempty(nollat)
# isoinAlleeli = max(max(dataApu));
# dataApu(nollat) = isoinAlleeli+1;
# data(:,1:nloci) = dataApu;
# end
# dataApu = []; nollat = []; isoinAlleeli = [];
# noalle=zeros(1,nloci);
# alleelitLokuksessa = cell(nloci,1);
# for i=1:nloci
# alleelitLokuksessaI = unique(data(:,i));
# alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0));
# noalle(i) = length(alleelitLokuksessa{i,1});
# end
# alleleCodes = zeros(max(noalle),nloci);
# for i=1:nloci
# alleelitLokuksessaI = alleelitLokuksessa{i,1};
# puuttuvia = max(noalle)-length(alleelitLokuksessaI);
# alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)];
# end
# for loc = 1:nloci
# for all = 1:noalle(loc)
# data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all;
# end;
# end;
# nind = max(data(:,end));
# nrows = size(data,1);
# ncols = size(data,2);
# rowsFromInd = zeros(nind,1);
# for i=1:nind
# rowsFromInd(i) = length(find(data(:,end)==i));
# end
# maxRowsFromInd = max(rowsFromInd);
# a = -999;
# emptyRow = repmat(a, 1, ncols);
# lessThanMax = find(rowsFromInd < maxRowsFromInd);
# missingRows = maxRowsFromInd*nind - nrows;
# data = [data; zeros(missingRows, ncols)];
# pointer = 1;
# for ind=lessThanMax' %K<>y l<>pi ne yksil<69>t, joilta puuttuu rivej?
# miss = maxRowsFromInd-rowsFromInd(ind); % T<>lt?yksil<69>lt?puuttuvien lkm.
# for j=1:miss
# rowToBeAdded = emptyRow;
# rowToBeAdded(end) = ind;
# data(nrows+pointer, :) = rowToBeAdded;
# pointer = pointer+1;
# end
# end
# data = sortrows(data, ncols); % Sorttaa yksil<69>iden mukaisesti
# newData = data;
# rowsFromInd = maxRowsFromInd;
# adjprior = zeros(max(noalle),nloci);
# priorTerm = 0;
# for j=1:nloci
# adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)];
# priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j));
# end
# %----------------------------------------------------------------------------------------
# function [Z, dist] = newGetDistances(data, rowsFromInd)
# ninds = max(data(:,end));

95
R/handleData.R Normal file
View file

@ -0,0 +1,95 @@
#' @title Handle Data
#' @description Recodes the allele codes of a raw genotype matrix and pads
#' it so that every individual contributes the same number of rows.
#' @param raw_data Numeric matrix (or an object coercible with
#' `as.matrix()`). Every column but the last holds the allele codes of one
#' locus; the last column holds the index (1, 2, ...) of the individual the
#' row belongs to.
#' @details The function proceeds in the following steps:
#' \enumerate{
#'   \item If any allele code equals 0, all zeros are replaced by the largest
#'   allele code found in the data plus one.
#'   \item For each locus `j`, the distinct non-negative codes are collected
#'   (in increasing order) and the data are recoded so that the codes of
#'   locus `j` take the values `1, ..., noalle[j]`. Negative entries are
#'   not counted as alleles and are left untouched.
#'   \item The maximum number of rows belonging to a single individual is
#'   determined (according to the original implementation this tells whether
#'   the data are haploid, diploid, etc.). Individuals with fewer rows get
#'   filler rows whose locus entries are `-999`.
#'   \item The rows are sorted by individual; the original rows of an
#'   individual precede its filler rows.
#' }
#' @return A list with the elements
#' \describe{
#'   \item{newData}{the recoded, padded and sorted data matrix}
#'   \item{rowsFromInd}{the maximum number of rows found for one individual}
#'   \item{alleleCodes}{matrix (`max(noalle)` x number of loci) holding, per
#'   column, the original allele codes of that locus, padded with zeros}
#'   \item{noalle}{1-row matrix with the number of distinct alleles per locus}
#'   \item{adjprior}{matrix (`max(noalle)` x number of loci) whose column `j`
#'   contains `1 / noalle[j]` in its first `noalle[j]` rows and 1 below}
#'   \item{priorTerm}{sum over loci of `noalle[j] * lgamma(1 / noalle[j])`}
#' }
#' @export
handleData <- function(raw_data) {
  data <- as.matrix(raw_data)
  ncols <- ncol(data)
  if (ncols < 2) {
    stop(
      "raw_data needs at least one locus column and an individual column",
      call. = FALSE
    )
  }
  nloci <- ncols - 1
  loci <- seq_len(nloci)

  # An allele code of 0 is replaced by a code larger than any code in use.
  dataApu <- data[, loci, drop = FALSE]
  nollat <- which(dataApu == 0)
  if (length(nollat) > 0) {
    dataApu[nollat] <- max(dataApu) + 1
    data[, loci] <- dataApu
  }

  # Distinct non-negative allele codes of every locus, sorted increasingly
  # (MATLAB's unique() sorts, R's does not).
  alleelitLokuksessa <- lapply(loci, function(i) {
    codes <- sort(unique(data[, i]))
    codes[codes >= 0]
  })
  noalle <- matrix(
    vapply(alleelitLokuksessa, length, numeric(1)),
    nrow = 1
  )
  max_noalle <- max(noalle)

  # Store the original codes column-wise (zero padded) and recode the data so
  # that locus j only uses the values 1, ..., noalle[j].
  alleleCodes <- matrix(0, nrow = max_noalle, ncol = nloci)
  for (i in loci) {
    codes <- alleelitLokuksessa[[i]]
    alleleCodes[seq_along(codes), i] <- codes
    # match() yields the rank of each observed code; entries without a match
    # (negative values) are kept as they are.
    recoded <- match(data[, i], codes)
    observed <- !is.na(recoded)
    data[observed, i] <- recoded[observed]
  }

  # Number of rows contributed by each individual 1, ..., nind.
  individual <- data[, ncols]
  nind <- max(individual)
  rows_per_ind <- vapply(
    seq_len(nind),
    function(i) sum(individual == i),
    numeric(1)
  )
  maxRowsFromInd <- max(rows_per_ind)

  # Append filler rows (-999 at every locus) for individuals that have fewer
  # rows than the maximum.
  miss <- maxRowsFromInd - rows_per_ind
  lessThanMax <- which(miss > 0)
  if (length(lessThanMax) > 0) {
    padding <- matrix(-999, nrow = sum(miss), ncol = ncols)
    padding[, ncols] <- rep(lessThanMax, times = miss[lessThanMax])
    data <- rbind(data, padding)
  }

  # Sort by individual; order() is stable, so filler rows stay after the
  # original rows of the same individual.
  newData <- data[order(data[, ncols]), , drop = FALSE]

  adjprior <- matrix(1, nrow = max_noalle, ncol = nloci)
  priorTerm <- 0
  for (j in loci) {
    adjprior[seq_len(noalle[j]), j] <- 1 / noalle[j]
    priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j])
  }

  list(
    newData = newData,
    rowsFromInd = maxRowsFromInd,
    alleleCodes = alleleCodes,
    noalle = noalle,
    adjprior = adjprior,
    priorTerm = priorTerm
  )
}

24
man/handleData.Rd Normal file
View file

@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/handleData.R
\name{handleData}
\alias{handleData}
\title{Handle Data}
\usage{
handleData(raw_data)
}
\arguments{
\item{raw_data}{Raw data}
}
\description{
Handle Data
}
\details{
The last column of the raw data tells which individual each row comes
from. The function first finds the maximum number of rows that belong to a
single individual, which indicates whether the data are haploid, diploid,
etc. It then adds blank rows for individuals that have fewer rows than that
maximum. If the code of an allele is 0, the function changes that allele
code to the smallest code that is larger than any code in use. After this,
the function changes the allele codes so that the codes of locus j
take values in 1, ..., noalle(j).
}