Translated handleData
This commit is contained in:
parent
bb7cfe4b8d
commit
e265f738e8
4 changed files with 120 additions and 83 deletions
|
|
@ -11,6 +11,7 @@ export(computePersonalAllFreqs)
|
|||
export(computeRows)
|
||||
export(etsiParas)
|
||||
export(greedyMix)
|
||||
export(handleData)
|
||||
export(inputdlg)
|
||||
export(isfield)
|
||||
export(laskeMuutokset4)
|
||||
|
|
|
|||
|
|
@ -835,89 +835,6 @@ greedyMix <- function(
|
|||
|
||||
# %---------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ...
|
||||
# handleData(raw_data)
|
||||
# % Alkuper<65>isen datan viimeinen sarake kertoo, milt?yksil<69>lt?
|
||||
# % kyseinen rivi on per<65>isin. Funktio tutkii ensin, ett?montako
|
||||
# % rivi?maksimissaan on per<65>isin yhdelt?yksil<69>lt? jolloin saadaan
|
||||
# % tiet<65><74> onko kyseess?haploidi, diploidi jne... T<>m<EFBFBD>n j<>lkeen funktio
|
||||
# % lis<69><73> tyhji?rivej?niille yksil<69>ille, joilta on per<65>isin v<>hemm<6D>n
|
||||
# % rivej?kuin maksimim<69><6D>r?
|
||||
# % Mik<69>li jonkin alleelin koodi on =0, funktio muuttaa t<>m<EFBFBD>n alleelin
|
||||
# % koodi pienimm<6D>ksi koodiksi, joka isompi kuin mik<69><6B>n k<>yt<79>ss?oleva koodi.
|
||||
# % T<>m<EFBFBD>n j<>lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
|
||||
# % koodit saavat arvoja v<>lill?1,...,noalle(j).
|
||||
# data = raw_data;
|
||||
# nloci=size(raw_data,2)-1;
|
||||
|
||||
# dataApu = data(:,1:nloci);
|
||||
# nollat = find(dataApu==0);
|
||||
# if ~isempty(nollat)
|
||||
# isoinAlleeli = max(max(dataApu));
|
||||
# dataApu(nollat) = isoinAlleeli+1;
|
||||
# data(:,1:nloci) = dataApu;
|
||||
# end
|
||||
# dataApu = []; nollat = []; isoinAlleeli = [];
|
||||
|
||||
# noalle=zeros(1,nloci);
|
||||
# alleelitLokuksessa = cell(nloci,1);
|
||||
# for i=1:nloci
|
||||
# alleelitLokuksessaI = unique(data(:,i));
|
||||
# alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0));
|
||||
# noalle(i) = length(alleelitLokuksessa{i,1});
|
||||
# end
|
||||
# alleleCodes = zeros(max(noalle),nloci);
|
||||
# for i=1:nloci
|
||||
# alleelitLokuksessaI = alleelitLokuksessa{i,1};
|
||||
# puuttuvia = max(noalle)-length(alleelitLokuksessaI);
|
||||
# alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)];
|
||||
# end
|
||||
|
||||
# for loc = 1:nloci
|
||||
# for all = 1:noalle(loc)
|
||||
# data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all;
|
||||
# end;
|
||||
# end;
|
||||
|
||||
# nind = max(data(:,end));
|
||||
# nrows = size(data,1);
|
||||
# ncols = size(data,2);
|
||||
# rowsFromInd = zeros(nind,1);
|
||||
# for i=1:nind
|
||||
# rowsFromInd(i) = length(find(data(:,end)==i));
|
||||
# end
|
||||
# maxRowsFromInd = max(rowsFromInd);
|
||||
# a = -999;
|
||||
# emptyRow = repmat(a, 1, ncols);
|
||||
# lessThanMax = find(rowsFromInd < maxRowsFromInd);
|
||||
# missingRows = maxRowsFromInd*nind - nrows;
|
||||
# data = [data; zeros(missingRows, ncols)];
|
||||
# pointer = 1;
|
||||
# for ind=lessThanMax' %K<>y l<>pi ne yksil<69>t, joilta puuttuu rivej?
|
||||
# miss = maxRowsFromInd-rowsFromInd(ind); % T<>lt?yksil<69>lt?puuttuvien lkm.
|
||||
# for j=1:miss
|
||||
# rowToBeAdded = emptyRow;
|
||||
# rowToBeAdded(end) = ind;
|
||||
# data(nrows+pointer, :) = rowToBeAdded;
|
||||
# pointer = pointer+1;
|
||||
# end
|
||||
# end
|
||||
# data = sortrows(data, ncols); % Sorttaa yksil<69>iden mukaisesti
|
||||
# newData = data;
|
||||
# rowsFromInd = maxRowsFromInd;
|
||||
|
||||
# adjprior = zeros(max(noalle),nloci);
|
||||
# priorTerm = 0;
|
||||
# for j=1:nloci
|
||||
# adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)];
|
||||
# priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j));
|
||||
# end
|
||||
|
||||
|
||||
# %----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function [Z, dist] = newGetDistances(data, rowsFromInd)
|
||||
|
||||
# ninds = max(data(:,end));
|
||||
|
|
|
|||
95
R/handleData.R
Normal file
95
R/handleData.R
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
#' @title Handle Data
#' @description Recodes allele codes to consecutive integers, pads every
#' individual to the same number of rows and computes the prior terms.
#' @param raw_data Numeric matrix (or an object that `as.matrix()` turns
#' into one) with one column per locus followed by a last column holding
#' the index (1, 2, ...) of the individual each row belongs to. Negative
#' allele values are treated as missing and are left untouched.
#' @details The last column of the original data gives the index of the
#' individual each row belongs to. The function first finds the maximum
#' number of rows that belong to a single individual, which tells whether
#' the data are haploid, diploid, etc. It then adds filler rows (all loci
#' set to -999) for every individual that has fewer rows than that maximum.
#' If any allele code equals 0, it is replaced by the smallest code that is
#' larger than every code in use. Finally, the allele codes are renumbered
#' so that the codes of locus j take the values 1, ..., noalle(j).
#' @return A list with the elements
#' \describe{
#'   \item{newData}{the recoded and padded data, sorted by individual}
#'   \item{rowsFromInd}{the (maximum) number of rows per individual}
#'   \item{alleleCodes}{matrix whose column j lists the original allele
#'     codes of locus j in increasing order, padded with zeros}
#'   \item{noalle}{1-row matrix with the number of alleles at each locus}
#'   \item{adjprior}{matrix whose column j holds 1 / noalle(j) in its first
#'     noalle(j) rows and 1 in the remaining rows}
#'   \item{priorTerm}{sum over loci of noalle(j) * lgamma(1 / noalle(j))}
#' }
#' @export
handleData <- function(raw_data) {
  data <- as.matrix(raw_data)
  stopifnot(is.numeric(data), nrow(data) >= 1L, ncol(data) >= 2L)

  ncols <- ncol(data)
  nloci <- ncols - 1L
  loci <- seq_len(nloci)

  # An allele coded as 0 gets a fresh code: one more than the largest value
  # found in any locus column.
  locus_values <- data[, loci, drop = FALSE]
  zero_pos <- which(locus_values == 0)
  if (length(zero_pos) > 0) {
    locus_values[zero_pos] <- max(locus_values) + 1
    data[, loci] <- locus_values
  }

  # Distinct non-negative codes per locus, in increasing order. The explicit
  # sort() matters: MATLAB's unique() sorts, R's unique() does not.
  codes_by_locus <- lapply(loci, function(j) {
    codes <- sort(unique(data[, j]))
    codes[codes >= 0]
  })
  noalle <- matrix(as.numeric(lengths(codes_by_locus)), nrow = 1L)
  max_noalle <- max(noalle)

  # Column j lists the original codes of locus j, zero-padded at the bottom.
  alleleCodes <- matrix(0, nrow = max_noalle, ncol = nloci)
  for (j in loci) {
    alleleCodes[seq_len(noalle[j]), j] <- codes_by_locus[[j]]
  }

  # Renumber the codes of every locus to 1, ..., noalle[j]. Values without a
  # code (negative, i.e. missing) are left as they are.
  for (j in loci) {
    recoded <- match(data[, j], codes_by_locus[[j]])
    known <- !is.na(recoded)
    data[known, j] <- recoded[known]
  }

  # Count the rows of each individual 1, ..., nind.
  ids <- data[, ncols]
  nind <- max(ids)
  stopifnot(nind >= 1)
  rows_per_ind <- tabulate(match(ids, seq_len(nind)), nbins = nind)
  maxRowsFromInd <- max(rows_per_ind)

  # Individuals with fewer rows than the maximum receive filler rows: -999
  # at every locus and the individual's index in the last column.
  short <- which(rows_per_ind < maxRowsFromInd)
  pad_ids <- rep(short, times = maxRowsFromInd - rows_per_ind[short])
  if (length(pad_ids) > 0) {
    filler <- matrix(-999, nrow = length(pad_ids), ncol = ncols)
    filler[, ncols] <- pad_ids
    data <- rbind(data, filler)
  }

  # order() keeps ties in their original order, so the filler rows end up
  # after the real rows of the same individual.
  newData <- data[order(data[, ncols]), , drop = FALSE]

  # Prior: 1 / noalle[j] for the alleles present at locus j, 1 for padding.
  adjprior <- matrix(1, nrow = max_noalle, ncol = nloci)
  for (j in loci) {
    adjprior[seq_len(noalle[j]), j] <- 1 / noalle[j]
  }
  priorTerm <- sum(noalle * lgamma(1 / noalle))

  list(
    newData = newData,
    rowsFromInd = maxRowsFromInd,
    alleleCodes = alleleCodes,
    noalle = noalle,
    adjprior = adjprior,
    priorTerm = priorTerm
  )
}
|
||||
24
man/handleData.Rd
Normal file
24
man/handleData.Rd
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/handleData.R
|
||||
\name{handleData}
|
||||
\alias{handleData}
|
||||
\title{Handle Data}
|
||||
\usage{
|
||||
handleData(raw_data)
|
||||
}
|
||||
\arguments{
|
||||
\item{raw_data}{Raw data}
|
||||
}
|
||||
\description{
|
||||
Handle Data
|
||||
}
|
||||
\details{
|
||||
The last column of the original data gives the index of the individual
|
||||
each row belongs to. The function first finds the maximum number of rows
|
||||
that belong to a single individual, which tells whether the data are
|
||||
haploid, diploid, etc. It then adds filler rows for every individual that
|
||||
has fewer rows than that maximum. If any allele code equals 0, it is
|
||||
replaced by the smallest code that is larger than every code in use.
|
||||
Finally, the allele codes are renumbered so that the codes of locus j
|
||||
take the values 1, ..., noalle(j).
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue