From e265f738e8ad2309a39919b65e22e0f3f8f5e439 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 14 Jul 2020 12:25:30 +0200 Subject: [PATCH] Translated handleData --- NAMESPACE | 1 + R/greedyMix.R | 83 ----------------------------------------- R/handleData.R | 95 +++++++++++++++++++++++++++++++++++++++++++++++ man/handleData.Rd | 24 ++++++++++++ 4 files changed, 120 insertions(+), 83 deletions(-) create mode 100644 R/handleData.R create mode 100644 man/handleData.Rd diff --git a/NAMESPACE b/NAMESPACE index 8516a73..13f1ad8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ export(computePersonalAllFreqs) export(computeRows) export(etsiParas) export(greedyMix) +export(handleData) export(inputdlg) export(isfield) export(laskeMuutokset4) diff --git a/R/greedyMix.R b/R/greedyMix.R index 7b83b77..16f735f 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -835,89 +835,6 @@ greedyMix <- function( # %--------------------------------------------------------------------------------------- - -# function [newData, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = ... -# handleData(raw_data) -# % Alkuper�isen datan viimeinen sarake kertoo, milt?yksil�lt? -# % kyseinen rivi on per�isin. Funktio tutkii ensin, ett?montako -# % rivi?maksimissaan on per�isin yhdelt?yksil�lt? jolloin saadaan -# % tiet�� onko kyseess?haploidi, diploidi jne... T�m�n j�lkeen funktio -# % lis�� tyhji?rivej?niille yksil�ille, joilta on per�isin v�hemm�n -# % rivej?kuin maksimim��r? -# % Mik�li jonkin alleelin koodi on =0, funktio muuttaa t�m�n alleelin -# % koodi pienimm�ksi koodiksi, joka isompi kuin mik��n k�yt�ss?oleva koodi. -# % T�m�n j�lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j -# % koodit saavat arvoja v�lill?1,...,noalle(j). 
-# data = raw_data; -# nloci=size(raw_data,2)-1; - -# dataApu = data(:,1:nloci); -# nollat = find(dataApu==0); -# if ~isempty(nollat) -# isoinAlleeli = max(max(dataApu)); -# dataApu(nollat) = isoinAlleeli+1; -# data(:,1:nloci) = dataApu; -# end -# dataApu = []; nollat = []; isoinAlleeli = []; - -# noalle=zeros(1,nloci); -# alleelitLokuksessa = cell(nloci,1); -# for i=1:nloci -# alleelitLokuksessaI = unique(data(:,i)); -# alleelitLokuksessa{i,1} = alleelitLokuksessaI(find(alleelitLokuksessaI>=0)); -# noalle(i) = length(alleelitLokuksessa{i,1}); -# end -# alleleCodes = zeros(max(noalle),nloci); -# for i=1:nloci -# alleelitLokuksessaI = alleelitLokuksessa{i,1}; -# puuttuvia = max(noalle)-length(alleelitLokuksessaI); -# alleleCodes(:,i) = [alleelitLokuksessaI; zeros(puuttuvia,1)]; -# end - -# for loc = 1:nloci -# for all = 1:noalle(loc) -# data(find(data(:,loc)==alleleCodes(all,loc)), loc)=all; -# end; -# end; - -# nind = max(data(:,end)); -# nrows = size(data,1); -# ncols = size(data,2); -# rowsFromInd = zeros(nind,1); -# for i=1:nind -# rowsFromInd(i) = length(find(data(:,end)==i)); -# end -# maxRowsFromInd = max(rowsFromInd); -# a = -999; -# emptyRow = repmat(a, 1, ncols); -# lessThanMax = find(rowsFromInd < maxRowsFromInd); -# missingRows = maxRowsFromInd*nind - nrows; -# data = [data; zeros(missingRows, ncols)]; -# pointer = 1; -# for ind=lessThanMax' %K�y l�pi ne yksil�t, joilta puuttuu rivej? -# miss = maxRowsFromInd-rowsFromInd(ind); % T�lt?yksil�lt?puuttuvien lkm. 
-# for j=1:miss
-# rowToBeAdded = emptyRow;
-# rowToBeAdded(end) = ind;
-# data(nrows+pointer, :) = rowToBeAdded;
-# pointer = pointer+1;
-# end
-# end
-# data = sortrows(data, ncols); % Sorttaa yksil�iden mukaisesti
-# newData = data;
-# rowsFromInd = maxRowsFromInd;
-
-# adjprior = zeros(max(noalle),nloci);
-# priorTerm = 0;
-# for j=1:nloci
-# adjprior(:,j) = [repmat(1/noalle(j), [noalle(j),1]) ; ones(max(noalle)-noalle(j),1)];
-# priorTerm = priorTerm + noalle(j)*gammaln(1/noalle(j));
-# end
-
-
-# %----------------------------------------------------------------------------------------
-
-
 # function [Z, dist] = newGetDistances(data, rowsFromInd)
 
 # ninds = max(data(:,end));
diff --git a/R/handleData.R b/R/handleData.R
new file mode 100644
index 0000000..d6a4611
--- /dev/null
+++ b/R/handleData.R
@@ -0,0 +1,100 @@
+#' @title Handle Data
+#' @description Recodes the allele codes of every locus to consecutive
+#' integers and pads the data with placeholder rows so that every individual
+#' has the same number of rows.
+#' @param raw_data Numeric matrix, or an object that as.matrix() turns into
+#' one. The last column holds the index (1, 2, ...) of the individual that
+#' each row comes from; every other column holds the allele codes of one
+#' locus. Negative entries are not treated as allele codes and are left
+#' unchanged.
+#' @details If some allele is coded as 0, that code is first replaced by the
+#' largest value found in the allele columns plus one. The allele codes of
+#' each locus j are then replaced by the values 1, ..., noalle(j), following
+#' the increasing order of the original codes. Finally, the function finds
+#' the largest number of rows that belong to a single individual (which
+#' tells whether the data are haploid, diploid, etc.), appends rows filled
+#' with -999 for the individuals that have fewer rows, and sorts all rows by
+#' individual.
+#' @return A list with the following elements:
+#' \describe{
+#'   \item{newData}{the recoded and padded data, sorted by individual}
+#'   \item{rowsFromInd}{number of rows per individual after padding}
+#'   \item{alleleCodes}{matrix whose column j holds the original allele
+#'   codes of locus j in increasing order, filled up with zeros}
+#'   \item{noalle}{one-row matrix with the number of distinct allele codes
+#'   of each locus}
+#'   \item{adjprior}{matrix whose column j holds 1 / noalle(j) in its first
+#'   noalle(j) rows and 1 in the remaining rows}
+#'   \item{priorTerm}{sum over the loci j of
+#'   noalle(j) * lgamma(1 / noalle(j))}
+#' }
+#' @export
+handleData <- function(raw_data) {
+  data <- as.matrix(raw_data)
+  stopifnot(is.numeric(data), nrow(data) >= 1, ncol(data) >= 2)
+  ncols <- ncol(data)
+  nloci <- ncols - 1
+  loci <- seq_len(nloci)
+
+  # An allele coded as 0 is recoded as (largest value in allele columns) + 1.
+  alleles <- data[, loci, drop = FALSE]
+  zero_cells <- which(alleles == 0)
+  if (length(zero_cells) > 0) {
+    alleles[zero_cells] <- max(alleles) + 1
+    data[, loci] <- alleles
+  }
+
+  # Distinct non-negative codes of every locus, in increasing order.
+  codes_by_locus <- lapply(loci, function(j) {
+    codes <- sort(unique(data[, j]))
+    codes[codes >= 0]
+  })
+  noalle <- matrix(as.numeric(lengths(codes_by_locus)), nrow = 1)
+  max_noalle <- max(noalle)
+
+  alleleCodes <- matrix(0, nrow = max_noalle, ncol = nloci)
+  adjprior <- matrix(1, nrow = max_noalle, ncol = nloci)
+  for (j in loci) {
+    codes <- codes_by_locus[[j]]
+    filled <- seq_along(codes)
+    alleleCodes[filled, j] <- codes
+    adjprior[filled, j] <- 1 / noalle[j]
+
+    # Replace every code by its position among the sorted codes of locus j;
+    # entries that are not allele codes (negative values) stay as they are.
+    position <- match(data[, j], codes)
+    recoded <- !is.na(position)
+    data[recoded, j] <- position[recoded]
+  }
+  priorTerm <- sum(noalle * lgamma(1 / noalle))
+
+  # Count the rows of every individual 1, ..., nind.
+  nind <- max(data[, ncols])
+  stopifnot(nind >= 1)
+  rows_per_ind <- vapply(
+    seq_len(nind), function(i) sum(data[, ncols] == i), numeric(1)
+  )
+  maxRowsFromInd <- max(rows_per_ind)
+
+  # Individuals with fewer rows than the maximum get extra rows filled with
+  # -999; only the last column of such a row holds the individual's index.
+  pad_ind <- rep(seq_len(nind), times = maxRowsFromInd - rows_per_ind)
+  if (length(pad_ind) > 0) {
+    padding <- matrix(-999, nrow = length(pad_ind), ncol = ncols)
+    padding[, ncols] <- pad_ind
+    data <- rbind(data, padding)
+  }
+
+  # order() leaves ties in their original order, so each individual's rows
+  # keep their input order and its padding rows come last.
+  newData <- data[order(data[, ncols]), , drop = FALSE]
+
+  list(
+    newData = newData,
+    rowsFromInd = maxRowsFromInd,
+    alleleCodes = alleleCodes,
+    noalle = noalle,
+    adjprior = adjprior,
+    priorTerm = priorTerm
+  )
+}
\ No newline at end of file
diff --git a/man/handleData.Rd b/man/handleData.Rd
new file mode 100644
index 0000000..527cc32
--- /dev/null
+++ b/man/handleData.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/handleData.R
+\name{handleData}
+\alias{handleData}
+\title{Handle Data}
+\usage{
+handleData(raw_data)
+}
+\arguments{
+\item{raw_data}{Numeric matrix, or an object that as.matrix() turns into
+one. The last column holds the index (1, 2, ...) of the individual that
+each row comes from; every other column holds the allele codes of one
+locus. Negative entries are not treated as allele codes and are left
+unchanged.}
+}
+\value{
+A list with the following elements:
+\describe{
+  \item{newData}{the recoded and padded data, sorted by individual}
+  \item{rowsFromInd}{number of rows per individual after padding}
+  \item{alleleCodes}{matrix whose column j holds the original allele
+  codes of locus j in increasing order, filled up with zeros}
+  \item{noalle}{one-row matrix with the number of distinct allele codes
+  of each locus}
+  \item{adjprior}{matrix whose column j holds 1 / noalle(j) in its first
+  noalle(j) rows and 1 in the remaining rows}
+  \item{priorTerm}{sum over the loci j of
+  noalle(j) * lgamma(1 / noalle(j))}
+}
+}
+\description{
+Recodes the allele codes of every locus to consecutive
+integers and pads the data with placeholder rows so that every individual
+has the same number of rows.
+}
+\details{
+If some allele is coded as 0, that code is first replaced by the
+largest value found in the allele columns plus one. The allele codes of
+each locus j are then replaced by the values 1, ..., noalle(j), following
+the increasing order of the original codes. Finally, the function finds
+the largest number of rows that belong to a single individual (which
+tells whether the data are haploid, diploid, etc.), appends rows filled
+with -999 for the individuals that have fewer rows, and sorts all rows by
+individual.
+}