diff --git a/NAMESPACE b/NAMESPACE index 70e107a..b7d74df 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(inputdlg) export(isfield) export(laskeMuutokset4) export(learn_simple_partition) +export(linkage) export(logml2String) export(lueGenePopData) export(lueNimi) diff --git a/R/fix.R b/R/fix.R new file mode 100644 index 0000000..50ec46e --- /dev/null +++ b/R/fix.R @@ -0,0 +1,5 @@ +#' @title Round toward zero +#' @description Rounds each element of input to the nearest integer towards zero. Basically the same as trunc() +#' @param X input element +#' @author Waldir Leoncio +fix <- function(X) trunc(X) \ No newline at end of file diff --git a/R/greedyMix.R b/R/greedyMix.R index 1939e69..b9e56f3 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -294,14 +294,15 @@ greedyMix <- function( data <- data[, seq_len(ncol(data) - 1)] # ASK: remove? - # h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); - # h0 = findobj('Tag','filename2_text'); + # h0 = findobj('Tag','filename1_text') + # inp = get(h0,'String'); + # h0 = findobj('Tag','filename2_text') # outp = get(h0,'String'); - # changesInLogml <- writeMixtureInfo( - # logml, rowsFromInd, data, adjprior, priorTerm, outp, inp, - # popnames, fixedK - # ) # TODO translate + changesInLogml <- writeMixtureInfo( + logml, rowsFromInd, data, adjprior, priorTerm, outp, inp, + popnames, fixedK + ) # FIXMEL depends on get function above # viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder @@ -832,75 +833,6 @@ greedyMix <- function( # k = children(~t) - m; # end - -# %--------------------------------------------------------------------------------------- - -# function [Z, dist] = newGetDistances(data, rowsFromInd) - -# ninds = max(data(:,end)); -# nloci = size(data,2)-1; -# riviLkm = nchoosek(ninds,2); - -# empties = find(data<0); -# data(empties)=0; -# data = uint8(data); % max(noalle) oltava <256 - -# pariTaulu = zeros(riviLkm,2); -# aPointer=1; -# for a=1:ninds-1 -# pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a; -# pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)'; -# aPointer = aPointer+ninds-a; -# end - -# eka = pariTaulu(:,ones(1,rowsFromInd)); -# eka = eka * rowsFromInd; -# miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]); -# eka = eka - miinus; - -# toka = pariTaulu(:,ones(1,rowsFromInd)*2); -# toka = toka * rowsFromInd; -# toka = toka - miinus; - -# %eka = uint16(eka); -# %toka = uint16(toka); - -# summa = zeros(riviLkm,1); -# vertailuja = zeros(riviLkm,1); - -# clear pariTaulu; clear miinus; - -# x = zeros(size(eka)); x = uint8(x); -# y = zeros(size(toka)); y = uint8(y); - -# for j=1:nloci; - -# for k=1:rowsFromInd -# x(:,k) = data(eka(:,k),j); -# y(:,k) = data(toka(:,k),j); -# end - -# for a=1:rowsFromInd -# for b=1:rowsFromInd -# vertailutNyt = double(x(:,a)>0 & y(:,b)>0); -# vertailuja = vertailuja + vertailutNyt; -# lisays = (x(:,a)~=y(:,b) & vertailutNyt); -# summa = summa+double(lisays); -# end -# end -# end - -# clear x; clear y; clear vertailutNyt; -# nollat = find(vertailuja==0); -# dist = zeros(length(vertailuja),1); -# dist(nollat) = 1; -# muut = find(vertailuja>0); -# dist(muut) = summa(muut)./vertailuja(muut); -# clear summa; clear vertailuja; - -# Z = linkage(dist'); - - # %---------------------------------------------------------------------------------------- @@ -946,60 +878,6 @@ greedyMix <- function( # %---------------------------------------------------------------------------------------- -# function Z = linkage(Y, method) -# [k, n] = size(Y); -# m = (1+sqrt(1+8*n))/2; -# if k ~= 1 | m ~= fix(m) -# error('The first input has to match the output of the PDIST function in size.'); -# end -# if nargin == 1 % set default switch to be 'co' -# method = 'co'; -# end -# method = lower(method(1:2)); % simplify the switch string. -# monotonic = 1; -# Z = zeros(m-1,3); % allocate the output matrix. -# N = zeros(1,2*m-1); -# N(1:m) = 1; -# n = m; % since m is changing, we need to save m in n. -# R = 1:n; -# for s = 1:(n-1) -# X = Y; -# [v, k] = min(X); -# i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); -# j = k - (i-1)*(m-i/2)+i; -# Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A -# I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. -# U = [I1 I2 I3]; -# I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; -# J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - -# switch method -# case 'si' %single linkage -# Y(I) = min(Y(I),Y(J)); -# case 'av' % average linkage -# Y(I) = Y(I) + Y(J); -# case 'co' %complete linkage -# Y(I) = max(Y(I),Y(J)); -# case 'ce' % centroid linkage -# K = N(R(i))+N(R(j)); -# Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; -# case 'wa' -# Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... -# N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); -# end -# J = [J i*(m-(i+1)/2)-m+j]; -# Y(J) = []; % no need for the cluster information about j. - -# % update m, N, R -# m = m-1; -# N(n+s) = N(R(i)) + N(R(j)); -# R(i) = n+s; -# R(j:(n-1))=R((j+1):n); -# end - - -# %----------------------------------------------------------------------------------- - # function logml = ... # initialCounts(partition, data, npops, rows, noalle, adjprior) diff --git a/R/handleData.R b/R/handleData.R index d6a4611..40d39e7 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -53,12 +53,12 @@ handleData <- function(raw_data) { ) } - nind <- max(data[, end]) + nind <- max(data[, ncol(data)]) nrows <- size(data, 1) ncols <- size(data, 2) rowsFromInd <- zeros(nind, 1) for (i in 1:nind) { - rowsFromInd[i] <- length(find(data[, end] == i)) + rowsFromInd[i] <- length(find(data[, ncol(data)] == i)) } maxRowsFromInd <- max(rowsFromInd) a <- -999 @@ -81,7 +81,7 @@ handleData <- function(raw_data) { repmat(1 / noalle[j], c(noalle[j], 1)), ones(max(noalle) - noalle[j], 1) )) - priorTerm <- priorTerm + noalle[j] * gammaln(1 / noalle[j]) + priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j]) } out <- list( newData = newData, diff --git a/R/linkage.R b/R/linkage.R new file mode 100644 index 0000000..80d6b30 --- /dev/null +++ b/R/linkage.R @@ -0,0 +1,73 @@ +#' @title Linkage +#' @description Create hierarchical cluster tree. +#' @details Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single +#' linkage algorithm. The input Y is a distance matrix such as is generated by +#' PDIST. Y may also be a more general dissimilarity matrix conforming to the +#' output format of PDIST. +#' @param Y data +#' @param method either 'si', 'av', 'co' 'ce' or 'wa' +#' @export +linkage <- function(Y, method = 'co') { + k <- size(Y)[1] + n <- size(Y)[2] + m <- (1 + sqrt(1 + 8 * n)) / 2 + if ((k != 1) | (m != trunc(m))) { + stop( + 'The first input has to match the output', + 'of the PDIST function in size.' + ) + } + method <- tolower(substr(method, 1, 2)) # simplify the switch string. + monotonic <- 1 + Z <- zeros(m - 1, 3) # allocate the output matrix. + N <- zeros(1, 2 * m - 1) + N[1:m] <- 1 + n <- m; # since m is changing, we need to save m in n. + R <- 1:n + for (s in 1:(n-1)) { + X <- Y + v <- min(X)[1] + k <- min(X)[2] + i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1))) + j <- k - (i - 1) * (m - i / 2) + i + Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A + # Temp variables + I1 <- 1:(i - 1) + I2 <- (i + 1):(j - 1) + I3 <- (j + 1):m + + U <- c(I1, I2, I3) + I <- c( + I1 * (m - (I1 + 1) / 2) - m + i, + i * (m - (i + 1) / 2) - m + I2, + i * (m - (i + 1) / 2) - m + I3 + ) + J <- c( + I1 * (m - (I1 + 1) / 2) - m + j, + I2 * (m - (I2 + 1) / 2) - m + j, + j * (m - (j + 1) / 2) - m + I3 + ) + + switch(method, + 'si' = Y[I] <- min(Y[I], Y[J]), # single linkage + 'av' = Y[I] <- Y[I] + Y[J], # average linkage + 'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage + 'ce' = { + K <- N[R[i]] + N[R[j]] # centroid linkage + Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] - + (N[R[i]] * N[R[j]] * v ^ 2) / K) / K + }, + 'wa' = Y[I] <- ((N[R[U]] + N[R[i]]) * Y[I] + (N[R[U]] + N[R[j]]) * + Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]]) + ) + J <- c(J, i * (m - (i + 1) / 2) - m + j) + Y[J] <- vector() # no need for the cluster information about j + + # update m, N, R + m <- m - 1 + N[n + s] <- N[R[i]] + N[R[j]] + R[i] <- n + s + R[j:(n - 1)] <- R[(j + 1):n] + } + return(Z) +} \ No newline at end of file diff --git a/R/newGetDistances.R b/R/newGetDistances.R new file mode 100644 index 0000000..896b1a3 --- /dev/null +++ b/R/newGetDistances.R @@ -0,0 +1,63 @@ +newGetDistances <- function(data, rowsFromInd) { + ninds <- max(data[, ncol(data)]) + nloci <- size(data, 2) - 1 + riviLkm <- choose(ninds, 2) + + empties <- find(data < 0) + data[empties] <- 0 + data <- as.integer(data) # max(noalle) oltava <256 + + pariTaulu <- zeros(riviLkm, 2) + aPointer <- 1 + for (a in (1:ninds) - 1) { + pariTaulu[aPointer:(aPointer + ninds - 1 - a), 1] <- + ones(ninds - a, 1) * a + pariTaulu[aPointer:aPointer + ninds - 1 - a, 2] <- t(a + 1:ninds) + aPointer <- aPointer + ninds - a + } + + eka <- pariTaulu[, ones(1, rowsFromInd)] + eka <- eka * rowsFromInd + miinus <- repmat((rowsFromInd - 1):0, c(riviLkm, 1)) + eka <- eka - miinus + + toka <- pariTaulu[, ones(1, rowsFromInd) * 2] + toka <- toka * rowsFromInd + toka <- toka - miinus + + summa <- zeros(riviLkm, 1) + vertailuja <- zeros(riviLkm, 1) + + rm(pariTaulu, miinus) + + x <- zeros(size(eka)) + x <- as.integer(x) + y <- zeros(size(toka)) + y <- as.integer(y) + + for (j in 1:nloci) { + for (k in 1:rowsFromInd) { + x[, k] <- data[eka[, k], j] + y[, k] <- data[toka[, k], j] + } + for (a in 1:rowsFromInd) { + for (b in 1:rowsFromInd) { + vertailutNyt <- as.double(x[, a] > 0 & y[, b] > 0) + vertailuja <- vertailuja + vertailutNyt + lisays <- (x[, a] != y[, b] & vertailutNyt) + summa <- summa + as.double(lisays) + } + } + } + + rm(x, y, vertailutNyt) + nollat <- find(vertailuja == 0) + dist <- zeros(length(vertailuja), 1) + dist[nollat] <- 1 + muut <- find(vertailuja > 0) + dist[muut] <- summa[muut] / vertailuja[muut] + rm(summa, vertailuja) + + Z = linkage(t(dist)) + return(list(Z = Z, dist = dist)) +} \ No newline at end of file diff --git a/R/writeMixtureInfo.R b/R/writeMixtureInfo.R index 174781c..ca8efea 100644 --- a/R/writeMixtureInfo.R +++ b/R/writeMixtureInfo.R @@ -14,7 +14,6 @@ #' @param COUNTS COUNTS #' @param SUMCOUNTS SUMCOUNTS #' @param LOGDIFF LOGDIFF -#' @return changesInLogml #' @export writeMixtureInfo <- function( logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS, @@ -100,7 +99,7 @@ writeMixtureInfo <- function( while (length(text) > 58) { # Take one line and display it. new_line <- takeLine(text, 58) - text <- (length(new_line) + 1):end + text <- (length(new_line) + 1):length(text) cat(new_line) if (fid != -1) { append(fid, new_line) @@ -228,7 +227,7 @@ writeMixtureInfo <- function( sum(dist2 * log2((dist2 + 10 ^ -10) / (dist1 + 10 ^ -10))) ) / nloci div <- (div12 + div21) / 2 - dist_mat(pop1, pop2) <- div + dist_mat[pop1, pop2] <- div } } diff --git a/man/fix.Rd b/man/fix.Rd new file mode 100644 index 0000000..91b4678 --- /dev/null +++ b/man/fix.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fix.R +\name{fix} +\alias{fix} +\title{Round toward zero} +\usage{ +fix(X) +} +\arguments{ +\item{X}{input element} +} +\description{ +Rounds each element of input to the nearest integer towards zero. Basically the same as trunc() +} +\author{ +Waldir Leoncio +} diff --git a/man/linkage.Rd b/man/linkage.Rd new file mode 100644 index 0000000..4f75835 --- /dev/null +++ b/man/linkage.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/linkage.R +\name{linkage} +\alias{linkage} +\title{Linkage} +\usage{ +linkage(Y, method = "co") +} +\arguments{ +\item{Y}{data} + +\item{method}{either 'si', 'av', 'co' 'ce' or 'wa'} +} +\description{ +Create hierarchical cluster tree. +} +\details{ +Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single +linkage algorithm. The input Y is a distance matrix such as is generated by +PDIST. Y may also be a more general dissimilarity matrix conforming to the +output format of PDIST. +} diff --git a/man/writeMixtureInfo.Rd b/man/writeMixtureInfo.Rd index f9a1682..e973db9 100644 --- a/man/writeMixtureInfo.Rd +++ b/man/writeMixtureInfo.Rd @@ -50,9 +50,6 @@ writeMixtureInfo( \item{LOGDIFF}{LOGDIFF} } -\value{ -changesInLogml -} \description{ Writes information about the mixture } diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 705955e..c903441 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -188,4 +188,10 @@ test_that("squeeze works as expected", { A <- array(0, dim = c(1, 1, 3)) A[, , 1:3] <- 1:3 expect_equal(squeeze(A), matrix(1:3, 3)) +}) + +test_that("fix works as expected", { + X <- matrix(c(-1.9, -3.4, 1.6, 2.5, -4.5, 4.5), 3, byrow=TRUE) + Y <- matrix(c(-1, -3, 1, 2, -4, 4), 3, byrow=TRUE) + expect_identical(fix(X), Y) }) \ No newline at end of file