Merge branch 'newGetDistances' into dev

2020-07-14 14:52:41 +02:00 · 2020-07-14 14:52:41 +02:00 · 31a61c1661
commit 31a61c1661
parent 1dee767e6e 7a68fbd585
11 changed files with 199 additions and 138 deletions
--- a/1
+++ b/1
@ -17,6 +17,7 @@ export(inputdlg)
 export(isfield)
 export(laskeMuutokset4)
 export(learn_simple_partition)
+export(linkage)
 export(logml2String)
 export(lueGenePopData)
 export(lueNimi)
--- a/R/fix.R
+++ b/R/fix.R
@ -0,0 +1,5 @@
+#' @title Round toward zero
+#' @description Rounds each element of input to the nearest integer towards zero. Basically the same as trunc()
+#' @param X input element
+#' @author Waldir Leoncio
+fix <- function(X) trunc(X)
--- a/R/greedyMix.R
+++ b/R/greedyMix.R
@ -294,14 +294,15 @@ greedyMix <- function(
 	data <- data[, seq_len(ncol(data) - 1)]

 	# ASK: remove?
-	# h0 = findobj('Tag','filename1_text');  inp = get(h0,'String');
-	# h0 = findobj('Tag','filename2_text');
+	# h0 = findobj('Tag','filename1_text')
+	# inp = get(h0,'String');
+	# h0 = findobj('Tag','filename2_text')
 	# outp = get(h0,'String');

-	# changesInLogml <- writeMixtureInfo(
-	# 	logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
-	# 	popnames, fixedK
-	# ) # TODO translate
+	changesInLogml <- writeMixtureInfo(
+		logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
+		popnames, fixedK
+	) # FIXMEL depends on get function above

 	# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder

@ -832,75 +833,6 @@ greedyMix <- function(
 # 	   k = children(~t) - m;
 # 	end

-
-# 	%---------------------------------------------------------------------------------------
-
-# 	function [Z, dist] = newGetDistances(data, rowsFromInd)
-
-# 	ninds = max(data(:,end));
-# 	nloci = size(data,2)-1;
-# 	riviLkm = nchoosek(ninds,2);
-
-# 	empties = find(data<0);
-# 	data(empties)=0;
-# 	data = uint8(data);   % max(noalle) oltava <256
-
-# 	pariTaulu = zeros(riviLkm,2);
-# 	aPointer=1;
-# 	for a=1:ninds-1
-# 		pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a;
-# 		pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)';
-# 		aPointer = aPointer+ninds-a;
-# 	end
-
-# 	eka = pariTaulu(:,ones(1,rowsFromInd));
-# 	eka = eka * rowsFromInd;
-# 	miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]);
-# 	eka = eka - miinus;
-
-# 	toka = pariTaulu(:,ones(1,rowsFromInd)*2);
-# 	toka = toka * rowsFromInd;
-# 	toka = toka - miinus;
-
-# 	%eka = uint16(eka);
-# 	%toka = uint16(toka);
-
-# 	summa = zeros(riviLkm,1);
-# 	vertailuja = zeros(riviLkm,1);
-
-# 	clear pariTaulu; clear miinus;
-
-# 	x = zeros(size(eka));    x = uint8(x);
-# 	y = zeros(size(toka));   y = uint8(y);
-
-# 	for j=1:nloci;
-
-# 		for k=1:rowsFromInd
-# 			x(:,k) = data(eka(:,k),j);
-# 			y(:,k) = data(toka(:,k),j);
-# 		end
-
-# 		for a=1:rowsFromInd
-# 			for b=1:rowsFromInd
-# 				vertailutNyt = double(x(:,a)>0 & y(:,b)>0);
-# 				vertailuja = vertailuja + vertailutNyt;
-# 				lisays = (x(:,a)~=y(:,b) & vertailutNyt);
-# 				summa = summa+double(lisays);
-# 			end
-# 		end
-# 	end
-
-# 	clear x;    clear y;   clear vertailutNyt;
-# 	nollat = find(vertailuja==0);
-# 	dist = zeros(length(vertailuja),1);
-# 	dist(nollat) = 1;
-# 	muut = find(vertailuja>0);
-# 	dist(muut) = summa(muut)./vertailuja(muut);
-# 	clear summa; clear vertailuja;
-
-# 	Z = linkage(dist');
-
-
 # 	%----------------------------------------------------------------------------------------


@ -946,60 +878,6 @@ greedyMix <- function(
 # 	%----------------------------------------------------------------------------------------


-# 	function Z = linkage(Y, method)
-# 	[k, n] = size(Y);
-# 	m = (1+sqrt(1+8*n))/2;
-# 	if k ~= 1 | m ~= fix(m)
-# 	  error('The first input has to match the output of the PDIST function in size.');
-# 	end
-# 	if nargin == 1 % set default switch to be 'co'
-# 	   method = 'co';
-# 	end
-# 	method = lower(method(1:2)); % simplify the switch string.
-# 	monotonic = 1;
-# 	Z = zeros(m-1,3); % allocate the output matrix.
-# 	N = zeros(1,2*m-1);
-# 	N(1:m) = 1;
-# 	n = m; % since m is changing, we need to save m in n.
-# 	R = 1:n;
-# 	for s = 1:(n-1)
-# 	   X = Y;
-# 	   [v, k] = min(X);
-# 	   i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1)));
-# 	   j = k - (i-1)*(m-i/2)+i;
-# 	   Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A
-# 	   I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables.
-# 	   U = [I1 I2 I3];
-# 	   I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3];
-# 	   J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3];
-
-# 	   switch method
-# 	   case 'si' %single linkage
-# 		  Y(I) = min(Y(I),Y(J));
-# 	   case 'av' % average linkage
-# 		  Y(I) = Y(I) + Y(J);
-# 	   case 'co' %complete linkage
-# 		  Y(I) = max(Y(I),Y(J));
-# 	   case 'ce' % centroid linkage
-# 		  K = N(R(i))+N(R(j));
-# 		  Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K;
-# 	   case 'wa'
-# 		  Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ...
-# 		  N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U)));
-# 	   end
-# 	   J = [J i*(m-(i+1)/2)-m+j];
-# 	   Y(J) = []; % no need for the cluster information about j.
-
-# 	   % update m, N, R
-# 	   m = m-1;
-# 	   N(n+s) = N(R(i)) + N(R(j));
-# 	   R(i) = n+s;
-# 	   R(j:(n-1))=R((j+1):n);
-# 	end
-
-
-# 	%-----------------------------------------------------------------------------------
-
 # 	function logml = ...
 # 		initialCounts(partition, data, npops, rows, noalle, adjprior)

--- a/R/handleData.R
+++ b/R/handleData.R
@ -53,12 +53,12 @@ handleData <- function(raw_data) {
 		)
 	}

-	nind <- max(data[, end])
+	nind <- max(data[, ncol(data)])
 	nrows <- size(data, 1)
 	ncols <- size(data, 2)
 	rowsFromInd <- zeros(nind, 1)
 	for (i in 1:nind) {
-		rowsFromInd[i] <- length(find(data[, end] == i))
+		rowsFromInd[i] <- length(find(data[, ncol(data)] == i))
 	}
 	maxRowsFromInd <- max(rowsFromInd)
 	a <- -999
@ -81,7 +81,7 @@ handleData <- function(raw_data) {
 			repmat(1 / noalle[j], c(noalle[j], 1)),
 			ones(max(noalle) - noalle[j], 1)
 		))
-		priorTerm <- priorTerm + noalle[j] * gammaln(1 / noalle[j])
+		priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j])
 	}
 	out <- list(
 		newData = newData,
--- a/R/linkage.R
+++ b/R/linkage.R
@ -0,0 +1,73 @@
+#' @title Linkage
+#' @description Create hierarchical cluster tree.
+#' @details Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single
+#' linkage algorithm.  The input Y is a distance matrix such as is generated by
+#' PDIST. Y may also be a more general dissimilarity matrix conforming to the
+#' output format of PDIST.
+#' @param Y data
+#' @param method either 'si', 'av', 'co' 'ce' or 'wa'
+#' @export
+linkage <- function(Y, method = 'co') {
+	k <- size(Y)[1]
+	n <- size(Y)[2]
+	m <- (1 + sqrt(1 + 8 * n)) / 2
+	if ((k != 1) | (m != trunc(m))) {
+		stop(
+			'The first input has to match the output',
+			'of the PDIST function in size.'
+		)
+	}
+	method <- tolower(substr(method, 1, 2)) # simplify the switch string.
+	monotonic <- 1
+	Z <- zeros(m - 1, 3) # allocate the output matrix.
+	N <- zeros(1, 2 * m - 1)
+	N[1:m] <- 1
+	n <- m; # since m is changing, we need to save m in n.
+	R <- 1:n
+	for (s in 1:(n-1)) {
+		X <- Y
+		v <- min(X)[1]
+		k <- min(X)[2]
+		i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1)))
+		j <- k - (i - 1) * (m - i / 2) + i
+		Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
+		# Temp variables
+		I1 <- 1:(i - 1)
+		I2 <- (i + 1):(j - 1)
+		I3 <- (j + 1):m
+
+		U <- c(I1, I2, I3)
+		I <- c(
+			I1 * (m - (I1 + 1) / 2) - m + i,
+			i * (m - (i + 1) / 2) - m + I2,
+			i * (m - (i + 1) / 2) - m + I3
+		)
+		J <- c(
+			I1 * (m - (I1 + 1) / 2) - m + j,
+			I2 * (m - (I2 + 1) / 2) - m + j,
+			j * (m - (j + 1) / 2) - m + I3
+		)
+
+		switch(method,
+			'si' = Y[I] <- min(Y[I], Y[J]), # single linkage
+			'av' = Y[I] <- Y[I] + Y[J], # average linkage
+			'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage
+			'ce' = {
+				K <- N[R[i]] + N[R[j]] # centroid linkage
+				Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] -
+					(N[R[i]] * N[R[j]] * v ^ 2) / K) / K
+			},
+			'wa' = Y[I] <- ((N[R[U]] + N[R[i]]) * Y[I] + (N[R[U]] + N[R[j]]) *
+				Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]])
+		)
+		J <- c(J, i * (m - (i + 1) / 2) - m + j)
+		Y[J] <- vector() # no need for the cluster information about j
+
+		# update m, N, R
+		m <- m - 1
+		N[n + s] <- N[R[i]] + N[R[j]]
+		R[i] <- n + s
+		R[j:(n - 1)] <- R[(j + 1):n]
+	}
+	return(Z)
+}
--- a/R/newGetDistances.R
+++ b/R/newGetDistances.R
@ -0,0 +1,63 @@
+newGetDistances <- function(data, rowsFromInd) {
+	ninds <- max(data[, ncol(data)])
+	nloci <- size(data, 2) - 1
+	riviLkm <- choose(ninds, 2)
+
+	empties <- find(data < 0)
+	data[empties] <- 0
+	data <- as.integer(data) # max(noalle) oltava <256
+
+	pariTaulu <- zeros(riviLkm, 2)
+	aPointer <- 1
+	for (a in (1:ninds) - 1) {
+		pariTaulu[aPointer:(aPointer + ninds - 1 - a), 1] <-
+			ones(ninds - a, 1) * a
+		pariTaulu[aPointer:aPointer + ninds - 1 - a, 2] <- t(a + 1:ninds)
+		aPointer <- aPointer + ninds - a
+	}
+
+	eka <- pariTaulu[, ones(1, rowsFromInd)]
+	eka <- eka * rowsFromInd
+	miinus <- repmat((rowsFromInd - 1):0, c(riviLkm, 1))
+	eka <- eka - miinus
+
+	toka <- pariTaulu[, ones(1, rowsFromInd) * 2]
+	toka <- toka * rowsFromInd
+	toka <- toka - miinus
+
+	summa <- zeros(riviLkm, 1)
+	vertailuja <- zeros(riviLkm, 1)
+
+	rm(pariTaulu, miinus)
+
+	x <- zeros(size(eka))
+	x <- as.integer(x)
+	y <- zeros(size(toka))
+	y <- as.integer(y)
+
+	for (j in 1:nloci) {
+		for (k in 1:rowsFromInd) {
+			x[, k] <- data[eka[, k], j]
+			y[, k] <- data[toka[, k], j]
+		}
+		for (a in 1:rowsFromInd) {
+			for (b in 1:rowsFromInd) {
+				vertailutNyt <- as.double(x[, a] > 0 & y[, b] > 0)
+				vertailuja <- vertailuja + vertailutNyt
+				lisays <- (x[, a] != y[, b] & vertailutNyt)
+				summa <- summa + as.double(lisays)
+			}
+		}
+	}
+
+	rm(x, y, vertailutNyt)
+	nollat <- find(vertailuja == 0)
+	dist <- zeros(length(vertailuja), 1)
+	dist[nollat] <- 1
+	muut <- find(vertailuja > 0)
+	dist[muut] <- summa[muut] / vertailuja[muut]
+	rm(summa, vertailuja)
+
+	Z = linkage(t(dist))
+	return(list(Z = Z, dist = dist))
+}
--- a/R/writeMixtureInfo.R
+++ b/R/writeMixtureInfo.R
@ -14,7 +14,6 @@
 #' @param COUNTS COUNTS
 #' @param SUMCOUNTS SUMCOUNTS
 #' @param LOGDIFF LOGDIFF
-#' @return changesInLogml
 #' @export
 writeMixtureInfo <- function(
 	logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS,
@ -100,7 +99,7 @@ writeMixtureInfo <- function(
 		while (length(text) > 58) {
 			# Take one line and display it.
 			new_line <- takeLine(text, 58)
-			text <- (length(new_line) + 1):end
+			text <- (length(new_line) + 1):length(text)
 			cat(new_line)
 			if (fid != -1) {
 				append(fid, new_line)
@ -228,7 +227,7 @@ writeMixtureInfo <- function(
 					sum(dist2 * log2((dist2 + 10 ^ -10) / (dist1 + 10 ^ -10)))
 				) / nloci
 				div <- (div12 + div21) / 2
-				dist_mat(pop1, pop2) <- div
+				dist_mat[pop1, pop2] <- div
 			}
 		}

--- a/man/fix.Rd
+++ b/man/fix.Rd
@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fix.R
+\name{fix}
+\alias{fix}
+\title{Round toward zero}
+\usage{
+fix(X)
+}
+\arguments{
+\item{X}{input element}
+}
+\description{
+Rounds each element of input to the nearest integer towards zero. Basically the same as trunc()
+}
+\author{
+Waldir Leoncio
+}
--- a/man/linkage.Rd
+++ b/man/linkage.Rd
@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/linkage.R
+\name{linkage}
+\alias{linkage}
+\title{Linkage}
+\usage{
+linkage(Y, method = "co")
+}
+\arguments{
+\item{Y}{data}
+
+\item{method}{either 'si', 'av', 'co' 'ce' or 'wa'}
+}
+\description{
+Create hierarchical cluster tree.
+}
+\details{
+Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single
+linkage algorithm.  The input Y is a distance matrix such as is generated by
+PDIST. Y may also be a more general dissimilarity matrix conforming to the
+output format of PDIST.
+}
--- a/man/writeMixtureInfo.Rd
+++ b/man/writeMixtureInfo.Rd
@ -50,9 +50,6 @@ writeMixtureInfo(

 \item{LOGDIFF}{LOGDIFF}
 }
-\value{
-changesInLogml
-}
 \description{
 Writes information about the mixture
 }
--- a/tests/testthat/test-convertedBaseFunctions.R
+++ b/tests/testthat/test-convertedBaseFunctions.R
@ -189,3 +189,9 @@ test_that("squeeze works as expected", {
 	A[, , 1:3] <- 1:3
 	expect_equal(squeeze(A), matrix(1:3, 3))
 })
+
+test_that("fix works as expected", {
+	X <- matrix(c(-1.9, -3.4, 1.6, 2.5, -4.5, 4.5), 3, byrow=TRUE)
+	Y <- matrix(c(-1, -3, 1, 2, -4, 4), 3, byrow=TRUE)
+	expect_identical(fix(X), Y)
+})