Merge branch 'newGetDistances' into dev

This commit is contained in:
Waldir Leoncio 2020-07-14 14:52:41 +02:00
commit 31a61c1661
11 changed files with 199 additions and 138 deletions

View file

@ -17,6 +17,7 @@ export(inputdlg)
export(isfield)
export(laskeMuutokset4)
export(learn_simple_partition)
export(linkage)
export(logml2String)
export(lueGenePopData)
export(lueNimi)

5
R/fix.R Normal file
View file

@ -0,0 +1,5 @@
#' @title Round toward zero
#' @description Rounds each element of input to the nearest integer towards zero. Basically the same as trunc()
#' @param X input element
#' @author Waldir Leoncio
fix <- function(X) trunc(X)

View file

@ -294,14 +294,15 @@ greedyMix <- function(
data <- data[, seq_len(ncol(data) - 1)]
# ASK: remove?
# h0 = findobj('Tag','filename1_text'); inp = get(h0,'String');
# h0 = findobj('Tag','filename2_text');
# h0 = findobj('Tag','filename1_text')
# inp = get(h0,'String');
# h0 = findobj('Tag','filename2_text')
# outp = get(h0,'String');
# changesInLogml <- writeMixtureInfo(
# logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
# popnames, fixedK
# ) # TODO translate
changesInLogml <- writeMixtureInfo(
logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
popnames, fixedK
) # FIXMEL depends on get function above
# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder
@ -832,75 +833,6 @@ greedyMix <- function(
# k = children(~t) - m;
# end
# %---------------------------------------------------------------------------------------
# function [Z, dist] = newGetDistances(data, rowsFromInd)
# ninds = max(data(:,end));
# nloci = size(data,2)-1;
# riviLkm = nchoosek(ninds,2);
# empties = find(data<0);
# data(empties)=0;
# data = uint8(data); % max(noalle) oltava <256
# pariTaulu = zeros(riviLkm,2);
# aPointer=1;
# for a=1:ninds-1
# pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a;
# pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)';
# aPointer = aPointer+ninds-a;
# end
# eka = pariTaulu(:,ones(1,rowsFromInd));
# eka = eka * rowsFromInd;
# miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]);
# eka = eka - miinus;
# toka = pariTaulu(:,ones(1,rowsFromInd)*2);
# toka = toka * rowsFromInd;
# toka = toka - miinus;
# %eka = uint16(eka);
# %toka = uint16(toka);
# summa = zeros(riviLkm,1);
# vertailuja = zeros(riviLkm,1);
# clear pariTaulu; clear miinus;
# x = zeros(size(eka)); x = uint8(x);
# y = zeros(size(toka)); y = uint8(y);
# for j=1:nloci;
# for k=1:rowsFromInd
# x(:,k) = data(eka(:,k),j);
# y(:,k) = data(toka(:,k),j);
# end
# for a=1:rowsFromInd
# for b=1:rowsFromInd
# vertailutNyt = double(x(:,a)>0 & y(:,b)>0);
# vertailuja = vertailuja + vertailutNyt;
# lisays = (x(:,a)~=y(:,b) & vertailutNyt);
# summa = summa+double(lisays);
# end
# end
# end
# clear x; clear y; clear vertailutNyt;
# nollat = find(vertailuja==0);
# dist = zeros(length(vertailuja),1);
# dist(nollat) = 1;
# muut = find(vertailuja>0);
# dist(muut) = summa(muut)./vertailuja(muut);
# clear summa; clear vertailuja;
# Z = linkage(dist');
# %----------------------------------------------------------------------------------------
@ -946,60 +878,6 @@ greedyMix <- function(
# %----------------------------------------------------------------------------------------
# function Z = linkage(Y, method)
# [k, n] = size(Y);
# m = (1+sqrt(1+8*n))/2;
# if k ~= 1 | m ~= fix(m)
# error('The first input has to match the output of the PDIST function in size.');
# end
# if nargin == 1 % set default switch to be 'co'
# method = 'co';
# end
# method = lower(method(1:2)); % simplify the switch string.
# monotonic = 1;
# Z = zeros(m-1,3); % allocate the output matrix.
# N = zeros(1,2*m-1);
# N(1:m) = 1;
# n = m; % since m is changing, we need to save m in n.
# R = 1:n;
# for s = 1:(n-1)
# X = Y;
# [v, k] = min(X);
# i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1)));
# j = k - (i-1)*(m-i/2)+i;
# Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A
# I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables.
# U = [I1 I2 I3];
# I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3];
# J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3];
# switch method
# case 'si' %single linkage
# Y(I) = min(Y(I),Y(J));
# case 'av' % average linkage
# Y(I) = Y(I) + Y(J);
# case 'co' %complete linkage
# Y(I) = max(Y(I),Y(J));
# case 'ce' % centroid linkage
# K = N(R(i))+N(R(j));
# Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K;
# case 'wa'
# Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ...
# N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U)));
# end
# J = [J i*(m-(i+1)/2)-m+j];
# Y(J) = []; % no need for the cluster information about j.
# % update m, N, R
# m = m-1;
# N(n+s) = N(R(i)) + N(R(j));
# R(i) = n+s;
# R(j:(n-1))=R((j+1):n);
# end
# %-----------------------------------------------------------------------------------
# function logml = ...
# initialCounts(partition, data, npops, rows, noalle, adjprior)

View file

@ -53,12 +53,12 @@ handleData <- function(raw_data) {
)
}
nind <- max(data[, end])
nind <- max(data[, ncol(data)])
nrows <- size(data, 1)
ncols <- size(data, 2)
rowsFromInd <- zeros(nind, 1)
for (i in 1:nind) {
rowsFromInd[i] <- length(find(data[, end] == i))
rowsFromInd[i] <- length(find(data[, ncol(data)] == i))
}
maxRowsFromInd <- max(rowsFromInd)
a <- -999
@ -81,7 +81,7 @@ handleData <- function(raw_data) {
repmat(1 / noalle[j], c(noalle[j], 1)),
ones(max(noalle) - noalle[j], 1)
))
priorTerm <- priorTerm + noalle[j] * gammaln(1 / noalle[j])
priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j])
}
out <- list(
newData = newData,

73
R/linkage.R Normal file
View file

@ -0,0 +1,73 @@
#' @title Linkage
#' @description Create hierarchical cluster tree.
#' @details Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single
#' linkage algorithm. The input Y is a distance matrix such as is generated by
#' PDIST. Y may also be a more general dissimilarity matrix conforming to the
#' output format of PDIST.
#' @param Y data
#' @param method either 'si', 'av', 'co' 'ce' or 'wa'
#' @export
linkage <- function(Y, method = 'co') {
k <- size(Y)[1]
n <- size(Y)[2]
m <- (1 + sqrt(1 + 8 * n)) / 2
if ((k != 1) | (m != trunc(m))) {
stop(
'The first input has to match the output',
'of the PDIST function in size.'
)
}
method <- tolower(substr(method, 1, 2)) # simplify the switch string.
monotonic <- 1
Z <- zeros(m - 1, 3) # allocate the output matrix.
N <- zeros(1, 2 * m - 1)
N[1:m] <- 1
n <- m; # since m is changing, we need to save m in n.
R <- 1:n
for (s in 1:(n-1)) {
X <- Y
v <- min(X)[1]
k <- min(X)[2]
i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1)))
j <- k - (i - 1) * (m - i / 2) + i
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
# Temp variables
I1 <- 1:(i - 1)
I2 <- (i + 1):(j - 1)
I3 <- (j + 1):m
U <- c(I1, I2, I3)
I <- c(
I1 * (m - (I1 + 1) / 2) - m + i,
i * (m - (i + 1) / 2) - m + I2,
i * (m - (i + 1) / 2) - m + I3
)
J <- c(
I1 * (m - (I1 + 1) / 2) - m + j,
I2 * (m - (I2 + 1) / 2) - m + j,
j * (m - (j + 1) / 2) - m + I3
)
switch(method,
'si' = Y[I] <- min(Y[I], Y[J]), # single linkage
'av' = Y[I] <- Y[I] + Y[J], # average linkage
'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage
'ce' = {
K <- N[R[i]] + N[R[j]] # centroid linkage
Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] -
(N[R[i]] * N[R[j]] * v ^ 2) / K) / K
},
'wa' = Y[I] <- ((N[R[U]] + N[R[i]]) * Y[I] + (N[R[U]] + N[R[j]]) *
Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]])
)
J <- c(J, i * (m - (i + 1) / 2) - m + j)
Y[J] <- vector() # no need for the cluster information about j
# update m, N, R
m <- m - 1
N[n + s] <- N[R[i]] + N[R[j]]
R[i] <- n + s
R[j:(n - 1)] <- R[(j + 1):n]
}
return(Z)
}

63
R/newGetDistances.R Normal file
View file

@ -0,0 +1,63 @@
newGetDistances <- function(data, rowsFromInd) {
ninds <- max(data[, ncol(data)])
nloci <- size(data, 2) - 1
riviLkm <- choose(ninds, 2)
empties <- find(data < 0)
data[empties] <- 0
data <- as.integer(data) # max(noalle) oltava <256
pariTaulu <- zeros(riviLkm, 2)
aPointer <- 1
for (a in (1:ninds) - 1) {
pariTaulu[aPointer:(aPointer + ninds - 1 - a), 1] <-
ones(ninds - a, 1) * a
pariTaulu[aPointer:aPointer + ninds - 1 - a, 2] <- t(a + 1:ninds)
aPointer <- aPointer + ninds - a
}
eka <- pariTaulu[, ones(1, rowsFromInd)]
eka <- eka * rowsFromInd
miinus <- repmat((rowsFromInd - 1):0, c(riviLkm, 1))
eka <- eka - miinus
toka <- pariTaulu[, ones(1, rowsFromInd) * 2]
toka <- toka * rowsFromInd
toka <- toka - miinus
summa <- zeros(riviLkm, 1)
vertailuja <- zeros(riviLkm, 1)
rm(pariTaulu, miinus)
x <- zeros(size(eka))
x <- as.integer(x)
y <- zeros(size(toka))
y <- as.integer(y)
for (j in 1:nloci) {
for (k in 1:rowsFromInd) {
x[, k] <- data[eka[, k], j]
y[, k] <- data[toka[, k], j]
}
for (a in 1:rowsFromInd) {
for (b in 1:rowsFromInd) {
vertailutNyt <- as.double(x[, a] > 0 & y[, b] > 0)
vertailuja <- vertailuja + vertailutNyt
lisays <- (x[, a] != y[, b] & vertailutNyt)
summa <- summa + as.double(lisays)
}
}
}
rm(x, y, vertailutNyt)
nollat <- find(vertailuja == 0)
dist <- zeros(length(vertailuja), 1)
dist[nollat] <- 1
muut <- find(vertailuja > 0)
dist[muut] <- summa[muut] / vertailuja[muut]
rm(summa, vertailuja)
Z = linkage(t(dist))
return(list(Z = Z, dist = dist))
}

View file

@ -14,7 +14,6 @@
#' @param COUNTS COUNTS
#' @param SUMCOUNTS SUMCOUNTS
#' @param LOGDIFF LOGDIFF
#' @return changesInLogml
#' @export
writeMixtureInfo <- function(
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS,
@ -100,7 +99,7 @@ writeMixtureInfo <- function(
while (length(text) > 58) {
# Take one line and display it.
new_line <- takeLine(text, 58)
text <- (length(new_line) + 1):end
text <- (length(new_line) + 1):length(text)
cat(new_line)
if (fid != -1) {
append(fid, new_line)
@ -228,7 +227,7 @@ writeMixtureInfo <- function(
sum(dist2 * log2((dist2 + 10 ^ -10) / (dist1 + 10 ^ -10)))
) / nloci
div <- (div12 + div21) / 2
dist_mat(pop1, pop2) <- div
dist_mat[pop1, pop2] <- div
}
}

17
man/fix.Rd Normal file
View file

@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fix.R
\name{fix}
\alias{fix}
\title{Round toward zero}
\usage{
fix(X)
}
\arguments{
\item{X}{input element}
}
\description{
Rounds each element of input to the nearest integer towards zero. Basically the same as trunc()
}
\author{
Waldir Leoncio
}

22
man/linkage.Rd Normal file
View file

@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/linkage.R
\name{linkage}
\alias{linkage}
\title{Linkage}
\usage{
linkage(Y, method = "co")
}
\arguments{
\item{Y}{data}
\item{method}{either 'si', 'av', 'co' 'ce' or 'wa'}
}
\description{
Create hierarchical cluster tree.
}
\details{
Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single
linkage algorithm. The input Y is a distance matrix such as is generated by
PDIST. Y may also be a more general dissimilarity matrix conforming to the
output format of PDIST.
}

View file

@ -50,9 +50,6 @@ writeMixtureInfo(
\item{LOGDIFF}{LOGDIFF}
}
\value{
changesInLogml
}
\description{
Writes information about the mixture
}

View file

@ -189,3 +189,9 @@ test_that("squeeze works as expected", {
A[, , 1:3] <- 1:3
expect_equal(squeeze(A), matrix(1:3, 3))
})
test_that("fix works as expected", {
X <- matrix(c(-1.9, -3.4, 1.6, 2.5, -4.5, 4.5), 3, byrow=TRUE)
Y <- matrix(c(-1, -3, 1, 2, -4, 4), 3, byrow=TRUE)
expect_identical(fix(X), Y)
})