Merge branch 'newGetDistances' into dev
This commit is contained in:
commit
31a61c1661
11 changed files with 199 additions and 138 deletions
|
|
@ -17,6 +17,7 @@ export(inputdlg)
|
|||
export(isfield)
|
||||
export(laskeMuutokset4)
|
||||
export(learn_simple_partition)
|
||||
export(linkage)
|
||||
export(logml2String)
|
||||
export(lueGenePopData)
|
||||
export(lueNimi)
|
||||
|
|
|
|||
5
R/fix.R
Normal file
5
R/fix.R
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
#' @title Round toward zero
|
||||
#' @description Rounds each element of input to the nearest integer towards zero. Basically the same as trunc()
|
||||
#' @param X input element
|
||||
#' @author Waldir Leoncio
|
||||
fix <- function(X) trunc(X)
|
||||
136
R/greedyMix.R
136
R/greedyMix.R
|
|
@ -294,14 +294,15 @@ greedyMix <- function(
|
|||
data <- data[, seq_len(ncol(data) - 1)]
|
||||
|
||||
# ASK: remove?
|
||||
# h0 = findobj('Tag','filename1_text'); inp = get(h0,'String');
|
||||
# h0 = findobj('Tag','filename2_text');
|
||||
# h0 = findobj('Tag','filename1_text')
|
||||
# inp = get(h0,'String');
|
||||
# h0 = findobj('Tag','filename2_text')
|
||||
# outp = get(h0,'String');
|
||||
|
||||
# changesInLogml <- writeMixtureInfo(
|
||||
# logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
|
||||
# popnames, fixedK
|
||||
# ) # TODO translate
|
||||
changesInLogml <- writeMixtureInfo(
|
||||
logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
|
||||
popnames, fixedK
|
||||
) # FIXMEL depends on get function above
|
||||
|
||||
# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder
|
||||
|
||||
|
|
@ -832,75 +833,6 @@ greedyMix <- function(
|
|||
# k = children(~t) - m;
|
||||
# end
|
||||
|
||||
|
||||
# %---------------------------------------------------------------------------------------
|
||||
|
||||
# function [Z, dist] = newGetDistances(data, rowsFromInd)
|
||||
|
||||
# ninds = max(data(:,end));
|
||||
# nloci = size(data,2)-1;
|
||||
# riviLkm = nchoosek(ninds,2);
|
||||
|
||||
# empties = find(data<0);
|
||||
# data(empties)=0;
|
||||
# data = uint8(data); % max(noalle) oltava <256
|
||||
|
||||
# pariTaulu = zeros(riviLkm,2);
|
||||
# aPointer=1;
|
||||
# for a=1:ninds-1
|
||||
# pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a;
|
||||
# pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)';
|
||||
# aPointer = aPointer+ninds-a;
|
||||
# end
|
||||
|
||||
# eka = pariTaulu(:,ones(1,rowsFromInd));
|
||||
# eka = eka * rowsFromInd;
|
||||
# miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]);
|
||||
# eka = eka - miinus;
|
||||
|
||||
# toka = pariTaulu(:,ones(1,rowsFromInd)*2);
|
||||
# toka = toka * rowsFromInd;
|
||||
# toka = toka - miinus;
|
||||
|
||||
# %eka = uint16(eka);
|
||||
# %toka = uint16(toka);
|
||||
|
||||
# summa = zeros(riviLkm,1);
|
||||
# vertailuja = zeros(riviLkm,1);
|
||||
|
||||
# clear pariTaulu; clear miinus;
|
||||
|
||||
# x = zeros(size(eka)); x = uint8(x);
|
||||
# y = zeros(size(toka)); y = uint8(y);
|
||||
|
||||
# for j=1:nloci;
|
||||
|
||||
# for k=1:rowsFromInd
|
||||
# x(:,k) = data(eka(:,k),j);
|
||||
# y(:,k) = data(toka(:,k),j);
|
||||
# end
|
||||
|
||||
# for a=1:rowsFromInd
|
||||
# for b=1:rowsFromInd
|
||||
# vertailutNyt = double(x(:,a)>0 & y(:,b)>0);
|
||||
# vertailuja = vertailuja + vertailutNyt;
|
||||
# lisays = (x(:,a)~=y(:,b) & vertailutNyt);
|
||||
# summa = summa+double(lisays);
|
||||
# end
|
||||
# end
|
||||
# end
|
||||
|
||||
# clear x; clear y; clear vertailutNyt;
|
||||
# nollat = find(vertailuja==0);
|
||||
# dist = zeros(length(vertailuja),1);
|
||||
# dist(nollat) = 1;
|
||||
# muut = find(vertailuja>0);
|
||||
# dist(muut) = summa(muut)./vertailuja(muut);
|
||||
# clear summa; clear vertailuja;
|
||||
|
||||
# Z = linkage(dist');
|
||||
|
||||
|
||||
# %----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
@ -946,60 +878,6 @@ greedyMix <- function(
|
|||
# %----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function Z = linkage(Y, method)
|
||||
# [k, n] = size(Y);
|
||||
# m = (1+sqrt(1+8*n))/2;
|
||||
# if k ~= 1 | m ~= fix(m)
|
||||
# error('The first input has to match the output of the PDIST function in size.');
|
||||
# end
|
||||
# if nargin == 1 % set default switch to be 'co'
|
||||
# method = 'co';
|
||||
# end
|
||||
# method = lower(method(1:2)); % simplify the switch string.
|
||||
# monotonic = 1;
|
||||
# Z = zeros(m-1,3); % allocate the output matrix.
|
||||
# N = zeros(1,2*m-1);
|
||||
# N(1:m) = 1;
|
||||
# n = m; % since m is changing, we need to save m in n.
|
||||
# R = 1:n;
|
||||
# for s = 1:(n-1)
|
||||
# X = Y;
|
||||
# [v, k] = min(X);
|
||||
# i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1)));
|
||||
# j = k - (i-1)*(m-i/2)+i;
|
||||
# Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A
|
||||
# I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables.
|
||||
# U = [I1 I2 I3];
|
||||
# I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3];
|
||||
# J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3];
|
||||
|
||||
# switch method
|
||||
# case 'si' %single linkage
|
||||
# Y(I) = min(Y(I),Y(J));
|
||||
# case 'av' % average linkage
|
||||
# Y(I) = Y(I) + Y(J);
|
||||
# case 'co' %complete linkage
|
||||
# Y(I) = max(Y(I),Y(J));
|
||||
# case 'ce' % centroid linkage
|
||||
# K = N(R(i))+N(R(j));
|
||||
# Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K;
|
||||
# case 'wa'
|
||||
# Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ...
|
||||
# N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U)));
|
||||
# end
|
||||
# J = [J i*(m-(i+1)/2)-m+j];
|
||||
# Y(J) = []; % no need for the cluster information about j.
|
||||
|
||||
# % update m, N, R
|
||||
# m = m-1;
|
||||
# N(n+s) = N(R(i)) + N(R(j));
|
||||
# R(i) = n+s;
|
||||
# R(j:(n-1))=R((j+1):n);
|
||||
# end
|
||||
|
||||
|
||||
# %-----------------------------------------------------------------------------------
|
||||
|
||||
# function logml = ...
|
||||
# initialCounts(partition, data, npops, rows, noalle, adjprior)
|
||||
|
||||
|
|
|
|||
|
|
@ -53,12 +53,12 @@ handleData <- function(raw_data) {
|
|||
)
|
||||
}
|
||||
|
||||
nind <- max(data[, end])
|
||||
nind <- max(data[, ncol(data)])
|
||||
nrows <- size(data, 1)
|
||||
ncols <- size(data, 2)
|
||||
rowsFromInd <- zeros(nind, 1)
|
||||
for (i in 1:nind) {
|
||||
rowsFromInd[i] <- length(find(data[, end] == i))
|
||||
rowsFromInd[i] <- length(find(data[, ncol(data)] == i))
|
||||
}
|
||||
maxRowsFromInd <- max(rowsFromInd)
|
||||
a <- -999
|
||||
|
|
@ -81,7 +81,7 @@ handleData <- function(raw_data) {
|
|||
repmat(1 / noalle[j], c(noalle[j], 1)),
|
||||
ones(max(noalle) - noalle[j], 1)
|
||||
))
|
||||
priorTerm <- priorTerm + noalle[j] * gammaln(1 / noalle[j])
|
||||
priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j])
|
||||
}
|
||||
out <- list(
|
||||
newData = newData,
|
||||
|
|
|
|||
73
R/linkage.R
Normal file
73
R/linkage.R
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
#' @title Linkage
|
||||
#' @description Create hierarchical cluster tree.
|
||||
#' @details Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single
|
||||
#' linkage algorithm. The input Y is a distance matrix such as is generated by
|
||||
#' PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
||||
#' output format of PDIST.
|
||||
#' @param Y data
|
||||
#' @param method either 'si', 'av', 'co' 'ce' or 'wa'
|
||||
#' @export
|
||||
linkage <- function(Y, method = 'co') {
|
||||
k <- size(Y)[1]
|
||||
n <- size(Y)[2]
|
||||
m <- (1 + sqrt(1 + 8 * n)) / 2
|
||||
if ((k != 1) | (m != trunc(m))) {
|
||||
stop(
|
||||
'The first input has to match the output',
|
||||
'of the PDIST function in size.'
|
||||
)
|
||||
}
|
||||
method <- tolower(substr(method, 1, 2)) # simplify the switch string.
|
||||
monotonic <- 1
|
||||
Z <- zeros(m - 1, 3) # allocate the output matrix.
|
||||
N <- zeros(1, 2 * m - 1)
|
||||
N[1:m] <- 1
|
||||
n <- m; # since m is changing, we need to save m in n.
|
||||
R <- 1:n
|
||||
for (s in 1:(n-1)) {
|
||||
X <- Y
|
||||
v <- min(X)[1]
|
||||
k <- min(X)[2]
|
||||
i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1)))
|
||||
j <- k - (i - 1) * (m - i / 2) + i
|
||||
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
|
||||
# Temp variables
|
||||
I1 <- 1:(i - 1)
|
||||
I2 <- (i + 1):(j - 1)
|
||||
I3 <- (j + 1):m
|
||||
|
||||
U <- c(I1, I2, I3)
|
||||
I <- c(
|
||||
I1 * (m - (I1 + 1) / 2) - m + i,
|
||||
i * (m - (i + 1) / 2) - m + I2,
|
||||
i * (m - (i + 1) / 2) - m + I3
|
||||
)
|
||||
J <- c(
|
||||
I1 * (m - (I1 + 1) / 2) - m + j,
|
||||
I2 * (m - (I2 + 1) / 2) - m + j,
|
||||
j * (m - (j + 1) / 2) - m + I3
|
||||
)
|
||||
|
||||
switch(method,
|
||||
'si' = Y[I] <- min(Y[I], Y[J]), # single linkage
|
||||
'av' = Y[I] <- Y[I] + Y[J], # average linkage
|
||||
'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage
|
||||
'ce' = {
|
||||
K <- N[R[i]] + N[R[j]] # centroid linkage
|
||||
Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] -
|
||||
(N[R[i]] * N[R[j]] * v ^ 2) / K) / K
|
||||
},
|
||||
'wa' = Y[I] <- ((N[R[U]] + N[R[i]]) * Y[I] + (N[R[U]] + N[R[j]]) *
|
||||
Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]])
|
||||
)
|
||||
J <- c(J, i * (m - (i + 1) / 2) - m + j)
|
||||
Y[J] <- vector() # no need for the cluster information about j
|
||||
|
||||
# update m, N, R
|
||||
m <- m - 1
|
||||
N[n + s] <- N[R[i]] + N[R[j]]
|
||||
R[i] <- n + s
|
||||
R[j:(n - 1)] <- R[(j + 1):n]
|
||||
}
|
||||
return(Z)
|
||||
}
|
||||
63
R/newGetDistances.R
Normal file
63
R/newGetDistances.R
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
newGetDistances <- function(data, rowsFromInd) {
|
||||
ninds <- max(data[, ncol(data)])
|
||||
nloci <- size(data, 2) - 1
|
||||
riviLkm <- choose(ninds, 2)
|
||||
|
||||
empties <- find(data < 0)
|
||||
data[empties] <- 0
|
||||
data <- as.integer(data) # max(noalle) oltava <256
|
||||
|
||||
pariTaulu <- zeros(riviLkm, 2)
|
||||
aPointer <- 1
|
||||
for (a in (1:ninds) - 1) {
|
||||
pariTaulu[aPointer:(aPointer + ninds - 1 - a), 1] <-
|
||||
ones(ninds - a, 1) * a
|
||||
pariTaulu[aPointer:aPointer + ninds - 1 - a, 2] <- t(a + 1:ninds)
|
||||
aPointer <- aPointer + ninds - a
|
||||
}
|
||||
|
||||
eka <- pariTaulu[, ones(1, rowsFromInd)]
|
||||
eka <- eka * rowsFromInd
|
||||
miinus <- repmat((rowsFromInd - 1):0, c(riviLkm, 1))
|
||||
eka <- eka - miinus
|
||||
|
||||
toka <- pariTaulu[, ones(1, rowsFromInd) * 2]
|
||||
toka <- toka * rowsFromInd
|
||||
toka <- toka - miinus
|
||||
|
||||
summa <- zeros(riviLkm, 1)
|
||||
vertailuja <- zeros(riviLkm, 1)
|
||||
|
||||
rm(pariTaulu, miinus)
|
||||
|
||||
x <- zeros(size(eka))
|
||||
x <- as.integer(x)
|
||||
y <- zeros(size(toka))
|
||||
y <- as.integer(y)
|
||||
|
||||
for (j in 1:nloci) {
|
||||
for (k in 1:rowsFromInd) {
|
||||
x[, k] <- data[eka[, k], j]
|
||||
y[, k] <- data[toka[, k], j]
|
||||
}
|
||||
for (a in 1:rowsFromInd) {
|
||||
for (b in 1:rowsFromInd) {
|
||||
vertailutNyt <- as.double(x[, a] > 0 & y[, b] > 0)
|
||||
vertailuja <- vertailuja + vertailutNyt
|
||||
lisays <- (x[, a] != y[, b] & vertailutNyt)
|
||||
summa <- summa + as.double(lisays)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rm(x, y, vertailutNyt)
|
||||
nollat <- find(vertailuja == 0)
|
||||
dist <- zeros(length(vertailuja), 1)
|
||||
dist[nollat] <- 1
|
||||
muut <- find(vertailuja > 0)
|
||||
dist[muut] <- summa[muut] / vertailuja[muut]
|
||||
rm(summa, vertailuja)
|
||||
|
||||
Z = linkage(t(dist))
|
||||
return(list(Z = Z, dist = dist))
|
||||
}
|
||||
|
|
@ -14,7 +14,6 @@
|
|||
#' @param COUNTS COUNTS
|
||||
#' @param SUMCOUNTS SUMCOUNTS
|
||||
#' @param LOGDIFF LOGDIFF
|
||||
#' @return changesInLogml
|
||||
#' @export
|
||||
writeMixtureInfo <- function(
|
||||
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS,
|
||||
|
|
@ -100,7 +99,7 @@ writeMixtureInfo <- function(
|
|||
while (length(text) > 58) {
|
||||
# Take one line and display it.
|
||||
new_line <- takeLine(text, 58)
|
||||
text <- (length(new_line) + 1):end
|
||||
text <- (length(new_line) + 1):length(text)
|
||||
cat(new_line)
|
||||
if (fid != -1) {
|
||||
append(fid, new_line)
|
||||
|
|
@ -228,7 +227,7 @@ writeMixtureInfo <- function(
|
|||
sum(dist2 * log2((dist2 + 10 ^ -10) / (dist1 + 10 ^ -10)))
|
||||
) / nloci
|
||||
div <- (div12 + div21) / 2
|
||||
dist_mat(pop1, pop2) <- div
|
||||
dist_mat[pop1, pop2] <- div
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
17
man/fix.Rd
Normal file
17
man/fix.Rd
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/fix.R
|
||||
\name{fix}
|
||||
\alias{fix}
|
||||
\title{Round toward zero}
|
||||
\usage{
|
||||
fix(X)
|
||||
}
|
||||
\arguments{
|
||||
\item{X}{input element}
|
||||
}
|
||||
\description{
|
||||
Rounds each element of input to the nearest integer towards zero. Basically the same as trunc()
|
||||
}
|
||||
\author{
|
||||
Waldir Leoncio
|
||||
}
|
||||
22
man/linkage.Rd
Normal file
22
man/linkage.Rd
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/linkage.R
|
||||
\name{linkage}
|
||||
\alias{linkage}
|
||||
\title{Linkage}
|
||||
\usage{
|
||||
linkage(Y, method = "co")
|
||||
}
|
||||
\arguments{
|
||||
\item{Y}{data}
|
||||
|
||||
\item{method}{either 'si', 'av', 'co' 'ce' or 'wa'}
|
||||
}
|
||||
\description{
|
||||
Create hierarchical cluster tree.
|
||||
}
|
||||
\details{
|
||||
Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single
|
||||
linkage algorithm. The input Y is a distance matrix such as is generated by
|
||||
PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
||||
output format of PDIST.
|
||||
}
|
||||
|
|
@ -50,9 +50,6 @@ writeMixtureInfo(
|
|||
|
||||
\item{LOGDIFF}{LOGDIFF}
|
||||
}
|
||||
\value{
|
||||
changesInLogml
|
||||
}
|
||||
\description{
|
||||
Writes information about the mixture
|
||||
}
|
||||
|
|
|
|||
|
|
@ -189,3 +189,9 @@ test_that("squeeze works as expected", {
|
|||
A[, , 1:3] <- 1:3
|
||||
expect_equal(squeeze(A), matrix(1:3, 3))
|
||||
})
|
||||
|
||||
test_that("fix works as expected", {
|
||||
X <- matrix(c(-1.9, -3.4, 1.6, 2.5, -4.5, 4.5), 3, byrow=TRUE)
|
||||
Y <- matrix(c(-1, -3, 1, 2, -4, 4), 3, byrow=TRUE)
|
||||
expect_identical(fix(X), Y)
|
||||
})
|
||||
Loading…
Add table
Reference in a new issue