Merge branch 'fix-newGetDistances' into import-genepop
This commit is contained in:
commit
6b34dba186
4 changed files with 75 additions and 30 deletions
50
R/linkage.R
50
R/linkage.R
|
|
@ -4,10 +4,14 @@
|
|||
#' linkage algorithm. The input Y is a distance matrix such as is generated by
|
||||
#' PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
||||
#' output format of PDIST.
|
||||
#' @param Y data
|
||||
#'
|
||||
#' Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X.
|
||||
#' @param Y matrix
|
||||
#' @param method either 'si', 'av', 'co' 'ce' or 'wa'
|
||||
#' @note This is also a base Matlab function. The reason why the source code is also present here is unclear.
|
||||
#' @export
|
||||
linkage <- function(Y, method = 'co') {
|
||||
#TODO: compare R output with MATLAB output
|
||||
k <- size(Y)[1]
|
||||
n <- size(Y)[2]
|
||||
m <- (1 + sqrt(1 + 8 * n)) / 2
|
||||
|
|
@ -24,18 +28,33 @@ linkage <- function(Y, method = 'co') {
|
|||
N[1:m] <- 1
|
||||
n <- m; # since m is changing, we need to save m in n.
|
||||
R <- 1:n
|
||||
for (s in 1:(n-1)) {
|
||||
X <- Y
|
||||
v <- min(X)[1]
|
||||
k <- min(X)[2]
|
||||
for (s in 1:(n - 1)) {
|
||||
X <- as.matrix(as.vector(Y), ncol=1)
|
||||
|
||||
v <- min_MATLAB(X)$mins
|
||||
k <- min_MATLAB(X)$idx
|
||||
|
||||
i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1)))
|
||||
j <- k - (i - 1) * (m - i / 2) + i
|
||||
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
|
||||
# Temp variables
|
||||
I1 <- 1:(i - 1)
|
||||
I2 <- (i + 1):(j - 1)
|
||||
I3 <- (j + 1):m
|
||||
|
||||
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
|
||||
|
||||
# Temp variables
|
||||
if (i > 1) {
|
||||
I1 <- 1:(i - 1)
|
||||
} else {
|
||||
I1 <- NULL
|
||||
}
|
||||
if (i + 1 <= j - 1) {
|
||||
I2 <- (i + 1):(j - 1)
|
||||
} else {
|
||||
I2 <- NULL
|
||||
}
|
||||
if (j + 1 <= m) {
|
||||
I3 <- (j + 1):m
|
||||
} else {
|
||||
I3 <- NULL
|
||||
}
|
||||
U <- c(I1, I2, I3)
|
||||
I <- c(
|
||||
I1 * (m - (I1 + 1) / 2) - m + i,
|
||||
|
|
@ -47,11 +66,13 @@ linkage <- function(Y, method = 'co') {
|
|||
I2 * (m - (I2 + 1) / 2) - m + j,
|
||||
j * (m - (j + 1) / 2) - m + I3
|
||||
)
|
||||
|
||||
# Workaround in R for negative values in I and J
|
||||
# I <- I[I > 0 & I <= length(Y)]
|
||||
# J <- J[J > 0 & J <= length(Y)]
|
||||
switch(method,
|
||||
'si' = Y[I] <- min(Y[I], Y[J]), # single linkage
|
||||
'si' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, min), # single linkage
|
||||
'av' = Y[I] <- Y[I] + Y[J], # average linkage
|
||||
'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage
|
||||
'co' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, max), #complete linkage
|
||||
'ce' = {
|
||||
K <- N[R[i]] + N[R[j]] # centroid linkage
|
||||
Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] -
|
||||
|
|
@ -61,8 +82,7 @@ linkage <- function(Y, method = 'co') {
|
|||
Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]])
|
||||
)
|
||||
J <- c(J, i * (m - (i + 1) / 2) - m + j)
|
||||
Y[J] <- vector() # no need for the cluster information about j
|
||||
|
||||
Y <- Y[-J] # no need for the cluster information about j
|
||||
# update m, N, R
|
||||
m <- m - 1
|
||||
N[n + s] <- N[R[i]] + N[R[j]]
|
||||
|
|
|
|||
16
R/min.R
Normal file
16
R/min.R
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#' @title Minimum (MATLAB version)
|
||||
#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead
|
||||
#' @param X matrix
|
||||
#' @param indices return indices?
|
||||
#' @return Either a list or a vector
|
||||
#' @author Waldir Leoncio
|
||||
#' @export
|
||||
min_MATLAB <- function(X, indices = TRUE) {
|
||||
mins <- apply(X, 2, min)
|
||||
idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x]))
|
||||
if (indices) {
|
||||
return(list(mins = mins, idx = idx))
|
||||
} else {
|
||||
return(mins)
|
||||
}
|
||||
}
|
||||
|
|
@ -5,14 +5,14 @@ newGetDistances <- function(data, rowsFromInd) {
|
|||
|
||||
empties <- find(data < 0)
|
||||
data[empties] <- 0
|
||||
data <- as.integer(data) # max(noalle) oltava <256
|
||||
data <- apply(data, 2, as.numeric) # max(noalle) oltava <256
|
||||
|
||||
pariTaulu <- zeros(riviLkm, 2)
|
||||
aPointer <- 1
|
||||
for (a in (1:ninds) - 1) {
|
||||
pariTaulu[aPointer:(aPointer + ninds - 1 - a), 1] <-
|
||||
ones(ninds - a, 1) * a
|
||||
pariTaulu[aPointer:aPointer + ninds - 1 - a, 2] <- t(a + 1:ninds)
|
||||
for (a in 1:(ninds - 1)) {
|
||||
pariTaulu_rows <- aPointer:(aPointer + ninds - 1 - a)
|
||||
pariTaulu[pariTaulu_rows, 1] <- ones(ninds - a, 1) * a
|
||||
pariTaulu[pariTaulu_rows, 2] <- t((a + 1):ninds)
|
||||
aPointer <- aPointer + ninds - a
|
||||
}
|
||||
|
||||
|
|
@ -31,9 +31,9 @@ newGetDistances <- function(data, rowsFromInd) {
|
|||
rm(pariTaulu, miinus)
|
||||
|
||||
x <- zeros(size(eka))
|
||||
x <- as.integer(x)
|
||||
x <- apply(x, 2, as.integer)
|
||||
y <- zeros(size(toka))
|
||||
y <- as.integer(y)
|
||||
y <- apply(y, 2, as.integer)
|
||||
|
||||
for (j in 1:nloci) {
|
||||
for (k in 1:rowsFromInd) {
|
||||
|
|
@ -57,7 +57,6 @@ newGetDistances <- function(data, rowsFromInd) {
|
|||
muut <- find(vertailuja > 0)
|
||||
dist[muut] <- summa[muut] / vertailuja[muut]
|
||||
rm(summa, vertailuja)
|
||||
|
||||
Z = linkage(t(dist))
|
||||
Z <- linkage(t(dist))
|
||||
return(list(Z = Z, dist = dist))
|
||||
}
|
||||
|
|
@ -1,11 +1,21 @@
|
|||
# library(devtools)#TEMP
|
||||
# library(testthat)#TEMP
|
||||
library(testthat)#TEMP
|
||||
# library(rBAPS)#TEMP
|
||||
|
||||
context("Opening files on greedyMix")
|
||||
|
||||
# greedyMix(
|
||||
# tietue = "data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt",
|
||||
# format = "GenePop",
|
||||
# savePreProcessed = FALSE
|
||||
# )
|
||||
greedyMix(
|
||||
tietue = "data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt",
|
||||
format = "GenePop",
|
||||
savePreProcessed = FALSE
|
||||
)
|
||||
|
||||
context("Linkage")
|
||||
|
||||
test_that("Linkages are properly calculated", {
|
||||
Y <- c(0.5, 0.3, 0.6, 0.3, 0.3, 0.2, 0.3, 0.3, 0.3, 0.5)
|
||||
expect_equal(
|
||||
object = linkage(Y),
|
||||
expected = matrix(c(2, 1, 7, 8, 4, 3, 5, 6, .2, .3, .3, .6), ncol=3)
|
||||
)
|
||||
})
|
||||
Loading…
Add table
Reference in a new issue