Fixed linkage()
This commit is contained in:
parent
debe7463cc
commit
3018427237
1 changed files with 35 additions and 15 deletions
50
R/linkage.R
50
R/linkage.R
|
|
@ -4,10 +4,14 @@
|
||||||
#' linkage algorithm. The input Y is a distance matrix such as is generated by
|
#' linkage algorithm. The input Y is a distance matrix such as is generated by
|
||||||
#' PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
#' PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
||||||
#' output format of PDIST.
|
#' output format of PDIST.
|
||||||
#' @param Y data
|
#'
|
||||||
|
#' Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X.
|
||||||
|
#' @param Y matrix
|
||||||
#' @param method either 'si', 'av', 'co' 'ce' or 'wa'
|
#' @param method either 'si', 'av', 'co' 'ce' or 'wa'
|
||||||
|
#' @note This is also a base Matlab function. The reason why the source code is also present here is unclear.
|
||||||
#' @export
|
#' @export
|
||||||
linkage <- function(Y, method = 'co') {
|
linkage <- function(Y, method = 'co') {
|
||||||
|
#TODO: compare R output with MATLAB output
|
||||||
k <- size(Y)[1]
|
k <- size(Y)[1]
|
||||||
n <- size(Y)[2]
|
n <- size(Y)[2]
|
||||||
m <- (1 + sqrt(1 + 8 * n)) / 2
|
m <- (1 + sqrt(1 + 8 * n)) / 2
|
||||||
|
|
@ -24,18 +28,33 @@ linkage <- function(Y, method = 'co') {
|
||||||
N[1:m] <- 1
|
N[1:m] <- 1
|
||||||
n <- m; # since m is changing, we need to save m in n.
|
n <- m; # since m is changing, we need to save m in n.
|
||||||
R <- 1:n
|
R <- 1:n
|
||||||
for (s in 1:(n-1)) {
|
for (s in 1:(n - 1)) {
|
||||||
X <- Y
|
X <- as.matrix(as.vector(Y), ncol=1)
|
||||||
v <- min(X)[1]
|
|
||||||
k <- min(X)[2]
|
v <- min_MATLAB(X)$mins
|
||||||
|
k <- min_MATLAB(X)$idx
|
||||||
|
|
||||||
i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1)))
|
i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1)))
|
||||||
j <- k - (i - 1) * (m - i / 2) + i
|
j <- k - (i - 1) * (m - i / 2) + i
|
||||||
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
|
|
||||||
# Temp variables
|
|
||||||
I1 <- 1:(i - 1)
|
|
||||||
I2 <- (i + 1):(j - 1)
|
|
||||||
I3 <- (j + 1):m
|
|
||||||
|
|
||||||
|
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
|
||||||
|
|
||||||
|
# Temp variables
|
||||||
|
if (i > 1) {
|
||||||
|
I1 <- 1:(i - 1)
|
||||||
|
} else {
|
||||||
|
I1 <- NULL
|
||||||
|
}
|
||||||
|
if (i + 1 <= j - 1) {
|
||||||
|
I2 <- (i + 1):(j - 1)
|
||||||
|
} else {
|
||||||
|
I2 <- NULL
|
||||||
|
}
|
||||||
|
if (j + 1 <= m) {
|
||||||
|
I3 <- (j + 1):m
|
||||||
|
} else {
|
||||||
|
I3 <- NULL
|
||||||
|
}
|
||||||
U <- c(I1, I2, I3)
|
U <- c(I1, I2, I3)
|
||||||
I <- c(
|
I <- c(
|
||||||
I1 * (m - (I1 + 1) / 2) - m + i,
|
I1 * (m - (I1 + 1) / 2) - m + i,
|
||||||
|
|
@ -47,11 +66,13 @@ linkage <- function(Y, method = 'co') {
|
||||||
I2 * (m - (I2 + 1) / 2) - m + j,
|
I2 * (m - (I2 + 1) / 2) - m + j,
|
||||||
j * (m - (j + 1) / 2) - m + I3
|
j * (m - (j + 1) / 2) - m + I3
|
||||||
)
|
)
|
||||||
|
# Workaround in R for negative values in I and J
|
||||||
|
# I <- I[I > 0 & I <= length(Y)]
|
||||||
|
# J <- J[J > 0 & J <= length(Y)]
|
||||||
switch(method,
|
switch(method,
|
||||||
'si' = Y[I] <- min(Y[I], Y[J]), # single linkage
|
'si' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, min), # single linkage
|
||||||
'av' = Y[I] <- Y[I] + Y[J], # average linkage
|
'av' = Y[I] <- Y[I] + Y[J], # average linkage
|
||||||
'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage
|
'co' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, max), #complete linkage
|
||||||
'ce' = {
|
'ce' = {
|
||||||
K <- N[R[i]] + N[R[j]] # centroid linkage
|
K <- N[R[i]] + N[R[j]] # centroid linkage
|
||||||
Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] -
|
Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] -
|
||||||
|
|
@ -61,8 +82,7 @@ linkage <- function(Y, method = 'co') {
|
||||||
Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]])
|
Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]])
|
||||||
)
|
)
|
||||||
J <- c(J, i * (m - (i + 1) / 2) - m + j)
|
J <- c(J, i * (m - (i + 1) / 2) - m + j)
|
||||||
Y[J] <- vector() # no need for the cluster information about j
|
Y <- Y[-J] # no need for the cluster information about j
|
||||||
|
|
||||||
# update m, N, R
|
# update m, N, R
|
||||||
m <- m - 1
|
m <- m - 1
|
||||||
N[n + s] <- N[R[i]] + N[R[j]]
|
N[n + s] <- N[R[i]] + N[R[j]]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue