379 lines
10 KiB
Matlab
379 lines
10 KiB
Matlab
function [T,v] = dendrogram_alpha(Z,varargin)
%DENDROGRAM_ALPHA Dendrogram leaf assignment and leaf order, without plotting.
%   This is a variant of DENDROGRAM (revision 1.15) whose drawing code is
%   disabled. It performs the same bookkeeping as DENDROGRAM but returns
%   only the leaf assignment T and the leaf permutation PERM; no figure is
%   created and no line handles are returned.
%
%   [T,PERM] = DENDROGRAM_ALPHA(Z) processes the hierarchical binary
%   cluster tree Z. Z is an (M-1)-by-3 matrix, generated by the LINKAGE
%   function, where M is the number of objects in the original dataset.
%
%   [T,PERM] = DENDROGRAM_ALPHA(Z,P) keeps only the top P nodes of the
%   tree. By default P is 30. To keep every node, set P = 0.
%
%   T is a vector of size M that contains the leaf node number for each
%   object in the original dataset. T is useful when P is less than the
%   total number of objects, so some leaf nodes correspond to multiple
%   objects. For example, to find out which objects are contained in leaf
%   node k, use find(T==k). When there are fewer than P objects in the
%   original data, T is the identity map, i.e., T = (1:M)'.
%
%   PERM is the permutation vector of the node labels of the leaves, in
%   the order in which the leaves are laid out along the dendrogram.
%
%   DENDROGRAM_ALPHA(...,'COLORTHRESHOLD',T) and
%   DENDROGRAM_ALPHA(...,'ORIENTATION',ORIENT) are still parsed and
%   validated as in DENDROGRAM, but because plotting is disabled they do
%   not change the returned T or PERM.
%      COLORTHRESHOLD: a numeric scalar, or 'default' for
%                      0.7 * max(Z(:,3)).
%      ORIENTATION:    'top' (default), 'bottom', 'left' or 'right'.
%
%   Example:
%
%      rand('seed',12);
%      X = rand(100,2);
%      Y = pdist(X,'cityblock');
%      Z = linkage(Y,'average');
%      [T, PERM] = dendrogram_alpha(Z);
%
%   See also LINKAGE, PDIST, CLUSTER, CLUSTERDATA, INCONSISTENT.

%   Copyright 1993-2002 The MathWorks, Inc.
%   $Revision: 1.15 $

% ADDED FEATURE: local definitions of true/false (presumably so the file
% also runs on releases that lack these built-ins -- TODO confirm).
true=logical(1);
false=logical(0);

m = size(Z,1)+1;
if nargin < 2
    p = 30;
end

if nargin == 2
    p = varargin{1};
end

orientation = 'd';
horz = false;
color = false;
threshold = 0.7 * max(Z(:,3));

if nargin > 2
    % An optional numeric P may precede the parameter name/value pairs.
    if isnumeric(varargin{1})
        p = varargin{1};
        offset = 1;
    else
        p = 30;
        offset = 0;
    end

    % Z plus the optional P must be followed by complete name/value pairs.
    if rem(nargin - offset,2)== 0
        error('Incorrect number of arguments to DENDROGRAM.');
    end
    okargs = strvcat('orientation','colorthreshold');
    for j=(1 + offset):2:nargin-2
        pname = varargin{j};
        pval = varargin{j+1};
        k = strmatch(lower(pname), okargs);
        if isempty(k)
            error(sprintf('Unknown parameter name: %s.',pname));
        elseif length(k)>1
            error(sprintf('Ambiguous parameter name: %s.',pname));
        else
            switch(k)
                case 1  % orientation
                    % Only the first letter of the value is significant.
                    if ~isempty(pval) & ischar(pval)
                        orientation = lower(pval(1));
                    end
                    if ~ismember(orientation,{'t','b','d','r','l'})
                        orientation = 'd';
                        warning('Unknown orientation specified, using ''top''.');
                    end
                    if ismember(orientation,{'r','l'})
                        horz = true;
                    end
                case 2  % colorthreshold
                    color = true;
                    if ischar(pval)
                        % strmatch returns [] when nothing matches, and
                        % ~[] is empty (false in an IF), so the match has
                        % to be tested with isempty for the warning to be
                        % reachable.
                        if isempty(strmatch(lower(pval),'default'))
                            warning('Unknown threshold specified, using default');
                        end
                    end
                    if isnumeric(pval)
                        threshold = pval;
                    end
            end
        end
    end
end

Z = transz(Z); % convert from m+k indexing to min(i,j) indexing.
T = (1:m)';

% If there are more than p nodes the dendrogram looks crowded; the
% following code makes the last p link nodes the leaf nodes.
if (m > p) & (p ~= 0)

    Y = Z((m-p+1):end,:);

    % R: node labels that appear in the retained rows.
    R = Y(:,1:2);
    R = unique(R(:));
    Rlp = R(R<=p);
    Rgp = R(R>p);
    % W maps each retained label to a leaf number in 1..p: labels already
    % within 1..p keep their value, the others take the unused numbers.
    W(Rlp) = Rlp;
    W(Rgp) = setdiff(1:p, Rlp);
    W = W';
    T(R) = W(R);

    % compute all the leaves that each node (in the last p rows) has
    for i = 1:p
        c = R(i);
        T = clusternum(Z,T,W(c),c,m-p+1,0); % assign to its leaves.
    end

    Y(:,1) = W(Y(:,1));
    Y(:,2) = W(Y(:,2));
    Z = Y;

    m = p; % reset the number of nodes to p (row number = p-1).
end

% A, B and Y are only consumed by the plotting code, which is disabled in
% this variant; they are kept so that code can be re-enabled unchanged.
A = zeros(4,m-1);
B = A;
n = m;
X = 1:n;
Y = zeros(n,1);
r = Y;

% arrange Z into W so that there will be no crossing in the dendrogram.
W = zeros(size(Z));
W(1,:) = Z(1,:);

nsw = zeros(n,1); rsw = nsw;   % nsw: nodes already placed, rsw: rows already placed
nsw(Z(1,1:2)) = 1; rsw(1) = 1;
k = 2; s = 2;                  % k: next row of W to fill, s: first row of Z not yet placed

while (k < n)
    i = s;
    % Skip rows already placed and rows touching no node placed so far.
    while rsw(i) | ~any(nsw(Z(i,1:2)))
        if rsw(i) & i == s, s = s+1; end
        i = i+1;
    end

    W(k,:) = Z(i,:);
    nsw(Z(i,1:2)) = 1;
    rsw(i) = 1;
    if s == i, s = s+1; end
    k = k+1;
end

g = 1;
for k = 1:m-1 % initialize X
    % Give each node the next free position the first time it is seen.
    i = W(k,1);
    if ~r(i),
        X(i) = g;
        g = g+1;
        r(i) = 1;
    end
    i = W(k,2);
    if ~r(i),
        X(i) = g;
        g = g+1;
        r(i) = 1;
    end
end
[u,v]=sort(X);
% v is the second output value (PERM)
label = num2str(v');   % tick labels for the disabled plotting code

% set up the color (results are only used by the disabled plotting code)

numColors = 1;theGroups = 1;
groups = 0;
cmap = [0 0 1];

if color
    groups = sum(Z(:,3)< threshold);
    if groups > 1 & groups < (m-1)
        theGroups = zeros(m-1,1);
        numColors = 0;
        for count = groups:-1:1
            if (theGroups(count) == 0)
                P = zeros(m-1,1);
                P(count) = 1;
                P = colorcluster(Z,P,Z(count,1),count);
                P = colorcluster(Z,P,Z(count,2),count);
                numColors = numColors + 1;
                theGroups(logical(P)) = numColors;
            end
        end
        cmap = hsv(numColors);
        cmap(end+1,:) = [0 0 0];
    else
        groups = 1;
    end

end

% ----------------------------------------------------------
% Plotting code from DENDROGRAM, disabled (commented out) in this file.
%
% if isempty(get(0,'CurrentFigure')) | ishold
%     figure;
%     set(gcf,'Position', [50, 50, 800, 500]);
% else
%     newplot;
% end
%
% col = zeros(m-1,3);
% h = zeros(m-1,1);
%
% for n = 1:(m-1)
%     i = Z(n,1); j = Z(n,2); w = Z(n,3);
%     A(:,n) = [X(i) X(i) X(j) X(j)]';
%     B(:,n) = [Y(i) w w Y(j)]';
%     X(i) = (X(i)+X(j))/2; Y(i) = w;
%     if n <= groups
%         col(n,:) = cmap(theGroups(n),:);
%     else
%         col(n,:) = cmap(end,:);
%     end
% end
%
%
% ymin = min(Z(:,3));
% ymax = max(Z(:,3));
% margin = (ymax - ymin) * 0.05;
% n = size(label,1);
%
% if(~horz)
%     for count = 1:(m-1)
%         h(count) = line(A(:,count),B(:,count),'color',col(count,:));
%     end
%     lims = [0 m+1 max(0,ymin-margin) (ymax+margin)];
%     set(gca, 'Xlim', [.5 ,(n +.5)], 'XTick', 1:n, 'XTickLabel', label, ...
%         'Box', 'off');
%     mask = logical([0 0 1 1]);
%     if strcmp(orientation,'b')
%         set(gca,'XAxisLocation','top','Ydir','reverse');
%     end
% else
%     for count = 1:(m-1)
%         h(count) = line(B(:,count),A(:,count),'color',col(count,:));
%     end
%     lims = [max(0,ymin-margin) (ymax+margin) 0 m+1 ];
%     set(gca, 'Ylim', [.5 ,(n +.5)], 'YTick', 1:n, 'YTickLabel', label, ...
%         'Box', 'off');
%     mask = logical([1 1 0 0]);
%     if strcmp(orientation, 'l')
%         set(gca,'YAxisLocation','right','Xdir','reverse');
%     end
% end
%
% if margin==0
%     if ymax~=0
%         lims(mask) = ymax * [0 1.25];
%     else
%         lims(mask) = [0 1];
%     end
% end
% axis(lims);

% -----------------------------------------------------------
function T = clusternum(X, T, c, k, m, d)
%CLUSTERNUM Assign the leaves under node k to cluster c.
%   T = CLUSTERNUM(X, T, C, K, M, D) scans rows M-1 down to 1 of X for the
%   nearest row whose first column equals K. When such a row is found, the
%   search continues recursively from that row, once for K itself and once
%   for the node in the row's second column. When no such row is found and
%   this is not the outermost call, both nodes listed in row M of X are
%   given the label C in T.
%
%   X   cluster tree rows; only columns 1 and 2 are read here
%   T   label vector, returned with the labels found under K set to C
%   C   label to write
%   K   node whose subtree is traced
%   M   row of X; the scan starts at the row just above it
%   D   depth of the caller (pass 0 for the outermost call)

depth = d + 1;
found = 0;
for row = (m-1):-1:1
    if X(row,1) == k    % node k is not a leaf, it has subtrees
        T = clusternum(X, T, c, k, row, depth);         % trace back left subtree
        T = clusternum(X, T, c, X(row,2), row, depth);  % then the right subtree
        found = 1;
        break;
    end
end

% The outermost call (depth 1) never labels row m itself; only recursive
% calls that reach the bottom of a subtree do.
if ~found & depth ~= 1
    T(X(m,1)) = c;
    T(X(m,2)) = c;
end
% ---------------------------------------
function T = colorcluster(X, T, k, m)
%COLORCLUSTER Find local clustering.
%   T = COLORCLUSTER(X, T, K, M) sets T(M) to 1 and, walking rows M-1 down
%   to 1 of X, looks for the nearest row whose first column equals K. When
%   one is found, that row is processed recursively, once for K and once
%   for the node in the row's second column, so every row reached this way
%   is also marked with 1 in T.
%
%   X   cluster tree rows; only columns 1 and 2 are read here
%   T   marker vector indexed by row of X, returned updated
%   K   node whose subtree is traced
%   M   row of X to mark; the scan starts at the row just above it

for row = (m-1):-1:1
    if X(row,1) == k    % node k is not a leaf, it has subtrees
        T = colorcluster(X, T, k, row);         % trace back left subtree
        T = colorcluster(X, T, X(row,2), row);  % then the right subtree
        break;
    end
end
T(m) = 1;
% ---------------------------------------
function Z = transz(Z)
%TRANSZ Translate output of LINKAGE into another format.
%   This is a helper function used by DENDROGRAM and COPHENET.
%
%   LINKAGE gives the cluster formed in row k the index M+k, where M is
%   the number of original observations. That convention makes the linkage
%   information easy to read, but the later computations are easier when a
%   cluster formed from clusters i and j is simply named min(i,j). This
%   helper converts the M+k indexing into min(i,j) indexing: every index
%   greater than M in columns 1 and 2 is replaced by the value TRACEBACK
%   returns for it, and each row is then ordered so that column 1 holds
%   the smaller of the two indices. Column 3 is left untouched.

m = size(Z,1)+1;

for i = 1:(m-1)
    % Rows are converted in order, so TRACEBACK always sees the rows above
    % row i in their already-converted form.
    for col = 1:2
        if Z(i,col) > m
            Z(i,col) = traceback(Z,Z(i,col));
        end
    end
    if Z(i,1) > Z(i,2)
        Z(i,1:2) = Z(i,[2 1]);
    end
end

function a = traceback(Z,b)
%TRACEBACK Smallest original index reachable from cluster b.
%   A = TRACEBACK(Z,B) looks up row B-M of Z, where M = size(Z,1)+1, and
%   resolves the entries in columns 1 and 2 of that row: an entry greater
%   than M is resolved recursively, any other entry is taken as is. A is
%   the smaller of the two resolved values.
%
%   B must be greater than M so that B-M is a valid row of Z.

m = size(Z,1)+1;
row = b - m;

leaf = zeros(1,2);
for col = 1:2
    child = Z(row,col);
    if child > m
        leaf(col) = traceback(Z,child);   % child is itself a merged cluster
    else
        leaf(col) = child;
    end
end

a = min(leaf(1),leaf(2));