153 lines
5.2 KiB
Matlab
153 lines
5.2 KiB
Matlab
function [dist,comm] = pdist(tr,varargin)
|
|
%PDIST computes the pairwise patristic distance.
|
|
%
|
|
% D = PDIST(TREE) returns a vector D containing the patristic
|
|
% distances between every possible pair of leaf nodes in the phylogenic
|
|
% tree object TREE. The distance is computed following the path through
|
|
% the branches of the tree.
|
|
%
|
|
% The output vector D is arranged in the order of ((2,1),(3,1),...,
|
|
% (M,1),(3,2),...(M,3),.....(M,M-1)), i.e. the lower left triangle of the
|
|
% full M-by-M distance matrix. To get the distance between the Ith and
|
|
% Jth nodes (I > J) use the formula D((J-1)*(M-J/2)+I-J). M is the
|
|
% number of leaves)
|
|
%
|
|
% D = PDIST(...,'NODES',N) indicates the nodes to be included in the
|
|
% computation. N can be 'leaves' (default) or 'all'. In the former
|
|
% case the output will be order as before, but M is the total number of
|
|
% nodes in the tree, i.e. NUMLEAVES+NUMBRANCHES.
|
|
%
|
|
% D = PDIST(...,'SQUAREFORM',true) coverts the output into a square
|
|
% format, so that D(I,J) denotes the distance between the Ith and the Jth
|
|
% node. The output matrix is symmetric and has a zero diagonal.
|
|
%
|
|
% D = PDIST(...,'CRITERIA',C) changes the criteria used to relate pairs.
|
|
% C can be 'distance' (default) or 'levels'.
|
|
%
|
|
% [D,C] = PDIST(TREE) returns in C the index of the closest common parent
|
|
% nodes for every possible pair of query nodes.
|
|
%
|
|
% Example:
|
|
%
|
|
% % get the tree distances between every leaf
|
|
% tr = phytreeread('pf00002.tree')
|
|
% dist = pdist(tr,'nodes','leaves','squareform',true)
|
|
%
|
|
% See also SEQPDIST, SEQLINKAGE, PHYTREE, PHYTREETOOL.
|
|
|
|
% Copyright 2003-2005 The MathWorks, Inc.
|
|
% $Revision: 1.1.6.6 $ $Author: batserve $ $Date: 2005/06/09 21:55:59 $
|
|
|
|
if numel(tr)~=1
|
|
error('Bioinfo:phytree:pdist:NoMultielementArrays',...
|
|
'Phylogenetic tree must be an 1-by-1 object.');
|
|
end
|
|
|
|
% set default
|
|
squaredOutput = false;
|
|
CriteriaIsLevels = false;
|
|
outNodes = 'leaves';
|
|
|
|
numBranches = size(tr.tree,1);
|
|
numLeaves = numBranches + 1;
|
|
numLabels = numBranches + numLeaves;
|
|
|
|
% process input arguments
|
|
if nargin > 1
|
|
if rem(nargin,2) == 0
|
|
error('Bioinfo:phytree:pdist:IncorrectNumberOfArguments',...
|
|
'Incorrect number of arguments to %s.',mfilename);
|
|
end
|
|
okargs = {'nodes','squareform','criteria'};
|
|
for ind = 2 : 2: nargin
|
|
pname = varargin{ind-1};
|
|
pval = varargin{ind};
|
|
k = find(strncmpi(pname,okargs,numel(pname)));
|
|
if isempty(k)
|
|
error('Bioinfo:phytree:pdist:UnknownParameterName',...
|
|
'Unknown parameter name: %s.',pname);
|
|
elseif length(k)>1
|
|
error('Bioinfo:phytree:pdist:AmbiguousParameterName',...
|
|
'Ambiguous parameter name: %s.',pname);
|
|
else
|
|
switch(k)
|
|
case 1 % nodes
|
|
okNodes = {'leaves','all','branches'};
|
|
h = strmatch(lower(pval),okNodes ); %#ok
|
|
if isempty(h)
|
|
error('Bioinfo:phytree:pdist:IncorrectReferenceNode',...
|
|
'Incorrect node selection')
|
|
else
|
|
outNodes = okNodes{h};
|
|
end
|
|
case 2 % squareform
|
|
squaredOutput = (pval == true);
|
|
case 3 % criteria
|
|
h = strmatch(lower(pval),{'levels','distance'}); %#ok
|
|
if numel(h)
|
|
CriteriaIsLevels = (h == 1);
|
|
else error('Bioinfo:phytree:pdist:InvalidCriteria',...
|
|
'Invalid string for criteria.');
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
|
|
% create indexes to work only the lower leff triangle
|
|
m = numLabels*(numLabels-1)/2;
|
|
p = cumsum([1 (numLabels-1):-1:2]);
|
|
I = ones(m,1); I(p) = [2 3-numLabels:0];
|
|
J = zeros(m,1); J(p) = 1;
|
|
H = I; H(p) = 2:numLabels;
|
|
I = cumsum(I); J = cumsum(J); H = cumsum(H);
|
|
|
|
switch outNodes
|
|
case 'leaves'
|
|
outSelection = (I <= numLeaves) & (J <= numLeaves);
|
|
case 'all'
|
|
outSelection = (I>0);
|
|
case 'branches'
|
|
outSelection = (I > numLeaves) & (J > numLeaves);
|
|
end
|
|
% find closest common branch for every pair of nodes
|
|
% diagonal is invalid ! but not needed
|
|
|
|
% initializing full matrix
|
|
commf = zeros(numLabels,'int16');
|
|
children = false(1,numLabels);
|
|
for ind = numBranches:-1:1
|
|
children(:) = false;
|
|
children(ind+numLeaves) = true;
|
|
for ind2 = ind:-1:1
|
|
if children(ind2+numLeaves)
|
|
children(tr.tree(ind2,:))=true;
|
|
end
|
|
end
|
|
commf(children,children)=int16(ind);
|
|
end
|
|
|
|
% output vector with the lower leff triangle closest common branches
|
|
comm = double(commf(H));
|
|
|
|
% compute the distance to root for every node
|
|
cdist = tr.dist;
|
|
if CriteriaIsLevels % set to count levels instead
|
|
cdist(:) = 1;
|
|
cdist(end) = 0;
|
|
end
|
|
for ind = numBranches:-1:1
|
|
cdist(tr.tree(ind,:)) = cdist(tr.tree(ind,:)) + cdist(ind+numLeaves);
|
|
end
|
|
|
|
% compute pairwise distance
|
|
dist = cdist(I)+cdist(J)-2*cdist(comm+numLeaves);
|
|
|
|
dist = dist(outSelection);
|
|
comm = comm(outSelection);
|
|
|
|
if squaredOutput
|
|
dist = squareform(dist);
|
|
comm = squareform(comm);
|
|
end
|