Added source Matlab code for reference
This commit is contained in:
parent
b8af977117
commit
b5d99903d2
186 changed files with 61405 additions and 1 deletions
153
matlab/graph/@phyTree/pDist.m
Normal file
153
matlab/graph/@phyTree/pDist.m
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
function [dist,comm] = pdist(tr,varargin)
|
||||
%PDIST computes the pairwise patristic distance.
|
||||
%
|
||||
% D = PDIST(TREE) returns a vector D containing the patristic
|
||||
% distances between every possible pair of leaf nodes in the phylogenic
|
||||
% tree object TREE. The distance is computed following the path through
|
||||
% the branches of the tree.
|
||||
%
|
||||
% The output vector D is arranged in the order of ((2,1),(3,1),...,
|
||||
% (M,1),(3,2),...(M,3),.....(M,M-1)), i.e. the lower left triangle of the
|
||||
% full M-by-M distance matrix. To get the distance between the Ith and
|
||||
% Jth nodes (I > J) use the formula D((J-1)*(M-J/2)+I-J). M is the
|
||||
% number of leaves)
|
||||
%
|
||||
% D = PDIST(...,'NODES',N) indicates the nodes to be included in the
|
||||
% computation. N can be 'leaves' (default) or 'all'. In the former
|
||||
% case the output will be order as before, but M is the total number of
|
||||
% nodes in the tree, i.e. NUMLEAVES+NUMBRANCHES.
|
||||
%
|
||||
% D = PDIST(...,'SQUAREFORM',true) coverts the output into a square
|
||||
% format, so that D(I,J) denotes the distance between the Ith and the Jth
|
||||
% node. The output matrix is symmetric and has a zero diagonal.
|
||||
%
|
||||
% D = PDIST(...,'CRITERIA',C) changes the criteria used to relate pairs.
|
||||
% C can be 'distance' (default) or 'levels'.
|
||||
%
|
||||
% [D,C] = PDIST(TREE) returns in C the index of the closest common parent
|
||||
% nodes for every possible pair of query nodes.
|
||||
%
|
||||
% Example:
|
||||
%
|
||||
% % get the tree distances between every leaf
|
||||
% tr = phytreeread('pf00002.tree')
|
||||
% dist = pdist(tr,'nodes','leaves','squareform',true)
|
||||
%
|
||||
% See also SEQPDIST, SEQLINKAGE, PHYTREE, PHYTREETOOL.
|
||||
|
||||
% Copyright 2003-2005 The MathWorks, Inc.
|
||||
% $Revision: 1.1.6.6 $ $Author: batserve $ $Date: 2005/06/09 21:55:59 $
|
||||
|
||||
if numel(tr)~=1
|
||||
error('Bioinfo:phytree:pdist:NoMultielementArrays',...
|
||||
'Phylogenetic tree must be an 1-by-1 object.');
|
||||
end
|
||||
|
||||
% set default
|
||||
squaredOutput = false;
|
||||
CriteriaIsLevels = false;
|
||||
outNodes = 'leaves';
|
||||
|
||||
numBranches = size(tr.tree,1);
|
||||
numLeaves = numBranches + 1;
|
||||
numLabels = numBranches + numLeaves;
|
||||
|
||||
% process input arguments
|
||||
if nargin > 1
|
||||
if rem(nargin,2) == 0
|
||||
error('Bioinfo:phytree:pdist:IncorrectNumberOfArguments',...
|
||||
'Incorrect number of arguments to %s.',mfilename);
|
||||
end
|
||||
okargs = {'nodes','squareform','criteria'};
|
||||
for ind = 2 : 2: nargin
|
||||
pname = varargin{ind-1};
|
||||
pval = varargin{ind};
|
||||
k = find(strncmpi(pname,okargs,numel(pname)));
|
||||
if isempty(k)
|
||||
error('Bioinfo:phytree:pdist:UnknownParameterName',...
|
||||
'Unknown parameter name: %s.',pname);
|
||||
elseif length(k)>1
|
||||
error('Bioinfo:phytree:pdist:AmbiguousParameterName',...
|
||||
'Ambiguous parameter name: %s.',pname);
|
||||
else
|
||||
switch(k)
|
||||
case 1 % nodes
|
||||
okNodes = {'leaves','all','branches'};
|
||||
h = strmatch(lower(pval),okNodes ); %#ok
|
||||
if isempty(h)
|
||||
error('Bioinfo:phytree:pdist:IncorrectReferenceNode',...
|
||||
'Incorrect node selection')
|
||||
else
|
||||
outNodes = okNodes{h};
|
||||
end
|
||||
case 2 % squareform
|
||||
squaredOutput = (pval == true);
|
||||
case 3 % criteria
|
||||
h = strmatch(lower(pval),{'levels','distance'}); %#ok
|
||||
if numel(h)
|
||||
CriteriaIsLevels = (h == 1);
|
||||
else error('Bioinfo:phytree:pdist:InvalidCriteria',...
|
||||
'Invalid string for criteria.');
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
% create indexes to work only the lower leff triangle
|
||||
m = numLabels*(numLabels-1)/2;
|
||||
p = cumsum([1 (numLabels-1):-1:2]);
|
||||
I = ones(m,1); I(p) = [2 3-numLabels:0];
|
||||
J = zeros(m,1); J(p) = 1;
|
||||
H = I; H(p) = 2:numLabels;
|
||||
I = cumsum(I); J = cumsum(J); H = cumsum(H);
|
||||
|
||||
switch outNodes
|
||||
case 'leaves'
|
||||
outSelection = (I <= numLeaves) & (J <= numLeaves);
|
||||
case 'all'
|
||||
outSelection = (I>0);
|
||||
case 'branches'
|
||||
outSelection = (I > numLeaves) & (J > numLeaves);
|
||||
end
|
||||
% find closest common branch for every pair of nodes
|
||||
% diagonal is invalid ! but not needed
|
||||
|
||||
% initializing full matrix
|
||||
commf = zeros(numLabels,'int16');
|
||||
children = false(1,numLabels);
|
||||
for ind = numBranches:-1:1
|
||||
children(:) = false;
|
||||
children(ind+numLeaves) = true;
|
||||
for ind2 = ind:-1:1
|
||||
if children(ind2+numLeaves)
|
||||
children(tr.tree(ind2,:))=true;
|
||||
end
|
||||
end
|
||||
commf(children,children)=int16(ind);
|
||||
end
|
||||
|
||||
% output vector with the lower leff triangle closest common branches
|
||||
comm = double(commf(H));
|
||||
|
||||
% compute the distance to root for every node
|
||||
cdist = tr.dist;
|
||||
if CriteriaIsLevels % set to count levels instead
|
||||
cdist(:) = 1;
|
||||
cdist(end) = 0;
|
||||
end
|
||||
for ind = numBranches:-1:1
|
||||
cdist(tr.tree(ind,:)) = cdist(tr.tree(ind,:)) + cdist(ind+numLeaves);
|
||||
end
|
||||
|
||||
% compute pairwise distance
|
||||
dist = cdist(I)+cdist(J)-2*cdist(comm+numLeaves);
|
||||
|
||||
dist = dist(outSelection);
|
||||
comm = comm(outSelection);
|
||||
|
||||
if squaredOutput
|
||||
dist = squareform(dist);
|
||||
comm = squareform(comm);
|
||||
end
|
||||
Loading…
Add table
Add a link
Reference in a new issue