198 lines
7.1 KiB
Mathematica
198 lines
7.1 KiB
Mathematica
|
|
function tr = prune(tr,sel,varargin)
|
||
|
|
%PRUNE Reduces a phylogenetic tree by removing branch and leaf nodes.
|
||
|
|
% T2 = PRUNE(T1,NODES) prunes the nodes listed in the NODES vector from
|
||
|
|
% the tree T1. Any branch (or leaf) node listed in NODES and all their
|
||
|
|
% descendants will disappear. The respective 'parent' nodes will be
|
||
|
|
% connected to the respective 'brother' nodes as required. NODES in the
|
||
|
|
% tree are indexed as [1:NUMLEAVES] for the leaves and as
|
||
|
|
% [NUMLEAVES+1 : NUMLEAVES+NUMBRANCHES] for the branches. NODES can also
|
||
|
|
% be a logical array of following sizes: [NUMLEAVES+NUMBRANCHES x 1],
|
||
|
|
% [NUMLEAVES x 1] or [NUMBRANCHES x 1].
|
||
|
|
%
|
||
|
|
% T2 = PRUNE(T1,NODES,'MODE','EXCLUSIVE') changes the pruning mode to
|
||
|
|
% 'EXCLUSIVE', i.e. only the descendants of NODES will be pruned. Then
|
||
|
|
% NODES will become leaves as long as they do not have a predecessor in
|
||
|
|
% the list NODES. In this case pruning is the process of reducing a tree
|
||
|
|
% by turning some branch nodes into leaf nodes, and removing the leaf
|
||
|
|
% nodes under the original branch. Default is 'INCLUSIVE' and it behaves
|
||
|
|
% as explained above, i.e. the listed NODES are also pruned.
|
||
|
|
%
|
||
|
|
% Examples:
|
||
|
|
%
|
||
|
|
% % Load a phylogenetic tree created from a protein family:
|
||
|
|
% tr = phytreeread('pf00002.tree');
|
||
|
|
% view(tr)
|
||
|
|
%
|
||
|
|
% % To remove all the 'mouse' proteins use:
|
||
|
|
% ind = getbyname(tr,'mouse');
|
||
|
|
% tr = prune(tr,ind);
|
||
|
|
% view(tr)
|
||
|
|
%
|
||
|
|
% % To remove potential outliers in the tree use:
|
||
|
|
% [sel,sel_leaves] = select(tr,'criteria','distance','threshold',.3,...
|
||
|
|
% 'reference','leaves','exclude','leaves','propagate','toleaves');
|
||
|
|
% tr = prune(tr,~sel_leaves)
|
||
|
|
% view(tr)
|
||
|
|
%
|
||
|
|
% See also PHYTREE, PHYTREE/SELECT, PHYTREE/GET, PHYTREETOOL.
|
||
|
|
|
||
|
|
% Copyright 2003-2005 The MathWorks, Inc.
|
||
|
|
% $Revision: 1.1.4.8.2.1 $ $Date: 2004/11/30 03:45:24 $
|
||
|
|
|
||
|
|
% set default
|
||
|
|
exclusiveMode = false;
|
||
|
|
|
||
|
|
if numel(tr)~=1
|
||
|
|
error('Bioinfo:phytree:prune:NoMultielementArrays',...
|
||
|
|
'Phylogenetic tree must be an 1-by-1 object.');
|
||
|
|
end
|
||
|
|
|
||
|
|
btr = tr;
|
||
|
|
numBranches = size(tr.tree,1);
|
||
|
|
numLeaves = numBranches + 1;
|
||
|
|
numLabels = numBranches + numLeaves;
|
||
|
|
|
||
|
|
% validate sel
|
||
|
|
if islogical(sel)
|
||
|
|
if numel(sel)==numLabels
|
||
|
|
sel = sel(:)==true;
|
||
|
|
elseif numel(sel)==numLeaves
|
||
|
|
sel = [sel(:);false(numBranches,1)];
|
||
|
|
elseif numel(sel)==numBranches
|
||
|
|
sel = [false(numLeaves,1);sel(:)];
|
||
|
|
else
|
||
|
|
error('Bioinfo:IncorrectLogical',...
|
||
|
|
'Logical vector must have the same number of elements as nodes in the Phylogenetic Tree');
|
||
|
|
end
|
||
|
|
elseif isnumeric(sel) && isreal(sel) && all(sel>=1) && all(sel<=numLabels)
|
||
|
|
tem(numLabels)=false;
|
||
|
|
tem(floor(sel))=true;
|
||
|
|
sel=tem(:);
|
||
|
|
else
|
||
|
|
error('Bioinfo:IncorrectTypeofArguments','Invalid value for NODES');
|
||
|
|
end
|
||
|
|
|
||
|
|
nvarargin = numel(varargin);
|
||
|
|
if nvarargin
|
||
|
|
if rem(nvarargin,2)
|
||
|
|
error('Bioinfo:IncorrectNumberOfArguments',...
|
||
|
|
'Incorrect number of arguments to %s.',mfilename);
|
||
|
|
end
|
||
|
|
okargs = {'mode',''};
|
||
|
|
for j=1:2:nvarargin
|
||
|
|
pname = varargin{j};
|
||
|
|
pval = varargin{j+1};
|
||
|
|
k = find(strncmpi(pname,okargs,numel(pname)));
|
||
|
|
if isempty(k)
|
||
|
|
error('Bioinfo:UnknownParameterName',...
|
||
|
|
'Unknown parameter name: %s.',pname);
|
||
|
|
elseif length(k)>1
|
||
|
|
error('Bioinfo:AmbiguousParameterName',...
|
||
|
|
'Ambiguous parameter name: %s.',pname);
|
||
|
|
else
|
||
|
|
switch(k)
|
||
|
|
case 1 % classifiers
|
||
|
|
modeOptions = {'exclusive','inclusive'};
|
||
|
|
modeSelected = strmatch(lower(pval),modeOptions); %#ok
|
||
|
|
if isempty(modeSelected)
|
||
|
|
error('Bioinfo:NotValidMode','Not a valid mode.')
|
||
|
|
end
|
||
|
|
exclusiveMode = modeSelected==1;
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
% shortcut for an empty sel
|
||
|
|
if ~sum(sel)
|
||
|
|
return;
|
||
|
|
end
|
||
|
|
|
||
|
|
% when inclusiveMode if the two chidren of a branch are selected then the
|
||
|
|
% parent node should also be selected
|
||
|
|
if ~exclusiveMode
|
||
|
|
for ind = 1:numBranches
|
||
|
|
if all(sel(tr.tree(ind,:)))
|
||
|
|
sel(ind+numLeaves) = true;
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
% find descendants not selected under selected nodes
|
||
|
|
for ind = numBranches:-1:1
|
||
|
|
if sel(ind+numLeaves)
|
||
|
|
sel(tr.tree(ind,:))=true;
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
if sel(numLabels)
|
||
|
|
warning('Bioinfo:PrunedRoot',...
|
||
|
|
'Can not prune the root node in a Phylogenetic Tree.')
|
||
|
|
tr=btr; return
|
||
|
|
end
|
||
|
|
|
||
|
|
% obtain parents for every node
|
||
|
|
parents(tr.tree(:)) = repmat(numLeaves+1:numLabels,2,1)';
|
||
|
|
|
||
|
|
if ~exclusiveMode % (the selected nodes are deleted with their descendants)
|
||
|
|
% find the top selected nodes in order to edit branches
|
||
|
|
htop = find(~[sel(parents);0]&sel);
|
||
|
|
% for every top node do the junction
|
||
|
|
for ind = 1:length(htop)
|
||
|
|
g=htop(ind);
|
||
|
|
mypar = parents(g);
|
||
|
|
if mypar < numLabels % my parent is NOT the root
|
||
|
|
% then connect brother to granparent
|
||
|
|
mygrpar = parents(mypar); % grandparent
|
||
|
|
myuncle = setxor(tr.tree(mygrpar-numLeaves,:),mypar); % uncle
|
||
|
|
mybro = setxor(tr.tree(mypar-numLeaves,:),g); % brother
|
||
|
|
tr.tree(mygrpar-numLeaves,:) = [myuncle mybro];
|
||
|
|
tr.dist(mybro) = tr.dist(mybro) + tr.dist(mypar);
|
||
|
|
parents(mybro) = mygrpar;
|
||
|
|
end
|
||
|
|
sel(mypar) = true; %also delete my par
|
||
|
|
|
||
|
|
end
|
||
|
|
if sum(~sel) == 1
|
||
|
|
warning('Bioinfo:NotAMinimumTree',...
|
||
|
|
'The selected nodes lead to only one leaf, Phylogenetic Tree not pruned.')
|
||
|
|
tr=btr; return
|
||
|
|
end
|
||
|
|
% find indexes to change tree
|
||
|
|
permuta = 1:numLabels;
|
||
|
|
permuta(sel) = [];
|
||
|
|
ipermuta(permuta) = 1:length(permuta);
|
||
|
|
permutaBranches = permuta(permuta>numLeaves)-numLeaves;
|
||
|
|
% update all tree structure fields
|
||
|
|
tr.names = tr.names(permuta);
|
||
|
|
tr.dist = tr.dist(permuta);
|
||
|
|
tr.dist(end) = 0;
|
||
|
|
tr.tree = tr.tree(permutaBranches,:);
|
||
|
|
tr.tree = ipermuta(tr.tree);
|
||
|
|
|
||
|
|
else % exclusiveMode (the selected nodes are not deleted, only their descendants)
|
||
|
|
|
||
|
|
% unselect leaves which are already in the top
|
||
|
|
sel(1:numLeaves)=sel(parents(1:numLeaves));
|
||
|
|
% find the top selected nodes in order to edit branches
|
||
|
|
top = [~sel(parents);1] & sel;
|
||
|
|
% find the new leaves (no deleted leaves + branches that become leaves)
|
||
|
|
newLeaves = [~sel(1:numLeaves);top(numLeaves+1:end)];
|
||
|
|
% find which branches will stay
|
||
|
|
stayingBranches = ~sel(numLeaves + 1 : numLabels);
|
||
|
|
% setting new indexes to change the tree architecture
|
||
|
|
permuta = [find(newLeaves);numLeaves+find(stayingBranches)];
|
||
|
|
ipermuta(permuta) = 1:length(permuta);
|
||
|
|
% update all tree structure fields
|
||
|
|
tr.names = tr.names(permuta);
|
||
|
|
tr.dist = tr.dist(permuta);
|
||
|
|
tr.dist(end) = 0;
|
||
|
|
tr.tree = tr.tree(stayingBranches,:);
|
||
|
|
tr.tree = ipermuta(tr.tree);
|
||
|
|
% calling phytree with this format to force edge-crossing check
|
||
|
|
tr = phytree(tr.tree,tr.dist,tr.names);
|
||
|
|
|
||
|
|
end % if ~exclusiveMode
|
||
|
|
|
||
|
|
|