Added source Matlab code for reference

This commit is contained in:
Waldir Leoncio 2019-12-16 16:47:21 +01:00
parent b8af977117
commit b5d99903d2
186 changed files with 61405 additions and 1 deletions

View file

@ -0,0 +1,26 @@
function display(tr)
%DISPLAY command window display of phylogenetic tree objects.
%   DISPLAY(TR) prints a one-line summary of the phylogenetic tree object
%   TR. Arrays with more than one element are forwarded to DISP; an empty
%   array and a scalar object each get their own descriptive message.
%   The leaf and branch counts are derived from the number of entries in
%   the DIST field of the object.

% Copyright 2003-2005 The MathWorks, Inc.
% $Revision: 1.1.6.4 $ $Author: batserve $ $Date: 2005/06/09 21:55:53 $

numObjects = numel(tr);

% empty array of objects
if numObjects == 0
    disp(' Empty array of phylogenetic tree objects')
    return
end

% non-scalar array: let DISP deal with it
if numObjects > 1
    disp(tr)
    return
end

% scalar object: one distance entry per node (leaves + branches)
numNodes = length(tr.dist);
if numNodes == 0
    msg = ' Empty phylogenetic tree object';
elseif numNodes == 1
    msg = ' Phylogenetic tree object with 1 leaf (0 branches)';
elseif numNodes == 3
    msg = ' Phylogenetic tree object with 2 leaves (1 branch)';
else
    leafCount = num2str((numNodes+1)/2);
    branchCount = num2str((numNodes-1)/2);
    msg = [' Phylogenetic tree object with ' leafCount ...
        ' leaves (' branchCount ' branches)'];
end
disp(msg)

View file

@ -0,0 +1,89 @@
function varargout = get(tr,varargin)
%GET Get information about a phylogenetic tree object.
%   [VALUE1,VALUE2, ...] = GET(TREE,'NAME1','NAME2', ...) returns, in the
%   order requested, the values of the named properties of the PHYTREE
%   object TREE. Names are matched case-insensitively and may be shortened
%   to any unambiguous prefix.
%
%   Valid property names:
%     'POINTERS'    : branch to leaf/branch connectivity list
%     'DISTANCES'   : edge length for every leaf/branch
%     'NUMLEAVES'   : number of leaves
%     'NUMBRANCHES' : number of branches
%     'NUMNODES'    : number of nodes (numleaves + numbranches)
%     'LEAFNAMES'   : names of the leaves
%     'BRANCHNAMES' : names of the branches
%     'NODENAMES'   : names of all the nodes
%
%   GET(TREE) with no output displays a structure holding every property.
%
%   V = GET(TREE) returns that structure instead of displaying it.
%
%   Errors:
%     Bioinfo:phytree:get:NoMultielementArrays   TREE is not 1-by-1
%     Bioinfo:phytree:get:UnknownParameterName   name matches no property
%     Bioinfo:phytree:get:AmbiguousParameterName name matches several
%
%   Examples:
%     tr = phytreeread('pf00002.tree')
%     protein_names = get(tr,'LeafNames')
%
%   See also PHYTREE, PHYTREEREAD, PHYTREE/SELECT, PHYTREE/GETBYNAME.

% Copyright 2003-2005 The MathWorks, Inc.
% $Revision: 1.1.6.7 $ $Author: batserve $ $Date: 2005/06/09 21:55:54 $

if numel(tr)~=1
    error('Bioinfo:phytree:get:NoMultielementArrays',...
        'Phylogenetic tree must be an 1-by-1 object.');
end

% one row of tr.tree per branch; leaves are listed first in names/dist
nBranch = size(tr.tree,1);
nLeaf = nBranch + 1;
nNode = nBranch + nLeaf;

% no property requested: build the full summary structure
if isempty(varargin)
    % cell-valued fields are wrapped in {} so STRUCT yields a scalar struct
    summary = struct(...
        'NumLeaves',nLeaf,...
        'NumBranches',nBranch,...
        'NumNodes',nNode,...
        'Pointers',{tr.tree},...
        'Distances',{tr.dist},...
        'LeafNames',{tr.names(1:nLeaf)},...
        'BranchNames',{tr.names(nLeaf+1:nNode)},...
        'NodeNames',{tr.names});
    if nargout > 0
        varargout{1} = summary;
    else
        disp(summary)
    end
    return;
end

propNames = {'pointers','distances','numleaves','numbranches',...
    'numnodes','leafnames','branchnames','nodenames'};

for p = 1:numel(varargin)
    pname = varargin{p};
    % case-insensitive prefix match against the known property names
    hit = find(strncmpi(pname,propNames,numel(pname)));
    if isempty(hit)
        error('Bioinfo:phytree:get:UnknownParameterName',...
            'Unknown parameter name: %s.',pname);
    end
    if length(hit)>1
        error('Bioinfo:phytree:get:AmbiguousParameterName',...
            'Ambiguous parameter name: %s.',pname);
    end
    switch propNames{hit}
        case 'pointers'
            varargout{p} = tr.tree; %#ok
        case 'distances'
            varargout{p} = tr.dist;
        case 'numleaves'
            varargout{p} = nLeaf;
        case 'numbranches'
            varargout{p} = nBranch;
        case 'numnodes'
            varargout{p} = nNode;
        case 'leafnames'
            varargout{p} = tr.names(1:nLeaf);
        case 'branchnames'
            varargout{p} = tr.names(nLeaf+1:nNode);
        case 'nodenames'
            varargout{p} = tr.names;
    end
end

View file

@ -0,0 +1,629 @@
function handles = Plot(tr,varargin)
%PLOT renders a phylogenetic tree.
%
% PLOT(TREE) renders a phylogenetic tree object into a MATLAB figure as a
% phylogram. The significant distances between branches and nodes are in
% horizontal direction, vertical coordinates are accommodated only for
% display purposes. Handles to graph elements are stored in the
% 'UserData' figure field, such that graphic properties can be easily
% modified.
%
% PLOT(TREE,ACTIVEBRANCHES) hides the non-active branches and all their
% descendants. ACTIVEBRANCHES is a logical array of size
% [numBranches x 1] indicating the active branches.
%
% PLOT(...,'TYPE',type) selects the method to render the phylogenetic
% tree. Options are: 'square' (default), 'angular', and 'radial'.
%
% PLOT(...,'ORIENTATION',orient) will orient the phylogenetic tree within
% the figure window. Options are: 'top', 'bottom', 'left' (default), and,
% 'right'. Orientation parameter is valid only for phylograms or
% cladograms.
%
% PLOT(...,'ROTATION',value) rotates the tree by VALUE degrees. Only the
% first element of VALUE is used, and the rotation is applied only when
% TYPE is 'radial'. Defaults to 0.
%
% PLOT(...,'BRANCHLABELS',value) hides/unhides branch labels. Options are
% true or false. Branch labels are placed next to the branch node.
% Defaults to true when TYPE is 'radial' and to false otherwise.
%
% PLOT(...,'LEAFLABELS',value) hides/unhides leaf labels. Options are
% true or false. Leaf labels are placed next to the leaf nodes. Defaults
% to true when TYPE is 'radial' and to false otherwise.
%
% PLOT(...,'TERMINALLABELS',value) hides/unhides terminal labels. Options
% are true (default) or false. Terminal labels are placed over the axis
% tick labels, ignored when 'radial' type is used.
%
% Parameter names are matched case-insensitively and may be abbreviated
% to any unambiguous prefix.
%
% H = PLOT(...) returns a structure with handles to the graph elements.
%
% Example:
%
% tr = phytreeread('pf00002.tree')
% plot(tr,'type','radial')
%
% % Graph element properties can be modified as follows:
%
% h=get(gcf,'UserData')
% set(h.branchNodeLabels,'FontSize',6,'Color',[.5 .5 .5])
%
% See also PHYTREE, PHYTREE/VIEW, PHYTREEREAD, PHYTREETOOL, SEQLINKAGE.
% Copyright 2003-2006 The MathWorks, Inc.
% $Revision: 1.1.6.10 $ $Author: batserve $ $Date: 2006/06/16 20:06:45 $
if numel(tr)~=1
error('Bioinfo:phytree:plot:NoMultielementArrays',...
'Phylogenetic tree must be an 1-by-1 object.');
end
% set defaults
% NaN means "not specified by the caller"; these two are resolved in the
% "set dependent defaults" section once the render type is known
dispBranchLabels = NaN;
dispLeafLabels = NaN;
dispTerminalLabels = true;
renderType = 'square';
orientation = 'left';
rotation = 0;
% convert the object to a struct so that helper fields (numBranches and the
% ones added by doBasicCalculations) can be attached to it
tr = struct(tr);
tr.numBranches = size(tr.tree,1);
% an optional logical first extra argument is taken as ACTIVEBRANCHES;
% argStart is the index in varargin of the first parameter name
if nargin>1 && islogical(varargin{1})
activeBranches = varargin{1};
argStart = 2;
else
activeBranches = true(tr.numBranches,1);
argStart = 1;
end
% parse the remaining arguments, which must come as name/value pairs
if nargin - argStart > 0
if rem(nargin - argStart,2) == 1
error('Bioinfo:phytree:plot:IncorrectNumberOfArguments',...
'Incorrect number of arguments to %s.',mfilename);
end
okargs = {'type','orientation','rotation',...
'branchlabels','leaflabels','terminallabels'};
for j = argStart:2:nargin-argStart
pname = varargin{j};
pval = varargin{j+1};
% case-insensitive prefix match of the parameter name
k = find(strncmpi(pname,okargs,numel(pname)));
if isempty(k)
error('Bioinfo:phytree:plot:UnknownParameterName',...
'Unknown parameter name: %s.',pname);
elseif length(k)>1
error('Bioinfo:phytree:plot:AmbiguousParameterName',...
'Ambiguous parameter name: %s.',pname);
else
switch(k)
case 1 % type
oktypes={'square','angular','radial'};
l = strmatch(lower(pval),oktypes); %#ok
if isempty(l)
error('Bioinfo:phytree:plot:UnknownTypeName',...
'Unknown option for %s.',upper(okargs{k}));
else
% NOTE(review): oktypes has only three entries, so l==4 looks
% unreachable (possibly a leftover from a removed fourth type) - confirm
if l==4
l=1;
end
renderType = oktypes{l};
end
case 2 % orientation
oktypes={'left','right','top','bottom'};
l = strmatch(lower(pval),oktypes); %#ok
if isempty(l)
error('Bioinfo:phytree:plot:UnknownOrientation',...
'Unknown option for %s.',upper(okargs{k}));
else
orientation = oktypes{l};
end
case 3 % rotation
% only the first element of the value is inspected and used
if isreal(pval(1))
rotation = double(pval(1));
else
error('Bioinfo:phytree:plot:NotValidType',...
'ROTATION must be numeric and real');
end
case 4 % branch labels
dispBranchLabels = opttf(pval);
case 5 % leaf labels
dispLeafLabels = opttf(pval);
case 6 % terminal labels
dispTerminalLabels = opttf(pval);
end
end
end
end
% set dependent defaults
% branch and leaf labels that were not requested explicitly are shown only
% for the radial layout
if isnan(dispBranchLabels)
if isequal(renderType,'radial')
dispBranchLabels = true;
else
dispBranchLabels = false;
end
end
if isnan(dispLeafLabels)
if isequal(renderType,'radial')
dispLeafLabels = true;
else
dispLeafLabels = false;
end
end
% compute parents, active nodes, terminal nodes and x/y node coordinates
tr = doBasicCalculations(tr,activeBranches,renderType);
% node numbering: leaves come first, branches follow
nodeIndex = 1:tr.numLabels;
leafIndex = 1:tr.numLeaves;
branchIndex = tr.numLeaves+1:tr.numLabels;
% check empty names
% unnamed nodes get a generated 'Branch k' / 'Leaf k' name
for ind = nodeIndex
if isempty(tr.names{ind})
if ind > tr.numLeaves
tr.names{ind} = ['Branch ' num2str(ind-tr.numLeaves)];
else
tr.names{ind} = ['Leaf ' num2str(ind)];
end
end
end
% rendering graphic objects
% the tree is drawn into the current figure
fig = gcf;
% fig = figure('Renderer','ZBuffer');
h.fig = fig;
h.axes = axes; hold on;
% limits along the distance axis: 1/20 of the largest x as margin on both
% sides; sepUnit(1) (a negative value) is reused below as label offset unit
sepUnit = max(tr.x)*[-1/20 21/20];
% setting the axes
% for square/angular trees one tick per terminal node is placed on the axis
% that runs across the leaves; the axes are narrowed on the side where the
% terminal labels will be written
switch renderType
case {'square','angular'}
switch orientation
case 'left'
set(h.axes,'YTick',1:numel(tr.terminalNodes),'Ydir','reverse',...
'YtickLabel','','YAxisLocation','Right')
if dispTerminalLabels
set(h.axes,'Position',[.05 .10 .7 .85])
else
set(h.axes,'Position',[.05 .10 .9 .85])
end
xlim(sepUnit);
ylim([0 numel(tr.terminalNodes)+1]);
case 'right'
set(h.axes,'YTick',1:numel(tr.terminalNodes),'Xdir','reverse','Ydir','reverse',...
'YtickLabel','','YAxisLocation','Left')
if dispTerminalLabels
set(h.axes,'Position',[.25 .10 .7 .85])
else
set(h.axes,'Position',[.05 .10 .9 .85])
end
xlim(sepUnit);
ylim([0 numel(tr.terminalNodes)+1]);
case 'top'
set(h.axes,'XTick',1:numel(tr.terminalNodes),...
'XtickLabel','','XAxisLocation','Top')
if dispTerminalLabels
set(h.axes,'Position',[.10 .05 .85 .7])
else
set(h.axes,'Position',[.10 .05 .85 .9])
end
ylim(sepUnit);
xlim([0 numel(tr.terminalNodes)+1]);
case 'bottom'
set(h.axes,'XTick',1:numel(tr.terminalNodes),'Ydir','reverse',...
'XtickLabel','','XAxisLocation','Bottom')
if dispTerminalLabels
set(h.axes,'Position',[.10 .25 .85 .7])
else
set(h.axes,'Position',[.10 .05 .85 .9])
end
ylim(sepUnit);
xlim([0 numel(tr.terminalNodes)+1]);
end
case 'radial'
% radial trees have no ticks and never show terminal labels
set(h.axes,'XTick',[],'YTick',[])
set(h.axes,'Position',[.05 .05 .9 .9])
dispTerminalLabels = false;
axis equal
end
% drawing lines
% one line object per node, joining the node to its parent; the root (last
% node) is given itself as parent. Lines of inactive nodes are deleted
% right after plotting so h.BranchLines only holds the active ones.
switch renderType
case 'square'
% three points per node: (x node, y node) -> (x parent, y node) ->
% (x parent, y parent), i.e. a right-angled connector
X = tr.x([nodeIndex;repmat([tr.par(1:tr.numLabels-1) tr.numLabels],2,1)]);
Y = tr.y([repmat(nodeIndex,2,1);[tr.par(1:tr.numLabels-1) tr.numLabels]]);
switch orientation
case {'left','right'}
h.BranchLines = plot(X,Y,'-k');
delete(h.BranchLines(~tr.activeNodes))
h.BranchLines = h.BranchLines(tr.activeNodes);
case {'top','bottom'}
% vertical layouts swap the roles of the two coordinates
h.BranchLines = plot(Y,X,'-k');
delete(h.BranchLines(~tr.activeNodes))
h.BranchLines = h.BranchLines(tr.activeNodes);
end
case 'angular'
% two points per node: a straight segment from the node to its parent
X = tr.x([nodeIndex;[tr.par(1:tr.numLabels-1) tr.numLabels]]);
Y = tr.y([nodeIndex;[tr.par(1:tr.numLabels-1) tr.numLabels]]);
switch orientation
case {'left','right'}
h.BranchLines = plot(X,Y,'-k');
delete(h.BranchLines(~tr.activeNodes))
h.BranchLines = h.BranchLines(tr.activeNodes);
case {'top','bottom'}
h.BranchLines = plot(Y,X,'-k');
delete(h.BranchLines(~tr.activeNodes))
h.BranchLines = h.BranchLines(tr.activeNodes);
end
case 'radial'
% polar to cartesian: x becomes the radius and y (scaled so that the
% terminal nodes span a full turn, plus ROTATION in degrees) the angle;
% tr.x and tr.y are overwritten with the cartesian coordinates
R = tr.x;
A = tr.y / numel(tr.terminalNodes)*2*pi+rotation*pi/180;
tr.x = R .* sin(A);
tr.y = R .* cos(A);
X = tr.x([nodeIndex;[tr.par(1:tr.numLabels-1) tr.numLabels]]);
Y = tr.y([nodeIndex;[tr.par(1:tr.numLabels-1) tr.numLabels]]);
h.BranchLines = plot(X,Y,'-k');
delete(h.BranchLines(~tr.activeNodes))
h.BranchLines = h.BranchLines(tr.activeNodes);
end
% drawing nodes
% active branch nodes are drawn as blue-filled circles and active leaf
% nodes as white-filled squares
switch renderType
case {'square','angular'}
switch orientation
case {'left','right'}
h.BranchDots = plot(tr.x(branchIndex(tr.activeNodes(branchIndex))),...
tr.y(branchIndex(tr.activeNodes(branchIndex))),'o',...
'MarkerSize',5,'MarkerEdgeColor','k',...
'MarkerFaceColor','b');
h.LeafDots = plot(tr.x(leafIndex(tr.activeNodes(leafIndex))),...
tr.y(leafIndex(tr.activeNodes(leafIndex))),'square',...
'MarkerSize',4,'MarkerEdgeColor','k',...
'MarkerFaceColor','w');
case {'top','bottom'}
h.BranchDots = plot(tr.y(branchIndex(tr.activeNodes(branchIndex))),...
tr.x(branchIndex(tr.activeNodes(branchIndex))),'o',...
'MarkerSize',5,'MarkerEdgeColor','k',...
'MarkerFaceColor','b');
h.LeafDots = plot(tr.y(leafIndex(tr.activeNodes(leafIndex))),...
tr.x(leafIndex(tr.activeNodes(leafIndex))),'square',...
'MarkerSize',4,'MarkerEdgeColor','k',...
'MarkerFaceColor','w');
end
case 'radial'
h.BranchDots = plot(tr.x(branchIndex(tr.activeNodes(branchIndex))),...
tr.y(branchIndex(tr.activeNodes(branchIndex))),'o',...
'MarkerSize',5,'MarkerEdgeColor','k',...
'MarkerFaceColor','b');
h.LeafDots = plot(tr.x(leafIndex(tr.activeNodes(leafIndex))),...
tr.y(leafIndex(tr.activeNodes(leafIndex))),'square',...
'MarkerSize',4,'MarkerEdgeColor','k',...
'MarkerFaceColor','w');
end
% resize figure if needed
% the figure is asked to grow along the direction in which the terminal
% nodes are stacked; fontRatio is the figure extent in that direction
% (Position(4) = height or Position(3) = width) per terminal node and
% drives all the font sizes below
switch renderType
case {'square','angular'}
switch orientation
case {'left','right'}
correctFigureSize(fig, 15 * numel(tr.terminalNodes),0);
fontRatio = max(get(fig,'Position').*[0 0 0 1])/numel(tr.terminalNodes);
case {'top','bottom'}
correctFigureSize(fig, 0, 15 * numel(tr.terminalNodes));
fontRatio = max(get(fig,'Position').*[0 0 1 0])/numel(tr.terminalNodes);
end
case 'radial'
temp = 10/pi*numel(tr.terminalNodes);
correctFigureSize(fig,temp,temp);
fontRatio = max(get(fig,'Position').*[0 0 1 0])/numel(tr.terminalNodes);
end
set(h.axes,'Fontsize',min(9,ceil(fontRatio/1.5)));
% set branch node labels
% labels are always created (for the active branch nodes only) and hidden
% at the end of the function when they were not requested
X = tr.x(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels)));
Y = tr.y(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels)));
switch renderType
case {'square','angular'}
switch orientation
case {'left'}
h.branchNodeLabels = text(X+sepUnit(1)/2,Y,tr.names(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels))));
set(h.branchNodeLabels,'color',[0 0 .8],'clipping','on')
set(h.branchNodeLabels,'vertical','bottom')
set(h.branchNodeLabels,'horizontal','right')
set(h.branchNodeLabels,'Fontsize',min(8,ceil(fontRatio/2)));
case {'right'}
h.branchNodeLabels = text(X+sepUnit(1)/2,Y,tr.names(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels))));
set(h.branchNodeLabels,'color',[0 0 .8],'clipping','on')
set(h.branchNodeLabels,'vertical','bottom')
set(h.branchNodeLabels,'Fontsize',min(8,ceil(fontRatio/2)));
case {'top'}
h.branchNodeLabels = text(Y,X-sepUnit(1)/2,tr.names(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels))));
set(h.branchNodeLabels,'color',[0 0 .8],'clipping','on')
set(h.branchNodeLabels,'vertical','bottom','Rotation',30)
set(h.branchNodeLabels,'Fontsize',min(8,ceil(fontRatio/2)));
case {'bottom'}
h.branchNodeLabels = text(Y,X+sepUnit(1)/2,tr.names(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels))));
set(h.branchNodeLabels,'color',[0 0 .8],'clipping','on')
set(h.branchNodeLabels,'vertical','bottom','Rotation',30)
set(h.branchNodeLabels,'Fontsize',min(8,ceil(fontRatio/2)));
end
case 'radial'
h.branchNodeLabels = text(X,Y,tr.names(branchIndex(tr.activeNodes(tr.numLeaves+1:tr.numLabels))));
set(h.branchNodeLabels,'color',[0 0 .8],'clipping','on')
set(h.branchNodeLabels,'vertical','bottom')
set(h.branchNodeLabels,'Fontsize',min(8,ceil(fontRatio*1.2)));
% push each label away from the vertical axis: labels on the left half
% are right-aligned and shifted left, the others left-aligned and
% shifted right (sepUnit(1) is negative)
for ind = 1:numel(h.branchNodeLabels)
if X(ind)<0
set(h.branchNodeLabels(ind),'horizontal','right')
set(h.branchNodeLabels(ind),'Position',get(h.branchNodeLabels(ind),'Position')+[sepUnit(1)/2 0 0])
else
set(h.branchNodeLabels(ind),'horizontal','left')
set(h.branchNodeLabels(ind),'Position',get(h.branchNodeLabels(ind),'Position')-[sepUnit(1)/2 0 0])
end
end
end
% set leaf nodes labels
% same scheme as for the branch labels, for the active leaves
X = tr.x(leafIndex(tr.activeNodes(1:tr.numLeaves)));
Y = tr.y(leafIndex(tr.activeNodes(1:tr.numLeaves)));
switch renderType
case {'square','angular'}
switch orientation
case {'left'}
h.leafNodeLabels = text(X-sepUnit(1)/2,Y,tr.names(leafIndex(tr.activeNodes(1:tr.numLeaves))));
set(h.leafNodeLabels,'color',[.5 .5 .5],'clipping','on')
set(h.leafNodeLabels,'horizontal','left')
set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio/2)));
case {'right'}
h.leafNodeLabels = text(X-sepUnit(1)/2,Y,tr.names(leafIndex(tr.activeNodes(1:tr.numLeaves))));
set(h.leafNodeLabels,'color',[.5 .5 .5],'clipping','on')
set(h.leafNodeLabels,'horizontal','right')
set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio/2)));
case {'top'}
h.leafNodeLabels = text(Y,X-sepUnit(1)/2,tr.names(leafIndex(tr.activeNodes(1:tr.numLeaves))));
set(h.leafNodeLabels,'color',[.5 .5 .5],'clipping','on')
set(h.leafNodeLabels,'horizontal','left','Rotation',60)
set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio/2)));
case {'bottom'}
h.leafNodeLabels = text(Y,X-sepUnit(1),tr.names(leafIndex(tr.activeNodes(1:tr.numLeaves))));
set(h.leafNodeLabels,'color',[.5 .5 .5],'clipping','on')
set(h.leafNodeLabels,'horizontal','right','Rotation',60)
set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio/2)));
end
case 'radial'
h.leafNodeLabels = text(X,Y,tr.names(leafIndex(tr.activeNodes(1:tr.numLeaves))));
set(h.leafNodeLabels,'color',[.5 .5 .5],'clipping','on')
set(h.leafNodeLabels,'Fontsize',min(8,ceil(fontRatio*1.2)));
% textHeight = mean(cell2mat(get(h.leafNodeLabels,'Extent')))*[0 0 0 1]';
for ind = 1:numel(h.leafNodeLabels)
if X(ind)<0
set(h.leafNodeLabels(ind),'horizontal','right')
set(h.leafNodeLabels(ind),'Position',get(h.leafNodeLabels(ind),'Position')+[sepUnit(1) 0 0])
else
set(h.leafNodeLabels(ind),'horizontal','left')
set(h.leafNodeLabels(ind),'Position',get(h.leafNodeLabels(ind),'Position')-[sepUnit(1) 0 0])
end
% a=atan(Y(ind)/X(ind))*180/pi;
% if a > 0 a = max(0,a-60)/2; else
% a = min(0,a+60)/2; end
% set(h.leafNodeLabels(ind),'Rotation',a)
end
% spread the leaf labels vertically, one quadrant at a time: labels are
% visited in order of increasing distance from the horizontal axis and
% each one is moved, if needed, so that it is at least one label height
% (the 'Extent' height of the first label of the quadrant) away from the
% previous one
[sortedY,hsY]=sort(Y);
% upper right quadrant (x>0, y>0): labels are pushed upwards
idx=hsY(X(hsY)>0 & sortedY>0);
if numel(idx)
extentY = get(h.leafNodeLabels(idx(1)),'Extent')*[0;0;0;1];
positionY = get(h.leafNodeLabels(idx(1)),'Position')*[0;1;0];
for i = 2:numel(idx)
position = get(h.leafNodeLabels(idx(i)),'Position');
positionY = max(positionY+extentY,position(2));
position(2) = positionY;
set(h.leafNodeLabels(idx(i)),'Position',position)
end
end
% upper left quadrant (x<0, y>0): labels are pushed upwards
idx=hsY(X(hsY)<0 & sortedY>0);
if numel(idx)
extentY = get(h.leafNodeLabels(idx(1)),'Extent')*[0;0;0;1];
positionY = get(h.leafNodeLabels(idx(1)),'Position')*[0;1;0];
for i = 2:numel(idx)
position = get(h.leafNodeLabels(idx(i)),'Position');
positionY = max(positionY+extentY,position(2));
position(2) = positionY;
set(h.leafNodeLabels(idx(i)),'Position',position)
end
end
% lower right quadrant (x>0, y<0): order is flipped, labels pushed downwards
idx=flipud(hsY(X(hsY)>0 & sortedY<0));
if numel(idx)
extentY = get(h.leafNodeLabels(idx(1)),'Extent')*[0;0;0;1];
positionY = get(h.leafNodeLabels(idx(1)),'Position')*[0;1;0];
for i = 2:numel(idx)
position = get(h.leafNodeLabels(idx(i)),'Position');
positionY = min(positionY-extentY,position(2));
position(2) = positionY;
set(h.leafNodeLabels(idx(i)),'Position',position)
end
end
% lower left quadrant (x<0, y<0): order is flipped, labels pushed downwards
idx=flipud(hsY(X(hsY)<0 & sortedY<0));
if numel(idx)
extentY = get(h.leafNodeLabels(idx(1)),'Extent')*[0;0;0;1];
positionY = get(h.leafNodeLabels(idx(1)),'Position')*[0;1;0];
for i = 2:numel(idx)
position = get(h.leafNodeLabels(idx(i)),'Position');
positionY = min(positionY-extentY,position(2));
position(2) = positionY;
set(h.leafNodeLabels(idx(i)),'Position',position)
end
end
end
% correct axis limits given the extent of labels
% E holds one [x y width height] row per label plus a last row with the
% current axis box; the new limits are the bounding box of all the rows.
% For a reversed axis the extent origin is shifted by the extent size first.
% NOTE(review): with a single label handle get(...,'Extent') presumably
% returns a numeric vector instead of a cell, which cell2mat would reject -
% confirm the behavior for trees with only one visible branch label
if dispBranchLabels
E = cell2mat(get(h.branchNodeLabels,'Extent'));
if strcmp(get(gca,'XDir'),'reverse')
E(:,1) = E(:,1) - E(:,3);
end
if strcmp(get(gca,'YDir'),'reverse')
E(:,2) = E(:,2) - E(:,4);
end
E=[E;[xlim*[1;0] ylim*[1;0] diff(xlim) diff(ylim)]];
mins = min(E(:,[1 2]));
maxs = max([sum(E(:,[1 3]),2) sum(E(:,[2 4]),2)]);
axis([mins(1) maxs(1) mins(2) maxs(2)])
end
if dispLeafLabels
E = cell2mat(get(h.leafNodeLabels,'Extent'));
if strcmp(get(gca,'XDir'),'reverse')
E(:,1) = E(:,1) - E(:,3);
end
if strcmp(get(gca,'YDir'),'reverse')
E(:,2) = E(:,2) - E(:,4);
end
E=[E;[xlim*[1;0] ylim*[1;0] diff(xlim) diff(ylim)]];
mins = min(E(:,[1 2]));
maxs = max([sum(E(:,[1 3]),2) sum(E(:,[2 4]),2)]);
axis([mins(1) maxs(1) mins(2) maxs(2)])
end
% set terminal nodes labels
% terminal labels are written just beyond the upper axis limit of the
% distance axis, one per terminal node, aligned with the axis ticks
switch renderType
case {'square','angular'}
% X starts as a zero vector with one entry per terminal node
X = tr.x(tr.terminalNodes) * 0;
Y = tr.y(tr.terminalNodes);
switch orientation
case {'left'}
X = X + max(xlim) - sepUnit(1)/2;
h.terminalNodeLabels = text(X,Y,tr.names(tr.terminalNodes));
case {'right'}
X = X + max(xlim) - sepUnit(1)/2;
h.terminalNodeLabels = text(X,Y,tr.names(tr.terminalNodes));
set(h.terminalNodeLabels,'Horizontal','right')
case {'top'}
X = X + max(ylim) - sepUnit(1)/2;
h.terminalNodeLabels = text(Y,X,tr.names(tr.terminalNodes));
set(h.terminalNodeLabels,'Rotation',90)
case {'bottom'}
X = X + max(ylim) - sepUnit(1)/2;
h.terminalNodeLabels = text(Y,X,tr.names(tr.terminalNodes));
set(h.terminalNodeLabels,'Rotation',270)
end
case 'radial'
% placeholder text object so that the handle field always exists
h.terminalNodeLabels = text(0,0,' ');
end
if dispTerminalLabels
set(h.terminalNodeLabels,'Fontsize',min(9,ceil(fontRatio/1.5)));
end
% hide the label groups that were not requested
if ~dispBranchLabels
set(h.branchNodeLabels,'visible','off');
end
if ~dispLeafLabels
set(h.leafNodeLabels,'visible','off');
end
if ~dispTerminalLabels
set(h.terminalNodeLabels,'visible','off');
end
box on
hold off
% store handles
% the handle structure is kept in the figure 'UserData' and is returned
% only when an output argument was requested
set(fig,'UserData',h)
if nargout
handles = h;
end
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function tr = doBasicCalculations(tr,activeBranches,renderType)
% helper function that derives the layout information of the tree
%
% Inputs:
%   tr             - tree structure with fields tree, dist, names and
%                    numBranches
%   activeBranches - logical column, one entry per branch
%   renderType     - 'square', 'angular' or 'radial'
%
% Fields added to tr: numLeaves, numLabels, par, activeNodes, lastleaf,
% activeBranches, activeLeaves, x, terminalNodes and y. Underscores in
% tr.names are replaced by spaces.

nB = tr.numBranches;
nL = nB + 1;
nN = nB + nL;
tr.numLeaves = nL;
tr.numLabels = nN;

% remove underscores from names
for k = 1:nN
    name = tr.names{k};
    name(name=='_') = ' ';
    tr.names{k} = name;
end

% obtain parents for every node: both children of branch b point to nL+b
tr.par(tr.tree(:)) = nL + repmat(1:nB,1,2);

% find active nodes: the children of a branch are active only when the
% branch node itself is active and the branch is flagged as active
isActive = true(nN,1);
for b = nB:-1:1
    isActive(tr.tree(b,:)) = isActive(nL+b) & activeBranches(b);
end
tr.activeNodes = isActive;

% propagate last leaf: children whose first sibling is inactive inherit
% the entry of their parent
lastleaf = 1:nN;
for b = nB:-1:1
    kids = tr.tree(b,:);
    if ~isActive(kids(1))
        lastleaf(kids) = lastleaf(b+nL);
    end
end
tr.lastleaf = lastleaf;

tr.activeBranches = isActive(nL+1:nN) & activeBranches;
tr.activeLeaves = isActive(1:nL);

% find x coordinates: accumulate edge lengths from the parent downwards
x = tr.dist;
for b = nB:-1:1
    kids = tr.tree(b,:);
    x(kids) = x(kids) + x(b+nL);
end
tr.x = x;

% find y coordinates: terminal nodes get consecutive positions 1,2,...
tr.terminalNodes = lastleaf([true,diff(lastleaf(1:nL))~=0]);
y = zeros(nN,1);
y(tr.terminalNodes) = 1:length(tr.terminalNodes);
tr.y = y;

if strcmp(renderType,'square')
    % an active branch sits midway between its two children
    for b = 1:nB
        if tr.activeBranches(b)
            tr.y(b+nL) = mean(tr.y(tr.tree(b,:)));
        end
    end
elseif any(strcmp(renderType,{'angular','radial'}))
    % an active branch takes the position of the child that is more than
    % three times farther than its sibling, otherwise the midpoint
    for b = 1:nB
        if tr.activeBranches(b)
            first = tr.tree(b,1);
            second = tr.tree(b,2);
            if tr.x(first)/tr.x(second)>3
                tr.y(b+nL) = tr.y(first);
            elseif tr.x(second)/tr.x(first)>3
                tr.y(b+nL) = tr.y(second);
            else
                tr.y(b+nL) = mean(tr.y(tr.tree(b,:)));
            end
        end
    end
end
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function correctFigureSize(fig,recommendedHeight,recommendedWidth)
% helper function to increase initial figure size depending on the screen &
% tree sizes
%
% Inputs:
%   fig               - handle of the figure to resize
%   recommendedHeight - desired figure height; 0 leaves the height alone
%   recommendedWidth  - desired figure width; 0 leaves the width alone
%
% The figure is only ever enlarged. For each direction, when the
% recommended size exceeds the current one:
%   * if it fits by moving the lower/left edge while keeping the opposite
%     edge in place, the figure grows that way;
%   * else, if it still fits on the screen, the figure is moved to the
%     screen origin (bottom = 30, left = 0) and takes the recommended size;
%   * otherwise the figure takes the whole usable screen size.

% usable screen size as [width;height]
screenSize = diff(reshape(get(0,'ScreenSize'),2,2),[],2)-[0;100];
% 100 gives extra space for the figure header and win toolbar
position = get(fig,'Position');

% vertical direction: position(2) is the bottom edge, position(4) the height
if recommendedHeight > position(4)
    if recommendedHeight < sum(position([2 4]))
        position(2) = sum(position([2 4])) - recommendedHeight;
        position(4) = recommendedHeight;
    elseif recommendedHeight < screenSize(2)
        position(2) = 30;
        position(4) = recommendedHeight;
    else
        position(2) = 30;
        position(4) = screenSize(2);
    end
end

% horizontal direction: position(1) is the left edge, position(3) the width
if recommendedWidth > position(3)
    if recommendedWidth < sum(position([1 3]))
        position(1) = sum(position([1 3])) - recommendedWidth;
        position(3) = recommendedWidth;
    elseif recommendedWidth < screenSize(1)
        position(1) = 0;
        % the width must come from recommendedWidth; the height value used
        % here before is 0 for 'top'/'bottom' trees, i.e. an invalid width
        position(3) = recommendedWidth;
    else
        position(1) = 0;
        position(3) = screenSize(1);
    end
end
set(fig,'Position',position)

View file

@ -0,0 +1,197 @@
function tr = prune(tr,sel,varargin)
%PRUNE Reduces a phylogenetic tree by removing branch and leaf nodes.
% T2 = PRUNE(T1,NODES) prunes the nodes listed in the NODES vector from
% the tree T1. Any branch (or leaf) node listed in NODES and all their
% descendants will disappear. The respective 'parent' nodes will be
% connected to the respective 'brother' nodes as required. NODES in the
% tree are indexed as [1:NUMLEAVES] for the leaves and as
% [NUMLEAVES+1 : NUMLEAVES+NUMBRANCHES] for the branches. NODES can also
% be a logical array of following sizes: [NUMLEAVES+NUMBRANCHES x 1],
% [NUMLEAVES x 1] or [NUMBRANCHES x 1].
%
% T2 = PRUNE(T1,NODES,'MODE','EXCLUSIVE') changes the pruning mode to
% 'EXCLUSIVE', i.e. only the descendants of NODES will be pruned. Then
% NODES will become leaves as long as they do not have a predecessor in
% the list NODES. In this case pruning is the process of reducing a tree
% by turning some branch nodes into leaf nodes, and removing the leaf
% nodes under the original branch. Default is 'INCLUSIVE' and it behaves
% as explained above, i.e. the listed NODES are also pruned.
%
% The input tree is returned unchanged when NODES selects nothing, when
% the root node ends up selected (with a warning), or, in inclusive mode,
% when only one node would be left (with a warning).
%
% Examples:
%
% % Load a phylogenetic tree created from a protein family:
% tr = phytreeread('pf00002.tree');
% view(tr)
%
% % To remove all the 'mouse' proteins use:
% ind = getbyname(tr,'mouse');
% tr = prune(tr,ind);
% view(tr)
%
% % To remove potential outliers in the tree use:
% [sel,sel_leaves] = select(tr,'criteria','distance','threshold',.3,...
% 'reference','leaves','exclude','leaves','propagate','toleaves');
% tr = prune(tr,~sel_leaves)
% view(tr)
%
% See also PHYTREE, PHYTREE/SELECT, PHYTREE/GET, PHYTREETOOL.
% Copyright 2003-2005 The MathWorks, Inc.
% $Revision: 1.1.4.8.2.1 $ $Date: 2004/11/30 03:45:24 $
% set default
exclusiveMode = false;
if numel(tr)~=1
error('Bioinfo:phytree:prune:NoMultielementArrays',...
'Phylogenetic tree must be an 1-by-1 object.');
end
% keep the untouched input so it can be returned when pruning is refused
btr = tr;
numBranches = size(tr.tree,1);
numLeaves = numBranches + 1;
numLabels = numBranches + numLeaves;
% validate sel
% sel is normalized to a logical column with one entry per node
if islogical(sel)
if numel(sel)==numLabels
sel = sel(:)==true;
elseif numel(sel)==numLeaves
% leaves only: no branch is selected
sel = [sel(:);false(numBranches,1)];
elseif numel(sel)==numBranches
% branches only: no leaf is selected
sel = [false(numLeaves,1);sel(:)];
else
error('Bioinfo:IncorrectLogical',...
'Logical vector must have the same number of elements as nodes in the Phylogenetic Tree');
end
elseif isnumeric(sel) && isreal(sel) && all(sel>=1) && all(sel<=numLabels)
% numeric indices (rounded down) are turned into a logical mask
tem(numLabels)=false;
tem(floor(sel))=true;
sel=tem(:);
else
error('Bioinfo:IncorrectTypeofArguments','Invalid value for NODES');
end
% parse the optional name/value pairs
nvarargin = numel(varargin);
if nvarargin
if rem(nvarargin,2)
error('Bioinfo:IncorrectNumberOfArguments',...
'Incorrect number of arguments to %s.',mfilename);
end
okargs = {'mode',''};
for j=1:2:nvarargin
pname = varargin{j};
pval = varargin{j+1};
% case-insensitive prefix match of the parameter name
k = find(strncmpi(pname,okargs,numel(pname)));
if isempty(k)
error('Bioinfo:UnknownParameterName',...
'Unknown parameter name: %s.',pname);
elseif length(k)>1
error('Bioinfo:AmbiguousParameterName',...
'Ambiguous parameter name: %s.',pname);
else
switch(k)
case 1 % mode
modeOptions = {'exclusive','inclusive'};
modeSelected = strmatch(lower(pval),modeOptions); %#ok
if isempty(modeSelected)
error('Bioinfo:NotValidMode','Not a valid mode.')
end
exclusiveMode = modeSelected==1;
end
end
end
end
% shortcut for an empty sel
if ~sum(sel)
return;
end
% in inclusive mode, if the two children of a branch are selected then the
% parent node should also be selected
if ~exclusiveMode
for ind = 1:numBranches
if all(sel(tr.tree(ind,:)))
sel(ind+numLeaves) = true;
end
end
end
% find descendants not selected under selected nodes
% (walking the branches from the last one down, a selected branch marks
% both of its children)
for ind = numBranches:-1:1
if sel(ind+numLeaves)
sel(tr.tree(ind,:))=true;
end
end
% the last node is the root; a selection that reaches it is rejected
if sel(numLabels)
warning('Bioinfo:PrunedRoot',...
'Can not prune the root node in a Phylogenetic Tree.')
tr=btr; return
end
% obtain parents for every node
% (both children of branch k get numLeaves+k as parent)
parents(tr.tree(:)) = repmat(numLeaves+1:numLabels,2,1)';
if ~exclusiveMode % (the selected nodes are deleted with their descendants)
% find the top selected nodes in order to edit branches
% (selected nodes whose parent is not selected; the appended 0 stands for
% the root, which has no parent)
htop = find(~[sel(parents);0]&sel);
% for every top node do the junction
for ind = 1:length(htop)
g=htop(ind);
mypar = parents(g);
if mypar < numLabels % my parent is NOT the root
% then connect brother to grandparent
mygrpar = parents(mypar); % grandparent
myuncle = setxor(tr.tree(mygrpar-numLeaves,:),mypar); % uncle
mybro = setxor(tr.tree(mypar-numLeaves,:),g); % brother
tr.tree(mygrpar-numLeaves,:) = [myuncle mybro];
% the brother absorbs the edge length of the removed parent
tr.dist(mybro) = tr.dist(mybro) + tr.dist(mypar);
parents(mybro) = mygrpar;
end
sel(mypar) = true; %also delete my par
end
% a tree needs more than a single remaining node
if sum(~sel) == 1
warning('Bioinfo:NotAMinimumTree',...
'The selected nodes lead to only one leaf, Phylogenetic Tree not pruned.')
tr=btr; return
end
% find indexes to change tree
% permuta lists the old indices of the nodes that are kept, ipermuta maps
% an old index to its new index, and permutaBranches lists the kept rows
% of tr.tree
permuta = 1:numLabels;
permuta(sel) = [];
ipermuta(permuta) = 1:length(permuta);
permutaBranches = permuta(permuta>numLeaves)-numLeaves;
% update all tree structure fields
tr.names = tr.names(permuta);
tr.dist = tr.dist(permuta);
% the last remaining node is the root, its distance is reset to 0
tr.dist(end) = 0;
tr.tree = tr.tree(permutaBranches,:);
tr.tree = ipermuta(tr.tree);
else % exclusiveMode (the selected nodes are not deleted, only their descendants)
% unselect leaves which are already in the top
% (a leaf keeps its selection only when its parent is selected)
sel(1:numLeaves)=sel(parents(1:numLeaves));
% find the top selected nodes in order to edit branches
% (selected nodes whose parent is not selected; the appended 1 stands for
% the root, which has no parent)
top = [~sel(parents);1] & sel;
% find the new leaves (no deleted leaves + branches that become leaves)
newLeaves = [~sel(1:numLeaves);top(numLeaves+1:end)];
% find which branches will stay
stayingBranches = ~sel(numLeaves + 1 : numLabels);
% setting new indexes to change the tree architecture
% (new leaves come first, the staying branches follow; ipermuta maps an
% old index to its new index)
permuta = [find(newLeaves);numLeaves+find(stayingBranches)];
ipermuta(permuta) = 1:length(permuta);
% update all tree structure fields
tr.names = tr.names(permuta);
tr.dist = tr.dist(permuta);
tr.dist(end) = 0;
tr.tree = tr.tree(stayingBranches,:);
tr.tree = ipermuta(tr.tree);
% calling phytree with this format to force edge-crossing check
tr = phytree(tr.tree,tr.dist,tr.names);
end % if ~exclusiveMode

View file

@ -0,0 +1,21 @@
function tr = set(tr,varargin) %#ok
%SET Set object properties of a phylogenetic tree object.
%
% Properties in a phylogenetic tree object can not be manually set.
% A PHYTREE object must be created by its constructor method PHYTREE
% or by using one of the functions: PHYTREEREAD, SEQLINKAGE, SEQNEIGHJOIN.
%
% TR = SET(TR,...) ignores every additional argument and returns TR
% unchanged: this function has no executable statements, because the error
% call below is commented out.
%
% See also: PHYTREE, PHYTREEREAD, SEQLINKAGE, SEQNEIGHJOIN.
% Copyright 2003-2006 The MathWorks, Inc.
% $Revision: 1.1.8.2.2.1 $ $Author: batserve $ $Date: 2006/07/27 21:37:51 $
%
% Disabled code, kept for reference. The first statement would reject any
% call with an error; the remaining lines look like an experiment to
% overwrite the leaf names (NOTE(review): confirm intent before reviving).
% error('Bioinfo:phytree:set:NotAllowedMethod',...
% ['Properties in a phylogenetic tree object can not be manually set.\n'...
% 'A PHYTREE object must be created by its constructor method PHYTREE\n'...
% 'or by using one of the functions: PHYTREEREAD, SEQLINKAGE, SEQNEIGHJOIN.'])
% numBranches = size(tr.tree,1);
% numLeaves = numBranches + 1;
%
% tr.names(1:numLeaves)= varargin{2}';

View file

@ -0,0 +1,313 @@
function [sel,sell,selb] = select(tr,varargin)
%SELECT Selects tree branches and leaves.
%
% S = SELECT(T,N) returns a logical vector S of size [NUMNODES x 1]
% indicating the N closest nodes to the root node of the phylogenetic tree
% object T (NUMNODES = NUMLEAVES + NUMBRANCHES). The first criterion used
% is branch levels, then patristic distance, also known as tree distance.
% By default SELECT uses INF as the value of N, therefore SELECT(T) will
% return a vector of 'trues'. A non-integer N is rounded down.
%
% S = SELECT(...,'REFERENCE',R) changes the reference point(s) used to
% measure the closeness. R can be 'root' (default) or 'leaves'. When using
% 'leaves', a node to be tested may have different distances to its
% descendant leaves, which are the references (e.g. a non-ultrametric
% tree); if this is the case the minimum distance to any descendant leaf
% is considered. R may also be a scalar index which points to any node of
% the tree, or a logical vector with NUMNODES, NUMLEAVES or NUMBRANCHES
% elements in which exactly one element is true.
%
% S = SELECT(...,'CRITERIA',C) changes the criterion used to measure
% closeness. If C='levels' (default) then the first criterion is branch
% levels and then patristic distance. If C='distance' then the first
% criterion is patristic distance and then branch levels.
%
% S = SELECT(...,'THRESHOLD',V) selects all the nodes whose closeness,
% measured with the current 'CRITERIA' and 'REFERENCE', is below the
% threshold value V (the comparison in the code is a strict "<"). If N is
% not specified N = INF, otherwise the output can be further size limited
% by N.
%
% S = SELECT(...,'EXCLUDE',E) sets a post-filter which excludes all the
% branch nodes from S when E=='branches' or all the leaf nodes when
% E=='leaves'. The default is 'none'.
%
% S = SELECT(...,'PROPAGATE',P) activates a post-functionality which
% propagates the selected nodes to the leaves when P=='toleaves' or
% towards the root finding a common ancestor when P=='toroot'. The
% default is 'none', P may also be 'both'. The 'PROPAGATE' switch acts
% after the 'EXCLUDE' switch.
%
% [S,SELLEAVES,SELBRANCHES] = SELECT(...) returns two additional logical
% vectors, one for the selected leaves and one for the selected branches.
%
% Examples:
%
% % Load a phylogenetic tree created from a protein family:
% tr = phytreeread('pf00002.tree');
%
% % To find close products for a given protein (e.g. vips_human):
% ind = getbyname(tr,'vips_human');
% [sel,sel_leaves] = select(tr,'criteria','distance',...
% 'threshold',0.6,'reference',ind);
% view(tr,sel_leaves)
%
% % To find potential outliers in the tree use:
% [sel,sel_leaves] = select(tr,'criteria','distance','threshold',.3,...
% 'reference','leaves','exclude','leaves','propagate','toleaves');
% view(tr,~sel_leaves)
%
%
% See also PHYTREE, PHYTREE/GET, PHYTREE/PDIST, PHYTREE/PRUNE, PHYTREETOOL.
%
% Copyright 2003-2006 The MathWorks, Inc.
% $Revision: 1.1.6.7.2.1 $ $Author: batserve $ $Date: 2006/07/27 21:37:50 $
if numel(tr)~=1
error('Bioinfo:phytree:select:NoMultielementArrays',...
'Phylogenetic tree must be an 1-by-1 object.');
end
% set defaults
V=inf;
CriteriaIsDistance = false;
ReferenceIs = 'root';
ExcludeSwitch = false;
PostPropagate = false;
% check if the first argument is N, otherwise set N to its default;
% first_arg is the position (counting TR as argument 1) of the first
% parameter name
if (nargin>1 && isnumeric(varargin{1}) && isreal(varargin{1}))
N = floor(varargin{1});
first_arg = 3;
else
N = inf;
first_arg = 2;
end
% node numbering: leaves are 1..numLeaves, branches follow up to numLabels
numBranches = size(tr.tree,1);
numLeaves = numBranches + 1;
numLabels = numBranches + numLeaves;
% identify input arguments
if nargin - first_arg + 1 > 0
% the remaining arguments must come in name/value pairs
if rem(nargin - first_arg,2) == 0
error('Bioinfo:phytree:select:IncorrectNumberOfArguments',...
'Incorrect number of arguments to %s.',mfilename);
end
okargs = {'reference','criteria','threshold','exclude','propagate'};
% j indexes VARARGIN (which does not contain TR), hence first_arg - 1
for j=first_arg - 1 : 2 : nargin - first_arg + 1
pname = varargin{j};
pval = varargin{j+1};
% case-insensitive prefix match of the parameter name
k = find(strncmpi(pname,okargs,numel(pname)));
if isempty(k)
error('Bioinfo:phytree:select:UnknownParameterName',...
'Unknown parameter name: %s.',pname);
elseif length(k)>1
error('Bioinfo:phytree:select:AmbiguousParameterName',...
'Ambiguous parameter name: %s.',pname);
else
switch(k)
case 1 % reference
if islogical(pval)
% expand a leaves-only or branches-only mask to a full node mask
if numel(pval)==numLabels
pval = pval(:)==true;
elseif numel(pval)==numLeaves
pval = [pval(:);false(numBranches,1)];
elseif numel(pval)==numBranches
pval = [false(numLeaves,1);pval(:)];
else
error('Bioinfo:phytree:select:InvalidSizeLogicalReferenceNode',...
'When reference node is a logical vector it must contain NUMNODES, NUMLEAVES or NUMBRANCHES elements.')
end
% the mask must designate exactly one node
pval = find(pval);
ReferenceIs = 'node';
if numel(pval) ~= 1
error('Bioinfo:phytree:select:InvalidValuesLogicalReferenceNode',...
'When reference node is a logical vector one element must be true and all others false.')
else
ReferenceNode = pval;
end
elseif isnumeric(pval)
ReferenceIs = 'node';
if numel(pval) ~= 1
error('Bioinfo:phytree:select:InvalidSizeReferenceNode',...
'Reference node must be scalar.')
% the index must equal one of the integers 1..numLabels
elseif all(pval~=1:numLabels)
error('Bioinfo:phytree:select:InvalidValueReferenceNode',...
'Incorrect reference node.')
else
ReferenceNode = pval;
end
else
% string reference, matched by prefix
h = strmatch(lower(pval),{'root','leaves'}); %#ok
if numel(h)
switch(h)
case 1
ReferenceIs = 'root';
case 2
ReferenceIs = 'leaves';
end
else error('Bioinfo:phytree:select:InvalidStringReferenceNode',...
'Invalid string for the reference node.');
end
end
case 2 % criteria
h = strmatch(lower(pval),{'distance','levels'}); %#ok
if numel(h)
CriteriaIsDistance = (h == 1);
else error('Bioinfo:phytree:select:InvalidCriteria',...
'Invalid string for criteria.');
end
case 3 % threshold
V = pval;
if (~isnumeric(V) || ~isreal(V) || numel(V)>1)
error('Bioinfo:phytree:select:InvalidThreshold',...
'Invalid value for V.');
end
case 4 % exclude
h = strmatch(lower(pval),{'branches','leaves','none'}); %#ok
if numel(h)
switch(h)
case 1
ExcludeSwitch = true;
ExcludeType = 'branches';
case 2
ExcludeSwitch = true;
ExcludeType = 'leaves';
case 3
ExcludeSwitch = false;
end
else error('Bioinfo:phytree:select:InvalidExcludeOption',...
'Invalid string for exclude switch.');
end
case 5 % propagate
h = strmatch(lower(pval),{'toleaves','toroot','both','none'}); %#ok
if numel(h)
switch(h)
case 1
PostPropagate = true;
PostPropagateType = 'toleaves';
case 2
PostPropagate = true;
PostPropagateType = 'toroot';
case 3
PostPropagate = true;
PostPropagateType = 'both';
case 4
PostPropagate = false;
end
else error('Bioinfo:phytree:select:InvalidPostPropagate',...
'Invalid string for post-propagate switch.');
end
end % switch(k)
end % if ...
end % for j=...
end % nargin
% calculate the distance (and levels) of every node to the reference
levels2Ref = zeros(numLabels,1);
switch ReferenceIs
case 'root'
% calculate the distance to the root for every node: walking the
% branches from the last one down, each child adds the accumulated
% distance of its parent and sits one level below it
dist2Ref = tr.dist;
for ind = numBranches:-1:1
dist2Ref(tr.tree(ind,:)) = ...
dist2Ref(tr.tree(ind,:)) + dist2Ref(ind+numLeaves);
levels2Ref(tr.tree(ind,:)) = levels2Ref(ind+numLeaves) + 1;
end
case 'leaves'
% calculate the distance to the closest leaf for every node: leaves
% stay at zero and every branch takes the minimum over its two children
dist2Ref = zeros(numLabels,1);
for ind = 1:numBranches
dist2Ref(ind+numLeaves) = ...
min(dist2Ref(tr.tree(ind,:))+tr.dist(tr.tree(ind,:)));
levels2Ref(ind+numLeaves) = min(levels2Ref(tr.tree(ind,:))) + 1;
end
case 'node'
% multiplying the square all-nodes pairwise matrix returned by PDIST by
% an indicator vector extracts the column of the reference node
refVector = zeros(numLabels,1);
refVector(ReferenceNode)=1;
dist2Ref = pdist(tr,'SquareForm',true,'nodes','all')...
* refVector;
% with every edge length set to one the same call counts levels
tr.dist = ones(numLabels,1); % to count now levels !
levels2Ref = pdist(tr,'SquareForm',true,'nodes','all') ...
* refVector;
end
% applies the threshold value
% NOTE(review): the comparison is strict (<), so a node whose closeness
% equals V is not selected; earlier help text for this function said
% "less or equal" - confirm which behavior is intended.
if CriteriaIsDistance
sel = dist2Ref < V;
else % ~CriteriaIsDistance
sel = levels2Ref < V;
end % if CriteriaIsDistance
% needs to remove additional nodes because of N
if sum(sel)>N
% h orders all nodes by the primary criterion, ties broken by the other
if CriteriaIsDistance
[dum,h]=sortrows([dist2Ref levels2Ref]); %#ok
else % ~CriteriaIsDistance
[dum,h]=sortrows([levels2Ref dist2Ref]); %#ok
end % if CriteriaIsDistance
% g lists the selected nodes in that order; all after the Nth are dropped
g=h(sel(h));
sel(g(N+1:end))=false;
end
% exclude option
if ExcludeSwitch
switch ExcludeType
case 'branches'
sel((1+numLeaves):numLabels) = false;
case 'leaves'
sel(1:numLeaves) = false;
end
end
% post-propagate option
if PostPropagate
% expands all the current nodes towards the leaves
if any(strcmp({'toleaves','both'},PostPropagateType))
for ind = numBranches:-1:1
if sel(ind+numLeaves)
sel(tr.tree(ind,:))=true;
end
end
end
% propagates towards the root finding the common ancestors
if any(strcmp({'toroot','both'},PostPropagateType))
% find closest common branch for every pair of nodes
% diagonal is invalid ! but not needed
% initializing full matrix
% NOTE(review): branch indices are stored as int16 - confirm that trees
% never have more branches than intmax('int16')
commf = zeros(numLabels,'int16');
children = false(1,numLabels);
for ind = numBranches:-1:1
% mark branch ind together with all of its descendants
children(:) = false;
children(ind+numLeaves) = true;
for ind2 = ind:-1:1
if children(ind2+numLeaves)
children(tr.tree(ind2,:))=true;
end
end
% later (lower) branches overwrite earlier ones, leaving for every pair
% the lowest-numbered branch that contains both nodes
commf(children,children)=int16(ind);
end
% keep only pairs of selected nodes, discard the diagonal and collect
% the distinct common-ancestor branches
commf = commf(sel,sel);
commf = commf - diag(diag(commf));
commf = unique(commf(commf(:)>0));
sel(commf+numLeaves) = true;
% now propagates towards the common ancestor
for ind = 1:double(max(commf))
if any(sel(tr.tree(ind,:)))
sel(ind+numLeaves) = true;
end
end
end
end
% optional outputs: the selection split into its leaf and branch parts
if nargout > 1
sell = sel(1:numLeaves);
end
if nargout > 2
selb = sel(numLeaves+1:numLabels);
end

View file

@ -0,0 +1,91 @@
function subtr = subtree(tr,nodes)
%SUBTREE Extracts a subtree.
%
%   T2 = SUBTREE(T1,NODES) builds a new tree T2 rooted at the first common
%   ancestor of the nodes listed in NODES. Nodes of T1 are numbered
%   1:NUMLEAVES for the leaves and NUMLEAVES+1:NUMLEAVES+NUMBRANCHES for
%   the branches. NODES is either a vector of such indices or a logical
%   array with NUMLEAVES+NUMBRANCHES, NUMLEAVES or NUMBRANCHES elements.
%
%   Example:
%
%      % Load a phylogenetic tree created from a protein family:
%      tr = phytreeread('pf00002.tree')
%
%      % Get the subtree that contains the VIPS and CGRR human proteins:
%      sel = getbyname(tr,{'vips_human','cgrr_human'});
%      sel = any(sel,2);
%      tr = subtree(tr,sel)
%      view(tr);
%
%   See also PHYTREE, PHYTREE/PRUNE, PHYTREE/SELECT, PHYTREE/GET,
%   PHYTREE/GETBYNAME.

% Copyright 2003-2005 The MathWorks, Inc.
% $Revision: 1.1.8.1 $ $Author: batserve $ $Date: 2005/06/09 21:57:04 $

if numel(tr)~=1
    error('Bioinfo:phytree:subtree:NoMultielementArrays',...
        'Phylogenetic tree must be an 1-by-1 object.');
end

nBranches = size(tr.tree,1);
nLeaves   = nBranches + 1;
nNodes    = nBranches + nLeaves;

% turn NODES into a logical column mask covering every node of the tree
if islogical(nodes)
    nGiven = numel(nodes);
    if nGiven==nNodes
        nodes = nodes(:)==true;
    elseif nGiven==nLeaves
        nodes = [nodes(:);false(nBranches,1)];
    elseif nGiven==nBranches
        nodes = [false(nLeaves,1);nodes(:)];
    else
        error('Bioinfo:phytree:subtree:IncorrectSizeInputVector',...
            'Logical vector must have the same number of elements as nodes in the Phylogenetic Tree.');
    end
elseif isnumeric(nodes) && isreal(nodes) && all(nodes(:)>=1) && all(nodes(:)<=nNodes)
    mask = false(nNodes,1);
    mask(floor(nodes(:))) = true;
    nodes = mask(:);
else
    error('Bioinfo:phytree:subtree:InvalidInputNodes',...
        'Invalid value for NODES.');
end

% without any selected branch, at least two leaves are required
noBranchPicked = ~any(nodes(nLeaves+1:nNodes));
if noBranchPicked && (sum(nodes(1:nLeaves))<2)
    error('Bioinfo:phytree:subtree:InvalidSubtree',...
        'Subtree must contain at least two leaves.');
end

% count, for every node, how many selected nodes lie at or below it; the
% first node whose count equals the total is the common ancestor
hits = double(nodes);
for b = 1:nBranches
    kids = tr.tree(b,:);
    hits(nLeaves+b) = hits(nLeaves+b) + sum(hits(kids));
end
commonAncestor = find(hits==sum(nodes),1);

% flag the common ancestor and push the flag down to all its descendants
keep = false(1,nNodes);
keep(commonAncestor) = true;
for node = commonAncestor:-1:nLeaves+1
    keep(tr.tree(node-nLeaves,:)) = keep(node);
end

% renumber the kept nodes consecutively and assemble the new tree
oldIdx = find(keep);
nKept  = length(oldIdx);
newIdx(oldIdx) = 1:nKept;
subtrNumLeaves = (nKept + 1)/2;
keptBranchRows = oldIdx(subtrNumLeaves+1:end) - nLeaves;

subtr = phytree;
subtr.tree  = newIdx(tr.tree(keptBranchRows,:));
subtr.dist  = tr.dist(oldIdx);
subtr.names = tr.names(oldIdx);
subtr.dist(end) = 0;   % the new root carries no distance

1618
matlab/graph/@phyTree/View.m Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,54 @@
function W = weights(tr)
%WEIGHTS Tree based sequence weights.
%
%   W = WEIGHTS(T) Calculates branch proportional weights for every leaf in
%   the tree using the Thompson-Higgins-Gibson method. The distance of every
%   segment of the tree is adjusted by dividing it by the number of leaves
%   it contains. The sequence weights are the adjusted patristic distances
%   between every leaf and the root, scaled so that the largest weight is 1.
%
%   When the largest adjusted distance is exactly zero (for example a tree
%   with a single leaf, or a tree whose distances are all zero) the scaling
%   is undefined; in that case every leaf receives the weight 1 instead of
%   NaN.
%
%   Example:
%
%      % Create an ultrametric tree with specified branch distances
%      bd = [1 2 3]';
%      tr_1 = phytree([1 2;3 4;5 6],bd)
%      view(tr_1)
%      weights(tr_1)
%
%   See also MULTIALIGN, PHYTREE, PROFALIGN, SEQLINKAGE.

% References:
% J.D. Thompson, D.G. Higgins, and T.J. Gibson. Nucleic Acids Res. (1994)
% 22(22):4673-4680.
% S.Henikoff and J. G. Henikoff. JMB. (1994) 243(4):574--578.
%
% Copyright 2003-2005 The MathWorks, Inc.
% $Revision: 1.1.8.2 $Author: batserve $ $Date: 2005/06/17 20:19:24 $

if numel(tr)~=1
    error('Bioinfo:phytree:weights:NoMultielementArrays',...
        'Phylogenetic tree must be an 1-by-1 object.');
end

numBranches = size(tr.tree,1);
numLeaves = numBranches + 1;
numLabels = numBranches + numLeaves;

% calculate the branch width: number of leaves below every node
branchWidth = ones(numLabels,1);
for ind = 1:numBranches
    branchWidth(numLeaves+ind) = sum(branchWidth(tr.tree(ind,:)));
end

% adjust the distances: every edge is shared by the leaves it leads to
tr.dist = tr.dist ./ branchWidth;

% calculate the distance of every leaf to the root, walking from the root
% (last branch) down so parents are complete before their children
cdist = tr.dist;
for ind = numBranches:-1:1
    cdist(tr.tree(ind,:)) = cdist(tr.tree(ind,:)) + cdist(ind+numLeaves);
end

W = cdist(1:numLeaves);

% scale to a maximum of one; a zero maximum would turn every weight into
% NaN (0/0), so fall back to equal weights in that degenerate case
maxW = max(W);
if maxW == 0
    W = ones(size(W));
else
    W = W./maxW;
end

View file

@ -0,0 +1,103 @@
function sel = getbyname(tr,query,varargin)
%GETBYNAME Selects branches and leaves by name.
%
%   S = GETBYNAME(T,EXPRESSION) returns a logical vector S of size
%   [NUMNODES x 1] that is true for every node of the phylogenetic tree T
%   whose name matches the regular expression EXPRESSION, ignoring case.
%   See help REGEXP for the symbols allowed in EXPRESSION.
%
%   When EXPRESSION is a cell array of strings, S is a logical matrix with
%   one column per query.
%
%   S = GETBYNAME(T,STRING,'EXACT',true) looks for exact matches only
%   (ignoring case). When STRING is a cell array of strings, S is a numeric
%   column vector holding, for every node, the position in STRING of the
%   query it matched (0 where nothing matched).
%
%   Example:
%
%      % Load a phylogenetic tree created from a protein family:
%      tr = phytreeread('pf00002.tree');
%
%      % Select all the 'mouse' and 'human' proteins:
%      sel = getbyname(tr,{'mouse','human'});
%      view(tr,any(sel,2));
%
%   See also PHYTREE, PHYTREE/PRUNE, PHYTREE/SELECT, PHYTREE/GET.

% Copyright 2003-2005 The MathWorks, Inc.
% $Revision: 1.1.6.5 $ $Author: batserve $ $Date: 2005/06/09 21:55:55 $

if numel(tr)~=1
    error('Bioinfo:phytree:getbyname:NoMultielementArrays',...
        'Phylogenetic tree must be an 1-by-1 object.');
end

% ---- parse the optional name/value arguments ----
exactOnly = false;
if nargin > 2
    validNames = {'exact',''};
    for argPos = 1:2:nargin-2
        optName = varargin{argPos};
        hit = strmatch(lower(optName), validNames); %#ok
        if isempty(hit)
            error('Bioinfo:phytree:getbyname:UnknownParameterName',...
                'Unknown parameter name: %s.',optName);
        end
        if length(hit)>1
            error('Bioinfo:phytree:getbyname:AmbiguousParameterName',...
                'Ambiguous parameter name: %s.',optName);
        end
        if hit == 1
            if nargin == 3
                % a lone 'exact' flag, given without a value, means true
                exactOnly = true;
            else
                exactOnly = opttf(varargin{argPos+1});
                if isempty(exactOnly)
                    error('Bioinfo:phytree:getbyname:InputOptionNotLogical',...
                        '%s must be a logical value, true or false.',...
                        upper(char(validNames(hit))));
                end
            end
        end
    end
end

% ---- run the query ----
numNames = numel(tr.names);
if ~iscell(query)
    % a single string of chars
    if exactOnly
        sel = strcmpi(query,tr.names);
    else
        try
            matches = regexpi(tr(:).names,query);
        catch
            error('Bioinfo:phytree:getbyname:IncorrectRegularExpression',...
                ['The query expression produced the following error in ' ...
                'REGEXPI: \n%s'],lasterr);
        end
        sel = ~cellfun('isempty',matches);
    end
elseif exactOnly
    % several exact queries: record which query each node matched
    sel = zeros(numNames,1);
    for q = 1:numel(query)
        sel(strcmpi(query{q},tr.names)) = q;
    end
else
    % several regular expressions: one logical column per query
    sel = false(numNames,numel(query));
    for q = 1:numel(query)
        try
            matches = regexpi(tr(:).names,query{q});
        catch
            error('Bioinfo:phytree:getbyname:IncorrectRegularExpression',...
                ['The query expression produced the following error in ' ...
                'REGEXPI: \n%s'],lasterr);
        end
        sel(:,q) = ~cellfun('isempty',matches);
    end
end

View file

@ -0,0 +1,65 @@
function [ptrs,dist,names] = getcanonical(tr)
%GETCANONICAL Calculates the canonical form of a phylogenetic tree.
%
%   PTRS = GETCANONICAL(TREE) returns the branch pointers of the canonical
%   form of a phylogenetic tree. In a canonical tree the leaves are ordered
%   alphabetically and the branches are ordered first by their width (the
%   number of leaves below them) and then by their first element (the
%   alphabetically smallest leaf below them). Trees with the same skeleton
%   therefore yield the same pointers, whatever the original order of
%   their leaves and branches.
%
%   [PTRS,DIST,NAMES] = GETCANONICAL(TREE) also returns the distances and
%   node names re-ordered in the same way.
%
%   Example:
%      % create two trees with same skeleton but slightly different distances
%      b = [1 2; 3 4; 5 6; 7 8;9 10];
%      tr_1 = phytree(b,[.1 .2 .3 .3 .4 ]');
%      tr_2 = phytree(b,[.2 .1 .2 .3 .4 ]');
%      plot(tr_1)
%      plot(tr_2)
%
%      % compare if the two trees are isomorphic
%      isequal(getcanonical(tr_1),getcanonical(tr_2))
%
%   See also PHYTREE, PHYTREEREAD, PHYTREE/GETBYNAME, PHYTREE/SELECT,
%   PHYTREE/SUBTREE.

% Copyright 2003-2006 The MathWorks, Inc.
% $Revision: 1.1.8.2 $ $Author: batserve $ $Date: 2006/06/16 20:06:42 $

if numel(tr)~=1
    error('Bioinfo:phytree:getcanonical:NoMultielementArrays',...
        'Phylogenetic tree must be an 1-by-1 object.');
end

nBranches = size(tr.tree,1);
nLeaves   = nBranches + 1;
nNodes    = nBranches + nLeaves;

% alphabetical rank of every leaf (inverse of the sorting permutation)
[sortedNames,alphaOrder] = sort(tr.names(1:nLeaves)); %#ok
leafRank = alphaOrder;
leafRank(alphaOrder) = 1:nLeaves;

% for every node: number of leaves below it and smallest leaf rank below it
width     = ones(nNodes,1);
firstLeaf = [leafRank;inf(nBranches,1)];
for b = 1:nBranches
    kids = tr.tree(b,:);
    width(nLeaves+b)     = sum(width(kids));
    firstLeaf(nLeaves+b) = min(firstLeaf(kids));
end

% canonical order of the nodes and the map from old to new numbering
[sortedKeys,newOrder] = sortrows([width firstLeaf]); %#ok
oldToNew(newOrder) = 1:nNodes;

% re-order the branch rows, renumber their children and sort each pair
branchRows = newOrder(nLeaves+1:nNodes) - nLeaves;
ptrs = sort(oldToNew(tr.tree(branchRows,:)),2);

if nargout > 1
    dist = tr.dist(newOrder);
end
if nargout > 2
    names = tr.names(newOrder);
end

View file

@ -0,0 +1,62 @@
function [cm, lab, dist] = getmatrix(tr,varargin)
%GETMATRIX converts a Phytree Object into a relationship matrix.
%
%   [MATRIX, ID, DISTANCES] = GETMATRIX(T) converts the phylogenetic tree
%   object T into a logical sparse matrix in which a 1 at (row, column)
%   means that the branch node ROW is the parent of the node COLUMN; the
%   child can be a leaf or another branch. Nodes are numbered with the
%   leaves first (1 to NUMLEAVES) and the branches after them (NUMLEAVES+1
%   to NUMLEAVES+NUMBRANCHES), the root being the last node. ID returns the
%   node names of T and DISTANCES returns the distance vector of T, both in
%   that node order.
%
%   No optional parameters are defined; arguments after T must still come
%   in name/value pairs and are checked against an internal placeholder
%   name.
%
%   Example:
%
%      T = phytreeread('pf00002.tree')
%      [MATRIX ID DIST] = getmatrix(T);
%
%   See also PHYTREE, PHYTREE/GET, PHYTREE/PDIST, PHYTREE/PRUNE, PHYTREETOOL.

% Copyright 2006 The MathWorks, Inc.
% $Revision: 1.1.6.1 $ $Date: 2006/06/16 20:06:43 $

%%% check arguments
if nargin > 1
    if rem(nargin,2) == 0
        error('Bioinfo:phytree:getmatrix:IncorrectNumberOfArguments',...
            'Incorrect number of arguments to %s.',mfilename);
    end
    validNames = {'no_input_arguments'};
    for argPos = 1:2:nargin-2
        optName  = varargin{argPos};
        optValue = varargin{argPos+1};
        if isstruct(optValue)
            error('Bioinfo:phytree:getmatrix:StructParamError',...
                'parameter cannot be a struct');
        end
        hit = find(strncmpi(optName, validNames,length(optName)));
        if isempty(hit)
            error('Bioinfo:phytree:getmatrix:UnknownParameterName',...
                'Unknown parameter name: %s.',optName);
        end
        if length(hit)>1
            error('Bioinfo:phytree:getmatrix:AmbiguousParameterName',...
                'Ambiguous parameter name: %s.',optName);
        end
        % a recognised name currently triggers no action
    end
end

nLeaves = size(tr.tree,1)+1;
nNodes  = nLeaves + nLeaves -1;

% every branch appears twice as a row index, once for each of its two
% children; tr.tree(:) lists the first children, then the second ones
parentIdx = nLeaves+1:nNodes;
rowIdx = [parentIdx parentIdx];
colIdx = tr.tree(:);
cm = sparse(rowIdx,colIdx,true,nNodes,nNodes);

if nargout>1
    lab = tr.names;
end
if nargout>2
    dist = tr.dist;
end

View file

@ -0,0 +1,133 @@
function str = getnewickstr(tr,varargin)
%GETNEWICKSTR creates a NEWICK formatted string.
%
%   STR = GETNEWICKSTR(TREE) returns the NEWICK formatted string of the
%   phylogenetic tree object TREE. Node names are copied into the output
%   verbatim: characters such as '%' or '\' in a name are not interpreted.
%
%   GETNEWICKSTR(...,'DISTANCES',false) excludes the distances from the
%   output. Default is true.
%
%   GETNEWICKSTR(...,'BRANCHNAMES',true) includes the branch names into the
%   output. Default is false.
%
%   For a tree without branches (a single node) the output is the name of
%   that node followed by ';', or just ';' when the tree has no names.
%
%   The NEWICK tree format is found at:
%   http://evolution.genetics.washington.edu/phylip/newicktree.html
%
%   Example:
%
%      seqs = int2nt(ceil(rand(10)*4));     % some random sequences
%      dist = seqpdist(seqs,'alpha','nt');  % pairwise distances
%      tree = seqlinkage(dist);             % construct phylogenetic tree
%      str  = getnewickstr(tree)            % get the NEWICK string
%
%   See also PHYTREE, PHYTREEREAD, PHYTREEWRITE, PHYTREETOOL, SEQLINKAGE,
%   PHYTREE/GET, PHYTREE/GETBYNAME, PHYTREE/GETCANONICAL.

% Undocumented:
% GETSTR(...,'MULTILINE',true) introduces 'new line' characters for a
% multi-line output. This option is used by PHYTREEWRITE. Default is
% false.
% Copyright 2003-2005 The MathWorks, Inc.
% $Revision: 1.1.8.1 $ $Author: batserve $ $Date: 2005/06/09 21:55:57 $

if numel(tr)~=1
    error('Bioinfo:phytree:getstr:NoMultielementArrays',...
        'Phylogenetic tree must be an 1-by-1 object.');
end

numBranches = size(tr.tree,1);
numLeaves = numBranches + 1;
numLabels = numBranches + numLeaves;

% set defaults
writeDistances = true;
writeBranchNames = false;
multiLine = false;

% parse the optional name/value pairs
nvarargin = numel(varargin);
if nvarargin
    if rem(nvarargin,2)
        error('Bioinfo:phytree:getstr:IncorrectNumberOfArguments',...
            'Incorrect number of arguments to %s.',mfilename);
    end
    okargs = {'multiline','distances','branchnames'};
    for j=1:2:nvarargin
        pname = varargin{j};
        pval = varargin{j+1};
        k = find(strncmpi(pname,okargs,numel(pname)));
        if isempty(k)
            error('Bioinfo:phytree:getstr:UnknownParameterName',...
                'Unknown parameter name: %s.',pname);
        elseif length(k)>1
            error('Bioinfo:AmbiguousParameterName',...
                'Ambiguous parameter name: %s.',pname);
        else
            switch(k)
                case 1 % multi-lines
                    multiLine = opttf(pval);
                    if isempty(multiLine)
                        error('Bioinfo:phytree:getstr:multiLineOptionNotLogical',...
                            '%s must be a logical value, true or false.',...
                            upper(char(okargs(k))));
                    end
                case 2 % write distances
                    writeDistances = opttf(pval);
                    if isempty(writeDistances)
                        error('Bioinfo:phytree:getstr:writeDistancesOptionNotLogical',...
                            '%s must be a logical value, true or false.',...
                            upper(char(okargs(k))));
                    end
                case 3 % write branch names
                    writeBranchNames = opttf(pval);
                    if isempty(writeBranchNames)
                        error('Bioinfo:phytree:getstr:writeBranchNamesOptionNotLogical',...
                            '%s must be a logical value, true or false.',...
                            upper(char(okargs(k))));
                    end
            end
        end
    end
end

% a tree without branches has nothing to nest; the loop below would never
% create the root string, so emit the single node directly
if numBranches == 0
    if isempty(tr.names)
        str = ';';
    else
        str = [tr.names{1} ';'];
    end
    return
end

% separator inserted after every '(' , ',' and ')'. The real newline
% character is inserted here directly; running the assembled string through
% SPRINTF would treat '%' and '\' inside node names as format directives.
if multiLine
    nl = char(10);
else
    nl = '';
end

% label of every non-root node: optional name followed by optional distance
namedist = cell(numLabels,1);
for i = 1:numLabels-1
    if i <= numLeaves || writeBranchNames
        label = tr.names{i};
    else
        label = '';
    end
    if writeDistances
        namedist{i} = [label ':' num2str(tr.dist(i))];
    else
        namedist{i} = label;
    end
end

% the root closes the string and never carries a distance
if writeBranchNames
    namedist{numLabels} = [tr.names{numLabels} ';'];
else
    namedist{numLabels} = ';';
end

% assemble the nested string; a child that is itself a branch has a lower
% row index than its parent, so its text has already been built
branchstr = cell(numLabels,1);
for i = 1:numBranches
    if tr.tree(i,1) > numLeaves
        t1 = branchstr{tr.tree(i,1)};
    else
        t1 = namedist{tr.tree(i,1)};
    end
    if tr.tree(i,2) > numLeaves
        t2 = branchstr{tr.tree(i,2)};
    else
        t2 = namedist{tr.tree(i,2)};
    end
    branchstr{i+numLeaves} = ...
        ['(' nl t1 ',' nl t2 ')' nl namedist{i+numLeaves}];
end

str = branchstr{numLabels};

View file

@ -0,0 +1,12 @@
function openvar(name, tr) %#ok
%OPENVAR Opens a phylogenetic tree object for graphical editing.
%
%   OPENVAR(NAME,TR) calls VIEW on the tree TR. NAME is not used. If VIEW
%   raises an error, its message is shown in a modal error dialog titled
%   'Inspection error' instead of being propagated to the caller.

% Copyright 2003-2006 The MathWorks, Inc.
% $Revision: 1.1.6.3 $ $Author: batserve $ $Date: 2006/06/16 20:06:44 $

try
    view(tr);
catch
    % report the failure in a dialog window rather than rethrowing it
    failureText = lasterr;
    errordlg(failureText, 'Inspection error', 'modal');
end

View file

@ -0,0 +1,153 @@
function [dist,comm] = pdist(tr,varargin)
%PDIST computes the pairwise patristic distance.
%
% D = PDIST(TREE) returns a vector D containing the patristic
% distances between every possible pair of leaf nodes in the phylogenetic
% tree object TREE. The distance is computed following the path through
% the branches of the tree.
%
% The output vector D is arranged in the order of ((2,1),(3,1),...,
% (M,1),(3,2),...(M,3),.....(M,M-1)), i.e. the lower left triangle of the
% full M-by-M distance matrix. To get the distance between the Ith and
% Jth nodes (I > J) use the formula D((J-1)*(M-J/2)+I-J). M is the
% number of leaves.
%
% D = PDIST(...,'NODES',N) indicates the nodes to be included in the
% computation. N can be 'leaves' (default) or 'all'. In the latter
% case the output will be ordered as before, but M is the total number of
% nodes in the tree, i.e. NUMLEAVES+NUMBRANCHES. The code also accepts
% 'branches', which keeps only the pairs in which both nodes are branches.
%
% D = PDIST(...,'SQUAREFORM',true) converts the output into a square
% format, so that D(I,J) denotes the distance between the Ith and the Jth
% node. The output matrix is symmetric and has a zero diagonal.
%
% D = PDIST(...,'CRITERIA',C) changes the criteria used to relate pairs.
% C can be 'distance' (default) or 'levels'.
%
% [D,C] = PDIST(TREE) returns in C the index of the closest common parent
% nodes for every possible pair of query nodes. The values in C are branch
% numbers (row indices of the pointer list), i.e. without the NUMLEAVES
% offset.
%
% Example:
%
% % get the tree distances between every leaf
% tr = phytreeread('pf00002.tree')
% dist = pdist(tr,'nodes','leaves','squareform',true)
%
% See also SEQPDIST, SEQLINKAGE, PHYTREE, PHYTREETOOL.
% Copyright 2003-2005 The MathWorks, Inc.
% $Revision: 1.1.6.6 $ $Author: batserve $ $Date: 2005/06/09 21:55:59 $
if numel(tr)~=1
error('Bioinfo:phytree:pdist:NoMultielementArrays',...
'Phylogenetic tree must be an 1-by-1 object.');
end
% set default
squaredOutput = false;
CriteriaIsLevels = false;
outNodes = 'leaves';
% node numbering: leaves are 1..numLeaves, branches follow up to numLabels
numBranches = size(tr.tree,1);
numLeaves = numBranches + 1;
numLabels = numBranches + numLeaves;
% process input arguments
if nargin > 1
% TR plus name/value pairs always gives an odd argument count
if rem(nargin,2) == 0
error('Bioinfo:phytree:pdist:IncorrectNumberOfArguments',...
'Incorrect number of arguments to %s.',mfilename);
end
okargs = {'nodes','squareform','criteria'};
% ind counts arguments including TR, so the pair sits at VARARGIN{ind-1:ind}
for ind = 2 : 2: nargin
pname = varargin{ind-1};
pval = varargin{ind};
% case-insensitive prefix match of the parameter name
k = find(strncmpi(pname,okargs,numel(pname)));
if isempty(k)
error('Bioinfo:phytree:pdist:UnknownParameterName',...
'Unknown parameter name: %s.',pname);
elseif length(k)>1
error('Bioinfo:phytree:pdist:AmbiguousParameterName',...
'Ambiguous parameter name: %s.',pname);
else
switch(k)
case 1 % nodes
okNodes = {'leaves','all','branches'};
h = strmatch(lower(pval),okNodes ); %#ok
if isempty(h)
error('Bioinfo:phytree:pdist:IncorrectReferenceNode',...
'Incorrect node selection')
else
outNodes = okNodes{h};
end
case 2 % squareform
squaredOutput = (pval == true);
case 3 % criteria
h = strmatch(lower(pval),{'levels','distance'}); %#ok
if numel(h)
CriteriaIsLevels = (h == 1);
else error('Bioinfo:phytree:pdist:InvalidCriteria',...
'Invalid string for criteria.');
end
end
end
end
end
% create indexes to work only on the lower left triangle: m is the number
% of node pairs and p marks where every column of the triangle starts.
% Increments are stored and then accumulated, so that after the CUMSUM
% calls I holds the row and J the column of every pair, and H the
% linear index of (I,J) in a numLabels-by-numLabels matrix.
m = numLabels*(numLabels-1)/2;
p = cumsum([1 (numLabels-1):-1:2]);
I = ones(m,1); I(p) = [2 3-numLabels:0];
J = zeros(m,1); J(p) = 1;
H = I; H(p) = 2:numLabels;
I = cumsum(I); J = cumsum(J); H = cumsum(H);
% logical mask of the pairs that will be returned
switch outNodes
case 'leaves'
outSelection = (I <= numLeaves) & (J <= numLeaves);
case 'all'
outSelection = (I>0);
case 'branches'
outSelection = (I > numLeaves) & (J > numLeaves);
end
% find closest common branch for every pair of nodes
% diagonal is invalid ! but not needed
% initializing full matrix
% NOTE(review): branch indices are stored as int16 - confirm that trees
% never have more branches than intmax('int16')
commf = zeros(numLabels,'int16');
children = false(1,numLabels);
for ind = numBranches:-1:1
% mark branch ind together with all of its descendants
children(:) = false;
children(ind+numLeaves) = true;
for ind2 = ind:-1:1
if children(ind2+numLeaves)
children(tr.tree(ind2,:))=true;
end
end
% later (lower) branches overwrite earlier ones, leaving for every pair
% the lowest-numbered branch that contains both nodes
commf(children,children)=int16(ind);
end
% output vector with the lower left triangle closest common branches
comm = double(commf(H));
% compute the distance to root for every node
cdist = tr.dist;
if CriteriaIsLevels % set to count levels instead
% every edge counts as one; the last node (the root) contributes nothing
cdist(:) = 1;
cdist(end) = 0;
end
for ind = numBranches:-1:1
cdist(tr.tree(ind,:)) = cdist(tr.tree(ind,:)) + cdist(ind+numLeaves);
end
% compute pairwise distance: both root distances minus twice the root
% distance of the closest common branch (comm holds branch numbers, hence
% the numLeaves offset)
dist = cdist(I)+cdist(J)-2*cdist(comm+numLeaves);
dist = dist(outSelection);
comm = comm(outSelection);
if squaredOutput
dist = squareform(dist);
comm = squareform(comm);
end

View file

@ -0,0 +1,235 @@
function tr = phyTree(varargin)
%PHYTREE Phylogenetic tree object.
%
%  TREE = PHYTREE(B) creates an ultrametric phylogenetic tree object. B is
%  a numeric array of size [NUMBRANCHES X 2] in which every row represents
%  a branch of the tree and it contains two pointers to the branches
%  or leaves nodes which are its children. Leaf nodes are numbered from 1
%  to NUMLEAVES and branch nodes are numbered from NUMLEAVES + 1 to
%  NUMLEAVES + NUMBRANCHES. Note that since only binary trees are allowed,
%  then NUMLEAVES = NUMBRANCHES + 1. Branches are defined in chronological
%  order, i.e. B(i,:) < NUMLEAVES + i. As a consequence, the first row can
%  only have pointers to leaves and the last row must represent the 'root'
%  branch. Parent-child distances are set to the unit or by the ultrametric
%  condition if child is a leaf.
%
%  TREE = PHYTREE(B,D) creates an additive phylogenetic tree object with
%  branch distances defined by D. D is a numeric array of size [NUMNODES X
%  1] with the distances of every child node (leaf or branch) to its parent
%  branch. NUMNODES = NUMLEAVES + NUMBRANCHES. D(end), the distance
%  associated to the root node, is meaningless.
%
%  TREE = PHYTREE(B,C) creates an ultrametric phylogenetic tree object with
%  branch distances defined by C. C is a numeric array of size [NUMBRANCHES
%  X 1] with the coordinates of every branch node. In ultrametric trees all
%  the leaves are at the same location (i.e. same distance to the root).
%
%  TREE = PHYTREE(BC) creates an ultrametric phylogenetic binary tree
%  object with branch pointers in BC(:,[1 2]) and branch coordinates in
%  BC(:,3). Same as PHYTREE(B,C).
%
%  TREE = PHYTREE(...,N) specifies the names for the leaves and/or the
%  branches. N is a cell of strings. If NUMEL(N)==NUMLEAVES then the names
%  are assigned chronologically to the leaves. If NUMEL(N)==NUMBRANCHES the
%  names are assigned to the branch nodes. If NUMEL(N)==NUMLEAVES +
%  NUMBRANCHES all the nodes are named. Unassigned names default to 'Leaf
%  #' and/or 'Branch #' as required.
%
%  TREE = PHYTREE(S), where S is a structure with the fields 'tree',
%  'dist' and 'names', only verifies that the fields describe a valid tree
%  and wraps S, unchanged, into an object.
%
%  TREE = PHYTREE creates an empty phylogenetic tree object
%
%  Example:
%
%      % create an ultrametric tree
%      b = [1 2; 3 4; 5 6; 7 8;9 10];
%      t = phyTree(b);
%      view(t)
%
%      % create an ultrametric tree with specified branch distances
%      bd = [.1 .2 .3 .3 .4 ]';
%      b = [1 2; 3 4; 5 6; 7 8;9 10];
%      t = phyTree(b,bd);
%      view(t)
%
%  See also PHYTREE/GET, PHYTREE/SELECT, PHYTREEREAD, PHYTREETOOL,
%  PHYTREEWRITE, SEQLINKAGE, SEQNEIGHJOIN, SEQPDIST.

% Copyright 2003-2006 The MathWorks, Inc.
% $Revision: 1.1.6.11.2.1 $ $Author: batserve $ $Date: 2006/07/27 21:37:49 $

justVerifyValidity = false;

% collect the inputs; B (pointers), D (distances) and N (names) only exist
% as variables when the caller supplied them
switch nargin
    case 0
        tr.tree = zeros(0,2);
        tr.dist = zeros(0,1);
        tr.names = {};
    case 1
        B = varargin{1};
    case 2
        B = varargin{1};
        % the second input is the names when it is a cell, otherwise it is
        % taken as the distances
        if iscell(varargin{2})
            N = varargin{2};
        else
            D = varargin{2};
        end
    case 3
        B = varargin{1};
        D = varargin{2};
        N = varargin{3};
    otherwise
        error('Bioinfo:phytree:IncorrectNumberOfArguments',...
            'Incorrect number of arguments to %s.',mfilename);
end

% a structure that already has the three fields of a tree is only checked;
% its fields are stored exactly as received
if nargin==1 && isstruct(B) && isfield(B,'tree') && isfield(B,'names') && isfield(B,'dist')
    N = B.names;
    D = B.dist;
    B = B.tree;
    tr.tree = B;
    tr.dist = D;
    tr.names = N;
    justVerifyValidity = true;
end

if nargin
    if isnumeric(B)
        switch size(B,2)
            case 2
                % ok
            case 3
                % BC form: third column holds the branch coordinates
                D = B(:,3);
                B(:,3)=[];
            otherwise
                error('Bioinfo:phytree:IncorrectSize','Incorrect size for B or BC')
        end
    else
        error('Bioinfo:phytree:IncorrectType','Incorrect type for B or BC')
    end

    % test B: every node but the root must be pointed to exactly once
    if sum(diff(sort(B(:)))~=1) || (min(B(:))~=1)
        error('Bioinfo:phytree:IncompleteTree','Branch architecture is not complete')
    end

    numBranches = size(B,1);
    numLeaves = numBranches + 1;
    numLabels = numBranches + numLeaves;

    % a branch is out of chronological order as soon as ANY of its two
    % pointers refers to itself or to a branch defined later
    h = any(B'>=repmat(numLeaves+1:numLabels,2,1),1);
    if any(h)
        error('Bioinfo:phytree:NonChronologicalTree',...
            ['Branch(es) not in chronological order: [' num2str(find(h)) ']'])
    end

    if exist('D','var')
        if ~isnumeric(D) || any(D(:)<0) || ~all(isreal(D)) || size(D,2)~=1
            error('Bioinfo:phytree:DistancesNotValid',...
                'Distances must be a column vector of real positive numbers')
        end
        switch size(D,1)
            case numBranches
                % branch coordinates (C) were given, convert them to
                % child-to-parent edge lengths
                D = [zeros(numLeaves,1); D]; % add ultrametric distances of leaves
                D(B) = D((numLeaves+(1:numBranches))'*[1 1])-D(B); %dist of edges
                D(end) = 0;  % set root at zero
            case numLabels
                % ok
            otherwise
                error('Bioinfo:phytree:DistancesNotValid',...
                    'Distances must agree either with the number of branches (C) or total nodes (D)')
        end
    else % set default D
        % look for parents (P holds branch indices, the root points to
        % itself so that its distance results in zero)
        P = zeros(numLabels,1);
        P(B) = repmat((1:numBranches)',1,2);
        P(end) = numBranches;
        % look at which level is every branch
        L = zeros(numLabels,1);
        for ind = 1:numBranches
            L(ind+numLeaves) = max(L(B(ind,:))+1);
        end
        D = L(P+numLeaves)-L;
    end

    % set default names
    names = cell(1,numLabels);
    for ind = 1:numLeaves
        names{ind}=['Leaf ' num2str(ind)];
    end
    for ind = 1:numBranches
        names{ind+numLeaves}=['Branch ' num2str(ind)];
    end

    if exist('N','var')
        if ~iscell(N)
            error('Bioinfo:phytree:NamesNotValid',...
                'Names must be supplied with a cell of strings')
        end
        % the number of names decides which nodes are being named
        switch numel(N)
            case numLabels
                h = 1:numLabels;
            case numLeaves
                h = 1:numLeaves;
            case numBranches
                h = numLeaves+1:numLabels;
            otherwise
                error('Bioinfo:phytree:NamesNotValid',...
                    'Names must agree either with the number of branches, number of leaves, or total nodes')
        end
        for ind = 1:length(h)
            str = N{ind};
            if ~ischar(str)
                error('Bioinfo:phytree:NamesNotValid',...
                    'Names must be valid strings')
            end
            names{h(ind)}=str;
        end
        % check that none of the names is empty
        for ind = 1:numLabels
            if isempty(names{ind})
                if ind > numLeaves
                    names{ind} = ['Branch ' num2str(ind-numLeaves)];
                else
                    names{ind} = ['Leaf ' num2str(ind)];
                end
            end
        end
        if numel(unique(names))~=numLabels
            error('Bioinfo:phytree:NamesNotUnique',...
                'Names for leaves and branches must be unique')
        end
    end

    % check and corrects a non-monotonic tree: a negative edge below a
    % branch is shifted to zero and the difference is moved to the edge
    % above that branch
    monotonicWarning = false;
    for ind = 1:numBranches
        if any(D(B(ind,:))<0)
            monotonicWarning = true;
            tmp = min(D(B(ind,:)));
            D(B(ind,:)) = D(B(ind,:)) - tmp;
            D(numLeaves+ind) = D(numLeaves+ind) + tmp;
        end
    end
    if monotonicWarning
        warning('Bioinfo:phytree:NonMonotonicTree',...
            'Non consistent branch distances; \n Incremented branch lengths to hold a Monotonic Phylogenetic Tree')
    end

    if justVerifyValidity
        % same class name as the regular exit of this constructor
        tr = class(tr,'phyTree');
        return
    end

    tr.tree = B;
    tr.dist = D;
    tr.names = names(:);

    % reorder such that there will be no crossings in the displayed tree
    tr = prettyOrder(tr);
end %if nargin

% for trees of only one branch correct dimensions
% if size(tr.tree,2) <2 tr.tree = tr.tree'; end

% Makes the tree a class
tr = class(tr,'phyTree');

View file

@ -0,0 +1,31 @@
function tf = opttf(pval)
%OPTTF determines whether input options are true or false
%
%  TF = OPTTF(PVAL) interprets PVAL as an on/off switch:
%    - logical PVAL  : TF = ALL(PVAL)
%    - numeric PVAL  : TF = ALL(PVAL~=0)
%    - char PVAL     : TF is true for 'true','yes','on','t' and false for
%                      'false','no','off','f' (case insensitive)
%  Any other input gives an empty logical.

% Copyright 2003-2004 The MathWorks, Inc.
% $Revision: 1.3.4.2 $ $Date: 2004/12/24 20:42:39 $

% empty is the answer for every value that is not recognized below
tf = logical([]);

if islogical(pval)
    tf = all(pval);
elseif isnumeric(pval)
    tf = all(pval~=0);
elseif ischar(pval)
    if any(strcmpi(pval,{'true','yes','on','t'}))
        tf = true;
    elseif any(strcmpi(pval,{'false','no','off','f'}))
        tf = false;
    end
end

View file

@ -0,0 +1,39 @@
function tr = prettyOrder(tr)
%PRETTYORDER Reorders the leaf nodes to avoid branch crossings.
%
%  T2 = PRETTYORDER(T1) Reorders the leaf nodes in the phylogenetic tree
%  T1 such that the layout of the tree does not contain branch crossings.
%  Only the leaves are renumbered; branch nodes keep their indices.

% Copyright 2003-2005 The MathWorks, Inc.
% $Revision: 1.1.8.1 $ $Author: batserve $ $Date: 2005/06/09 21:56:11 $

nBranches = size(tr.tree,1);
nLeaves   = nBranches + 1;
nNodes    = nBranches + nLeaves;

% number of leaves hanging from every node (a leaf counts itself)
leafCount = [ones(nLeaves,1); zeros(nBranches,1)];
for b = 1:nBranches
    leafCount(b+nLeaves) = sum(leafCount(tr.tree(b,:)));
end

% accumulated distance from the root to every node
rootDist = zeros(nNodes,1);
for b = nBranches:-1:1
    kids = tr.tree(b,:);
    rootDist(kids) = tr.dist(kids) + rootDist(b+nLeaves);
end

% every node owns the range of leaf positions (lo,hi]; the root owns
% (0,nLeaves] and every branch splits its range between its two children
lo = zeros(1,nNodes);
hi = lo;
hi(nNodes) = nLeaves;
for b = nBranches:-1:1
    kids   = tr.tree(b,:);
    first  = kids(1);
    second = kids(2);
    % both children start with the whole range of their parent
    hi(kids) = hi(b+nLeaves);
    lo(kids) = lo(b+nLeaves);
    if diff(rootDist(kids)) >= 0
        % first child takes the lower part of the range
        hi(first)  = lo(first)  + leafCount(first);
        lo(second) = hi(second) - leafCount(second);
    else
        % second child takes the lower part of the range
        hi(second) = lo(second) + leafCount(second);
        lo(first)  = hi(first)  - leafCount(first);
    end
end

% for a leaf, hi is its new position; move names and distances there
leaves = 1:nLeaves;
newPos = hi(leaves);
tr.names(newPos) = tr.names(leaves);
tr.dist(newPos)  = tr.dist(leaves);

% branches map to themselves, then translate all the pointers
hi(nLeaves+1:nNodes) = nLeaves+1:nNodes;
tr.tree = hi(tr.tree);

View file

@ -0,0 +1,206 @@
function tr = reRoot(tr,node,distance)
%REROOT changes the root of a phylogenetic tree.
%
%  T2 = REROOT(T1) changes the root of the phylogenetic tree T1 using the
%  mid-point method. The mid-point is the location where the means of
%  the branch lengths of either side of the tree are equalized. The
%  original root is deleted.
%
%  T2 = REROOT(T1,NODE) changes the root to the branch indexed by NODE.
%  The new root is placed at half the distance between NODE and its
%  parent. NODE is a scalar node index or a logical vector (over all the
%  nodes, the leaves or the branches) that selects a single node.
%
%  T2 = REROOT(T1,NODE,DISTANCE) re-roots T1 by placing the new root at a
%  given DISTANCE from the reference NODE towards the root of the tree.
%  When DISTANCE goes beyond the current root a warning is issued and the
%  tree is returned unchanged.
%
%  Note: The new branch in T2 representing the root is labeled as 'Root'.
%
%  Example:
%
%     % Create an ultrametric tree
%     tr_1 = phyTree([5 7;8 9;6 11; 1 2;3 4;10 12;14 16;15 17;13 18])
%     plot(tr_1,'branchlabels',true)
%
%     % Place the new root at 'Branch 7'
%     sel = getbyname(tr_1,'Branch 7');
%     tr_2 = reRoot(tr_1,sel)
%     plot(tr_2,'branchlabels',true)
%
%     % The mid-point of the original tree was the root, since it was an
%     % ultrametric tree
%     tr_3 = reRoot(tr_2)
%     plot(tr_3,'branchlabels',true)
%
%  See also PHYTREE, PHYTREE/GET, PHYTREE/GETBYNAME, PHYTREE/PRUNE,
%  PHYTREE/SELECT, SEQNEIGHJOIN.

% Copyright 2003-2006 The MathWorks, Inc.
% $Revision: 1.1.8.3 $ $Author: batserve $ $Date: 2006/06/16 20:06:46 $

if numel(tr)~=1
    error('Bioinfo:phytree:reroot:NoMultielementArrays',...
        'Phylogenetic tree must be an 1-by-1 object.');
end

numBranches = size(tr.tree,1);
numLeaves = numBranches + 1;
numLabels = numBranches + numLeaves;

if nargin == 1
    [node,distance] = midpoint(tr);
else
    % validate node
    if islogical(node)
        % a logical selector may span all the nodes, only the leaves or
        % only the branches; turn it into node indices
        if any(numel(node) == [numLabels numLeaves])
            node = find(node);
        elseif numel(node) == numBranches
            node = find(node) + numLeaves;
        else
            error('Bioinfo:phytree:reroot:IncorrectSizeInputVector',...
                'Logical vector must have the same number of elements as nodes in the Phylogenetic Tree.');
        end
    end
    if ~isscalar(node) || node<1 || node>numLabels
        error('Bioinfo:phytree:reroot:InvalidInputNode',...
            'Invalid value for NODE.');
    end
    node = round(node);
    % when no distance is given put the root at half the branch
    if nargin<3
        distance = tr.dist(node)/2;
    end
end

% find parents for every tree node (the root keeps a zero)
parent = zeros(numLabels,1);
parent(tr.tree) = repmat(numLeaves+1:numLabels,1,2);

% validate distance
if ~isscalar(distance) || distance<0
    error('Bioinfo:phytree:reroot:InvalidInputDistance',...
        'Invalid value for DISTANCE.');
end

% if the distance exceeds the edge above the node, shift the origin node
% towards the root until the remaining distance fits in an edge
while (tr.dist(node)<=distance) && (node ~= numLabels)
    distance = distance - tr.dist(node);
    node = parent(node);
end

if node == numLabels
    if distance>0
        warning('Bioinfo:phytree:reroot:BeyondRoot',...
            'Distance goes beyond the root, tree unchanged.')
    else
        tr.names{end} = 'Root';
    end
    return
end

% check if we just need to move the branches of current root
if parent(node) == numLabels
    tr.dist(setxor(tr.tree(end,:),node)) = ...
        sum(tr.dist(tr.tree(end,:))) - distance;
    tr.dist(node) = distance;
    tr.names{end} = 'Root';
    return
end

% path to root from node and bros for every point in the path
path2r = false(numBranches,1);
pathBros = [];
me = node;
par = parent(node);
while par
    pathBros = [pathBros;setxor(tr.tree(par-numLeaves,:),me)]; %#ok
    path2r(par-numLeaves) = true;
    me = par;
    par = parent(par);
end
path2rInd=find(path2r)+numLeaves;

% new tree pointers
tr.tree = [tr.tree(~path2r,:);...
    [[pathBros(end-1:-1:1);node],[pathBros(end);path2rInd(end-1:-1:1)]]];

% swapping distances in the nodes that belong to the path2r
tr.dist(pathBros(end)) = tr.dist(pathBros(end)) + tr.dist(path2rInd(end-1));
tr.dist(path2rInd(2:end-1)) = tr.dist(path2rInd(1:end-2));
tr.dist(parent(node)) = tr.dist(node) - distance;
tr.dist(node) = distance;

% some branches changed positions, need to reorder the tree
permuta = [(1:numLeaves)';find(~path2r)+numLeaves;...
    path2rInd(end-1:-1:1);numLabels];
ipermuta(permuta)=1:numLabels;
tr.tree = ipermuta(tr.tree);
tr.dist = tr.dist(permuta);
tr.names = tr.names(permuta);
tr.names{end} = 'Root';

% re-order leaves for no branch crossings
tr = prettyOrder(tr);
%-% ----------------------------------------------------------------
% Selects the point where the mean of the branch length is equalized
function [node,distance] = midpoint(tr)
%MIDPOINT finds the edge and the position in it that balance the tree.
%
%  [NODE,DISTANCE] = MIDPOINT(TR) returns the node whose edge to its parent
%  holds the mid-point and the DISTANCE from NODE, towards its parent, at
%  which the mean distance to the leaves on either side is equal.

numBranches = size(tr.tree,1);
numLeaves = numBranches + 1;
numLabels = numBranches + numLeaves;

branchWidth = ones(numLabels,1);   % number of leaves below every node
downDist = zeros(numLabels,1);     % sum of distances to the leaves below
upDist = zeros(numLabels,1);       % sum of distances, measured from the
                                   % parent, to all the other leaves

% cumulative distance and width downwards the tree
for ind = 1:numBranches
    branchWidth(numLeaves+ind) = sum(branchWidth(tr.tree(ind,:)));
    downDist(numLeaves+ind) = sum(downDist(tr.tree(ind,:)) + ...
        tr.dist(tr.tree(ind,:)).*branchWidth(tr.tree(ind,:)));
end

% backpropagate distances: what a child sees upwards is what its brother
% has downwards plus what the parent sees upwards
for ind = numBranches:-1:1
    upDist(tr.tree(ind,:)) = downDist(tr.tree(ind,[2 1])) +...
        upDist(ind+numLeaves) + ...
        tr.dist(ind+numLeaves).*(numLeaves-branchWidth(ind+numLeaves)) + ...
        tr.dist(tr.tree(ind,[2 1])).*(branchWidth(tr.tree(ind,[2 1])));
end

% find all possible midpoints, solve this eq for every edge
%      ud/Nu + (1-x)*e = dd/Nd + (x)*e
%   ud = cumulative upwards distances
%   dd = cumulative downwards distances
%   Nu = number of leaves in the upper branches
%   Nd = number of leaves in the lower branches
%    e = distance of current edge
%    x = position in the edge as a ratio, measured from the lower node
h = tr.dist~=0;  % root can not be in an edge which length is zero
h(numLabels) = false; % the root has no edge above it to be segmented
x = inf(numLabels,1);
x(h) = (upDist(h)./(numLeaves-branchWidth(h)) - ...
    downDist(h)./(branchWidth(h)))./tr.dist(h)/2 + 1/2;

% find all possible roots
h = find(x>=0 & x<=1);
if isempty(h)
    [dummy,h] = min(abs(x-1/2)); %#ok
end

% pick the most balanced one
[d,g]=min(abs(branchWidth(h)*2-numLeaves)); %#ok
node = h(g);
ratio = min(max(x(h(g)),0),1);

% if ratio is 1 then better pick the parent
if ratio == 1
    [node,dummy] = find(tr.tree==node); %#ok
    node = node + numLeaves;
    ratio = 0;
end

% change the ratio (x) to the distance to the selected node
distance = ratio .* tr.dist(node);