297 lines
10 KiB
Matlab
297 lines
10 KiB
Matlab
function parallel(varargin)
% PARALLEL the main function of doing parallel classification/admixture.
%
% Input: parameter/value pairs in arbitrary order, or a single struct whose
% field names are the parameter names. Parameter names are matched
% case-insensitively and may be shortened to any unambiguous prefix.
% Where a list of values is shown, the first one is the default.
%   'datafile'       - the full path of the data.
%   'mixturetype', ['mix';'codon_mix';'linear_mix';'spatial';'ad_mix'];
%                    - the classification/admixture model.
%   'initialk'       - the initial number of clusters: a row vector of
%                      positive integers, or a string that evaluates to
%                      one (e.g. '[10 15]' or '[10:15]'). Default is 1.
%   'fixedk', ['no';'yes'];
%                    - whether the number of clusters is fixed during the
%                      computation.
%   'outputmat'      - the full path of the output .mat file.
%   'datatype', ['numeric';'sequence';'matlab';'excel';'genepop']
%                    - the data format. It is not inferred from the file
%                      extension of 'datafile'.
%   'namefile'       - the full path of the population name file.
%   'indexfile'      - the full path of the index file.
%   'linkagemap'     - the full path of the linkage map, needed only for
%                      the unpreprocessed data under the linkage model.
%   'coordinatefile' - needed with the spatial model.
%   'groups', ['no';'yes'] - clustering of groups instead of individuals.
%   'groupname'      - the full path of the group name file.
%
% Options for admixture analysis (each a numeric value or a string that
% evaluates to one). checkSyntax rejects 'mixturetype' 'ad_mix' unless all
% five are non-empty:
%   'clusters', 'minsize', 'iters', 'refinds', 'refiters'
%
% Examples:
% - Linkage model:
% parallel('datafile','e:\baps4\baps_source\data\bpseudomallei.xls',...
% 'mixturetype','codon_mix','initialk','[10 15]','fixedk','no',...
% 'outputmat','E:\test_link.mat','datatype','excel')
% - Independent model:
% parallel('datafile','e:\baps4\baps_source\data\baps_data.txt',...
% 'mixturetype','mix','initialk','[10:15]','fixedk','no',...
% 'outputmat','e:\test_ind.mat','datatype','numeric');
% - Spatial model:
% parallel('datafile','e:\baps4\baps_source\data\wolverines_spatial_preprocessed.mat',...
% 'mixturetype','spatial','initialk', '[10 11]', 'fixedk','no',...
% 'outputmat','e:\test_spatial.mat','datatype','matlab');
%
% - Admixture model:
% parallel('datafile','e:\baps5\data\data1_mixture.mat', ...
% 'mixturetype','ad_mix', ...
% 'clusters','[1 3 5]',...
% 'iters','2',...
% 'refinds','3',...
% 'refiters','4',...
% 'outputmat','e:\baps5\data\data1_admixture_parallel.mat');
% NOTE(review): this example does not pass 'minsize', which checkSyntax
% reports as incomplete admixture parameters - confirm the intended call.
%
% A group of result files can be later compared by using compare.m
% function.

%-------------------------------------------------------------------------------
%- Set up options and default parameters
%-------------------------------------------------------------------------------
msgInvalidPair = '***ERROR: Bad value for argument: ''%s''';
validMixtureTypes = {'mix','ad_mix','linear_mix','codon_mix','spatial'};

% default options
options = struct('dataFile', '',...
    'dataType', 'numeric',...
    'mixtureType', 'mix',...
    'initialK', 1, ...
    'fixedK', 'no', ...
    'nameFile', '', ...
    'indexFile', '', ...
    'outputMat', '', ...
    'linkageMap','', ...
    'coordinateFile', '', ...
    'groups','no', ...
    'groupname', '', ...
    'clusters', '', ...
    'minSize', '', ...
    'iters', '', ...
    'refInds', '', ...
    'refIters', '' ...
    );

% Normalise the input to a flat {name, value, name, value, ...} row cell.
if nargin == 1 && isstruct(varargin{1})
    paramlist = [ fieldnames(varargin{1}) ...
        struct2cell(varargin{1}) ]';
    paramlist = paramlist(:)';
else
    if mod(nargin,2)
        error('Invalid parameter/value pair arguments.');
    end
    paramlist = varargin;
end

% optionFields keeps the exact field spelling; optionsnames is its
% lower-case counterpart used for case-insensitive prefix matching.
optionFields = fieldnames(options);
optionsnames = lower(optionFields);
for i=1:2:length(paramlist)
    pname = paramlist{i};
    pvalue = paramlist{i+1};

    if ~ischar(pname) || isempty(pname)
        error('Invalid parameter/value pair arguments.');
    end

    % Prefix match against the known option names.
    ind = find(strncmpi(pname, optionsnames, length(pname)));
    if isempty(ind)
        error(['Invalid parameter: ''' pname '''.']);
    elseif length(ind) > 1
        error(['Ambiguous parameter: ''' pname '''.']);
    end

    switch(optionsnames{ind})
        case {'datafile','namefile','indexfile','datatype', ...
                'linkagemap','coordinatefile','groups','groupname'}
            % Plain string options are stored unchanged.
            if ~ischar(pvalue)
                error(msgInvalidPair, pname);
            end
            options.(optionFields{ind}) = pvalue;

        case 'mixturetype'
            if ~ischar(pvalue)
                error(msgInvalidPair, pname);
            end
            if isempty(pvalue)
                % An empty value is stored as '.', which matches none of
                % the models dispatched at the end of this function.
                options.mixtureType = '.';
            elseif ~any(strcmp(pvalue, validMixtureTypes))
                error('*** ERROR: unrecoganized model type');
            else
                options.mixtureType = pvalue;
            end

        case 'initialk'
            if ischar(pvalue)
                % str2num evaluates the text (it is built on eval), which
                % is what makes range strings such as '[10:15]' work.
                % Do not feed it untrusted input.
                pvalue = str2num(pvalue); %#ok<ST2NM>
            end
            if ~isnumeric(pvalue)
                error(msgInvalidPair, pname);
            end
            if isempty(pvalue)
                options.initialK = 0;
            else
                options.initialK = pvalue;
            end

        case 'fixedk'
            if ~ischar(pvalue)
                error(msgInvalidPair, pname);
            end
            if isempty(pvalue)
                options.fixedK = 'no';
            else
                options.fixedK = pvalue;
            end

        case 'outputmat'
            if ~ischar(pvalue)
                error(msgInvalidPair, pname);
            end
            options.outputMat = pvalue;
            directoryName = fileparts(pvalue);
            % A bare file name has no directory part; only an explicitly
            % given directory is checked for existence.
            if ~isempty(directoryName) && ~exist(directoryName, 'dir')
                fprintf(1,'*** ERROR: Output directory ''%s'' does not exist.\n', directoryName);
                return
            end

        % the options below are for admixture analysis
        case {'clusters','minsize','iters','refinds','refiters'}
            if ischar(pvalue)
                % See the note on str2num under 'initialk'.
                options.(optionFields{ind}) = str2num(pvalue); %#ok<ST2NM>
            elseif isnumeric(pvalue)
                options.(optionFields{ind}) = pvalue;
            else
                error(msgInvalidPair, pname);
            end

        otherwise
            error(['Invalid parameter: ''' pname '''.']);
    end
end

% The subfunction to check syntax
if ~checkSyntax(options)
    return
end

% Dispatch to the module that implements the requested model.
switch options.mixtureType
    case 'mix'
        if isequal(options.groups,'yes')
            greedyPopMix_parallel(options);
        else
            independent_parallel(options);
        end
    case {'linear_mix','codon_mix'}
        linkage_parallel(options);
    case 'spatial'
        if isequal(options.groups, 'yes')
            spatialPopMixture_parallel(options);
        else
            spatial_parallel(options);
        end
    case 'ad_mix'
        admix_parallel(options);
end

% -------------------------------------------------------------------------
% Subfunctions
% -------------------------------------------------------------------------

function isOK = checkSyntax(options)
% CHECKSYNTAX check the parsed options for conflicting combinations.
%
% input:  options - the option struct; the fields read here are fixedK,
%                   initialK, mixtureType, dataType, clusters, minSize,
%                   iters, refInds and refIters.
% output: isOK    - 1 if no problem was found, 0 otherwise. A message is
%                   displayed for every problem found, and once isOK is 0
%                   no later check sets it back to 1.

isOK = 1;

% A fixed number of clusters cannot be combined with several initial values.
if strcmp(options.fixedK, 'yes') && length(options.initialK)>1
    display('*** ERROR: conflicting in options fixedk and initialk.');
    isOK = 0;
end

% The independence module ('mix') rejects the excel and sequence formats.
if strcmp(options.mixtureType, 'mix')
    if strcmp(options.dataType, 'excel') || strcmp(options.dataType,'sequence')
        display('*** ERROR: unknown datatype for the independence module.');
        isOK = 0;
    end
end

% check the admixture parameters
admix_str = {options.clusters, options.minSize, options.iters, ...
    options.refInds, options.refIters};
pt = cellfun('isempty', admix_str);

% The admixture parameters matter only for the 'ad_mix' model; for any
% other model the result of the checks above is left as it is.
if strcmp(options.mixtureType, 'ad_mix')
    if all(pt)
        display('*** ERROR: problematic mixture type.');
        isOK = 0;
    end
    if any(pt)
        display('*** ERROR: incomplete admixture parameters.');
        isOK = 0;
    end
end
|
|
|