ourMELONS/matlab/parallel/parallel.m

298 lines
10 KiB
Mathematica
Raw Permalink Normal View History

2019-12-16 16:47:21 +01:00
function parallel(varargin)
% PARALLEL  Entry point for parallel classification/admixture runs.
%
%   parallel('name1', value1, 'name2', value2, ...)
%   parallel(S)   where S is a struct whose field names are option names
%
% Option names are case-insensitive and may be abbreviated to any
% unambiguous prefix; the order of the pairs is arbitrary. Where a list of
% alternatives is given, the first one is the default.
%
%   'datafile'       - full path of the data.
%   'mixturetype'    - 'mix' | 'codon_mix' | 'linear_mix' | 'spatial' | 'ad_mix'
%                      the classification/admixture model.
%   'initialk'       - initial number(s) of clusters; a row vector of
%                      positive integers, given either numerically or as
%                      a string such as '[10 15]'.
%   'fixedk'         - 'no' | 'yes'; whether the number of clusters is
%                      fixed during the computation.
%   'outputmat'      - full path of the output .mat file. Its directory
%                      must already exist.
%   'datatype'       - 'numeric' | 'sequence' | 'matlab' | 'excel' | 'genepop'
%                      the data format.
%   'namefile'       - full path of the population name file.
%   'indexfile'      - full path of the index file.
%   'linkagemap'     - full path of the linkage map; needed only for
%                      unpreprocessed data under the linkage model.
%   'coordinatefile' - needed with the spatial model.
%   'groups'         - 'no' | 'yes'; cluster groups instead of individuals.
%   'groupname'      - full path of the group name file.
%
% Admixture options (each given as a string that is converted to a
% number/vector with str2num):
%   'clusters', 'minsize', 'iters', 'refinds', 'refiters'
%
% Examples:
%   - Linkage model:
%       parallel('datafile','e:\baps4\baps_source\data\bpseudomallei.xls',...
%                'mixturetype','codon_mix','initialk','[10 15]','fixedk','no',...
%                'outputmat','E:\test_link.mat','datatype','excel')
%   - Independent model:
%       parallel('datafile','e:\baps4\baps_source\data\baps_data.txt',...
%                'mixturetype','mix','initialk','[10:15]','fixedk','no',...
%                'outputmat','e:\test_ind.mat','datatype','numeric');
%   - Spatial model:
%       parallel('datafile','e:\baps4\baps_source\data\wolverines_spatial_preprocessed.mat',...
%                'mixturetype','spatial','initialk','[10 11]','fixedk','no',...
%                'outputmat','e:\test_spatial.mat','datatype','matlab');
%   - Admixture model:
%       parallel('datafile','e:\baps5\data\data1_mixture.mat', ...
%                'mixturetype','ad_mix', ...
%                'clusters','[1 3 5]',...
%                'iters','2',...
%                'refinds','3',...
%                'refiters','4',...
%                'outputmat','e:\baps5\data\data1_admixture_parallel.mat');
%     NOTE(review): checkSyntax reports "incomplete admixture parameters"
%     unless all five admixture options are non-empty, yet this example
%     omits 'minsize' - confirm which of the two is intended.
%
% A group of result files can later be compared with compare.m.

%-------------------------------------------------------------------------------
%- Set up options and default parameters
%-------------------------------------------------------------------------------
msgInvalidPair = '***ERROR: Bad value for argument: ''%s''';
validMixtureTypes = {'mix', 'ad_mix', 'linear_mix', 'codon_mix', 'spatial'};

% default options
options = struct('dataFile', '', ...
                 'dataType', 'numeric', ...
                 'mixtureType', 'mix', ...
                 'initialK', 1, ...
                 'fixedK', 'no', ...
                 'nameFile', '', ...
                 'indexFile', '', ...
                 'outputMat', '', ...
                 'linkageMap', '', ...
                 'coordinateFile', '', ...
                 'groups', 'no', ...
                 'groupname', '', ...
                 'clusters', '', ...
                 'minSize', '', ...
                 'iters', '', ...
                 'refInds', '', ...
                 'refIters', '' ...
                 );

% Flatten the input into a {name, value, name, value, ...} list.
if nargin == 1 && isstruct(varargin{1})
    paramlist = [fieldnames(varargin{1}) struct2cell(varargin{1})]';
    paramlist = paramlist(:)';
else
    if mod(nargin, 2)
        error('Invalid parameter/value pair arguments.');
    end
    paramlist = varargin;
end

optionfields = fieldnames(options);      % field names in their real case
optionsnames = lower(optionfields);      % lower-cased keys used for matching

for i = 1:2:length(paramlist)
    pname  = paramlist{i};
    pvalue = paramlist{i+1};

    if ~ischar(pname)
        error('Invalid parameter/value pair arguments.');
    end

    % Prefix match, so abbreviated option names are accepted.
    ind = strmatch(lower(pname), optionsnames);
    if isempty(ind)
        error(['Invalid parameter: ''' pname '''.']);
    elseif length(ind) > 1
        error(['Ambiguous parameter: ''' pname '''.']);
    end
    fieldName = optionfields{ind};

    switch optionsnames{ind}
        case {'datafile', 'namefile', 'indexfile', 'datatype', ...
              'linkagemap', 'coordinatefile', 'groups', 'groupname'}
            % Plain string options are stored as given.
            if ~ischar(pvalue)
                error(sprintf(msgInvalidPair, pname));
            end
            options.(fieldName) = pvalue;

        case 'mixturetype'
            if ~ischar(pvalue)
                error(sprintf(msgInvalidPair, pname));
            end
            if isempty(pvalue)
                % An empty value is stored as '.', which selects no model.
                options.mixtureType = '.';
            elseif ~any(strcmp(pvalue, validMixtureTypes))
                error('*** ERROR: unrecoganized model type');
            else
                options.mixtureType = pvalue;
            end

        case 'initialk'
            % Accept both '[10 15]' and [10 15]; str2num only takes text.
            % NOTE: str2num evaluates the string, so it must come from a
            % trusted caller.
            if ischar(pvalue)
                pvalue = str2num(pvalue); %#ok<ST2NM>
            end
            if ~isnumeric(pvalue)
                error(sprintf(msgInvalidPair, pname));
            end
            if isempty(pvalue)
                options.initialK = 0;
            else
                options.initialK = pvalue;
            end

        case 'fixedk'
            if ~ischar(pvalue)
                error(sprintf(msgInvalidPair, pname));
            end
            if isempty(pvalue)
                options.fixedK = 'no';
            else
                options.fixedK = pvalue;
            end

        case 'outputmat'
            if ~ischar(pvalue)
                error(sprintf(msgInvalidPair, pname));
            end
            options.outputMat = pvalue;
            directoryName = fileparts(pvalue);
            % A bare file name has no directory part and refers to the
            % current directory, which needs no existence check.
            if ~isempty(directoryName) && exist(directoryName, 'dir') ~= 7
                fprintf(1,'*** ERROR: Output directory ''%s'' does not exist.\n', directoryName);
                return
            end

        case {'clusters', 'minsize', 'iters', 'refinds', 'refiters'}
            % Admixture options: text converted to numbers.
            % NOTE: str2num evaluates the string, so it must come from a
            % trusted caller.
            if ~ischar(pvalue)
                error(sprintf(msgInvalidPair, pname));
            end
            options.(fieldName) = str2num(pvalue); %#ok<ST2NM>

        otherwise
            error(['Invalid parameter: ''' pname '''.']);
    end
end

% Cross-check the option combination before starting any computation.
if ~checkSyntax(options)
    return
end

%-------------------------------------------------------------------------------
%- Dispatch to the selected model
%-------------------------------------------------------------------------------
switch options.mixtureType
    case 'mix'
        if isequal(options.groups, 'yes')
            greedyPopMix_parallel(options);
        else
            independent_parallel(options);
        end
    case {'linear_mix', 'codon_mix'}
        linkage_parallel(options);
    case 'spatial'
        if isequal(options.groups, 'yes')
            spatialPopMixture_parallel(options);
        else
            spatial_parallel(options);
        end
    case 'ad_mix'
        admix_parallel(options);
    otherwise
        % Reached when 'mixturetype' was passed as an empty string.
        fprintf(1,'*** ERROR: no model selected; nothing was run.\n');
end
% -------------------------------------------------------------------------
% Subfunctions
% -------------------------------------------------------------------------
function isOK = checkSyntax(options)
% CHECKSYNTAX  Check the parsed options for conflicting combinations.
%   isOK = checkSyntax(options) returns 1 when no problem is found and 0
%   otherwise. One message is displayed per problem, and all checks are
%   always run so that every problem is reported in a single pass.
%
%   Checks performed:
%     - fixedK 'yes' together with more than one initialK value;
%     - mixtureType 'mix' with dataType 'excel' or 'sequence';
%     - mixtureType 'ad_mix' with any (or all) of the admixture options
%       clusters, minSize, iters, refInds, refIters left empty.
isOK = 1;

if strcmp(options.fixedK, 'yes') && length(options.initialK) > 1
    display('*** ERROR: conflicting in options fixedk and initialk.');
    isOK = 0;
end

if strcmp(options.mixtureType, 'mix') && ...
        any(strcmp(options.dataType, {'excel', 'sequence'}))
    display('*** ERROR: unknown datatype for the independence module.');
    isOK = 0;
end

% check the admixture parameters
isAdmix = strcmp(options.mixtureType, 'ad_mix');
admix_str = {options.clusters, options.minSize, options.iters, ...
             options.refInds, options.refIters};
pt = cellfun('isempty', admix_str);

if isAdmix && all(pt)
    display('*** ERROR: problematic mixture type.');
    isOK = 0;
end
if isAdmix && any(pt)
    display('*** ERROR: incomplete admixture parameters.');
    isOK = 0;
end
% isOK is deliberately never set back to 1 here: a failure detected by an
% earlier check must survive to the return value.