Added source Matlab code for reference

This commit is contained in:
Waldir Leoncio 2019-12-16 16:47:21 +01:00
parent b8af977117
commit b5d99903d2
186 changed files with 61405 additions and 1 deletions

View file

@ -0,0 +1,776 @@
function admix_parallel(options)
% ADMIX_PARALLEL is the command line version of the baps partition with
% admixture models.
% Input: options is a struct generated by parallel.m
%
% Fields of options read by this function:
%   dataFile  - .mat file holding a previous mixture result
%   outputMat - name of the .mat file the result struct c is saved to
%   clusters  - labels of the clusters whose individuals are analysed
%   minSize   - populations with fewer individuals are removed
%   iters     - number of allele-frequency simulations per individual
%   refInds   - number of reference individuals simulated per population
%   refIters  - number of simulations per reference individual
%
% The function returns nothing. It works through the globals COUNTS,
% PARTITION and SUMCOUNTS, prints progress to the command window and
% saves the struct c, extended with the fields proportionsIt, pvalue,
% mixtureType, admixnpops, clusters, minsize, iters, refInds and refIters.
%--------------------------------------------------------------------------
%- Syntax check out
%--------------------------------------------------------------------------
rand('state',0); % used for debugging
% NOTE(review): outp and inp are assigned but not referenced again in
% this function.
outp = [options.outputMat '.txt'];
inp = options.dataFile;
clusters = options.clusters;
fprintf(1,'Parallel computing...\n');
fprintf(1,'Admixture analysis for cluster(s): %s.\n',num2str(clusters));
global COUNTS; global PARTITION; global SUMCOUNTS;
clearGlobalVars;
% The loaded file must contain either a struct c (Matlab version) or the
% result fields at the top level (Mideva version).
struct_array = load(options.dataFile);
if isfield(struct_array,'c') % Matlab version
c = struct_array.c;
if ~isfield(c,'PARTITION') | ~isfield(c,'rowsFromInd')
disp('*** ERROR: Incorrect data format');
return
end
elseif isfield(struct_array,'PARTITION') % Mideva version
c = struct_array;
if ~isfield(c,'rowsFromInd')
disp('*** ERROR: Incorrect data format');
return
end
else
disp('*** ERROR: Incorrect data format');
return;
end
if isfield(c, 'gene_lengths') && ...
(strcmp(c.mixtureType,'linear_mix') | ...
strcmp(c.mixtureType,'codon_mix')) % if the mixture is from a linkage model
% Redirect the call to the linkage admixture function.
fprintf(1,'Redirecting to Linkage Model Admixture\n');
c.data = noIndex(c.data,c.noalle); % call function noindex to remove the index column
linkage_admix_parallel(c,options);
return
end
% This section is disabled, -Jing 27.10.2009
% if isfield(c, 'gene_lengths') && ...
% (strcmp(c.mixtureType,'linkage_mix') | ...
% strcmp(c.mixtureType,'codon_mix')) % if the mixture is from a linkage model
% % Redirect the call to the linkage admixture function.
% c.data = noIndex(c.data,c.noalle); % call function noindex to remove the index column
% linkage_admix(c);
% return
% end
% Copy the mixture result into the globals and local variables.
% NOTE(review): alleleCodes, adjprior and popnames are not referenced
% again in this function.
PARTITION = c.PARTITION; COUNTS = c.COUNTS; SUMCOUNTS = c.SUMCOUNTS;
alleleCodes = c.alleleCodes; adjprior = c.adjprior; popnames = c.popnames;
rowsFromInd = c.rowsFromInd; data = c.data; npops = c.npops; noalle = c.noalle;
% answers = inputdlg({['Input the minimum size of a population that will'...
% ' be taken into account when admixture is estimated.']},...
% 'Input minimum population size',[1],...
% {'5'});
% if isempty(answers) return; end
% -------------------------------------------
% NEW: for parallel computing
% -------------------------------------------
alaRaja = options.minSize;
% Remove populations smaller than alaRaja; clusterIndex maps the original
% cluster labels to the new ones (0 for removed clusters).
[npops, clusterIndex] = poistaLiianPienet(npops, rowsFromInd, alaRaja);
if length(clusterIndex)<length(clusters)
disp('*** ERROR: error in cluster labels.');
return
end
clusters = clusterIndex(clusters); % after removing outlier clusters
nloci = size(COUNTS,2);
ninds = size(data,1)/rowsFromInd;
% answers = inputdlg({['Input number of iterations']},'Input',[1],{'50'});
% if isempty(answers) return; end
iterationCount = options.iters;
% answers = inputdlg({['Input number of reference individuals from each population']},'Input',[1],{'50'});
% if isempty(answers) nrefIndsInPop = 50;
% else nrefIndsInPop = str2num(answers{1,1});
% end
nrefIndsInPop = options.refInds;
% answers = inputdlg({['Input number of iterations for reference individuals']},'Input',[1],{'10'});
% if isempty(answers) return; end
% iterationCountRef = str2num(answers{1,1});
iterationCountRef = options.refIters;
% First calculate log-likelihood ratio for all individuals:
likelihood = zeros(ninds,1);
allfreqs = computeAllFreqs2(noalle);
for ind = 1:ninds
omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd);
osuusTaulu = zeros(1,npops);
if PARTITION(ind)==0
% The individual is an outlier
elseif PARTITION(ind)~=0
if PARTITION(ind)>0
osuusTaulu(PARTITION(ind)) = 1;
else
% Individuals that have not been placed in any cluster: assign each to
% the single population giving the highest log-likelihood.
arvot = zeros(1,npops);
for q=1:npops
osuusTaulu = zeros(1,npops);
osuusTaulu(q) = 1;
arvot(q) = computeIndLogml(omaFreqs, osuusTaulu);
end
[iso_arvo, isoimman_indeksi] = max(arvot);
osuusTaulu = zeros(1,npops);
osuusTaulu(isoimman_indeksi) = 1;
PARTITION(ind)=isoimman_indeksi;
end
% Greedy search with decreasing step sizes; the improvement over the
% starting logml is stored as the individual's log-likelihood ratio.
logml = computeIndLogml(omaFreqs, osuusTaulu);
logmlAlku = logml;
for osuus = [0.5 0.25 0.05 0.01]
[osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
end
logmlLoppu = logml;
likelihood(ind) = logmlLoppu-logmlAlku;
end
end
% Analyze further only individuals who have log-likelihood ratio larger than 3:
% ---------------------------------------
% PARALLEL COMPUTING IMPLEMENTED HERE
% ---------------------------------------
to_investigate = (find(likelihood>3))';
admix_populaatiot = unique(PARTITION(to_investigate));
validCluster = intersect(clusters, admix_populaatiot); % for the chosen clusters
% Keep only the candidates that belong to one of the chosen clusters.
ix = zeros(length(to_investigate),1);
for i = 1:length(validCluster)
ix = ix | (PARTITION(to_investigate) == validCluster(i));
end
if ~any(ix) to_investigate = [];
else to_investigate = to_investigate(ix);
end
admix_populaatiot = unique(PARTITION(to_investigate));
disp('Possibly admixed individuals in the chosen clusters: ');
if isempty(to_investigate)
disp('none');
disp('Admixture analysis terminated.');
return
else
for i = 1:length(to_investigate)
disp(num2str(to_investigate(i)));
end
end
disp(' ');
disp('clusters for possibly admixed individuals: ');
for i = 1:length(admix_populaatiot)
disp(num2str(admix_populaatiot(i)));
end
% THUS, there are two types of individuals, who will not be analyzed with
% simulated allele frequencies: those who belonged to a mini-population
% which was removed, and those who have log-likelihood ratio less than 3.
% The value in the PARTITION for the first kind of individuals is 0. The
% second kind of individuals can be identified, because they do not
% belong to "to_investigate" array. When the results are presented, the
% first kind of individuals are omitted completely, while the second kind
% of individuals are completely put to the population, where they ended up
% in the mixture analysis. These second type of individuals will have a
% unit p-value.
% Simulate allele frequencies a given number of times and save the average
% result to "proportionsIt" array.
proportionsIt = zeros(ninds,npops);
for iterationNum = 1:iterationCount
disp(['Iter: ' num2str(iterationNum)]);
allfreqs = simulateAllFreqs(noalle); % Allele frequencies on this iteration.
for ind=to_investigate
%disp(num2str(ind));
omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd);
osuusTaulu = zeros(1,npops);
if PARTITION(ind)==0
% The individual is an outlier
elseif PARTITION(ind)~=0
if PARTITION(ind)>0
osuusTaulu(PARTITION(ind)) = 1;
else
% Individuals that have not been placed in any cluster.
arvot = zeros(1,npops);
for q=1:npops
osuusTaulu = zeros(1,npops);
osuusTaulu(q) = 1;
arvot(q) = computeIndLogml(omaFreqs, osuusTaulu);
end
[iso_arvo, isoimman_indeksi] = max(arvot);
osuusTaulu = zeros(1,npops);
osuusTaulu(isoimman_indeksi) = 1;
PARTITION(ind)=isoimman_indeksi;
end
logml = computeIndLogml(omaFreqs, osuusTaulu);
for osuus = [0.5 0.25 0.05 0.01]
[osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
end
end
% Running mean of the proportions over the iterations done so far.
proportionsIt(ind,:) = proportionsIt(ind,:).*(iterationNum-1) + osuusTaulu;
proportionsIt(ind,:) = proportionsIt(ind,:)./iterationNum;
end
end
%disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']);
%disp('each population.');
%allfreqs = simulateAllFreqs(noalle); % Simulate a set of allele frequencies
allfreqs = computeAllFreqs2(noalle); % Let's try this instead.
% Initialize the data structures, which are required in taking the missing
% data into account:
n_missing_levels = zeros(npops,1); % number of different levels of "missingness" in each pop (max 3).
missing_levels = zeros(npops,3); % the mean values for different levels.
missing_level_partition = zeros(ninds,1); % level of each individual (one of the levels of its population).
for i=1:npops
inds = find(PARTITION==i);
% Proportions of non-missing data for the individuals:
non_missing_data = zeros(length(inds),1);
for j = 1:length(inds)
ind = inds(j);
non_missing_data(j) = length(find(data((ind-1)*rowsFromInd+1:ind*rowsFromInd,:)>0)) ./ (rowsFromInd*nloci);
end
if all(non_missing_data>0.9)
% Every individual has more than 90 % of its data: one level suffices.
n_missing_levels(i) = 1;
missing_levels(i,1) = mean(non_missing_data);
missing_level_partition(inds) = 1;
else
[ordered, ordering] = sort(non_missing_data);
%part = learn_simple_partition(ordered, 0.05);
part = learn_partition_modified(ordered);
% Put the level labels back into the original order of the individuals.
aux = sortrows([part ordering],2);
part = aux(:,1);
missing_level_partition(inds) = part;
n_levels = length(unique(part));
n_missing_levels(i) = n_levels;
for j=1:n_levels
missing_levels(i,j) = mean(non_missing_data(find(part==j)));
end
end
end
% Create and analyse reference individuals for populations
% with potentially admixed individuals:
% refTaulu(pop,k,level) counts the reference individuals whose proportion
% in their own population rounds to k per cent.
refTaulu = zeros(npops,100,3);
for pop = admix_populaatiot'
for level = 1:n_missing_levels(pop)
potential_inds_in_this_pop_and_level = ...
find(PARTITION==pop & missing_level_partition==level &...
likelihood>3); % Potential admix individuals here.
if ~isempty(potential_inds_in_this_pop_and_level)
%refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs);
refData = simulateIndividuals(nrefIndsInPop, rowsFromInd, allfreqs, ...
pop, missing_levels(pop,level));
disp(['Analysing the reference individuals from pop ' num2str(pop) ' (level ' num2str(level) ').']);
refProportions = zeros(nrefIndsInPop,npops);
for iter = 1:iterationCountRef
%disp(['Iter: ' num2str(iter)]);
% NOTE(review): this overwrites allfreqs, so reference data for later
% pops/levels is simulated from these frequencies - confirm intended.
allfreqs = simulateAllFreqs(noalle);
for ind = 1:nrefIndsInPop
omaFreqs = computePersonalAllFreqs(ind, refData, allfreqs, rowsFromInd);
osuusTaulu = zeros(1,npops);
osuusTaulu(pop)=1;
logml = computeIndLogml(omaFreqs, osuusTaulu);
for osuus = [0.5 0.25 0.05 0.01]
[osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
end
refProportions(ind,:) = refProportions(ind,:).*(iter-1) + osuusTaulu;
refProportions(ind,:) = refProportions(ind,:)./iter;
end
end
for ind = 1:nrefIndsInPop
omanOsuus = refProportions(ind,pop);
% A proportion that rounds to 0 per cent is counted in bin 1.
if round(omanOsuus*100)==0
omanOsuus = 0.01;
end
if abs(omanOsuus)<1e-5
omanOsuus = 0.01;
end
refTaulu(pop, round(omanOsuus*100),level) = refTaulu(pop, round(omanOsuus*100),level)+1;
end
end
end
end
% Rounding of the results:
proportionsIt = proportionsIt.*100; proportionsIt = round(proportionsIt);
proportionsIt = proportionsIt./100;
for ind = 1:ninds
if ~any(to_investigate==ind)
% Not investigated: put the individual entirely into its own population.
if PARTITION(ind)>0
proportionsIt(ind,PARTITION(ind))=1;
end
else
% In case of a rounding error, the sum is made equal to unity by
% fixing the largest value.
if (PARTITION(ind)>0) & (sum(proportionsIt(ind,:)) ~= 1)
[isoin,indeksi] = max(proportionsIt(ind,:));
erotus = sum(proportionsIt(ind,:))-1;
proportionsIt(ind,indeksi) = isoin-erotus;
end
end
end
% Calculate p-value for each individual:
uskottavuus = zeros(ninds,1);
for ind = 1:ninds
pop = PARTITION(ind);
if pop==0 % Individual is outlier
uskottavuus(ind)=1;
elseif isempty(find(to_investigate==ind))
% Individual had log-likelihood ratio<3
uskottavuus(ind)=1;
else
omanOsuus = proportionsIt(ind,pop);
if abs(omanOsuus)<1e-5
omanOsuus = 0.01;
end
if round(omanOsuus*100)==0
omanOsuus = 0.01;
end
level = missing_level_partition(ind);
% Share of reference individuals whose own-population proportion is at
% most that of this individual.
refPienempia = sum(refTaulu(pop, 1:round(100*omanOsuus), level));
uskottavuus(ind) = refPienempia / nrefIndsInPop;
end
end
% tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount);
% viewPartition(proportionsIt, popnames);
% Store the results and the run parameters in c and save it.
c.proportionsIt = proportionsIt;
c.pvalue = uskottavuus; % Added by Jing
c.mixtureType = 'admix'; % Jing
c.admixnpops = npops;
c.clusters = clusters; % added for parallel computing
c.minsize = alaRaja;
c.iters = iterationCount;
c.refInds = nrefIndsInPop;
c.refIters = iterationCountRef;
fprintf(1,'Saving the result...')
try
% save(options.outputMat, 'c');
save(options.outputMat, 'c','-v7.3'); % added by Lu Cheng, 08.06.2012
fprintf(1,'Finished.\n');
catch
% Any failure while saving is reported with a message only.
display('*** ERROR in saving the result.');
end
%----------------------------------------------------------------------------
function [npops, clusterIndex] = poistaLiianPienet(npops, rowsFromInd, alaraja)
% POISTALIIANPIENET Remove the populations that have fewer than 'alaraja'
% individuals and renumber the remaining ones consecutively.
%
% Input:  npops       - number of populations before the removal
%         rowsFromInd - not used by this function
%         alaraja     - minimum accepted population size
% Output: npops        - number of populations left
%         clusterIndex - column vector mapping each original label to its
%                        new label (0 for a removed population)
% Side effects: individuals of removed populations get the label 0 in the
% global PARTITION, and the removed populations are deleted from the
% globals COUNTS and SUMCOUNTS.
global PARTITION;
global COUNTS;
global SUMCOUNTS;

clusterIndex = (1:npops)';

% Number of individuals in every population.
sizes = zeros(1, npops);
for k = 1:npops
    sizes(k) = nnz(PARTITION == k);
end

tooSmall = find(sizes < alaraja);
if isempty(tooSmall)
    return;
end

% Collect and report the members of every population that is removed.
removed = [];
for pop = tooSmall
    members = find(PARTITION == pop);
    disp('Removed individuals: ');
    disp(num2str(members));
    removed = [removed; members];
    clusterIndex(pop) = 0;
end
PARTITION(removed) = 0;

% Relabel the surviving populations as 1..n in ascending order.
surviving = unique(PARTITION(PARTITION > 0));
for newLabel = 1:length(surviving)
    oldLabel = surviving(newLabel);
    PARTITION(PARTITION == oldLabel) = newLabel;
    clusterIndex(oldLabel) = newLabel;
end

COUNTS(:, :, tooSmall) = [];
SUMCOUNTS(tooSmall, :) = [];
npops = npops - length(tooSmall);
%------------------------------------------------------------------------
function clearGlobalVars
% CLEARGLOBALVARS Reset the global work variables to empty matrices.
global COUNTS SUMCOUNTS PARTITION POP_LOGML;
COUNTS = [];
SUMCOUNTS = [];
PARTITION = [];
POP_LOGML = [];
%--------------------------------------------------------
function allFreqs = computeAllFreqs2(noalle)
% COMPUTEALLFREQS2 Allele frequencies from the global counts with a prior.
% A prior weight of 1/noalle(j) is added to each of the first noalle(j)
% alleles of locus j in every population, and the result is divided by
% SUMCOUNTS + 1 of the corresponding population and locus.
%
% Input:  noalle   - number of alleles at each locus
% Output: allFreqs - array of the same size as the global COUNTS
global COUNTS;
global SUMCOUNTS;
nAlleleRows = size(COUNTS,1);
nloci = size(COUNTS,2);
npops = size(COUNTS,3);

% Denominators: one value per (locus, population), repeated for every
% allele row.
totals = SUMCOUNTS + ones(size(SUMCOUNTS));
totals = repmat(reshape(totals', [1, nloci, npops]), [nAlleleRows, 1 1]);

% Prior weights, identical for all populations.
prior = zeros(nAlleleRows, nloci);
for locus = 1:nloci
    prior(1:noalle(locus), locus) = 1/noalle(locus);
end

allFreqs = (COUNTS + repmat(prior, [1,1,npops])) ./ totals;
function allfreqs = simulateAllFreqs(noalle)
% SIMULATEALLFREQS Simulate allele frequencies for every population.
% A prior weight of 1/noalle(j) is added to each of the first noalle(j)
% alleles of locus j in the global COUNTS. For every population and
% locus the frequencies are then drawn with randdir, using the resulting
% counts as parameters.
%
% Input:  noalle   - number of alleles at each locus
% Output: allfreqs - array of the same size as the global COUNTS; rows
%                    beyond noalle(j) stay zero
global COUNTS;
nAlleleRows = size(COUNTS,1);
nloci = size(COUNTS,2);
npops = size(COUNTS,3);

prior = zeros(nAlleleRows, nloci);
for locus = 1:nloci
    prior(1:noalle(locus), locus) = 1/noalle(locus);
end
posterior = COUNTS + repmat(prior, [1,1,npops]);

allfreqs = zeros(size(posterior));
for pop = 1:npops
    for locus = 1:nloci
        nAlleles = noalle(locus);
        allfreqs(1:nAlleles, locus, pop) = ...
            randdir(posterior(1:nAlleles, locus, pop), nAlleles);
    end
end
%--------------------------------------------------------------------------
function refData = simulateIndividuals(n,rowsFromInd,allfreqs,pop, missing_level)
% SIMULATEINDIVIDUALS Simulate n individuals from population pop, such
% that approximately the proportion "missing_level" of the alleles is
% present.
%
% Output: refData - (n*rowsFromInd) x nloci matrix; every individual
%                   occupies rowsFromInd consecutive rows and an absent
%                   allele is coded as -999.
nloci = size(allfreqs,2);
refData = zeros(n*rowsFromInd,nloci);
for ind = 1:n
    firstRow = (ind-1)*rowsFromInd + 1; % first row of this individual
    for loc = 1:nloci
        for row = firstRow : firstRow+rowsFromInd-1
            if rand<missing_level
                refData(row,loc) = simuloiAlleeli(allfreqs,pop,loc);
            else
                refData(row,loc) = -999;
            end
        end
    end
end
function allele = simuloiAlleeli(allfreqs,pop,loc)
% SIMULOIALLEELI Simulate one allele for locus loc of population pop.
% Draws one uniform random number and returns the smallest allele index
% whose cumulative frequency exceeds it.
cumulative = cumsum(allfreqs(:,loc,pop));
allele = min(find(cumulative>rand));
%--------------------------------------------------------------------------
function omaFreqs = computePersonalAllFreqs(ind, data, allFreqs, rowsFromInd)
% COMPUTEPERSONALALLFREQS Frequencies of the alleles of individual ind.
% Returns an npops x (rowsFromInd*nloci) table in which every column
% corresponds to one allele of individual ind and the rows hold the
% frequency of that allele in the different populations. A negative
% allele code (missing allele) gives a column of ones.
% npops and nloci are taken from the global COUNTS.
global COUNTS;
nloci = size(COUNTS,2);
npops = size(COUNTS,3);
alleles = data(computeRows(rowsFromInd, ind, 1),:);
omaFreqs = zeros(npops, (rowsFromInd*nloci));
[nrows, ncols] = size(alleles);
for loc = 1:ncols
    for r = 1:nrows
        column = (loc-1)*nrows + r; % columns are filled locus by locus
        code = alleles(r,loc);
        if code>=0
            try
                omaFreqs(:,column) = ...
                    reshape(allFreqs(code,loc,:), [npops,1]);
            catch
                % A failed lookup leaves the column as it is (zeros).
            end
        else
            omaFreqs(:,column) = ones(npops,1);
        end
    end
end
%---------------------------------------------------------------------------
function loggis = computeIndLogml(omaFreqs, osuusTaulu)
% COMPUTEINDLOGML Log-likelihood of an individual when its origins are
% assumed to be as given in osuusTaulu.
%
% Input:  omaFreqs   - npops x nalleles table; column k holds the
%                      frequency of the individual's k-th allele in each
%                      population
%         osuusTaulu - 1 x npops row of ancestry proportions
% Output: loggis     - sum over the alleles of the logarithm of the
%                      proportion-weighted allele frequency
apu = repmat(osuusTaulu', [1 size(omaFreqs,2)]);
apu = apu .* omaFreqs;
% Sum over the populations explicitly along dimension 1: with a single
% population apu is a row vector and a plain sum() would add up the
% alleles instead.
apu = sum(apu, 1);
apu = log(apu);
loggis = sum(apu);
%--------------------------------------------------------------------------
function osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi)
% SUORITAMUUTOS Update osuusTaulu after a move has been chosen.
% indeksi is a linear (column-major) index into an npops x npops table:
% its row is the population that gives up 'osuus' and its column the
% population that receives it. npops is taken from the global COUNTS.
global COUNTS;
npops = size(COUNTS,3);
giver = mod(indeksi-1, npops) + 1;
receiver = ceil(indeksi / npops);
osuusTaulu(giver) = osuusTaulu(giver)-osuus;
osuusTaulu(receiver) = osuusTaulu(receiver)+osuus;
%-------------------------------------------------------------------------
function [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml)
% ETSIPARAS Greedy search for better ancestry proportions.
% Repeatedly evaluates all moves of size 'osuus' with laskeMuutokset4 and
% applies the move with the largest change in logml, until no move gives
% a positive change.
%
% Output: osuusTaulu - the updated proportions
%         logml      - the input logml plus the sum of the applied changes
while true
    gains = laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml);
    [bestGain, move] = max(gains(:));
    if bestGain>0
        osuusTaulu = suoritaMuutos(osuusTaulu, osuus, move);
        logml = logml + bestGain;
    else
        break;
    end
end
%---------------------------------------------------------------------------
function muutokset = laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml)
% LASKEMUUTOKSET4 Changes in logml for every possible move of size osuus.
% Returns an npops x npops table whose element (i,j) is the change in
% logml if the amount 'osuus' of probability mass is moved from
% population i to population j. Rows of populations whose proportion is
% not above 0.005 are left as zeros. npops is taken from the global COUNTS.
global COUNTS;
npops = size(COUNTS,3);
muutokset = zeros(npops);
givers = find(osuusTaulu>0.005);
for from = givers
    % The table is modified in place and restored after each trial.
    osuusTaulu(from) = osuusTaulu(from)-osuus;
    for to = [1:from-1 from+1:npops]
        osuusTaulu(to) = osuusTaulu(to)+osuus;
        muutokset(from,to) = computeIndLogml(omaFreqs, osuusTaulu)-logml;
        osuusTaulu(to) = osuusTaulu(to)-osuus;
    end
    osuusTaulu(from) = osuusTaulu(from)+osuus;
end
function g=randga(a,b)
% RANDGA Draw one gamma-distributed random number.
%
% Input:  a - shape parameter
%         b - the generated value is divided by b
% Output: g - the random number
%
% Three cases are handled separately:
%   a > 1  : rejection sampling from a ratio of uniforms (this looks like
%            the Cheng-Feast method - TODO confirm the reference)
%   a == 1 : -log(rand)/b
%   a < 1  : rejection sampling with two proposal branches, as in the
%            Ahrens-Dieter GS algorithm
flag = 0;
if a>1
    c1 = a-1; c2 = (a-(1/(6*a)))/c1; c3 = 2/c1; c4 = c3+2; c5 = 1/sqrt(a);
    U1=-1;
    while flag == 0
        if a<=2.5
            U1=rand; U2=rand;
        else
            % Redraw until the transformed U1 lies strictly inside (0,1).
            while ~(U1>0 && U1<1)
                U1=rand; U2=rand;
                U1 = U2 + c5*(1-1.86*U1);
            end
        end
        W = c2*U2/U1;
        if c3*U1+W+(1/W)<=c4
            % Quick acceptance test.
            flag = 1;
            g = c1*W/b;
        elseif c3*log(U1)-log(W)+W<1
            % Exact acceptance test.
            flag = 1;
            g = c1*W/b;
        else
            U1=-1; % rejected: force a new draw
        end
    end
elseif a==1
    g=sum(-(1/b)*log(rand(a,1)));
else
    while flag == 0
        U = rand(2,1);
        if U(1)>exp(1)/(a+exp(1))
            g = -log(((a+exp(1))*(1-U(1)))/(a*exp(1)));
            if U(2)<=g^(a-1)
                flag = 1;
            end
        else
            % The whole ratio is raised to the power 1/a. Raising only
            % exp(1) to that power does not match the acceptance test
            % below and gives a wrong distribution.
            g = ((a+exp(1))*U(1)/exp(1))^(1/a);
            if U(2)<=exp(-g)
                flag = 1;
            end
        end
    end
    g=g/b;
end
%-------------------------------------------------
function svar=randdir(counts,nc)
% RANDDIR Draw a random vector by normalising independent gamma draws.
% Usage example: randdir([10;30;60],3)
%
% Input:  counts - column of parameters; the first nc entries are used
%         nc     - number of components
% Output: svar   - nc x 1 vector, scaled so that its elements sum to one
draws = zeros(nc,1);
for k = 1:nc
    draws(k,1) = randga(counts(k,1),1);
end
svar = draws./sum(draws);
%-------------------------------------------------------------------------------------
function rows = computeRows(rowsFromInd, inds, ninds)
% COMPUTEROWS Row indices of the data rows of the given individuals.
% Individuals inds (a column of ninds indices) have been given. The
% function returns a row vector containing, individual by individual,
% the indices of the rowsFromInd rows that hold data from them.
lastRows = repmat(inds(:,1)*rowsFromInd, [1 rowsFromInd]);
offsets = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]);
rows = reshape((lastRows - offsets)', [1,rowsFromInd*ninds]);
%--------------------------------------------------------------------------
%-----
function str = ownNum2Str(number)
% OWNNUM2STR Convert a number to a string.
% Numbers whose absolute value is below 1000 or at least 100000000 are
% passed straight to num2str. The others are split into a leading part
% and a zero-padded tail of three (below 10000000) or four digits, which
% are converted separately and concatenated.
absolute = abs(number);
if absolute < 1000
    str = num2str(number);
elseif absolute < 10000000
    str = joinHeadAndPaddedTail(number, 3);
elseif absolute < 100000000
    str = joinHeadAndPaddedTail(number, 4);
else
    str = num2str(number);
end

function str = joinHeadAndPaddedTail(number, ndigits)
% Split number at 10^ndigits and print the tail with leading zeros.
base = 10^ndigits;
tail = rem(number, base);
head = (number - tail) / base;
tail = abs(tail);
% One leading zero for every power of ten (10, 100, ...) above the tail.
nzeros = sum(tail < 10.^(1:ndigits-1));
str = [num2str(head) repmat('0', 1, nzeros) num2str(tail)];
%------------------------------------------------
function part = learn_partition_modified(ordered)
% LEARN_PARTITION_MODIFIED Partition the "non-missingness" values.
% This function is called only if some individual has less than 90 per
% cent non-missing data. The values are first clustered with
% learn_simple_partition(ordered,0.05). If three clusters are found and
% two of them contain only values >0.9, those two are combined.
part = learn_simple_partition(ordered,0.05);
if length(unique(part)) ~= 3
    return;
end
low1 = min(ordered(part==1));
low2 = min(ordered(part==2));
low3 = min(ordered(part==3));
if low1>0.9 & low2>0.9
    % Merge cluster 2 into 1 and shift cluster 3 down to 2.
    part(part==2) = 1;
    part(part==3) = 2;
elseif low1>0.9 & low3>0.9
    part(part==3) = 1;
elseif low2>0.9 & low3>0.9
    % This is the one happening in practice, since the values are
    % ordered, leading to low1 <= low2 <= low3
    part(part==3) = 2;
end

81
matlab/parallel/compare.m Normal file
View file

@ -0,0 +1,81 @@
function compare(varargin)
% COMPARE compares the results from multiple runs.
% input: is a group of result .mat files on the same data.
% Example: compare('e:\data\result1.mat','e:\data\result2.mat',...)
% or call it from the BAPS menu.
%
% Without arguments the files are chosen with uipickfiles. Every file is
% loaded and checked, and the name of the file(s) with the highest logml
% is printed. Nothing is returned.
if nargin == 1
error('number of input arguments must be >=2');
end
if nargin == 0
out = uipickfiles('FilterSpec','*.mat',...
'Prompt','Select mixture results: be sure that the underlying data and models are consistent.');
% NOTE(review): a numeric return value is treated as "nothing selected" -
% confirm against uipickfiles.
if isnumeric(out)
return
end
nfiles = length(out);
filesin = out;
else
nfiles = nargin;
filesin = varargin;
end
display('---------------------------------------------------');
fprintf(1,'Comparing results ...\n');
if nfiles == 1
disp('*** ERROR: Too few files.');
return
end
for i = 1:nfiles
struct_array = load(filesin{i});
if isfield(struct_array,'c') % Matlab version
c = struct_array.c;
clear struct_array;
% Admixture results are rejected here; they are handled by compare_admix.
if ~isfield(c,'PARTITION') || ~isfield(c,'rowsFromInd') || strcmp(c.mixtureType,'admix')
fprintf(1,'*** ERROR: Incorrect mixture result in file %d\n',i );
return
end
elseif isfield(struct_array,'PARTITION') % Mideva version
c = struct_array;
if ~isfield(c,'rowsFromInd')
fprintf(1,'*** ERROR: Incorrect mixture result in file %d\n',i );
return
end
else
fprintf(1,'*** ERROR: Incorrect mixture result in file %d\n',i );
return;
end
% The row assignment fails if the partitions have different lengths,
% which is reported as inconsistent results.
% NOTE(review): sort_partition is defined elsewhere; presumably it brings
% the labels to a canonical order - confirm.
try
partitionMat(i,:) = sort_partition(c.PARTITION);
catch
error('*** ERROR: inconsistent results.');
end
mixtureType{i} = c.mixtureType;
logml(i) = c.logml;
clear c;
end
% NOTE(review): only the lengths of the mixture type strings are
% compared, not the strings themselves - confirm this is intended.
len_mixture = length(mixtureType{1});
for i = 2:nfiles
if len_mixture ~= length(mixtureType{i});
error('*** ERROR: inconsistent mixture types.');
end
end
% Find the best partition
best = logical(logml == max(logml));
% NOTE(review): uniquepartition, ind1 and ind2 are not used afterwards.
[uniquepartition, ind1, ind2] = unique(partitionMat(best,:), 'rows');
% With tied logml values filesin{best} expands to several arguments, so
% the line is printed once per best file.
fprintf(1,'Best partition was found at ''%s''\n',filesin{best});

View file

@ -0,0 +1,142 @@
function compare_admix(varargin)
% COMPARE_ADMIX compares the results from multiple runs for admixture results
% input: is a group of result .mat files on the same data.
% Example: compare_admix('e:\data\result1.mat','e:\data\result2.mat',...)
% or call it from the BAPS menu.
%
% Without arguments the files are chosen with uipickfiles. The files must
% have been produced with the same adjprior and the same parameters
% (minsize, iters, refInds, refIters). For every individual the
% proportions of the run with the smallest p-value are kept. The combined
% result is displayed and can optionally be saved to a .mat file.

if nargin == 1
    error('number of input arguments must be >=2');
end

if nargin == 0
    out = uipickfiles('FilterSpec','*.mat',...
        'Prompt','Select admixture results: be sure that the underlying data and parameters are consistent.');
    if isnumeric(out)
        return
    end
    nfiles = length(out);
    filesin = out;
else
    nfiles = nargin;
    filesin = varargin;
end

display('---------------------------------------------------');
fprintf(1,'Comparing results ...\n');

minsize = zeros(nfiles,1);
iters = zeros(nfiles,1);
refInds = zeros(nfiles,1);
refIters = zeros(nfiles,1);
prop = cell(nfiles,1);
clusters = cell(nfiles,1);
adjprior = [];

if nfiles == 1
    disp('*** ERROR: Too few files.');
    return
end

% read admixture files
for i = 1:nfiles
    struct_array = load(filesin{i});
    if isfield(struct_array,'c') % Matlab version
        c = struct_array.c;
        clear struct_array;
        if ~isfield(c,'PARTITION') || ~isfield(c,'rowsFromInd') ...
                || ~isfield(c,'proportionsIt')
            fprintf(1,'*** ERROR: Incorrect admixture result in file %d\n',i );
            return
        end
    elseif isfield(struct_array,'PARTITION') % Mideva version
        c = struct_array;
        if ~isfield(c,'rowsFromInd')
            fprintf(1,'*** ERROR: Incorrect admixture result in file %d\n',i );
            return
        end
    else
        fprintf(1,'*** ERROR: Incorrect admixture result in file %d\n',i );
        return;
    end

    prop{i} = c.proportionsIt;
    pvalue(:,i) = c.pvalue;
    clusters{i} = c.clusters;
    popnames = c.popnames;

    % parameters
    minsize(i) = c.minsize;
    iters(i) = c.iters;
    refInds(i) = c.refInds;
    refIters(i) = c.refIters;

    % All files must share the adjprior of the first file.
    if i==1
        adjprior = c.adjprior;
    else
        if ~isequal(adjprior,c.adjprior)
            disp('*** ERROR: incosistent admixture results.');
            return
        end
    end
    clear c;
end

if length(unique(minsize))~=1 || length(unique(iters))~=1 ...
        || length(unique(refInds))~=1 || length(unique(refIters))~=1
    disp('*** ERROR: inconsistent admixture parameters.');
    return
end

% now combine the results: for every individual take the proportions of
% the run in which its p-value is smallest
prop_combine = prop{1};
[ninds, npops] = size(prop_combine);
[pvalue_combine,index] = min(pvalue,[],2);
for i = 1:ninds
    prop_combine(i,:) = prop{index(i)}(i,:);
end

% display the results
tulostaAdmixtureTiedot(prop_combine, pvalue_combine, minsize(1), iters(1));
viewPartition(prop_combine, popnames);

% save the results
talle = questdlg(['Do you want to save the combined admixture results?'], ...
    'Save results?','Yes','No','Yes');
if isequal(talle,'Yes')
    %waitALittle;
    [filename, pathname] = uiputfile('*.mat','Save results as');
    % uiputfile returns 0 when the dialog is cancelled. isequal is used
    % because filename and pathname are character vectors otherwise, and
    % an element-wise comparison of the two could fail on their sizes.
    if isequal(filename,0) || isequal(pathname,0)
        % Cancel was pressed
        return
    end
    % copy 'baps4_output.baps' into the text file with the same name.
    if exist('baps4_output.baps','file')
        copyfile('baps4_output.baps',[pathname filename '.txt'])
        delete('baps4_output.baps')
    end

    % The saved struct is the one of the first file with the combined
    % proportions and p-values.
    struct_array = load(filesin{1});
    c = struct_array.c;
    clear struct_array;
    c.proportionsIt = prop_combine;
    c.pvalue = pvalue_combine; % Added by Jing
    fprintf(1, 'Saving the results...');
    % save([pathname filename], 'c');
    save([pathname filename], 'c','-v7.3'); % added by Lu Cheng, 08.06.2012
    fprintf(1,'finished.\n');
end

View file

@ -0,0 +1,3 @@
%------------------------------------------------------
function dispLine
% DISPLINE Print a separator line of dashes to the command window.
fprintf(1, '%s\n', '---------------------------------------------------');

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,35 @@
function popnames = initPopNames(nameFile, indexFile)
% INITPOPNAMES Read the population names and the matching indices.
%
% Input:  nameFile  - text file with one population name per line;
%                     reading stops at the first empty line or at the end
%                     of the file
%         indexFile - file read with load, holding one index per name
% Output: popnames  - n x 2 cell array; popnames{i,1} is a 1 x 1 cell
%                     holding the i-th name and popnames{i,2} the i-th
%                     index. Empty if the name file could not be opened
%                     or if the name file and the index file were not
%                     equally long.
popnames = [];
indices = load(indexFile);

fid = fopen(nameFile);
if fid == -1
    %File didn't exist
    msgbox('Loading of the population names was unsuccessful', ...
        'Error', 'error');
    return;
end

% fgetl returns the number -1 at the end of the file and a character
% vector otherwise, so the type is tested instead of comparing the whole
% line with -1. names starts out empty so that an empty name file is
% reported by the length check below.
names = {};
nameLine = fgetl(fid);
while ischar(nameLine) && ~isempty(nameLine)
    names{end+1} = nameLine;
    nameLine = fgetl(fid);
end
fclose(fid);

if length(names) ~= length(indices)
    disp('The number of population names must be equal to the number of ');
    disp('entries in the file specifying indices of the first individuals of ');
    disp('each population.');
    return;
end

popnames = cell(length(names), 2);
for i = 1:length(names)
    popnames{i,1} = names(i);
    popnames{i,2} = indices(i);
end

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

297
matlab/parallel/parallel.m Normal file
View file

@ -0,0 +1,297 @@
function parallel(varargin)
% PARALLEL the main function of doing parallel classification/admixture.
% input: order of input is arbitrary. The first option is the default.
%   Options are given as name/value pairs, or as a single struct whose
%   field names are option names. Option names are case-insensitive and
%   may be abbreviated to an unambiguous prefix.
% 'datafile' - the full path of the data.
% 'mixturetype', ['mix';'codon_mix';'linear_mix';'spatial';'ad_mix'];
% - the classification/admixture model.
% 'initialk', a row vector of positive integers (numeric, or a string
%   that evaluates to one);
% - the initial number of clusters.
% 'fixedk',['no';'yes'];
% - whether the number of clusters is fixed during the computation.
% 'outputmat' - the full path of the output .mat file.
% 'datatype', ['numeric';'sequence';'matlab';'excel';'genepop']
% - the data format;
% 'namefile' - the full path of the population name file.
% 'indexfile' - the full path of the index file.
% 'linkagemap' - the full path of the linkage map, needed only for the
% unpreprocessed data under the linkage model.
% 'coordinatefile' - needed with the spatial model.
% 'groups', ['no';'yes'] - clustering of groups instead of individuals.
% 'groupname' - the full path of the group name file.
% 'clusters','minsize','iters','refinds','refiters'
% - admixture parameters (numeric, or a string that evaluates to a number).
% Examples:
% - Linkage model:
% parallel('datafile','e:\baps4\baps_source\data\bpseudomallei.xls',...
% 'mixturetype','codon_mix','initialk','[10 15]','fixedk','no',...
% 'outputmat','E:\test_link.mat','datatype','excel')
% - Independent model:
% parallel('datafile','e:\baps4\baps_source\data\baps_data.txt',...
% 'mixturetype','mix','initialk','[10:15]','fixedk','no',...
% 'outputmat','e:\test_ind.mat','datatype','numeric');
% - Spatial model:
% parallel('datafile','e:\baps4\baps_source\data\wolverines_spatial_preprocessed.mat',...
% 'mixturetype','spatial','initialk', '[10 11]', 'fixedk','no',...
% 'outputmat','e:\test_spatial.mat','datatype','matlab');
%
%
% - Admixture model:
% parallel('datafile','e:\baps5\data\data1_mixture.mat', ...
% 'mixturetype','ad_mix', ...
% 'clusters','[1 3 5]',...
% 'iters','2',...
% 'refinds','3',...
% 'refiters','4',...
% 'outputmat','e:\baps5\data\data1_admixture_parallel.mat');
% A group of result files can be later compared by using compare.m
% function.

%-------------------------------------------------------------------------------
%- Set up options and default parameters
%-------------------------------------------------------------------------------
msgInvalidPair = '***ERROR: Bad value for argument: ''%s''';

% default options
options = struct('dataFile', '',...
    'dataType', 'numeric',...
    'mixtureType', 'mix',...
    'initialK', 1, ...
    'fixedK', 'no', ...
    'nameFile', '', ...
    'indexFile', '', ...
    'outputMat', '', ...
    'linkageMap','', ...
    'coordinateFile', '', ...
    'groups','no', ...
    'groupname', '', ...
    'clusters', '', ...
    'minSize', '', ...
    'iters', '', ...
    'refInds', '', ...
    'refIters', '' ...
    );

if nargin == 1 && isstruct(varargin{1})
    % Flatten the struct into a {name, value, name, value, ...} list.
    paramlist = [ fieldnames(varargin{1}) ...
        struct2cell(varargin{1}) ]';
    paramlist = { paramlist{:} };
else
    if mod(nargin,2)
        error('Invalid parameter/value pair arguments.');
    end
    paramlist = varargin;
end

optionfields = fieldnames(options);     % field names with their capitalisation
optionsnames = lower(optionfields);     % same order, used for matching
validMixtureTypes = {'mix','ad_mix','linear_mix','codon_mix','spatial'};

for i=1:2:length(paramlist)
    pname = paramlist{i};
    pvalue = paramlist{i+1};
    ind = strmatch(lower(pname),optionsnames);   % prefix match
    if isempty(ind)
        error(['Invalid parameter: ''' pname '''.']);
    elseif length(ind) > 1
        error(['Ambiguous parameter: ''' pname '''.']);
    end
    switch(optionsnames{ind})
        case {'datafile','datatype','namefile','indexfile','linkagemap', ...
                'coordinatefile','groups','groupname'}
            % Plain string options are stored unchanged.
            if ~ischar(pvalue)
                error(sprintf(msgInvalidPair,pname));
            end
            options.(optionfields{ind}) = pvalue;
        case 'mixturetype'
            if ~ischar(pvalue)
                error(sprintf(msgInvalidPair,pname));
            end
            if isempty(pvalue)
                options.mixtureType = '.';
            elseif any(strcmp(pvalue, validMixtureTypes))
                options.mixtureType = pvalue;
            else
                % The previous test, ~strmatch(...), was never true, so an
                % unknown model name was silently accepted and nothing ran.
                error('*** ERROR: unrecognized model type');
            end
        case 'initialk'
            pvalue = parseNumericOption(pvalue, pname, msgInvalidPair);
            if ~isnumeric(pvalue)
                error(sprintf(msgInvalidPair,pname));
            end
            if isempty(pvalue)
                options.initialK = 0;
            else
                options.initialK = pvalue;
            end
        case 'fixedk'
            if ~ischar(pvalue)
                error(sprintf(msgInvalidPair,pname));
            end
            if isempty(pvalue)
                options.fixedK = 'no';
            else
                options.fixedK = pvalue;
            end
        case 'outputmat'
            if ~ischar(pvalue)
                error(sprintf(msgInvalidPair,pname));
            end
            options.outputMat = pvalue;
            directoryName = fileparts(pvalue);
            % An empty directory part means "current folder" and is valid;
            % otherwise the name must refer to an existing folder.
            if ~isempty(directoryName) && ~exist(directoryName,'dir')
                fprintf(1,'*** ERROR: Output directory ''%s'' does not exist.\n', directoryName);
                return
            end
        case {'clusters','minsize','iters','refinds','refiters'}
            % the options for admixture analysis
            options.(optionfields{ind}) = ...
                parseNumericOption(pvalue, pname, msgInvalidPair);
        otherwise
            error(['Invalid parameter: ''' pname '''.']);
    end
end

% The subfunction to check syntax
if ~checkSyntax(options)
    return
end

% Dispatch to the routine implementing the requested model.
switch options.mixtureType
    case 'mix'
        if isequal(options.groups,'yes')
            greedyPopMix_parallel(options);
        else
            independent_parallel(options);
        end
    case {'linear_mix','codon_mix'}
        linkage_parallel(options);
    case 'spatial'
        if isequal(options.groups, 'yes')
            spatialPopMixture_parallel(options);
        else
            spatial_parallel(options);
        end
    case 'ad_mix'
        admix_parallel(options);
end

% -------------------------------------------------------------------------
function value = parseNumericOption(pvalue, pname, msgInvalidPair)
% Return a numeric option value: numeric input is passed through unchanged,
% character input is converted with STR2NUM; any other type is an error.
if isnumeric(pvalue)
    value = pvalue;
elseif ischar(pvalue)
    % NOTE(review): str2num evaluates its argument as MATLAB code; option
    % strings should therefore come from trusted sources only.
    value = str2num(pvalue);
else
    error(sprintf(msgInvalidPair,pname));
end
% -------------------------------------------------------------------------
% Subfunctions
% -------------------------------------------------------------------------
function isOK = checkSyntax(options)
% CHECKSYNTAX Check the parsed options for inconsistent combinations.
%   isOK = checkSyntax(options) returns 1 when no problem is found and 0
%   otherwise. A message is displayed for every problem detected. A failed
%   check is never reset by a later check.
isOK = 1;

% A fixed number of clusters cannot be combined with several initial values.
if strcmp(options.fixedK, 'yes') && length(options.initialK)>1
    display('*** ERROR: conflicting in options fixedk and initialk.');
    isOK = 0;
end

if strcmp(options.mixtureType, 'mix')
    if strcmp(options.dataType, 'excel') || strcmp(options.dataType,'sequence')
        display('*** ERROR: unknown datatype for the independence module.');
        isOK = 0;
    end
end

% check the admixture parameters
admix_str = {options.clusters, options.minSize, options.iters, ...
    options.refInds, options.refIters};
pt = cellfun('isempty', admix_str);
isAdmixture = strcmp(options.mixtureType, 'ad_mix');

if all(pt) && isAdmixture
    display('*** ERROR: problematic mixture type.');
    isOK = 0;
end

if any(pt) && isAdmixture
    display('*** ERROR: incomplete admixture parameters.');
    isOK = 0;
end

View file

@ -0,0 +1,18 @@
function str = proportion2str(prob)
% PROPORTION2STR Four-character text form of a proportion.
%   prob is expected to lie in [0, 1]; it is rounded to two decimals.
%   str has the form 'd.dd', e.g. '0.00', '0.07', '0.50', '1.00'.
pct = round(100*prob);      % proportion expressed in whole hundredths
if abs(prob) < 1e-3
    str = '0.00';
elseif abs(prob-1) < 1e-3 || pct == 100
    % pct == 100 covers values such as 0.996 that round up to one; without
    % it they would be printed as '0.100'.
    str = '1.00';
elseif pct < 10
    str = ['0.0' num2str(pct)];
else
    str = ['0.' num2str(pct)];
end
%-------------------------------------------------------

View file

@ -0,0 +1,66 @@
function readScript(filename)
% READSCRIPT read the script file and pass its parameters to PARALLEL.
%   Every line of the script has the form  name(value)  or  name('value').
%   The names and values are handed to parallel as name/value string pairs.
%   This function does not perform syntax checking.
% Example:
% readScript('script.txt')

% read the script
ind = readfile(filename);
if isempty(ind)
    return
end
nLines = size(ind,1);

% extract command information
args = cell(1, 2*nLines);
for k = 1:nLines
    [cmdName, paraStr] = extract(ind(k,:));
    % extract returns both strings wrapped in single quotes; remove the
    % quotes to obtain the plain text.
    args{2*k-1} = cmdName(2:end-1);
    args{2*k} = paraStr(2:end-1);
end

% call function parallel directly with the collected strings. Text from the
% script file is no longer assembled into a command and passed to eval, so
% it cannot be executed as code at this point.
parallel(args{:});
% -------------------------------------------------------------------------
% Subfunctions
% -------------------------------------------------------------------------
function [cmdName, paraStr] = extract(commandline)
% EXTRACT Split one script line into command name and parameter string.
%   For a line  name('value')  the text between the first two quotation
%   marks is the parameter; for a line without quotation marks,  name(value),
%   it is the text between the first '(' and the first ')'.
%   Both outputs are returned wrapped in single quotes.
[cmdName, remainStr] = strtok(commandline,'(');

% strfind searches for the literal characters; '(' and ')' are grouping
% metacharacters when used as regular-expression patterns.
boundary = strfind(remainStr,'''');
if isempty(boundary) % if paraStr does not contain quotation marks
    % use parenthesis as boundaries
    openIdx = strfind(remainStr,'(');
    closeIdx = strfind(remainStr,')');
    if isempty(openIdx) || isempty(closeIdx)
        paraStr = '';
    else
        paraStr = remainStr(openIdx(1)+1 : closeIdx(1)-1);
    end
elseif length(boundary) < 2
    error('*** ERROR: unbalanced quotation mark in script line: %s', commandline);
else
    paraStr = remainStr(boundary(1)+1 : boundary(2)-1);
end

cmdName = strcat('''',cmdName,'''');
paraStr = strcat('''',paraStr,'''');
% -------------------------------------------------------------------------
function T = readfile(filename)
% READFILE Read a text file into a character matrix, one row per line.
%   Returns [] (after displaying a message) when the file cannot be opened
%   and an empty array when the file contains no lines. Rows are grown by
%   indexed assignment, so lines shorter than the longest one are padded by
%   MATLAB.
f = fopen(filename,'r');
if f == -1
    % error(filename);
    display('*** ERROR: invalid script name.');
    T = [];
    return
end

T = '';     % defined even if the file has no lines
i = 1;
while 1
    line = fgetl(f);
    % fgetl returns -1 (not a string) at end of file
    if ~ischar(line), break, end
    n = length(line);
    T(i,1:n) = line(1:n);
    i = i+1;
end
fclose(f);

6
matlab/parallel/sc.txt Normal file
View file

@ -0,0 +1,6 @@
datafile('r:\baps5\data\simple_data.txt');
mixturetype('mix')
outputmat('r:\baps5\data\simple_mix.mat')
initialk(2 3)
fixedk('no')
datatype('numeric')

8
matlab/parallel/sc2.txt Normal file
View file

@ -0,0 +1,8 @@
datafile('e:\baps5\data\ssuis_mixture_results_codon.mat')
mixturetype('ad_mix')
outputmat('e:\baps5\data\ssuis_small_script_2.mat')
clusters(3:4)
minsize(5)
iters(2)
refinds(3)
refiters(4)

View file

@ -0,0 +1,17 @@
function srt_partition = sort_partition(partition)
% SORT_PARTITION sorts a given partition into the canonical order, where every
% new class has the smallest possible index.
% input:
% partition is a vector of class labels; entries equal to 0 are treated as
% unlabelled and stay 0. Labels do not need to be consecutive.
% output:
% srt_partition is a row vector with one entry per element of partition.
srt_partition = zeros(1, numel(partition));
next_class = 0;
nonz = find(partition);
% Relabel classes in order of first appearance until every labelled element
% has been consumed (consumed elements are set to 0 in the working copy).
while ~isempty(nonz)
    next_class = next_class + 1;
    here = find(partition == partition(nonz(1)));
    srt_partition(here) = next_class;
    partition(here) = 0;
    nonz = find(partition);
end

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,63 @@
%--------------------------------------------------------------------------
function tulostaAdmixtureTiedot(proportions, uskottavuus, alaRaja, niter)
% TULOSTAADMIXTURETIEDOT Print the table of admixture results.
%   proportions - one row per individual, one column per population.
%   uskottavuus - one value per individual, printed in the last column
%                 (headed 'p').
%   alaRaja     - not used in this function.
%   niter       - number of simulations, reported in the heading.
%   The table is shown in the command window. It is also appended to the
%   file named in the GUI object tagged 'filename2_text'; when that name is
%   empty the command-window output is captured with DIARY instead.

% Input and output file names are taken from the GUI text objects.
inputf = get(findobj('Tag','filename1_text'),'String');
outf = get(findobj('Tag','filename2_text'),'String');

if isempty(outf)
    fid = -1;
    diary('baps4_output.baps'); % save in text anyway.
else
    fid = fopen(outf,'a');
end
toFile = (fid ~= -1);

ninds = length(uskottavuus);
npops = size(proportions,2);

% Heading lines shared by the screen and the file output.
heading = {'RESULTS OF ADMIXTURE ANALYSIS BASED', ...
    'ON MIXTURE CLUSTERING OF INDIVIDUALS', ...
    ['Data file: ' inputf], ...
    ['Number of individuals: ' num2str(ninds)], ...
    ['Results based on ' num2str(niter) ' simulations from posterior allele frequencies.']};

disp(' ');
dispLine;
for k = 1:numel(heading)
    disp(heading{k});
end
disp(' ');

if toFile
    % In the file every heading line is followed by an empty line.
    fprintf(fid, '\n');
    fprintf(fid, '%s \n\n', '--------------------------------------------');
    for k = 1:numel(heading)
        fprintf(fid, '%s \n\n', heading{k});
    end
    fprintf(fid, '\n');
end

% Column header: the population numbers followed by 'p'.
headerRow = blanks(6);
for pop = 1:npops
    padding = blanks(3-floor(log10(pop)));
    headerRow = [headerRow padding num2str(pop) blanks(2)];
end
headerRow = [headerRow blanks(1) 'p']; % Added on 29.08.06
disp(headerRow);

% One row per individual; individuals without any positive proportion get
% only their number.
for ind = 1:ninds
    rowText = [num2str(ind) ':' blanks(4-floor(log10(ind)))];
    if any(proportions(ind,:)>0)
        for pop = 1:npops-1
            rowText = [rowText proportion2str(proportions(ind,pop)) blanks(2)];
        end
        rowText = [rowText proportion2str(proportions(ind,npops)) ': ' ...
            ownNum2Str(uskottavuus(ind))];
    end
    disp(rowText);
    if toFile
        fprintf(fid, '%s \n\n', rowText);
    end
end

if toFile
    fclose(fid);
else
    diary off
end

View file

@ -0,0 +1,803 @@
function out = uipickfiles(varargin)
%uipickfiles: GUI program to select file(s) and/or directories.
%
% Syntax:
% files = uipickfiles('PropertyName',PropertyValue,...)
%
% The current directory can be changed by operating in the file navigator:
% double-clicking on a directory in the list to move further down the tree,
% using the popup menu to move up the tree, typing a path in the box to
% move to any directory or right-clicking on the path box to revisit a
% previously-listed directory.
%
% Files can be added to the list by double-clicking or selecting files
% (non-contiguous selections are possible with the control key) and
% pressing the Add button. Files in the list can be removed or re-ordered.
% When finished, a press of the Done button will return the full paths to
% the selected files in a cell array, structure array or character array.
% If the Cancel button is pressed then zero is returned.
%
% The following optional property/value pairs can be specified as arguments
% to control the indicated behavior:
%
% Property Value
% ---------- ----------------------------------------------------------
% FilterSpec String to specify starting directory and/or file filter.
% Ex: 'C:\bin' will start up in that directory. '*.txt'
% will list only files ending in '.txt'. 'c:\bin\*.txt' will
% do both. Default is to start up in the current directory
% and list all files. Can be changed with the GUI.
%
% REFilter String containing a regular expression used to filter the
% file list. Ex: '\.m$|\.mat$' will list files ending in
% '.m' and '.mat'. Default is empty string. Can be used
% with FilterSpec and both filters are applied. Can be
% changed with the GUI.
%
% Prompt String containing a prompt appearing in the title bar of
% the figure. Default is 'Select files'.
%
% NumFiles Scalar or vector specifying number of files that must be
% selected. A scalar specifies an exact value; a two-element
% vector can be used to specify a range, [min max]. The
% function will not return unless the specified number of
% files have been chosen. Default is [] which accepts any
% number of files.
%
% Output String specifying the data type of the output: 'cell',
% 'struct' or 'char'. Specifying 'cell' produces a cell
% array of strings, the strings containing the full paths of
% the chosen files. 'Struct' returns a structure array like
% the result of the dir function except that the 'name' field
% contains a full path instead of just the file name. 'Char'
% returns a character array of the full paths. This is most
% useful when you have just one file and want it in a string
% instead of a cell array containing just one string. The
% default is 'cell'.
%
% All properties and values are case-insensitive and need only be
% unambiguous. For example,
%
% files = uipickfiles('num',1,'out','ch')
%
% is valid usage.
% Version: 1.0, 25 April 2006
% Author: Douglas M. Schwarz
% Email: dmschwarz=ieee*org, dmschwarz=urgrad*rochester*edu
% Real_email = regexprep(Email,{'=','*'},{'@','.'})
% Define properties and set default values.
% The field names of prop are the lower-case property names that callers
% may pass (optionally abbreviated) as property/value pairs.
prop.filterspec = '*';
prop.refilter = '';
prop.prompt = 'Select files';
prop.numfiles = [];
prop.output = 'cell';
% Process inputs and set prop fields.
% Each argument is either a property name followed by its value, or a
% struct whose field names are property names. Names are matched
% case-insensitively by prefix and must identify exactly one property.
properties = fieldnames(prop);
arg_index = 1;
while arg_index <= nargin
arg = varargin{arg_index};
if ischar(arg)
prop_index = find(strncmpi(arg,properties,length(arg)));
if length(prop_index) == 1
prop.(properties{prop_index}) = varargin{arg_index + 1};
else
error('Property ''%s'' does not exist or is ambiguous.',arg)
end
arg_index = arg_index + 2;
elseif isstruct(arg)
arg_fn = fieldnames(arg);
for i = 1:length(arg_fn)
prop_index = find(strncmpi(arg_fn{i},properties,...
length(arg_fn{i})));
if length(prop_index) == 1
prop.(properties{prop_index}) = arg.(arg_fn{i});
else
error('Property ''%s'' does not exist or is ambiguous.',...
arg_fn{i})
end
end
arg_index = arg_index + 1;
else
error(['Properties must be specified by property/value pairs',...
' or structures.'])
end
end
% Validate FilterSpec property.
if isempty(prop.filterspec)
prop.filterspec = '*';
end
if ~ischar(prop.filterspec)
error('FilterSpec property must contain a string.')
end
% Validate REFilter property.
if ~ischar(prop.refilter)
error('REFilter property must contain a string.')
end
% Validate Prompt property.
if ~ischar(prop.prompt)
error('Prompt property must contain a string.')
end
% Validate NumFiles property.
% After unique(), a scalar means an exact count and two elements mean a
% [min max] range; numstr is the matching instruction shown to the user.
if numel(prop.numfiles) > 2 || any(prop.numfiles < 0)
error('NumFiles must be empty, a scalar or two-element vector.')
end
prop.numfiles = unique(prop.numfiles);
if isequal(prop.numfiles,1)
numstr = 'Select exactly 1 file.';
elseif length(prop.numfiles) == 1
numstr = sprintf('Select exactly %d files.',prop.numfiles);
else
numstr = sprintf('Select %d to %d files.',prop.numfiles);
end
% Validate Output property.
% The value may be abbreviated; it is replaced by the full legal name.
legal_outputs = {'cell','struct','char'};
out_idx = find(strncmpi(prop.output,legal_outputs,length(prop.output)));
if length(out_idx) == 1
prop.output = legal_outputs{out_idx};
else
error(['Value of ''Output'' property, ''%s'', is illegal or '...
'ambiguous.'],prop.output)
end
% Initialize file lists.
% FilterSpec is split into a starting directory and a file-name filter;
% missing parts default to the current directory and '*'.
[current_dir,f,e] = fileparts(prop.filterspec);
filter = [f,e];
if isempty(current_dir)
current_dir = pwd;
end
if isempty(filter)
filter = '*';
end
re_filter = prop.refilter;
full_filter = fullfile(current_dir,filter);
path_cell = path2cell(current_dir);
fdir = filtered_dir(full_filter,re_filter);
filenames = {fdir.name}';
filenames = annotate_file_names(filenames,fdir);
% Initialize some data.
% file_picks holds the short names, full_file_picks the full paths and
% dir_picks the directory-listing entries of the chosen files. These
% variables are shared with the nested callback functions below.
file_picks = {};
full_file_picks = {};
dir_picks = dir(' '); % Create empty directory structure.
show_full_path = false;
nodupes = true;
history = {current_dir};
% Create figure.
% Closing the window is routed to the cancel callback.
gray = get(0,'DefaultUIControlBackgroundColor');
fig = figure('Position',[0 0 740 445],...
'Color',gray,...
'WindowStyle','modal',...
'Resize','off',...
'NumberTitle','off',...
'Name',prop.prompt,...
'IntegerHandle','off',...
'CloseRequestFcn',@cancel,...
'CreateFcn',{@movegui,'center'});
% Create uicontrols.
uicontrol('Style','frame',...
'Position',[255 260 110 70])
uicontrol('Style','frame',...
'Position',[275 135 110 100])
% Navigator list (left): contents of the current directory.
navlist = uicontrol('Style','listbox',...
'Position',[10 10 250 320],...
'String',filenames,...
'Value',[],...
'BackgroundColor','w',...
'Callback',@clicknav,...
'Max',2);
% Picks list (right): the files chosen so far.
pickslist = uicontrol('Style','listbox',...
'Position',[380 10 350 320],...
'String',{},...
'BackgroundColor','w',...
'Callback',@clickpicks,...
'Max',2);
openbut = uicontrol('Style','pushbutton',...
'Position',[270 300 80 20],...
'String','Open',...
'Enable','off',...
'Callback',@open);
% Arrow icon for the Add button. Entries equal to 2 are replaced by NaN
% below; the 7-by-12 pattern is placed into a 16-by-76 NaN image that is
% replicated over three colour planes.
% NOTE(review): NaN entries are presumably drawn as transparent - confirm.
arrow = [2 2 2 2 2 2 2 2 1 2 2 2;...
2 2 2 2 2 2 2 2 2 0 2 2;...
2 2 2 2 2 2 2 2 2 2 0 2;...
0 0 0 0 0 0 0 0 0 0 0 0;...
2 2 2 2 2 2 2 2 2 2 0 2;...
2 2 2 2 2 2 2 2 2 0 2 2;...
2 2 2 2 2 2 2 2 1 2 2 2];
arrow(arrow == 2) = NaN;
arrow_im = NaN*ones(16,76);
arrow_im(6:12,45:56) = arrow/2;
im = repmat(arrow_im,[1 1 3]);
addbut = uicontrol('Style','pushbutton',...
'Position',[270 270 80 20],...
'String','Add ',...
'Enable','off',...
'CData',im,...
'Callback',@add);
removebut = uicontrol('Style','pushbutton',...
'Position',[290 205 80 20],...
'String','Remove',...
'Enable','off',...
'Callback',@remove);
moveupbut = uicontrol('Style','pushbutton',...
'Position',[290 175 80 20],...
'String','Move Up',...
'Enable','off',...
'Callback',@moveup);
movedownbut = uicontrol('Style','pushbutton',...
'Position',[290 145 80 20],...
'String','Move Down',...
'Enable','off',...
'Callback',@movedown);
uicontrol('Position',[10 380 250 16],...
'Style','text',...
'String','Current Directory',...
'HorizontalAlignment','center')
% Popup listing the elements of the current path, deepest element first.
dir_popup = uicontrol('Style','popupmenu',...
'Position',[10 335 250 20],...
'BackgroundColor','w',...
'String',path_cell(end:-1:1),...
'Value',1,...
'Callback',@dirpopup);
% Editable path box; its context menu lists the visited directories.
hist_cm = uicontextmenu;
pathbox = uicontrol('Position',[10 360 250 20],...
'Style','edit',...
'BackgroundColor','w',...
'String',current_dir,...
'HorizontalAlignment','left',...
'Callback',@change_path,...
'UIContextMenu',hist_cm);
hist_menus = [];
hist_cb = @history_cb;
hist_menus = make_history_cm(hist_cb,hist_cm,hist_menus,history);
uicontrol('Position',[10 425 80 16],...
'Style','text',...
'String','File Filter',...
'HorizontalAlignment','left')
uicontrol('Position',[100 425 160 16],...
'Style','text',...
'String','Reg. Exp. Filter',...
'HorizontalAlignment','left')
showallfiles = uicontrol('Position',[270 405 100 20],...
'Style','checkbox',...
'String','Show All Files',...
'Value',0,...
'HorizontalAlignment','left',...
'Callback',@togglefilter);
filter_ed = uicontrol('Position',[10 405 80 20],...
'Style','edit',...
'BackgroundColor','w',...
'String',filter,...
'HorizontalAlignment','left',...
'Callback',@setfilspec);
refilter_ed = uicontrol('Position',[100 405 160 20],...
'Style','edit',...
'BackgroundColor','w',...
'String',re_filter,...
'HorizontalAlignment','left',...
'Callback',@setrefilter);
viewfullpath = uicontrol('Style','checkbox',...
'Position',[380 335 230 20],...
'String','Show full paths',...
'Value',show_full_path,...
'HorizontalAlignment','left',...
'Callback',@showfullpath);
remove_dupes = uicontrol('Style','checkbox',...
'Position',[380 360 230 20],...
'String','Remove duplicates (as per full path)',...
'Value',nodupes,...
'HorizontalAlignment','left',...
'Callback',@removedupes);
uicontrol('Position',[380 405 350 20],...
'Style','text',...
'String','Selected Files',...
'HorizontalAlignment','center')
uicontrol('Position',[280 80 80 30],'String','Done',...
'Callback',@done);
uicontrol('Position',[280 30 80 30],'String','Cancel',...
'Callback',@cancel);
% Show the required number of files only when NumFiles was specified.
if ~isempty(prop.numfiles)
uicontrol('Position',[380 385 350 16],...
'Style','text',...
'String',numstr,...
'ForegroundColor','r',...
'HorizontalAlignment','center')
end
set(fig,'HandleVisibility','off')
% Wait here for the dialog; the done and cancel callbacks delete the figure.
uiwait(fig)
% Compute desired output.
% The cancel callback sets prop.output to 'cancel', which yields 0.
switch prop.output
case 'cell'
out = full_file_picks;
case 'struct'
out = dir_picks(:);
case 'char'
out = char(full_file_picks);
case 'cancel'
out = 0;
end
% -------------------- Callback functions --------------------
function add(varargin)
% Callback of the Add button: append every entry selected in the navigator
% list to the pick lists, then refresh the picks listbox.
selected = get(navlist,'Value');
for k = 1:length(selected)
    entry = fdir(selected(k));
    shortName = entry.name;
    fullName = fullfile(current_dir,shortName);
    entry.name = fullName;      % the stored listing entry carries the full path
    % Skip the entry only when duplicate removal is on and it is already picked.
    if ~(nodupes && any(strcmp(full_file_picks,fullName)))
        file_picks{end + 1} = shortName;
        full_file_picks{end + 1} = fullName;
        dir_picks(end + 1) = entry;
    end
end
if show_full_path
    shown = full_file_picks;
else
    shown = file_picks;
end
set(pickslist,'String',shown,'Value',[]);
set([removebut,moveupbut,movedownbut],'Enable','off');
end
function remove(varargin)
% Callback of the Remove button: delete the selected picks from all three
% pick lists and keep the listbox scroll position and selection sensible.
selected = get(pickslist,'Value');
file_picks(selected) = [];
full_file_picks(selected) = [];
dir_picks(selected) = [];
% Shift the first visible row up by the number of removed rows above it.
firstVisible = get(pickslist,'ListboxTop');
removedAbove = sum(selected < firstVisible);
firstVisible = firstVisible - removedAbove;
remaining = length(file_picks);
newSelection = min(min(selected) - removedAbove,remaining);
if remaining == 0
    newSelection = [];
    set([removebut,moveupbut,movedownbut],'Enable','off')
end
if show_full_path
    shown = full_file_picks;
else
    shown = file_picks;
end
set(pickslist,'String',shown,'Value',newSelection,...
    'ListboxTop',firstVisible)
end
function open(varargin)
% Callback of the Open button: enter the directory selected in the
% navigator list. '.' is ignored and '..' is handled through the directory
% popup; selecting a file does nothing.
selected = get(navlist,'Value');
if ~fdir(selected).isdir
    return
end
entryName = fdir(selected).name;
if strcmp(entryName,'.')
    return
end
if strcmp(entryName,'..')
    % Choose the parent in the popup and let its callback do the work.
    set(dir_popup,'Value',min(2,length(path_cell)))
    dirpopup();
    return
end
% Descend into the directory and record it in the history menu.
current_dir = fullfile(current_dir,entryName);
history{end+1} = current_dir;
history = unique(history);
hist_menus = make_history_cm(hist_cb,hist_cm,hist_menus,...
    history);
% Rebuild the listing for the new directory.
full_filter = fullfile(current_dir,filter);
path_cell = path2cell(current_dir);
fdir = filtered_dir(full_filter,re_filter);
filenames = annotate_file_names({fdir.name}',fdir);
set(dir_popup,'String',path_cell(end:-1:1),'Value',1)
set(pathbox,'String',current_dir)
set(navlist,'ListboxTop',1,'Value',[],'String',filenames)
set(addbut,'Enable','off')
set(openbut,'Enable','off')
end
function clicknav(varargin)
% Callback of the navigator list: enable Add/Open according to the current
% selection and perform the default action on a double click (open for a
% single directory, add otherwise).
selected = get(navlist,'Value');
numSelected = length(selected);
onDoubleClick = @add;
if numSelected == 0
    set([addbut,openbut],'Enable','off')
elseif numSelected == 1
    set(addbut,'Enable','on');
    if fdir(selected).isdir
        set(openbut,'Enable','on')
        onDoubleClick = @open;
    else
        set(openbut,'Enable','off')
    end
else
    set(addbut,'Enable','on')
    set(openbut,'Enable','off')
end
% SelectionType 'open' indicates a double click.
if strcmp(get(fig,'SelectionType'),'open')
    onDoubleClick();
end
end
function clickpicks(varargin)
% Callback of the picks list: enable Remove / Move Up / Move Down according
% to the selection; a double click removes the selected picks.
selected = get(pickslist,'Value');
if isempty(selected)
    set([removebut,moveupbut,movedownbut],'Enable','off')
else
    set(removebut,'Enable','on')
    % Moving up is impossible when the first pick is selected, moving down
    % when the last one is.
    upState = 'on';
    if min(selected) == 1
        upState = 'off';
    end
    set(moveupbut,'Enable',upState)
    downState = 'on';
    if max(selected) == length(file_picks)
        downState = 'off';
    end
    set(movedownbut,'Enable',downState)
end
if strcmp(get(fig,'SelectionType'),'open')
    remove();
end
end
function dirpopup(varargin)
% Callback of the directory popup: move up to the chosen ancestor of the
% current directory. On Windows the topmost popup entry lists the drives.
choice = get(dir_popup,'Value');
depth = length(path_cell);
% The popup shows the path deepest element first, so entry number "choice"
% keeps all but the last choice-1 path elements.
path_cell = path_cell(1:end-choice+1);
if ispc && choice == depth
    % Top level on Windows: show the available drives as directories.
    current_dir = '';
    full_filter = filter;
    fdir = struct('name',getdrives,'date',datestr(now),...
        'bytes',0,'isdir',1);
else
    current_dir = cell2path(path_cell);
    history{end+1} = current_dir;
    history = unique(history);
    hist_menus = make_history_cm(hist_cb,hist_cm,hist_menus,...
        history);
    full_filter = fullfile(current_dir,filter);
    fdir = filtered_dir(full_filter,re_filter);
end
filenames = annotate_file_names({fdir.name}',fdir);
set(dir_popup,'String',path_cell(end:-1:1),'Value',1)
set(pathbox,'String',current_dir)
set(navlist,'String',filenames,'Value',[])
set(addbut,'Enable','off')
end
function change_path(varargin)
% Callback of the path edit box: switch to the typed directory after
% resolving '..' elements; an error dialog is shown if it does not exist.
typedPath = get(pathbox,'String');
% Process any directories named '..': drop each '..' together with the
% element that precedes it.
typedCell = path2cell(typedPath);
dropMask = strcmp(typedCell,'..');
dropMask(find(dropMask) - 1) = true;
typedCell(dropMask) = [];
typedPath = cell2path(typedCell);
% Check for existence of directory.
if ~exist(typedPath,'dir')
    uiwait(errordlg(['Directory "',typedPath,...
        '" does not exist.'],'','modal'))
    return
end
% Accept the directory, record it in the history and refresh the listing.
current_dir = typedPath;
history{end+1} = current_dir;
history = unique(history);
hist_menus = make_history_cm(hist_cb,hist_cm,hist_menus,history);
full_filter = fullfile(current_dir,filter);
path_cell = path2cell(current_dir);
fdir = filtered_dir(full_filter,re_filter);
filenames = annotate_file_names({fdir.name}',fdir);
set(dir_popup,'String',path_cell(end:-1:1),'Value',1)
set(pathbox,'String',current_dir)
set(navlist,'String',filenames,'Value',[])
set(addbut,'Enable','off')
set(openbut,'Enable','off')
end
function showfullpath(varargin)
% Callback of the 'Show full paths' checkbox: switch the picks list between
% short names and full paths.
show_full_path = get(viewfullpath,'Value');
shown = file_picks;
if show_full_path
    shown = full_file_picks;
end
set(pickslist,'String',shown)
end
function removedupes(varargin)
% Callback of the 'Remove duplicates' checkbox. When the box is checked,
% picks whose full path already occurs earlier in the list are dropped; the
% first occurrence of every path is kept and the order is preserved.
nodupes = get(remove_dupes,'Value');
if nodupes
    num_picks = length(full_file_picks);
    % Mark first occurrences explicitly rather than through the index
    % output of unique, so the result does not depend on which occurrence
    % unique reports.
    keep = true(1,num_picks);
    for k = 2:num_picks
        keep(k) = ~any(strcmp(full_file_picks{k},full_file_picks(1:k-1)));
    end
    full_file_picks = full_file_picks(keep);
    file_picks = file_picks(keep);
    % dir_picks backs the 'struct' output and must stay aligned with the
    % two name lists.
    dir_picks = dir_picks(keep);
    if show_full_path
        set(pickslist,'String',full_file_picks,'Value',[])
    else
        set(pickslist,'String',file_picks,'Value',[])
    end
    set([removebut,moveupbut,movedownbut],'Enable','off')
end
end
function moveup(varargin)
% Callback of the Move Up button: move the selected picks one position
% towards the top of the list and keep them selected.
value = get(pickslist,'Value');
set(removebut,'Enable','on')
n = length(file_picks);
omega = 1:n;
% index(k) is the old position of the pick that ends up at position k:
% selected picks go one slot up, the others fill the remaining slots in
% their original order.
index = zeros(1,n);
index(value - 1) = omega(value);
index(setdiff(omega,value - 1)) = omega(setdiff(omega,value));
file_picks = file_picks(index);
full_file_picks = full_file_picks(index);
% dir_picks backs the 'struct' output and must follow the same reordering.
dir_picks = dir_picks(index);
value = value - 1;
if show_full_path
    set(pickslist,'String',full_file_picks,'Value',value)
else
    set(pickslist,'String',file_picks,'Value',value)
end
if min(value) == 1
    set(moveupbut,'Enable','off')
end
set(movedownbut,'Enable','on')
end
function movedown(varargin)
% Callback of the Move Down button: move the selected picks one position
% towards the bottom of the list and keep them selected.
value = get(pickslist,'Value');
set(removebut,'Enable','on')
n = length(file_picks);
omega = 1:n;
% index(k) is the old position of the pick that ends up at position k:
% selected picks go one slot down, the others fill the remaining slots in
% their original order.
index = zeros(1,n);
index(value + 1) = omega(value);
index(setdiff(omega,value + 1)) = omega(setdiff(omega,value));
file_picks = file_picks(index);
full_file_picks = full_file_picks(index);
% dir_picks backs the 'struct' output and must follow the same reordering.
dir_picks = dir_picks(index);
value = value + 1;
if show_full_path
    set(pickslist,'String',full_file_picks,'Value',value)
else
    set(pickslist,'String',file_picks,'Value',value)
end
if max(value) == n
    set(movedownbut,'Enable','off')
end
set(moveupbut,'Enable','on')
end
function togglefilter(varargin)
% Callback of the 'Show All Files' checkbox: when checked, both filters are
% bypassed and their edit boxes disabled; when unchecked, the filters typed
% in the edit boxes apply again.
showAll = get(showallfiles,'Value');
if showAll
    filter = '*';
    re_filter = '';
    editState = 'off';
else
    filter = get(filter_ed,'String');
    re_filter = get(refilter_ed,'String');
    editState = 'on';
end
set([filter_ed,refilter_ed],'Enable',editState)
% Refresh the navigator list with the filters now in effect.
full_filter = fullfile(current_dir,filter);
fdir = filtered_dir(full_filter,re_filter);
filenames = annotate_file_names({fdir.name}',fdir);
set(navlist,'String',filenames,'Value',[])
set(addbut,'Enable','off')
end
function setfilspec(varargin)
% Callback of the file-filter edit box: apply the typed filter. An empty
% filter becomes '*'; a filter that includes a subdirectory also changes
% the current directory.
filter = get(filter_ed,'String');
if isempty(filter)
    filter = '*';
    set(filter_ed,'String',filter)
end
% Process file spec if a subdirectory was included.
[subdir,base,ext] = fileparts(filter);
if ~isempty(subdir)
    set(pathbox,'String',fullfile(current_dir,subdir,''))
    filter = [base,ext];
    if isempty(filter)
        filter = '*';
    end
    set(filter_ed,'String',filter)
    change_path();   % reads the new path from the path box
end
% Refresh the navigator list with the new filter.
full_filter = fullfile(current_dir,filter);
fdir = filtered_dir(full_filter,re_filter);
filenames = annotate_file_names({fdir.name}',fdir);
set(navlist,'String',filenames,'Value',[])
set(addbut,'Enable','off')
end
function setrefilter(varargin)
% Callback of the regular-expression edit box: apply the typed expression
% and refresh the navigator list.
re_filter = get(refilter_ed,'String');
fdir = filtered_dir(full_filter,re_filter);
filenames = annotate_file_names({fdir.name}',fdir);
set(navlist,'String',filenames,'Value',[])
set(addbut,'Enable','off')
end
function done(varargin)
% Callback of the Done button: close the dialog, unless the NumFiles
% requirement is not met, in which case an error dialog is shown instead.
% Optional shortcut: click on a file and press 'Done'.
% if isempty(full_file_picks) && strcmp(get(addbut,'Enable'),'on')
% add();
% end
numPicked = length(full_file_picks);
if ~isempty(prop.numfiles)
    % prop.numfiles is either an exact count or a [min max] range.
    tooFew = numPicked < prop.numfiles(1);
    if tooFew || numPicked > prop.numfiles(end)
        if tooFew
            headline = 'Too few files selected.';
        else
            headline = 'Too many files selected.';
        end
        uiwait(errordlg({headline,numstr},'','modal'))
        return
    end
end
delete(fig)
end
function cancel(varargin)
% Callback of the Cancel button and of the window close request: mark the
% output as cancelled and close the dialog.
prop.output = 'cancel';
delete(fig)
end
function history_cb(varargin)
% Callback of the history context menu: jump to the directory stored in the
% history list at the position given by the third callback argument.
current_dir = history{varargin{3}};
full_filter = fullfile(current_dir,filter);
path_cell = path2cell(current_dir);
fdir = filtered_dir(full_filter,re_filter);
filenames = annotate_file_names({fdir.name}',fdir);
set(dir_popup,'String',path_cell(end:-1:1),'Value',1)
set(pathbox,'String',current_dir)
set(navlist,'ListboxTop',1,'Value',[],'String',filenames)
set(addbut,'Enable','off')
set(openbut,'Enable','off')
end
end
% -------------------- Subfunctions --------------------
function c = path2cell(p)
% Turns a path string into a cell array of path elements.
% p is split at backslash and forward-slash characters.
c = strread(p,'%s','delimiter','\\/');
if ispc
% On Windows a 'My Computer' element is placed in front of the elements.
c = [{'My Computer'};c];
else
% Elsewhere the first element is replaced by the file separator.
% NOTE(review): presumably the first element is the empty string in front
% of the leading '/' of an absolute path - confirm.
c = [{filesep};c(2:end)];
end
end
function p = cell2path(c)
% Joins a cell array of path elements into a path string. On Windows the
% first element is left out.
if ispc
    parts = c(2:end);
else
    parts = c;
end
p = fullfile(parts{:},'');
end
function d = filtered_dir(full_filter,re_filter)
% FILTERED_DIR  Like dir, but applies filters and sorting.
%   d = filtered_dir(full_filter) returns a struct array in the format of
%   dir containing the non-directory entries that match the wildcard
%   pattern full_filter plus all directory entries of the folder part of
%   full_filter (as given by fileparts).
%
%   d = filtered_dir(full_filter,re_filter) additionally drops files whose
%   names do not match the regular expression re_filter. Matching ignores
%   case on Windows. Directory entries are never filtered.
%
%   If full_filter itself names an existing directory, no files are listed.
%   The ordering of the result is controlled by the "style" setting below.
p = fileparts(full_filter);
% Guard the index so that an empty filter string does not raise an error.
if isempty(p) && ~isempty(full_filter) && full_filter(1) == '/'
    p = '/';
end
% Listing of the folder being browsed; reduced to directories further down.
ddir = dir(p);
if exist(full_filter,'dir')
    % No files to list. Derive the empty listing from the dir output so it
    % has exactly the fields dir returns in this MATLAB release; a
    % hand-built struct can make the concatenations below fail with a
    % field-name mismatch.
    dfiles = reshape(ddir([]),0,1);
else
    dfiles = dir(full_filter);
end
if ~isempty(dfiles)
    dfiles([dfiles.isdir]) = [];
end
ddir = ddir([ddir.isdir]);
% Additional regular expression filter.
if nargin > 1 && ~isempty(re_filter)
    if ispc
        no_match = cellfun('isempty',regexpi({dfiles.name},re_filter));
    else
        no_match = cellfun('isempty',regexp({dfiles.name},re_filter));
    end
    dfiles(no_match) = [];
end
% Set navigator style:
% 1 => mix file and directory names
% 2 => list all files before all directories
% 3 => list all directories before all files
% 4 => same as 2 except put . and .. directories first
% The same style is used on every platform.
style = 4;
switch style
    case 1
        d = [dfiles;ddir];
        [unused,index] = sort({d.name});
        d = d(index);
    case 2
        [unused,index1] = sort({dfiles.name});
        [unused,index2] = sort({ddir.name});
        d = [dfiles(index1);ddir(index2)];
    case 3
        [unused,index1] = sort({dfiles.name});
        [unused,index2] = sort({ddir.name});
        d = [ddir(index2);dfiles(index1)];
    case 4
        [unused,index1] = sort({dfiles.name});
        % Pull '.' and '..' out of the directory list so they can lead.
        dot1 = find(strcmp({ddir.name},'.'));
        dot2 = find(strcmp({ddir.name},'..'));
        ddot1 = ddir(dot1);
        ddot2 = ddir(dot2);
        ddir([dot1,dot2]) = [];
        [unused,index2] = sort({ddir.name});
        d = [ddot1;ddot2;dfiles(index1);ddir(index2)];
end
end
function drives = getdrives
% GETDRIVES  Return a 1-by-N cell array of drive names on Windows.
%   Each letter from A to Z is probed by testing whether '<letter>:\'
%   exists as a directory; every hit is reported as '<letter>:'.
drives = cell(1,0);
for letter = 'A':'Z'
    if exist([letter,':\'],'dir')
        drives{end+1} = [letter,':'];
    end
end
end
function filenames = annotate_file_names(filenames,dir_listing)
% ANNOTATE_FILE_NAMES  Mark directory names with a trailing file separator.
%   filenames   - cell array of names
%   dir_listing - struct array with an isdir field, indexed in parallel
%                 with filenames
%   Names whose matching dir_listing entry has isdir set get filesep
%   appended; all other names are returned unchanged.
sep = filesep;
count = length(filenames);
for k = 1:count
    entry = dir_listing(k);
    if entry.isdir
        filenames{k} = horzcat(filenames{k},sep);
    end
end
end
function hist_menus = make_history_cm(cb,hist_cm,hist_menus,history)
% MAKE_HISTORY_CM  Rebuild the menu items of the history context menu.
%   cb         - callback; registered as {cb,k} so it receives the entry
%                index k as an extra argument
%   hist_cm    - parent menu under which the items are created
%   hist_menus - handles of the existing items; deleted if non-empty
%   history    - cell array of labels, one per item
%   Returns the handles of the newly created items in history order.
if ~isempty(hist_menus)
    delete(hist_menus)
end
count = length(history);
hist_menus = zeros(1,count);
for k = 1:count
    item_callback = {cb,k};
    hist_menus(k) = uimenu(hist_cm,'Label',history{k},'Callback',item_callback);
end
end