ourMELONS/matlab/parallel/admix_parallel.m

function admix_parallel(options)
% ADMIX_PARALLEL is the command line version of the baps partition with
% admixture models.
% Input: options is a struct generated by parallel.m

%--------------------------------------------------------------------------
%- Syntax check out
%--------------------------------------------------------------------------
rand('state',0); % used for debugging
outp = [options.outputMat '.txt'];
inp = options.dataFile;
clusters = options.clusters;
fprintf(1,'Parallel computing...\n');
fprintf(1,'Admixture analysis for cluster(s): %s.\n',num2str(clusters));

global COUNTS; global PARTITION; global SUMCOUNTS;
clearGlobalVars;

struct_array = load(options.dataFile);
if isfield(struct_array,'c')  %Matlab versio
    c = struct_array.c;
    if ~isfield(c,'PARTITION') | ~isfield(c,'rowsFromInd')
        disp('*** ERROR: Incorrect data format');
        return
    end
elseif isfield(struct_array,'PARTITION')  %Mideva versio
    c = struct_array;
    if ~isfield(c,'rowsFromInd')
        disp('*** ERROR: Incorrect data format');
        return
    end
else
    disp('*** ERROR: Incorrect data format');
    return;
end


if isfield(c, 'gene_lengths') && ...
            (strcmp(c.mixtureType,'linear_mix') | ...
            strcmp(c.mixtureType,'codon_mix')) % if the mixture is from a linkage model
        % Redirect the call to the linkage admixture function.
        fprintf(1,'Redirecting to Linkage Model Admixture\n');
        c.data = noIndex(c.data,c.noalle); % call function noindex to remove the index column
        linkage_admix_parallel(c,options);
        return
    end

% This section is disabled, -Jing 27.10.2009
% if isfield(c, 'gene_lengths') && ...
%         (strcmp(c.mixtureType,'linkage_mix') | ...
%         strcmp(c.mixtureType,'codon_mix')) % if the mixture is from a linkage model
%     % Redirect the call to the linkage admixture function.
%     c.data = noIndex(c.data,c.noalle); % call function noindex to remove the index column
%     linkage_admix(c);
%     return
% end

PARTITION = c.PARTITION; COUNTS = c.COUNTS; SUMCOUNTS = c.SUMCOUNTS;
alleleCodes = c.alleleCodes; adjprior = c.adjprior; popnames = c.popnames;
rowsFromInd = c.rowsFromInd; data = c.data; npops = c.npops; noalle = c.noalle;


% answers = inputdlg({['Input the minimum size of a population that will'...
%             ' be taken into account when admixture is estimated.']},...
%             'Input minimum population size',[1],...
%             {'5'});
% if isempty(answers) return; end
% -------------------------------------------
% NEW: for parallel computing
% -------------------------------------------
alaRaja = options.minSize;
[npops, clusterIndex] = poistaLiianPienet(npops, rowsFromInd, alaRaja);
if length(clusterIndex)<length(clusters)
    disp('*** ERROR: error in cluster labels.');
    return
end
clusters = clusterIndex(clusters); % after removing outlier clusters

nloci = size(COUNTS,2);
ninds = size(data,1)/rowsFromInd;

% answers = inputdlg({['Input number of iterations']},'Input',[1],{'50'});
% if isempty(answers) return; end
iterationCount = options.iters;

% answers = inputdlg({['Input number of reference individuals from each population']},'Input',[1],{'50'});
% if isempty(answers) nrefIndsInPop = 50;
% else nrefIndsInPop = str2num(answers{1,1});
% end
nrefIndsInPop = options.refInds;

% answers = inputdlg({['Input number of iterations for reference individuals']},'Input',[1],{'10'});
% if isempty(answers) return; end
% iterationCountRef = str2num(answers{1,1});
iterationCountRef = options.refIters;

% First calculate log-likelihood ratio for all individuals:
likelihood = zeros(ninds,1);
allfreqs = computeAllFreqs2(noalle);
for ind = 1:ninds
    omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd);
    osuusTaulu = zeros(1,npops);
    if PARTITION(ind)==0
        % Yksil?on outlier
    elseif PARTITION(ind)~=0
        if PARTITION(ind)>0
            osuusTaulu(PARTITION(ind)) = 1;
        else
            % Yksil<69>t, joita ei ole sijoitettu mihink<6E><6B>n koriin.
            arvot = zeros(1,npops);
            for q=1:npops
                osuusTaulu = zeros(1,npops);
                osuusTaulu(q) = 1;
                arvot(q) = computeIndLogml(omaFreqs, osuusTaulu);
            end
            [iso_arvo, isoimman_indeksi] = max(arvot);
            osuusTaulu = zeros(1,npops);
            osuusTaulu(isoimman_indeksi) = 1;
            PARTITION(ind)=isoimman_indeksi;
        end
        logml = computeIndLogml(omaFreqs, osuusTaulu);
        logmlAlku = logml;
        for osuus = [0.5 0.25 0.05 0.01]
            [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
        end
        logmlLoppu = logml;
        likelihood(ind) = logmlLoppu-logmlAlku;
    end
end

% Analyze further only individuals who have log-likelihood ratio larger than 3:
% ---------------------------------------
% PARALLEL COMPUTING IMPLEMENTED HERE
% ---------------------------------------
to_investigate = (find(likelihood>3))';
admix_populaatiot = unique(PARTITION(to_investigate));
validCluster = intersect(clusters, admix_populaatiot); % for the chosen clusters

ix = zeros(length(to_investigate),1);
for i = 1:length(validCluster)
    ix = ix | (PARTITION(to_investigate) == validCluster(i));
end
if ~any(ix) to_investigate = [];
else to_investigate = to_investigate(ix);
end
admix_populaatiot = unique(PARTITION(to_investigate));

disp('Possibly admixed individuals in the chosen clusters: ');
if isempty(to_investigate)
    disp('none');
    disp('Admixture analysis terminated.');
    return
else
    for i = 1:length(to_investigate)
        disp(num2str(to_investigate(i)));
    end
end

disp(' ');
disp('clusters for possibly admixed individuals: ');

for i = 1:length(admix_populaatiot)
    disp(num2str(admix_populaatiot(i)));
end

% THUS, there are two types of individuals, who will not be analyzed with
% simulated allele frequencies: those who belonged to a mini-population
% which was removed, and those who have log-likelihood ratio less than 3.
% The value in the PARTITION for the first kind of individuals is 0. The
% second kind of individuals can be identified, because they do not
% belong to "to_investigate" array. When the results are presented, the
% first kind of individuals are omitted completely, while the second kind
% of individuals are completely put to the population, where they ended up
% in the mixture analysis. These second type of individuals will have a
% unit p-value.


% Simulate allele frequencies a given number of times and save the average
% result to "proportionsIt" array.

proportionsIt = zeros(ninds,npops);
for iterationNum = 1:iterationCount
    disp(['Iter: ' num2str(iterationNum)]);
    allfreqs = simulateAllFreqs(noalle);   % Allele frequencies on this iteration.

    for ind=to_investigate
        %disp(num2str(ind));
        omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd);
        osuusTaulu = zeros(1,npops);
        if PARTITION(ind)==0
            % Yksil?on outlier
        elseif PARTITION(ind)~=0
            if PARTITION(ind)>0
                osuusTaulu(PARTITION(ind)) = 1;
            else
                % Yksil<69>t, joita ei ole sijoitettu mihink<6E><6B>n koriin.
                arvot = zeros(1,npops);
                for q=1:npops
                    osuusTaulu = zeros(1,npops);
                    osuusTaulu(q) = 1;
                    arvot(q) = computeIndLogml(omaFreqs, osuusTaulu);
                end
                [iso_arvo, isoimman_indeksi] = max(arvot);
                osuusTaulu = zeros(1,npops);
                osuusTaulu(isoimman_indeksi) = 1;
                PARTITION(ind)=isoimman_indeksi;
            end
            logml = computeIndLogml(omaFreqs, osuusTaulu);

            for osuus = [0.5 0.25 0.05 0.01]
                [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
            end
        end
        proportionsIt(ind,:) = proportionsIt(ind,:).*(iterationNum-1) + osuusTaulu;
        proportionsIt(ind,:) = proportionsIt(ind,:)./iterationNum;
    end
end

%disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']);
%disp('each population.');

%allfreqs = simulateAllFreqs(noalle);  % Simuloidaan alleelifrekvenssisetti
allfreqs = computeAllFreqs2(noalle); % Koitetaan t<>llaista.


% Initialize the data structures, which are required in taking the missing
% data into account:
n_missing_levels = zeros(npops,1);  % number of different levels of "missingness" in each pop (max 3).
missing_levels = zeros(npops,3);    % the mean values for different levels.
missing_level_partition = zeros(ninds,1); % level of each individual (one of the levels of its population).
for i=1:npops
    inds = find(PARTITION==i);
    % Proportions of non-missing data for the individuals:
    non_missing_data = zeros(length(inds),1);
    for j = 1:length(inds)
        ind = inds(j);
        non_missing_data(j) = length(find(data((ind-1)*rowsFromInd+1:ind*rowsFromInd,:)>0)) ./ (rowsFromInd*nloci);
    end
    if all(non_missing_data>0.9)
        n_missing_levels(i) = 1;
        missing_levels(i,1) = mean(non_missing_data);
        missing_level_partition(inds) = 1;
    else
        [ordered, ordering] = sort(non_missing_data);
        %part = learn_simple_partition(ordered, 0.05);
        part = learn_partition_modified(ordered);
        aux = sortrows([part ordering],2);
        part = aux(:,1);
        missing_level_partition(inds) = part;
        n_levels = length(unique(part));
        n_missing_levels(i) = n_levels;
        for j=1:n_levels
            missing_levels(i,j) = mean(non_missing_data(find(part==j)));
        end
    end
end

% Create and analyse reference individuals for populations
% with potentially admixed individuals:
refTaulu = zeros(npops,100,3);
for pop = admix_populaatiot'

    for level = 1:n_missing_levels(pop)

        potential_inds_in_this_pop_and_level = ...
            find(PARTITION==pop & missing_level_partition==level &...
            likelihood>3);  % Potential admix individuals here.

        if ~isempty(potential_inds_in_this_pop_and_level)

            %refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs);
            refData = simulateIndividuals(nrefIndsInPop, rowsFromInd, allfreqs, ...
                pop, missing_levels(pop,level));

            disp(['Analysing the reference individuals from pop ' num2str(pop) ' (level ' num2str(level) ').']);
            refProportions = zeros(nrefIndsInPop,npops);
            for iter = 1:iterationCountRef
                %disp(['Iter: ' num2str(iter)]);
                allfreqs = simulateAllFreqs(noalle);

                for ind = 1:nrefIndsInPop
                    omaFreqs = computePersonalAllFreqs(ind, refData, allfreqs, rowsFromInd);
                    osuusTaulu = zeros(1,npops);
                    osuusTaulu(pop)=1;
                    logml = computeIndLogml(omaFreqs, osuusTaulu);
                    for osuus = [0.5 0.25 0.05 0.01]
                        [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
                    end
                    refProportions(ind,:) = refProportions(ind,:).*(iter-1) + osuusTaulu;
                    refProportions(ind,:) = refProportions(ind,:)./iter;
                end
            end
            for ind = 1:nrefIndsInPop
                omanOsuus = refProportions(ind,pop);
                if round(omanOsuus*100)==0
                    omanOsuus = 0.01;
                end
                if abs(omanOsuus)<1e-5
                    omanOsuus = 0.01;
                end
                refTaulu(pop, round(omanOsuus*100),level) = refTaulu(pop, round(omanOsuus*100),level)+1;
            end
        end
    end
end

% Rounding of the results:
proportionsIt = proportionsIt.*100; proportionsIt = round(proportionsIt);
proportionsIt = proportionsIt./100;
for ind = 1:ninds
    if ~any(to_investigate==ind)
        if PARTITION(ind)>0
            proportionsIt(ind,PARTITION(ind))=1;
        end
    else
        % In case of a rounding error, the sum is made equal to unity by
        % fixing the largest value.
        if (PARTITION(ind)>0) & (sum(proportionsIt(ind,:)) ~= 1)
            [isoin,indeksi] = max(proportionsIt(ind,:));
            erotus = sum(proportionsIt(ind,:))-1;
            proportionsIt(ind,indeksi) = isoin-erotus;
        end
    end
end

% Calculate p-value for each individual:
uskottavuus = zeros(ninds,1);
for ind = 1:ninds
    pop = PARTITION(ind);
    if pop==0 % Individual is outlier
        uskottavuus(ind)=1;
    elseif isempty(find(to_investigate==ind))
        % Individual had log-likelihood ratio<3
        uskottavuus(ind)=1;
    else
        omanOsuus = proportionsIt(ind,pop);
        if abs(omanOsuus)<1e-5
            omanOsuus = 0.01;
        end
        if round(omanOsuus*100)==0
            omanOsuus = 0.01;
        end
        level = missing_level_partition(ind);
        refPienempia = sum(refTaulu(pop, 1:round(100*omanOsuus), level));
        uskottavuus(ind) = refPienempia / nrefIndsInPop;
    end
end

% tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount);

% viewPartition(proportionsIt, popnames);


c.proportionsIt = proportionsIt;
c.pvalue = uskottavuus; % Added by Jing
c.mixtureType = 'admix'; % Jing
c.admixnpops = npops;

c.clusters = clusters; % added for parallel computing
c.minsize = alaRaja;
c.iters = iterationCount;
c.refInds = nrefIndsInPop;
c.refIters = iterationCountRef;


fprintf(1,'Saving the result...')
try
%     save(options.outputMat, 'c');
    save(options.outputMat, 'c','-v7.3'); % added by Lu Cheng, 08.06.2012
    fprintf(1,'Finished.\n');
catch
    display('*** ERROR in saving the result.');
end


%----------------------------------------------------------------------------


function [npops, clusterIndex] = poistaLiianPienet(npops, rowsFromInd, alaraja)
% Muokkaa tulokset muotoon, jossa outlier yksil<69>t on
% poistettu. Tarkalleen ottaen poistaa ne populaatiot,
% joissa on v<>hemm<6D>n kuin 'alaraja':n verran yksil<69>it?

% NEW: clusterIndex is output for parallel computing. - Jing
global PARTITION;
global COUNTS;
global SUMCOUNTS;

clusterIndex = [1:npops]';
popSize=zeros(1,npops);
for i=1:npops
    popSize(i)=length(find(PARTITION==i));
end
miniPops = find(popSize<alaraja);

if length(miniPops)==0
    return;
end

outliers = [];
for pop = miniPops
    inds = find(PARTITION==pop);
    disp('Removed individuals: ');
    disp(num2str(inds));
    outliers = [outliers; inds];
    clusterIndex(pop) = 0;
end

ninds = length(PARTITION);
PARTITION(outliers) = 0;
korit = unique(PARTITION(find(PARTITION>0)));
for n=1:length(korit)
    kori = korit(n);
    yksilot = find(PARTITION==kori);
    PARTITION(yksilot) = n;
    clusterIndex(kori) = n;
end
COUNTS(:,:,miniPops) = [];
SUMCOUNTS(miniPops,:) = [];

npops = npops-length(miniPops);

%------------------------------------------------------------------------

function clearGlobalVars

global COUNTS; COUNTS = [];
global SUMCOUNTS; SUMCOUNTS = [];
global PARTITION; PARTITION = [];
global POP_LOGML; POP_LOGML = [];

%--------------------------------------------------------


function allFreqs = computeAllFreqs2(noalle)
% Lis<69><73> a priori jokaista alleelia
% joka populaation joka lokukseen j 1/noalle(j) verran.

global COUNTS;
global SUMCOUNTS;

max_noalle = size(COUNTS,1);
nloci = size(COUNTS,2);
npops = size(COUNTS,3);

sumCounts = SUMCOUNTS+ones(size(SUMCOUNTS));
sumCounts = reshape(sumCounts', [1, nloci, npops]);
sumCounts = repmat(sumCounts, [max_noalle, 1 1]);

prioriAlleelit = zeros(max_noalle,nloci);
for j=1:nloci
    prioriAlleelit(1:noalle(j),j) = 1/noalle(j);
end
prioriAlleelit = repmat(prioriAlleelit, [1,1,npops]);
counts = COUNTS + prioriAlleelit;
allFreqs = counts./sumCounts;


function allfreqs = simulateAllFreqs(noalle)
% Lis<69><73> jokaista alleelia joka populaation joka lokukseen j 1/noalle(j)
% verran. N<>in saatuja counts:eja vastaavista Dirichlet-jakaumista
% simuloidaan arvot populaatioiden alleelifrekvensseille.

global COUNTS;

max_noalle = size(COUNTS,1);
nloci = size(COUNTS,2);
npops = size(COUNTS,3);

prioriAlleelit = zeros(max_noalle,nloci);
for j=1:nloci
    prioriAlleelit(1:noalle(j),j) = 1/noalle(j);
end
prioriAlleelit = repmat(prioriAlleelit, [1,1,npops]);
counts = COUNTS + prioriAlleelit;
allfreqs = zeros(size(counts));

for i=1:npops
    for j=1:nloci
        simuloidut = randdir(counts(1:noalle(j),j,i) , noalle(j));
        allfreqs(1:noalle(j),j,i) = simuloidut;
    end
end

%--------------------------------------------------------------------------


function refData = simulateIndividuals(n,rowsFromInd,allfreqs,pop, missing_level)
% simulate n individuals from population pop, such that approximately
% proportion "missing_level" of the alleles are present.

nloci = size(allfreqs,2);

refData = zeros(n*rowsFromInd,nloci);
counter = 1;  % which row will be generated next.

for ind = 1:n
    for loc = 1:nloci
        for k=0:rowsFromInd-1
            if rand<missing_level
                refData(counter+k,loc) = simuloiAlleeli(allfreqs,pop,loc);
            else
                refData(counter+k,loc) = -999;
            end
        end
    end
    counter = counter+rowsFromInd;
end

function all = simuloiAlleeli(allfreqs,pop,loc)
% Simuloi populaation pop lokukseen loc alleelin.
freqs = allfreqs(:,loc,pop);
cumsumma = cumsum(freqs);
arvo = rand;
isommat = find(cumsumma>arvo);
all = min(isommat);


%--------------------------------------------------------------------------


function omaFreqs = computePersonalAllFreqs(ind, data, allFreqs, rowsFromInd)
% Laskee npops*(rowsFromInd*nloci) taulukon, jonka kutakin saraketta
% vastaa yksil<69>n ind alleeli. Eri rivit ovat alleelin alkuper<65>frekvenssit
% eri populaatioissa. Jos yksil<69>lt?puuttuu jokin alleeli, niin vastaavaan
% kohtaa tulee sarake ykk<6B>si?

global COUNTS;
nloci = size(COUNTS,2);
npops = size(COUNTS,3);

rows = data(computeRows(rowsFromInd, ind, 1),:);

omaFreqs = zeros(npops, (rowsFromInd*nloci));
pointer = 1;
for loc=1:size(rows,2)
    for all=1:size(rows,1)
        if rows(all,loc)>=0
            try,
            omaFreqs(:,pointer) = ...
                reshape(allFreqs(rows(all,loc),loc,:), [npops,1]);
            catch
                a=0;
            end
        else
            omaFreqs(:,pointer) = ones(npops,1);
        end
        pointer = pointer+1;
    end
end


%---------------------------------------------------------------------------


function loggis = computeIndLogml(omaFreqs, osuusTaulu)
% Palauttaa yksil<69>n logml:n, kun oletetaan yksil<69>n alkuper<65>t
% m<><6D>ritellyiksi kuten osuusTaulu:ssa.

apu = repmat(osuusTaulu', [1 size(omaFreqs,2)]);
apu = apu .* omaFreqs;
apu = sum(apu);

apu = log(apu);

loggis = sum(apu);


%--------------------------------------------------------------------------


function osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi)
% P<>ivitt<74><74> osuusTaulun muutoksen j<>lkeen.

global COUNTS;
npops = size(COUNTS,3);

i1 = rem(indeksi,npops);
if i1==0, i1 = npops; end;
i2 = ceil(indeksi / npops);

osuusTaulu(i1) = osuusTaulu(i1)-osuus;
osuusTaulu(i2) = osuusTaulu(i2)+osuus;


%-------------------------------------------------------------------------


function [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml)

ready = 0;
while ready ~= 1
    muutokset = laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml);
    [maxMuutos, indeksi] = max(muutokset(1:end));
    if maxMuutos>0
        osuusTaulu = suoritaMuutos(osuusTaulu, osuus, indeksi);
        logml = logml + maxMuutos;
    else
        ready = 1;
    end
end


%---------------------------------------------------------------------------


function muutokset = laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml)
% Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
% muutos logml:ss? mik<69>li populaatiosta i siirret<65><74>n osuuden verran
% todenn<6E>k<EFBFBD>isyysmassaa populaatioon j. Mik<69>li populaatiossa i ei ole
% mit<69><74>n siirrett<74>v<EFBFBD><76>, on vastaavassa kohdassa rivi nollia.

global COUNTS;
npops = size(COUNTS,3);

notEmpty = find(osuusTaulu>0.005);
muutokset = zeros(npops);
empties = ~notEmpty;

for i1=notEmpty

    osuusTaulu(i1) = osuusTaulu(i1)-osuus;

    for i2 = [1:i1-1 i1+1:npops]
        osuusTaulu(i2) = osuusTaulu(i2)+osuus;
        loggis = computeIndLogml(omaFreqs, osuusTaulu);
        muutokset(i1,i2) = loggis-logml;
        osuusTaulu(i2) = osuusTaulu(i2)-osuus;
    end

    osuusTaulu(i1) = osuusTaulu(i1)+osuus;
end


function g=randga(a,b)
flag = 0;
if a>1
c1 = a-1; c2 = (a-(1/(6*a)))/c1; c3 = 2/c1; c4 = c3+2; c5 = 1/sqrt(a);
U1=-1;
while flag == 0,
if a<=2.5,
U1=rand;U2=rand;
else
while ~(U1>0 & U1<1),
U1=rand;U2=rand;
U1 = U2 + c5*(1-1.86*U1);
end %while
end %if
W = c2*U2/U1;
if c3*U1+W+(1/W)<=c4,
flag = 1;
g = c1*W/b;
elseif c3*log(U1)-log(W)+W<1,
flag = 1;
g = c1*W/b;
else
U1=-1;
end %if
end %while flag
elseif a==1
g=sum(-(1/b)*log(rand(a,1)));
else
while flag == 0,
U = rand(2,1);
if U(1)>exp(1)/(a+exp(1)),
g = -log(((a+exp(1))*(1-U(1)))/(a*exp(1)));
if U(2)<=g^(a-1),
flag = 1;
end %if
else
g = ((a+exp(1))*U(1)/((exp(1))^(1/a)));
if U(2)<=exp(-g),
flag = 1;
end %if
end %if
end %while flag
g=g/b;
end %if;


%-------------------------------------------------

function svar=randdir(counts,nc)
% K<>ytt<74>esim randdir([10;30;60],3)

svar=zeros(nc,1);
for i=1:nc
   svar(i,1)=randga(counts(i,1),1);
end
svar=svar/sum(svar);

%-------------------------------------------------------------------------------------


function rows = computeRows(rowsFromInd, inds, ninds)
% Individuals inds have been given. The function returns a vector,
% containing the indices of the rows, which contain data from the
% individuals.

rows = inds(:, ones(1,rowsFromInd));
rows = rows*rowsFromInd;
miinus = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]);
rows = rows - miinus;
rows = reshape(rows', [1,rowsFromInd*ninds]);

%--------------------------------------------------------------------------
%-----

function str = ownNum2Str(number)

absolute = abs(number);

if absolute < 1000
    str = num2str(number);
elseif absolute < 10000000
    first_three = rem(number,1000);
    next_four = (number - first_three) /1000;
    first_three = abs(first_three);
    if first_three<10
        first_three = ['00' num2str(first_three)];
    elseif first_three<100
        first_three = ['0' num2str(first_three)];
    else
        first_three = num2str(first_three);
    end;
    str = [num2str(next_four) first_three];
elseif absolute < 100000000
    first_four = rem(number,10000);
    next_four = (number - first_four) /10000;
    first_four = abs(first_four);
    if first_four<10
        first_four = ['000' num2str(first_four)];
    elseif first_four<100
        first_four = ['00' num2str(first_four)];
    elseif first_four<1000
        first_four = ['0' num2str(first_four)];
    else
        first_four = num2str(first_four);
    end;
    str = [num2str(next_four) first_four];
else
    str = num2str(number);
end;

%------------------------------------------------


function part = learn_partition_modified(ordered)
% This function is called only if some individual has less than 90 per cent
% non-missing data. The function uses fuzzy clustering for the "non-missingness"
% values, finding maximum three clusters. If two of the found clusters are such
% that all the values are >0.9, then those two are further combined.

part = learn_simple_partition(ordered,0.05);
nclust = length(unique(part));
if nclust==3
    mini_1 = min(ordered(find(part==1)));
    mini_2 = min(ordered(find(part==2)));
    mini_3 = min(ordered(find(part==3)));

    if mini_1>0.9 & mini_2>0.9
        part(find(part==2)) = 1;
        part(find(part==3)) = 2;

    elseif mini_1>0.9 & mini_3>0.9
        part(find(part==3)) = 1;

    elseif mini_2>0.9 & mini_3>0.9
        % This is the one happening in practice, since the values are
        % ordered, leading to mini_1 <= mini_2 <= mini_3
        part(find(part==3)) = 2;
    end
end