ourMELONS/R/admix1.R

#' @title Admixture analysis
#' @param tietue record
#' @details If the record == -1, the mixture results file is loaded. Otherwise, will the required variables be retrieved from the record fields?
#' @export
admix1 <- function(tietue) {
    if (!is.list(tietue)) {
#         c(filename, pathname) = uigetfile('*.mat', 'Load mixture result file');
#         if (filename==0 & pathname==0), return;
#         else
#             disp('---------------------------------------------------');
#             disp(['Reading mixture result from: ',[pathname filename],'...']);
#         end
#         pause(0.0001);
#         h0 = findobj('Tag','filename1_text');
#         set(h0,'String',filename); clear h0;

#         struct_array = load([pathname filename]);
#         if isfield(struct_array,'c')  #Matlab versio
#             c = struct_array.c;
#             if ~isfield(c,'PARTITION') | ~isfield(c,'rowsFromInd')
#                 disp('Incorrect file format');
#                 return
#             end
#         elseif isfield(struct_array,'PARTITION')  #Mideva versio
#             c = struct_array;
#             if ~isfield(c,'rowsFromInd')
#                 disp('Incorrect file format');
#                 return
#             end
#         else
#             disp('Incorrect file format');
#             return;
#         end

#         if isfield(c, 'gene_lengths') && ...
#                 (strcmp(c.mixtureType,'linear_mix') | ...
#                 strcmp(c.mixtureType,'codon_mix')) # if the mixture is from a linkage model
#             # Redirect the call to the linkage admixture function.
#             c.data = noIndex(c.data,c.noalle); # call function noindex to remove the index column
#             linkage_admix(c);
#             return
#         end

#         PARTITION = c.PARTITION; COUNTS = c.COUNTS; SUMCOUNTS = c.SUMCOUNTS;
#         alleleCodes = c.alleleCodes; adjprior = c.adjprior; popnames = c.popnames;
#         rowsFromInd = c.rowsFromInd; data = c.data; npops = c.npops; noalle = c.noalle;
    } else {
        PARTITION <- tietue$PARTITION
        COUNTS <- tietue$COUNTS
        SUMCOUNTS <- tietue$SUMCOUNTS
        alleleCodes <- tietue$alleleCodes
        adjprior <- tietue$adjprior
        popnames <- tietue$popnames
        rowsFromInd <- tietue$rowsFromInd
        data <- as.double(tietue$data)
        npops <- tietue$npops
        noalle <- tietue$noalle
    }

#     answers = inputdlg({['Input the minimum size of a population that will'...
#                 ' be taken into account when admixture is estimated.']},...
#                 'Input minimum population size',[1],...
#                 {'5'});
#     if isempty(answers) return; end
#     alaRaja = str2num(answers{1,1});
#     [npops] = poistaLiianPienet(npops, rowsFromInd, alaRaja);

#     nloci = size(COUNTS,2);
#     ninds = size(data,1)/rowsFromInd;

#     answers = inputdlg({['Input number of iterations']},'Input',[1],{'50'});
#     if isempty(answers) return; end
#     iterationCount = str2num(answers{1,1});

#     answers = inputdlg({['Input number of reference individuals from each population']},'Input',[1],{'50'});
#     if isempty(answers) nrefIndsInPop = 50;
#     else nrefIndsInPop = str2num(answers{1,1});
#     end

#     answers = inputdlg({['Input number of iterations for reference individuals']},'Input',[1],{'10'});
#     if isempty(answers) return; end
#     iterationCountRef = str2num(answers{1,1});


#     # First calculate log-likelihood ratio for all individuals:
#     likelihood = zeros(ninds,1);
#     allfreqs = computeAllFreqs2(noalle);
#     for ind = 1:ninds
#         omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd);
#         osuusTaulu = zeros(1,npops);
#         if PARTITION(ind)==0
#             # Yksil?on outlier
#         elseif PARTITION(ind)~=0
#             if PARTITION(ind)>0
#                 osuusTaulu(PARTITION(ind)) = 1;
#             else
#                 # Yksilöt, joita ei ole sijoitettu mihinkään koriin.
#                 arvot = zeros(1,npops);
#                 for q=1:npops
#                     osuusTaulu = zeros(1,npops);
#                     osuusTaulu(q) = 1;
#                     arvot(q) = computeIndLogml(omaFreqs, osuusTaulu);
#                 end
#                 [iso_arvo, isoimman_indeksi] = max(arvot);
#                 osuusTaulu = zeros(1,npops);
#                 osuusTaulu(isoimman_indeksi) = 1;
#                 PARTITION(ind)=isoimman_indeksi;
#             end
#             logml = computeIndLogml(omaFreqs, osuusTaulu);
#             logmlAlku = logml;
#             for osuus = [0.5 0.25 0.05 0.01]
#                 [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
#             end
#             logmlLoppu = logml;
#             likelihood(ind) = logmlLoppu-logmlAlku;
#         end
#     end

#     # Analyze further only individuals who have log-likelihood ratio larger than 3:
#     to_investigate = (find(likelihood>3))';
#     disp('Possibly admixed individuals: ');
#     for i = 1:length(to_investigate)
#         disp(num2str(to_investigate(i)));
#     end
#     disp(' ');
#     disp('Populations for possibly admixed individuals: ');
#     admix_populaatiot = unique(PARTITION(to_investigate));
#     for i = 1:length(admix_populaatiot)
#         disp(num2str(admix_populaatiot(i)));
#     end

#     # THUS, there are two types of individuals, who will not be analyzed with
#     # simulated allele frequencies: those who belonged to a mini-population
#     # which was removed, and those who have log-likelihood ratio less than 3.
#     # The value in the PARTITION for the first kind of individuals is 0. The
#     # second kind of individuals can be identified, because they do not
#     # belong to "to_investigate" array. When the results are presented, the
#     # first kind of individuals are omitted completely, while the second kind
#     # of individuals are completely put to the population, where they ended up
#     # in the mixture analysis. These second type of individuals will have a
#     # unit p-value.


#     # Simulate allele frequencies a given number of times and save the average
#     # result to "proportionsIt" array.

#     proportionsIt = zeros(ninds,npops);
#     for iterationNum = 1:iterationCount
#         disp(['Iter: ' num2str(iterationNum)]);
#         allfreqs = simulateAllFreqs(noalle);   # Allele frequencies on this iteration.

#         for ind=to_investigate
#             #disp(num2str(ind));
#             omaFreqs = computePersonalAllFreqs(ind, data, allfreqs, rowsFromInd);
#             osuusTaulu = zeros(1,npops);
#             if PARTITION(ind)==0
#                 # Yksil?on outlier
#             elseif PARTITION(ind)~=0
#                 if PARTITION(ind)>0
#                     osuusTaulu(PARTITION(ind)) = 1;
#                 else
#                     # Yksilöt, joita ei ole sijoitettu mihinkään koriin.
#                     arvot = zeros(1,npops);
#                     for q=1:npops
#                         osuusTaulu = zeros(1,npops);
#                         osuusTaulu(q) = 1;
#                         arvot(q) = computeIndLogml(omaFreqs, osuusTaulu);
#                     end
#                     [iso_arvo, isoimman_indeksi] = max(arvot);
#                     osuusTaulu = zeros(1,npops);
#                     osuusTaulu(isoimman_indeksi) = 1;
#                     PARTITION(ind)=isoimman_indeksi;
#                 end
#                 logml = computeIndLogml(omaFreqs, osuusTaulu);

#                 for osuus = [0.5 0.25 0.05 0.01]
#                     [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
#                 end
#             end
#             proportionsIt(ind,:) = proportionsIt(ind,:).*(iterationNum-1) + osuusTaulu;
#             proportionsIt(ind,:) = proportionsIt(ind,:)./iterationNum;
#         end
#     end

#     #disp(['Creating ' num2str(nrefIndsInPop) ' reference individuals from ']);
#     #disp('each population.');

#     #allfreqs = simulateAllFreqs(noalle);  # Simuloidaan alleelifrekvenssisetti
#     allfreqs = computeAllFreqs2(noalle); # Koitetaan tällaista.


#     # Initialize the data structures, which are required in taking the missing
#     # data into account:
#     n_missing_levels = zeros(npops,1);  # number of different levels of "missingness" in each pop (max 3).
#     missing_levels = zeros(npops,3);    # the mean values for different levels.
#     missing_level_partition = zeros(ninds,1); # level of each individual (one of the levels of its population).
#     for i=1:npops
#         inds = find(PARTITION==i);
#         # Proportions of non-missing data for the individuals:
#         non_missing_data = zeros(length(inds),1);
#         for j = 1:length(inds)
#             ind = inds(j);
#             non_missing_data(j) = length(find(data((ind-1)*rowsFromInd+1:ind*rowsFromInd,:)>0)) ./ (rowsFromInd*nloci);
#         end
#         if all(non_missing_data>0.9)
#             n_missing_levels(i) = 1;
#             missing_levels(i,1) = mean(non_missing_data);
#             missing_level_partition(inds) = 1;
#         else
#             [ordered, ordering] = sort(non_missing_data);
#             #part = learn_simple_partition(ordered, 0.05);
#             part = learn_partition_modified(ordered);
#             aux = sortrows([part ordering],2);
#             part = aux(:,1);
#             missing_level_partition(inds) = part;
#             n_levels = length(unique(part));
#             n_missing_levels(i) = n_levels;
#             for j=1:n_levels
#                 missing_levels(i,j) = mean(non_missing_data(find(part==j)));
#             end
#         end
#     end

#     # Create and analyse reference individuals for populations
#     # with potentially admixed individuals:
#     refTaulu = zeros(npops,100,3);
#     for pop = admix_populaatiot'

#         for level = 1:n_missing_levels(pop)

#             potential_inds_in_this_pop_and_level = ...
#                 find(PARTITION==pop & missing_level_partition==level &...
#                 likelihood>3);  # Potential admix individuals here.

#             if ~isempty(potential_inds_in_this_pop_and_level)

#                 #refData = simulateIndividuals(nrefIndsInPop,rowsFromInd,allfreqs);
#                 refData = simulateIndividuals(nrefIndsInPop, rowsFromInd, allfreqs, ...
#                     pop, missing_levels(pop,level));

#                 disp(['Analysing the reference individuals from pop ' num2str(pop) ' (level ' num2str(level) ').']);
#                 refProportions = zeros(nrefIndsInPop,npops);
#                 for iter = 1:iterationCountRef
#                     #disp(['Iter: ' num2str(iter)]);
#                     allfreqs = simulateAllFreqs(noalle);

#                     for ind = 1:nrefIndsInPop
#                         omaFreqs = computePersonalAllFreqs(ind, refData, allfreqs, rowsFromInd);
#                         osuusTaulu = zeros(1,npops);
#                         osuusTaulu(pop)=1;
#                         logml = computeIndLogml(omaFreqs, osuusTaulu);
#                         for osuus = [0.5 0.25 0.05 0.01]
#                             [osuusTaulu, logml] = etsiParas(osuus, osuusTaulu, omaFreqs, logml);
#                         end
#                         refProportions(ind,:) = refProportions(ind,:).*(iter-1) + osuusTaulu;
#                         refProportions(ind,:) = refProportions(ind,:)./iter;
#                     end
#                 end
#                 for ind = 1:nrefIndsInPop
#                     omanOsuus = refProportions(ind,pop);
#                     if round(omanOsuus*100)==0
#                         omanOsuus = 0.01;
#                     end
#                     if abs(omanOsuus)<1e-5
#                         omanOsuus = 0.01;
#                     end
#                     refTaulu(pop, round(omanOsuus*100),level) = refTaulu(pop, round(omanOsuus*100),level)+1;
#                 end
#             end
#         end
#     end

#     # Rounding of the results:
#     proportionsIt = proportionsIt.*100; proportionsIt = round(proportionsIt);
#     proportionsIt = proportionsIt./100;
#     for ind = 1:ninds
#         if ~any(to_investigate==ind)
#             if PARTITION(ind)>0
#                 proportionsIt(ind,PARTITION(ind))=1;
#             end
#         else
#             # In case of a rounding error, the sum is made equal to unity by
#             # fixing the largest value.
#             if (PARTITION(ind)>0) & (sum(proportionsIt(ind,:)) ~= 1)
#                 [isoin,indeksi] = max(proportionsIt(ind,:));
#                 erotus = sum(proportionsIt(ind,:))-1;
#                 proportionsIt(ind,indeksi) = isoin-erotus;
#             end
#         end
#     end

#     # Calculate p-value for each individual:
#     uskottavuus = zeros(ninds,1);
#     for ind = 1:ninds
#         pop = PARTITION(ind);
#         if pop==0 # Individual is outlier
#             uskottavuus(ind)=1;
#         elseif isempty(find(to_investigate==ind))
#             # Individual had log-likelihood ratio<3
#             uskottavuus(ind)=1;
#         else
#             omanOsuus = proportionsIt(ind,pop);
#             if abs(omanOsuus)<1e-5
#                 omanOsuus = 0.01;
#             end
#             if round(omanOsuus*100)==0
#                 omanOsuus = 0.01;
#             end
#             level = missing_level_partition(ind);
#             refPienempia = sum(refTaulu(pop, 1:round(100*omanOsuus), level));
#             uskottavuus(ind) = refPienempia / nrefIndsInPop;
#         end
#     end

#     tulostaAdmixtureTiedot(proportionsIt, uskottavuus, alaRaja, iterationCount);

#     viewPartition(proportionsIt, popnames);

#     talle = questdlg(['Do you want to save the admixture results?'], ...
#         'Save results?','Yes','No','Yes');
#     if isequal(talle,'Yes')
#         #waitALittle;
#         [filename, pathname] = uiputfile('*.mat','Save results as');


#         if (filename == 0) & (pathname == 0)
#             # Cancel was pressed
#             return
#         else # copy 'baps4_output.baps' into the text file with the same name.
#             if exist('baps4_output.baps','file')
#                 copyfile('baps4_output.baps',[pathname filename '.txt'])
#                 delete('baps4_output.baps')
#             end
#         end


#         if (~isstruct(tietue))
#             c.proportionsIt = proportionsIt;
#             c.pvalue = uskottavuus; # Added by Jing
#             c.mixtureType = 'admix'; # Jing
#             c.admixnpops = npops;
#     #         save([pathname filename], 'c');
#             save([pathname filename], 'c', '-v7.3'); # added by Lu Cheng, 08.06.2012
#         else
#             tietue.proportionsIt = proportionsIt;
#             tietue.pvalue = uskottavuus; # Added by Jing
#             tietue.mixtureType = 'admix';
#             tietue.admixnpops = npops;
#     #         save([pathname filename], 'tietue');
#             save([pathname filename], 'tietue', '-v7.3'); # added by Lu Cheng, 08.06.2012
#         end
#     end
}


# function [npops] = poistaLiianPienet(npops, rowsFromInd, alaraja)
# % Muokkaa tulokset muotoon, jossa outlier yksilöt on
# % poistettu. Tarkalleen ottaen poistaa ne populaatiot,
# % joissa on vähemmän kuin 'alaraja':n verran yksilöit?

# global PARTITION;
# global COUNTS;
# global SUMCOUNTS;

# popSize=zeros(1,npops);
# for i=1:npops
#     popSize(i)=length(find(PARTITION==i));
# end
# miniPops = find(popSize<alaraja);

# if length(miniPops)==0
#     return;
# end

# outliers = [];
# for pop = miniPops
#     inds = find(PARTITION==pop);
#     disp('Removed individuals: ');
#     disp(num2str(inds));
#     outliers = [outliers; inds];
# end

# ninds = length(PARTITION);
# PARTITION(outliers) = 0;
# korit = unique(PARTITION(find(PARTITION>0)));
# for n=1:length(korit)
#     kori = korit(n);
#     yksilot = find(PARTITION==kori);
#     PARTITION(yksilot) = n;
# end
# COUNTS(:,:,miniPops) = [];
# SUMCOUNTS(miniPops,:) = [];

# npops = npops-length(miniPops);

# %------------------------------------------------------------------------

# function clearGlobalVars

# global COUNTS; COUNTS = [];
# global SUMCOUNTS; SUMCOUNTS = [];
# global PARTITION; PARTITION = [];
# global POP_LOGML; POP_LOGML = [];

# %--------------------------------------------------------


# function allFreqs = computeAllFreqs2(noalle)
# % Lisää a priori jokaista alleelia
# % joka populaation joka lokukseen j 1/noalle(j) verran.

# global COUNTS;
# global SUMCOUNTS;

# max_noalle = size(COUNTS,1);
# nloci = size(COUNTS,2);
# npops = size(COUNTS,3);

# sumCounts = SUMCOUNTS+ones(size(SUMCOUNTS));
# sumCounts = reshape(sumCounts', [1, nloci, npops]);
# sumCounts = repmat(sumCounts, [max_noalle, 1 1]);

# prioriAlleelit = zeros(max_noalle,nloci);
# for j=1:nloci
#     prioriAlleelit(1:noalle(j),j) = 1/noalle(j);
# end
# prioriAlleelit = repmat(prioriAlleelit, [1,1,npops]);
# counts = COUNTS + prioriAlleelit;
# allFreqs = counts./sumCounts;


# function allfreqs = simulateAllFreqs(noalle)
# % Lisää jokaista alleelia joka populaation joka lokukseen j 1/noalle(j)
# % verran. Näin saatuja counts:eja vastaavista Dirichlet-jakaumista
# % simuloidaan arvot populaatioiden alleelifrekvensseille.

# global COUNTS;

# max_noalle = size(COUNTS,1);
# nloci = size(COUNTS,2);
# npops = size(COUNTS,3);

# prioriAlleelit = zeros(max_noalle,nloci);
# for j=1:nloci
#     prioriAlleelit(1:noalle(j),j) = 1/noalle(j);
# end
# prioriAlleelit = repmat(prioriAlleelit, [1,1,npops]);
# counts = COUNTS + prioriAlleelit;
# allfreqs = zeros(size(counts));

# for i=1:npops
#     for j=1:nloci
#         simuloidut = randdir(counts(1:noalle(j),j,i) , noalle(j));
#         allfreqs(1:noalle(j),j,i) = simuloidut;
#     end
# end

# %--------------------------------------------------------------------------


# function refData = simulateIndividuals(n,rowsFromInd,allfreqs,pop, missing_level)
# % simulate n individuals from population pop, such that approximately
# % proportion "missing_level" of the alleles are present.

# nloci = size(allfreqs,2);

# refData = zeros(n*rowsFromInd,nloci);
# counter = 1;  % which row will be generated next.

# for ind = 1:n
#     for loc = 1:nloci
#         for k=0:rowsFromInd-1
#             if rand<missing_level
#                 refData(counter+k,loc) = simuloiAlleeli(allfreqs,pop,loc);
#             else
#                 refData(counter+k,loc) = -999;
#             end
#         end
#     end
#     counter = counter+rowsFromInd;
# end