Added bare-bones greedyMix

This commit is contained in:
Waldir Leoncio 2020-05-20 15:34:40 +02:00
parent 3962c8c0e8
commit b622375062
2 changed files with 287 additions and 237 deletions

View file

@ -8,6 +8,7 @@ export(computeIndLogml)
export(computePersonalAllFreqs) export(computePersonalAllFreqs)
export(computeRows) export(computeRows)
export(etsiParas) export(etsiParas)
export(greedyMix)
export(inputdlg) export(inputdlg)
export(isfield) export(isfield)
export(laskeMuutokset4) export(laskeMuutokset4)

View file

@ -1,7 +1,10 @@
# function greedyMix(tietue) #' @title Clustering of individuals
#' @param tietue Record
# % check whether fixed k mode is selected #' @export
# h0 = findobj('Tag','fixk_menu'); greedyMix <- function(tietue) {
# ASK: graphical components. Remove?
# check whether fixed k mode is selected
# h0 <- findobj('Tag','fixk_menu')
# fixedK = get(h0, 'userdata'); # fixedK = get(h0, 'userdata');
# if fixedK # if fixedK
@ -14,255 +17,301 @@
# h1 = findobj('Tag','partitioncompare_menu'); # h1 = findobj('Tag','partitioncompare_menu');
# partitionCompare = get(h1, 'userdata'); # partitionCompare = get(h1, 'userdata');
if (identical(tietue, -1)) {
input_type <- inputdlg(
paste(
'Specify the format of your data:\n',
'1) BAPS-format\n',
'2) GenePop-format\n',
'3) Preprocessed data\n'
)
)
# Converting from number into name
input_type_name <- switch(
input_type, 'BAPS-format', 'GenePop-format', 'Preprocessed data'
)
if (length(input_type_name) == 0) {
stop('Invalid alternative')
} else if (input_type_name == 'BAPS-format') {
pathname_filename <- uigetfile("*.txt", "Loaddata in BAPS-format")
# if isequal(tietue,-1) # ASK: remove?
# input_type = questdlg('Specify the format of your data: ',...
# 'Specify Data Format', ...
# 'BAPS-format', 'GenePop-format', 'Preprocessed data', ...
# 'BAPS-format');
# switch input_type
# case 'BAPS-format'
# waitALittle;
# [filename, pathname] = uigetfile('*.txt', 'Load data in BAPS-format');
# if filename==0
# return;
# end
# if ~isempty(partitionCompare) # if ~isempty(partitionCompare)
# fprintf(1,'Data: %s\n',[pathname filename]); # fprintf(1,'Data: %s\n',[pathname filename]);
# end # end
# data = load([pathname filename]); data <- load(pathname_filename)
# ninds = testaaOnkoKunnollinenBapsData(data); %TESTAUS # ninds <- testaaOnkoKunnollinenBapsData(data) #TESTAUS # TODO: trans
# if (ninds==0) if (ninds == 0) stop('Incorrect Data-file')
# disp('Incorrect Data-file.');
# return; # ASK: remove?
# end
# h0 = findobj('Tag','filename1_text'); # h0 = findobj('Tag','filename1_text');
# set(h0,'String',filename); clear h0; # set(h0,'String',filename); clear h0;
cat(
'When using data which are in BAPS-format,',
'you can specify the sampling populations of the',
'individuals by giving two additional files:',
'one containing the names of the populations,',
'the other containing the indices of the first',
'individuals of the populations.'
)
input_pops <- inputdlg(
prompt = 'Do you wish to specify the sampling populations? [y/N]',
definput = 'N'
)
if (tolower(input_pops) %in% c('yes', 'y')) {
popfile <- uigetfile('*.txt', 'Load population names')
kysyToinen <- ifelse(popfile$name == 0, 0, 1)
if (kysyToinen == 1) {
indicesfile <- uigetfile('*.txt', 'Load population indices')
if (indicesfile == 0) {
popnames = ""
} else {
# popnames = initPopNames([namepath namefile],[indicespath indicesfile]) # TODO: translate this fun
}
} else {
popnames <- ""
}
} else {
popnames <- ""
}
# input_pops = questdlg(['When using data which are in BAPS-format, '... # [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO: translate this function
# 'you can specify the sampling populations of the individuals by '... # [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: translate
# 'giving two additional files: one containing the names of the '...
# 'populations, the other containing the indices of the first '...
# 'individuals of the populations. Do you wish to specify the '...
# 'sampling populations?'], ...
# 'Specify sampling populations?',...
# 'Yes', 'No', 'No');
# if isequal(input_pops,'Yes')
# waitALittle;
# [namefile, namepath] = uigetfile('*.txt', 'Load population names');
# if namefile==0
# kysyToinen = 0;
# else
# kysyToinen = 1;
# end
# if kysyToinen==1
# waitALittle;
# [indicesfile, indicespath] = uigetfile('*.txt', 'Load population indices');
# if indicesfile==0
# popnames = [];
# else
# popnames = initPopNames([namepath namefile],[indicespath indicesfile]);
# end
# else
# popnames = [];
# end
# else
# popnames = [];
# end
# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); save_preproc <- questdlg(
# [Z,dist] = newGetDistances(data,rowsFromInd); quest = 'Do you wish to save pre-processed data?',
dlgtitle = 'Save pre-processed data?',
defbtn = 'y'
)
if (save_preproc %in% c('y', 'yes')) {
file_out <- uiputfile('.rda','Save pre-processed data as')
kokonimi <- paste0(file_out$path, file_out$name)
c <- list()
c$data <- data
c$rowsFromInd <- rowsFromInd
c$alleleCodes <- alleleCodes
c$noalle <- noalle
c$adjprior <- adjprior
c$priorTerm <- priorTerm
c$dist <- dist
c$popnames <- popnames
c$Z <- Z
save(c, file = kokonimi)
rm(c)
}
} else if (input_type_name == 'GenePop-format') {
filename_pathname <- uigetfile(
filter = '*.txt',
title = 'Load data in GenePop-format'
)
if (filename_pathname$name == 0) stop("No name provided")
# save_preproc = questdlg('Do you wish to save pre-processed data?',... # ASK: remove?
# 'Save pre-processed data?',...
# 'Yes','No','Yes');
# if isequal(save_preproc,'Yes');
# waitALittle;
# [filename, pathname] = uiputfile('*.mat','Save pre-processed data as');
# kokonimi = [pathname filename];
# c.data = data; c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes;
# c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm;
# c.dist = dist; c.popnames = popnames; c.Z = Z;
# % save(kokonimi,'c');
# save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012
# clear c;
# end;
# case 'GenePop-format'
# waitALittle;
# [filename, pathname] = uigetfile('*.txt', 'Load data in GenePop-format');
# if filename==0
# return;
# end
# if ~isempty(partitionCompare) # if ~isempty(partitionCompare)
# fprintf(1,'Data: %s\n',[pathname filename]); # fprintf(1,'Data: %s\n',[pathname filename]);
# end # end
# kunnossa = testaaGenePopData([pathname filename]);
# if kunnossa==0 # kunnossa = testaaGenePopData([pathname filename]); # TODO: trans
# return # if (kunnossa == 0) stop("testaaGenePopData returned 0")
# end # [data,popnames]=lueGenePopData([pathname filename]); # TODO: trans
# [data,popnames]=lueGenePopData([pathname filename]);
# h0 = findobj('Tag','filename1_text'); # h0 = findobj('Tag','filename1_text');
# set(h0,'String',filename); clear h0; # set(h0,'String',filename); clear h0;
# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO:trans
# [Z,dist] = newGetDistances(data,rowsFromInd); # [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: trans
# save_preproc = questdlg('Do you wish to save pre-processed data?',... save_preproc <- questdlg(
# 'Save pre-processed data?',... quest = 'Do you wish to save pre-processed data?',
# 'Yes','No','Yes'); dlgtitle = 'Save pre-processed data?',
# if isequal(save_preproc,'Yes'); defbtn = 'y'
# waitALittle; )
# [filename, pathname] = uiputfile('*.mat','Save pre-processed data as'); if (save_preproc %in% c('y', 'Yes')) {
# kokonimi = [pathname filename]; file_out <- uiputfile('.rda','Save pre-processed data as')
# c.data = data; c.rowsFromInd = rowsFromInd; c.alleleCodes = alleleCodes; kokonimi <- paste0(file_out$path, file_out$name)
# c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; c$data <- data
# c.dist = dist; c.popnames = popnames; c.Z = Z; c$rowsFromInd <- rowsFromInd
# % save(kokonimi,'c'); c$alleleCodes <- alleleCodes
# save(kokonimi,'c','-v7.3'); % added by Lu Cheng, 08.06.2012 c$noalle <- noalle
# clear c; c$adjprior <- adjprior
# end; c$priorTerm <- priorTerm
c$dist <- dist
c$popnames <- popnames
c$Z <- Z
save(c, file = kokonimi)
rm(c)
}
} else if (input_type_name == 'Preprocessed data') {
file_in <- uigetfile(
filter = '*.txt',
title = 'Load pre-processed data in GenePop-format'
)
if (file_in$name == 0) stop("No name provided")
# case 'Preprocessed data' # ASK: remove?
# waitALittle;
# [filename, pathname] = uigetfile('*.mat', 'Load pre-processed data');
# if filename==0
# return;
# end
# h0 = findobj('Tag','filename1_text'); # h0 = findobj('Tag','filename1_text');
# set(h0,'String',filename); clear h0; # set(h0,'String',filename); clear h0;
# if ~isempty(partitionCompare) # if ~isempty(partitionCompare)
# fprintf(1,'Data: %s\n',[pathname filename]); # fprintf(1,'Data: %s\n',[pathname filename]);
# end # end
# struct_array = load([pathname filename]); struct_array <- readRDS(paste0(file_in$path, file_in$name))
# if isfield(struct_array,'c') %Matlab versio if (isfield(struct_array,'c')) { # Matlab versio
# c = struct_array.c; c <- struct_array$c
# if ~isfield(c,'dist') if (!isfield(c,'dist')) stop('Incorrect file format')
# disp('Incorrect file format'); } else if (isfield(struct_array,'dist')) { #Mideva versio
# return c <- struct_array
# end } else {
# elseif isfield(struct_array,'dist') %Mideva versio stop('Incorrect file format')
# c = struct_array; }
# else data <- double(c$data)
# disp('Incorrect file format'); rowsFromInd <- c$rowsFromInd
# return; alleleCodes <- c$alleleCodes
# end noalle <- c$noalle
# data = double(c.data); rowsFromInd = c.rowsFromInd; alleleCodes = c.alleleCodes; adjprior <- c$adjprior
# noalle = c.noalle; adjprior = c.adjprior; priorTerm = c.priorTerm; priorTerm <- c$priorTerm
# dist = c.dist; popnames = c.popnames; Z = c.Z; dist <- c$dist
# clear c; popnames <- c$popnames
# otherwise Z <- c$Z
# return rm(c)
# end }
} else {
data <- double(tietue$data)
rowsFromInd <- tietue$rowsFromInd
alleleCodes <- tietue$alleleCodes
noalle <- tietue$noalle
adjprior <- tietue$adjprior
priorTerm <- tietue$priorTerm
dist <- tietue$dist
popnames <- tietue$popnames
Z <- tietue$Z
rm(tietue)
}
# else # ==========================================================================
# data = double(tietue.data); rowsFromInd = tietue.rowsFromInd; alleleCodes = tietue.alleleCodes; # Declaring global variables
# noalle = tietue.noalle; adjprior = tietue.adjprior; priorTerm = tietue.priorTerm; # ==========================================================================
# dist = tietue.dist; popnames = tietue.popnames; Z = tietue.Z; PARTITION <- vector()
# clear tietue; COUNTS <- vector()
# end SUMCOUNTS <- vector()
POP_LOGML <- vector()
clearGlobalVars <- vector()
# ==========================================================================
c$data <- data
c$noalle <- noalle
c$adjprior <- adjprior
c$priorTerm <- priorTerm
c$dist <- dist
c$Z <- Z
c$rowsFromInd <- rowsFromInd
# global PARTITION; global COUNTS; ninds <- length(unique(data[, ncol(data)]))
# global SUMCOUNTS; global POP_LOGML; ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd)
# clearGlobalVars; c$rows <- c(ekat, ekat + rowsFromInd - 1)
# c.data=data; # partition compare
# c.noalle = noalle; c.adjprior = adjprior; c.priorTerm = priorTerm; if (!is.null(partitionCompare)) {
# c.dist=dist; c.Z=Z; c.rowsFromInd = rowsFromInd; nsamplingunits <- size(c$rows, 1)
partitions <- partitionCompare$partitions
npartitions <- size(partitions, 2)
partitionLogml <- zeros(1, npartitions)
for (i in seq_len(npartitions)) {
# number of unique partition lables
npops <- length(unique(partitions[, i]))
# ninds = length(unique(data(:,end))); partitionInd <- zeros(ninds * rowsFromInd, 1)
# ekat = (1:rowsFromInd:ninds*rowsFromInd)'; partitionSample <- partitions[, i]
# c.rows = [ekat ekat+rowsFromInd-1]; for (j in seq_len(nsamplingunits)) {
partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
}
# partitionLogml[i] = initialCounts(
# partitionInd,
# data[, seq_len(end - 1)],
# npops,
# c$rows,
# noalle,
# adjprior
# ) #TODO translate
}
# return the logml result
partitionCompare$logmls <- partitionLogml
# set(h1, 'userdata', partitionCompare) # ASK remove?
return()
}
# % partition compare # ASK remove (graphical part)?
# if ~isempty(partitionCompare) # if (fixedK) {
# nsamplingunits = size(c.rows,1); # #logml_npops_partitionSummary <- indMix_fixK(c) # TODO translate?
# partitions = partitionCompare.partitions; # } else {
# npartitions = size(partitions,2); # #logml_npops_partitionSummary <- indMix(c) # TODO translate?
# partitionLogml = zeros(1,npartitions); # }
# for i = 1:npartitions # if (logml_npops_partitionSummary$logml == 1) return()
# % number of unique partition lables
# npops = length(unique(partitions(:,i)));
# partitionInd = zeros(ninds*rowsFromInd,1); data <- data[, seq_len(ncol(data) - 1)]
# partitionSample = partitions(:,i);
# for j = 1:nsamplingunits
# partitionInd([c.rows(j,1):c.rows(j,2)]) = partitionSample(j);
# end
# partitionLogml(i) = ...
# initialCounts(partitionInd, data(:,1:end-1), npops, c.rows, noalle, adjprior);
# end
# % return the logml result
# partitionCompare.logmls = partitionLogml;
# set(h1, 'userdata', partitionCompare);
# return
# end
# if fixedK
# [logml, npops, partitionSummary]=indMix_fixK(c);
# else
# [logml, npops, partitionSummary]=indMix(c);
# end
# if logml==1
# return
# end
# data = data(:,1:end-1);
# ASK: remove?
# h0 = findobj('Tag','filename1_text'); inp = get(h0,'String'); # h0 = findobj('Tag','filename1_text'); inp = get(h0,'String');
# h0 = findobj('Tag','filename2_text'); # h0 = findobj('Tag','filename2_text');
# outp = get(h0,'String'); # outp = get(h0,'String');
# changesInLogml = writeMixtureInfo(logml, rowsFromInd, data, adjprior, priorTerm, ...
# outp,inp,partitionSummary, popnames, fixedK);
# viewMixPartition(PARTITION, popnames); # changesInLogml <- writeMixtureInfo(
# logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
# popnames, fixedK
# ) # TODO translate
# talle = questdlg(['Do you want to save the mixture populations ' ... # viewMixPartition(PARTITION, popnames) # TODO translate function
# 'so that you can use them later in admixture analysis?'], ...
# 'Save results?','Yes','No','Yes');
# if isequal(talle,'Yes')
# waitALittle;
# [filename, pathname] = uiputfile('*.mat','Save results as');
# % -------------------------------------------
# % Added by Jing, 26.12.2005
# if (sum(filename)==0) || (sum(pathname)==0)
# % Cancel was pressed
# return;
# else
# % copy 'baps4_output.baps' into the text file with the same name.
# if exist('baps4_output.baps','file')
# copyfile('baps4_output.baps',[pathname filename '.txt'])
# delete('baps4_output.baps')
# end
# end;
# % -------------------------------------------
# c.PARTITION = PARTITION; c.COUNTS = COUNTS; c.SUMCOUNTS = SUMCOUNTS;
# c.alleleCodes = alleleCodes; c.adjprior = adjprior; c.popnames = popnames;
# c.rowsFromInd = rowsFromInd; c.data = data; c.npops = npops;
# c.noalle = noalle; c.mixtureType = 'mix';
# c.logml = logml; c.changesInLogml = changesInLogml;
# % save([pathname filename], 'c');
# save([pathname filename], 'c', '-v7.3'); % added by Lu Cheng, 08.06.2012
# else
# if exist('baps4_output.baps','file')
# delete('baps4_output.baps')
# end
# end
talle <- questdlg(
quest = paste(
'Do you want to save the mixture populations',
'so that you can use them later in admixture analysis?'
),
dlgtitle = 'Save results?',
defbtn = 'y'
)
if (talle %in% c('Yes', 'y')) {
filename_pathname <- uiputfile('.mat','Save results as')
# ==========================================================================
cond <- (sum(filename_pathname$name) == 0) |
(sum(filename_pathname$path) == 0)
if (cond) {
# Cancel was pressed
return()
} else {
# copy 'baps4_output.baps' into the text file with the same name.
if (file.exists('baps4_output.baps')) {
file.copy(
from = 'baps4_output.baps',
to = paste0(
filename_pathname$path, filename_pathname$name, '.txt'
)
)
file.remove('baps4_output.baps')
}
}
# ==========================================================================
c$PARTITION <- PARTITION
c$COUNTS <- COUNTS
c$SUMCOUNTS <- SUMCOUNTS
c$alleleCodes <- alleleCodes
c$adjprior <- adjprior
c$popnames <- popnames
c$rowsFromInd <- rowsFromInd
c$data <- data
c$npops <- npops
c$noalle <- noalle
c$mixtureType <- 'mix'
c$logml <- logml
c$changesInLogml <- changesInLogml
save(c, file = paste0(filename_pathname$path, filename_pathname$name))
} else {
if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps')
}
}
# %------------------------------------------------------------------------------------- # %-------------------------------------------------------------------------------------
# %------------------------------------------------------------------------------------- # %-------------------------------------------------------------------------------------