ourMELONS/R/greedyMix.R

384 lines
12 KiB
R
Raw Normal View History

2020-05-20 15:34:40 +02:00
#' @title Clustering of individuals
2020-06-24 11:48:23 +02:00
#' @param tietue File
#' @param format Format of the data ("BAPS", "GenePop" or "Preprocessed")
#' @param savePreProcessed Save the pre-processed data?
2020-06-24 11:48:23 +02:00
#' @param filePreProcessed Is the file already processed?
#' @importFrom utils read.delim
2020-05-20 15:34:40 +02:00
#' @export
greedyMix <- function(
tietue,
format = NULL,
savePreProcessed = NULL,
filePreProcessed = NULL
) {
2020-10-19 13:44:18 +02:00
# ASK: Unclear when fixedk == TRUE. Remove?
2020-05-20 15:34:40 +02:00
# check whether fixed k mode is selected
# h0 <- findobj('Tag','fixk_menu')
# fixedK = get(h0, 'userdata');
2020-10-19 13:44:18 +02:00
fixedK <- FALSE
2020-05-20 15:34:40 +02:00
# if fixedK
# if ~(fixKWarning == 1) % call function fixKWarning
# return
# end
# end
2020-10-19 13:44:18 +02:00
# ASK: ditto
2020-05-20 15:34:40 +02:00
# % check whether partition compare mode is selected
# h1 = findobj('Tag','partitioncompare_menu');
# partitionCompare = get(h1, 'userdata');
2020-10-19 13:44:18 +02:00
partitionCompare <- FALSE
2020-05-20 15:34:40 +02:00
if (is(tietue, "list") | is(tietue, "character")) {
# ----------------------------------------------------------------------
# Defining type of file
# ----------------------------------------------------------------------
if (is.null(format)) {
input_type <- inputdlg(
paste(
'Specify the format of your data:\n',
'1) BAPS-format\n',
'2) GenePop-format\n',
'3) Preprocessed data\n'
)
2020-05-20 15:34:40 +02:00
)
# Converting from number into name
input_type_name <- switch(
input_type, 'BAPS-format', 'GenePop-format', 'Preprocessed data'
)
} else {
input_type_name <- paste0(format, "-format")
}
2020-05-20 15:34:40 +02:00
if (length(input_type_name) == 0) {
stop('Invalid alternative')
} else if (input_type_name == 'BAPS-format') {
2020-06-24 12:37:38 +02:00
# ------------------------------------------------------------------
# Treating BAPS-formatted files
# ------------------------------------------------------------------
if (!is(tietue, "character")) {
2020-06-24 11:48:23 +02:00
pathname_filename <- uigetfile(
"*.txt", "Load data in BAPS-format"
)
} else {
pathname_filename <- tietue
}
2020-05-20 15:34:40 +02:00
# ASK: remove?
# if ~isempty(partitionCompare)
# fprintf(1,'Data: %s\n',[pathname filename]);
# end
2020-06-24 12:37:38 +02:00
data <- read.delim(pathname_filename) # TODO: discover delimiter
2020-06-24 11:49:16 +02:00
ninds <- testaaOnkoKunnollinenBapsData(data) # testing
if (ninds == 0) stop('Incorrect Data-file')
2020-05-20 15:34:40 +02:00
# ASK: remove?
# h0 = findobj('Tag','filename1_text');
# set(h0,'String',filename); clear h0;
cat(
'When using data which are in BAPS-format,',
'you can specify the sampling populations of the',
'individuals by giving two additional files:',
'one containing the names of the populations,',
'the other containing the indices of the first',
'individuals of the populations.'
)
input_pops <- inputdlg(
prompt = 'Do you wish to specify the sampling populations? [y/N]',
definput = 'N'
)
if (tolower(input_pops) %in% c('yes', 'y')) {
popfile <- uigetfile('*.txt', 'Load population names')
kysyToinen <- ifelse(popfile$name == 0, 0, 1)
if (kysyToinen == 1) {
indicesfile <- uigetfile('*.txt', 'Load population indices')
if (indicesfile == 0) {
popnames = ""
} else {
# popnames = initPopNames([namepath namefile],[indicespath indicesfile]) # TODO: translate this fun
}
} else {
popnames <- ""
}
} else {
popnames <- ""
}
# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO: translate this function
# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: translate
if (is.null(savePreProcessed)) {
save_preproc <- questdlg(
quest = 'Do you wish to save pre-processed data?',
dlgtitle = 'Save pre-processed data?',
defbtn = 'y'
)
} else {
save_preproc <- savePreProcessed
}
if (save_preproc %in% c('y', 'yes', TRUE)) {
2020-05-20 15:34:40 +02:00
file_out <- uiputfile('.rda','Save pre-processed data as')
kokonimi <- paste0(file_out$path, file_out$name)
c <- list()
c$data <- data
c$rowsFromInd <- rowsFromInd
c$alleleCodes <- alleleCodes
c$noalle <- noalle
c$adjprior <- adjprior
c$priorTerm <- priorTerm
c$dist <- dist
c$popnames <- popnames
c$Z <- Z
save(c, file = kokonimi)
rm(c)
}
} else if (input_type_name == 'GenePop-format') {
2020-06-24 12:37:38 +02:00
# ------------------------------------------------------------------
# Treating GenePop-formatted files
# ------------------------------------------------------------------
if (!is(tietue, "character")) {
filename_pathname <- uigetfile(
filter = '*.txt',
title = 'Load data in GenePop-format'
)
if (filename_pathname$name == 0) stop("No name provided")
} else {
filename_pathname <- tietue
}
2020-05-20 15:34:40 +02:00
# ASK: remove?
# if ~isempty(partitionCompare)
# fprintf(1,'Data: %s\n',[pathname filename]);
# end
2020-07-28 11:45:28 +02:00
kunnossa <- testaaGenePopData(filename_pathname)
if (kunnossa == 0) stop("testaaGenePopData returned 0")
data_popnames <- lueGenePopData(filename_pathname)
data <- data_popnames$data
popnames <- data_popnames$popnames
2020-05-20 15:34:40 +02:00
# h0 = findobj('Tag','filename1_text');
# set(h0,'String',filename); clear h0;
2020-07-31 14:05:40 +02:00
list_dranap <- handleData(data)
data <- list_dranap$newData
rowsFromInd <- list_dranap$rowsFromInd
alleleCodes <- list_dranap$alleleCodes
noalle <- list_dranap$noalle
adjprior <- list_dranap$adjprior
priorTerm <- list_dranap$prioterm
list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug
2020-07-31 14:00:29 +02:00
Z <- list_Zd$Z
dist <- list_Zd$dist
if (is.null(savePreProcessed)) {
save_preproc <- questdlg(
quest = 'Do you wish to save pre-processed data?',
dlgtitle = 'Save pre-processed data?',
defbtn = 'y'
)
} else {
save_preproc <- savePreProcessed
}
if (save_preproc %in% c('y', 'Yes', TRUE)) {
2020-05-20 15:34:40 +02:00
file_out <- uiputfile('.rda','Save pre-processed data as')
kokonimi <- paste0(file_out$path, file_out$name)
# FIXME: translate functions above so the objects below exist
2020-05-20 15:34:40 +02:00
c$data <- data
c$rowsFromInd <- rowsFromInd
c$alleleCodes <- alleleCodes
c$noalle <- noalle
c$adjprior <- adjprior
c$priorTerm <- priorTerm
c$dist <- dist
c$popnames <- popnames
c$Z <- Z
save(c, file = kokonimi)
rm(c)
}
} else if (input_type_name == 'Preprocessed data') {
2020-06-24 12:37:38 +02:00
# ------------------------------------------------------------------
# Handling Pre-processed data
# ------------------------------------------------------------------
2020-05-20 15:34:40 +02:00
file_in <- uigetfile(
filter = '*.txt',
title = 'Load pre-processed data in GenePop-format'
)
if (file_in$name == 0) stop("No name provided")
# ASK: remove?
# h0 = findobj('Tag','filename1_text');
# set(h0,'String',filename); clear h0;
# if ~isempty(partitionCompare)
# fprintf(1,'Data: %s\n',[pathname filename]);
# end
struct_array <- readRDS(paste0(file_in$path, file_in$name))
if (isfield(struct_array,'c')) { # Matlab versio
c <- struct_array$c
if (!isfield(c,'dist')) stop('Incorrect file format')
} else if (isfield(struct_array,'dist')) { #Mideva versio
c <- struct_array
} else {
stop('Incorrect file format')
}
data <- double(c$data)
rowsFromInd <- c$rowsFromInd
alleleCodes <- c$alleleCodes
noalle <- c$noalle
adjprior <- c$adjprior
priorTerm <- c$priorTerm
dist <- c$dist
popnames <- c$popnames
Z <- c$Z
rm(c)
}
} else {
data <- double(tietue$data)
rowsFromInd <- tietue$rowsFromInd
alleleCodes <- tietue$alleleCodes
noalle <- tietue$noalle
adjprior <- tietue$adjprior
priorTerm <- tietue$priorTerm
dist <- tietue$dist
popnames <- tietue$popnames
Z <- tietue$Z
rm(tietue)
}
# ==========================================================================
2020-10-19 13:44:18 +02:00
# Declaring global variables and changing environment of children functions
2020-05-20 15:34:40 +02:00
# ==========================================================================
PARTITION <- vector()
COUNTS <- vector()
SUMCOUNTS <- vector()
POP_LOGML <- vector()
2020-10-19 13:44:18 +02:00
clearGlobalVars()
environment(writeMixtureInfo) <- environment()
2020-05-20 15:34:40 +02:00
# ==========================================================================
2020-07-31 14:05:40 +02:00
c <- list()
2020-05-20 15:34:40 +02:00
c$data <- data
c$noalle <- noalle
c$adjprior <- adjprior
c$priorTerm <- priorTerm
c$dist <- dist
c$Z <- Z
c$rowsFromInd <- rowsFromInd
ninds <- length(unique(data[, ncol(data)]))
ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd)
2021-01-15 11:46:53 +01:00
c$rows <- t(rbind(ekat, ekat + rowsFromInd - 1))
2020-05-20 15:34:40 +02:00
2020-10-19 13:44:18 +02:00
# ASK remove?
2020-05-20 15:34:40 +02:00
# partition compare
2020-07-31 14:05:40 +02:00
# if (!is.null(partitionCompare)) {
# nsamplingunits <- size(c$rows, 1)
# partitions <- partitionCompare$partitions
# npartitions <- size(partitions, 2)
# partitionLogml <- zeros(1, npartitions)
# for (i in seq_len(npartitions)) {
# # number of unique partition lables
# npops <- length(unique(partitions[, i]))
2020-05-20 15:34:40 +02:00
2020-07-31 14:05:40 +02:00
# partitionInd <- zeros(ninds * rowsFromInd, 1)
# partitionSample <- partitions[, i]
# for (j in seq_len(nsamplingunits)) {
# partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
# }
# # partitionLogml[i] = initialCounts(
# # partitionInd,
# # data[, seq_len(end - 1)],
# # npops,
# # c$rows,
# # noalle,
# # adjprior
# # ) #TODO translate
# }
# # return the logml result
# partitionCompare$logmls <- partitionLogml
2020-10-19 13:44:18 +02:00
# # set(h1, 'userdata', partitionCompare)
2020-07-31 14:05:40 +02:00
# return()
# }
2020-05-20 15:34:40 +02:00
2020-10-19 13:44:18 +02:00
if (fixedK) {
# logml_npops_partitionSummary <- indMix_fixK(c) # TODO: translate
# logml <- logml_npops_partitionSummary$logml
# npops <- logml_npops_partitionSummary$npops
# partitionSummary <- logml_npops_partitionSummary$partitionSummary
} else {
logml_npops_partitionSummary <- indMix(c) # TODO: translate
logml <- logml_npops_partitionSummary$logml
npops <- logml_npops_partitionSummary$npops
partitionSummary <- logml_npops_partitionSummary$partitionSummary
}
if (logml_npops_partitionSummary$logml == 1) return()
2020-05-20 15:34:40 +02:00
data <- data[, seq_len(ncol(data) - 1)]
# ASK: remove?
2020-07-14 14:52:31 +02:00
# h0 = findobj('Tag','filename1_text')
# inp = get(h0,'String');
# h0 = findobj('Tag','filename2_text')
2020-05-20 15:34:40 +02:00
# outp = get(h0,'String');
2020-10-19 13:44:18 +02:00
inp <- vector()
outp <- vector()
2020-05-20 15:34:40 +02:00
2020-07-14 14:52:31 +02:00
changesInLogml <- writeMixtureInfo(
logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
popnames, fixedK
2020-07-31 14:05:40 +02:00
) # FIXME: broken
2020-05-20 15:34:40 +02:00
2020-06-24 12:37:38 +02:00
# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder
2020-05-20 15:34:40 +02:00
talle <- questdlg(
quest = paste(
'Do you want to save the mixture populations',
'so that you can use them later in admixture analysis?'
),
dlgtitle = 'Save results?',
defbtn = 'y'
)
if (talle %in% c('Yes', 'y')) {
filename_pathname <- uiputfile('.mat','Save results as')
2020-06-24 12:37:38 +02:00
# ======================================================================
2020-05-20 15:34:40 +02:00
cond <- (sum(filename_pathname$name) == 0) |
(sum(filename_pathname$path) == 0)
if (cond) {
# Cancel was pressed
return()
} else {
# copy 'baps4_output.baps' into the text file with the same name.
if (file.exists('baps4_output.baps')) {
file.copy(
from = 'baps4_output.baps',
to = paste0(
filename_pathname$path, filename_pathname$name, '.txt'
)
)
file.remove('baps4_output.baps')
}
}
2020-06-24 12:37:38 +02:00
# ======================================================================
2020-05-20 15:34:40 +02:00
c$PARTITION <- PARTITION
c$COUNTS <- COUNTS
c$SUMCOUNTS <- SUMCOUNTS
c$alleleCodes <- alleleCodes
c$adjprior <- adjprior
c$popnames <- popnames
c$rowsFromInd <- rowsFromInd
c$data <- data
c$npops <- npops
c$noalle <- noalle
c$mixtureType <- 'mix'
c$logml <- logml
c$changesInLogml <- changesInLogml
save(c, file = paste0(filename_pathname$path, filename_pathname$name))
} else {
if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps')
}
}