400 lines
12 KiB
R
400 lines
12 KiB
R
#' @title Clustering of individuals
|
||
#' @param tietue File
|
||
#' @param format Format of the data ("BAPS", "GenePop" or "Preprocessed")
|
||
#' @param savePreProcessed Save the pre-processed data?
|
||
#' @param filePreProcessed Is the file already processed?
|
||
#' @importFrom utils read.delim
|
||
#' @export
|
||
greedyMix <- function(
|
||
tietue,
|
||
format = NULL,
|
||
savePreProcessed = NULL,
|
||
filePreProcessed = NULL
|
||
) {
|
||
# ASK: Unclear when fixedk == TRUE. Remove?
|
||
# check whether fixed k mode is selected
|
||
# h0 <- findobj('Tag','fixk_menu')
|
||
# fixedK = get(h0, 'userdata');
|
||
fixedK <- FALSE
|
||
|
||
# if fixedK
|
||
# if ~(fixKWarning == 1) % call function fixKWarning
|
||
# return
|
||
# end
|
||
# end
|
||
|
||
# ASK: ditto
|
||
# % check whether partition compare mode is selected
|
||
# h1 = findobj('Tag','partitioncompare_menu');
|
||
# partitionCompare = get(h1, 'userdata');
|
||
partitionCompare <- FALSE
|
||
|
||
if (is(tietue, "list") | is(tietue, "character")) {
|
||
# ----------------------------------------------------------------------
|
||
# Defining type of file
|
||
# ----------------------------------------------------------------------
|
||
if (is.null(format)) {
|
||
input_type <- inputdlg(
|
||
paste(
|
||
'Specify the format of your data:\n',
|
||
'1) BAPS-format\n',
|
||
'2) GenePop-format\n',
|
||
'3) Preprocessed data\n'
|
||
)
|
||
)
|
||
# Converting from number into name
|
||
input_type_name <- switch(
|
||
input_type, 'BAPS-format', 'GenePop-format', 'Preprocessed data'
|
||
)
|
||
} else {
|
||
input_type_name <- paste0(format, "-format")
|
||
}
|
||
if (length(input_type_name) == 0) {
|
||
stop('Invalid alternative')
|
||
} else if (input_type_name == 'BAPS-format') {
|
||
# ------------------------------------------------------------------
|
||
# Treating BAPS-formatted files
|
||
# ------------------------------------------------------------------
|
||
if (!is(tietue, "character")) {
|
||
pathname_filename <- uigetfile(
|
||
"*.txt", "Load data in BAPS-format"
|
||
)
|
||
} else {
|
||
pathname_filename <- tietue
|
||
}
|
||
|
||
# ASK: remove?
|
||
# if ~isempty(partitionCompare)
|
||
# fprintf(1,'Data: %s\n',[pathname filename]);
|
||
# end
|
||
|
||
data <- read.delim(pathname_filename) # TODO: discover delimiter
|
||
ninds <- testaaOnkoKunnollinenBapsData(data) # testing
|
||
if (ninds == 0) stop('Incorrect Data-file')
|
||
|
||
# ASK: remove?
|
||
# h0 = findobj('Tag','filename1_text');
|
||
# set(h0,'String',filename); clear h0;
|
||
cat(
|
||
'When using data which are in BAPS-format,',
|
||
'you can specify the sampling populations of the',
|
||
'individuals by giving two additional files:',
|
||
'one containing the names of the populations,',
|
||
'the other containing the indices of the first',
|
||
'individuals of the populations.'
|
||
)
|
||
input_pops <- inputdlg(
|
||
prompt = 'Do you wish to specify the sampling populations? [y/N]',
|
||
definput = 'N'
|
||
)
|
||
if (tolower(input_pops) %in% c('yes', 'y')) {
|
||
popfile <- uigetfile('*.txt', 'Load population names')
|
||
kysyToinen <- ifelse(popfile$name == 0, 0, 1)
|
||
if (kysyToinen == 1) {
|
||
indicesfile <- uigetfile('*.txt', 'Load population indices')
|
||
if (indicesfile == 0) {
|
||
popnames = ""
|
||
} else {
|
||
# popnames = initPopNames([namepath namefile],[indicespath indicesfile]) # TODO: translate this fun
|
||
}
|
||
} else {
|
||
popnames <- ""
|
||
}
|
||
} else {
|
||
popnames <- ""
|
||
}
|
||
|
||
# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO: translate this function
|
||
# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: translate
|
||
if (is.null(savePreProcessed)) {
|
||
save_preproc <- questdlg(
|
||
quest = 'Do you wish to save pre-processed data?',
|
||
dlgtitle = 'Save pre-processed data?',
|
||
defbtn = 'y'
|
||
)
|
||
} else {
|
||
save_preproc <- savePreProcessed
|
||
}
|
||
if (save_preproc %in% c('y', 'yes', TRUE)) {
|
||
file_out <- uiputfile('.rda','Save pre-processed data as')
|
||
kokonimi <- paste0(file_out$path, file_out$name)
|
||
c <- list()
|
||
c$data <- data
|
||
c$rowsFromInd <- rowsFromInd
|
||
c$alleleCodes <- alleleCodes
|
||
c$noalle <- noalle
|
||
c$adjprior <- adjprior
|
||
c$priorTerm <- priorTerm
|
||
c$dist <- dist
|
||
c$popnames <- popnames
|
||
c$Z <- Z
|
||
save(c, file = kokonimi)
|
||
rm(c)
|
||
}
|
||
} else if (input_type_name == 'GenePop-format') {
|
||
# ------------------------------------------------------------------
|
||
# Treating GenePop-formatted files
|
||
# ------------------------------------------------------------------
|
||
if (!is(tietue, "character")) {
|
||
filename_pathname <- uigetfile(
|
||
filter = '*.txt',
|
||
title = 'Load data in GenePop-format'
|
||
)
|
||
if (filename_pathname$name == 0) stop("No name provided")
|
||
} else {
|
||
filename_pathname <- tietue
|
||
}
|
||
|
||
# ASK: remove?
|
||
# if ~isempty(partitionCompare)
|
||
# fprintf(1,'Data: %s\n',[pathname filename]);
|
||
# end
|
||
|
||
kunnossa <- testaaGenePopData(filename_pathname)
|
||
if (kunnossa == 0) stop("testaaGenePopData returned 0")
|
||
data_popnames <- lueGenePopData(filename_pathname)
|
||
data <- data_popnames$data
|
||
popnames <- data_popnames$popnames
|
||
|
||
# h0 = findobj('Tag','filename1_text');
|
||
# set(h0,'String',filename); clear h0;
|
||
|
||
list_dranap <- handleData(data)
|
||
data <- list_dranap$newData
|
||
rowsFromInd <- list_dranap$rowsFromInd
|
||
alleleCodes <- list_dranap$alleleCodes
|
||
noalle <- list_dranap$noalle
|
||
adjprior <- list_dranap$adjprior
|
||
priorTerm <- list_dranap$prioterm
|
||
|
||
list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug
|
||
Z <- list_Zd$Z
|
||
dist <- list_Zd$dist
|
||
|
||
if (is.null(savePreProcessed)) {
|
||
save_preproc <- questdlg(
|
||
quest = 'Do you wish to save pre-processed data?',
|
||
dlgtitle = 'Save pre-processed data?',
|
||
defbtn = 'y'
|
||
)
|
||
} else {
|
||
save_preproc <- savePreProcessed
|
||
}
|
||
if (save_preproc %in% c('y', 'Yes', TRUE)) {
|
||
file_out <- uiputfile('.rda','Save pre-processed data as')
|
||
kokonimi <- paste0(file_out$path, file_out$name)
|
||
# FIXME: translate functions above so the objects below exist
|
||
c$data <- data
|
||
c$rowsFromInd <- rowsFromInd
|
||
c$alleleCodes <- alleleCodes
|
||
c$noalle <- noalle
|
||
c$adjprior <- adjprior
|
||
c$priorTerm <- priorTerm
|
||
c$dist <- dist
|
||
c$popnames <- popnames
|
||
c$Z <- Z
|
||
save(c, file = kokonimi)
|
||
rm(c)
|
||
}
|
||
} else if (input_type_name == 'Preprocessed data') {
|
||
# ------------------------------------------------------------------
|
||
# Handling Pre-processed data
|
||
# ------------------------------------------------------------------
|
||
file_in <- uigetfile(
|
||
filter = '*.txt',
|
||
title = 'Load pre-processed data in GenePop-format'
|
||
)
|
||
if (file_in$name == 0) stop("No name provided")
|
||
|
||
# ASK: remove?
|
||
# h0 = findobj('Tag','filename1_text');
|
||
# set(h0,'String',filename); clear h0;
|
||
# if ~isempty(partitionCompare)
|
||
# fprintf(1,'Data: %s\n',[pathname filename]);
|
||
# end
|
||
|
||
struct_array <- readRDS(paste0(file_in$path, file_in$name))
|
||
if (isfield(struct_array,'c')) { # Matlab versio
|
||
c <- struct_array$c
|
||
if (!isfield(c,'dist')) stop('Incorrect file format')
|
||
} else if (isfield(struct_array,'dist')) { #Mideva versio
|
||
c <- struct_array
|
||
} else {
|
||
stop('Incorrect file format')
|
||
}
|
||
data <- double(c$data)
|
||
rowsFromInd <- c$rowsFromInd
|
||
alleleCodes <- c$alleleCodes
|
||
noalle <- c$noalle
|
||
adjprior <- c$adjprior
|
||
priorTerm <- c$priorTerm
|
||
dist <- c$dist
|
||
popnames <- c$popnames
|
||
Z <- c$Z
|
||
rm(c)
|
||
}
|
||
} else {
|
||
data <- double(tietue$data)
|
||
rowsFromInd <- tietue$rowsFromInd
|
||
alleleCodes <- tietue$alleleCodes
|
||
noalle <- tietue$noalle
|
||
adjprior <- tietue$adjprior
|
||
priorTerm <- tietue$priorTerm
|
||
dist <- tietue$dist
|
||
popnames <- tietue$popnames
|
||
Z <- tietue$Z
|
||
rm(tietue)
|
||
}
|
||
|
||
# ==========================================================================
|
||
# Declaring global variables and changing environment of children functions
|
||
# ==========================================================================
|
||
PARTITION <- vector()
|
||
COUNTS <- vector()
|
||
SUMCOUNTS <- vector()
|
||
POP_LOGML <- vector()
|
||
clearGlobalVars()
|
||
|
||
environment(writeMixtureInfo) <- environment()
|
||
# ==========================================================================
|
||
c <- list()
|
||
c$data <- data
|
||
c$noalle <- noalle
|
||
c$adjprior <- adjprior
|
||
c$priorTerm <- priorTerm
|
||
c$dist <- dist
|
||
c$Z <- Z
|
||
c$rowsFromInd <- rowsFromInd
|
||
|
||
ninds <- length(unique(data[, ncol(data)]))
|
||
ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd)
|
||
c$rows <- c(ekat, ekat + rowsFromInd - 1)
|
||
|
||
# ASK remove?
|
||
# partition compare
|
||
# if (!is.null(partitionCompare)) {
|
||
# nsamplingunits <- size(c$rows, 1)
|
||
# partitions <- partitionCompare$partitions
|
||
# npartitions <- size(partitions, 2)
|
||
# partitionLogml <- zeros(1, npartitions)
|
||
# for (i in seq_len(npartitions)) {
|
||
# # number of unique partition lables
|
||
# npops <- length(unique(partitions[, i]))
|
||
|
||
# partitionInd <- zeros(ninds * rowsFromInd, 1)
|
||
# partitionSample <- partitions[, i]
|
||
# for (j in seq_len(nsamplingunits)) {
|
||
# partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
|
||
# }
|
||
# # partitionLogml[i] = initialCounts(
|
||
# # partitionInd,
|
||
# # data[, seq_len(end - 1)],
|
||
# # npops,
|
||
# # c$rows,
|
||
# # noalle,
|
||
# # adjprior
|
||
# # ) #TODO translate
|
||
# }
|
||
# # return the logml result
|
||
# partitionCompare$logmls <- partitionLogml
|
||
# # set(h1, 'userdata', partitionCompare)
|
||
# return()
|
||
# }
|
||
|
||
if (fixedK) {
|
||
# logml_npops_partitionSummary <- indMix_fixK(c) # TODO: translate
|
||
# logml <- logml_npops_partitionSummary$logml
|
||
# npops <- logml_npops_partitionSummary$npops
|
||
# partitionSummary <- logml_npops_partitionSummary$partitionSummary
|
||
} else {
|
||
logml_npops_partitionSummary <- indMix(c) # TODO: translate
|
||
logml <- logml_npops_partitionSummary$logml
|
||
npops <- logml_npops_partitionSummary$npops
|
||
partitionSummary <- logml_npops_partitionSummary$partitionSummary
|
||
}
|
||
if (logml_npops_partitionSummary$logml == 1) return()
|
||
|
||
data <- data[, seq_len(ncol(data) - 1)]
|
||
|
||
# ASK: remove?
|
||
# h0 = findobj('Tag','filename1_text')
|
||
# inp = get(h0,'String');
|
||
# h0 = findobj('Tag','filename2_text')
|
||
# outp = get(h0,'String');
|
||
inp <- vector()
|
||
outp <- vector()
|
||
|
||
changesInLogml <- writeMixtureInfo(
|
||
logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
|
||
popnames, fixedK
|
||
) # FIXME: broken
|
||
|
||
# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder
|
||
|
||
talle <- questdlg(
|
||
quest = paste(
|
||
'Do you want to save the mixture populations',
|
||
'so that you can use them later in admixture analysis?'
|
||
),
|
||
dlgtitle = 'Save results?',
|
||
defbtn = 'y'
|
||
)
|
||
if (talle %in% c('Yes', 'y')) {
|
||
filename_pathname <- uiputfile('.mat','Save results as')
|
||
|
||
# ======================================================================
|
||
cond <- (sum(filename_pathname$name) == 0) |
|
||
(sum(filename_pathname$path) == 0)
|
||
if (cond) {
|
||
# Cancel was pressed
|
||
return()
|
||
} else {
|
||
# copy 'baps4_output.baps' into the text file with the same name.
|
||
if (file.exists('baps4_output.baps')) {
|
||
file.copy(
|
||
from = 'baps4_output.baps',
|
||
to = paste0(
|
||
filename_pathname$path, filename_pathname$name, '.txt'
|
||
)
|
||
)
|
||
file.remove('baps4_output.baps')
|
||
}
|
||
}
|
||
# ======================================================================
|
||
|
||
c$PARTITION <- PARTITION
|
||
c$COUNTS <- COUNTS
|
||
c$SUMCOUNTS <- SUMCOUNTS
|
||
c$alleleCodes <- alleleCodes
|
||
c$adjprior <- adjprior
|
||
c$popnames <- popnames
|
||
c$rowsFromInd <- rowsFromInd
|
||
c$data <- data
|
||
c$npops <- npops
|
||
c$noalle <- noalle
|
||
c$mixtureType <- 'mix'
|
||
c$logml <- logml
|
||
c$changesInLogml <- changesInLogml
|
||
|
||
save(c, file = paste0(filename_pathname$path, filename_pathname$name))
|
||
} else {
|
||
if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps')
|
||
}
|
||
}
|
||
|
||
# %--------------------------------------------------------------------------
|
||
|
||
|
||
# function [suurin, i2] = arvoSeuraavaTila(muutokset, logml)
|
||
# % Suorittaa yksil<69>n seuraavan tilan arvonnan
|
||
|
||
# y = logml + muutokset; % siirron j<>lkeiset logml:t
|
||
# y = y - max(y);
|
||
# y = exp(y);
|
||
# summa = sum(y);
|
||
# y = y/summa;
|
||
# y = cumsum(y);
|
||
|
||
# i2 = rand_disc(y); % uusi kori
|
||
# suurin = muutokset(i2);
|
||
|