ourMELONS/R/greedyMix.R

#' @title Clustering of individuals
#' @param tietue File
#' @param format Format of the data ("BAPS", "GenePop" or "Preprocessed")
#' @param savePreProcessed Save the pre-processed data?
#' @param filePreProcessed Is the file already processed?
#' @importFrom utils read.delim
#' @export
greedyMix <- function(
	tietue,
	format           = NULL,
	savePreProcessed = NULL,
	filePreProcessed = NULL
) {
	# ASK: Unclear when fixedk == TRUE. Remove?
	# check whether fixed k mode is selected
	# h0 <- findobj('Tag','fixk_menu')
	# fixedK = get(h0, 'userdata');
	fixedK <- FALSE

	# if fixedK
	# 	if ~(fixKWarning == 1) % call function fixKWarning
	# 		return
	# 	end
	# end

	# ASK: ditto
	# % check whether partition compare mode is selected
	# h1 = findobj('Tag','partitioncompare_menu');
	# partitionCompare = get(h1, 'userdata');
	partitionCompare <- FALSE

	if (is(tietue, "list") | is(tietue, "character")) {
		# ----------------------------------------------------------------------
		# Defining type of file
		# ----------------------------------------------------------------------
		if (is.null(format)) {
			input_type <- inputdlg(
				paste(
					'Specify the format of your data:\n',
					'1) BAPS-format\n',
					'2) GenePop-format\n',
					'3) Preprocessed data\n'
				)
			)
			# Converting from number into name
			input_type_name <- switch(
				input_type, 'BAPS-format', 'GenePop-format', 'Preprocessed data'
			)
		} else {
			input_type_name <- paste0(format, "-format")
		}
		if (length(input_type_name) == 0) {
			stop('Invalid alternative')
		} else if (input_type_name == 'BAPS-format') {
			# ------------------------------------------------------------------
			# Treating BAPS-formatted files
			# ------------------------------------------------------------------
			if (!is(tietue, "character")) {
				pathname_filename <- uigetfile(
					"*.txt", "Load data in BAPS-format"
				)
			} else {
				pathname_filename <- tietue
			}

			# ASK: remove?
			# if ~isempty(partitionCompare)
			# 	fprintf(1,'Data: %s\n',[pathname filename]);
			# end

			data <- read.delim(pathname_filename, header = FALSE, sep = " ")
			data <- as.matrix(data)
			ninds <- testaaOnkoKunnollinenBapsData(data)  # testing
			if (ninds == 0) stop('Incorrect Data-file')

			# ASK: remove?
			# h0 = findobj('Tag','filename1_text');
			# set(h0,'String',filename); clear h0;
			message(
				'When using data which are in BAPS-format,',
				'you can specify the sampling populations of the',
				'individuals by giving two additional files:',
				'one containing the names of the populations,',
				'the other containing the indices of the first ',
				'individuals of the populations.'
			)
			input_pops <- inputdlg(
				prompt = 'Do you wish to specify the sampling populations? [y/N]',
				definput = 'N'
			)
			if (tolower(input_pops) %in% c('yes', 'y')) {
				popfile    <- uigetfile('*.txt', 'Load population names')
				kysyToinen <- ifelse(popfile$name == 0, 0, 1)
				if (kysyToinen == 1) {
					indicesfile <- uigetfile('*.txt', 'Load population indices')
					if (indicesfile == 0) {
						popnames = ""
					} else {
						# popnames = initPopNames([namepath namefile],[indicespath indicesfile]) # TODO: translate this fun
					}
				} else {
					popnames <- ""
				}
			} else {
				popnames <- ""
			}

			# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO: translate this function
			# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: translate
			if (is.null(savePreProcessed)) {
				save_preproc <- questdlg(
					quest = 'Do you wish to save pre-processed data?',
					dlgtitle = 'Save pre-processed data?',
					defbtn = 'y'
				)
			} else {
				save_preproc <- savePreProcessed
			}
			if (save_preproc %in% c('y', 'yes', TRUE)) {
				file_out      <- uiputfile('.rda','Save pre-processed data as')
				kokonimi      <- paste0(file_out$path, file_out$name)
				c             <- list()
				c$data        <- data
				c$rowsFromInd <- rowsFromInd
				c$alleleCodes <- alleleCodes
				c$noalle      <- noalle
				c$adjprior    <- adjprior
				c$priorTerm   <- priorTerm
				c$dist        <- dist
				c$popnames    <- popnames
				c$Z           <- Z
				save(c, file = kokonimi)
				rm(c)
			}
		} else if (input_type_name == 'GenePop-format') {
			# ------------------------------------------------------------------
			# Treating GenePop-formatted files
			# ------------------------------------------------------------------
			if (!is(tietue, "character")) {
				filename_pathname <- uigetfile(
					filter = '*.txt',
					title = 'Load data in GenePop-format'
				)
				if (filename_pathname$name == 0) stop("No name provided")
			} else {
				filename_pathname <- tietue
			}

			# ASK: remove?
			# if ~isempty(partitionCompare)
			# 	fprintf(1,'Data: %s\n',[pathname filename]);
			# end

			kunnossa <- testaaGenePopData(filename_pathname)
			if (kunnossa == 0) stop("testaaGenePopData returned 0")
			data_popnames <- lueGenePopData(filename_pathname)
			data          <- data_popnames$data
			popnames      <- data_popnames$popnames

# 			h0 = findobj('Tag','filename1_text');
# 			set(h0,'String',filename); clear h0;

			list_dranap <- handleData(data)
			data        <- list_dranap$newData
			rowsFromInd <- list_dranap$rowsFromInd
			alleleCodes <- list_dranap$alleleCodes
			noalle      <- list_dranap$noalle
			adjprior    <- list_dranap$adjprior
			priorTerm   <- list_dranap$prioterm

			list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug
			Z       <- list_Zd$Z
			dist    <- list_Zd$dist

			if (is.null(savePreProcessed)) {
				save_preproc <- questdlg(
					quest = 'Do you wish to save pre-processed data?',
					dlgtitle = 'Save pre-processed data?',
					defbtn = 'y'
				)
			} else {
				save_preproc <- savePreProcessed
			}
			if (save_preproc %in% c('y', 'Yes', TRUE)) {
				file_out      <- uiputfile('.rda','Save pre-processed data as')
				kokonimi      <- paste0(file_out$path, file_out$name)
				# FIXME: translate functions above so the objects below exist
				c$data        <- data
				c$rowsFromInd <- rowsFromInd
				c$alleleCodes <- alleleCodes
				c$noalle      <- noalle
				c$adjprior    <- adjprior
				c$priorTerm   <- priorTerm
				c$dist        <- dist
				c$popnames    <- popnames
				c$Z           <- Z
				save(c, file = kokonimi)
				rm(c)
			}
		} else if (input_type_name == 'Preprocessed data') {
			# ------------------------------------------------------------------
			# Handling Pre-processed data
			# ------------------------------------------------------------------
			file_in <- uigetfile(
				filter = '*.txt',
				title = 'Load pre-processed data in GenePop-format'
			)
			if (file_in$name == 0) stop("No name provided")

			# ASK: remove?
			# h0 = findobj('Tag','filename1_text');
			# set(h0,'String',filename); clear h0;
			# if ~isempty(partitionCompare)
			# 	fprintf(1,'Data: %s\n',[pathname filename]);
			# end

			struct_array <- readRDS(paste0(file_in$path, file_in$name))
			if (isfield(struct_array,'c')) { # Matlab versio
				c <- struct_array$c
				if (!isfield(c,'dist')) stop('Incorrect file format')
			} else if (isfield(struct_array,'dist')) { #Mideva versio
				c <- struct_array
			} else {
				stop('Incorrect file format')
			}
			data        <- double(c$data)
			rowsFromInd <- c$rowsFromInd
			alleleCodes <- c$alleleCodes
			noalle      <- c$noalle
			adjprior    <- c$adjprior
			priorTerm   <- c$priorTerm
			dist        <- c$dist
			popnames    <- c$popnames
			Z           <- c$Z
			rm(c)
		}
	} else {
		data        <- double(tietue$data)
		rowsFromInd <- tietue$rowsFromInd
		alleleCodes <- tietue$alleleCodes
		noalle      <- tietue$noalle
		adjprior    <- tietue$adjprior
		priorTerm   <- tietue$priorTerm
		dist        <- tietue$dist
		popnames    <- tietue$popnames
		Z           <- tietue$Z
		rm(tietue)
	}

	# ==========================================================================
	# Declaring global variables and changing environment of children functions
	# ==========================================================================
	PARTITION       <- vector()
	COUNTS          <- vector()
	SUMCOUNTS       <- vector()
	POP_LOGML       <- vector()
	clearGlobalVars()

	environment(writeMixtureInfo) <- environment()
	# ==========================================================================
	c             <- list()
	c$data        <- data
	c$noalle      <- noalle
	c$adjprior    <- adjprior
	c$priorTerm   <- priorTerm
	c$dist        <- dist
	c$Z           <- Z
	c$rowsFromInd <- rowsFromInd

	ninds  <- length(unique(data[, ncol(data)]))
	ekat   <- t(seq(1, ninds, rowsFromInd) * rowsFromInd)
	c$rows <- t(rbind(ekat, ekat + rowsFromInd - 1))

	# ASK remove?
	# partition compare
	# if (!is.null(partitionCompare)) {
	# 	nsamplingunits <- size(c$rows, 1)
	# 	partitions     <- partitionCompare$partitions
	# 	npartitions    <- size(partitions, 2)
	# 	partitionLogml <- zeros(1, npartitions)
	# 	for (i in seq_len(npartitions)) {
	# 		# number of unique partition lables
	# 		npops <- length(unique(partitions[, i]))

	# 		partitionInd <- zeros(ninds * rowsFromInd, 1)
	# 		partitionSample <- partitions[, i]
	# 		for (j in seq_len(nsamplingunits)) {
	# 			partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
	# 		}
	# 		# partitionLogml[i] = initialCounts(
	# 		# 	partitionInd,
	# 		# 	data[, seq_len(end - 1)],
	# 		# 	npops,
	# 		# 	c$rows,
	# 		# 	noalle,
	# 		# 	adjprior
	# 		# ) #TODO translate
	# 	}
	# 	# return the logml result
	# 	partitionCompare$logmls <- partitionLogml
	# 	# set(h1, 'userdata', partitionCompare)
	# 	return()
	# }

	if (fixedK) {
		# logml_npops_partitionSummary <- indMix_fixK(c) # TODO: translate
		# logml <- logml_npops_partitionSummary$logml
		# npops <- logml_npops_partitionSummary$npops
		# partitionSummary <- logml_npops_partitionSummary$partitionSummary
	} else {
		logml_npops_partitionSummary <- indMix(c) # TODO: translate
		logml <- logml_npops_partitionSummary$logml
		npops <- logml_npops_partitionSummary$npops
		partitionSummary <- logml_npops_partitionSummary$partitionSummary
	}
	if (logml_npops_partitionSummary$logml == 1) return()

	data <- data[, seq_len(ncol(data) - 1)]

	# ASK: remove?
	# h0 = findobj('Tag','filename1_text')
	# inp = get(h0,'String');
	# h0 = findobj('Tag','filename2_text')
	# outp = get(h0,'String');
	inp <- vector()
	outp <- vector()

	changesInLogml <- writeMixtureInfo(
		logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
		popnames, fixedK
	) # FIXME: broken

	# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder

	talle <- questdlg(
		quest = paste(
			'Do you want to save the mixture populations',
			'so that you can use them later in admixture analysis?'
		),
		dlgtitle = 'Save results?',
		defbtn = 'y'
	)
	if (talle %in% c('Yes', 'y')) {
		filename_pathname <- uiputfile('.mat','Save results as')

		# ======================================================================
		cond <- (sum(filename_pathname$name) == 0) |
		(sum(filename_pathname$path) == 0)
		if (cond) {
			# Cancel was pressed
			return()
		} else {
			# copy 'baps4_output.baps' into the text file with the same name.
			if (file.exists('baps4_output.baps')) {
				file.copy(
					from = 'baps4_output.baps',
					to = paste0(
						filename_pathname$path, filename_pathname$name, '.txt'
					)
				)
				file.remove('baps4_output.baps')
			}
		}
		# ======================================================================

		c$PARTITION      <- PARTITION
		c$COUNTS         <- COUNTS
		c$SUMCOUNTS      <- SUMCOUNTS
		c$alleleCodes    <- alleleCodes
		c$adjprior       <- adjprior
		c$popnames       <- popnames
		c$rowsFromInd    <- rowsFromInd
		c$data           <- data
		c$npops          <- npops
		c$noalle         <- noalle
		c$mixtureType    <- 'mix'
		c$logml          <- logml
		c$changesInLogml <- changesInLogml

		save(c, file = paste0(filename_pathname$path, filename_pathname$name))
	} else {
		if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps')
	}
}