ourMELONS/R/greedyMix.R

#' @title Clustering of individuals
#' @param tietue File
#' @param format Format of the data ("BAPS", "GenePop" or "Preprocessed")
#' @param savePreProcessed Save the pre-processed data?
#' @param filePreProcessed Is the file already processed?
#' @importFrom utils read.delim
#' @export
greedyMix <- function(
	tietue,
	format           = NULL,
	savePreProcessed = NULL,
	filePreProcessed = NULL
) {
	# ASK: graphical components. Remove?
	# check whether fixed k mode is selected
	# h0 <- findobj('Tag','fixk_menu')
	# fixedK = get(h0, 'userdata');

	# if fixedK
	# 	if ~(fixKWarning == 1) % call function fixKWarning
	# 		return
	# 	end
	# end

	# % check whether partition compare mode is selected
	# h1 = findobj('Tag','partitioncompare_menu');
	# partitionCompare = get(h1, 'userdata');

	if (is(tietue, "list") | is(tietue, "character")) {
		# ----------------------------------------------------------------------
		# Defining type of file
		# ----------------------------------------------------------------------
		if (is.null(format)) {
			input_type <- inputdlg(
				paste(
					'Specify the format of your data:\n',
					'1) BAPS-format\n',
					'2) GenePop-format\n',
					'3) Preprocessed data\n'
				)
			)
			# Converting from number into name
			input_type_name <- switch(
				input_type, 'BAPS-format', 'GenePop-format', 'Preprocessed data'
			)
		} else {
			input_type_name <- paste0(format, "-format")
		}
		if (length(input_type_name) == 0) {
			stop('Invalid alternative')
		} else if (input_type_name == 'BAPS-format') {
			# ------------------------------------------------------------------
			# Treating BAPS-formatted files
			# ------------------------------------------------------------------
			if (!is(tietue, "character")) {
				pathname_filename <- uigetfile(
					"*.txt", "Load data in BAPS-format"
				)
			} else {
				pathname_filename <- tietue
			}

			# ASK: remove?
			# if ~isempty(partitionCompare)
			# 	fprintf(1,'Data: %s\n',[pathname filename]);
			# end

			data <- read.delim(pathname_filename) # TODO: discover delimiter
			ninds <- testaaOnkoKunnollinenBapsData(data)  # testing
			if (ninds == 0) stop('Incorrect Data-file')

			# ASK: remove?
			# h0 = findobj('Tag','filename1_text');
			# set(h0,'String',filename); clear h0;
			cat(
				'When using data which are in BAPS-format,',
				'you can specify the sampling populations of the',
				'individuals by giving two additional files:',
				'one containing the names of the populations,',
				'the other containing the indices of the first',
				'individuals of the populations.'
			)
			input_pops <- inputdlg(
				prompt = 'Do you wish to specify the sampling populations? [y/N]',
				definput = 'N'
			)
			if (tolower(input_pops) %in% c('yes', 'y')) {
				popfile    <- uigetfile('*.txt', 'Load population names')
				kysyToinen <- ifelse(popfile$name == 0, 0, 1)
				if (kysyToinen == 1) {
					indicesfile <- uigetfile('*.txt', 'Load population indices')
					if (indicesfile == 0) {
						popnames = ""
					} else {
						# popnames = initPopNames([namepath namefile],[indicespath indicesfile]) # TODO: translate this fun
					}
				} else {
					popnames <- ""
				}
			} else {
				popnames <- ""
			}

			# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO: translate this function
			# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: translate
			if (is.null(savePreProcessed)) {
				save_preproc <- questdlg(
					quest = 'Do you wish to save pre-processed data?',
					dlgtitle = 'Save pre-processed data?',
					defbtn = 'y'
				)
			} else {
				save_preproc <- savePreProcessed
			}
			if (save_preproc %in% c('y', 'yes', TRUE)) {
				file_out      <- uiputfile('.rda','Save pre-processed data as')
				kokonimi      <- paste0(file_out$path, file_out$name)
				c             <- list()
				c$data        <- data
				c$rowsFromInd <- rowsFromInd
				c$alleleCodes <- alleleCodes
				c$noalle      <- noalle
				c$adjprior    <- adjprior
				c$priorTerm   <- priorTerm
				c$dist        <- dist
				c$popnames    <- popnames
				c$Z           <- Z
				save(c, file = kokonimi)
				rm(c)
			}
		} else if (input_type_name == 'GenePop-format') {
			# ------------------------------------------------------------------
			# Treating GenePop-formatted files
			# ------------------------------------------------------------------
			if (!is(tietue, "character")) {
				filename_pathname <- uigetfile(
					filter = '*.txt',
					title = 'Load data in GenePop-format'
				)
				if (filename_pathname$name == 0) stop("No name provided")
			} else {
				filename_pathname <- tietue
			}

			# ASK: remove?
			# if ~isempty(partitionCompare)
			# 	fprintf(1,'Data: %s\n',[pathname filename]);
			# end

			kunnossa <- testaaGenePopData(pathname_filename)
			# if (kunnossa == 0) stop("testaaGenePopData returned 0")
			# [data,popnames]=lueGenePopData([pathname filename]); # TODO: trans

# 			h0 = findobj('Tag','filename1_text');
# 			set(h0,'String',filename); clear h0;

# 			[data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO:trans
# 			[Z,dist] = newGetDistances(data,rowsFromInd); # TODO: trans
			if (is.null(savePreProcessed)) {
				save_preproc <- questdlg(
					quest = 'Do you wish to save pre-processed data?',
					dlgtitle = 'Save pre-processed data?',
					defbtn = 'y'
				)
			} else {
				save_preproc <- savePreProcessed
			}
			if (save_preproc %in% c('y', 'Yes', TRUE)) {
				file_out      <- uiputfile('.rda','Save pre-processed data as')
				kokonimi      <- paste0(file_out$path, file_out$name)
				# FIXME: translate functions above so the objects below exist
				c$data        <- data
				c$rowsFromInd <- rowsFromInd
				c$alleleCodes <- alleleCodes
				c$noalle      <- noalle
				c$adjprior    <- adjprior
				c$priorTerm   <- priorTerm
				c$dist        <- dist
				c$popnames    <- popnames
				c$Z           <- Z
				save(c, file = kokonimi)
				rm(c)
			}
		} else if (input_type_name == 'Preprocessed data') {
			# ------------------------------------------------------------------
			# Handling Pre-processed data
			# ------------------------------------------------------------------
			file_in <- uigetfile(
				filter = '*.txt',
				title = 'Load pre-processed data in GenePop-format'
			)
			if (file_in$name == 0) stop("No name provided")

			# ASK: remove?
			# h0 = findobj('Tag','filename1_text');
			# set(h0,'String',filename); clear h0;
			# if ~isempty(partitionCompare)
			# 	fprintf(1,'Data: %s\n',[pathname filename]);
			# end

			struct_array <- readRDS(paste0(file_in$path, file_in$name))
			if (isfield(struct_array,'c')) { # Matlab versio
				c <- struct_array$c
				if (!isfield(c,'dist')) stop('Incorrect file format')
			} else if (isfield(struct_array,'dist')) { #Mideva versio
				c <- struct_array
			} else {
				stop('Incorrect file format')
			}
			data        <- double(c$data)
			rowsFromInd <- c$rowsFromInd
			alleleCodes <- c$alleleCodes
			noalle      <- c$noalle
			adjprior    <- c$adjprior
			priorTerm   <- c$priorTerm
			dist        <- c$dist
			popnames    <- c$popnames
			Z           <- c$Z
			rm(c)
		}
	} else {
		data        <- double(tietue$data)
		rowsFromInd <- tietue$rowsFromInd
		alleleCodes <- tietue$alleleCodes
		noalle      <- tietue$noalle
		adjprior    <- tietue$adjprior
		priorTerm   <- tietue$priorTerm
		dist        <- tietue$dist
		popnames    <- tietue$popnames
		Z           <- tietue$Z
		rm(tietue)
	}

	# ==========================================================================
	# Declaring global variables
	# ==========================================================================
	PARTITION       <- vector()
	COUNTS          <- vector()
	SUMCOUNTS       <- vector()
	POP_LOGML       <- vector()
	clearGlobalVars <- vector()
	# ==========================================================================

	c$data        <- data
	c$noalle      <- noalle
	c$adjprior    <- adjprior
	c$priorTerm   <- priorTerm
	c$dist        <- dist
	c$Z           <- Z
	c$rowsFromInd <- rowsFromInd

	ninds  <- length(unique(data[, ncol(data)]))
	ekat   <- t(seq(1, ninds, rowsFromInd) * rowsFromInd)
	c$rows <- c(ekat, ekat + rowsFromInd - 1)

	# partition compare
	if (!is.null(partitionCompare)) {
		nsamplingunits <- size(c$rows, 1)
		partitions     <- partitionCompare$partitions
		npartitions    <- size(partitions, 2)
		partitionLogml <- zeros(1, npartitions)
		for (i in seq_len(npartitions)) {
			# number of unique partition lables
			npops <- length(unique(partitions[, i]))

			partitionInd <- zeros(ninds * rowsFromInd, 1)
			partitionSample <- partitions[, i]
			for (j in seq_len(nsamplingunits)) {
				partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
			}
			# partitionLogml[i] = initialCounts(
			# 	partitionInd,
			# 	data[, seq_len(end - 1)],
			# 	npops,
			# 	c$rows,
			# 	noalle,
			# 	adjprior
			# ) #TODO translate
		}
		# return the logml result
		partitionCompare$logmls <- partitionLogml
		# set(h1, 'userdata', partitionCompare) # ASK remove?
		return()
	}

	# ASK remove (graphical part)?
	# if (fixedK) {
	# 	#logml_npops_partitionSummary <- indMix_fixK(c) # ASK translate?
	# } else {
	# 	#logml_npops_partitionSummary <- indMix(c) # ASK translate?
	# }
	# if (logml_npops_partitionSummary$logml == 1) return()

	data <- data[, seq_len(ncol(data) - 1)]

	# ASK: remove?
	# h0 = findobj('Tag','filename1_text');  inp = get(h0,'String');
	# h0 = findobj('Tag','filename2_text');
	# outp = get(h0,'String');

	# changesInLogml <- writeMixtureInfo(
	# 	logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
	# 	popnames, fixedK
	# ) # TODO translate

	# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder

	talle <- questdlg(
		quest = paste(
			'Do you want to save the mixture populations',
			'so that you can use them later in admixture analysis?'
		),
		dlgtitle = 'Save results?',
		defbtn = 'y'
	)
	if (talle %in% c('Yes', 'y')) {
		filename_pathname <- uiputfile('.mat','Save results as')

		# ======================================================================
		cond <- (sum(filename_pathname$name) == 0) |
		(sum(filename_pathname$path) == 0)
		if (cond) {
			# Cancel was pressed
			return()
		} else {
			# copy 'baps4_output.baps' into the text file with the same name.
			if (file.exists('baps4_output.baps')) {
				file.copy(
					from = 'baps4_output.baps',
					to = paste0(
						filename_pathname$path, filename_pathname$name, '.txt'
					)
				)
				file.remove('baps4_output.baps')
			}
		}
		# ======================================================================

		c$PARTITION      <- PARTITION
		c$COUNTS         <- COUNTS
		c$SUMCOUNTS      <- SUMCOUNTS
		c$alleleCodes    <- alleleCodes
		c$adjprior       <- adjprior
		c$popnames       <- popnames
		c$rowsFromInd    <- rowsFromInd
		c$data           <- data
		c$npops          <- npops
		c$noalle         <- noalle
		c$mixtureType    <- 'mix'
		c$logml          <- logml
		c$changesInLogml <- changesInLogml

		save(c, file = paste0(filename_pathname$path, filename_pathname$name))
	} else {
		if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps')
	}
}

# 	%-------------------------------------------------------------------------------------
# 	%-------------------------------------------------------------------------------------

# 	function clearGlobalVars

# 	global COUNTS; COUNTS = [];
# 	global SUMCOUNTS; SUMCOUNTS = [];
# 	global PARTITION; PARTITION = [];
# 	global POP_LOGML; POP_LOGML = [];

# 	%--------------------------------------------------------------------------


# 	function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex)
# 	% Tiedet<65><74>n, ett?annettu logml on isompi kuin huonoin arvo
# 	% partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole
# 	% annettua logml arvoa, niin lis<69>t<EFBFBD><74>n worstIndex:in kohtaan uusi logml ja
# 	% nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mit<69><74>n.

# 	apu = find(abs(partitionSummary(:,2)-logml)<1e-5);
# 	if isempty(apu)
# 		% Nyt l<>ydetty partitio ei ole viel?kirjattuna summaryyn.
# 		global PARTITION;
# 		npops = length(unique(PARTITION));
# 		partitionSummary(worstIndex,1) = npops;
# 		partitionSummary(worstIndex,2) = logml;
# 		added = 1;
# 	else
# 		added = 0;
# 	end


# 	%--------------------------------------------------------------------------


# 	function [suurin, i2] = arvoSeuraavaTila(muutokset, logml)
# 	% Suorittaa yksil<69>n seuraavan tilan arvonnan

# 	y = logml + muutokset;  % siirron j<>lkeiset logml:t
# 	y = y - max(y);
# 	y = exp(y);
# 	summa = sum(y);
# 	y = y/summa;
# 	y = cumsum(y);

# 	i2 = rand_disc(y);   % uusi kori
# 	suurin = muutokset(i2);


# 	%--------------------------------------------------------------------------------------


# 	function svar=rand_disc(CDF)
# 	%returns an index of a value from a discrete distribution using inversion method
# 	slump=rand;
# 	har=find(CDF>slump);
# 	svar=har(1);


# 	%-------------------------------------------------------------------------------------


# 	function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ...
# 		adjprior, priorTerm)
# 	% Suorittaa globaalien muuttujien muutokset, kun yksil?ind
# 	% on siirret<65><74>n koriin i2.

# 	global PARTITION;
# 	global COUNTS;
# 	global SUMCOUNTS;
# 	global POP_LOGML;

# 	i1 = PARTITION(ind);
# 	PARTITION(ind)=i2;

# 	COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
# 	COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
# 	SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
# 	SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);

# 	POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);


# 	%---------------------------------------------------------------------------------


# 	function updateGlobalVariables2( ...
# 		i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm);
# 	% Suorittaa globaalien muuttujien muutokset, kun kaikki
# 	% korissa i1 olevat yksil<69>t siirret<65><74>n koriin i2.

# 	global PARTITION;
# 	global COUNTS;
# 	global SUMCOUNTS;
# 	global POP_LOGML;

# 	inds = find(PARTITION==i1);
# 	PARTITION(inds) = i2;

# 	COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
# 	COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
# 	SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
# 	SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);

# 	POP_LOGML(i1) = 0;
# 	POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm);


# 	%------------------------------------------------------------------------------------


# 	function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ...
# 		adjprior, priorTerm, i2);
# 	% Suorittaa globaalien muuttujien p<>ivitykset, kun yksil<69>t 'muuttuvat'
# 	% siirret<65><74>n koriin i2. Ennen siirtoa yksil<69>iden on kuuluttava samaan
# 	% koriin.

# 	global PARTITION;
# 	global COUNTS;
# 	global SUMCOUNTS;
# 	global POP_LOGML;

# 	i1 = PARTITION(muuttuvat(1));
# 	PARTITION(muuttuvat) = i2;

# 	COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
# 	COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
# 	SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
# 	SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);

# 	POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);


# 	%----------------------------------------------------------------------


# 	function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm)
# 	% Palauttaa yksil<69>t j<>rjestyksess?siten, ett?ensimm<6D>isen?on
# 	% se, jonka poistaminen populaatiosta pop nostaisi logml:n
# 	% arvoa eniten.

# 	global COUNTS;      global SUMCOUNTS;
# 	ninds = length(inds);
# 	apuTaulu = [inds, zeros(ninds,1)];

# 	for i=1:ninds
# 		ind = inds(i);
# 		rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
# 		diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
# 		diffInSumCounts = sum(diffInCounts);

# 		COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts;
# 		SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts;
# 		apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm);
# 		COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts;
# 		SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts;
# 	end
# 	apuTaulu = sortrows(apuTaulu,2);
# 	inds = apuTaulu(ninds:-1:1,1);

# 	%------------------------------------------------------------------------------------


# 	function [muutokset, diffInCounts] = ...
# 		laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm)
# 	% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
# 	% muutos logml:ss? mik<69>li yksil?ind siirret<65><74>n koriin i.
# 	% diffInCounts on poistettava COUNTS:in siivusta i1 ja lis<69>tt<74>v?
# 	% COUNTS:in siivuun i2, mik<69>li muutos toteutetaan.

# 	global COUNTS;      global SUMCOUNTS;
# 	global PARTITION;   global POP_LOGML;
# 	npops = size(COUNTS,3);
# 	muutokset = zeros(npops,1);

# 	i1 = PARTITION(ind);
# 	i1_logml = POP_LOGML(i1);

# 	rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
# 	diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
# 	diffInSumCounts = sum(diffInCounts);

# 	COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
# 	SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
# 	new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
# 	COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
# 	SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;

# 	i2 = [1:i1-1 , i1+1:npops];
# 	i2_logml = POP_LOGML(i2);

# 	COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
# 	SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
# 	new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
# 	COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
# 	SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);

# 	muutokset(i2) = new_i1_logml - i1_logml ...
# 		+ new_i2_logml - i2_logml;


# 	%------------------------------------------------------------------------------------


# 	function [muutokset, diffInCounts] = laskeMuutokset2( ...
# 		i1, rowsFromInd, data, adjprior, priorTerm);
# 	% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
# 	% muutos logml:ss? mik<69>li korin i1 kaikki yksil<69>t siirret<65><74>n
# 	% koriin i.

# 	global COUNTS;      global SUMCOUNTS;
# 	global PARTITION;   global POP_LOGML;
# 	npops = size(COUNTS,3);
# 	muutokset = zeros(npops,1);

# 	i1_logml = POP_LOGML(i1);

# 	inds = find(PARTITION==i1);
# 	ninds = length(inds);

# 	if ninds==0
# 		diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2));
# 		return;
# 	end

# 	rows = computeRows(rowsFromInd, inds, ninds);

# 	diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
# 	diffInSumCounts = sum(diffInCounts);

# 	COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
# 	SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
# 	new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
# 	COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
# 	SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;

# 	i2 = [1:i1-1 , i1+1:npops];
# 	i2_logml = POP_LOGML(i2);

# 	COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
# 	SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
# 	new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
# 	COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
# 	SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);

# 	muutokset(i2) = new_i1_logml - i1_logml ...
# 		+ new_i2_logml - i2_logml;


# 	%------------------------------------------------------------------------------------


# 	function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ...
# 		data, adjprior, priorTerm, i1)
# 	% Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio
# 	% kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio
# 	% inds2(find(T2==i)) siirret<65><74>n koriin j.

# 	global COUNTS;      global SUMCOUNTS;
# 	global PARTITION;   global POP_LOGML;
# 	npops = size(COUNTS,3);
# 	npops2 = length(unique(T2));
# 	muutokset = zeros(npops2, npops);

# 	i1_logml = POP_LOGML(i1);

# 	for pop2 = 1:npops2
# 		inds = inds2(find(T2==pop2));
# 		ninds = length(inds);
# 		if ninds>0
# 			rows = computeRows(rowsFromInd, inds, ninds);
# 			diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
# 			diffInSumCounts = sum(diffInCounts);

# 			COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
# 			SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
# 			new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
# 			COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
# 			SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;

# 			i2 = [1:i1-1 , i1+1:npops];
# 			i2_logml = POP_LOGML(i2)';

# 			COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
# 			SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
# 			new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)';
# 			COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
# 			SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);

# 			muutokset(pop2,i2) = new_i1_logml - i1_logml ...
# 				+ new_i2_logml - i2_logml;
# 		end
# 	end


# 	%------------------------------------------------------------------------------------

# 	function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ...
# 		priorTerm, i1, i2)

# 	% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi
# 	% muutos logml:ss? mik<69>li yksil?i vaihtaisi koria i1:n ja i2:n v<>lill?

# 	global COUNTS;      global SUMCOUNTS;
# 	global PARTITION;   global POP_LOGML;

# 	ninds = length(inds);
# 	muutokset = zeros(ninds,1);

# 	i1_logml = POP_LOGML(i1);
# 	i2_logml = POP_LOGML(i2);

# 	for i = 1:ninds
# 		ind = inds(i);
# 		if PARTITION(ind)==i1
# 			pop1 = i1;  %mist?
# 			pop2 = i2;  %mihin
# 		else
# 			pop1 = i2;
# 			pop2 = i1;
# 		end
# 		rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
# 		diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
# 		diffInSumCounts = sum(diffInCounts);

# 		COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts;
# 		SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts;
# 		COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts;
# 		SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts;

# 		PARTITION(ind) = pop2;

# 		new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm);

# 		muutokset(i) = sum(new_logmls);

# 		COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts;
# 		SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts;
# 		COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts;
# 		SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts;

# 		PARTITION(ind) = pop1;
# 	end

# 	muutokset = muutokset - i1_logml - i2_logml;

# 	%--------------------------------------------------------------------------


# 	function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
# 	% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
# 	% lukum<75><6D>r<EFBFBD>t (vastaavasti kuin COUNTS:issa), jotka ovat data:n
# 	% riveill?rows.

# 	diffInCounts = zeros(max_noalle, nloci);
# 	for i=rows
# 		row = data(i,:);
# 		notEmpty = find(row>=0);

# 		if length(notEmpty)>0
# 			diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ...
# 				diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1;
# 		end
# 	end


# 	%------------------------------------------------------------------------------------


# 	function popLogml = computePopulationLogml(pops, adjprior, priorTerm)
# 	% Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset
# 	% logml:t koreille, jotka on m<><6D>ritelty pops-muuttujalla.

# 	global COUNTS;
# 	global SUMCOUNTS;
# 	x = size(COUNTS,1);
# 	y = size(COUNTS,2);
# 	z = length(pops);

# 	popLogml = ...
# 		squeeze(sum(sum(reshape(...
# 		gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ...
# 		,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm;


# 	%-----------------------------------------------------------------------------------


# 	function npops = poistaTyhjatPopulaatiot(npops)
# 	% Poistaa tyhjentyneet populaatiot COUNTS:ista ja
# 	% SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.

# 	global COUNTS;
# 	global SUMCOUNTS;
# 	global PARTITION;

# 	notEmpty = find(any(SUMCOUNTS,2));
# 	COUNTS = COUNTS(:,:,notEmpty);
# 	SUMCOUNTS = SUMCOUNTS(notEmpty,:);

# 	for n=1:length(notEmpty)
# 		apu = find(PARTITION==notEmpty(n));
# 		PARTITION(apu)=n;
# 	end
# 	npops = length(notEmpty);


# 	%----------------------------------------------------------------------------------
# 	%Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.

# 	function initial_partition=admixture_initialization(data_matrix,nclusters,Z)
# 	size_data=size(data_matrix);
# 	nloci=size_data(2)-1;
# 	n=max(data_matrix(:,end));
# 	T=cluster_own(Z,nclusters);
# 	initial_partition=zeros(size_data(1),1);
# 	for i=1:n
# 		kori=T(i);
# 		here=find(data_matrix(:,end)==i);
# 		for j=1:length(here)
# 			initial_partition(here(j),1)=kori;
# 		end
# 	end

# 	function T = cluster_own(Z,nclust)
# 	true=logical(1);
# 	false=logical(0);
# 	maxclust = nclust;
# 	% Start of algorithm
# 	m = size(Z,1)+1;
# 	T = zeros(m,1);
# 	   % maximum number of clusters based on inconsistency
# 	   if m <= maxclust
# 		  T = (1:m)';
# 	   elseif maxclust==1
# 		  T = ones(m,1);
# 	   else
# 		  clsnum = 1;
# 		  for k = (m-maxclust+1):(m-1)
# 			 i = Z(k,1); % left tree
# 			 if i <= m % original node, no leafs
# 				T(i) = clsnum;
# 				clsnum = clsnum + 1;
# 			 elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree
# 				T = clusternum(Z, T, i-m, clsnum);
# 				clsnum = clsnum + 1;
# 			 end
# 			 i = Z(k,2); % right tree
# 			 if i <= m  % original node, no leafs
# 				T(i) = clsnum;
# 				clsnum = clsnum + 1;
# 			 elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree
# 				T = clusternum(Z, T, i-m, clsnum);
# 				clsnum = clsnum + 1;
# 			 end
# 		  end
# 	   end

# 	function T = clusternum(X, T, k, c)
# 	m = size(X,1)+1;
# 	while(~isempty(k))
# 	   % Get the children of nodes at this level
# 	   children = X(k,1:2);
# 	   children = children(:);

# 	   % Assign this node number to leaf children
# 	   t = (children<=m);
# 	   T(children(t)) = c;

# 	   % Move to next level
# 	   k = children(~t) - m;
# 	end


# 	%---------------------------------------------------------------------------------------

# 	function [Z, dist] = newGetDistances(data, rowsFromInd)

# 	ninds = max(data(:,end));
# 	nloci = size(data,2)-1;
# 	riviLkm = nchoosek(ninds,2);

# 	empties = find(data<0);
# 	data(empties)=0;
# 	data = uint8(data);   % max(noalle) oltava <256

# 	pariTaulu = zeros(riviLkm,2);
# 	aPointer=1;
# 	for a=1:ninds-1
# 		pariTaulu(aPointer:aPointer+ninds-1-a,1) = ones(ninds-a,1)*a;
# 		pariTaulu(aPointer:aPointer+ninds-1-a,2) = (a+1:ninds)';
# 		aPointer = aPointer+ninds-a;
# 	end

# 	eka = pariTaulu(:,ones(1,rowsFromInd));
# 	eka = eka * rowsFromInd;
# 	miinus = repmat(rowsFromInd-1 : -1 : 0, [riviLkm 1]);
# 	eka = eka - miinus;

# 	toka = pariTaulu(:,ones(1,rowsFromInd)*2);
# 	toka = toka * rowsFromInd;
# 	toka = toka - miinus;

# 	%eka = uint16(eka);
# 	%toka = uint16(toka);

# 	summa = zeros(riviLkm,1);
# 	vertailuja = zeros(riviLkm,1);

# 	clear pariTaulu; clear miinus;

# 	x = zeros(size(eka));    x = uint8(x);
# 	y = zeros(size(toka));   y = uint8(y);

# 	for j=1:nloci;

# 		for k=1:rowsFromInd
# 			x(:,k) = data(eka(:,k),j);
# 			y(:,k) = data(toka(:,k),j);
# 		end

# 		for a=1:rowsFromInd
# 			for b=1:rowsFromInd
# 				vertailutNyt = double(x(:,a)>0 & y(:,b)>0);
# 				vertailuja = vertailuja + vertailutNyt;
# 				lisays = (x(:,a)~=y(:,b) & vertailutNyt);
# 				summa = summa+double(lisays);
# 			end
# 		end
# 	end

# 	clear x;    clear y;   clear vertailutNyt;
# 	nollat = find(vertailuja==0);
# 	dist = zeros(length(vertailuja),1);
# 	dist(nollat) = 1;
# 	muut = find(vertailuja>0);
# 	dist(muut) = summa(muut)./vertailuja(muut);
# 	clear summa; clear vertailuja;

# 	Z = linkage(dist');


# 	%----------------------------------------------------------------------------------------


# 	function [Z, distances]=getDistances(data_matrix,nclusters)

# 	%finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance
# 	%gives partition in 8-bit format
# 	%allocates all alleles of a single individual into the same basket
# 	%data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row,
# 	%i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row
# 	%missing values are indicated by zeros in the partition and by negative integers in the data_matrix.

# 	size_data=size(data_matrix);
# 	nloci=size_data(2)-1;
# 	n=max(data_matrix(:,end));
# 	distances=zeros(nchoosek(n,2),1);
# 	pointer=1;
# 	for i=1:n-1
# 		i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci);
# 		for j=i+1:n
# 			d_ij=0;
# 			j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci);
# 			vertailuja = 0;
# 			for k=1:size(i_data,1)
# 				for l=1:size(j_data,1)
# 					here_i=find(i_data(k,:)>=0);
# 					here_j=find(j_data(l,:)>=0);
# 					here_joint=intersect(here_i,here_j);
# 					vertailuja = vertailuja + length(here_joint);
# 					d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint)));
# 				end
# 			end
# 			d_ij = d_ij / vertailuja;
# 			distances(pointer)=d_ij;
# 			pointer=pointer+1;
# 		end
# 	end

# 	Z=linkage(distances');


# 	%----------------------------------------------------------------------------------------


# 	function Z = linkage(Y, method)
# 	[k, n] = size(Y);
# 	m = (1+sqrt(1+8*n))/2;
# 	if k ~= 1 | m ~= fix(m)
# 	  error('The first input has to match the output of the PDIST function in size.');
# 	end
# 	if nargin == 1 % set default switch to be 'co'
# 	   method = 'co';
# 	end
# 	method = lower(method(1:2)); % simplify the switch string.
# 	monotonic = 1;
# 	Z = zeros(m-1,3); % allocate the output matrix.
# 	N = zeros(1,2*m-1);
# 	N(1:m) = 1;
# 	n = m; % since m is changing, we need to save m in n.
# 	R = 1:n;
# 	for s = 1:(n-1)
# 	   X = Y;
# 	   [v, k] = min(X);
# 	   i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1)));
# 	   j = k - (i-1)*(m-i/2)+i;
# 	   Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A
# 	   I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables.
# 	   U = [I1 I2 I3];
# 	   I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3];
# 	   J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3];

# 	   switch method
# 	   case 'si' %single linkage
# 		  Y(I) = min(Y(I),Y(J));
# 	   case 'av' % average linkage
# 		  Y(I) = Y(I) + Y(J);
# 	   case 'co' %complete linkage
# 		  Y(I) = max(Y(I),Y(J));
# 	   case 'ce' % centroid linkage
# 		  K = N(R(i))+N(R(j));
# 		  Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K;
# 	   case 'wa'
# 		  Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ...
# 		  N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U)));
# 	   end
# 	   J = [J i*(m-(i+1)/2)-m+j];
# 	   Y(J) = []; % no need for the cluster information about j.

# 	   % update m, N, R
# 	   m = m-1;
# 	   N(n+s) = N(R(i)) + N(R(j));
# 	   R(i) = n+s;
# 	   R(j:(n-1))=R((j+1):n);
# 	end


# 	%-----------------------------------------------------------------------------------


# 	function popnames = initPopNames(nameFile, indexFile)
# 	%Palauttaa tyhj<68>n, mik<69>li nimitiedosto ja indeksitiedosto
# 	% eiv<69>t olleet yht?pitki?

# 	popnames = [];
# 	indices = load(indexFile);

# 	fid = fopen(nameFile);
# 	if fid == -1
# 		%File didn't exist
# 		msgbox('Loading of the population names was unsuccessful', ...
# 			'Error', 'error');
# 		return;
# 	end;
# 	line = fgetl(fid);
# 	counter = 1;
# 	while (line ~= -1) & ~isempty(line)
# 		names{counter} = line;
# 		line = fgetl(fid);
# 		counter = counter + 1;
# 	end;
# 	fclose(fid);

# 	if length(names) ~= length(indices)
# 		disp('The number of population names must be equal to the number of ');
# 		disp('entries in the file specifying indices of the first individuals of ');
# 		disp('each population.');
# 		return;
# 	end

# 	popnames = cell(length(names), 2);
# 	for i = 1:length(names)
# 		popnames{i,1} = names(i);
# 		popnames{i,2} = indices(i);
# 	end


# 	%-----------------------------------------------------------------------------------


# 	%--------------------------------------------------------------------------

# 	function logml = ...
# 		initialCounts(partition, data, npops, rows, noalle, adjprior)

# 	nloci=size(data,2);
# 	ninds = size(rows, 1);

# 	koot = rows(:,1) - rows(:,2) + 1;
# 	maxSize = max(koot);

# 	counts = zeros(max(noalle),nloci,npops);
# 	sumcounts = zeros(npops,nloci);
# 	for i=1:npops
# 		for j=1:nloci
# 			havainnotLokuksessa = find(partition==i & data(:,j)>=0);
# 			sumcounts(i,j) = length(havainnotLokuksessa);
# 			for k=1:noalle(j)
# 				alleleCode = k;
# 				N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode));
# 				counts(k,j,i) = N_ijk;
# 			end
# 		end
# 	end

# 	%initializeGammaln(ninds, maxSize, max(noalle));

# 	logml = laskeLoggis(counts, sumcounts, adjprior);


# 	%-----------------------------------------------------------------------


# 	function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd)
# 	nloci = size(counts,2);
# 	npops = size(counts,3);
# 	adjnoalle = zeros(max(noalle),nloci);
# 	for j=1:nloci
# 		adjnoalle(1:noalle(j),j)=noalle(j);
# 		if (noalle(j)<max(noalle))
# 			adjnoalle(noalle(j)+1:end,j)=1;
# 		end
# 	end

# 	%logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ...
# 	%    - npops*sum(sum(gammaln(adjprior))) - ...
# 	%    sum(sum(gammaln(1+sumcounts)));
# 	%logml = logml2;

# 	global GAMMA_LN;
# 	rowsInG = size(data,1)+rowsFromInd;

# 	logml = sum(sum(sum(GAMMA_LN(counts+1 + repmat(rowsInG*(adjnoalle-1),[1 1 npops]))))) ...
# 		- npops*sum(sum(GAMMA_LN(1, adjnoalle))) ...
# 		-sum(sum(GAMMA_LN(sumcounts+1,1)));


# 	%--------------------------------------------------------------------------


# 	function initializeGammaln(ninds, rowsFromInd, maxAlleles)
# 	%Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j)
# 	global GAMMA_LN;
# 	GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxAlleles);
# 	for i=1:(ninds+1)*rowsFromInd
# 		for j=1:maxAlleles
# 			GAMMA_LN(i,j)=gammaln((i-1) + 1/j);
# 		end
# 	end

# 	%---------------------------------------------------------------


# 	function dispLine;
# 	disp('---------------------------------------------------');

# 	%--------------------------------------------------------------

# 	function num2 = omaRound(num)
# 	% Py<50>rist<73><74> luvun num 1 desimaalin tarkkuuteen
# 	num = num*10;
# 	num = round(num);
# 	num2 = num/10;

# 	%--------------------------------------------------------------------

# 	function dist2 = laskeOsaDist(inds2, dist, ninds)
# 	% Muodostaa dist vektorista osavektorin, joka sis<69>lt<6C><74> yksil<69>iden inds2
# 	% v<>liset et<65>isyydet. ninds=kaikkien yksil<69>iden lukum<75><6D>r?

# 	ninds2 = length(inds2);
# 	apu = zeros(nchoosek(ninds2,2),2);
# 	rivi = 1;
# 	for i=1:ninds2-1
# 		for j=i+1:ninds2
# 			apu(rivi, 1) = inds2(i);
# 			apu(rivi, 2) = inds2(j);
# 			rivi = rivi+1;
# 		end
# 	end
# 	apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1));
# 	dist2 = dist(apu);

# 	%--------------------------------------------------------------------------

# 	function [emptyPop, pops] = findEmptyPop(npops)
# 	% Palauttaa ensimm<6D>isen tyhj<68>n populaation indeksin. Jos tyhji?
# 	% populaatioita ei ole, palauttaa -1:n.

# 	global PARTITION;
# 	pops = unique(PARTITION)';
# 	if (length(pops) ==npops)
# 		emptyPop = -1;
# 	else
# 		popDiff = diff([0 pops npops+1]);
# 		emptyPop = min(find(popDiff > 1));
# 	end

# 	%------------------------------------------------------

# 	function loggis = laskeLoggis(counts, sumcounts, adjprior)
# 	npops = size(counts,3);

# 	logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ...
# 		- npops*sum(sum(gammaln(adjprior))) - ...
# 		sum(sum(gammaln(1+sumcounts)));
# 	loggis = logml2;