2020-05-20 15:34:40 +02:00
|
|
|
|
#' @title Clustering of individuals
|
2020-06-24 11:48:23 +02:00
|
|
|
|
#' @param tietue File
|
2020-06-08 13:42:36 +02:00
|
|
|
|
#' @param format Format of the data ("BAPS", "GenePop" or "Preprocessed")
|
|
|
|
|
|
#' @param savePreProcessed Save the pre-processed data?
|
2020-06-24 11:48:23 +02:00
|
|
|
|
#' @param filePreProcessed Is the file already processed?
|
|
|
|
|
|
#' @importFrom utils read.delim
|
2020-05-20 15:34:40 +02:00
|
|
|
|
#' @export
|
2020-06-08 13:42:36 +02:00
|
|
|
|
greedyMix <- function(
|
|
|
|
|
|
tietue,
|
|
|
|
|
|
format = NULL,
|
|
|
|
|
|
savePreProcessed = NULL,
|
|
|
|
|
|
filePreProcessed = NULL
|
|
|
|
|
|
) {
|
2020-05-20 15:34:40 +02:00
|
|
|
|
# ASK: graphical components. Remove?
|
|
|
|
|
|
# check whether fixed k mode is selected
|
|
|
|
|
|
# h0 <- findobj('Tag','fixk_menu')
|
|
|
|
|
|
# fixedK = get(h0, 'userdata');
|
|
|
|
|
|
|
|
|
|
|
|
# if fixedK
|
|
|
|
|
|
# if ~(fixKWarning == 1) % call function fixKWarning
|
|
|
|
|
|
# return
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
# % check whether partition compare mode is selected
|
|
|
|
|
|
# h1 = findobj('Tag','partitioncompare_menu');
|
|
|
|
|
|
# partitionCompare = get(h1, 'userdata');
|
|
|
|
|
|
|
2020-06-08 13:42:36 +02:00
|
|
|
|
if (is(tietue, "list") | is(tietue, "character")) {
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
|
|
|
|
# Defining type of file
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
|
|
|
|
if (is.null(format)) {
|
|
|
|
|
|
input_type <- inputdlg(
|
|
|
|
|
|
paste(
|
|
|
|
|
|
'Specify the format of your data:\n',
|
|
|
|
|
|
'1) BAPS-format\n',
|
|
|
|
|
|
'2) GenePop-format\n',
|
|
|
|
|
|
'3) Preprocessed data\n'
|
|
|
|
|
|
)
|
2020-05-20 15:34:40 +02:00
|
|
|
|
)
|
2020-06-08 13:42:36 +02:00
|
|
|
|
# Converting from number into name
|
|
|
|
|
|
input_type_name <- switch(
|
|
|
|
|
|
input_type, 'BAPS-format', 'GenePop-format', 'Preprocessed data'
|
|
|
|
|
|
)
|
|
|
|
|
|
} else {
|
|
|
|
|
|
input_type_name <- paste0(format, "-format")
|
|
|
|
|
|
}
|
2020-05-20 15:34:40 +02:00
|
|
|
|
if (length(input_type_name) == 0) {
|
|
|
|
|
|
stop('Invalid alternative')
|
|
|
|
|
|
} else if (input_type_name == 'BAPS-format') {
|
2020-06-24 12:37:38 +02:00
|
|
|
|
# ------------------------------------------------------------------
|
|
|
|
|
|
# Treating BAPS-formatted files
|
|
|
|
|
|
# ------------------------------------------------------------------
|
2020-06-08 13:42:36 +02:00
|
|
|
|
if (!is(tietue, "character")) {
|
2020-06-24 11:48:23 +02:00
|
|
|
|
pathname_filename <- uigetfile(
|
|
|
|
|
|
"*.txt", "Load data in BAPS-format"
|
|
|
|
|
|
)
|
2020-06-08 13:42:36 +02:00
|
|
|
|
} else {
|
|
|
|
|
|
pathname_filename <- tietue
|
|
|
|
|
|
}
|
2020-05-20 15:34:40 +02:00
|
|
|
|
|
|
|
|
|
|
# ASK: remove?
|
|
|
|
|
|
# if ~isempty(partitionCompare)
|
|
|
|
|
|
# fprintf(1,'Data: %s\n',[pathname filename]);
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
2020-06-24 12:37:38 +02:00
|
|
|
|
data <- read.delim(pathname_filename) # TODO: discover delimiter
|
2020-06-24 11:49:16 +02:00
|
|
|
|
ninds <- testaaOnkoKunnollinenBapsData(data) # testing
|
|
|
|
|
|
if (ninds == 0) stop('Incorrect Data-file')
|
2020-05-20 15:34:40 +02:00
|
|
|
|
|
|
|
|
|
|
# ASK: remove?
|
|
|
|
|
|
# h0 = findobj('Tag','filename1_text');
|
|
|
|
|
|
# set(h0,'String',filename); clear h0;
|
|
|
|
|
|
cat(
|
|
|
|
|
|
'When using data which are in BAPS-format,',
|
|
|
|
|
|
'you can specify the sampling populations of the',
|
|
|
|
|
|
'individuals by giving two additional files:',
|
|
|
|
|
|
'one containing the names of the populations,',
|
|
|
|
|
|
'the other containing the indices of the first',
|
|
|
|
|
|
'individuals of the populations.'
|
|
|
|
|
|
)
|
|
|
|
|
|
input_pops <- inputdlg(
|
|
|
|
|
|
prompt = 'Do you wish to specify the sampling populations? [y/N]',
|
|
|
|
|
|
definput = 'N'
|
|
|
|
|
|
)
|
|
|
|
|
|
if (tolower(input_pops) %in% c('yes', 'y')) {
|
|
|
|
|
|
popfile <- uigetfile('*.txt', 'Load population names')
|
|
|
|
|
|
kysyToinen <- ifelse(popfile$name == 0, 0, 1)
|
|
|
|
|
|
if (kysyToinen == 1) {
|
|
|
|
|
|
indicesfile <- uigetfile('*.txt', 'Load population indices')
|
|
|
|
|
|
if (indicesfile == 0) {
|
|
|
|
|
|
popnames = ""
|
|
|
|
|
|
} else {
|
|
|
|
|
|
# popnames = initPopNames([namepath namefile],[indicespath indicesfile]) # TODO: translate this fun
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
popnames <- ""
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
popnames <- ""
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO: translate this function
|
|
|
|
|
|
# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: translate
|
2020-06-08 13:42:36 +02:00
|
|
|
|
if (is.null(savePreProcessed)) {
|
|
|
|
|
|
save_preproc <- questdlg(
|
|
|
|
|
|
quest = 'Do you wish to save pre-processed data?',
|
|
|
|
|
|
dlgtitle = 'Save pre-processed data?',
|
|
|
|
|
|
defbtn = 'y'
|
|
|
|
|
|
)
|
|
|
|
|
|
} else {
|
|
|
|
|
|
save_preproc <- savePreProcessed
|
|
|
|
|
|
}
|
|
|
|
|
|
if (save_preproc %in% c('y', 'yes', TRUE)) {
|
2020-05-20 15:34:40 +02:00
|
|
|
|
file_out <- uiputfile('.rda','Save pre-processed data as')
|
|
|
|
|
|
kokonimi <- paste0(file_out$path, file_out$name)
|
|
|
|
|
|
c <- list()
|
|
|
|
|
|
c$data <- data
|
|
|
|
|
|
c$rowsFromInd <- rowsFromInd
|
|
|
|
|
|
c$alleleCodes <- alleleCodes
|
|
|
|
|
|
c$noalle <- noalle
|
|
|
|
|
|
c$adjprior <- adjprior
|
|
|
|
|
|
c$priorTerm <- priorTerm
|
|
|
|
|
|
c$dist <- dist
|
|
|
|
|
|
c$popnames <- popnames
|
|
|
|
|
|
c$Z <- Z
|
|
|
|
|
|
save(c, file = kokonimi)
|
|
|
|
|
|
rm(c)
|
|
|
|
|
|
}
|
|
|
|
|
|
} else if (input_type_name == 'GenePop-format') {
|
2020-06-24 12:37:38 +02:00
|
|
|
|
# ------------------------------------------------------------------
|
|
|
|
|
|
# Treating GenePop-formatted files
|
|
|
|
|
|
# ------------------------------------------------------------------
|
2020-06-08 13:42:36 +02:00
|
|
|
|
if (!is(tietue, "character")) {
|
|
|
|
|
|
filename_pathname <- uigetfile(
|
|
|
|
|
|
filter = '*.txt',
|
|
|
|
|
|
title = 'Load data in GenePop-format'
|
|
|
|
|
|
)
|
|
|
|
|
|
if (filename_pathname$name == 0) stop("No name provided")
|
|
|
|
|
|
} else {
|
|
|
|
|
|
filename_pathname <- tietue
|
|
|
|
|
|
}
|
2020-05-20 15:34:40 +02:00
|
|
|
|
|
|
|
|
|
|
# ASK: remove?
|
|
|
|
|
|
# if ~isempty(partitionCompare)
|
|
|
|
|
|
# fprintf(1,'Data: %s\n',[pathname filename]);
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
2020-07-28 11:45:28 +02:00
|
|
|
|
kunnossa <- testaaGenePopData(filename_pathname)
|
|
|
|
|
|
if (kunnossa == 0) stop("testaaGenePopData returned 0")
|
2020-07-28 15:51:28 +02:00
|
|
|
|
data_popnames <- lueGenePopData(filename_pathname)
|
|
|
|
|
|
data <- data_popnames$data
|
|
|
|
|
|
popnames <- data_popnames$popnames
|
2020-05-20 15:34:40 +02:00
|
|
|
|
|
|
|
|
|
|
# h0 = findobj('Tag','filename1_text');
|
|
|
|
|
|
# set(h0,'String',filename); clear h0;
|
|
|
|
|
|
|
2020-07-28 15:51:28 +02:00
|
|
|
|
browser()#TEMP
|
|
|
|
|
|
list_dranap <- handleData(data) # FIXME: debug
|
|
|
|
|
|
data <- list_dranap$newData
|
|
|
|
|
|
rowsFromInd <- list_dranap$rowsFromInd
|
|
|
|
|
|
alleleCodes <- list_dranap$alleleCodes
|
|
|
|
|
|
noalle <- list_dranap$noalle
|
|
|
|
|
|
adjprior <- list_dranap$adjprior
|
|
|
|
|
|
priorTerm <- list_dranap$prioterm
|
|
|
|
|
|
|
|
|
|
|
|
list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug
|
2020-07-31 14:00:29 +02:00
|
|
|
|
Z <- list_Zd$Z
|
|
|
|
|
|
dist <- list_Zd$dist
|
2020-07-28 15:51:28 +02:00
|
|
|
|
|
2020-06-08 13:42:36 +02:00
|
|
|
|
if (is.null(savePreProcessed)) {
|
|
|
|
|
|
save_preproc <- questdlg(
|
|
|
|
|
|
quest = 'Do you wish to save pre-processed data?',
|
|
|
|
|
|
dlgtitle = 'Save pre-processed data?',
|
|
|
|
|
|
defbtn = 'y'
|
|
|
|
|
|
)
|
|
|
|
|
|
} else {
|
|
|
|
|
|
save_preproc <- savePreProcessed
|
|
|
|
|
|
}
|
|
|
|
|
|
if (save_preproc %in% c('y', 'Yes', TRUE)) {
|
2020-05-20 15:34:40 +02:00
|
|
|
|
file_out <- uiputfile('.rda','Save pre-processed data as')
|
|
|
|
|
|
kokonimi <- paste0(file_out$path, file_out$name)
|
2020-06-08 13:42:36 +02:00
|
|
|
|
# FIXME: translate functions above so the objects below exist
|
2020-05-20 15:34:40 +02:00
|
|
|
|
c$data <- data
|
|
|
|
|
|
c$rowsFromInd <- rowsFromInd
|
|
|
|
|
|
c$alleleCodes <- alleleCodes
|
|
|
|
|
|
c$noalle <- noalle
|
|
|
|
|
|
c$adjprior <- adjprior
|
|
|
|
|
|
c$priorTerm <- priorTerm
|
|
|
|
|
|
c$dist <- dist
|
|
|
|
|
|
c$popnames <- popnames
|
|
|
|
|
|
c$Z <- Z
|
|
|
|
|
|
save(c, file = kokonimi)
|
|
|
|
|
|
rm(c)
|
|
|
|
|
|
}
|
|
|
|
|
|
} else if (input_type_name == 'Preprocessed data') {
|
2020-06-24 12:37:38 +02:00
|
|
|
|
# ------------------------------------------------------------------
|
|
|
|
|
|
# Handling Pre-processed data
|
|
|
|
|
|
# ------------------------------------------------------------------
|
2020-05-20 15:34:40 +02:00
|
|
|
|
file_in <- uigetfile(
|
|
|
|
|
|
filter = '*.txt',
|
|
|
|
|
|
title = 'Load pre-processed data in GenePop-format'
|
|
|
|
|
|
)
|
|
|
|
|
|
if (file_in$name == 0) stop("No name provided")
|
|
|
|
|
|
|
|
|
|
|
|
# ASK: remove?
|
|
|
|
|
|
# h0 = findobj('Tag','filename1_text');
|
|
|
|
|
|
# set(h0,'String',filename); clear h0;
|
|
|
|
|
|
# if ~isempty(partitionCompare)
|
|
|
|
|
|
# fprintf(1,'Data: %s\n',[pathname filename]);
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
struct_array <- readRDS(paste0(file_in$path, file_in$name))
|
|
|
|
|
|
if (isfield(struct_array,'c')) { # Matlab versio
|
|
|
|
|
|
c <- struct_array$c
|
|
|
|
|
|
if (!isfield(c,'dist')) stop('Incorrect file format')
|
|
|
|
|
|
} else if (isfield(struct_array,'dist')) { #Mideva versio
|
|
|
|
|
|
c <- struct_array
|
|
|
|
|
|
} else {
|
|
|
|
|
|
stop('Incorrect file format')
|
|
|
|
|
|
}
|
|
|
|
|
|
data <- double(c$data)
|
|
|
|
|
|
rowsFromInd <- c$rowsFromInd
|
|
|
|
|
|
alleleCodes <- c$alleleCodes
|
|
|
|
|
|
noalle <- c$noalle
|
|
|
|
|
|
adjprior <- c$adjprior
|
|
|
|
|
|
priorTerm <- c$priorTerm
|
|
|
|
|
|
dist <- c$dist
|
|
|
|
|
|
popnames <- c$popnames
|
|
|
|
|
|
Z <- c$Z
|
|
|
|
|
|
rm(c)
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
data <- double(tietue$data)
|
|
|
|
|
|
rowsFromInd <- tietue$rowsFromInd
|
|
|
|
|
|
alleleCodes <- tietue$alleleCodes
|
|
|
|
|
|
noalle <- tietue$noalle
|
|
|
|
|
|
adjprior <- tietue$adjprior
|
|
|
|
|
|
priorTerm <- tietue$priorTerm
|
|
|
|
|
|
dist <- tietue$dist
|
|
|
|
|
|
popnames <- tietue$popnames
|
|
|
|
|
|
Z <- tietue$Z
|
|
|
|
|
|
rm(tietue)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# ==========================================================================
|
|
|
|
|
|
# Declaring global variables
|
|
|
|
|
|
# ==========================================================================
|
|
|
|
|
|
PARTITION <- vector()
|
|
|
|
|
|
COUNTS <- vector()
|
|
|
|
|
|
SUMCOUNTS <- vector()
|
|
|
|
|
|
POP_LOGML <- vector()
|
|
|
|
|
|
clearGlobalVars <- vector()
|
|
|
|
|
|
# ==========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
c$data <- data
|
|
|
|
|
|
c$noalle <- noalle
|
|
|
|
|
|
c$adjprior <- adjprior
|
|
|
|
|
|
c$priorTerm <- priorTerm
|
|
|
|
|
|
c$dist <- dist
|
|
|
|
|
|
c$Z <- Z
|
|
|
|
|
|
c$rowsFromInd <- rowsFromInd
|
|
|
|
|
|
|
|
|
|
|
|
ninds <- length(unique(data[, ncol(data)]))
|
|
|
|
|
|
ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd)
|
|
|
|
|
|
c$rows <- c(ekat, ekat + rowsFromInd - 1)
|
|
|
|
|
|
|
|
|
|
|
|
# partition compare
|
|
|
|
|
|
if (!is.null(partitionCompare)) {
|
|
|
|
|
|
nsamplingunits <- size(c$rows, 1)
|
|
|
|
|
|
partitions <- partitionCompare$partitions
|
|
|
|
|
|
npartitions <- size(partitions, 2)
|
|
|
|
|
|
partitionLogml <- zeros(1, npartitions)
|
|
|
|
|
|
for (i in seq_len(npartitions)) {
|
|
|
|
|
|
# number of unique partition lables
|
|
|
|
|
|
npops <- length(unique(partitions[, i]))
|
|
|
|
|
|
|
|
|
|
|
|
partitionInd <- zeros(ninds * rowsFromInd, 1)
|
|
|
|
|
|
partitionSample <- partitions[, i]
|
|
|
|
|
|
for (j in seq_len(nsamplingunits)) {
|
|
|
|
|
|
partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
|
|
|
|
|
|
}
|
|
|
|
|
|
# partitionLogml[i] = initialCounts(
|
|
|
|
|
|
# partitionInd,
|
|
|
|
|
|
# data[, seq_len(end - 1)],
|
|
|
|
|
|
# npops,
|
|
|
|
|
|
# c$rows,
|
|
|
|
|
|
# noalle,
|
|
|
|
|
|
# adjprior
|
|
|
|
|
|
# ) #TODO translate
|
|
|
|
|
|
}
|
|
|
|
|
|
# return the logml result
|
|
|
|
|
|
partitionCompare$logmls <- partitionLogml
|
|
|
|
|
|
# set(h1, 'userdata', partitionCompare) # ASK remove?
|
|
|
|
|
|
return()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# ASK remove (graphical part)?
|
|
|
|
|
|
# if (fixedK) {
|
2020-06-24 12:37:38 +02:00
|
|
|
|
# #logml_npops_partitionSummary <- indMix_fixK(c) # ASK translate?
|
2020-05-20 15:34:40 +02:00
|
|
|
|
# } else {
|
2020-06-24 12:37:38 +02:00
|
|
|
|
# #logml_npops_partitionSummary <- indMix(c) # ASK translate?
|
2020-05-20 15:34:40 +02:00
|
|
|
|
# }
|
|
|
|
|
|
# if (logml_npops_partitionSummary$logml == 1) return()
|
|
|
|
|
|
|
|
|
|
|
|
data <- data[, seq_len(ncol(data) - 1)]
|
|
|
|
|
|
|
|
|
|
|
|
# ASK: remove?
|
2020-07-14 14:52:31 +02:00
|
|
|
|
# h0 = findobj('Tag','filename1_text')
|
|
|
|
|
|
# inp = get(h0,'String');
|
|
|
|
|
|
# h0 = findobj('Tag','filename2_text')
|
2020-05-20 15:34:40 +02:00
|
|
|
|
# outp = get(h0,'String');
|
|
|
|
|
|
|
2020-07-14 14:52:31 +02:00
|
|
|
|
changesInLogml <- writeMixtureInfo(
|
|
|
|
|
|
logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
|
|
|
|
|
|
popnames, fixedK
|
|
|
|
|
|
) # FIXMEL depends on get function above
|
2020-05-20 15:34:40 +02:00
|
|
|
|
|
2020-06-24 12:37:38 +02:00
|
|
|
|
# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder
|
2020-05-20 15:34:40 +02:00
|
|
|
|
|
|
|
|
|
|
talle <- questdlg(
|
|
|
|
|
|
quest = paste(
|
|
|
|
|
|
'Do you want to save the mixture populations',
|
|
|
|
|
|
'so that you can use them later in admixture analysis?'
|
|
|
|
|
|
),
|
|
|
|
|
|
dlgtitle = 'Save results?',
|
|
|
|
|
|
defbtn = 'y'
|
|
|
|
|
|
)
|
|
|
|
|
|
if (talle %in% c('Yes', 'y')) {
|
|
|
|
|
|
filename_pathname <- uiputfile('.mat','Save results as')
|
|
|
|
|
|
|
2020-06-24 12:37:38 +02:00
|
|
|
|
# ======================================================================
|
2020-05-20 15:34:40 +02:00
|
|
|
|
cond <- (sum(filename_pathname$name) == 0) |
|
|
|
|
|
|
(sum(filename_pathname$path) == 0)
|
|
|
|
|
|
if (cond) {
|
|
|
|
|
|
# Cancel was pressed
|
|
|
|
|
|
return()
|
|
|
|
|
|
} else {
|
|
|
|
|
|
# copy 'baps4_output.baps' into the text file with the same name.
|
|
|
|
|
|
if (file.exists('baps4_output.baps')) {
|
|
|
|
|
|
file.copy(
|
|
|
|
|
|
from = 'baps4_output.baps',
|
|
|
|
|
|
to = paste0(
|
|
|
|
|
|
filename_pathname$path, filename_pathname$name, '.txt'
|
|
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
file.remove('baps4_output.baps')
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2020-06-24 12:37:38 +02:00
|
|
|
|
# ======================================================================
|
2020-05-20 15:34:40 +02:00
|
|
|
|
|
|
|
|
|
|
c$PARTITION <- PARTITION
|
|
|
|
|
|
c$COUNTS <- COUNTS
|
|
|
|
|
|
c$SUMCOUNTS <- SUMCOUNTS
|
|
|
|
|
|
c$alleleCodes <- alleleCodes
|
|
|
|
|
|
c$adjprior <- adjprior
|
|
|
|
|
|
c$popnames <- popnames
|
|
|
|
|
|
c$rowsFromInd <- rowsFromInd
|
|
|
|
|
|
c$data <- data
|
|
|
|
|
|
c$npops <- npops
|
|
|
|
|
|
c$noalle <- noalle
|
|
|
|
|
|
c$mixtureType <- 'mix'
|
|
|
|
|
|
c$logml <- logml
|
|
|
|
|
|
c$changesInLogml <- changesInLogml
|
|
|
|
|
|
|
|
|
|
|
|
save(c, file = paste0(filename_pathname$path, filename_pathname$name))
|
|
|
|
|
|
} else {
|
|
|
|
|
|
if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps')
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2020-05-20 07:34:36 +02:00
|
|
|
|
|
|
|
|
|
|
# %-------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
# function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex)
|
|
|
|
|
|
# % Tiedet<65><74>n, ett?annettu logml on isompi kuin huonoin arvo
|
|
|
|
|
|
# % partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole
|
|
|
|
|
|
# % annettua logml arvoa, niin lis<69>t<EFBFBD><74>n worstIndex:in kohtaan uusi logml ja
|
|
|
|
|
|
# % nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mit<69><74>n.
|
|
|
|
|
|
|
|
|
|
|
|
# apu = find(abs(partitionSummary(:,2)-logml)<1e-5);
|
|
|
|
|
|
# if isempty(apu)
|
|
|
|
|
|
# % Nyt l<>ydetty partitio ei ole viel?kirjattuna summaryyn.
|
|
|
|
|
|
# global PARTITION;
|
|
|
|
|
|
# npops = length(unique(PARTITION));
|
|
|
|
|
|
# partitionSummary(worstIndex,1) = npops;
|
|
|
|
|
|
# partitionSummary(worstIndex,2) = logml;
|
|
|
|
|
|
# added = 1;
|
|
|
|
|
|
# else
|
|
|
|
|
|
# added = 0;
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %--------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function [suurin, i2] = arvoSeuraavaTila(muutokset, logml)
|
|
|
|
|
|
# % Suorittaa yksil<69>n seuraavan tilan arvonnan
|
|
|
|
|
|
|
|
|
|
|
|
# y = logml + muutokset; % siirron j<>lkeiset logml:t
|
|
|
|
|
|
# y = y - max(y);
|
|
|
|
|
|
# y = exp(y);
|
|
|
|
|
|
# summa = sum(y);
|
|
|
|
|
|
# y = y/summa;
|
|
|
|
|
|
# y = cumsum(y);
|
|
|
|
|
|
|
|
|
|
|
|
# i2 = rand_disc(y); % uusi kori
|
|
|
|
|
|
# suurin = muutokset(i2);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %--------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function svar=rand_disc(CDF)
|
|
|
|
|
|
# %returns an index of a value from a discrete distribution using inversion method
|
|
|
|
|
|
# slump=rand;
|
|
|
|
|
|
# har=find(CDF>slump);
|
|
|
|
|
|
# svar=har(1);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %-------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ...
|
|
|
|
|
|
# adjprior, priorTerm)
|
|
|
|
|
|
# % Suorittaa globaalien muuttujien muutokset, kun yksil?ind
|
|
|
|
|
|
# % on siirret<65><74>n koriin i2.
|
|
|
|
|
|
|
|
|
|
|
|
# global PARTITION;
|
|
|
|
|
|
# global COUNTS;
|
|
|
|
|
|
# global SUMCOUNTS;
|
|
|
|
|
|
# global POP_LOGML;
|
|
|
|
|
|
|
|
|
|
|
|
# i1 = PARTITION(ind);
|
|
|
|
|
|
# PARTITION(ind)=i2;
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
|
|
|
|
|
|
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
|
|
|
|
|
|
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
|
|
|
|
|
|
|
|
|
|
|
|
# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %---------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function updateGlobalVariables2( ...
|
|
|
|
|
|
# i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm);
|
|
|
|
|
|
# % Suorittaa globaalien muuttujien muutokset, kun kaikki
|
|
|
|
|
|
# % korissa i1 olevat yksil<69>t siirret<65><74>n koriin i2.
|
|
|
|
|
|
|
|
|
|
|
|
# global PARTITION;
|
|
|
|
|
|
# global COUNTS;
|
|
|
|
|
|
# global SUMCOUNTS;
|
|
|
|
|
|
# global POP_LOGML;
|
|
|
|
|
|
|
|
|
|
|
|
# inds = find(PARTITION==i1);
|
|
|
|
|
|
# PARTITION(inds) = i2;
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
|
|
|
|
|
|
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
|
|
|
|
|
|
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
|
|
|
|
|
|
|
|
|
|
|
|
# POP_LOGML(i1) = 0;
|
|
|
|
|
|
# POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ...
|
|
|
|
|
|
# adjprior, priorTerm, i2);
|
|
|
|
|
|
# % Suorittaa globaalien muuttujien p<>ivitykset, kun yksil<69>t 'muuttuvat'
|
|
|
|
|
|
# % siirret<65><74>n koriin i2. Ennen siirtoa yksil<69>iden on kuuluttava samaan
|
|
|
|
|
|
# % koriin.
|
|
|
|
|
|
|
|
|
|
|
|
# global PARTITION;
|
|
|
|
|
|
# global COUNTS;
|
|
|
|
|
|
# global SUMCOUNTS;
|
|
|
|
|
|
# global POP_LOGML;
|
|
|
|
|
|
|
|
|
|
|
|
# i1 = PARTITION(muuttuvat(1));
|
|
|
|
|
|
# PARTITION(muuttuvat) = i2;
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
|
|
|
|
|
|
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
|
|
|
|
|
|
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
|
|
|
|
|
|
|
|
|
|
|
|
# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %----------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm)
|
|
|
|
|
|
# % Palauttaa yksil<69>t j<>rjestyksess?siten, ett?ensimm<6D>isen?on
|
|
|
|
|
|
# % se, jonka poistaminen populaatiosta pop nostaisi logml:n
|
|
|
|
|
|
# % arvoa eniten.
|
|
|
|
|
|
|
|
|
|
|
|
# global COUNTS; global SUMCOUNTS;
|
|
|
|
|
|
# ninds = length(inds);
|
|
|
|
|
|
# apuTaulu = [inds, zeros(ninds,1)];
|
|
|
|
|
|
|
|
|
|
|
|
# for i=1:ninds
|
|
|
|
|
|
# ind = inds(i);
|
|
|
|
|
|
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
|
|
|
|
|
|
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
|
|
|
|
|
# diffInSumCounts = sum(diffInCounts);
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts;
|
|
|
|
|
|
# apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm);
|
|
|
|
|
|
# COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# apuTaulu = sortrows(apuTaulu,2);
|
|
|
|
|
|
# inds = apuTaulu(ninds:-1:1,1);
|
|
|
|
|
|
|
|
|
|
|
|
# %------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function [muutokset, diffInCounts] = ...
|
|
|
|
|
|
# laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm)
|
|
|
|
|
|
# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
|
|
|
|
|
|
# % muutos logml:ss? mik<69>li yksil?ind siirret<65><74>n koriin i.
|
|
|
|
|
|
# % diffInCounts on poistettava COUNTS:in siivusta i1 ja lis<69>tt<74>v?
|
|
|
|
|
|
# % COUNTS:in siivuun i2, mik<69>li muutos toteutetaan.
|
|
|
|
|
|
|
|
|
|
|
|
# global COUNTS; global SUMCOUNTS;
|
|
|
|
|
|
# global PARTITION; global POP_LOGML;
|
|
|
|
|
|
# npops = size(COUNTS,3);
|
|
|
|
|
|
# muutokset = zeros(npops,1);
|
|
|
|
|
|
|
|
|
|
|
|
# i1 = PARTITION(ind);
|
|
|
|
|
|
# i1_logml = POP_LOGML(i1);
|
|
|
|
|
|
|
|
|
|
|
|
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
|
|
|
|
|
|
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
|
|
|
|
|
# diffInSumCounts = sum(diffInCounts);
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
|
|
|
|
|
|
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
|
|
|
|
|
|
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
|
|
|
|
|
|
|
|
|
|
|
# i2 = [1:i1-1 , i1+1:npops];
|
|
|
|
|
|
# i2_logml = POP_LOGML(i2);
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
|
|
|
|
|
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
|
|
|
|
|
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
|
|
|
|
|
|
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
|
|
|
|
|
|
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
|
|
|
|
|
|
|
|
|
|
|
|
# muutokset(i2) = new_i1_logml - i1_logml ...
|
|
|
|
|
|
# + new_i2_logml - i2_logml;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function [muutokset, diffInCounts] = laskeMuutokset2( ...
|
|
|
|
|
|
# i1, rowsFromInd, data, adjprior, priorTerm);
|
|
|
|
|
|
# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
|
|
|
|
|
|
# % muutos logml:ss? mik<69>li korin i1 kaikki yksil<69>t siirret<65><74>n
|
|
|
|
|
|
# % koriin i.
|
|
|
|
|
|
|
|
|
|
|
|
# global COUNTS; global SUMCOUNTS;
|
|
|
|
|
|
# global PARTITION; global POP_LOGML;
|
|
|
|
|
|
# npops = size(COUNTS,3);
|
|
|
|
|
|
# muutokset = zeros(npops,1);
|
|
|
|
|
|
|
|
|
|
|
|
# i1_logml = POP_LOGML(i1);
|
|
|
|
|
|
|
|
|
|
|
|
# inds = find(PARTITION==i1);
|
|
|
|
|
|
# ninds = length(inds);
|
|
|
|
|
|
|
|
|
|
|
|
# if ninds==0
|
|
|
|
|
|
# diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2));
|
|
|
|
|
|
# return;
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
# rows = computeRows(rowsFromInd, inds, ninds);
|
|
|
|
|
|
|
|
|
|
|
|
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
|
|
|
|
|
# diffInSumCounts = sum(diffInCounts);
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
|
|
|
|
|
|
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
|
|
|
|
|
|
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
|
|
|
|
|
|
|
|
|
|
|
# i2 = [1:i1-1 , i1+1:npops];
|
|
|
|
|
|
# i2_logml = POP_LOGML(i2);
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
|
|
|
|
|
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
|
|
|
|
|
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
|
|
|
|
|
|
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
|
|
|
|
|
|
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
|
|
|
|
|
|
|
|
|
|
|
|
# muutokset(i2) = new_i1_logml - i1_logml ...
|
|
|
|
|
|
# + new_i2_logml - i2_logml;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ...
|
|
|
|
|
|
# data, adjprior, priorTerm, i1)
|
|
|
|
|
|
# % Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio
|
|
|
|
|
|
# % kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio
|
|
|
|
|
|
# % inds2(find(T2==i)) siirret<65><74>n koriin j.
|
|
|
|
|
|
|
|
|
|
|
|
# global COUNTS; global SUMCOUNTS;
|
|
|
|
|
|
# global PARTITION; global POP_LOGML;
|
|
|
|
|
|
# npops = size(COUNTS,3);
|
|
|
|
|
|
# npops2 = length(unique(T2));
|
|
|
|
|
|
# muutokset = zeros(npops2, npops);
|
|
|
|
|
|
|
|
|
|
|
|
# i1_logml = POP_LOGML(i1);
|
|
|
|
|
|
|
|
|
|
|
|
# for pop2 = 1:npops2
|
|
|
|
|
|
# inds = inds2(find(T2==pop2));
|
|
|
|
|
|
# ninds = length(inds);
|
|
|
|
|
|
# if ninds>0
|
|
|
|
|
|
# rows = computeRows(rowsFromInd, inds, ninds);
|
|
|
|
|
|
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
|
|
|
|
|
# diffInSumCounts = sum(diffInCounts);
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
|
|
|
|
|
|
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
|
|
|
|
|
|
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
|
|
|
|
|
|
|
|
|
|
|
# i2 = [1:i1-1 , i1+1:npops];
|
|
|
|
|
|
# i2_logml = POP_LOGML(i2)';
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
|
|
|
|
|
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
|
|
|
|
|
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)';
|
|
|
|
|
|
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
|
|
|
|
|
|
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
|
|
|
|
|
|
|
|
|
|
|
|
# muutokset(pop2,i2) = new_i1_logml - i1_logml ...
|
|
|
|
|
|
# + new_i2_logml - i2_logml;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
# function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ...
|
|
|
|
|
|
# priorTerm, i1, i2)
|
|
|
|
|
|
|
|
|
|
|
|
# % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi
|
|
|
|
|
|
# % muutos logml:ss? mik<69>li yksil?i vaihtaisi koria i1:n ja i2:n v<>lill?
|
|
|
|
|
|
|
|
|
|
|
|
# global COUNTS; global SUMCOUNTS;
|
|
|
|
|
|
# global PARTITION; global POP_LOGML;
|
|
|
|
|
|
|
|
|
|
|
|
# ninds = length(inds);
|
|
|
|
|
|
# muutokset = zeros(ninds,1);
|
|
|
|
|
|
|
|
|
|
|
|
# i1_logml = POP_LOGML(i1);
|
|
|
|
|
|
# i2_logml = POP_LOGML(i2);
|
|
|
|
|
|
|
|
|
|
|
|
# for i = 1:ninds
|
|
|
|
|
|
# ind = inds(i);
|
|
|
|
|
|
# if PARTITION(ind)==i1
|
|
|
|
|
|
# pop1 = i1; %mist?
|
|
|
|
|
|
# pop2 = i2; %mihin
|
|
|
|
|
|
# else
|
|
|
|
|
|
# pop1 = i2;
|
|
|
|
|
|
# pop2 = i1;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
|
|
|
|
|
|
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
|
|
|
|
|
# diffInSumCounts = sum(diffInCounts);
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts;
|
|
|
|
|
|
# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts;
|
|
|
|
|
|
|
|
|
|
|
|
# PARTITION(ind) = pop2;
|
|
|
|
|
|
|
|
|
|
|
|
# new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
|
|
|
|
|
|
|
|
|
|
|
# muutokset(i) = sum(new_logmls);
|
|
|
|
|
|
|
|
|
|
|
|
# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts;
|
|
|
|
|
|
# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts;
|
|
|
|
|
|
# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts;
|
|
|
|
|
|
|
|
|
|
|
|
# PARTITION(ind) = pop1;
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
# muutokset = muutokset - i1_logml - i2_logml;
|
|
|
|
|
|
|
|
|
|
|
|
# %--------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
|
|
|
|
|
|
# % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
|
|
|
|
|
|
# % lukum<75><6D>r<EFBFBD>t (vastaavasti kuin COUNTS:issa), jotka ovat data:n
|
|
|
|
|
|
# % riveill?rows.
|
|
|
|
|
|
|
|
|
|
|
|
# diffInCounts = zeros(max_noalle, nloci);
|
|
|
|
|
|
# for i=rows
|
|
|
|
|
|
# row = data(i,:);
|
|
|
|
|
|
# notEmpty = find(row>=0);
|
|
|
|
|
|
|
|
|
|
|
|
# if length(notEmpty)>0
|
|
|
|
|
|
# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ...
|
|
|
|
|
|
# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function popLogml = computePopulationLogml(pops, adjprior, priorTerm)
|
|
|
|
|
|
# % Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset
|
|
|
|
|
|
# % logml:t koreille, jotka on m<><6D>ritelty pops-muuttujalla.
|
|
|
|
|
|
|
|
|
|
|
|
# global COUNTS;
|
|
|
|
|
|
# global SUMCOUNTS;
|
|
|
|
|
|
# x = size(COUNTS,1);
|
|
|
|
|
|
# y = size(COUNTS,2);
|
|
|
|
|
|
# z = length(pops);
|
|
|
|
|
|
|
|
|
|
|
|
# popLogml = ...
|
|
|
|
|
|
# squeeze(sum(sum(reshape(...
|
|
|
|
|
|
# gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ...
|
|
|
|
|
|
# ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %-----------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function npops = poistaTyhjatPopulaatiot(npops)
|
|
|
|
|
|
# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
|
|
|
|
|
# % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.
|
|
|
|
|
|
|
|
|
|
|
|
# global COUNTS;
|
|
|
|
|
|
# global SUMCOUNTS;
|
|
|
|
|
|
# global PARTITION;
|
|
|
|
|
|
|
|
|
|
|
|
# notEmpty = find(any(SUMCOUNTS,2));
|
|
|
|
|
|
# COUNTS = COUNTS(:,:,notEmpty);
|
|
|
|
|
|
# SUMCOUNTS = SUMCOUNTS(notEmpty,:);
|
|
|
|
|
|
|
|
|
|
|
|
# for n=1:length(notEmpty)
|
|
|
|
|
|
# apu = find(PARTITION==notEmpty(n));
|
|
|
|
|
|
# PARTITION(apu)=n;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# npops = length(notEmpty);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %----------------------------------------------------------------------------------
|
|
|
|
|
|
# %Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.
|
|
|
|
|
|
|
|
|
|
|
|
# function initial_partition=admixture_initialization(data_matrix,nclusters,Z)
|
|
|
|
|
|
# size_data=size(data_matrix);
|
|
|
|
|
|
# nloci=size_data(2)-1;
|
|
|
|
|
|
# n=max(data_matrix(:,end));
|
|
|
|
|
|
# T=cluster_own(Z,nclusters);
|
|
|
|
|
|
# initial_partition=zeros(size_data(1),1);
|
|
|
|
|
|
# for i=1:n
|
|
|
|
|
|
# kori=T(i);
|
|
|
|
|
|
# here=find(data_matrix(:,end)==i);
|
|
|
|
|
|
# for j=1:length(here)
|
|
|
|
|
|
# initial_partition(here(j),1)=kori;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
# function T = cluster_own(Z,nclust)
|
|
|
|
|
|
# true=logical(1);
|
|
|
|
|
|
# false=logical(0);
|
|
|
|
|
|
# maxclust = nclust;
|
|
|
|
|
|
# % Start of algorithm
|
|
|
|
|
|
# m = size(Z,1)+1;
|
|
|
|
|
|
# T = zeros(m,1);
|
|
|
|
|
|
# % maximum number of clusters based on inconsistency
|
|
|
|
|
|
# if m <= maxclust
|
|
|
|
|
|
# T = (1:m)';
|
|
|
|
|
|
# elseif maxclust==1
|
|
|
|
|
|
# T = ones(m,1);
|
|
|
|
|
|
# else
|
|
|
|
|
|
# clsnum = 1;
|
|
|
|
|
|
# for k = (m-maxclust+1):(m-1)
|
|
|
|
|
|
# i = Z(k,1); % left tree
|
|
|
|
|
|
# if i <= m % original node, no leafs
|
|
|
|
|
|
# T(i) = clsnum;
|
|
|
|
|
|
# clsnum = clsnum + 1;
|
|
|
|
|
|
# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree
|
|
|
|
|
|
# T = clusternum(Z, T, i-m, clsnum);
|
|
|
|
|
|
# clsnum = clsnum + 1;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# i = Z(k,2); % right tree
|
|
|
|
|
|
# if i <= m % original node, no leafs
|
|
|
|
|
|
# T(i) = clsnum;
|
|
|
|
|
|
# clsnum = clsnum + 1;
|
|
|
|
|
|
# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree
|
|
|
|
|
|
# T = clusternum(Z, T, i-m, clsnum);
|
|
|
|
|
|
# clsnum = clsnum + 1;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
# function T = clusternum(X, T, k, c)
|
|
|
|
|
|
# m = size(X,1)+1;
|
|
|
|
|
|
# while(~isempty(k))
|
|
|
|
|
|
# % Get the children of nodes at this level
|
|
|
|
|
|
# children = X(k,1:2);
|
|
|
|
|
|
# children = children(:);
|
|
|
|
|
|
|
|
|
|
|
|
# % Assign this node number to leaf children
|
|
|
|
|
|
# t = (children<=m);
|
|
|
|
|
|
# T(children(t)) = c;
|
|
|
|
|
|
|
|
|
|
|
|
# % Move to next level
|
|
|
|
|
|
# k = children(~t) - m;
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
# %----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function [Z, distances]=getDistances(data_matrix,nclusters)
|
|
|
|
|
|
|
|
|
|
|
|
# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance
|
|
|
|
|
|
# %gives partition in 8-bit format
|
|
|
|
|
|
# %allocates all alleles of a single individual into the same basket
|
|
|
|
|
|
# %data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row,
|
|
|
|
|
|
# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row
|
|
|
|
|
|
# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix.
|
|
|
|
|
|
|
|
|
|
|
|
# size_data=size(data_matrix);
|
|
|
|
|
|
# nloci=size_data(2)-1;
|
|
|
|
|
|
# n=max(data_matrix(:,end));
|
|
|
|
|
|
# distances=zeros(nchoosek(n,2),1);
|
|
|
|
|
|
# pointer=1;
|
|
|
|
|
|
# for i=1:n-1
|
|
|
|
|
|
# i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci);
|
|
|
|
|
|
# for j=i+1:n
|
|
|
|
|
|
# d_ij=0;
|
|
|
|
|
|
# j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci);
|
|
|
|
|
|
# vertailuja = 0;
|
|
|
|
|
|
# for k=1:size(i_data,1)
|
|
|
|
|
|
# for l=1:size(j_data,1)
|
|
|
|
|
|
# here_i=find(i_data(k,:)>=0);
|
|
|
|
|
|
# here_j=find(j_data(l,:)>=0);
|
|
|
|
|
|
# here_joint=intersect(here_i,here_j);
|
|
|
|
|
|
# vertailuja = vertailuja + length(here_joint);
|
|
|
|
|
|
# d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint)));
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
# d_ij = d_ij / vertailuja;
|
|
|
|
|
|
# distances(pointer)=d_ij;
|
|
|
|
|
|
# pointer=pointer+1;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
# Z=linkage(distances');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
# function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd)
|
|
|
|
|
|
# nloci = size(counts,2);
|
|
|
|
|
|
# npops = size(counts,3);
|
|
|
|
|
|
# adjnoalle = zeros(max(noalle),nloci);
|
|
|
|
|
|
# for j=1:nloci
|
|
|
|
|
|
# adjnoalle(1:noalle(j),j)=noalle(j);
|
|
|
|
|
|
# if (noalle(j)<max(noalle))
|
|
|
|
|
|
# adjnoalle(noalle(j)+1:end,j)=1;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
# %logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ...
|
|
|
|
|
|
# % - npops*sum(sum(gammaln(adjprior))) - ...
|
|
|
|
|
|
# % sum(sum(gammaln(1+sumcounts)));
|
|
|
|
|
|
# %logml = logml2;
|
|
|
|
|
|
|
|
|
|
|
|
# global GAMMA_LN;
|
|
|
|
|
|
# rowsInG = size(data,1)+rowsFromInd;
|
|
|
|
|
|
|
|
|
|
|
|
# logml = sum(sum(sum(GAMMA_LN(counts+1 + repmat(rowsInG*(adjnoalle-1),[1 1 npops]))))) ...
|
|
|
|
|
|
# - npops*sum(sum(GAMMA_LN(1, adjnoalle))) ...
|
|
|
|
|
|
# -sum(sum(GAMMA_LN(sumcounts+1,1)));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# %--------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# function initializeGammaln(ninds, rowsFromInd, maxAlleles)
|
|
|
|
|
|
# %Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j)
|
|
|
|
|
|
# global GAMMA_LN;
|
|
|
|
|
|
# GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxAlleles);
|
|
|
|
|
|
# for i=1:(ninds+1)*rowsFromInd
|
|
|
|
|
|
# for j=1:maxAlleles
|
|
|
|
|
|
# GAMMA_LN(i,j)=gammaln((i-1) + 1/j);
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
|
|
|
|
|
|
# %---------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
# function dist2 = laskeOsaDist(inds2, dist, ninds)
|
|
|
|
|
|
# % Muodostaa dist vektorista osavektorin, joka sis<69>lt<6C><74> yksil<69>iden inds2
|
|
|
|
|
|
# % v<>liset et<65>isyydet. ninds=kaikkien yksil<69>iden lukum<75><6D>r?
|
|
|
|
|
|
|
|
|
|
|
|
# ninds2 = length(inds2);
|
|
|
|
|
|
# apu = zeros(nchoosek(ninds2,2),2);
|
|
|
|
|
|
# rivi = 1;
|
|
|
|
|
|
# for i=1:ninds2-1
|
|
|
|
|
|
# for j=i+1:ninds2
|
|
|
|
|
|
# apu(rivi, 1) = inds2(i);
|
|
|
|
|
|
# apu(rivi, 2) = inds2(j);
|
|
|
|
|
|
# rivi = rivi+1;
|
|
|
|
|
|
# end
|
|
|
|
|
|
# end
|
|
|
|
|
|
# apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1));
|
|
|
|
|
|
# dist2 = dist(apu);
|
|
|
|
|
|
|
|
|
|
|
|
# %--------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
# function [emptyPop, pops] = findEmptyPop(npops)
|
|
|
|
|
|
# % Palauttaa ensimm<6D>isen tyhj<68>n populaation indeksin. Jos tyhji?
|
|
|
|
|
|
# % populaatioita ei ole, palauttaa -1:n.
|
|
|
|
|
|
|
|
|
|
|
|
# global PARTITION;
|
|
|
|
|
|
# pops = unique(PARTITION)';
|
|
|
|
|
|
# if (length(pops) ==npops)
|
|
|
|
|
|
# emptyPop = -1;
|
|
|
|
|
|
# else
|
|
|
|
|
|
# popDiff = diff([0 pops npops+1]);
|
|
|
|
|
|
# emptyPop = min(find(popDiff > 1));
|
2020-07-14 15:33:00 +02:00
|
|
|
|
# end
|