2020-07-14 11:16:03 +02:00
|
|
|
|
#' @title Write Mixture Info
|
|
|
|
|
|
#' @description Writes information about the mixture
|
|
|
|
|
|
#' @param logml logml
|
|
|
|
|
|
#' @param rowsFromInd rowsFromInd
|
|
|
|
|
|
#' @param data data
|
|
|
|
|
|
#' @param adjprior adjprior
|
|
|
|
|
|
#' @param priorTerm priorTerm
|
|
|
|
|
|
#' @param outPutFile outPutFile
|
|
|
|
|
|
#' @param inputFile inputFile
|
|
|
|
|
|
#' @param partitionSummary partitionSummary
|
|
|
|
|
|
#' @param popnames popnames
|
|
|
|
|
|
#' @param fixedK fixedK
|
2023-09-11 14:06:45 +02:00
|
|
|
|
#' @param verbose if \code{TRUE}, prints extra output information
|
2022-07-28 15:47:36 +02:00
|
|
|
|
writeMixtureInfo <- function(
|
|
|
|
|
|
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile,
|
2023-09-11 14:06:45 +02:00
|
|
|
|
partitionSummary, popnames, fixedK, verbose
|
2022-07-28 15:47:36 +02:00
|
|
|
|
) {
|
2021-11-10 14:02:35 +01:00
|
|
|
|
ninds <- size(data, 1) / rowsFromInd
|
2024-04-10 16:04:07 +02:00
|
|
|
|
npops <- size(globals$COUNTS, 3)
|
2021-11-10 14:02:35 +01:00
|
|
|
|
# Check that the names refer to individuals
|
2022-07-28 15:47:36 +02:00
|
|
|
|
|
|
|
|
|
|
# Tarkistetaan ett?nimet viittaavat yksil<69>ihin
|
|
|
|
|
|
names <- (size(popnames, 1) == ninds)
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (length(outPutFile) > 0) {
|
|
|
|
|
|
fid <- load(outPutFile)
|
|
|
|
|
|
} else {
|
|
|
|
|
|
fid <- -1
|
2023-09-11 13:14:08 +02:00
|
|
|
|
outPutFile <- file.path(tempdir(), "baps4_output.baps")
|
|
|
|
|
|
message("Output saved to", outPutFile)
|
|
|
|
|
|
sink(outPutFile, split = TRUE) # save in text anyway.
|
2021-11-10 14:02:35 +01:00
|
|
|
|
}
|
2023-09-11 14:06:45 +02:00
|
|
|
|
if (verbose) {
|
|
|
|
|
|
dispLine()
|
|
|
|
|
|
cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
|
|
|
|
|
|
cat("Data file: ", inputFile, "\n")
|
|
|
|
|
|
cat("Model: independent\n")
|
|
|
|
|
|
cat("Number of clustered individuals: ", ownNum2Str(ninds), "\n")
|
|
|
|
|
|
cat("Number of groups in optimal partition: ", ownNum2Str(npops), "\n")
|
|
|
|
|
|
cat("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml), "\n")
|
|
|
|
|
|
cat(" ")
|
|
|
|
|
|
}
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
|
|
|
|
|
|
append(fid, c("Data file: ", inputFile, "\n"))
|
|
|
|
|
|
append(
|
|
|
|
|
|
fid,
|
|
|
|
|
|
c("Number of clustered individuals: ", ownNum2Str(ninds), "\n")
|
|
|
|
|
|
)
|
|
|
|
|
|
append(
|
|
|
|
|
|
fid,
|
|
|
|
|
|
c(
|
|
|
|
|
|
"Number of groups in optimal partition: ",
|
|
|
|
|
|
ownNum2Str(npops), "\n"
|
|
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
append(
|
|
|
|
|
|
fid,
|
|
|
|
|
|
c(
|
|
|
|
|
|
"Log(marginal likelihood) of optimal partition: ",
|
|
|
|
|
|
ownNum2Str(logml),
|
|
|
|
|
|
"\n"
|
|
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2024-04-10 16:04:07 +02:00
|
|
|
|
cluster_count <- length(unique(globals$PARTITION))
|
2023-09-11 14:06:45 +02:00
|
|
|
|
if (verbose) cat("Best Partition: ")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, c("Best Partition: ", "\n"))
|
|
|
|
|
|
}
|
|
|
|
|
|
for (m in 1:cluster_count) {
|
2024-04-10 16:04:07 +02:00
|
|
|
|
indsInM <- matlab2r::find(globals$PARTITION == m)
|
2021-11-10 14:02:35 +01:00
|
|
|
|
length_of_beginning <- 11 + floor(log10(m))
|
|
|
|
|
|
cluster_size <- length(indsInM)
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (names) {
|
2023-09-14 11:59:44 +02:00
|
|
|
|
text <- c("Cluster", m, ": {", popnames[[indsInM[1]]])
|
2021-11-10 14:02:35 +01:00
|
|
|
|
for (k in 2:cluster_size) {
|
2023-09-14 11:59:44 +02:00
|
|
|
|
text <- c(text, ", ", popnames[[indsInM[k]]])
|
2021-11-10 14:02:35 +01:00
|
|
|
|
}
|
|
|
|
|
|
} else {
|
2023-09-14 11:59:44 +02:00
|
|
|
|
text <- c("Cluster", m, ": {", indsInM[1])
|
2021-11-10 14:02:35 +01:00
|
|
|
|
for (k in 2:cluster_size) {
|
2023-09-14 11:59:44 +02:00
|
|
|
|
text <- c(text, ", ", indsInM[k])
|
2021-11-10 14:02:35 +01:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2023-09-11 12:15:30 +02:00
|
|
|
|
text <- c(text, "}\n")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
while (length(text) > 58) {
|
2023-09-14 11:59:44 +02:00
|
|
|
|
# Take one line (new_line) and display it.
|
2021-11-10 14:02:35 +01:00
|
|
|
|
new_line <- takeLine(text, 58)
|
2023-09-14 11:59:44 +02:00
|
|
|
|
text <- text[(length(new_line) + 1):length(text)]
|
|
|
|
|
|
if (verbose) cat(new_line, sep = "")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, new_line)
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
2023-09-14 11:59:44 +02:00
|
|
|
|
if (any(is.na(text))) {
|
2021-11-10 14:02:35 +01:00
|
|
|
|
text <- ""
|
2023-09-14 11:59:44 +02:00
|
|
|
|
} else {
|
|
|
|
|
|
text <- c(blanks(length_of_beginning), text)
|
2021-11-10 14:02:35 +01:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2023-09-11 12:15:30 +02:00
|
|
|
|
if (any(text != "")) {
|
2023-09-14 11:59:44 +02:00
|
|
|
|
if (verbose) cat(text, sep = "")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, text)
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (npops > 1) {
|
2023-09-11 14:06:45 +02:00
|
|
|
|
if (verbose) {
|
|
|
|
|
|
cat("\n")
|
|
|
|
|
|
cat("\n")
|
|
|
|
|
|
cat(
|
|
|
|
|
|
"Changes in log(marginal likelihood)",
|
|
|
|
|
|
" if indvidual i is moved to group j:\n"
|
|
|
|
|
|
)
|
|
|
|
|
|
}
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, " ")
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
append(fid, " ")
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
append(
|
|
|
|
|
|
fid,
|
|
|
|
|
|
c(
|
|
|
|
|
|
"Changes in log(marginal likelihood)",
|
2023-09-11 12:15:30 +02:00
|
|
|
|
"if indvidual i is moved to group j:\n"
|
2021-11-10 14:02:35 +01:00
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (names) {
|
|
|
|
|
|
nameSizes <- zeros(ninds, 1)
|
|
|
|
|
|
for (i in 1:ninds) {
|
|
|
|
|
|
nimi <- as.character(popnames[i])
|
|
|
|
|
|
nameSizes[i] <- length(nimi)
|
|
|
|
|
|
}
|
2022-02-03 10:43:34 +01:00
|
|
|
|
maxSize <- base::max(nameSizes)
|
|
|
|
|
|
maxSize <- base::max(maxSize, 5)
|
2021-11-10 14:02:35 +01:00
|
|
|
|
erotus <- maxSize - 5
|
|
|
|
|
|
alku <- blanks(erotus)
|
|
|
|
|
|
ekarivi <- c(alku, " ind", blanks(6 + erotus))
|
|
|
|
|
|
} else {
|
|
|
|
|
|
ekarivi <- " ind "
|
|
|
|
|
|
}
|
|
|
|
|
|
for (i in 1:cluster_count) {
|
|
|
|
|
|
ekarivi <- c(ekarivi, ownNum2Str(i), blanks(8 - floor(log10(i))))
|
|
|
|
|
|
}
|
2023-09-11 14:06:45 +02:00
|
|
|
|
if (verbose) cat(ekarivi)
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, ekarivi)
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
# %ninds = size(data,1)/rowsFromInd;
|
2024-04-10 16:04:07 +02:00
|
|
|
|
changesInLogml <- t(globals$LOGDIFF)
|
2021-11-10 14:02:35 +01:00
|
|
|
|
for (ind in 1:ninds) {
|
|
|
|
|
|
muutokset <- changesInLogml[, ind]
|
|
|
|
|
|
if (names) {
|
|
|
|
|
|
nimi <- as.character(popnames[ind])
|
2023-09-11 12:15:30 +02:00
|
|
|
|
rivi <- c(blanks(maxSize - length(nimi)), nimi, ":\n")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
} else {
|
2023-09-11 12:15:30 +02:00
|
|
|
|
rivi <- c("\n", blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":\n")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
}
|
|
|
|
|
|
for (j in 1:npops) {
|
2023-09-14 11:59:44 +02:00
|
|
|
|
rivi <- c(rivi, logml2String(omaRound(muutokset[j]), ""))
|
2021-11-10 14:02:35 +01:00
|
|
|
|
}
|
2023-09-14 11:59:44 +02:00
|
|
|
|
if (verbose) cat(rivi, sep = "")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, rivi)
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2023-09-14 11:59:44 +02:00
|
|
|
|
if (verbose) cat("\n\nKL-divergence matrix in PHYLIP format: ")
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
dist_mat <- zeros(npops, npops)
|
|
|
|
|
|
if (fid != -1) {
|
2023-09-14 11:59:44 +02:00
|
|
|
|
append(fid, " KL-divergence matrix in PHYLIP format: ")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2024-04-10 16:04:07 +02:00
|
|
|
|
globals$COUNTS <- globals$COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE]
|
|
|
|
|
|
maxnoalle <- size(globals$COUNTS, 1)
|
|
|
|
|
|
nloci <- size(globals$COUNTS, 2)
|
2021-11-10 14:02:35 +01:00
|
|
|
|
d <- zeros(maxnoalle, nloci, npops)
|
|
|
|
|
|
prior <- adjprior
|
2022-02-03 10:43:34 +01:00
|
|
|
|
prior[matlab2r::find(prior == 1)] <- 0
|
2022-07-28 15:47:36 +02:00
|
|
|
|
|
|
|
|
|
|
# Loci in which only one allele was detected.
|
|
|
|
|
|
nollia <- matlab2r::find(all(prior == 0))
|
|
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
prior[1, nollia] <- 1
|
|
|
|
|
|
for (pop1 in 1:npops) {
|
2024-04-10 16:04:07 +02:00
|
|
|
|
squeezed_COUNTS_prior <- squeeze(globals$COUNTS[, , pop1]) + prior
|
2023-09-11 12:15:30 +02:00
|
|
|
|
d[, , pop1] <- squeezed_COUNTS_prior / sum(squeezed_COUNTS_prior)
|
2021-11-10 14:02:35 +01:00
|
|
|
|
}
|
|
|
|
|
|
ekarivi <- as.character(npops)
|
2023-09-11 14:06:45 +02:00
|
|
|
|
if (verbose) cat(ekarivi)
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, ekarivi)
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
for (pop1 in 1:npops) {
|
2023-09-11 12:15:30 +02:00
|
|
|
|
for (pop2 in seq_len(pop1 - 1)) {
|
2021-11-10 14:02:35 +01:00
|
|
|
|
dist1 <- d[, , pop1]
|
|
|
|
|
|
dist2 <- d[, , pop2]
|
|
|
|
|
|
div12 <- sum(
|
2023-09-11 12:15:30 +02:00
|
|
|
|
sum(dist1 * base::log2((dist1 + 10^-10) / (dist2 + 10^-10)))
|
2021-11-10 14:02:35 +01:00
|
|
|
|
) / nloci
|
|
|
|
|
|
div21 <- sum(
|
2023-09-11 12:15:30 +02:00
|
|
|
|
sum(dist2 * base::log2((dist2 + 10^-10) / (dist1 + 10^-10)))
|
2021-11-10 14:02:35 +01:00
|
|
|
|
) / nloci
|
|
|
|
|
|
div <- (div12 + div21) / 2
|
|
|
|
|
|
dist_mat[pop1, pop2] <- div
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
|
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
dist_mat <- dist_mat + t(dist_mat) # make it symmetric
|
|
|
|
|
|
for (pop1 in 1:npops) {
|
2023-09-11 12:15:30 +02:00
|
|
|
|
rivi <- c("\nCluster_", as.character(pop1), "\n")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
for (pop2 in 1:npops) {
|
2023-09-14 11:59:44 +02:00
|
|
|
|
rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2], 4L))
|
2021-11-10 14:02:35 +01:00
|
|
|
|
}
|
2023-09-14 11:59:44 +02:00
|
|
|
|
if (verbose) cat(rivi, sep = "")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, rivi)
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2023-09-11 14:06:45 +02:00
|
|
|
|
if (verbose) {
|
|
|
|
|
|
cat(
|
|
|
|
|
|
"\n\nList of sizes of 10 best visited partitions",
|
|
|
|
|
|
"and corresponding log(ml) values\n"
|
|
|
|
|
|
)
|
|
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, " ")
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
append(fid, " ")
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
append(
|
|
|
|
|
|
fid,
|
|
|
|
|
|
c(
|
|
|
|
|
|
"List of sizes of 10 best visited partitions",
|
|
|
|
|
|
"and corresponding log(ml) values"
|
|
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
partitionSummary <- sortrows(partitionSummary, 2)
|
2024-07-02 16:39:38 +02:00
|
|
|
|
partitionSummary <- partitionSummary[size(partitionSummary, 1):1, , drop = FALSE]
|
|
|
|
|
|
partitionSummary <- partitionSummary[matlab2r::find(partitionSummary[, 2] > -1e49), , drop = FALSE]
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (size(partitionSummary, 1) > 10) {
|
|
|
|
|
|
vikaPartitio <- 10
|
|
|
|
|
|
} else {
|
|
|
|
|
|
vikaPartitio <- size(partitionSummary, 1)
|
|
|
|
|
|
}
|
|
|
|
|
|
for (part in 1:vikaPartitio) {
|
|
|
|
|
|
line <- c(
|
|
|
|
|
|
as.character(partitionSummary[part, 1]),
|
|
|
|
|
|
" ",
|
2023-09-11 12:15:30 +02:00
|
|
|
|
as.character(partitionSummary[part, 2])
|
2021-11-10 14:02:35 +01:00
|
|
|
|
)
|
2023-09-11 14:06:45 +02:00
|
|
|
|
if (verbose) cat(line)
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, line)
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (!fixedK) {
|
2023-09-11 14:06:45 +02:00
|
|
|
|
if (verbose) cat("\n\nProbabilities for number of clusters\n")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, " ")
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
append(fid, " ")
|
|
|
|
|
|
append(fid, "\n")
|
2021-11-10 14:55:11 +01:00
|
|
|
|
append(fid, "Probabilities for number of clusters")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
2020-07-14 11:16:03 +02:00
|
|
|
|
|
2021-11-10 14:02:35 +01:00
|
|
|
|
npopsTaulu <- unique(partitionSummary[, 1])
|
|
|
|
|
|
len <- length(npopsTaulu)
|
|
|
|
|
|
probs <- zeros(len, 1)
|
|
|
|
|
|
partitionSummary[, 2] <- partitionSummary[, 2] -
|
2022-02-03 10:43:34 +01:00
|
|
|
|
base::max(partitionSummary[, 2])
|
2021-11-10 14:02:35 +01:00
|
|
|
|
sumtn <- sum(exp(partitionSummary[, 2]))
|
|
|
|
|
|
for (i in 1:len) {
|
|
|
|
|
|
npopstn <- sum(
|
|
|
|
|
|
exp(
|
2022-02-03 10:43:34 +01:00
|
|
|
|
partitionSummary[matlab2r::find(
|
2021-11-10 14:02:35 +01:00
|
|
|
|
partitionSummary[, 1] == npopsTaulu[i]
|
|
|
|
|
|
), 2]
|
|
|
|
|
|
)
|
|
|
|
|
|
)
|
|
|
|
|
|
probs[i] <- npopstn / sumtn
|
|
|
|
|
|
}
|
|
|
|
|
|
for (i in 1:len) {
|
|
|
|
|
|
if (probs[i] > 1e-5) {
|
|
|
|
|
|
line <- c(
|
|
|
|
|
|
as.character(npopsTaulu[i]), " ", as.character(probs[i])
|
|
|
|
|
|
)
|
2023-09-11 14:06:45 +02:00
|
|
|
|
if (verbose) cat(line, "\n")
|
2021-11-10 14:02:35 +01:00
|
|
|
|
if (fid != -1) {
|
|
|
|
|
|
append(fid, line)
|
|
|
|
|
|
append(fid, "\n")
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2023-09-11 13:14:08 +02:00
|
|
|
|
# Closing sink(s)
|
|
|
|
|
|
while (sink.number() > 0L) {
|
|
|
|
|
|
sink()
|
|
|
|
|
|
}
|
2021-11-10 14:02:35 +01:00
|
|
|
|
return(changesInLogml)
|
|
|
|
|
|
}
|