ourMELONS/R/greedyPopMix.R
2022-07-28 15:47:36 +02:00

111 lines
3.9 KiB
R

#' @title Clustering of pop individuals
#' @param data data file
#' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"
#' @param verbose if \code{TRUE}, prints extra output information
#' @param partitionCompare a properly-named list. Proper names include
#' "partitions"
#' @importFrom utils read.delim
#' @importFrom vcfR read.vcfR
#' @importFrom Rsamtools scanBam
#' @importFrom adegenet read.genepop .readExt
#' @importFrom matlab2r uiputfile
#' @references Samtools: a suite of programs for interacting
#' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export
greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
) {
# Replacing original file reading code with greedyMix()
rawdata <- greedyMix(data, format, verbose)
# Other function calls to produce necessary objects
data_greedyMix_handle <- handlePopData(rawdata)
data <- data_greedyMix_handle$data
rowsFromInd <- data_greedyMix_handle$rowsFromInd
alleleCodes <- data_greedyMix_handle$alleleCodes
noalle <- data_greedyMix_handle$noalle
adjprior <- data_greedyMix_handle$adjprior
priorTerm <- data_greedyMix_handle$priorTerm
rm(data_greedyMix_handle)
Z_dist <- getPopDistancesByKL(adjprior)
Z <- Z_dist$Z
dist <- Z_dist$dist
rm(Z_dist)
a_data <- data[, 1:(ncol(data) - 1)]
sumcounts_counts_logml <- initialPopCounts(a_data, npops, rows, noalle, adjprior)
logml <- sumcounts_counts_logml$logml
rm(sumcounts_counts_logml)
c <- list()
c$data <- data
c$rows <- rows
c$alleleCodes <- alleleCodes
c$noalle <- noalle
c$adjprior <- adjprior
c$priorTerm <- priorTerm
c$dist <- dist
c$Z <- Z
c$rowsFromInd <- rowsFromInd
# partition compare
if (!is.null(partitionCompare)) {
nsamplingunits <- size(rows, 1)
partitions <- partitionCompare$partitions
npartitions <- size(partitions, 2)
partitionLogml <- zeros(1, npartitions)
for (i in 1:npartitions) {
# number of unique partition lables
npops <- length(unique(partitions[, i]))
partitionInd <- zeros(length(rows), 1)
partitionSample <- partitions[, i]
for (j in 1:nsamplingunits) {
partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
}
partitionLogml[i] <- initialCounts(
partitionInd, data[, 1:(ncol(data) - 1)], npops, c$rows, noalle,
adjprior
)
}
# return the logml result
partitionCompare$logmls <- partitionLogml
}
data <- data[, 1:(ncol(data) - 1)]
logml_npops_partitionSummary <- indMix(c)
logml_npops_partitionSummary$logml -> logml
logml_npops_partitionSummary$npops -> npops
logml_npops_partitionSummary$partitionSummary -> partitionSummary
rm(logml_npops_partitionSummary)
changesInLogml <- writeMixtureInfoPop(
logml, rows, data, adjprior, priorTerm,
NULL, NULL, partitionSummary, popnames, fixedK = FALSE
)
talle <- questdlg(
"Do you want to save the mixture populations so that you can use them later in admixture analysis?",
"Save results?", c("Yes", "No"), "Yes"
)
if (tolower(talle) == "yes") {
waitALittle()
filename_pathname <- uiputfile()
if (rowsFromInd == 0) {
# BAPS format was used, rowsFromInd is not known.
popnames_rowsFromInd <- findOutRowsFromInd(popnames, rows)
popnames <- popnames_rowsFromInd$popnames
rows <- popnames_rowsFromInd$rows
rm(popnames_rowsFromInd)
}
groupPartition <- PARTITION
fiksaaPartitioYksiloTasolle(rows, rowsFromInd)
c$PARTITION <- PARTITION
c$COUNTS <- COUNTS
c$SUMCOUNTS <- SUMCOUNTS
c$alleleCodes <- alleleCodes
c$adjprior <- adjprior
c$rowsFromInd <- rowsFromInd
c$popnames <- popnames
c$data <- data
c$npops <- npops
c$noalle <- noalle
c$mixtureType <- "popMix"
c$groupPartition <- groupPartition
c$rows <- rows
c$logml <- logml
c$changesInLogml <- changesInLogml
}
}