ourMELONS/R/greedyPopMix.R
2022-07-19 14:34:08 +02:00

110 lines
3.9 KiB
R

#' @title Clustering of pop individuals
#' @param data data file
#' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"
#' @param verbose if \code{TRUE}, prints extra output information
#' @param partitionCompare a properly-named list. Proper names include
#' "partitions"
#' @importFrom utils read.delim
#' @importFrom vcfR read.vcfR
#' @importFrom Rsamtools scanBam
#' @importFrom adegenet read.genepop .readExt
#' @importFrom matlab2r uiputfile
#' @references Samtools: a suite of programs for interacting
#' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export
greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE) {
# Replacing original file reading code with greedyMix()
rawdata <- greedyMix(data, format, verbose)
# Other function calls to produce necessary objects
data_greedyMix_handle <- handlePopData(rawdata)
data <- data_greedyMix_handle$data
rowsFromInd <- data_greedyMix_handle$rowsFromInd
alleleCodes <- data_greedyMix_handle$alleleCodes
noalle <- data_greedyMix_handle$noalle
adjprior <- data_greedyMix_handle$adjprior
priorTerm <- data_greedyMix_handle$priorTerm
rm(data_greedyMix_handle)
Z_dist <- getPopDistancesByKL(adjprior)
Z_dist$Z -> Z
Z_dist$dist -> dist
rm(Z_dist)
a_data <- data[, 1:(ncol(data) - 1)]
sumcounts_counts_logml <- initialPopCounts(a_data, npops, rows, noalle, adjprior)
sumcounts_counts_logml$logml -> logml
rm(sumcounts_counts_logml)
c <- list()
c$data <- data
c$rows <- rows
c$alleleCodes <- alleleCodes
c$noalle <- noalle
c$adjprior <- adjprior
c$priorTerm <- priorTerm
c$dist <- dist
c$Z <- Z
c$rowsFromInd <- rowsFromInd
# partition compare
if (!is.null(partitionCompare)) {
nsamplingunits <- size(rows, 1)
partitions <- partitionCompare$partitions
npartitions <- size(partitions, 2)
partitionLogml <- zeros(1, npartitions)
for (i in 1:npartitions) {
# number of unique partition lables
npops <- length(unique(partitions[, i]))
partitionInd <- zeros(length(rows), 1)
partitionSample <- partitions[, i]
for (j in 1:nsamplingunits) {
partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
}
partitionLogml[i] <- initialCounts(
partitionInd, data[, 1:(ncol(data) - 1)], npops, c$rows, noalle,
adjprior
)
}
# return the logml result
partitionCompare$logmls <- partitionLogml
}
data <- data[, 1:(ncol(data) - 1)]
logml_npops_partitionSummary <- indMix(c)
logml_npops_partitionSummary$logml -> logml
logml_npops_partitionSummary$npops -> npops
logml_npops_partitionSummary$partitionSummary -> partitionSummary
rm(logml_npops_partitionSummary)
changesInLogml <- writeMixtureInfoPop(
logml, rows, data, adjprior, priorTerm,
NULL, NULL, partitionSummary, popnames, fixedK = FALSE
)
talle <- questdlg(
'Do you want to save the mixture populations so that you can use them later in admixture analysis?',
'Save results?', c('Yes', 'No'), 'Yes'
)
if (tolower(talle) == 'yes') {
waitALittle()
filename_pathname <- uiputfile()
if (rowsFromInd == 0) {
# BAPS format was used, rowsFromInd is not known.
popnames_rowsFromInd <- findOutRowsFromInd(popnames, rows)
popnames_rowsFromInd$popnames -> popnames
popnames_rowsFromInd$rows -> rows
rm(popnames_rowsFromInd)
}
groupPartition <- PARTITION
fiksaaPartitioYksiloTasolle(rows, rowsFromInd)
c$PARTITION <- PARTITION
c$COUNTS <- COUNTS
c$SUMCOUNTS <- SUMCOUNTS
c$alleleCodes <- alleleCodes
c$adjprior <- adjprior
c$rowsFromInd <- rowsFromInd
c$popnames <- popnames
c$data <- data
c$npops <- npops
c$noalle <- noalle
c$mixtureType = 'popMix'
c$groupPartition <- groupPartition
c$rows <- rows
c$logml <- logml
c$changesInLogml <- changesInLogml
}
}