#' @title Clustering of pop individuals #' @param data data file #' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop" #' @param verbose if \code{TRUE}, prints extra output information #' @param partitionCompare a properly-named list. Proper names include #' "partitions" #' @importFrom utils read.delim #' @importFrom vcfR read.vcfR #' @importFrom Rsamtools scanBam #' @importFrom adegenet read.genepop .readExt #' @importFrom matlab2r uiputfile #' @references Samtools: a suite of programs for interacting #' with high-throughput sequencing data. #' @export greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE) { # Replacing original file reading code with greedyMix() rawdata <- greedyMix(data, format, verbose) # Other function calls to produce necessary objects data_greedyMix_handle <- handlePopData(rawdata) data <- data_greedyMix_handle$data rowsFromInd <- data_greedyMix_handle$rowsFromInd alleleCodes <- data_greedyMix_handle$alleleCodes noalle <- data_greedyMix_handle$noalle adjprior <- data_greedyMix_handle$adjprior priorTerm <- data_greedyMix_handle$priorTerm rm(data_greedyMix_handle) Z_dist <- getPopDistancesByKL(adjprior) Z_dist$Z -> Z Z_dist$dist -> dist rm(Z_dist) a_data <- data[, 1:(ncol(data) - 1)] sumcounts_counts_logml <- initialPopCounts(a_data, npops, rows, noalle, adjprior) sumcounts_counts_logml$logml -> logml rm(sumcounts_counts_logml) c <- list() c$data <- data c$rows <- rows c$alleleCodes <- alleleCodes c$noalle <- noalle c$adjprior <- adjprior c$priorTerm <- priorTerm c$dist <- dist c$Z <- Z c$rowsFromInd <- rowsFromInd # partition compare if (!is.null(partitionCompare)) { nsamplingunits <- size(rows, 1) partitions <- partitionCompare$partitions npartitions <- size(partitions, 2) partitionLogml <- zeros(1, npartitions) for (i in 1:npartitions) { # number of unique partition lables npops <- length(unique(partitions[, i])) partitionInd <- zeros(length(rows), 1) partitionSample <- partitions[, i] for (j in 1:nsamplingunits) { partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j] } partitionLogml[i] <- initialCounts( partitionInd, data[, 1:(ncol(data) - 1)], npops, c$rows, noalle, adjprior ) } # return the logml result partitionCompare$logmls <- partitionLogml } data <- data[, 1:(ncol(data) - 1)] logml_npops_partitionSummary <- indMix(c) logml_npops_partitionSummary$logml -> logml logml_npops_partitionSummary$npops -> npops logml_npops_partitionSummary$partitionSummary -> partitionSummary rm(logml_npops_partitionSummary) changesInLogml <- writeMixtureInfoPop( logml, rows, data, adjprior, priorTerm, NULL, NULL, partitionSummary, popnames, fixedK = FALSE ) talle <- questdlg( 'Do you want to save the mixture populations so that you can use them later in admixture analysis?', 'Save results?', c('Yes', 'No'), 'Yes' ) if (tolower(talle) == 'yes') { waitALittle() filename_pathname <- uiputfile() if (rowsFromInd == 0) { # BAPS format was used, rowsFromInd is not known. popnames_rowsFromInd <- findOutRowsFromInd(popnames, rows) popnames_rowsFromInd$popnames -> popnames popnames_rowsFromInd$rows -> rows rm(popnames_rowsFromInd) } groupPartition <- PARTITION fiksaaPartitioYksiloTasolle(rows, rowsFromInd) c$PARTITION <- PARTITION c$COUNTS <- COUNTS c$SUMCOUNTS <- SUMCOUNTS c$alleleCodes <- alleleCodes c$adjprior <- adjprior c$rowsFromInd <- rowsFromInd c$popnames <- popnames c$data <- data c$npops <- npops c$noalle <- noalle c$mixtureType = 'popMix' c$groupPartition <- groupPartition c$rows <- rows c$logml <- logml c$changesInLogml <- changesInLogml } }