ourMELONS/R/greedyMix.R

#' @title Clustering of individuals
#' @description Imports the data file, optionally compares user-supplied
#' partitions, runs the mixture analysis through \code{indMixWrapper()} and
#' collects the results in a list.
#' @param data data file
#' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"
#' @param partitionCompare a list of partitions to compare
#' @param ninds number of individuals
#' @param rowsFromInd a list of rows for each individual
#' @param noalle number of alleles
#' @param adjprior adjusted prior probabilities
#' @param npops number of populations
#' @param priorTerm prior terms
#' @param counts counts
#' @param sumcounts sumcounts
#' @param max_iter maximum number of iterations
#' @param alleleCodes allele codes
#' @param inp input file
#' @param popnames population names
#' @param fixedK if \code{TRUE}, the number of populations is fixed
#' @param verbose if \code{TRUE}, prints extra output information
#' @return A list with the elements \code{alleleCodes}, \code{adjprior},
#' \code{popnames}, \code{rowsFromInd}, \code{data} (the imported data),
#' \code{npops}, \code{noalle}, \code{mixtureType} (always \code{"mix"}) and
#' \code{logml}. Unless \code{logml} equals 1, the list also carries the
#' value returned by \code{writeMixtureInfo()} as a single
#' \code{changesInLogml} element.
#' @importFrom utils read.delim
#' @importFrom vcfR read.vcfR
#' @importFrom Rsamtools scanBam
#' @importFrom adegenet read.genepop .readExt
#' @references Samtools: a suite of programs for interacting
#' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export
#' @examples
#' data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
#' greedyMix(data, "fasta")
greedyMix <- function(
  data, format, partitionCompare = NULL, ninds = NULL, rowsFromInd = NULL,
  noalle = NULL, adjprior = NULL, npops = 1L, priorTerm = NULL, counts = NULL,
  sumcounts = NULL, max_iter = 100L, alleleCodes = NULL, inp = NULL,
  popnames = NULL, fixedK = FALSE, verbose = FALSE
) {
  # Importing and handling data ================================================
  # From here on `data` holds the imported object, not the file path.
  data <- importFile(data, format, verbose)
  # Deliberately not called `c`, so that base::c() is never shadowed.
  mix_input <- list(
    # TODO: get elements from handleData()?
    noalle = noalle,
    rows = NA,
    data = data,
    adjprior = adjprior,
    priorTerm = priorTerm,
    rowsFromInd = rowsFromInd
  )

  # Comparing partitions =======================================================
  if (!is.null(partitionCompare)) {
    # NOTE(review): `logmls` is neither used below nor returned; the call is
    # kept in case comparePartitions() has side effects -- confirm intent.
    logmls <- comparePartitions(
      data, nrow(data), partitionCompare[["partitions"]], ninds, rowsFromInd,
      noalle, adjprior
    )
  }

  # Generating partition summary ===============================================
  mix_result <- indMixWrapper(
    mix_input, npops, counts, sumcounts, max_iter, fixedK, verbose
  )
  logml <- mix_result[["logml"]]
  npops <- mix_result[["npops"]]
  partitionSummary <- mix_result[["partitionSummary"]]

  # Generating output object ===================================================
  out <- list(
    "alleleCodes" = alleleCodes, "adjprior" = adjprior, "popnames" = popnames,
    "rowsFromInd" = rowsFromInd, "data" = data, "npops" = npops,
    "noalle" = noalle, "mixtureType" = "mix", "logml" = logml
  )
  # A logml of exactly 1 ends the run early, before any mixture info is
  # written (presumably a sentinel set by indMixWrapper() -- TODO confirm).
  if (logml == 1) {
    return(out)
  }

  # Writing mixture info =======================================================
  changesInLogml <- writeMixtureInfo(
    logml, rowsFromInd, data, adjprior, priorTerm, NULL, inp, partitionSummary,
    popnames, fixedK
  )

  # Updating results ===========================================================
  # Assign as one element: c(out, changesInLogml = x) would splice a vector or
  # matrix `x` into separate entries named changesInLogml1, changesInLogml2, ...
  out[["changesInLogml"]] <- changesInLogml
  out
}
Added bare-bones greedyMix 2020-05-20 15:34:40 +02:00			`#' @title Clustering of individuals`
greedyMix changed to channel data load (close #16) 2021-09-03 08:43:37 +02:00			`#' @param data data file`
Improved handling of data input on greedyMix 2021-09-03 13:08:40 +02:00			`#' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"`
Added missing documentation for arguments (#25) 2023-08-09 15:27:45 +02:00			`#' @param partitionCompare a list of partitions to compare`
			`#' @param ninds number of individuals`
			`#' @param rowsFromInd a list of rows for each individual`
			`#' @param noalle number of alleles`
			`#' @param adjprior ajuster prior probabilities`
			`#' @param npops number of populations`
			`#' @param priorTerm prior terms`
			`#' @param counts counts`
			`#' @param sumcounts sumcounts`
			`#' @param max_iter maximum number of iterations`
			`#' @param alleleCodes allele codes`
			`#' @param inp input file`
			`#' @param popnames population names`
			`#' @param fixedK if \code{TRUE}, the number of populations is fixed`
Added greedyMix support for VCF (closes #17) 2021-09-03 11:10:06 +02:00			`#' @param verbose if \code{TRUE}, prints extra output information`
Improved documentation 2020-06-24 11:48:23 +02:00			`#' @importFrom utils read.delim`
Fixed dependencies 2021-09-03 11:17:00 +02:00			`#' @importFrom vcfR read.vcfR`
Dependency fixes 2021-09-03 12:56:00 +02:00			`#' @importFrom Rsamtools scanBam`
Added read.genepop() to greedyMix() (closes #19) 2022-01-27 11:16:32 +01:00			`#' @importFrom adegenet read.genepop .readExt`
Added SAM/BAM support (closes #18) 2021-09-03 12:50:11 +02:00			`#' @references Samtools: a suite of programs for interacting`
			`#' with high-throughput sequencing data. <http://www.htslib.org/>`
Added bare-bones greedyMix 2020-05-20 15:34:40 +02:00			`#' @export`
Moved used datasets to /isnt/extdata (#25) 2023-08-09 10:54:48 +02:00			`#' @examples`
			`#' data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")`
Improved example (#25) 2023-08-09 13:13:16 +02:00			`#' greedyMix(data, "fasta")`
Incorporating subfunctions of greedyMix() (#25) 2023-08-09 12:06:09 +02:00			`greedyMix <- function(`
Improved handling of NULL objects (#25) 2023-08-09 14:17:17 +02:00			`data, format, partitionCompare = NULL, ninds = NULL, rowsFromInd = NULL,`
Fixed argument passing (#25) 2023-08-09 14:45:43 +02:00			`noalle = NULL, adjprior = NULL, npops = 1L, priorTerm = NULL, counts = NULL,`
			`sumcounts = NULL, max_iter = 100L, alleleCodes = NULL, inp = NULL,`
			`popnames = NULL, fixedK = FALSE, verbose = FALSE`
Incorporating subfunctions of greedyMix() (#25) 2023-08-09 12:06:09 +02:00			`) {`
			`# Importing and handling data ================================================`
Removing handleData() from greedyMix() (#25) Apparently that function is only used if the input is Genepop or BAPS format. However, its code might be useful for reducing some of the input of greedyMix(), as it contains the calculation for them. 2023-08-09 12:50:29 +02:00			`data <- importFile(data, format, verbose)`
Improved handling of NULL objects (#25) 2023-08-09 14:17:17 +02:00			`c <- list(`
			`# TODO: get elements from handleData()?`
			`noalle = noalle,`
			`rows = NA,`
			`data = data,`
			`adjprior = adjprior,`
			`priorTerm = priorTerm,`
			`rowsFromInd = rowsFromInd`
			`)`
Added MATLAB code from original greedyMix() (#25) 2023-08-09 11:26:29 +02:00
Improved handling of NULL objects (#25) 2023-08-09 14:17:17 +02:00			`# Comparing partitions =======================================================`
			`if (!is.null(partitionCompare)) {`
Incorporating subfunctions of greedyMix() (#25) 2023-08-09 12:06:09 +02:00			`logmls <- comparePartitions(`
Added missing documentation for arguments (#25) 2023-08-09 15:27:45 +02:00			`data, nrow(data), partitionCompare[["partitions"]], ninds, rowsFromInd,`
Improved argument retrieval on greedyMix() (#25) 2023-08-09 13:13:50 +02:00			`noalle, adjprior`
Incorporating subfunctions of greedyMix() (#25) 2023-08-09 12:06:09 +02:00			`)`
Added MATLAB code from original greedyMix() (#25) 2023-08-09 11:26:29 +02:00			`}`
Improved argument retrieval on greedyMix() (#25) 2023-08-09 13:13:50 +02:00

Incorporating subfunctions of greedyMix() (#25) 2023-08-09 12:06:09 +02:00			`# Generating partition summary ===============================================`
Fixed argument passing (#25) 2023-08-09 14:45:43 +02:00			`logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose);`
Incorporating subfunctions of greedyMix() (#25) 2023-08-09 12:06:09 +02:00			`logml <- logml_npops_partitionSummary[["logml"]]`
			`npops <- logml_npops_partitionSummary[["npops"]]`
			`partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]`
Improved handling of NULL objects (#25) 2023-08-09 14:17:17 +02:00
			`# Generating output object ===================================================`
			`out <- list(`
			`"alleleCodes" = alleleCodes, "adjprior" = adjprior, "popnames" = popnames,`
			`"rowsFromInd" = rowsFromInd, "data" = data, "npops" = npops,`
			`"noalle" = noalle, "mixtureType" = "mix", "logml" = logml`
			`)`
			`if (logml == 1) {`
			`return(out)`
			`}`
Added MATLAB code from original greedyMix() (#25) 2023-08-09 11:26:29 +02:00
Incorporating subfunctions of greedyMix() (#25) 2023-08-09 12:06:09 +02:00			`# Writing mixture info =======================================================`
			`changesInLogml <- writeMixtureInfo(`
			`logml, rowsFromInd, data, adjprior, priorTerm, NULL, inp, partitionSummary,`
			`popnames, fixedK`
			`)`
Added MATLAB code from original greedyMix() (#25) 2023-08-09 11:26:29 +02:00
Improved handling of NULL objects (#25) 2023-08-09 14:17:17 +02:00			`# Updateing results ==========================================================`
			`return(c(out, "changesInLogml" = changesInLogml))`
Restyled files Ran through styler::style_dir() in the R and tests directories in preparation for #23. 2021-11-10 14:02:35 +01:00			`}`