Merge branch 'issue-25' into develop

* issue-25: (38 commits) Adjusted unit tests for #25 Fixed sink() usage Fixed docs Exporting importFile() Improved handling of supported formats for greedyMix() (#25) Fixed basic parsing of FASTA files (#25) Increment version number to 0.0.0.9022 Fixed syntax (#25) Improved printing (#25) Partial reversion of b034158 (#25) Fixed to indMix (#25) Incorporated handleData() on greedyMix() (#25) Improved handleData() to handle FASTA (#25) Added numeric output option to load_fasta() (#25) Fixed test text (#25) Added missing documentation for arguments (#25) Syntax fix (#25) Delayed resolution of FIXMEs (#25) Workaround for usage of MATLAB any() (#25) Fixed argument passing (#25) ...
2023-09-11 13:15:55 +02:00 · 2023-09-11 13:15:55 +02:00 · 59fbb0a167
commit 59fbb0a167
parent 449982a083 ce954bf801
95 changed files with 45170 additions and 45032 deletions
--- a/.Rbuildignore
+++ b/.Rbuildignore
@ -4,7 +4,6 @@ PITFALLS.md
 CHANGELOG.md
 CITATION.cff
 .travis.yml
-inst/ext/ExamplesDataFormatting
+inst/testdata
 .github
 aux
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 Package: rBAPS
 Title: Bayesian Analysis of Population Structure
-Version: 0.0.0.9020
+Version: 0.0.0.9022
 Date: 2020-11-09
 Authors@R:
    c(
--- a/39
+++ b/39
@ -1,45 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 export(addAlleles)
 export(admix1)
 export(calculatePopLogml)
 export(computeAllFreqs2)
 export(computeIndLogml)
 export(computePersonalAllFreqs)
 export(computeRows)
 export(etsiParas)
 export(fgetl)
 export(fopen)
 export(greedyMix)
 export(greedyPopMix)
 export(handleData)
-export(handlePopData)
+export(importFile)
 export(initPopNames)
 export(learn_partition_modified)
 export(learn_simple_partition)
 export(linkage)
 export(load_fasta)
 export(logml2String)
 export(lueGenePopData)
 export(lueGenePopDataPop)
 export(lueNimi)
 export(noIndex)
 export(ownNum2Str)
 export(poistaLiianPienet)
 export(proportion2str)
 export(randdir)
 export(rivinSisaltamienMjonojenLkm)
 export(selvitaDigitFormat)
 export(simulateAllFreqs)
 export(simulateIndividuals)
 export(simuloiAlleeli)
 export(suoritaMuutos)
 export(takeLine)
 export(testaaKoordinaatit)
 export(testaaOnkoKunnollinenBapsData)
 export(testaaPop)
 export(writeMixtureInfo)
 export(writeMixtureInfoPop)
 importFrom(R6,R6Class)
 importFrom(Rsamtools,scanBam)
 importFrom(adegenet,.readExt)
--- a/NEWS.md
+++ b/NEWS.md
@ -0,0 +1,6 @@
 # rBAPS (development version)
 # rBAPS 0.0.0.9021
 * Added a `NEWS.md` file to track changes to the package.
 * Exported `greedyMix()` and `load_fasta()` functions.
--- a/R/addAlleles.R
+++ b/R/addAlleles.R
@ -4,7 +4,6 @@
 #' @param line line
 #' @param divider divider
 #' @return data (after alleles were added)
 #' @export
 addAlleles <- function(data, ind, line, divider) {
  # Lisaa BAPS-formaatissa olevaan datataulukkoon
  # yksil<69><6C> ind vastaavat rivit. Yksil<69>n alleelit
--- a/R/admix1.R
+++ b/R/admix1.R
@ -6,7 +6,6 @@
 #' alleleCodes, adjprior, popnames, rowsFromInd, data, npops, noalle
 #' @param tietue tietue
 #' @importFrom methods is
 #' @export
 admix1 <- function(tietue) {
  if (!is.list(tietue)) {
    message("Load mixture result file. These are the files in this directory:")
--- a/R/calculatePopLogml.R
+++ b/R/calculatePopLogml.R
@ -4,7 +4,6 @@
 #' for the mean parameter.
 #' @param points points
 #' @param fii fii
 #' @export
 calculatePopLogml <- function(points, fii) {
  n <- length(points)
  fuzzy_ones <- sum(points)
--- a/R/comparePartitions.R
+++ b/R/comparePartitions.R
@ -0,0 +1,23 @@
 comparePartitions <- function(data, c.rows, partitionCompare.partitions, ninds, rowsFromInd, noalle, adjprior) {
  stop("Comparing partitions not yet implemented") # TODO: implement
  # nsamplingunits = size(c.rows,1);
  # partitions = partitionCompare.partitions;
  # npartitions = size(partitions,2);
  # partitionLogml = zeros(1,npartitions);
  # for i = 1:npartitions
  #     % number of unique partition lables
  #     npops = length(unique(partitions(:,i)));
  #     partitionInd = zeros(ninds*rowsFromInd,1);
  #     partitionSample = partitions(:,i);
  #     for j = 1:nsamplingunits
  #         partitionInd([c.rows(j,1):c.rows(j,2)]) = partitionSample(j);
  #     end
  #     partitionLogml(i) = initialCounts(partitionInd, data(:,1:end-1), npops, c.rows, noalle, adjprior);
  # end
  # % return the logml result
  # partitionCompare.logmls = partitionLogml;
  # set(h1, 'userdata', partitionCompare);
  # return
 }
--- a/R/computeAllFreqs2.R
+++ b/R/computeAllFreqs2.R
@ -2,7 +2,6 @@
 #' @description Lisää a priori jokaista alleelia joka populaation joka lokukseen
 #' j 1/noalle(j) verran.
 #' @param noalle noalle
 #' @export
 computeAllFreqs2 <- function(noalle) {
  COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS)
  SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS)
--- a/R/computeIndLogml.R
+++ b/R/computeIndLogml.R
@ -3,7 +3,6 @@
 #' määritellyiksi kuten osuusTaulu:ssa.
 #' @param omaFreqs own Freqs?
 #' @param osuusTaulu Percentage table?
 #' @export
 computeIndLogml <- function(omaFreqs, osuusTaulu) {
  omaFreqs <- as.matrix(omaFreqs)
  osuusTaulu <- as.matrix(osuusTaulu)
--- a/R/computePersonalAllFreqs.R
+++ b/R/computePersonalAllFreqs.R
@ -7,8 +7,6 @@
 #' @param data data
 #' @param allFreqs allFreqs
 #' @param rowsFromInd rowsFromInd
 #' @export
 computePersonalAllFreqs <- function(ind, data, allFreqs, rowsFromInd) {
  if (isGlobalEmpty(COUNTS)) {
    nloci <- npops <- 1
--- a/R/computePopulationLogml.R
+++ b/R/computePopulationLogml.R
@ -4,9 +4,13 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
  # ======================================================== #
  # Limiting COUNTS size                                     #
  # ======================================================== #
-  COUNTS <- COUNTS[
+  if (!is.null(adjprior)) {
-    seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = FALSE
+    nr <- seq_len(nrow(adjprior))
-  ]
+    nc <- seq_len(ncol(adjprior))
    COUNTS <- COUNTS[nr, nc, pops, drop = FALSE]
  } else {
    COUNTS <- NA
  }
  x <- size(COUNTS, 1)
  y <- size(COUNTS, 2)
@ -15,25 +19,24 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
  # ======================================================== #
  # Computation                                              #
  # ======================================================== #
-  isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
+  term1 <- NULL
-  term1 <- squeeze(
+  if (!is.null(adjprior)) {
-    sum(
+    isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
    term1 <- squeeze(
      sum(
-        reshape(
+        sum(
-          lgamma(
+          reshape(
-            repmat(adjprior, c(1, 1, length(pops))) +
+            lgamma(
-              COUNTS[
+              repmat(adjprior, c(1, 1, length(pops))) + COUNTS[nr, nc, pops, drop = !isarray]
-                seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops,
+            ),
-                drop = !isarray
+            c(x, y, z)
              ]
          ),
-          c(x, y, z)
+          1
        ),
-        1
+        2
-      ),
+      )
      2
    )
-  )
+  }
  if (is.null(priorTerm)) priorTerm <- 0
  popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
  return(popLogml)
--- a/R/computeRows.R
+++ b/R/computeRows.R
@ -4,7 +4,6 @@
 #' @param rowsFromInd rowsFromInd
 #' @param inds matrix
 #' @param ninds ninds
 #' @export
 computeRows <- function(rowsFromInd, inds, ninds) {
  if (!is(inds, "matrix")) inds <- as.matrix(inds)
  if (identical(dim(inds), c(nrow(inds), 1L))) {
--- a/R/etsiParas.R
+++ b/R/etsiParas.R
@ -1,4 +1,3 @@
 #' @export
 #' @title Etsi Paras
 #' @description Search for the best?
 #' @param osuus Percentages?
--- a/R/fgetl-fopen.R
+++ b/R/fgetl-fopen.R
@ -6,7 +6,6 @@
 #' fgetl returns tline as a numeric value -1.
 #' @author Waldir Leoncio
 #' @seealso fopen
 #' @export
 fgetl <- function(file) {
  # ==========================================================================
  # Validation
@ -27,5 +26,4 @@ fgetl <- function(file) {
 #' @return The same as `readLines(filename)`
 #' @author Waldir Leoncio
 #' @seealso fgetl
 #' @export
 fopen <- function(filename) readLines(filename)
--- a/R/greedyMix.R
+++ b/R/greedyMix.R
@ -1,6 +1,16 @@
 #' @title Clustering of individuals
 #' @param data data file
 #' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"
 #' @param partitionCompare a list of partitions to compare
 #' @param ninds number of individuals
 #' @param npops number of populations
 #' @param counts counts
 #' @param sumcounts sumcounts
 #' @param max_iter maximum number of iterations
 #' @param alleleCodes allele codes
 #' @param inp input file
 #' @param popnames population names
 #' @param fixedK if \code{TRUE}, the number of populations is fixed
 #' @param verbose if \code{TRUE}, prints extra output information
 #' @importFrom utils read.delim
 #' @importFrom vcfR read.vcfR
@ -9,41 +19,59 @@
 #' @references Samtools: a suite of programs for interacting
 #' with high-throughput sequencing data. <http://www.htslib.org/>
 #' @export
-greedyMix <- function(data, format, verbose = TRUE) {
+#' @examples
-  # Parsing data format ------------------------------------------------------
+#' data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
 #' greedyMix(data, "fasta")
 greedyMix <- function(
  data, format, partitionCompare = NULL, ninds = 1L, npops = 1L,
  counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
  inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
 ) {
  # Importing and handling data ================================================
  data <- importFile(data, format, verbose)
  data <- handleData(data, tolower(format))
  c <- list(
    noalle = data[["noalle"]],
    data = data[["newData"]],
    adjprior = data[["adjprior"]],
    priorTerm = data[["priorTerm"]],
    rowsFromInd = data[["rowsFromInd"]]
  )
-  if (missing(format)) {
+  # Comparing partitions =======================================================
-    format <- gsub(".*\\.(.+)$", "\\1", data)
+  if (!is.null(partitionCompare)) {
-    message("Format not provided. Guessing from file extension: ", format)
+    logmls <- comparePartitions(
-  }
+      c[["data"]], nrow(c[["data"]]), partitionCompare[["partitions"]], ninds,
-  format <- tolower(format)
+      c[["rowsFromInd"]], c[["noalle"]], c[["adjprior"]]
  # Dispatching to proper loading function -----------------------------------
  if (format == "fasta") {
    out <- load_fasta(data)
  } else if (format == "vcf") {
    out <- vcfR::read.vcfR(data, verbose = verbose)
  } else if (format == "sam") {
    stop(
      "SAM files not directly supported. ",
      "Install the samtools software and execute\n\n",
      "samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ",
      "and try running this function again with 'format=BAM'"
    )
  } else if (format == "bam") {
    out <- Rsamtools::scanBam(data)
  } else if (format == "genepop") {
    if (toupper(adegenet::.readExt(data)) == "TXT") {
      message("Creating a copy of the file with the .gen extension")
      dataGen <- gsub("txt", "gen", data)
      file.copy(data, dataGen)
      out <- adegenet::read.genepop(dataGen)
    } else {
      out <- adegenet::read.genepop(data)
    }
  } else {
    stop("Format not supported.")
  }
-  return(out)
+
  # Generating partition summary ===============================================
  ekat <- seq(1L, c[["rowsFromInd"]], ninds * c[["rowsFromInd"]]) # ekat = (1:rowsFromInd:ninds*rowsFromInd)';
  c[["rows"]] <- c(ekat, ekat + c[["rowsFromInd"]] - 1L) # c.rows = [ekat ekat+rowsFromInd-1]
  logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose)
  logml <- logml_npops_partitionSummary[["logml"]]
  npops <- logml_npops_partitionSummary[["npops"]]
  partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
  # Generating output object ===================================================
  out <- list(
    "alleleCodes" = alleleCodes, "adjprior" = c[["adjprior"]],
    "popnames" = popnames, "rowsFromInd" = c[["rowsFromInd"]],
    "data" = c[["data"]], "npops" = npops, "noalle" = c[["noalle"]],
    "mixtureType" = "mix", "logml" = logml
  )
  if (logml == 1) {
    return(out)
  }
  # Writing mixture info =======================================================
  changesInLogml <- writeMixtureInfo(
    logml, c[["rowsFromInd"]], c[["data"]], c[["adjprior"]], c[["priorTerm"]],
    NULL, inp, partitionSummary, popnames, fixedK
  )
  # Updateing results ==========================================================
  return(c(out, "changesInLogml" = changesInLogml))
 }
--- a/R/greedyPopMix.R
+++ b/R/greedyPopMix.R
@ -11,7 +11,6 @@
 #' @importFrom matlab2r uiputfile
 #' @references Samtools: a suite of programs for interacting
 #' with high-throughput sequencing data. <http://www.htslib.org/>
 #' @export
 greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
 ) {
  # Replacing original file reading code with greedyMix()
--- a/R/handleData.R
+++ b/R/handleData.R
@ -1,5 +1,6 @@
 #' @title Handle Data
-#' @param raw_data Raw data
+#' @param raw_data Raw data in Genepop or BAPS format
 #' @param format data format
 #' @details The last column of the original data tells you from which
 #' individual that line is from. The function first examines how many line
 #' maximum is from one individual giving know if it is haploid, diploid, etc.
@ -7,9 +8,9 @@
 #' maximum. If the code of an allele is = 0, the function changes that allele
 #' code to the smallest code that is larger than any code in use. After this,
 #' the function changes the allele codes so that one locus j
-#' codes get values between? 1, ..., Noah (j).
+#' codes get values between? 1, ..., noalle(j).
 #' @export
-handleData <- function(raw_data) {
+handleData <- function(raw_data, format = "Genepop") {
  # Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt?
  # kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako
  # rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan
@ -20,28 +21,29 @@ handleData <- function(raw_data) {
  # koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi.
  # T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
  # koodit saavat arvoja v?lill?1,...,noalle(j).
  nloci <- switch(
    tolower(format),
    "genepop" = ncol(raw_data) - 1L,
    "baps"    = ncol(raw_data) - 1L,
    "fasta"   = ncol(raw_data),
    "vcf"     = stop("VCF format not supported for processing yet"),
    "bam"     = stop("BAM format not supported for processing yet")
  )
  data <- as.matrix(raw_data)
  nloci <- size(raw_data, 2) - 1
  dataApu <- data[, 1:nloci]
  nollat <- matlab2r::find(dataApu == 0)
  if (!isempty(nollat)) {
-    isoinAlleeli <- base::max(max(dataApu))
+    isoinAlleeli <- base::max(base::max(dataApu))
    dataApu[nollat] <- isoinAlleeli + 1
    data[, 1:nloci] <- dataApu
  }
  # dataApu <- []
  # nollat <- []
  # isoinAlleeli <- []
  noalle <- zeros(1, nloci)
  alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
  for (i in 1:nloci) {
    alleelitLokuksessaI <- unique(data[, i])
    alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
-      matlab2r::find(
+      matlab2r::find(alleelitLokuksessaI >= 0)
        alleelitLokuksessaI >= 0
      )
    ])
    noalle[i] <- length(alleelitLokuksessa[[i]])
  }
@ -49,9 +51,7 @@ handleData <- function(raw_data) {
  for (i in 1:nloci) {
    alleelitLokuksessaI <- alleelitLokuksessa[[i]]
    puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
-    alleleCodes[, i] <- as.matrix(
+    alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
      c(alleelitLokuksessaI, zeros(puuttuvia, 1))
    )
  }
  for (loc in seq_len(nloci)) {
@ -60,7 +60,7 @@ handleData <- function(raw_data) {
    }
  }
-  nind <- base::max(data[, ncol(data)])
+  nind <- as.integer(base::max(data[, ncol(data)]))
  nrows <- size(data, 1)
  ncols <- size(data, 2)
  rowsFromInd <- zeros(nind, 1)
@ -71,11 +71,11 @@ handleData <- function(raw_data) {
  a <- -999
  emptyRow <- repmat(a, c(1, ncols))
  lessThanMax <- matlab2r::find(rowsFromInd < maxRowsFromInd)
-  missingRows <- maxRowsFromInd * nind - nrows
+  missingRows <- max(maxRowsFromInd * nind - nrows, 0L)
  data <- rbind(data, zeros(missingRows, ncols))
  pointer <- 1
  for (ind in t(lessThanMax)) { # K?y l?pi ne yksil?t, joilta puuttuu rivej?
-    miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm.
+    miss <- maxRowsFromInd - rowsFromInd[ind] # T?lt?yksil?lt?puuttuvien lkm.
  }
  data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
  newData <- data
--- a/R/handlePopData.R
+++ b/R/handlePopData.R
@ -4,7 +4,6 @@
 #' codes so that the codes for one locus have values between 1 and noalle[j].
 #' Before this change, an allele whose code is zero is changed.
 #' @param raw_data raw data
 #' @export
 handlePopData <- function(raw_data) {
  # Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
  # kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
--- a/R/importFile.R
+++ b/R/importFile.R
@ -0,0 +1,49 @@
 #' @title Import data file
 #' @description Imports data from several formats (FASTA, VCF, SAM, BAM,
 #' Genepop).
 #' @param data raw dataset
 #' @param format data format (guesses from extension if not provided)
 #' @param verbose if \code{TRUE}, prints extra output information
 #' @return The data in a format that can be used by the other functions
 #' @export
 #' @examples
 #' path_inst <- system.file("extdata", "", package = "rBAPS")
 #' importFile(file.path(path_inst, "FASTA_clustering_haploid.fasta"))
 importFile <- function(data, format, verbose) {
  # Parsing data format ------------------------------------------------------
  if (missing(format)) {
    format <- gsub(".*\\.(.+)$", "\\1", data)
    message("Format not provided. Guessing from file extension: ", format)
  }
  format <- tolower(format)
  # Dispatching to proper loading function -----------------------------------
  if (format == "fasta") {
    out <- load_fasta(data)
  } else if (format == "vcf") {
    out <- vcfR::read.vcfR(data, verbose = verbose)
  } else if (format == "sam") {
    stop(
      "SAM files not directly supported. ",
      "Install the samtools software and execute\n\n",
      "samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ",
      "and try running this function again with 'format=BAM'"
    )
  } else if (format == "bam") {
    out <- Rsamtools::scanBam(data)
  } else if (format == "genepop") {
    if (toupper(adegenet::.readExt(data)) == "TXT") {
      message("Creating a copy of the file with the .gen extension")
      dataGen <- gsub("txt", "gen", data)
      file.copy(data, dataGen)
      out <- adegenet::read.genepop(dataGen)
    } else {
      out <- adegenet::read.genepop(data)
    }
  } else {
    stop("Format not supported.")
  }
  return(out)
 }
--- a/R/indMix.R
+++ b/R/indMix.R
@ -1,4 +1,4 @@
-indMix <- function(c, npops, dispText = TRUE) {
+indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, dispText = FALSE) {
  # Greedy search algorithm with unknown number of classes for regular
  # clustering.
  # Input npops is not used if called by greedyMix or greedyPopMix.
@ -17,8 +17,11 @@ indMix <- function(c, npops, dispText = TRUE) {
  if (isfield(c, "dist")) {
    dist <- c$dist
    Z <- c$Z
  } else {
    Z <- NULL
  }
  rm(c)
  nargin <- length(as.list(match.call())) - 1
  if (nargin < 2) {
@ -65,14 +68,14 @@ indMix <- function(c, npops, dispText = TRUE) {
  nruns <- length(npopsTaulu)
  initData <- data
-  data <- data[, 1:(ncol(data) - 1)]
+  data <- data[, seq_along(noalle)]  # Original code always dropped last column.
  logmlBest <- -1e50
  partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml)
  partitionSummary[, 1] <- zeros(30, 1)
  worstLogml <- -1e50
  worstIndex <- 1
-  for (run in 1:nruns) {
+  for (run in seq_along(nruns)) {
    npops <- npopsTaulu[[run]]
    if (dispText) {
      dispLine()
@ -84,6 +87,7 @@ indMix <- function(c, npops, dispText = TRUE) {
      )
    }
    ninds <- size(rows, 1)
    initialPartition <- admixture_initialization(initData, npops, Z)
    sumcounts_counts_logml <- initialCounts(
      initialPartition, data, npops, rows, noalle, adjprior
@ -93,16 +97,15 @@ indMix <- function(c, npops, dispText = TRUE) {
    logml <- sumcounts_counts_logml$logml
    PARTITION <- zeros(ninds, 1)
-    for (i in 1:ninds) {
+    for (i in seq_len(ninds)) {
      apu <- rows[i]
      PARTITION[i] <- initialPartition[apu[1]]
    }
    COUNTS <- counts
    SUMCOUNTS <- sumcounts
-    POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm)
+    POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
    LOGDIFF <- repmat(-Inf, c(ninds, npops))
    rm(initialPartition, counts, sumcounts)
    # PARHAAN MIXTURE-PARTITION ETSIMINEN
    nRoundTypes <- 7
@ -120,30 +123,34 @@ indMix <- function(c, npops, dispText = TRUE) {
      )
    }
    iter <- 1L
    while (ready != 1) {
-      # FIXME: loop caught in here
+      iter <- iter + 1L
      if (iter > max_iter) {
        warning("max_iter reached. Stopping prematurely.")
        break
      }
      muutoksia <- 0
      if (dispText) {
-        message(paste("\nPerforming steps:", as.character(roundTypes)))
+        message("Performing steps: ", paste(roundTypes, collapse = " "))
      }
-      for (n in 1:length(roundTypes)) {
+      for (n in seq_along(roundTypes)) {
        round <- roundTypes[n]
        kivaluku <- 0
        if (kokeiltu[round] == 1) { # Askelta kokeiltu viime muutoksen j<>lkeen
        } else if (round == 0 | round == 1) { # Yksil<69>n siirt<72>minen toiseen populaatioon.
-          inds <- 1:ninds
+          inds <- seq_len(ninds)
-          aputaulu <- cbind(inds, rand(ninds, 1))
+          aputaulu <- cbind(t(inds), rand(ninds, 1))
-          aputaulu <- sortrows(aputaulu, 2)
+          aputaulu <- matrix(sortrows(aputaulu, 2), nrow = nrow(aputaulu))
          inds <- t(aputaulu[, 1])
          muutosNyt <- 0
          for (ind in inds) {
            i1 <- PARTITION[ind]
            muutokset_diffInCounts <- greedyMix_muutokset$new()
            # FIXME: using 100-length global variables instead of the ones in this function
            muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset(
              ind, rows, data, adjprior, priorTerm
            )
@ -190,7 +197,7 @@ indMix <- function(c, npops, dispText = TRUE) {
          }
        } else if (round == 2) { # Populaation yhdist<73>minen toiseen.
          maxMuutos <- 0
-          for (pop in 1:npops) {
+          for (pop in seq_len(npops)) {
            muutokset_diffInCounts <- greedyMix_muutokset$new()
            muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset2(
              pop, rows, data, adjprior, priorTerm
@ -234,7 +241,7 @@ indMix <- function(c, npops, dispText = TRUE) {
        } else if (round == 3 || round == 4) { # Populaation jakaminen osiin.
          maxMuutos <- 0
          ninds <- size(rows, 1)
-          for (pop in 1:npops) {
+          for (pop in seq_len(npops)) {
            inds2 <- matlab2r::find(PARTITION == pop)
            ninds2 <- length(inds2)
            if (ninds2 > 2) {
@ -265,7 +272,7 @@ indMix <- function(c, npops, dispText = TRUE) {
            muutoksia <- 1
            kokeiltu <- zeros(nRoundTypes, 1)
            rivit <- list()
-            for (i in 1:length(muuttuvat)) {
+            for (i in seq_len(muuttuvat)) {
              ind <- muuttuvat[i]
              lisa <- rows[ind, 1]:rows[ind, 2]
              rivit <- rbind(rivit, t(lisa))
@ -421,7 +428,7 @@ indMix <- function(c, npops, dispText = TRUE) {
              totalMuutos <- muutokset(1, emptyPop)
              rivit <- list()
-              for (i in 1:length(muuttuvat)) {
+              for (i in seq_len(muuttuvat)) {
                ind <- muuttuvat[i]
                lisa <- rows[ind, 1]:rows[ind, 2]
                rivit <- c(rivit, lisa)
@ -506,8 +513,6 @@ indMix <- function(c, npops, dispText = TRUE) {
          }
        }
      }
      # FIXME: muutoksia is never 0, so vaihe never equals 5 and ready 1
      print(paste("i1 =", i1, "i2 =", i2, "maxMuutos =", maxMuutos)) # TEMP
      if (muutoksia == 0) {
        if (vaihe <= 4) {
          vaihe <= vaihe + 1
@ -536,11 +541,10 @@ indMix <- function(c, npops, dispText = TRUE) {
    # TALLENNETAAN
    npops <- poistaTyhjatPopulaatiot(npops)
-    POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm)
+    POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
    if (dispText) {
-      print(paste("Found partition with", as.character(npops), "populations."))
+      message("Found partition with ", as.character(npops), " populations.")
-      print(paste("Log(ml) =", as.character(logml)))
+      message("Log(ml) = ", as.character(logml))
      print(" ")
    }
    if (logml > logmlBest) {
--- a/R/indMixWrapper.R
+++ b/R/indMixWrapper.R
@ -0,0 +1,7 @@
 indMixWrapper <- function(c, npops, counts, sumcounts, max_iter, fixedK = FALSE, verbose = FALSE) {
  if (fixedK) {
    stop("indMix_fixK() not yet implemented.") # TODO: translate indMix_fixK.m
  } else {
    indMix(c, npops, counts, sumcounts, max_iter, verbose)
  }
 }
--- a/R/initPopNames.R
+++ b/R/initPopNames.R
@ -1,7 +1,6 @@
 #' @title Initialize Pop Names
 #' @param nameFile nameFile
 #' @param indexFile indexFile
 #' @export
 initPopNames <- function(nameFile, indexFile) {
  # Palauttaa tyhj<68>n, mik<69>li nimitiedosto ja indeksitiedosto
  #  eiv<69>t olleet yht?pitki?
--- a/R/initialCounts.R
+++ b/R/initialCounts.R
@ -2,7 +2,7 @@ initialCounts <- function(partition, data, npops, rows, noalle, adjprior) {
  nloci <- size(data, 2)
  ninds <- size(rows, 1)
-  koot <- rows[, 1] - rows[, 2] + 1
+  koot <- rows[1] - rows[2] + 1
  maxSize <- base::max(koot)
  counts <- zeros(base::max(noalle), nloci, npops)
--- a/R/laskeLoggis.R
+++ b/R/laskeLoggis.R
@ -1,7 +1,7 @@
 laskeLoggis <- function(counts, sumcounts, adjprior) {
  npops <- size(counts, 3)
-
+  replicated_adjprior <- array(adjprior, c(nrow(adjprior), ncol(adjprior), npops))
-  sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops))))))
+  sum1 <- sum(sum(sum(lgamma(counts + replicated_adjprior))))
  sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
  logml2 <- sum1 - npops * sum3
  loggis <- logml2
--- a/R/laskeMuutokset12345.R
+++ b/R/laskeMuutokset12345.R
@ -349,12 +349,15 @@ greedyMix_muutokset <- R6Class(
      i1_logml <- POP_LOGML[i1]
      muutokset[i1] <- 0
-      rows <- globalRows[ind, 1]:globalRows[ind, 2]
+      if (is.null(dim(globalRows))) {
        rows <- globalRows[1]:globalRows[2]
      } else {
        rows <- globalRows[ind, 1]:globalRows[ind, 2]
      }
      diffInCounts <- computeDiffInCounts(
        rows, size(COUNTS, 1), size(COUNTS, 2), data
      )
      diffInSumCounts <- colSums(diffInCounts)
      COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
      SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
      new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
--- a/R/learn_partition_modified.R
+++ b/R/learn_partition_modified.R
@ -1,5 +1,4 @@
 #' @title Learn partition (modified)
 #' @export
 #' @param ordered ordered
 #' @return part
 #' @description This function is called only if some individual has less than
--- a/R/learn_simple_partition.R
+++ b/R/learn_simple_partition.R
@ -3,7 +3,6 @@
 #' @param fii fii
 #' @description Goes through all the ways to divide the points into two or
 #' three groups. Chooses the partition which obtains highest logml.
 #' @export
 learn_simple_partition <- function(ordered_points, fii) {
  npoints <- length(ordered_points)
--- a/R/linkage.R
+++ b/R/linkage.R
@ -13,7 +13,6 @@
 #' that BAPS should use this function instead of the base one, so this is why
 #' this function is part of this package (instead of a MATLAB-replicating
 #' package such as matlab2r)
 #' @export
 linkage <- function(Y, method = "co") {
  k <- size(Y)[1]
  n <- size(Y)[2]
--- a/R/load_fasta.R
+++ b/R/load_fasta.R
@ -4,18 +4,19 @@
 #' running the hierBAPS algorithm.
 #'
 #' @param msa Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered
-#' @param keep.singletons A logical indicating whether to consider singleton mutations in calculating the clusters
+#' @param keep_singletons A logical indicating whether to consider singleton mutations in calculating the clusters
 #' @param output_numbers A logical indicating whether to output the data as
 #' numbers (TRUE) or letters (FALSE)
 #'
 #' @return A character matrix with filtered SNP data
 #'
 #' @examples
-#' msa <- system.file("ext", "seqs.fa", package = "rBAPS")
+#' msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
-#' snp.matrix <- load_fasta(msa)
+#' snp.matrix <- rBAPS:::load_fasta(msa)
 #' @author Gerry Tonkin-Hill, Waldir Leoncio
 #' @seealso rhierbaps::load_fasta
 #' @importFrom ape read.FASTA as.DNAbin
-#' @export
+load_fasta <- function(msa, keep_singletons = FALSE, output_numbers = TRUE) {
 load_fasta <- function(msa, keep.singletons = FALSE) {
  # Check inputs
  if (is(msa, "character")) {
@ -28,7 +29,9 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
  } else {
    stop("incorrect input for msa!")
  }
-  if (!is.logical(keep.singletons)) stop("Invalid keep.singletons! Must be on of TRUE/FALSE.")
+  if (!is.logical(keep_singletons)) {
    stop("Invalid keep_singletons! Must be one of TRUE/FALSE.")
  }
  # Load sequences using ape. This does a lot of the checking for us.
  seq_names <- labels(seqs)
@ -46,8 +49,8 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
  conserved <- colSums(t(t(seqs) == seqs[1, ])) == nrow(seqs)
  seqs <- seqs[, !conserved]
-  if (!keep.singletons) {
+  if (!keep_singletons) {
-    # remove singletons as they are uninformative in the algorithm
+    # remove_singletons as they are uninformative in the algorithm
    is_singleton <- apply(seqs, 2, function(x) {
      tab <- table(x)
      return(x %in% names(tab)[tab == 1])
@ -58,5 +61,11 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
  # Convert gaps and unknowns to same symbol
  seqs[seqs == "n"] <- "-"
  # Replace letters with numbers, dashes with zeros
  if (output_numbers) {
    seqs <- matrix(match(seqs, c("a", "c", "g", "t")), nrow(seqs))
    seqs[is.na(seqs)] <- 0
  }
  return(seqs)
 }
--- a/R/logml2String.R
+++ b/R/logml2String.R
@ -2,7 +2,6 @@
 #' @description Returns a string representation of a logml
 #' @param logml input Logml
 #' @return String version of logml
 #' @export
 logml2String <- function(logml) {
  # Palauttaa logml:n string-esityksen.
  mjono <- "       "
--- a/R/lueGenePopData.R
+++ b/R/lueGenePopData.R
@ -2,7 +2,6 @@
 #' @description Reads GenePop-formatted data
 #' @param tiedostonNimi Name of the file
 #' @return list containing data and popnames
 #' @export
 lueGenePopData <- function(tiedostonNimi) {
  fid <- readLines(tiedostonNimi)
  line <- fid[1] # ensimmäinen rivi
--- a/R/lueGenePopDataPop.R
+++ b/R/lueGenePopDataPop.R
@ -3,7 +3,6 @@
 #' group. popnames are as before.
 #' @param tiedostonNimi Name of the file
 #' @return List containing data and popnames
 #' @export
 lueGenePopDataPop <- function(tiedostonNimi) {
  # Data annetaan muodossa, jossa viimeinen sarake kertoo ryhmän.
  # popnames on kuten ennenkin.
--- a/R/lueNimi.R
+++ b/R/lueNimi.R
@ -2,7 +2,6 @@
 #' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
 #' @param line line
 #' @return nimi
 #' @export
 lueNimi <- function(line) {
  # ==========================================================================
  # Validation
--- a/R/noIndex.R
+++ b/R/noIndex.R
@ -5,7 +5,6 @@
 #' @return puredata: a data contains no index column.
 #' @param data data
 #' @param noalle noalle
 #' @export
 noIndex <- function(data, noalle) {
  limit <- ifelse(is(noalle, "matrix"), ncol(noalle), length(noalle))
  if (size(data, 2) == limit + 1) {
--- a/R/ownNum2Str.R
+++ b/R/ownNum2Str.R
@ -2,7 +2,6 @@
 #' @description Converts numbers to strings
 #' @param number number
 #' @note On Matlab, if number is NaN the output is 'NaN'. Here, the output will be an error. Also, the function belo expects "number" to have length one, whereas Matlab accepts vectors.
 #' @export
 ownNum2Str <- function(number) {
  absolute <- abs(number)
  if (absolute < 1000) {
--- a/R/poistaLiianPienet.R
+++ b/R/poistaLiianPienet.R
@ -5,7 +5,6 @@
 #' @param npops npops
 #' @param rowsFromInd rowsFromInd
 #' @param alaraja alaraja
 #' @export
 poistaLiianPienet <- function(npops, rowsFromInd, alaraja) {
  popSize <- zeros(1, npops)
  if (npops > 0) {
--- a/R/poistaTyhjatPopulaatiot.R
+++ b/R/poistaTyhjatPopulaatiot.R
@ -1,13 +1,13 @@
 poistaTyhjatPopulaatiot <- function(npops) {
  # % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
  # % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.
-  notEmpty <- matlab2r::find(any(SUMCOUNTS, 2))
+  notEmpty <- matlab2r::find(apply(SUMCOUNTS, 1, function(x) any(x > 0)))
  COUNTS <- COUNTS[, , notEmpty]
  SUMCOUNTS <- SUMCOUNTS[notEmpty, ]
  LOGDIFF <- LOGDIFF[, notEmpty]
  for (n in 1:length(notEmpty)) {
-    apu <- matlab2r::find(PARTITION == notEmpty(n))
+    apu <- matlab2r::find(PARTITION == notEmpty[n])
    PARTITION[apu] <- n
  }
  npops <- length(notEmpty)
--- a/R/proportion2str.R
+++ b/R/proportion2str.R
@ -3,7 +3,6 @@
 #' @return a 4-mark presentation of proportion
 #' @note The `round` function in R, being ISO-compliant, rounds 8.5 to 8. The
 #' Matlab equivalent rounds it to 9.
 #' @export
 proportion2str <- function(prob) {
  if (abs(prob) < 1e-3) {
    str <- "0.00"
--- a/R/randdir.R
+++ b/R/randdir.R
@ -1,10 +1,9 @@
 #' @title Generates random numbers
 #' @return vector of length `nc` with r.v. realizations from Gamma(rate=1)
-#' @examples randdir(matrix(c(10, 30, 60), 3), 3)
+#' @examples rBAPS:::randdir(matrix(c(10, 30, 60), 3), 3)
 #' @param counts shape parameter
 #' @param nc number of rows on output
 #' @seealso randga
 #' @export
 randdir <- function(counts, nc) {
  svar <- zeros(nc, 1)
  for (i in 1:nc) {
--- a/R/rivinSisaltamienMjonojenLkm.R
+++ b/R/rivinSisaltamienMjonojenLkm.R
@ -2,7 +2,6 @@
 #' @param line line number
 #' @return count
 #' @description Returns the number of queues contained in the line. There must be a space between the queues.
 #' @export
 rivinSisaltamienMjonojenLkm <- function(line) {
  # Palauttaa line:n sis<69>lt<6C>mien mjonojen lukum<75><6D>r<EFBFBD>n.
  # Mjonojen v<>liss?t<>ytyy olla v<>lily<6C>nti.
--- a/R/selvitaDigitFormat.R
+++ b/R/selvitaDigitFormat.R
@ -1,7 +1,6 @@
 #' @title Find out the Digit Format
 #' @param line the first line after the "pop" word from data in Genepop format. #  @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers.
 #' @return df
 #' @export
 selvitaDigitFormat <- function(line) {
  # line on ensimm<6D>inen pop-sanan j<>lkeinen rivi
  # Genepop-formaatissa olevasta datasta. funktio selvitt<74><74>
--- a/R/simulateAllFreqs.R
+++ b/R/simulateAllFreqs.R
@ -2,8 +2,6 @@
 #' @description Lisää jokaista alleelia joka populaation joka lokukseen j1/noalle(j) verran. Näin saatuja counts:eja vastaavista Dirichlet-jakaumista simuloidaan arvot populaatioiden alleelifrekvensseille.
 #' Add each allele to each locus in each population by j 1 / noalle(j). The Dirichlet distributions corresponding to the counts thus obtained simulate values for the allele frequencies of the populations.
 #' @param noalle noalle
 #' @export
 simulateAllFreqs <- function(noalle) {
  if (isGlobalEmpty(COUNTS)) {
    max_noalle <- 0
--- a/R/simulateIndividuals.R
+++ b/R/simulateIndividuals.R
@ -6,8 +6,6 @@
 #' @param allfreqs allfreqs
 #' @param pop pop
 #' @param missing_level missing_level
 #' @export
 simulateIndividuals <- function(n, rowsFromInd, allfreqs, pop, missing_level) {
  nloci <- size(allfreqs, 2)
--- a/R/simuloiAlleeli.R
+++ b/R/simuloiAlleeli.R
@ -5,8 +5,6 @@
 #' @param allfreqs allfreqa
 #' @param pop pop
 #' @param loc loc
 #' @export
 simuloiAlleeli <- function(allfreqs, pop, loc) {
  if (length(dim(allfreqs)) == 0) {
    freqs <- 1
--- a/R/suoritaMuutos.R
+++ b/R/suoritaMuutos.R
@ -3,7 +3,6 @@
 #' @param osuusTaulu Percentage table?
 #' @param osuus percentage?
 #' @param indeksi index
 #' @export
 suoritaMuutos <- function(osuusTaulu, osuus, indeksi) {
  if (isGlobalEmpty(COUNTS)) {
    npops <- 1
--- a/R/takeLine.R
+++ b/R/takeLine.R
@ -3,7 +3,6 @@
 #' @param description description
 #' @param width width
 #' @return newline
 #' @export
 takeLine <- function(description, width) {
  # Returns one line from the description: line ends to the first
  # space after width:th mark.
--- a/R/testaaKoordinaatit.R
+++ b/R/testaaKoordinaatit.R
@ -3,7 +3,6 @@
 #' @param coordinates coordinates
 #' @param interactive prompt user for relevant questions during execution
 #' @return a list of defectives ("viallinen") and coordinates
 #' @export
 testaaKoordinaatit <- function(ninds, coordinates, interactive = TRUE) {
 # Testaa onko koordinaatit kunnollisia.
 # modified by Lu Cheng, 05.12.2012
--- a/R/testaaOnkoKunnollinenBapsData.R
+++ b/R/testaaOnkoKunnollinenBapsData.R
@ -2,7 +2,6 @@
 #' @description Test if loaded BAPS data is proper
 #' @param data dataset
 #' @return ninds
 #' @export
 testaaOnkoKunnollinenBapsData <- function(data) {
  # Tarkastaa onko viimeisess?sarakkeessa kaikki
  # luvut 1,2,...,n johonkin n:<3A><>n asti.
--- a/R/testaaPop.R
+++ b/R/testaaPop.R
@ -3,7 +3,6 @@
 #' @param rivi Line
 #' @return pal = 1 if the line starts with one of the following
 # letter combinations: Pop, pop, POP. In all others cases, pal = 0
 #' @export
 testaaPop <- function(rivi) {
  # pal=1, mik<69>li rivi alkaa jollain seuraavista
  # kirjainyhdistelmist? Pop, pop, POP. Kaikissa muissa
--- a/R/writeMixtureInfo.R
+++ b/R/writeMixtureInfo.R
@ -10,7 +10,6 @@
 #' @param partitionSummary partitionSummary
 #' @param popnames popnames
 #' @param fixedK fixedK
 #' @export
 writeMixtureInfo <- function(
  logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile,
  partitionSummary, popnames, fixedK
@ -27,17 +26,18 @@ writeMixtureInfo <- function(
    fid <- load(outPutFile)
  } else {
    fid <- -1
-    # TODO: replace sink with option that will record input and output
+    outPutFile <- file.path(tempdir(), "baps4_output.baps")
-    sink("baps4_output.baps", split = TRUE) # save in text anyway.
+    message("Output saved to", outPutFile)
    sink(outPutFile, split = TRUE) # save in text anyway.
  }
  dispLine()
-  cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:")
+  cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
-  cat(c("Data file: ", inputFile))
+  cat("Data file: ", inputFile, "\n")
-  cat("Model: independent")
+  cat("Model: independent\n")
-  cat(c("Number of clustered individuals: ", ownNum2Str(ninds)))
+  cat("Number of clustered individuals: ", ownNum2Str(ninds), "\n")
-  cat(c("Number of groups in optimal partition: ", ownNum2Str(npops)))
+  cat("Number of groups in optimal partition: ", ownNum2Str(npops), "\n")
-  cat(c("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml)))
+  cat("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml), "\n")
  cat(" ")
  if (fid != -1) {
    append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
@ -88,10 +88,10 @@ writeMixtureInfo <- function(
        "Cluster ", as.character(m), ": {", as.character(indsInM[1])
      )
      for (k in 2:cluster_size) {
-        text <- c(text, ", ", as.character(indsInM[k]))
+        text <- c(text, ",", as.character(indsInM[k]))
      }
    }
-    text <- c(text, "}")
+    text <- c(text, "}\n")
    while (length(text) > 58) {
      # Take one line and display it.
      new_line <- takeLine(text, 58)
@ -107,7 +107,7 @@ writeMixtureInfo <- function(
        text <- ""
      }
    }
-    if (text != "") {
+    if (any(text != "")) {
      cat(text)
      if (fid != -1) {
        append(fid, text)
@ -117,11 +117,11 @@ writeMixtureInfo <- function(
  }
  if (npops > 1) {
-    cat(" ")
+    cat("\n")
-    cat(" ")
+    cat("\n")
    cat(
      "Changes in log(marginal likelihood)",
-      " if indvidual i is moved to group j:"
+      " if indvidual i is moved to group j:\n"
    )
    if (fid != -1) {
      append(fid, " ")
@ -132,7 +132,7 @@ writeMixtureInfo <- function(
        fid,
        c(
          "Changes in log(marginal likelihood)",
-          "if indvidual i is moved to group j:"
+          "if indvidual i is moved to group j:\n"
        )
      )
      append(fid, "\n")
@ -168,9 +168,9 @@ writeMixtureInfo <- function(
      if (names) {
        nimi <- as.character(popnames[ind])
-        rivi <- c(blanks(maxSize - length(nimi)), nimi, ":")
+        rivi <- c(blanks(maxSize - length(nimi)), nimi, ":\n")
      } else {
-        rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":")
+        rivi <- c("\n", blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":\n")
      }
      for (j in 1:npops) {
        rivi <- c(rivi, "  ", logml2String(omaRound(muutokset[j])))
@ -182,9 +182,9 @@ writeMixtureInfo <- function(
      }
    }
-    cat(" ")
+    cat("\n")
-    cat(" ")
+    cat("\n")
-    cat("KL-divergence matrix in PHYLIP format:")
+    cat("KL-divergence matrix in PHYLIP format:\n")
    dist_mat <- zeros(npops, npops)
    if (fid != -1) {
@ -194,6 +194,7 @@ writeMixtureInfo <- function(
      append(fid, "\n")
    }
    COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE]
    maxnoalle <- size(COUNTS, 1)
    nloci <- size(COUNTS, 2)
    d <- zeros(maxnoalle, nloci, npops)
@ -205,8 +206,8 @@ writeMixtureInfo <- function(
    prior[1, nollia] <- 1
    for (pop1 in 1:npops) {
-      d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) /
+      squeezed_COUNTS_prior <- squeeze(COUNTS[, , pop1]) + prior
-        repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
+      d[, , pop1] <- squeezed_COUNTS_prior / sum(squeezed_COUNTS_prior)
    }
    ekarivi <- as.character(npops)
    cat(ekarivi)
@ -216,14 +217,14 @@ writeMixtureInfo <- function(
    }
    for (pop1 in 1:npops) {
-      for (pop2 in 1:(pop1 - 1)) {
+      for (pop2 in seq_len(pop1 - 1)) {
        dist1 <- d[, , pop1]
        dist2 <- d[, , pop2]
        div12 <- sum(
-          sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10)))
+          sum(dist1 * base::log2((dist1 + 10^-10) / (dist2 + 10^-10)))
        ) / nloci
        div21 <- sum(
-          sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10)))
+          sum(dist2 * base::log2((dist2 + 10^-10) / (dist1 + 10^-10)))
        ) / nloci
        div <- (div12 + div21) / 2
        dist_mat[pop1, pop2] <- div
@ -233,9 +234,9 @@ writeMixtureInfo <- function(
    dist_mat <- dist_mat + t(dist_mat) # make it symmetric
    for (pop1 in 1:npops) {
-      rivi <- c("Cluster_", as.character(pop1), " ")
+      rivi <- c("\nCluster_", as.character(pop1), "\n")
      for (pop2 in 1:npops) {
-        rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]), " ")
+        rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]))
      }
      cat(rivi)
      if (fid != -1) {
@ -245,11 +246,11 @@ writeMixtureInfo <- function(
    }
  }
-  cat(" ")
+  cat("\n")
-  cat(" ")
+  cat("\n")
  cat(
    "List of sizes of 10 best visited partitions",
-    "and corresponding log(ml) values"
+    "and corresponding log(ml) values\n"
  )
  if (fid != -1) {
@ -279,7 +280,7 @@ writeMixtureInfo <- function(
    line <- c(
      as.character(partitionSummary[part, 1]),
      "    ",
-      as.character(partitionSummary(part, 2))
+      as.character(partitionSummary[part, 2])
    )
    cat(line)
    if (fid != -1) {
@ -289,9 +290,9 @@ writeMixtureInfo <- function(
  }
  if (!fixedK) {
-    cat(" ")
+    cat("\n")
-    cat(" ")
+    cat("\n")
-    cat("Probabilities for number of clusters")
+    cat("Probabilities for number of clusters\n")
    if (fid != -1) {
      append(fid, " ")
@ -323,7 +324,7 @@ writeMixtureInfo <- function(
        line <- c(
          as.character(npopsTaulu[i]), "   ", as.character(probs[i])
        )
-        cat(line)
+        cat(line, "\n")
        if (fid != -1) {
          append(fid, line)
          append(fid, "\n")
@ -331,5 +332,9 @@ writeMixtureInfo <- function(
      }
    }
  }
  # Closing sink(s)
  while (sink.number() > 0L) {
    sink()
  }
  return(changesInLogml)
 }
--- a/R/writeMixtureInfoPop.R
+++ b/R/writeMixtureInfoPop.R
@ -10,7 +10,6 @@
 #' @param partitionSummary partitionSummary
 #' @param popnames popnames
 #' @param fixedK fixedK
 #' @export
 writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
                                outPutFile, inputFile, partitionSummary,
                                popnames, fixedK) {
--- a/inst/extdata/BAPS_format_clustering_diploid.txt
+++ b/inst/extdata/BAPS_format_clustering_diploid.txt
--- a/inst/extdata/FASTA_clustering_haploid.fasta
+++ b/inst/extdata/FASTA_clustering_haploid.fasta
--- a/inst/extdata/bam_example.bam
+++ b/inst/extdata/bam_example.bam
--- a/inst/extdata/sam_example.sam
+++ b/inst/extdata/sam_example.sam
--- a/inst/extdata/seqs.fa
+++ b/inst/extdata/seqs.fa
--- a/inst/extdata/vcf_example.vcf
+++ b/inst/extdata/vcf_example.vcf
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Example
+++ b/inst/testdata/ExamplesDataFormatting/Example
--- a/inst/testdata/ExamplesDataFormatting/Notes.md
+++ b/inst/testdata/ExamplesDataFormatting/Notes.md
--- a/inst/testdata/datasets-for-future-releases/GENEPOP_format_trained_clustering.gen
+++ b/inst/testdata/datasets-for-future-releases/GENEPOP_format_trained_clustering.gen
--- a/inst/testdata/datasets-for-future-releases/example_drosophila.Aligned.out.sam
+++ b/inst/testdata/datasets-for-future-releases/example_drosophila.Aligned.out.sam
--- a/man/greedyMix.Rd
+++ b/man/greedyMix.Rd
@ -4,18 +4,56 @@
 \alias{greedyMix}
 \title{Clustering of individuals}
 \usage{
-greedyMix(data, format, verbose = TRUE)
+greedyMix(
  data,
  format,
  partitionCompare = NULL,
  ninds = 1L,
  npops = 1L,
  counts = NULL,
  sumcounts = NULL,
  max_iter = 100L,
  alleleCodes = NULL,
  inp = NULL,
  popnames = NULL,
  fixedK = FALSE,
  verbose = FALSE
 )
 }
 \arguments{
 \item{data}{data file}
 \item{format}{Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"}
 \item{partitionCompare}{a list of partitions to compare}
 \item{ninds}{number of individuals}
 \item{npops}{number of populations}
 \item{counts}{counts}
 \item{sumcounts}{sumcounts}
 \item{max_iter}{maximum number of iterations}
 \item{alleleCodes}{allele codes}
 \item{inp}{input file}
 \item{popnames}{population names}
 \item{fixedK}{if \code{TRUE}, the number of populations is fixed}
 \item{verbose}{if \code{TRUE}, prints extra output information}
 }
 \description{
 Clustering of individuals
 }
 \examples{
 data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
 greedyMix(data, "fasta")
 }
 \references{
 Samtools: a suite of programs for interacting
 with high-throughput sequencing data. <http://www.htslib.org/>
--- a/man/handleData.Rd
+++ b/man/handleData.Rd
@ -4,10 +4,12 @@
 \alias{handleData}
 \title{Handle Data}
 \usage{
-handleData(raw_data)
+handleData(raw_data, format = "Genepop")
 }
 \arguments{
-\item{raw_data}{Raw data}
+\item{raw_data}{Raw data in Genepop or BAPS format}
 \item{format}{data format}
 }
 \description{
 Handle Data
@ -20,5 +22,5 @@ After this function. Add blank lines for individuals with fewer rows as
 maximum. If the code of an allele is = 0, the function changes that allele
 code to the smallest code that is larger than any code in use. After this,
 the function changes the allele codes so that one locus j
-codes get values between? 1, ..., Noah (j).
+codes get values between? 1, ..., noalle(j).
 }
--- a/man/importFile.Rd
+++ b/man/importFile.Rd
@ -0,0 +1,26 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/importFile.R
 \name{importFile}
 \alias{importFile}
 \title{Import data file}
 \usage{
 importFile(data, format, verbose)
 }
 \arguments{
 \item{data}{raw dataset}
 \item{format}{data format (guesses from extension if not provided)}
 \item{verbose}{if \code{TRUE}, prints extra output information}
 }
 \value{
 The data in a format that can be used by the other functions
 }
 \description{
 Imports data from several formats (FASTA, VCF, SAM, BAM,
 Genepop).
 }
 \examples{
 path_inst <- system.file("extdata", "", package = "rBAPS")
 importFile(file.path(path_inst, "FASTA_clustering_haploid.fasta"))
 }
--- a/man/load_fasta.Rd
+++ b/man/load_fasta.Rd
@ -4,12 +4,15 @@
 \alias{load_fasta}
 \title{load_fasta}
 \usage{
-load_fasta(msa, keep.singletons = FALSE)
+load_fasta(msa, keep_singletons = FALSE, output_numbers = TRUE)
 }
 \arguments{
 \item{msa}{Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered}
-\item{keep.singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}
+\item{keep_singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}
 \item{output_numbers}{A logical indicating whether to output the data as
 numbers (TRUE) or letters (FALSE)}
 }
 \value{
 A character matrix with filtered SNP data
@ -19,8 +22,8 @@ Loads a fasta file into matrix format ready for
 running the hierBAPS algorithm.
 }
 \examples{
-msa <- system.file("ext", "seqs.fa", package = "rBAPS")
+msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
-snp.matrix <- load_fasta(msa)
+snp.matrix <- rBAPS:::load_fasta(msa)
 }
 \seealso{
 rhierbaps::load_fasta
--- a/man/randdir.Rd
+++ b/man/randdir.Rd
@ -18,7 +18,7 @@ vector of length `nc` with r.v. realizations from Gamma(rate=1)
 Generates random numbers
 }
 \examples{
-randdir(matrix(c(10, 30, 60), 3), 3)
+rBAPS:::randdir(matrix(c(10, 30, 60), 3), 3)
 }
 \seealso{
 randga
--- a/tests/testthat/test-greedyMix.R
+++ b/tests/testthat/test-greedyMix.R
@ -1,11 +1,11 @@
 context("Auxiliary functions to greedyMix")
 # Defining the relative path to current inst -----------------------------------
-path_inst <- system.file("ext", "", package = "rBAPS")
+path_inst <- system.file("extdata", "", package = "rBAPS")
 # Reading datasets -------------------------------------------------------------
 baps_diploid <- read.delim(
-  file = paste(path_inst, "BAPS_format_clustering_diploid.txt", sep = "/"),
+  file = file.path(path_inst, "BAPS_format_clustering_diploid.txt"),
  sep = " ",
  header = FALSE
 )
@ -31,26 +31,27 @@ test_that("handleData works as expected", {
  expect_equal(data_obs, data_exp)
 })
-context("Opening files on greedyMix")
+context("Processing files through greedyMix")
-df_fasta <- greedyMix(
+raw_fasta <- importFile(
  data   = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
  format = "FASTA"
 )
-df_vcf <- greedyMix(
+raw_vcf <- importFile(
  data    = file.path(path_inst, "vcf_example.vcf"),
  format  = "VCF",
  verbose = FALSE
 )
-df_bam <- greedyMix(
+df_bam <- importFile(
  data    = file.path(path_inst, "bam_example.bam"),
  format  = "BAM",
 )
 test_that("Files are imported correctly", {
-  expect_equal(dim(df_fasta), c(5, 99))
+  expect_equal(dim(raw_fasta), c(5, 99))
-  expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
+  expect_equal(dim(raw_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
  expect_error(
-    greedyMix(
+    importFile(
      data    = paste(path_inst, "sam_example.sam", sep = "/"),
      format  = "SAM",
    )
@ -58,6 +59,15 @@ test_that("Files are imported correctly", {
  expect_equal(length(df_bam[[1]]), 13)
 })
 df_fasta <- greedyMix(
  data   = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
  format = "FASTA"
 )
 test_that("greedyMix() works", {
  expect_error(greedyMix(file.path(path_inst, "vcf_example.vcf")))
  expect_error(greedyMix(file.path(path_inst, "bam_example.bam")))
 })
 context("Linkage")
 test_that("Linkages are properly calculated", {
--- a/tests/testthat/test-spatialMixture.R
+++ b/tests/testthat/test-spatialMixture.R
@ -79,7 +79,7 @@ test_that("lakseKlitik() and subfunctions produce expected output", {
 })
 test_that("testFastaData() produces same output as on MATLAB", {
-  msa <- system.file("ext", "seqs.fa", package = "rBAPS")
+  msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
  test_msa <- testFastaData(msa)
  expect_equal(test_msa$ninds, 515)
  expect_equal(dim(test_msa$data), c(515, 745))