Merge branch 'issue-25' into develop

* issue-25: (38 commits)
  Adjusted unit tests for #25
  Fixed sink() usage
  Fixed docs
  Exporting importFile()
  Improved handling of supported formats for greedyMix() (#25)
  Fixed basic parsing of FASTA files (#25)
  Increment version number to 0.0.0.9022
  Fixed syntax (#25)
  Improved printing (#25)
  Partial reversion of b034158 (#25)
  Fixed to indMix (#25)
  Incorporated handleData() on greedyMix() (#25)
  Improved handleData() to handle FASTA (#25)
  Added numeric output option to load_fasta() (#25)
  Fixed test text (#25)
  Added missing documentation for arguments (#25)
  Syntax fix (#25)
  Delayed resolution of FIXMEs (#25)
  Workaround for usage of MATLAB any() (#25)
  Fixed argument passing (#25)
  ...
This commit is contained in:
Waldir Leoncio 2023-09-11 13:15:55 +02:00
commit 59fbb0a167
95 changed files with 45170 additions and 45032 deletions

View file

@ -4,7 +4,6 @@ PITFALLS.md
CHANGELOG.md CHANGELOG.md
CITATION.cff CITATION.cff
.travis.yml .travis.yml
inst/ext/ExamplesDataFormatting inst/testdata
.github .github
aux aux

View file

@ -1,6 +1,6 @@
Package: rBAPS Package: rBAPS
Title: Bayesian Analysis of Population Structure Title: Bayesian Analysis of Population Structure
Version: 0.0.0.9020 Version: 0.0.0.9022
Date: 2020-11-09 Date: 2020-11-09
Authors@R: Authors@R:
c( c(

View file

@ -1,45 +1,8 @@
# Generated by roxygen2: do not edit by hand # Generated by roxygen2: do not edit by hand
export(addAlleles)
export(admix1)
export(calculatePopLogml)
export(computeAllFreqs2)
export(computeIndLogml)
export(computePersonalAllFreqs)
export(computeRows)
export(etsiParas)
export(fgetl)
export(fopen)
export(greedyMix) export(greedyMix)
export(greedyPopMix)
export(handleData) export(handleData)
export(handlePopData) export(importFile)
export(initPopNames)
export(learn_partition_modified)
export(learn_simple_partition)
export(linkage)
export(load_fasta)
export(logml2String)
export(lueGenePopData)
export(lueGenePopDataPop)
export(lueNimi)
export(noIndex)
export(ownNum2Str)
export(poistaLiianPienet)
export(proportion2str)
export(randdir)
export(rivinSisaltamienMjonojenLkm)
export(selvitaDigitFormat)
export(simulateAllFreqs)
export(simulateIndividuals)
export(simuloiAlleeli)
export(suoritaMuutos)
export(takeLine)
export(testaaKoordinaatit)
export(testaaOnkoKunnollinenBapsData)
export(testaaPop)
export(writeMixtureInfo)
export(writeMixtureInfoPop)
importFrom(R6,R6Class) importFrom(R6,R6Class)
importFrom(Rsamtools,scanBam) importFrom(Rsamtools,scanBam)
importFrom(adegenet,.readExt) importFrom(adegenet,.readExt)

6
NEWS.md Normal file
View file

@ -0,0 +1,6 @@
# rBAPS (development version)
# rBAPS 0.0.0.9021
* Added a `NEWS.md` file to track changes to the package.
* Exported `greedyMix()` and `load_fasta()` functions.

View file

@ -4,7 +4,6 @@
#' @param line line #' @param line line
#' @param divider divider #' @param divider divider
#' @return data (after alleles were added) #' @return data (after alleles were added)
#' @export
addAlleles <- function(data, ind, line, divider) { addAlleles <- function(data, ind, line, divider) {
# Lisaa BAPS-formaatissa olevaan datataulukkoon # Lisaa BAPS-formaatissa olevaan datataulukkoon
# yksil<69><6C> ind vastaavat rivit. Yksil<69>n alleelit # yksil<69><6C> ind vastaavat rivit. Yksil<69>n alleelit

View file

@ -6,7 +6,6 @@
#' alleleCodes, adjprior, popnames, rowsFromInd, data, npops, noalle #' alleleCodes, adjprior, popnames, rowsFromInd, data, npops, noalle
#' @param tietue tietue #' @param tietue tietue
#' @importFrom methods is #' @importFrom methods is
#' @export
admix1 <- function(tietue) { admix1 <- function(tietue) {
if (!is.list(tietue)) { if (!is.list(tietue)) {
message("Load mixture result file. These are the files in this directory:") message("Load mixture result file. These are the files in this directory:")

View file

@ -4,7 +4,6 @@
#' for the mean parameter. #' for the mean parameter.
#' @param points points #' @param points points
#' @param fii fii #' @param fii fii
#' @export
calculatePopLogml <- function(points, fii) { calculatePopLogml <- function(points, fii) {
n <- length(points) n <- length(points)
fuzzy_ones <- sum(points) fuzzy_ones <- sum(points)

23
R/comparePartitions.R Normal file
View file

@ -0,0 +1,23 @@
comparePartitions <- function(data, c.rows, partitionCompare.partitions, ninds, rowsFromInd, noalle, adjprior) {
stop("Comparing partitions not yet implemented") # TODO: implement
# nsamplingunits = size(c.rows,1);
# partitions = partitionCompare.partitions;
# npartitions = size(partitions,2);
# partitionLogml = zeros(1,npartitions);
# for i = 1:npartitions
# % number of unique partition lables
# npops = length(unique(partitions(:,i)));
# partitionInd = zeros(ninds*rowsFromInd,1);
# partitionSample = partitions(:,i);
# for j = 1:nsamplingunits
# partitionInd([c.rows(j,1):c.rows(j,2)]) = partitionSample(j);
# end
# partitionLogml(i) = initialCounts(partitionInd, data(:,1:end-1), npops, c.rows, noalle, adjprior);
# end
# % return the logml result
# partitionCompare.logmls = partitionLogml;
# set(h1, 'userdata', partitionCompare);
# return
}

View file

@ -2,7 +2,6 @@
#' @description Lisää a priori jokaista alleelia joka populaation joka lokukseen #' @description Lisää a priori jokaista alleelia joka populaation joka lokukseen
#' j 1/noalle(j) verran. #' j 1/noalle(j) verran.
#' @param noalle noalle #' @param noalle noalle
#' @export
computeAllFreqs2 <- function(noalle) { computeAllFreqs2 <- function(noalle) {
COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS) COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS)
SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS) SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS)

View file

@ -3,7 +3,6 @@
#' määritellyiksi kuten osuusTaulu:ssa. #' määritellyiksi kuten osuusTaulu:ssa.
#' @param omaFreqs own Freqs? #' @param omaFreqs own Freqs?
#' @param osuusTaulu Percentage table? #' @param osuusTaulu Percentage table?
#' @export
computeIndLogml <- function(omaFreqs, osuusTaulu) { computeIndLogml <- function(omaFreqs, osuusTaulu) {
omaFreqs <- as.matrix(omaFreqs) omaFreqs <- as.matrix(omaFreqs)
osuusTaulu <- as.matrix(osuusTaulu) osuusTaulu <- as.matrix(osuusTaulu)

View file

@ -7,8 +7,6 @@
#' @param data data #' @param data data
#' @param allFreqs allFreqs #' @param allFreqs allFreqs
#' @param rowsFromInd rowsFromInd #' @param rowsFromInd rowsFromInd
#' @export
computePersonalAllFreqs <- function(ind, data, allFreqs, rowsFromInd) { computePersonalAllFreqs <- function(ind, data, allFreqs, rowsFromInd) {
if (isGlobalEmpty(COUNTS)) { if (isGlobalEmpty(COUNTS)) {
nloci <- npops <- 1 nloci <- npops <- 1

View file

@ -4,9 +4,13 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
# ======================================================== # # ======================================================== #
# Limiting COUNTS size # # Limiting COUNTS size #
# ======================================================== # # ======================================================== #
COUNTS <- COUNTS[ if (!is.null(adjprior)) {
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = FALSE nr <- seq_len(nrow(adjprior))
] nc <- seq_len(ncol(adjprior))
COUNTS <- COUNTS[nr, nc, pops, drop = FALSE]
} else {
COUNTS <- NA
}
x <- size(COUNTS, 1) x <- size(COUNTS, 1)
y <- size(COUNTS, 2) y <- size(COUNTS, 2)
@ -15,25 +19,24 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
# ======================================================== # # ======================================================== #
# Computation # # Computation #
# ======================================================== # # ======================================================== #
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2 term1 <- NULL
term1 <- squeeze( if (!is.null(adjprior)) {
sum( isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
term1 <- squeeze(
sum( sum(
reshape( sum(
lgamma( reshape(
repmat(adjprior, c(1, 1, length(pops))) + lgamma(
COUNTS[ repmat(adjprior, c(1, 1, length(pops))) + COUNTS[nr, nc, pops, drop = !isarray]
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, ),
drop = !isarray c(x, y, z)
]
), ),
c(x, y, z) 1
), ),
1 2
), )
2
) )
) }
if (is.null(priorTerm)) priorTerm <- 0 if (is.null(priorTerm)) priorTerm <- 0
popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
return(popLogml) return(popLogml)

View file

@ -4,7 +4,6 @@
#' @param rowsFromInd rowsFromInd #' @param rowsFromInd rowsFromInd
#' @param inds matrix #' @param inds matrix
#' @param ninds ninds #' @param ninds ninds
#' @export
computeRows <- function(rowsFromInd, inds, ninds) { computeRows <- function(rowsFromInd, inds, ninds) {
if (!is(inds, "matrix")) inds <- as.matrix(inds) if (!is(inds, "matrix")) inds <- as.matrix(inds)
if (identical(dim(inds), c(nrow(inds), 1L))) { if (identical(dim(inds), c(nrow(inds), 1L))) {

View file

@ -1,4 +1,3 @@
#' @export
#' @title Etsi Paras #' @title Etsi Paras
#' @description Search for the best? #' @description Search for the best?
#' @param osuus Percentages? #' @param osuus Percentages?

View file

@ -6,7 +6,6 @@
#' fgetl returns tline as a numeric value -1. #' fgetl returns tline as a numeric value -1.
#' @author Waldir Leoncio #' @author Waldir Leoncio
#' @seealso fopen #' @seealso fopen
#' @export
fgetl <- function(file) { fgetl <- function(file) {
# ========================================================================== # ==========================================================================
# Validation # Validation
@ -27,5 +26,4 @@ fgetl <- function(file) {
#' @return The same as `readLines(filename)` #' @return The same as `readLines(filename)`
#' @author Waldir Leoncio #' @author Waldir Leoncio
#' @seealso fgetl #' @seealso fgetl
#' @export
fopen <- function(filename) readLines(filename) fopen <- function(filename) readLines(filename)

View file

@ -1,6 +1,16 @@
#' @title Clustering of individuals #' @title Clustering of individuals
#' @param data data file #' @param data data file
#' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop" #' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"
#' @param partitionCompare a list of partitions to compare
#' @param ninds number of individuals
#' @param npops number of populations
#' @param counts counts
#' @param sumcounts sumcounts
#' @param max_iter maximum number of iterations
#' @param alleleCodes allele codes
#' @param inp input file
#' @param popnames population names
#' @param fixedK if \code{TRUE}, the number of populations is fixed
#' @param verbose if \code{TRUE}, prints extra output information #' @param verbose if \code{TRUE}, prints extra output information
#' @importFrom utils read.delim #' @importFrom utils read.delim
#' @importFrom vcfR read.vcfR #' @importFrom vcfR read.vcfR
@ -9,41 +19,59 @@
#' @references Samtools: a suite of programs for interacting #' @references Samtools: a suite of programs for interacting
#' with high-throughput sequencing data. <http://www.htslib.org/> #' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export #' @export
greedyMix <- function(data, format, verbose = TRUE) { #' @examples
# Parsing data format ------------------------------------------------------ #' data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
#' greedyMix(data, "fasta")
greedyMix <- function(
data, format, partitionCompare = NULL, ninds = 1L, npops = 1L,
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
) {
# Importing and handling data ================================================
data <- importFile(data, format, verbose)
data <- handleData(data, tolower(format))
c <- list(
noalle = data[["noalle"]],
data = data[["newData"]],
adjprior = data[["adjprior"]],
priorTerm = data[["priorTerm"]],
rowsFromInd = data[["rowsFromInd"]]
)
if (missing(format)) { # Comparing partitions =======================================================
format <- gsub(".*\\.(.+)$", "\\1", data) if (!is.null(partitionCompare)) {
message("Format not provided. Guessing from file extension: ", format) logmls <- comparePartitions(
} c[["data"]], nrow(c[["data"]]), partitionCompare[["partitions"]], ninds,
format <- tolower(format) c[["rowsFromInd"]], c[["noalle"]], c[["adjprior"]]
# Dispatching to proper loading function -----------------------------------
if (format == "fasta") {
out <- load_fasta(data)
} else if (format == "vcf") {
out <- vcfR::read.vcfR(data, verbose = verbose)
} else if (format == "sam") {
stop(
"SAM files not directly supported. ",
"Install the samtools software and execute\n\n",
"samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ",
"and try running this function again with 'format=BAM'"
) )
} else if (format == "bam") {
out <- Rsamtools::scanBam(data)
} else if (format == "genepop") {
if (toupper(adegenet::.readExt(data)) == "TXT") {
message("Creating a copy of the file with the .gen extension")
dataGen <- gsub("txt", "gen", data)
file.copy(data, dataGen)
out <- adegenet::read.genepop(dataGen)
} else {
out <- adegenet::read.genepop(data)
}
} else {
stop("Format not supported.")
} }
return(out)
# Generating partition summary ===============================================
ekat <- seq(1L, c[["rowsFromInd"]], ninds * c[["rowsFromInd"]]) # ekat = (1:rowsFromInd:ninds*rowsFromInd)';
c[["rows"]] <- c(ekat, ekat + c[["rowsFromInd"]] - 1L) # c.rows = [ekat ekat+rowsFromInd-1]
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose)
logml <- logml_npops_partitionSummary[["logml"]]
npops <- logml_npops_partitionSummary[["npops"]]
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
# Generating output object ===================================================
out <- list(
"alleleCodes" = alleleCodes, "adjprior" = c[["adjprior"]],
"popnames" = popnames, "rowsFromInd" = c[["rowsFromInd"]],
"data" = c[["data"]], "npops" = npops, "noalle" = c[["noalle"]],
"mixtureType" = "mix", "logml" = logml
)
if (logml == 1) {
return(out)
}
# Writing mixture info =======================================================
changesInLogml <- writeMixtureInfo(
logml, c[["rowsFromInd"]], c[["data"]], c[["adjprior"]], c[["priorTerm"]],
NULL, inp, partitionSummary, popnames, fixedK
)
# Updateing results ==========================================================
return(c(out, "changesInLogml" = changesInLogml))
} }

View file

@ -11,7 +11,6 @@
#' @importFrom matlab2r uiputfile #' @importFrom matlab2r uiputfile
#' @references Samtools: a suite of programs for interacting #' @references Samtools: a suite of programs for interacting
#' with high-throughput sequencing data. <http://www.htslib.org/> #' with high-throughput sequencing data. <http://www.htslib.org/>
#' @export
greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
) { ) {
# Replacing original file reading code with greedyMix() # Replacing original file reading code with greedyMix()

View file

@ -1,5 +1,6 @@
#' @title Handle Data #' @title Handle Data
#' @param raw_data Raw data #' @param raw_data Raw data in Genepop or BAPS format
#' @param format data format
#' @details The last column of the original data tells you from which #' @details The last column of the original data tells you from which
#' individual that line is from. The function first examines how many line #' individual that line is from. The function first examines how many line
#' maximum is from one individual giving know if it is haploid, diploid, etc. #' maximum is from one individual giving know if it is haploid, diploid, etc.
@ -7,9 +8,9 @@
#' maximum. If the code of an allele is = 0, the function changes that allele #' maximum. If the code of an allele is = 0, the function changes that allele
#' code to the smallest code that is larger than any code in use. After this, #' code to the smallest code that is larger than any code in use. After this,
#' the function changes the allele codes so that one locus j #' the function changes the allele codes so that one locus j
#' codes get values between? 1, ..., Noah (j). #' codes get values between? 1, ..., noalle(j).
#' @export #' @export
handleData <- function(raw_data) { handleData <- function(raw_data, format = "Genepop") {
# Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt? # Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt?
# kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako # kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako
# rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan # rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan
@ -20,28 +21,29 @@ handleData <- function(raw_data) {
# koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi. # koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi.
# T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j # T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
# koodit saavat arvoja v?lill?1,...,noalle(j). # koodit saavat arvoja v?lill?1,...,noalle(j).
nloci <- switch(
tolower(format),
"genepop" = ncol(raw_data) - 1L,
"baps" = ncol(raw_data) - 1L,
"fasta" = ncol(raw_data),
"vcf" = stop("VCF format not supported for processing yet"),
"bam" = stop("BAM format not supported for processing yet")
)
data <- as.matrix(raw_data) data <- as.matrix(raw_data)
nloci <- size(raw_data, 2) - 1
dataApu <- data[, 1:nloci] dataApu <- data[, 1:nloci]
nollat <- matlab2r::find(dataApu == 0) nollat <- matlab2r::find(dataApu == 0)
if (!isempty(nollat)) { if (!isempty(nollat)) {
isoinAlleeli <- base::max(max(dataApu)) isoinAlleeli <- base::max(base::max(dataApu))
dataApu[nollat] <- isoinAlleeli + 1 dataApu[nollat] <- isoinAlleeli + 1
data[, 1:nloci] <- dataApu data[, 1:nloci] <- dataApu
} }
# dataApu <- []
# nollat <- []
# isoinAlleeli <- []
noalle <- zeros(1, nloci) noalle <- zeros(1, nloci)
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE) alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
for (i in 1:nloci) { for (i in 1:nloci) {
alleelitLokuksessaI <- unique(data[, i]) alleelitLokuksessaI <- unique(data[, i])
alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[ alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
matlab2r::find( matlab2r::find(alleelitLokuksessaI >= 0)
alleelitLokuksessaI >= 0
)
]) ])
noalle[i] <- length(alleelitLokuksessa[[i]]) noalle[i] <- length(alleelitLokuksessa[[i]])
} }
@ -49,9 +51,7 @@ handleData <- function(raw_data) {
for (i in 1:nloci) { for (i in 1:nloci) {
alleelitLokuksessaI <- alleelitLokuksessa[[i]] alleelitLokuksessaI <- alleelitLokuksessa[[i]]
puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI) puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
alleleCodes[, i] <- as.matrix( alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
c(alleelitLokuksessaI, zeros(puuttuvia, 1))
)
} }
for (loc in seq_len(nloci)) { for (loc in seq_len(nloci)) {
@ -60,7 +60,7 @@ handleData <- function(raw_data) {
} }
} }
nind <- base::max(data[, ncol(data)]) nind <- as.integer(base::max(data[, ncol(data)]))
nrows <- size(data, 1) nrows <- size(data, 1)
ncols <- size(data, 2) ncols <- size(data, 2)
rowsFromInd <- zeros(nind, 1) rowsFromInd <- zeros(nind, 1)
@ -71,11 +71,11 @@ handleData <- function(raw_data) {
a <- -999 a <- -999
emptyRow <- repmat(a, c(1, ncols)) emptyRow <- repmat(a, c(1, ncols))
lessThanMax <- matlab2r::find(rowsFromInd < maxRowsFromInd) lessThanMax <- matlab2r::find(rowsFromInd < maxRowsFromInd)
missingRows <- maxRowsFromInd * nind - nrows missingRows <- max(maxRowsFromInd * nind - nrows, 0L)
data <- rbind(data, zeros(missingRows, ncols)) data <- rbind(data, zeros(missingRows, ncols))
pointer <- 1 pointer <- 1
for (ind in t(lessThanMax)) { # K?y l?pi ne yksil?t, joilta puuttuu rivej? for (ind in t(lessThanMax)) { # K?y l?pi ne yksil?t, joilta puuttuu rivej?
miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm. miss <- maxRowsFromInd - rowsFromInd[ind] # T?lt?yksil?lt?puuttuvien lkm.
} }
data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
newData <- data newData <- data

View file

@ -4,7 +4,6 @@
#' codes so that the codes for one locus have values between 1 and noalle[j]. #' codes so that the codes for one locus have values between 1 and noalle[j].
#' Before this change, an allele whose code is zero is changed. #' Before this change, an allele whose code is zero is changed.
#' @param raw_data raw data #' @param raw_data raw data
#' @export
handlePopData <- function(raw_data) { handlePopData <- function(raw_data) {
# Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt? # Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
# kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit # kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit

49
R/importFile.R Normal file
View file

@ -0,0 +1,49 @@
#' @title Import data file
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM,
#' Genepop).
#' @param data raw dataset
#' @param format data format (guesses from extension if not provided)
#' @param verbose if \code{TRUE}, prints extra output information
#' @return The data in a format that can be used by the other functions
#' @export
#' @examples
#' path_inst <- system.file("extdata", "", package = "rBAPS")
#' importFile(file.path(path_inst, "FASTA_clustering_haploid.fasta"))
importFile <- function(data, format, verbose) {
# Parsing data format ------------------------------------------------------
if (missing(format)) {
format <- gsub(".*\\.(.+)$", "\\1", data)
message("Format not provided. Guessing from file extension: ", format)
}
format <- tolower(format)
# Dispatching to proper loading function -----------------------------------
if (format == "fasta") {
out <- load_fasta(data)
} else if (format == "vcf") {
out <- vcfR::read.vcfR(data, verbose = verbose)
} else if (format == "sam") {
stop(
"SAM files not directly supported. ",
"Install the samtools software and execute\n\n",
"samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ",
"and try running this function again with 'format=BAM'"
)
} else if (format == "bam") {
out <- Rsamtools::scanBam(data)
} else if (format == "genepop") {
if (toupper(adegenet::.readExt(data)) == "TXT") {
message("Creating a copy of the file with the .gen extension")
dataGen <- gsub("txt", "gen", data)
file.copy(data, dataGen)
out <- adegenet::read.genepop(dataGen)
} else {
out <- adegenet::read.genepop(data)
}
} else {
stop("Format not supported.")
}
return(out)
}

View file

@ -1,4 +1,4 @@
indMix <- function(c, npops, dispText = TRUE) { indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, dispText = FALSE) {
# Greedy search algorithm with unknown number of classes for regular # Greedy search algorithm with unknown number of classes for regular
# clustering. # clustering.
# Input npops is not used if called by greedyMix or greedyPopMix. # Input npops is not used if called by greedyMix or greedyPopMix.
@ -17,8 +17,11 @@ indMix <- function(c, npops, dispText = TRUE) {
if (isfield(c, "dist")) { if (isfield(c, "dist")) {
dist <- c$dist dist <- c$dist
Z <- c$Z Z <- c$Z
} else {
Z <- NULL
} }
rm(c) rm(c)
nargin <- length(as.list(match.call())) - 1 nargin <- length(as.list(match.call())) - 1
if (nargin < 2) { if (nargin < 2) {
@ -65,14 +68,14 @@ indMix <- function(c, npops, dispText = TRUE) {
nruns <- length(npopsTaulu) nruns <- length(npopsTaulu)
initData <- data initData <- data
data <- data[, 1:(ncol(data) - 1)] data <- data[, seq_along(noalle)] # Original code always dropped last column.
logmlBest <- -1e50 logmlBest <- -1e50
partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml) partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml)
partitionSummary[, 1] <- zeros(30, 1) partitionSummary[, 1] <- zeros(30, 1)
worstLogml <- -1e50 worstLogml <- -1e50
worstIndex <- 1 worstIndex <- 1
for (run in 1:nruns) { for (run in seq_along(nruns)) {
npops <- npopsTaulu[[run]] npops <- npopsTaulu[[run]]
if (dispText) { if (dispText) {
dispLine() dispLine()
@ -84,6 +87,7 @@ indMix <- function(c, npops, dispText = TRUE) {
) )
} }
ninds <- size(rows, 1) ninds <- size(rows, 1)
initialPartition <- admixture_initialization(initData, npops, Z) initialPartition <- admixture_initialization(initData, npops, Z)
sumcounts_counts_logml <- initialCounts( sumcounts_counts_logml <- initialCounts(
initialPartition, data, npops, rows, noalle, adjprior initialPartition, data, npops, rows, noalle, adjprior
@ -93,16 +97,15 @@ indMix <- function(c, npops, dispText = TRUE) {
logml <- sumcounts_counts_logml$logml logml <- sumcounts_counts_logml$logml
PARTITION <- zeros(ninds, 1) PARTITION <- zeros(ninds, 1)
for (i in 1:ninds) { for (i in seq_len(ninds)) {
apu <- rows[i] apu <- rows[i]
PARTITION[i] <- initialPartition[apu[1]] PARTITION[i] <- initialPartition[apu[1]]
} }
COUNTS <- counts COUNTS <- counts
SUMCOUNTS <- sumcounts SUMCOUNTS <- sumcounts
POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
LOGDIFF <- repmat(-Inf, c(ninds, npops)) LOGDIFF <- repmat(-Inf, c(ninds, npops))
rm(initialPartition, counts, sumcounts)
# PARHAAN MIXTURE-PARTITION ETSIMINEN # PARHAAN MIXTURE-PARTITION ETSIMINEN
nRoundTypes <- 7 nRoundTypes <- 7
@ -120,30 +123,34 @@ indMix <- function(c, npops, dispText = TRUE) {
) )
} }
iter <- 1L
while (ready != 1) { while (ready != 1) {
# FIXME: loop caught in here iter <- iter + 1L
if (iter > max_iter) {
warning("max_iter reached. Stopping prematurely.")
break
}
muutoksia <- 0 muutoksia <- 0
if (dispText) { if (dispText) {
message(paste("\nPerforming steps:", as.character(roundTypes))) message("Performing steps: ", paste(roundTypes, collapse = " "))
} }
for (n in 1:length(roundTypes)) { for (n in seq_along(roundTypes)) {
round <- roundTypes[n] round <- roundTypes[n]
kivaluku <- 0 kivaluku <- 0
if (kokeiltu[round] == 1) { # Askelta kokeiltu viime muutoksen j<>lkeen if (kokeiltu[round] == 1) { # Askelta kokeiltu viime muutoksen j<>lkeen
} else if (round == 0 | round == 1) { # Yksil<69>n siirt<72>minen toiseen populaatioon. } else if (round == 0 | round == 1) { # Yksil<69>n siirt<72>minen toiseen populaatioon.
inds <- 1:ninds inds <- seq_len(ninds)
aputaulu <- cbind(inds, rand(ninds, 1)) aputaulu <- cbind(t(inds), rand(ninds, 1))
aputaulu <- sortrows(aputaulu, 2) aputaulu <- matrix(sortrows(aputaulu, 2), nrow = nrow(aputaulu))
inds <- t(aputaulu[, 1]) inds <- t(aputaulu[, 1])
muutosNyt <- 0 muutosNyt <- 0
for (ind in inds) { for (ind in inds) {
i1 <- PARTITION[ind] i1 <- PARTITION[ind]
muutokset_diffInCounts <- greedyMix_muutokset$new() muutokset_diffInCounts <- greedyMix_muutokset$new()
# FIXME: using 100-length global variables instead of the ones in this function
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset( muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset(
ind, rows, data, adjprior, priorTerm ind, rows, data, adjprior, priorTerm
) )
@ -190,7 +197,7 @@ indMix <- function(c, npops, dispText = TRUE) {
} }
} else if (round == 2) { # Populaation yhdist<73>minen toiseen. } else if (round == 2) { # Populaation yhdist<73>minen toiseen.
maxMuutos <- 0 maxMuutos <- 0
for (pop in 1:npops) { for (pop in seq_len(npops)) {
muutokset_diffInCounts <- greedyMix_muutokset$new() muutokset_diffInCounts <- greedyMix_muutokset$new()
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset2( muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset2(
pop, rows, data, adjprior, priorTerm pop, rows, data, adjprior, priorTerm
@ -234,7 +241,7 @@ indMix <- function(c, npops, dispText = TRUE) {
} else if (round == 3 || round == 4) { # Populaation jakaminen osiin. } else if (round == 3 || round == 4) { # Populaation jakaminen osiin.
maxMuutos <- 0 maxMuutos <- 0
ninds <- size(rows, 1) ninds <- size(rows, 1)
for (pop in 1:npops) { for (pop in seq_len(npops)) {
inds2 <- matlab2r::find(PARTITION == pop) inds2 <- matlab2r::find(PARTITION == pop)
ninds2 <- length(inds2) ninds2 <- length(inds2)
if (ninds2 > 2) { if (ninds2 > 2) {
@ -265,7 +272,7 @@ indMix <- function(c, npops, dispText = TRUE) {
muutoksia <- 1 muutoksia <- 1
kokeiltu <- zeros(nRoundTypes, 1) kokeiltu <- zeros(nRoundTypes, 1)
rivit <- list() rivit <- list()
for (i in 1:length(muuttuvat)) { for (i in seq_len(muuttuvat)) {
ind <- muuttuvat[i] ind <- muuttuvat[i]
lisa <- rows[ind, 1]:rows[ind, 2] lisa <- rows[ind, 1]:rows[ind, 2]
rivit <- rbind(rivit, t(lisa)) rivit <- rbind(rivit, t(lisa))
@ -421,7 +428,7 @@ indMix <- function(c, npops, dispText = TRUE) {
totalMuutos <- muutokset(1, emptyPop) totalMuutos <- muutokset(1, emptyPop)
rivit <- list() rivit <- list()
for (i in 1:length(muuttuvat)) { for (i in seq_len(muuttuvat)) {
ind <- muuttuvat[i] ind <- muuttuvat[i]
lisa <- rows[ind, 1]:rows[ind, 2] lisa <- rows[ind, 1]:rows[ind, 2]
rivit <- c(rivit, lisa) rivit <- c(rivit, lisa)
@ -506,8 +513,6 @@ indMix <- function(c, npops, dispText = TRUE) {
} }
} }
} }
# FIXME: muutoksia is never 0, so vaihe never equals 5 and ready 1
print(paste("i1 =", i1, "i2 =", i2, "maxMuutos =", maxMuutos)) # TEMP
if (muutoksia == 0) { if (muutoksia == 0) {
if (vaihe <= 4) { if (vaihe <= 4) {
vaihe <= vaihe + 1 vaihe <= vaihe + 1
@ -536,11 +541,10 @@ indMix <- function(c, npops, dispText = TRUE) {
# TALLENNETAAN # TALLENNETAAN
npops <- poistaTyhjatPopulaatiot(npops) npops <- poistaTyhjatPopulaatiot(npops)
POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
if (dispText) { if (dispText) {
print(paste("Found partition with", as.character(npops), "populations.")) message("Found partition with ", as.character(npops), " populations.")
print(paste("Log(ml) =", as.character(logml))) message("Log(ml) = ", as.character(logml))
print(" ")
} }
if (logml > logmlBest) { if (logml > logmlBest) {

7
R/indMixWrapper.R Normal file
View file

@ -0,0 +1,7 @@
indMixWrapper <- function(c, npops, counts, sumcounts, max_iter, fixedK = FALSE, verbose = FALSE) {
if (fixedK) {
stop("indMix_fixK() not yet implemented.") # TODO: translate indMix_fixK.m
} else {
indMix(c, npops, counts, sumcounts, max_iter, verbose)
}
}

View file

@ -1,7 +1,6 @@
#' @title Initialize Pop Names #' @title Initialize Pop Names
#' @param nameFile nameFile #' @param nameFile nameFile
#' @param indexFile indexFile #' @param indexFile indexFile
#' @export
initPopNames <- function(nameFile, indexFile) { initPopNames <- function(nameFile, indexFile) {
# Palauttaa tyhj<68>n, mik<69>li nimitiedosto ja indeksitiedosto # Palauttaa tyhj<68>n, mik<69>li nimitiedosto ja indeksitiedosto
# eiv<69>t olleet yht?pitki? # eiv<69>t olleet yht?pitki?

View file

@ -2,7 +2,7 @@ initialCounts <- function(partition, data, npops, rows, noalle, adjprior) {
nloci <- size(data, 2) nloci <- size(data, 2)
ninds <- size(rows, 1) ninds <- size(rows, 1)
koot <- rows[, 1] - rows[, 2] + 1 koot <- rows[1] - rows[2] + 1
maxSize <- base::max(koot) maxSize <- base::max(koot)
counts <- zeros(base::max(noalle), nloci, npops) counts <- zeros(base::max(noalle), nloci, npops)

View file

@ -1,7 +1,7 @@
laskeLoggis <- function(counts, sumcounts, adjprior) { laskeLoggis <- function(counts, sumcounts, adjprior) {
npops <- size(counts, 3) npops <- size(counts, 3)
replicated_adjprior <- array(adjprior, c(nrow(adjprior), ncol(adjprior), npops))
sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops)))))) sum1 <- sum(sum(sum(lgamma(counts + replicated_adjprior))))
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts))) sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
logml2 <- sum1 - npops * sum3 logml2 <- sum1 - npops * sum3
loggis <- logml2 loggis <- logml2

View file

@ -349,12 +349,15 @@ greedyMix_muutokset <- R6Class(
i1_logml <- POP_LOGML[i1] i1_logml <- POP_LOGML[i1]
muutokset[i1] <- 0 muutokset[i1] <- 0
rows <- globalRows[ind, 1]:globalRows[ind, 2] if (is.null(dim(globalRows))) {
rows <- globalRows[1]:globalRows[2]
} else {
rows <- globalRows[ind, 1]:globalRows[ind, 2]
}
diffInCounts <- computeDiffInCounts( diffInCounts <- computeDiffInCounts(
rows, size(COUNTS, 1), size(COUNTS, 2), data rows, size(COUNTS, 1), size(COUNTS, 2), data
) )
diffInSumCounts <- colSums(diffInCounts) diffInSumCounts <- colSums(diffInCounts)
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)

View file

@ -1,5 +1,4 @@
#' @title Learn partition (modified) #' @title Learn partition (modified)
#' @export
#' @param ordered ordered #' @param ordered ordered
#' @return part #' @return part
#' @description This function is called only if some individual has less than #' @description This function is called only if some individual has less than

View file

@ -3,7 +3,6 @@
#' @param fii fii #' @param fii fii
#' @description Goes through all the ways to divide the points into two or #' @description Goes through all the ways to divide the points into two or
#' three groups. Chooses the partition which obtains highest logml. #' three groups. Chooses the partition which obtains highest logml.
#' @export
learn_simple_partition <- function(ordered_points, fii) { learn_simple_partition <- function(ordered_points, fii) {
npoints <- length(ordered_points) npoints <- length(ordered_points)

View file

@ -13,7 +13,6 @@
#' that BAPS should use this function instead of the base one, so this is why #' that BAPS should use this function instead of the base one, so this is why
#' this function is part of this package (instead of a MATLAB-replicating #' this function is part of this package (instead of a MATLAB-replicating
#' package such as matlab2r) #' package such as matlab2r)
#' @export
linkage <- function(Y, method = "co") { linkage <- function(Y, method = "co") {
k <- size(Y)[1] k <- size(Y)[1]
n <- size(Y)[2] n <- size(Y)[2]

View file

@ -4,18 +4,19 @@
#' running the hierBAPS algorithm. #' running the hierBAPS algorithm.
#' #'
#' @param msa Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered #' @param msa Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered
#' @param keep.singletons A logical indicating whether to consider singleton mutations in calculating the clusters #' @param keep_singletons A logical indicating whether to consider singleton mutations in calculating the clusters
#' @param output_numbers A logical indicating whether to output the data as
#' numbers (TRUE) or letters (FALSE)
#' #'
#' @return A character matrix with filtered SNP data #' @return A character matrix with filtered SNP data
#' #'
#' @examples #' @examples
#' msa <- system.file("ext", "seqs.fa", package = "rBAPS") #' msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
#' snp.matrix <- load_fasta(msa) #' snp.matrix <- rBAPS:::load_fasta(msa)
#' @author Gerry Tonkin-Hill, Waldir Leoncio #' @author Gerry Tonkin-Hill, Waldir Leoncio
#' @seealso rhierbaps::load_fasta #' @seealso rhierbaps::load_fasta
#' @importFrom ape read.FASTA as.DNAbin #' @importFrom ape read.FASTA as.DNAbin
#' @export load_fasta <- function(msa, keep_singletons = FALSE, output_numbers = TRUE) {
load_fasta <- function(msa, keep.singletons = FALSE) {
# Check inputs # Check inputs
if (is(msa, "character")) { if (is(msa, "character")) {
@ -28,7 +29,9 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
} else { } else {
stop("incorrect input for msa!") stop("incorrect input for msa!")
} }
if (!is.logical(keep.singletons)) stop("Invalid keep.singletons! Must be on of TRUE/FALSE.") if (!is.logical(keep_singletons)) {
stop("Invalid keep_singletons! Must be one of TRUE/FALSE.")
}
# Load sequences using ape. This does a lot of the checking for us. # Load sequences using ape. This does a lot of the checking for us.
seq_names <- labels(seqs) seq_names <- labels(seqs)
@ -46,8 +49,8 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
conserved <- colSums(t(t(seqs) == seqs[1, ])) == nrow(seqs) conserved <- colSums(t(t(seqs) == seqs[1, ])) == nrow(seqs)
seqs <- seqs[, !conserved] seqs <- seqs[, !conserved]
if (!keep.singletons) { if (!keep_singletons) {
# remove singletons as they are uninformative in the algorithm # remove_singletons as they are uninformative in the algorithm
is_singleton <- apply(seqs, 2, function(x) { is_singleton <- apply(seqs, 2, function(x) {
tab <- table(x) tab <- table(x)
return(x %in% names(tab)[tab == 1]) return(x %in% names(tab)[tab == 1])
@ -58,5 +61,11 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
# Convert gaps and unknowns to same symbol # Convert gaps and unknowns to same symbol
seqs[seqs == "n"] <- "-" seqs[seqs == "n"] <- "-"
# Replace letters with numbers, dashes with zeros
if (output_numbers) {
seqs <- matrix(match(seqs, c("a", "c", "g", "t")), nrow(seqs))
seqs[is.na(seqs)] <- 0
}
return(seqs) return(seqs)
} }

View file

@ -2,7 +2,6 @@
#' @description Returns a string representation of a logml #' @description Returns a string representation of a logml
#' @param logml input Logml #' @param logml input Logml
#' @return String version of logml #' @return String version of logml
#' @export
logml2String <- function(logml) { logml2String <- function(logml) {
# Palauttaa logml:n string-esityksen. # Palauttaa logml:n string-esityksen.
mjono <- " " mjono <- " "

View file

@ -2,7 +2,6 @@
#' @description Reads GenePop-formatted data #' @description Reads GenePop-formatted data
#' @param tiedostonNimi Name of the file #' @param tiedostonNimi Name of the file
#' @return list containing data and popnames #' @return list containing data and popnames
#' @export
lueGenePopData <- function(tiedostonNimi) { lueGenePopData <- function(tiedostonNimi) {
fid <- readLines(tiedostonNimi) fid <- readLines(tiedostonNimi)
line <- fid[1] # ensimmäinen rivi line <- fid[1] # ensimmäinen rivi

View file

@ -3,7 +3,6 @@
#' group. popnames are as before. #' group. popnames are as before.
#' @param tiedostonNimi Name of the file #' @param tiedostonNimi Name of the file
#' @return List containing data and popnames #' @return List containing data and popnames
#' @export
lueGenePopDataPop <- function(tiedostonNimi) { lueGenePopDataPop <- function(tiedostonNimi) {
# Data annetaan muodossa, jossa viimeinen sarake kertoo ryhmän. # Data annetaan muodossa, jossa viimeinen sarake kertoo ryhmän.
# popnames on kuten ennenkin. # popnames on kuten ennenkin.

View file

@ -2,7 +2,6 @@
#' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area #' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
#' @param line line #' @param line line
#' @return nimi #' @return nimi
#' @export
lueNimi <- function(line) { lueNimi <- function(line) {
# ========================================================================== # ==========================================================================
# Validation # Validation

View file

@ -5,7 +5,6 @@
#' @return puredata: a data contains no index column. #' @return puredata: a data contains no index column.
#' @param data data #' @param data data
#' @param noalle noalle #' @param noalle noalle
#' @export
noIndex <- function(data, noalle) { noIndex <- function(data, noalle) {
limit <- ifelse(is(noalle, "matrix"), ncol(noalle), length(noalle)) limit <- ifelse(is(noalle, "matrix"), ncol(noalle), length(noalle))
if (size(data, 2) == limit + 1) { if (size(data, 2) == limit + 1) {

View file

@ -2,7 +2,6 @@
#' @description Converts numbers to strings #' @description Converts numbers to strings
#' @param number number #' @param number number
#' @note On Matlab, if number is NaN the output is 'NaN'. Here, the output will be an error. Also, the function belo expects "number" to have length one, whereas Matlab accepts vectors. #' @note On Matlab, if number is NaN the output is 'NaN'. Here, the output will be an error. Also, the function belo expects "number" to have length one, whereas Matlab accepts vectors.
#' @export
ownNum2Str <- function(number) { ownNum2Str <- function(number) {
absolute <- abs(number) absolute <- abs(number)
if (absolute < 1000) { if (absolute < 1000) {

View file

@ -5,7 +5,6 @@
#' @param npops npops #' @param npops npops
#' @param rowsFromInd rowsFromInd #' @param rowsFromInd rowsFromInd
#' @param alaraja alaraja #' @param alaraja alaraja
#' @export
poistaLiianPienet <- function(npops, rowsFromInd, alaraja) { poistaLiianPienet <- function(npops, rowsFromInd, alaraja) {
popSize <- zeros(1, npops) popSize <- zeros(1, npops)
if (npops > 0) { if (npops > 0) {

View file

@ -1,13 +1,13 @@
poistaTyhjatPopulaatiot <- function(npops) { poistaTyhjatPopulaatiot <- function(npops) {
# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja # % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
# % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in. # % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.
notEmpty <- matlab2r::find(any(SUMCOUNTS, 2)) notEmpty <- matlab2r::find(apply(SUMCOUNTS, 1, function(x) any(x > 0)))
COUNTS <- COUNTS[, , notEmpty] COUNTS <- COUNTS[, , notEmpty]
SUMCOUNTS <- SUMCOUNTS[notEmpty, ] SUMCOUNTS <- SUMCOUNTS[notEmpty, ]
LOGDIFF <- LOGDIFF[, notEmpty] LOGDIFF <- LOGDIFF[, notEmpty]
for (n in 1:length(notEmpty)) { for (n in 1:length(notEmpty)) {
apu <- matlab2r::find(PARTITION == notEmpty(n)) apu <- matlab2r::find(PARTITION == notEmpty[n])
PARTITION[apu] <- n PARTITION[apu] <- n
} }
npops <- length(notEmpty) npops <- length(notEmpty)

View file

@ -3,7 +3,6 @@
#' @return a 4-mark presentation of proportion #' @return a 4-mark presentation of proportion
#' @note The `round` function in R, being ISO-compliant, rounds 8.5 to 8. The #' @note The `round` function in R, being ISO-compliant, rounds 8.5 to 8. The
#' Matlab equivalent rounds it to 9. #' Matlab equivalent rounds it to 9.
#' @export
proportion2str <- function(prob) { proportion2str <- function(prob) {
if (abs(prob) < 1e-3) { if (abs(prob) < 1e-3) {
str <- "0.00" str <- "0.00"

View file

@ -1,10 +1,9 @@
#' @title Generates random numbers #' @title Generates random numbers
#' @return vector of length `nc` with r.v. realizations from Gamma(rate=1) #' @return vector of length `nc` with r.v. realizations from Gamma(rate=1)
#' @examples randdir(matrix(c(10, 30, 60), 3), 3) #' @examples rBAPS:::randdir(matrix(c(10, 30, 60), 3), 3)
#' @param counts shape parameter #' @param counts shape parameter
#' @param nc number of rows on output #' @param nc number of rows on output
#' @seealso randga #' @seealso randga
#' @export
randdir <- function(counts, nc) { randdir <- function(counts, nc) {
svar <- zeros(nc, 1) svar <- zeros(nc, 1)
for (i in 1:nc) { for (i in 1:nc) {

View file

@ -2,7 +2,6 @@
#' @param line line number #' @param line line number
#' @return count #' @return count
#' @description Returns the number of queues contained in the line. There must be a space between the queues. #' @description Returns the number of queues contained in the line. There must be a space between the queues.
#' @export
rivinSisaltamienMjonojenLkm <- function(line) { rivinSisaltamienMjonojenLkm <- function(line) {
# Palauttaa line:n sis<69>lt<6C>mien mjonojen lukum<75><6D>r<EFBFBD>n. # Palauttaa line:n sis<69>lt<6C>mien mjonojen lukum<75><6D>r<EFBFBD>n.
# Mjonojen v<>liss?t<>ytyy olla v<>lily<6C>nti. # Mjonojen v<>liss?t<>ytyy olla v<>lily<6C>nti.

View file

@ -1,7 +1,6 @@
#' @title Find out the Digit Format #' @title Find out the Digit Format
#' @param line the first line after the "pop" word from data in Genepop format. # @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers. #' @param line the first line after the "pop" word from data in Genepop format. # @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers.
#' @return df #' @return df
#' @export
selvitaDigitFormat <- function(line) { selvitaDigitFormat <- function(line) {
# line on ensimm<6D>inen pop-sanan j<>lkeinen rivi # line on ensimm<6D>inen pop-sanan j<>lkeinen rivi
# Genepop-formaatissa olevasta datasta. funktio selvitt<74><74> # Genepop-formaatissa olevasta datasta. funktio selvitt<74><74>

View file

@ -2,8 +2,6 @@
#' @description Lisää jokaista alleelia joka populaation joka lokukseen j1/noalle(j) verran. Näin saatuja counts:eja vastaavista Dirichlet-jakaumista simuloidaan arvot populaatioiden alleelifrekvensseille. #' @description Lisää jokaista alleelia joka populaation joka lokukseen j1/noalle(j) verran. Näin saatuja counts:eja vastaavista Dirichlet-jakaumista simuloidaan arvot populaatioiden alleelifrekvensseille.
#' Add each allele to each locus in each population by j 1 / noalle(j). The Dirichlet distributions corresponding to the counts thus obtained simulate values for the allele frequencies of the populations. #' Add each allele to each locus in each population by j 1 / noalle(j). The Dirichlet distributions corresponding to the counts thus obtained simulate values for the allele frequencies of the populations.
#' @param noalle noalle #' @param noalle noalle
#' @export
simulateAllFreqs <- function(noalle) { simulateAllFreqs <- function(noalle) {
if (isGlobalEmpty(COUNTS)) { if (isGlobalEmpty(COUNTS)) {
max_noalle <- 0 max_noalle <- 0

View file

@ -6,8 +6,6 @@
#' @param allfreqs allfreqs #' @param allfreqs allfreqs
#' @param pop pop #' @param pop pop
#' @param missing_level missing_level #' @param missing_level missing_level
#' @export
simulateIndividuals <- function(n, rowsFromInd, allfreqs, pop, missing_level) { simulateIndividuals <- function(n, rowsFromInd, allfreqs, pop, missing_level) {
nloci <- size(allfreqs, 2) nloci <- size(allfreqs, 2)

View file

@ -5,8 +5,6 @@
#' @param allfreqs allfreqa #' @param allfreqs allfreqa
#' @param pop pop #' @param pop pop
#' @param loc loc #' @param loc loc
#' @export
simuloiAlleeli <- function(allfreqs, pop, loc) { simuloiAlleeli <- function(allfreqs, pop, loc) {
if (length(dim(allfreqs)) == 0) { if (length(dim(allfreqs)) == 0) {
freqs <- 1 freqs <- 1

View file

@ -3,7 +3,6 @@
#' @param osuusTaulu Percentage table? #' @param osuusTaulu Percentage table?
#' @param osuus percentage? #' @param osuus percentage?
#' @param indeksi index #' @param indeksi index
#' @export
suoritaMuutos <- function(osuusTaulu, osuus, indeksi) { suoritaMuutos <- function(osuusTaulu, osuus, indeksi) {
if (isGlobalEmpty(COUNTS)) { if (isGlobalEmpty(COUNTS)) {
npops <- 1 npops <- 1

View file

@ -3,7 +3,6 @@
#' @param description description #' @param description description
#' @param width width #' @param width width
#' @return newline #' @return newline
#' @export
takeLine <- function(description, width) { takeLine <- function(description, width) {
# Returns one line from the description: line ends to the first # Returns one line from the description: line ends to the first
# space after width:th mark. # space after width:th mark.

View file

@ -3,7 +3,6 @@
#' @param coordinates coordinates #' @param coordinates coordinates
#' @param interactive prompt user for relevant questions during execution #' @param interactive prompt user for relevant questions during execution
#' @return a list of defectives ("viallinen") and coordinates #' @return a list of defectives ("viallinen") and coordinates
#' @export
testaaKoordinaatit <- function(ninds, coordinates, interactive = TRUE) { testaaKoordinaatit <- function(ninds, coordinates, interactive = TRUE) {
# Testaa onko koordinaatit kunnollisia. # Testaa onko koordinaatit kunnollisia.
# modified by Lu Cheng, 05.12.2012 # modified by Lu Cheng, 05.12.2012

View file

@ -2,7 +2,6 @@
#' @description Test if loaded BAPS data is proper #' @description Test if loaded BAPS data is proper
#' @param data dataset #' @param data dataset
#' @return ninds #' @return ninds
#' @export
testaaOnkoKunnollinenBapsData <- function(data) { testaaOnkoKunnollinenBapsData <- function(data) {
# Tarkastaa onko viimeisess?sarakkeessa kaikki # Tarkastaa onko viimeisess?sarakkeessa kaikki
# luvut 1,2,...,n johonkin n:<3A><>n asti. # luvut 1,2,...,n johonkin n:<3A><>n asti.

View file

@ -3,7 +3,6 @@
#' @param rivi Line #' @param rivi Line
#' @return pal = 1 if the line starts with one of the following #' @return pal = 1 if the line starts with one of the following
# letter combinations: Pop, pop, POP. In all others cases, pal = 0 # letter combinations: Pop, pop, POP. In all others cases, pal = 0
#' @export
testaaPop <- function(rivi) { testaaPop <- function(rivi) {
# pal=1, mik<69>li rivi alkaa jollain seuraavista # pal=1, mik<69>li rivi alkaa jollain seuraavista
# kirjainyhdistelmist? Pop, pop, POP. Kaikissa muissa # kirjainyhdistelmist? Pop, pop, POP. Kaikissa muissa

View file

@ -10,7 +10,6 @@
#' @param partitionSummary partitionSummary #' @param partitionSummary partitionSummary
#' @param popnames popnames #' @param popnames popnames
#' @param fixedK fixedK #' @param fixedK fixedK
#' @export
writeMixtureInfo <- function( writeMixtureInfo <- function(
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile,
partitionSummary, popnames, fixedK partitionSummary, popnames, fixedK
@ -27,17 +26,18 @@ writeMixtureInfo <- function(
fid <- load(outPutFile) fid <- load(outPutFile)
} else { } else {
fid <- -1 fid <- -1
# TODO: replace sink with option that will record input and output outPutFile <- file.path(tempdir(), "baps4_output.baps")
sink("baps4_output.baps", split = TRUE) # save in text anyway. message("Output saved to", outPutFile)
sink(outPutFile, split = TRUE) # save in text anyway.
} }
dispLine() dispLine()
cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:") cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
cat(c("Data file: ", inputFile)) cat("Data file: ", inputFile, "\n")
cat("Model: independent") cat("Model: independent\n")
cat(c("Number of clustered individuals: ", ownNum2Str(ninds))) cat("Number of clustered individuals: ", ownNum2Str(ninds), "\n")
cat(c("Number of groups in optimal partition: ", ownNum2Str(npops))) cat("Number of groups in optimal partition: ", ownNum2Str(npops), "\n")
cat(c("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml))) cat("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml), "\n")
cat(" ") cat(" ")
if (fid != -1) { if (fid != -1) {
append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n") append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
@ -88,10 +88,10 @@ writeMixtureInfo <- function(
"Cluster ", as.character(m), ": {", as.character(indsInM[1]) "Cluster ", as.character(m), ": {", as.character(indsInM[1])
) )
for (k in 2:cluster_size) { for (k in 2:cluster_size) {
text <- c(text, ", ", as.character(indsInM[k])) text <- c(text, ",", as.character(indsInM[k]))
} }
} }
text <- c(text, "}") text <- c(text, "}\n")
while (length(text) > 58) { while (length(text) > 58) {
# Take one line and display it. # Take one line and display it.
new_line <- takeLine(text, 58) new_line <- takeLine(text, 58)
@ -107,7 +107,7 @@ writeMixtureInfo <- function(
text <- "" text <- ""
} }
} }
if (text != "") { if (any(text != "")) {
cat(text) cat(text)
if (fid != -1) { if (fid != -1) {
append(fid, text) append(fid, text)
@ -117,11 +117,11 @@ writeMixtureInfo <- function(
} }
if (npops > 1) { if (npops > 1) {
cat(" ") cat("\n")
cat(" ") cat("\n")
cat( cat(
"Changes in log(marginal likelihood)", "Changes in log(marginal likelihood)",
" if indvidual i is moved to group j:" " if indvidual i is moved to group j:\n"
) )
if (fid != -1) { if (fid != -1) {
append(fid, " ") append(fid, " ")
@ -132,7 +132,7 @@ writeMixtureInfo <- function(
fid, fid,
c( c(
"Changes in log(marginal likelihood)", "Changes in log(marginal likelihood)",
"if indvidual i is moved to group j:" "if indvidual i is moved to group j:\n"
) )
) )
append(fid, "\n") append(fid, "\n")
@ -168,9 +168,9 @@ writeMixtureInfo <- function(
if (names) { if (names) {
nimi <- as.character(popnames[ind]) nimi <- as.character(popnames[ind])
rivi <- c(blanks(maxSize - length(nimi)), nimi, ":") rivi <- c(blanks(maxSize - length(nimi)), nimi, ":\n")
} else { } else {
rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":") rivi <- c("\n", blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":\n")
} }
for (j in 1:npops) { for (j in 1:npops) {
rivi <- c(rivi, " ", logml2String(omaRound(muutokset[j]))) rivi <- c(rivi, " ", logml2String(omaRound(muutokset[j])))
@ -182,9 +182,9 @@ writeMixtureInfo <- function(
} }
} }
cat(" ") cat("\n")
cat(" ") cat("\n")
cat("KL-divergence matrix in PHYLIP format:") cat("KL-divergence matrix in PHYLIP format:\n")
dist_mat <- zeros(npops, npops) dist_mat <- zeros(npops, npops)
if (fid != -1) { if (fid != -1) {
@ -194,6 +194,7 @@ writeMixtureInfo <- function(
append(fid, "\n") append(fid, "\n")
} }
COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE]
maxnoalle <- size(COUNTS, 1) maxnoalle <- size(COUNTS, 1)
nloci <- size(COUNTS, 2) nloci <- size(COUNTS, 2)
d <- zeros(maxnoalle, nloci, npops) d <- zeros(maxnoalle, nloci, npops)
@ -205,8 +206,8 @@ writeMixtureInfo <- function(
prior[1, nollia] <- 1 prior[1, nollia] <- 1
for (pop1 in 1:npops) { for (pop1 in 1:npops) {
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / squeezed_COUNTS_prior <- squeeze(COUNTS[, , pop1]) + prior
repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1)) d[, , pop1] <- squeezed_COUNTS_prior / sum(squeezed_COUNTS_prior)
} }
ekarivi <- as.character(npops) ekarivi <- as.character(npops)
cat(ekarivi) cat(ekarivi)
@ -216,14 +217,14 @@ writeMixtureInfo <- function(
} }
for (pop1 in 1:npops) { for (pop1 in 1:npops) {
for (pop2 in 1:(pop1 - 1)) { for (pop2 in seq_len(pop1 - 1)) {
dist1 <- d[, , pop1] dist1 <- d[, , pop1]
dist2 <- d[, , pop2] dist2 <- d[, , pop2]
div12 <- sum( div12 <- sum(
sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10))) sum(dist1 * base::log2((dist1 + 10^-10) / (dist2 + 10^-10)))
) / nloci ) / nloci
div21 <- sum( div21 <- sum(
sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10))) sum(dist2 * base::log2((dist2 + 10^-10) / (dist1 + 10^-10)))
) / nloci ) / nloci
div <- (div12 + div21) / 2 div <- (div12 + div21) / 2
dist_mat[pop1, pop2] <- div dist_mat[pop1, pop2] <- div
@ -233,9 +234,9 @@ writeMixtureInfo <- function(
dist_mat <- dist_mat + t(dist_mat) # make it symmetric dist_mat <- dist_mat + t(dist_mat) # make it symmetric
for (pop1 in 1:npops) { for (pop1 in 1:npops) {
rivi <- c("Cluster_", as.character(pop1), " ") rivi <- c("\nCluster_", as.character(pop1), "\n")
for (pop2 in 1:npops) { for (pop2 in 1:npops) {
rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]), " ") rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]))
} }
cat(rivi) cat(rivi)
if (fid != -1) { if (fid != -1) {
@ -245,11 +246,11 @@ writeMixtureInfo <- function(
} }
} }
cat(" ") cat("\n")
cat(" ") cat("\n")
cat( cat(
"List of sizes of 10 best visited partitions", "List of sizes of 10 best visited partitions",
"and corresponding log(ml) values" "and corresponding log(ml) values\n"
) )
if (fid != -1) { if (fid != -1) {
@ -279,7 +280,7 @@ writeMixtureInfo <- function(
line <- c( line <- c(
as.character(partitionSummary[part, 1]), as.character(partitionSummary[part, 1]),
" ", " ",
as.character(partitionSummary(part, 2)) as.character(partitionSummary[part, 2])
) )
cat(line) cat(line)
if (fid != -1) { if (fid != -1) {
@ -289,9 +290,9 @@ writeMixtureInfo <- function(
} }
if (!fixedK) { if (!fixedK) {
cat(" ") cat("\n")
cat(" ") cat("\n")
cat("Probabilities for number of clusters") cat("Probabilities for number of clusters\n")
if (fid != -1) { if (fid != -1) {
append(fid, " ") append(fid, " ")
@ -323,7 +324,7 @@ writeMixtureInfo <- function(
line <- c( line <- c(
as.character(npopsTaulu[i]), " ", as.character(probs[i]) as.character(npopsTaulu[i]), " ", as.character(probs[i])
) )
cat(line) cat(line, "\n")
if (fid != -1) { if (fid != -1) {
append(fid, line) append(fid, line)
append(fid, "\n") append(fid, "\n")
@ -331,5 +332,9 @@ writeMixtureInfo <- function(
} }
} }
} }
# Closing sink(s)
while (sink.number() > 0L) {
sink()
}
return(changesInLogml) return(changesInLogml)
} }

View file

@ -10,7 +10,6 @@
#' @param partitionSummary partitionSummary #' @param partitionSummary partitionSummary
#' @param popnames popnames #' @param popnames popnames
#' @param fixedK fixedK #' @param fixedK fixedK
#' @export
writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm, writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
outPutFile, inputFile, partitionSummary, outPutFile, inputFile, partitionSummary,
popnames, fixedK) { popnames, fixedK) {

View file

@ -4,18 +4,56 @@
\alias{greedyMix} \alias{greedyMix}
\title{Clustering of individuals} \title{Clustering of individuals}
\usage{ \usage{
greedyMix(data, format, verbose = TRUE) greedyMix(
data,
format,
partitionCompare = NULL,
ninds = 1L,
npops = 1L,
counts = NULL,
sumcounts = NULL,
max_iter = 100L,
alleleCodes = NULL,
inp = NULL,
popnames = NULL,
fixedK = FALSE,
verbose = FALSE
)
} }
\arguments{ \arguments{
\item{data}{data file} \item{data}{data file}
\item{format}{Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"} \item{format}{Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"}
\item{partitionCompare}{a list of partitions to compare}
\item{ninds}{number of individuals}
\item{npops}{number of populations}
\item{counts}{counts}
\item{sumcounts}{sumcounts}
\item{max_iter}{maximum number of iterations}
\item{alleleCodes}{allele codes}
\item{inp}{input file}
\item{popnames}{population names}
\item{fixedK}{if \code{TRUE}, the number of populations is fixed}
\item{verbose}{if \code{TRUE}, prints extra output information} \item{verbose}{if \code{TRUE}, prints extra output information}
} }
\description{ \description{
Clustering of individuals Clustering of individuals
} }
\examples{
data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
greedyMix(data, "fasta")
}
\references{ \references{
Samtools: a suite of programs for interacting Samtools: a suite of programs for interacting
with high-throughput sequencing data. <http://www.htslib.org/> with high-throughput sequencing data. <http://www.htslib.org/>

View file

@ -4,10 +4,12 @@
\alias{handleData} \alias{handleData}
\title{Handle Data} \title{Handle Data}
\usage{ \usage{
handleData(raw_data) handleData(raw_data, format = "Genepop")
} }
\arguments{ \arguments{
\item{raw_data}{Raw data} \item{raw_data}{Raw data in Genepop or BAPS format}
\item{format}{data format}
} }
\description{ \description{
Handle Data Handle Data
@ -20,5 +22,5 @@ After this function. Add blank lines for individuals with fewer rows as
maximum. If the code of an allele is = 0, the function changes that allele maximum. If the code of an allele is = 0, the function changes that allele
code to the smallest code that is larger than any code in use. After this, code to the smallest code that is larger than any code in use. After this,
the function changes the allele codes so that one locus j the function changes the allele codes so that one locus j
codes get values between? 1, ..., Noah (j). codes get values between? 1, ..., noalle(j).
} }

26
man/importFile.Rd Normal file
View file

@ -0,0 +1,26 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/importFile.R
\name{importFile}
\alias{importFile}
\title{Import data file}
\usage{
importFile(data, format, verbose)
}
\arguments{
\item{data}{raw dataset}
\item{format}{data format (guesses from extension if not provided)}
\item{verbose}{if \code{TRUE}, prints extra output information}
}
\value{
The data in a format that can be used by the other functions
}
\description{
Imports data from several formats (FASTA, VCF, SAM, BAM,
Genepop).
}
\examples{
path_inst <- system.file("extdata", "", package = "rBAPS")
importFile(file.path(path_inst, "FASTA_clustering_haploid.fasta"))
}

View file

@ -4,12 +4,15 @@
\alias{load_fasta} \alias{load_fasta}
\title{load_fasta} \title{load_fasta}
\usage{ \usage{
load_fasta(msa, keep.singletons = FALSE) load_fasta(msa, keep_singletons = FALSE, output_numbers = TRUE)
} }
\arguments{ \arguments{
\item{msa}{Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered} \item{msa}{Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered}
\item{keep.singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters} \item{keep_singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}
\item{output_numbers}{A logical indicating whether to output the data as
numbers (TRUE) or letters (FALSE)}
} }
\value{ \value{
A character matrix with filtered SNP data A character matrix with filtered SNP data
@ -19,8 +22,8 @@ Loads a fasta file into matrix format ready for
running the hierBAPS algorithm. running the hierBAPS algorithm.
} }
\examples{ \examples{
msa <- system.file("ext", "seqs.fa", package = "rBAPS") msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
snp.matrix <- load_fasta(msa) snp.matrix <- rBAPS:::load_fasta(msa)
} }
\seealso{ \seealso{
rhierbaps::load_fasta rhierbaps::load_fasta

View file

@ -18,7 +18,7 @@ vector of length `nc` with r.v. realizations from Gamma(rate=1)
Generates random numbers Generates random numbers
} }
\examples{ \examples{
randdir(matrix(c(10, 30, 60), 3), 3) rBAPS:::randdir(matrix(c(10, 30, 60), 3), 3)
} }
\seealso{ \seealso{
randga randga

View file

@ -1,11 +1,11 @@
context("Auxiliary functions to greedyMix") context("Auxiliary functions to greedyMix")
# Defining the relative path to current inst ----------------------------------- # Defining the relative path to current inst -----------------------------------
path_inst <- system.file("ext", "", package = "rBAPS") path_inst <- system.file("extdata", "", package = "rBAPS")
# Reading datasets ------------------------------------------------------------- # Reading datasets -------------------------------------------------------------
baps_diploid <- read.delim( baps_diploid <- read.delim(
file = paste(path_inst, "BAPS_format_clustering_diploid.txt", sep = "/"), file = file.path(path_inst, "BAPS_format_clustering_diploid.txt"),
sep = " ", sep = " ",
header = FALSE header = FALSE
) )
@ -31,26 +31,27 @@ test_that("handleData works as expected", {
expect_equal(data_obs, data_exp) expect_equal(data_obs, data_exp)
}) })
context("Opening files on greedyMix") context("Processing files through greedyMix")
df_fasta <- greedyMix( raw_fasta <- importFile(
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"), data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
format = "FASTA" format = "FASTA"
) )
df_vcf <- greedyMix( raw_vcf <- importFile(
data = file.path(path_inst, "vcf_example.vcf"), data = file.path(path_inst, "vcf_example.vcf"),
format = "VCF", format = "VCF",
verbose = FALSE verbose = FALSE
) )
df_bam <- greedyMix( df_bam <- importFile(
data = file.path(path_inst, "bam_example.bam"), data = file.path(path_inst, "bam_example.bam"),
format = "BAM", format = "BAM",
) )
test_that("Files are imported correctly", { test_that("Files are imported correctly", {
expect_equal(dim(df_fasta), c(5, 99)) expect_equal(dim(raw_fasta), c(5, 99))
expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3)) expect_equal(dim(raw_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
expect_error( expect_error(
greedyMix( importFile(
data = paste(path_inst, "sam_example.sam", sep = "/"), data = paste(path_inst, "sam_example.sam", sep = "/"),
format = "SAM", format = "SAM",
) )
@ -58,6 +59,15 @@ test_that("Files are imported correctly", {
expect_equal(length(df_bam[[1]]), 13) expect_equal(length(df_bam[[1]]), 13)
}) })
df_fasta <- greedyMix(
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
format = "FASTA"
)
test_that("greedyMix() works", {
expect_error(greedyMix(file.path(path_inst, "vcf_example.vcf")))
expect_error(greedyMix(file.path(path_inst, "bam_example.bam")))
})
context("Linkage") context("Linkage")
test_that("Linkages are properly calculated", { test_that("Linkages are properly calculated", {

View file

@ -79,7 +79,7 @@ test_that("lakseKlitik() and subfunctions produce expected output", {
}) })
test_that("testFastaData() produces same output as on MATLAB", { test_that("testFastaData() produces same output as on MATLAB", {
msa <- system.file("ext", "seqs.fa", package = "rBAPS") msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
test_msa <- testFastaData(msa) test_msa <- testFastaData(msa)
expect_equal(test_msa$ninds, 515) expect_equal(test_msa$ninds, 515)
expect_equal(dim(test_msa$data), c(515, 745)) expect_equal(dim(test_msa$data), c(515, 745))