Merge branch 'issue-25' into develop
* issue-25: (38 commits)
Adjusted unit tests for #25
Fixed sink() usage
Fixed docs
Exporting importFile()
Improved handling of supported formats for greedyMix() (#25)
Fixed basic parsing of FASTA files (#25)
Increment version number to 0.0.0.9022
Fixed syntax (#25)
Improved printing (#25)
Partial reversion of b034158 (#25)
Fixed to indMix (#25)
Incorporated handleData() on greedyMix() (#25)
Improved handleData() to handle FASTA (#25)
Added numeric output option to load_fasta() (#25)
Fixed test text (#25)
Added missing documentation for arguments (#25)
Syntax fix (#25)
Delayed resolution of FIXMEs (#25)
Workaround for usage of MATLAB any() (#25)
Fixed argument passing (#25)
...
This commit is contained in:
commit
59fbb0a167
95 changed files with 45170 additions and 45032 deletions
|
|
@ -4,7 +4,6 @@ PITFALLS.md
|
||||||
CHANGELOG.md
|
CHANGELOG.md
|
||||||
CITATION.cff
|
CITATION.cff
|
||||||
.travis.yml
|
.travis.yml
|
||||||
inst/ext/ExamplesDataFormatting
|
inst/testdata
|
||||||
.github
|
.github
|
||||||
aux
|
aux
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
Package: rBAPS
|
Package: rBAPS
|
||||||
Title: Bayesian Analysis of Population Structure
|
Title: Bayesian Analysis of Population Structure
|
||||||
Version: 0.0.0.9020
|
Version: 0.0.0.9022
|
||||||
Date: 2020-11-09
|
Date: 2020-11-09
|
||||||
Authors@R:
|
Authors@R:
|
||||||
c(
|
c(
|
||||||
|
|
|
||||||
39
NAMESPACE
39
NAMESPACE
|
|
@ -1,45 +1,8 @@
|
||||||
# Generated by roxygen2: do not edit by hand
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
export(addAlleles)
|
|
||||||
export(admix1)
|
|
||||||
export(calculatePopLogml)
|
|
||||||
export(computeAllFreqs2)
|
|
||||||
export(computeIndLogml)
|
|
||||||
export(computePersonalAllFreqs)
|
|
||||||
export(computeRows)
|
|
||||||
export(etsiParas)
|
|
||||||
export(fgetl)
|
|
||||||
export(fopen)
|
|
||||||
export(greedyMix)
|
export(greedyMix)
|
||||||
export(greedyPopMix)
|
|
||||||
export(handleData)
|
export(handleData)
|
||||||
export(handlePopData)
|
export(importFile)
|
||||||
export(initPopNames)
|
|
||||||
export(learn_partition_modified)
|
|
||||||
export(learn_simple_partition)
|
|
||||||
export(linkage)
|
|
||||||
export(load_fasta)
|
|
||||||
export(logml2String)
|
|
||||||
export(lueGenePopData)
|
|
||||||
export(lueGenePopDataPop)
|
|
||||||
export(lueNimi)
|
|
||||||
export(noIndex)
|
|
||||||
export(ownNum2Str)
|
|
||||||
export(poistaLiianPienet)
|
|
||||||
export(proportion2str)
|
|
||||||
export(randdir)
|
|
||||||
export(rivinSisaltamienMjonojenLkm)
|
|
||||||
export(selvitaDigitFormat)
|
|
||||||
export(simulateAllFreqs)
|
|
||||||
export(simulateIndividuals)
|
|
||||||
export(simuloiAlleeli)
|
|
||||||
export(suoritaMuutos)
|
|
||||||
export(takeLine)
|
|
||||||
export(testaaKoordinaatit)
|
|
||||||
export(testaaOnkoKunnollinenBapsData)
|
|
||||||
export(testaaPop)
|
|
||||||
export(writeMixtureInfo)
|
|
||||||
export(writeMixtureInfoPop)
|
|
||||||
importFrom(R6,R6Class)
|
importFrom(R6,R6Class)
|
||||||
importFrom(Rsamtools,scanBam)
|
importFrom(Rsamtools,scanBam)
|
||||||
importFrom(adegenet,.readExt)
|
importFrom(adegenet,.readExt)
|
||||||
|
|
|
||||||
6
NEWS.md
Normal file
6
NEWS.md
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
# rBAPS (development version)
|
||||||
|
|
||||||
|
# rBAPS 0.0.0.9021
|
||||||
|
|
||||||
|
* Added a `NEWS.md` file to track changes to the package.
|
||||||
|
* Exported `greedyMix()` and `load_fasta()` functions.
|
||||||
|
|
@ -4,7 +4,6 @@
|
||||||
#' @param line line
|
#' @param line line
|
||||||
#' @param divider divider
|
#' @param divider divider
|
||||||
#' @return data (after alleles were added)
|
#' @return data (after alleles were added)
|
||||||
#' @export
|
|
||||||
addAlleles <- function(data, ind, line, divider) {
|
addAlleles <- function(data, ind, line, divider) {
|
||||||
# Lisaa BAPS-formaatissa olevaan datataulukkoon
|
# Lisaa BAPS-formaatissa olevaan datataulukkoon
|
||||||
# yksil<69><6C> ind vastaavat rivit. Yksil<69>n alleelit
|
# yksil<69><6C> ind vastaavat rivit. Yksil<69>n alleelit
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@
|
||||||
#' alleleCodes, adjprior, popnames, rowsFromInd, data, npops, noalle
|
#' alleleCodes, adjprior, popnames, rowsFromInd, data, npops, noalle
|
||||||
#' @param tietue tietue
|
#' @param tietue tietue
|
||||||
#' @importFrom methods is
|
#' @importFrom methods is
|
||||||
#' @export
|
|
||||||
admix1 <- function(tietue) {
|
admix1 <- function(tietue) {
|
||||||
if (!is.list(tietue)) {
|
if (!is.list(tietue)) {
|
||||||
message("Load mixture result file. These are the files in this directory:")
|
message("Load mixture result file. These are the files in this directory:")
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,6 @@
|
||||||
#' for the mean parameter.
|
#' for the mean parameter.
|
||||||
#' @param points points
|
#' @param points points
|
||||||
#' @param fii fii
|
#' @param fii fii
|
||||||
#' @export
|
|
||||||
calculatePopLogml <- function(points, fii) {
|
calculatePopLogml <- function(points, fii) {
|
||||||
n <- length(points)
|
n <- length(points)
|
||||||
fuzzy_ones <- sum(points)
|
fuzzy_ones <- sum(points)
|
||||||
|
|
|
||||||
23
R/comparePartitions.R
Normal file
23
R/comparePartitions.R
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
comparePartitions <- function(data, c.rows, partitionCompare.partitions, ninds, rowsFromInd, noalle, adjprior) {
|
||||||
|
stop("Comparing partitions not yet implemented") # TODO: implement
|
||||||
|
# nsamplingunits = size(c.rows,1);
|
||||||
|
# partitions = partitionCompare.partitions;
|
||||||
|
# npartitions = size(partitions,2);
|
||||||
|
# partitionLogml = zeros(1,npartitions);
|
||||||
|
# for i = 1:npartitions
|
||||||
|
# % number of unique partition lables
|
||||||
|
# npops = length(unique(partitions(:,i)));
|
||||||
|
|
||||||
|
# partitionInd = zeros(ninds*rowsFromInd,1);
|
||||||
|
# partitionSample = partitions(:,i);
|
||||||
|
# for j = 1:nsamplingunits
|
||||||
|
# partitionInd([c.rows(j,1):c.rows(j,2)]) = partitionSample(j);
|
||||||
|
# end
|
||||||
|
# partitionLogml(i) = initialCounts(partitionInd, data(:,1:end-1), npops, c.rows, noalle, adjprior);
|
||||||
|
|
||||||
|
# end
|
||||||
|
# % return the logml result
|
||||||
|
# partitionCompare.logmls = partitionLogml;
|
||||||
|
# set(h1, 'userdata', partitionCompare);
|
||||||
|
# return
|
||||||
|
}
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
#' @description Lisää a priori jokaista alleelia joka populaation joka lokukseen
|
#' @description Lisää a priori jokaista alleelia joka populaation joka lokukseen
|
||||||
#' j 1/noalle(j) verran.
|
#' j 1/noalle(j) verran.
|
||||||
#' @param noalle noalle
|
#' @param noalle noalle
|
||||||
#' @export
|
|
||||||
computeAllFreqs2 <- function(noalle) {
|
computeAllFreqs2 <- function(noalle) {
|
||||||
COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS)
|
COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS)
|
||||||
SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS)
|
SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS)
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
#' määritellyiksi kuten osuusTaulu:ssa.
|
#' määritellyiksi kuten osuusTaulu:ssa.
|
||||||
#' @param omaFreqs own Freqs?
|
#' @param omaFreqs own Freqs?
|
||||||
#' @param osuusTaulu Percentage table?
|
#' @param osuusTaulu Percentage table?
|
||||||
#' @export
|
|
||||||
computeIndLogml <- function(omaFreqs, osuusTaulu) {
|
computeIndLogml <- function(omaFreqs, osuusTaulu) {
|
||||||
omaFreqs <- as.matrix(omaFreqs)
|
omaFreqs <- as.matrix(omaFreqs)
|
||||||
osuusTaulu <- as.matrix(osuusTaulu)
|
osuusTaulu <- as.matrix(osuusTaulu)
|
||||||
|
|
|
||||||
|
|
@ -7,8 +7,6 @@
|
||||||
#' @param data data
|
#' @param data data
|
||||||
#' @param allFreqs allFreqs
|
#' @param allFreqs allFreqs
|
||||||
#' @param rowsFromInd rowsFromInd
|
#' @param rowsFromInd rowsFromInd
|
||||||
#' @export
|
|
||||||
|
|
||||||
computePersonalAllFreqs <- function(ind, data, allFreqs, rowsFromInd) {
|
computePersonalAllFreqs <- function(ind, data, allFreqs, rowsFromInd) {
|
||||||
if (isGlobalEmpty(COUNTS)) {
|
if (isGlobalEmpty(COUNTS)) {
|
||||||
nloci <- npops <- 1
|
nloci <- npops <- 1
|
||||||
|
|
|
||||||
|
|
@ -4,9 +4,13 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
|
||||||
# ======================================================== #
|
# ======================================================== #
|
||||||
# Limiting COUNTS size #
|
# Limiting COUNTS size #
|
||||||
# ======================================================== #
|
# ======================================================== #
|
||||||
COUNTS <- COUNTS[
|
if (!is.null(adjprior)) {
|
||||||
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = FALSE
|
nr <- seq_len(nrow(adjprior))
|
||||||
]
|
nc <- seq_len(ncol(adjprior))
|
||||||
|
COUNTS <- COUNTS[nr, nc, pops, drop = FALSE]
|
||||||
|
} else {
|
||||||
|
COUNTS <- NA
|
||||||
|
}
|
||||||
|
|
||||||
x <- size(COUNTS, 1)
|
x <- size(COUNTS, 1)
|
||||||
y <- size(COUNTS, 2)
|
y <- size(COUNTS, 2)
|
||||||
|
|
@ -15,25 +19,24 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
|
||||||
# ======================================================== #
|
# ======================================================== #
|
||||||
# Computation #
|
# Computation #
|
||||||
# ======================================================== #
|
# ======================================================== #
|
||||||
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
|
term1 <- NULL
|
||||||
term1 <- squeeze(
|
if (!is.null(adjprior)) {
|
||||||
sum(
|
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
|
||||||
|
term1 <- squeeze(
|
||||||
sum(
|
sum(
|
||||||
reshape(
|
sum(
|
||||||
lgamma(
|
reshape(
|
||||||
repmat(adjprior, c(1, 1, length(pops))) +
|
lgamma(
|
||||||
COUNTS[
|
repmat(adjprior, c(1, 1, length(pops))) + COUNTS[nr, nc, pops, drop = !isarray]
|
||||||
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops,
|
),
|
||||||
drop = !isarray
|
c(x, y, z)
|
||||||
]
|
|
||||||
),
|
),
|
||||||
c(x, y, z)
|
1
|
||||||
),
|
),
|
||||||
1
|
2
|
||||||
),
|
)
|
||||||
2
|
|
||||||
)
|
)
|
||||||
)
|
}
|
||||||
if (is.null(priorTerm)) priorTerm <- 0
|
if (is.null(priorTerm)) priorTerm <- 0
|
||||||
popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
|
popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
|
||||||
return(popLogml)
|
return(popLogml)
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,6 @@
|
||||||
#' @param rowsFromInd rowsFromInd
|
#' @param rowsFromInd rowsFromInd
|
||||||
#' @param inds matrix
|
#' @param inds matrix
|
||||||
#' @param ninds ninds
|
#' @param ninds ninds
|
||||||
#' @export
|
|
||||||
computeRows <- function(rowsFromInd, inds, ninds) {
|
computeRows <- function(rowsFromInd, inds, ninds) {
|
||||||
if (!is(inds, "matrix")) inds <- as.matrix(inds)
|
if (!is(inds, "matrix")) inds <- as.matrix(inds)
|
||||||
if (identical(dim(inds), c(nrow(inds), 1L))) {
|
if (identical(dim(inds), c(nrow(inds), 1L))) {
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
#' @export
|
|
||||||
#' @title Etsi Paras
|
#' @title Etsi Paras
|
||||||
#' @description Search for the best?
|
#' @description Search for the best?
|
||||||
#' @param osuus Percentages?
|
#' @param osuus Percentages?
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@
|
||||||
#' fgetl returns tline as a numeric value -1.
|
#' fgetl returns tline as a numeric value -1.
|
||||||
#' @author Waldir Leoncio
|
#' @author Waldir Leoncio
|
||||||
#' @seealso fopen
|
#' @seealso fopen
|
||||||
#' @export
|
|
||||||
fgetl <- function(file) {
|
fgetl <- function(file) {
|
||||||
# ==========================================================================
|
# ==========================================================================
|
||||||
# Validation
|
# Validation
|
||||||
|
|
@ -27,5 +26,4 @@ fgetl <- function(file) {
|
||||||
#' @return The same as `readLines(filename)`
|
#' @return The same as `readLines(filename)`
|
||||||
#' @author Waldir Leoncio
|
#' @author Waldir Leoncio
|
||||||
#' @seealso fgetl
|
#' @seealso fgetl
|
||||||
#' @export
|
|
||||||
fopen <- function(filename) readLines(filename)
|
fopen <- function(filename) readLines(filename)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,16 @@
|
||||||
#' @title Clustering of individuals
|
#' @title Clustering of individuals
|
||||||
#' @param data data file
|
#' @param data data file
|
||||||
#' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"
|
#' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"
|
||||||
|
#' @param partitionCompare a list of partitions to compare
|
||||||
|
#' @param ninds number of individuals
|
||||||
|
#' @param npops number of populations
|
||||||
|
#' @param counts counts
|
||||||
|
#' @param sumcounts sumcounts
|
||||||
|
#' @param max_iter maximum number of iterations
|
||||||
|
#' @param alleleCodes allele codes
|
||||||
|
#' @param inp input file
|
||||||
|
#' @param popnames population names
|
||||||
|
#' @param fixedK if \code{TRUE}, the number of populations is fixed
|
||||||
#' @param verbose if \code{TRUE}, prints extra output information
|
#' @param verbose if \code{TRUE}, prints extra output information
|
||||||
#' @importFrom utils read.delim
|
#' @importFrom utils read.delim
|
||||||
#' @importFrom vcfR read.vcfR
|
#' @importFrom vcfR read.vcfR
|
||||||
|
|
@ -9,41 +19,59 @@
|
||||||
#' @references Samtools: a suite of programs for interacting
|
#' @references Samtools: a suite of programs for interacting
|
||||||
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
||||||
#' @export
|
#' @export
|
||||||
greedyMix <- function(data, format, verbose = TRUE) {
|
#' @examples
|
||||||
# Parsing data format ------------------------------------------------------
|
#' data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
|
||||||
|
#' greedyMix(data, "fasta")
|
||||||
|
greedyMix <- function(
|
||||||
|
data, format, partitionCompare = NULL, ninds = 1L, npops = 1L,
|
||||||
|
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
|
||||||
|
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
|
||||||
|
) {
|
||||||
|
# Importing and handling data ================================================
|
||||||
|
data <- importFile(data, format, verbose)
|
||||||
|
data <- handleData(data, tolower(format))
|
||||||
|
c <- list(
|
||||||
|
noalle = data[["noalle"]],
|
||||||
|
data = data[["newData"]],
|
||||||
|
adjprior = data[["adjprior"]],
|
||||||
|
priorTerm = data[["priorTerm"]],
|
||||||
|
rowsFromInd = data[["rowsFromInd"]]
|
||||||
|
)
|
||||||
|
|
||||||
if (missing(format)) {
|
# Comparing partitions =======================================================
|
||||||
format <- gsub(".*\\.(.+)$", "\\1", data)
|
if (!is.null(partitionCompare)) {
|
||||||
message("Format not provided. Guessing from file extension: ", format)
|
logmls <- comparePartitions(
|
||||||
}
|
c[["data"]], nrow(c[["data"]]), partitionCompare[["partitions"]], ninds,
|
||||||
format <- tolower(format)
|
c[["rowsFromInd"]], c[["noalle"]], c[["adjprior"]]
|
||||||
|
|
||||||
# Dispatching to proper loading function -----------------------------------
|
|
||||||
|
|
||||||
if (format == "fasta") {
|
|
||||||
out <- load_fasta(data)
|
|
||||||
} else if (format == "vcf") {
|
|
||||||
out <- vcfR::read.vcfR(data, verbose = verbose)
|
|
||||||
} else if (format == "sam") {
|
|
||||||
stop(
|
|
||||||
"SAM files not directly supported. ",
|
|
||||||
"Install the samtools software and execute\n\n",
|
|
||||||
"samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ",
|
|
||||||
"and try running this function again with 'format=BAM'"
|
|
||||||
)
|
)
|
||||||
} else if (format == "bam") {
|
|
||||||
out <- Rsamtools::scanBam(data)
|
|
||||||
} else if (format == "genepop") {
|
|
||||||
if (toupper(adegenet::.readExt(data)) == "TXT") {
|
|
||||||
message("Creating a copy of the file with the .gen extension")
|
|
||||||
dataGen <- gsub("txt", "gen", data)
|
|
||||||
file.copy(data, dataGen)
|
|
||||||
out <- adegenet::read.genepop(dataGen)
|
|
||||||
} else {
|
|
||||||
out <- adegenet::read.genepop(data)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
stop("Format not supported.")
|
|
||||||
}
|
}
|
||||||
return(out)
|
|
||||||
|
|
||||||
|
# Generating partition summary ===============================================
|
||||||
|
ekat <- seq(1L, c[["rowsFromInd"]], ninds * c[["rowsFromInd"]]) # ekat = (1:rowsFromInd:ninds*rowsFromInd)';
|
||||||
|
c[["rows"]] <- c(ekat, ekat + c[["rowsFromInd"]] - 1L) # c.rows = [ekat ekat+rowsFromInd-1]
|
||||||
|
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose)
|
||||||
|
logml <- logml_npops_partitionSummary[["logml"]]
|
||||||
|
npops <- logml_npops_partitionSummary[["npops"]]
|
||||||
|
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
||||||
|
|
||||||
|
# Generating output object ===================================================
|
||||||
|
out <- list(
|
||||||
|
"alleleCodes" = alleleCodes, "adjprior" = c[["adjprior"]],
|
||||||
|
"popnames" = popnames, "rowsFromInd" = c[["rowsFromInd"]],
|
||||||
|
"data" = c[["data"]], "npops" = npops, "noalle" = c[["noalle"]],
|
||||||
|
"mixtureType" = "mix", "logml" = logml
|
||||||
|
)
|
||||||
|
if (logml == 1) {
|
||||||
|
return(out)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Writing mixture info =======================================================
|
||||||
|
changesInLogml <- writeMixtureInfo(
|
||||||
|
logml, c[["rowsFromInd"]], c[["data"]], c[["adjprior"]], c[["priorTerm"]],
|
||||||
|
NULL, inp, partitionSummary, popnames, fixedK
|
||||||
|
)
|
||||||
|
|
||||||
|
# Updateing results ==========================================================
|
||||||
|
return(c(out, "changesInLogml" = changesInLogml))
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@
|
||||||
#' @importFrom matlab2r uiputfile
|
#' @importFrom matlab2r uiputfile
|
||||||
#' @references Samtools: a suite of programs for interacting
|
#' @references Samtools: a suite of programs for interacting
|
||||||
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
||||||
#' @export
|
|
||||||
greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
|
greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
|
||||||
) {
|
) {
|
||||||
# Replacing original file reading code with greedyMix()
|
# Replacing original file reading code with greedyMix()
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
#' @title Handle Data
|
#' @title Handle Data
|
||||||
#' @param raw_data Raw data
|
#' @param raw_data Raw data in Genepop or BAPS format
|
||||||
|
#' @param format data format
|
||||||
#' @details The last column of the original data tells you from which
|
#' @details The last column of the original data tells you from which
|
||||||
#' individual that line is from. The function first examines how many line
|
#' individual that line is from. The function first examines how many line
|
||||||
#' maximum is from one individual giving know if it is haploid, diploid, etc.
|
#' maximum is from one individual giving know if it is haploid, diploid, etc.
|
||||||
|
|
@ -7,9 +8,9 @@
|
||||||
#' maximum. If the code of an allele is = 0, the function changes that allele
|
#' maximum. If the code of an allele is = 0, the function changes that allele
|
||||||
#' code to the smallest code that is larger than any code in use. After this,
|
#' code to the smallest code that is larger than any code in use. After this,
|
||||||
#' the function changes the allele codes so that one locus j
|
#' the function changes the allele codes so that one locus j
|
||||||
#' codes get values between? 1, ..., Noah (j).
|
#' codes get values between? 1, ..., noalle(j).
|
||||||
#' @export
|
#' @export
|
||||||
handleData <- function(raw_data) {
|
handleData <- function(raw_data, format = "Genepop") {
|
||||||
# Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt?
|
# Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt?
|
||||||
# kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako
|
# kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako
|
||||||
# rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan
|
# rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan
|
||||||
|
|
@ -20,28 +21,29 @@ handleData <- function(raw_data) {
|
||||||
# koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi.
|
# koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi.
|
||||||
# T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
|
# T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
|
||||||
# koodit saavat arvoja v?lill?1,...,noalle(j).
|
# koodit saavat arvoja v?lill?1,...,noalle(j).
|
||||||
|
nloci <- switch(
|
||||||
|
tolower(format),
|
||||||
|
"genepop" = ncol(raw_data) - 1L,
|
||||||
|
"baps" = ncol(raw_data) - 1L,
|
||||||
|
"fasta" = ncol(raw_data),
|
||||||
|
"vcf" = stop("VCF format not supported for processing yet"),
|
||||||
|
"bam" = stop("BAM format not supported for processing yet")
|
||||||
|
)
|
||||||
data <- as.matrix(raw_data)
|
data <- as.matrix(raw_data)
|
||||||
nloci <- size(raw_data, 2) - 1
|
|
||||||
|
|
||||||
dataApu <- data[, 1:nloci]
|
dataApu <- data[, 1:nloci]
|
||||||
nollat <- matlab2r::find(dataApu == 0)
|
nollat <- matlab2r::find(dataApu == 0)
|
||||||
if (!isempty(nollat)) {
|
if (!isempty(nollat)) {
|
||||||
isoinAlleeli <- base::max(max(dataApu))
|
isoinAlleeli <- base::max(base::max(dataApu))
|
||||||
dataApu[nollat] <- isoinAlleeli + 1
|
dataApu[nollat] <- isoinAlleeli + 1
|
||||||
data[, 1:nloci] <- dataApu
|
data[, 1:nloci] <- dataApu
|
||||||
}
|
}
|
||||||
# dataApu <- []
|
|
||||||
# nollat <- []
|
|
||||||
# isoinAlleeli <- []
|
|
||||||
|
|
||||||
noalle <- zeros(1, nloci)
|
noalle <- zeros(1, nloci)
|
||||||
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
|
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
|
||||||
for (i in 1:nloci) {
|
for (i in 1:nloci) {
|
||||||
alleelitLokuksessaI <- unique(data[, i])
|
alleelitLokuksessaI <- unique(data[, i])
|
||||||
alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
|
alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
|
||||||
matlab2r::find(
|
matlab2r::find(alleelitLokuksessaI >= 0)
|
||||||
alleelitLokuksessaI >= 0
|
|
||||||
)
|
|
||||||
])
|
])
|
||||||
noalle[i] <- length(alleelitLokuksessa[[i]])
|
noalle[i] <- length(alleelitLokuksessa[[i]])
|
||||||
}
|
}
|
||||||
|
|
@ -49,9 +51,7 @@ handleData <- function(raw_data) {
|
||||||
for (i in 1:nloci) {
|
for (i in 1:nloci) {
|
||||||
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
|
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
|
||||||
puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
|
puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
|
||||||
alleleCodes[, i] <- as.matrix(
|
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
|
||||||
c(alleelitLokuksessaI, zeros(puuttuvia, 1))
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (loc in seq_len(nloci)) {
|
for (loc in seq_len(nloci)) {
|
||||||
|
|
@ -60,7 +60,7 @@ handleData <- function(raw_data) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
nind <- base::max(data[, ncol(data)])
|
nind <- as.integer(base::max(data[, ncol(data)]))
|
||||||
nrows <- size(data, 1)
|
nrows <- size(data, 1)
|
||||||
ncols <- size(data, 2)
|
ncols <- size(data, 2)
|
||||||
rowsFromInd <- zeros(nind, 1)
|
rowsFromInd <- zeros(nind, 1)
|
||||||
|
|
@ -71,11 +71,11 @@ handleData <- function(raw_data) {
|
||||||
a <- -999
|
a <- -999
|
||||||
emptyRow <- repmat(a, c(1, ncols))
|
emptyRow <- repmat(a, c(1, ncols))
|
||||||
lessThanMax <- matlab2r::find(rowsFromInd < maxRowsFromInd)
|
lessThanMax <- matlab2r::find(rowsFromInd < maxRowsFromInd)
|
||||||
missingRows <- maxRowsFromInd * nind - nrows
|
missingRows <- max(maxRowsFromInd * nind - nrows, 0L)
|
||||||
data <- rbind(data, zeros(missingRows, ncols))
|
data <- rbind(data, zeros(missingRows, ncols))
|
||||||
pointer <- 1
|
pointer <- 1
|
||||||
for (ind in t(lessThanMax)) { # K?y l?pi ne yksil?t, joilta puuttuu rivej?
|
for (ind in t(lessThanMax)) { # K?y l?pi ne yksil?t, joilta puuttuu rivej?
|
||||||
miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm.
|
miss <- maxRowsFromInd - rowsFromInd[ind] # T?lt?yksil?lt?puuttuvien lkm.
|
||||||
}
|
}
|
||||||
data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
|
data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
|
||||||
newData <- data
|
newData <- data
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,6 @@
|
||||||
#' codes so that the codes for one locus have values between 1 and noalle[j].
|
#' codes so that the codes for one locus have values between 1 and noalle[j].
|
||||||
#' Before this change, an allele whose code is zero is changed.
|
#' Before this change, an allele whose code is zero is changed.
|
||||||
#' @param raw_data raw data
|
#' @param raw_data raw data
|
||||||
#' @export
|
|
||||||
handlePopData <- function(raw_data) {
|
handlePopData <- function(raw_data) {
|
||||||
# Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
|
# Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
|
||||||
# kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
|
# kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
|
||||||
|
|
|
||||||
49
R/importFile.R
Normal file
49
R/importFile.R
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
#' @title Import data file
|
||||||
|
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM,
|
||||||
|
#' Genepop).
|
||||||
|
#' @param data raw dataset
|
||||||
|
#' @param format data format (guesses from extension if not provided)
|
||||||
|
#' @param verbose if \code{TRUE}, prints extra output information
|
||||||
|
#' @return The data in a format that can be used by the other functions
|
||||||
|
#' @export
|
||||||
|
#' @examples
|
||||||
|
#' path_inst <- system.file("extdata", "", package = "rBAPS")
|
||||||
|
#' importFile(file.path(path_inst, "FASTA_clustering_haploid.fasta"))
|
||||||
|
importFile <- function(data, format, verbose) {
|
||||||
|
# Parsing data format ------------------------------------------------------
|
||||||
|
|
||||||
|
if (missing(format)) {
|
||||||
|
format <- gsub(".*\\.(.+)$", "\\1", data)
|
||||||
|
message("Format not provided. Guessing from file extension: ", format)
|
||||||
|
}
|
||||||
|
format <- tolower(format)
|
||||||
|
|
||||||
|
# Dispatching to proper loading function -----------------------------------
|
||||||
|
|
||||||
|
if (format == "fasta") {
|
||||||
|
out <- load_fasta(data)
|
||||||
|
} else if (format == "vcf") {
|
||||||
|
out <- vcfR::read.vcfR(data, verbose = verbose)
|
||||||
|
} else if (format == "sam") {
|
||||||
|
stop(
|
||||||
|
"SAM files not directly supported. ",
|
||||||
|
"Install the samtools software and execute\n\n",
|
||||||
|
"samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ",
|
||||||
|
"and try running this function again with 'format=BAM'"
|
||||||
|
)
|
||||||
|
} else if (format == "bam") {
|
||||||
|
out <- Rsamtools::scanBam(data)
|
||||||
|
} else if (format == "genepop") {
|
||||||
|
if (toupper(adegenet::.readExt(data)) == "TXT") {
|
||||||
|
message("Creating a copy of the file with the .gen extension")
|
||||||
|
dataGen <- gsub("txt", "gen", data)
|
||||||
|
file.copy(data, dataGen)
|
||||||
|
out <- adegenet::read.genepop(dataGen)
|
||||||
|
} else {
|
||||||
|
out <- adegenet::read.genepop(data)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
stop("Format not supported.")
|
||||||
|
}
|
||||||
|
return(out)
|
||||||
|
}
|
||||||
50
R/indMix.R
50
R/indMix.R
|
|
@ -1,4 +1,4 @@
|
||||||
indMix <- function(c, npops, dispText = TRUE) {
|
indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, dispText = FALSE) {
|
||||||
# Greedy search algorithm with unknown number of classes for regular
|
# Greedy search algorithm with unknown number of classes for regular
|
||||||
# clustering.
|
# clustering.
|
||||||
# Input npops is not used if called by greedyMix or greedyPopMix.
|
# Input npops is not used if called by greedyMix or greedyPopMix.
|
||||||
|
|
@ -17,8 +17,11 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
if (isfield(c, "dist")) {
|
if (isfield(c, "dist")) {
|
||||||
dist <- c$dist
|
dist <- c$dist
|
||||||
Z <- c$Z
|
Z <- c$Z
|
||||||
|
} else {
|
||||||
|
Z <- NULL
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
rm(c)
|
rm(c)
|
||||||
nargin <- length(as.list(match.call())) - 1
|
nargin <- length(as.list(match.call())) - 1
|
||||||
if (nargin < 2) {
|
if (nargin < 2) {
|
||||||
|
|
@ -65,14 +68,14 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
nruns <- length(npopsTaulu)
|
nruns <- length(npopsTaulu)
|
||||||
|
|
||||||
initData <- data
|
initData <- data
|
||||||
data <- data[, 1:(ncol(data) - 1)]
|
data <- data[, seq_along(noalle)] # Original code always dropped last column.
|
||||||
|
|
||||||
logmlBest <- -1e50
|
logmlBest <- -1e50
|
||||||
partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml)
|
partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml)
|
||||||
partitionSummary[, 1] <- zeros(30, 1)
|
partitionSummary[, 1] <- zeros(30, 1)
|
||||||
worstLogml <- -1e50
|
worstLogml <- -1e50
|
||||||
worstIndex <- 1
|
worstIndex <- 1
|
||||||
for (run in 1:nruns) {
|
for (run in seq_along(nruns)) {
|
||||||
npops <- npopsTaulu[[run]]
|
npops <- npopsTaulu[[run]]
|
||||||
if (dispText) {
|
if (dispText) {
|
||||||
dispLine()
|
dispLine()
|
||||||
|
|
@ -84,6 +87,7 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
ninds <- size(rows, 1)
|
ninds <- size(rows, 1)
|
||||||
|
|
||||||
initialPartition <- admixture_initialization(initData, npops, Z)
|
initialPartition <- admixture_initialization(initData, npops, Z)
|
||||||
sumcounts_counts_logml <- initialCounts(
|
sumcounts_counts_logml <- initialCounts(
|
||||||
initialPartition, data, npops, rows, noalle, adjprior
|
initialPartition, data, npops, rows, noalle, adjprior
|
||||||
|
|
@ -93,16 +97,15 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
logml <- sumcounts_counts_logml$logml
|
logml <- sumcounts_counts_logml$logml
|
||||||
|
|
||||||
PARTITION <- zeros(ninds, 1)
|
PARTITION <- zeros(ninds, 1)
|
||||||
for (i in 1:ninds) {
|
for (i in seq_len(ninds)) {
|
||||||
apu <- rows[i]
|
apu <- rows[i]
|
||||||
PARTITION[i] <- initialPartition[apu[1]]
|
PARTITION[i] <- initialPartition[apu[1]]
|
||||||
}
|
}
|
||||||
|
|
||||||
COUNTS <- counts
|
COUNTS <- counts
|
||||||
SUMCOUNTS <- sumcounts
|
SUMCOUNTS <- sumcounts
|
||||||
POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm)
|
POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
|
||||||
LOGDIFF <- repmat(-Inf, c(ninds, npops))
|
LOGDIFF <- repmat(-Inf, c(ninds, npops))
|
||||||
rm(initialPartition, counts, sumcounts)
|
|
||||||
|
|
||||||
# PARHAAN MIXTURE-PARTITION ETSIMINEN
|
# PARHAAN MIXTURE-PARTITION ETSIMINEN
|
||||||
nRoundTypes <- 7
|
nRoundTypes <- 7
|
||||||
|
|
@ -120,30 +123,34 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
iter <- 1L
|
||||||
while (ready != 1) {
|
while (ready != 1) {
|
||||||
# FIXME: loop caught in here
|
iter <- iter + 1L
|
||||||
|
if (iter > max_iter) {
|
||||||
|
warning("max_iter reached. Stopping prematurely.")
|
||||||
|
break
|
||||||
|
}
|
||||||
muutoksia <- 0
|
muutoksia <- 0
|
||||||
|
|
||||||
if (dispText) {
|
if (dispText) {
|
||||||
message(paste("\nPerforming steps:", as.character(roundTypes)))
|
message("Performing steps: ", paste(roundTypes, collapse = " "))
|
||||||
}
|
}
|
||||||
|
|
||||||
for (n in 1:length(roundTypes)) {
|
for (n in seq_along(roundTypes)) {
|
||||||
round <- roundTypes[n]
|
round <- roundTypes[n]
|
||||||
kivaluku <- 0
|
kivaluku <- 0
|
||||||
|
|
||||||
if (kokeiltu[round] == 1) { # Askelta kokeiltu viime muutoksen j<>lkeen
|
if (kokeiltu[round] == 1) { # Askelta kokeiltu viime muutoksen j<>lkeen
|
||||||
} else if (round == 0 | round == 1) { # Yksil<69>n siirt<72>minen toiseen populaatioon.
|
} else if (round == 0 | round == 1) { # Yksil<69>n siirt<72>minen toiseen populaatioon.
|
||||||
inds <- 1:ninds
|
inds <- seq_len(ninds)
|
||||||
aputaulu <- cbind(inds, rand(ninds, 1))
|
aputaulu <- cbind(t(inds), rand(ninds, 1))
|
||||||
aputaulu <- sortrows(aputaulu, 2)
|
aputaulu <- matrix(sortrows(aputaulu, 2), nrow = nrow(aputaulu))
|
||||||
inds <- t(aputaulu[, 1])
|
inds <- t(aputaulu[, 1])
|
||||||
muutosNyt <- 0
|
muutosNyt <- 0
|
||||||
|
|
||||||
for (ind in inds) {
|
for (ind in inds) {
|
||||||
i1 <- PARTITION[ind]
|
i1 <- PARTITION[ind]
|
||||||
muutokset_diffInCounts <- greedyMix_muutokset$new()
|
muutokset_diffInCounts <- greedyMix_muutokset$new()
|
||||||
# FIXME: using 100-length global variables instead of the ones in this function
|
|
||||||
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset(
|
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset(
|
||||||
ind, rows, data, adjprior, priorTerm
|
ind, rows, data, adjprior, priorTerm
|
||||||
)
|
)
|
||||||
|
|
@ -190,7 +197,7 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
}
|
}
|
||||||
} else if (round == 2) { # Populaation yhdist<73>minen toiseen.
|
} else if (round == 2) { # Populaation yhdist<73>minen toiseen.
|
||||||
maxMuutos <- 0
|
maxMuutos <- 0
|
||||||
for (pop in 1:npops) {
|
for (pop in seq_len(npops)) {
|
||||||
muutokset_diffInCounts <- greedyMix_muutokset$new()
|
muutokset_diffInCounts <- greedyMix_muutokset$new()
|
||||||
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset2(
|
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset2(
|
||||||
pop, rows, data, adjprior, priorTerm
|
pop, rows, data, adjprior, priorTerm
|
||||||
|
|
@ -234,7 +241,7 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
} else if (round == 3 || round == 4) { # Populaation jakaminen osiin.
|
} else if (round == 3 || round == 4) { # Populaation jakaminen osiin.
|
||||||
maxMuutos <- 0
|
maxMuutos <- 0
|
||||||
ninds <- size(rows, 1)
|
ninds <- size(rows, 1)
|
||||||
for (pop in 1:npops) {
|
for (pop in seq_len(npops)) {
|
||||||
inds2 <- matlab2r::find(PARTITION == pop)
|
inds2 <- matlab2r::find(PARTITION == pop)
|
||||||
ninds2 <- length(inds2)
|
ninds2 <- length(inds2)
|
||||||
if (ninds2 > 2) {
|
if (ninds2 > 2) {
|
||||||
|
|
@ -265,7 +272,7 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
muutoksia <- 1
|
muutoksia <- 1
|
||||||
kokeiltu <- zeros(nRoundTypes, 1)
|
kokeiltu <- zeros(nRoundTypes, 1)
|
||||||
rivit <- list()
|
rivit <- list()
|
||||||
for (i in 1:length(muuttuvat)) {
|
for (i in seq_len(muuttuvat)) {
|
||||||
ind <- muuttuvat[i]
|
ind <- muuttuvat[i]
|
||||||
lisa <- rows[ind, 1]:rows[ind, 2]
|
lisa <- rows[ind, 1]:rows[ind, 2]
|
||||||
rivit <- rbind(rivit, t(lisa))
|
rivit <- rbind(rivit, t(lisa))
|
||||||
|
|
@ -421,7 +428,7 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
totalMuutos <- muutokset(1, emptyPop)
|
totalMuutos <- muutokset(1, emptyPop)
|
||||||
|
|
||||||
rivit <- list()
|
rivit <- list()
|
||||||
for (i in 1:length(muuttuvat)) {
|
for (i in seq_len(muuttuvat)) {
|
||||||
ind <- muuttuvat[i]
|
ind <- muuttuvat[i]
|
||||||
lisa <- rows[ind, 1]:rows[ind, 2]
|
lisa <- rows[ind, 1]:rows[ind, 2]
|
||||||
rivit <- c(rivit, lisa)
|
rivit <- c(rivit, lisa)
|
||||||
|
|
@ -506,8 +513,6 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
# FIXME: muutoksia is never 0, so vaihe never equals 5 and ready 1
|
|
||||||
print(paste("i1 =", i1, "i2 =", i2, "maxMuutos =", maxMuutos)) # TEMP
|
|
||||||
if (muutoksia == 0) {
|
if (muutoksia == 0) {
|
||||||
if (vaihe <= 4) {
|
if (vaihe <= 4) {
|
||||||
vaihe <= vaihe + 1
|
vaihe <= vaihe + 1
|
||||||
|
|
@ -536,11 +541,10 @@ indMix <- function(c, npops, dispText = TRUE) {
|
||||||
# TALLENNETAAN
|
# TALLENNETAAN
|
||||||
|
|
||||||
npops <- poistaTyhjatPopulaatiot(npops)
|
npops <- poistaTyhjatPopulaatiot(npops)
|
||||||
POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm)
|
POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
|
||||||
if (dispText) {
|
if (dispText) {
|
||||||
print(paste("Found partition with", as.character(npops), "populations."))
|
message("Found partition with ", as.character(npops), " populations.")
|
||||||
print(paste("Log(ml) =", as.character(logml)))
|
message("Log(ml) = ", as.character(logml))
|
||||||
print(" ")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (logml > logmlBest) {
|
if (logml > logmlBest) {
|
||||||
|
|
|
||||||
7
R/indMixWrapper.R
Normal file
7
R/indMixWrapper.R
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
indMixWrapper <- function(c, npops, counts, sumcounts, max_iter, fixedK = FALSE, verbose = FALSE) {
|
||||||
|
if (fixedK) {
|
||||||
|
stop("indMix_fixK() not yet implemented.") # TODO: translate indMix_fixK.m
|
||||||
|
} else {
|
||||||
|
indMix(c, npops, counts, sumcounts, max_iter, verbose)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
#' @title Initialize Pop Names
|
#' @title Initialize Pop Names
|
||||||
#' @param nameFile nameFile
|
#' @param nameFile nameFile
|
||||||
#' @param indexFile indexFile
|
#' @param indexFile indexFile
|
||||||
#' @export
|
|
||||||
initPopNames <- function(nameFile, indexFile) {
|
initPopNames <- function(nameFile, indexFile) {
|
||||||
# Palauttaa tyhj<68>n, mik<69>li nimitiedosto ja indeksitiedosto
|
# Palauttaa tyhj<68>n, mik<69>li nimitiedosto ja indeksitiedosto
|
||||||
# eiv<69>t olleet yht?pitki?
|
# eiv<69>t olleet yht?pitki?
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ initialCounts <- function(partition, data, npops, rows, noalle, adjprior) {
|
||||||
nloci <- size(data, 2)
|
nloci <- size(data, 2)
|
||||||
ninds <- size(rows, 1)
|
ninds <- size(rows, 1)
|
||||||
|
|
||||||
koot <- rows[, 1] - rows[, 2] + 1
|
koot <- rows[1] - rows[2] + 1
|
||||||
maxSize <- base::max(koot)
|
maxSize <- base::max(koot)
|
||||||
|
|
||||||
counts <- zeros(base::max(noalle), nloci, npops)
|
counts <- zeros(base::max(noalle), nloci, npops)
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
laskeLoggis <- function(counts, sumcounts, adjprior) {
|
laskeLoggis <- function(counts, sumcounts, adjprior) {
|
||||||
npops <- size(counts, 3)
|
npops <- size(counts, 3)
|
||||||
|
replicated_adjprior <- array(adjprior, c(nrow(adjprior), ncol(adjprior), npops))
|
||||||
sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops))))))
|
sum1 <- sum(sum(sum(lgamma(counts + replicated_adjprior))))
|
||||||
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
|
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
|
||||||
logml2 <- sum1 - npops * sum3
|
logml2 <- sum1 - npops * sum3
|
||||||
loggis <- logml2
|
loggis <- logml2
|
||||||
|
|
|
||||||
|
|
@ -349,12 +349,15 @@ greedyMix_muutokset <- R6Class(
|
||||||
i1_logml <- POP_LOGML[i1]
|
i1_logml <- POP_LOGML[i1]
|
||||||
muutokset[i1] <- 0
|
muutokset[i1] <- 0
|
||||||
|
|
||||||
rows <- globalRows[ind, 1]:globalRows[ind, 2]
|
if (is.null(dim(globalRows))) {
|
||||||
|
rows <- globalRows[1]:globalRows[2]
|
||||||
|
} else {
|
||||||
|
rows <- globalRows[ind, 1]:globalRows[ind, 2]
|
||||||
|
}
|
||||||
diffInCounts <- computeDiffInCounts(
|
diffInCounts <- computeDiffInCounts(
|
||||||
rows, size(COUNTS, 1), size(COUNTS, 2), data
|
rows, size(COUNTS, 1), size(COUNTS, 2), data
|
||||||
)
|
)
|
||||||
diffInSumCounts <- colSums(diffInCounts)
|
diffInSumCounts <- colSums(diffInCounts)
|
||||||
|
|
||||||
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
|
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
|
||||||
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
|
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
#' @title Learn partition (modified)
|
#' @title Learn partition (modified)
|
||||||
#' @export
|
|
||||||
#' @param ordered ordered
|
#' @param ordered ordered
|
||||||
#' @return part
|
#' @return part
|
||||||
#' @description This function is called only if some individual has less than
|
#' @description This function is called only if some individual has less than
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
#' @param fii fii
|
#' @param fii fii
|
||||||
#' @description Goes through all the ways to divide the points into two or
|
#' @description Goes through all the ways to divide the points into two or
|
||||||
#' three groups. Chooses the partition which obtains highest logml.
|
#' three groups. Chooses the partition which obtains highest logml.
|
||||||
#' @export
|
|
||||||
learn_simple_partition <- function(ordered_points, fii) {
|
learn_simple_partition <- function(ordered_points, fii) {
|
||||||
npoints <- length(ordered_points)
|
npoints <- length(ordered_points)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,6 @@
|
||||||
#' that BAPS should use this function instead of the base one, so this is why
|
#' that BAPS should use this function instead of the base one, so this is why
|
||||||
#' this function is part of this package (instead of a MATLAB-replicating
|
#' this function is part of this package (instead of a MATLAB-replicating
|
||||||
#' package such as matlab2r)
|
#' package such as matlab2r)
|
||||||
#' @export
|
|
||||||
linkage <- function(Y, method = "co") {
|
linkage <- function(Y, method = "co") {
|
||||||
k <- size(Y)[1]
|
k <- size(Y)[1]
|
||||||
n <- size(Y)[2]
|
n <- size(Y)[2]
|
||||||
|
|
|
||||||
|
|
@ -4,18 +4,19 @@
|
||||||
#' running the hierBAPS algorithm.
|
#' running the hierBAPS algorithm.
|
||||||
#'
|
#'
|
||||||
#' @param msa Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered
|
#' @param msa Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered
|
||||||
#' @param keep.singletons A logical indicating whether to consider singleton mutations in calculating the clusters
|
#' @param keep_singletons A logical indicating whether to consider singleton mutations in calculating the clusters
|
||||||
|
#' @param output_numbers A logical indicating whether to output the data as
|
||||||
|
#' numbers (TRUE) or letters (FALSE)
|
||||||
#'
|
#'
|
||||||
#' @return A character matrix with filtered SNP data
|
#' @return A character matrix with filtered SNP data
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' msa <- system.file("ext", "seqs.fa", package = "rBAPS")
|
#' msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
|
||||||
#' snp.matrix <- load_fasta(msa)
|
#' snp.matrix <- rBAPS:::load_fasta(msa)
|
||||||
#' @author Gerry Tonkin-Hill, Waldir Leoncio
|
#' @author Gerry Tonkin-Hill, Waldir Leoncio
|
||||||
#' @seealso rhierbaps::load_fasta
|
#' @seealso rhierbaps::load_fasta
|
||||||
#' @importFrom ape read.FASTA as.DNAbin
|
#' @importFrom ape read.FASTA as.DNAbin
|
||||||
#' @export
|
load_fasta <- function(msa, keep_singletons = FALSE, output_numbers = TRUE) {
|
||||||
load_fasta <- function(msa, keep.singletons = FALSE) {
|
|
||||||
|
|
||||||
# Check inputs
|
# Check inputs
|
||||||
if (is(msa, "character")) {
|
if (is(msa, "character")) {
|
||||||
|
|
@ -28,7 +29,9 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
||||||
} else {
|
} else {
|
||||||
stop("incorrect input for msa!")
|
stop("incorrect input for msa!")
|
||||||
}
|
}
|
||||||
if (!is.logical(keep.singletons)) stop("Invalid keep.singletons! Must be on of TRUE/FALSE.")
|
if (!is.logical(keep_singletons)) {
|
||||||
|
stop("Invalid keep_singletons! Must be one of TRUE/FALSE.")
|
||||||
|
}
|
||||||
|
|
||||||
# Load sequences using ape. This does a lot of the checking for us.
|
# Load sequences using ape. This does a lot of the checking for us.
|
||||||
seq_names <- labels(seqs)
|
seq_names <- labels(seqs)
|
||||||
|
|
@ -46,8 +49,8 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
||||||
conserved <- colSums(t(t(seqs) == seqs[1, ])) == nrow(seqs)
|
conserved <- colSums(t(t(seqs) == seqs[1, ])) == nrow(seqs)
|
||||||
seqs <- seqs[, !conserved]
|
seqs <- seqs[, !conserved]
|
||||||
|
|
||||||
if (!keep.singletons) {
|
if (!keep_singletons) {
|
||||||
# remove singletons as they are uninformative in the algorithm
|
# remove_singletons as they are uninformative in the algorithm
|
||||||
is_singleton <- apply(seqs, 2, function(x) {
|
is_singleton <- apply(seqs, 2, function(x) {
|
||||||
tab <- table(x)
|
tab <- table(x)
|
||||||
return(x %in% names(tab)[tab == 1])
|
return(x %in% names(tab)[tab == 1])
|
||||||
|
|
@ -58,5 +61,11 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
||||||
# Convert gaps and unknowns to same symbol
|
# Convert gaps and unknowns to same symbol
|
||||||
seqs[seqs == "n"] <- "-"
|
seqs[seqs == "n"] <- "-"
|
||||||
|
|
||||||
|
# Replace letters with numbers, dashes with zeros
|
||||||
|
if (output_numbers) {
|
||||||
|
seqs <- matrix(match(seqs, c("a", "c", "g", "t")), nrow(seqs))
|
||||||
|
seqs[is.na(seqs)] <- 0
|
||||||
|
}
|
||||||
|
|
||||||
return(seqs)
|
return(seqs)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
#' @description Returns a string representation of a logml
|
#' @description Returns a string representation of a logml
|
||||||
#' @param logml input Logml
|
#' @param logml input Logml
|
||||||
#' @return String version of logml
|
#' @return String version of logml
|
||||||
#' @export
|
|
||||||
logml2String <- function(logml) {
|
logml2String <- function(logml) {
|
||||||
# Palauttaa logml:n string-esityksen.
|
# Palauttaa logml:n string-esityksen.
|
||||||
mjono <- " "
|
mjono <- " "
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
#' @description Reads GenePop-formatted data
|
#' @description Reads GenePop-formatted data
|
||||||
#' @param tiedostonNimi Name of the file
|
#' @param tiedostonNimi Name of the file
|
||||||
#' @return list containing data and popnames
|
#' @return list containing data and popnames
|
||||||
#' @export
|
|
||||||
lueGenePopData <- function(tiedostonNimi) {
|
lueGenePopData <- function(tiedostonNimi) {
|
||||||
fid <- readLines(tiedostonNimi)
|
fid <- readLines(tiedostonNimi)
|
||||||
line <- fid[1] # ensimmäinen rivi
|
line <- fid[1] # ensimmäinen rivi
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
#' group. popnames are as before.
|
#' group. popnames are as before.
|
||||||
#' @param tiedostonNimi Name of the file
|
#' @param tiedostonNimi Name of the file
|
||||||
#' @return List containing data and popnames
|
#' @return List containing data and popnames
|
||||||
#' @export
|
|
||||||
lueGenePopDataPop <- function(tiedostonNimi) {
|
lueGenePopDataPop <- function(tiedostonNimi) {
|
||||||
# Data annetaan muodossa, jossa viimeinen sarake kertoo ryhmän.
|
# Data annetaan muodossa, jossa viimeinen sarake kertoo ryhmän.
|
||||||
# popnames on kuten ennenkin.
|
# popnames on kuten ennenkin.
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
#' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
|
#' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
|
||||||
#' @param line line
|
#' @param line line
|
||||||
#' @return nimi
|
#' @return nimi
|
||||||
#' @export
|
|
||||||
lueNimi <- function(line) {
|
lueNimi <- function(line) {
|
||||||
# ==========================================================================
|
# ==========================================================================
|
||||||
# Validation
|
# Validation
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,6 @@
|
||||||
#' @return puredata: a data contains no index column.
|
#' @return puredata: a data contains no index column.
|
||||||
#' @param data data
|
#' @param data data
|
||||||
#' @param noalle noalle
|
#' @param noalle noalle
|
||||||
#' @export
|
|
||||||
noIndex <- function(data, noalle) {
|
noIndex <- function(data, noalle) {
|
||||||
limit <- ifelse(is(noalle, "matrix"), ncol(noalle), length(noalle))
|
limit <- ifelse(is(noalle, "matrix"), ncol(noalle), length(noalle))
|
||||||
if (size(data, 2) == limit + 1) {
|
if (size(data, 2) == limit + 1) {
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
#' @description Converts numbers to strings
|
#' @description Converts numbers to strings
|
||||||
#' @param number number
|
#' @param number number
|
||||||
#' @note On Matlab, if number is NaN the output is 'NaN'. Here, the output will be an error. Also, the function belo expects "number" to have length one, whereas Matlab accepts vectors.
|
#' @note On Matlab, if number is NaN the output is 'NaN'. Here, the output will be an error. Also, the function belo expects "number" to have length one, whereas Matlab accepts vectors.
|
||||||
#' @export
|
|
||||||
ownNum2Str <- function(number) {
|
ownNum2Str <- function(number) {
|
||||||
absolute <- abs(number)
|
absolute <- abs(number)
|
||||||
if (absolute < 1000) {
|
if (absolute < 1000) {
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,6 @@
|
||||||
#' @param npops npops
|
#' @param npops npops
|
||||||
#' @param rowsFromInd rowsFromInd
|
#' @param rowsFromInd rowsFromInd
|
||||||
#' @param alaraja alaraja
|
#' @param alaraja alaraja
|
||||||
#' @export
|
|
||||||
poistaLiianPienet <- function(npops, rowsFromInd, alaraja) {
|
poistaLiianPienet <- function(npops, rowsFromInd, alaraja) {
|
||||||
popSize <- zeros(1, npops)
|
popSize <- zeros(1, npops)
|
||||||
if (npops > 0) {
|
if (npops > 0) {
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,13 @@
|
||||||
poistaTyhjatPopulaatiot <- function(npops) {
|
poistaTyhjatPopulaatiot <- function(npops) {
|
||||||
# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
||||||
# % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.
|
# % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.
|
||||||
notEmpty <- matlab2r::find(any(SUMCOUNTS, 2))
|
notEmpty <- matlab2r::find(apply(SUMCOUNTS, 1, function(x) any(x > 0)))
|
||||||
COUNTS <- COUNTS[, , notEmpty]
|
COUNTS <- COUNTS[, , notEmpty]
|
||||||
SUMCOUNTS <- SUMCOUNTS[notEmpty, ]
|
SUMCOUNTS <- SUMCOUNTS[notEmpty, ]
|
||||||
LOGDIFF <- LOGDIFF[, notEmpty]
|
LOGDIFF <- LOGDIFF[, notEmpty]
|
||||||
|
|
||||||
for (n in 1:length(notEmpty)) {
|
for (n in 1:length(notEmpty)) {
|
||||||
apu <- matlab2r::find(PARTITION == notEmpty(n))
|
apu <- matlab2r::find(PARTITION == notEmpty[n])
|
||||||
PARTITION[apu] <- n
|
PARTITION[apu] <- n
|
||||||
}
|
}
|
||||||
npops <- length(notEmpty)
|
npops <- length(notEmpty)
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
#' @return a 4-mark presentation of proportion
|
#' @return a 4-mark presentation of proportion
|
||||||
#' @note The `round` function in R, being ISO-compliant, rounds 8.5 to 8. The
|
#' @note The `round` function in R, being ISO-compliant, rounds 8.5 to 8. The
|
||||||
#' Matlab equivalent rounds it to 9.
|
#' Matlab equivalent rounds it to 9.
|
||||||
#' @export
|
|
||||||
proportion2str <- function(prob) {
|
proportion2str <- function(prob) {
|
||||||
if (abs(prob) < 1e-3) {
|
if (abs(prob) < 1e-3) {
|
||||||
str <- "0.00"
|
str <- "0.00"
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,9 @@
|
||||||
#' @title Generates random numbers
|
#' @title Generates random numbers
|
||||||
#' @return vector of length `nc` with r.v. realizations from Gamma(rate=1)
|
#' @return vector of length `nc` with r.v. realizations from Gamma(rate=1)
|
||||||
#' @examples randdir(matrix(c(10, 30, 60), 3), 3)
|
#' @examples rBAPS:::randdir(matrix(c(10, 30, 60), 3), 3)
|
||||||
#' @param counts shape parameter
|
#' @param counts shape parameter
|
||||||
#' @param nc number of rows on output
|
#' @param nc number of rows on output
|
||||||
#' @seealso randga
|
#' @seealso randga
|
||||||
#' @export
|
|
||||||
randdir <- function(counts, nc) {
|
randdir <- function(counts, nc) {
|
||||||
svar <- zeros(nc, 1)
|
svar <- zeros(nc, 1)
|
||||||
for (i in 1:nc) {
|
for (i in 1:nc) {
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
#' @param line line number
|
#' @param line line number
|
||||||
#' @return count
|
#' @return count
|
||||||
#' @description Returns the number of queues contained in the line. There must be a space between the queues.
|
#' @description Returns the number of queues contained in the line. There must be a space between the queues.
|
||||||
#' @export
|
|
||||||
rivinSisaltamienMjonojenLkm <- function(line) {
|
rivinSisaltamienMjonojenLkm <- function(line) {
|
||||||
# Palauttaa line:n sis<69>lt<6C>mien mjonojen lukum<75><6D>r<EFBFBD>n.
|
# Palauttaa line:n sis<69>lt<6C>mien mjonojen lukum<75><6D>r<EFBFBD>n.
|
||||||
# Mjonojen v<>liss?t<>ytyy olla v<>lily<6C>nti.
|
# Mjonojen v<>liss?t<>ytyy olla v<>lily<6C>nti.
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
#' @title Find out the Digit Format
|
#' @title Find out the Digit Format
|
||||||
#' @param line the first line after the "pop" word from data in Genepop format. # @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers.
|
#' @param line the first line after the "pop" word from data in Genepop format. # @note Function clarified based on the line format whether the alleles of the data are given using 2 or 3 numbers.
|
||||||
#' @return df
|
#' @return df
|
||||||
#' @export
|
|
||||||
selvitaDigitFormat <- function(line) {
|
selvitaDigitFormat <- function(line) {
|
||||||
# line on ensimm<6D>inen pop-sanan j<>lkeinen rivi
|
# line on ensimm<6D>inen pop-sanan j<>lkeinen rivi
|
||||||
# Genepop-formaatissa olevasta datasta. funktio selvitt<74><74>
|
# Genepop-formaatissa olevasta datasta. funktio selvitt<74><74>
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,6 @@
|
||||||
#' @description Lisää jokaista alleelia joka populaation joka lokukseen j1/noalle(j) verran. Näin saatuja counts:eja vastaavista Dirichlet-jakaumista simuloidaan arvot populaatioiden alleelifrekvensseille.
|
#' @description Lisää jokaista alleelia joka populaation joka lokukseen j1/noalle(j) verran. Näin saatuja counts:eja vastaavista Dirichlet-jakaumista simuloidaan arvot populaatioiden alleelifrekvensseille.
|
||||||
#' Add each allele to each locus in each population by j 1 / noalle(j). The Dirichlet distributions corresponding to the counts thus obtained simulate values for the allele frequencies of the populations.
|
#' Add each allele to each locus in each population by j 1 / noalle(j). The Dirichlet distributions corresponding to the counts thus obtained simulate values for the allele frequencies of the populations.
|
||||||
#' @param noalle noalle
|
#' @param noalle noalle
|
||||||
#' @export
|
|
||||||
|
|
||||||
simulateAllFreqs <- function(noalle) {
|
simulateAllFreqs <- function(noalle) {
|
||||||
if (isGlobalEmpty(COUNTS)) {
|
if (isGlobalEmpty(COUNTS)) {
|
||||||
max_noalle <- 0
|
max_noalle <- 0
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,6 @@
|
||||||
#' @param allfreqs allfreqs
|
#' @param allfreqs allfreqs
|
||||||
#' @param pop pop
|
#' @param pop pop
|
||||||
#' @param missing_level missing_level
|
#' @param missing_level missing_level
|
||||||
#' @export
|
|
||||||
|
|
||||||
simulateIndividuals <- function(n, rowsFromInd, allfreqs, pop, missing_level) {
|
simulateIndividuals <- function(n, rowsFromInd, allfreqs, pop, missing_level) {
|
||||||
nloci <- size(allfreqs, 2)
|
nloci <- size(allfreqs, 2)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,6 @@
|
||||||
#' @param allfreqs allfreqa
|
#' @param allfreqs allfreqa
|
||||||
#' @param pop pop
|
#' @param pop pop
|
||||||
#' @param loc loc
|
#' @param loc loc
|
||||||
#' @export
|
|
||||||
|
|
||||||
simuloiAlleeli <- function(allfreqs, pop, loc) {
|
simuloiAlleeli <- function(allfreqs, pop, loc) {
|
||||||
if (length(dim(allfreqs)) == 0) {
|
if (length(dim(allfreqs)) == 0) {
|
||||||
freqs <- 1
|
freqs <- 1
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
#' @param osuusTaulu Percentage table?
|
#' @param osuusTaulu Percentage table?
|
||||||
#' @param osuus percentage?
|
#' @param osuus percentage?
|
||||||
#' @param indeksi index
|
#' @param indeksi index
|
||||||
#' @export
|
|
||||||
suoritaMuutos <- function(osuusTaulu, osuus, indeksi) {
|
suoritaMuutos <- function(osuusTaulu, osuus, indeksi) {
|
||||||
if (isGlobalEmpty(COUNTS)) {
|
if (isGlobalEmpty(COUNTS)) {
|
||||||
npops <- 1
|
npops <- 1
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
#' @param description description
|
#' @param description description
|
||||||
#' @param width width
|
#' @param width width
|
||||||
#' @return newline
|
#' @return newline
|
||||||
#' @export
|
|
||||||
takeLine <- function(description, width) {
|
takeLine <- function(description, width) {
|
||||||
# Returns one line from the description: line ends to the first
|
# Returns one line from the description: line ends to the first
|
||||||
# space after width:th mark.
|
# space after width:th mark.
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
#' @param coordinates coordinates
|
#' @param coordinates coordinates
|
||||||
#' @param interactive prompt user for relevant questions during execution
|
#' @param interactive prompt user for relevant questions during execution
|
||||||
#' @return a list of defectives ("viallinen") and coordinates
|
#' @return a list of defectives ("viallinen") and coordinates
|
||||||
#' @export
|
|
||||||
testaaKoordinaatit <- function(ninds, coordinates, interactive = TRUE) {
|
testaaKoordinaatit <- function(ninds, coordinates, interactive = TRUE) {
|
||||||
# Testaa onko koordinaatit kunnollisia.
|
# Testaa onko koordinaatit kunnollisia.
|
||||||
# modified by Lu Cheng, 05.12.2012
|
# modified by Lu Cheng, 05.12.2012
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@
|
||||||
#' @description Test if loaded BAPS data is proper
|
#' @description Test if loaded BAPS data is proper
|
||||||
#' @param data dataset
|
#' @param data dataset
|
||||||
#' @return ninds
|
#' @return ninds
|
||||||
#' @export
|
|
||||||
testaaOnkoKunnollinenBapsData <- function(data) {
|
testaaOnkoKunnollinenBapsData <- function(data) {
|
||||||
# Tarkastaa onko viimeisess?sarakkeessa kaikki
|
# Tarkastaa onko viimeisess?sarakkeessa kaikki
|
||||||
# luvut 1,2,...,n johonkin n:<3A><>n asti.
|
# luvut 1,2,...,n johonkin n:<3A><>n asti.
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@
|
||||||
#' @param rivi Line
|
#' @param rivi Line
|
||||||
#' @return pal = 1 if the line starts with one of the following
|
#' @return pal = 1 if the line starts with one of the following
|
||||||
# letter combinations: Pop, pop, POP. In all others cases, pal = 0
|
# letter combinations: Pop, pop, POP. In all others cases, pal = 0
|
||||||
#' @export
|
|
||||||
testaaPop <- function(rivi) {
|
testaaPop <- function(rivi) {
|
||||||
# pal=1, mik<69>li rivi alkaa jollain seuraavista
|
# pal=1, mik<69>li rivi alkaa jollain seuraavista
|
||||||
# kirjainyhdistelmist? Pop, pop, POP. Kaikissa muissa
|
# kirjainyhdistelmist? Pop, pop, POP. Kaikissa muissa
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,6 @@
|
||||||
#' @param partitionSummary partitionSummary
|
#' @param partitionSummary partitionSummary
|
||||||
#' @param popnames popnames
|
#' @param popnames popnames
|
||||||
#' @param fixedK fixedK
|
#' @param fixedK fixedK
|
||||||
#' @export
|
|
||||||
writeMixtureInfo <- function(
|
writeMixtureInfo <- function(
|
||||||
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile,
|
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile,
|
||||||
partitionSummary, popnames, fixedK
|
partitionSummary, popnames, fixedK
|
||||||
|
|
@ -27,17 +26,18 @@ writeMixtureInfo <- function(
|
||||||
fid <- load(outPutFile)
|
fid <- load(outPutFile)
|
||||||
} else {
|
} else {
|
||||||
fid <- -1
|
fid <- -1
|
||||||
# TODO: replace sink with option that will record input and output
|
outPutFile <- file.path(tempdir(), "baps4_output.baps")
|
||||||
sink("baps4_output.baps", split = TRUE) # save in text anyway.
|
message("Output saved to", outPutFile)
|
||||||
|
sink(outPutFile, split = TRUE) # save in text anyway.
|
||||||
}
|
}
|
||||||
|
|
||||||
dispLine()
|
dispLine()
|
||||||
cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:")
|
cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
|
||||||
cat(c("Data file: ", inputFile))
|
cat("Data file: ", inputFile, "\n")
|
||||||
cat("Model: independent")
|
cat("Model: independent\n")
|
||||||
cat(c("Number of clustered individuals: ", ownNum2Str(ninds)))
|
cat("Number of clustered individuals: ", ownNum2Str(ninds), "\n")
|
||||||
cat(c("Number of groups in optimal partition: ", ownNum2Str(npops)))
|
cat("Number of groups in optimal partition: ", ownNum2Str(npops), "\n")
|
||||||
cat(c("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml)))
|
cat("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml), "\n")
|
||||||
cat(" ")
|
cat(" ")
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
|
append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
|
||||||
|
|
@ -88,10 +88,10 @@ writeMixtureInfo <- function(
|
||||||
"Cluster ", as.character(m), ": {", as.character(indsInM[1])
|
"Cluster ", as.character(m), ": {", as.character(indsInM[1])
|
||||||
)
|
)
|
||||||
for (k in 2:cluster_size) {
|
for (k in 2:cluster_size) {
|
||||||
text <- c(text, ", ", as.character(indsInM[k]))
|
text <- c(text, ",", as.character(indsInM[k]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
text <- c(text, "}")
|
text <- c(text, "}\n")
|
||||||
while (length(text) > 58) {
|
while (length(text) > 58) {
|
||||||
# Take one line and display it.
|
# Take one line and display it.
|
||||||
new_line <- takeLine(text, 58)
|
new_line <- takeLine(text, 58)
|
||||||
|
|
@ -107,7 +107,7 @@ writeMixtureInfo <- function(
|
||||||
text <- ""
|
text <- ""
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (text != "") {
|
if (any(text != "")) {
|
||||||
cat(text)
|
cat(text)
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
append(fid, text)
|
append(fid, text)
|
||||||
|
|
@ -117,11 +117,11 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (npops > 1) {
|
if (npops > 1) {
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(
|
cat(
|
||||||
"Changes in log(marginal likelihood)",
|
"Changes in log(marginal likelihood)",
|
||||||
" if indvidual i is moved to group j:"
|
" if indvidual i is moved to group j:\n"
|
||||||
)
|
)
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
append(fid, " ")
|
append(fid, " ")
|
||||||
|
|
@ -132,7 +132,7 @@ writeMixtureInfo <- function(
|
||||||
fid,
|
fid,
|
||||||
c(
|
c(
|
||||||
"Changes in log(marginal likelihood)",
|
"Changes in log(marginal likelihood)",
|
||||||
"if indvidual i is moved to group j:"
|
"if indvidual i is moved to group j:\n"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
append(fid, "\n")
|
append(fid, "\n")
|
||||||
|
|
@ -168,9 +168,9 @@ writeMixtureInfo <- function(
|
||||||
|
|
||||||
if (names) {
|
if (names) {
|
||||||
nimi <- as.character(popnames[ind])
|
nimi <- as.character(popnames[ind])
|
||||||
rivi <- c(blanks(maxSize - length(nimi)), nimi, ":")
|
rivi <- c(blanks(maxSize - length(nimi)), nimi, ":\n")
|
||||||
} else {
|
} else {
|
||||||
rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":")
|
rivi <- c("\n", blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":\n")
|
||||||
}
|
}
|
||||||
for (j in 1:npops) {
|
for (j in 1:npops) {
|
||||||
rivi <- c(rivi, " ", logml2String(omaRound(muutokset[j])))
|
rivi <- c(rivi, " ", logml2String(omaRound(muutokset[j])))
|
||||||
|
|
@ -182,9 +182,9 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat("KL-divergence matrix in PHYLIP format:")
|
cat("KL-divergence matrix in PHYLIP format:\n")
|
||||||
|
|
||||||
dist_mat <- zeros(npops, npops)
|
dist_mat <- zeros(npops, npops)
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
|
|
@ -194,6 +194,7 @@ writeMixtureInfo <- function(
|
||||||
append(fid, "\n")
|
append(fid, "\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE]
|
||||||
maxnoalle <- size(COUNTS, 1)
|
maxnoalle <- size(COUNTS, 1)
|
||||||
nloci <- size(COUNTS, 2)
|
nloci <- size(COUNTS, 2)
|
||||||
d <- zeros(maxnoalle, nloci, npops)
|
d <- zeros(maxnoalle, nloci, npops)
|
||||||
|
|
@ -205,8 +206,8 @@ writeMixtureInfo <- function(
|
||||||
|
|
||||||
prior[1, nollia] <- 1
|
prior[1, nollia] <- 1
|
||||||
for (pop1 in 1:npops) {
|
for (pop1 in 1:npops) {
|
||||||
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) /
|
squeezed_COUNTS_prior <- squeeze(COUNTS[, , pop1]) + prior
|
||||||
repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
|
d[, , pop1] <- squeezed_COUNTS_prior / sum(squeezed_COUNTS_prior)
|
||||||
}
|
}
|
||||||
ekarivi <- as.character(npops)
|
ekarivi <- as.character(npops)
|
||||||
cat(ekarivi)
|
cat(ekarivi)
|
||||||
|
|
@ -216,14 +217,14 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
|
|
||||||
for (pop1 in 1:npops) {
|
for (pop1 in 1:npops) {
|
||||||
for (pop2 in 1:(pop1 - 1)) {
|
for (pop2 in seq_len(pop1 - 1)) {
|
||||||
dist1 <- d[, , pop1]
|
dist1 <- d[, , pop1]
|
||||||
dist2 <- d[, , pop2]
|
dist2 <- d[, , pop2]
|
||||||
div12 <- sum(
|
div12 <- sum(
|
||||||
sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10)))
|
sum(dist1 * base::log2((dist1 + 10^-10) / (dist2 + 10^-10)))
|
||||||
) / nloci
|
) / nloci
|
||||||
div21 <- sum(
|
div21 <- sum(
|
||||||
sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10)))
|
sum(dist2 * base::log2((dist2 + 10^-10) / (dist1 + 10^-10)))
|
||||||
) / nloci
|
) / nloci
|
||||||
div <- (div12 + div21) / 2
|
div <- (div12 + div21) / 2
|
||||||
dist_mat[pop1, pop2] <- div
|
dist_mat[pop1, pop2] <- div
|
||||||
|
|
@ -233,9 +234,9 @@ writeMixtureInfo <- function(
|
||||||
|
|
||||||
dist_mat <- dist_mat + t(dist_mat) # make it symmetric
|
dist_mat <- dist_mat + t(dist_mat) # make it symmetric
|
||||||
for (pop1 in 1:npops) {
|
for (pop1 in 1:npops) {
|
||||||
rivi <- c("Cluster_", as.character(pop1), " ")
|
rivi <- c("\nCluster_", as.character(pop1), "\n")
|
||||||
for (pop2 in 1:npops) {
|
for (pop2 in 1:npops) {
|
||||||
rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]), " ")
|
rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]))
|
||||||
}
|
}
|
||||||
cat(rivi)
|
cat(rivi)
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
|
|
@ -245,11 +246,11 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(
|
cat(
|
||||||
"List of sizes of 10 best visited partitions",
|
"List of sizes of 10 best visited partitions",
|
||||||
"and corresponding log(ml) values"
|
"and corresponding log(ml) values\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
|
|
@ -279,7 +280,7 @@ writeMixtureInfo <- function(
|
||||||
line <- c(
|
line <- c(
|
||||||
as.character(partitionSummary[part, 1]),
|
as.character(partitionSummary[part, 1]),
|
||||||
" ",
|
" ",
|
||||||
as.character(partitionSummary(part, 2))
|
as.character(partitionSummary[part, 2])
|
||||||
)
|
)
|
||||||
cat(line)
|
cat(line)
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
|
|
@ -289,9 +290,9 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!fixedK) {
|
if (!fixedK) {
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat("Probabilities for number of clusters")
|
cat("Probabilities for number of clusters\n")
|
||||||
|
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
append(fid, " ")
|
append(fid, " ")
|
||||||
|
|
@ -323,7 +324,7 @@ writeMixtureInfo <- function(
|
||||||
line <- c(
|
line <- c(
|
||||||
as.character(npopsTaulu[i]), " ", as.character(probs[i])
|
as.character(npopsTaulu[i]), " ", as.character(probs[i])
|
||||||
)
|
)
|
||||||
cat(line)
|
cat(line, "\n")
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
append(fid, line)
|
append(fid, line)
|
||||||
append(fid, "\n")
|
append(fid, "\n")
|
||||||
|
|
@ -331,5 +332,9 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
# Closing sink(s)
|
||||||
|
while (sink.number() > 0L) {
|
||||||
|
sink()
|
||||||
|
}
|
||||||
return(changesInLogml)
|
return(changesInLogml)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,6 @@
|
||||||
#' @param partitionSummary partitionSummary
|
#' @param partitionSummary partitionSummary
|
||||||
#' @param popnames popnames
|
#' @param popnames popnames
|
||||||
#' @param fixedK fixedK
|
#' @param fixedK fixedK
|
||||||
#' @export
|
|
||||||
writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
|
writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
|
||||||
outPutFile, inputFile, partitionSummary,
|
outPutFile, inputFile, partitionSummary,
|
||||||
popnames, fixedK) {
|
popnames, fixedK) {
|
||||||
|
|
|
||||||
0
inst/ext/seqs.fa → inst/extdata/seqs.fa
vendored
0
inst/ext/seqs.fa → inst/extdata/seqs.fa
vendored
|
|
@ -4,18 +4,56 @@
|
||||||
\alias{greedyMix}
|
\alias{greedyMix}
|
||||||
\title{Clustering of individuals}
|
\title{Clustering of individuals}
|
||||||
\usage{
|
\usage{
|
||||||
greedyMix(data, format, verbose = TRUE)
|
greedyMix(
|
||||||
|
data,
|
||||||
|
format,
|
||||||
|
partitionCompare = NULL,
|
||||||
|
ninds = 1L,
|
||||||
|
npops = 1L,
|
||||||
|
counts = NULL,
|
||||||
|
sumcounts = NULL,
|
||||||
|
max_iter = 100L,
|
||||||
|
alleleCodes = NULL,
|
||||||
|
inp = NULL,
|
||||||
|
popnames = NULL,
|
||||||
|
fixedK = FALSE,
|
||||||
|
verbose = FALSE
|
||||||
|
)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{data}{data file}
|
\item{data}{data file}
|
||||||
|
|
||||||
\item{format}{Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"}
|
\item{format}{Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"}
|
||||||
|
|
||||||
|
\item{partitionCompare}{a list of partitions to compare}
|
||||||
|
|
||||||
|
\item{ninds}{number of individuals}
|
||||||
|
|
||||||
|
\item{npops}{number of populations}
|
||||||
|
|
||||||
|
\item{counts}{counts}
|
||||||
|
|
||||||
|
\item{sumcounts}{sumcounts}
|
||||||
|
|
||||||
|
\item{max_iter}{maximum number of iterations}
|
||||||
|
|
||||||
|
\item{alleleCodes}{allele codes}
|
||||||
|
|
||||||
|
\item{inp}{input file}
|
||||||
|
|
||||||
|
\item{popnames}{population names}
|
||||||
|
|
||||||
|
\item{fixedK}{if \code{TRUE}, the number of populations is fixed}
|
||||||
|
|
||||||
\item{verbose}{if \code{TRUE}, prints extra output information}
|
\item{verbose}{if \code{TRUE}, prints extra output information}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Clustering of individuals
|
Clustering of individuals
|
||||||
}
|
}
|
||||||
|
\examples{
|
||||||
|
data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
|
||||||
|
greedyMix(data, "fasta")
|
||||||
|
}
|
||||||
\references{
|
\references{
|
||||||
Samtools: a suite of programs for interacting
|
Samtools: a suite of programs for interacting
|
||||||
with high-throughput sequencing data. <http://www.htslib.org/>
|
with high-throughput sequencing data. <http://www.htslib.org/>
|
||||||
|
|
|
||||||
|
|
@ -4,10 +4,12 @@
|
||||||
\alias{handleData}
|
\alias{handleData}
|
||||||
\title{Handle Data}
|
\title{Handle Data}
|
||||||
\usage{
|
\usage{
|
||||||
handleData(raw_data)
|
handleData(raw_data, format = "Genepop")
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{raw_data}{Raw data}
|
\item{raw_data}{Raw data in Genepop or BAPS format}
|
||||||
|
|
||||||
|
\item{format}{data format}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Handle Data
|
Handle Data
|
||||||
|
|
@ -20,5 +22,5 @@ After this function. Add blank lines for individuals with fewer rows as
|
||||||
maximum. If the code of an allele is = 0, the function changes that allele
|
maximum. If the code of an allele is = 0, the function changes that allele
|
||||||
code to the smallest code that is larger than any code in use. After this,
|
code to the smallest code that is larger than any code in use. After this,
|
||||||
the function changes the allele codes so that one locus j
|
the function changes the allele codes so that one locus j
|
||||||
codes get values between? 1, ..., Noah (j).
|
codes get values between? 1, ..., noalle(j).
|
||||||
}
|
}
|
||||||
|
|
|
||||||
26
man/importFile.Rd
Normal file
26
man/importFile.Rd
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/importFile.R
|
||||||
|
\name{importFile}
|
||||||
|
\alias{importFile}
|
||||||
|
\title{Import data file}
|
||||||
|
\usage{
|
||||||
|
importFile(data, format, verbose)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{data}{raw dataset}
|
||||||
|
|
||||||
|
\item{format}{data format (guesses from extension if not provided)}
|
||||||
|
|
||||||
|
\item{verbose}{if \code{TRUE}, prints extra output information}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
The data in a format that can be used by the other functions
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Imports data from several formats (FASTA, VCF, SAM, BAM,
|
||||||
|
Genepop).
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
path_inst <- system.file("extdata", "", package = "rBAPS")
|
||||||
|
importFile(file.path(path_inst, "FASTA_clustering_haploid.fasta"))
|
||||||
|
}
|
||||||
|
|
@ -4,12 +4,15 @@
|
||||||
\alias{load_fasta}
|
\alias{load_fasta}
|
||||||
\title{load_fasta}
|
\title{load_fasta}
|
||||||
\usage{
|
\usage{
|
||||||
load_fasta(msa, keep.singletons = FALSE)
|
load_fasta(msa, keep_singletons = FALSE, output_numbers = TRUE)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{msa}{Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered}
|
\item{msa}{Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered}
|
||||||
|
|
||||||
\item{keep.singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}
|
\item{keep_singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}
|
||||||
|
|
||||||
|
\item{output_numbers}{A logical indicating whether to output the data as
|
||||||
|
numbers (TRUE) or letters (FALSE)}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
A character matrix with filtered SNP data
|
A character matrix with filtered SNP data
|
||||||
|
|
@ -19,8 +22,8 @@ Loads a fasta file into matrix format ready for
|
||||||
running the hierBAPS algorithm.
|
running the hierBAPS algorithm.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
msa <- system.file("ext", "seqs.fa", package = "rBAPS")
|
msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
|
||||||
snp.matrix <- load_fasta(msa)
|
snp.matrix <- rBAPS:::load_fasta(msa)
|
||||||
}
|
}
|
||||||
\seealso{
|
\seealso{
|
||||||
rhierbaps::load_fasta
|
rhierbaps::load_fasta
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ vector of length `nc` with r.v. realizations from Gamma(rate=1)
|
||||||
Generates random numbers
|
Generates random numbers
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
randdir(matrix(c(10, 30, 60), 3), 3)
|
rBAPS:::randdir(matrix(c(10, 30, 60), 3), 3)
|
||||||
}
|
}
|
||||||
\seealso{
|
\seealso{
|
||||||
randga
|
randga
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
context("Auxiliary functions to greedyMix")
|
context("Auxiliary functions to greedyMix")
|
||||||
|
|
||||||
# Defining the relative path to current inst -----------------------------------
|
# Defining the relative path to current inst -----------------------------------
|
||||||
path_inst <- system.file("ext", "", package = "rBAPS")
|
path_inst <- system.file("extdata", "", package = "rBAPS")
|
||||||
|
|
||||||
# Reading datasets -------------------------------------------------------------
|
# Reading datasets -------------------------------------------------------------
|
||||||
baps_diploid <- read.delim(
|
baps_diploid <- read.delim(
|
||||||
file = paste(path_inst, "BAPS_format_clustering_diploid.txt", sep = "/"),
|
file = file.path(path_inst, "BAPS_format_clustering_diploid.txt"),
|
||||||
sep = " ",
|
sep = " ",
|
||||||
header = FALSE
|
header = FALSE
|
||||||
)
|
)
|
||||||
|
|
@ -31,26 +31,27 @@ test_that("handleData works as expected", {
|
||||||
expect_equal(data_obs, data_exp)
|
expect_equal(data_obs, data_exp)
|
||||||
})
|
})
|
||||||
|
|
||||||
context("Opening files on greedyMix")
|
context("Processing files through greedyMix")
|
||||||
|
|
||||||
df_fasta <- greedyMix(
|
raw_fasta <- importFile(
|
||||||
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
|
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
|
||||||
format = "FASTA"
|
format = "FASTA"
|
||||||
)
|
)
|
||||||
df_vcf <- greedyMix(
|
raw_vcf <- importFile(
|
||||||
data = file.path(path_inst, "vcf_example.vcf"),
|
data = file.path(path_inst, "vcf_example.vcf"),
|
||||||
format = "VCF",
|
format = "VCF",
|
||||||
verbose = FALSE
|
verbose = FALSE
|
||||||
)
|
)
|
||||||
df_bam <- greedyMix(
|
df_bam <- importFile(
|
||||||
data = file.path(path_inst, "bam_example.bam"),
|
data = file.path(path_inst, "bam_example.bam"),
|
||||||
format = "BAM",
|
format = "BAM",
|
||||||
)
|
)
|
||||||
|
|
||||||
test_that("Files are imported correctly", {
|
test_that("Files are imported correctly", {
|
||||||
expect_equal(dim(df_fasta), c(5, 99))
|
expect_equal(dim(raw_fasta), c(5, 99))
|
||||||
expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
|
expect_equal(dim(raw_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
|
||||||
expect_error(
|
expect_error(
|
||||||
greedyMix(
|
importFile(
|
||||||
data = paste(path_inst, "sam_example.sam", sep = "/"),
|
data = paste(path_inst, "sam_example.sam", sep = "/"),
|
||||||
format = "SAM",
|
format = "SAM",
|
||||||
)
|
)
|
||||||
|
|
@ -58,6 +59,15 @@ test_that("Files are imported correctly", {
|
||||||
expect_equal(length(df_bam[[1]]), 13)
|
expect_equal(length(df_bam[[1]]), 13)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
df_fasta <- greedyMix(
|
||||||
|
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
|
||||||
|
format = "FASTA"
|
||||||
|
)
|
||||||
|
test_that("greedyMix() works", {
|
||||||
|
expect_error(greedyMix(file.path(path_inst, "vcf_example.vcf")))
|
||||||
|
expect_error(greedyMix(file.path(path_inst, "bam_example.bam")))
|
||||||
|
})
|
||||||
|
|
||||||
context("Linkage")
|
context("Linkage")
|
||||||
|
|
||||||
test_that("Linkages are properly calculated", {
|
test_that("Linkages are properly calculated", {
|
||||||
|
|
|
||||||
|
|
@ -79,7 +79,7 @@ test_that("lakseKlitik() and subfunctions produce expected output", {
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("testFastaData() produces same output as on MATLAB", {
|
test_that("testFastaData() produces same output as on MATLAB", {
|
||||||
msa <- system.file("ext", "seqs.fa", package = "rBAPS")
|
msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
|
||||||
test_msa <- testFastaData(msa)
|
test_msa <- testFastaData(msa)
|
||||||
expect_equal(test_msa$ninds, 515)
|
expect_equal(test_msa$ninds, 515)
|
||||||
expect_equal(dim(test_msa$data), c(515, 745))
|
expect_equal(dim(test_msa$data), c(515, 745))
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue