Merge branch 'issue-25' into develop
* issue-25: (38 commits)
Adjusted unit tests for #25
Fixed sink() usage
Fixed docs
Exporting importFile()
Improved handling of supported formats for greedyMix() (#25)
Fixed basic parsing of FASTA files (#25)
Increment version number to 0.0.0.9022
Fixed syntax (#25)
Improved printing (#25)
Partial reversion of b034158 (#25)
Fixed to indMix (#25)
Incorporated handleData() on greedyMix() (#25)
Improved handleData() to handle FASTA (#25)
Added numeric output option to load_fasta() (#25)
Fixed test text (#25)
Added missing documentation for arguments (#25)
Syntax fix (#25)
Delayed resolution of FIXMEs (#25)
Workaround for usage of MATLAB any() (#25)
Fixed argument passing (#25)
...
This commit is contained in:
commit
59fbb0a167
95 changed files with 45170 additions and 45032 deletions
|
|
@ -4,7 +4,6 @@ PITFALLS.md
|
|||
CHANGELOG.md
|
||||
CITATION.cff
|
||||
.travis.yml
|
||||
inst/ext/ExamplesDataFormatting
|
||||
inst/testdata
|
||||
.github
|
||||
aux
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
Package: rBAPS
|
||||
Title: Bayesian Analysis of Population Structure
|
||||
Version: 0.0.0.9020
|
||||
Version: 0.0.0.9022
|
||||
Date: 2020-11-09
|
||||
Authors@R:
|
||||
c(
|
||||
|
|
|
|||
39
NAMESPACE
39
NAMESPACE
|
|
@ -1,45 +1,8 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
export(addAlleles)
|
||||
export(admix1)
|
||||
export(calculatePopLogml)
|
||||
export(computeAllFreqs2)
|
||||
export(computeIndLogml)
|
||||
export(computePersonalAllFreqs)
|
||||
export(computeRows)
|
||||
export(etsiParas)
|
||||
export(fgetl)
|
||||
export(fopen)
|
||||
export(greedyMix)
|
||||
export(greedyPopMix)
|
||||
export(handleData)
|
||||
export(handlePopData)
|
||||
export(initPopNames)
|
||||
export(learn_partition_modified)
|
||||
export(learn_simple_partition)
|
||||
export(linkage)
|
||||
export(load_fasta)
|
||||
export(logml2String)
|
||||
export(lueGenePopData)
|
||||
export(lueGenePopDataPop)
|
||||
export(lueNimi)
|
||||
export(noIndex)
|
||||
export(ownNum2Str)
|
||||
export(poistaLiianPienet)
|
||||
export(proportion2str)
|
||||
export(randdir)
|
||||
export(rivinSisaltamienMjonojenLkm)
|
||||
export(selvitaDigitFormat)
|
||||
export(simulateAllFreqs)
|
||||
export(simulateIndividuals)
|
||||
export(simuloiAlleeli)
|
||||
export(suoritaMuutos)
|
||||
export(takeLine)
|
||||
export(testaaKoordinaatit)
|
||||
export(testaaOnkoKunnollinenBapsData)
|
||||
export(testaaPop)
|
||||
export(writeMixtureInfo)
|
||||
export(writeMixtureInfoPop)
|
||||
export(importFile)
|
||||
importFrom(R6,R6Class)
|
||||
importFrom(Rsamtools,scanBam)
|
||||
importFrom(adegenet,.readExt)
|
||||
|
|
|
|||
6
NEWS.md
Normal file
6
NEWS.md
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
# rBAPS (development version)
|
||||
|
||||
# rBAPS 0.0.0.9021
|
||||
|
||||
* Added a `NEWS.md` file to track changes to the package.
|
||||
* Exported `greedyMix()` and `load_fasta()` functions.
|
||||
|
|
@ -4,7 +4,6 @@
|
|||
#' @param line line
|
||||
#' @param divider divider
|
||||
#' @return data (after alleles were added)
|
||||
#' @export
|
||||
addAlleles <- function(data, ind, line, divider) {
|
||||
# Lisaa BAPS-formaatissa olevaan datataulukkoon
|
||||
# yksilöä ind vastaavat rivit. Yksilön alleelit
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@
|
|||
#' alleleCodes, adjprior, popnames, rowsFromInd, data, npops, noalle
|
||||
#' @param tietue tietue
|
||||
#' @importFrom methods is
|
||||
#' @export
|
||||
admix1 <- function(tietue) {
|
||||
if (!is.list(tietue)) {
|
||||
message("Load mixture result file. These are the files in this directory:")
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
#' for the mean parameter.
|
||||
#' @param points points
|
||||
#' @param fii fii
|
||||
#' @export
|
||||
calculatePopLogml <- function(points, fii) {
|
||||
n <- length(points)
|
||||
fuzzy_ones <- sum(points)
|
||||
|
|
|
|||
23
R/comparePartitions.R
Normal file
23
R/comparePartitions.R
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# Placeholder for the partition-comparison step of the original MATLAB code.
# The R translation has not been written yet, so every call fails immediately
# with an error; none of the arguments are inspected.
comparePartitions <- function(
  data, c.rows, partitionCompare.partitions, ninds, rowsFromInd, noalle,
  adjprior
) {
  # TODO: implement
  stop("Comparing partitions not yet implemented")

  # MATLAB reference implementation, kept verbatim for the future translation:
  #
  # nsamplingunits = size(c.rows,1);
  # partitions = partitionCompare.partitions;
  # npartitions = size(partitions,2);
  # partitionLogml = zeros(1,npartitions);
  # for i = 1:npartitions
  #     % number of unique partition labels
  #     npops = length(unique(partitions(:,i)));
  #
  #     partitionInd = zeros(ninds*rowsFromInd,1);
  #     partitionSample = partitions(:,i);
  #     for j = 1:nsamplingunits
  #         partitionInd([c.rows(j,1):c.rows(j,2)]) = partitionSample(j);
  #     end
  #     partitionLogml(i) = initialCounts(partitionInd, data(:,1:end-1), npops, c.rows, noalle, adjprior);
  #
  # end
  # % return the logml result
  # partitionCompare.logmls = partitionLogml;
  # set(h1, 'userdata', partitionCompare);
  # return
}
|
||||
|
|
@ -2,7 +2,6 @@
|
|||
#' @description Lisää a priori jokaista alleelia joka populaation joka lokukseen
|
||||
#' j 1/noalle(j) verran.
|
||||
#' @param noalle noalle
|
||||
#' @export
|
||||
computeAllFreqs2 <- function(noalle) {
|
||||
COUNTS <- ifelse(isGlobalEmpty(COUNTS), vector(), COUNTS)
|
||||
SUMCOUNTS <- ifelse(isGlobalEmpty(SUMCOUNTS), vector(), COUNTS)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#' määritellyiksi kuten osuusTaulu:ssa.
|
||||
#' @param omaFreqs own Freqs?
|
||||
#' @param osuusTaulu Percentage table?
|
||||
#' @export
|
||||
computeIndLogml <- function(omaFreqs, osuusTaulu) {
|
||||
omaFreqs <- as.matrix(omaFreqs)
|
||||
osuusTaulu <- as.matrix(osuusTaulu)
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
#' @param data data
|
||||
#' @param allFreqs allFreqs
|
||||
#' @param rowsFromInd rowsFromInd
|
||||
#' @export
|
||||
|
||||
computePersonalAllFreqs <- function(ind, data, allFreqs, rowsFromInd) {
|
||||
if (isGlobalEmpty(COUNTS)) {
|
||||
nloci <- npops <- 1
|
||||
|
|
|
|||
|
|
@ -4,9 +4,13 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
|
|||
# ======================================================== #
|
||||
# Limiting COUNTS size #
|
||||
# ======================================================== #
|
||||
COUNTS <- COUNTS[
|
||||
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops, drop = FALSE
|
||||
]
|
||||
if (!is.null(adjprior)) {
|
||||
nr <- seq_len(nrow(adjprior))
|
||||
nc <- seq_len(ncol(adjprior))
|
||||
COUNTS <- COUNTS[nr, nc, pops, drop = FALSE]
|
||||
} else {
|
||||
COUNTS <- NA
|
||||
}
|
||||
|
||||
x <- size(COUNTS, 1)
|
||||
y <- size(COUNTS, 2)
|
||||
|
|
@ -15,25 +19,24 @@ computePopulationLogml <- function(pops, adjprior, priorTerm) {
|
|||
# ======================================================== #
|
||||
# Computation #
|
||||
# ======================================================== #
|
||||
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
|
||||
term1 <- squeeze(
|
||||
sum(
|
||||
term1 <- NULL
|
||||
if (!is.null(adjprior)) {
|
||||
isarray <- length(dim(repmat(adjprior, c(1, 1, length(pops))))) > 2
|
||||
term1 <- squeeze(
|
||||
sum(
|
||||
reshape(
|
||||
lgamma(
|
||||
repmat(adjprior, c(1, 1, length(pops))) +
|
||||
COUNTS[
|
||||
seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), pops,
|
||||
drop = !isarray
|
||||
]
|
||||
sum(
|
||||
reshape(
|
||||
lgamma(
|
||||
repmat(adjprior, c(1, 1, length(pops))) + COUNTS[nr, nc, pops, drop = !isarray]
|
||||
),
|
||||
c(x, y, z)
|
||||
),
|
||||
c(x, y, z)
|
||||
1
|
||||
),
|
||||
1
|
||||
),
|
||||
2
|
||||
2
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
if (is.null(priorTerm)) priorTerm <- 0
|
||||
popLogml <- term1 - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
|
||||
return(popLogml)
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
#' @param rowsFromInd rowsFromInd
|
||||
#' @param inds matrix
|
||||
#' @param ninds ninds
|
||||
#' @export
|
||||
computeRows <- function(rowsFromInd, inds, ninds) {
|
||||
if (!is(inds, "matrix")) inds <- as.matrix(inds)
|
||||
if (identical(dim(inds), c(nrow(inds), 1L))) {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
#' @export
|
||||
#' @title Etsi Paras
|
||||
#' @description Search for the best?
|
||||
#' @param osuus Percentages?
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@
|
|||
#' fgetl returns tline as a numeric value -1.
|
||||
#' @author Waldir Leoncio
|
||||
#' @seealso fopen
|
||||
#' @export
|
||||
fgetl <- function(file) {
|
||||
# ==========================================================================
|
||||
# Validation
|
||||
|
|
@ -27,5 +26,4 @@ fgetl <- function(file) {
|
|||
#' @return The same as `readLines(filename)`
|
||||
#' @author Waldir Leoncio
|
||||
#' @seealso fgetl
|
||||
#' @export
|
||||
fopen <- function(filename) readLines(filename)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,16 @@
|
|||
#' @title Clustering of individuals
|
||||
#' @param data data file
|
||||
#' @param format Data format. Format supported: "FASTA", "VCF" ,"BAM", "GenePop"
|
||||
#' @param partitionCompare a list of partitions to compare
|
||||
#' @param ninds number of individuals
|
||||
#' @param npops number of populations
|
||||
#' @param counts counts
|
||||
#' @param sumcounts sumcounts
|
||||
#' @param max_iter maximum number of iterations
|
||||
#' @param alleleCodes allele codes
|
||||
#' @param inp input file
|
||||
#' @param popnames population names
|
||||
#' @param fixedK if \code{TRUE}, the number of populations is fixed
|
||||
#' @param verbose if \code{TRUE}, prints extra output information
|
||||
#' @importFrom utils read.delim
|
||||
#' @importFrom vcfR read.vcfR
|
||||
|
|
@ -9,41 +19,59 @@
|
|||
#' @references Samtools: a suite of programs for interacting
|
||||
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
||||
#' @export
|
||||
greedyMix <- function(data, format, verbose = TRUE) {
|
||||
# Parsing data format ------------------------------------------------------
|
||||
#' @examples
|
||||
#' data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
|
||||
#' greedyMix(data, "fasta")
|
||||
greedyMix <- function(
|
||||
data, format, partitionCompare = NULL, ninds = 1L, npops = 1L,
|
||||
counts = NULL, sumcounts = NULL, max_iter = 100L, alleleCodes = NULL,
|
||||
inp = NULL, popnames = NULL, fixedK = FALSE, verbose = FALSE
|
||||
) {
|
||||
# Importing and handling data ================================================
|
||||
data <- importFile(data, format, verbose)
|
||||
data <- handleData(data, tolower(format))
|
||||
c <- list(
|
||||
noalle = data[["noalle"]],
|
||||
data = data[["newData"]],
|
||||
adjprior = data[["adjprior"]],
|
||||
priorTerm = data[["priorTerm"]],
|
||||
rowsFromInd = data[["rowsFromInd"]]
|
||||
)
|
||||
|
||||
if (missing(format)) {
|
||||
format <- gsub(".*\\.(.+)$", "\\1", data)
|
||||
message("Format not provided. Guessing from file extension: ", format)
|
||||
}
|
||||
format <- tolower(format)
|
||||
|
||||
# Dispatching to proper loading function -----------------------------------
|
||||
|
||||
if (format == "fasta") {
|
||||
out <- load_fasta(data)
|
||||
} else if (format == "vcf") {
|
||||
out <- vcfR::read.vcfR(data, verbose = verbose)
|
||||
} else if (format == "sam") {
|
||||
stop(
|
||||
"SAM files not directly supported. ",
|
||||
"Install the samtools software and execute\n\n",
|
||||
"samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ",
|
||||
"and try running this function again with 'format=BAM'"
|
||||
# Comparing partitions =======================================================
|
||||
if (!is.null(partitionCompare)) {
|
||||
logmls <- comparePartitions(
|
||||
c[["data"]], nrow(c[["data"]]), partitionCompare[["partitions"]], ninds,
|
||||
c[["rowsFromInd"]], c[["noalle"]], c[["adjprior"]]
|
||||
)
|
||||
} else if (format == "bam") {
|
||||
out <- Rsamtools::scanBam(data)
|
||||
} else if (format == "genepop") {
|
||||
if (toupper(adegenet::.readExt(data)) == "TXT") {
|
||||
message("Creating a copy of the file with the .gen extension")
|
||||
dataGen <- gsub("txt", "gen", data)
|
||||
file.copy(data, dataGen)
|
||||
out <- adegenet::read.genepop(dataGen)
|
||||
} else {
|
||||
out <- adegenet::read.genepop(data)
|
||||
}
|
||||
} else {
|
||||
stop("Format not supported.")
|
||||
}
|
||||
return(out)
|
||||
|
||||
|
||||
# Generating partition summary ===============================================
|
||||
ekat <- seq(1L, c[["rowsFromInd"]], ninds * c[["rowsFromInd"]]) # ekat = (1:rowsFromInd:ninds*rowsFromInd)';
|
||||
c[["rows"]] <- c(ekat, ekat + c[["rowsFromInd"]] - 1L) # c.rows = [ekat ekat+rowsFromInd-1]
|
||||
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose)
|
||||
logml <- logml_npops_partitionSummary[["logml"]]
|
||||
npops <- logml_npops_partitionSummary[["npops"]]
|
||||
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
||||
|
||||
# Generating output object ===================================================
|
||||
out <- list(
|
||||
"alleleCodes" = alleleCodes, "adjprior" = c[["adjprior"]],
|
||||
"popnames" = popnames, "rowsFromInd" = c[["rowsFromInd"]],
|
||||
"data" = c[["data"]], "npops" = npops, "noalle" = c[["noalle"]],
|
||||
"mixtureType" = "mix", "logml" = logml
|
||||
)
|
||||
if (logml == 1) {
|
||||
return(out)
|
||||
}
|
||||
|
||||
# Writing mixture info =======================================================
|
||||
changesInLogml <- writeMixtureInfo(
|
||||
logml, c[["rowsFromInd"]], c[["data"]], c[["adjprior"]], c[["priorTerm"]],
|
||||
NULL, inp, partitionSummary, popnames, fixedK
|
||||
)
|
||||
|
||||
# Updating results ==========================================================
|
||||
return(c(out, "changesInLogml" = changesInLogml))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@
|
|||
#' @importFrom matlab2r uiputfile
|
||||
#' @references Samtools: a suite of programs for interacting
|
||||
#' with high-throughput sequencing data. <http://www.htslib.org/>
|
||||
#' @export
|
||||
greedyPopMix <- function(data, format, partitionCompare = NULL, verbose = TRUE
|
||||
) {
|
||||
# Replacing original file reading code with greedyMix()
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#' @title Handle Data
|
||||
#' @param raw_data Raw data
|
||||
#' @param raw_data Raw data in Genepop or BAPS format
|
||||
#' @param format data format
|
||||
#' @details The last column of the original data tells you from which
|
||||
#' individual that line is from. The function first examines how many line
|
||||
#' maximum is from one individual giving know if it is haploid, diploid, etc.
|
||||
|
|
@ -7,9 +8,9 @@
|
|||
#' maximum. If the code of an allele is = 0, the function changes that allele
|
||||
#' code to the smallest code that is larger than any code in use. After this,
|
||||
#' the function changes the allele codes so that one locus j
|
||||
#' codes get values between? 1, ..., Noah (j).
|
||||
#' codes get values between? 1, ..., noalle(j).
|
||||
#' @export
|
||||
handleData <- function(raw_data) {
|
||||
handleData <- function(raw_data, format = "Genepop") {
|
||||
# Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt?
|
||||
# kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako
|
||||
# rivi?maksimissaan on per?isin yhdelt?yksil?lt? jolloin saadaan
|
||||
|
|
@ -20,28 +21,29 @@ handleData <- function(raw_data) {
|
|||
# koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi.
|
||||
# T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
|
||||
# koodit saavat arvoja v?lill?1,...,noalle(j).
|
||||
nloci <- switch(
|
||||
tolower(format),
|
||||
"genepop" = ncol(raw_data) - 1L,
|
||||
"baps" = ncol(raw_data) - 1L,
|
||||
"fasta" = ncol(raw_data),
|
||||
"vcf" = stop("VCF format not supported for processing yet"),
|
||||
"bam" = stop("BAM format not supported for processing yet")
|
||||
)
|
||||
data <- as.matrix(raw_data)
|
||||
nloci <- size(raw_data, 2) - 1
|
||||
|
||||
dataApu <- data[, 1:nloci]
|
||||
nollat <- matlab2r::find(dataApu == 0)
|
||||
if (!isempty(nollat)) {
|
||||
isoinAlleeli <- base::max(max(dataApu))
|
||||
isoinAlleeli <- base::max(base::max(dataApu))
|
||||
dataApu[nollat] <- isoinAlleeli + 1
|
||||
data[, 1:nloci] <- dataApu
|
||||
}
|
||||
# dataApu <- []
|
||||
# nollat <- []
|
||||
# isoinAlleeli <- []
|
||||
|
||||
noalle <- zeros(1, nloci)
|
||||
alleelitLokuksessa <- cell(nloci, 1, expandable = TRUE)
|
||||
for (i in 1:nloci) {
|
||||
alleelitLokuksessaI <- unique(data[, i])
|
||||
alleelitLokuksessa[[i]] <- sort(alleelitLokuksessaI[
|
||||
matlab2r::find(
|
||||
alleelitLokuksessaI >= 0
|
||||
)
|
||||
matlab2r::find(alleelitLokuksessaI >= 0)
|
||||
])
|
||||
noalle[i] <- length(alleelitLokuksessa[[i]])
|
||||
}
|
||||
|
|
@ -49,9 +51,7 @@ handleData <- function(raw_data) {
|
|||
for (i in 1:nloci) {
|
||||
alleelitLokuksessaI <- alleelitLokuksessa[[i]]
|
||||
puuttuvia <- base::max(noalle) - length(alleelitLokuksessaI)
|
||||
alleleCodes[, i] <- as.matrix(
|
||||
c(alleelitLokuksessaI, zeros(puuttuvia, 1))
|
||||
)
|
||||
alleleCodes[, i] <- as.matrix(c(alleelitLokuksessaI, zeros(puuttuvia, 1)))
|
||||
}
|
||||
|
||||
for (loc in seq_len(nloci)) {
|
||||
|
|
@ -60,7 +60,7 @@ handleData <- function(raw_data) {
|
|||
}
|
||||
}
|
||||
|
||||
nind <- base::max(data[, ncol(data)])
|
||||
nind <- as.integer(base::max(data[, ncol(data)]))
|
||||
nrows <- size(data, 1)
|
||||
ncols <- size(data, 2)
|
||||
rowsFromInd <- zeros(nind, 1)
|
||||
|
|
@ -71,11 +71,11 @@ handleData <- function(raw_data) {
|
|||
a <- -999
|
||||
emptyRow <- repmat(a, c(1, ncols))
|
||||
lessThanMax <- matlab2r::find(rowsFromInd < maxRowsFromInd)
|
||||
missingRows <- maxRowsFromInd * nind - nrows
|
||||
missingRows <- max(maxRowsFromInd * nind - nrows, 0L)
|
||||
data <- rbind(data, zeros(missingRows, ncols))
|
||||
pointer <- 1
|
||||
for (ind in t(lessThanMax)) { # K?y l?pi ne yksil?t, joilta puuttuu rivej?
|
||||
miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm.
|
||||
miss <- maxRowsFromInd - rowsFromInd[ind] # T?lt?yksil?lt?puuttuvien lkm.
|
||||
}
|
||||
data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
|
||||
newData <- data
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
#' codes so that the codes for one locus have values between 1 and noalle[j].
|
||||
#' Before this change, an allele whose code is zero is changed.
|
||||
#' @param raw_data raw data
|
||||
#' @export
|
||||
handlePopData <- function(raw_data) {
|
||||
# Alkuperäisen datan viimeinen sarake kertoo, milt?yksilölt?
|
||||
# kyseinen rivi on peräisin. Funktio muuttaa alleelikoodit
|
||||
|
|
|
|||
49
R/importFile.R
Normal file
49
R/importFile.R
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
#' @title Import data file
#' @description Imports data from several formats (FASTA, VCF, SAM, BAM,
#' Genepop).
#' @param data raw dataset (a file path; its extension is used to guess
#' \code{format} when that argument is missing)
#' @param format data format (guesses from extension if not provided).
#' Matching is case-insensitive; recognised values are "fasta", "vcf", "bam"
#' and "genepop". "sam" is recognised only to explain how to convert the file.
#' @param verbose if \code{TRUE}, prints extra output information (currently
#' only forwarded to the VCF reader)
#' @return The data in a format that can be used by the other functions
#' @export
#' @examples
#' path_inst <- system.file("extdata", "", package = "rBAPS")
#' importFile(file.path(path_inst, "FASTA_clustering_haploid.fasta"))
importFile <- function(data, format, verbose = FALSE) {
  # Parsing data format ------------------------------------------------------

  if (missing(format)) {
    # Everything after the last dot is taken as the format name
    format <- gsub(".*\\.(.+)$", "\\1", data)
    message("Format not provided. Guessing from file extension: ", format)
  }
  format <- tolower(format)

  # Dispatching to proper loading function -----------------------------------

  if (format == "fasta") {
    out <- load_fasta(data)
  } else if (format == "vcf") {
    out <- vcfR::read.vcfR(data, verbose = verbose)
  } else if (format == "sam") {
    stop(
      "SAM files not directly supported. ",
      "Install the samtools software and execute\n\n",
      "samtools view -b ", data, " > out_file.bam\n\nto convert to BAM ",
      "and try running this function again with 'format=BAM'"
    )
  } else if (format == "bam") {
    out <- Rsamtools::scanBam(data)
  } else if (format == "genepop") {
    if (toupper(adegenet::.readExt(data)) == "TXT") {
      message("Creating a copy of the file with the .gen extension")
      # Only the trailing extension is rewritten (case-insensitively, to agree
      # with the check above); a "txt" elsewhere in the path is left untouched.
      dataGen <- sub("\\.txt$", ".gen", data, ignore.case = TRUE)
      file.copy(data, dataGen)
      out <- adegenet::read.genepop(dataGen)
    } else {
      out <- adegenet::read.genepop(data)
    }
  } else {
    stop("Format not supported.")
  }
  return(out)
}
|
||||
50
R/indMix.R
50
R/indMix.R
|
|
@ -1,4 +1,4 @@
|
|||
indMix <- function(c, npops, dispText = TRUE) {
|
||||
indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, dispText = FALSE) {
|
||||
# Greedy search algorithm with unknown number of classes for regular
|
||||
# clustering.
|
||||
# Input npops is not used if called by greedyMix or greedyPopMix.
|
||||
|
|
@ -17,8 +17,11 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
if (isfield(c, "dist")) {
|
||||
dist <- c$dist
|
||||
Z <- c$Z
|
||||
} else {
|
||||
Z <- NULL
|
||||
}
|
||||
|
||||
|
||||
rm(c)
|
||||
nargin <- length(as.list(match.call())) - 1
|
||||
if (nargin < 2) {
|
||||
|
|
@ -65,14 +68,14 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
nruns <- length(npopsTaulu)
|
||||
|
||||
initData <- data
|
||||
data <- data[, 1:(ncol(data) - 1)]
|
||||
data <- data[, seq_along(noalle)] # Original code always dropped last column.
|
||||
|
||||
logmlBest <- -1e50
|
||||
partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml)
|
||||
partitionSummary[, 1] <- zeros(30, 1)
|
||||
worstLogml <- -1e50
|
||||
worstIndex <- 1
|
||||
for (run in 1:nruns) {
|
||||
for (run in seq_along(nruns)) {
|
||||
npops <- npopsTaulu[[run]]
|
||||
if (dispText) {
|
||||
dispLine()
|
||||
|
|
@ -84,6 +87,7 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
)
|
||||
}
|
||||
ninds <- size(rows, 1)
|
||||
|
||||
initialPartition <- admixture_initialization(initData, npops, Z)
|
||||
sumcounts_counts_logml <- initialCounts(
|
||||
initialPartition, data, npops, rows, noalle, adjprior
|
||||
|
|
@ -93,16 +97,15 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
logml <- sumcounts_counts_logml$logml
|
||||
|
||||
PARTITION <- zeros(ninds, 1)
|
||||
for (i in 1:ninds) {
|
||||
for (i in seq_len(ninds)) {
|
||||
apu <- rows[i]
|
||||
PARTITION[i] <- initialPartition[apu[1]]
|
||||
}
|
||||
|
||||
COUNTS <- counts
|
||||
SUMCOUNTS <- sumcounts
|
||||
POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm)
|
||||
POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
|
||||
LOGDIFF <- repmat(-Inf, c(ninds, npops))
|
||||
rm(initialPartition, counts, sumcounts)
|
||||
|
||||
# PARHAAN MIXTURE-PARTITION ETSIMINEN
|
||||
nRoundTypes <- 7
|
||||
|
|
@ -120,30 +123,34 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
)
|
||||
}
|
||||
|
||||
iter <- 1L
|
||||
while (ready != 1) {
|
||||
# FIXME: loop caught in here
|
||||
iter <- iter + 1L
|
||||
if (iter > max_iter) {
|
||||
warning("max_iter reached. Stopping prematurely.")
|
||||
break
|
||||
}
|
||||
muutoksia <- 0
|
||||
|
||||
if (dispText) {
|
||||
message(paste("\nPerforming steps:", as.character(roundTypes)))
|
||||
message("Performing steps: ", paste(roundTypes, collapse = " "))
|
||||
}
|
||||
|
||||
for (n in 1:length(roundTypes)) {
|
||||
for (n in seq_along(roundTypes)) {
|
||||
round <- roundTypes[n]
|
||||
kivaluku <- 0
|
||||
|
||||
if (kokeiltu[round] == 1) { # Askelta kokeiltu viime muutoksen j<>lkeen
|
||||
} else if (round == 0 | round == 1) { # Yksil<69>n siirt<72>minen toiseen populaatioon.
|
||||
inds <- 1:ninds
|
||||
aputaulu <- cbind(inds, rand(ninds, 1))
|
||||
aputaulu <- sortrows(aputaulu, 2)
|
||||
inds <- seq_len(ninds)
|
||||
aputaulu <- cbind(t(inds), rand(ninds, 1))
|
||||
aputaulu <- matrix(sortrows(aputaulu, 2), nrow = nrow(aputaulu))
|
||||
inds <- t(aputaulu[, 1])
|
||||
muutosNyt <- 0
|
||||
|
||||
for (ind in inds) {
|
||||
i1 <- PARTITION[ind]
|
||||
muutokset_diffInCounts <- greedyMix_muutokset$new()
|
||||
# FIXME: using 100-length global variables instead of the ones in this function
|
||||
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset(
|
||||
ind, rows, data, adjprior, priorTerm
|
||||
)
|
||||
|
|
@ -190,7 +197,7 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
}
|
||||
} else if (round == 2) { # Populaation yhdist<73>minen toiseen.
|
||||
maxMuutos <- 0
|
||||
for (pop in 1:npops) {
|
||||
for (pop in seq_len(npops)) {
|
||||
muutokset_diffInCounts <- greedyMix_muutokset$new()
|
||||
muutokset_diffInCounts <- muutokset_diffInCounts$laskeMuutokset2(
|
||||
pop, rows, data, adjprior, priorTerm
|
||||
|
|
@ -234,7 +241,7 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
} else if (round == 3 || round == 4) { # Populaation jakaminen osiin.
|
||||
maxMuutos <- 0
|
||||
ninds <- size(rows, 1)
|
||||
for (pop in 1:npops) {
|
||||
for (pop in seq_len(npops)) {
|
||||
inds2 <- matlab2r::find(PARTITION == pop)
|
||||
ninds2 <- length(inds2)
|
||||
if (ninds2 > 2) {
|
||||
|
|
@ -265,7 +272,7 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
muutoksia <- 1
|
||||
kokeiltu <- zeros(nRoundTypes, 1)
|
||||
rivit <- list()
|
||||
for (i in 1:length(muuttuvat)) {
|
||||
for (i in seq_len(muuttuvat)) {
|
||||
ind <- muuttuvat[i]
|
||||
lisa <- rows[ind, 1]:rows[ind, 2]
|
||||
rivit <- rbind(rivit, t(lisa))
|
||||
|
|
@ -421,7 +428,7 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
totalMuutos <- muutokset(1, emptyPop)
|
||||
|
||||
rivit <- list()
|
||||
for (i in 1:length(muuttuvat)) {
|
||||
for (i in seq_len(muuttuvat)) {
|
||||
ind <- muuttuvat[i]
|
||||
lisa <- rows[ind, 1]:rows[ind, 2]
|
||||
rivit <- c(rivit, lisa)
|
||||
|
|
@ -506,8 +513,6 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
}
|
||||
}
|
||||
}
|
||||
# FIXME: muutoksia is never 0, so vaihe never equals 5 and ready 1
|
||||
print(paste("i1 =", i1, "i2 =", i2, "maxMuutos =", maxMuutos)) # TEMP
|
||||
if (muutoksia == 0) {
|
||||
if (vaihe <= 4) {
|
||||
vaihe <= vaihe + 1
|
||||
|
|
@ -536,11 +541,10 @@ indMix <- function(c, npops, dispText = TRUE) {
|
|||
# TALLENNETAAN
|
||||
|
||||
npops <- poistaTyhjatPopulaatiot(npops)
|
||||
POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm)
|
||||
POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
|
||||
if (dispText) {
|
||||
print(paste("Found partition with", as.character(npops), "populations."))
|
||||
print(paste("Log(ml) =", as.character(logml)))
|
||||
print(" ")
|
||||
message("Found partition with ", as.character(npops), " populations.")
|
||||
message("Log(ml) = ", as.character(logml))
|
||||
}
|
||||
|
||||
if (logml > logmlBest) {
|
||||
|
|
|
|||
7
R/indMixWrapper.R
Normal file
7
R/indMixWrapper.R
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Chooses which clustering routine to run: the fixed-K variant (not yet
# translated, so it raises an error) or the regular indMix() search.
# All arguments other than `fixedK` are forwarded positionally to indMix();
# `verbose` is passed in the last position.
indMixWrapper <- function(
  c, npops, counts, sumcounts, max_iter, fixedK = FALSE, verbose = FALSE
) {
  # Guard clause: fixed-K clustering has no R implementation yet.
  if (fixedK) {
    stop("indMix_fixK() not yet implemented.") # TODO: translate indMix_fixK.m
  }
  indMix(c, npops, counts, sumcounts, max_iter, verbose)
}
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
#' @title Initialize Pop Names
|
||||
#' @param nameFile nameFile
|
||||
#' @param indexFile indexFile
|
||||
#' @export
|
||||
initPopNames <- function(nameFile, indexFile) {
|
||||
# Palauttaa tyhjän, mikäli nimitiedosto ja indeksitiedosto
|
||||
# eivät olleet yhtä pitkiä
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ initialCounts <- function(partition, data, npops, rows, noalle, adjprior) {
|
|||
nloci <- size(data, 2)
|
||||
ninds <- size(rows, 1)
|
||||
|
||||
koot <- rows[, 1] - rows[, 2] + 1
|
||||
koot <- rows[1] - rows[2] + 1
|
||||
maxSize <- base::max(koot)
|
||||
|
||||
counts <- zeros(base::max(noalle), nloci, npops)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
laskeLoggis <- function(counts, sumcounts, adjprior) {
|
||||
npops <- size(counts, 3)
|
||||
|
||||
sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops))))))
|
||||
replicated_adjprior <- array(adjprior, c(nrow(adjprior), ncol(adjprior), npops))
|
||||
sum1 <- sum(sum(sum(lgamma(counts + replicated_adjprior))))
|
||||
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
|
||||
logml2 <- sum1 - npops * sum3
|
||||
loggis <- logml2
|
||||
|
|
|
|||
|
|
@ -349,12 +349,15 @@ greedyMix_muutokset <- R6Class(
|
|||
i1_logml <- POP_LOGML[i1]
|
||||
muutokset[i1] <- 0
|
||||
|
||||
rows <- globalRows[ind, 1]:globalRows[ind, 2]
|
||||
if (is.null(dim(globalRows))) {
|
||||
rows <- globalRows[1]:globalRows[2]
|
||||
} else {
|
||||
rows <- globalRows[ind, 1]:globalRows[ind, 2]
|
||||
}
|
||||
diffInCounts <- computeDiffInCounts(
|
||||
rows, size(COUNTS, 1), size(COUNTS, 2), data
|
||||
)
|
||||
diffInSumCounts <- colSums(diffInCounts)
|
||||
|
||||
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
|
||||
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#' @title Learn partition (modified)
|
||||
#' @export
|
||||
#' @param ordered ordered
|
||||
#' @return part
|
||||
#' @description This function is called only if some individual has less than
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#' @param fii fii
|
||||
#' @description Goes through all the ways to divide the points into two or
|
||||
#' three groups. Chooses the partition which obtains highest logml.
|
||||
#' @export
|
||||
learn_simple_partition <- function(ordered_points, fii) {
|
||||
npoints <- length(ordered_points)
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@
|
|||
#' that BAPS should use this function instead of the base one, so this is why
|
||||
#' this function is part of this package (instead of a MATLAB-replicating
|
||||
#' package such as matlab2r)
|
||||
#' @export
|
||||
linkage <- function(Y, method = "co") {
|
||||
k <- size(Y)[1]
|
||||
n <- size(Y)[2]
|
||||
|
|
|
|||
|
|
@ -4,18 +4,19 @@
|
|||
#' running the hierBAPS algorithm.
|
||||
#'
|
||||
#' @param msa Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered
|
||||
#' @param keep.singletons A logical indicating whether to consider singleton mutations in calculating the clusters
|
||||
#' @param keep_singletons A logical indicating whether to consider singleton mutations in calculating the clusters
|
||||
#' @param output_numbers A logical indicating whether to output the data as
|
||||
#' numbers (TRUE) or letters (FALSE)
|
||||
#'
|
||||
#' @return A character matrix with filtered SNP data
|
||||
#'
|
||||
#' @examples
|
||||
#' msa <- system.file("ext", "seqs.fa", package = "rBAPS")
|
||||
#' snp.matrix <- load_fasta(msa)
|
||||
#' msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
|
||||
#' snp.matrix <- rBAPS:::load_fasta(msa)
|
||||
#' @author Gerry Tonkin-Hill, Waldir Leoncio
|
||||
#' @seealso rhierbaps::load_fasta
|
||||
#' @importFrom ape read.FASTA as.DNAbin
|
||||
#' @export
|
||||
load_fasta <- function(msa, keep.singletons = FALSE) {
|
||||
load_fasta <- function(msa, keep_singletons = FALSE, output_numbers = TRUE) {
|
||||
|
||||
# Check inputs
|
||||
if (is(msa, "character")) {
|
||||
|
|
@ -28,7 +29,9 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
|||
} else {
|
||||
stop("incorrect input for msa!")
|
||||
}
|
||||
if (!is.logical(keep.singletons)) stop("Invalid keep.singletons! Must be on of TRUE/FALSE.")
|
||||
if (!is.logical(keep_singletons)) {
|
||||
stop("Invalid keep_singletons! Must be one of TRUE/FALSE.")
|
||||
}
|
||||
|
||||
# Load sequences using ape. This does a lot of the checking for us.
|
||||
seq_names <- labels(seqs)
|
||||
|
|
@ -46,8 +49,8 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
|||
conserved <- colSums(t(t(seqs) == seqs[1, ])) == nrow(seqs)
|
||||
seqs <- seqs[, !conserved]
|
||||
|
||||
if (!keep.singletons) {
|
||||
# remove singletons as they are uninformative in the algorithm
|
||||
if (!keep_singletons) {
|
||||
# remove singletons as they are uninformative in the algorithm
|
||||
is_singleton <- apply(seqs, 2, function(x) {
|
||||
tab <- table(x)
|
||||
return(x %in% names(tab)[tab == 1])
|
||||
|
|
@ -58,5 +61,11 @@ load_fasta <- function(msa, keep.singletons = FALSE) {
|
|||
# Convert gaps and unknowns to same symbol
|
||||
seqs[seqs == "n"] <- "-"
|
||||
|
||||
# Replace letters with numbers, dashes with zeros
|
||||
if (output_numbers) {
|
||||
seqs <- matrix(match(seqs, c("a", "c", "g", "t")), nrow(seqs))
|
||||
seqs[is.na(seqs)] <- 0
|
||||
}
|
||||
|
||||
return(seqs)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#' @description Returns a string representation of a logml
|
||||
#' @param logml input Logml
|
||||
#' @return String version of logml
|
||||
#' @export
|
||||
logml2String <- function(logml) {
|
||||
# Palauttaa logml:n string-esityksen.
|
||||
mjono <- " "
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#' @description Reads GenePop-formatted data
|
||||
#' @param tiedostonNimi Name of the file
|
||||
#' @return list containing data and popnames
|
||||
#' @export
|
||||
lueGenePopData <- function(tiedostonNimi) {
|
||||
fid <- readLines(tiedostonNimi)
|
||||
line <- fid[1] # ensimmäinen rivi
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#' group. popnames are as before.
|
||||
#' @param tiedostonNimi Name of the file
|
||||
#' @return List containing data and popnames
|
||||
#' @export
|
||||
lueGenePopDataPop <- function(tiedostonNimi) {
|
||||
# Data annetaan muodossa, jossa viimeinen sarake kertoo ryhmän.
|
||||
# popnames on kuten ennenkin.
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
|
||||
#' @param line line
|
||||
#' @return nimi
|
||||
#' @export
|
||||
lueNimi <- function(line) {
|
||||
# ==========================================================================
|
||||
# Validation
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
#' @return puredata: a data contains no index column.
|
||||
#' @param data data
|
||||
#' @param noalle noalle
|
||||
#' @export
|
||||
noIndex <- function(data, noalle) {
|
||||
limit <- ifelse(is(noalle, "matrix"), ncol(noalle), length(noalle))
|
||||
if (size(data, 2) == limit + 1) {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#' @description Converts numbers to strings
|
||||
#' @param number number
|
||||
#' @note On Matlab, if number is NaN the output is 'NaN'. Here, the output will be an error. Also, the function below expects "number" to have length one, whereas Matlab accepts vectors.
|
||||
#' @export
|
||||
ownNum2Str <- function(number) {
|
||||
absolute <- abs(number)
|
||||
if (absolute < 1000) {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
#' @param npops npops
|
||||
#' @param rowsFromInd rowsFromInd
|
||||
#' @param alaraja alaraja
|
||||
#' @export
|
||||
poistaLiianPienet <- function(npops, rowsFromInd, alaraja) {
|
||||
popSize <- zeros(1, npops)
|
||||
if (npops > 0) {
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
poistaTyhjatPopulaatiot <- function(npops) {
|
||||
# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
||||
# % SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in.
|
||||
notEmpty <- matlab2r::find(any(SUMCOUNTS, 2))
|
||||
notEmpty <- matlab2r::find(apply(SUMCOUNTS, 1, function(x) any(x > 0)))
|
||||
COUNTS <- COUNTS[, , notEmpty]
|
||||
SUMCOUNTS <- SUMCOUNTS[notEmpty, ]
|
||||
LOGDIFF <- LOGDIFF[, notEmpty]
|
||||
|
||||
for (n in 1:length(notEmpty)) {
|
||||
apu <- matlab2r::find(PARTITION == notEmpty(n))
|
||||
apu <- matlab2r::find(PARTITION == notEmpty[n])
|
||||
PARTITION[apu] <- n
|
||||
}
|
||||
npops <- length(notEmpty)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#' @return a 4-mark presentation of proportion
|
||||
#' @note The `round` function in R, being ISO-compliant, rounds 8.5 to 8. The
|
||||
#' Matlab equivalent rounds it to 9.
|
||||
#' @export
|
||||
proportion2str <- function(prob) {
|
||||
if (abs(prob) < 1e-3) {
|
||||
str <- "0.00"
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
#' @title Generates random numbers
|
||||
#' @return vector of length `nc` with r.v. realizations from Gamma(rate=1)
|
||||
#' @examples randdir(matrix(c(10, 30, 60), 3), 3)
|
||||
#' @examples rBAPS:::randdir(matrix(c(10, 30, 60), 3), 3)
|
||||
#' @param counts shape parameter
|
||||
#' @param nc number of rows on output
|
||||
#' @seealso randga
|
||||
#' @export
|
||||
randdir <- function(counts, nc) {
|
||||
svar <- zeros(nc, 1)
|
||||
for (i in 1:nc) {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#' @param line line number
|
||||
#' @return count
|
||||
#' @description Returns the number of strings contained in the line. There must be a space between the strings.
|
||||
#' @export
|
||||
rivinSisaltamienMjonojenLkm <- function(line) {
|
||||
# Palauttaa line:n sisältämien mjonojen lukumäärän.
|
||||
# Mjonojen välissä täytyy olla välilyönti.
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
#' @title Find out the Digit Format
|
||||
#' @param line the first line after the "pop" word from data in Genepop format. Note: based on the line format, the function determines whether the alleles of the data are given using 2 or 3 digits.
|
||||
#' @return df
|
||||
#' @export
|
||||
selvitaDigitFormat <- function(line) {
|
||||
# line on ensimmäinen pop-sanan jälkeinen rivi
|
||||
# Genepop-formaatissa olevasta datasta. funktio selvittää
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@
|
|||
#' @description Lisää jokaista alleelia joka populaation joka lokukseen j1/noalle(j) verran. Näin saatuja counts:eja vastaavista Dirichlet-jakaumista simuloidaan arvot populaatioiden alleelifrekvensseille.
|
||||
#' Add each allele to each locus in each population by j 1 / noalle(j). The Dirichlet distributions corresponding to the counts thus obtained simulate values for the allele frequencies of the populations.
|
||||
#' @param noalle noalle
|
||||
#' @export
|
||||
|
||||
simulateAllFreqs <- function(noalle) {
|
||||
if (isGlobalEmpty(COUNTS)) {
|
||||
max_noalle <- 0
|
||||
|
|
|
|||
|
|
@ -6,8 +6,6 @@
|
|||
#' @param allfreqs allfreqs
|
||||
#' @param pop pop
|
||||
#' @param missing_level missing_level
|
||||
#' @export
|
||||
|
||||
simulateIndividuals <- function(n, rowsFromInd, allfreqs, pop, missing_level) {
|
||||
nloci <- size(allfreqs, 2)
|
||||
|
||||
|
|
|
|||
|
|
@ -5,8 +5,6 @@
|
|||
#' @param allfreqs allfreqs
|
||||
#' @param pop pop
|
||||
#' @param loc loc
|
||||
#' @export
|
||||
|
||||
simuloiAlleeli <- function(allfreqs, pop, loc) {
|
||||
if (length(dim(allfreqs)) == 0) {
|
||||
freqs <- 1
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#' @param osuusTaulu Percentage table?
|
||||
#' @param osuus percentage?
|
||||
#' @param indeksi index
|
||||
#' @export
|
||||
suoritaMuutos <- function(osuusTaulu, osuus, indeksi) {
|
||||
if (isGlobalEmpty(COUNTS)) {
|
||||
npops <- 1
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#' @param description description
|
||||
#' @param width width
|
||||
#' @return newline
|
||||
#' @export
|
||||
takeLine <- function(description, width) {
|
||||
# Returns one line from the description: line ends to the first
|
||||
# space after width:th mark.
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#' @param coordinates coordinates
|
||||
#' @param interactive prompt user for relevant questions during execution
|
||||
#' @return a list of defectives ("viallinen") and coordinates
|
||||
#' @export
|
||||
testaaKoordinaatit <- function(ninds, coordinates, interactive = TRUE) {
|
||||
# Testaa onko koordinaatit kunnollisia.
|
||||
# modified by Lu Cheng, 05.12.2012
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#' @description Test if loaded BAPS data is proper
|
||||
#' @param data dataset
|
||||
#' @return ninds
|
||||
#' @export
|
||||
testaaOnkoKunnollinenBapsData <- function(data) {
|
||||
# Tarkastaa onko viimeisessä sarakkeessa kaikki
|
||||
# luvut 1,2,...,n johonkin n:ään asti.
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
#' @param rivi Line
|
||||
#' @return pal = 1 if the line starts with one of the following
|
||||
#' letter combinations: Pop, pop, POP. In all other cases, pal = 0
|
||||
#' @export
|
||||
testaaPop <- function(rivi) {
|
||||
# pal=1, mikäli rivi alkaa jollain seuraavista
|
||||
# kirjainyhdistelmistä: Pop, pop, POP. Kaikissa muissa
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@
|
|||
#' @param partitionSummary partitionSummary
|
||||
#' @param popnames popnames
|
||||
#' @param fixedK fixedK
|
||||
#' @export
|
||||
writeMixtureInfo <- function(
|
||||
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile,
|
||||
partitionSummary, popnames, fixedK
|
||||
|
|
@ -27,17 +26,18 @@ writeMixtureInfo <- function(
|
|||
fid <- load(outPutFile)
|
||||
} else {
|
||||
fid <- -1
|
||||
# TODO: replace sink with option that will record input and output
|
||||
sink("baps4_output.baps", split = TRUE) # save in text anyway.
|
||||
outPutFile <- file.path(tempdir(), "baps4_output.baps")
|
||||
message("Output saved to", outPutFile)
|
||||
sink(outPutFile, split = TRUE) # save in text anyway.
|
||||
}
|
||||
|
||||
dispLine()
|
||||
cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:")
|
||||
cat(c("Data file: ", inputFile))
|
||||
cat("Model: independent")
|
||||
cat(c("Number of clustered individuals: ", ownNum2Str(ninds)))
|
||||
cat(c("Number of groups in optimal partition: ", ownNum2Str(npops)))
|
||||
cat(c("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml)))
|
||||
cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
|
||||
cat("Data file: ", inputFile, "\n")
|
||||
cat("Model: independent\n")
|
||||
cat("Number of clustered individuals: ", ownNum2Str(ninds), "\n")
|
||||
cat("Number of groups in optimal partition: ", ownNum2Str(npops), "\n")
|
||||
cat("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml), "\n")
|
||||
cat(" ")
|
||||
if (fid != -1) {
|
||||
append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
|
||||
|
|
@ -88,10 +88,10 @@ writeMixtureInfo <- function(
|
|||
"Cluster ", as.character(m), ": {", as.character(indsInM[1])
|
||||
)
|
||||
for (k in 2:cluster_size) {
|
||||
text <- c(text, ", ", as.character(indsInM[k]))
|
||||
text <- c(text, ",", as.character(indsInM[k]))
|
||||
}
|
||||
}
|
||||
text <- c(text, "}")
|
||||
text <- c(text, "}\n")
|
||||
while (length(text) > 58) {
|
||||
# Take one line and display it.
|
||||
new_line <- takeLine(text, 58)
|
||||
|
|
@ -107,7 +107,7 @@ writeMixtureInfo <- function(
|
|||
text <- ""
|
||||
}
|
||||
}
|
||||
if (text != "") {
|
||||
if (any(text != "")) {
|
||||
cat(text)
|
||||
if (fid != -1) {
|
||||
append(fid, text)
|
||||
|
|
@ -117,11 +117,11 @@ writeMixtureInfo <- function(
|
|||
}
|
||||
|
||||
if (npops > 1) {
|
||||
cat(" ")
|
||||
cat(" ")
|
||||
cat("\n")
|
||||
cat("\n")
|
||||
cat(
|
||||
"Changes in log(marginal likelihood)",
|
||||
" if indvidual i is moved to group j:"
|
||||
" if indvidual i is moved to group j:\n"
|
||||
)
|
||||
if (fid != -1) {
|
||||
append(fid, " ")
|
||||
|
|
@ -132,7 +132,7 @@ writeMixtureInfo <- function(
|
|||
fid,
|
||||
c(
|
||||
"Changes in log(marginal likelihood)",
|
||||
"if indvidual i is moved to group j:"
|
||||
"if indvidual i is moved to group j:\n"
|
||||
)
|
||||
)
|
||||
append(fid, "\n")
|
||||
|
|
@ -168,9 +168,9 @@ writeMixtureInfo <- function(
|
|||
|
||||
if (names) {
|
||||
nimi <- as.character(popnames[ind])
|
||||
rivi <- c(blanks(maxSize - length(nimi)), nimi, ":")
|
||||
rivi <- c(blanks(maxSize - length(nimi)), nimi, ":\n")
|
||||
} else {
|
||||
rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":")
|
||||
rivi <- c("\n", blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":\n")
|
||||
}
|
||||
for (j in 1:npops) {
|
||||
rivi <- c(rivi, " ", logml2String(omaRound(muutokset[j])))
|
||||
|
|
@ -182,9 +182,9 @@ writeMixtureInfo <- function(
|
|||
}
|
||||
}
|
||||
|
||||
cat(" ")
|
||||
cat(" ")
|
||||
cat("KL-divergence matrix in PHYLIP format:")
|
||||
cat("\n")
|
||||
cat("\n")
|
||||
cat("KL-divergence matrix in PHYLIP format:\n")
|
||||
|
||||
dist_mat <- zeros(npops, npops)
|
||||
if (fid != -1) {
|
||||
|
|
@ -194,6 +194,7 @@ writeMixtureInfo <- function(
|
|||
append(fid, "\n")
|
||||
}
|
||||
|
||||
COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE]
|
||||
maxnoalle <- size(COUNTS, 1)
|
||||
nloci <- size(COUNTS, 2)
|
||||
d <- zeros(maxnoalle, nloci, npops)
|
||||
|
|
@ -205,8 +206,8 @@ writeMixtureInfo <- function(
|
|||
|
||||
prior[1, nollia] <- 1
|
||||
for (pop1 in 1:npops) {
|
||||
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) /
|
||||
repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
|
||||
squeezed_COUNTS_prior <- squeeze(COUNTS[, , pop1]) + prior
|
||||
d[, , pop1] <- squeezed_COUNTS_prior / sum(squeezed_COUNTS_prior)
|
||||
}
|
||||
ekarivi <- as.character(npops)
|
||||
cat(ekarivi)
|
||||
|
|
@ -216,14 +217,14 @@ writeMixtureInfo <- function(
|
|||
}
|
||||
|
||||
for (pop1 in 1:npops) {
|
||||
for (pop2 in 1:(pop1 - 1)) {
|
||||
for (pop2 in seq_len(pop1 - 1)) {
|
||||
dist1 <- d[, , pop1]
|
||||
dist2 <- d[, , pop2]
|
||||
div12 <- sum(
|
||||
sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10)))
|
||||
sum(dist1 * base::log2((dist1 + 10^-10) / (dist2 + 10^-10)))
|
||||
) / nloci
|
||||
div21 <- sum(
|
||||
sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10)))
|
||||
sum(dist2 * base::log2((dist2 + 10^-10) / (dist1 + 10^-10)))
|
||||
) / nloci
|
||||
div <- (div12 + div21) / 2
|
||||
dist_mat[pop1, pop2] <- div
|
||||
|
|
@ -233,9 +234,9 @@ writeMixtureInfo <- function(
|
|||
|
||||
dist_mat <- dist_mat + t(dist_mat) # make it symmetric
|
||||
for (pop1 in 1:npops) {
|
||||
rivi <- c("Cluster_", as.character(pop1), " ")
|
||||
rivi <- c("\nCluster_", as.character(pop1), "\n")
|
||||
for (pop2 in 1:npops) {
|
||||
rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]), " ")
|
||||
rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]))
|
||||
}
|
||||
cat(rivi)
|
||||
if (fid != -1) {
|
||||
|
|
@ -245,11 +246,11 @@ writeMixtureInfo <- function(
|
|||
}
|
||||
}
|
||||
|
||||
cat(" ")
|
||||
cat(" ")
|
||||
cat("\n")
|
||||
cat("\n")
|
||||
cat(
|
||||
"List of sizes of 10 best visited partitions",
|
||||
"and corresponding log(ml) values"
|
||||
"and corresponding log(ml) values\n"
|
||||
)
|
||||
|
||||
if (fid != -1) {
|
||||
|
|
@ -279,7 +280,7 @@ writeMixtureInfo <- function(
|
|||
line <- c(
|
||||
as.character(partitionSummary[part, 1]),
|
||||
" ",
|
||||
as.character(partitionSummary(part, 2))
|
||||
as.character(partitionSummary[part, 2])
|
||||
)
|
||||
cat(line)
|
||||
if (fid != -1) {
|
||||
|
|
@ -289,9 +290,9 @@ writeMixtureInfo <- function(
|
|||
}
|
||||
|
||||
if (!fixedK) {
|
||||
cat(" ")
|
||||
cat(" ")
|
||||
cat("Probabilities for number of clusters")
|
||||
cat("\n")
|
||||
cat("\n")
|
||||
cat("Probabilities for number of clusters\n")
|
||||
|
||||
if (fid != -1) {
|
||||
append(fid, " ")
|
||||
|
|
@ -323,7 +324,7 @@ writeMixtureInfo <- function(
|
|||
line <- c(
|
||||
as.character(npopsTaulu[i]), " ", as.character(probs[i])
|
||||
)
|
||||
cat(line)
|
||||
cat(line, "\n")
|
||||
if (fid != -1) {
|
||||
append(fid, line)
|
||||
append(fid, "\n")
|
||||
|
|
@ -331,5 +332,9 @@ writeMixtureInfo <- function(
|
|||
}
|
||||
}
|
||||
}
|
||||
# Closing sink(s)
|
||||
while (sink.number() > 0L) {
|
||||
sink()
|
||||
}
|
||||
return(changesInLogml)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@
|
|||
#' @param partitionSummary partitionSummary
|
||||
#' @param popnames popnames
|
||||
#' @param fixedK fixedK
|
||||
#' @export
|
||||
writeMixtureInfoPop <- function(logml, rows, data, adjprior, priorTerm,
|
||||
outPutFile, inputFile, partitionSummary,
|
||||
popnames, fixedK) {
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
-9 102 56 80 100 90 118 90 88 104 1
|
||||
-9 102 54 82 102 92 116 90 86 104 1
|
||||
88 104 56 84 102 -9 120 90 88 100 2
|
||||
86 102 56 80 102 -9 116 90 86 100 2
|
||||
88 102 54 80 102 90 116 92 -9 100 3
|
||||
88 102 56 80 100 90 118 90 -9 104 3
|
||||
80 102 54 82 102 92 116 90 86 104 4
|
||||
88 104 56 84 102 92 120 90 88 100 4
|
||||
86 102 56 80 -9 90 116 90 86 100 5
|
||||
88 102 54 80 -9 90 116 92 86 100 5
|
||||
-9 102 56 80 100 90 118 90 88 104 1
|
||||
-9 102 54 82 102 92 116 90 86 104 1
|
||||
88 104 56 84 102 -9 120 90 88 100 2
|
||||
86 102 56 80 102 -9 116 90 86 100 2
|
||||
88 102 54 80 102 90 116 92 -9 100 3
|
||||
88 102 56 80 100 90 118 90 -9 104 3
|
||||
80 102 54 82 102 92 116 90 86 104 4
|
||||
88 104 56 84 102 92 120 90 88 100 4
|
||||
86 102 56 80 -9 90 116 90 86 100 5
|
||||
88 102 54 80 -9 90 116 92 86 100 5
|
||||
|
|
@ -1,20 +1,20 @@
|
|||
>1
|
||||
AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT
|
||||
TAGTAACATGGTCAGCTGCTCATCCGTATT
|
||||
|
||||
>2
|
||||
ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG
|
||||
AATGCACCACAAACATACCCCACAGGCTCG
|
||||
|
||||
>3
|
||||
GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT
|
||||
GTCTGAGTATGGATGGTGGAGGCGGAGCCG
|
||||
|
||||
>4
|
||||
ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG
|
||||
CAAGGATAAAGTTCACCCATCACGTTGACC
|
||||
|
||||
>5
|
||||
ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG
|
||||
GTAACATAGCTACCAGGTCCCATTCCAGGA
|
||||
|
||||
>1
|
||||
AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT
|
||||
TAGTAACATGGTCAGCTGCTCATCCGTATT
|
||||
|
||||
>2
|
||||
ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG
|
||||
AATGCACCACAAACATACCCCACAGGCTCG
|
||||
|
||||
>3
|
||||
GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT
|
||||
GTCTGAGTATGGATGGTGGAGGCGGAGCCG
|
||||
|
||||
>4
|
||||
ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG
|
||||
CAAGGATAAAGTTCACCCATCACGTTGACC
|
||||
|
||||
>5
|
||||
ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG
|
||||
GTAACATAGCTACCAGGTCCCATTCCAGGA
|
||||
|
||||
44290
inst/ext/seqs.fa → inst/extdata/seqs.fa
vendored
44290
inst/ext/seqs.fa → inst/extdata/seqs.fa
vendored
File diff suppressed because it is too large
Load diff
|
|
@ -1,108 +1,108 @@
|
|||
Baseline data for trained clustering
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
POP
|
||||
Area1_1, 1717 1111 0000 0404 0505 0606 0707 1212 0707 1212
|
||||
Area1_2, 0909 1010 0505 0404 0505 1010 0707 1111 0707 1212
|
||||
Area1_3, 0000 1010 1414 0404 0505 0606 1111 1212 0707 1212
|
||||
Area1_4, 1111 1010 1515 0404 0101 0606 1313 1111 0707 0808
|
||||
Area1_5, 0101 1010 1212 0404 0505 0606 0909 1212 0707 1212
|
||||
Area1_6, 1111 1010 1414 0404 0505 0606 1111 1212 0707 1010
|
||||
Area1_7, 1515 1010 0707 0404 0505 0606 0707 1212 0707 1212
|
||||
Area1_8, 1515 1212 0606 0404 0505 0606 1616 1212 0707 1010
|
||||
Area1_9, 0909 1010 1010 0404 0505 0606 1919 1212 0707 1010
|
||||
Area1_10, 0000 0000 0000 0404 0505 0606 1111 1010 0707 0000
|
||||
Area1_11, 0000 0000 0000 0404 0505 0505 1919 1212 0404 1111
|
||||
Area1_12, 1515 1010 0606 0404 0505 0606 1616 1212 0202 1212
|
||||
Area1_13, 1515 1010 1515 0404 0505 0606 0707 0606 0707 1212
|
||||
Area1_14, 0505 1010 1919 0404 0202 0606 1111 1010 0202 1212
|
||||
Area1_15, 1111 1010 1414 0404 0505 0606 1616 0606 0707 1212
|
||||
Area1_16, 1515 1212 0000 0404 0505 0606 1919 1212 0707 1212
|
||||
Area1_17, 1515 1010 1515 0404 0505 0606 1919 1212 0707 1212
|
||||
Area1_18, 1515 1010 1212 0404 0505 0606 0909 1212 0707 1010
|
||||
Area1_19, 1515 1212 0606 0404 0505 0606 1515 1212 0707 1212
|
||||
Area1_20, 1717 1414 1717 0404 0505 0606 0808 1212 0707 0808
|
||||
Area1_21, 1515 1212 1212 0404 0505 0606 0808 1818 0707 1212
|
||||
Area1_22, 1313 1111 1212 0404 0505 0606 1313 1212 0707 1212
|
||||
Area1_23, 1515 1212 1212 0404 0505 0606 1313 1010 0707 1212
|
||||
Area1_24, 0000 0000 0000 0404 0505 0606 1919 1212 0404 0000
|
||||
Area1_25, 0000 0000 0000 0404 0202 0101 1919 1919 0707 1212
|
||||
Area1_26, 1313 1010 0202 0404 0505 0606 1717 1212 0707 1212
|
||||
POP
|
||||
Area2_1, 1706 0505 1212 0404 0606 0808 1309 1111 0909 1413
|
||||
Area2_2, 1511 0707 1209 0404 0602 0808 1111 1004 0902 1310
|
||||
Area2_3, 1711 0000 1412 0402 0000 0000 1913 1002 0000 1313
|
||||
Area2_4, 1715 1515 1412 0404 0505 0604 1313 1010 1007 1309
|
||||
Area2_5, 1515 1515 1412 0404 0505 0606 1515 1111 0707 0908
|
||||
Area2_6, 1309 0000 1212 0404 0603 0802 1311 1204 0903 1309
|
||||
Area2_7, 1313 0505 1212 0404 0606 0808 1616 1414 0909 1206
|
||||
Area2_8, 1307 0707 1212 0404 0606 0808 1313 1004 0909 1313
|
||||
Area2_9, 0000 1212 1209 0404 0505 0606 0000 1106 0707 1208
|
||||
Area2_10, 1307 0000 1212 0404 0606 0806 1313 1005 0909 1111
|
||||
Area2_11, 0000 0707 1409 0505 0606 0808 1616 1110 0909 1309
|
||||
Area2_12, 1807 0000 1212 0505 0606 0000 0909 0401 0909 1309
|
||||
Area2_13, 1511 1212 1212 0404 0505 0604 1313 1110 0707 1210
|
||||
Area2_14, 1111 1515 1412 0505 0606 0808 1414 1004 0909 1313
|
||||
Area2_15, 1817 0707 1212 0505 0707 0909 1111 1004 0909 1313
|
||||
Area2_16, 1913 1511 1212 0404 0606 0000 0909 1212 0909 1313
|
||||
Area2_17, 1515 0000 0000 0000 0505 1006 1313 1005 0707 1212
|
||||
Area2_18, 0707 0606 1408 0404 0202 0101 1313 1615 0202 1307
|
||||
Area2_19, 0707 1309 0909 0502 0202 0101 2009 1510 0202 0704
|
||||
POP
|
||||
Area3_1, 1507 0706 1212 0202 0202 0000 0905 1409 0202 0707
|
||||
Area3_2, 1507 1313 1212 0202 0202 0101 1613 1510 0202 0807
|
||||
Area3_3, 1313 1414 1212 0404 0202 0101 1909 1510 0202 0704
|
||||
Area3_4, 1515 0909 1212 0502 0202 0101 1409 1210 0202 0807
|
||||
Area3_5, 1515 0808 1212 0502 0202 0101 1111 1510 0202 1007
|
||||
Area3_6, 1306 0909 1212 0202 0202 0101 0807 1512 0202 0707
|
||||
Area3_7, 0000 1009 1212 0404 0202 0101 1109 0702 0202 0808
|
||||
Area3_8, 1507 0606 1212 0404 0202 0101 1908 1409 0202 0707
|
||||
Area3_9, 1515 0606 1212 0202 0202 0101 0909 1510 0202 1207
|
||||
Area3_10, 1307 1010 1412 0202 0202 0101 1709 1615 0202 1207
|
||||
Area3_11, 1307 1005 1212 0404 0202 0101 1709 1510 0202 0703
|
||||
Area3_12, 1109 0902 1212 0404 0202 0101 0909 1002 0202 1207
|
||||
Area3_13, 1307 0606 1412 0404 0202 0101 0807 1515 0202 1207
|
||||
Area3_14, 1717 1407 1212 0404 0202 0101 1107 1409 0202 0805
|
||||
Area3_15, 1307 1007 1412 0404 0505 0101 0909 0000 0202 0807
|
||||
Area3_16, 1811 0000 1212 0404 0505 0000 1515 0707 0000 1212
|
||||
Area3_17, 1907 1414 1512 0402 0705 0000 0000 0000 0909 1212
|
||||
POP
|
||||
Area4_1, 1311 2019 1212 0404 0505 0000 1919 0707 0707 1109
|
||||
Area4_2, 1309 2018 1512 0404 0505 0000 1919 0808 0707 1111
|
||||
Area4_3, 1509 2118 1212 0404 0505 0000 1515 0707 0707 1107
|
||||
Area4_4, 1715 2221 1512 0404 0505 0000 1919 0707 0707 1111
|
||||
Area4_5, 1515 2121 1512 0404 0505 0000 1515 0707 0606 0707
|
||||
Area4_6, 1717 2222 1512 0404 0505 0000 1913 0707 0707 0907
|
||||
Area4_7, 1715 2221 1512 0404 0505 0000 1313 0707 0606 1111
|
||||
Area4_8, 1813 2320 1512 0404 0505 0000 1515 0707 0707 1107
|
||||
Area4_9, 1311 2019 1512 0404 0505 0000 1515 0707 0707 1111
|
||||
Area4_10, 1311 2019 1512 0000 0505 0000 1313 0707 0707 0909
|
||||
Area4_11, 1111 2625 0000 0000 0505 0606 0707 0903 0505 0505
|
||||
Area4_12, 0907 2724 0000 0404 0505 0605 1914 1105 0505 0707
|
||||
Area4_13, 1511 1610 1212 0404 0505 0605 1513 1105 1107 0704
|
||||
Area4_14, 1513 0404 1111 0404 0505 0606 1515 1111 1207 1107
|
||||
Area4_15, 1311 1616 1212 0404 0505 0606 1313 1111 0707 1107
|
||||
Area4_16, 1109 0606 1212 0404 0000 0000 1515 0902 0807 0505
|
||||
Area4_17, 1107 1004 1506 0404 0505 0606 1515 1212 0707 1108
|
||||
Area4_18, 1107 0904 1512 0404 0505 0606 1913 1105 1107 1107
|
||||
Area4_19, 1109 1313 1212 0404 0505 0606 1915 1111 1007 1107
|
||||
Area4_20, 1711 1604 1212 0404 0505 0606 1915 1212 1107 1007
|
||||
Area4_21, 1111 0606 1515 0404 0505 0606 1707 1009 0807 0502
|
||||
Area4_22, 1311 0603 1512 0404 0505 0606 1714 0707 0807 0501
|
||||
POP
|
||||
Area5_1, 0711 1212 1513 0202 0707 0808 1408 0000 0909 1210
|
||||
Area5_2, 1118 0101 1212 0202 0803 0901 0808 0000 1101 1211
|
||||
Area5_3, 1518 0000 1512 0404 0707 0806 0909 0000 0909 1212
|
||||
Area5_4, 1309 0000 1512 0202 0606 0707 1508 0000 0808 1204
|
||||
Area5_5, 0718 0000 1512 0402 0707 0806 0707 0000 0909 1208
|
||||
Area5_6, 1818 1414 1212 0404 0707 0000 1916 0000 0909 1208
|
||||
Area5_7, 1318 1313 1212 0404 0606 0000 1908 0000 0808 1008
|
||||
Area5_8, 1818 0000 1212 0404 0000 0806 1616 0000 0808 1212
|
||||
Baseline data for trained clustering
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
POP
|
||||
Area1_1, 1717 1111 0000 0404 0505 0606 0707 1212 0707 1212
|
||||
Area1_2, 0909 1010 0505 0404 0505 1010 0707 1111 0707 1212
|
||||
Area1_3, 0000 1010 1414 0404 0505 0606 1111 1212 0707 1212
|
||||
Area1_4, 1111 1010 1515 0404 0101 0606 1313 1111 0707 0808
|
||||
Area1_5, 0101 1010 1212 0404 0505 0606 0909 1212 0707 1212
|
||||
Area1_6, 1111 1010 1414 0404 0505 0606 1111 1212 0707 1010
|
||||
Area1_7, 1515 1010 0707 0404 0505 0606 0707 1212 0707 1212
|
||||
Area1_8, 1515 1212 0606 0404 0505 0606 1616 1212 0707 1010
|
||||
Area1_9, 0909 1010 1010 0404 0505 0606 1919 1212 0707 1010
|
||||
Area1_10, 0000 0000 0000 0404 0505 0606 1111 1010 0707 0000
|
||||
Area1_11, 0000 0000 0000 0404 0505 0505 1919 1212 0404 1111
|
||||
Area1_12, 1515 1010 0606 0404 0505 0606 1616 1212 0202 1212
|
||||
Area1_13, 1515 1010 1515 0404 0505 0606 0707 0606 0707 1212
|
||||
Area1_14, 0505 1010 1919 0404 0202 0606 1111 1010 0202 1212
|
||||
Area1_15, 1111 1010 1414 0404 0505 0606 1616 0606 0707 1212
|
||||
Area1_16, 1515 1212 0000 0404 0505 0606 1919 1212 0707 1212
|
||||
Area1_17, 1515 1010 1515 0404 0505 0606 1919 1212 0707 1212
|
||||
Area1_18, 1515 1010 1212 0404 0505 0606 0909 1212 0707 1010
|
||||
Area1_19, 1515 1212 0606 0404 0505 0606 1515 1212 0707 1212
|
||||
Area1_20, 1717 1414 1717 0404 0505 0606 0808 1212 0707 0808
|
||||
Area1_21, 1515 1212 1212 0404 0505 0606 0808 1818 0707 1212
|
||||
Area1_22, 1313 1111 1212 0404 0505 0606 1313 1212 0707 1212
|
||||
Area1_23, 1515 1212 1212 0404 0505 0606 1313 1010 0707 1212
|
||||
Area1_24, 0000 0000 0000 0404 0505 0606 1919 1212 0404 0000
|
||||
Area1_25, 0000 0000 0000 0404 0202 0101 1919 1919 0707 1212
|
||||
Area1_26, 1313 1010 0202 0404 0505 0606 1717 1212 0707 1212
|
||||
POP
|
||||
Area2_1, 1706 0505 1212 0404 0606 0808 1309 1111 0909 1413
|
||||
Area2_2, 1511 0707 1209 0404 0602 0808 1111 1004 0902 1310
|
||||
Area2_3, 1711 0000 1412 0402 0000 0000 1913 1002 0000 1313
|
||||
Area2_4, 1715 1515 1412 0404 0505 0604 1313 1010 1007 1309
|
||||
Area2_5, 1515 1515 1412 0404 0505 0606 1515 1111 0707 0908
|
||||
Area2_6, 1309 0000 1212 0404 0603 0802 1311 1204 0903 1309
|
||||
Area2_7, 1313 0505 1212 0404 0606 0808 1616 1414 0909 1206
|
||||
Area2_8, 1307 0707 1212 0404 0606 0808 1313 1004 0909 1313
|
||||
Area2_9, 0000 1212 1209 0404 0505 0606 0000 1106 0707 1208
|
||||
Area2_10, 1307 0000 1212 0404 0606 0806 1313 1005 0909 1111
|
||||
Area2_11, 0000 0707 1409 0505 0606 0808 1616 1110 0909 1309
|
||||
Area2_12, 1807 0000 1212 0505 0606 0000 0909 0401 0909 1309
|
||||
Area2_13, 1511 1212 1212 0404 0505 0604 1313 1110 0707 1210
|
||||
Area2_14, 1111 1515 1412 0505 0606 0808 1414 1004 0909 1313
|
||||
Area2_15, 1817 0707 1212 0505 0707 0909 1111 1004 0909 1313
|
||||
Area2_16, 1913 1511 1212 0404 0606 0000 0909 1212 0909 1313
|
||||
Area2_17, 1515 0000 0000 0000 0505 1006 1313 1005 0707 1212
|
||||
Area2_18, 0707 0606 1408 0404 0202 0101 1313 1615 0202 1307
|
||||
Area2_19, 0707 1309 0909 0502 0202 0101 2009 1510 0202 0704
|
||||
POP
|
||||
Area3_1, 1507 0706 1212 0202 0202 0000 0905 1409 0202 0707
|
||||
Area3_2, 1507 1313 1212 0202 0202 0101 1613 1510 0202 0807
|
||||
Area3_3, 1313 1414 1212 0404 0202 0101 1909 1510 0202 0704
|
||||
Area3_4, 1515 0909 1212 0502 0202 0101 1409 1210 0202 0807
|
||||
Area3_5, 1515 0808 1212 0502 0202 0101 1111 1510 0202 1007
|
||||
Area3_6, 1306 0909 1212 0202 0202 0101 0807 1512 0202 0707
|
||||
Area3_7, 0000 1009 1212 0404 0202 0101 1109 0702 0202 0808
|
||||
Area3_8, 1507 0606 1212 0404 0202 0101 1908 1409 0202 0707
|
||||
Area3_9, 1515 0606 1212 0202 0202 0101 0909 1510 0202 1207
|
||||
Area3_10, 1307 1010 1412 0202 0202 0101 1709 1615 0202 1207
|
||||
Area3_11, 1307 1005 1212 0404 0202 0101 1709 1510 0202 0703
|
||||
Area3_12, 1109 0902 1212 0404 0202 0101 0909 1002 0202 1207
|
||||
Area3_13, 1307 0606 1412 0404 0202 0101 0807 1515 0202 1207
|
||||
Area3_14, 1717 1407 1212 0404 0202 0101 1107 1409 0202 0805
|
||||
Area3_15, 1307 1007 1412 0404 0505 0101 0909 0000 0202 0807
|
||||
Area3_16, 1811 0000 1212 0404 0505 0000 1515 0707 0000 1212
|
||||
Area3_17, 1907 1414 1512 0402 0705 0000 0000 0000 0909 1212
|
||||
POP
|
||||
Area4_1, 1311 2019 1212 0404 0505 0000 1919 0707 0707 1109
|
||||
Area4_2, 1309 2018 1512 0404 0505 0000 1919 0808 0707 1111
|
||||
Area4_3, 1509 2118 1212 0404 0505 0000 1515 0707 0707 1107
|
||||
Area4_4, 1715 2221 1512 0404 0505 0000 1919 0707 0707 1111
|
||||
Area4_5, 1515 2121 1512 0404 0505 0000 1515 0707 0606 0707
|
||||
Area4_6, 1717 2222 1512 0404 0505 0000 1913 0707 0707 0907
|
||||
Area4_7, 1715 2221 1512 0404 0505 0000 1313 0707 0606 1111
|
||||
Area4_8, 1813 2320 1512 0404 0505 0000 1515 0707 0707 1107
|
||||
Area4_9, 1311 2019 1512 0404 0505 0000 1515 0707 0707 1111
|
||||
Area4_10, 1311 2019 1512 0000 0505 0000 1313 0707 0707 0909
|
||||
Area4_11, 1111 2625 0000 0000 0505 0606 0707 0903 0505 0505
|
||||
Area4_12, 0907 2724 0000 0404 0505 0605 1914 1105 0505 0707
|
||||
Area4_13, 1511 1610 1212 0404 0505 0605 1513 1105 1107 0704
|
||||
Area4_14, 1513 0404 1111 0404 0505 0606 1515 1111 1207 1107
|
||||
Area4_15, 1311 1616 1212 0404 0505 0606 1313 1111 0707 1107
|
||||
Area4_16, 1109 0606 1212 0404 0000 0000 1515 0902 0807 0505
|
||||
Area4_17, 1107 1004 1506 0404 0505 0606 1515 1212 0707 1108
|
||||
Area4_18, 1107 0904 1512 0404 0505 0606 1913 1105 1107 1107
|
||||
Area4_19, 1109 1313 1212 0404 0505 0606 1915 1111 1007 1107
|
||||
Area4_20, 1711 1604 1212 0404 0505 0606 1915 1212 1107 1007
|
||||
Area4_21, 1111 0606 1515 0404 0505 0606 1707 1009 0807 0502
|
||||
Area4_22, 1311 0603 1512 0404 0505 0606 1714 0707 0807 0501
|
||||
POP
|
||||
Area5_1, 0711 1212 1513 0202 0707 0808 1408 0000 0909 1210
|
||||
Area5_2, 1118 0101 1212 0202 0803 0901 0808 0000 1101 1211
|
||||
Area5_3, 1518 0000 1512 0404 0707 0806 0909 0000 0909 1212
|
||||
Area5_4, 1309 0000 1512 0202 0606 0707 1508 0000 0808 1204
|
||||
Area5_5, 0718 0000 1512 0402 0707 0806 0707 0000 0909 1208
|
||||
Area5_6, 1818 1414 1212 0404 0707 0000 1916 0000 0909 1208
|
||||
Area5_7, 1318 1313 1212 0404 0606 0000 1908 0000 0808 1008
|
||||
Area5_8, 1818 0000 1212 0404 0000 0806 1616 0000 0808 1212
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
3.1013610e+006 6.7104850e+006
|
||||
3.1011410e+006 6.7108850e+006
|
||||
3.1015790e+006 6.7101790e+006
|
||||
3.1015910e+006 6.7100650e+006
|
||||
3.1013610e+006 6.7104850e+006
|
||||
3.1011410e+006 6.7108850e+006
|
||||
3.1015790e+006 6.7101790e+006
|
||||
3.1015910e+006 6.7100650e+006
|
||||
3.1017660e+006 6.7104190e+006
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
3.1013610e+006 6.7104850e+006
|
||||
3.1016310e+006 6.7101990e+006
|
||||
3.1015790e+006 6.7101790e+006
|
||||
3.1015910e+006 6.7100650e+006
|
||||
3.1017660e+006 6.7104190e+006
|
||||
3.1017500e+006 6.7106040e+006
|
||||
3.1019250e+006 6.7092640e+006
|
||||
3.1017430e+006 6.7094990e+006
|
||||
3.1015240e+006 6.7097430e+006
|
||||
0 0
|
||||
3.1013610e+006 6.7104850e+006
|
||||
3.1016310e+006 6.7101990e+006
|
||||
3.1015790e+006 6.7101790e+006
|
||||
3.1015910e+006 6.7100650e+006
|
||||
3.1017660e+006 6.7104190e+006
|
||||
3.1017500e+006 6.7106040e+006
|
||||
3.1019250e+006 6.7092640e+006
|
||||
3.1017430e+006 6.7094990e+006
|
||||
3.1015240e+006 6.7097430e+006
|
||||
0 0
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
-9 102 56 80 100 90 118 90 88 104 1
|
||||
-9 102 54 82 102 92 116 90 86 104 1
|
||||
88 104 56 84 102 -9 120 90 88 100 2
|
||||
86 102 56 80 102 -9 116 90 86 100 2
|
||||
88 102 54 80 102 90 116 92 -9 100 3
|
||||
88 102 56 80 100 90 118 90 -9 104 3
|
||||
80 102 54 82 102 92 116 90 86 104 4
|
||||
88 104 56 84 102 92 120 90 88 100 4
|
||||
86 102 56 80 -9 90 116 90 86 100 5
|
||||
88 102 54 80 -9 90 116 92 86 100 5
|
||||
-9 102 56 80 100 90 118 90 88 104 1
|
||||
-9 102 54 82 102 92 116 90 86 104 1
|
||||
88 104 56 84 102 -9 120 90 88 100 2
|
||||
86 102 56 80 102 -9 116 90 86 100 2
|
||||
88 102 54 80 102 90 116 92 -9 100 3
|
||||
88 102 56 80 100 90 118 90 -9 104 3
|
||||
80 102 54 82 102 92 116 90 86 104 4
|
||||
88 104 56 84 102 92 120 90 88 100 4
|
||||
86 102 56 80 -9 90 116 90 86 100 5
|
||||
88 102 54 80 -9 90 116 92 86 100 5
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
88 102 56 80 100 90 118 -9 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 2
|
||||
88 104 56 84 102 -9 120 90 88 100 3
|
||||
86 102 56 80 102 90 116 90 86 100 4
|
||||
88 102 -9 80 102 90 116 92 86 100 5
|
||||
88 102 56 80 100 90 118 -9 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 2
|
||||
88 104 56 84 102 -9 120 90 88 100 3
|
||||
86 102 56 80 102 90 116 90 86 100 4
|
||||
88 102 -9 80 102 90 116 92 86 100 5
|
||||
|
|
@ -1,20 +1,20 @@
|
|||
88 102 56 80 100 90 118 90 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 1
|
||||
88 102 56 80 100 90 118 90 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 1
|
||||
88 -9 56 84 102 92 120 90 88 100 2
|
||||
86 -9 56 80 102 90 116 90 86 102 2
|
||||
88 -9 56 84 102 92 120 90 88 100 2
|
||||
86 -9 56 80 102 90 116 90 86 100 2
|
||||
88 102 54 82 102 90 116 92 86 102 3
|
||||
88 102 56 80 100 90 118 90 88 104 3
|
||||
88 102 54 80 102 90 116 92 86 102 3
|
||||
88 102 56 80 100 90 118 90 88 104 3
|
||||
80 102 54 82 102 92 116 -9 86 104 4
|
||||
88 104 56 84 102 92 120 -9 88 100 4
|
||||
80 102 54 82 102 92 116 -9 86 104 4
|
||||
88 104 56 84 102 92 120 -9 88 100 4
|
||||
86 102 56 80 102 90 116 90 86 100 5
|
||||
88 104 54 80 102 90 116 92 86 100 5
|
||||
86 102 56 80 104 90 116 90 86 100 5
|
||||
88 102 54 80 102 90 116 92 86 100 5
|
||||
88 102 56 80 100 90 118 90 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 1
|
||||
88 102 56 80 100 90 118 90 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 1
|
||||
88 -9 56 84 102 92 120 90 88 100 2
|
||||
86 -9 56 80 102 90 116 90 86 102 2
|
||||
88 -9 56 84 102 92 120 90 88 100 2
|
||||
86 -9 56 80 102 90 116 90 86 100 2
|
||||
88 102 54 82 102 90 116 92 86 102 3
|
||||
88 102 56 80 100 90 118 90 88 104 3
|
||||
88 102 54 80 102 90 116 92 86 102 3
|
||||
88 102 56 80 100 90 118 90 88 104 3
|
||||
80 102 54 82 102 92 116 -9 86 104 4
|
||||
88 104 56 84 102 92 120 -9 88 100 4
|
||||
80 102 54 82 102 92 116 -9 86 104 4
|
||||
88 104 56 84 102 92 120 -9 88 100 4
|
||||
86 102 56 80 102 90 116 90 86 100 5
|
||||
88 104 54 80 102 90 116 92 86 100 5
|
||||
86 102 56 80 104 90 116 90 86 100 5
|
||||
88 102 54 80 102 90 116 92 86 100 5
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
-9 102 56 80 100 90 118 90 88 104 1
|
||||
-9 102 54 82 102 92 116 90 86 104 1
|
||||
88 104 56 84 102 -9 120 90 88 100 1
|
||||
86 102 56 80 102 -9 116 90 86 100 1
|
||||
88 102 54 80 102 90 116 92 -9 100 2
|
||||
88 102 56 80 100 90 118 90 -9 104 2
|
||||
80 102 54 82 102 92 116 90 86 104 2
|
||||
88 104 56 84 102 92 120 90 88 100 2
|
||||
86 102 56 80 -9 90 116 90 86 100 3
|
||||
88 102 54 80 -9 90 116 92 86 100 3
|
||||
-9 102 56 80 100 90 118 90 88 104 1
|
||||
-9 102 54 82 102 92 116 90 86 104 1
|
||||
88 104 56 84 102 -9 120 90 88 100 1
|
||||
86 102 56 80 102 -9 116 90 86 100 1
|
||||
88 102 54 80 102 90 116 92 -9 100 2
|
||||
88 102 56 80 100 90 118 90 -9 104 2
|
||||
80 102 54 82 102 92 116 90 86 104 2
|
||||
88 104 56 84 102 92 120 90 88 100 2
|
||||
86 102 56 80 -9 90 116 90 86 100 3
|
||||
88 102 54 80 -9 90 116 92 86 100 3
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
88 102 56 80 100 90 118 -9 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 1
|
||||
88 104 56 84 102 -9 120 90 88 100 2
|
||||
86 102 56 80 102 90 116 90 86 100 2
|
||||
88 102 -9 80 102 90 116 92 86 100 3
|
||||
88 102 56 80 100 90 118 -9 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 1
|
||||
88 104 56 84 102 -9 120 90 88 100 2
|
||||
86 102 56 80 102 90 116 90 86 100 2
|
||||
88 102 -9 80 102 90 116 92 86 100 3
|
||||
|
|
@ -1,20 +1,20 @@
|
|||
88 102 56 80 100 90 118 90 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 1
|
||||
88 102 56 80 100 90 118 90 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 1
|
||||
88 -9 56 84 102 92 120 90 88 100 1
|
||||
86 -9 56 80 102 90 116 90 86 102 1
|
||||
88 -9 56 84 102 92 120 90 88 100 1
|
||||
86 -9 56 80 102 90 116 90 86 100 1
|
||||
88 102 54 82 102 90 116 92 86 102 2
|
||||
88 102 56 80 100 90 118 90 88 104 2
|
||||
88 102 54 80 102 90 116 92 86 102 2
|
||||
88 102 56 80 100 90 118 90 88 104 2
|
||||
80 102 54 82 102 92 116 -9 86 104 2
|
||||
88 104 56 84 102 92 120 -9 88 100 2
|
||||
80 102 54 82 102 92 116 -9 86 104 2
|
||||
88 104 56 84 102 92 120 -9 88 100 2
|
||||
86 102 56 80 102 90 116 90 86 100 3
|
||||
88 104 54 80 102 90 116 92 86 100 3
|
||||
86 102 56 80 104 90 116 90 86 100 3
|
||||
88 102 54 80 102 90 116 92 86 100 3
|
||||
88 102 56 80 100 90 118 90 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 1
|
||||
88 102 56 80 100 90 118 90 88 104 1
|
||||
80 102 54 82 102 92 116 90 86 104 1
|
||||
88 -9 56 84 102 92 120 90 88 100 1
|
||||
86 -9 56 80 102 90 116 90 86 102 1
|
||||
88 -9 56 84 102 92 120 90 88 100 1
|
||||
86 -9 56 80 102 90 116 90 86 100 1
|
||||
88 102 54 82 102 90 116 92 86 102 2
|
||||
88 102 56 80 100 90 118 90 88 104 2
|
||||
88 102 54 80 102 90 116 92 86 102 2
|
||||
88 102 56 80 100 90 118 90 88 104 2
|
||||
80 102 54 82 102 92 116 -9 86 104 2
|
||||
88 104 56 84 102 92 120 -9 88 100 2
|
||||
80 102 54 82 102 92 116 -9 86 104 2
|
||||
88 104 56 84 102 92 120 -9 88 100 2
|
||||
86 102 56 80 102 90 116 90 86 100 3
|
||||
88 104 54 80 102 90 116 92 86 100 3
|
||||
86 102 56 80 104 90 116 90 86 100 3
|
||||
88 102 54 80 102 90 116 92 86 100 3
|
||||
|
|
@ -1,20 +1,20 @@
|
|||
>1
|
||||
AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT
|
||||
TAGTAACATGGTCAGCTGCTCATCCGTATT
|
||||
|
||||
>2
|
||||
ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG
|
||||
AATGCACCACAAACATACCCCACAGGCTCG
|
||||
|
||||
>3
|
||||
GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT
|
||||
GTCTGAGTATGGATGGTGGAGGCGGAGCCG
|
||||
|
||||
>4
|
||||
ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG
|
||||
CAAGGATAAAGTTCACCCATCACGTTGACC
|
||||
|
||||
>5
|
||||
ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG
|
||||
GTAACATAGCTACCAGGTCCCATTCCAGGA
|
||||
|
||||
>1
|
||||
AACGAAACGATCGCGTCACCGGAACGTTGTCCGTCTCGAATAGCACTGTGGGAACGTGTTTTACATTCGT
|
||||
TAGTAACATGGTCAGCTGCTCATCCGTATT
|
||||
|
||||
>2
|
||||
ATCAGCAAACGAGAAGTTGCAGAGGTCTTTGGTTTGAGCATTGCCCCCATACAATCGACTTCTGGCCTGG
|
||||
AATGCACCACAAACATACCCCACAGGCTCG
|
||||
|
||||
>3
|
||||
GCTTTTACTAAGGCCTATCGGATTCAACGTCACTAAGACTCGGCACTAACAGGCCGTTGTAAGCCGCTCT
|
||||
GTCTGAGTATGGATGGTGGAGGCGGAGCCG
|
||||
|
||||
>4
|
||||
ACCTGGACCTCTGTATTAACGGCTGTGATTCTGAGGGGGGTATCGCAGCGCACTTTCTAGCTATATCACG
|
||||
CAAGGATAAAGTTCACCCATCACGTTGACC
|
||||
|
||||
>5
|
||||
ACAATACGTCATCCACACCGCGCCTATGGAAGAATTTGCCCTTTCGGCGACAGCCCATGCTGTCAAGGAG
|
||||
GTAACATAGCTACCAGGTCCCATTCCAGGA
|
||||
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 1
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 1
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 2
|
||||
GTTATCTAC--GGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 2
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 3
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 3
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 4
|
||||
GTTATCTAC--GGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 4
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 1
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 1
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 2
|
||||
GTTATCTAC--GGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 2
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 3
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 3
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 4
|
||||
GTTATCTAC--GGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 4
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 1
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 2
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 3
|
||||
GTTATCTAC--GGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 4
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 1
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 2
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 3
|
||||
GTTATCTAC--GGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 4
|
||||
|
|
@ -1,16 +1,16 @@
|
|||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 1
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 1
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 1
|
||||
GTTATTGACTCGGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 1
|
||||
TATCTAC--GAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 2
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 2
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 2
|
||||
GTTATCTAC--GGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 2
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 3
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 3
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 3
|
||||
GTTATTGACTCGGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 3
|
||||
TATCTAC--GAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 4
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 4
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 4
|
||||
GTTATCTAC--GGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 4
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 1
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 1
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 1
|
||||
GTTATTGACTCGGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 1
|
||||
TATCTAC--GAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 2
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 2
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 2
|
||||
GTTATCTAC--GGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 2
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 3
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 3
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 3
|
||||
GTTATTGACTCGGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 3
|
||||
TATCTAC--GAGCTTATTGCTGCAGCGCAGAAAGTAGGTAAAACGTGTGCATTCGTTGATGCGGAA 4
|
||||
GCACTTGACCCTATCTACGCTCAAAAGCTTGGTGTTGATATTGACGCTTTGCTTGTATCTCAACCT 4
|
||||
GATACGGGTGAACAAGCTCTAGAAATTTGTGATGCACTGGCTCGTTCAGGTGCTATCGATGTTCTT 4
|
||||
GTTATCTAC--GGTTGCTGCACTAACACCT-AGCTGAGATCGA-GGCGAAATGGGCGATAGCCACA 4
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
>RecA-1
|
||||
CTAACGCTTGAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAACCTAACGCTTGAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAAC
|
||||
>RecA-2
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCACAGA-AGTGGGCAAAACGTGTGCATTCGTCGATGCCTAACGCTTGAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAAC
|
||||
>RecA-3
|
||||
CTAACGC--GAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAACCTAACGCTTGAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAAC
|
||||
>RecA-4
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCACAGA-AGTGGGCAAAACGTGTGCATTCGTCGATGCCTAACGCTTGAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAAC
|
||||
>RecA-1
|
||||
CTAACGCTTGAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAACCTAACGCTTGAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAAC
|
||||
>RecA-2
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCACAGA-AGTGGGCAAAACGTGTGCATTCGTCGATGCCTAACGCTTGAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAAC
|
||||
>RecA-3
|
||||
CTAACGC--GAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAACCTAACGCTTGAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAAC
|
||||
>RecA-4
|
||||
CTAACGCTTGAGCTTATTGCTGCAGCACAGA-AGTGGGCAAAACGTGTGCATTCGTCGATGCCTAACGCTTGAGCTTATT-CTGCAGCCAAATTCGTATGAAAATTGGTGTAATGTTCGGTAAC
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
3
|
||||
3
|
||||
3
|
||||
1
|
||||
1
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
3
|
||||
3
|
||||
3
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
ST Isolate Species Adk GyrB Hsp60 Mdh Pgi RecA
|
||||
1 1A1 My. splendidone 1 1 1 1 1 1
|
||||
2 1B1 A. dent 2 2 2 2 2 2
|
||||
3 1B2 A. dent 3 3 3 3 3 3
|
||||
4 1A5 D. gently 4 4 4 4 4 4
|
||||
ST Isolate Species Adk GyrB Hsp60 Mdh Pgi RecA
|
||||
1 1A1 My. splendidone 1 1 1 1 1 1
|
||||
2 1B1 A. dent 2 2 2 2 2 2
|
||||
3 1B2 A. dent 3 3 3 3 3 3
|
||||
4 1A5 D. gently 4 4 4 4 4 4
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,3 +1,3 @@
|
|||
1
|
||||
3
|
||||
5
|
||||
1
|
||||
3
|
||||
5
|
||||
|
|
@ -1,3 +1,3 @@
|
|||
Example population 1
|
||||
Example population 2
|
||||
Example population 1
|
||||
Example population 2
|
||||
Example population 3
|
||||
|
|
@ -1,25 +1,25 @@
|
|||
Sample data for trained clustering
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
POP
|
||||
AreaUnknown_11, 1717 1010 1717 0404 0505 0606 0808 1010 0707 1212
|
||||
AreaUnknown_12, 1515 1212 1414 0404 0505 0606 1919 1010 0707 1414
|
||||
AreaUnknown_13, 1717 0303 1515 0404 0505 0606 1414 1010 0707 1212
|
||||
POP
|
||||
AreaUnknown_24, 1311 1205 1212 0302 0505 0606 1313 1005 0707 1107
|
||||
AreaUnknown_25, 1306 0707 1212 0505 0603 0808 1109 1003 0000 0806
|
||||
POP
|
||||
AreaUnknown_36, 0909 0606 1212 0404 0606 0808 1717 1001 0909 1308
|
||||
POP
|
||||
AreaUnknown_47, 1507 1208 1212 0404 0202 0101 1915 1510 0202 0705
|
||||
AreaUnknown_48, 1507 1313 1212 0202 0202 0101 1414 1510 0202 0704
|
||||
AreaUnknown_49, 0907 0907 1212 0404 0202 0101 1908 0000 0202 1507
|
||||
AreaUnknown_410, 1109 1918 1512 0505 0505 0000 1605 0706 0707 0505
|
||||
Sample data for trained clustering
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
POP
|
||||
AreaUnknown_11, 1717 1010 1717 0404 0505 0606 0808 1010 0707 1212
|
||||
AreaUnknown_12, 1515 1212 1414 0404 0505 0606 1919 1010 0707 1414
|
||||
AreaUnknown_13, 1717 0303 1515 0404 0505 0606 1414 1010 0707 1212
|
||||
POP
|
||||
AreaUnknown_24, 1311 1205 1212 0302 0505 0606 1313 1005 0707 1107
|
||||
AreaUnknown_25, 1306 0707 1212 0505 0603 0808 1109 1003 0000 0806
|
||||
POP
|
||||
AreaUnknown_36, 0909 0606 1212 0404 0606 0808 1717 1001 0909 1308
|
||||
POP
|
||||
AreaUnknown_47, 1507 1208 1212 0404 0202 0101 1915 1510 0202 0705
|
||||
AreaUnknown_48, 1507 1313 1212 0202 0202 0101 1414 1510 0202 0704
|
||||
AreaUnknown_49, 0907 0907 1212 0404 0202 0101 1908 0000 0202 1507
|
||||
AreaUnknown_410, 1109 1918 1512 0505 0505 0000 1605 0706 0707 0505
|
||||
|
|
@ -1,31 +1,31 @@
|
|||
Sample data for trained clustering
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
POP
|
||||
AreaUnknown1, 1717 1010 1717 0404 0505 0606 0808 1010 0707 1212
|
||||
POP
|
||||
AreaUnknown2, 1515 1212 1414 0404 0505 0606 1919 1010 0707 1414
|
||||
POP
|
||||
AreaUnknown3, 1717 0303 1515 0404 0505 0606 1414 1010 0707 1212
|
||||
POP
|
||||
AreaUnknown4, 1311 1205 1212 0302 0505 0606 1313 1005 0707 1107
|
||||
POP
|
||||
AreaUnknown5, 1306 0707 1212 0505 0603 0808 1109 1003 0000 0806
|
||||
POP
|
||||
AreaUnknown6, 0909 0606 1212 0404 0606 0808 1717 1001 0909 1308
|
||||
POP
|
||||
AreaUnknown7, 1507 1208 1212 0404 0202 0101 1915 1510 0202 0705
|
||||
POP
|
||||
AreaUnknown8, 1507 1313 1212 0202 0202 0101 1414 1510 0202 0704
|
||||
POP
|
||||
AreaUnknown9, 0907 0907 1212 0404 0202 0101 1908 0000 0202 1507
|
||||
POP
|
||||
AreaUnknown10, 1109 1918 1512 0505 0505 0000 1605 0706 0707 0505
|
||||
Sample data for trained clustering
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
POP
|
||||
AreaUnknown1, 1717 1010 1717 0404 0505 0606 0808 1010 0707 1212
|
||||
POP
|
||||
AreaUnknown2, 1515 1212 1414 0404 0505 0606 1919 1010 0707 1414
|
||||
POP
|
||||
AreaUnknown3, 1717 0303 1515 0404 0505 0606 1414 1010 0707 1212
|
||||
POP
|
||||
AreaUnknown4, 1311 1205 1212 0302 0505 0606 1313 1005 0707 1107
|
||||
POP
|
||||
AreaUnknown5, 1306 0707 1212 0505 0603 0808 1109 1003 0000 0806
|
||||
POP
|
||||
AreaUnknown6, 0909 0606 1212 0404 0606 0808 1717 1001 0909 1308
|
||||
POP
|
||||
AreaUnknown7, 1507 1208 1212 0404 0202 0101 1915 1510 0202 0705
|
||||
POP
|
||||
AreaUnknown8, 1507 1313 1212 0202 0202 0101 1414 1510 0202 0704
|
||||
POP
|
||||
AreaUnknown9, 0907 0907 1212 0404 0202 0101 1908 0000 0202 1507
|
||||
POP
|
||||
AreaUnknown10, 1109 1918 1512 0505 0505 0000 1605 0706 0707 0505
|
||||
|
|
@ -1,108 +1,108 @@
|
|||
Baseline data for trained clustering
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
POP
|
||||
Area1_1, 1717 1111 0000 0404 0505 0606 0707 1212 0707 1212
|
||||
Area1_2, 0909 1010 0505 0404 0505 1010 0707 1111 0707 1212
|
||||
Area1_3, 0000 1010 1414 0404 0505 0606 1111 1212 0707 1212
|
||||
Area1_4, 1111 1010 1515 0404 0101 0606 1313 1111 0707 0808
|
||||
Area1_5, 0101 1010 1212 0404 0505 0606 0909 1212 0707 1212
|
||||
Area1_6, 1111 1010 1414 0404 0505 0606 1111 1212 0707 1010
|
||||
Area1_7, 1515 1010 0707 0404 0505 0606 0707 1212 0707 1212
|
||||
Area1_8, 1515 1212 0606 0404 0505 0606 1616 1212 0707 1010
|
||||
Area1_9, 0909 1010 1010 0404 0505 0606 1919 1212 0707 1010
|
||||
Area1_10, 0000 0000 0000 0404 0505 0606 1111 1010 0707 0000
|
||||
Area1_11, 0000 0000 0000 0404 0505 0505 1919 1212 0404 1111
|
||||
Area1_12, 1515 1010 0606 0404 0505 0606 1616 1212 0202 1212
|
||||
Area1_13, 1515 1010 1515 0404 0505 0606 0707 0606 0707 1212
|
||||
Area1_14, 0505 1010 1919 0404 0202 0606 1111 1010 0202 1212
|
||||
Area1_15, 1111 1010 1414 0404 0505 0606 1616 0606 0707 1212
|
||||
Area1_16, 1515 1212 0000 0404 0505 0606 1919 1212 0707 1212
|
||||
Area1_17, 1515 1010 1515 0404 0505 0606 1919 1212 0707 1212
|
||||
Area1_18, 1515 1010 1212 0404 0505 0606 0909 1212 0707 1010
|
||||
Area1_19, 1515 1212 0606 0404 0505 0606 1515 1212 0707 1212
|
||||
Area1_20, 1717 1414 1717 0404 0505 0606 0808 1212 0707 0808
|
||||
Area1_21, 1515 1212 1212 0404 0505 0606 0808 1818 0707 1212
|
||||
Area1_22, 1313 1111 1212 0404 0505 0606 1313 1212 0707 1212
|
||||
Area1_23, 1515 1212 1212 0404 0505 0606 1313 1010 0707 1212
|
||||
Area1_24, 0000 0000 0000 0404 0505 0606 1919 1212 0404 0000
|
||||
Area1_25, 0000 0000 0000 0404 0202 0101 1919 1919 0707 1212
|
||||
Area1_26, 1313 1010 0202 0404 0505 0606 1717 1212 0707 1212
|
||||
POP
|
||||
Area2_1, 1706 0505 1212 0404 0606 0808 1309 1111 0909 1413
|
||||
Area2_2, 1511 0707 1209 0404 0602 0808 1111 1004 0902 1310
|
||||
Area2_3, 1711 0000 1412 0402 0000 0000 1913 1002 0000 1313
|
||||
Area2_4, 1715 1515 1412 0404 0505 0604 1313 1010 1007 1309
|
||||
Area2_5, 1515 1515 1412 0404 0505 0606 1515 1111 0707 0908
|
||||
Area2_6, 1309 0000 1212 0404 0603 0802 1311 1204 0903 1309
|
||||
Area2_7, 1313 0505 1212 0404 0606 0808 1616 1414 0909 1206
|
||||
Area2_8, 1307 0707 1212 0404 0606 0808 1313 1004 0909 1313
|
||||
Area2_9, 0000 1212 1209 0404 0505 0606 0000 1106 0707 1208
|
||||
Area2_10, 1307 0000 1212 0404 0606 0806 1313 1005 0909 1111
|
||||
Area2_11, 0000 0707 1409 0505 0606 0808 1616 1110 0909 1309
|
||||
Area2_12, 1807 0000 1212 0505 0606 0000 0909 0401 0909 1309
|
||||
Area2_13, 1511 1212 1212 0404 0505 0604 1313 1110 0707 1210
|
||||
Area2_14, 1111 1515 1412 0505 0606 0808 1414 1004 0909 1313
|
||||
Area2_15, 1817 0707 1212 0505 0707 0909 1111 1004 0909 1313
|
||||
Area2_16, 1913 1511 1212 0404 0606 0000 0909 1212 0909 1313
|
||||
Area2_17, 1515 0000 0000 0000 0505 1006 1313 1005 0707 1212
|
||||
Area2_18, 0707 0606 1408 0404 0202 0101 1313 1615 0202 1307
|
||||
Area2_19, 0707 1309 0909 0502 0202 0101 2009 1510 0202 0704
|
||||
POP
|
||||
Area3_1, 1507 0706 1212 0202 0202 0000 0905 1409 0202 0707
|
||||
Area3_2, 1507 1313 1212 0202 0202 0101 1613 1510 0202 0807
|
||||
Area3_3, 1313 1414 1212 0404 0202 0101 1909 1510 0202 0704
|
||||
Area3_4, 1515 0909 1212 0502 0202 0101 1409 1210 0202 0807
|
||||
Area3_5, 1515 0808 1212 0502 0202 0101 1111 1510 0202 1007
|
||||
Area3_6, 1306 0909 1212 0202 0202 0101 0807 1512 0202 0707
|
||||
Area3_7, 0000 1009 1212 0404 0202 0101 1109 0702 0202 0808
|
||||
Area3_8, 1507 0606 1212 0404 0202 0101 1908 1409 0202 0707
|
||||
Area3_9, 1515 0606 1212 0202 0202 0101 0909 1510 0202 1207
|
||||
Area3_10, 1307 1010 1412 0202 0202 0101 1709 1615 0202 1207
|
||||
Area3_11, 1307 1005 1212 0404 0202 0101 1709 1510 0202 0703
|
||||
Area3_12, 1109 0902 1212 0404 0202 0101 0909 1002 0202 1207
|
||||
Area3_13, 1307 0606 1412 0404 0202 0101 0807 1515 0202 1207
|
||||
Area3_14, 1717 1407 1212 0404 0202 0101 1107 1409 0202 0805
|
||||
Area3_15, 1307 1007 1412 0404 0505 0101 0909 0000 0202 0807
|
||||
Area3_16, 1811 0000 1212 0404 0505 0000 1515 0707 0000 1212
|
||||
Area3_17, 1907 1414 1512 0402 0705 0000 0000 0000 0909 1212
|
||||
POP
|
||||
Area4_1, 1311 2019 1212 0404 0505 0000 1919 0707 0707 1109
|
||||
Area4_2, 1309 2018 1512 0404 0505 0000 1919 0808 0707 1111
|
||||
Area4_3, 1509 2118 1212 0404 0505 0000 1515 0707 0707 1107
|
||||
Area4_4, 1715 2221 1512 0404 0505 0000 1919 0707 0707 1111
|
||||
Area4_5, 1515 2121 1512 0404 0505 0000 1515 0707 0606 0707
|
||||
Area4_6, 1717 2222 1512 0404 0505 0000 1913 0707 0707 0907
|
||||
Area4_7, 1715 2221 1512 0404 0505 0000 1313 0707 0606 1111
|
||||
Area4_8, 1813 2320 1512 0404 0505 0000 1515 0707 0707 1107
|
||||
Area4_9, 1311 2019 1512 0404 0505 0000 1515 0707 0707 1111
|
||||
Area4_10, 1311 2019 1512 0000 0505 0000 1313 0707 0707 0909
|
||||
Area4_11, 1111 2625 0000 0000 0505 0606 0707 0903 0505 0505
|
||||
Area4_12, 0907 2724 0000 0404 0505 0605 1914 1105 0505 0707
|
||||
Area4_13, 1511 1610 1212 0404 0505 0605 1513 1105 1107 0704
|
||||
Area4_14, 1513 0404 1111 0404 0505 0606 1515 1111 1207 1107
|
||||
Area4_15, 1311 1616 1212 0404 0505 0606 1313 1111 0707 1107
|
||||
Area4_16, 1109 0606 1212 0404 0000 0000 1515 0902 0807 0505
|
||||
Area4_17, 1107 1004 1506 0404 0505 0606 1515 1212 0707 1108
|
||||
Area4_18, 1107 0904 1512 0404 0505 0606 1913 1105 1107 1107
|
||||
Area4_19, 1109 1313 1212 0404 0505 0606 1915 1111 1007 1107
|
||||
Area4_20, 1711 1604 1212 0404 0505 0606 1915 1212 1107 1007
|
||||
Area4_21, 1111 0606 1515 0404 0505 0606 1707 1009 0807 0502
|
||||
Area4_22, 1311 0603 1512 0404 0505 0606 1714 0707 0807 0501
|
||||
POP
|
||||
Area5_1, 0711 1212 1513 0202 0707 0808 1408 0000 0909 1210
|
||||
Area5_2, 1118 0101 1212 0202 0803 0901 0808 0000 1101 1211
|
||||
Area5_3, 1518 0000 1512 0404 0707 0806 0909 0000 0909 1212
|
||||
Area5_4, 1309 0000 1512 0202 0606 0707 1508 0000 0808 1204
|
||||
Area5_5, 0718 0000 1512 0402 0707 0806 0707 0000 0909 1208
|
||||
Area5_6, 1818 1414 1212 0404 0707 0000 1916 0000 0909 1208
|
||||
Area5_7, 1318 1313 1212 0404 0606 0000 1908 0000 0808 1008
|
||||
Area5_8, 1818 0000 1212 0404 0000 0806 1616 0000 0808 1212
|
||||
Baseline data for trained clustering
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
POP
|
||||
Area1_1, 1717 1111 0000 0404 0505 0606 0707 1212 0707 1212
|
||||
Area1_2, 0909 1010 0505 0404 0505 1010 0707 1111 0707 1212
|
||||
Area1_3, 0000 1010 1414 0404 0505 0606 1111 1212 0707 1212
|
||||
Area1_4, 1111 1010 1515 0404 0101 0606 1313 1111 0707 0808
|
||||
Area1_5, 0101 1010 1212 0404 0505 0606 0909 1212 0707 1212
|
||||
Area1_6, 1111 1010 1414 0404 0505 0606 1111 1212 0707 1010
|
||||
Area1_7, 1515 1010 0707 0404 0505 0606 0707 1212 0707 1212
|
||||
Area1_8, 1515 1212 0606 0404 0505 0606 1616 1212 0707 1010
|
||||
Area1_9, 0909 1010 1010 0404 0505 0606 1919 1212 0707 1010
|
||||
Area1_10, 0000 0000 0000 0404 0505 0606 1111 1010 0707 0000
|
||||
Area1_11, 0000 0000 0000 0404 0505 0505 1919 1212 0404 1111
|
||||
Area1_12, 1515 1010 0606 0404 0505 0606 1616 1212 0202 1212
|
||||
Area1_13, 1515 1010 1515 0404 0505 0606 0707 0606 0707 1212
|
||||
Area1_14, 0505 1010 1919 0404 0202 0606 1111 1010 0202 1212
|
||||
Area1_15, 1111 1010 1414 0404 0505 0606 1616 0606 0707 1212
|
||||
Area1_16, 1515 1212 0000 0404 0505 0606 1919 1212 0707 1212
|
||||
Area1_17, 1515 1010 1515 0404 0505 0606 1919 1212 0707 1212
|
||||
Area1_18, 1515 1010 1212 0404 0505 0606 0909 1212 0707 1010
|
||||
Area1_19, 1515 1212 0606 0404 0505 0606 1515 1212 0707 1212
|
||||
Area1_20, 1717 1414 1717 0404 0505 0606 0808 1212 0707 0808
|
||||
Area1_21, 1515 1212 1212 0404 0505 0606 0808 1818 0707 1212
|
||||
Area1_22, 1313 1111 1212 0404 0505 0606 1313 1212 0707 1212
|
||||
Area1_23, 1515 1212 1212 0404 0505 0606 1313 1010 0707 1212
|
||||
Area1_24, 0000 0000 0000 0404 0505 0606 1919 1212 0404 0000
|
||||
Area1_25, 0000 0000 0000 0404 0202 0101 1919 1919 0707 1212
|
||||
Area1_26, 1313 1010 0202 0404 0505 0606 1717 1212 0707 1212
|
||||
POP
|
||||
Area2_1, 1706 0505 1212 0404 0606 0808 1309 1111 0909 1413
|
||||
Area2_2, 1511 0707 1209 0404 0602 0808 1111 1004 0902 1310
|
||||
Area2_3, 1711 0000 1412 0402 0000 0000 1913 1002 0000 1313
|
||||
Area2_4, 1715 1515 1412 0404 0505 0604 1313 1010 1007 1309
|
||||
Area2_5, 1515 1515 1412 0404 0505 0606 1515 1111 0707 0908
|
||||
Area2_6, 1309 0000 1212 0404 0603 0802 1311 1204 0903 1309
|
||||
Area2_7, 1313 0505 1212 0404 0606 0808 1616 1414 0909 1206
|
||||
Area2_8, 1307 0707 1212 0404 0606 0808 1313 1004 0909 1313
|
||||
Area2_9, 0000 1212 1209 0404 0505 0606 0000 1106 0707 1208
|
||||
Area2_10, 1307 0000 1212 0404 0606 0806 1313 1005 0909 1111
|
||||
Area2_11, 0000 0707 1409 0505 0606 0808 1616 1110 0909 1309
|
||||
Area2_12, 1807 0000 1212 0505 0606 0000 0909 0401 0909 1309
|
||||
Area2_13, 1511 1212 1212 0404 0505 0604 1313 1110 0707 1210
|
||||
Area2_14, 1111 1515 1412 0505 0606 0808 1414 1004 0909 1313
|
||||
Area2_15, 1817 0707 1212 0505 0707 0909 1111 1004 0909 1313
|
||||
Area2_16, 1913 1511 1212 0404 0606 0000 0909 1212 0909 1313
|
||||
Area2_17, 1515 0000 0000 0000 0505 1006 1313 1005 0707 1212
|
||||
Area2_18, 0707 0606 1408 0404 0202 0101 1313 1615 0202 1307
|
||||
Area2_19, 0707 1309 0909 0502 0202 0101 2009 1510 0202 0704
|
||||
POP
|
||||
Area3_1, 1507 0706 1212 0202 0202 0000 0905 1409 0202 0707
|
||||
Area3_2, 1507 1313 1212 0202 0202 0101 1613 1510 0202 0807
|
||||
Area3_3, 1313 1414 1212 0404 0202 0101 1909 1510 0202 0704
|
||||
Area3_4, 1515 0909 1212 0502 0202 0101 1409 1210 0202 0807
|
||||
Area3_5, 1515 0808 1212 0502 0202 0101 1111 1510 0202 1007
|
||||
Area3_6, 1306 0909 1212 0202 0202 0101 0807 1512 0202 0707
|
||||
Area3_7, 0000 1009 1212 0404 0202 0101 1109 0702 0202 0808
|
||||
Area3_8, 1507 0606 1212 0404 0202 0101 1908 1409 0202 0707
|
||||
Area3_9, 1515 0606 1212 0202 0202 0101 0909 1510 0202 1207
|
||||
Area3_10, 1307 1010 1412 0202 0202 0101 1709 1615 0202 1207
|
||||
Area3_11, 1307 1005 1212 0404 0202 0101 1709 1510 0202 0703
|
||||
Area3_12, 1109 0902 1212 0404 0202 0101 0909 1002 0202 1207
|
||||
Area3_13, 1307 0606 1412 0404 0202 0101 0807 1515 0202 1207
|
||||
Area3_14, 1717 1407 1212 0404 0202 0101 1107 1409 0202 0805
|
||||
Area3_15, 1307 1007 1412 0404 0505 0101 0909 0000 0202 0807
|
||||
Area3_16, 1811 0000 1212 0404 0505 0000 1515 0707 0000 1212
|
||||
Area3_17, 1907 1414 1512 0402 0705 0000 0000 0000 0909 1212
|
||||
POP
|
||||
Area4_1, 1311 2019 1212 0404 0505 0000 1919 0707 0707 1109
|
||||
Area4_2, 1309 2018 1512 0404 0505 0000 1919 0808 0707 1111
|
||||
Area4_3, 1509 2118 1212 0404 0505 0000 1515 0707 0707 1107
|
||||
Area4_4, 1715 2221 1512 0404 0505 0000 1919 0707 0707 1111
|
||||
Area4_5, 1515 2121 1512 0404 0505 0000 1515 0707 0606 0707
|
||||
Area4_6, 1717 2222 1512 0404 0505 0000 1913 0707 0707 0907
|
||||
Area4_7, 1715 2221 1512 0404 0505 0000 1313 0707 0606 1111
|
||||
Area4_8, 1813 2320 1512 0404 0505 0000 1515 0707 0707 1107
|
||||
Area4_9, 1311 2019 1512 0404 0505 0000 1515 0707 0707 1111
|
||||
Area4_10, 1311 2019 1512 0000 0505 0000 1313 0707 0707 0909
|
||||
Area4_11, 1111 2625 0000 0000 0505 0606 0707 0903 0505 0505
|
||||
Area4_12, 0907 2724 0000 0404 0505 0605 1914 1105 0505 0707
|
||||
Area4_13, 1511 1610 1212 0404 0505 0605 1513 1105 1107 0704
|
||||
Area4_14, 1513 0404 1111 0404 0505 0606 1515 1111 1207 1107
|
||||
Area4_15, 1311 1616 1212 0404 0505 0606 1313 1111 0707 1107
|
||||
Area4_16, 1109 0606 1212 0404 0000 0000 1515 0902 0807 0505
|
||||
Area4_17, 1107 1004 1506 0404 0505 0606 1515 1212 0707 1108
|
||||
Area4_18, 1107 0904 1512 0404 0505 0606 1913 1105 1107 1107
|
||||
Area4_19, 1109 1313 1212 0404 0505 0606 1915 1111 1007 1107
|
||||
Area4_20, 1711 1604 1212 0404 0505 0606 1915 1212 1107 1007
|
||||
Area4_21, 1111 0606 1515 0404 0505 0606 1707 1009 0807 0502
|
||||
Area4_22, 1311 0603 1512 0404 0505 0606 1714 0707 0807 0501
|
||||
POP
|
||||
Area5_1, 0711 1212 1513 0202 0707 0808 1408 0000 0909 1210
|
||||
Area5_2, 1118 0101 1212 0202 0803 0901 0808 0000 1101 1211
|
||||
Area5_3, 1518 0000 1512 0404 0707 0806 0909 0000 0909 1212
|
||||
Area5_4, 1309 0000 1512 0202 0606 0707 1508 0000 0808 1204
|
||||
Area5_5, 0718 0000 1512 0402 0707 0806 0707 0000 0909 1208
|
||||
Area5_6, 1818 1414 1212 0404 0707 0000 1916 0000 0909 1208
|
||||
Area5_7, 1318 1313 1212 0404 0606 0000 1908 0000 0808 1008
|
||||
Area5_8, 1818 0000 1212 0404 0000 0806 1616 0000 0808 1212
|
||||
|
|
@ -4,18 +4,56 @@
|
|||
\alias{greedyMix}
|
||||
\title{Clustering of individuals}
|
||||
\usage{
|
||||
greedyMix(data, format, verbose = TRUE)
|
||||
greedyMix(
|
||||
data,
|
||||
format,
|
||||
partitionCompare = NULL,
|
||||
ninds = 1L,
|
||||
npops = 1L,
|
||||
counts = NULL,
|
||||
sumcounts = NULL,
|
||||
max_iter = 100L,
|
||||
alleleCodes = NULL,
|
||||
inp = NULL,
|
||||
popnames = NULL,
|
||||
fixedK = FALSE,
|
||||
verbose = FALSE
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{data file}
|
||||
|
||||
\item{format}{Data format. Supported formats: "FASTA", "VCF", "BAM", "GenePop"}
|
||||
|
||||
\item{partitionCompare}{a list of partitions to compare}
|
||||
|
||||
\item{ninds}{number of individuals}
|
||||
|
||||
\item{npops}{number of populations}
|
||||
|
||||
\item{counts}{counts}
|
||||
|
||||
\item{sumcounts}{sumcounts}
|
||||
|
||||
\item{max_iter}{maximum number of iterations}
|
||||
|
||||
\item{alleleCodes}{allele codes}
|
||||
|
||||
\item{inp}{input file}
|
||||
|
||||
\item{popnames}{population names}
|
||||
|
||||
\item{fixedK}{if \code{TRUE}, the number of populations is fixed}
|
||||
|
||||
\item{verbose}{if \code{TRUE}, prints extra output information}
|
||||
}
|
||||
\description{
|
||||
Clustering of individuals
|
||||
}
|
||||
\examples{
|
||||
data <- system.file("extdata", "FASTA_clustering_haploid.fasta", package = "rBAPS")
|
||||
greedyMix(data, "fasta")
|
||||
}
|
||||
\references{
|
||||
Samtools: a suite of programs for interacting
|
||||
with high-throughput sequencing data. <http://www.htslib.org/>
|
||||
|
|
|
|||
|
|
@ -4,10 +4,12 @@
|
|||
\alias{handleData}
|
||||
\title{Handle Data}
|
||||
\usage{
|
||||
handleData(raw_data)
|
||||
handleData(raw_data, format = "Genepop")
|
||||
}
|
||||
\arguments{
|
||||
\item{raw_data}{Raw data}
|
||||
\item{raw_data}{Raw data in Genepop or BAPS format}
|
||||
|
||||
\item{format}{data format}
|
||||
}
|
||||
\description{
|
||||
Handle Data
|
||||
|
|
@ -20,5 +22,5 @@ After this function. Add blank lines for individuals with fewer rows as
|
|||
maximum. If the code of an allele is = 0, the function changes that allele
|
||||
code to the smallest code that is larger than any code in use. After this,
|
||||
the function changes the allele codes so that one locus j
|
||||
codes get values between? 1, ..., Noah (j).
|
||||
codes get values between 1 and noalle(j).
|
||||
}
|
||||
|
|
|
|||
26
man/importFile.Rd
Normal file
26
man/importFile.Rd
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/importFile.R
|
||||
\name{importFile}
|
||||
\alias{importFile}
|
||||
\title{Import data file}
|
||||
\usage{
|
||||
importFile(data, format, verbose)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{raw dataset}
|
||||
|
||||
\item{format}{data format (guesses from extension if not provided)}
|
||||
|
||||
\item{verbose}{if \code{TRUE}, prints extra output information}
|
||||
}
|
||||
\value{
|
||||
The data in a format that can be used by the other functions
|
||||
}
|
||||
\description{
|
||||
Imports data from several formats (FASTA, VCF, SAM, BAM,
|
||||
Genepop).
|
||||
}
|
||||
\examples{
|
||||
path_inst <- system.file("extdata", "", package = "rBAPS")
|
||||
importFile(file.path(path_inst, "FASTA_clustering_haploid.fasta"))
|
||||
}
|
||||
|
|
@ -4,12 +4,15 @@
|
|||
\alias{load_fasta}
|
||||
\title{load_fasta}
|
||||
\usage{
|
||||
load_fasta(msa, keep.singletons = FALSE)
|
||||
load_fasta(msa, keep_singletons = FALSE, output_numbers = TRUE)
|
||||
}
|
||||
\arguments{
|
||||
\item{msa}{Either the location of a fasta file or ape DNAbin object containing the multiple sequence alignment data to be clustered}
|
||||
|
||||
\item{keep.singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}
|
||||
\item{keep_singletons}{A logical indicating whether to consider singleton mutations in calculating the clusters}
|
||||
|
||||
\item{output_numbers}{A logical indicating whether to output the data as
|
||||
numbers (TRUE) or letters (FALSE)}
|
||||
}
|
||||
\value{
|
||||
A character matrix with filtered SNP data
|
||||
|
|
@ -19,8 +22,8 @@ Loads a fasta file into matrix format ready for
|
|||
running the hierBAPS algorithm.
|
||||
}
|
||||
\examples{
|
||||
msa <- system.file("ext", "seqs.fa", package = "rBAPS")
|
||||
snp.matrix <- load_fasta(msa)
|
||||
msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
|
||||
snp.matrix <- rBAPS:::load_fasta(msa)
|
||||
}
|
||||
\seealso{
|
||||
rhierbaps::load_fasta
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ vector of length `nc` with r.v. realizations from Gamma(rate=1)
|
|||
Generates random numbers
|
||||
}
|
||||
\examples{
|
||||
randdir(matrix(c(10, 30, 60), 3), 3)
|
||||
rBAPS:::randdir(matrix(c(10, 30, 60), 3), 3)
|
||||
}
|
||||
\seealso{
|
||||
randga
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ for run = 1:nruns
|
|||
apu = rows(i);
|
||||
PARTITION(i) = initialPartition(apu(1));
|
||||
end
|
||||
|
||||
|
||||
COUNTS = counts; SUMCOUNTS = sumcounts;
|
||||
POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm);
|
||||
LOGDIFF = repmat(-Inf,ninds,npops);
|
||||
|
|
@ -98,7 +98,7 @@ for run = 1:nruns
|
|||
kokeiltu = zeros(nRoundTypes, 1);
|
||||
roundTypes = [1 1]; %Ykkösvaiheen sykli kahteen kertaan.
|
||||
ready = 0; vaihe = 1;
|
||||
|
||||
|
||||
if dispText
|
||||
disp(' ');
|
||||
disp(['Mixture analysis started with initial ' num2str(npops) ' populations.']);
|
||||
|
|
@ -106,11 +106,11 @@ for run = 1:nruns
|
|||
|
||||
while ready ~= 1
|
||||
muutoksia = 0;
|
||||
|
||||
|
||||
if dispText
|
||||
disp(['Performing steps: ' num2str(roundTypes)]);
|
||||
end
|
||||
|
||||
|
||||
for n = 1:length(roundTypes)
|
||||
|
||||
round = roundTypes(n);
|
||||
|
|
@ -465,7 +465,7 @@ for run = 1:nruns
|
|||
|
||||
npops = poistaTyhjatPopulaatiot(npops);
|
||||
POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm);
|
||||
if dispText
|
||||
if dispText
|
||||
disp(['Found partition with ' num2str(npops) ' populations.']);
|
||||
disp(['Log(ml) = ' num2str(logml)]);
|
||||
disp(' ');
|
||||
|
|
@ -491,7 +491,7 @@ COUNTS = countsBest;
|
|||
SUMCOUNTS = sumCountsBest;
|
||||
POP_LOGML = pop_logmlBest;
|
||||
LOGDIFF = logdiffbest;
|
||||
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function clearGlobalVars
|
||||
|
|
@ -509,9 +509,9 @@ function Z = linkage(Y, method)
|
|||
[k, n] = size(Y);
|
||||
m = (1+sqrt(1+8*n))/2;
|
||||
if k ~= 1 | m ~= fix(m)
|
||||
error('The first input has to match the output of the PDIST function in size.');
|
||||
error('The first input has to match the output of the PDIST function in size.');
|
||||
end
|
||||
if nargin == 1 % set default switch to be 'co'
|
||||
if nargin == 1 % set default switch to be 'co'
|
||||
method = 'co';
|
||||
end
|
||||
method = lower(method(1:2)); % simplify the switch string.
|
||||
|
|
@ -519,19 +519,19 @@ monotonic = 1;
|
|||
Z = zeros(m-1,3); % allocate the output matrix.
|
||||
N = zeros(1,2*m-1);
|
||||
N(1:m) = 1;
|
||||
n = m; % since m is changing, we need to save m in n.
|
||||
n = m; % since m is changing, we need to save m in n.
|
||||
R = 1:n;
|
||||
for s = 1:(n-1)
|
||||
X = Y;
|
||||
[v, k] = min(X);
|
||||
i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1)));
|
||||
j = k - (i-1)*(m-i/2)+i;
|
||||
Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A
|
||||
Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A
|
||||
I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables.
|
||||
U = [I1 I2 I3];
|
||||
I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3];
|
||||
J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3];
|
||||
|
||||
|
||||
switch method
|
||||
case 'si' %single linkage
|
||||
Y(I) = min(Y(I),Y(J));
|
||||
|
|
@ -548,12 +548,12 @@ for s = 1:(n-1)
|
|||
end
|
||||
J = [J i*(m-(i+1)/2)-m+j];
|
||||
Y(J) = []; % no need for the cluster information about j.
|
||||
|
||||
|
||||
% update m, N, R
|
||||
m = m-1;
|
||||
m = m-1;
|
||||
N(n+s) = N(R(i)) + N(R(j));
|
||||
R(i) = n+s;
|
||||
R(j:(n-1))=R((j+1):n);
|
||||
R(j:(n-1))=R((j+1):n);
|
||||
end
|
||||
|
||||
|
||||
|
|
@ -623,7 +623,7 @@ function [muutokset, diffInCounts] = ...
|
|||
%
|
||||
% Lisäys 25.9.2007:
|
||||
% Otettu käyttöön globaali muuttuja LOGDIFF, johon on tallennettu muutokset
|
||||
% logml:ssä siirrettäessä yksilöitä toisiin populaatioihin.
|
||||
% logml:ssä siirrettäessä yksilöitä toisiin populaatioihin.
|
||||
|
||||
global COUNTS; global SUMCOUNTS;
|
||||
global PARTITION; global POP_LOGML;
|
||||
|
|
@ -647,7 +647,7 @@ COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
|||
SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
||||
|
||||
i2 = find(muutokset==-Inf); % Etsitään populaatiot jotka muuttuneet viime kerran jälkeen.
|
||||
i2 = setdiff(i2,i1);
|
||||
i2 = setdiff(i2,i1);
|
||||
i2_logml = POP_LOGML(i2);
|
||||
|
||||
ni2 = length(i2);
|
||||
|
|
@ -668,19 +668,19 @@ LOGDIFF(ind,:) = muutokset;
|
|||
|
||||
function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
|
||||
% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
|
||||
% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n
|
||||
% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n
|
||||
% riveillä rows. rows pitää olla vaakavektori.
|
||||
|
||||
diffInCounts = zeros(max_noalle, nloci);
|
||||
for i=rows
|
||||
row = data(i,:);
|
||||
notEmpty = find(row>=0);
|
||||
|
||||
|
||||
if length(notEmpty)>0
|
||||
diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ...
|
||||
diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
%------------------------------------------------------------------------
|
||||
|
||||
|
|
@ -693,8 +693,8 @@ function updateGlobalVariables(ind, i2, diffInCounts, ...
|
|||
% Suorittaa globaalien muuttujien muutokset, kun yksilö ind
|
||||
% on siirretään koriin i2.
|
||||
|
||||
global PARTITION;
|
||||
global COUNTS;
|
||||
global PARTITION;
|
||||
global COUNTS;
|
||||
global SUMCOUNTS;
|
||||
global POP_LOGML;
|
||||
global LOGDIFF;
|
||||
|
|
@ -724,7 +724,7 @@ function [muutokset, diffInCounts] = laskeMuutokset2( ...
|
|||
i1, globalRows, data, adjprior, priorTerm);
|
||||
% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikä olisi
|
||||
% muutos logml:ssä, mikäli korin i1 kaikki yksilöt siirretään
|
||||
% koriin i.
|
||||
% koriin i.
|
||||
|
||||
global COUNTS; global SUMCOUNTS;
|
||||
global PARTITION; global POP_LOGML;
|
||||
|
|
@ -839,7 +839,7 @@ for pop2 = 1:npops2
|
|||
|
||||
i2 = [1:i1-1 , i1+1:npops];
|
||||
i2_logml = POP_LOGML(i2)';
|
||||
|
||||
|
||||
COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
||||
SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
||||
new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)';
|
||||
|
|
@ -848,7 +848,7 @@ for pop2 = 1:npops2
|
|||
|
||||
muutokset(pop2,i2) = new_i1_logml - i1_logml ...
|
||||
+ new_i2_logml - i2_logml;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
%------------------------------------------------------------------------------------
|
||||
|
|
@ -858,7 +858,7 @@ function muutokset = laskeMuutokset5(inds, globalRows, data, adjprior, ...
|
|||
|
||||
% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mikä olisi
|
||||
% muutos logml:ssä, mikäli yksilö i vaihtaisi koria i1:n ja i2:n välillä.
|
||||
|
||||
|
||||
global COUNTS; global SUMCOUNTS;
|
||||
global PARTITION; global POP_LOGML;
|
||||
|
||||
|
|
@ -885,14 +885,14 @@ for i = 1:ninds
|
|||
SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts;
|
||||
COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts;
|
||||
SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts;
|
||||
|
||||
|
||||
new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
||||
muutokset(i) = sum(new_logmls);
|
||||
|
||||
|
||||
COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts;
|
||||
SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts;
|
||||
COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts;
|
||||
SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts;
|
||||
SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts;
|
||||
end
|
||||
|
||||
muutokset = muutokset - i1_logml - i2_logml;
|
||||
|
|
@ -952,7 +952,7 @@ dist2 = dist(apu);
|
|||
|
||||
|
||||
function npops = poistaTyhjatPopulaatiot(npops)
|
||||
% Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
||||
% Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
||||
% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in.
|
||||
|
||||
global COUNTS;
|
||||
|
|
@ -1006,7 +1006,7 @@ if abs(logml)<10000
|
|||
end
|
||||
if logml<0
|
||||
mjono(pointer-1) = '-';
|
||||
end
|
||||
end
|
||||
else
|
||||
suurinYks = 4;
|
||||
while abs(logml)/(10^(suurinYks+1)) >= 1
|
||||
|
|
@ -1035,8 +1035,8 @@ end
|
|||
|
||||
function digit = palautaYks(num,yks)
|
||||
% palauttaa luvun num 10^yks termin kertoimen
|
||||
% string:inä
|
||||
% yks täytyy olla kokonaisluku, joka on
|
||||
% string:inä
|
||||
% yks täytyy olla kokonaisluku, joka on
|
||||
% vähintään -1:n suuruinen. Pienemmillä
|
||||
% luvuilla tapahtuu jokin pyöristysvirhe.
|
||||
|
||||
|
|
@ -1063,7 +1063,7 @@ if abs(div)<100
|
|||
if arvo>0
|
||||
mjono(1) = num2str(arvo);
|
||||
end
|
||||
|
||||
|
||||
else
|
||||
suurinYks = floor(log10(div));
|
||||
mjono(6) = num2str(suurinYks);
|
||||
|
|
@ -1125,7 +1125,7 @@ T = zeros(m,1);
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
function T = clusternum(X, T, k, c)
|
||||
m = size(X,1)+1;
|
||||
while(~isempty(k))
|
||||
|
|
@ -1136,7 +1136,7 @@ while(~isempty(k))
|
|||
% Assign this node number to leaf children
|
||||
t = (children<=m);
|
||||
T(children(t)) = c;
|
||||
|
||||
|
||||
% Move to next level
|
||||
k = children(~t) - m;
|
||||
end
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
context("Auxiliary functions to greedyMix")
|
||||
|
||||
# Defining the relative path to current inst -----------------------------------
|
||||
path_inst <- system.file("ext", "", package = "rBAPS")
|
||||
path_inst <- system.file("extdata", "", package = "rBAPS")
|
||||
|
||||
# Reading datasets -------------------------------------------------------------
|
||||
baps_diploid <- read.delim(
|
||||
file = paste(path_inst, "BAPS_format_clustering_diploid.txt", sep = "/"),
|
||||
file = file.path(path_inst, "BAPS_format_clustering_diploid.txt"),
|
||||
sep = " ",
|
||||
header = FALSE
|
||||
)
|
||||
|
|
@ -31,26 +31,27 @@ test_that("handleData works as expected", {
|
|||
expect_equal(data_obs, data_exp)
|
||||
})
|
||||
|
||||
context("Opening files on greedyMix")
|
||||
context("Processing files through greedyMix")
|
||||
|
||||
df_fasta <- greedyMix(
|
||||
raw_fasta <- importFile(
|
||||
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
|
||||
format = "FASTA"
|
||||
)
|
||||
df_vcf <- greedyMix(
|
||||
raw_vcf <- importFile(
|
||||
data = file.path(path_inst, "vcf_example.vcf"),
|
||||
format = "VCF",
|
||||
verbose = FALSE
|
||||
)
|
||||
df_bam <- greedyMix(
|
||||
df_bam <- importFile(
|
||||
data = file.path(path_inst, "bam_example.bam"),
|
||||
format = "BAM",
|
||||
)
|
||||
|
||||
test_that("Files are imported correctly", {
|
||||
expect_equal(dim(df_fasta), c(5, 99))
|
||||
expect_equal(dim(df_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
|
||||
expect_equal(dim(raw_fasta), c(5, 99))
|
||||
expect_equal(dim(raw_vcf), c(variants = 2, fix_cols = 8, gt_cols = 3))
|
||||
expect_error(
|
||||
greedyMix(
|
||||
importFile(
|
||||
data = paste(path_inst, "sam_example.sam", sep = "/"),
|
||||
format = "SAM",
|
||||
)
|
||||
|
|
@ -58,6 +59,15 @@ test_that("Files are imported correctly", {
|
|||
expect_equal(length(df_bam[[1]]), 13)
|
||||
})
|
||||
|
||||
df_fasta <- greedyMix(
|
||||
data = file.path(path_inst, "FASTA_clustering_haploid.fasta"),
|
||||
format = "FASTA"
|
||||
)
|
||||
test_that("greedyMix() works", {
|
||||
expect_error(greedyMix(file.path(path_inst, "vcf_example.vcf")))
|
||||
expect_error(greedyMix(file.path(path_inst, "bam_example.bam")))
|
||||
})
|
||||
|
||||
context("Linkage")
|
||||
|
||||
test_that("Linkages are properly calculated", {
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ test_that("lakseKlitik() and subfunctions produce expected output", {
|
|||
})
|
||||
|
||||
test_that("testFastaData() produces same output as on MATLAB", {
|
||||
msa <- system.file("ext", "seqs.fa", package = "rBAPS")
|
||||
msa <- system.file("extdata", "seqs.fa", package = "rBAPS")
|
||||
test_msa <- testFastaData(msa)
|
||||
expect_equal(test_msa$ninds, 515)
|
||||
expect_equal(dim(test_msa$data), c(515, 745))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue