Merge branch 'translate-greedyMix' into dev (issue #1)
This commit is contained in:
commit
4aba2528da
84 changed files with 1927 additions and 794 deletions
|
|
@ -1,7 +1,7 @@
|
|||
LICENSE
|
||||
TODO.md
|
||||
matlab
|
||||
^matlab
|
||||
CHANGELOG.md
|
||||
CITATION.cff
|
||||
.travis.yml
|
||||
data/ExamplesDataFormatting
|
||||
inst/ext/ExamplesDataFormatting
|
||||
10
DESCRIPTION
10
DESCRIPTION
|
|
@ -1,7 +1,7 @@
|
|||
Package: rBAPS
|
||||
Title: Bayesian Analysis of Population Structure
|
||||
Version: 0.0.0.9000
|
||||
Date: 2020-01-14
|
||||
Version: 0.0.0.9001
|
||||
Date: 2020-11-09
|
||||
Authors@R:
|
||||
c(
|
||||
person(
|
||||
|
|
@ -30,14 +30,14 @@ Description: Partial R implementation of the BAPS software
|
|||
Corander et al. 2008b <doi:10.1007/s00180-007-0072-x>;
|
||||
Tang et al. 2009 <doi:10.1371/journal.pcbi.1000455>;
|
||||
Cheng et al. 2011 <doi:10.1186/1471-2105-12-302>,
|
||||
available at <http://www.helsinki.fi/bsg/software/BAPS/Z>), provides a
|
||||
computationally-efficient method for the identification of admixture events
|
||||
available at <http://www.helsinki.fi/bsg/software/BAPS/Z>), provides a
|
||||
computationally-efficient method for the identification of admixture events
|
||||
in genetic population history.
|
||||
License: GPL-3
|
||||
Encoding: UTF-8
|
||||
LazyData: true
|
||||
RoxygenNote: 7.1.1
|
||||
Suggests:
|
||||
Suggests:
|
||||
testthat (>= 2.1.0)
|
||||
Imports:
|
||||
methods
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@ export(computeIndLogml)
|
|||
export(computePersonalAllFreqs)
|
||||
export(computeRows)
|
||||
export(etsiParas)
|
||||
export(fgetl)
|
||||
export(fopen)
|
||||
export(greedyMix)
|
||||
export(handleData)
|
||||
export(initPopNames)
|
||||
|
|
@ -21,6 +23,7 @@ export(linkage)
|
|||
export(logml2String)
|
||||
export(lueGenePopData)
|
||||
export(lueNimi)
|
||||
export(matlab2r)
|
||||
export(noIndex)
|
||||
export(ownNum2Str)
|
||||
export(poistaLiianPienet)
|
||||
|
|
@ -47,3 +50,4 @@ export(writeMixtureInfo)
|
|||
importFrom(methods,is)
|
||||
importFrom(stats,runif)
|
||||
importFrom(utils,read.delim)
|
||||
importFrom(utils,write.table)
|
||||
|
|
|
|||
|
|
@ -12,22 +12,25 @@ addAlleles <- function(data, ind, line, divider) {
|
|||
# line. Jos data on 3 digit formaatissa on divider=1000.
|
||||
# Jos data on 2 digit formaatissa on divider=100.
|
||||
|
||||
nloci <- size(data, 2) - 1
|
||||
nloci <- size(data, 2) # added 1 from original code
|
||||
if (size(data, 1) < (2 * ind)) {
|
||||
data <- c(data, zeros(100, nloci + 1))
|
||||
data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code
|
||||
}
|
||||
|
||||
k <- 1
|
||||
merkki <- line[k]
|
||||
merkki <- substring(line, k, k)
|
||||
while (merkki != ',') {
|
||||
k <- k + 1
|
||||
merkki <- line[k]
|
||||
k <- k + 1
|
||||
merkki <- substring(line, k, k)
|
||||
}
|
||||
line <- line[k + 1:length(line)]
|
||||
line <- substring(line, k + 1)
|
||||
# clear k; clear merkki;
|
||||
|
||||
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
|
||||
|
||||
if (grepl(" ", line)) {
|
||||
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
|
||||
} else if (grepl("\t", line)) {
|
||||
alleeliTaulu <- as.numeric(strsplit(line, split = "\t")[[1]])
|
||||
}
|
||||
|
||||
if (length(alleeliTaulu) != nloci) {
|
||||
stop('Incorrect data format.')
|
||||
|
|
@ -35,9 +38,9 @@ addAlleles <- function(data, ind, line, divider) {
|
|||
|
||||
for (j in seq_len(nloci)) {
|
||||
ekaAlleeli <- floor(alleeliTaulu[j] / divider)
|
||||
if (ekaAlleeli == 0) ekaAlleeli <- -999
|
||||
if (is.na(ekaAlleeli) | ekaAlleeli == 0) ekaAlleeli <- -999
|
||||
tokaAlleeli <- alleeliTaulu[j] %% divider
|
||||
if (tokaAlleeli == 0) tokaAlleeli <- -999
|
||||
if (is.na(tokaAlleeli) | tokaAlleeli == 0) tokaAlleeli <- -999
|
||||
|
||||
data[2 * ind - 1, j] <- ekaAlleeli
|
||||
data[2 * ind, j] <- tokaAlleeli
|
||||
|
|
|
|||
18
R/addToSummary.R
Normal file
18
R/addToSummary.R
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
addToSummary <- function(logml, partitionSummary, worstIndex) {
|
||||
# Tiedet<65><74>n, ett<74> annettu logml on isompi kuin huonoin arvo
|
||||
# partitionSummary taulukossa. Jos partitionSummary:ss<73> ei viel<65> ole
|
||||
# annettua logml arvoa, niin lis<69>t<EFBFBD><74>n worstIndex:in kohtaan uusi logml ja
|
||||
# nykyist<73> partitiota vastaava nclusters:in arvo. Muutoin ei tehd<68> mit<69><74>n.
|
||||
|
||||
apu <- find(abs(partitionSummary[, 2] - logml) < 1e-5)
|
||||
if (isempty(apu)) {
|
||||
# Nyt l<>ydetty partitio ei ole viel<65> kirjattuna summaryyn.
|
||||
npops <- length(unique(PARTITION))
|
||||
partitionSummary[worstIndex, 1] <- npops
|
||||
partitionSummary[worstIndex, 2] <- logml
|
||||
added <- 1
|
||||
} else {
|
||||
added <- 0
|
||||
}
|
||||
return(list(partitionSummary = partitionSummary, added = added))
|
||||
}
|
||||
20
R/admixture_initialization.R
Normal file
20
R/admixture_initialization.R
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
#' @title Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.
|
||||
#' @param data_matrix data_matrix
|
||||
#' @param nclusters ncluster
|
||||
#' @param Z Z
|
||||
|
||||
admixture_initialization <- function (data_matrix, nclusters, Z) {
|
||||
size_data <- size(data_matrix)
|
||||
nloci <- size_data[2] - 1
|
||||
n <- max(data_matrix[, ncol(data_matrix)])
|
||||
T <- cluster_own(Z, nclusters)
|
||||
initial_partition <- zeros(size_data[1], 1)
|
||||
for (i in 1:n) {
|
||||
kori <- T[i]
|
||||
here <- find(data_matrix[, ncol(data_matrix)] == i)
|
||||
for (j in 1:length(here)) {
|
||||
initial_partition[here[j], 1] <- kori
|
||||
}
|
||||
}
|
||||
return(initial_partition)
|
||||
}
|
||||
14
R/arvoSeuraavaTi.R
Normal file
14
R/arvoSeuraavaTi.R
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
arvoSeuraavaTila <- function(muutokset, logml) {
|
||||
# Suorittaa yksil<69>n seuraavan tilan arvonnan
|
||||
|
||||
y <- logml + muutokset # siirron j<>lkeiset logml:t
|
||||
y <- y - max(y)
|
||||
y <- exp(y)
|
||||
summa <- sum(y)
|
||||
y <- y / summa
|
||||
y <- cumsum(y)
|
||||
|
||||
i2 <- rand_disc(y) # uusi kori
|
||||
suurin <- muutokset(i2)
|
||||
return(list(suurin = suurin, i2 = i2))
|
||||
}
|
||||
6
R/cell.R
6
R/cell.R
|
|
@ -6,10 +6,10 @@
|
|||
#' @return An array of zeroes with the dimensions passed on call
|
||||
cell <- function(n, sz = c(n, n), ...) {
|
||||
if (length(sz) == 1 & missing(...)) {
|
||||
return(array(dim = c(n, sz)))
|
||||
return(array(0, dim = c(n, sz)))
|
||||
} else if (length(sz) == 2) {
|
||||
return(array(dim = sz))
|
||||
return(array(0, dim = sz))
|
||||
} else {
|
||||
return(array(dim = c(n, sz, ...)))
|
||||
return(array(0, dim = c(n, sz, ...)))
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
clearGlobalVars <- function() {
|
||||
COUNTS <- SUMCOUNTS <- PARTITION <- POP_LOGML <- vector() # placeholders
|
||||
COUNTS <<- vector()
|
||||
SUMCOUNTS <<- vector()
|
||||
PARTITION <<- vector()
|
||||
POP_LOGML <<- vector()
|
||||
COUNTS <- vector()
|
||||
SUMCOUNTS <- vector()
|
||||
PARTITION <- vector()
|
||||
POP_LOGML <- vector()
|
||||
LOGDIFF <- vector()
|
||||
}
|
||||
51
R/cluster_own.R
Normal file
51
R/cluster_own.R
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
cluster_own <- function(Z, nclust) {
|
||||
true <- TRUE
|
||||
false <- FALSE
|
||||
maxclust <- nclust
|
||||
# % Start of algorithm
|
||||
m <- size(Z, 1) + 1
|
||||
T <- zeros(m, 1)
|
||||
# % maximum number of clusters based on inconsistency
|
||||
if (m <= maxclust) {
|
||||
T = t((1:m))
|
||||
} else if (maxclust == 1) {
|
||||
T <- ones(m, 1)
|
||||
} else {
|
||||
clsnum <- 1
|
||||
for (k in (m - maxclust + 1):(m - 1)) {
|
||||
i = Z(k, 1) # left tree
|
||||
if (i <= m) { # original node, no leafs
|
||||
T[i] = clsnum
|
||||
clsnum = clsnum + 1
|
||||
} else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree
|
||||
T <- clusternum(Z, T, i - m, clsnum)
|
||||
clsnum <- clsnum + 1
|
||||
}
|
||||
i <- Z(k, 2) # right tree
|
||||
if (i <= m) { # original node, no leafs
|
||||
T[i] <- clsnum
|
||||
clsnum <- clsnum + 1
|
||||
} else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree
|
||||
T <- clusternum(Z, T, i - m, clsnum)
|
||||
clsnum <- clsnum + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
return(T)
|
||||
}
|
||||
|
||||
clusternum <- function(X, T, k, c) {
|
||||
m <- size(X, 1) + 1
|
||||
while (!isempty(k)) {
|
||||
# Get the children of nodes at this level
|
||||
children <- X[k, 1:2]
|
||||
children <- children
|
||||
|
||||
# Assign this node number to leaf children
|
||||
t <- (children <= m)
|
||||
T[children[t]] <- c
|
||||
# Move to next level
|
||||
k <- children(!t) - m
|
||||
}
|
||||
return(T)
|
||||
}
|
||||
16
R/clusternum.R
Normal file
16
R/clusternum.R
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
clusternum <- function(X, T, k, c) {
|
||||
m <- size(X, 1) + 1
|
||||
while (!is.null(k)) {
|
||||
# % Get the children of nodes at this level
|
||||
children <- X[k, 1:2]
|
||||
children <- children[, ]
|
||||
|
||||
# % Assign this node number to leaf children
|
||||
t <- (children <= m)
|
||||
T[children(t)] <- c
|
||||
|
||||
# % Move to next level
|
||||
k <- children(!t) - m
|
||||
}
|
||||
return(T)
|
||||
}
|
||||
17
R/computeDiffInCounts.R
Normal file
17
R/computeDiffInCounts.R
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
computeDiffInCounts <- function(rows, max_noalle, nloci, data) {
|
||||
# % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
|
||||
# % lukum<75><6D>r<EFBFBD>t (vastaavasti kuin COUNTS:issa), jotka ovat data:n
|
||||
# % riveill<6C> rows. rows pit<69><74> olla vaakavektori.
|
||||
|
||||
diffInCounts <- zeros(max_noalle, nloci)
|
||||
for (i in rows) {
|
||||
row <- data[i, ]
|
||||
notEmpty <- find(row>=0)
|
||||
|
||||
if (length(notEmpty) > 0) {
|
||||
diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] <-
|
||||
diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] + 1
|
||||
}
|
||||
}
|
||||
return(diffInCounts)
|
||||
}
|
||||
27
R/computeLogml.R
Normal file
27
R/computeLogml.R
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
computeLogml <- function(counts, sumcounts, noalle, data, rowsFromInd) {
|
||||
nloci <- size(counts, 2)
|
||||
npops <- size(counts, 3)
|
||||
adjnoalle <- zeros(max(noalle), nloci)
|
||||
for (j in 1:nloci) {
|
||||
adjnoalle[1:noalle[j], j] <- noalle(j)
|
||||
if ((noalle(j)<max(noalle))) {
|
||||
adjnoalle[noalle[j] + 1:ncol(adjnoalle), j] <- 1
|
||||
}
|
||||
}
|
||||
|
||||
rowsInG <- size(data, 1) + rowsFromInd
|
||||
|
||||
logml <- sum(
|
||||
sum(
|
||||
sum(
|
||||
GAMMA_LN[
|
||||
counts + 1 +
|
||||
repmat(rowsInG * (adjnoalle - 1), c(1, 1, npops))
|
||||
]
|
||||
)
|
||||
)
|
||||
) -
|
||||
npops * sum(sum(GAMMA_LN[1, adjnoalle])) -
|
||||
sum(sum(GAMMA_LN[sumcounts + 1, 1]))
|
||||
return(logml)
|
||||
}
|
||||
24
R/computePopulationLogml.R
Normal file
24
R/computePopulationLogml.R
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
computePopulationLogml <- function(pops, adjprior, priorTerm) {
|
||||
# Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset
|
||||
|
||||
x <- size(COUNTS, 1)
|
||||
y <- size(COUNTS, 2)
|
||||
z <- length(pops)
|
||||
|
||||
popLogml <- squeeze(
|
||||
sum(
|
||||
sum(
|
||||
reshape(
|
||||
lgamma(
|
||||
repmat(adjprior, c(1, 1, length(pops))) +
|
||||
COUNTS[, , pops]
|
||||
),
|
||||
c(x, y, z)
|
||||
),
|
||||
1
|
||||
),
|
||||
2
|
||||
)
|
||||
) - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
|
||||
return(popLogml)
|
||||
}
|
||||
27
R/fgetl-fopen.R
Normal file
27
R/fgetl-fopen.R
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
#' @title Read line from file, removing newline characters
|
||||
#' @description Equivalent function to its homonymous Matlab equivalent.
|
||||
#' @param file character vector to be read, usually an output of `fopen()`
|
||||
#' @return If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1.
|
||||
#' @author Waldir Leoncio
|
||||
#' @seealso fopen
|
||||
#' @export
|
||||
fgetl <- function(file) {
|
||||
# ==========================================================================
|
||||
# Validation
|
||||
# ==========================================================================
|
||||
if (length(file) <= 1) return(-1)
|
||||
# ==========================================================================
|
||||
# Returning file minus the first line
|
||||
# ==========================================================================
|
||||
out <- file[-1]
|
||||
return(out)
|
||||
}
|
||||
|
||||
#' @title Open file
|
||||
#' @description Open a text file
|
||||
#' @param filename Path and name of file to be open
|
||||
#' @return The same as `readLines(filename)`
|
||||
#' @author Waldir Leoncio
|
||||
#' @seealso fgetl
|
||||
#' @export
|
||||
fopen <- function(filename) readLines(filename)
|
||||
12
R/findEmptyPop.R
Normal file
12
R/findEmptyPop.R
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
findEmptyPop <- function(npops) {
|
||||
# % Palauttaa ensimm<6D>isen tyhj<68>n populaation indeksin. Jos tyhji<6A>
|
||||
# % populaatioita ei ole, palauttaa -1:n.
|
||||
pops <- t(unique(PARTITION))
|
||||
if (length(pops) == npops) {
|
||||
emptyPop <- -1
|
||||
} else {
|
||||
popDiff <- diff(c(0, pops, npops + 1))
|
||||
emptyPop <- min(find(popDiff > 1))
|
||||
}
|
||||
return(list(emptyPop = emptyPop, pops = pops))
|
||||
}
|
||||
43
R/getDistances.R
Normal file
43
R/getDistances.R
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
getDistances <- function(data_matrix, nclusters) {
|
||||
|
||||
# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance
|
||||
# %gives partition in 8 - bit format
|
||||
# %allocates all alleles of a single individual into the same basket
|
||||
# %data_matrix contains #Loci + 1 columns, last column indicate whose alleles are placed in each row,
|
||||
# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row
|
||||
# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix.
|
||||
|
||||
size_data <- size(data_matrix)
|
||||
nloci <- size_data[2] - 1
|
||||
n <- max(data_matrix[, ncol(data_matrix)])
|
||||
distances <- zeros(choose(n, 2), 1)
|
||||
pointer <- 1
|
||||
for (i in 1:n - 1) {
|
||||
i_data <- data_matrix[
|
||||
find(data_matrix[, ncol(data_matrix)] == i),
|
||||
1:nloci
|
||||
]
|
||||
for (j in (i + 1):n) {
|
||||
d_ij <- 0
|
||||
j_data <- data_matrix[find(data_matrix[, ncol()] == j), 1:nloci]
|
||||
vertailuja <- 0
|
||||
for (k in 1:size(i_data, 1)) {
|
||||
for (l in 1:size(j_data, 1)) {
|
||||
here_i <- find(i_data[k, ] >= 0)
|
||||
here_j <- find(j_data[l, ] >= 0)
|
||||
here_joint <- intersect(here_i, here_j)
|
||||
vertailuja <- vertailuja + length(here_joint)
|
||||
d_ij <- d_ij + length(
|
||||
find(i_data[k, here_joint] != j_data[l, here_joint])
|
||||
)
|
||||
}
|
||||
}
|
||||
d_ij <- d_ij / vertailuja
|
||||
distances[pointer] <- d_ij
|
||||
pointer <- pointer + 1
|
||||
}
|
||||
}
|
||||
|
||||
Z <- linkage(t(distances))
|
||||
return(list(Z = Z, distances = distances))
|
||||
}
|
||||
3
R/globals.R
Normal file
3
R/globals.R
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
utils::globalVariables(
|
||||
c("PARTITION", "COUNTS", "SUMCOUNTS", "LOGDIFF", "POP_LOGML", "GAMMA_LN")
|
||||
)
|
||||
689
R/greedyMix.R
689
R/greedyMix.R
|
|
@ -11,10 +11,11 @@ greedyMix <- function(
|
|||
savePreProcessed = NULL,
|
||||
filePreProcessed = NULL
|
||||
) {
|
||||
# ASK: graphical components. Remove?
|
||||
# ASK: Unclear when fixedk == TRUE. Remove?
|
||||
# check whether fixed k mode is selected
|
||||
# h0 <- findobj('Tag','fixk_menu')
|
||||
# fixedK = get(h0, 'userdata');
|
||||
fixedK <- FALSE
|
||||
|
||||
# if fixedK
|
||||
# if ~(fixKWarning == 1) % call function fixKWarning
|
||||
|
|
@ -22,9 +23,11 @@ greedyMix <- function(
|
|||
# end
|
||||
# end
|
||||
|
||||
# ASK: ditto
|
||||
# % check whether partition compare mode is selected
|
||||
# h1 = findobj('Tag','partitioncompare_menu');
|
||||
# partitionCompare = get(h1, 'userdata');
|
||||
partitionCompare <- FALSE
|
||||
|
||||
if (is(tietue, "list") | is(tietue, "character")) {
|
||||
# ----------------------------------------------------------------------
|
||||
|
|
@ -149,13 +152,25 @@ greedyMix <- function(
|
|||
|
||||
kunnossa <- testaaGenePopData(filename_pathname)
|
||||
if (kunnossa == 0) stop("testaaGenePopData returned 0")
|
||||
# [data,popnames]=lueGenePopData([pathname filename]); # TODO: trans
|
||||
data_popnames <- lueGenePopData(filename_pathname)
|
||||
data <- data_popnames$data
|
||||
popnames <- data_popnames$popnames
|
||||
|
||||
# h0 = findobj('Tag','filename1_text');
|
||||
# set(h0,'String',filename); clear h0;
|
||||
|
||||
# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO:trans
|
||||
# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: trans
|
||||
list_dranap <- handleData(data)
|
||||
data <- list_dranap$newData
|
||||
rowsFromInd <- list_dranap$rowsFromInd
|
||||
alleleCodes <- list_dranap$alleleCodes
|
||||
noalle <- list_dranap$noalle
|
||||
adjprior <- list_dranap$adjprior
|
||||
priorTerm <- list_dranap$prioterm
|
||||
|
||||
list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug
|
||||
Z <- list_Zd$Z
|
||||
dist <- list_Zd$dist
|
||||
|
||||
if (is.null(savePreProcessed)) {
|
||||
save_preproc <- questdlg(
|
||||
quest = 'Do you wish to save pre-processed data?',
|
||||
|
|
@ -232,15 +247,17 @@ greedyMix <- function(
|
|||
}
|
||||
|
||||
# ==========================================================================
|
||||
# Declaring global variables
|
||||
# Declaring global variables and changing environment of children functions
|
||||
# ==========================================================================
|
||||
PARTITION <- vector()
|
||||
COUNTS <- vector()
|
||||
SUMCOUNTS <- vector()
|
||||
POP_LOGML <- vector()
|
||||
clearGlobalVars <- vector()
|
||||
# ==========================================================================
|
||||
clearGlobalVars()
|
||||
|
||||
environment(writeMixtureInfo) <- environment()
|
||||
# ==========================================================================
|
||||
c <- list()
|
||||
c$data <- data
|
||||
c$noalle <- noalle
|
||||
c$adjprior <- adjprior
|
||||
|
|
@ -253,43 +270,49 @@ greedyMix <- function(
|
|||
ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd)
|
||||
c$rows <- c(ekat, ekat + rowsFromInd - 1)
|
||||
|
||||
# ASK remove?
|
||||
# partition compare
|
||||
if (!is.null(partitionCompare)) {
|
||||
nsamplingunits <- size(c$rows, 1)
|
||||
partitions <- partitionCompare$partitions
|
||||
npartitions <- size(partitions, 2)
|
||||
partitionLogml <- zeros(1, npartitions)
|
||||
for (i in seq_len(npartitions)) {
|
||||
# number of unique partition lables
|
||||
npops <- length(unique(partitions[, i]))
|
||||
# if (!is.null(partitionCompare)) {
|
||||
# nsamplingunits <- size(c$rows, 1)
|
||||
# partitions <- partitionCompare$partitions
|
||||
# npartitions <- size(partitions, 2)
|
||||
# partitionLogml <- zeros(1, npartitions)
|
||||
# for (i in seq_len(npartitions)) {
|
||||
# # number of unique partition lables
|
||||
# npops <- length(unique(partitions[, i]))
|
||||
|
||||
partitionInd <- zeros(ninds * rowsFromInd, 1)
|
||||
partitionSample <- partitions[, i]
|
||||
for (j in seq_len(nsamplingunits)) {
|
||||
partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
|
||||
}
|
||||
# partitionLogml[i] = initialCounts(
|
||||
# partitionInd,
|
||||
# data[, seq_len(end - 1)],
|
||||
# npops,
|
||||
# c$rows,
|
||||
# noalle,
|
||||
# adjprior
|
||||
# ) #TODO translate
|
||||
}
|
||||
# return the logml result
|
||||
partitionCompare$logmls <- partitionLogml
|
||||
# set(h1, 'userdata', partitionCompare) # ASK remove?
|
||||
return()
|
||||
}
|
||||
|
||||
# ASK remove (graphical part)?
|
||||
# if (fixedK) {
|
||||
# #logml_npops_partitionSummary <- indMix_fixK(c) # ASK translate?
|
||||
# } else {
|
||||
# #logml_npops_partitionSummary <- indMix(c) # ASK translate?
|
||||
# partitionInd <- zeros(ninds * rowsFromInd, 1)
|
||||
# partitionSample <- partitions[, i]
|
||||
# for (j in seq_len(nsamplingunits)) {
|
||||
# partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
|
||||
# }
|
||||
# # partitionLogml[i] = initialCounts(
|
||||
# # partitionInd,
|
||||
# # data[, seq_len(end - 1)],
|
||||
# # npops,
|
||||
# # c$rows,
|
||||
# # noalle,
|
||||
# # adjprior
|
||||
# # ) #TODO translate
|
||||
# }
|
||||
# # return the logml result
|
||||
# partitionCompare$logmls <- partitionLogml
|
||||
# # set(h1, 'userdata', partitionCompare)
|
||||
# return()
|
||||
# }
|
||||
# if (logml_npops_partitionSummary$logml == 1) return()
|
||||
|
||||
if (fixedK) {
|
||||
# logml_npops_partitionSummary <- indMix_fixK(c) # TODO: translate
|
||||
# logml <- logml_npops_partitionSummary$logml
|
||||
# npops <- logml_npops_partitionSummary$npops
|
||||
# partitionSummary <- logml_npops_partitionSummary$partitionSummary
|
||||
} else {
|
||||
logml_npops_partitionSummary <- indMix(c) # TODO: translate
|
||||
logml <- logml_npops_partitionSummary$logml
|
||||
npops <- logml_npops_partitionSummary$npops
|
||||
partitionSummary <- logml_npops_partitionSummary$partitionSummary
|
||||
}
|
||||
if (logml_npops_partitionSummary$logml == 1) return()
|
||||
|
||||
data <- data[, seq_len(ncol(data) - 1)]
|
||||
|
||||
|
|
@ -298,11 +321,13 @@ greedyMix <- function(
|
|||
# inp = get(h0,'String');
|
||||
# h0 = findobj('Tag','filename2_text')
|
||||
# outp = get(h0,'String');
|
||||
inp <- vector()
|
||||
outp <- vector()
|
||||
|
||||
changesInLogml <- writeMixtureInfo(
|
||||
logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
|
||||
popnames, fixedK
|
||||
) # FIXMEL depends on get function above
|
||||
) # FIXME: broken
|
||||
|
||||
# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder
|
||||
|
||||
|
|
@ -356,583 +381,3 @@ greedyMix <- function(
|
|||
if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps')
|
||||
}
|
||||
}
|
||||
|
||||
# %-------------------------------------------------------------------------------------
|
||||
|
||||
# function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex)
|
||||
# % Tiedet<65><74>n, ett?annettu logml on isompi kuin huonoin arvo
|
||||
# % partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole
|
||||
# % annettua logml arvoa, niin lis<69>t<EFBFBD><74>n worstIndex:in kohtaan uusi logml ja
|
||||
# % nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mit<69><74>n.
|
||||
|
||||
# apu = find(abs(partitionSummary(:,2)-logml)<1e-5);
|
||||
# if isempty(apu)
|
||||
# % Nyt l<>ydetty partitio ei ole viel?kirjattuna summaryyn.
|
||||
# global PARTITION;
|
||||
# npops = length(unique(PARTITION));
|
||||
# partitionSummary(worstIndex,1) = npops;
|
||||
# partitionSummary(worstIndex,2) = logml;
|
||||
# added = 1;
|
||||
# else
|
||||
# added = 0;
|
||||
# end
|
||||
|
||||
|
||||
# %--------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function [suurin, i2] = arvoSeuraavaTila(muutokset, logml)
|
||||
# % Suorittaa yksil<69>n seuraavan tilan arvonnan
|
||||
|
||||
# y = logml + muutokset; % siirron j<>lkeiset logml:t
|
||||
# y = y - max(y);
|
||||
# y = exp(y);
|
||||
# summa = sum(y);
|
||||
# y = y/summa;
|
||||
# y = cumsum(y);
|
||||
|
||||
# i2 = rand_disc(y); % uusi kori
|
||||
# suurin = muutokset(i2);
|
||||
|
||||
|
||||
# %--------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function svar=rand_disc(CDF)
|
||||
# %returns an index of a value from a discrete distribution using inversion method
|
||||
# slump=rand;
|
||||
# har=find(CDF>slump);
|
||||
# svar=har(1);
|
||||
|
||||
|
||||
# %-------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ...
|
||||
# adjprior, priorTerm)
|
||||
# % Suorittaa globaalien muuttujien muutokset, kun yksil?ind
|
||||
# % on siirret<65><74>n koriin i2.
|
||||
|
||||
# global PARTITION;
|
||||
# global COUNTS;
|
||||
# global SUMCOUNTS;
|
||||
# global POP_LOGML;
|
||||
|
||||
# i1 = PARTITION(ind);
|
||||
# PARTITION(ind)=i2;
|
||||
|
||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
|
||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
|
||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
|
||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
|
||||
|
||||
# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
||||
|
||||
|
||||
# %---------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function updateGlobalVariables2( ...
|
||||
# i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm);
|
||||
# % Suorittaa globaalien muuttujien muutokset, kun kaikki
|
||||
# % korissa i1 olevat yksil<69>t siirret<65><74>n koriin i2.
|
||||
|
||||
# global PARTITION;
|
||||
# global COUNTS;
|
||||
# global SUMCOUNTS;
|
||||
# global POP_LOGML;
|
||||
|
||||
# inds = find(PARTITION==i1);
|
||||
# PARTITION(inds) = i2;
|
||||
|
||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
|
||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
|
||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
|
||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
|
||||
|
||||
# POP_LOGML(i1) = 0;
|
||||
# POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm);
|
||||
|
||||
|
||||
# %------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ...
|
||||
# adjprior, priorTerm, i2);
|
||||
# % Suorittaa globaalien muuttujien p<>ivitykset, kun yksil<69>t 'muuttuvat'
|
||||
# % siirret<65><74>n koriin i2. Ennen siirtoa yksil<69>iden on kuuluttava samaan
|
||||
# % koriin.
|
||||
|
||||
# global PARTITION;
|
||||
# global COUNTS;
|
||||
# global SUMCOUNTS;
|
||||
# global POP_LOGML;
|
||||
|
||||
# i1 = PARTITION(muuttuvat(1));
|
||||
# PARTITION(muuttuvat) = i2;
|
||||
|
||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
|
||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
|
||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
|
||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
|
||||
|
||||
# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
||||
|
||||
|
||||
# %----------------------------------------------------------------------
|
||||
|
||||
|
||||
# function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm)
|
||||
# % Palauttaa yksil<69>t j<>rjestyksess?siten, ett?ensimm<6D>isen?on
|
||||
# % se, jonka poistaminen populaatiosta pop nostaisi logml:n
|
||||
# % arvoa eniten.
|
||||
|
||||
# global COUNTS; global SUMCOUNTS;
|
||||
# ninds = length(inds);
|
||||
# apuTaulu = [inds, zeros(ninds,1)];
|
||||
|
||||
# for i=1:ninds
|
||||
# ind = inds(i);
|
||||
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
|
||||
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
||||
# diffInSumCounts = sum(diffInCounts);
|
||||
|
||||
# COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts;
|
||||
# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts;
|
||||
# apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm);
|
||||
# COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts;
|
||||
# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts;
|
||||
# end
|
||||
# apuTaulu = sortrows(apuTaulu,2);
|
||||
# inds = apuTaulu(ninds:-1:1,1);
|
||||
|
||||
# %------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function [muutokset, diffInCounts] = ...
|
||||
# laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm)
|
||||
# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
|
||||
# % muutos logml:ss? mik<69>li yksil?ind siirret<65><74>n koriin i.
|
||||
# % diffInCounts on poistettava COUNTS:in siivusta i1 ja lis<69>tt<74>v?
|
||||
# % COUNTS:in siivuun i2, mik<69>li muutos toteutetaan.
|
||||
|
||||
# global COUNTS; global SUMCOUNTS;
|
||||
# global PARTITION; global POP_LOGML;
|
||||
# npops = size(COUNTS,3);
|
||||
# muutokset = zeros(npops,1);
|
||||
|
||||
# i1 = PARTITION(ind);
|
||||
# i1_logml = POP_LOGML(i1);
|
||||
|
||||
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
|
||||
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
||||
# diffInSumCounts = sum(diffInCounts);
|
||||
|
||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
|
||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
|
||||
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
|
||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
||||
|
||||
# i2 = [1:i1-1 , i1+1:npops];
|
||||
# i2_logml = POP_LOGML(i2);
|
||||
|
||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
||||
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
|
||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
|
||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
|
||||
|
||||
# muutokset(i2) = new_i1_logml - i1_logml ...
|
||||
# + new_i2_logml - i2_logml;
|
||||
|
||||
|
||||
# %------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function [muutokset, diffInCounts] = laskeMuutokset2( ...
|
||||
# i1, rowsFromInd, data, adjprior, priorTerm);
|
||||
# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
|
||||
# % muutos logml:ss? mik<69>li korin i1 kaikki yksil<69>t siirret<65><74>n
|
||||
# % koriin i.
|
||||
|
||||
# global COUNTS; global SUMCOUNTS;
|
||||
# global PARTITION; global POP_LOGML;
|
||||
# npops = size(COUNTS,3);
|
||||
# muutokset = zeros(npops,1);
|
||||
|
||||
# i1_logml = POP_LOGML(i1);
|
||||
|
||||
# inds = find(PARTITION==i1);
|
||||
# ninds = length(inds);
|
||||
|
||||
# if ninds==0
|
||||
# diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2));
|
||||
# return;
|
||||
# end
|
||||
|
||||
# rows = computeRows(rowsFromInd, inds, ninds);
|
||||
|
||||
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
||||
# diffInSumCounts = sum(diffInCounts);
|
||||
|
||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
|
||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
|
||||
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
|
||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
||||
|
||||
# i2 = [1:i1-1 , i1+1:npops];
|
||||
# i2_logml = POP_LOGML(i2);
|
||||
|
||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
||||
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
|
||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
|
||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
|
||||
|
||||
# muutokset(i2) = new_i1_logml - i1_logml ...
|
||||
# + new_i2_logml - i2_logml;
|
||||
|
||||
|
||||
|
||||
# %------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ...
|
||||
# data, adjprior, priorTerm, i1)
|
||||
# % Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio
|
||||
# % kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio
|
||||
# % inds2(find(T2==i)) siirret<65><74>n koriin j.
|
||||
|
||||
# global COUNTS; global SUMCOUNTS;
|
||||
# global PARTITION; global POP_LOGML;
|
||||
# npops = size(COUNTS,3);
|
||||
# npops2 = length(unique(T2));
|
||||
# muutokset = zeros(npops2, npops);
|
||||
|
||||
# i1_logml = POP_LOGML(i1);
|
||||
|
||||
# for pop2 = 1:npops2
|
||||
# inds = inds2(find(T2==pop2));
|
||||
# ninds = length(inds);
|
||||
# if ninds>0
|
||||
# rows = computeRows(rowsFromInd, inds, ninds);
|
||||
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
||||
# diffInSumCounts = sum(diffInCounts);
|
||||
|
||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
|
||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
|
||||
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
|
||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
||||
|
||||
# i2 = [1:i1-1 , i1+1:npops];
|
||||
# i2_logml = POP_LOGML(i2)';
|
||||
|
||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
||||
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)';
|
||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
|
||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
|
||||
|
||||
# muutokset(pop2,i2) = new_i1_logml - i1_logml ...
|
||||
# + new_i2_logml - i2_logml;
|
||||
# end
|
||||
# end
|
||||
|
||||
|
||||
# %------------------------------------------------------------------------------------
|
||||
|
||||
# function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ...
|
||||
# priorTerm, i1, i2)
|
||||
|
||||
# % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi
|
||||
# % muutos logml:ss? mik<69>li yksil?i vaihtaisi koria i1:n ja i2:n v<>lill?
|
||||
|
||||
# global COUNTS; global SUMCOUNTS;
|
||||
# global PARTITION; global POP_LOGML;
|
||||
|
||||
# ninds = length(inds);
|
||||
# muutokset = zeros(ninds,1);
|
||||
|
||||
# i1_logml = POP_LOGML(i1);
|
||||
# i2_logml = POP_LOGML(i2);
|
||||
|
||||
# for i = 1:ninds
|
||||
# ind = inds(i);
|
||||
# if PARTITION(ind)==i1
|
||||
# pop1 = i1; %mist?
|
||||
# pop2 = i2; %mihin
|
||||
# else
|
||||
# pop1 = i2;
|
||||
# pop2 = i1;
|
||||
# end
|
||||
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
|
||||
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
||||
# diffInSumCounts = sum(diffInCounts);
|
||||
|
||||
# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts;
|
||||
# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts;
|
||||
# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts;
|
||||
# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts;
|
||||
|
||||
# PARTITION(ind) = pop2;
|
||||
|
||||
# new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
||||
|
||||
# muutokset(i) = sum(new_logmls);
|
||||
|
||||
# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts;
|
||||
# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts;
|
||||
# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts;
|
||||
# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts;
|
||||
|
||||
# PARTITION(ind) = pop1;
|
||||
# end
|
||||
|
||||
# muutokset = muutokset - i1_logml - i2_logml;
|
||||
|
||||
# %--------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
# function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
|
||||
# % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
|
||||
# % lukum<75><6D>r<EFBFBD>t (vastaavasti kuin COUNTS:issa), jotka ovat data:n
|
||||
# % riveill?rows.
|
||||
|
||||
# diffInCounts = zeros(max_noalle, nloci);
|
||||
# for i=rows
|
||||
# row = data(i,:);
|
||||
# notEmpty = find(row>=0);
|
||||
|
||||
# if length(notEmpty)>0
|
||||
# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ...
|
||||
# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1;
|
||||
# end
|
||||
# end
|
||||
|
||||
|
||||
|
||||
# %------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function popLogml = computePopulationLogml(pops, adjprior, priorTerm)
|
||||
# % Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset
|
||||
# % logml:t koreille, jotka on m<><6D>ritelty pops-muuttujalla.
|
||||
|
||||
# global COUNTS;
|
||||
# global SUMCOUNTS;
|
||||
# x = size(COUNTS,1);
|
||||
# y = size(COUNTS,2);
|
||||
# z = length(pops);
|
||||
|
||||
# popLogml = ...
|
||||
# squeeze(sum(sum(reshape(...
|
||||
# gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ...
|
||||
# ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm;
|
||||
|
||||
|
||||
# %-----------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function npops = poistaTyhjatPopulaatiot(npops)
|
||||
# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
||||
# % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.
|
||||
|
||||
# global COUNTS;
|
||||
# global SUMCOUNTS;
|
||||
# global PARTITION;
|
||||
|
||||
# notEmpty = find(any(SUMCOUNTS,2));
|
||||
# COUNTS = COUNTS(:,:,notEmpty);
|
||||
# SUMCOUNTS = SUMCOUNTS(notEmpty,:);
|
||||
|
||||
# for n=1:length(notEmpty)
|
||||
# apu = find(PARTITION==notEmpty(n));
|
||||
# PARTITION(apu)=n;
|
||||
# end
|
||||
# npops = length(notEmpty);
|
||||
|
||||
|
||||
# %----------------------------------------------------------------------------------
|
||||
# %Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.
|
||||
|
||||
# function initial_partition=admixture_initialization(data_matrix,nclusters,Z)
|
||||
# size_data=size(data_matrix);
|
||||
# nloci=size_data(2)-1;
|
||||
# n=max(data_matrix(:,end));
|
||||
# T=cluster_own(Z,nclusters);
|
||||
# initial_partition=zeros(size_data(1),1);
|
||||
# for i=1:n
|
||||
# kori=T(i);
|
||||
# here=find(data_matrix(:,end)==i);
|
||||
# for j=1:length(here)
|
||||
# initial_partition(here(j),1)=kori;
|
||||
# end
|
||||
# end
|
||||
|
||||
# function T = cluster_own(Z,nclust)
|
||||
# true=logical(1);
|
||||
# false=logical(0);
|
||||
# maxclust = nclust;
|
||||
# % Start of algorithm
|
||||
# m = size(Z,1)+1;
|
||||
# T = zeros(m,1);
|
||||
# % maximum number of clusters based on inconsistency
|
||||
# if m <= maxclust
|
||||
# T = (1:m)';
|
||||
# elseif maxclust==1
|
||||
# T = ones(m,1);
|
||||
# else
|
||||
# clsnum = 1;
|
||||
# for k = (m-maxclust+1):(m-1)
|
||||
# i = Z(k,1); % left tree
|
||||
# if i <= m % original node, no leafs
|
||||
# T(i) = clsnum;
|
||||
# clsnum = clsnum + 1;
|
||||
# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree
|
||||
# T = clusternum(Z, T, i-m, clsnum);
|
||||
# clsnum = clsnum + 1;
|
||||
# end
|
||||
# i = Z(k,2); % right tree
|
||||
# if i <= m % original node, no leafs
|
||||
# T(i) = clsnum;
|
||||
# clsnum = clsnum + 1;
|
||||
# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree
|
||||
# T = clusternum(Z, T, i-m, clsnum);
|
||||
# clsnum = clsnum + 1;
|
||||
# end
|
||||
# end
|
||||
# end
|
||||
|
||||
# function T = clusternum(X, T, k, c)
|
||||
# m = size(X,1)+1;
|
||||
# while(~isempty(k))
|
||||
# % Get the children of nodes at this level
|
||||
# children = X(k,1:2);
|
||||
# children = children(:);
|
||||
|
||||
# % Assign this node number to leaf children
|
||||
# t = (children<=m);
|
||||
# T(children(t)) = c;
|
||||
|
||||
# % Move to next level
|
||||
# k = children(~t) - m;
|
||||
# end
|
||||
|
||||
# %----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function [Z, distances]=getDistances(data_matrix,nclusters)
|
||||
|
||||
# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance
|
||||
# %gives partition in 8-bit format
|
||||
# %allocates all alleles of a single individual into the same basket
|
||||
# %data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row,
|
||||
# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row
|
||||
# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix.
|
||||
|
||||
# size_data=size(data_matrix);
|
||||
# nloci=size_data(2)-1;
|
||||
# n=max(data_matrix(:,end));
|
||||
# distances=zeros(nchoosek(n,2),1);
|
||||
# pointer=1;
|
||||
# for i=1:n-1
|
||||
# i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci);
|
||||
# for j=i+1:n
|
||||
# d_ij=0;
|
||||
# j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci);
|
||||
# vertailuja = 0;
|
||||
# for k=1:size(i_data,1)
|
||||
# for l=1:size(j_data,1)
|
||||
# here_i=find(i_data(k,:)>=0);
|
||||
# here_j=find(j_data(l,:)>=0);
|
||||
# here_joint=intersect(here_i,here_j);
|
||||
# vertailuja = vertailuja + length(here_joint);
|
||||
# d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint)));
|
||||
# end
|
||||
# end
|
||||
# d_ij = d_ij / vertailuja;
|
||||
# distances(pointer)=d_ij;
|
||||
# pointer=pointer+1;
|
||||
# end
|
||||
# end
|
||||
|
||||
# Z=linkage(distances');
|
||||
|
||||
|
||||
|
||||
# %----------------------------------------------------------------------------------------
|
||||
|
||||
# function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd)
|
||||
# nloci = size(counts,2);
|
||||
# npops = size(counts,3);
|
||||
# adjnoalle = zeros(max(noalle),nloci);
|
||||
# for j=1:nloci
|
||||
# adjnoalle(1:noalle(j),j)=noalle(j);
|
||||
# if (noalle(j)<max(noalle))
|
||||
# adjnoalle(noalle(j)+1:end,j)=1;
|
||||
# end
|
||||
# end
|
||||
|
||||
# %logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ...
|
||||
# % - npops*sum(sum(gammaln(adjprior))) - ...
|
||||
# % sum(sum(gammaln(1+sumcounts)));
|
||||
# %logml = logml2;
|
||||
|
||||
# global GAMMA_LN;
|
||||
# rowsInG = size(data,1)+rowsFromInd;
|
||||
|
||||
# logml = sum(sum(sum(GAMMA_LN(counts+1 + repmat(rowsInG*(adjnoalle-1),[1 1 npops]))))) ...
|
||||
# - npops*sum(sum(GAMMA_LN(1, adjnoalle))) ...
|
||||
# -sum(sum(GAMMA_LN(sumcounts+1,1)));
|
||||
|
||||
|
||||
# %--------------------------------------------------------------------------
|
||||
|
||||
|
||||
# function initializeGammaln(ninds, rowsFromInd, maxAlleles)
|
||||
# %Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j)
|
||||
# global GAMMA_LN;
|
||||
# GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxAlleles);
|
||||
# for i=1:(ninds+1)*rowsFromInd
|
||||
# for j=1:maxAlleles
|
||||
# GAMMA_LN(i,j)=gammaln((i-1) + 1/j);
|
||||
# end
|
||||
# end
|
||||
|
||||
# %---------------------------------------------------------------
|
||||
|
||||
# function dist2 = laskeOsaDist(inds2, dist, ninds)
|
||||
# % Muodostaa dist vektorista osavektorin, joka sis<69>lt<6C><74> yksil<69>iden inds2
|
||||
# % v<>liset et<65>isyydet. ninds=kaikkien yksil<69>iden lukum<75><6D>r?
|
||||
|
||||
# ninds2 = length(inds2);
|
||||
# apu = zeros(nchoosek(ninds2,2),2);
|
||||
# rivi = 1;
|
||||
# for i=1:ninds2-1
|
||||
# for j=i+1:ninds2
|
||||
# apu(rivi, 1) = inds2(i);
|
||||
# apu(rivi, 2) = inds2(j);
|
||||
# rivi = rivi+1;
|
||||
# end
|
||||
# end
|
||||
# apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1));
|
||||
# dist2 = dist(apu);
|
||||
|
||||
# %--------------------------------------------------------------------------
|
||||
|
||||
# function [emptyPop, pops] = findEmptyPop(npops)
|
||||
# % Palauttaa ensimm<6D>isen tyhj<68>n populaation indeksin. Jos tyhji?
|
||||
# % populaatioita ei ole, palauttaa -1:n.
|
||||
|
||||
# global PARTITION;
|
||||
# pops = unique(PARTITION)';
|
||||
# if (length(pops) ==npops)
|
||||
# emptyPop = -1;
|
||||
# else
|
||||
# popDiff = diff([0 pops npops+1]);
|
||||
# emptyPop = min(find(popDiff > 1));
|
||||
# end
|
||||
|
|
@ -20,12 +20,11 @@ handleData <- function(raw_data) {
|
|||
# koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi.
|
||||
# T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
|
||||
# koodit saavat arvoja v?lill?1,...,noalle(j).
|
||||
|
||||
data <- raw_data
|
||||
nloci <- size(raw_data, 2) - 1
|
||||
|
||||
dataApu <- data[, 1:nloci]
|
||||
nollat <- find(dataApu==0)
|
||||
nollat <- find(dataApu == 0)
|
||||
if (!isempty(nollat)) {
|
||||
isoinAlleeli <- max(max(dataApu))
|
||||
dataApu[nollat] <- isoinAlleeli + 1
|
||||
|
|
@ -39,9 +38,12 @@ handleData <- function(raw_data) {
|
|||
alleelitLokuksessa <- cell(nloci, 1)
|
||||
for (i in 1:nloci) {
|
||||
alleelitLokuksessaI <- unique(data[, i])
|
||||
alleelitLokuksessa[i, 1] <- alleelitLokuksessaI[
|
||||
find(alleelitLokuksessaI >= 0)
|
||||
]
|
||||
alleelitLokuksessaI_pos <- find(alleelitLokuksessaI >= 0)
|
||||
alleelitLokuksessa[i, 1] <- ifelse(
|
||||
test = length(alleelitLokuksessaI_pos) > 0,
|
||||
yes = alleelitLokuksessaI[alleelitLokuksessaI_pos],
|
||||
no = 0
|
||||
)
|
||||
noalle[i] <- length(alleelitLokuksessa[i, 1])
|
||||
}
|
||||
alleleCodes <- zeros(max(noalle), nloci)
|
||||
|
|
@ -65,10 +67,10 @@ handleData <- function(raw_data) {
|
|||
emptyRow <- repmat(a, c(1, ncols))
|
||||
lessThanMax <- find(rowsFromInd < maxRowsFromInd)
|
||||
missingRows <- maxRowsFromInd * nind - nrows
|
||||
data <- as.matrix(c(data, zeros(missingRows, ncols)))
|
||||
data <- rbind(data, zeros(missingRows, ncols))
|
||||
pointer <- 1
|
||||
for (ind in t(lessThanMax)) { #K?y l?pi ne yksil?t, joilta puuttuu rivej?
|
||||
miss = maxRowsFromInd-rowsFromInd(ind); # T?lt?yksil?lt?puuttuvien lkm.
|
||||
miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm.
|
||||
}
|
||||
data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
|
||||
newData <- data
|
||||
|
|
@ -84,12 +86,12 @@ handleData <- function(raw_data) {
|
|||
priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j])
|
||||
}
|
||||
out <- list(
|
||||
newData = newData,
|
||||
newData = newData,
|
||||
rowsFromInd = rowsFromInd,
|
||||
alleleCodes = alleleCodes,
|
||||
noalle = noalle,
|
||||
adjprior = adjprior,
|
||||
priorTerm = priorTerm
|
||||
noalle = noalle,
|
||||
adjprior = adjprior,
|
||||
priorTerm = priorTerm
|
||||
)
|
||||
return(out)
|
||||
}
|
||||
567
R/indMix.R
Normal file
567
R/indMix.R
Normal file
|
|
@ -0,0 +1,567 @@
|
|||
indMix <- function(c, npops, dispText) {
|
||||
# Greedy search algorithm with unknown number of classes for regular
|
||||
# clustering.
|
||||
# Input npops is not used if called by greedyMix or greedyPopMix.
|
||||
|
||||
logml <- 1
|
||||
clearGlobalVars()
|
||||
|
||||
noalle <- c$noalle
|
||||
rows <- c$rows
|
||||
data <- c$data
|
||||
|
||||
adjprior <- c$adjprior
|
||||
priorTerm <- c$priorTerm
|
||||
rowsFromInd <- c$rowsFromInd
|
||||
|
||||
if (isfield(c, 'dist')) {
|
||||
dist <- c$dist
|
||||
Z <- c$Z
|
||||
}
|
||||
|
||||
rm(c)
|
||||
nargin <- length(as.list(match.call())) - 1
|
||||
|
||||
if (nargin < 2) {
|
||||
dispText <- 1
|
||||
npopstext <- matrix()
|
||||
ready <- FALSE
|
||||
teksti <- 'Input upper bound to the number of populations (possibly multiple values)'
|
||||
while (!ready) {
|
||||
npopstextExtra <- inputdlg(
|
||||
teksti,
|
||||
1,
|
||||
'20'
|
||||
)
|
||||
if (isempty(npopstextExtra)) { # Painettu Cancel:ia
|
||||
return()
|
||||
}
|
||||
npopstextExtra <- npopstextExtra[1]
|
||||
if (length(npopstextExtra)>=255) {
|
||||
npopstextExtra <- npopstextExtra[1:255]
|
||||
npopstext <- c(npopstext, ' ', npopstextExtra)
|
||||
teksti <- 'The input field length limit (255 characters) was reached. Input more values: '
|
||||
} else {
|
||||
npopstext <- c(npopstext, ' ', npopstextExtra)
|
||||
ready <- TRUE
|
||||
}
|
||||
}
|
||||
rm(ready, teksti)
|
||||
if (isempty(npopstext) | length(npopstext) == 1) {
|
||||
return()
|
||||
} else {
|
||||
npopsTaulu <- as.numeric(npopstext)
|
||||
ykkoset <- find(npopsTaulu == 1)
|
||||
npopsTaulu[ykkoset] <- list() # Mik<69>li ykk<6B>si<73> annettu yl<79>rajaksi, ne poistetaan.
|
||||
if (isempty(npopsTaulu)) {
|
||||
logml <- 1
|
||||
partitionSummary <- 1
|
||||
npops <- 1
|
||||
return()
|
||||
}
|
||||
rm(ykkoset)
|
||||
}
|
||||
} else {
|
||||
npopsTaulu <- npops
|
||||
}
|
||||
|
||||
nruns <- length(npopsTaulu)
|
||||
|
||||
initData <- data
|
||||
data <- data[,1:(ncol(data) - 1)]
|
||||
|
||||
logmlBest <- -1e50
|
||||
partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml)
|
||||
partitionSummary[,1] <- zeros(30, 1)
|
||||
worstLogml <- -1e50
|
||||
worstIndex <- 1
|
||||
for (run in 1:nruns) {
|
||||
npops <- npopsTaulu[run]
|
||||
if (dispText) {
|
||||
dispLine()
|
||||
print(
|
||||
paste0(
|
||||
'Run ', as.character(run), '/', as.character(nruns),
|
||||
', maximum number of populations ', as.character(npops), '.'
|
||||
)
|
||||
)
|
||||
}
|
||||
ninds <- size(rows, 1)
|
||||
|
||||
initialPartition <- admixture_initialization(initData, npops, Z) # TODO: translate
|
||||
sumcounts_counts_logml = initialCounts(
|
||||
initialPartition, data, npops, rows, noalle, adjprior
|
||||
) # TODO: translate
|
||||
sumcounts <- sumcounts_counts_logml$sumcounts
|
||||
counts <- sumcounts_counts_logml$counts
|
||||
logml <- sumcounts_counts_logml$logml
|
||||
|
||||
PARTITION <- zeros(ninds, 1)
|
||||
for (i in 1:ninds) {
|
||||
apu <- rows[i]
|
||||
PARTITION[i] <- initialPartition(apu[1])
|
||||
}
|
||||
|
||||
COUNTS <- counts
|
||||
SUMCOUNTS <- sumcounts
|
||||
POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) # TODO: translate
|
||||
LOGDIFF <- repmat(-Inf, c(ninds, npops))
|
||||
rm(initialPartition, counts, sumcounts)
|
||||
|
||||
# PARHAAN MIXTURE-PARTITION ETSIMINEN
|
||||
nRoundTypes <- 7
|
||||
kokeiltu <- zeros(nRoundTypes, 1)
|
||||
roundTypes <- c(1, 1) # Ykk<6B>svaiheen sykli kahteen kertaan.
|
||||
ready <- 0
|
||||
vaihe <- 1
|
||||
|
||||
if (dispText) {
|
||||
print(' ')
|
||||
print(
|
||||
paste0(
|
||||
'Mixture analysis started with initial',
|
||||
as.character(npops),
|
||||
'populations.'
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
while (ready != 1) {
|
||||
muutoksia <- 0
|
||||
|
||||
if (dispText) {
|
||||
print(paste('Performing steps:', as.character(roundTypes)))
|
||||
}
|
||||
|
||||
for (n in 1:length(roundTypes)) {
|
||||
|
||||
round <- roundTypes[n]
|
||||
kivaluku <- 0
|
||||
|
||||
if (kokeiltu(round) == 1) { #Askelta kokeiltu viime muutoksen j<>lkeen
|
||||
|
||||
} else if (round == 0 | round == 1) { #Yksil<69>n siirt<72>minen toiseen populaatioon.
|
||||
inds <- 1:ninds
|
||||
aputaulu <- c(t(inds), rand(ninds, 1))
|
||||
aputaulu <- sortrows(aputaulu, 2)
|
||||
inds <- t(aputaulu[, 1])
|
||||
muutosNyt <- 0
|
||||
|
||||
for (ind in inds) {
|
||||
i1 <- PARTITION[ind]
|
||||
muutokset_diffInCounts = laskeMuutokset(
|
||||
ind, rows, data, adjprior, priorTerm
|
||||
)
|
||||
muutokset <- muutokset_diffInCounts$muutokset
|
||||
diffInCounts <- muutokset_diffInCounts$diffInCounts
|
||||
|
||||
if (round == 1) {
|
||||
maxMuutos <- max_MATLAB(muutokset)[[1]]
|
||||
i2 <- max_MATLAB(muutokset)[[2]]
|
||||
}
|
||||
|
||||
if (i1 != i2 & maxMuutos > 1e-5) {
|
||||
# Tapahtui muutos
|
||||
muutoksia <- 1
|
||||
if (muutosNyt == 0) {
|
||||
muutosNyt <- 1
|
||||
if (dispText) {
|
||||
print('Action 1')
|
||||
}
|
||||
}
|
||||
kokeiltu <- zeros(nRoundTypes, 1)
|
||||
kivaluku <- kivaluku + 1
|
||||
updateGlobalVariables(
|
||||
ind, i2, diffInCounts, adjprior, priorTerm
|
||||
)
|
||||
logml <- logml+maxMuutos
|
||||
if (logml > worstLogml) {
|
||||
partitionSummary_added = addToSummary(
|
||||
logml, partitionSummary, worstIndex
|
||||
)
|
||||
partitionSummary_added <- partitionSummary_added$partitionSummary
|
||||
added <- partitionSummary_added$added
|
||||
if (added == 1) {
|
||||
worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
|
||||
worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (muutosNyt == 0) {
|
||||
kokeiltu[round] <- 1
|
||||
}
|
||||
|
||||
} else if (round == 2) { # Populaation yhdist<73>minen toiseen.
|
||||
maxMuutos <- 0
|
||||
for (pop in 1:npops) {
|
||||
muutokset_diffInCounts <- laskeMuutokset2(
|
||||
pop, rows, data, adjprior, priorTerm
|
||||
)
|
||||
muutokset <- muutokset_diffInCounts$muutokset
|
||||
diffInCounts <- muutokset_diffInCounts$diffInCounts
|
||||
isoin <- max_MATLAB(muutokset)[[1]]
|
||||
indeksi <- max_MATLAB(muutokset)[[2]]
|
||||
if (isoin > maxMuutos) {
|
||||
maxMuutos <- isoin
|
||||
i1 <- pop
|
||||
i2 <- indeksi
|
||||
diffInCountsBest <- diffInCounts
|
||||
}
|
||||
}
|
||||
|
||||
if (maxMuutos > 1e-5) {
|
||||
muutoksia <- 1
|
||||
kokeiltu <- zeros(nRoundTypes, 1)
|
||||
updateGlobalVariables2(
|
||||
i1, i2, diffInCountsBest, adjprior, priorTerm
|
||||
)
|
||||
logml <- logml + maxMuutos
|
||||
if (dispText) {
|
||||
print('Action 2')
|
||||
}
|
||||
if (logml > worstLogml) {
|
||||
partitionSummary_added <- addToSummary(
|
||||
logml, partitionSummary, worstIndex
|
||||
)
|
||||
partitionSummary <- partitionSummary_added$partitionSummary
|
||||
added <- partitionSummary_added$added
|
||||
if (added==1) {
|
||||
worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
|
||||
worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
|
||||
}
|
||||
}
|
||||
} else {
|
||||
kokeiltu[round] <- 1
|
||||
}
|
||||
|
||||
|
||||
} else if (round == 3 || round == 4) { #Populaation jakaminen osiin.
|
||||
maxMuutos <- 0
|
||||
ninds <- size(rows, 1)
|
||||
for (pop in 1:npops) {
|
||||
inds2 <- find(PARTITION == pop)
|
||||
ninds2 <- length(inds2)
|
||||
if (ninds2 > 2) {
|
||||
dist2 <- laskeOsaDist(inds2, dist, ninds)
|
||||
Z2 <- linkage(t(dist2))
|
||||
if (round == 3) {
|
||||
npops2 <- max(min(20, floor(ninds2 / 5)), 2)
|
||||
} else if (round == 4) {
|
||||
npops2 <- 2 # Moneenko osaan jaetaan
|
||||
}
|
||||
T2 <- cluster_own(Z2, npops2)
|
||||
muutokset <- laskeMuutokset3(
|
||||
T2, inds2, rows, data, adjprior, priorTerm, pop
|
||||
)
|
||||
isoin <- max_MATLAB(muutokset)[[1]]
|
||||
indeksi <- max_MATLAB(muutokset)[[2]]
|
||||
if (isoin > maxMuutos) {
|
||||
maxMuutos <- isoin
|
||||
muuttuvaPop2 <- indeksi %% npops2
|
||||
if (muuttuvaPop2==0) muuttuvaPop2 <- npops2
|
||||
muuttuvat <- inds2[find(T2 == muuttuvaPop2)]
|
||||
i2 <- ceiling(indeksi / npops2)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxMuutos > 1e-5) {
|
||||
muutoksia <- 1
|
||||
kokeiltu <- zeros(nRoundTypes, 1)
|
||||
rivit <- list()
|
||||
for (i in 1:length(muuttuvat)) {
|
||||
ind <- muuttuvat[i]
|
||||
lisa <- rows[ind, 1]:rows[ind, 2]
|
||||
rivit <- rbind(rivit, t(lisa))
|
||||
}
|
||||
diffInCounts <- computeDiffInCounts(
|
||||
t(rivit), size(COUNTS, 1), size(COUNTS, 2), data
|
||||
)
|
||||
i1 <- PARTITION(muuttuvat[1])
|
||||
updateGlobalVariables3(
|
||||
muuttuvat, diffInCounts, adjprior, priorTerm, i2
|
||||
)
|
||||
logml <- logml + maxMuutos
|
||||
if (dispText) {
|
||||
if (round == 3) {
|
||||
print('Action 3')
|
||||
} else {
|
||||
print('Action 4')
|
||||
}
|
||||
}
|
||||
if (logml > worstLogml) {
|
||||
partitionSummary_added <- addToSummary(
|
||||
logml, partitionSummary, worstIndex
|
||||
)
|
||||
partitionSummary <- partitionSummary_added$partitionSummary
|
||||
added <- partitionSummary_added$added
|
||||
if (added==1) {
|
||||
worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
|
||||
worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
kokeiltu[round] <- 1
|
||||
}
|
||||
} else if (round == 5 || round == 6) {
|
||||
j <- 0
|
||||
muutettu <- 0
|
||||
poplogml <- POP_LOGML
|
||||
partition <- PARTITION
|
||||
counts <- COUNTS
|
||||
sumcounts <- SUMCOUNTS
|
||||
logdiff <- LOGDIFF
|
||||
|
||||
pops <- sample(npops)
|
||||
while (j < npops & muutettu == 0) {
|
||||
j <- j + 1
|
||||
pop <- pops[j]
|
||||
totalMuutos <- 0
|
||||
inds <- find(PARTITION == pop)
|
||||
if (round == 5) {
|
||||
aputaulu <- c(inds, rand(length(inds), 1))
|
||||
aputaulu <- sortrows(aputaulu, 2)
|
||||
inds <- t(aputaulu[, 1])
|
||||
} else if (round == 6) {
|
||||
inds <- returnInOrder(
|
||||
inds, pop, rows, data, adjprior, priorTerm
|
||||
)
|
||||
}
|
||||
|
||||
i <- 0
|
||||
|
||||
while (length(inds) > 0 & i < length(inds)) {
|
||||
i <- i + 1
|
||||
ind <- inds[i]
|
||||
|
||||
muutokset_diffInCounts <- laskeMuutokset(
|
||||
ind, rows, data, adjprior, priorTerm
|
||||
)
|
||||
muutokset <- muutokset_diffInCounts$muutokset
|
||||
diffInCounts <- muutokset_diffInCounts$diffInCounts
|
||||
|
||||
muutokset[pop] <- -1e50 # Varmasti ei suurin!!!
|
||||
maxMuutos <- max_MATLAB(muutokset)[[1]]
|
||||
i2 <- max_MATLAB(muutokset)[[2]]
|
||||
updateGlobalVariables(
|
||||
ind, i2, diffInCounts, adjprior, priorTerm
|
||||
)
|
||||
|
||||
totalMuutos <- totalMuutos+maxMuutos
|
||||
logml <- logml + maxMuutos
|
||||
if (round == 6) {
|
||||
# Lopetetaan heti kun muutos on positiivinen.
|
||||
if (totalMuutos > 1e-5) {
|
||||
i <- length(inds)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (totalMuutos > 1e-5) {
|
||||
kokeiltu <- zeros(nRoundTypes, 1)
|
||||
muutettu <- 1
|
||||
if (muutoksia == 0) {
|
||||
muutoksia <- 1 # Ulompi kirjanpito.
|
||||
if (dispText) {
|
||||
if (round == 5) {
|
||||
print('Action 5')
|
||||
} else {
|
||||
print('Action 6')
|
||||
}
|
||||
}
|
||||
}
|
||||
if (logml > worstLogml) {
|
||||
partitionSummary_added <- addToSummary(
|
||||
logml, partitionSummary, worstIndex
|
||||
)
|
||||
partitionSummary <- partitionSummary_added$partitionSummary
|
||||
added <- partitionSummary_added$added
|
||||
if (added==1) {
|
||||
worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
|
||||
worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
|
||||
}
|
||||
}
|
||||
} else {
|
||||
# Miss<73><73>n vaiheessa tila ei parantunut.
|
||||
# Perutaan kaikki muutokset.
|
||||
PARTITION <- partition
|
||||
SUMCOUNTS <- sumcounts
|
||||
POP_LOGML <- poplogml
|
||||
COUNTS <- counts
|
||||
logml <- logml - totalMuutos
|
||||
LOGDIFF <- logdiff
|
||||
kokeiltu[round] <- 1
|
||||
}
|
||||
}
|
||||
rm(partition, sumcounts, counts, poplogml)
|
||||
|
||||
} else if (round == 7) {
|
||||
emptyPop <- findEmptyPop(npops)
|
||||
j <- 0
|
||||
pops <- sample(npops)
|
||||
muutoksiaNyt <- 0
|
||||
if (emptyPop == -1) {
|
||||
j <- npops
|
||||
}
|
||||
while (j < npops) {
|
||||
j <- j + 1
|
||||
pop <- pops[j]
|
||||
inds2 <- find(PARTITION == pop)
|
||||
ninds2 <- length(inds2)
|
||||
if (ninds2 > 5) {
|
||||
partition <- PARTITION
|
||||
sumcounts <- SUMCOUNTS
|
||||
counts <- COUNTS
|
||||
poplogml <- POP_LOGML
|
||||
logdiff <- LOGDIFF
|
||||
|
||||
dist2 <- laskeOsaDist(inds2, dist, ninds);
|
||||
Z2 <- linkage(t(dist2))
|
||||
T2 <- cluster_own(Z2, 2)
|
||||
muuttuvat <- inds2[find(T2 == 1)]
|
||||
|
||||
muutokset <- laskeMuutokset3(
|
||||
T2, inds2, rows, data, adjprior, priorTerm, pop
|
||||
)
|
||||
totalMuutos <- muutokset(1, emptyPop)
|
||||
|
||||
rivit <- list()
|
||||
for (i in 1:length(muuttuvat)) {
|
||||
ind <- muuttuvat[i]
|
||||
lisa <- rows[ind, 1]:rows[ind, 2]
|
||||
rivit <- c(rivit, lisa)
|
||||
}
|
||||
diffInCounts <- computeDiffInCounts(
|
||||
rivit, size(COUNTS, 1), size(COUNTS, 2), data
|
||||
)
|
||||
|
||||
updateGlobalVariables3(
|
||||
muuttuvat, diffInCounts, adjprior, priorTerm,
|
||||
emptyPop
|
||||
)
|
||||
|
||||
muutettu <- 1
|
||||
while (muutettu == 1) {
|
||||
muutettu <- 0
|
||||
# Siirret<65><74>n yksil<69>it<69> populaatioiden v<>lill<6C>
|
||||
muutokset <- laskeMuutokset5(
|
||||
inds2, rows, data, adjprior, priorTerm,
|
||||
pop, emptyPop
|
||||
)
|
||||
|
||||
maxMuutos <- indeksi <- max_MATLAB(muutokset)
|
||||
|
||||
muuttuva <- inds2(indeksi)
|
||||
if (PARTITION(muuttuva) == pop) {
|
||||
i2 <- emptyPop
|
||||
} else {
|
||||
i2 <- pop
|
||||
}
|
||||
|
||||
if (maxMuutos > 1e-5) {
|
||||
rivit <- rows[muuttuva, 1]:rows[muuttuva, 2]
|
||||
diffInCounts <- computeDiffInCounts(
|
||||
rivit, size(COUNTS, 1), size(COUNTS, 2),
|
||||
data
|
||||
)
|
||||
updateGlobalVariables3(
|
||||
muuttuva,diffInCounts, adjprior,
|
||||
priorTerm, i2
|
||||
)
|
||||
muutettu <- 1
|
||||
totalMuutos <- totalMuutos + maxMuutos
|
||||
}
|
||||
}
|
||||
if (totalMuutos > 1e-5) {
|
||||
muutoksia <- 1
|
||||
logml <- logml + totalMuutos
|
||||
if (logml > worstLogml) {
|
||||
partitionSummary_added = addToSummary(
|
||||
logml, partitionSummary, worstIndex
|
||||
)
|
||||
partitionSummary_added <- partitionSummary_added$partitionSummary
|
||||
added <- partitionSummary_added$added
|
||||
if (added == 1) {
|
||||
worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
|
||||
worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
|
||||
}
|
||||
}
|
||||
if (muutoksiaNyt == 0) {
|
||||
if (dispText) {
|
||||
print('Action 7')
|
||||
}
|
||||
muutoksiaNyt <- 1
|
||||
}
|
||||
kokeiltu <- zeros(nRoundTypes, 1)
|
||||
j <- npops
|
||||
} else {
|
||||
# palutetaan vanhat arvot
|
||||
PARTITION <- partition
|
||||
SUMCOUNTS <- sumcounts
|
||||
COUNTS <- counts
|
||||
POP_LOGML <- poplogml
|
||||
LOGDIFF <- logdiff
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (muutoksiaNyt == 0) {
|
||||
kokeiltu[round] <- 1
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (muutoksia == 0) {
|
||||
if (vaihe <= 4) {
|
||||
vaihe <= vaihe + 1
|
||||
} else if (vaihe == 5) {
|
||||
ready <- 1
|
||||
}
|
||||
} else {
|
||||
muutoksia <- 0
|
||||
}
|
||||
|
||||
if (ready == 0) {
|
||||
if (vaihe == 1) {
|
||||
roundTypes <- c(1)
|
||||
} else if (vaihe == 2) {
|
||||
roundTypes <- c(2, 1)
|
||||
} else if (vaihe == 3) {
|
||||
roundTypes <- c(5, 5, 7)
|
||||
} else if (vaihe == 4) {
|
||||
roundTypes = c(4, 3, 1)
|
||||
} else if (vaihe == 5) {
|
||||
roundTypes <- c(6, 7, 2, 3, 4, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# TALLENNETAAN
|
||||
|
||||
npops <- poistaTyhjatPopulaatiot(npops)
|
||||
POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm)
|
||||
if (dispText) {
|
||||
print(paste('Found partition with', as.character(npops), 'populations.'))
|
||||
print(paste('Log(ml) =', as.character(logml)))
|
||||
print(' ')
|
||||
}
|
||||
|
||||
if (logml > logmlBest) {
|
||||
# P<>ivitet<65><74>n parasta l<>ydetty<74> partitiota.
|
||||
logmlBest <- logml
|
||||
npopsBest <- npops
|
||||
partitionBest <- PARTITION
|
||||
countsBest <- COUNTS
|
||||
sumCountsBest <- SUMCOUNTS
|
||||
pop_logmlBest <- POP_LOGML
|
||||
logdiffbest <- LOGDIFF
|
||||
}
|
||||
}
|
||||
return(
|
||||
list(logml = logml, npops = npops, partitionSummary = partitionSummary)
|
||||
)
|
||||
}
|
||||
20
R/initialPopCounts.R
Normal file
20
R/initialPopCounts.R
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
initialPopCounts <- function(data, npops, rows, noalle, adjprior) {
|
||||
nloci <- size(data, 2)
|
||||
counts <- zeros(max(noalle), nloci, npops)
|
||||
sumcounts <- zeros(npops, nloci)
|
||||
|
||||
for (i in 1:npops) {
|
||||
for (j in 1:nloci) {
|
||||
i_rivit <- rows(i, 1):rows(i, 2)
|
||||
havainnotLokuksessa <- find(data[i_rivit, j] >= 0)
|
||||
sumcounts[i, j] <- length(havainnotLokuksessa)
|
||||
for (k in 1:noalle[j]) {
|
||||
alleleCode <- k
|
||||
N_ijk <- length(find(data[i_rivit, j] == alleleCode))
|
||||
counts[k, j, i] <- N_ijk
|
||||
}
|
||||
}
|
||||
}
|
||||
logml <- laskeLoggis(counts, sumcounts, adjprior)
|
||||
return(sumcounts = sumcounts, counts = counts, logml = logml)
|
||||
}
|
||||
9
R/initializeGammaln.R
Normal file
9
R/initializeGammaln.R
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
initializeGammaln <- function(ninds, rowsFromInd, maxAlleles) {
|
||||
#Alustaa GAMMALN muuttujan s.e. GAMMALN(i, j)=gammaln((i - 1) + 1/j)
|
||||
GAMMA_LN <- zeros((1 + ninds) * rowsFromInd, maxAlleles)
|
||||
for (i in 1:(ninds + 1) * rowsFromInd) {
|
||||
for (j in 1:maxAlleles) {
|
||||
GAMMA_LN[i, j] <- log_gamma((i - 1) + 1/j)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
laskeLoggis <- function(counts, sumcounts, adjprior) {
|
||||
npops <- size(counts, 3)
|
||||
|
||||
sum1 <- sum(sum(sum(gammaln(counts + repmat(adjprior, c(1, 1, npops))))))
|
||||
sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops))))))
|
||||
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
|
||||
logml2 <- sum1 - npops * sum3
|
||||
loggis <- logml2
|
||||
|
|
|
|||
226
R/laskeMuutokset12345.R
Normal file
226
R/laskeMuutokset12345.R
Normal file
|
|
@ -0,0 +1,226 @@
|
|||
#' @title Calculate changes (?)
|
||||
#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
|
||||
#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran
|
||||
#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
|
||||
#' siirrettävää, on vastaavassa kohdassa rivi nollia.
|
||||
#' @param osuus Percentages?
|
||||
#' @param omaFreqs own Freqs?
|
||||
#' @param osuusTaulu Percentage table?
|
||||
#' @param logml log maximum likelihood
|
||||
#' @param COUNTS COUNTS
|
||||
#' @export
|
||||
laskeMuutokset4 <- function (
|
||||
osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)
|
||||
) {
|
||||
npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
|
||||
notEmpty <- which(osuusTaulu > 0.005)
|
||||
muutokset <- zeros(npops)
|
||||
empties <- !notEmpty
|
||||
|
||||
for (i1 in notEmpty) {
|
||||
osuusTaulu[i1] <- osuusTaulu[i1] - osuus
|
||||
for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) {
|
||||
osuusTaulu[i2] <- osuusTaulu[i2] + osuus
|
||||
loggis <- computeIndLogml(omaFreqs, osuusTaulu)
|
||||
|
||||
# Work around Matlab OOB bug
|
||||
if (i1 > nrow(muutokset)) {
|
||||
muutokset <- rbind(muutokset, muutokset * 0)
|
||||
}
|
||||
if (i2 > ncol(muutokset)) {
|
||||
muutokset <- cbind(muutokset, muutokset * 0)
|
||||
}
|
||||
|
||||
muutokset[i1, i2] <- loggis - logml
|
||||
osuusTaulu[i2] <- osuusTaulu[i2] - osuus
|
||||
}
|
||||
osuusTaulu[i1] <- osuusTaulu[i1] + osuus
|
||||
}
|
||||
return (muutokset)
|
||||
}
|
||||
|
||||
# Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik<69> olisi
|
||||
# muutos logml:ss<73>, mik<69>li yksil<69> ind siirret<65><74>n koriin i.
|
||||
# diffInCounts on poistettava COUNTS:in siivusta i1 ja lis<69>tt<74>v<EFBFBD>
|
||||
# COUNTS:in siivuun i2, mik<69>li muutos toteutetaan.
|
||||
#
|
||||
# Lis<69>ys 25.9.2007:
|
||||
# Otettu k<>ytt<74><74>n globaali muuttuja LOGDIFF, johon on tallennettu muutokset
|
||||
# logml:ss<73> siirrett<74>ess<73> yksil<69>it<69> toisiin populaatioihin.
|
||||
laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) {
|
||||
npops <- size(COUNTS, 3)
|
||||
muutokset <- LOGDIFF[ind, ]
|
||||
|
||||
i1 <- PARTITION[ind]
|
||||
i1_logml <- POP_LOGML[i1]
|
||||
muutokset[i1] <- 0
|
||||
|
||||
rows <- globalRows[ind, 1]:globalRows[ind, 2]
|
||||
diffInCounts <- computeDiffInCounts(
|
||||
rows, size(COUNTS, 1), size(COUNTS, 2), data
|
||||
)
|
||||
diffInSumCounts <- sum(diffInCounts)
|
||||
|
||||
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
|
||||
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
|
||||
COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
|
||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts
|
||||
|
||||
i2 <- find(muutokset == -Inf) # Etsit<69><74>n populaatiot jotka muuttuneet viime kerran j<>lkeen.
|
||||
i2 <- setdiff(i2, i1)
|
||||
i2_logml <- POP_LOGML[i2]
|
||||
|
||||
ni2 <- length(i2)
|
||||
|
||||
COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2))
|
||||
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1))
|
||||
new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm)
|
||||
COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2))
|
||||
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1))
|
||||
|
||||
muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
|
||||
LOGDIFF[ind, ] = muutokset
|
||||
return(list(muutokset = muutokset, diffInCounts = diffInCounts))
|
||||
}
|
||||
|
||||
laskeMuutokset2 <- function(i1, globalRows, data, adjprior, priorTerm) {
|
||||
# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik<69> olisi
|
||||
# % muutos logml:ss<73>, mik<69>li korin i1 kaikki yksil<69>t siirret<65><74>n
|
||||
# % koriin i.
|
||||
|
||||
npops <- size(COUNTS, 3)
|
||||
muutokset <- zeros(npops, 1)
|
||||
|
||||
i1_logml <- POP_LOGML[i1]
|
||||
|
||||
inds <- find(PARTITION == i1)
|
||||
ninds <- length(inds)
|
||||
|
||||
if (ninds == 0) {
|
||||
diffInCounts <- zeros(size(COUNTS, 1), size(COUNTS, 2))
|
||||
return()
|
||||
}
|
||||
|
||||
rows = list()
|
||||
for (i in 1:ninds) {
|
||||
ind <- inds(i)
|
||||
lisa <- globalRows(ind, 1):globalRows(ind, 2)
|
||||
rows <- c(rows, t(lisa))
|
||||
}
|
||||
|
||||
diffInCounts <- computeDiffInCounts(
|
||||
t(rows), size(COUNTS, 1), size(COUNTS, 2), data
|
||||
)
|
||||
diffInSumCounts <- sum(diffInCounts)
|
||||
|
||||
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
|
||||
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
|
||||
COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
|
||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts
|
||||
|
||||
i2 <- c(1:i1-1, i1+1:npops)
|
||||
i2_logml <- POP_LOGML[i2]
|
||||
|
||||
COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1))
|
||||
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1))
|
||||
new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm)
|
||||
COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1))
|
||||
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1))
|
||||
|
||||
muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
|
||||
return(list(muutokset = muutokset, diffInCounts = diffInCounts))
|
||||
}
|
||||
|
||||
|
||||
laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1) {
|
||||
# Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio
|
||||
# kertoo, mik<69> olisi muutos logml:ss<73>, jos populaation i1 osapopulaatio
|
||||
# inds2(find(T2==i)) siirret<65><74>n koriin j.
|
||||
|
||||
npops <- size(COUNTS, 3)
|
||||
npops2 <- length(unique(T2))
|
||||
muutokset <- zeros(npops2, npops)
|
||||
|
||||
i1_logml = POP_LOGML[i1]
|
||||
for (pop2 in 1:npops2) {
|
||||
inds <- inds2[find(T2==pop2)]
|
||||
ninds <- length(inds);
|
||||
if (ninds > 0) {
|
||||
rows <- list()
|
||||
for (i in 1:ninds) {
|
||||
ind <- inds[i]
|
||||
lisa <- globalRows[ind, 1]:globalRows[ind, 2]
|
||||
rows <- c(rows, t(lisa))
|
||||
}
|
||||
diffInCounts <- computeDiffInCounts(
|
||||
t(rows), size(COUNTS, 1), size(COUNTS, 2), data
|
||||
)
|
||||
diffInSumCounts <- sum(diffInCounts)
|
||||
|
||||
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
|
||||
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
|
||||
COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
|
||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts
|
||||
|
||||
i2 <- c(1:i1-1, i1+1:npops)
|
||||
i2_logml <- t(POP_LOGML[i2])
|
||||
|
||||
COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1))
|
||||
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1))
|
||||
new_i2_logml <- t(computePopulationLogml(i2, adjprior, priorTerm))
|
||||
COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1))
|
||||
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1))
|
||||
|
||||
muutokset[pop2, i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
|
||||
}
|
||||
}
|
||||
return(muutokset)
|
||||
}
|
||||
|
||||
laskeMuutokset5 <- function(inds, globalRows, data, adjprior, priorTerm, i1, i2) {
|
||||
# Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik<69> olisi
|
||||
# muutos logml:ss<73>, mik<69>li yksil<69> i vaihtaisi koria i1:n ja i2:n v<>lill<6C>.
|
||||
|
||||
ninds <- length(inds)
|
||||
muutokset <- zeros(ninds, 1)
|
||||
|
||||
i1_logml <- POP_LOGML[i1]
|
||||
i2_logml <- POP_LOGML[i2]
|
||||
|
||||
for (i in 1:ninds) {
|
||||
ind <- inds[i]
|
||||
if (PARTITION[ind] == i1) {
|
||||
pop1 <- i1 #mist<73>
|
||||
pop2 <- i2 #mihin
|
||||
} else {
|
||||
pop1 <- i2
|
||||
pop2 <- i1
|
||||
}
|
||||
rows <- globalRows[ind, 1]:globalRows[ind, 2]
|
||||
diffInCounts <- computeDiffInCounts(
|
||||
rows, size(COUNTS, 1), size(COUNTS, 2), data
|
||||
)
|
||||
diffInSumCounts <- sum(diffInCounts)
|
||||
|
||||
|
||||
|
||||
COUNTS[, , pop1] <- COUNTS[, , pop1] - diffInCounts
|
||||
SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] - diffInSumCounts
|
||||
COUNTS[, , pop2] <- COUNTS[, , pop2] + diffInCounts
|
||||
SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] + diffInSumCounts
|
||||
|
||||
new_logmls <- computePopulationLogml(c(i1, i2), adjprior, priorTerm)
|
||||
muutokset[i] <- sum(new_logmls)
|
||||
|
||||
COUNTS[, , pop1] <- COUNTS[, , pop1] + diffInCounts
|
||||
SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] + diffInSumCounts
|
||||
COUNTS[, , pop2] <- COUNTS[, , pop2] - diffInCounts
|
||||
SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] - diffInSumCounts
|
||||
}
|
||||
|
||||
muutokset <- muutokset - i1_logml - i2_logml
|
||||
return(muutokset)
|
||||
}
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
#' @title Calculate changes?
|
||||
#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
|
||||
#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran
|
||||
#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
|
||||
#' siirrettävää, on vastaavassa kohdassa rivi nollia.
|
||||
#' @param osuus Percentages?
|
||||
#' @param omaFreqs own Freqs?
|
||||
#' @param osuusTaulu Percentage table?
|
||||
#' @param logml log maximum likelihood
|
||||
#' @param COUNTS COUNTS
|
||||
#' @export
|
||||
laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml,
|
||||
COUNTS = matrix(0)) {
|
||||
npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
|
||||
notEmpty <- which(osuusTaulu > 0.005)
|
||||
muutokset <- zeros(npops)
|
||||
empties <- !notEmpty
|
||||
|
||||
for (i1 in notEmpty) {
|
||||
osuusTaulu[i1] <- osuusTaulu[i1] - osuus
|
||||
for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) {
|
||||
osuusTaulu[i2] <- osuusTaulu[i2] + osuus
|
||||
loggis <- computeIndLogml(omaFreqs, osuusTaulu)
|
||||
|
||||
# Work around Matlab OOB bug
|
||||
if (i1 > nrow(muutokset)) {
|
||||
muutokset <- rbind(muutokset, muutokset * 0)
|
||||
}
|
||||
if (i2 > ncol(muutokset)) {
|
||||
muutokset <- cbind(muutokset, muutokset * 0)
|
||||
}
|
||||
|
||||
muutokset[i1, i2] <- loggis - logml
|
||||
osuusTaulu[i2] <- osuusTaulu[i2] - osuus
|
||||
}
|
||||
osuusTaulu[i1] <- osuusTaulu[i1] + osuus
|
||||
}
|
||||
return (muutokset)
|
||||
}
|
||||
25
R/laskeOsaDist.R
Normal file
25
R/laskeOsaDist.R
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#' @title Lower part of the dist
|
||||
#' @description Constructs from the dist vector a subvector containing the individual inds2, Forms dist sub-vectors the vector, which includes yksiliden inds2
|
||||
#' @param inds2 inds2
|
||||
#' @param dist dist
|
||||
#' @param ninds ninds
|
||||
#' @author Waldir Leoncio
|
||||
laskeOsaDist <- function(inds2, dist, ninds) {
|
||||
# % Muodostaa dist vektorista osavektorin, joka sis<69>lt<6C><74> yksil<69>iden inds2
|
||||
# % v<>liset et<65>isyydet. ninds=kaikkien yksil<69>iden lukum<75><6D>r<EFBFBD>.
|
||||
|
||||
ninds2 <- length(inds2)
|
||||
apu <- zeros(choose(ninds2, 2), 2)
|
||||
rivi <- 1
|
||||
for (i in 1:ninds2-1) {
|
||||
for (j in i+1:ninds2) {
|
||||
apu[rivi, 1] <- inds2[i]
|
||||
apu[rivi, 2] <- inds2[j]
|
||||
rivi <- rivi + 1
|
||||
}
|
||||
}
|
||||
apu <- (apu[, 1]-1) * ninds - apu[, 1] / 2 *
|
||||
(apu[, 1]-1) + (apu[, 2] - apu[, 1])
|
||||
dist2 <- dist(apu)
|
||||
return(dist2)
|
||||
}
|
||||
50
R/linkage.R
50
R/linkage.R
|
|
@ -4,10 +4,14 @@
|
|||
#' linkage algorithm. The input Y is a distance matrix such as is generated by
|
||||
#' PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
||||
#' output format of PDIST.
|
||||
#' @param Y data
|
||||
#'
|
||||
#' Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X.
|
||||
#' @param Y matrix
|
||||
#' @param method either 'si', 'av', 'co' 'ce' or 'wa'
|
||||
#' @note This is also a base Matlab function. The reason why the source code is also present here is unclear.
|
||||
#' @export
|
||||
linkage <- function(Y, method = 'co') {
|
||||
#TODO: compare R output with MATLAB output
|
||||
k <- size(Y)[1]
|
||||
n <- size(Y)[2]
|
||||
m <- (1 + sqrt(1 + 8 * n)) / 2
|
||||
|
|
@ -24,18 +28,33 @@ linkage <- function(Y, method = 'co') {
|
|||
N[1:m] <- 1
|
||||
n <- m; # since m is changing, we need to save m in n.
|
||||
R <- 1:n
|
||||
for (s in 1:(n-1)) {
|
||||
X <- Y
|
||||
v <- min(X)[1]
|
||||
k <- min(X)[2]
|
||||
for (s in 1:(n - 1)) {
|
||||
X <- as.matrix(as.vector(Y), ncol=1)
|
||||
|
||||
v <- min_MATLAB(X)$mins
|
||||
k <- min_MATLAB(X)$idx
|
||||
|
||||
i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1)))
|
||||
j <- k - (i - 1) * (m - i / 2) + i
|
||||
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
|
||||
# Temp variables
|
||||
I1 <- 1:(i - 1)
|
||||
I2 <- (i + 1):(j - 1)
|
||||
I3 <- (j + 1):m
|
||||
|
||||
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
|
||||
|
||||
# Temp variables
|
||||
if (i > 1) {
|
||||
I1 <- 1:(i - 1)
|
||||
} else {
|
||||
I1 <- NULL
|
||||
}
|
||||
if (i + 1 <= j - 1) {
|
||||
I2 <- (i + 1):(j - 1)
|
||||
} else {
|
||||
I2 <- NULL
|
||||
}
|
||||
if (j + 1 <= m) {
|
||||
I3 <- (j + 1):m
|
||||
} else {
|
||||
I3 <- NULL
|
||||
}
|
||||
U <- c(I1, I2, I3)
|
||||
I <- c(
|
||||
I1 * (m - (I1 + 1) / 2) - m + i,
|
||||
|
|
@ -47,11 +66,13 @@ linkage <- function(Y, method = 'co') {
|
|||
I2 * (m - (I2 + 1) / 2) - m + j,
|
||||
j * (m - (j + 1) / 2) - m + I3
|
||||
)
|
||||
|
||||
# Workaround in R for negative values in I and J
|
||||
# I <- I[I > 0 & I <= length(Y)]
|
||||
# J <- J[J > 0 & J <= length(Y)]
|
||||
switch(method,
|
||||
'si' = Y[I] <- min(Y[I], Y[J]), # single linkage
|
||||
'si' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, min), # single linkage
|
||||
'av' = Y[I] <- Y[I] + Y[J], # average linkage
|
||||
'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage
|
||||
'co' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, max), #complete linkage
|
||||
'ce' = {
|
||||
K <- N[R[i]] + N[R[j]] # centroid linkage
|
||||
Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] -
|
||||
|
|
@ -61,8 +82,7 @@ linkage <- function(Y, method = 'co') {
|
|||
Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]])
|
||||
)
|
||||
J <- c(J, i * (m - (i + 1) / 2) - m + j)
|
||||
Y[J] <- vector() # no need for the cluster information about j
|
||||
|
||||
Y <- Y[-J] # no need for the cluster information about j
|
||||
# update m, N, R
|
||||
m <- m - 1
|
||||
N[n + s] <- N[R[i]] + N[R[j]]
|
||||
|
|
|
|||
|
|
@ -4,17 +4,16 @@
|
|||
#' @return list containing data and popnames
|
||||
#' @export
|
||||
lueGenePopData <- function (tiedostonNimi) {
|
||||
|
||||
fid <- load(tiedostonNimi)
|
||||
line1 <- readLines(fid)[1] # ensimmäinen rivi
|
||||
line2 <- readLines(fid)[2] # toinen rivi
|
||||
fid <- readLines(tiedostonNimi)
|
||||
line <- fid[1] # ensimmäinen rivi
|
||||
line <- fid[2] # toinen rivi
|
||||
count <- rivinSisaltamienMjonojenLkm(line)
|
||||
|
||||
line <- readLines(fid)[3]
|
||||
line <- fid[3]
|
||||
lokusRiveja <- 1
|
||||
while (testaaPop(line) == 0) {
|
||||
lokusRiveja <- lokusRiveja + 1 # locus row
|
||||
line <- readLines(fid)[3 + lokusRiveja]
|
||||
line <- fid[2 + lokusRiveja]
|
||||
}
|
||||
|
||||
if (lokusRiveja > 1) {
|
||||
|
|
@ -29,38 +28,34 @@ lueGenePopData <- function (tiedostonNimi) {
|
|||
ninds <- 0
|
||||
poimiNimi <- 1
|
||||
digitFormat <- -1
|
||||
while (line != -1) {
|
||||
line <- readLines(fid)[lokusRiveja + 1]
|
||||
lokusRiveja <- lokusRiveja + 1
|
||||
|
||||
while (lokusRiveja < length(fid) - 2) {
|
||||
lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along
|
||||
line <- fid[lokusRiveja + 2]
|
||||
if (poimiNimi == 1) {
|
||||
# Edellinen rivi oli 'pop'
|
||||
# Edellinen rivi oli 'pop' (previous line was pop)
|
||||
nimienLkm <- nimienLkm + 1
|
||||
ninds <- ninds + 1
|
||||
if (nimienLkm > size(popnames, 1)) {
|
||||
popnames <- c(popnames, cell(10, 2))
|
||||
popnames <- rbind(popnames, cell(10, 2))
|
||||
}
|
||||
nimi <- lueNimi(line)
|
||||
if (digitFormat == -1) {
|
||||
digitFormat <- selvitaDigitFormat(line)
|
||||
divider <- 10 ^ digitFormat
|
||||
}
|
||||
popnames[nimienLkm, 1] <- nimi #N<>in se on greedyMix:iss<73>kin?!?
|
||||
popnames[nimienLkm, 1] <- nimi #N<>in se on greedyMix:iss<73>kin?!?
|
||||
popnames[nimienLkm, 2] <- ninds
|
||||
poimiNimi <- 0
|
||||
|
||||
data <- addAlleles(data, ninds, line, divider)
|
||||
|
||||
} else if (testaaPop(line)) {
|
||||
poimiNimi <- 1
|
||||
|
||||
} else if (line != -1) {
|
||||
} else if (!is.na(line)) {
|
||||
ninds <- ninds + 1
|
||||
data <- addAlleles(data, ninds, line, divider)
|
||||
}
|
||||
}
|
||||
|
||||
data <- data[1:(ninds * 2),]
|
||||
popnames <- popnames[seq_len(nimienLkm),]
|
||||
data <- data[1:(ninds * 2), ]
|
||||
popnames <- popnames[seq_len(nimienLkm), ]
|
||||
return(list(data = data, popnames = popnames))
|
||||
}
|
||||
14
R/lueNimi.R
14
R/lueNimi.R
|
|
@ -1,17 +1,23 @@
|
|||
#' @title Read the Name
|
||||
#' @description Reads the line name
|
||||
#' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
|
||||
#' @param line line
|
||||
#' @return nimi
|
||||
#' @export
|
||||
lueNimi <- function(line) {
|
||||
# ==========================================================================
|
||||
# Validation
|
||||
# ==========================================================================
|
||||
if (!grepl(",", line)) {
|
||||
stop("There are no commas in this line")
|
||||
}
|
||||
# Palauttaa line:n alusta sen osan, joka on ennen pilkkua.
|
||||
n <- 1
|
||||
merkki <- line[n]
|
||||
merkki <- substring(line, n, n)
|
||||
nimi <- ''
|
||||
while (merkki != ',') {
|
||||
nimi <- c(nimi, merkki)
|
||||
n <- n + 1
|
||||
merkki <- line[n]
|
||||
merkki <- substring(line, n, n)
|
||||
}
|
||||
return(nimi)
|
||||
return(paste(nimi, collapse=""))
|
||||
}
|
||||
160
R/matlab2r.R
Normal file
160
R/matlab2r.R
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
#' @title Convert Matlab function to R
|
||||
#' @description Performs basic syntax conversion from Matlab to R
|
||||
#' @param filename name of the file
|
||||
#' @param output can be "asis", "clean", "save" or "diff"
|
||||
#' @param improve_formatting if `TRUE` (default), makes minor changes
|
||||
#' to conform to best-practice formatting conventions
|
||||
#' @param change_assignment if `TRUE` (default), uses `<-` as the assignment operator
|
||||
#' @param append if `FALSE` (default), overwrites file; otherwise, append
|
||||
#' output to input
|
||||
#' @return text converted to R, printed to screen or replacing input file
|
||||
#' @author Waldir Leoncio
|
||||
#' @importFrom utils write.table
|
||||
#' @export
|
||||
#' @note This function is intended to expedite the process of converting a
|
||||
#' Matlab function to R by making common replacements. It does not have the
|
||||
#' immediate goal of outputting a ready-to-use function. In other words,
|
||||
#' after using this function you should go back to it and make minor changes.
|
||||
#'
|
||||
#' It is also advised to do a dry-run with `output = "clean"` and only switching
|
||||
#' to `output = "save"` when you are confident that no important code will be
|
||||
#' lost (for shorter functions, a careful visual inspection should suffice).
|
||||
matlab2r <- function(
|
||||
filename, output = "diff", improve_formatting=TRUE, change_assignment=TRUE,
|
||||
append=FALSE
|
||||
) {
|
||||
# TODO: this function is too long! Split into subfunctions
|
||||
# (say, by rule and/or section)
|
||||
# ======================================================== #
|
||||
# Verification #
|
||||
# ======================================================== #
|
||||
if (!file.exists(filename)) stop("File not found")
|
||||
|
||||
# ======================================================== #
|
||||
# Reading file into R #
|
||||
# ======================================================== #
|
||||
txt <- readLines(filename)
|
||||
original <- txt
|
||||
|
||||
# ======================================================== #
|
||||
# Replacing text #
|
||||
# ======================================================== #
|
||||
|
||||
# Uncommenting ------------------------------------------- #
|
||||
txt <- gsub("^#\\s?(.+)", "\\1", txt)
|
||||
|
||||
# Output variable ---------------------------------------- #
|
||||
out <- gsub(
|
||||
pattern = "\\t*function ((\\S|\\,\\s)+)\\s?=\\s?(\\w+)\\((.+)\\)",
|
||||
replacement = "\\1",
|
||||
x = txt[1]
|
||||
) # TODO: improve by detecting listed outputs
|
||||
if (substring(out, 1, 1) == "[") {
|
||||
out <- strsplit(out, "(\\,|\\[|\\]|\\s)")[[1]]
|
||||
out <- out[which(out != "")]
|
||||
out <- sapply(seq_along(out), function(x) paste(out[x], "=", out[x]))
|
||||
out <- paste0("list(", paste(out, collapse=", "), ")")
|
||||
}
|
||||
|
||||
# Function header ---------------------------------------- #
|
||||
txt <- gsub(
|
||||
pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)",
|
||||
replacement = "\\2 <- function(\\3) {",
|
||||
x = txt
|
||||
)
|
||||
txt <- gsub(
|
||||
pattern = "function (.+)\\((.+)\\)",
|
||||
replacement = "\\1 <- function(\\2) {",
|
||||
x = txt
|
||||
)
|
||||
|
||||
# Function body ------------------------------------------ #
|
||||
txt <- gsub("(.+)\\.\\.\\.", "\\1", txt)
|
||||
txt <- gsub(";", "", txt)
|
||||
|
||||
# Loops and if-statements
|
||||
txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt)
|
||||
txt <- gsub("end$", "}", txt)
|
||||
txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after {
|
||||
txt <- gsub("else$", "} else {", txt)
|
||||
txt <- gsub("elseif", "} else if", txt)
|
||||
txt <- gsub("while (.+)", "while \\1 {", txt)
|
||||
|
||||
# MATLAB-equivalent functions in R
|
||||
txt <- gsub("gamma_ln", "log_gamma", txt)
|
||||
txt <- gsub("nchoosek", "choose", txt)
|
||||
txt <- gsub("isempty", "is.null", txt)
|
||||
# txt <- gsub("(.+)\\'", "t(\\1)", txt)
|
||||
|
||||
# Subsets ------------------------------------------------ #
|
||||
ass_op <- ifelse(change_assignment, "<-", "=")
|
||||
txt <- gsub(
|
||||
pattern = "([^\\(]+)\\(([^\\(]+)\\)=(.+)",
|
||||
replacement = paste0("\\1[\\2] ", ass_op, "\\3"),
|
||||
x = txt
|
||||
)
|
||||
txt <- gsub("\\(:\\)", "[, ]", txt)
|
||||
txt <- gsub("(.+)(\\[|\\():,end(\\]|\\()", "\\1[, ncol()]", txt)
|
||||
|
||||
# Formatting --------------------------------------------- #
|
||||
if (improve_formatting) {
|
||||
txt <- gsub("(.),(\\S)", "\\1, \\2", txt)
|
||||
# Math operators
|
||||
txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt)
|
||||
txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt)
|
||||
txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt)
|
||||
txt <- gsub("(\\S)\\/(\\S)", "\\1 / \\2", txt)
|
||||
# Logic operators
|
||||
txt <- gsub("~", "!", txt)
|
||||
txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt)
|
||||
txt <- gsub("(\\S)<=(\\S)", "\\1 <= \\2", txt)
|
||||
txt <- gsub("(\\S)==(\\S)", "\\1 == \\2", txt)
|
||||
# Assignment
|
||||
txt <- gsub(
|
||||
pattern = "(\\w)(\\s?)=(\\s?)(\\w)",
|
||||
replacement = paste0("\\1 ", ass_op, " \\4"),
|
||||
x = txt
|
||||
)
|
||||
# txt <- gsub(
|
||||
# pattern = "(\\s+(.|\\_|\\[|\\])+)(\\s?)=(\\s?)(.+)",
|
||||
# replacement = paste0("\\1 ", ass_op, "\\5"),
|
||||
# x = txt
|
||||
# )
|
||||
txt <- gsub("%(\\s?)(\\w)", "# \\2", txt)
|
||||
}
|
||||
|
||||
# Adding output and end-of-file brace -------------------- #
|
||||
txt <- append(txt, paste0("\treturn(", out, ")\n}"))
|
||||
|
||||
# Returning converted code ------------------------------- #
|
||||
if (output == "asis") {
|
||||
return(txt)
|
||||
} else if (output == "clean") {
|
||||
return(cat(txt, sep="\n"))
|
||||
} else if (output == "save") {
|
||||
return(
|
||||
write.table(
|
||||
x = txt,
|
||||
file = filename,
|
||||
quote = FALSE,
|
||||
row.names = FALSE,
|
||||
col.names = FALSE,
|
||||
append = append
|
||||
)
|
||||
)
|
||||
} else if (output == "diff") {
|
||||
diff_text <- vector(mode="character", length=(2 * length(original) + 1))
|
||||
for (i in seq_along(txt)) {
|
||||
new_i <- (2 * i) + i - 2
|
||||
diff_text[new_i] <- paste(
|
||||
"-----------------------", "line", i, "-----------------------"
|
||||
)
|
||||
diff_text[new_i + 1] <- original[i]
|
||||
diff_text[new_i + 2] <- txt[i]
|
||||
}
|
||||
message("Displaying line number, original content and modified content")
|
||||
return(cat(diff_text, sep="\n"))
|
||||
} else {
|
||||
stop ("Invalid output argument")
|
||||
}
|
||||
}
|
||||
31
R/min_max_MATLAB.R
Normal file
31
R/min_max_MATLAB.R
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
#' @title Minimum (MATLAB version)
|
||||
#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead
|
||||
#' @param X matrix
|
||||
#' @param indices return indices?
|
||||
#' @return Either a list or a vector
|
||||
#' @author Waldir Leoncio
|
||||
min_MATLAB <- function(X, indices = TRUE) {
|
||||
mins <- apply(X, 2, min)
|
||||
idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x]))
|
||||
if (indices) {
|
||||
return(list(mins = mins, idx = idx))
|
||||
} else {
|
||||
return(mins)
|
||||
}
|
||||
}
|
||||
|
||||
#' @title Maximum (MATLAB version)
|
||||
#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead
|
||||
#' @param X matrix
|
||||
#' @param indices return indices?
|
||||
#' @return Either a list or a vector
|
||||
#' @author Waldir Leoncio
|
||||
max_MATLAB <- function(X, indices = TRUE) {
|
||||
maxs <- apply(X, 2, max)
|
||||
idx <- sapply(seq_len(ncol(X)), function(x) match(maxs[x], X[, x]))
|
||||
if (indices) {
|
||||
return(list(maxs = maxs, idx = idx))
|
||||
} else {
|
||||
return(maxs)
|
||||
}
|
||||
}
|
||||
10
R/nargin.R
Normal file
10
R/nargin.R
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
#' @title Number of function input arguments
|
||||
#' @description Returns the number of arguments passed to the parent function
|
||||
#' @return An integer
|
||||
#' @author Waldir Leoncio
|
||||
#' @note This function only makes sense inside another function
|
||||
#' @references https://stackoverflow.com/q/64422780/1169233
|
||||
nargin <- function() {
|
||||
if(sys.nframe() < 2) stop("must be called from inside a function")
|
||||
length(as.list(sys.call(-1))) - 1
|
||||
}
|
||||
|
|
@ -5,14 +5,14 @@ newGetDistances <- function(data, rowsFromInd) {
|
|||
|
||||
empties <- find(data < 0)
|
||||
data[empties] <- 0
|
||||
data <- as.integer(data) # max(noalle) oltava <256
|
||||
data <- apply(data, 2, as.numeric) # max(noalle) oltava <256
|
||||
|
||||
pariTaulu <- zeros(riviLkm, 2)
|
||||
aPointer <- 1
|
||||
for (a in (1:ninds) - 1) {
|
||||
pariTaulu[aPointer:(aPointer + ninds - 1 - a), 1] <-
|
||||
ones(ninds - a, 1) * a
|
||||
pariTaulu[aPointer:aPointer + ninds - 1 - a, 2] <- t(a + 1:ninds)
|
||||
for (a in 1:(ninds - 1)) {
|
||||
pariTaulu_rows <- aPointer:(aPointer + ninds - 1 - a)
|
||||
pariTaulu[pariTaulu_rows, 1] <- ones(ninds - a, 1) * a
|
||||
pariTaulu[pariTaulu_rows, 2] <- t((a + 1):ninds)
|
||||
aPointer <- aPointer + ninds - a
|
||||
}
|
||||
|
||||
|
|
@ -31,9 +31,9 @@ newGetDistances <- function(data, rowsFromInd) {
|
|||
rm(pariTaulu, miinus)
|
||||
|
||||
x <- zeros(size(eka))
|
||||
x <- as.integer(x)
|
||||
x <- apply(x, 2, as.integer)
|
||||
y <- zeros(size(toka))
|
||||
y <- as.integer(y)
|
||||
y <- apply(y, 2, as.integer)
|
||||
|
||||
for (j in 1:nloci) {
|
||||
for (k in 1:rowsFromInd) {
|
||||
|
|
@ -57,7 +57,6 @@ newGetDistances <- function(data, rowsFromInd) {
|
|||
muut <- find(vertailuja > 0)
|
||||
dist[muut] <- summa[muut] / vertailuja[muut]
|
||||
rm(summa, vertailuja)
|
||||
|
||||
Z = linkage(t(dist))
|
||||
Z <- linkage(t(dist))
|
||||
return(list(Z = Z, dist = dist))
|
||||
}
|
||||
|
|
@ -41,7 +41,7 @@ poistaLiianPienet <- function (npops, rowsFromInd, alaraja,
|
|||
PARTITION[yksilot] == n
|
||||
}
|
||||
|
||||
# TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <<-
|
||||
# TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <-
|
||||
COUNTS[, , miniPops] <- NA
|
||||
SUMCOUNTS[miniPops, ] <- NA
|
||||
|
||||
|
|
|
|||
15
R/poistaTyhjatPopulaatiot.R
Normal file
15
R/poistaTyhjatPopulaatiot.R
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
poistaTyhjatPopulaatiot <- function(npops) {
|
||||
# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
||||
# % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.
|
||||
notEmpty <- find(any(SUMCOUNTS, 2))
|
||||
COUNTS <- COUNTS[, , notEmpty]
|
||||
SUMCOUNTS <- SUMCOUNTS[notEmpty, ]
|
||||
LOGDIFF <- LOGDIFF[, notEmpty]
|
||||
|
||||
for (n in 1:length(notEmpty)) {
|
||||
apu <- find(PARTITION == notEmpty(n))
|
||||
PARTITION[apu] <- n
|
||||
}
|
||||
npops <- length(notEmpty)
|
||||
return(npops)
|
||||
}
|
||||
7
R/rand_disc.R
Normal file
7
R/rand_disc.R
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
rand_disc <- function(CDF) {
|
||||
# %returns an index of a value from a discrete distribution using inversion method
|
||||
slump <- rand
|
||||
har <- find(CDF > slump)
|
||||
svar <- har(1)
|
||||
return(svar)
|
||||
}
|
||||
26
R/returnInOrder.R
Normal file
26
R/returnInOrder.R
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
returnInOrder <- function(inds, pop, globalRows, data, adjprior, priorTerm) {
|
||||
# % Palauttaa yksil<69>t j<>rjestyksess<73> siten, ett<74> ensimm<6D>isen<65> on
|
||||
# % se, jonka poistaminen populaatiosta pop nostaisi logml:n
|
||||
# % arvoa eniten.
|
||||
|
||||
ninds <- length(inds)
|
||||
apuTaulu <- c(inds, zeros(ninds, 1))
|
||||
|
||||
for (i in 1:ninds) {
|
||||
ind <- inds[i]
|
||||
rows <- globalRows[i, 1]:globalRows[i, 2]
|
||||
diffInCounts <- computeDiffInCounts(
|
||||
rows, size[COUNTS, 1], size[COUNTS, 2], data
|
||||
)
|
||||
diffInSumCounts <- sum(diffInCounts)
|
||||
|
||||
COUNTS[ , ,pop] <- COUNTS[ , ,pop] - diffInCounts
|
||||
SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] - diffInSumCounts
|
||||
apuTaulu[i, 2] <- computePopulationLogml(pop, adjprior, priorTerm)
|
||||
COUNTS[ , ,pop] <- COUNTS[ , ,pop] + diffInCounts
|
||||
SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] + diffInSumCounts
|
||||
}
|
||||
apuTaulu <- sortrows(apuTaulu, 2)
|
||||
inds <- apuTaulu[ninds:1, 1]
|
||||
return(inds)
|
||||
}
|
||||
|
|
@ -7,23 +7,22 @@ selvitaDigitFormat <- function(line) {
|
|||
# Genepop-formaatissa olevasta datasta. funktio selvitt<74><74>
|
||||
# rivin muodon perusteella, ovatko datan alleelit annettu
|
||||
# 2 vai 3 numeron avulla.
|
||||
|
||||
n <- 1
|
||||
merkki <- line[n]
|
||||
merkki <- substring(line, n, n)
|
||||
while (merkki != ',') {
|
||||
n <- n + 1
|
||||
merkki <- line[n]
|
||||
merkki <- substring(line, n, n)
|
||||
}
|
||||
|
||||
while (!any(merkki == '0123456789')) {
|
||||
while (!any(merkki %in% as.character(0:9))) {
|
||||
n <- n + 1
|
||||
merkki <- line[n]
|
||||
merkki <- substring(line, n, n)
|
||||
}
|
||||
numeroja <- 0
|
||||
while (any(merkki == '0123456789')) {
|
||||
while (any(merkki %in% as.character(0:9))) {
|
||||
numeroja <- numeroja + 1
|
||||
n <- n + 1
|
||||
merkki <- line[n]
|
||||
merkki <- substring(line, n, n)
|
||||
}
|
||||
|
||||
df <- numeroja / 2
|
||||
|
|
|
|||
16
R/setdiff_MATLAB.R
Normal file
16
R/setdiff_MATLAB.R
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#' @title Set differences of two arrays
|
||||
#' @description Loosely replicates the behavior of the homonym Matlab function
|
||||
#' @param A first array
|
||||
#' @param B second array
|
||||
#' @param legacy if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported)
|
||||
#' @author Waldir Leoncio
|
||||
setdiff_MATLAB <- function(A, B, legacy = FALSE) {
|
||||
if (legacy) message("legacy=TRUE not supported. Ignoring.")
|
||||
if (is(A, "numeric") & is(B, "numeric")) {
|
||||
values <- sort(unique(A[is.na(match(A, B))]))
|
||||
} else if (is(A, "data.frame") & is(B, "data.frame")) {
|
||||
stop("Not implemented for data frames")
|
||||
}
|
||||
# TODO: add support for indices (if necessary)
|
||||
return(values)
|
||||
}
|
||||
64
R/updateGlobalVariables.R
Normal file
64
R/updateGlobalVariables.R
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) {
|
||||
# % Suorittaa globaalien muuttujien muutokset, kun yksil<69> ind
|
||||
# % on siirret<65><74>n koriin i2.
|
||||
|
||||
i1 <- PARTITION[ind]
|
||||
PARTITION[ind] <- i2
|
||||
|
||||
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||
COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
|
||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts]
|
||||
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts]
|
||||
|
||||
POP_LOGML[c(i1, i2)] <- computePopulationLogml(
|
||||
c(i1, i2), adjprior, priorTerm
|
||||
)
|
||||
|
||||
LOGDIFF[, c(i1, i2)] <- -Inf
|
||||
inx <- c(find(PARTITION == i1), find(PARTITION==i2))
|
||||
LOGDIFF[inx, ] <- -Inf
|
||||
}
|
||||
|
||||
updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) {
|
||||
# % Suorittaa globaalien muuttujien muutokset, kun kaikki
|
||||
# % korissa i1 olevat yksil<69>t siirret<65><74>n koriin i2.
|
||||
|
||||
inds <- find(PARTITION == i1)
|
||||
PARTITION[inds] <- i2
|
||||
|
||||
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||
COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
|
||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts]
|
||||
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts]
|
||||
|
||||
POP_LOGML[i1] <- 0
|
||||
POP_LOGML[i2] <- computePopulationLogml(i2, adjprior, priorTerm)
|
||||
|
||||
LOGDIFF[, c(i1, i2)] <- -Inf
|
||||
inx <- c(find(PARTITION == i1), find(PARTITION == i2))
|
||||
LOGDIFF[inx, ] <- -Inf
|
||||
}
|
||||
|
||||
updateGlobalVariables3 <- function(
|
||||
muuttuvat, diffInCounts, adjprior, priorTerm, i2
|
||||
) {
|
||||
# % Suorittaa globaalien muuttujien p<>ivitykset, kun yksil<69>t 'muuttuvat'
|
||||
# % siirret<65><74>n koriin i2. Ennen siirtoa yksil<69>iden on kuuluttava samaan
|
||||
# % koriin.
|
||||
|
||||
i1 <- PARTITION[muuttuvat(1)]
|
||||
PARTITION[muuttuvat] <- i2
|
||||
|
||||
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||
COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
|
||||
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts]
|
||||
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts]
|
||||
|
||||
POP_LOGML[c(i1, i2)] <- computePopulationLogml(
|
||||
c(i1, i2), adjprior, priorTerm
|
||||
)
|
||||
|
||||
LOGDIFF[, c(i1, i2)] <- -Inf
|
||||
inx <- c(find(PARTITION == i1), find(PARTITION == i2))
|
||||
LOGDIFF[inx, ] <- -Inf
|
||||
}
|
||||
|
|
@ -10,16 +10,10 @@
|
|||
#' @param partitionSummary partitionSummary
|
||||
#' @param popnames popnames
|
||||
#' @param fixedK fixedK
|
||||
#' @param PARTITION PARTITION
|
||||
#' @param COUNTS COUNTS
|
||||
#' @param SUMCOUNTS SUMCOUNTS
|
||||
#' @param LOGDIFF LOGDIFF
|
||||
#' @export
|
||||
writeMixtureInfo <- function(
|
||||
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS,
|
||||
LOGDIFF
|
||||
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK
|
||||
) {
|
||||
|
||||
changesInLogml <- list()
|
||||
ninds <- size(data, 1) / rowsFromInd
|
||||
npops <- size(COUNTS, 3)
|
||||
|
|
@ -30,7 +24,6 @@ writeMixtureInfo <- function(
|
|||
fid <- load(outPutFile)
|
||||
} else {
|
||||
fid <- -1
|
||||
message('Diverting output to baps4_output.baps')
|
||||
# TODO: replace sink with option that will record input and output
|
||||
sink('baps4_output.baps', split=TRUE) # save in text anyway.
|
||||
}
|
||||
|
|
|
|||
18
man/admixture_initialization.Rd
Normal file
18
man/admixture_initialization.Rd
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/admixture_initialization.R
|
||||
\name{admixture_initialization}
|
||||
\alias{admixture_initialization}
|
||||
\title{Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.}
|
||||
\usage{
|
||||
admixture_initialization(data_matrix, nclusters, Z)
|
||||
}
|
||||
\arguments{
|
||||
\item{data_matrix}{data_matrix}
|
||||
|
||||
\item{nclusters}{ncluster}
|
||||
|
||||
\item{Z}{Z}
|
||||
}
|
||||
\description{
|
||||
Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.
|
||||
}
|
||||
23
man/fgetl.Rd
Normal file
23
man/fgetl.Rd
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/fgetl-fopen.R
|
||||
\name{fgetl}
|
||||
\alias{fgetl}
|
||||
\title{Read line from file, removing newline characters}
|
||||
\usage{
|
||||
fgetl(file)
|
||||
}
|
||||
\arguments{
|
||||
\item{file}{character vector to be read, usually an output of `fopen()`}
|
||||
}
|
||||
\value{
|
||||
If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1.
|
||||
}
|
||||
\description{
|
||||
Equivalent function to its homonymous Matlab equivalent.
|
||||
}
|
||||
\seealso{
|
||||
fopen
|
||||
}
|
||||
\author{
|
||||
Waldir Leoncio
|
||||
}
|
||||
23
man/fopen.Rd
Normal file
23
man/fopen.Rd
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/fgetl-fopen.R
|
||||
\name{fopen}
|
||||
\alias{fopen}
|
||||
\title{Open file}
|
||||
\usage{
|
||||
fopen(filename)
|
||||
}
|
||||
\arguments{
|
||||
\item{filename}{Path and name of file to be open}
|
||||
}
|
||||
\value{
|
||||
The same as `readLines(filename)`
|
||||
}
|
||||
\description{
|
||||
Open a text file
|
||||
}
|
||||
\seealso{
|
||||
fgetl
|
||||
}
|
||||
\author{
|
||||
Waldir Leoncio
|
||||
}
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/laskeMuutokset4.R
|
||||
% Please edit documentation in R/laskeMuutokset12345.R
|
||||
\name{laskeMuutokset4}
|
||||
\alias{laskeMuutokset4}
|
||||
\title{Calculate changes?}
|
||||
\title{Calculate changes (?)}
|
||||
\usage{
|
||||
laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0))
|
||||
}
|
||||
|
|
@ -20,6 +20,6 @@ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0))
|
|||
\description{
|
||||
Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
|
||||
muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran
|
||||
todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
|
||||
todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
|
||||
siirrettävää, on vastaavassa kohdassa rivi nollia.
|
||||
}
|
||||
|
|
|
|||
21
man/laskeOsaDist.Rd
Normal file
21
man/laskeOsaDist.Rd
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/laskeOsaDist.R
|
||||
\name{laskeOsaDist}
|
||||
\alias{laskeOsaDist}
|
||||
\title{Lower part of the dist}
|
||||
\usage{
|
||||
laskeOsaDist(inds2, dist, ninds)
|
||||
}
|
||||
\arguments{
|
||||
\item{inds2}{inds2}
|
||||
|
||||
\item{dist}{dist}
|
||||
|
||||
\item{ninds}{ninds}
|
||||
}
|
||||
\description{
|
||||
Constructs from the dist vector a subvector containing the individual inds2, Forms dist sub-vectors the vector, which includes yksiliden inds2
|
||||
}
|
||||
\author{
|
||||
Waldir Leoncio
|
||||
}
|
||||
|
|
@ -7,7 +7,7 @@
|
|||
linkage(Y, method = "co")
|
||||
}
|
||||
\arguments{
|
||||
\item{Y}{data}
|
||||
\item{Y}{matrix}
|
||||
|
||||
\item{method}{either 'si', 'av', 'co' 'ce' or 'wa'}
|
||||
}
|
||||
|
|
@ -19,4 +19,9 @@ Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single
|
|||
linkage algorithm. The input Y is a distance matrix such as is generated by
|
||||
PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
||||
output format of PDIST.
|
||||
|
||||
Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X.
|
||||
}
|
||||
\note{
|
||||
This is also a base Matlab function. The reason why the source code is also present here is unclear.
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,5 +13,5 @@ lueNimi(line)
|
|||
nimi
|
||||
}
|
||||
\description{
|
||||
Reads the line name
|
||||
Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
|
||||
}
|
||||
|
|
|
|||
46
man/matlab2r.Rd
Normal file
46
man/matlab2r.Rd
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/matlab2r.R
|
||||
\name{matlab2r}
|
||||
\alias{matlab2r}
|
||||
\title{Convert Matlab function to R}
|
||||
\usage{
|
||||
matlab2r(
|
||||
filename,
|
||||
output = "diff",
|
||||
improve_formatting = TRUE,
|
||||
change_assignment = TRUE,
|
||||
append = FALSE
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{filename}{name of the file}
|
||||
|
||||
\item{output}{can be "asis", "clean", "save" or "diff"}
|
||||
|
||||
\item{improve_formatting}{if `TRUE` (default), makes minor changes
|
||||
to conform to best-practice formatting conventions}
|
||||
|
||||
\item{change_assignment}{if `TRUE` (default), uses `<-` as the assignment operator}
|
||||
|
||||
\item{append}{if `FALSE` (default), overwrites file; otherwise, append
|
||||
output to input}
|
||||
}
|
||||
\value{
|
||||
text converted to R, printed to screen or replacing input file
|
||||
}
|
||||
\description{
|
||||
Performs basic syntax conversion from Matlab to R
|
||||
}
|
||||
\note{
|
||||
This function is intended to expedite the process of converting a
|
||||
Matlab function to R by making common replacements. It does not have the
|
||||
immediate goal of outputting a ready-to-use function. In other words,
|
||||
after using this function you should go back to it and make minor changes.
|
||||
|
||||
It is also advised to do a dry-run with `output = "clean"` and only switching
|
||||
to `output = "save"` when you are confident that no important code will be
|
||||
lost (for shorter functions, a careful visual inspection should suffice).
|
||||
}
|
||||
\author{
|
||||
Waldir Leoncio
|
||||
}
|
||||
22
man/max_MATLAB.Rd
Normal file
22
man/max_MATLAB.Rd
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/min_max_MATLAB.R
|
||||
\name{max_MATLAB}
|
||||
\alias{max_MATLAB}
|
||||
\title{Maximum (MATLAB version)}
|
||||
\usage{
|
||||
max_MATLAB(X, indices = TRUE)
|
||||
}
|
||||
\arguments{
|
||||
\item{X}{matrix}
|
||||
|
||||
\item{indices}{return indices?}
|
||||
}
|
||||
\value{
|
||||
Either a list or a vector
|
||||
}
|
||||
\description{
|
||||
Finds the minimum value for each column of a matrix, potentially returning the indices instead
|
||||
}
|
||||
\author{
|
||||
Waldir Leoncio
|
||||
}
|
||||
22
man/min_MATLAB.Rd
Normal file
22
man/min_MATLAB.Rd
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/min_max_MATLAB.R
|
||||
\name{min_MATLAB}
|
||||
\alias{min_MATLAB}
|
||||
\title{Minimum (MATLAB version)}
|
||||
\usage{
|
||||
min_MATLAB(X, indices = TRUE)
|
||||
}
|
||||
\arguments{
|
||||
\item{X}{matrix}
|
||||
|
||||
\item{indices}{return indices?}
|
||||
}
|
||||
\value{
|
||||
Either a list or a vector
|
||||
}
|
||||
\description{
|
||||
Finds the minimum value for each column of a matrix, potentially returning the indices instead
|
||||
}
|
||||
\author{
|
||||
Waldir Leoncio
|
||||
}
|
||||
23
man/nargin.Rd
Normal file
23
man/nargin.Rd
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/nargin.R
|
||||
\name{nargin}
|
||||
\alias{nargin}
|
||||
\title{Number of function input arguments}
|
||||
\usage{
|
||||
nargin()
|
||||
}
|
||||
\value{
|
||||
An integer
|
||||
}
|
||||
\description{
|
||||
Returns the number of arguments passed to the parent function
|
||||
}
|
||||
\note{
|
||||
This function only makes sense inside another function
|
||||
}
|
||||
\references{
|
||||
https://stackoverflow.com/q/64422780/1169233
|
||||
}
|
||||
\author{
|
||||
Waldir Leoncio
|
||||
}
|
||||
21
man/setdiff_MATLAB.Rd
Normal file
21
man/setdiff_MATLAB.Rd
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/setdiff_MATLAB.R
|
||||
\name{setdiff_MATLAB}
|
||||
\alias{setdiff_MATLAB}
|
||||
\title{Set differences of two arrays}
|
||||
\usage{
|
||||
setdiff_MATLAB(A, B, legacy = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{A}{first array}
|
||||
|
||||
\item{B}{second array}
|
||||
|
||||
\item{legacy}{if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported)}
|
||||
}
|
||||
\description{
|
||||
Loosely replicates the behavior of the homonym Matlab function
|
||||
}
|
||||
\author{
|
||||
Waldir Leoncio
|
||||
}
|
||||
|
|
@ -14,11 +14,7 @@ writeMixtureInfo(
|
|||
inputFile,
|
||||
partitionSummary,
|
||||
popnames,
|
||||
fixedK,
|
||||
PARTITION,
|
||||
COUNTS,
|
||||
SUMCOUNTS,
|
||||
LOGDIFF
|
||||
fixedK
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
|
|
@ -41,14 +37,6 @@ writeMixtureInfo(
|
|||
\item{popnames}{popnames}
|
||||
|
||||
\item{fixedK}{fixedK}
|
||||
|
||||
\item{PARTITION}{PARTITION}
|
||||
|
||||
\item{COUNTS}{COUNTS}
|
||||
|
||||
\item{SUMCOUNTS}{SUMCOUNTS}
|
||||
|
||||
\item{LOGDIFF}{LOGDIFF}
|
||||
}
|
||||
\description{
|
||||
Writes information about the mixture
|
||||
|
|
|
|||
|
|
@ -158,18 +158,18 @@ test_that("find works as expected", {
|
|||
})
|
||||
|
||||
test_that("sortrows works as expected", {
|
||||
mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4)
|
||||
expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4))
|
||||
expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4))
|
||||
expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ])
|
||||
mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4)
|
||||
expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4))
|
||||
expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4))
|
||||
expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ])
|
||||
})
|
||||
|
||||
test_that("cell works as expected", {
|
||||
expect_equal(cell(0), array(dim = c(0, 0)))
|
||||
expect_equal(cell(1), array(dim = c(1, 1)))
|
||||
expect_equal(cell(2), array(dim = c(2, 2)))
|
||||
expect_equal(cell(3, 4), array(dim = c(3, 4)))
|
||||
expect_equal(cell(5, 7, 6), array(dim = c(5, 7, 6)))
|
||||
expect_equivalent(cell(0), array(0, dim = c(0, 0)))
|
||||
expect_equivalent(cell(1), array(0, dim = c(1, 1)))
|
||||
expect_equivalent(cell(2), array(0, dim = c(2, 2)))
|
||||
expect_equivalent(cell(3, 4), array(0, dim = c(3, 4)))
|
||||
expect_equivalent(cell(5, 7, 6), array(0, dim = c(5, 7, 6)))
|
||||
})
|
||||
|
||||
test_that("blanks works as expected", {
|
||||
|
|
@ -201,4 +201,44 @@ test_that("isspace works as expected", {
|
|||
X <- '\t a b\tcde f'
|
||||
expect_identical(isspace(chr), c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0))
|
||||
expect_identical(isspace(X), c(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0))
|
||||
})
|
||||
|
||||
test_that("nargin works correctly", {
|
||||
addme <- function(a, b) {
|
||||
if (nargin() == 2) {
|
||||
c <- a + b
|
||||
} else if (nargin() == 1) {
|
||||
c <- a + a
|
||||
} else {
|
||||
c <- 0
|
||||
}
|
||||
return(c)
|
||||
}
|
||||
expect_equal(addme(13, 42), 55)
|
||||
expect_equal(addme(13), 26)
|
||||
expect_equal(addme(), 0)
|
||||
})
|
||||
|
||||
test_that("setdiff works as expected", {
|
||||
A <- c(3, 6, 2, 1, 5, 1, 1)
|
||||
B <- c(2, 4, 6)
|
||||
C <- c(1, 3, 5)
|
||||
# expect_equal(setdiff_MATLAB(A, B), C) # TODO: export setdiff_MATLAB
|
||||
A <- data.frame(
|
||||
Var1 = 1:5,
|
||||
Var2 = LETTERS[1:5],
|
||||
Var3 = c(FALSE, TRUE, FALSE, TRUE, FALSE)
|
||||
)
|
||||
B <- data.frame(
|
||||
Var1 = seq(1, 9, by = 2),
|
||||
Var2 = LETTERS[seq(1, 9, by = 2)],
|
||||
Var3 = rep(FALSE, 5)
|
||||
)
|
||||
C <- data.frame(
|
||||
Var1 = c(2, 4),
|
||||
Var2 = c('B', 'D'),
|
||||
Var3 = c(TRUE, TRUE)
|
||||
)
|
||||
# expect_equal(setdiff_MATLAB(A, B), C) # TODO: implement for data frames
|
||||
# TODO: add more examples from https://se.mathworks.com/help/matlab/ref/double.setdiff.html;jsessionid=0d8d42582d4d299b8224403899f1
|
||||
})
|
||||
|
|
@ -1,11 +1,17 @@
|
|||
# library(devtools)#TEMP
|
||||
# library(testthat)#TEMP
|
||||
# library(rBAPS)#TEMP
|
||||
|
||||
context("Opening files on greedyMix")
|
||||
|
||||
# greedyMix(
|
||||
# tietue = "data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt",
|
||||
# tietue = "inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt",
|
||||
# format = "GenePop",
|
||||
# savePreProcessed = FALSE
|
||||
# )
|
||||
# )
|
||||
|
||||
context("Linkage")
|
||||
|
||||
test_that("Linkages are properly calculated", {
|
||||
Y <- c(0.5, 0.3, 0.6, 0.3, 0.3, 0.2, 0.3, 0.3, 0.3, 0.5)
|
||||
expect_equal(
|
||||
object = linkage(Y),
|
||||
expected = matrix(c(2, 1, 7, 8, 4, 3, 5, 6, .2, .3, .3, .6), ncol=3)
|
||||
)
|
||||
})
|
||||
Loading…
Add table
Reference in a new issue