Merge branch 'translate-greedyMix' into dev (issue #1)
This commit is contained in:
commit
4aba2528da
84 changed files with 1927 additions and 794 deletions
|
|
@ -1,7 +1,7 @@
|
||||||
LICENSE
|
LICENSE
|
||||||
TODO.md
|
TODO.md
|
||||||
matlab
|
^matlab
|
||||||
CHANGELOG.md
|
CHANGELOG.md
|
||||||
CITATION.cff
|
CITATION.cff
|
||||||
.travis.yml
|
.travis.yml
|
||||||
data/ExamplesDataFormatting
|
inst/ext/ExamplesDataFormatting
|
||||||
10
DESCRIPTION
10
DESCRIPTION
|
|
@ -1,7 +1,7 @@
|
||||||
Package: rBAPS
|
Package: rBAPS
|
||||||
Title: Bayesian Analysis of Population Structure
|
Title: Bayesian Analysis of Population Structure
|
||||||
Version: 0.0.0.9000
|
Version: 0.0.0.9001
|
||||||
Date: 2020-01-14
|
Date: 2020-11-09
|
||||||
Authors@R:
|
Authors@R:
|
||||||
c(
|
c(
|
||||||
person(
|
person(
|
||||||
|
|
@ -30,14 +30,14 @@ Description: Partial R implementation of the BAPS software
|
||||||
Corander et al. 2008b <doi:10.1007/s00180-007-0072-x>;
|
Corander et al. 2008b <doi:10.1007/s00180-007-0072-x>;
|
||||||
Tang et al. 2009 <doi:10.1371/journal.pcbi.1000455>;
|
Tang et al. 2009 <doi:10.1371/journal.pcbi.1000455>;
|
||||||
Cheng et al. 2011 <doi:10.1186/1471-2105-12-302>,
|
Cheng et al. 2011 <doi:10.1186/1471-2105-12-302>,
|
||||||
available at <http://www.helsinki.fi/bsg/software/BAPS/Z>), provides a
|
available at <http://www.helsinki.fi/bsg/software/BAPS/Z>), provides a
|
||||||
computationally-efficient method for the identification of admixture events
|
computationally-efficient method for the identification of admixture events
|
||||||
in genetic population history.
|
in genetic population history.
|
||||||
License: GPL-3
|
License: GPL-3
|
||||||
Encoding: UTF-8
|
Encoding: UTF-8
|
||||||
LazyData: true
|
LazyData: true
|
||||||
RoxygenNote: 7.1.1
|
RoxygenNote: 7.1.1
|
||||||
Suggests:
|
Suggests:
|
||||||
testthat (>= 2.1.0)
|
testthat (>= 2.1.0)
|
||||||
Imports:
|
Imports:
|
||||||
methods
|
methods
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@ export(computeIndLogml)
|
||||||
export(computePersonalAllFreqs)
|
export(computePersonalAllFreqs)
|
||||||
export(computeRows)
|
export(computeRows)
|
||||||
export(etsiParas)
|
export(etsiParas)
|
||||||
|
export(fgetl)
|
||||||
|
export(fopen)
|
||||||
export(greedyMix)
|
export(greedyMix)
|
||||||
export(handleData)
|
export(handleData)
|
||||||
export(initPopNames)
|
export(initPopNames)
|
||||||
|
|
@ -21,6 +23,7 @@ export(linkage)
|
||||||
export(logml2String)
|
export(logml2String)
|
||||||
export(lueGenePopData)
|
export(lueGenePopData)
|
||||||
export(lueNimi)
|
export(lueNimi)
|
||||||
|
export(matlab2r)
|
||||||
export(noIndex)
|
export(noIndex)
|
||||||
export(ownNum2Str)
|
export(ownNum2Str)
|
||||||
export(poistaLiianPienet)
|
export(poistaLiianPienet)
|
||||||
|
|
@ -47,3 +50,4 @@ export(writeMixtureInfo)
|
||||||
importFrom(methods,is)
|
importFrom(methods,is)
|
||||||
importFrom(stats,runif)
|
importFrom(stats,runif)
|
||||||
importFrom(utils,read.delim)
|
importFrom(utils,read.delim)
|
||||||
|
importFrom(utils,write.table)
|
||||||
|
|
|
||||||
|
|
@ -12,22 +12,25 @@ addAlleles <- function(data, ind, line, divider) {
|
||||||
# line. Jos data on 3 digit formaatissa on divider=1000.
|
# line. Jos data on 3 digit formaatissa on divider=1000.
|
||||||
# Jos data on 2 digit formaatissa on divider=100.
|
# Jos data on 2 digit formaatissa on divider=100.
|
||||||
|
|
||||||
nloci <- size(data, 2) - 1
|
nloci <- size(data, 2) # added 1 from original code
|
||||||
if (size(data, 1) < (2 * ind)) {
|
if (size(data, 1) < (2 * ind)) {
|
||||||
data <- c(data, zeros(100, nloci + 1))
|
data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code
|
||||||
}
|
}
|
||||||
|
|
||||||
k <- 1
|
k <- 1
|
||||||
merkki <- line[k]
|
merkki <- substring(line, k, k)
|
||||||
while (merkki != ',') {
|
while (merkki != ',') {
|
||||||
k <- k + 1
|
k <- k + 1
|
||||||
merkki <- line[k]
|
merkki <- substring(line, k, k)
|
||||||
}
|
}
|
||||||
line <- line[k + 1:length(line)]
|
line <- substring(line, k + 1)
|
||||||
# clear k; clear merkki;
|
# clear k; clear merkki;
|
||||||
|
|
||||||
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
|
if (grepl(" ", line)) {
|
||||||
|
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
|
||||||
|
} else if (grepl("\t", line)) {
|
||||||
|
alleeliTaulu <- as.numeric(strsplit(line, split = "\t")[[1]])
|
||||||
|
}
|
||||||
|
|
||||||
if (length(alleeliTaulu) != nloci) {
|
if (length(alleeliTaulu) != nloci) {
|
||||||
stop('Incorrect data format.')
|
stop('Incorrect data format.')
|
||||||
|
|
@ -35,9 +38,9 @@ addAlleles <- function(data, ind, line, divider) {
|
||||||
|
|
||||||
for (j in seq_len(nloci)) {
|
for (j in seq_len(nloci)) {
|
||||||
ekaAlleeli <- floor(alleeliTaulu[j] / divider)
|
ekaAlleeli <- floor(alleeliTaulu[j] / divider)
|
||||||
if (ekaAlleeli == 0) ekaAlleeli <- -999
|
if (is.na(ekaAlleeli) | ekaAlleeli == 0) ekaAlleeli <- -999
|
||||||
tokaAlleeli <- alleeliTaulu[j] %% divider
|
tokaAlleeli <- alleeliTaulu[j] %% divider
|
||||||
if (tokaAlleeli == 0) tokaAlleeli <- -999
|
if (is.na(tokaAlleeli) | tokaAlleeli == 0) tokaAlleeli <- -999
|
||||||
|
|
||||||
data[2 * ind - 1, j] <- ekaAlleeli
|
data[2 * ind - 1, j] <- ekaAlleeli
|
||||||
data[2 * ind, j] <- tokaAlleeli
|
data[2 * ind, j] <- tokaAlleeli
|
||||||
|
|
|
||||||
18
R/addToSummary.R
Normal file
18
R/addToSummary.R
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
addToSummary <- function(logml, partitionSummary, worstIndex) {
|
||||||
|
# Tiedet<65><74>n, ett<74> annettu logml on isompi kuin huonoin arvo
|
||||||
|
# partitionSummary taulukossa. Jos partitionSummary:ss<73> ei viel<65> ole
|
||||||
|
# annettua logml arvoa, niin lis<69>t<EFBFBD><74>n worstIndex:in kohtaan uusi logml ja
|
||||||
|
# nykyist<73> partitiota vastaava nclusters:in arvo. Muutoin ei tehd<68> mit<69><74>n.
|
||||||
|
|
||||||
|
apu <- find(abs(partitionSummary[, 2] - logml) < 1e-5)
|
||||||
|
if (isempty(apu)) {
|
||||||
|
# Nyt l<>ydetty partitio ei ole viel<65> kirjattuna summaryyn.
|
||||||
|
npops <- length(unique(PARTITION))
|
||||||
|
partitionSummary[worstIndex, 1] <- npops
|
||||||
|
partitionSummary[worstIndex, 2] <- logml
|
||||||
|
added <- 1
|
||||||
|
} else {
|
||||||
|
added <- 0
|
||||||
|
}
|
||||||
|
return(list(partitionSummary = partitionSummary, added = added))
|
||||||
|
}
|
||||||
20
R/admixture_initialization.R
Normal file
20
R/admixture_initialization.R
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
#' @title Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.
|
||||||
|
#' @param data_matrix data_matrix
|
||||||
|
#' @param nclusters ncluster
|
||||||
|
#' @param Z Z
|
||||||
|
|
||||||
|
admixture_initialization <- function (data_matrix, nclusters, Z) {
|
||||||
|
size_data <- size(data_matrix)
|
||||||
|
nloci <- size_data[2] - 1
|
||||||
|
n <- max(data_matrix[, ncol(data_matrix)])
|
||||||
|
T <- cluster_own(Z, nclusters)
|
||||||
|
initial_partition <- zeros(size_data[1], 1)
|
||||||
|
for (i in 1:n) {
|
||||||
|
kori <- T[i]
|
||||||
|
here <- find(data_matrix[, ncol(data_matrix)] == i)
|
||||||
|
for (j in 1:length(here)) {
|
||||||
|
initial_partition[here[j], 1] <- kori
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(initial_partition)
|
||||||
|
}
|
||||||
14
R/arvoSeuraavaTi.R
Normal file
14
R/arvoSeuraavaTi.R
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
arvoSeuraavaTila <- function(muutokset, logml) {
|
||||||
|
# Suorittaa yksil<69>n seuraavan tilan arvonnan
|
||||||
|
|
||||||
|
y <- logml + muutokset # siirron j<>lkeiset logml:t
|
||||||
|
y <- y - max(y)
|
||||||
|
y <- exp(y)
|
||||||
|
summa <- sum(y)
|
||||||
|
y <- y / summa
|
||||||
|
y <- cumsum(y)
|
||||||
|
|
||||||
|
i2 <- rand_disc(y) # uusi kori
|
||||||
|
suurin <- muutokset(i2)
|
||||||
|
return(list(suurin = suurin, i2 = i2))
|
||||||
|
}
|
||||||
6
R/cell.R
6
R/cell.R
|
|
@ -6,10 +6,10 @@
|
||||||
#' @return An array of zeroes with the dimensions passed on call
|
#' @return An array of zeroes with the dimensions passed on call
|
||||||
cell <- function(n, sz = c(n, n), ...) {
|
cell <- function(n, sz = c(n, n), ...) {
|
||||||
if (length(sz) == 1 & missing(...)) {
|
if (length(sz) == 1 & missing(...)) {
|
||||||
return(array(dim = c(n, sz)))
|
return(array(0, dim = c(n, sz)))
|
||||||
} else if (length(sz) == 2) {
|
} else if (length(sz) == 2) {
|
||||||
return(array(dim = sz))
|
return(array(0, dim = sz))
|
||||||
} else {
|
} else {
|
||||||
return(array(dim = c(n, sz, ...)))
|
return(array(0, dim = c(n, sz, ...)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
clearGlobalVars <- function() {
|
clearGlobalVars <- function() {
|
||||||
COUNTS <- SUMCOUNTS <- PARTITION <- POP_LOGML <- vector() # placeholders
|
COUNTS <- vector()
|
||||||
COUNTS <<- vector()
|
SUMCOUNTS <- vector()
|
||||||
SUMCOUNTS <<- vector()
|
PARTITION <- vector()
|
||||||
PARTITION <<- vector()
|
POP_LOGML <- vector()
|
||||||
POP_LOGML <<- vector()
|
LOGDIFF <- vector()
|
||||||
}
|
}
|
||||||
51
R/cluster_own.R
Normal file
51
R/cluster_own.R
Normal file
|
|
@ -0,0 +1,51 @@
|
||||||
|
cluster_own <- function(Z, nclust) {
|
||||||
|
true <- TRUE
|
||||||
|
false <- FALSE
|
||||||
|
maxclust <- nclust
|
||||||
|
# % Start of algorithm
|
||||||
|
m <- size(Z, 1) + 1
|
||||||
|
T <- zeros(m, 1)
|
||||||
|
# % maximum number of clusters based on inconsistency
|
||||||
|
if (m <= maxclust) {
|
||||||
|
T = t((1:m))
|
||||||
|
} else if (maxclust == 1) {
|
||||||
|
T <- ones(m, 1)
|
||||||
|
} else {
|
||||||
|
clsnum <- 1
|
||||||
|
for (k in (m - maxclust + 1):(m - 1)) {
|
||||||
|
i = Z(k, 1) # left tree
|
||||||
|
if (i <= m) { # original node, no leafs
|
||||||
|
T[i] = clsnum
|
||||||
|
clsnum = clsnum + 1
|
||||||
|
} else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree
|
||||||
|
T <- clusternum(Z, T, i - m, clsnum)
|
||||||
|
clsnum <- clsnum + 1
|
||||||
|
}
|
||||||
|
i <- Z(k, 2) # right tree
|
||||||
|
if (i <= m) { # original node, no leafs
|
||||||
|
T[i] <- clsnum
|
||||||
|
clsnum <- clsnum + 1
|
||||||
|
} else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree
|
||||||
|
T <- clusternum(Z, T, i - m, clsnum)
|
||||||
|
clsnum <- clsnum + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(T)
|
||||||
|
}
|
||||||
|
|
||||||
|
clusternum <- function(X, T, k, c) {
|
||||||
|
m <- size(X, 1) + 1
|
||||||
|
while (!isempty(k)) {
|
||||||
|
# Get the children of nodes at this level
|
||||||
|
children <- X[k, 1:2]
|
||||||
|
children <- children
|
||||||
|
|
||||||
|
# Assign this node number to leaf children
|
||||||
|
t <- (children <= m)
|
||||||
|
T[children[t]] <- c
|
||||||
|
# Move to next level
|
||||||
|
k <- children(!t) - m
|
||||||
|
}
|
||||||
|
return(T)
|
||||||
|
}
|
||||||
16
R/clusternum.R
Normal file
16
R/clusternum.R
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
clusternum <- function(X, T, k, c) {
|
||||||
|
m <- size(X, 1) + 1
|
||||||
|
while (!is.null(k)) {
|
||||||
|
# % Get the children of nodes at this level
|
||||||
|
children <- X[k, 1:2]
|
||||||
|
children <- children[, ]
|
||||||
|
|
||||||
|
# % Assign this node number to leaf children
|
||||||
|
t <- (children <= m)
|
||||||
|
T[children(t)] <- c
|
||||||
|
|
||||||
|
# % Move to next level
|
||||||
|
k <- children(!t) - m
|
||||||
|
}
|
||||||
|
return(T)
|
||||||
|
}
|
||||||
17
R/computeDiffInCounts.R
Normal file
17
R/computeDiffInCounts.R
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
computeDiffInCounts <- function(rows, max_noalle, nloci, data) {
|
||||||
|
# % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
|
||||||
|
# % lukum<75><6D>r<EFBFBD>t (vastaavasti kuin COUNTS:issa), jotka ovat data:n
|
||||||
|
# % riveill<6C> rows. rows pit<69><74> olla vaakavektori.
|
||||||
|
|
||||||
|
diffInCounts <- zeros(max_noalle, nloci)
|
||||||
|
for (i in rows) {
|
||||||
|
row <- data[i, ]
|
||||||
|
notEmpty <- find(row>=0)
|
||||||
|
|
||||||
|
if (length(notEmpty) > 0) {
|
||||||
|
diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] <-
|
||||||
|
diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(diffInCounts)
|
||||||
|
}
|
||||||
27
R/computeLogml.R
Normal file
27
R/computeLogml.R
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
computeLogml <- function(counts, sumcounts, noalle, data, rowsFromInd) {
|
||||||
|
nloci <- size(counts, 2)
|
||||||
|
npops <- size(counts, 3)
|
||||||
|
adjnoalle <- zeros(max(noalle), nloci)
|
||||||
|
for (j in 1:nloci) {
|
||||||
|
adjnoalle[1:noalle[j], j] <- noalle(j)
|
||||||
|
if ((noalle(j)<max(noalle))) {
|
||||||
|
adjnoalle[noalle[j] + 1:ncol(adjnoalle), j] <- 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rowsInG <- size(data, 1) + rowsFromInd
|
||||||
|
|
||||||
|
logml <- sum(
|
||||||
|
sum(
|
||||||
|
sum(
|
||||||
|
GAMMA_LN[
|
||||||
|
counts + 1 +
|
||||||
|
repmat(rowsInG * (adjnoalle - 1), c(1, 1, npops))
|
||||||
|
]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
) -
|
||||||
|
npops * sum(sum(GAMMA_LN[1, adjnoalle])) -
|
||||||
|
sum(sum(GAMMA_LN[sumcounts + 1, 1]))
|
||||||
|
return(logml)
|
||||||
|
}
|
||||||
24
R/computePopulationLogml.R
Normal file
24
R/computePopulationLogml.R
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
computePopulationLogml <- function(pops, adjprior, priorTerm) {
|
||||||
|
# Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset
|
||||||
|
|
||||||
|
x <- size(COUNTS, 1)
|
||||||
|
y <- size(COUNTS, 2)
|
||||||
|
z <- length(pops)
|
||||||
|
|
||||||
|
popLogml <- squeeze(
|
||||||
|
sum(
|
||||||
|
sum(
|
||||||
|
reshape(
|
||||||
|
lgamma(
|
||||||
|
repmat(adjprior, c(1, 1, length(pops))) +
|
||||||
|
COUNTS[, , pops]
|
||||||
|
),
|
||||||
|
c(x, y, z)
|
||||||
|
),
|
||||||
|
1
|
||||||
|
),
|
||||||
|
2
|
||||||
|
)
|
||||||
|
) - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm
|
||||||
|
return(popLogml)
|
||||||
|
}
|
||||||
27
R/fgetl-fopen.R
Normal file
27
R/fgetl-fopen.R
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
#' @title Read line from file, removing newline characters
|
||||||
|
#' @description Equivalent function to its homonymous Matlab equivalent.
|
||||||
|
#' @param file character vector to be read, usually an output of `fopen()`
|
||||||
|
#' @return If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1.
|
||||||
|
#' @author Waldir Leoncio
|
||||||
|
#' @seealso fopen
|
||||||
|
#' @export
|
||||||
|
fgetl <- function(file) {
|
||||||
|
# ==========================================================================
|
||||||
|
# Validation
|
||||||
|
# ==========================================================================
|
||||||
|
if (length(file) <= 1) return(-1)
|
||||||
|
# ==========================================================================
|
||||||
|
# Returning file minus the first line
|
||||||
|
# ==========================================================================
|
||||||
|
out <- file[-1]
|
||||||
|
return(out)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' @title Open file
|
||||||
|
#' @description Open a text file
|
||||||
|
#' @param filename Path and name of file to be open
|
||||||
|
#' @return The same as `readLines(filename)`
|
||||||
|
#' @author Waldir Leoncio
|
||||||
|
#' @seealso fgetl
|
||||||
|
#' @export
|
||||||
|
fopen <- function(filename) readLines(filename)
|
||||||
12
R/findEmptyPop.R
Normal file
12
R/findEmptyPop.R
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
findEmptyPop <- function(npops) {
|
||||||
|
# % Palauttaa ensimm<6D>isen tyhj<68>n populaation indeksin. Jos tyhji<6A>
|
||||||
|
# % populaatioita ei ole, palauttaa -1:n.
|
||||||
|
pops <- t(unique(PARTITION))
|
||||||
|
if (length(pops) == npops) {
|
||||||
|
emptyPop <- -1
|
||||||
|
} else {
|
||||||
|
popDiff <- diff(c(0, pops, npops + 1))
|
||||||
|
emptyPop <- min(find(popDiff > 1))
|
||||||
|
}
|
||||||
|
return(list(emptyPop = emptyPop, pops = pops))
|
||||||
|
}
|
||||||
43
R/getDistances.R
Normal file
43
R/getDistances.R
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
getDistances <- function(data_matrix, nclusters) {
|
||||||
|
|
||||||
|
# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance
|
||||||
|
# %gives partition in 8 - bit format
|
||||||
|
# %allocates all alleles of a single individual into the same basket
|
||||||
|
# %data_matrix contains #Loci + 1 columns, last column indicate whose alleles are placed in each row,
|
||||||
|
# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row
|
||||||
|
# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix.
|
||||||
|
|
||||||
|
size_data <- size(data_matrix)
|
||||||
|
nloci <- size_data[2] - 1
|
||||||
|
n <- max(data_matrix[, ncol(data_matrix)])
|
||||||
|
distances <- zeros(choose(n, 2), 1)
|
||||||
|
pointer <- 1
|
||||||
|
for (i in 1:n - 1) {
|
||||||
|
i_data <- data_matrix[
|
||||||
|
find(data_matrix[, ncol(data_matrix)] == i),
|
||||||
|
1:nloci
|
||||||
|
]
|
||||||
|
for (j in (i + 1):n) {
|
||||||
|
d_ij <- 0
|
||||||
|
j_data <- data_matrix[find(data_matrix[, ncol()] == j), 1:nloci]
|
||||||
|
vertailuja <- 0
|
||||||
|
for (k in 1:size(i_data, 1)) {
|
||||||
|
for (l in 1:size(j_data, 1)) {
|
||||||
|
here_i <- find(i_data[k, ] >= 0)
|
||||||
|
here_j <- find(j_data[l, ] >= 0)
|
||||||
|
here_joint <- intersect(here_i, here_j)
|
||||||
|
vertailuja <- vertailuja + length(here_joint)
|
||||||
|
d_ij <- d_ij + length(
|
||||||
|
find(i_data[k, here_joint] != j_data[l, here_joint])
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d_ij <- d_ij / vertailuja
|
||||||
|
distances[pointer] <- d_ij
|
||||||
|
pointer <- pointer + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Z <- linkage(t(distances))
|
||||||
|
return(list(Z = Z, distances = distances))
|
||||||
|
}
|
||||||
3
R/globals.R
Normal file
3
R/globals.R
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
utils::globalVariables(
|
||||||
|
c("PARTITION", "COUNTS", "SUMCOUNTS", "LOGDIFF", "POP_LOGML", "GAMMA_LN")
|
||||||
|
)
|
||||||
689
R/greedyMix.R
689
R/greedyMix.R
|
|
@ -11,10 +11,11 @@ greedyMix <- function(
|
||||||
savePreProcessed = NULL,
|
savePreProcessed = NULL,
|
||||||
filePreProcessed = NULL
|
filePreProcessed = NULL
|
||||||
) {
|
) {
|
||||||
# ASK: graphical components. Remove?
|
# ASK: Unclear when fixedk == TRUE. Remove?
|
||||||
# check whether fixed k mode is selected
|
# check whether fixed k mode is selected
|
||||||
# h0 <- findobj('Tag','fixk_menu')
|
# h0 <- findobj('Tag','fixk_menu')
|
||||||
# fixedK = get(h0, 'userdata');
|
# fixedK = get(h0, 'userdata');
|
||||||
|
fixedK <- FALSE
|
||||||
|
|
||||||
# if fixedK
|
# if fixedK
|
||||||
# if ~(fixKWarning == 1) % call function fixKWarning
|
# if ~(fixKWarning == 1) % call function fixKWarning
|
||||||
|
|
@ -22,9 +23,11 @@ greedyMix <- function(
|
||||||
# end
|
# end
|
||||||
# end
|
# end
|
||||||
|
|
||||||
|
# ASK: ditto
|
||||||
# % check whether partition compare mode is selected
|
# % check whether partition compare mode is selected
|
||||||
# h1 = findobj('Tag','partitioncompare_menu');
|
# h1 = findobj('Tag','partitioncompare_menu');
|
||||||
# partitionCompare = get(h1, 'userdata');
|
# partitionCompare = get(h1, 'userdata');
|
||||||
|
partitionCompare <- FALSE
|
||||||
|
|
||||||
if (is(tietue, "list") | is(tietue, "character")) {
|
if (is(tietue, "list") | is(tietue, "character")) {
|
||||||
# ----------------------------------------------------------------------
|
# ----------------------------------------------------------------------
|
||||||
|
|
@ -149,13 +152,25 @@ greedyMix <- function(
|
||||||
|
|
||||||
kunnossa <- testaaGenePopData(filename_pathname)
|
kunnossa <- testaaGenePopData(filename_pathname)
|
||||||
if (kunnossa == 0) stop("testaaGenePopData returned 0")
|
if (kunnossa == 0) stop("testaaGenePopData returned 0")
|
||||||
# [data,popnames]=lueGenePopData([pathname filename]); # TODO: trans
|
data_popnames <- lueGenePopData(filename_pathname)
|
||||||
|
data <- data_popnames$data
|
||||||
|
popnames <- data_popnames$popnames
|
||||||
|
|
||||||
# h0 = findobj('Tag','filename1_text');
|
# h0 = findobj('Tag','filename1_text');
|
||||||
# set(h0,'String',filename); clear h0;
|
# set(h0,'String',filename); clear h0;
|
||||||
|
|
||||||
# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO:trans
|
list_dranap <- handleData(data)
|
||||||
# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: trans
|
data <- list_dranap$newData
|
||||||
|
rowsFromInd <- list_dranap$rowsFromInd
|
||||||
|
alleleCodes <- list_dranap$alleleCodes
|
||||||
|
noalle <- list_dranap$noalle
|
||||||
|
adjprior <- list_dranap$adjprior
|
||||||
|
priorTerm <- list_dranap$prioterm
|
||||||
|
|
||||||
|
list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug
|
||||||
|
Z <- list_Zd$Z
|
||||||
|
dist <- list_Zd$dist
|
||||||
|
|
||||||
if (is.null(savePreProcessed)) {
|
if (is.null(savePreProcessed)) {
|
||||||
save_preproc <- questdlg(
|
save_preproc <- questdlg(
|
||||||
quest = 'Do you wish to save pre-processed data?',
|
quest = 'Do you wish to save pre-processed data?',
|
||||||
|
|
@ -232,15 +247,17 @@ greedyMix <- function(
|
||||||
}
|
}
|
||||||
|
|
||||||
# ==========================================================================
|
# ==========================================================================
|
||||||
# Declaring global variables
|
# Declaring global variables and changing environment of children functions
|
||||||
# ==========================================================================
|
# ==========================================================================
|
||||||
PARTITION <- vector()
|
PARTITION <- vector()
|
||||||
COUNTS <- vector()
|
COUNTS <- vector()
|
||||||
SUMCOUNTS <- vector()
|
SUMCOUNTS <- vector()
|
||||||
POP_LOGML <- vector()
|
POP_LOGML <- vector()
|
||||||
clearGlobalVars <- vector()
|
clearGlobalVars()
|
||||||
# ==========================================================================
|
|
||||||
|
|
||||||
|
environment(writeMixtureInfo) <- environment()
|
||||||
|
# ==========================================================================
|
||||||
|
c <- list()
|
||||||
c$data <- data
|
c$data <- data
|
||||||
c$noalle <- noalle
|
c$noalle <- noalle
|
||||||
c$adjprior <- adjprior
|
c$adjprior <- adjprior
|
||||||
|
|
@ -253,43 +270,49 @@ greedyMix <- function(
|
||||||
ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd)
|
ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd)
|
||||||
c$rows <- c(ekat, ekat + rowsFromInd - 1)
|
c$rows <- c(ekat, ekat + rowsFromInd - 1)
|
||||||
|
|
||||||
|
# ASK remove?
|
||||||
# partition compare
|
# partition compare
|
||||||
if (!is.null(partitionCompare)) {
|
# if (!is.null(partitionCompare)) {
|
||||||
nsamplingunits <- size(c$rows, 1)
|
# nsamplingunits <- size(c$rows, 1)
|
||||||
partitions <- partitionCompare$partitions
|
# partitions <- partitionCompare$partitions
|
||||||
npartitions <- size(partitions, 2)
|
# npartitions <- size(partitions, 2)
|
||||||
partitionLogml <- zeros(1, npartitions)
|
# partitionLogml <- zeros(1, npartitions)
|
||||||
for (i in seq_len(npartitions)) {
|
# for (i in seq_len(npartitions)) {
|
||||||
# number of unique partition lables
|
# # number of unique partition lables
|
||||||
npops <- length(unique(partitions[, i]))
|
# npops <- length(unique(partitions[, i]))
|
||||||
|
|
||||||
partitionInd <- zeros(ninds * rowsFromInd, 1)
|
# partitionInd <- zeros(ninds * rowsFromInd, 1)
|
||||||
partitionSample <- partitions[, i]
|
# partitionSample <- partitions[, i]
|
||||||
for (j in seq_len(nsamplingunits)) {
|
# for (j in seq_len(nsamplingunits)) {
|
||||||
partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
|
# partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
|
||||||
}
|
# }
|
||||||
# partitionLogml[i] = initialCounts(
|
# # partitionLogml[i] = initialCounts(
|
||||||
# partitionInd,
|
# # partitionInd,
|
||||||
# data[, seq_len(end - 1)],
|
# # data[, seq_len(end - 1)],
|
||||||
# npops,
|
# # npops,
|
||||||
# c$rows,
|
# # c$rows,
|
||||||
# noalle,
|
# # noalle,
|
||||||
# adjprior
|
# # adjprior
|
||||||
# ) #TODO translate
|
# # ) #TODO translate
|
||||||
}
|
# }
|
||||||
# return the logml result
|
# # return the logml result
|
||||||
partitionCompare$logmls <- partitionLogml
|
# partitionCompare$logmls <- partitionLogml
|
||||||
# set(h1, 'userdata', partitionCompare) # ASK remove?
|
# # set(h1, 'userdata', partitionCompare)
|
||||||
return()
|
# return()
|
||||||
}
|
|
||||||
|
|
||||||
# ASK remove (graphical part)?
|
|
||||||
# if (fixedK) {
|
|
||||||
# #logml_npops_partitionSummary <- indMix_fixK(c) # ASK translate?
|
|
||||||
# } else {
|
|
||||||
# #logml_npops_partitionSummary <- indMix(c) # ASK translate?
|
|
||||||
# }
|
# }
|
||||||
# if (logml_npops_partitionSummary$logml == 1) return()
|
|
||||||
|
if (fixedK) {
|
||||||
|
# logml_npops_partitionSummary <- indMix_fixK(c) # TODO: translate
|
||||||
|
# logml <- logml_npops_partitionSummary$logml
|
||||||
|
# npops <- logml_npops_partitionSummary$npops
|
||||||
|
# partitionSummary <- logml_npops_partitionSummary$partitionSummary
|
||||||
|
} else {
|
||||||
|
logml_npops_partitionSummary <- indMix(c) # TODO: translate
|
||||||
|
logml <- logml_npops_partitionSummary$logml
|
||||||
|
npops <- logml_npops_partitionSummary$npops
|
||||||
|
partitionSummary <- logml_npops_partitionSummary$partitionSummary
|
||||||
|
}
|
||||||
|
if (logml_npops_partitionSummary$logml == 1) return()
|
||||||
|
|
||||||
data <- data[, seq_len(ncol(data) - 1)]
|
data <- data[, seq_len(ncol(data) - 1)]
|
||||||
|
|
||||||
|
|
@ -298,11 +321,13 @@ greedyMix <- function(
|
||||||
# inp = get(h0,'String');
|
# inp = get(h0,'String');
|
||||||
# h0 = findobj('Tag','filename2_text')
|
# h0 = findobj('Tag','filename2_text')
|
||||||
# outp = get(h0,'String');
|
# outp = get(h0,'String');
|
||||||
|
inp <- vector()
|
||||||
|
outp <- vector()
|
||||||
|
|
||||||
changesInLogml <- writeMixtureInfo(
|
changesInLogml <- writeMixtureInfo(
|
||||||
logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
|
logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
|
||||||
popnames, fixedK
|
popnames, fixedK
|
||||||
) # FIXMEL depends on get function above
|
) # FIXME: broken
|
||||||
|
|
||||||
# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder
|
# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder
|
||||||
|
|
||||||
|
|
@ -356,583 +381,3 @@ greedyMix <- function(
|
||||||
if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps')
|
if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# %-------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex)
|
|
||||||
# % Tiedet<65><74>n, ett?annettu logml on isompi kuin huonoin arvo
|
|
||||||
# % partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole
|
|
||||||
# % annettua logml arvoa, niin lis<69>t<EFBFBD><74>n worstIndex:in kohtaan uusi logml ja
|
|
||||||
# % nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mit<69><74>n.
|
|
||||||
|
|
||||||
# apu = find(abs(partitionSummary(:,2)-logml)<1e-5);
|
|
||||||
# if isempty(apu)
|
|
||||||
# % Nyt l<>ydetty partitio ei ole viel?kirjattuna summaryyn.
|
|
||||||
# global PARTITION;
|
|
||||||
# npops = length(unique(PARTITION));
|
|
||||||
# partitionSummary(worstIndex,1) = npops;
|
|
||||||
# partitionSummary(worstIndex,2) = logml;
|
|
||||||
# added = 1;
|
|
||||||
# else
|
|
||||||
# added = 0;
|
|
||||||
# end
|
|
||||||
|
|
||||||
|
|
||||||
# %--------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function [suurin, i2] = arvoSeuraavaTila(muutokset, logml)
|
|
||||||
# % Suorittaa yksil<69>n seuraavan tilan arvonnan
|
|
||||||
|
|
||||||
# y = logml + muutokset; % siirron j<>lkeiset logml:t
|
|
||||||
# y = y - max(y);
|
|
||||||
# y = exp(y);
|
|
||||||
# summa = sum(y);
|
|
||||||
# y = y/summa;
|
|
||||||
# y = cumsum(y);
|
|
||||||
|
|
||||||
# i2 = rand_disc(y); % uusi kori
|
|
||||||
# suurin = muutokset(i2);
|
|
||||||
|
|
||||||
|
|
||||||
# %--------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function svar=rand_disc(CDF)
|
|
||||||
# %returns an index of a value from a discrete distribution using inversion method
|
|
||||||
# slump=rand;
|
|
||||||
# har=find(CDF>slump);
|
|
||||||
# svar=har(1);
|
|
||||||
|
|
||||||
|
|
||||||
# %-------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ...
|
|
||||||
# adjprior, priorTerm)
|
|
||||||
# % Suorittaa globaalien muuttujien muutokset, kun yksil?ind
|
|
||||||
# % on siirret<65><74>n koriin i2.
|
|
||||||
|
|
||||||
# global PARTITION;
|
|
||||||
# global COUNTS;
|
|
||||||
# global SUMCOUNTS;
|
|
||||||
# global POP_LOGML;
|
|
||||||
|
|
||||||
# i1 = PARTITION(ind);
|
|
||||||
# PARTITION(ind)=i2;
|
|
||||||
|
|
||||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
|
|
||||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
|
|
||||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
|
|
||||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
|
|
||||||
|
|
||||||
# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
|
||||||
|
|
||||||
|
|
||||||
# %---------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function updateGlobalVariables2( ...
|
|
||||||
# i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm);
|
|
||||||
# % Suorittaa globaalien muuttujien muutokset, kun kaikki
|
|
||||||
# % korissa i1 olevat yksil<69>t siirret<65><74>n koriin i2.
|
|
||||||
|
|
||||||
# global PARTITION;
|
|
||||||
# global COUNTS;
|
|
||||||
# global SUMCOUNTS;
|
|
||||||
# global POP_LOGML;
|
|
||||||
|
|
||||||
# inds = find(PARTITION==i1);
|
|
||||||
# PARTITION(inds) = i2;
|
|
||||||
|
|
||||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
|
|
||||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
|
|
||||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
|
|
||||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
|
|
||||||
|
|
||||||
# POP_LOGML(i1) = 0;
|
|
||||||
# POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm);
|
|
||||||
|
|
||||||
|
|
||||||
# %------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ...
|
|
||||||
# adjprior, priorTerm, i2);
|
|
||||||
# % Suorittaa globaalien muuttujien p<>ivitykset, kun yksil<69>t 'muuttuvat'
|
|
||||||
# % siirret<65><74>n koriin i2. Ennen siirtoa yksil<69>iden on kuuluttava samaan
|
|
||||||
# % koriin.
|
|
||||||
|
|
||||||
# global PARTITION;
|
|
||||||
# global COUNTS;
|
|
||||||
# global SUMCOUNTS;
|
|
||||||
# global POP_LOGML;
|
|
||||||
|
|
||||||
# i1 = PARTITION(muuttuvat(1));
|
|
||||||
# PARTITION(muuttuvat) = i2;
|
|
||||||
|
|
||||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
|
|
||||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
|
|
||||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
|
|
||||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
|
|
||||||
|
|
||||||
# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
|
||||||
|
|
||||||
|
|
||||||
# %----------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm)
|
|
||||||
# % Palauttaa yksil<69>t j<>rjestyksess?siten, ett?ensimm<6D>isen?on
|
|
||||||
# % se, jonka poistaminen populaatiosta pop nostaisi logml:n
|
|
||||||
# % arvoa eniten.
|
|
||||||
|
|
||||||
# global COUNTS; global SUMCOUNTS;
|
|
||||||
# ninds = length(inds);
|
|
||||||
# apuTaulu = [inds, zeros(ninds,1)];
|
|
||||||
|
|
||||||
# for i=1:ninds
|
|
||||||
# ind = inds(i);
|
|
||||||
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
|
|
||||||
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
|
||||||
# diffInSumCounts = sum(diffInCounts);
|
|
||||||
|
|
||||||
# COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts;
|
|
||||||
# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts;
|
|
||||||
# apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm);
|
|
||||||
# COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts;
|
|
||||||
# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts;
|
|
||||||
# end
|
|
||||||
# apuTaulu = sortrows(apuTaulu,2);
|
|
||||||
# inds = apuTaulu(ninds:-1:1,1);
|
|
||||||
|
|
||||||
# %------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function [muutokset, diffInCounts] = ...
|
|
||||||
# laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm)
|
|
||||||
# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
|
|
||||||
# % muutos logml:ss? mik<69>li yksil?ind siirret<65><74>n koriin i.
|
|
||||||
# % diffInCounts on poistettava COUNTS:in siivusta i1 ja lis<69>tt<74>v?
|
|
||||||
# % COUNTS:in siivuun i2, mik<69>li muutos toteutetaan.
|
|
||||||
|
|
||||||
# global COUNTS; global SUMCOUNTS;
|
|
||||||
# global PARTITION; global POP_LOGML;
|
|
||||||
# npops = size(COUNTS,3);
|
|
||||||
# muutokset = zeros(npops,1);
|
|
||||||
|
|
||||||
# i1 = PARTITION(ind);
|
|
||||||
# i1_logml = POP_LOGML(i1);
|
|
||||||
|
|
||||||
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
|
|
||||||
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
|
||||||
# diffInSumCounts = sum(diffInCounts);
|
|
||||||
|
|
||||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
|
|
||||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
|
|
||||||
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
|
|
||||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
|
||||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
|
||||||
|
|
||||||
# i2 = [1:i1-1 , i1+1:npops];
|
|
||||||
# i2_logml = POP_LOGML(i2);
|
|
||||||
|
|
||||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
|
||||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
|
||||||
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
|
|
||||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
|
|
||||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
|
|
||||||
|
|
||||||
# muutokset(i2) = new_i1_logml - i1_logml ...
|
|
||||||
# + new_i2_logml - i2_logml;
|
|
||||||
|
|
||||||
|
|
||||||
# %------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function [muutokset, diffInCounts] = laskeMuutokset2( ...
|
|
||||||
# i1, rowsFromInd, data, adjprior, priorTerm);
|
|
||||||
# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
|
|
||||||
# % muutos logml:ss? mik<69>li korin i1 kaikki yksil<69>t siirret<65><74>n
|
|
||||||
# % koriin i.
|
|
||||||
|
|
||||||
# global COUNTS; global SUMCOUNTS;
|
|
||||||
# global PARTITION; global POP_LOGML;
|
|
||||||
# npops = size(COUNTS,3);
|
|
||||||
# muutokset = zeros(npops,1);
|
|
||||||
|
|
||||||
# i1_logml = POP_LOGML(i1);
|
|
||||||
|
|
||||||
# inds = find(PARTITION==i1);
|
|
||||||
# ninds = length(inds);
|
|
||||||
|
|
||||||
# if ninds==0
|
|
||||||
# diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2));
|
|
||||||
# return;
|
|
||||||
# end
|
|
||||||
|
|
||||||
# rows = computeRows(rowsFromInd, inds, ninds);
|
|
||||||
|
|
||||||
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
|
||||||
# diffInSumCounts = sum(diffInCounts);
|
|
||||||
|
|
||||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
|
|
||||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
|
|
||||||
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
|
|
||||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
|
||||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
|
||||||
|
|
||||||
# i2 = [1:i1-1 , i1+1:npops];
|
|
||||||
# i2_logml = POP_LOGML(i2);
|
|
||||||
|
|
||||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
|
||||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
|
||||||
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
|
|
||||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
|
|
||||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
|
|
||||||
|
|
||||||
# muutokset(i2) = new_i1_logml - i1_logml ...
|
|
||||||
# + new_i2_logml - i2_logml;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# %------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ...
|
|
||||||
# data, adjprior, priorTerm, i1)
|
|
||||||
# % Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio
|
|
||||||
# % kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio
|
|
||||||
# % inds2(find(T2==i)) siirret<65><74>n koriin j.
|
|
||||||
|
|
||||||
# global COUNTS; global SUMCOUNTS;
|
|
||||||
# global PARTITION; global POP_LOGML;
|
|
||||||
# npops = size(COUNTS,3);
|
|
||||||
# npops2 = length(unique(T2));
|
|
||||||
# muutokset = zeros(npops2, npops);
|
|
||||||
|
|
||||||
# i1_logml = POP_LOGML(i1);
|
|
||||||
|
|
||||||
# for pop2 = 1:npops2
|
|
||||||
# inds = inds2(find(T2==pop2));
|
|
||||||
# ninds = length(inds);
|
|
||||||
# if ninds>0
|
|
||||||
# rows = computeRows(rowsFromInd, inds, ninds);
|
|
||||||
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
|
||||||
# diffInSumCounts = sum(diffInCounts);
|
|
||||||
|
|
||||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
|
|
||||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
|
|
||||||
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
|
|
||||||
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
|
||||||
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
|
||||||
|
|
||||||
# i2 = [1:i1-1 , i1+1:npops];
|
|
||||||
# i2_logml = POP_LOGML(i2)';
|
|
||||||
|
|
||||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
|
||||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
|
||||||
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)';
|
|
||||||
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
|
|
||||||
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
|
|
||||||
|
|
||||||
# muutokset(pop2,i2) = new_i1_logml - i1_logml ...
|
|
||||||
# + new_i2_logml - i2_logml;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
|
|
||||||
# %------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ...
|
|
||||||
# priorTerm, i1, i2)
|
|
||||||
|
|
||||||
# % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi
|
|
||||||
# % muutos logml:ss? mik<69>li yksil?i vaihtaisi koria i1:n ja i2:n v<>lill?
|
|
||||||
|
|
||||||
# global COUNTS; global SUMCOUNTS;
|
|
||||||
# global PARTITION; global POP_LOGML;
|
|
||||||
|
|
||||||
# ninds = length(inds);
|
|
||||||
# muutokset = zeros(ninds,1);
|
|
||||||
|
|
||||||
# i1_logml = POP_LOGML(i1);
|
|
||||||
# i2_logml = POP_LOGML(i2);
|
|
||||||
|
|
||||||
# for i = 1:ninds
|
|
||||||
# ind = inds(i);
|
|
||||||
# if PARTITION(ind)==i1
|
|
||||||
# pop1 = i1; %mist?
|
|
||||||
# pop2 = i2; %mihin
|
|
||||||
# else
|
|
||||||
# pop1 = i2;
|
|
||||||
# pop2 = i1;
|
|
||||||
# end
|
|
||||||
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
|
|
||||||
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
|
|
||||||
# diffInSumCounts = sum(diffInCounts);
|
|
||||||
|
|
||||||
# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts;
|
|
||||||
# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts;
|
|
||||||
# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts;
|
|
||||||
# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts;
|
|
||||||
|
|
||||||
# PARTITION(ind) = pop2;
|
|
||||||
|
|
||||||
# new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
|
||||||
|
|
||||||
# muutokset(i) = sum(new_logmls);
|
|
||||||
|
|
||||||
# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts;
|
|
||||||
# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts;
|
|
||||||
# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts;
|
|
||||||
# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts;
|
|
||||||
|
|
||||||
# PARTITION(ind) = pop1;
|
|
||||||
# end
|
|
||||||
|
|
||||||
# muutokset = muutokset - i1_logml - i2_logml;
|
|
||||||
|
|
||||||
# %--------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
|
|
||||||
# % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
|
|
||||||
# % lukum<75><6D>r<EFBFBD>t (vastaavasti kuin COUNTS:issa), jotka ovat data:n
|
|
||||||
# % riveill?rows.
|
|
||||||
|
|
||||||
# diffInCounts = zeros(max_noalle, nloci);
|
|
||||||
# for i=rows
|
|
||||||
# row = data(i,:);
|
|
||||||
# notEmpty = find(row>=0);
|
|
||||||
|
|
||||||
# if length(notEmpty)>0
|
|
||||||
# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ...
|
|
||||||
# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# %------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function popLogml = computePopulationLogml(pops, adjprior, priorTerm)
|
|
||||||
# % Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset
|
|
||||||
# % logml:t koreille, jotka on m<><6D>ritelty pops-muuttujalla.
|
|
||||||
|
|
||||||
# global COUNTS;
|
|
||||||
# global SUMCOUNTS;
|
|
||||||
# x = size(COUNTS,1);
|
|
||||||
# y = size(COUNTS,2);
|
|
||||||
# z = length(pops);
|
|
||||||
|
|
||||||
# popLogml = ...
|
|
||||||
# squeeze(sum(sum(reshape(...
|
|
||||||
# gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ...
|
|
||||||
# ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm;
|
|
||||||
|
|
||||||
|
|
||||||
# %-----------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function npops = poistaTyhjatPopulaatiot(npops)
|
|
||||||
# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
|
||||||
# % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.
|
|
||||||
|
|
||||||
# global COUNTS;
|
|
||||||
# global SUMCOUNTS;
|
|
||||||
# global PARTITION;
|
|
||||||
|
|
||||||
# notEmpty = find(any(SUMCOUNTS,2));
|
|
||||||
# COUNTS = COUNTS(:,:,notEmpty);
|
|
||||||
# SUMCOUNTS = SUMCOUNTS(notEmpty,:);
|
|
||||||
|
|
||||||
# for n=1:length(notEmpty)
|
|
||||||
# apu = find(PARTITION==notEmpty(n));
|
|
||||||
# PARTITION(apu)=n;
|
|
||||||
# end
|
|
||||||
# npops = length(notEmpty);
|
|
||||||
|
|
||||||
|
|
||||||
# %----------------------------------------------------------------------------------
|
|
||||||
# %Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.
|
|
||||||
|
|
||||||
# function initial_partition=admixture_initialization(data_matrix,nclusters,Z)
|
|
||||||
# size_data=size(data_matrix);
|
|
||||||
# nloci=size_data(2)-1;
|
|
||||||
# n=max(data_matrix(:,end));
|
|
||||||
# T=cluster_own(Z,nclusters);
|
|
||||||
# initial_partition=zeros(size_data(1),1);
|
|
||||||
# for i=1:n
|
|
||||||
# kori=T(i);
|
|
||||||
# here=find(data_matrix(:,end)==i);
|
|
||||||
# for j=1:length(here)
|
|
||||||
# initial_partition(here(j),1)=kori;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# function T = cluster_own(Z,nclust)
|
|
||||||
# true=logical(1);
|
|
||||||
# false=logical(0);
|
|
||||||
# maxclust = nclust;
|
|
||||||
# % Start of algorithm
|
|
||||||
# m = size(Z,1)+1;
|
|
||||||
# T = zeros(m,1);
|
|
||||||
# % maximum number of clusters based on inconsistency
|
|
||||||
# if m <= maxclust
|
|
||||||
# T = (1:m)';
|
|
||||||
# elseif maxclust==1
|
|
||||||
# T = ones(m,1);
|
|
||||||
# else
|
|
||||||
# clsnum = 1;
|
|
||||||
# for k = (m-maxclust+1):(m-1)
|
|
||||||
# i = Z(k,1); % left tree
|
|
||||||
# if i <= m % original node, no leafs
|
|
||||||
# T(i) = clsnum;
|
|
||||||
# clsnum = clsnum + 1;
|
|
||||||
# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree
|
|
||||||
# T = clusternum(Z, T, i-m, clsnum);
|
|
||||||
# clsnum = clsnum + 1;
|
|
||||||
# end
|
|
||||||
# i = Z(k,2); % right tree
|
|
||||||
# if i <= m % original node, no leafs
|
|
||||||
# T(i) = clsnum;
|
|
||||||
# clsnum = clsnum + 1;
|
|
||||||
# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree
|
|
||||||
# T = clusternum(Z, T, i-m, clsnum);
|
|
||||||
# clsnum = clsnum + 1;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# function T = clusternum(X, T, k, c)
|
|
||||||
# m = size(X,1)+1;
|
|
||||||
# while(~isempty(k))
|
|
||||||
# % Get the children of nodes at this level
|
|
||||||
# children = X(k,1:2);
|
|
||||||
# children = children(:);
|
|
||||||
|
|
||||||
# % Assign this node number to leaf children
|
|
||||||
# t = (children<=m);
|
|
||||||
# T(children(t)) = c;
|
|
||||||
|
|
||||||
# % Move to next level
|
|
||||||
# k = children(~t) - m;
|
|
||||||
# end
|
|
||||||
|
|
||||||
# %----------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function [Z, distances]=getDistances(data_matrix,nclusters)
|
|
||||||
|
|
||||||
# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance
|
|
||||||
# %gives partition in 8-bit format
|
|
||||||
# %allocates all alleles of a single individual into the same basket
|
|
||||||
# %data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row,
|
|
||||||
# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row
|
|
||||||
# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix.
|
|
||||||
|
|
||||||
# size_data=size(data_matrix);
|
|
||||||
# nloci=size_data(2)-1;
|
|
||||||
# n=max(data_matrix(:,end));
|
|
||||||
# distances=zeros(nchoosek(n,2),1);
|
|
||||||
# pointer=1;
|
|
||||||
# for i=1:n-1
|
|
||||||
# i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci);
|
|
||||||
# for j=i+1:n
|
|
||||||
# d_ij=0;
|
|
||||||
# j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci);
|
|
||||||
# vertailuja = 0;
|
|
||||||
# for k=1:size(i_data,1)
|
|
||||||
# for l=1:size(j_data,1)
|
|
||||||
# here_i=find(i_data(k,:)>=0);
|
|
||||||
# here_j=find(j_data(l,:)>=0);
|
|
||||||
# here_joint=intersect(here_i,here_j);
|
|
||||||
# vertailuja = vertailuja + length(here_joint);
|
|
||||||
# d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint)));
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
# d_ij = d_ij / vertailuja;
|
|
||||||
# distances(pointer)=d_ij;
|
|
||||||
# pointer=pointer+1;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# Z=linkage(distances');
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# %----------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd)
|
|
||||||
# nloci = size(counts,2);
|
|
||||||
# npops = size(counts,3);
|
|
||||||
# adjnoalle = zeros(max(noalle),nloci);
|
|
||||||
# for j=1:nloci
|
|
||||||
# adjnoalle(1:noalle(j),j)=noalle(j);
|
|
||||||
# if (noalle(j)<max(noalle))
|
|
||||||
# adjnoalle(noalle(j)+1:end,j)=1;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# %logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ...
|
|
||||||
# % - npops*sum(sum(gammaln(adjprior))) - ...
|
|
||||||
# % sum(sum(gammaln(1+sumcounts)));
|
|
||||||
# %logml = logml2;
|
|
||||||
|
|
||||||
# global GAMMA_LN;
|
|
||||||
# rowsInG = size(data,1)+rowsFromInd;
|
|
||||||
|
|
||||||
# logml = sum(sum(sum(GAMMA_LN(counts+1 + repmat(rowsInG*(adjnoalle-1),[1 1 npops]))))) ...
|
|
||||||
# - npops*sum(sum(GAMMA_LN(1, adjnoalle))) ...
|
|
||||||
# -sum(sum(GAMMA_LN(sumcounts+1,1)));
|
|
||||||
|
|
||||||
|
|
||||||
# %--------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
# function initializeGammaln(ninds, rowsFromInd, maxAlleles)
|
|
||||||
# %Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j)
|
|
||||||
# global GAMMA_LN;
|
|
||||||
# GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxAlleles);
|
|
||||||
# for i=1:(ninds+1)*rowsFromInd
|
|
||||||
# for j=1:maxAlleles
|
|
||||||
# GAMMA_LN(i,j)=gammaln((i-1) + 1/j);
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
|
|
||||||
# %---------------------------------------------------------------
|
|
||||||
|
|
||||||
# function dist2 = laskeOsaDist(inds2, dist, ninds)
|
|
||||||
# % Muodostaa dist vektorista osavektorin, joka sis<69>lt<6C><74> yksil<69>iden inds2
|
|
||||||
# % v<>liset et<65>isyydet. ninds=kaikkien yksil<69>iden lukum<75><6D>r?
|
|
||||||
|
|
||||||
# ninds2 = length(inds2);
|
|
||||||
# apu = zeros(nchoosek(ninds2,2),2);
|
|
||||||
# rivi = 1;
|
|
||||||
# for i=1:ninds2-1
|
|
||||||
# for j=i+1:ninds2
|
|
||||||
# apu(rivi, 1) = inds2(i);
|
|
||||||
# apu(rivi, 2) = inds2(j);
|
|
||||||
# rivi = rivi+1;
|
|
||||||
# end
|
|
||||||
# end
|
|
||||||
# apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1));
|
|
||||||
# dist2 = dist(apu);
|
|
||||||
|
|
||||||
# %--------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# function [emptyPop, pops] = findEmptyPop(npops)
|
|
||||||
# % Palauttaa ensimm<6D>isen tyhj<68>n populaation indeksin. Jos tyhji?
|
|
||||||
# % populaatioita ei ole, palauttaa -1:n.
|
|
||||||
|
|
||||||
# global PARTITION;
|
|
||||||
# pops = unique(PARTITION)';
|
|
||||||
# if (length(pops) ==npops)
|
|
||||||
# emptyPop = -1;
|
|
||||||
# else
|
|
||||||
# popDiff = diff([0 pops npops+1]);
|
|
||||||
# emptyPop = min(find(popDiff > 1));
|
|
||||||
# end
|
|
||||||
|
|
@ -20,12 +20,11 @@ handleData <- function(raw_data) {
|
||||||
# koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi.
|
# koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi.
|
||||||
# T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
|
# T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
|
||||||
# koodit saavat arvoja v?lill?1,...,noalle(j).
|
# koodit saavat arvoja v?lill?1,...,noalle(j).
|
||||||
|
|
||||||
data <- raw_data
|
data <- raw_data
|
||||||
nloci <- size(raw_data, 2) - 1
|
nloci <- size(raw_data, 2) - 1
|
||||||
|
|
||||||
dataApu <- data[, 1:nloci]
|
dataApu <- data[, 1:nloci]
|
||||||
nollat <- find(dataApu==0)
|
nollat <- find(dataApu == 0)
|
||||||
if (!isempty(nollat)) {
|
if (!isempty(nollat)) {
|
||||||
isoinAlleeli <- max(max(dataApu))
|
isoinAlleeli <- max(max(dataApu))
|
||||||
dataApu[nollat] <- isoinAlleeli + 1
|
dataApu[nollat] <- isoinAlleeli + 1
|
||||||
|
|
@ -39,9 +38,12 @@ handleData <- function(raw_data) {
|
||||||
alleelitLokuksessa <- cell(nloci, 1)
|
alleelitLokuksessa <- cell(nloci, 1)
|
||||||
for (i in 1:nloci) {
|
for (i in 1:nloci) {
|
||||||
alleelitLokuksessaI <- unique(data[, i])
|
alleelitLokuksessaI <- unique(data[, i])
|
||||||
alleelitLokuksessa[i, 1] <- alleelitLokuksessaI[
|
alleelitLokuksessaI_pos <- find(alleelitLokuksessaI >= 0)
|
||||||
find(alleelitLokuksessaI >= 0)
|
alleelitLokuksessa[i, 1] <- ifelse(
|
||||||
]
|
test = length(alleelitLokuksessaI_pos) > 0,
|
||||||
|
yes = alleelitLokuksessaI[alleelitLokuksessaI_pos],
|
||||||
|
no = 0
|
||||||
|
)
|
||||||
noalle[i] <- length(alleelitLokuksessa[i, 1])
|
noalle[i] <- length(alleelitLokuksessa[i, 1])
|
||||||
}
|
}
|
||||||
alleleCodes <- zeros(max(noalle), nloci)
|
alleleCodes <- zeros(max(noalle), nloci)
|
||||||
|
|
@ -65,10 +67,10 @@ handleData <- function(raw_data) {
|
||||||
emptyRow <- repmat(a, c(1, ncols))
|
emptyRow <- repmat(a, c(1, ncols))
|
||||||
lessThanMax <- find(rowsFromInd < maxRowsFromInd)
|
lessThanMax <- find(rowsFromInd < maxRowsFromInd)
|
||||||
missingRows <- maxRowsFromInd * nind - nrows
|
missingRows <- maxRowsFromInd * nind - nrows
|
||||||
data <- as.matrix(c(data, zeros(missingRows, ncols)))
|
data <- rbind(data, zeros(missingRows, ncols))
|
||||||
pointer <- 1
|
pointer <- 1
|
||||||
for (ind in t(lessThanMax)) { #K?y l?pi ne yksil?t, joilta puuttuu rivej?
|
for (ind in t(lessThanMax)) { #K?y l?pi ne yksil?t, joilta puuttuu rivej?
|
||||||
miss = maxRowsFromInd-rowsFromInd(ind); # T?lt?yksil?lt?puuttuvien lkm.
|
miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm.
|
||||||
}
|
}
|
||||||
data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
|
data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
|
||||||
newData <- data
|
newData <- data
|
||||||
|
|
@ -84,12 +86,12 @@ handleData <- function(raw_data) {
|
||||||
priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j])
|
priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j])
|
||||||
}
|
}
|
||||||
out <- list(
|
out <- list(
|
||||||
newData = newData,
|
newData = newData,
|
||||||
rowsFromInd = rowsFromInd,
|
rowsFromInd = rowsFromInd,
|
||||||
alleleCodes = alleleCodes,
|
alleleCodes = alleleCodes,
|
||||||
noalle = noalle,
|
noalle = noalle,
|
||||||
adjprior = adjprior,
|
adjprior = adjprior,
|
||||||
priorTerm = priorTerm
|
priorTerm = priorTerm
|
||||||
)
|
)
|
||||||
return(out)
|
return(out)
|
||||||
}
|
}
|
||||||
567
R/indMix.R
Normal file
567
R/indMix.R
Normal file
|
|
@ -0,0 +1,567 @@
|
||||||
|
indMix <- function(c, npops, dispText) {
|
||||||
|
# Greedy search algorithm with unknown number of classes for regular
|
||||||
|
# clustering.
|
||||||
|
# Input npops is not used if called by greedyMix or greedyPopMix.
|
||||||
|
|
||||||
|
logml <- 1
|
||||||
|
clearGlobalVars()
|
||||||
|
|
||||||
|
noalle <- c$noalle
|
||||||
|
rows <- c$rows
|
||||||
|
data <- c$data
|
||||||
|
|
||||||
|
adjprior <- c$adjprior
|
||||||
|
priorTerm <- c$priorTerm
|
||||||
|
rowsFromInd <- c$rowsFromInd
|
||||||
|
|
||||||
|
if (isfield(c, 'dist')) {
|
||||||
|
dist <- c$dist
|
||||||
|
Z <- c$Z
|
||||||
|
}
|
||||||
|
|
||||||
|
rm(c)
|
||||||
|
nargin <- length(as.list(match.call())) - 1
|
||||||
|
|
||||||
|
if (nargin < 2) {
|
||||||
|
dispText <- 1
|
||||||
|
npopstext <- matrix()
|
||||||
|
ready <- FALSE
|
||||||
|
teksti <- 'Input upper bound to the number of populations (possibly multiple values)'
|
||||||
|
while (!ready) {
|
||||||
|
npopstextExtra <- inputdlg(
|
||||||
|
teksti,
|
||||||
|
1,
|
||||||
|
'20'
|
||||||
|
)
|
||||||
|
if (isempty(npopstextExtra)) { # Painettu Cancel:ia
|
||||||
|
return()
|
||||||
|
}
|
||||||
|
npopstextExtra <- npopstextExtra[1]
|
||||||
|
if (length(npopstextExtra)>=255) {
|
||||||
|
npopstextExtra <- npopstextExtra[1:255]
|
||||||
|
npopstext <- c(npopstext, ' ', npopstextExtra)
|
||||||
|
teksti <- 'The input field length limit (255 characters) was reached. Input more values: '
|
||||||
|
} else {
|
||||||
|
npopstext <- c(npopstext, ' ', npopstextExtra)
|
||||||
|
ready <- TRUE
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rm(ready, teksti)
|
||||||
|
if (isempty(npopstext) | length(npopstext) == 1) {
|
||||||
|
return()
|
||||||
|
} else {
|
||||||
|
npopsTaulu <- as.numeric(npopstext)
|
||||||
|
ykkoset <- find(npopsTaulu == 1)
|
||||||
|
npopsTaulu[ykkoset] <- list() # Mik<69>li ykk<6B>si<73> annettu yl<79>rajaksi, ne poistetaan.
|
||||||
|
if (isempty(npopsTaulu)) {
|
||||||
|
logml <- 1
|
||||||
|
partitionSummary <- 1
|
||||||
|
npops <- 1
|
||||||
|
return()
|
||||||
|
}
|
||||||
|
rm(ykkoset)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
npopsTaulu <- npops
|
||||||
|
}
|
||||||
|
|
||||||
|
nruns <- length(npopsTaulu)
|
||||||
|
|
||||||
|
initData <- data
|
||||||
|
data <- data[,1:(ncol(data) - 1)]
|
||||||
|
|
||||||
|
logmlBest <- -1e50
|
||||||
|
partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml)
|
||||||
|
partitionSummary[,1] <- zeros(30, 1)
|
||||||
|
worstLogml <- -1e50
|
||||||
|
worstIndex <- 1
|
||||||
|
for (run in 1:nruns) {
|
||||||
|
npops <- npopsTaulu[run]
|
||||||
|
if (dispText) {
|
||||||
|
dispLine()
|
||||||
|
print(
|
||||||
|
paste0(
|
||||||
|
'Run ', as.character(run), '/', as.character(nruns),
|
||||||
|
', maximum number of populations ', as.character(npops), '.'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
ninds <- size(rows, 1)
|
||||||
|
|
||||||
|
initialPartition <- admixture_initialization(initData, npops, Z) # TODO: translate
|
||||||
|
sumcounts_counts_logml = initialCounts(
|
||||||
|
initialPartition, data, npops, rows, noalle, adjprior
|
||||||
|
) # TODO: translate
|
||||||
|
sumcounts <- sumcounts_counts_logml$sumcounts
|
||||||
|
counts <- sumcounts_counts_logml$counts
|
||||||
|
logml <- sumcounts_counts_logml$logml
|
||||||
|
|
||||||
|
PARTITION <- zeros(ninds, 1)
|
||||||
|
for (i in 1:ninds) {
|
||||||
|
apu <- rows[i]
|
||||||
|
PARTITION[i] <- initialPartition(apu[1])
|
||||||
|
}
|
||||||
|
|
||||||
|
COUNTS <- counts
|
||||||
|
SUMCOUNTS <- sumcounts
|
||||||
|
POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) # TODO: translate
|
||||||
|
LOGDIFF <- repmat(-Inf, c(ninds, npops))
|
||||||
|
rm(initialPartition, counts, sumcounts)
|
||||||
|
|
||||||
|
# PARHAAN MIXTURE-PARTITION ETSIMINEN
|
||||||
|
nRoundTypes <- 7
|
||||||
|
kokeiltu <- zeros(nRoundTypes, 1)
|
||||||
|
roundTypes <- c(1, 1) # Ykk<6B>svaiheen sykli kahteen kertaan.
|
||||||
|
ready <- 0
|
||||||
|
vaihe <- 1
|
||||||
|
|
||||||
|
if (dispText) {
|
||||||
|
print(' ')
|
||||||
|
print(
|
||||||
|
paste0(
|
||||||
|
'Mixture analysis started with initial',
|
||||||
|
as.character(npops),
|
||||||
|
'populations.'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
while (ready != 1) {
|
||||||
|
muutoksia <- 0
|
||||||
|
|
||||||
|
if (dispText) {
|
||||||
|
print(paste('Performing steps:', as.character(roundTypes)))
|
||||||
|
}
|
||||||
|
|
||||||
|
for (n in 1:length(roundTypes)) {
|
||||||
|
|
||||||
|
round <- roundTypes[n]
|
||||||
|
kivaluku <- 0
|
||||||
|
|
||||||
|
if (kokeiltu(round) == 1) { #Askelta kokeiltu viime muutoksen j<>lkeen
|
||||||
|
|
||||||
|
} else if (round == 0 | round == 1) { #Yksil<69>n siirt<72>minen toiseen populaatioon.
|
||||||
|
inds <- 1:ninds
|
||||||
|
aputaulu <- c(t(inds), rand(ninds, 1))
|
||||||
|
aputaulu <- sortrows(aputaulu, 2)
|
||||||
|
inds <- t(aputaulu[, 1])
|
||||||
|
muutosNyt <- 0
|
||||||
|
|
||||||
|
for (ind in inds) {
|
||||||
|
i1 <- PARTITION[ind]
|
||||||
|
muutokset_diffInCounts = laskeMuutokset(
|
||||||
|
ind, rows, data, adjprior, priorTerm
|
||||||
|
)
|
||||||
|
muutokset <- muutokset_diffInCounts$muutokset
|
||||||
|
diffInCounts <- muutokset_diffInCounts$diffInCounts
|
||||||
|
|
||||||
|
if (round == 1) {
|
||||||
|
maxMuutos <- max_MATLAB(muutokset)[[1]]
|
||||||
|
i2 <- max_MATLAB(muutokset)[[2]]
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i1 != i2 & maxMuutos > 1e-5) {
|
||||||
|
# Tapahtui muutos
|
||||||
|
muutoksia <- 1
|
||||||
|
if (muutosNyt == 0) {
|
||||||
|
muutosNyt <- 1
|
||||||
|
if (dispText) {
|
||||||
|
print('Action 1')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
kokeiltu <- zeros(nRoundTypes, 1)
|
||||||
|
kivaluku <- kivaluku + 1
|
||||||
|
updateGlobalVariables(
|
||||||
|
ind, i2, diffInCounts, adjprior, priorTerm
|
||||||
|
)
|
||||||
|
logml <- logml+maxMuutos
|
||||||
|
if (logml > worstLogml) {
|
||||||
|
partitionSummary_added = addToSummary(
|
||||||
|
logml, partitionSummary, worstIndex
|
||||||
|
)
|
||||||
|
partitionSummary_added <- partitionSummary_added$partitionSummary
|
||||||
|
added <- partitionSummary_added$added
|
||||||
|
if (added == 1) {
|
||||||
|
worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
|
||||||
|
worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (muutosNyt == 0) {
|
||||||
|
kokeiltu[round] <- 1
|
||||||
|
}
|
||||||
|
|
||||||
|
} else if (round == 2) { # Populaation yhdist<73>minen toiseen.
|
||||||
|
maxMuutos <- 0
|
||||||
|
for (pop in 1:npops) {
|
||||||
|
muutokset_diffInCounts <- laskeMuutokset2(
|
||||||
|
pop, rows, data, adjprior, priorTerm
|
||||||
|
)
|
||||||
|
muutokset <- muutokset_diffInCounts$muutokset
|
||||||
|
diffInCounts <- muutokset_diffInCounts$diffInCounts
|
||||||
|
isoin <- max_MATLAB(muutokset)[[1]]
|
||||||
|
indeksi <- max_MATLAB(muutokset)[[2]]
|
||||||
|
if (isoin > maxMuutos) {
|
||||||
|
maxMuutos <- isoin
|
||||||
|
i1 <- pop
|
||||||
|
i2 <- indeksi
|
||||||
|
diffInCountsBest <- diffInCounts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxMuutos > 1e-5) {
|
||||||
|
muutoksia <- 1
|
||||||
|
kokeiltu <- zeros(nRoundTypes, 1)
|
||||||
|
updateGlobalVariables2(
|
||||||
|
i1, i2, diffInCountsBest, adjprior, priorTerm
|
||||||
|
)
|
||||||
|
logml <- logml + maxMuutos
|
||||||
|
if (dispText) {
|
||||||
|
print('Action 2')
|
||||||
|
}
|
||||||
|
if (logml > worstLogml) {
|
||||||
|
partitionSummary_added <- addToSummary(
|
||||||
|
logml, partitionSummary, worstIndex
|
||||||
|
)
|
||||||
|
partitionSummary <- partitionSummary_added$partitionSummary
|
||||||
|
added <- partitionSummary_added$added
|
||||||
|
if (added==1) {
|
||||||
|
worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
|
||||||
|
worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
kokeiltu[round] <- 1
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} else if (round == 3 || round == 4) { #Populaation jakaminen osiin.
|
||||||
|
maxMuutos <- 0
|
||||||
|
ninds <- size(rows, 1)
|
||||||
|
for (pop in 1:npops) {
|
||||||
|
inds2 <- find(PARTITION == pop)
|
||||||
|
ninds2 <- length(inds2)
|
||||||
|
if (ninds2 > 2) {
|
||||||
|
dist2 <- laskeOsaDist(inds2, dist, ninds)
|
||||||
|
Z2 <- linkage(t(dist2))
|
||||||
|
if (round == 3) {
|
||||||
|
npops2 <- max(min(20, floor(ninds2 / 5)), 2)
|
||||||
|
} else if (round == 4) {
|
||||||
|
npops2 <- 2 # Moneenko osaan jaetaan
|
||||||
|
}
|
||||||
|
T2 <- cluster_own(Z2, npops2)
|
||||||
|
muutokset <- laskeMuutokset3(
|
||||||
|
T2, inds2, rows, data, adjprior, priorTerm, pop
|
||||||
|
)
|
||||||
|
isoin <- max_MATLAB(muutokset)[[1]]
|
||||||
|
indeksi <- max_MATLAB(muutokset)[[2]]
|
||||||
|
if (isoin > maxMuutos) {
|
||||||
|
maxMuutos <- isoin
|
||||||
|
muuttuvaPop2 <- indeksi %% npops2
|
||||||
|
if (muuttuvaPop2==0) muuttuvaPop2 <- npops2
|
||||||
|
muuttuvat <- inds2[find(T2 == muuttuvaPop2)]
|
||||||
|
i2 <- ceiling(indeksi / npops2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (maxMuutos > 1e-5) {
|
||||||
|
muutoksia <- 1
|
||||||
|
kokeiltu <- zeros(nRoundTypes, 1)
|
||||||
|
rivit <- list()
|
||||||
|
for (i in 1:length(muuttuvat)) {
|
||||||
|
ind <- muuttuvat[i]
|
||||||
|
lisa <- rows[ind, 1]:rows[ind, 2]
|
||||||
|
rivit <- rbind(rivit, t(lisa))
|
||||||
|
}
|
||||||
|
diffInCounts <- computeDiffInCounts(
|
||||||
|
t(rivit), size(COUNTS, 1), size(COUNTS, 2), data
|
||||||
|
)
|
||||||
|
i1 <- PARTITION(muuttuvat[1])
|
||||||
|
updateGlobalVariables3(
|
||||||
|
muuttuvat, diffInCounts, adjprior, priorTerm, i2
|
||||||
|
)
|
||||||
|
logml <- logml + maxMuutos
|
||||||
|
if (dispText) {
|
||||||
|
if (round == 3) {
|
||||||
|
print('Action 3')
|
||||||
|
} else {
|
||||||
|
print('Action 4')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (logml > worstLogml) {
|
||||||
|
partitionSummary_added <- addToSummary(
|
||||||
|
logml, partitionSummary, worstIndex
|
||||||
|
)
|
||||||
|
partitionSummary <- partitionSummary_added$partitionSummary
|
||||||
|
added <- partitionSummary_added$added
|
||||||
|
if (added==1) {
|
||||||
|
worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
|
||||||
|
worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
kokeiltu[round] <- 1
|
||||||
|
}
|
||||||
|
} else if (round == 5 || round == 6) {
|
||||||
|
j <- 0
|
||||||
|
muutettu <- 0
|
||||||
|
poplogml <- POP_LOGML
|
||||||
|
partition <- PARTITION
|
||||||
|
counts <- COUNTS
|
||||||
|
sumcounts <- SUMCOUNTS
|
||||||
|
logdiff <- LOGDIFF
|
||||||
|
|
||||||
|
pops <- sample(npops)
|
||||||
|
while (j < npops & muutettu == 0) {
|
||||||
|
j <- j + 1
|
||||||
|
pop <- pops[j]
|
||||||
|
totalMuutos <- 0
|
||||||
|
inds <- find(PARTITION == pop)
|
||||||
|
if (round == 5) {
|
||||||
|
aputaulu <- c(inds, rand(length(inds), 1))
|
||||||
|
aputaulu <- sortrows(aputaulu, 2)
|
||||||
|
inds <- t(aputaulu[, 1])
|
||||||
|
} else if (round == 6) {
|
||||||
|
inds <- returnInOrder(
|
||||||
|
inds, pop, rows, data, adjprior, priorTerm
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
i <- 0
|
||||||
|
|
||||||
|
while (length(inds) > 0 & i < length(inds)) {
|
||||||
|
i <- i + 1
|
||||||
|
ind <- inds[i]
|
||||||
|
|
||||||
|
muutokset_diffInCounts <- laskeMuutokset(
|
||||||
|
ind, rows, data, adjprior, priorTerm
|
||||||
|
)
|
||||||
|
muutokset <- muutokset_diffInCounts$muutokset
|
||||||
|
diffInCounts <- muutokset_diffInCounts$diffInCounts
|
||||||
|
|
||||||
|
muutokset[pop] <- -1e50 # Varmasti ei suurin!!!
|
||||||
|
maxMuutos <- max_MATLAB(muutokset)[[1]]
|
||||||
|
i2 <- max_MATLAB(muutokset)[[2]]
|
||||||
|
updateGlobalVariables(
|
||||||
|
ind, i2, diffInCounts, adjprior, priorTerm
|
||||||
|
)
|
||||||
|
|
||||||
|
totalMuutos <- totalMuutos+maxMuutos
|
||||||
|
logml <- logml + maxMuutos
|
||||||
|
if (round == 6) {
|
||||||
|
# Lopetetaan heti kun muutos on positiivinen.
|
||||||
|
if (totalMuutos > 1e-5) {
|
||||||
|
i <- length(inds)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (totalMuutos > 1e-5) {
|
||||||
|
kokeiltu <- zeros(nRoundTypes, 1)
|
||||||
|
muutettu <- 1
|
||||||
|
if (muutoksia == 0) {
|
||||||
|
muutoksia <- 1 # Ulompi kirjanpito.
|
||||||
|
if (dispText) {
|
||||||
|
if (round == 5) {
|
||||||
|
print('Action 5')
|
||||||
|
} else {
|
||||||
|
print('Action 6')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (logml > worstLogml) {
|
||||||
|
partitionSummary_added <- addToSummary(
|
||||||
|
logml, partitionSummary, worstIndex
|
||||||
|
)
|
||||||
|
partitionSummary <- partitionSummary_added$partitionSummary
|
||||||
|
added <- partitionSummary_added$added
|
||||||
|
if (added==1) {
|
||||||
|
worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
|
||||||
|
worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
# Miss<73><73>n vaiheessa tila ei parantunut.
|
||||||
|
# Perutaan kaikki muutokset.
|
||||||
|
PARTITION <- partition
|
||||||
|
SUMCOUNTS <- sumcounts
|
||||||
|
POP_LOGML <- poplogml
|
||||||
|
COUNTS <- counts
|
||||||
|
logml <- logml - totalMuutos
|
||||||
|
LOGDIFF <- logdiff
|
||||||
|
kokeiltu[round] <- 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rm(partition, sumcounts, counts, poplogml)
|
||||||
|
|
||||||
|
} else if (round == 7) {
|
||||||
|
emptyPop <- findEmptyPop(npops)
|
||||||
|
j <- 0
|
||||||
|
pops <- sample(npops)
|
||||||
|
muutoksiaNyt <- 0
|
||||||
|
if (emptyPop == -1) {
|
||||||
|
j <- npops
|
||||||
|
}
|
||||||
|
while (j < npops) {
|
||||||
|
j <- j + 1
|
||||||
|
pop <- pops[j]
|
||||||
|
inds2 <- find(PARTITION == pop)
|
||||||
|
ninds2 <- length(inds2)
|
||||||
|
if (ninds2 > 5) {
|
||||||
|
partition <- PARTITION
|
||||||
|
sumcounts <- SUMCOUNTS
|
||||||
|
counts <- COUNTS
|
||||||
|
poplogml <- POP_LOGML
|
||||||
|
logdiff <- LOGDIFF
|
||||||
|
|
||||||
|
dist2 <- laskeOsaDist(inds2, dist, ninds);
|
||||||
|
Z2 <- linkage(t(dist2))
|
||||||
|
T2 <- cluster_own(Z2, 2)
|
||||||
|
muuttuvat <- inds2[find(T2 == 1)]
|
||||||
|
|
||||||
|
muutokset <- laskeMuutokset3(
|
||||||
|
T2, inds2, rows, data, adjprior, priorTerm, pop
|
||||||
|
)
|
||||||
|
totalMuutos <- muutokset(1, emptyPop)
|
||||||
|
|
||||||
|
rivit <- list()
|
||||||
|
for (i in 1:length(muuttuvat)) {
|
||||||
|
ind <- muuttuvat[i]
|
||||||
|
lisa <- rows[ind, 1]:rows[ind, 2]
|
||||||
|
rivit <- c(rivit, lisa)
|
||||||
|
}
|
||||||
|
diffInCounts <- computeDiffInCounts(
|
||||||
|
rivit, size(COUNTS, 1), size(COUNTS, 2), data
|
||||||
|
)
|
||||||
|
|
||||||
|
updateGlobalVariables3(
|
||||||
|
muuttuvat, diffInCounts, adjprior, priorTerm,
|
||||||
|
emptyPop
|
||||||
|
)
|
||||||
|
|
||||||
|
muutettu <- 1
|
||||||
|
while (muutettu == 1) {
|
||||||
|
muutettu <- 0
|
||||||
|
# Siirret<65><74>n yksil<69>it<69> populaatioiden v<>lill<6C>
|
||||||
|
muutokset <- laskeMuutokset5(
|
||||||
|
inds2, rows, data, adjprior, priorTerm,
|
||||||
|
pop, emptyPop
|
||||||
|
)
|
||||||
|
|
||||||
|
maxMuutos <- indeksi <- max_MATLAB(muutokset)
|
||||||
|
|
||||||
|
muuttuva <- inds2(indeksi)
|
||||||
|
if (PARTITION(muuttuva) == pop) {
|
||||||
|
i2 <- emptyPop
|
||||||
|
} else {
|
||||||
|
i2 <- pop
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxMuutos > 1e-5) {
|
||||||
|
rivit <- rows[muuttuva, 1]:rows[muuttuva, 2]
|
||||||
|
diffInCounts <- computeDiffInCounts(
|
||||||
|
rivit, size(COUNTS, 1), size(COUNTS, 2),
|
||||||
|
data
|
||||||
|
)
|
||||||
|
updateGlobalVariables3(
|
||||||
|
muuttuva,diffInCounts, adjprior,
|
||||||
|
priorTerm, i2
|
||||||
|
)
|
||||||
|
muutettu <- 1
|
||||||
|
totalMuutos <- totalMuutos + maxMuutos
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (totalMuutos > 1e-5) {
|
||||||
|
muutoksia <- 1
|
||||||
|
logml <- logml + totalMuutos
|
||||||
|
if (logml > worstLogml) {
|
||||||
|
partitionSummary_added = addToSummary(
|
||||||
|
logml, partitionSummary, worstIndex
|
||||||
|
)
|
||||||
|
partitionSummary_added <- partitionSummary_added$partitionSummary
|
||||||
|
added <- partitionSummary_added$added
|
||||||
|
if (added == 1) {
|
||||||
|
worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]]
|
||||||
|
worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (muutoksiaNyt == 0) {
|
||||||
|
if (dispText) {
|
||||||
|
print('Action 7')
|
||||||
|
}
|
||||||
|
muutoksiaNyt <- 1
|
||||||
|
}
|
||||||
|
kokeiltu <- zeros(nRoundTypes, 1)
|
||||||
|
j <- npops
|
||||||
|
} else {
|
||||||
|
# palutetaan vanhat arvot
|
||||||
|
PARTITION <- partition
|
||||||
|
SUMCOUNTS <- sumcounts
|
||||||
|
COUNTS <- counts
|
||||||
|
POP_LOGML <- poplogml
|
||||||
|
LOGDIFF <- logdiff
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (muutoksiaNyt == 0) {
|
||||||
|
kokeiltu[round] <- 1
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (muutoksia == 0) {
|
||||||
|
if (vaihe <= 4) {
|
||||||
|
vaihe <= vaihe + 1
|
||||||
|
} else if (vaihe == 5) {
|
||||||
|
ready <- 1
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
muutoksia <- 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ready == 0) {
|
||||||
|
if (vaihe == 1) {
|
||||||
|
roundTypes <- c(1)
|
||||||
|
} else if (vaihe == 2) {
|
||||||
|
roundTypes <- c(2, 1)
|
||||||
|
} else if (vaihe == 3) {
|
||||||
|
roundTypes <- c(5, 5, 7)
|
||||||
|
} else if (vaihe == 4) {
|
||||||
|
roundTypes = c(4, 3, 1)
|
||||||
|
} else if (vaihe == 5) {
|
||||||
|
roundTypes <- c(6, 7, 2, 3, 4, 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# TALLENNETAAN
|
||||||
|
|
||||||
|
npops <- poistaTyhjatPopulaatiot(npops)
|
||||||
|
POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm)
|
||||||
|
if (dispText) {
|
||||||
|
print(paste('Found partition with', as.character(npops), 'populations.'))
|
||||||
|
print(paste('Log(ml) =', as.character(logml)))
|
||||||
|
print(' ')
|
||||||
|
}
|
||||||
|
|
||||||
|
if (logml > logmlBest) {
|
||||||
|
# P<>ivitet<65><74>n parasta l<>ydetty<74> partitiota.
|
||||||
|
logmlBest <- logml
|
||||||
|
npopsBest <- npops
|
||||||
|
partitionBest <- PARTITION
|
||||||
|
countsBest <- COUNTS
|
||||||
|
sumCountsBest <- SUMCOUNTS
|
||||||
|
pop_logmlBest <- POP_LOGML
|
||||||
|
logdiffbest <- LOGDIFF
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(
|
||||||
|
list(logml = logml, npops = npops, partitionSummary = partitionSummary)
|
||||||
|
)
|
||||||
|
}
|
||||||
20
R/initialPopCounts.R
Normal file
20
R/initialPopCounts.R
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
initialPopCounts <- function(data, npops, rows, noalle, adjprior) {
|
||||||
|
nloci <- size(data, 2)
|
||||||
|
counts <- zeros(max(noalle), nloci, npops)
|
||||||
|
sumcounts <- zeros(npops, nloci)
|
||||||
|
|
||||||
|
for (i in 1:npops) {
|
||||||
|
for (j in 1:nloci) {
|
||||||
|
i_rivit <- rows(i, 1):rows(i, 2)
|
||||||
|
havainnotLokuksessa <- find(data[i_rivit, j] >= 0)
|
||||||
|
sumcounts[i, j] <- length(havainnotLokuksessa)
|
||||||
|
for (k in 1:noalle[j]) {
|
||||||
|
alleleCode <- k
|
||||||
|
N_ijk <- length(find(data[i_rivit, j] == alleleCode))
|
||||||
|
counts[k, j, i] <- N_ijk
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
logml <- laskeLoggis(counts, sumcounts, adjprior)
|
||||||
|
return(sumcounts = sumcounts, counts = counts, logml = logml)
|
||||||
|
}
|
||||||
9
R/initializeGammaln.R
Normal file
9
R/initializeGammaln.R
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
initializeGammaln <- function(ninds, rowsFromInd, maxAlleles) {
|
||||||
|
#Alustaa GAMMALN muuttujan s.e. GAMMALN(i, j)=gammaln((i - 1) + 1/j)
|
||||||
|
GAMMA_LN <- zeros((1 + ninds) * rowsFromInd, maxAlleles)
|
||||||
|
for (i in 1:(ninds + 1) * rowsFromInd) {
|
||||||
|
for (j in 1:maxAlleles) {
|
||||||
|
GAMMA_LN[i, j] <- log_gamma((i - 1) + 1/j)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
laskeLoggis <- function(counts, sumcounts, adjprior) {
|
laskeLoggis <- function(counts, sumcounts, adjprior) {
|
||||||
npops <- size(counts, 3)
|
npops <- size(counts, 3)
|
||||||
|
|
||||||
sum1 <- sum(sum(sum(gammaln(counts + repmat(adjprior, c(1, 1, npops))))))
|
sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops))))))
|
||||||
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
|
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
|
||||||
logml2 <- sum1 - npops * sum3
|
logml2 <- sum1 - npops * sum3
|
||||||
loggis <- logml2
|
loggis <- logml2
|
||||||
|
|
|
||||||
226
R/laskeMuutokset12345.R
Normal file
226
R/laskeMuutokset12345.R
Normal file
|
|
@ -0,0 +1,226 @@
|
||||||
|
#' @title Calculate changes (?)
|
||||||
|
#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
|
||||||
|
#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran
|
||||||
|
#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
|
||||||
|
#' siirrettävää, on vastaavassa kohdassa rivi nollia.
|
||||||
|
#' @param osuus Percentages?
|
||||||
|
#' @param omaFreqs own Freqs?
|
||||||
|
#' @param osuusTaulu Percentage table?
|
||||||
|
#' @param logml log maximum likelihood
|
||||||
|
#' @param COUNTS COUNTS
|
||||||
|
#' @export
|
||||||
|
laskeMuutokset4 <- function (
|
||||||
|
osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)
|
||||||
|
) {
|
||||||
|
npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
|
||||||
|
notEmpty <- which(osuusTaulu > 0.005)
|
||||||
|
muutokset <- zeros(npops)
|
||||||
|
empties <- !notEmpty
|
||||||
|
|
||||||
|
for (i1 in notEmpty) {
|
||||||
|
osuusTaulu[i1] <- osuusTaulu[i1] - osuus
|
||||||
|
for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) {
|
||||||
|
osuusTaulu[i2] <- osuusTaulu[i2] + osuus
|
||||||
|
loggis <- computeIndLogml(omaFreqs, osuusTaulu)
|
||||||
|
|
||||||
|
# Work around Matlab OOB bug
|
||||||
|
if (i1 > nrow(muutokset)) {
|
||||||
|
muutokset <- rbind(muutokset, muutokset * 0)
|
||||||
|
}
|
||||||
|
if (i2 > ncol(muutokset)) {
|
||||||
|
muutokset <- cbind(muutokset, muutokset * 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
muutokset[i1, i2] <- loggis - logml
|
||||||
|
osuusTaulu[i2] <- osuusTaulu[i2] - osuus
|
||||||
|
}
|
||||||
|
osuusTaulu[i1] <- osuusTaulu[i1] + osuus
|
||||||
|
}
|
||||||
|
return (muutokset)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik<69> olisi
|
||||||
|
# muutos logml:ss<73>, mik<69>li yksil<69> ind siirret<65><74>n koriin i.
|
||||||
|
# diffInCounts on poistettava COUNTS:in siivusta i1 ja lis<69>tt<74>v<EFBFBD>
|
||||||
|
# COUNTS:in siivuun i2, mik<69>li muutos toteutetaan.
|
||||||
|
#
|
||||||
|
# Lis<69>ys 25.9.2007:
|
||||||
|
# Otettu k<>ytt<74><74>n globaali muuttuja LOGDIFF, johon on tallennettu muutokset
|
||||||
|
# logml:ss<73> siirrett<74>ess<73> yksil<69>it<69> toisiin populaatioihin.
|
||||||
|
laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) {
|
||||||
|
npops <- size(COUNTS, 3)
|
||||||
|
muutokset <- LOGDIFF[ind, ]
|
||||||
|
|
||||||
|
i1 <- PARTITION[ind]
|
||||||
|
i1_logml <- POP_LOGML[i1]
|
||||||
|
muutokset[i1] <- 0
|
||||||
|
|
||||||
|
rows <- globalRows[ind, 1]:globalRows[ind, 2]
|
||||||
|
diffInCounts <- computeDiffInCounts(
|
||||||
|
rows, size(COUNTS, 1), size(COUNTS, 2), data
|
||||||
|
)
|
||||||
|
diffInSumCounts <- sum(diffInCounts)
|
||||||
|
|
||||||
|
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||||
|
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
|
||||||
|
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
|
||||||
|
COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
|
||||||
|
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts
|
||||||
|
|
||||||
|
i2 <- find(muutokset == -Inf) # Etsit<69><74>n populaatiot jotka muuttuneet viime kerran j<>lkeen.
|
||||||
|
i2 <- setdiff(i2, i1)
|
||||||
|
i2_logml <- POP_LOGML[i2]
|
||||||
|
|
||||||
|
ni2 <- length(i2)
|
||||||
|
|
||||||
|
COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2))
|
||||||
|
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1))
|
||||||
|
new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm)
|
||||||
|
COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2))
|
||||||
|
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1))
|
||||||
|
|
||||||
|
muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
|
||||||
|
LOGDIFF[ind, ] = muutokset
|
||||||
|
return(list(muutokset = muutokset, diffInCounts = diffInCounts))
|
||||||
|
}
|
||||||
|
|
||||||
|
laskeMuutokset2 <- function(i1, globalRows, data, adjprior, priorTerm) {
|
||||||
|
# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik<69> olisi
|
||||||
|
# % muutos logml:ss<73>, mik<69>li korin i1 kaikki yksil<69>t siirret<65><74>n
|
||||||
|
# % koriin i.
|
||||||
|
|
||||||
|
npops <- size(COUNTS, 3)
|
||||||
|
muutokset <- zeros(npops, 1)
|
||||||
|
|
||||||
|
i1_logml <- POP_LOGML[i1]
|
||||||
|
|
||||||
|
inds <- find(PARTITION == i1)
|
||||||
|
ninds <- length(inds)
|
||||||
|
|
||||||
|
if (ninds == 0) {
|
||||||
|
diffInCounts <- zeros(size(COUNTS, 1), size(COUNTS, 2))
|
||||||
|
return()
|
||||||
|
}
|
||||||
|
|
||||||
|
rows = list()
|
||||||
|
for (i in 1:ninds) {
|
||||||
|
ind <- inds(i)
|
||||||
|
lisa <- globalRows(ind, 1):globalRows(ind, 2)
|
||||||
|
rows <- c(rows, t(lisa))
|
||||||
|
}
|
||||||
|
|
||||||
|
diffInCounts <- computeDiffInCounts(
|
||||||
|
t(rows), size(COUNTS, 1), size(COUNTS, 2), data
|
||||||
|
)
|
||||||
|
diffInSumCounts <- sum(diffInCounts)
|
||||||
|
|
||||||
|
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||||
|
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
|
||||||
|
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
|
||||||
|
COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
|
||||||
|
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts
|
||||||
|
|
||||||
|
i2 <- c(1:i1-1, i1+1:npops)
|
||||||
|
i2_logml <- POP_LOGML[i2]
|
||||||
|
|
||||||
|
COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1))
|
||||||
|
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1))
|
||||||
|
new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm)
|
||||||
|
COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1))
|
||||||
|
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1))
|
||||||
|
|
||||||
|
muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
|
||||||
|
return(list(muutokset = muutokset, diffInCounts = diffInCounts))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1) {
|
||||||
|
# Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio
|
||||||
|
# kertoo, mik<69> olisi muutos logml:ss<73>, jos populaation i1 osapopulaatio
|
||||||
|
# inds2(find(T2==i)) siirret<65><74>n koriin j.
|
||||||
|
|
||||||
|
npops <- size(COUNTS, 3)
|
||||||
|
npops2 <- length(unique(T2))
|
||||||
|
muutokset <- zeros(npops2, npops)
|
||||||
|
|
||||||
|
i1_logml = POP_LOGML[i1]
|
||||||
|
for (pop2 in 1:npops2) {
|
||||||
|
inds <- inds2[find(T2==pop2)]
|
||||||
|
ninds <- length(inds);
|
||||||
|
if (ninds > 0) {
|
||||||
|
rows <- list()
|
||||||
|
for (i in 1:ninds) {
|
||||||
|
ind <- inds[i]
|
||||||
|
lisa <- globalRows[ind, 1]:globalRows[ind, 2]
|
||||||
|
rows <- c(rows, t(lisa))
|
||||||
|
}
|
||||||
|
diffInCounts <- computeDiffInCounts(
|
||||||
|
t(rows), size(COUNTS, 1), size(COUNTS, 2), data
|
||||||
|
)
|
||||||
|
diffInSumCounts <- sum(diffInCounts)
|
||||||
|
|
||||||
|
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||||
|
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
|
||||||
|
new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
|
||||||
|
COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
|
||||||
|
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts
|
||||||
|
|
||||||
|
i2 <- c(1:i1-1, i1+1:npops)
|
||||||
|
i2_logml <- t(POP_LOGML[i2])
|
||||||
|
|
||||||
|
COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1))
|
||||||
|
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1))
|
||||||
|
new_i2_logml <- t(computePopulationLogml(i2, adjprior, priorTerm))
|
||||||
|
COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1))
|
||||||
|
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1))
|
||||||
|
|
||||||
|
muutokset[pop2, i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(muutokset)
|
||||||
|
}
|
||||||
|
|
||||||
|
laskeMuutokset5 <- function(inds, globalRows, data, adjprior, priorTerm, i1, i2) {
|
||||||
|
# Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik<69> olisi
|
||||||
|
# muutos logml:ss<73>, mik<69>li yksil<69> i vaihtaisi koria i1:n ja i2:n v<>lill<6C>.
|
||||||
|
|
||||||
|
ninds <- length(inds)
|
||||||
|
muutokset <- zeros(ninds, 1)
|
||||||
|
|
||||||
|
i1_logml <- POP_LOGML[i1]
|
||||||
|
i2_logml <- POP_LOGML[i2]
|
||||||
|
|
||||||
|
for (i in 1:ninds) {
|
||||||
|
ind <- inds[i]
|
||||||
|
if (PARTITION[ind] == i1) {
|
||||||
|
pop1 <- i1 #mist<73>
|
||||||
|
pop2 <- i2 #mihin
|
||||||
|
} else {
|
||||||
|
pop1 <- i2
|
||||||
|
pop2 <- i1
|
||||||
|
}
|
||||||
|
rows <- globalRows[ind, 1]:globalRows[ind, 2]
|
||||||
|
diffInCounts <- computeDiffInCounts(
|
||||||
|
rows, size(COUNTS, 1), size(COUNTS, 2), data
|
||||||
|
)
|
||||||
|
diffInSumCounts <- sum(diffInCounts)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
COUNTS[, , pop1] <- COUNTS[, , pop1] - diffInCounts
|
||||||
|
SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] - diffInSumCounts
|
||||||
|
COUNTS[, , pop2] <- COUNTS[, , pop2] + diffInCounts
|
||||||
|
SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] + diffInSumCounts
|
||||||
|
|
||||||
|
new_logmls <- computePopulationLogml(c(i1, i2), adjprior, priorTerm)
|
||||||
|
muutokset[i] <- sum(new_logmls)
|
||||||
|
|
||||||
|
COUNTS[, , pop1] <- COUNTS[, , pop1] + diffInCounts
|
||||||
|
SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] + diffInSumCounts
|
||||||
|
COUNTS[, , pop2] <- COUNTS[, , pop2] - diffInCounts
|
||||||
|
SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] - diffInSumCounts
|
||||||
|
}
|
||||||
|
|
||||||
|
muutokset <- muutokset - i1_logml - i2_logml
|
||||||
|
return(muutokset)
|
||||||
|
}
|
||||||
|
|
@ -1,39 +0,0 @@
|
||||||
#' @title Calculate changes?
|
|
||||||
#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
|
|
||||||
#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran
|
|
||||||
#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
|
|
||||||
#' siirrettävää, on vastaavassa kohdassa rivi nollia.
|
|
||||||
#' @param osuus Percentages?
|
|
||||||
#' @param omaFreqs own Freqs?
|
|
||||||
#' @param osuusTaulu Percentage table?
|
|
||||||
#' @param logml log maximum likelihood
|
|
||||||
#' @param COUNTS COUNTS
|
|
||||||
#' @export
|
|
||||||
laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml,
|
|
||||||
COUNTS = matrix(0)) {
|
|
||||||
npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
|
|
||||||
notEmpty <- which(osuusTaulu > 0.005)
|
|
||||||
muutokset <- zeros(npops)
|
|
||||||
empties <- !notEmpty
|
|
||||||
|
|
||||||
for (i1 in notEmpty) {
|
|
||||||
osuusTaulu[i1] <- osuusTaulu[i1] - osuus
|
|
||||||
for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) {
|
|
||||||
osuusTaulu[i2] <- osuusTaulu[i2] + osuus
|
|
||||||
loggis <- computeIndLogml(omaFreqs, osuusTaulu)
|
|
||||||
|
|
||||||
# Work around Matlab OOB bug
|
|
||||||
if (i1 > nrow(muutokset)) {
|
|
||||||
muutokset <- rbind(muutokset, muutokset * 0)
|
|
||||||
}
|
|
||||||
if (i2 > ncol(muutokset)) {
|
|
||||||
muutokset <- cbind(muutokset, muutokset * 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
muutokset[i1, i2] <- loggis - logml
|
|
||||||
osuusTaulu[i2] <- osuusTaulu[i2] - osuus
|
|
||||||
}
|
|
||||||
osuusTaulu[i1] <- osuusTaulu[i1] + osuus
|
|
||||||
}
|
|
||||||
return (muutokset)
|
|
||||||
}
|
|
||||||
25
R/laskeOsaDist.R
Normal file
25
R/laskeOsaDist.R
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
#' @title Lower part of the dist
|
||||||
|
#' @description Constructs from the dist vector a subvector containing the individual inds2, Forms dist sub-vectors the vector, which includes yksiliden inds2
|
||||||
|
#' @param inds2 inds2
|
||||||
|
#' @param dist dist
|
||||||
|
#' @param ninds ninds
|
||||||
|
#' @author Waldir Leoncio
|
||||||
|
laskeOsaDist <- function(inds2, dist, ninds) {
|
||||||
|
# % Muodostaa dist vektorista osavektorin, joka sis<69>lt<6C><74> yksil<69>iden inds2
|
||||||
|
# % v<>liset et<65>isyydet. ninds=kaikkien yksil<69>iden lukum<75><6D>r<EFBFBD>.
|
||||||
|
|
||||||
|
ninds2 <- length(inds2)
|
||||||
|
apu <- zeros(choose(ninds2, 2), 2)
|
||||||
|
rivi <- 1
|
||||||
|
for (i in 1:ninds2-1) {
|
||||||
|
for (j in i+1:ninds2) {
|
||||||
|
apu[rivi, 1] <- inds2[i]
|
||||||
|
apu[rivi, 2] <- inds2[j]
|
||||||
|
rivi <- rivi + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
apu <- (apu[, 1]-1) * ninds - apu[, 1] / 2 *
|
||||||
|
(apu[, 1]-1) + (apu[, 2] - apu[, 1])
|
||||||
|
dist2 <- dist(apu)
|
||||||
|
return(dist2)
|
||||||
|
}
|
||||||
50
R/linkage.R
50
R/linkage.R
|
|
@ -4,10 +4,14 @@
|
||||||
#' linkage algorithm. The input Y is a distance matrix such as is generated by
|
#' linkage algorithm. The input Y is a distance matrix such as is generated by
|
||||||
#' PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
#' PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
||||||
#' output format of PDIST.
|
#' output format of PDIST.
|
||||||
#' @param Y data
|
#'
|
||||||
|
#' Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X.
|
||||||
|
#' @param Y matrix
|
||||||
#' @param method either 'si', 'av', 'co' 'ce' or 'wa'
|
#' @param method either 'si', 'av', 'co' 'ce' or 'wa'
|
||||||
|
#' @note This is also a base Matlab function. The reason why the source code is also present here is unclear.
|
||||||
#' @export
|
#' @export
|
||||||
linkage <- function(Y, method = 'co') {
|
linkage <- function(Y, method = 'co') {
|
||||||
|
#TODO: compare R output with MATLAB output
|
||||||
k <- size(Y)[1]
|
k <- size(Y)[1]
|
||||||
n <- size(Y)[2]
|
n <- size(Y)[2]
|
||||||
m <- (1 + sqrt(1 + 8 * n)) / 2
|
m <- (1 + sqrt(1 + 8 * n)) / 2
|
||||||
|
|
@ -24,18 +28,33 @@ linkage <- function(Y, method = 'co') {
|
||||||
N[1:m] <- 1
|
N[1:m] <- 1
|
||||||
n <- m; # since m is changing, we need to save m in n.
|
n <- m; # since m is changing, we need to save m in n.
|
||||||
R <- 1:n
|
R <- 1:n
|
||||||
for (s in 1:(n-1)) {
|
for (s in 1:(n - 1)) {
|
||||||
X <- Y
|
X <- as.matrix(as.vector(Y), ncol=1)
|
||||||
v <- min(X)[1]
|
|
||||||
k <- min(X)[2]
|
v <- min_MATLAB(X)$mins
|
||||||
|
k <- min_MATLAB(X)$idx
|
||||||
|
|
||||||
i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1)))
|
i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1)))
|
||||||
j <- k - (i - 1) * (m - i / 2) + i
|
j <- k - (i - 1) * (m - i / 2) + i
|
||||||
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
|
|
||||||
# Temp variables
|
|
||||||
I1 <- 1:(i - 1)
|
|
||||||
I2 <- (i + 1):(j - 1)
|
|
||||||
I3 <- (j + 1):m
|
|
||||||
|
|
||||||
|
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
|
||||||
|
|
||||||
|
# Temp variables
|
||||||
|
if (i > 1) {
|
||||||
|
I1 <- 1:(i - 1)
|
||||||
|
} else {
|
||||||
|
I1 <- NULL
|
||||||
|
}
|
||||||
|
if (i + 1 <= j - 1) {
|
||||||
|
I2 <- (i + 1):(j - 1)
|
||||||
|
} else {
|
||||||
|
I2 <- NULL
|
||||||
|
}
|
||||||
|
if (j + 1 <= m) {
|
||||||
|
I3 <- (j + 1):m
|
||||||
|
} else {
|
||||||
|
I3 <- NULL
|
||||||
|
}
|
||||||
U <- c(I1, I2, I3)
|
U <- c(I1, I2, I3)
|
||||||
I <- c(
|
I <- c(
|
||||||
I1 * (m - (I1 + 1) / 2) - m + i,
|
I1 * (m - (I1 + 1) / 2) - m + i,
|
||||||
|
|
@ -47,11 +66,13 @@ linkage <- function(Y, method = 'co') {
|
||||||
I2 * (m - (I2 + 1) / 2) - m + j,
|
I2 * (m - (I2 + 1) / 2) - m + j,
|
||||||
j * (m - (j + 1) / 2) - m + I3
|
j * (m - (j + 1) / 2) - m + I3
|
||||||
)
|
)
|
||||||
|
# Workaround in R for negative values in I and J
|
||||||
|
# I <- I[I > 0 & I <= length(Y)]
|
||||||
|
# J <- J[J > 0 & J <= length(Y)]
|
||||||
switch(method,
|
switch(method,
|
||||||
'si' = Y[I] <- min(Y[I], Y[J]), # single linkage
|
'si' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, min), # single linkage
|
||||||
'av' = Y[I] <- Y[I] + Y[J], # average linkage
|
'av' = Y[I] <- Y[I] + Y[J], # average linkage
|
||||||
'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage
|
'co' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, max), #complete linkage
|
||||||
'ce' = {
|
'ce' = {
|
||||||
K <- N[R[i]] + N[R[j]] # centroid linkage
|
K <- N[R[i]] + N[R[j]] # centroid linkage
|
||||||
Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] -
|
Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] -
|
||||||
|
|
@ -61,8 +82,7 @@ linkage <- function(Y, method = 'co') {
|
||||||
Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]])
|
Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]])
|
||||||
)
|
)
|
||||||
J <- c(J, i * (m - (i + 1) / 2) - m + j)
|
J <- c(J, i * (m - (i + 1) / 2) - m + j)
|
||||||
Y[J] <- vector() # no need for the cluster information about j
|
Y <- Y[-J] # no need for the cluster information about j
|
||||||
|
|
||||||
# update m, N, R
|
# update m, N, R
|
||||||
m <- m - 1
|
m <- m - 1
|
||||||
N[n + s] <- N[R[i]] + N[R[j]]
|
N[n + s] <- N[R[i]] + N[R[j]]
|
||||||
|
|
|
||||||
|
|
@ -4,17 +4,16 @@
|
||||||
#' @return list containing data and popnames
|
#' @return list containing data and popnames
|
||||||
#' @export
|
#' @export
|
||||||
lueGenePopData <- function (tiedostonNimi) {
|
lueGenePopData <- function (tiedostonNimi) {
|
||||||
|
fid <- readLines(tiedostonNimi)
|
||||||
fid <- load(tiedostonNimi)
|
line <- fid[1] # ensimmäinen rivi
|
||||||
line1 <- readLines(fid)[1] # ensimmäinen rivi
|
line <- fid[2] # toinen rivi
|
||||||
line2 <- readLines(fid)[2] # toinen rivi
|
|
||||||
count <- rivinSisaltamienMjonojenLkm(line)
|
count <- rivinSisaltamienMjonojenLkm(line)
|
||||||
|
|
||||||
line <- readLines(fid)[3]
|
line <- fid[3]
|
||||||
lokusRiveja <- 1
|
lokusRiveja <- 1
|
||||||
while (testaaPop(line) == 0) {
|
while (testaaPop(line) == 0) {
|
||||||
lokusRiveja <- lokusRiveja + 1 # locus row
|
lokusRiveja <- lokusRiveja + 1 # locus row
|
||||||
line <- readLines(fid)[3 + lokusRiveja]
|
line <- fid[2 + lokusRiveja]
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lokusRiveja > 1) {
|
if (lokusRiveja > 1) {
|
||||||
|
|
@ -29,38 +28,34 @@ lueGenePopData <- function (tiedostonNimi) {
|
||||||
ninds <- 0
|
ninds <- 0
|
||||||
poimiNimi <- 1
|
poimiNimi <- 1
|
||||||
digitFormat <- -1
|
digitFormat <- -1
|
||||||
while (line != -1) {
|
while (lokusRiveja < length(fid) - 2) {
|
||||||
line <- readLines(fid)[lokusRiveja + 1]
|
lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along
|
||||||
lokusRiveja <- lokusRiveja + 1
|
line <- fid[lokusRiveja + 2]
|
||||||
|
|
||||||
if (poimiNimi == 1) {
|
if (poimiNimi == 1) {
|
||||||
# Edellinen rivi oli 'pop'
|
# Edellinen rivi oli 'pop' (previous line was pop)
|
||||||
nimienLkm <- nimienLkm + 1
|
nimienLkm <- nimienLkm + 1
|
||||||
ninds <- ninds + 1
|
ninds <- ninds + 1
|
||||||
if (nimienLkm > size(popnames, 1)) {
|
if (nimienLkm > size(popnames, 1)) {
|
||||||
popnames <- c(popnames, cell(10, 2))
|
popnames <- rbind(popnames, cell(10, 2))
|
||||||
}
|
}
|
||||||
nimi <- lueNimi(line)
|
nimi <- lueNimi(line)
|
||||||
if (digitFormat == -1) {
|
if (digitFormat == -1) {
|
||||||
digitFormat <- selvitaDigitFormat(line)
|
digitFormat <- selvitaDigitFormat(line)
|
||||||
divider <- 10 ^ digitFormat
|
divider <- 10 ^ digitFormat
|
||||||
}
|
}
|
||||||
popnames[nimienLkm, 1] <- nimi #N<>in se on greedyMix:iss<73>kin?!?
|
popnames[nimienLkm, 1] <- nimi #N<>in se on greedyMix:iss<73>kin?!?
|
||||||
popnames[nimienLkm, 2] <- ninds
|
popnames[nimienLkm, 2] <- ninds
|
||||||
poimiNimi <- 0
|
poimiNimi <- 0
|
||||||
|
|
||||||
data <- addAlleles(data, ninds, line, divider)
|
data <- addAlleles(data, ninds, line, divider)
|
||||||
|
|
||||||
} else if (testaaPop(line)) {
|
} else if (testaaPop(line)) {
|
||||||
poimiNimi <- 1
|
poimiNimi <- 1
|
||||||
|
} else if (!is.na(line)) {
|
||||||
} else if (line != -1) {
|
|
||||||
ninds <- ninds + 1
|
ninds <- ninds + 1
|
||||||
data <- addAlleles(data, ninds, line, divider)
|
data <- addAlleles(data, ninds, line, divider)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data <- data[1:(ninds * 2),]
|
data <- data[1:(ninds * 2), ]
|
||||||
popnames <- popnames[seq_len(nimienLkm),]
|
popnames <- popnames[seq_len(nimienLkm), ]
|
||||||
return(list(data = data, popnames = popnames))
|
return(list(data = data, popnames = popnames))
|
||||||
}
|
}
|
||||||
14
R/lueNimi.R
14
R/lueNimi.R
|
|
@ -1,17 +1,23 @@
|
||||||
#' @title Read the Name
|
#' @title Read the Name
|
||||||
#' @description Reads the line name
|
#' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
|
||||||
#' @param line line
|
#' @param line line
|
||||||
#' @return nimi
|
#' @return nimi
|
||||||
#' @export
|
#' @export
|
||||||
lueNimi <- function(line) {
|
lueNimi <- function(line) {
|
||||||
|
# ==========================================================================
|
||||||
|
# Validation
|
||||||
|
# ==========================================================================
|
||||||
|
if (!grepl(",", line)) {
|
||||||
|
stop("There are no commas in this line")
|
||||||
|
}
|
||||||
# Palauttaa line:n alusta sen osan, joka on ennen pilkkua.
|
# Palauttaa line:n alusta sen osan, joka on ennen pilkkua.
|
||||||
n <- 1
|
n <- 1
|
||||||
merkki <- line[n]
|
merkki <- substring(line, n, n)
|
||||||
nimi <- ''
|
nimi <- ''
|
||||||
while (merkki != ',') {
|
while (merkki != ',') {
|
||||||
nimi <- c(nimi, merkki)
|
nimi <- c(nimi, merkki)
|
||||||
n <- n + 1
|
n <- n + 1
|
||||||
merkki <- line[n]
|
merkki <- substring(line, n, n)
|
||||||
}
|
}
|
||||||
return(nimi)
|
return(paste(nimi, collapse=""))
|
||||||
}
|
}
|
||||||
160
R/matlab2r.R
Normal file
160
R/matlab2r.R
Normal file
|
|
@ -0,0 +1,160 @@
|
||||||
|
#' @title Convert Matlab function to R
|
||||||
|
#' @description Performs basic syntax conversion from Matlab to R
|
||||||
|
#' @param filename name of the file
|
||||||
|
#' @param output can be "asis", "clean", "save" or "diff"
|
||||||
|
#' @param improve_formatting if `TRUE` (default), makes minor changes
|
||||||
|
#' to conform to best-practice formatting conventions
|
||||||
|
#' @param change_assignment if `TRUE` (default), uses `<-` as the assignment operator
|
||||||
|
#' @param append if `FALSE` (default), overwrites file; otherwise, append
|
||||||
|
#' output to input
|
||||||
|
#' @return text converted to R, printed to screen or replacing input file
|
||||||
|
#' @author Waldir Leoncio
|
||||||
|
#' @importFrom utils write.table
|
||||||
|
#' @export
|
||||||
|
#' @note This function is intended to expedite the process of converting a
|
||||||
|
#' Matlab function to R by making common replacements. It does not have the
|
||||||
|
#' immediate goal of outputting a ready-to-use function. In other words,
|
||||||
|
#' after using this function you should go back to it and make minor changes.
|
||||||
|
#'
|
||||||
|
#' It is also advised to do a dry-run with `output = "clean"` and only switching
|
||||||
|
#' to `output = "save"` when you are confident that no important code will be
|
||||||
|
#' lost (for shorter functions, a careful visual inspection should suffice).
|
||||||
|
matlab2r <- function(
|
||||||
|
filename, output = "diff", improve_formatting=TRUE, change_assignment=TRUE,
|
||||||
|
append=FALSE
|
||||||
|
) {
|
||||||
|
# TODO: this function is too long! Split into subfunctions
|
||||||
|
# (say, by rule and/or section)
|
||||||
|
# ======================================================== #
|
||||||
|
# Verification #
|
||||||
|
# ======================================================== #
|
||||||
|
if (!file.exists(filename)) stop("File not found")
|
||||||
|
|
||||||
|
# ======================================================== #
|
||||||
|
# Reading file into R #
|
||||||
|
# ======================================================== #
|
||||||
|
txt <- readLines(filename)
|
||||||
|
original <- txt
|
||||||
|
|
||||||
|
# ======================================================== #
|
||||||
|
# Replacing text #
|
||||||
|
# ======================================================== #
|
||||||
|
|
||||||
|
# Uncommenting ------------------------------------------- #
|
||||||
|
txt <- gsub("^#\\s?(.+)", "\\1", txt)
|
||||||
|
|
||||||
|
# Output variable ---------------------------------------- #
|
||||||
|
out <- gsub(
|
||||||
|
pattern = "\\t*function ((\\S|\\,\\s)+)\\s?=\\s?(\\w+)\\((.+)\\)",
|
||||||
|
replacement = "\\1",
|
||||||
|
x = txt[1]
|
||||||
|
) # TODO: improve by detecting listed outputs
|
||||||
|
if (substring(out, 1, 1) == "[") {
|
||||||
|
out <- strsplit(out, "(\\,|\\[|\\]|\\s)")[[1]]
|
||||||
|
out <- out[which(out != "")]
|
||||||
|
out <- sapply(seq_along(out), function(x) paste(out[x], "=", out[x]))
|
||||||
|
out <- paste0("list(", paste(out, collapse=", "), ")")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function header ---------------------------------------- #
|
||||||
|
txt <- gsub(
|
||||||
|
pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)",
|
||||||
|
replacement = "\\2 <- function(\\3) {",
|
||||||
|
x = txt
|
||||||
|
)
|
||||||
|
txt <- gsub(
|
||||||
|
pattern = "function (.+)\\((.+)\\)",
|
||||||
|
replacement = "\\1 <- function(\\2) {",
|
||||||
|
x = txt
|
||||||
|
)
|
||||||
|
|
||||||
|
# Function body ------------------------------------------ #
|
||||||
|
txt <- gsub("(.+)\\.\\.\\.", "\\1", txt)
|
||||||
|
txt <- gsub(";", "", txt)
|
||||||
|
|
||||||
|
# Loops and if-statements
|
||||||
|
txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt)
|
||||||
|
txt <- gsub("end$", "}", txt)
|
||||||
|
txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after {
|
||||||
|
txt <- gsub("else$", "} else {", txt)
|
||||||
|
txt <- gsub("elseif", "} else if", txt)
|
||||||
|
txt <- gsub("while (.+)", "while \\1 {", txt)
|
||||||
|
|
||||||
|
# MATLAB-equivalent functions in R
|
||||||
|
txt <- gsub("gamma_ln", "log_gamma", txt)
|
||||||
|
txt <- gsub("nchoosek", "choose", txt)
|
||||||
|
txt <- gsub("isempty", "is.null", txt)
|
||||||
|
# txt <- gsub("(.+)\\'", "t(\\1)", txt)
|
||||||
|
|
||||||
|
# Subsets ------------------------------------------------ #
|
||||||
|
ass_op <- ifelse(change_assignment, "<-", "=")
|
||||||
|
txt <- gsub(
|
||||||
|
pattern = "([^\\(]+)\\(([^\\(]+)\\)=(.+)",
|
||||||
|
replacement = paste0("\\1[\\2] ", ass_op, "\\3"),
|
||||||
|
x = txt
|
||||||
|
)
|
||||||
|
txt <- gsub("\\(:\\)", "[, ]", txt)
|
||||||
|
txt <- gsub("(.+)(\\[|\\():,end(\\]|\\()", "\\1[, ncol()]", txt)
|
||||||
|
|
||||||
|
# Formatting --------------------------------------------- #
|
||||||
|
if (improve_formatting) {
|
||||||
|
txt <- gsub("(.),(\\S)", "\\1, \\2", txt)
|
||||||
|
# Math operators
|
||||||
|
txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt)
|
||||||
|
txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt)
|
||||||
|
txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt)
|
||||||
|
txt <- gsub("(\\S)\\/(\\S)", "\\1 / \\2", txt)
|
||||||
|
# Logic operators
|
||||||
|
txt <- gsub("~", "!", txt)
|
||||||
|
txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt)
|
||||||
|
txt <- gsub("(\\S)<=(\\S)", "\\1 <= \\2", txt)
|
||||||
|
txt <- gsub("(\\S)==(\\S)", "\\1 == \\2", txt)
|
||||||
|
# Assignment
|
||||||
|
txt <- gsub(
|
||||||
|
pattern = "(\\w)(\\s?)=(\\s?)(\\w)",
|
||||||
|
replacement = paste0("\\1 ", ass_op, " \\4"),
|
||||||
|
x = txt
|
||||||
|
)
|
||||||
|
# txt <- gsub(
|
||||||
|
# pattern = "(\\s+(.|\\_|\\[|\\])+)(\\s?)=(\\s?)(.+)",
|
||||||
|
# replacement = paste0("\\1 ", ass_op, "\\5"),
|
||||||
|
# x = txt
|
||||||
|
# )
|
||||||
|
txt <- gsub("%(\\s?)(\\w)", "# \\2", txt)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Adding output and end-of-file brace -------------------- #
|
||||||
|
txt <- append(txt, paste0("\treturn(", out, ")\n}"))
|
||||||
|
|
||||||
|
# Returning converted code ------------------------------- #
|
||||||
|
if (output == "asis") {
|
||||||
|
return(txt)
|
||||||
|
} else if (output == "clean") {
|
||||||
|
return(cat(txt, sep="\n"))
|
||||||
|
} else if (output == "save") {
|
||||||
|
return(
|
||||||
|
write.table(
|
||||||
|
x = txt,
|
||||||
|
file = filename,
|
||||||
|
quote = FALSE,
|
||||||
|
row.names = FALSE,
|
||||||
|
col.names = FALSE,
|
||||||
|
append = append
|
||||||
|
)
|
||||||
|
)
|
||||||
|
} else if (output == "diff") {
|
||||||
|
diff_text <- vector(mode="character", length=(2 * length(original) + 1))
|
||||||
|
for (i in seq_along(txt)) {
|
||||||
|
new_i <- (2 * i) + i - 2
|
||||||
|
diff_text[new_i] <- paste(
|
||||||
|
"-----------------------", "line", i, "-----------------------"
|
||||||
|
)
|
||||||
|
diff_text[new_i + 1] <- original[i]
|
||||||
|
diff_text[new_i + 2] <- txt[i]
|
||||||
|
}
|
||||||
|
message("Displaying line number, original content and modified content")
|
||||||
|
return(cat(diff_text, sep="\n"))
|
||||||
|
} else {
|
||||||
|
stop ("Invalid output argument")
|
||||||
|
}
|
||||||
|
}
|
||||||
31
R/min_max_MATLAB.R
Normal file
31
R/min_max_MATLAB.R
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
#' @title Minimum (MATLAB version)
|
||||||
|
#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead
|
||||||
|
#' @param X matrix
|
||||||
|
#' @param indices return indices?
|
||||||
|
#' @return Either a list or a vector
|
||||||
|
#' @author Waldir Leoncio
|
||||||
|
min_MATLAB <- function(X, indices = TRUE) {
|
||||||
|
mins <- apply(X, 2, min)
|
||||||
|
idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x]))
|
||||||
|
if (indices) {
|
||||||
|
return(list(mins = mins, idx = idx))
|
||||||
|
} else {
|
||||||
|
return(mins)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#' @title Maximum (MATLAB version)
|
||||||
|
#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead
|
||||||
|
#' @param X matrix
|
||||||
|
#' @param indices return indices?
|
||||||
|
#' @return Either a list or a vector
|
||||||
|
#' @author Waldir Leoncio
|
||||||
|
max_MATLAB <- function(X, indices = TRUE) {
|
||||||
|
maxs <- apply(X, 2, max)
|
||||||
|
idx <- sapply(seq_len(ncol(X)), function(x) match(maxs[x], X[, x]))
|
||||||
|
if (indices) {
|
||||||
|
return(list(maxs = maxs, idx = idx))
|
||||||
|
} else {
|
||||||
|
return(maxs)
|
||||||
|
}
|
||||||
|
}
|
||||||
10
R/nargin.R
Normal file
10
R/nargin.R
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
#' @title Number of function input arguments
|
||||||
|
#' @description Returns the number of arguments passed to the parent function
|
||||||
|
#' @return An integer
|
||||||
|
#' @author Waldir Leoncio
|
||||||
|
#' @note This function only makes sense inside another function
|
||||||
|
#' @references https://stackoverflow.com/q/64422780/1169233
|
||||||
|
nargin <- function() {
|
||||||
|
if(sys.nframe() < 2) stop("must be called from inside a function")
|
||||||
|
length(as.list(sys.call(-1))) - 1
|
||||||
|
}
|
||||||
|
|
@ -5,14 +5,14 @@ newGetDistances <- function(data, rowsFromInd) {
|
||||||
|
|
||||||
empties <- find(data < 0)
|
empties <- find(data < 0)
|
||||||
data[empties] <- 0
|
data[empties] <- 0
|
||||||
data <- as.integer(data) # max(noalle) oltava <256
|
data <- apply(data, 2, as.numeric) # max(noalle) oltava <256
|
||||||
|
|
||||||
pariTaulu <- zeros(riviLkm, 2)
|
pariTaulu <- zeros(riviLkm, 2)
|
||||||
aPointer <- 1
|
aPointer <- 1
|
||||||
for (a in (1:ninds) - 1) {
|
for (a in 1:(ninds - 1)) {
|
||||||
pariTaulu[aPointer:(aPointer + ninds - 1 - a), 1] <-
|
pariTaulu_rows <- aPointer:(aPointer + ninds - 1 - a)
|
||||||
ones(ninds - a, 1) * a
|
pariTaulu[pariTaulu_rows, 1] <- ones(ninds - a, 1) * a
|
||||||
pariTaulu[aPointer:aPointer + ninds - 1 - a, 2] <- t(a + 1:ninds)
|
pariTaulu[pariTaulu_rows, 2] <- t((a + 1):ninds)
|
||||||
aPointer <- aPointer + ninds - a
|
aPointer <- aPointer + ninds - a
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -31,9 +31,9 @@ newGetDistances <- function(data, rowsFromInd) {
|
||||||
rm(pariTaulu, miinus)
|
rm(pariTaulu, miinus)
|
||||||
|
|
||||||
x <- zeros(size(eka))
|
x <- zeros(size(eka))
|
||||||
x <- as.integer(x)
|
x <- apply(x, 2, as.integer)
|
||||||
y <- zeros(size(toka))
|
y <- zeros(size(toka))
|
||||||
y <- as.integer(y)
|
y <- apply(y, 2, as.integer)
|
||||||
|
|
||||||
for (j in 1:nloci) {
|
for (j in 1:nloci) {
|
||||||
for (k in 1:rowsFromInd) {
|
for (k in 1:rowsFromInd) {
|
||||||
|
|
@ -57,7 +57,6 @@ newGetDistances <- function(data, rowsFromInd) {
|
||||||
muut <- find(vertailuja > 0)
|
muut <- find(vertailuja > 0)
|
||||||
dist[muut] <- summa[muut] / vertailuja[muut]
|
dist[muut] <- summa[muut] / vertailuja[muut]
|
||||||
rm(summa, vertailuja)
|
rm(summa, vertailuja)
|
||||||
|
Z <- linkage(t(dist))
|
||||||
Z = linkage(t(dist))
|
|
||||||
return(list(Z = Z, dist = dist))
|
return(list(Z = Z, dist = dist))
|
||||||
}
|
}
|
||||||
|
|
@ -41,7 +41,7 @@ poistaLiianPienet <- function (npops, rowsFromInd, alaraja,
|
||||||
PARTITION[yksilot] == n
|
PARTITION[yksilot] == n
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <<-
|
# TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <-
|
||||||
COUNTS[, , miniPops] <- NA
|
COUNTS[, , miniPops] <- NA
|
||||||
SUMCOUNTS[miniPops, ] <- NA
|
SUMCOUNTS[miniPops, ] <- NA
|
||||||
|
|
||||||
|
|
|
||||||
15
R/poistaTyhjatPopulaatiot.R
Normal file
15
R/poistaTyhjatPopulaatiot.R
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
poistaTyhjatPopulaatiot <- function(npops) {
|
||||||
|
# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
||||||
|
# % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.
|
||||||
|
notEmpty <- find(any(SUMCOUNTS, 2))
|
||||||
|
COUNTS <- COUNTS[, , notEmpty]
|
||||||
|
SUMCOUNTS <- SUMCOUNTS[notEmpty, ]
|
||||||
|
LOGDIFF <- LOGDIFF[, notEmpty]
|
||||||
|
|
||||||
|
for (n in 1:length(notEmpty)) {
|
||||||
|
apu <- find(PARTITION == notEmpty(n))
|
||||||
|
PARTITION[apu] <- n
|
||||||
|
}
|
||||||
|
npops <- length(notEmpty)
|
||||||
|
return(npops)
|
||||||
|
}
|
||||||
7
R/rand_disc.R
Normal file
7
R/rand_disc.R
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
rand_disc <- function(CDF) {
|
||||||
|
# %returns an index of a value from a discrete distribution using inversion method
|
||||||
|
slump <- rand
|
||||||
|
har <- find(CDF > slump)
|
||||||
|
svar <- har(1)
|
||||||
|
return(svar)
|
||||||
|
}
|
||||||
26
R/returnInOrder.R
Normal file
26
R/returnInOrder.R
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
returnInOrder <- function(inds, pop, globalRows, data, adjprior, priorTerm) {
|
||||||
|
# % Palauttaa yksil<69>t j<>rjestyksess<73> siten, ett<74> ensimm<6D>isen<65> on
|
||||||
|
# % se, jonka poistaminen populaatiosta pop nostaisi logml:n
|
||||||
|
# % arvoa eniten.
|
||||||
|
|
||||||
|
ninds <- length(inds)
|
||||||
|
apuTaulu <- c(inds, zeros(ninds, 1))
|
||||||
|
|
||||||
|
for (i in 1:ninds) {
|
||||||
|
ind <- inds[i]
|
||||||
|
rows <- globalRows[i, 1]:globalRows[i, 2]
|
||||||
|
diffInCounts <- computeDiffInCounts(
|
||||||
|
rows, size[COUNTS, 1], size[COUNTS, 2], data
|
||||||
|
)
|
||||||
|
diffInSumCounts <- sum(diffInCounts)
|
||||||
|
|
||||||
|
COUNTS[ , ,pop] <- COUNTS[ , ,pop] - diffInCounts
|
||||||
|
SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] - diffInSumCounts
|
||||||
|
apuTaulu[i, 2] <- computePopulationLogml(pop, adjprior, priorTerm)
|
||||||
|
COUNTS[ , ,pop] <- COUNTS[ , ,pop] + diffInCounts
|
||||||
|
SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] + diffInSumCounts
|
||||||
|
}
|
||||||
|
apuTaulu <- sortrows(apuTaulu, 2)
|
||||||
|
inds <- apuTaulu[ninds:1, 1]
|
||||||
|
return(inds)
|
||||||
|
}
|
||||||
|
|
@ -7,23 +7,22 @@ selvitaDigitFormat <- function(line) {
|
||||||
# Genepop-formaatissa olevasta datasta. funktio selvitt<74><74>
|
# Genepop-formaatissa olevasta datasta. funktio selvitt<74><74>
|
||||||
# rivin muodon perusteella, ovatko datan alleelit annettu
|
# rivin muodon perusteella, ovatko datan alleelit annettu
|
||||||
# 2 vai 3 numeron avulla.
|
# 2 vai 3 numeron avulla.
|
||||||
|
|
||||||
n <- 1
|
n <- 1
|
||||||
merkki <- line[n]
|
merkki <- substring(line, n, n)
|
||||||
while (merkki != ',') {
|
while (merkki != ',') {
|
||||||
n <- n + 1
|
n <- n + 1
|
||||||
merkki <- line[n]
|
merkki <- substring(line, n, n)
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!any(merkki == '0123456789')) {
|
while (!any(merkki %in% as.character(0:9))) {
|
||||||
n <- n + 1
|
n <- n + 1
|
||||||
merkki <- line[n]
|
merkki <- substring(line, n, n)
|
||||||
}
|
}
|
||||||
numeroja <- 0
|
numeroja <- 0
|
||||||
while (any(merkki == '0123456789')) {
|
while (any(merkki %in% as.character(0:9))) {
|
||||||
numeroja <- numeroja + 1
|
numeroja <- numeroja + 1
|
||||||
n <- n + 1
|
n <- n + 1
|
||||||
merkki <- line[n]
|
merkki <- substring(line, n, n)
|
||||||
}
|
}
|
||||||
|
|
||||||
df <- numeroja / 2
|
df <- numeroja / 2
|
||||||
|
|
|
||||||
16
R/setdiff_MATLAB.R
Normal file
16
R/setdiff_MATLAB.R
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
#' @title Set differences of two arrays
|
||||||
|
#' @description Loosely replicates the behavior of the homonym Matlab function
|
||||||
|
#' @param A first array
|
||||||
|
#' @param B second array
|
||||||
|
#' @param legacy if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported)
|
||||||
|
#' @author Waldir Leoncio
|
||||||
|
setdiff_MATLAB <- function(A, B, legacy = FALSE) {
|
||||||
|
if (legacy) message("legacy=TRUE not supported. Ignoring.")
|
||||||
|
if (is(A, "numeric") & is(B, "numeric")) {
|
||||||
|
values <- sort(unique(A[is.na(match(A, B))]))
|
||||||
|
} else if (is(A, "data.frame") & is(B, "data.frame")) {
|
||||||
|
stop("Not implemented for data frames")
|
||||||
|
}
|
||||||
|
# TODO: add support for indices (if necessary)
|
||||||
|
return(values)
|
||||||
|
}
|
||||||
64
R/updateGlobalVariables.R
Normal file
64
R/updateGlobalVariables.R
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) {
|
||||||
|
# % Suorittaa globaalien muuttujien muutokset, kun yksil<69> ind
|
||||||
|
# % on siirret<65><74>n koriin i2.
|
||||||
|
|
||||||
|
i1 <- PARTITION[ind]
|
||||||
|
PARTITION[ind] <- i2
|
||||||
|
|
||||||
|
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||||
|
COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
|
||||||
|
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts]
|
||||||
|
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts]
|
||||||
|
|
||||||
|
POP_LOGML[c(i1, i2)] <- computePopulationLogml(
|
||||||
|
c(i1, i2), adjprior, priorTerm
|
||||||
|
)
|
||||||
|
|
||||||
|
LOGDIFF[, c(i1, i2)] <- -Inf
|
||||||
|
inx <- c(find(PARTITION == i1), find(PARTITION==i2))
|
||||||
|
LOGDIFF[inx, ] <- -Inf
|
||||||
|
}
|
||||||
|
|
||||||
|
updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) {
|
||||||
|
# % Suorittaa globaalien muuttujien muutokset, kun kaikki
|
||||||
|
# % korissa i1 olevat yksil<69>t siirret<65><74>n koriin i2.
|
||||||
|
|
||||||
|
inds <- find(PARTITION == i1)
|
||||||
|
PARTITION[inds] <- i2
|
||||||
|
|
||||||
|
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||||
|
COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
|
||||||
|
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts]
|
||||||
|
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts]
|
||||||
|
|
||||||
|
POP_LOGML[i1] <- 0
|
||||||
|
POP_LOGML[i2] <- computePopulationLogml(i2, adjprior, priorTerm)
|
||||||
|
|
||||||
|
LOGDIFF[, c(i1, i2)] <- -Inf
|
||||||
|
inx <- c(find(PARTITION == i1), find(PARTITION == i2))
|
||||||
|
LOGDIFF[inx, ] <- -Inf
|
||||||
|
}
|
||||||
|
|
||||||
|
updateGlobalVariables3 <- function(
|
||||||
|
muuttuvat, diffInCounts, adjprior, priorTerm, i2
|
||||||
|
) {
|
||||||
|
# % Suorittaa globaalien muuttujien p<>ivitykset, kun yksil<69>t 'muuttuvat'
|
||||||
|
# % siirret<65><74>n koriin i2. Ennen siirtoa yksil<69>iden on kuuluttava samaan
|
||||||
|
# % koriin.
|
||||||
|
|
||||||
|
i1 <- PARTITION[muuttuvat(1)]
|
||||||
|
PARTITION[muuttuvat] <- i2
|
||||||
|
|
||||||
|
COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
|
||||||
|
COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
|
||||||
|
SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts]
|
||||||
|
SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts]
|
||||||
|
|
||||||
|
POP_LOGML[c(i1, i2)] <- computePopulationLogml(
|
||||||
|
c(i1, i2), adjprior, priorTerm
|
||||||
|
)
|
||||||
|
|
||||||
|
LOGDIFF[, c(i1, i2)] <- -Inf
|
||||||
|
inx <- c(find(PARTITION == i1), find(PARTITION == i2))
|
||||||
|
LOGDIFF[inx, ] <- -Inf
|
||||||
|
}
|
||||||
|
|
@ -10,16 +10,10 @@
|
||||||
#' @param partitionSummary partitionSummary
|
#' @param partitionSummary partitionSummary
|
||||||
#' @param popnames popnames
|
#' @param popnames popnames
|
||||||
#' @param fixedK fixedK
|
#' @param fixedK fixedK
|
||||||
#' @param PARTITION PARTITION
|
|
||||||
#' @param COUNTS COUNTS
|
|
||||||
#' @param SUMCOUNTS SUMCOUNTS
|
|
||||||
#' @param LOGDIFF LOGDIFF
|
|
||||||
#' @export
|
#' @export
|
||||||
writeMixtureInfo <- function(
|
writeMixtureInfo <- function(
|
||||||
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS,
|
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK
|
||||||
LOGDIFF
|
|
||||||
) {
|
) {
|
||||||
|
|
||||||
changesInLogml <- list()
|
changesInLogml <- list()
|
||||||
ninds <- size(data, 1) / rowsFromInd
|
ninds <- size(data, 1) / rowsFromInd
|
||||||
npops <- size(COUNTS, 3)
|
npops <- size(COUNTS, 3)
|
||||||
|
|
@ -30,7 +24,6 @@ writeMixtureInfo <- function(
|
||||||
fid <- load(outPutFile)
|
fid <- load(outPutFile)
|
||||||
} else {
|
} else {
|
||||||
fid <- -1
|
fid <- -1
|
||||||
message('Diverting output to baps4_output.baps')
|
|
||||||
# TODO: replace sink with option that will record input and output
|
# TODO: replace sink with option that will record input and output
|
||||||
sink('baps4_output.baps', split=TRUE) # save in text anyway.
|
sink('baps4_output.baps', split=TRUE) # save in text anyway.
|
||||||
}
|
}
|
||||||
|
|
|
||||||
18
man/admixture_initialization.Rd
Normal file
18
man/admixture_initialization.Rd
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/admixture_initialization.R
|
||||||
|
\name{admixture_initialization}
|
||||||
|
\alias{admixture_initialization}
|
||||||
|
\title{Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.}
|
||||||
|
\usage{
|
||||||
|
admixture_initialization(data_matrix, nclusters, Z)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{data_matrix}{data_matrix}
|
||||||
|
|
||||||
|
\item{nclusters}{ncluster}
|
||||||
|
|
||||||
|
\item{Z}{Z}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.
|
||||||
|
}
|
||||||
23
man/fgetl.Rd
Normal file
23
man/fgetl.Rd
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/fgetl-fopen.R
|
||||||
|
\name{fgetl}
|
||||||
|
\alias{fgetl}
|
||||||
|
\title{Read line from file, removing newline characters}
|
||||||
|
\usage{
|
||||||
|
fgetl(file)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{file}{character vector to be read, usually an output of `fopen()`}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Equivalent function to its homonymous Matlab equivalent.
|
||||||
|
}
|
||||||
|
\seealso{
|
||||||
|
fopen
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Waldir Leoncio
|
||||||
|
}
|
||||||
23
man/fopen.Rd
Normal file
23
man/fopen.Rd
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/fgetl-fopen.R
|
||||||
|
\name{fopen}
|
||||||
|
\alias{fopen}
|
||||||
|
\title{Open file}
|
||||||
|
\usage{
|
||||||
|
fopen(filename)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{filename}{Path and name of file to be open}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
The same as `readLines(filename)`
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Open a text file
|
||||||
|
}
|
||||||
|
\seealso{
|
||||||
|
fgetl
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Waldir Leoncio
|
||||||
|
}
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/laskeMuutokset4.R
|
% Please edit documentation in R/laskeMuutokset12345.R
|
||||||
\name{laskeMuutokset4}
|
\name{laskeMuutokset4}
|
||||||
\alias{laskeMuutokset4}
|
\alias{laskeMuutokset4}
|
||||||
\title{Calculate changes?}
|
\title{Calculate changes (?)}
|
||||||
\usage{
|
\usage{
|
||||||
laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0))
|
laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0))
|
||||||
}
|
}
|
||||||
|
|
@ -20,6 +20,6 @@ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0))
|
||||||
\description{
|
\description{
|
||||||
Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
|
Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
|
||||||
muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran
|
muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran
|
||||||
todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
|
todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
|
||||||
siirrettävää, on vastaavassa kohdassa rivi nollia.
|
siirrettävää, on vastaavassa kohdassa rivi nollia.
|
||||||
}
|
}
|
||||||
|
|
|
||||||
21
man/laskeOsaDist.Rd
Normal file
21
man/laskeOsaDist.Rd
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/laskeOsaDist.R
|
||||||
|
\name{laskeOsaDist}
|
||||||
|
\alias{laskeOsaDist}
|
||||||
|
\title{Lower part of the dist}
|
||||||
|
\usage{
|
||||||
|
laskeOsaDist(inds2, dist, ninds)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{inds2}{inds2}
|
||||||
|
|
||||||
|
\item{dist}{dist}
|
||||||
|
|
||||||
|
\item{ninds}{ninds}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Constructs from the dist vector a subvector containing the individual inds2, Forms dist sub-vectors the vector, which includes yksiliden inds2
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Waldir Leoncio
|
||||||
|
}
|
||||||
|
|
@ -7,7 +7,7 @@
|
||||||
linkage(Y, method = "co")
|
linkage(Y, method = "co")
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{Y}{data}
|
\item{Y}{matrix}
|
||||||
|
|
||||||
\item{method}{either 'si', 'av', 'co' 'ce' or 'wa'}
|
\item{method}{either 'si', 'av', 'co' 'ce' or 'wa'}
|
||||||
}
|
}
|
||||||
|
|
@ -19,4 +19,9 @@ Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single
|
||||||
linkage algorithm. The input Y is a distance matrix such as is generated by
|
linkage algorithm. The input Y is a distance matrix such as is generated by
|
||||||
PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
PDIST. Y may also be a more general dissimilarity matrix conforming to the
|
||||||
output format of PDIST.
|
output format of PDIST.
|
||||||
|
|
||||||
|
Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X.
|
||||||
|
}
|
||||||
|
\note{
|
||||||
|
This is also a base Matlab function. The reason why the source code is also present here is unclear.
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,5 +13,5 @@ lueNimi(line)
|
||||||
nimi
|
nimi
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Reads the line name
|
Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
|
||||||
}
|
}
|
||||||
|
|
|
||||||
46
man/matlab2r.Rd
Normal file
46
man/matlab2r.Rd
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/matlab2r.R
|
||||||
|
\name{matlab2r}
|
||||||
|
\alias{matlab2r}
|
||||||
|
\title{Convert Matlab function to R}
|
||||||
|
\usage{
|
||||||
|
matlab2r(
|
||||||
|
filename,
|
||||||
|
output = "diff",
|
||||||
|
improve_formatting = TRUE,
|
||||||
|
change_assignment = TRUE,
|
||||||
|
append = FALSE
|
||||||
|
)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{filename}{name of the file}
|
||||||
|
|
||||||
|
\item{output}{can be "asis", "clean", "save" or "diff"}
|
||||||
|
|
||||||
|
\item{improve_formatting}{if `TRUE` (default), makes minor changes
|
||||||
|
to conform to best-practice formatting conventions}
|
||||||
|
|
||||||
|
\item{change_assignment}{if `TRUE` (default), uses `<-` as the assignment operator}
|
||||||
|
|
||||||
|
\item{append}{if `FALSE` (default), overwrites file; otherwise, append
|
||||||
|
output to input}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
text converted to R, printed to screen or replacing input file
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Performs basic syntax conversion from Matlab to R
|
||||||
|
}
|
||||||
|
\note{
|
||||||
|
This function is intended to expedite the process of converting a
|
||||||
|
Matlab function to R by making common replacements. It does not have the
|
||||||
|
immediate goal of outputting a ready-to-use function. In other words,
|
||||||
|
after using this function you should go back to it and make minor changes.
|
||||||
|
|
||||||
|
It is also advised to do a dry-run with `output = "clean"` and only switching
|
||||||
|
to `output = "save"` when you are confident that no important code will be
|
||||||
|
lost (for shorter functions, a careful visual inspection should suffice).
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Waldir Leoncio
|
||||||
|
}
|
||||||
22
man/max_MATLAB.Rd
Normal file
22
man/max_MATLAB.Rd
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/min_max_MATLAB.R
|
||||||
|
\name{max_MATLAB}
|
||||||
|
\alias{max_MATLAB}
|
||||||
|
\title{Maximum (MATLAB version)}
|
||||||
|
\usage{
|
||||||
|
max_MATLAB(X, indices = TRUE)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{X}{matrix}
|
||||||
|
|
||||||
|
\item{indices}{return indices?}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
Either a list or a vector
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Finds the minimum value for each column of a matrix, potentially returning the indices instead
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Waldir Leoncio
|
||||||
|
}
|
||||||
22
man/min_MATLAB.Rd
Normal file
22
man/min_MATLAB.Rd
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/min_max_MATLAB.R
|
||||||
|
\name{min_MATLAB}
|
||||||
|
\alias{min_MATLAB}
|
||||||
|
\title{Minimum (MATLAB version)}
|
||||||
|
\usage{
|
||||||
|
min_MATLAB(X, indices = TRUE)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{X}{matrix}
|
||||||
|
|
||||||
|
\item{indices}{return indices?}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
Either a list or a vector
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Finds the minimum value for each column of a matrix, potentially returning the indices instead
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Waldir Leoncio
|
||||||
|
}
|
||||||
23
man/nargin.Rd
Normal file
23
man/nargin.Rd
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/nargin.R
|
||||||
|
\name{nargin}
|
||||||
|
\alias{nargin}
|
||||||
|
\title{Number of function input arguments}
|
||||||
|
\usage{
|
||||||
|
nargin()
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
An integer
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Returns the number of arguments passed to the parent function
|
||||||
|
}
|
||||||
|
\note{
|
||||||
|
This function only makes sense inside another function
|
||||||
|
}
|
||||||
|
\references{
|
||||||
|
https://stackoverflow.com/q/64422780/1169233
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Waldir Leoncio
|
||||||
|
}
|
||||||
21
man/setdiff_MATLAB.Rd
Normal file
21
man/setdiff_MATLAB.Rd
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/setdiff_MATLAB.R
|
||||||
|
\name{setdiff_MATLAB}
|
||||||
|
\alias{setdiff_MATLAB}
|
||||||
|
\title{Set differences of two arrays}
|
||||||
|
\usage{
|
||||||
|
setdiff_MATLAB(A, B, legacy = FALSE)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{A}{first array}
|
||||||
|
|
||||||
|
\item{B}{second array}
|
||||||
|
|
||||||
|
\item{legacy}{if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported)}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Loosely replicates the behavior of the homonym Matlab function
|
||||||
|
}
|
||||||
|
\author{
|
||||||
|
Waldir Leoncio
|
||||||
|
}
|
||||||
|
|
@ -14,11 +14,7 @@ writeMixtureInfo(
|
||||||
inputFile,
|
inputFile,
|
||||||
partitionSummary,
|
partitionSummary,
|
||||||
popnames,
|
popnames,
|
||||||
fixedK,
|
fixedK
|
||||||
PARTITION,
|
|
||||||
COUNTS,
|
|
||||||
SUMCOUNTS,
|
|
||||||
LOGDIFF
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
|
|
@ -41,14 +37,6 @@ writeMixtureInfo(
|
||||||
\item{popnames}{popnames}
|
\item{popnames}{popnames}
|
||||||
|
|
||||||
\item{fixedK}{fixedK}
|
\item{fixedK}{fixedK}
|
||||||
|
|
||||||
\item{PARTITION}{PARTITION}
|
|
||||||
|
|
||||||
\item{COUNTS}{COUNTS}
|
|
||||||
|
|
||||||
\item{SUMCOUNTS}{SUMCOUNTS}
|
|
||||||
|
|
||||||
\item{LOGDIFF}{LOGDIFF}
|
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Writes information about the mixture
|
Writes information about the mixture
|
||||||
|
|
|
||||||
|
|
@ -158,18 +158,18 @@ test_that("find works as expected", {
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("sortrows works as expected", {
|
test_that("sortrows works as expected", {
|
||||||
mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4)
|
mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4)
|
||||||
expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4))
|
expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4))
|
||||||
expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4))
|
expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4))
|
||||||
expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ])
|
expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ])
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("cell works as expected", {
|
test_that("cell works as expected", {
|
||||||
expect_equal(cell(0), array(dim = c(0, 0)))
|
expect_equivalent(cell(0), array(0, dim = c(0, 0)))
|
||||||
expect_equal(cell(1), array(dim = c(1, 1)))
|
expect_equivalent(cell(1), array(0, dim = c(1, 1)))
|
||||||
expect_equal(cell(2), array(dim = c(2, 2)))
|
expect_equivalent(cell(2), array(0, dim = c(2, 2)))
|
||||||
expect_equal(cell(3, 4), array(dim = c(3, 4)))
|
expect_equivalent(cell(3, 4), array(0, dim = c(3, 4)))
|
||||||
expect_equal(cell(5, 7, 6), array(dim = c(5, 7, 6)))
|
expect_equivalent(cell(5, 7, 6), array(0, dim = c(5, 7, 6)))
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("blanks works as expected", {
|
test_that("blanks works as expected", {
|
||||||
|
|
@ -201,4 +201,44 @@ test_that("isspace works as expected", {
|
||||||
X <- '\t a b\tcde f'
|
X <- '\t a b\tcde f'
|
||||||
expect_identical(isspace(chr), c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0))
|
expect_identical(isspace(chr), c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0))
|
||||||
expect_identical(isspace(X), c(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0))
|
expect_identical(isspace(X), c(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0))
|
||||||
|
})
|
||||||
|
|
||||||
|
test_that("nargin works correctly", {
|
||||||
|
addme <- function(a, b) {
|
||||||
|
if (nargin() == 2) {
|
||||||
|
c <- a + b
|
||||||
|
} else if (nargin() == 1) {
|
||||||
|
c <- a + a
|
||||||
|
} else {
|
||||||
|
c <- 0
|
||||||
|
}
|
||||||
|
return(c)
|
||||||
|
}
|
||||||
|
expect_equal(addme(13, 42), 55)
|
||||||
|
expect_equal(addme(13), 26)
|
||||||
|
expect_equal(addme(), 0)
|
||||||
|
})
|
||||||
|
|
||||||
|
test_that("setdiff works as expected", {
|
||||||
|
A <- c(3, 6, 2, 1, 5, 1, 1)
|
||||||
|
B <- c(2, 4, 6)
|
||||||
|
C <- c(1, 3, 5)
|
||||||
|
# expect_equal(setdiff_MATLAB(A, B), C) # TODO: export setdiff_MATLAB
|
||||||
|
A <- data.frame(
|
||||||
|
Var1 = 1:5,
|
||||||
|
Var2 = LETTERS[1:5],
|
||||||
|
Var3 = c(FALSE, TRUE, FALSE, TRUE, FALSE)
|
||||||
|
)
|
||||||
|
B <- data.frame(
|
||||||
|
Var1 = seq(1, 9, by = 2),
|
||||||
|
Var2 = LETTERS[seq(1, 9, by = 2)],
|
||||||
|
Var3 = rep(FALSE, 5)
|
||||||
|
)
|
||||||
|
C <- data.frame(
|
||||||
|
Var1 = c(2, 4),
|
||||||
|
Var2 = c('B', 'D'),
|
||||||
|
Var3 = c(TRUE, TRUE)
|
||||||
|
)
|
||||||
|
# expect_equal(setdiff_MATLAB(A, B), C) # TODO: implement for data frames
|
||||||
|
# TODO: add more examples from https://se.mathworks.com/help/matlab/ref/double.setdiff.html;jsessionid=0d8d42582d4d299b8224403899f1
|
||||||
})
|
})
|
||||||
|
|
@ -1,11 +1,17 @@
|
||||||
# library(devtools)#TEMP
|
|
||||||
# library(testthat)#TEMP
|
|
||||||
# library(rBAPS)#TEMP
|
|
||||||
|
|
||||||
context("Opening files on greedyMix")
|
context("Opening files on greedyMix")
|
||||||
|
|
||||||
# greedyMix(
|
# greedyMix(
|
||||||
# tietue = "data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt",
|
# tietue = "inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt",
|
||||||
# format = "GenePop",
|
# format = "GenePop",
|
||||||
# savePreProcessed = FALSE
|
# savePreProcessed = FALSE
|
||||||
# )
|
# )
|
||||||
|
|
||||||
|
context("Linkage")
|
||||||
|
|
||||||
|
test_that("Linkages are properly calculated", {
|
||||||
|
Y <- c(0.5, 0.3, 0.6, 0.3, 0.3, 0.2, 0.3, 0.3, 0.3, 0.5)
|
||||||
|
expect_equal(
|
||||||
|
object = linkage(Y),
|
||||||
|
expected = matrix(c(2, 1, 7, 8, 4, 3, 5, 6, .2, .3, .3, .6), ncol=3)
|
||||||
|
)
|
||||||
|
})
|
||||||
Loading…
Add table
Reference in a new issue