Merge branch 'translate-greedyMix' into dev (issue #1)

This commit is contained in:
Waldir Leoncio 2020-11-19 13:33:57 +01:00
commit 4aba2528da
84 changed files with 1927 additions and 794 deletions

View file

@ -1,7 +1,7 @@
LICENSE
TODO.md
matlab
^matlab
CHANGELOG.md
CITATION.cff
.travis.yml
data/ExamplesDataFormatting
inst/ext/ExamplesDataFormatting

View file

@ -1,7 +1,7 @@
Package: rBAPS
Title: Bayesian Analysis of Population Structure
Version: 0.0.0.9000
Date: 2020-01-14
Version: 0.0.0.9001
Date: 2020-11-09
Authors@R:
c(
person(
@ -30,14 +30,14 @@ Description: Partial R implementation of the BAPS software
Corander et al. 2008b <doi:10.1007/s00180-007-0072-x>;
Tang et al. 2009 <doi:10.1371/journal.pcbi.1000455>;
Cheng et al. 2011 <doi:10.1186/1471-2105-12-302>,
available at <http://www.helsinki.fi/bsg/software/BAPS/Z>), provides a
computationally-efficient method for the identification of admixture events
available at <http://www.helsinki.fi/bsg/software/BAPS/Z>), provides a
computationally-efficient method for the identification of admixture events
in genetic population history.
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
Suggests:
Suggests:
testthat (>= 2.1.0)
Imports:
methods

View file

@ -10,6 +10,8 @@ export(computeIndLogml)
export(computePersonalAllFreqs)
export(computeRows)
export(etsiParas)
export(fgetl)
export(fopen)
export(greedyMix)
export(handleData)
export(initPopNames)
@ -21,6 +23,7 @@ export(linkage)
export(logml2String)
export(lueGenePopData)
export(lueNimi)
export(matlab2r)
export(noIndex)
export(ownNum2Str)
export(poistaLiianPienet)
@ -47,3 +50,4 @@ export(writeMixtureInfo)
importFrom(methods,is)
importFrom(stats,runif)
importFrom(utils,read.delim)
importFrom(utils,write.table)

View file

@ -12,22 +12,25 @@ addAlleles <- function(data, ind, line, divider) {
# line. Jos data on 3 digit formaatissa on divider=1000.
# Jos data on 2 digit formaatissa on divider=100.
nloci <- size(data, 2) - 1
nloci <- size(data, 2) # added 1 from original code
if (size(data, 1) < (2 * ind)) {
data <- c(data, zeros(100, nloci + 1))
data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code
}
k <- 1
merkki <- line[k]
merkki <- substring(line, k, k)
while (merkki != ',') {
k <- k + 1
merkki <- line[k]
k <- k + 1
merkki <- substring(line, k, k)
}
line <- line[k + 1:length(line)]
line <- substring(line, k + 1)
# clear k; clear merkki;
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
if (grepl(" ", line)) {
alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]])
} else if (grepl("\t", line)) {
alleeliTaulu <- as.numeric(strsplit(line, split = "\t")[[1]])
}
if (length(alleeliTaulu) != nloci) {
stop('Incorrect data format.')
@ -35,9 +38,9 @@ addAlleles <- function(data, ind, line, divider) {
for (j in seq_len(nloci)) {
ekaAlleeli <- floor(alleeliTaulu[j] / divider)
if (ekaAlleeli == 0) ekaAlleeli <- -999
if (is.na(ekaAlleeli) | ekaAlleeli == 0) ekaAlleeli <- -999
tokaAlleeli <- alleeliTaulu[j] %% divider
if (tokaAlleeli == 0) tokaAlleeli <- -999
if (is.na(tokaAlleeli) | tokaAlleeli == 0) tokaAlleeli <- -999
data[2 * ind - 1, j] <- ekaAlleeli
data[2 * ind, j] <- tokaAlleeli

18
R/addToSummary.R Normal file
View file

@ -0,0 +1,18 @@
addToSummary <- function(logml, partitionSummary, worstIndex) {
  # Records a newly found partition in the summary table unless a row with
  # (numerically) the same logml is already there.
  #
  # Per the original author's note, the caller guarantees that `logml` is
  # larger than the worst value currently stored in `partitionSummary`.
  #
  # Args:
  #   logml:            log marginal likelihood of the current partition.
  #   partitionSummary: table read and written as column 1 = number of
  #                     populations, column 2 = logml.
  #   worstIndex:       row of `partitionSummary` to overwrite.
  #
  # Returns a list with the (possibly updated) `partitionSummary` and
  # `added`, which is 1 when the row was written and 0 otherwise.
  #
  # Reads the global PARTITION to count the populations in use.
  sameLogml <- find(abs(partitionSummary[, 2] - logml) < 1e-5)

  # Already recorded: hand the table back untouched.
  if (!isempty(sameLogml)) {
    return(list(partitionSummary = partitionSummary, added = 0))
  }

  # Not yet in the summary: overwrite the worst row with the new entry.
  partitionSummary[worstIndex, 1] <- length(unique(PARTITION))
  partitionSummary[worstIndex, 2] <- logml
  list(partitionSummary = partitionSummary, added = 1)
}

View file

@ -0,0 +1,20 @@
#' @title Build the initial partition from a hierarchical clustering
#' @description One of the three functions (together with `cluster_own` and
#' `clusternum`) used to form the initial partition. Every row of
#' `data_matrix` receives the cluster label that `cluster_own()` assigned to
#' the individual owning that row.
#' @param data_matrix matrix whose last column holds, for each row, the index
#' of the individual the row belongs to
#' @param nclusters number of clusters, forwarded to `cluster_own()`
#' @param Z linkage matrix, forwarded to `cluster_own()`
#' @return A one-column matrix with one row per row of `data_matrix`; rows
#' whose individual index is not in `1:max(index)` keep the value 0.
admixture_initialization <- function(data_matrix, nclusters, Z) {
  individual <- data_matrix[, ncol(data_matrix)]
  n <- max(individual)
  cluster_of <- cluster_own(Z, nclusters)
  initial_partition <- matrix(0, nrow(data_matrix), 1)
  for (i in seq_len(n)) {
    # Vectorised assignment: a no-op when individual `i` owns no rows, which
    # the previous `1:length(here)` loop did not handle (it iterated 1, 0).
    initial_partition[which(individual == i), 1] <- cluster_of[i]
  }
  initial_partition
}

14
R/arvoSeuraavaTi.R Normal file
View file

@ -0,0 +1,14 @@
arvoSeuraavaTila <- function(muutokset, logml) {
  # Draws the next state (cluster) of an individual at random.
  #
  # Args:
  #   muutokset: numeric vector of logml changes, one per candidate cluster.
  #   logml:     current log marginal likelihood.
  #
  # Returns a list with `i2`, the index drawn by `rand_disc()`, and `suurin`,
  # the entry of `muutokset` at that index.
  y <- logml + muutokset # logml values after each candidate move
  y <- exp(y - max(y)) # shift so the largest exponent is 0 before exp()
  y <- cumsum(y / sum(y)) # normalise, then accumulate into a CDF
  i2 <- rand_disc(y) # new cluster
  # Index the vector: the MATLAB-style `muutokset(i2)` is a function call in R
  # and fails because `muutokset` is not a function.
  suurin <- muutokset[i2]
  list(suurin = suurin, i2 = i2)
}

View file

@ -6,10 +6,10 @@
#' @return An array of zeroes with the dimensions passed on call
cell <- function(n, sz = c(n, n), ...) {
if (length(sz) == 1 & missing(...)) {
return(array(dim = c(n, sz)))
return(array(0, dim = c(n, sz)))
} else if (length(sz) == 2) {
return(array(dim = sz))
return(array(0, dim = sz))
} else {
return(array(dim = c(n, sz, ...)))
return(array(0, dim = c(n, sz, ...)))
}
}

View file

@ -1,7 +1,7 @@
clearGlobalVars <- function() {
COUNTS <- SUMCOUNTS <- PARTITION <- POP_LOGML <- vector() # placeholders
COUNTS <<- vector()
SUMCOUNTS <<- vector()
PARTITION <<- vector()
POP_LOGML <<- vector()
COUNTS <- vector()
SUMCOUNTS <- vector()
PARTITION <- vector()
POP_LOGML <- vector()
LOGDIFF <- vector()
}

51
R/cluster_own.R Normal file
View file

@ -0,0 +1,51 @@
cluster_own <- function(Z, nclust) {
  # Cuts a hierarchical clustering tree into at most `nclust` clusters.
  #
  # Args:
  #   Z:      linkage matrix; columns 1 and 2 of row k hold the two children
  #           merged at step k. Children <= m are original observations,
  #           larger values refer to the node created at step (value - m),
  #           where m = nrow(Z) + 1.
  #   nclust: maximum number of clusters.
  #
  # Returns an m x 1 matrix with the cluster label of every observation.
  if (is.null(dim(Z))) {
    Z <- matrix(Z, nrow = 1) # a single merge passed as a plain vector
  }
  maxclust <- nclust
  m <- nrow(Z) + 1
  labels <- matrix(0, m, 1)
  if (m <= maxclust) {
    # Every observation is its own cluster. Column shape, matching the
    # MATLAB original `(1:m)'` (the earlier `t(1:m)` produced a row).
    labels <- matrix(seq_len(m), ncol = 1)
  } else if (maxclust == 1) {
    labels <- matrix(1, m, 1)
  } else {
    clsnum <- 1
    for (k in (m - maxclust + 1):(m - 1)) {
      for (side in 1:2) { # 1 = left tree, 2 = right tree
        i <- Z[k, side]
        if (i <= m) {
          # original observation, no leaves below it
          labels[i] <- clsnum
          clsnum <- clsnum + 1
        } else if (i < (2 * m - maxclust + 1)) {
          # node created before the cutoff: label everything below it
          labels <- clusternum(Z, labels, i - m, clsnum)
          clsnum <- clsnum + 1
        }
      }
    }
  }
  labels
}
clusternum <- function(X, T, k, c) {
  # Assigns cluster label `c` to every original observation found below the
  # tree node(s) `k`.
  #
  # Args:
  #   X: linkage matrix; columns 1 and 2 of row k are the children of node k.
  #   T: current label vector/matrix, indexed by observation number.
  #   k: row index (or indices) of `X` at which to start descending.
  #   c: label to write.
  #
  # Returns `T` with the affected entries set to `c`.
  if (is.null(dim(X))) {
    X <- matrix(X, nrow = 1) # a single merge passed as a plain vector
  }
  m <- nrow(X) + 1
  while (length(k) > 0) {
    # Children of the nodes at this level, flattened like MATLAB's children(:)
    children <- as.vector(X[k, 1:2])
    # Children numbered <= m are original observations (leaves)
    is_leaf <- children <= m
    T[children[is_leaf]] <- c
    # Remaining children are internal nodes: descend into their rows
    k <- children[!is_leaf] - m
  }
  T
}

16
R/clusternum.R Normal file
View file

@ -0,0 +1,16 @@
clusternum <- function(X, T, k, c) {
  # Assigns cluster label `c` to every original observation found below the
  # tree node(s) `k`.
  #
  # Args:
  #   X: linkage matrix; columns 1 and 2 of row k are the children of node k.
  #   T: current label vector/matrix, indexed by observation number.
  #   k: row index (or indices) of `X` at which to start descending.
  #   c: label to write.
  #
  # Returns `T` with the affected entries set to `c`.
  if (is.null(dim(X))) {
    X <- matrix(X, nrow = 1) # a single merge passed as a plain vector
  }
  m <- nrow(X) + 1
  # Stop on an EMPTY k: subsetting yields a zero-length vector, never NULL,
  # so the previous `!is.null(k)` test could not end the loop.
  while (length(k) > 0) {
    # Children of the nodes at this level, flattened like MATLAB's
    # children(:); as.vector() also works when a scalar k yields a plain
    # vector, where `children[, ]` would fail.
    children <- as.vector(X[k, 1:2])
    # Children numbered <= m are original observations (leaves)
    is_leaf <- children <= m
    T[children[is_leaf]] <- c
    # Remaining children are internal nodes: descend into their rows
    k <- children[!is_leaf] - m
  }
  T
}

17
R/computeDiffInCounts.R Normal file
View file

@ -0,0 +1,17 @@
computeDiffInCounts <- function(rows, max_noalle, nloci, data) {
  # Builds a max_noalle x nloci table holding the number of times each allele
  # occurs on the rows `rows` of `data` (laid out like one slice of COUNTS).
  #
  # Args:
  #   rows:       indices of the rows of `data` to count.
  #   max_noalle: number of rows of the result.
  #   nloci:      number of columns of the result.
  #   data:       matrix of allele codes; negative entries are skipped.
  #               Presumably codes run from 1 to max_noalle and `data` has
  #               `nloci` columns -- verify against callers.
  #
  # Returns the max_noalle x nloci count matrix.
  diffInCounts <- matrix(0, max_noalle, nloci)
  for (i in rows) {
    alleles <- data[i, ]
    observed <- which(alleles >= 0)
    if (length(observed) > 0) {
      # Linear (column-major) index of cell [allele code, locus].
      # `alleles[observed]` must be indexing: the MATLAB-style
      # `row(notEmpty)` called base::row() and raised an error.
      cells <- alleles[observed] + (observed - 1) * max_noalle
      diffInCounts[cells] <- diffInCounts[cells] + 1
    }
  }
  diffInCounts
}

27
R/computeLogml.R Normal file
View file

@ -0,0 +1,27 @@
computeLogml <- function(counts, sumcounts, noalle, data, rowsFromInd) {
  # Computes the log marginal likelihood from allele counts by looking values
  # up in the global table GAMMA_LN.
  #
  # Args:
  #   counts:      array indexed [allele, locus, population].
  #   sumcounts:   per-population, per-locus totals (used as row offsets into
  #                the first column of GAMMA_LN).
  #   noalle:      number of alleles at each locus.
  #   data:        data matrix; only its row count is used.
  #   rowsFromInd: added to nrow(data) to obtain the row count of GAMMA_LN
  #                assumed by the linear indexing below.
  #
  # Returns a single numeric value.
  nloci <- size(counts, 2)
  npops <- size(counts, 3)
  max_noalle <- max(noalle)

  # adjnoalle[a, j] = noalle[j] for the alleles that exist at locus j and 1
  # for the padding rows below them.
  adjnoalle <- zeros(max_noalle, nloci)
  for (j in seq_len(nloci)) {
    adjnoalle[seq_len(noalle[j]), j] <- noalle[j]
    if (noalle[j] < max_noalle) {
      # Parenthesised and bounded by the ROW count: the previous
      # `noalle[j] + 1:ncol(adjnoalle)` parsed as noalle[j] + (1:ncol).
      adjnoalle[(noalle[j] + 1):max_noalle, j] <- 1
    }
  }

  # Linear index into GAMMA_LN: row (count + 1) of column adjnoalle, given
  # that GAMMA_LN has rowsInG rows.
  rowsInG <- size(data, 1) + rowsFromInd
  lookup <- counts + 1 + repmat(rowsInG * (adjnoalle - 1), c(1, 1, npops))

  logml <- sum(GAMMA_LN[lookup]) -
    npops * sum(GAMMA_LN[1, adjnoalle]) -
    sum(GAMMA_LN[sumcounts + 1, 1])
  logml
}

View file

@ -0,0 +1,24 @@
computePopulationLogml <- function(pops, adjprior, priorTerm) {
  # Computes the per-population logml for the populations listed in `pops`.
  #
  # Args:
  #   pops:      indices of the populations (third dimension of the global
  #              COUNTS, rows of the global SUMCOUNTS).
  #   adjprior:  prior added to every slice of COUNTS; assumed to have the
  #              same dimensions as one slice -- TODO confirm.
  #   priorTerm: constant subtracted from every population's value.
  #
  # Returns a numeric vector with one value per element of `pops`.
  #
  # Base R's sum() has no dimension argument: the former `sum(x, 1)` and
  # `sum(x, 2)` added the literals 1 and 2 to a grand total. The reductions
  # are therefore written with apply()/rowSums().
  popCounts <- COUNTS[, , pops, drop = FALSE]
  prior <- array(adjprior, dim = dim(popCounts)) # recycled over populations

  # Sum over alleles and loci, keeping one value per population
  countTerm <- apply(lgamma(prior + popCounts), 3, sum)
  # Sum over loci within each population
  sumTerm <- rowSums(lgamma(1 + SUMCOUNTS[pops, , drop = FALSE]))

  popLogml <- countTerm - sumTerm - priorTerm
  popLogml
}

27
R/fgetl-fopen.R Normal file
View file

@ -0,0 +1,27 @@
#' @title Advance past the first line of a file held in memory
#' @description Stand-in for Matlab's `fgetl()` that works on the character
#' vector produced by `fopen()` instead of on a file handle.
#' @param file character vector of lines, usually an output of `fopen()`
#' @return If `file` has more than one element, `file` without its first
#' element. If `file` has one element or none, the numeric value -1.
#' @author Waldir Leoncio
#' @seealso fopen
#' @export
fgetl <- function(file) {
  n_lines <- length(file)

  # At most one line left: report -1
  if (n_lines <= 1) {
    return(-1)
  }

  # Otherwise keep everything from the second element onwards
  file[2:n_lines]
}
#' @title Open file
#' @description Reads a text file into memory, one element per line.
#' @param filename Path and name of the file to read
#' @return The same as `readLines(filename)`
#' @author Waldir Leoncio
#' @seealso fgetl
#' @export
fopen <- function(filename) {
  readLines(con = filename)
}

12
R/findEmptyPop.R Normal file
View file

@ -0,0 +1,12 @@
findEmptyPop <- function(npops) {
  # Finds the index of the first empty population.
  #
  # Args:
  #   npops: total number of populations.
  #
  # Returns a list with `emptyPop`, the first population index not present in
  # the global PARTITION (-1 when all `npops` labels are in use), and `pops`,
  # the distinct labels in PARTITION as a sorted one-row matrix.
  #
  # The labels must be sorted for the gap search below. MATLAB's unique()
  # sorts its output but R's unique() keeps first-appearance order.
  pops <- t(sort(unique(as.vector(PARTITION))))
  if (length(pops) == npops) {
    emptyPop <- -1
  } else {
    # A step larger than 1 between consecutive labels (with sentinels 0 and
    # npops + 1) marks a missing label; position p in popDiff corresponds to
    # label p being the first one absent.
    popDiff <- diff(c(0, pops, npops + 1))
    emptyPop <- min(which(popDiff > 1))
  }
  list(emptyPop = emptyPop, pops = pops)
}

43
R/getDistances.R Normal file
View file

@ -0,0 +1,43 @@
getDistances <- function(data_matrix, nclusters) {
  # Computes pairwise mean Hamming distances between individuals and the
  # linkage built from them.
  #
  # Args:
  #   data_matrix: matrix with #loci + 1 columns. The last column gives the
  #                individual (1..n) owning each row; per the original
  #                notes, diploids have two rows per individual and haploids
  #                one. Negative entries denote missing values.
  #   nclusters:   not used by this function; kept for interface
  #                compatibility.
  #
  # Returns a list with `Z`, the result of linkage() on the distances, and
  # `distances`, a choose(n, 2) x 1 matrix ordered (1,2), (1,3), ..., (n-1,n).
  # A pair with no jointly observed locus yields NaN (0 / 0).
  nloci <- ncol(data_matrix) - 1
  individual <- data_matrix[, ncol(data_matrix)]
  n <- max(individual)
  loci <- seq_len(nloci)
  distances <- matrix(0, choose(n, 2), 1)
  pointer <- 1
  # seq_len(n - 1), not `1:n - 1`: the latter parses as (1:n) - 1 and starts
  # the loop at individual 0.
  for (i in seq_len(n - 1)) {
    # drop = FALSE keeps a matrix even for a single row or a single locus
    i_data <- data_matrix[which(individual == i), loci, drop = FALSE]
    for (j in (i + 1):n) {
      j_data <- data_matrix[which(individual == j), loci, drop = FALSE]
      d_ij <- 0
      vertailuja <- 0 # number of comparisons made
      for (k in seq_len(nrow(i_data))) {
        here_i <- which(i_data[k, ] >= 0)
        for (l in seq_len(nrow(j_data))) {
          here_j <- which(j_data[l, ] >= 0)
          # Compare only loci observed in both rows
          here_joint <- intersect(here_i, here_j)
          vertailuja <- vertailuja + length(here_joint)
          d_ij <- d_ij + length(
            which(i_data[k, here_joint] != j_data[l, here_joint])
          )
        }
      }
      distances[pointer] <- d_ij / vertailuja
      pointer <- pointer + 1
    }
  }
  Z <- linkage(t(distances))
  list(Z = Z, distances = distances)
}

3
R/globals.R Normal file
View file

@ -0,0 +1,3 @@
# Register the names of the package-wide state variables (carried over from
# the MATLAB `global` declarations) with utils::globalVariables(), which
# declares them to the code-analysis step of `R CMD check`.
utils::globalVariables(
c("PARTITION", "COUNTS", "SUMCOUNTS", "LOGDIFF", "POP_LOGML", "GAMMA_LN")
)

View file

@ -11,10 +11,11 @@ greedyMix <- function(
savePreProcessed = NULL,
filePreProcessed = NULL
) {
# ASK: graphical components. Remove?
# ASK: Unclear when fixedk == TRUE. Remove?
# check whether fixed k mode is selected
# h0 <- findobj('Tag','fixk_menu')
# fixedK = get(h0, 'userdata');
fixedK <- FALSE
# if fixedK
# if ~(fixKWarning == 1) % call function fixKWarning
@ -22,9 +23,11 @@ greedyMix <- function(
# end
# end
# ASK: ditto
# % check whether partition compare mode is selected
# h1 = findobj('Tag','partitioncompare_menu');
# partitionCompare = get(h1, 'userdata');
partitionCompare <- FALSE
if (is(tietue, "list") | is(tietue, "character")) {
# ----------------------------------------------------------------------
@ -149,13 +152,25 @@ greedyMix <- function(
kunnossa <- testaaGenePopData(filename_pathname)
if (kunnossa == 0) stop("testaaGenePopData returned 0")
# [data,popnames]=lueGenePopData([pathname filename]); # TODO: trans
data_popnames <- lueGenePopData(filename_pathname)
data <- data_popnames$data
popnames <- data_popnames$popnames
# h0 = findobj('Tag','filename1_text');
# set(h0,'String',filename); clear h0;
# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO:trans
# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: trans
list_dranap <- handleData(data)
data <- list_dranap$newData
rowsFromInd <- list_dranap$rowsFromInd
alleleCodes <- list_dranap$alleleCodes
noalle <- list_dranap$noalle
adjprior <- list_dranap$adjprior
priorTerm <- list_dranap$prioterm
list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug
Z <- list_Zd$Z
dist <- list_Zd$dist
if (is.null(savePreProcessed)) {
save_preproc <- questdlg(
quest = 'Do you wish to save pre-processed data?',
@ -232,15 +247,17 @@ greedyMix <- function(
}
# ==========================================================================
# Declaring global variables
# Declaring global variables and changing environment of children functions
# ==========================================================================
PARTITION <- vector()
COUNTS <- vector()
SUMCOUNTS <- vector()
POP_LOGML <- vector()
clearGlobalVars <- vector()
# ==========================================================================
clearGlobalVars()
environment(writeMixtureInfo) <- environment()
# ==========================================================================
c <- list()
c$data <- data
c$noalle <- noalle
c$adjprior <- adjprior
@ -253,43 +270,49 @@ greedyMix <- function(
ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd)
c$rows <- c(ekat, ekat + rowsFromInd - 1)
# ASK remove?
# partition compare
if (!is.null(partitionCompare)) {
nsamplingunits <- size(c$rows, 1)
partitions <- partitionCompare$partitions
npartitions <- size(partitions, 2)
partitionLogml <- zeros(1, npartitions)
for (i in seq_len(npartitions)) {
# number of unique partition lables
npops <- length(unique(partitions[, i]))
# if (!is.null(partitionCompare)) {
# nsamplingunits <- size(c$rows, 1)
# partitions <- partitionCompare$partitions
# npartitions <- size(partitions, 2)
# partitionLogml <- zeros(1, npartitions)
# for (i in seq_len(npartitions)) {
# # number of unique partition lables
# npops <- length(unique(partitions[, i]))
partitionInd <- zeros(ninds * rowsFromInd, 1)
partitionSample <- partitions[, i]
for (j in seq_len(nsamplingunits)) {
partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
}
# partitionLogml[i] = initialCounts(
# partitionInd,
# data[, seq_len(end - 1)],
# npops,
# c$rows,
# noalle,
# adjprior
# ) #TODO translate
}
# return the logml result
partitionCompare$logmls <- partitionLogml
# set(h1, 'userdata', partitionCompare) # ASK remove?
return()
}
# ASK remove (graphical part)?
# if (fixedK) {
# #logml_npops_partitionSummary <- indMix_fixK(c) # ASK translate?
# } else {
# #logml_npops_partitionSummary <- indMix(c) # ASK translate?
# partitionInd <- zeros(ninds * rowsFromInd, 1)
# partitionSample <- partitions[, i]
# for (j in seq_len(nsamplingunits)) {
# partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j]
# }
# # partitionLogml[i] = initialCounts(
# # partitionInd,
# # data[, seq_len(end - 1)],
# # npops,
# # c$rows,
# # noalle,
# # adjprior
# # ) #TODO translate
# }
# # return the logml result
# partitionCompare$logmls <- partitionLogml
# # set(h1, 'userdata', partitionCompare)
# return()
# }
# if (logml_npops_partitionSummary$logml == 1) return()
if (fixedK) {
# logml_npops_partitionSummary <- indMix_fixK(c) # TODO: translate
# logml <- logml_npops_partitionSummary$logml
# npops <- logml_npops_partitionSummary$npops
# partitionSummary <- logml_npops_partitionSummary$partitionSummary
} else {
logml_npops_partitionSummary <- indMix(c) # TODO: translate
logml <- logml_npops_partitionSummary$logml
npops <- logml_npops_partitionSummary$npops
partitionSummary <- logml_npops_partitionSummary$partitionSummary
}
if (logml_npops_partitionSummary$logml == 1) return()
data <- data[, seq_len(ncol(data) - 1)]
@ -298,11 +321,13 @@ greedyMix <- function(
# inp = get(h0,'String');
# h0 = findobj('Tag','filename2_text')
# outp = get(h0,'String');
inp <- vector()
outp <- vector()
changesInLogml <- writeMixtureInfo(
logml, rowsFromInd, data, adjprior, priorTerm, outp, inp,
popnames, fixedK
) # FIXMEL depends on get function above
) # FIXME: broken
# viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder
@ -356,583 +381,3 @@ greedyMix <- function(
if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps')
}
}
# %-------------------------------------------------------------------------------------
# function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex)
# % Tiedet<65><74>n, ett?annettu logml on isompi kuin huonoin arvo
# % partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole
# % annettua logml arvoa, niin lis<69>t<EFBFBD><74>n worstIndex:in kohtaan uusi logml ja
# % nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mit<69><74>n.
# apu = find(abs(partitionSummary(:,2)-logml)<1e-5);
# if isempty(apu)
# % Nyt l<>ydetty partitio ei ole viel?kirjattuna summaryyn.
# global PARTITION;
# npops = length(unique(PARTITION));
# partitionSummary(worstIndex,1) = npops;
# partitionSummary(worstIndex,2) = logml;
# added = 1;
# else
# added = 0;
# end
# %--------------------------------------------------------------------------
# function [suurin, i2] = arvoSeuraavaTila(muutokset, logml)
# % Suorittaa yksil<69>n seuraavan tilan arvonnan
# y = logml + muutokset; % siirron j<>lkeiset logml:t
# y = y - max(y);
# y = exp(y);
# summa = sum(y);
# y = y/summa;
# y = cumsum(y);
# i2 = rand_disc(y); % uusi kori
# suurin = muutokset(i2);
# %--------------------------------------------------------------------------------------
# function svar=rand_disc(CDF)
# %returns an index of a value from a discrete distribution using inversion method
# slump=rand;
# har=find(CDF>slump);
# svar=har(1);
# %-------------------------------------------------------------------------------------
# function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ...
# adjprior, priorTerm)
# % Suorittaa globaalien muuttujien muutokset, kun yksil?ind
# % on siirret<65><74>n koriin i2.
# global PARTITION;
# global COUNTS;
# global SUMCOUNTS;
# global POP_LOGML;
# i1 = PARTITION(ind);
# PARTITION(ind)=i2;
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);
# %---------------------------------------------------------------------------------
# function updateGlobalVariables2( ...
# i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm);
# % Suorittaa globaalien muuttujien muutokset, kun kaikki
# % korissa i1 olevat yksil<69>t siirret<65><74>n koriin i2.
# global PARTITION;
# global COUNTS;
# global SUMCOUNTS;
# global POP_LOGML;
# inds = find(PARTITION==i1);
# PARTITION(inds) = i2;
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
# POP_LOGML(i1) = 0;
# POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm);
# %------------------------------------------------------------------------------------
# function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ...
# adjprior, priorTerm, i2);
# % Suorittaa globaalien muuttujien p<>ivitykset, kun yksil<69>t 'muuttuvat'
# % siirret<65><74>n koriin i2. Ennen siirtoa yksil<69>iden on kuuluttava samaan
# % koriin.
# global PARTITION;
# global COUNTS;
# global SUMCOUNTS;
# global POP_LOGML;
# i1 = PARTITION(muuttuvat(1));
# PARTITION(muuttuvat) = i2;
# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts;
# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts;
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts);
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts);
# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);
# %----------------------------------------------------------------------
# function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm)
# % Palauttaa yksil<69>t j<>rjestyksess?siten, ett?ensimm<6D>isen?on
# % se, jonka poistaminen populaatiosta pop nostaisi logml:n
# % arvoa eniten.
# global COUNTS; global SUMCOUNTS;
# ninds = length(inds);
# apuTaulu = [inds, zeros(ninds,1)];
# for i=1:ninds
# ind = inds(i);
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
# diffInSumCounts = sum(diffInCounts);
# COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts;
# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts;
# apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm);
# COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts;
# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts;
# end
# apuTaulu = sortrows(apuTaulu,2);
# inds = apuTaulu(ninds:-1:1,1);
# %------------------------------------------------------------------------------------
# function [muutokset, diffInCounts] = ...
# laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm)
# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
# % muutos logml:ss? mik<69>li yksil?ind siirret<65><74>n koriin i.
# % diffInCounts on poistettava COUNTS:in siivusta i1 ja lis<69>tt<74>v?
# % COUNTS:in siivuun i2, mik<69>li muutos toteutetaan.
# global COUNTS; global SUMCOUNTS;
# global PARTITION; global POP_LOGML;
# npops = size(COUNTS,3);
# muutokset = zeros(npops,1);
# i1 = PARTITION(ind);
# i1_logml = POP_LOGML(i1);
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
# diffInSumCounts = sum(diffInCounts);
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
# i2 = [1:i1-1 , i1+1:npops];
# i2_logml = POP_LOGML(i2);
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
# muutokset(i2) = new_i1_logml - i1_logml ...
# + new_i2_logml - i2_logml;
# %------------------------------------------------------------------------------------
# function [muutokset, diffInCounts] = laskeMuutokset2( ...
# i1, rowsFromInd, data, adjprior, priorTerm);
# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi
# % muutos logml:ss? mik<69>li korin i1 kaikki yksil<69>t siirret<65><74>n
# % koriin i.
# global COUNTS; global SUMCOUNTS;
# global PARTITION; global POP_LOGML;
# npops = size(COUNTS,3);
# muutokset = zeros(npops,1);
# i1_logml = POP_LOGML(i1);
# inds = find(PARTITION==i1);
# ninds = length(inds);
# if ninds==0
# diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2));
# return;
# end
# rows = computeRows(rowsFromInd, inds, ninds);
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
# diffInSumCounts = sum(diffInCounts);
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
# i2 = [1:i1-1 , i1+1:npops];
# i2_logml = POP_LOGML(i2);
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
# muutokset(i2) = new_i1_logml - i1_logml ...
# + new_i2_logml - i2_logml;
# %------------------------------------------------------------------------------------
# function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ...
# data, adjprior, priorTerm, i1)
# % Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio
# % kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio
# % inds2(find(T2==i)) siirret<65><74>n koriin j.
# global COUNTS; global SUMCOUNTS;
# global PARTITION; global POP_LOGML;
# npops = size(COUNTS,3);
# npops2 = length(unique(T2));
# muutokset = zeros(npops2, npops);
# i1_logml = POP_LOGML(i1);
# for pop2 = 1:npops2
# inds = inds2(find(T2==pop2));
# ninds = length(inds);
# if ninds>0
# rows = computeRows(rowsFromInd, inds, ninds);
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
# diffInSumCounts = sum(diffInCounts);
# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts;
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts;
# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
# i2 = [1:i1-1 , i1+1:npops];
# i2_logml = POP_LOGML(i2)';
# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)';
# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]);
# muutokset(pop2,i2) = new_i1_logml - i1_logml ...
# + new_i2_logml - i2_logml;
# end
# end
# %------------------------------------------------------------------------------------
# function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ...
# priorTerm, i1, i2)
# % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi
# % muutos logml:ss? mik<69>li yksil?i vaihtaisi koria i1:n ja i2:n v<>lill?
# global COUNTS; global SUMCOUNTS;
# global PARTITION; global POP_LOGML;
# ninds = length(inds);
# muutokset = zeros(ninds,1);
# i1_logml = POP_LOGML(i1);
# i2_logml = POP_LOGML(i2);
# for i = 1:ninds
# ind = inds(i);
# if PARTITION(ind)==i1
# pop1 = i1; %mist?
# pop2 = i2; %mihin
# else
# pop1 = i2;
# pop2 = i1;
# end
# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data);
# diffInSumCounts = sum(diffInCounts);
# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts;
# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts;
# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts;
# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts;
# PARTITION(ind) = pop2;
# new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm);
# muutokset(i) = sum(new_logmls);
# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts;
# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts;
# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts;
# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts;
# PARTITION(ind) = pop1;
# end
# muutokset = muutokset - i1_logml - i2_logml;
# %--------------------------------------------------------------------------
# function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
# % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
# % lukum<75><6D>r<EFBFBD>t (vastaavasti kuin COUNTS:issa), jotka ovat data:n
# % riveill?rows.
# diffInCounts = zeros(max_noalle, nloci);
# for i=rows
# row = data(i,:);
# notEmpty = find(row>=0);
# if length(notEmpty)>0
# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ...
# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1;
# end
# end
# %------------------------------------------------------------------------------------
# function popLogml = computePopulationLogml(pops, adjprior, priorTerm)
# % Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset
# % logml:t koreille, jotka on m<><6D>ritelty pops-muuttujalla.
# global COUNTS;
# global SUMCOUNTS;
# x = size(COUNTS,1);
# y = size(COUNTS,2);
# z = length(pops);
# popLogml = ...
# squeeze(sum(sum(reshape(...
# gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ...
# ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm;
# %-----------------------------------------------------------------------------------
# function npops = poistaTyhjatPopulaatiot(npops)
# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja
# % SUMCOUNTS:ista. P<>ivitt<74><74> npops:in ja PARTITION:in.
# global COUNTS;
# global SUMCOUNTS;
# global PARTITION;
# notEmpty = find(any(SUMCOUNTS,2));
# COUNTS = COUNTS(:,:,notEmpty);
# SUMCOUNTS = SUMCOUNTS(notEmpty,:);
# for n=1:length(notEmpty)
# apu = find(PARTITION==notEmpty(n));
# PARTITION(apu)=n;
# end
# npops = length(notEmpty);
# %----------------------------------------------------------------------------------
# %Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.
# function initial_partition=admixture_initialization(data_matrix,nclusters,Z)
# size_data=size(data_matrix);
# nloci=size_data(2)-1;
# n=max(data_matrix(:,end));
# T=cluster_own(Z,nclusters);
# initial_partition=zeros(size_data(1),1);
# for i=1:n
# kori=T(i);
# here=find(data_matrix(:,end)==i);
# for j=1:length(here)
# initial_partition(here(j),1)=kori;
# end
# end
# function T = cluster_own(Z,nclust)
# true=logical(1);
# false=logical(0);
# maxclust = nclust;
# % Start of algorithm
# m = size(Z,1)+1;
# T = zeros(m,1);
# % maximum number of clusters based on inconsistency
# if m <= maxclust
# T = (1:m)';
# elseif maxclust==1
# T = ones(m,1);
# else
# clsnum = 1;
# for k = (m-maxclust+1):(m-1)
# i = Z(k,1); % left tree
# if i <= m % original node, no leafs
# T(i) = clsnum;
# clsnum = clsnum + 1;
# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree
# T = clusternum(Z, T, i-m, clsnum);
# clsnum = clsnum + 1;
# end
# i = Z(k,2); % right tree
# if i <= m % original node, no leafs
# T(i) = clsnum;
# clsnum = clsnum + 1;
# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree
# T = clusternum(Z, T, i-m, clsnum);
# clsnum = clsnum + 1;
# end
# end
# end
# function T = clusternum(X, T, k, c)
# m = size(X,1)+1;
# while(~isempty(k))
# % Get the children of nodes at this level
# children = X(k,1:2);
# children = children(:);
# % Assign this node number to leaf children
# t = (children<=m);
# T(children(t)) = c;
# % Move to next level
# k = children(~t) - m;
# end
# %----------------------------------------------------------------------------------------
# function [Z, distances]=getDistances(data_matrix,nclusters)
# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance
# %gives partition in 8-bit format
# %allocates all alleles of a single individual into the same basket
# %data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row,
# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row
# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix.
# size_data=size(data_matrix);
# nloci=size_data(2)-1;
# n=max(data_matrix(:,end));
# distances=zeros(nchoosek(n,2),1);
# pointer=1;
# for i=1:n-1
# i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci);
# for j=i+1:n
# d_ij=0;
# j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci);
# vertailuja = 0;
# for k=1:size(i_data,1)
# for l=1:size(j_data,1)
# here_i=find(i_data(k,:)>=0);
# here_j=find(j_data(l,:)>=0);
# here_joint=intersect(here_i,here_j);
# vertailuja = vertailuja + length(here_joint);
# d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint)));
# end
# end
# d_ij = d_ij / vertailuja;
# distances(pointer)=d_ij;
# pointer=pointer+1;
# end
# end
# Z=linkage(distances');
# %----------------------------------------------------------------------------------------
# function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd)
# nloci = size(counts,2);
# npops = size(counts,3);
# adjnoalle = zeros(max(noalle),nloci);
# for j=1:nloci
# adjnoalle(1:noalle(j),j)=noalle(j);
# if (noalle(j)<max(noalle))
# adjnoalle(noalle(j)+1:end,j)=1;
# end
# end
# %logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ...
# % - npops*sum(sum(gammaln(adjprior))) - ...
# % sum(sum(gammaln(1+sumcounts)));
# %logml = logml2;
# global GAMMA_LN;
# rowsInG = size(data,1)+rowsFromInd;
# logml = sum(sum(sum(GAMMA_LN(counts+1 + repmat(rowsInG*(adjnoalle-1),[1 1 npops]))))) ...
# - npops*sum(sum(GAMMA_LN(1, adjnoalle))) ...
# -sum(sum(GAMMA_LN(sumcounts+1,1)));
# %--------------------------------------------------------------------------
# function initializeGammaln(ninds, rowsFromInd, maxAlleles)
# %Alustaa GAMMALN muuttujan s.e. GAMMALN(i,j)=gammaln((i-1) + 1/j)
# global GAMMA_LN;
# GAMMA_LN = zeros((1+ninds)*rowsFromInd, maxAlleles);
# for i=1:(ninds+1)*rowsFromInd
# for j=1:maxAlleles
# GAMMA_LN(i,j)=gammaln((i-1) + 1/j);
# end
# end
# %---------------------------------------------------------------
# function dist2 = laskeOsaDist(inds2, dist, ninds)
# % Muodostaa dist vektorista osavektorin, joka sis<69>lt<6C><74> yksil<69>iden inds2
# % v<>liset et<65>isyydet. ninds=kaikkien yksil<69>iden lukum<75><6D>r?
# ninds2 = length(inds2);
# apu = zeros(nchoosek(ninds2,2),2);
# rivi = 1;
# for i=1:ninds2-1
# for j=i+1:ninds2
# apu(rivi, 1) = inds2(i);
# apu(rivi, 2) = inds2(j);
# rivi = rivi+1;
# end
# end
# apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1));
# dist2 = dist(apu);
# %--------------------------------------------------------------------------
# function [emptyPop, pops] = findEmptyPop(npops)
# % Palauttaa ensimm<6D>isen tyhj<68>n populaation indeksin. Jos tyhji?
# % populaatioita ei ole, palauttaa -1:n.
# global PARTITION;
# pops = unique(PARTITION)';
# if (length(pops) ==npops)
# emptyPop = -1;
# else
# popDiff = diff([0 pops npops+1]);
# emptyPop = min(find(popDiff > 1));
# end

View file

@ -20,12 +20,11 @@ handleData <- function(raw_data) {
# koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi.
# T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j
# koodit saavat arvoja v?lill?1,...,noalle(j).
data <- raw_data
nloci <- size(raw_data, 2) - 1
dataApu <- data[, 1:nloci]
nollat <- find(dataApu==0)
nollat <- find(dataApu == 0)
if (!isempty(nollat)) {
isoinAlleeli <- max(max(dataApu))
dataApu[nollat] <- isoinAlleeli + 1
@ -39,9 +38,12 @@ handleData <- function(raw_data) {
alleelitLokuksessa <- cell(nloci, 1)
for (i in 1:nloci) {
alleelitLokuksessaI <- unique(data[, i])
alleelitLokuksessa[i, 1] <- alleelitLokuksessaI[
find(alleelitLokuksessaI >= 0)
]
alleelitLokuksessaI_pos <- find(alleelitLokuksessaI >= 0)
alleelitLokuksessa[i, 1] <- ifelse(
test = length(alleelitLokuksessaI_pos) > 0,
yes = alleelitLokuksessaI[alleelitLokuksessaI_pos],
no = 0
)
noalle[i] <- length(alleelitLokuksessa[i, 1])
}
alleleCodes <- zeros(max(noalle), nloci)
@ -65,10 +67,10 @@ handleData <- function(raw_data) {
emptyRow <- repmat(a, c(1, ncols))
lessThanMax <- find(rowsFromInd < maxRowsFromInd)
missingRows <- maxRowsFromInd * nind - nrows
data <- as.matrix(c(data, zeros(missingRows, ncols)))
data <- rbind(data, zeros(missingRows, ncols))
pointer <- 1
for (ind in t(lessThanMax)) { #K?y l?pi ne yksil?t, joilta puuttuu rivej?
miss = maxRowsFromInd-rowsFromInd(ind); # T?lt?yksil?lt?puuttuvien lkm.
miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm.
}
data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti
newData <- data
@ -84,12 +86,12 @@ handleData <- function(raw_data) {
priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j])
}
out <- list(
newData = newData,
newData = newData,
rowsFromInd = rowsFromInd,
alleleCodes = alleleCodes,
noalle = noalle,
adjprior = adjprior,
priorTerm = priorTerm
noalle = noalle,
adjprior = adjprior,
priorTerm = priorTerm
)
return(out)
}

567
R/indMix.R Normal file
View file

@ -0,0 +1,567 @@
#' @title Greedy mixture search over individuals
#' @description Greedy search algorithm with unknown number of classes for
#' regular clustering. One search is run for every element of `npops`, and the
#' result of the run with the highest logml is returned.
#' Input npops is not used if called by greedyMix or greedyPopMix.
#' @param c list from which `noalle`, `rows`, `data`, `adjprior`, `priorTerm`
#' and `rowsFromInd` are read, plus `dist` and `Z` when `dist` is present
#' @param npops upper bound(s) to the number of populations; when missing, the
#' values are asked for through `inputdlg()`
#' @param dispText if true, progress messages are printed (defaults to 1)
#' @return list with elements `logml`, `npops` and `partitionSummary`; `NULL`
#' when the input dialog is cancelled or left empty
indMix <- function(c, npops, dispText) {
  # Adds `logml` to the summary of best partitions when it beats the current
  # worst entry, and refreshes the worst entry if the summary changed.
  recordPartition <- function(logml, state) {
    if (logml > state$worstLogml) {
      res <- addToSummary(logml, state$partitionSummary, state$worstIndex)
      state$partitionSummary <- res$partitionSummary
      if (res$added == 1) {
        worst <- min_MATLAB(state$partitionSummary[, 2])
        state$worstLogml <- worst[[1]]
        state$worstIndex <- worst[[2]]
      }
    }
    state
  }

  logml <- 1
  clearGlobalVars()

  noalle <- c$noalle
  rows <- c$rows
  data <- c$data
  adjprior <- c$adjprior
  priorTerm <- c$priorTerm
  rowsFromInd <- c$rowsFromInd
  if (isfield(c, 'dist')) {
    dist <- c$dist
    Z <- c$Z
  }
  rm(c)

  if (missing(dispText)) {
    dispText <- 1
  }
  if (missing(npops)) {
    dispText <- 1
    npopstext <- character(0)
    ready <- FALSE
    teksti <- 'Input upper bound to the number of populations (possibly multiple values)'
    while (!ready) {
      npopstextExtra <- inputdlg(teksti, 1, '20')
      if (isempty(npopstextExtra)) { # Cancel was pressed
        return(NULL)
      }
      # NOTE(review): assumes inputdlg() returns the typed text as its first
      # element (list or character vector) -- confirm against inputdlg().
      npopstextExtra <- as.character(npopstextExtra[[1]])
      if (nchar(npopstextExtra) >= 255) {
        # Field limit reached: keep what fits and ask for the remainder
        npopstext <- c(npopstext, substr(npopstextExtra, 1, 255))
        teksti <- 'The input field length limit (255 characters) was reached. Input more values: '
      } else {
        npopstext <- c(npopstext, npopstextExtra)
        ready <- TRUE
      }
    }
    rm(ready, teksti)
    tokens <- strsplit(
      trimws(paste(npopstext, collapse = ' ')), '[[:space:],;]+'
    )[[1]]
    tokens <- tokens[nzchar(tokens)]
    if (length(tokens) == 0) {
      return(NULL)
    }
    npopsTaulu <- suppressWarnings(as.numeric(tokens))
    # Upper bounds equal to one (and unparsable entries) are removed
    npopsTaulu <- npopsTaulu[!is.na(npopsTaulu) & npopsTaulu != 1]
    if (length(npopsTaulu) == 0) {
      return(list(logml = 1, npops = 1, partitionSummary = 1))
    }
  } else {
    npopsTaulu <- npops
  }

  nruns <- length(npopsTaulu)
  initData <- data
  data <- data[, seq_len(ncol(data) - 1), drop = FALSE]

  logmlBest <- -1e50
  npopsBest <- NULL
  # Information on the 30 best partitions (npops and logml)
  partitionSummary <- -1e50 * ones(30, 2)
  partitionSummary[, 1] <- zeros(30, 1)
  summaryState <- list(
    partitionSummary = partitionSummary, worstLogml = -1e50, worstIndex = 1
  )
  nRoundTypes <- 7

  for (run in seq_len(nruns)) {
    npops <- npopsTaulu[run]
    if (dispText) {
      dispLine()
      print(
        paste0(
          'Run ', as.character(run), '/', as.character(nruns),
          ', maximum number of populations ', as.character(npops), '.'
        )
      )
    }
    ninds <- size(rows, 1)
    initialPartition <- admixture_initialization(initData, npops, Z) # TODO: translate
    sumcounts_counts_logml <- initialCounts(
      initialPartition, data, npops, rows, noalle, adjprior
    ) # TODO: translate
    sumcounts <- sumcounts_counts_logml$sumcounts
    counts <- sumcounts_counts_logml$counts
    logml <- sumcounts_counts_logml$logml

    # NOTE(review): PARTITION, COUNTS, SUMCOUNTS, POP_LOGML and LOGDIFF are
    # assigned as locals here, while the helper functions read variables of
    # the same names from their own scope -- confirm how this state is meant
    # to be shared.
    PARTITION <- zeros(ninds, 1)
    for (i in seq_len(ninds)) {
      # rows[i] is the first data row of individual i (column-major index)
      PARTITION[i] <- initialPartition[rows[i]]
    }
    COUNTS <- counts
    SUMCOUNTS <- sumcounts
    POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm) # TODO: translate
    LOGDIFF <- repmat(-Inf, c(ninds, npops))
    rm(initialPartition, counts, sumcounts)

    # SEARCH FOR THE BEST MIXTURE PARTITION
    kokeiltu <- zeros(nRoundTypes, 1)
    roundTypes <- c(1, 1) # The phase-one cycle is run twice.
    ready <- 0
    vaihe <- 1

    if (dispText) {
      print(' ')
      print(
        paste(
          'Mixture analysis started with initial',
          as.character(npops),
          'populations.'
        )
      )
    }

    while (ready != 1) {
      muutoksia <- 0
      if (dispText) {
        print(paste('Performing steps:', paste(roundTypes, collapse = ' ')))
      }

      for (n in seq_along(roundTypes)) {
        round <- roundTypes[n]

        if (kokeiltu[round] == 1) {
          # Step already tried since the last change: nothing to do
        } else if (round == 0 || round == 1) {
          # Move an individual to another population
          inds <- sample(ninds) # visit individuals in random order
          muutosNyt <- 0
          for (ind in inds) {
            i1 <- PARTITION[ind]
            muutokset_diffInCounts <- laskeMuutokset(
              ind, rows, data, adjprior, priorTerm
            )
            muutokset <- muutokset_diffInCounts$muutokset
            diffInCounts <- muutokset_diffInCounts$diffInCounts

            if (round == 1) {
              best <- max_MATLAB(muutokset)
              maxMuutos <- best[[1]]
              i2 <- best[[2]]
            }

            if (i1 != i2 && maxMuutos > 1e-5) {
              # A change took place
              muutoksia <- 1
              if (muutosNyt == 0) {
                muutosNyt <- 1
                if (dispText) {
                  print('Action 1')
                }
              }
              kokeiltu <- zeros(nRoundTypes, 1)
              updateGlobalVariables(
                ind, i2, diffInCounts, adjprior, priorTerm
              )
              logml <- logml + maxMuutos
              summaryState <- recordPartition(logml, summaryState)
            }
          }
          if (muutosNyt == 0) {
            kokeiltu[round] <- 1
          }
        } else if (round == 2) {
          # Merge a population into another
          maxMuutos <- 0
          for (pop in seq_len(npops)) {
            muutokset_diffInCounts <- laskeMuutokset2(
              pop, rows, data, adjprior, priorTerm
            )
            muutokset <- muutokset_diffInCounts$muutokset
            diffInCounts <- muutokset_diffInCounts$diffInCounts
            best <- max_MATLAB(muutokset)
            isoin <- best[[1]]
            indeksi <- best[[2]]
            if (isoin > maxMuutos) {
              maxMuutos <- isoin
              i1 <- pop
              i2 <- indeksi
              diffInCountsBest <- diffInCounts
            }
          }
          if (maxMuutos > 1e-5) {
            muutoksia <- 1
            kokeiltu <- zeros(nRoundTypes, 1)
            updateGlobalVariables2(
              i1, i2, diffInCountsBest, adjprior, priorTerm
            )
            logml <- logml + maxMuutos
            if (dispText) {
              print('Action 2')
            }
            summaryState <- recordPartition(logml, summaryState)
          } else {
            kokeiltu[round] <- 1
          }
        } else if (round == 3 || round == 4) {
          # Split a population into parts
          maxMuutos <- 0
          ninds <- size(rows, 1)
          for (pop in seq_len(npops)) {
            inds2 <- find(PARTITION == pop)
            ninds2 <- length(inds2)
            if (ninds2 > 2) {
              dist2 <- laskeOsaDist(inds2, dist, ninds)
              Z2 <- linkage(t(dist2))
              if (round == 3) {
                npops2 <- max(min(20, floor(ninds2 / 5)), 2)
              } else if (round == 4) {
                npops2 <- 2 # Number of parts to split into
              }
              T2 <- cluster_own(Z2, npops2)
              muutokset <- laskeMuutokset3(
                T2, inds2, rows, data, adjprior, priorTerm, pop
              )
              best <- max_MATLAB(muutokset)
              isoin <- best[[1]]
              indeksi <- best[[2]]
              if (isoin > maxMuutos) {
                maxMuutos <- isoin
                # Decode the linear index into (sub-population, target)
                muuttuvaPop2 <- indeksi %% npops2
                if (muuttuvaPop2 == 0) muuttuvaPop2 <- npops2
                muuttuvat <- inds2[find(T2 == muuttuvaPop2)]
                i2 <- ceiling(indeksi / npops2)
              }
            }
          }
          if (maxMuutos > 1e-5) {
            muutoksia <- 1
            kokeiltu <- zeros(nRoundTypes, 1)
            # All data rows belonging to the individuals being moved
            rivit <- unlist(
              lapply(muuttuvat, function(ind) rows[ind, 1]:rows[ind, 2])
            )
            diffInCounts <- computeDiffInCounts(
              rivit, size(COUNTS, 1), size(COUNTS, 2), data
            )
            i1 <- PARTITION[muuttuvat[1]]
            updateGlobalVariables3(
              muuttuvat, diffInCounts, adjprior, priorTerm, i2
            )
            logml <- logml + maxMuutos
            if (dispText) {
              if (round == 3) {
                print('Action 3')
              } else {
                print('Action 4')
              }
            }
            summaryState <- recordPartition(logml, summaryState)
          } else {
            kokeiltu[round] <- 1
          }
        } else if (round == 5 || round == 6) {
          j <- 0
          muutettu <- 0
          # Snapshot of the state, restored if the attempt does not pay off
          poplogml <- POP_LOGML
          partition <- PARTITION
          counts <- COUNTS
          sumcounts <- SUMCOUNTS
          logdiff <- LOGDIFF

          pops <- sample(npops)
          while (j < npops && muutettu == 0) {
            j <- j + 1
            pop <- pops[j]
            totalMuutos <- 0
            inds <- find(PARTITION == pop)
            if (round == 5) {
              # Random order (sample.int avoids sample()'s scalar shortcut)
              inds <- inds[sample.int(length(inds))]
            } else if (round == 6) {
              inds <- returnInOrder(
                inds, pop, rows, data, adjprior, priorTerm
              )
            }

            i <- 0
            while (length(inds) > 0 && i < length(inds)) {
              i <- i + 1
              ind <- inds[i]
              muutokset_diffInCounts <- laskeMuutokset(
                ind, rows, data, adjprior, priorTerm
              )
              muutokset <- muutokset_diffInCounts$muutokset
              diffInCounts <- muutokset_diffInCounts$diffInCounts
              muutokset[pop] <- -1e50 # Certainly not the largest
              best <- max_MATLAB(muutokset)
              maxMuutos <- best[[1]]
              i2 <- best[[2]]
              updateGlobalVariables(
                ind, i2, diffInCounts, adjprior, priorTerm
              )
              totalMuutos <- totalMuutos + maxMuutos
              logml <- logml + maxMuutos
              if (round == 6) {
                # Stop as soon as the total change is positive
                if (totalMuutos > 1e-5) {
                  i <- length(inds)
                }
              }
            }

            if (totalMuutos > 1e-5) {
              kokeiltu <- zeros(nRoundTypes, 1)
              muutettu <- 1
              if (muutoksia == 0) {
                muutoksia <- 1 # Outer bookkeeping
                if (dispText) {
                  if (round == 5) {
                    print('Action 5')
                  } else {
                    print('Action 6')
                  }
                }
              }
              summaryState <- recordPartition(logml, summaryState)
            } else {
              # The state did not improve at any point: undo all changes
              PARTITION <- partition
              SUMCOUNTS <- sumcounts
              POP_LOGML <- poplogml
              COUNTS <- counts
              logml <- logml - totalMuutos
              LOGDIFF <- logdiff
              kokeiltu[round] <- 1
            }
          }
          rm(partition, sumcounts, counts, poplogml)
        } else if (round == 7) {
          # NOTE(review): emptyPop is used as a scalar index below -- confirm
          # that findEmptyPop() returns only the index.
          emptyPop <- findEmptyPop(npops)
          j <- 0
          pops <- sample(npops)
          muutoksiaNyt <- 0
          if (emptyPop == -1) {
            j <- npops
          }
          while (j < npops) {
            j <- j + 1
            pop <- pops[j]
            inds2 <- find(PARTITION == pop)
            ninds2 <- length(inds2)
            if (ninds2 > 5) {
              partition <- PARTITION
              sumcounts <- SUMCOUNTS
              counts <- COUNTS
              poplogml <- POP_LOGML
              logdiff <- LOGDIFF

              dist2 <- laskeOsaDist(inds2, dist, ninds)
              Z2 <- linkage(t(dist2))
              T2 <- cluster_own(Z2, 2)
              muuttuvat <- inds2[find(T2 == 1)]

              muutokset <- laskeMuutokset3(
                T2, inds2, rows, data, adjprior, priorTerm, pop
              )
              totalMuutos <- muutokset[1, emptyPop]

              rivit <- unlist(
                lapply(muuttuvat, function(ind) rows[ind, 1]:rows[ind, 2])
              )
              diffInCounts <- computeDiffInCounts(
                rivit, size(COUNTS, 1), size(COUNTS, 2), data
              )
              updateGlobalVariables3(
                muuttuvat, diffInCounts, adjprior, priorTerm,
                emptyPop
              )

              muutettu <- 1
              while (muutettu == 1) {
                muutettu <- 0
                # Move individuals between the two populations
                muutokset <- laskeMuutokset5(
                  inds2, rows, data, adjprior, priorTerm,
                  pop, emptyPop
                )
                best <- max_MATLAB(muutokset)
                maxMuutos <- best[[1]]
                indeksi <- best[[2]]

                muuttuva <- inds2[indeksi]
                if (PARTITION[muuttuva] == pop) {
                  i2 <- emptyPop
                } else {
                  i2 <- pop
                }

                if (maxMuutos > 1e-5) {
                  rivit <- rows[muuttuva, 1]:rows[muuttuva, 2]
                  diffInCounts <- computeDiffInCounts(
                    rivit, size(COUNTS, 1), size(COUNTS, 2),
                    data
                  )
                  updateGlobalVariables3(
                    muuttuva, diffInCounts, adjprior,
                    priorTerm, i2
                  )
                  muutettu <- 1
                  totalMuutos <- totalMuutos + maxMuutos
                }
              }

              if (totalMuutos > 1e-5) {
                muutoksia <- 1
                logml <- logml + totalMuutos
                summaryState <- recordPartition(logml, summaryState)
                if (muutoksiaNyt == 0) {
                  if (dispText) {
                    print('Action 7')
                  }
                  muutoksiaNyt <- 1
                }
                kokeiltu <- zeros(nRoundTypes, 1)
                j <- npops
              } else {
                # Restore the old values
                PARTITION <- partition
                SUMCOUNTS <- sumcounts
                COUNTS <- counts
                POP_LOGML <- poplogml
                LOGDIFF <- logdiff
              }
            }
          }
          if (muutoksiaNyt == 0) {
            kokeiltu[round] <- 1
          }
        }
      }

      if (muutoksia == 0) {
        # Nothing changed during this cycle: advance to the next phase
        if (vaihe <= 4) {
          vaihe <- vaihe + 1
        } else if (vaihe == 5) {
          ready <- 1
        }
      } else {
        muutoksia <- 0
      }

      if (ready == 0) {
        # Round types to run in each phase (vaihe is always in 1..5 here)
        roundTypes <- switch(
          vaihe,
          c(1),
          c(2, 1),
          c(5, 5, 7),
          c(4, 3, 1),
          c(6, 7, 2, 3, 4, 1)
        )
      }
    }

    # SAVING
    npops <- poistaTyhjatPopulaatiot(npops)
    POP_LOGML <- computePopulationLogml(seq_len(npops), adjprior, priorTerm)
    if (dispText) {
      print(paste('Found partition with', as.character(npops), 'populations.'))
      print(paste('Log(ml) =', as.character(logml)))
      print(' ')
    }

    if (logml > logmlBest) {
      # Update the best partition found so far
      logmlBest <- logml
      npopsBest <- npops
      # NOTE(review): the state below is stored but not used afterwards;
      # presumably it should be written back to the shared state -- confirm.
      partitionBest <- PARTITION
      countsBest <- COUNTS
      sumCountsBest <- SUMCOUNTS
      pop_logmlBest <- POP_LOGML
      logdiffbest <- LOGDIFF
    }
  }

  # Report the best run rather than whichever run happened to be last
  if (!is.null(npopsBest)) {
    logml <- logmlBest
    npops <- npopsBest
  }
  return(
    list(
      logml = logml, npops = npops,
      partitionSummary = summaryState$partitionSummary
    )
  )
}

20
R/initialPopCounts.R Normal file
View file

@ -0,0 +1,20 @@
# Builds the initial allele counts when every population i occupies the data
# rows rows[i, 1]:rows[i, 2].
#
# data:     matrix of allele codes, one column per locus; negative entries are
#           not counted as observations
# npops:    number of populations
# rows:     matrix whose row i holds the first and last data row of population i
# noalle:   number of alleles at each locus
# adjprior: passed on unchanged to laskeLoggis()
#
# Returns a list with
#   sumcounts: npops x nloci matrix of observation counts (entries >= 0)
#   counts:    max(noalle) x nloci x npops array; counts[k, j, i] is the number
#              of times allele code k is seen at locus j in population i
#   logml:     value of laskeLoggis(counts, sumcounts, adjprior)
initialPopCounts <- function(data, npops, rows, noalle, adjprior) {
  nloci <- ncol(data)
  counts <- array(0, dim = c(max(noalle), nloci, npops))
  sumcounts <- matrix(0, nrow = npops, ncol = nloci)

  for (i in seq_len(npops)) {
    i_rivit <- rows[i, 1]:rows[i, 2]
    for (j in seq_len(nloci)) {
      observed <- data[i_rivit, j]
      sumcounts[i, j] <- sum(observed >= 0, na.rm = TRUE)
      for (k in seq_len(noalle[j])) {
        counts[k, j, i] <- sum(observed == k, na.rm = TRUE)
      }
    }
  }

  logml <- laskeLoggis(counts, sumcounts, adjprior)
  # return() accepts a single value, so the three results travel in a list
  return(list(sumcounts = sumcounts, counts = counts, logml = logml))
}

9
R/initializeGammaln.R Normal file
View file

@ -0,0 +1,9 @@
# Builds the GAMMA_LN lookup table, where
#   GAMMA_LN[i, j] = lgamma((i - 1) + 1 / j)
# for i = 1, ..., (ninds + 1) * rowsFromInd and j = 1, ..., maxAlleles.
#
# The table is returned; the caller decides where to keep it.
initializeGammaln <- function(ninds, rowsFromInd, maxAlleles) {
  nrows <- (1 + ninds) * rowsFromInd
  # outer() gives (i - 1) + 1 / j for every (i, j) pair at once
  GAMMA_LN <- lgamma(
    outer(seq_len(nrows) - 1, 1 / seq_len(maxAlleles), "+")
  )
  return(GAMMA_LN)
}

View file

@ -1,7 +1,7 @@
laskeLoggis <- function(counts, sumcounts, adjprior) {
npops <- size(counts, 3)
sum1 <- sum(sum(sum(gammaln(counts + repmat(adjprior, c(1, 1, npops))))))
sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops))))))
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
logml2 <- sum1 - npops * sum3
loggis <- logml2

226
R/laskeMuutokset12345.R Normal file
View file

@ -0,0 +1,226 @@
#' @title Calculate changes in logml from moving admixture mass
#' @description Returns an npops x npops table whose element (i, j) is the
#' change in logml if a proportion `osuus` of probability mass is moved from
#' population i to population j. If population i has nothing to move (its
#' entry in `osuusTaulu` is not above 0.005), the corresponding row is left as
#' zeros.
#' @param osuus amount of mass moved in each trial
#' @param osuusTaulu vector of proportions, one entry per population
#' @param omaFreqs passed on unchanged to `computeIndLogml()`
#' @param logml baseline log-likelihood subtracted from every trial value
#' @param COUNTS array whose third dimension gives the number of populations;
#' anything with fewer than three dimensions counts as one population
#' @export
laskeMuutokset4 <- function (
  osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)
) {
  dims <- dim(COUNTS)
  npops <- if (length(dims) >= 3) dims[3] else 1
  notEmpty <- which(osuusTaulu > 0.005)
  muutokset <- zeros(npops)
  for (i1 in notEmpty) {
    osuusTaulu[i1] <- osuusTaulu[i1] - osuus
    for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) {
      osuusTaulu[i2] <- osuusTaulu[i2] + osuus
      loggis <- computeIndLogml(omaFreqs, osuusTaulu)

      # Work around Matlab OOB bug: Matlab grows a matrix on out-of-bounds
      # assignment, R does not
      if (i1 > nrow(muutokset)) {
        muutokset <- rbind(muutokset, muutokset * 0)
      }
      if (i2 > ncol(muutokset)) {
        muutokset <- cbind(muutokset, muutokset * 0)
      }

      muutokset[i1, i2] <- loggis - logml
      # Undo the trial move before testing the next target
      osuusTaulu[i2] <- osuusTaulu[i2] - osuus
    }
    osuusTaulu[i1] <- osuusTaulu[i1] + osuus
  }
  return (muutokset)
}
# Returns a table of length npops whose i-th element tells what the change in
# logml would be if individual ind were moved to cluster i. diffInCounts is
# what has to be removed from slice i1 of COUNTS and added to slice i2 if the
# change is carried out.
#
# Addition 25.9.2007:
# Introduced the global variable LOGDIFF, which stores the changes in logml
# from moving individuals to other populations.
#
# Returns list(muutokset = ..., diffInCounts = ...).
#
# NOTE(review): COUNTS, SUMCOUNTS, PARTITION, POP_LOGML and LOGDIFF are not
# arguments. The assignments to them below only change copies local to this
# call, so the LOGDIFF cache is not updated for later calls and
# computePopulationLogml() may not see the temporary changes -- confirm how
# this state is meant to be shared.
laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) {
  muutokset <- LOGDIFF[ind, ]

  i1 <- PARTITION[ind]
  i1_logml <- POP_LOGML[i1]
  muutokset[i1] <- 0

  rows <- globalRows[ind, 1]:globalRows[ind, 2]
  diffInCounts <- computeDiffInCounts(
    rows, size(COUNTS, 1), size(COUNTS, 2), data
  )
  # Per-locus totals (MATLAB's sum() of a matrix sums each column)
  diffInSumCounts <- colSums(diffInCounts)

  # logml of i1 with the individual temporarily removed
  COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
  SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
  new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
  COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
  SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts

  # Populations that have changed since the last visit (marked with -Inf)
  i2 <- which(muutokset == -Inf)
  i2 <- setdiff(i2, i1)
  ni2 <- length(i2)

  if (ni2 > 0) {
    i2_logml <- POP_LOGML[i2]
    # as.vector() recycles diffInCounts over every selected slice;
    # rep(each = ) lines the per-locus totals up with the selected rows
    COUNTS[, , i2] <- COUNTS[, , i2] + as.vector(diffInCounts)
    SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + rep(diffInSumCounts, each = ni2)
    new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm)
    COUNTS[, , i2] <- COUNTS[, , i2] - as.vector(diffInCounts)
    SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - rep(diffInSumCounts, each = ni2)

    muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
  }
  LOGDIFF[ind, ] <- muutokset
  return(list(muutokset = muutokset, diffInCounts = diffInCounts))
}
# Returns an npops x 1 table whose i-th element tells what the change in
# logml would be if all individuals of cluster i1 were moved to cluster i.
#
# Returns list(muutokset = ..., diffInCounts = ...); when cluster i1 has no
# individuals both elements are all zeros.
#
# NOTE(review): COUNTS, SUMCOUNTS, PARTITION and POP_LOGML are not arguments.
# The assignments to COUNTS/SUMCOUNTS below only change copies local to this
# call -- confirm that computePopulationLogml() sees them as intended.
laskeMuutokset2 <- function(i1, globalRows, data, adjprior, priorTerm) {
  npops <- size(COUNTS, 3)
  muutokset <- zeros(npops, 1)

  i1_logml <- POP_LOGML[i1]

  inds <- which(PARTITION == i1)
  ninds <- length(inds)

  if (ninds == 0) {
    diffInCounts <- zeros(size(COUNTS, 1), size(COUNTS, 2))
    return(list(muutokset = muutokset, diffInCounts = diffInCounts))
  }

  # All data rows that belong to the individuals of cluster i1
  rows <- unlist(
    lapply(inds, function(ind) globalRows[ind, 1]:globalRows[ind, 2])
  )

  diffInCounts <- computeDiffInCounts(
    rows, size(COUNTS, 1), size(COUNTS, 2), data
  )
  # Per-locus totals (MATLAB's sum() of a matrix sums each column)
  diffInSumCounts <- colSums(diffInCounts)

  COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
  SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
  new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
  COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
  SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts

  # Every population except i1
  i2 <- setdiff(seq_len(npops), i1)
  ni2 <- length(i2)

  if (ni2 > 0) {
    i2_logml <- POP_LOGML[i2]
    # as.vector() recycles diffInCounts over every selected slice;
    # rep(each = ) lines the per-locus totals up with the selected rows
    COUNTS[, , i2] <- COUNTS[, , i2] + as.vector(diffInCounts)
    SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + rep(diffInSumCounts, each = ni2)
    new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm)
    COUNTS[, , i2] <- COUNTS[, , i2] - as.vector(diffInCounts)
    SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - rep(diffInSumCounts, each = ni2)

    muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml
  }
  return(list(muutokset = muutokset, diffInCounts = diffInCounts))
}
# Returns a length(unique(T2)) x npops table whose element (i, j) tells what
# the change in logml would be if the sub-population inds2[which(T2 == i)] of
# population i1 were moved to cluster j.
#
# NOTE(review): COUNTS, SUMCOUNTS and POP_LOGML are not arguments. The
# assignments to COUNTS/SUMCOUNTS below only change copies local to this
# call -- confirm that computePopulationLogml() sees them as intended.
laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1) {
  npops <- size(COUNTS, 3)
  npops2 <- length(unique(T2))
  muutokset <- zeros(npops2, npops)

  i1_logml <- POP_LOGML[i1]
  # Every population except i1
  i2 <- setdiff(seq_len(npops), i1)
  ni2 <- length(i2)

  for (pop2 in seq_len(npops2)) {
    inds <- inds2[which(T2 == pop2)]
    ninds <- length(inds)
    if (ninds > 0) {
      # All data rows that belong to the individuals of this sub-population
      rows <- unlist(
        lapply(inds, function(ind) globalRows[ind, 1]:globalRows[ind, 2])
      )
      diffInCounts <- computeDiffInCounts(
        rows, size(COUNTS, 1), size(COUNTS, 2), data
      )
      # Per-locus totals (MATLAB's sum() of a matrix sums each column)
      diffInSumCounts <- colSums(diffInCounts)

      COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
      SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts
      new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm)
      COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts
      SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts

      if (ni2 > 0) {
        i2_logml <- t(POP_LOGML[i2])
        # as.vector() recycles diffInCounts over every selected slice;
        # rep(each = ) lines the per-locus totals up with the selected rows
        COUNTS[, , i2] <- COUNTS[, , i2] + as.vector(diffInCounts)
        SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + rep(diffInSumCounts, each = ni2)
        new_i2_logml <- t(computePopulationLogml(i2, adjprior, priorTerm))
        COUNTS[, , i2] <- COUNTS[, , i2] - as.vector(diffInCounts)
        SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - rep(diffInSumCounts, each = ni2)

        muutokset[pop2, i2] <- new_i1_logml - i1_logml +
          new_i2_logml - i2_logml
      }
    }
  }
  return(muutokset)
}
# Returns a length(inds) x 1 table whose i-th element tells what the change
# in logml would be if individual i switched cluster between i1 and i2.
#
# NOTE(review): COUNTS, SUMCOUNTS, PARTITION and POP_LOGML are not arguments.
# The assignments to COUNTS/SUMCOUNTS below only change copies local to this
# call -- confirm that computePopulationLogml() sees them as intended.
laskeMuutokset5 <- function(inds, globalRows, data, adjprior, priorTerm, i1, i2) {
  ninds <- length(inds)
  muutokset <- zeros(ninds, 1)

  i1_logml <- POP_LOGML[i1]
  i2_logml <- POP_LOGML[i2]

  for (i in seq_len(ninds)) {
    ind <- inds[i]
    if (PARTITION[ind] == i1) {
      pop1 <- i1 # from
      pop2 <- i2 # to
    } else {
      pop1 <- i2
      pop2 <- i1
    }
    rows <- globalRows[ind, 1]:globalRows[ind, 2]
    diffInCounts <- computeDiffInCounts(
      rows, size(COUNTS, 1), size(COUNTS, 2), data
    )
    # Per-locus totals (MATLAB's sum() of a matrix sums each column)
    diffInSumCounts <- colSums(diffInCounts)

    # Apply the move, evaluate both populations, then undo the move
    COUNTS[, , pop1] <- COUNTS[, , pop1] - diffInCounts
    SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] - diffInSumCounts
    COUNTS[, , pop2] <- COUNTS[, , pop2] + diffInCounts
    SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] + diffInSumCounts

    new_logmls <- computePopulationLogml(c(i1, i2), adjprior, priorTerm)
    muutokset[i] <- sum(new_logmls)

    COUNTS[, , pop1] <- COUNTS[, , pop1] + diffInCounts
    SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] + diffInSumCounts
    COUNTS[, , pop2] <- COUNTS[, , pop2] - diffInCounts
    SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] - diffInSumCounts
  }

  # Change relative to the current logml of the two populations
  muutokset <- muutokset - i1_logml - i2_logml
  return(muutokset)
}

View file

@ -1,39 +0,0 @@
#' @title Calculate changes in logml from moving admixture mass
#' @description Returns an npops x npops table whose element (i, j) is the
#' change in logml if a proportion `osuus` of probability mass is moved from
#' population i to population j. If population i has nothing to move, the
#' corresponding row is left as zeros.
#' @param osuus amount of mass moved in each trial
#' @param omaFreqs passed on unchanged to `computeIndLogml()`
#' @param osuusTaulu vector of proportions, one entry per population
#' @param logml baseline value subtracted from every trial value
#' @param COUNTS array whose third dimension gives the number of populations
#' @export
laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml,
COUNTS = matrix(0)) {
# Number of populations: third dimension of COUNTS, or 1 when it has none
npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3])
# Only populations holding more than 0.005 of the mass can give mass away
notEmpty <- which(osuusTaulu > 0.005)
muutokset <- zeros(npops)
# NOTE(review): `empties` is assigned but not used below
empties <- !notEmpty
for (i1 in notEmpty) {
# Take the mass out of i1, then try it in every other population i2
osuusTaulu[i1] <- osuusTaulu[i1] - osuus
for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) {
osuusTaulu[i2] <- osuusTaulu[i2] + osuus
loggis <- computeIndLogml(omaFreqs, osuusTaulu)
# Work around Matlab OOB bug
if (i1 > nrow(muutokset)) {
muutokset <- rbind(muutokset, muutokset * 0)
}
if (i2 > ncol(muutokset)) {
muutokset <- cbind(muutokset, muutokset * 0)
}
muutokset[i1, i2] <- loggis - logml
# Undo the trial move before testing the next target
osuusTaulu[i2] <- osuusTaulu[i2] - osuus
}
osuusTaulu[i1] <- osuusTaulu[i1] + osuus
}
return (muutokset)
}

25
R/laskeOsaDist.R Normal file
View file

@ -0,0 +1,25 @@
#' @title Sub-vector of pairwise distances
#' @description Extracts from the distance vector `dist` the sub-vector that
#' holds the pairwise distances between the individuals `inds2`. The position
#' of pair (a, b), a < b, in `dist` is computed as
#' `(a - 1) * ninds - a * (a - 1) / 2 + (b - a)`.
#' @param inds2 indices of the selected individuals; the formula above
#' assumes they are in increasing order
#' @param dist vector of pairwise distances between all individuals
#' @param ninds total number of individuals
#' @return the elements of `dist` for every pair of `inds2`, ordered as
#' (1,2), (1,3), ..., (2,3), ...; empty when fewer than two individuals
#' @author Waldir Leoncio
laskeOsaDist <- function(inds2, dist, ninds) {
  ninds2 <- length(inds2)
  if (ninds2 < 2) {
    # No pairs can be formed
    return(dist[integer(0)])
  }
  # Positions (within inds2) of the first and second member of every pair
  firsts <- seq_len(ninds2 - 1)
  first_pos <- rep(firsts, times = ninds2 - firsts)
  second_pos <- unlist(lapply(firsts, function(i) seq(i + 1, ninds2)))

  a <- inds2[first_pos]
  b <- inds2[second_pos]
  apu <- (a - 1) * ninds - a / 2 * (a - 1) + (b - a)
  # `dist` is the data vector passed in, so it is indexed, not called
  dist2 <- dist[apu]
  return(dist2)
}

View file

@ -4,10 +4,14 @@
#' linkage algorithm. The input Y is a distance matrix such as is generated by
#' PDIST. Y may also be a more general dissimilarity matrix conforming to the
#' output format of PDIST.
#' @param Y data
#'
#' Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X.
#' @param Y matrix
#' @param method either 'si', 'av', 'co' 'ce' or 'wa'
#' @note This is also a base Matlab function. The reason why the source code is also present here is unclear.
#' @export
linkage <- function(Y, method = 'co') {
#TODO: compare R output with MATLAB output
k <- size(Y)[1]
n <- size(Y)[2]
m <- (1 + sqrt(1 + 8 * n)) / 2
@ -24,18 +28,33 @@ linkage <- function(Y, method = 'co') {
N[1:m] <- 1
n <- m; # since m is changing, we need to save m in n.
R <- 1:n
for (s in 1:(n-1)) {
X <- Y
v <- min(X)[1]
k <- min(X)[2]
for (s in 1:(n - 1)) {
X <- as.matrix(as.vector(Y), ncol=1)
v <- min_MATLAB(X)$mins
k <- min_MATLAB(X)$idx
i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1)))
j <- k - (i - 1) * (m - i / 2) + i
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
# Temp variables
I1 <- 1:(i - 1)
I2 <- (i + 1):(j - 1)
I3 <- (j + 1):m
Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A
# Temp variables
if (i > 1) {
I1 <- 1:(i - 1)
} else {
I1 <- NULL
}
if (i + 1 <= j - 1) {
I2 <- (i + 1):(j - 1)
} else {
I2 <- NULL
}
if (j + 1 <= m) {
I3 <- (j + 1):m
} else {
I3 <- NULL
}
U <- c(I1, I2, I3)
I <- c(
I1 * (m - (I1 + 1) / 2) - m + i,
@ -47,11 +66,13 @@ linkage <- function(Y, method = 'co') {
I2 * (m - (I2 + 1) / 2) - m + j,
j * (m - (j + 1) / 2) - m + I3
)
# Workaround in R for negative values in I and J
# I <- I[I > 0 & I <= length(Y)]
# J <- J[J > 0 & J <= length(Y)]
switch(method,
'si' = Y[I] <- min(Y[I], Y[J]), # single linkage
'si' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, min), # single linkage
'av' = Y[I] <- Y[I] + Y[J], # average linkage
'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage
'co' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, max), #complete linkage
'ce' = {
K <- N[R[i]] + N[R[j]] # centroid linkage
Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] -
@ -61,8 +82,7 @@ linkage <- function(Y, method = 'co') {
Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]])
)
J <- c(J, i * (m - (i + 1) / 2) - m + j)
Y[J] <- vector() # no need for the cluster information about j
Y <- Y[-J] # no need for the cluster information about j
# update m, N, R
m <- m - 1
N[n + s] <- N[R[i]] + N[R[j]]

View file

@ -4,17 +4,16 @@
#' @return list containing data and popnames
#' @export
lueGenePopData <- function (tiedostonNimi) {
fid <- load(tiedostonNimi)
line1 <- readLines(fid)[1] # ensimmäinen rivi
line2 <- readLines(fid)[2] # toinen rivi
fid <- readLines(tiedostonNimi)
line <- fid[1] # ensimmäinen rivi
line <- fid[2] # toinen rivi
count <- rivinSisaltamienMjonojenLkm(line)
line <- readLines(fid)[3]
line <- fid[3]
lokusRiveja <- 1
while (testaaPop(line) == 0) {
lokusRiveja <- lokusRiveja + 1 # locus row
line <- readLines(fid)[3 + lokusRiveja]
line <- fid[2 + lokusRiveja]
}
if (lokusRiveja > 1) {
@ -29,38 +28,34 @@ lueGenePopData <- function (tiedostonNimi) {
ninds <- 0
poimiNimi <- 1
digitFormat <- -1
while (line != -1) {
line <- readLines(fid)[lokusRiveja + 1]
lokusRiveja <- lokusRiveja + 1
while (lokusRiveja < length(fid) - 2) {
lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along
line <- fid[lokusRiveja + 2]
if (poimiNimi == 1) {
# Edellinen rivi oli 'pop'
# Edellinen rivi oli 'pop' (previous line was pop)
nimienLkm <- nimienLkm + 1
ninds <- ninds + 1
if (nimienLkm > size(popnames, 1)) {
popnames <- c(popnames, cell(10, 2))
popnames <- rbind(popnames, cell(10, 2))
}
nimi <- lueNimi(line)
if (digitFormat == -1) {
digitFormat <- selvitaDigitFormat(line)
divider <- 10 ^ digitFormat
}
popnames[nimienLkm, 1] <- nimi #N<>in se on greedyMix:iss<73>kin?!?
popnames[nimienLkm, 1] <- nimi #N<>in se on greedyMix:iss<73>kin?!?
popnames[nimienLkm, 2] <- ninds
poimiNimi <- 0
data <- addAlleles(data, ninds, line, divider)
} else if (testaaPop(line)) {
poimiNimi <- 1
} else if (line != -1) {
} else if (!is.na(line)) {
ninds <- ninds + 1
data <- addAlleles(data, ninds, line, divider)
}
}
data <- data[1:(ninds * 2),]
popnames <- popnames[seq_len(nimienLkm),]
data <- data[1:(ninds * 2), ]
popnames <- popnames[seq_len(nimienLkm), ]
return(list(data = data, popnames = popnames))
}

View file

@ -1,17 +1,23 @@
#' @title Read the Name
#' @description Returns the part of the line from the beginning that is before
#' the comma. Useful for returning the name of a GenePop area
#' @param line line (a single character string)
#' @return nimi: the text preceding the first comma, as one string
#' @export
lueNimi <- function(line) {
  # ==========================================================================
  # Validation
  # ==========================================================================
  comma_pos <- regexpr(",", line, fixed = TRUE)
  if (comma_pos < 0) {
    stop("There are no commas in this line")
  }
  # Returns the part of line that comes before the first comma.
  nimi <- substr(line, 1, comma_pos - 1)
  return(nimi)
}

160
R/matlab2r.R Normal file
View file

@ -0,0 +1,160 @@
#' @title Convert Matlab function to R
#' @description Performs basic syntax conversion from Matlab to R
#' @param filename name of the file
#' @param output can be "asis", "clean", "save" or "diff"
#' @param improve_formatting if `TRUE` (default), makes minor changes
#' to conform to best-practice formatting conventions
#' @param change_assignment if `TRUE` (default), uses `<-` as the assignment
#' operator
#' @param append if `FALSE` (default), overwrites file; otherwise, append
#' output to input
#' @return text converted to R, printed to screen or replacing input file.
#' With `output = "asis"` the converted lines are returned as a character
#' vector; the other modes are used for their side effect (printing or
#' writing) and return `NULL`.
#' @author Waldir Leoncio
#' @importFrom utils write.table
#' @export
#' @note This function is intended to expedite the process of converting a
#' Matlab function to R by making common replacements. It does not have the
#' immediate goal of outputting a ready-to-use function. In other words,
#' after using this function you should go back to it and make minor changes.
#'
#' It is also advised to do a dry-run with `output = "clean"` and only switching
#' to `output = "save"` when you are confident that no important code will be
#' lost (for shorter functions, a careful visual inspection should suffice).
matlab2r <- function(
  filename, output = "diff", improve_formatting = TRUE,
  change_assignment = TRUE, append = FALSE
) {
  # TODO: this function is too long! Split into subfunctions
  # (say, by rule and/or section)
  # ======================================================== #
  # Verification                                             #
  # ======================================================== #
  if (!file.exists(filename)) stop("File not found")
  # ======================================================== #
  # Reading file into R                                      #
  # ======================================================== #
  txt <- readLines(filename)
  original <- txt # untouched copy, only used by output = "diff"
  # ======================================================== #
  # Replacing text                                           #
  # ======================================================== #
  # Uncommenting ------------------------------------------- #
  txt <- gsub("^#\\s?(.+)", "\\1", txt)
  # Output variable ---------------------------------------- #
  # Only the first line is inspected, so the Matlab function header is
  # expected to be there.
  out <- gsub(
    pattern = "\\t*function ((\\S|\\,\\s)+)\\s?=\\s?(\\w+)\\((.+)\\)",
    replacement = "\\1",
    x = txt[1]
  ) # TODO: improve by detecting listed outputs
  if (substring(out, 1, 1) == "[") {
    # Several outputs ([a, b] = f(x)) are returned as a named list
    out <- strsplit(out, "(\\,|\\[|\\]|\\s)")[[1]]
    out <- out[which(out != "")]
    out <- vapply(out, function(x) paste(x, "=", x), character(1))
    out <- paste0("list(", paste(out, collapse = ", "), ")")
  }
  # Function header ---------------------------------------- #
  txt <- gsub(
    pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)",
    replacement = "\\2 <- function(\\3) {",
    x = txt
  )
  txt <- gsub(
    pattern = "function (.+)\\((.+)\\)",
    replacement = "\\1 <- function(\\2) {",
    x = txt
  )
  # Function body ------------------------------------------ #
  txt <- gsub("(.+)\\.\\.\\.", "\\1", txt)
  txt <- gsub(";", "", txt)
  # Loops and if-statements
  txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt)
  txt <- gsub("end$", "}", txt)
  txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after {
  txt <- gsub("else$", "} else {", txt)
  txt <- gsub("elseif", "} else if", txt)
  # The condition must be wrapped in parentheses to be valid R, exactly as
  # done for `if` above
  txt <- gsub("while (.+)", "while (\\1) {", txt)
  # MATLAB-equivalent functions in R
  txt <- gsub("gamma_ln", "log_gamma", txt)
  txt <- gsub("nchoosek", "choose", txt)
  txt <- gsub("isempty", "is.null", txt)
  # txt <- gsub("(.+)\\'", "t(\\1)", txt)
  # Subsets ------------------------------------------------ #
  ass_op <- if (change_assignment) "<-" else "="
  txt <- gsub(
    pattern = "([^\\(]+)\\(([^\\(]+)\\)=(.+)",
    replacement = paste0("\\1[\\2] ", ass_op, "\\3"),
    x = txt
  )
  txt <- gsub("\\(:\\)", "[, ]", txt)
  # Matches x(:,end) or x[:,end]; the closing delimiter is ")" or "]". The
  # empty ncol() is left for the user to fill in.
  txt <- gsub("(.+)(\\[|\\():,end(\\]|\\))", "\\1[, ncol()]", txt)
  # Formatting --------------------------------------------- #
  if (improve_formatting) {
    txt <- gsub("(.),(\\S)", "\\1, \\2", txt)
    # Math operators
    txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt)
    txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt)
    txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt)
    txt <- gsub("(\\S)\\/(\\S)", "\\1 / \\2", txt)
    # Logic operators
    txt <- gsub("~", "!", txt)
    txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt)
    txt <- gsub("(\\S)<=(\\S)", "\\1 <= \\2", txt)
    txt <- gsub("(\\S)==(\\S)", "\\1 == \\2", txt)
    # Assignment
    txt <- gsub(
      pattern = "(\\w)(\\s?)=(\\s?)(\\w)",
      replacement = paste0("\\1 ", ass_op, " \\4"),
      x = txt
    )
    # txt <- gsub(
    #   pattern = "(\\s+(.|\\_|\\[|\\])+)(\\s?)=(\\s?)(.+)",
    #   replacement = paste0("\\1 ", ass_op, "\\5"),
    #   x = txt
    # )
    txt <- gsub("%(\\s?)(\\w)", "# \\2", txt)
  }
  # Adding output and end-of-file brace -------------------- #
  txt <- append(txt, paste0("\treturn(", out, ")\n}"))
  # Returning converted code ------------------------------- #
  if (output == "asis") {
    return(txt)
  } else if (output == "clean") {
    return(cat(txt, sep = "\n"))
  } else if (output == "save") {
    return(
      write.table(
        x = txt,
        file = filename,
        quote = FALSE,
        row.names = FALSE,
        col.names = FALSE,
        append = append
      )
    )
  } else if (output == "diff") {
    # txt is longer than the input (the return statement was appended), so
    # the original is padded with empty strings instead of printing "NA"
    before <- c(original, rep("", length(txt) - length(original)))
    # Three output lines per converted line: header, original, modified
    diff_text <- vector(mode = "character", length = 3 * length(txt))
    for (i in seq_along(txt)) {
      new_i <- 3 * i - 2
      diff_text[new_i] <- paste(
        "-----------------------", "line", i, "-----------------------"
      )
      diff_text[new_i + 1] <- before[i]
      diff_text[new_i + 2] <- txt[i]
    }
    message("Displaying line number, original content and modified content")
    return(cat(diff_text, sep = "\n"))
  } else {
    stop("Invalid output argument")
  }
}

31
R/min_max_MATLAB.R Normal file
View file

@ -0,0 +1,31 @@
#' @title Minimum (MATLAB version)
#' @description Finds the minimum value of each column of a matrix and,
#' optionally, the row position at which each minimum first occurs
#' @param X matrix
#' @param indices if `TRUE` (default), also return the row indices of the
#' minima
#' @return If `indices = TRUE`, a list with elements `mins` (column minima)
#' and `idx` (row index of the first occurrence of each minimum); otherwise
#' only the vector of column minima
#' @author Waldir Leoncio
min_MATLAB <- function(X, indices = TRUE) {
  mins <- apply(X, 2, min)
  # match() gives the first row holding the minimum; vapply() guarantees an
  # integer vector whatever the number of columns
  idx <- vapply(
    seq_len(ncol(X)),
    function(x) match(mins[x], X[, x]),
    integer(1)
  )
  if (indices) {
    return(list(mins = mins, idx = idx))
  } else {
    return(mins)
  }
}
#' @title Maximum (MATLAB version)
#' @description Finds the maximum value of each column of a matrix and,
#' optionally, the row position at which each maximum first occurs
#' @param X matrix
#' @param indices if `TRUE` (default), also return the row indices of the
#' maxima
#' @return If `indices = TRUE`, a list with elements `maxs` (column maxima)
#' and `idx` (row index of the first occurrence of each maximum); otherwise
#' only the vector of column maxima
#' @author Waldir Leoncio
max_MATLAB <- function(X, indices = TRUE) {
  maxs <- apply(X, 2, max)
  # match() gives the first row holding the maximum; vapply() guarantees an
  # integer vector whatever the number of columns
  idx <- vapply(
    seq_len(ncol(X)),
    function(x) match(maxs[x], X[, x]),
    integer(1)
  )
  if (indices) {
    return(list(maxs = maxs, idx = idx))
  } else {
    return(maxs)
  }
}

10
R/nargin.R Normal file
View file

@ -0,0 +1,10 @@
#' @title Number of function input arguments
#' @description Counts the arguments supplied in the call to the function
#' from which `nargin()` is invoked
#' @return The number of arguments found in the calling function's call
#' @author Waldir Leoncio
#' @note This function only makes sense inside another function
#' @references https://stackoverflow.com/q/64422780/1169233
nargin <- function() {
  called_from_function <- sys.nframe() >= 2
  if (!called_from_function) {
    stop("must be called from inside a function")
  }
  # The caller's call is the function name followed by its supplied arguments
  caller_call <- as.list(sys.call(-1))
  length(caller_call) - 1
}

View file

@ -5,14 +5,14 @@ newGetDistances <- function(data, rowsFromInd) {
empties <- find(data < 0)
data[empties] <- 0
data <- as.integer(data) # max(noalle) oltava <256
data <- apply(data, 2, as.numeric) # max(noalle) oltava <256
pariTaulu <- zeros(riviLkm, 2)
aPointer <- 1
for (a in (1:ninds) - 1) {
pariTaulu[aPointer:(aPointer + ninds - 1 - a), 1] <-
ones(ninds - a, 1) * a
pariTaulu[aPointer:aPointer + ninds - 1 - a, 2] <- t(a + 1:ninds)
for (a in 1:(ninds - 1)) {
pariTaulu_rows <- aPointer:(aPointer + ninds - 1 - a)
pariTaulu[pariTaulu_rows, 1] <- ones(ninds - a, 1) * a
pariTaulu[pariTaulu_rows, 2] <- t((a + 1):ninds)
aPointer <- aPointer + ninds - a
}
@ -31,9 +31,9 @@ newGetDistances <- function(data, rowsFromInd) {
rm(pariTaulu, miinus)
x <- zeros(size(eka))
x <- as.integer(x)
x <- apply(x, 2, as.integer)
y <- zeros(size(toka))
y <- as.integer(y)
y <- apply(y, 2, as.integer)
for (j in 1:nloci) {
for (k in 1:rowsFromInd) {
@ -57,7 +57,6 @@ newGetDistances <- function(data, rowsFromInd) {
muut <- find(vertailuja > 0)
dist[muut] <- summa[muut] / vertailuja[muut]
rm(summa, vertailuja)
Z = linkage(t(dist))
Z <- linkage(t(dist))
return(list(Z = Z, dist = dist))
}

View file

@ -41,7 +41,7 @@ poistaLiianPienet <- function (npops, rowsFromInd, alaraja,
PARTITION[yksilot] == n
}
# TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <<-
# TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <-
COUNTS[, , miniPops] <- NA
SUMCOUNTS[miniPops, ] <- NA

View file

@ -0,0 +1,15 @@
poistaTyhjatPopulaatiot <- function(npops) {
  # Removes emptied populations from COUNTS and SUMCOUNTS, renumbers
  # PARTITION accordingly and returns the updated number of populations.
  #
  # COUNTS, SUMCOUNTS, LOGDIFF and PARTITION are read as free variables. The
  # assignments below only modify copies local to this function.
  # NOTE(review): the caller's objects are therefore left untouched and only
  # npops is returned -- confirm how the globals are meant to be propagated.
  # NOTE(review): base R any() treats the 2 as one more value to test, not as
  # a dimension as in MATLAB's any(X, 2). Unless the package masks any(),
  # this does not test rows -- confirm.
  notEmpty <- find(any(SUMCOUNTS, 2))
  COUNTS <- COUNTS[, , notEmpty]
  SUMCOUNTS <- SUMCOUNTS[notEmpty, ]
  LOGDIFF <- LOGDIFF[, notEmpty]
  # Relabel the surviving populations as 1, 2, ..., length(notEmpty)
  for (n in seq_along(notEmpty)) {
    apu <- find(PARTITION == notEmpty[n])
    PARTITION[apu] <- n
  }
  npops <- length(notEmpty)
  return(npops)
}

7
R/rand_disc.R Normal file
View file

@ -0,0 +1,7 @@
rand_disc <- function(CDF) {
  # Returns the index of a value drawn from a discrete distribution using the
  # inversion method: a uniform number is drawn and the first position whose
  # CDF value exceeds it is returned.
  #
  # CDF: numeric vector of cumulative probabilities.
  # Returns NA if no element of CDF exceeds the uniform draw.
  slump <- stats::runif(1) # one draw from the uniform distribution on (0, 1)
  har <- which(CDF > slump)
  svar <- har[1]
  return(svar)
}

26
R/returnInOrder.R Normal file
View file

@ -0,0 +1,26 @@
returnInOrder <- function(inds, pop, globalRows, data, adjprior, priorTerm) {
  # Returns the individuals ordered so that the first one is the individual
  # whose removal from population pop would increase logml the most.
  #
  # COUNTS and SUMCOUNTS are read as free variables. They are modified and
  # restored on local copies inside the loop.
  # NOTE(review): computePopulationLogml() is not passed these local copies.
  # If it reads the globals directly it will not see the temporary removal --
  # confirm.
  ninds <- length(inds)
  # Two-column table: individual id and the logml obtained without it
  apuTaulu <- cbind(inds, zeros(ninds, 1), deparse.level = 0)
  for (i in seq_len(ninds)) {
    # NOTE(review): rows are looked up by position i, not by the individual
    # id inds[i] -- confirm against the MATLAB source.
    rows <- globalRows[i, 1]:globalRows[i, 2]
    diffInCounts <- computeDiffInCounts(
      rows, size(COUNTS, 1), size(COUNTS, 2), data
    )
    # NOTE(review): MATLAB's sum() on a matrix returns column sums, whereas
    # R's sum() returns the grand total; colSums() may be intended -- confirm.
    diffInSumCounts <- sum(diffInCounts)

    # Temporarily remove the individual from the population ...
    COUNTS[, , pop] <- COUNTS[, , pop] - diffInCounts
    SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] - diffInSumCounts
    apuTaulu[i, 2] <- computePopulationLogml(pop, adjprior, priorTerm)
    # ... and put it back
    COUNTS[, , pop] <- COUNTS[, , pop] + diffInCounts
    SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] + diffInSumCounts
  }
  # Sort by logml ascending, then read the ids from the last row upwards
  apuTaulu <- sortrows(apuTaulu, 2)
  inds <- apuTaulu[rev(seq_len(ninds)), 1]
  return(inds)
}

View file

@ -7,23 +7,22 @@ selvitaDigitFormat <- function(line) {
# Genepop-formaatissa olevasta datasta. funktio selvitt<74><74>
# rivin muodon perusteella, ovatko datan alleelit annettu
# 2 vai 3 numeron avulla.
n <- 1
merkki <- line[n]
merkki <- substring(line, n, n)
while (merkki != ',') {
n <- n + 1
merkki <- line[n]
merkki <- substring(line, n, n)
}
while (!any(merkki == '0123456789')) {
while (!any(merkki %in% as.character(0:9))) {
n <- n + 1
merkki <- line[n]
merkki <- substring(line, n, n)
}
numeroja <- 0
while (any(merkki == '0123456789')) {
while (any(merkki %in% as.character(0:9))) {
numeroja <- numeroja + 1
n <- n + 1
merkki <- line[n]
merkki <- substring(line, n, n)
}
df <- numeroja / 2

16
R/setdiff_MATLAB.R Normal file
View file

@ -0,0 +1,16 @@
#' @title Set differences of two arrays
#' @description Loosely replicates the behavior of the homonym Matlab function
#' @param A first array
#' @param B second array
#' @param legacy if `TRUE`, preserves the behavior of the setdiff function from
#' MATLAB R2012b and prior releases. (currently not supported)
#' @return For numeric inputs, the sorted unique values of `A` that are not
#' present in `B`. Any other combination of input classes raises an error.
#' @author Waldir Leoncio
setdiff_MATLAB <- function(A, B, legacy = FALSE) {
  if (legacy) message("legacy=TRUE not supported. Ignoring.")
  if (is(A, "numeric") && is(B, "numeric")) {
    # match() is NA for the elements of A that have no counterpart in B
    values <- sort(unique(A[is.na(match(A, B))]))
  } else if (is(A, "data.frame") && is(B, "data.frame")) {
    stop("Not implemented for data frames")
  } else {
    # Fail explicitly instead of reaching return() with `values` undefined
    stop(
      "Not implemented for inputs of class ", class(A)[1], " and ", class(B)[1]
    )
  }
  # TODO: add support for indices (if necessary)
  return(values)
}

64
R/updateGlobalVariables.R Normal file
View file

@ -0,0 +1,64 @@
updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) {
  # Performs the changes to the global variables when individual ind is moved
  # to basket i2.
  #
  # PARTITION, COUNTS, SUMCOUNTS, POP_LOGML and LOGDIFF are read as free
  # variables. The assignments below only modify copies local to this function.
  # NOTE(review): nothing is written back to the caller and only the value of
  # the last assignment is returned -- confirm how the updated objects are
  # meant to be propagated.
  i1 <- PARTITION[ind]
  PARTITION[ind] <- i2
  COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
  COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
  # NOTE(review): MATLAB's sum() on a matrix returns column sums, whereas R's
  # sum() returns the grand total; colSums() may be intended -- confirm.
  SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum(diffInCounts)
  SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum(diffInCounts)

  POP_LOGML[c(i1, i2)] <- computePopulationLogml(
    c(i1, i2), adjprior, priorTerm
  )

  # Reset the LOGDIFF columns of both baskets and the rows of their members
  LOGDIFF[, c(i1, i2)] <- -Inf
  inx <- c(find(PARTITION == i1), find(PARTITION == i2))
  LOGDIFF[inx, ] <- -Inf
}
updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) {
  # Performs the changes to the global variables when all individuals in
  # basket i1 are moved to basket i2.
  #
  # PARTITION, COUNTS, SUMCOUNTS, POP_LOGML and LOGDIFF are read as free
  # variables. The assignments below only modify copies local to this function.
  # NOTE(review): nothing is written back to the caller and only the value of
  # the last assignment is returned -- confirm how the updated objects are
  # meant to be propagated.
  inds <- find(PARTITION == i1)
  PARTITION[inds] <- i2
  COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
  COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
  # NOTE(review): MATLAB's sum() on a matrix returns column sums, whereas R's
  # sum() returns the grand total; colSums() may be intended -- confirm.
  SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum(diffInCounts)
  SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum(diffInCounts)

  # Basket i1 has just been emptied, so its logml is set to zero
  POP_LOGML[i1] <- 0
  POP_LOGML[i2] <- computePopulationLogml(i2, adjprior, priorTerm)

  # Reset the LOGDIFF columns of both baskets and the rows of their members
  LOGDIFF[, c(i1, i2)] <- -Inf
  inx <- c(find(PARTITION == i1), find(PARTITION == i2))
  LOGDIFF[inx, ] <- -Inf
}
updateGlobalVariables3 <- function(
  muuttuvat, diffInCounts, adjprior, priorTerm, i2
) {
  # Performs the updates of the global variables when the individuals
  # 'muuttuvat' are moved to basket i2. Before the move, the individuals must
  # all belong to the same basket.
  #
  # PARTITION, COUNTS, SUMCOUNTS, POP_LOGML and LOGDIFF are read as free
  # variables. The assignments below only modify copies local to this function.
  # NOTE(review): nothing is written back to the caller and only the value of
  # the last assignment is returned -- confirm how the updated objects are
  # meant to be propagated.
  # The source basket is taken from the first moving individual
  i1 <- PARTITION[muuttuvat[1]]
  PARTITION[muuttuvat] <- i2
  COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts
  COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts
  # NOTE(review): MATLAB's sum() on a matrix returns column sums, whereas R's
  # sum() returns the grand total; colSums() may be intended -- confirm.
  SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum(diffInCounts)
  SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum(diffInCounts)

  POP_LOGML[c(i1, i2)] <- computePopulationLogml(
    c(i1, i2), adjprior, priorTerm
  )

  # Reset the LOGDIFF columns of both baskets and the rows of their members
  LOGDIFF[, c(i1, i2)] <- -Inf
  inx <- c(find(PARTITION == i1), find(PARTITION == i2))
  LOGDIFF[inx, ] <- -Inf
}

View file

@ -10,16 +10,10 @@
#' @param partitionSummary partitionSummary
#' @param popnames popnames
#' @param fixedK fixedK
#' @param PARTITION PARTITION
#' @param COUNTS COUNTS
#' @param SUMCOUNTS SUMCOUNTS
#' @param LOGDIFF LOGDIFF
#' @export
writeMixtureInfo <- function(
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS,
LOGDIFF
logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK
) {
changesInLogml <- list()
ninds <- size(data, 1) / rowsFromInd
npops <- size(COUNTS, 3)
@ -30,7 +24,6 @@ writeMixtureInfo <- function(
fid <- load(outPutFile)
} else {
fid <- -1
message('Diverting output to baps4_output.baps')
# TODO: replace sink with option that will record input and output
sink('baps4_output.baps', split=TRUE) # save in text anyway.
}

View file

@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/admixture_initialization.R
\name{admixture_initialization}
\alias{admixture_initialization}
\title{Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.}
\usage{
admixture_initialization(data_matrix, nclusters, Z)
}
\arguments{
\item{data_matrix}{data_matrix}
\item{nclusters}{ncluster}
\item{Z}{Z}
}
\description{
Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.
}

23
man/fgetl.Rd Normal file
View file

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fgetl-fopen.R
\name{fgetl}
\alias{fgetl}
\title{Read line from file, removing newline characters}
\usage{
fgetl(file)
}
\arguments{
\item{file}{character vector to be read, usually an output of `fopen()`}
}
\value{
If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1.
}
\description{
Equivalent to the homonymous Matlab function.
}
\seealso{
fopen
}
\author{
Waldir Leoncio
}

23
man/fopen.Rd Normal file
View file

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fgetl-fopen.R
\name{fopen}
\alias{fopen}
\title{Open file}
\usage{
fopen(filename)
}
\arguments{
\item{filename}{Path and name of file to be open}
}
\value{
The same as `readLines(filename)`
}
\description{
Open a text file
}
\seealso{
fgetl
}
\author{
Waldir Leoncio
}

View file

@ -1,8 +1,8 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/laskeMuutokset4.R
% Please edit documentation in R/laskeMuutokset12345.R
\name{laskeMuutokset4}
\alias{laskeMuutokset4}
\title{Calculate changes?}
\title{Calculate changes (?)}
\usage{
laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0))
}
@ -20,6 +20,6 @@ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0))
\description{
Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on
muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran
todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään
siirrettävää, on vastaavassa kohdassa rivi nollia.
}

21
man/laskeOsaDist.Rd Normal file
View file

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/laskeOsaDist.R
\name{laskeOsaDist}
\alias{laskeOsaDist}
\title{Lower part of the dist}
\usage{
laskeOsaDist(inds2, dist, ninds)
}
\arguments{
\item{inds2}{inds2}
\item{dist}{dist}
\item{ninds}{ninds}
}
\description{
Constructs, from the dist vector, a sub-vector that covers the individuals in inds2.
}
\author{
Waldir Leoncio
}

View file

@ -7,7 +7,7 @@
linkage(Y, method = "co")
}
\arguments{
\item{Y}{data}
\item{Y}{matrix}
\item{method}{either 'si', 'av', 'co' 'ce' or 'wa'}
}
@ -19,4 +19,9 @@ Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single
linkage algorithm. The input Y is a distance matrix such as is generated by
PDIST. Y may also be a more general dissimilarity matrix conforming to the
output format of PDIST.
Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X.
}
\note{
This is also a base Matlab function. The reason why the source code is also present here is unclear.
}

View file

@ -13,5 +13,5 @@ lueNimi(line)
nimi
}
\description{
Reads the line name
Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area
}

46
man/matlab2r.Rd Normal file
View file

@ -0,0 +1,46 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/matlab2r.R
\name{matlab2r}
\alias{matlab2r}
\title{Convert Matlab function to R}
\usage{
matlab2r(
filename,
output = "diff",
improve_formatting = TRUE,
change_assignment = TRUE,
append = FALSE
)
}
\arguments{
\item{filename}{name of the file}
\item{output}{can be "asis", "clean", "save" or "diff"}
\item{improve_formatting}{if `TRUE` (default), makes minor changes
to conform to best-practice formatting conventions}
\item{change_assignment}{if `TRUE` (default), uses `<-` as the assignment operator}
\item{append}{if `FALSE` (default), overwrites file; otherwise, append
output to input}
}
\value{
text converted to R, printed to screen or replacing input file
}
\description{
Performs basic syntax conversion from Matlab to R
}
\note{
This function is intended to expedite the process of converting a
Matlab function to R by making common replacements. It does not have the
immediate goal of outputting a ready-to-use function. In other words,
after using this function you should go back to it and make minor changes.
It is also advised to do a dry-run with `output = "clean"` and only switching
to `output = "save"` when you are confident that no important code will be
lost (for shorter functions, a careful visual inspection should suffice).
}
\author{
Waldir Leoncio
}

22
man/max_MATLAB.Rd Normal file
View file

@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/min_max_MATLAB.R
\name{max_MATLAB}
\alias{max_MATLAB}
\title{Maximum (MATLAB version)}
\usage{
max_MATLAB(X, indices = TRUE)
}
\arguments{
\item{X}{matrix}
\item{indices}{return indices?}
}
\value{
Either a list or a vector
}
\description{
Finds the maximum value for each column of a matrix, potentially returning the indices as well
}
\author{
Waldir Leoncio
}

22
man/min_MATLAB.Rd Normal file
View file

@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/min_max_MATLAB.R
\name{min_MATLAB}
\alias{min_MATLAB}
\title{Minimum (MATLAB version)}
\usage{
min_MATLAB(X, indices = TRUE)
}
\arguments{
\item{X}{matrix}
\item{indices}{return indices?}
}
\value{
Either a list or a vector
}
\description{
Finds the minimum value for each column of a matrix, potentially returning the indices instead
}
\author{
Waldir Leoncio
}

23
man/nargin.Rd Normal file
View file

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nargin.R
\name{nargin}
\alias{nargin}
\title{Number of function input arguments}
\usage{
nargin()
}
\value{
An integer
}
\description{
Returns the number of arguments passed to the parent function
}
\note{
This function only makes sense inside another function
}
\references{
https://stackoverflow.com/q/64422780/1169233
}
\author{
Waldir Leoncio
}

21
man/setdiff_MATLAB.Rd Normal file
View file

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/setdiff_MATLAB.R
\name{setdiff_MATLAB}
\alias{setdiff_MATLAB}
\title{Set differences of two arrays}
\usage{
setdiff_MATLAB(A, B, legacy = FALSE)
}
\arguments{
\item{A}{first array}
\item{B}{second array}
\item{legacy}{if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported)}
}
\description{
Loosely replicates the behavior of the homonym Matlab function
}
\author{
Waldir Leoncio
}

View file

@ -14,11 +14,7 @@ writeMixtureInfo(
inputFile,
partitionSummary,
popnames,
fixedK,
PARTITION,
COUNTS,
SUMCOUNTS,
LOGDIFF
fixedK
)
}
\arguments{
@ -41,14 +37,6 @@ writeMixtureInfo(
\item{popnames}{popnames}
\item{fixedK}{fixedK}
\item{PARTITION}{PARTITION}
\item{COUNTS}{COUNTS}
\item{SUMCOUNTS}{SUMCOUNTS}
\item{LOGDIFF}{LOGDIFF}
}
\description{
Writes information about the mixture

View file

@ -158,18 +158,18 @@ test_that("find works as expected", {
})
test_that("sortrows works as expected", {
mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4)
expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4))
expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4))
expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ])
mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4)
expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4))
expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4))
expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ])
})
test_that("cell works as expected", {
expect_equal(cell(0), array(dim = c(0, 0)))
expect_equal(cell(1), array(dim = c(1, 1)))
expect_equal(cell(2), array(dim = c(2, 2)))
expect_equal(cell(3, 4), array(dim = c(3, 4)))
expect_equal(cell(5, 7, 6), array(dim = c(5, 7, 6)))
expect_equivalent(cell(0), array(0, dim = c(0, 0)))
expect_equivalent(cell(1), array(0, dim = c(1, 1)))
expect_equivalent(cell(2), array(0, dim = c(2, 2)))
expect_equivalent(cell(3, 4), array(0, dim = c(3, 4)))
expect_equivalent(cell(5, 7, 6), array(0, dim = c(5, 7, 6)))
})
test_that("blanks works as expected", {
@ -201,4 +201,44 @@ test_that("isspace works as expected", {
X <- '\t a b\tcde f'
expect_identical(isspace(chr), c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0))
expect_identical(isspace(X), c(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0))
})
test_that("nargin works correctly", {
  # Helper whose result depends on how many arguments it was called with
  addme <- function(a, b) {
    n_supplied <- nargin()
    if (n_supplied == 2) {
      total <- a + b
    } else if (n_supplied == 1) {
      total <- a + a
    } else {
      total <- 0
    }
    return(total)
  }
  expect_equal(addme(13, 42), 55) # two arguments: plain sum
  expect_equal(addme(13), 26) # one argument: doubled
  expect_equal(addme(), 0) # no arguments: zero
})
test_that("setdiff works as expected", {
  # Numeric vectors -------------------------------------------------------- #
  A <- c(3, 6, 2, 1, 5, 1, 1)
  B <- c(2, 4, 6)
  C <- c(1, 3, 5)
  # expect_equal(setdiff_MATLAB(A, B), C) # TODO: export setdiff_MATLAB
  # Data frames ------------------------------------------------------------ #
  odd_numbers <- seq(1, 9, by = 2)
  A <- data.frame(
    Var1 = 1:5,
    Var2 = LETTERS[1:5],
    Var3 = c(FALSE, TRUE, FALSE, TRUE, FALSE)
  )
  B <- data.frame(
    Var1 = odd_numbers,
    Var2 = LETTERS[odd_numbers],
    Var3 = rep(FALSE, 5)
  )
  C <- data.frame(
    Var1 = c(2, 4),
    Var2 = c('B', 'D'),
    Var3 = c(TRUE, TRUE)
  )
  # expect_equal(setdiff_MATLAB(A, B), C) # TODO: implement for data frames
  # TODO: add more examples from https://se.mathworks.com/help/matlab/ref/double.setdiff.html;jsessionid=0d8d42582d4d299b8224403899f1
})

View file

@ -1,11 +1,17 @@
# library(devtools)#TEMP
# library(testthat)#TEMP
# library(rBAPS)#TEMP
context("Opening files on greedyMix")
# greedyMix(
# tietue = "data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt",
# tietue = "inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt",
# format = "GenePop",
# savePreProcessed = FALSE
# )
# )
context("Linkage")
test_that("Linkages are properly calculated", {
  # Input distances and the expected three-column linkage matrix
  Y <- c(0.5, 0.3, 0.6, 0.3, 0.3, 0.2, 0.3, 0.3, 0.3, 0.5)
  expected_tree <- matrix(
    c(2, 1, 7, 8, 4, 3, 5, 6, .2, .3, .3, .6),
    ncol = 3
  )
  expect_equal(object = linkage(Y), expected = expected_tree)
})