Fixed basic parsing of FASTA files (#25)
This commit is contained in:
parent
a88f31b3a5
commit
76828387a3
8 changed files with 80 additions and 78 deletions
|
|
@ -1,6 +1,7 @@
|
||||||
# Generated by roxygen2: do not edit by hand
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
export(greedyMix)
|
export(greedyMix)
|
||||||
|
export(handleData)
|
||||||
export(load_fasta)
|
export(load_fasta)
|
||||||
importFrom(R6,R6Class)
|
importFrom(R6,R6Class)
|
||||||
importFrom(Rsamtools,scanBam)
|
importFrom(Rsamtools,scanBam)
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@ greedyMix <- function(
|
||||||
# Generating partition summary ===============================================
|
# Generating partition summary ===============================================
|
||||||
ekat <- seq(1L, c[["rowsFromInd"]], ninds * c[["rowsFromInd"]]) # ekat = (1:rowsFromInd:ninds*rowsFromInd)';
|
ekat <- seq(1L, c[["rowsFromInd"]], ninds * c[["rowsFromInd"]]) # ekat = (1:rowsFromInd:ninds*rowsFromInd)';
|
||||||
c[["rows"]] <- c(ekat, ekat + c[["rowsFromInd"]] - 1L) # c.rows = [ekat ekat+rowsFromInd-1]
|
c[["rows"]] <- c(ekat, ekat + c[["rowsFromInd"]] - 1L) # c.rows = [ekat ekat+rowsFromInd-1]
|
||||||
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose);
|
logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose)
|
||||||
logml <- logml_npops_partitionSummary[["logml"]]
|
logml <- logml_npops_partitionSummary[["logml"]]
|
||||||
npops <- logml_npops_partitionSummary[["npops"]]
|
npops <- logml_npops_partitionSummary[["npops"]]
|
||||||
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]]
|
||||||
|
|
@ -72,8 +72,8 @@ greedyMix <- function(
|
||||||
|
|
||||||
# Writing mixture info =======================================================
|
# Writing mixture info =======================================================
|
||||||
changesInLogml <- writeMixtureInfo(
|
changesInLogml <- writeMixtureInfo(
|
||||||
logml, rowsFromInd, data, adjprior, priorTerm, NULL, inp, partitionSummary,
|
logml, c[["rowsFromInd"]], c[["data"]], c[["adjprior"]], c[["priorTerm"]],
|
||||||
popnames, fixedK
|
NULL, inp, partitionSummary, popnames, fixedK
|
||||||
)
|
)
|
||||||
|
|
||||||
# Updateing results ==========================================================
|
# Updateing results ==========================================================
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@
|
||||||
#' code to the smallest code that is larger than any code in use. After this,
|
#' code to the smallest code that is larger than any code in use. After this,
|
||||||
#' the function changes the allele codes so that one locus j
|
#' the function changes the allele codes so that one locus j
|
||||||
#' codes get values between? 1, ..., noalle(j).
|
#' codes get values between? 1, ..., noalle(j).
|
||||||
|
#' @export
|
||||||
handleData <- function(raw_data, format = "Genepop") {
|
handleData <- function(raw_data, format = "Genepop") {
|
||||||
# Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt?
|
# Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt?
|
||||||
# kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako
|
# kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d
|
||||||
nruns <- length(npopsTaulu)
|
nruns <- length(npopsTaulu)
|
||||||
|
|
||||||
initData <- data
|
initData <- data
|
||||||
data <- data[, 1:(ncol(data) - 1)]
|
data <- data[, seq_along(noalle)] # Original code always dropped last column.
|
||||||
|
|
||||||
logmlBest <- -1e50
|
logmlBest <- -1e50
|
||||||
partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml)
|
partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml)
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
laskeLoggis <- function(counts, sumcounts, adjprior) {
|
laskeLoggis <- function(counts, sumcounts, adjprior) {
|
||||||
npops <- size(counts, 3)
|
npops <- size(counts, 3)
|
||||||
|
replicated_adjprior <- array(adjprior, c(nrow(adjprior), ncol(adjprior), npops))
|
||||||
sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops))))))
|
sum1 <- sum(sum(sum(lgamma(counts + replicated_adjprior))))
|
||||||
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
|
sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts)))
|
||||||
logml2 <- sum1 - npops * sum3
|
logml2 <- sum1 - npops * sum3
|
||||||
loggis <- logml2
|
loggis <- logml2
|
||||||
|
|
|
||||||
|
|
@ -31,12 +31,12 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
|
|
||||||
dispLine()
|
dispLine()
|
||||||
cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:")
|
cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
|
||||||
cat(c("Data file: ", inputFile))
|
cat("Data file: ", inputFile, "\n")
|
||||||
cat("Model: independent")
|
cat("Model: independent\n")
|
||||||
cat(c("Number of clustered individuals: ", ownNum2Str(ninds)))
|
cat("Number of clustered individuals: ", ownNum2Str(ninds), "\n")
|
||||||
cat(c("Number of groups in optimal partition: ", ownNum2Str(npops)))
|
cat("Number of groups in optimal partition: ", ownNum2Str(npops), "\n")
|
||||||
cat(c("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml)))
|
cat("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml), "\n")
|
||||||
cat(" ")
|
cat(" ")
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
|
append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n")
|
||||||
|
|
@ -87,10 +87,10 @@ writeMixtureInfo <- function(
|
||||||
"Cluster ", as.character(m), ": {", as.character(indsInM[1])
|
"Cluster ", as.character(m), ": {", as.character(indsInM[1])
|
||||||
)
|
)
|
||||||
for (k in 2:cluster_size) {
|
for (k in 2:cluster_size) {
|
||||||
text <- c(text, ", ", as.character(indsInM[k]))
|
text <- c(text, ",", as.character(indsInM[k]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
text <- c(text, "}")
|
text <- c(text, "}\n")
|
||||||
while (length(text) > 58) {
|
while (length(text) > 58) {
|
||||||
# Take one line and display it.
|
# Take one line and display it.
|
||||||
new_line <- takeLine(text, 58)
|
new_line <- takeLine(text, 58)
|
||||||
|
|
@ -106,7 +106,7 @@ writeMixtureInfo <- function(
|
||||||
text <- ""
|
text <- ""
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (text != "") {
|
if (any(text != "")) {
|
||||||
cat(text)
|
cat(text)
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
append(fid, text)
|
append(fid, text)
|
||||||
|
|
@ -116,11 +116,11 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (npops > 1) {
|
if (npops > 1) {
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(
|
cat(
|
||||||
"Changes in log(marginal likelihood)",
|
"Changes in log(marginal likelihood)",
|
||||||
" if indvidual i is moved to group j:"
|
" if indvidual i is moved to group j:\n"
|
||||||
)
|
)
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
append(fid, " ")
|
append(fid, " ")
|
||||||
|
|
@ -131,7 +131,7 @@ writeMixtureInfo <- function(
|
||||||
fid,
|
fid,
|
||||||
c(
|
c(
|
||||||
"Changes in log(marginal likelihood)",
|
"Changes in log(marginal likelihood)",
|
||||||
"if indvidual i is moved to group j:"
|
"if indvidual i is moved to group j:\n"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
append(fid, "\n")
|
append(fid, "\n")
|
||||||
|
|
@ -167,9 +167,9 @@ writeMixtureInfo <- function(
|
||||||
|
|
||||||
if (names) {
|
if (names) {
|
||||||
nimi <- as.character(popnames[ind])
|
nimi <- as.character(popnames[ind])
|
||||||
rivi <- c(blanks(maxSize - length(nimi)), nimi, ":")
|
rivi <- c(blanks(maxSize - length(nimi)), nimi, ":\n")
|
||||||
} else {
|
} else {
|
||||||
rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":")
|
rivi <- c("\n", blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":\n")
|
||||||
}
|
}
|
||||||
for (j in 1:npops) {
|
for (j in 1:npops) {
|
||||||
rivi <- c(rivi, " ", logml2String(omaRound(muutokset[j])))
|
rivi <- c(rivi, " ", logml2String(omaRound(muutokset[j])))
|
||||||
|
|
@ -181,9 +181,9 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat("KL-divergence matrix in PHYLIP format:")
|
cat("KL-divergence matrix in PHYLIP format:\n")
|
||||||
|
|
||||||
dist_mat <- zeros(npops, npops)
|
dist_mat <- zeros(npops, npops)
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
|
|
@ -193,6 +193,7 @@ writeMixtureInfo <- function(
|
||||||
append(fid, "\n")
|
append(fid, "\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE]
|
||||||
maxnoalle <- size(COUNTS, 1)
|
maxnoalle <- size(COUNTS, 1)
|
||||||
nloci <- size(COUNTS, 2)
|
nloci <- size(COUNTS, 2)
|
||||||
d <- zeros(maxnoalle, nloci, npops)
|
d <- zeros(maxnoalle, nloci, npops)
|
||||||
|
|
@ -204,8 +205,8 @@ writeMixtureInfo <- function(
|
||||||
|
|
||||||
prior[1, nollia] <- 1
|
prior[1, nollia] <- 1
|
||||||
for (pop1 in 1:npops) {
|
for (pop1 in 1:npops) {
|
||||||
d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) /
|
squeezed_COUNTS_prior <- squeeze(COUNTS[, , pop1]) + prior
|
||||||
repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1))
|
d[, , pop1] <- squeezed_COUNTS_prior / sum(squeezed_COUNTS_prior)
|
||||||
}
|
}
|
||||||
ekarivi <- as.character(npops)
|
ekarivi <- as.character(npops)
|
||||||
cat(ekarivi)
|
cat(ekarivi)
|
||||||
|
|
@ -215,14 +216,14 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
|
|
||||||
for (pop1 in 1:npops) {
|
for (pop1 in 1:npops) {
|
||||||
for (pop2 in 1:(pop1 - 1)) {
|
for (pop2 in seq_len(pop1 - 1)) {
|
||||||
dist1 <- d[, , pop1]
|
dist1 <- d[, , pop1]
|
||||||
dist2 <- d[, , pop2]
|
dist2 <- d[, , pop2]
|
||||||
div12 <- sum(
|
div12 <- sum(
|
||||||
sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10)))
|
sum(dist1 * base::log2((dist1 + 10^-10) / (dist2 + 10^-10)))
|
||||||
) / nloci
|
) / nloci
|
||||||
div21 <- sum(
|
div21 <- sum(
|
||||||
sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10)))
|
sum(dist2 * base::log2((dist2 + 10^-10) / (dist1 + 10^-10)))
|
||||||
) / nloci
|
) / nloci
|
||||||
div <- (div12 + div21) / 2
|
div <- (div12 + div21) / 2
|
||||||
dist_mat[pop1, pop2] <- div
|
dist_mat[pop1, pop2] <- div
|
||||||
|
|
@ -232,9 +233,9 @@ writeMixtureInfo <- function(
|
||||||
|
|
||||||
dist_mat <- dist_mat + t(dist_mat) # make it symmetric
|
dist_mat <- dist_mat + t(dist_mat) # make it symmetric
|
||||||
for (pop1 in 1:npops) {
|
for (pop1 in 1:npops) {
|
||||||
rivi <- c("Cluster_", as.character(pop1), " ")
|
rivi <- c("\nCluster_", as.character(pop1), "\n")
|
||||||
for (pop2 in 1:npops) {
|
for (pop2 in 1:npops) {
|
||||||
rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]), " ")
|
rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]))
|
||||||
}
|
}
|
||||||
cat(rivi)
|
cat(rivi)
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
|
|
@ -244,11 +245,11 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(
|
cat(
|
||||||
"List of sizes of 10 best visited partitions",
|
"List of sizes of 10 best visited partitions",
|
||||||
"and corresponding log(ml) values"
|
"and corresponding log(ml) values\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
|
|
@ -278,7 +279,7 @@ writeMixtureInfo <- function(
|
||||||
line <- c(
|
line <- c(
|
||||||
as.character(partitionSummary[part, 1]),
|
as.character(partitionSummary[part, 1]),
|
||||||
" ",
|
" ",
|
||||||
as.character(partitionSummary(part, 2))
|
as.character(partitionSummary[part, 2])
|
||||||
)
|
)
|
||||||
cat(line)
|
cat(line)
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
|
|
@ -288,9 +289,9 @@ writeMixtureInfo <- function(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!fixedK) {
|
if (!fixedK) {
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat(" ")
|
cat("\n")
|
||||||
cat("Probabilities for number of clusters")
|
cat("Probabilities for number of clusters\n")
|
||||||
|
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
append(fid, " ")
|
append(fid, " ")
|
||||||
|
|
@ -322,7 +323,7 @@ writeMixtureInfo <- function(
|
||||||
line <- c(
|
line <- c(
|
||||||
as.character(npopsTaulu[i]), " ", as.character(probs[i])
|
as.character(npopsTaulu[i]), " ", as.character(probs[i])
|
||||||
)
|
)
|
||||||
cat(line)
|
cat(line, "\n")
|
||||||
if (fid != -1) {
|
if (fid != -1) {
|
||||||
append(fid, line)
|
append(fid, line)
|
||||||
append(fid, "\n")
|
append(fid, "\n")
|
||||||
|
|
|
||||||
|
|
@ -8,9 +8,8 @@ greedyMix(
|
||||||
data,
|
data,
|
||||||
format,
|
format,
|
||||||
partitionCompare = NULL,
|
partitionCompare = NULL,
|
||||||
ninds = NULL,
|
ninds = 1L,
|
||||||
npops = 1L,
|
npops = 1L,
|
||||||
priorTerm = NULL,
|
|
||||||
counts = NULL,
|
counts = NULL,
|
||||||
sumcounts = NULL,
|
sumcounts = NULL,
|
||||||
max_iter = 100L,
|
max_iter = 100L,
|
||||||
|
|
@ -32,8 +31,6 @@ greedyMix(
|
||||||
|
|
||||||
\item{npops}{number of populations}
|
\item{npops}{number of populations}
|
||||||
|
|
||||||
\item{priorTerm}{prior terms}
|
|
||||||
|
|
||||||
\item{counts}{counts}
|
\item{counts}{counts}
|
||||||
|
|
||||||
\item{sumcounts}{sumcounts}
|
\item{sumcounts}{sumcounts}
|
||||||
|
|
@ -55,6 +52,8 @@ greedyMix(
|
||||||
\item{noalle}{number of alleles}
|
\item{noalle}{number of alleles}
|
||||||
|
|
||||||
\item{adjprior}{ajuster prior probabilities}
|
\item{adjprior}{ajuster prior probabilities}
|
||||||
|
|
||||||
|
\item{priorTerm}{prior terms}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Clustering of individuals
|
Clustering of individuals
|
||||||
|
|
|
||||||
|
|
@ -87,7 +87,7 @@ for run = 1:nruns
|
||||||
apu = rows(i);
|
apu = rows(i);
|
||||||
PARTITION(i) = initialPartition(apu(1));
|
PARTITION(i) = initialPartition(apu(1));
|
||||||
end
|
end
|
||||||
|
|
||||||
COUNTS = counts; SUMCOUNTS = sumcounts;
|
COUNTS = counts; SUMCOUNTS = sumcounts;
|
||||||
POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm);
|
POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm);
|
||||||
LOGDIFF = repmat(-Inf,ninds,npops);
|
LOGDIFF = repmat(-Inf,ninds,npops);
|
||||||
|
|
@ -98,7 +98,7 @@ for run = 1:nruns
|
||||||
kokeiltu = zeros(nRoundTypes, 1);
|
kokeiltu = zeros(nRoundTypes, 1);
|
||||||
roundTypes = [1 1]; %Ykkösvaiheen sykli kahteen kertaan.
|
roundTypes = [1 1]; %Ykkösvaiheen sykli kahteen kertaan.
|
||||||
ready = 0; vaihe = 1;
|
ready = 0; vaihe = 1;
|
||||||
|
|
||||||
if dispText
|
if dispText
|
||||||
disp(' ');
|
disp(' ');
|
||||||
disp(['Mixture analysis started with initial ' num2str(npops) ' populations.']);
|
disp(['Mixture analysis started with initial ' num2str(npops) ' populations.']);
|
||||||
|
|
@ -106,11 +106,11 @@ for run = 1:nruns
|
||||||
|
|
||||||
while ready ~= 1
|
while ready ~= 1
|
||||||
muutoksia = 0;
|
muutoksia = 0;
|
||||||
|
|
||||||
if dispText
|
if dispText
|
||||||
disp(['Performing steps: ' num2str(roundTypes)]);
|
disp(['Performing steps: ' num2str(roundTypes)]);
|
||||||
end
|
end
|
||||||
|
|
||||||
for n = 1:length(roundTypes)
|
for n = 1:length(roundTypes)
|
||||||
|
|
||||||
round = roundTypes(n);
|
round = roundTypes(n);
|
||||||
|
|
@ -465,7 +465,7 @@ for run = 1:nruns
|
||||||
|
|
||||||
npops = poistaTyhjatPopulaatiot(npops);
|
npops = poistaTyhjatPopulaatiot(npops);
|
||||||
POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm);
|
POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm);
|
||||||
if dispText
|
if dispText
|
||||||
disp(['Found partition with ' num2str(npops) ' populations.']);
|
disp(['Found partition with ' num2str(npops) ' populations.']);
|
||||||
disp(['Log(ml) = ' num2str(logml)]);
|
disp(['Log(ml) = ' num2str(logml)]);
|
||||||
disp(' ');
|
disp(' ');
|
||||||
|
|
@ -491,7 +491,7 @@ COUNTS = countsBest;
|
||||||
SUMCOUNTS = sumCountsBest;
|
SUMCOUNTS = sumCountsBest;
|
||||||
POP_LOGML = pop_logmlBest;
|
POP_LOGML = pop_logmlBest;
|
||||||
LOGDIFF = logdiffbest;
|
LOGDIFF = logdiffbest;
|
||||||
|
|
||||||
%--------------------------------------------------------------------------
|
%--------------------------------------------------------------------------
|
||||||
|
|
||||||
function clearGlobalVars
|
function clearGlobalVars
|
||||||
|
|
@ -509,9 +509,9 @@ function Z = linkage(Y, method)
|
||||||
[k, n] = size(Y);
|
[k, n] = size(Y);
|
||||||
m = (1+sqrt(1+8*n))/2;
|
m = (1+sqrt(1+8*n))/2;
|
||||||
if k ~= 1 | m ~= fix(m)
|
if k ~= 1 | m ~= fix(m)
|
||||||
error('The first input has to match the output of the PDIST function in size.');
|
error('The first input has to match the output of the PDIST function in size.');
|
||||||
end
|
end
|
||||||
if nargin == 1 % set default switch to be 'co'
|
if nargin == 1 % set default switch to be 'co'
|
||||||
method = 'co';
|
method = 'co';
|
||||||
end
|
end
|
||||||
method = lower(method(1:2)); % simplify the switch string.
|
method = lower(method(1:2)); % simplify the switch string.
|
||||||
|
|
@ -519,19 +519,19 @@ monotonic = 1;
|
||||||
Z = zeros(m-1,3); % allocate the output matrix.
|
Z = zeros(m-1,3); % allocate the output matrix.
|
||||||
N = zeros(1,2*m-1);
|
N = zeros(1,2*m-1);
|
||||||
N(1:m) = 1;
|
N(1:m) = 1;
|
||||||
n = m; % since m is changing, we need to save m in n.
|
n = m; % since m is changing, we need to save m in n.
|
||||||
R = 1:n;
|
R = 1:n;
|
||||||
for s = 1:(n-1)
|
for s = 1:(n-1)
|
||||||
X = Y;
|
X = Y;
|
||||||
[v, k] = min(X);
|
[v, k] = min(X);
|
||||||
i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1)));
|
i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1)));
|
||||||
j = k - (i-1)*(m-i/2)+i;
|
j = k - (i-1)*(m-i/2)+i;
|
||||||
Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A
|
Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A
|
||||||
I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables.
|
I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables.
|
||||||
U = [I1 I2 I3];
|
U = [I1 I2 I3];
|
||||||
I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3];
|
I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3];
|
||||||
J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3];
|
J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3];
|
||||||
|
|
||||||
switch method
|
switch method
|
||||||
case 'si' %single linkage
|
case 'si' %single linkage
|
||||||
Y(I) = min(Y(I),Y(J));
|
Y(I) = min(Y(I),Y(J));
|
||||||
|
|
@ -548,12 +548,12 @@ for s = 1:(n-1)
|
||||||
end
|
end
|
||||||
J = [J i*(m-(i+1)/2)-m+j];
|
J = [J i*(m-(i+1)/2)-m+j];
|
||||||
Y(J) = []; % no need for the cluster information about j.
|
Y(J) = []; % no need for the cluster information about j.
|
||||||
|
|
||||||
% update m, N, R
|
% update m, N, R
|
||||||
m = m-1;
|
m = m-1;
|
||||||
N(n+s) = N(R(i)) + N(R(j));
|
N(n+s) = N(R(i)) + N(R(j));
|
||||||
R(i) = n+s;
|
R(i) = n+s;
|
||||||
R(j:(n-1))=R((j+1):n);
|
R(j:(n-1))=R((j+1):n);
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -623,7 +623,7 @@ function [muutokset, diffInCounts] = ...
|
||||||
%
|
%
|
||||||
% Lisäys 25.9.2007:
|
% Lisäys 25.9.2007:
|
||||||
% Otettu käyttöön globaali muuttuja LOGDIFF, johon on tallennettu muutokset
|
% Otettu käyttöön globaali muuttuja LOGDIFF, johon on tallennettu muutokset
|
||||||
% logml:ssä siirrettäessä yksilöitä toisiin populaatioihin.
|
% logml:ssä siirrettäessä yksilöitä toisiin populaatioihin.
|
||||||
|
|
||||||
global COUNTS; global SUMCOUNTS;
|
global COUNTS; global SUMCOUNTS;
|
||||||
global PARTITION; global POP_LOGML;
|
global PARTITION; global POP_LOGML;
|
||||||
|
|
@ -647,7 +647,7 @@ COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts;
|
||||||
SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts;
|
||||||
|
|
||||||
i2 = find(muutokset==-Inf); % Etsitään populaatiot jotka muuttuneet viime kerran jälkeen.
|
i2 = find(muutokset==-Inf); % Etsitään populaatiot jotka muuttuneet viime kerran jälkeen.
|
||||||
i2 = setdiff(i2,i1);
|
i2 = setdiff(i2,i1);
|
||||||
i2_logml = POP_LOGML(i2);
|
i2_logml = POP_LOGML(i2);
|
||||||
|
|
||||||
ni2 = length(i2);
|
ni2 = length(i2);
|
||||||
|
|
@ -668,19 +668,19 @@ LOGDIFF(ind,:) = muutokset;
|
||||||
|
|
||||||
function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
|
function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
|
||||||
% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
|
% Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien
|
||||||
% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n
|
% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n
|
||||||
% riveillä rows. rows pitää olla vaakavektori.
|
% riveillä rows. rows pitää olla vaakavektori.
|
||||||
|
|
||||||
diffInCounts = zeros(max_noalle, nloci);
|
diffInCounts = zeros(max_noalle, nloci);
|
||||||
for i=rows
|
for i=rows
|
||||||
row = data(i,:);
|
row = data(i,:);
|
||||||
notEmpty = find(row>=0);
|
notEmpty = find(row>=0);
|
||||||
|
|
||||||
if length(notEmpty)>0
|
if length(notEmpty)>0
|
||||||
diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ...
|
diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ...
|
||||||
diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1;
|
diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
%------------------------------------------------------------------------
|
%------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
@ -693,8 +693,8 @@ function updateGlobalVariables(ind, i2, diffInCounts, ...
|
||||||
% Suorittaa globaalien muuttujien muutokset, kun yksilö ind
|
% Suorittaa globaalien muuttujien muutokset, kun yksilö ind
|
||||||
% on siirretään koriin i2.
|
% on siirretään koriin i2.
|
||||||
|
|
||||||
global PARTITION;
|
global PARTITION;
|
||||||
global COUNTS;
|
global COUNTS;
|
||||||
global SUMCOUNTS;
|
global SUMCOUNTS;
|
||||||
global POP_LOGML;
|
global POP_LOGML;
|
||||||
global LOGDIFF;
|
global LOGDIFF;
|
||||||
|
|
@ -724,7 +724,7 @@ function [muutokset, diffInCounts] = laskeMuutokset2( ...
|
||||||
i1, globalRows, data, adjprior, priorTerm);
|
i1, globalRows, data, adjprior, priorTerm);
|
||||||
% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikä olisi
|
% Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikä olisi
|
||||||
% muutos logml:ssä, mikäli korin i1 kaikki yksilöt siirretään
|
% muutos logml:ssä, mikäli korin i1 kaikki yksilöt siirretään
|
||||||
% koriin i.
|
% koriin i.
|
||||||
|
|
||||||
global COUNTS; global SUMCOUNTS;
|
global COUNTS; global SUMCOUNTS;
|
||||||
global PARTITION; global POP_LOGML;
|
global PARTITION; global POP_LOGML;
|
||||||
|
|
@ -839,7 +839,7 @@ for pop2 = 1:npops2
|
||||||
|
|
||||||
i2 = [1:i1-1 , i1+1:npops];
|
i2 = [1:i1-1 , i1+1:npops];
|
||||||
i2_logml = POP_LOGML(i2)';
|
i2_logml = POP_LOGML(i2)';
|
||||||
|
|
||||||
COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
|
||||||
SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
|
||||||
new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)';
|
new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)';
|
||||||
|
|
@ -848,7 +848,7 @@ for pop2 = 1:npops2
|
||||||
|
|
||||||
muutokset(pop2,i2) = new_i1_logml - i1_logml ...
|
muutokset(pop2,i2) = new_i1_logml - i1_logml ...
|
||||||
+ new_i2_logml - i2_logml;
|
+ new_i2_logml - i2_logml;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
%------------------------------------------------------------------------------------
|
%------------------------------------------------------------------------------------
|
||||||
|
|
@ -858,7 +858,7 @@ function muutokset = laskeMuutokset5(inds, globalRows, data, adjprior, ...
|
||||||
|
|
||||||
% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mikä olisi
|
% Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mikä olisi
|
||||||
% muutos logml:ssä, mikäli yksilö i vaihtaisi koria i1:n ja i2:n välillä.
|
% muutos logml:ssä, mikäli yksilö i vaihtaisi koria i1:n ja i2:n välillä.
|
||||||
|
|
||||||
global COUNTS; global SUMCOUNTS;
|
global COUNTS; global SUMCOUNTS;
|
||||||
global PARTITION; global POP_LOGML;
|
global PARTITION; global POP_LOGML;
|
||||||
|
|
||||||
|
|
@ -885,14 +885,14 @@ for i = 1:ninds
|
||||||
SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts;
|
SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts;
|
||||||
COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts;
|
COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts;
|
||||||
SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts;
|
SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts;
|
||||||
|
|
||||||
new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm);
|
||||||
muutokset(i) = sum(new_logmls);
|
muutokset(i) = sum(new_logmls);
|
||||||
|
|
||||||
COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts;
|
COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts;
|
||||||
SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts;
|
SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts;
|
||||||
COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts;
|
COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts;
|
||||||
SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts;
|
SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts;
|
||||||
end
|
end
|
||||||
|
|
||||||
muutokset = muutokset - i1_logml - i2_logml;
|
muutokset = muutokset - i1_logml - i2_logml;
|
||||||
|
|
@ -952,7 +952,7 @@ dist2 = dist(apu);
|
||||||
|
|
||||||
|
|
||||||
function npops = poistaTyhjatPopulaatiot(npops)
|
function npops = poistaTyhjatPopulaatiot(npops)
|
||||||
% Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
% Poistaa tyhjentyneet populaatiot COUNTS:ista ja
|
||||||
% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in.
|
% SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in.
|
||||||
|
|
||||||
global COUNTS;
|
global COUNTS;
|
||||||
|
|
@ -1006,7 +1006,7 @@ if abs(logml)<10000
|
||||||
end
|
end
|
||||||
if logml<0
|
if logml<0
|
||||||
mjono(pointer-1) = '-';
|
mjono(pointer-1) = '-';
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
suurinYks = 4;
|
suurinYks = 4;
|
||||||
while abs(logml)/(10^(suurinYks+1)) >= 1
|
while abs(logml)/(10^(suurinYks+1)) >= 1
|
||||||
|
|
@ -1035,8 +1035,8 @@ end
|
||||||
|
|
||||||
function digit = palautaYks(num,yks)
|
function digit = palautaYks(num,yks)
|
||||||
% palauttaa luvun num 10^yks termin kertoimen
|
% palauttaa luvun num 10^yks termin kertoimen
|
||||||
% string:inä
|
% string:inä
|
||||||
% yks täytyy olla kokonaisluku, joka on
|
% yks täytyy olla kokonaisluku, joka on
|
||||||
% vähintään -1:n suuruinen. Pienemmillä
|
% vähintään -1:n suuruinen. Pienemmillä
|
||||||
% luvuilla tapahtuu jokin pyöristysvirhe.
|
% luvuilla tapahtuu jokin pyöristysvirhe.
|
||||||
|
|
||||||
|
|
@ -1063,7 +1063,7 @@ if abs(div)<100
|
||||||
if arvo>0
|
if arvo>0
|
||||||
mjono(1) = num2str(arvo);
|
mjono(1) = num2str(arvo);
|
||||||
end
|
end
|
||||||
|
|
||||||
else
|
else
|
||||||
suurinYks = floor(log10(div));
|
suurinYks = floor(log10(div));
|
||||||
mjono(6) = num2str(suurinYks);
|
mjono(6) = num2str(suurinYks);
|
||||||
|
|
@ -1125,7 +1125,7 @@ T = zeros(m,1);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
function T = clusternum(X, T, k, c)
|
function T = clusternum(X, T, k, c)
|
||||||
m = size(X,1)+1;
|
m = size(X,1)+1;
|
||||||
while(~isempty(k))
|
while(~isempty(k))
|
||||||
|
|
@ -1136,7 +1136,7 @@ while(~isempty(k))
|
||||||
% Assign this node number to leaf children
|
% Assign this node number to leaf children
|
||||||
t = (children<=m);
|
t = (children<=m);
|
||||||
T(children(t)) = c;
|
T(children(t)) = c;
|
||||||
|
|
||||||
% Move to next level
|
% Move to next level
|
||||||
k = children(~t) - m;
|
k = children(~t) - m;
|
||||||
end
|
end
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue