diff --git a/NAMESPACE b/NAMESPACE index f4a2451..11639f0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(greedyMix) +export(handleData) export(load_fasta) importFrom(R6,R6Class) importFrom(Rsamtools,scanBam) diff --git a/R/greedyMix.R b/R/greedyMix.R index 8923eab..a5b31e6 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -54,7 +54,7 @@ greedyMix <- function( # Generating partition summary =============================================== ekat <- seq(1L, c[["rowsFromInd"]], ninds * c[["rowsFromInd"]]) # ekat = (1:rowsFromInd:ninds*rowsFromInd)'; c[["rows"]] <- c(ekat, ekat + c[["rowsFromInd"]] - 1L) # c.rows = [ekat ekat+rowsFromInd-1] - logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose); + logml_npops_partitionSummary <- indMixWrapper(c, npops, counts, sumcounts, max_iter, fixedK, verbose) logml <- logml_npops_partitionSummary[["logml"]] npops <- logml_npops_partitionSummary[["npops"]] partitionSummary <- logml_npops_partitionSummary[["partitionSummary"]] @@ -72,8 +72,8 @@ greedyMix <- function( # Writing mixture info ======================================================= changesInLogml <- writeMixtureInfo( - logml, rowsFromInd, data, adjprior, priorTerm, NULL, inp, partitionSummary, - popnames, fixedK + logml, c[["rowsFromInd"]], c[["data"]], c[["adjprior"]], c[["priorTerm"]], + NULL, inp, partitionSummary, popnames, fixedK ) # Updateing results ========================================================== diff --git a/R/handleData.R b/R/handleData.R index 86d40a3..97a8d32 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -9,6 +9,7 @@ #' code to the smallest code that is larger than any code in use. After this, #' the function changes the allele codes so that one locus j #' codes get values between? 1, ..., noalle(j). +#' @export handleData <- function(raw_data, format = "Genepop") { # Alkuper?isen datan viimeinen sarake kertoo, milt?yksil?lt? # kyseinen rivi on per?isin. Funktio tutkii ensin, ett?montako diff --git a/R/indMix.R b/R/indMix.R index 8008f6a..6d2a1ad 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -68,7 +68,7 @@ indMix <- function(c, npops, counts = NULL, sumcounts = NULL, max_iter = 100L, d nruns <- length(npopsTaulu) initData <- data - data <- data[, 1:(ncol(data) - 1)] + data <- data[, seq_along(noalle)] # Original code always dropped last column. logmlBest <- -1e50 partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml) diff --git a/R/laskeLoggis.R b/R/laskeLoggis.R index aa147eb..bb6ced9 100644 --- a/R/laskeLoggis.R +++ b/R/laskeLoggis.R @@ -1,7 +1,7 @@ laskeLoggis <- function(counts, sumcounts, adjprior) { npops <- size(counts, 3) - - sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops)))))) + replicated_adjprior <- array(adjprior, c(nrow(adjprior), ncol(adjprior), npops)) + sum1 <- sum(sum(sum(lgamma(counts + replicated_adjprior)))) sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts))) logml2 <- sum1 - npops * sum3 loggis <- logml2 diff --git a/R/writeMixtureInfo.R b/R/writeMixtureInfo.R index c0d331a..187e793 100644 --- a/R/writeMixtureInfo.R +++ b/R/writeMixtureInfo.R @@ -31,12 +31,12 @@ writeMixtureInfo <- function( } dispLine() - cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:") - cat(c("Data file: ", inputFile)) - cat("Model: independent") - cat(c("Number of clustered individuals: ", ownNum2Str(ninds))) - cat(c("Number of groups in optimal partition: ", ownNum2Str(npops))) - cat(c("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml))) + cat("RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n") + cat("Data file: ", inputFile, "\n") + cat("Model: independent\n") + cat("Number of clustered individuals: ", ownNum2Str(ninds), "\n") + cat("Number of groups in optimal partition: ", ownNum2Str(npops), "\n") + cat("Log(marginal likelihood) of optimal partition: ", ownNum2Str(logml), "\n") cat(" ") if (fid != -1) { append(fid, "RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:\n") @@ -87,10 +87,10 @@ writeMixtureInfo <- function( "Cluster ", as.character(m), ": {", as.character(indsInM[1]) ) for (k in 2:cluster_size) { - text <- c(text, ", ", as.character(indsInM[k])) + text <- c(text, ",", as.character(indsInM[k])) } } - text <- c(text, "}") + text <- c(text, "}\n") while (length(text) > 58) { # Take one line and display it. new_line <- takeLine(text, 58) @@ -106,7 +106,7 @@ writeMixtureInfo <- function( text <- "" } } - if (text != "") { + if (any(text != "")) { cat(text) if (fid != -1) { append(fid, text) @@ -116,11 +116,11 @@ writeMixtureInfo <- function( } if (npops > 1) { - cat(" ") - cat(" ") + cat("\n") + cat("\n") cat( "Changes in log(marginal likelihood)", - " if indvidual i is moved to group j:" + " if indvidual i is moved to group j:\n" ) if (fid != -1) { append(fid, " ") @@ -131,7 +131,7 @@ writeMixtureInfo <- function( fid, c( "Changes in log(marginal likelihood)", - "if indvidual i is moved to group j:" + "if indvidual i is moved to group j:\n" ) ) append(fid, "\n") @@ -167,9 +167,9 @@ writeMixtureInfo <- function( if (names) { nimi <- as.character(popnames[ind]) - rivi <- c(blanks(maxSize - length(nimi)), nimi, ":") + rivi <- c(blanks(maxSize - length(nimi)), nimi, ":\n") } else { - rivi <- c(blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":") + rivi <- c("\n", blanks(4 - floor(log10(ind))), ownNum2Str(ind), ":\n") } for (j in 1:npops) { rivi <- c(rivi, " ", logml2String(omaRound(muutokset[j]))) @@ -181,9 +181,9 @@ writeMixtureInfo <- function( } } - cat(" ") - cat(" ") - cat("KL-divergence matrix in PHYLIP format:") + cat("\n") + cat("\n") + cat("KL-divergence matrix in PHYLIP format:\n") dist_mat <- zeros(npops, npops) if (fid != -1) { @@ -193,6 +193,7 @@ writeMixtureInfo <- function( append(fid, "\n") } + COUNTS <- COUNTS[seq_len(nrow(adjprior)), seq_len(ncol(adjprior)), , drop = FALSE] maxnoalle <- size(COUNTS, 1) nloci <- size(COUNTS, 2) d <- zeros(maxnoalle, nloci, npops) @@ -204,8 +205,8 @@ writeMixtureInfo <- function( prior[1, nollia] <- 1 for (pop1 in 1:npops) { - d[, , pop1] <- (squeeze(COUNTS[, , pop1]) + prior) / - repmat(sum(squeeze(COUNTS[, , pop1]) + prior), c(maxnoalle, 1)) + squeezed_COUNTS_prior <- squeeze(COUNTS[, , pop1]) + prior + d[, , pop1] <- squeezed_COUNTS_prior / sum(squeezed_COUNTS_prior) } ekarivi <- as.character(npops) cat(ekarivi) @@ -215,14 +216,14 @@ writeMixtureInfo <- function( } for (pop1 in 1:npops) { - for (pop2 in 1:(pop1 - 1)) { + for (pop2 in seq_len(pop1 - 1)) { dist1 <- d[, , pop1] dist2 <- d[, , pop2] div12 <- sum( - sum(dist1 * log2((dist1 + 10^-10) / (dist2 + 10^-10))) + sum(dist1 * base::log2((dist1 + 10^-10) / (dist2 + 10^-10))) ) / nloci div21 <- sum( - sum(dist2 * log2((dist2 + 10^-10) / (dist1 + 10^-10))) + sum(dist2 * base::log2((dist2 + 10^-10) / (dist1 + 10^-10))) ) / nloci div <- (div12 + div21) / 2 dist_mat[pop1, pop2] <- div @@ -232,9 +233,9 @@ writeMixtureInfo <- function( dist_mat <- dist_mat + t(dist_mat) # make it symmetric for (pop1 in 1:npops) { - rivi <- c("Cluster_", as.character(pop1), " ") + rivi <- c("\nCluster_", as.character(pop1), "\n") for (pop2 in 1:npops) { - rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2]), " ") + rivi <- c(rivi, kldiv2str(dist_mat[pop1, pop2])) } cat(rivi) if (fid != -1) { @@ -244,11 +245,11 @@ writeMixtureInfo <- function( } } - cat(" ") - cat(" ") + cat("\n") + cat("\n") cat( "List of sizes of 10 best visited partitions", - "and corresponding log(ml) values" + "and corresponding log(ml) values\n" ) if (fid != -1) { @@ -278,7 +279,7 @@ writeMixtureInfo <- function( line <- c( as.character(partitionSummary[part, 1]), " ", - as.character(partitionSummary(part, 2)) + as.character(partitionSummary[part, 2]) ) cat(line) if (fid != -1) { @@ -288,9 +289,9 @@ writeMixtureInfo <- function( } if (!fixedK) { - cat(" ") - cat(" ") - cat("Probabilities for number of clusters") + cat("\n") + cat("\n") + cat("Probabilities for number of clusters\n") if (fid != -1) { append(fid, " ") @@ -322,7 +323,7 @@ writeMixtureInfo <- function( line <- c( as.character(npopsTaulu[i]), " ", as.character(probs[i]) ) - cat(line) + cat(line, "\n") if (fid != -1) { append(fid, line) append(fid, "\n") diff --git a/man/greedyMix.Rd b/man/greedyMix.Rd index dc26245..7f5b799 100644 --- a/man/greedyMix.Rd +++ b/man/greedyMix.Rd @@ -8,9 +8,8 @@ greedyMix( data, format, partitionCompare = NULL, - ninds = NULL, + ninds = 1L, npops = 1L, - priorTerm = NULL, counts = NULL, sumcounts = NULL, max_iter = 100L, @@ -32,8 +31,6 @@ greedyMix( \item{npops}{number of populations} -\item{priorTerm}{prior terms} - \item{counts}{counts} \item{sumcounts}{sumcounts} @@ -55,6 +52,8 @@ greedyMix( \item{noalle}{number of alleles} \item{adjprior}{ajuster prior probabilities} + +\item{priorTerm}{prior terms} } \description{ Clustering of individuals diff --git a/matlab/independent/indMix.m b/matlab/independent/indMix.m index 2296a45..98ea2be 100644 --- a/matlab/independent/indMix.m +++ b/matlab/independent/indMix.m @@ -87,7 +87,7 @@ for run = 1:nruns apu = rows(i); PARTITION(i) = initialPartition(apu(1)); end - + COUNTS = counts; SUMCOUNTS = sumcounts; POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); LOGDIFF = repmat(-Inf,ninds,npops); @@ -98,7 +98,7 @@ for run = 1:nruns kokeiltu = zeros(nRoundTypes, 1); roundTypes = [1 1]; %Ykkösvaiheen sykli kahteen kertaan. ready = 0; vaihe = 1; - + if dispText disp(' '); disp(['Mixture analysis started with initial ' num2str(npops) ' populations.']); @@ -106,11 +106,11 @@ for run = 1:nruns while ready ~= 1 muutoksia = 0; - + if dispText disp(['Performing steps: ' num2str(roundTypes)]); end - + for n = 1:length(roundTypes) round = roundTypes(n); @@ -465,7 +465,7 @@ for run = 1:nruns npops = poistaTyhjatPopulaatiot(npops); POP_LOGML = computePopulationLogml(1:npops, adjprior, priorTerm); - if dispText + if dispText disp(['Found partition with ' num2str(npops) ' populations.']); disp(['Log(ml) = ' num2str(logml)]); disp(' '); @@ -491,7 +491,7 @@ COUNTS = countsBest; SUMCOUNTS = sumCountsBest; POP_LOGML = pop_logmlBest; LOGDIFF = logdiffbest; - + %-------------------------------------------------------------------------- function clearGlobalVars @@ -509,9 +509,9 @@ function Z = linkage(Y, method) [k, n] = size(Y); m = (1+sqrt(1+8*n))/2; if k ~= 1 | m ~= fix(m) - error('The first input has to match the output of the PDIST function in size.'); + error('The first input has to match the output of the PDIST function in size.'); end -if nargin == 1 % set default switch to be 'co' +if nargin == 1 % set default switch to be 'co' method = 'co'; end method = lower(method(1:2)); % simplify the switch string. @@ -519,19 +519,19 @@ monotonic = 1; Z = zeros(m-1,3); % allocate the output matrix. N = zeros(1,2*m-1); N(1:m) = 1; -n = m; % since m is changing, we need to save m in n. +n = m; % since m is changing, we need to save m in n. R = 1:n; for s = 1:(n-1) X = Y; [v, k] = min(X); i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); j = k - (i-1)*(m-i/2)+i; - Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A + Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. U = [I1 I2 I3]; I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - + switch method case 'si' %single linkage Y(I) = min(Y(I),Y(J)); @@ -548,12 +548,12 @@ for s = 1:(n-1) end J = [J i*(m-(i+1)/2)-m+j]; Y(J) = []; % no need for the cluster information about j. - + % update m, N, R - m = m-1; + m = m-1; N(n+s) = N(R(i)) + N(R(j)); R(i) = n+s; - R(j:(n-1))=R((j+1):n); + R(j:(n-1))=R((j+1):n); end @@ -623,7 +623,7 @@ function [muutokset, diffInCounts] = ... % % Lisäys 25.9.2007: % Otettu käyttöön globaali muuttuja LOGDIFF, johon on tallennettu muutokset -% logml:ssä siirrettäessä yksilöitä toisiin populaatioihin. +% logml:ssä siirrettäessä yksilöitä toisiin populaatioihin. global COUNTS; global SUMCOUNTS; global PARTITION; global POP_LOGML; @@ -647,7 +647,7 @@ COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; i2 = find(muutokset==-Inf); % Etsitään populaatiot jotka muuttuneet viime kerran jälkeen. -i2 = setdiff(i2,i1); +i2 = setdiff(i2,i1); i2_logml = POP_LOGML(i2); ni2 = length(i2); @@ -668,19 +668,19 @@ LOGDIFF(ind,:) = muutokset; function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n +% lukumäärät (vastaavasti kuin COUNTS:issa), jotka ovat data:n % riveillä rows. rows pitää olla vaakavektori. diffInCounts = zeros(max_noalle, nloci); for i=rows row = data(i,:); notEmpty = find(row>=0); - + if length(notEmpty)>0 diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; end -end +end %------------------------------------------------------------------------ @@ -693,8 +693,8 @@ function updateGlobalVariables(ind, i2, diffInCounts, ... % Suorittaa globaalien muuttujien muutokset, kun yksilö ind % on siirretään koriin i2. -global PARTITION; -global COUNTS; +global PARTITION; +global COUNTS; global SUMCOUNTS; global POP_LOGML; global LOGDIFF; @@ -724,7 +724,7 @@ function [muutokset, diffInCounts] = laskeMuutokset2( ... i1, globalRows, data, adjprior, priorTerm); % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mikä olisi % muutos logml:ssä, mikäli korin i1 kaikki yksilöt siirretään -% koriin i. +% koriin i. global COUNTS; global SUMCOUNTS; global PARTITION; global POP_LOGML; @@ -839,7 +839,7 @@ for pop2 = 1:npops2 i2 = [1:i1-1 , i1+1:npops]; i2_logml = POP_LOGML(i2)'; - + COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; @@ -848,7 +848,7 @@ for pop2 = 1:npops2 muutokset(pop2,i2) = new_i1_logml - i1_logml ... + new_i2_logml - i2_logml; - end + end end %------------------------------------------------------------------------------------ @@ -858,7 +858,7 @@ function muutokset = laskeMuutokset5(inds, globalRows, data, adjprior, ... % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mikä olisi % muutos logml:ssä, mikäli yksilö i vaihtaisi koria i1:n ja i2:n välillä. - + global COUNTS; global SUMCOUNTS; global PARTITION; global POP_LOGML; @@ -885,14 +885,14 @@ for i = 1:ninds SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - + new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); muutokset(i) = sum(new_logmls); - + COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; + SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; end muutokset = muutokset - i1_logml - i2_logml; @@ -952,7 +952,7 @@ dist2 = dist(apu); function npops = poistaTyhjatPopulaatiot(npops) -% Poistaa tyhjentyneet populaatiot COUNTS:ista ja +% Poistaa tyhjentyneet populaatiot COUNTS:ista ja % SUMCOUNTS:ista. Päivittää npops:in ja PARTITION:in. global COUNTS; @@ -1006,7 +1006,7 @@ if abs(logml)<10000 end if logml<0 mjono(pointer-1) = '-'; - end + end else suurinYks = 4; while abs(logml)/(10^(suurinYks+1)) >= 1 @@ -1035,8 +1035,8 @@ end function digit = palautaYks(num,yks) % palauttaa luvun num 10^yks termin kertoimen -% string:inä -% yks täytyy olla kokonaisluku, joka on +% string:inä +% yks täytyy olla kokonaisluku, joka on % vähintään -1:n suuruinen. Pienemmillä % luvuilla tapahtuu jokin pyöristysvirhe. @@ -1063,7 +1063,7 @@ if abs(div)<100 if arvo>0 mjono(1) = num2str(arvo); end - + else suurinYks = floor(log10(div)); mjono(6) = num2str(suurinYks); @@ -1125,7 +1125,7 @@ T = zeros(m,1); end end end - + function T = clusternum(X, T, k, c) m = size(X,1)+1; while(~isempty(k)) @@ -1136,7 +1136,7 @@ while(~isempty(k)) % Assign this node number to leaf children t = (children<=m); T(children(t)) = c; - + % Move to next level k = children(~t) - m; end