diff --git a/.Rbuildignore b/.Rbuildignore index 9238dee..740b69e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,7 +1,7 @@ LICENSE TODO.md -matlab +^matlab CHANGELOG.md CITATION.cff .travis.yml -data/ExamplesDataFormatting \ No newline at end of file +inst/ext/ExamplesDataFormatting \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index e7e0081..aa9e85a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rBAPS Title: Bayesian Analysis of Population Structure -Version: 0.0.0.9000 -Date: 2020-01-14 +Version: 0.0.0.9001 +Date: 2020-11-09 Authors@R: c( person( @@ -30,14 +30,14 @@ Description: Partial R implementation of the BAPS software Corander et al. 2008b ; Tang et al. 2009 ; Cheng et al. 2011 , - available at ), provides a - computationally-efficient method for the identification of admixture events + available at ), provides a + computationally-efficient method for the identification of admixture events in genetic population history. License: GPL-3 Encoding: UTF-8 LazyData: true RoxygenNote: 7.1.1 -Suggests: +Suggests: testthat (>= 2.1.0) Imports: methods diff --git a/NAMESPACE b/NAMESPACE index b7d74df..0be8af8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,8 @@ export(computeIndLogml) export(computePersonalAllFreqs) export(computeRows) export(etsiParas) +export(fgetl) +export(fopen) export(greedyMix) export(handleData) export(initPopNames) @@ -21,6 +23,7 @@ export(linkage) export(logml2String) export(lueGenePopData) export(lueNimi) +export(matlab2r) export(noIndex) export(ownNum2Str) export(poistaLiianPienet) @@ -47,3 +50,4 @@ export(writeMixtureInfo) importFrom(methods,is) importFrom(stats,runif) importFrom(utils,read.delim) +importFrom(utils,write.table) diff --git a/R/addAlleles.R b/R/addAlleles.R index a1c87bd..6dd1c08 100644 --- a/R/addAlleles.R +++ b/R/addAlleles.R @@ -12,22 +12,25 @@ addAlleles <- function(data, ind, line, divider) { # line. Jos data on 3 digit formaatissa on divider=1000. # Jos data on 2 digit formaatissa on divider=100. - nloci <- size(data, 2) - 1 + nloci <- size(data, 2) # added 1 from original code if (size(data, 1) < (2 * ind)) { - data <- c(data, zeros(100, nloci + 1)) + data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code } k <- 1 - merkki <- line[k] + merkki <- substring(line, k, k) while (merkki != ',') { - k <- k + 1 - merkki <- line[k] + k <- k + 1 + merkki <- substring(line, k, k) } - line <- line[k + 1:length(line)] + line <- substring(line, k + 1) # clear k; clear merkki; - alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]]) - + if (grepl(" ", line)) { + alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]]) + } else if (grepl("\t", line)) { + alleeliTaulu <- as.numeric(strsplit(line, split = "\t")[[1]]) + } if (length(alleeliTaulu) != nloci) { stop('Incorrect data format.') @@ -35,9 +38,9 @@ addAlleles <- function(data, ind, line, divider) { for (j in seq_len(nloci)) { ekaAlleeli <- floor(alleeliTaulu[j] / divider) - if (ekaAlleeli == 0) ekaAlleeli <- -999 + if (is.na(ekaAlleeli) | ekaAlleeli == 0) ekaAlleeli <- -999 tokaAlleeli <- alleeliTaulu[j] %% divider - if (tokaAlleeli == 0) tokaAlleeli <- -999 + if (is.na(tokaAlleeli) | tokaAlleeli == 0) tokaAlleeli <- -999 data[2 * ind - 1, j] <- ekaAlleeli data[2 * ind, j] <- tokaAlleeli diff --git a/R/addToSummary.R b/R/addToSummary.R new file mode 100644 index 0000000..4affbe9 --- /dev/null +++ b/R/addToSummary.R @@ -0,0 +1,18 @@ +addToSummary <- function(logml, partitionSummary, worstIndex) { + # Tiedet��n, ett� annettu logml on isompi kuin huonoin arvo + # partitionSummary taulukossa. Jos partitionSummary:ss� ei viel� ole + # annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja + # nykyist� partitiota vastaava nclusters:in arvo. Muutoin ei tehd� mit��n. + + apu <- find(abs(partitionSummary[, 2] - logml) < 1e-5) + if (isempty(apu)) { + # Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn. + npops <- length(unique(PARTITION)) + partitionSummary[worstIndex, 1] <- npops + partitionSummary[worstIndex, 2] <- logml + added <- 1 + } else { + added <- 0 + } + return(list(partitionSummary = partitionSummary, added = added)) +} \ No newline at end of file diff --git a/R/admixture_initialization.R b/R/admixture_initialization.R new file mode 100644 index 0000000..c69dd63 --- /dev/null +++ b/R/admixture_initialization.R @@ -0,0 +1,20 @@ +#' @title Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. +#' @param data_matrix data_matrix +#' @param nclusters ncluster +#' @param Z Z + +admixture_initialization <- function (data_matrix, nclusters, Z) { + size_data <- size(data_matrix) + nloci <- size_data[2] - 1 + n <- max(data_matrix[, ncol(data_matrix)]) + T <- cluster_own(Z, nclusters) + initial_partition <- zeros(size_data[1], 1) + for (i in 1:n) { + kori <- T[i] + here <- find(data_matrix[, ncol(data_matrix)] == i) + for (j in 1:length(here)) { + initial_partition[here[j], 1] <- kori + } + } + return(initial_partition) +} \ No newline at end of file diff --git a/R/arvoSeuraavaTi.R b/R/arvoSeuraavaTi.R new file mode 100644 index 0000000..e276889 --- /dev/null +++ b/R/arvoSeuraavaTi.R @@ -0,0 +1,14 @@ +arvoSeuraavaTila <- function(muutokset, logml) { + # Suorittaa yksil�n seuraavan tilan arvonnan + + y <- logml + muutokset # siirron j�lkeiset logml:t + y <- y - max(y) + y <- exp(y) + summa <- sum(y) + y <- y / summa + y <- cumsum(y) + + i2 <- rand_disc(y) # uusi kori + suurin <- muutokset(i2) + return(list(suurin = suurin, i2 = i2)) +} diff --git a/R/cell.R b/R/cell.R index beac532..746634e 100644 --- a/R/cell.R +++ b/R/cell.R @@ -6,10 +6,10 @@ #' @return An array of zeroes with the dimensions passed on call cell <- function(n, sz = c(n, n), ...) { if (length(sz) == 1 & missing(...)) { - return(array(dim = c(n, sz))) + return(array(0, dim = c(n, sz))) } else if (length(sz) == 2) { - return(array(dim = sz)) + return(array(0, dim = sz)) } else { - return(array(dim = c(n, sz, ...))) + return(array(0, dim = c(n, sz, ...))) } } \ No newline at end of file diff --git a/R/clearGlobalVars.R b/R/clearGlobalVars.R index 1d77d9c..3a7746c 100644 --- a/R/clearGlobalVars.R +++ b/R/clearGlobalVars.R @@ -1,7 +1,7 @@ clearGlobalVars <- function() { - COUNTS <- SUMCOUNTS <- PARTITION <- POP_LOGML <- vector() # placeholders - COUNTS <<- vector() - SUMCOUNTS <<- vector() - PARTITION <<- vector() - POP_LOGML <<- vector() + COUNTS <- vector() + SUMCOUNTS <- vector() + PARTITION <- vector() + POP_LOGML <- vector() + LOGDIFF <- vector() } \ No newline at end of file diff --git a/R/cluster_own.R b/R/cluster_own.R new file mode 100644 index 0000000..306fbbf --- /dev/null +++ b/R/cluster_own.R @@ -0,0 +1,51 @@ +cluster_own <- function(Z, nclust) { + true <- TRUE + false <- FALSE + maxclust <- nclust + # % Start of algorithm + m <- size(Z, 1) + 1 + T <- zeros(m, 1) + # % maximum number of clusters based on inconsistency + if (m <= maxclust) { + T = t((1:m)) + } else if (maxclust == 1) { + T <- ones(m, 1) + } else { + clsnum <- 1 + for (k in (m - maxclust + 1):(m - 1)) { + i = Z(k, 1) # left tree + if (i <= m) { # original node, no leafs + T[i] = clsnum + clsnum = clsnum + 1 + } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree + T <- clusternum(Z, T, i - m, clsnum) + clsnum <- clsnum + 1 + } + i <- Z(k, 2) # right tree + if (i <= m) { # original node, no leafs + T[i] <- clsnum + clsnum <- clsnum + 1 + } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree + T <- clusternum(Z, T, i - m, clsnum) + clsnum <- clsnum + 1 + } + } + } + return(T) +} + +clusternum <- function(X, T, k, c) { + m <- size(X, 1) + 1 + while (!isempty(k)) { + # Get the children of nodes at this level + children <- X[k, 1:2] + children <- children + + # Assign this node number to leaf children + t <- (children <= m) + T[children[t]] <- c + # Move to next level + k <- children(!t) - m + } + return(T) +} \ No newline at end of file diff --git a/R/clusternum.R b/R/clusternum.R new file mode 100644 index 0000000..dbdcdbb --- /dev/null +++ b/R/clusternum.R @@ -0,0 +1,16 @@ +clusternum <- function(X, T, k, c) { + m <- size(X, 1) + 1 + while (!is.null(k)) { + # % Get the children of nodes at this level + children <- X[k, 1:2] + children <- children[, ] + + # % Assign this node number to leaf children + t <- (children <= m) + T[children(t)] <- c + + # % Move to next level + k <- children(!t) - m + } + return(T) +} diff --git a/R/computeDiffInCounts.R b/R/computeDiffInCounts.R new file mode 100644 index 0000000..af53f61 --- /dev/null +++ b/R/computeDiffInCounts.R @@ -0,0 +1,17 @@ +computeDiffInCounts <- function(rows, max_noalle, nloci, data) { + # % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien + # % lukum��r�t (vastaavasti kuin COUNTS:issa), jotka ovat data:n + # % riveill� rows. rows pit�� olla vaakavektori. + + diffInCounts <- zeros(max_noalle, nloci) + for (i in rows) { + row <- data[i, ] + notEmpty <- find(row>=0) + + if (length(notEmpty) > 0) { + diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] <- + diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] + 1 + } + } + return(diffInCounts) +} diff --git a/R/computeLogml.R b/R/computeLogml.R new file mode 100644 index 0000000..795925c --- /dev/null +++ b/R/computeLogml.R @@ -0,0 +1,27 @@ +computeLogml <- function(counts, sumcounts, noalle, data, rowsFromInd) { + nloci <- size(counts, 2) + npops <- size(counts, 3) + adjnoalle <- zeros(max(noalle), nloci) + for (j in 1:nloci) { + adjnoalle[1:noalle[j], j] <- noalle(j) + if ((noalle(j) 1)) + } + return(list(emptyPop = emptyPop, pops = pops)) +} diff --git a/R/getDistances.R b/R/getDistances.R new file mode 100644 index 0000000..853be3b --- /dev/null +++ b/R/getDistances.R @@ -0,0 +1,43 @@ +getDistances <- function(data_matrix, nclusters) { + + # %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance + # %gives partition in 8 - bit format + # %allocates all alleles of a single individual into the same basket + # %data_matrix contains #Loci + 1 columns, last column indicate whose alleles are placed in each row, + # %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row + # %missing values are indicated by zeros in the partition and by negative integers in the data_matrix. + + size_data <- size(data_matrix) + nloci <- size_data[2] - 1 + n <- max(data_matrix[, ncol(data_matrix)]) + distances <- zeros(choose(n, 2), 1) + pointer <- 1 + for (i in 1:n - 1) { + i_data <- data_matrix[ + find(data_matrix[, ncol(data_matrix)] == i), + 1:nloci + ] + for (j in (i + 1):n) { + d_ij <- 0 + j_data <- data_matrix[find(data_matrix[, ncol()] == j), 1:nloci] + vertailuja <- 0 + for (k in 1:size(i_data, 1)) { + for (l in 1:size(j_data, 1)) { + here_i <- find(i_data[k, ] >= 0) + here_j <- find(j_data[l, ] >= 0) + here_joint <- intersect(here_i, here_j) + vertailuja <- vertailuja + length(here_joint) + d_ij <- d_ij + length( + find(i_data[k, here_joint] != j_data[l, here_joint]) + ) + } + } + d_ij <- d_ij / vertailuja + distances[pointer] <- d_ij + pointer <- pointer + 1 + } + } + + Z <- linkage(t(distances)) + return(list(Z = Z, distances = distances)) +} diff --git a/R/globals.R b/R/globals.R new file mode 100644 index 0000000..b994258 --- /dev/null +++ b/R/globals.R @@ -0,0 +1,3 @@ +utils::globalVariables( + c("PARTITION", "COUNTS", "SUMCOUNTS", "LOGDIFF", "POP_LOGML", "GAMMA_LN") +) \ No newline at end of file diff --git a/R/greedyMix.R b/R/greedyMix.R index edac561..132299e 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -11,10 +11,11 @@ greedyMix <- function( savePreProcessed = NULL, filePreProcessed = NULL ) { - # ASK: graphical components. Remove? + # ASK: Unclear when fixedk == TRUE. Remove? # check whether fixed k mode is selected # h0 <- findobj('Tag','fixk_menu') # fixedK = get(h0, 'userdata'); + fixedK <- FALSE # if fixedK # if ~(fixKWarning == 1) % call function fixKWarning @@ -22,9 +23,11 @@ greedyMix <- function( # end # end + # ASK: ditto # % check whether partition compare mode is selected # h1 = findobj('Tag','partitioncompare_menu'); # partitionCompare = get(h1, 'userdata'); + partitionCompare <- FALSE if (is(tietue, "list") | is(tietue, "character")) { # ---------------------------------------------------------------------- @@ -149,13 +152,25 @@ greedyMix <- function( kunnossa <- testaaGenePopData(filename_pathname) if (kunnossa == 0) stop("testaaGenePopData returned 0") - # [data,popnames]=lueGenePopData([pathname filename]); # TODO: trans + data_popnames <- lueGenePopData(filename_pathname) + data <- data_popnames$data + popnames <- data_popnames$popnames # h0 = findobj('Tag','filename1_text'); # set(h0,'String',filename); clear h0; -# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO:trans -# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: trans + list_dranap <- handleData(data) + data <- list_dranap$newData + rowsFromInd <- list_dranap$rowsFromInd + alleleCodes <- list_dranap$alleleCodes + noalle <- list_dranap$noalle + adjprior <- list_dranap$adjprior + priorTerm <- list_dranap$prioterm + + list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug + Z <- list_Zd$Z + dist <- list_Zd$dist + if (is.null(savePreProcessed)) { save_preproc <- questdlg( quest = 'Do you wish to save pre-processed data?', @@ -232,15 +247,17 @@ greedyMix <- function( } # ========================================================================== - # Declaring global variables + # Declaring global variables and changing environment of children functions # ========================================================================== PARTITION <- vector() COUNTS <- vector() SUMCOUNTS <- vector() POP_LOGML <- vector() - clearGlobalVars <- vector() - # ========================================================================== + clearGlobalVars() + environment(writeMixtureInfo) <- environment() + # ========================================================================== + c <- list() c$data <- data c$noalle <- noalle c$adjprior <- adjprior @@ -253,43 +270,49 @@ greedyMix <- function( ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd) c$rows <- c(ekat, ekat + rowsFromInd - 1) + # ASK remove? # partition compare - if (!is.null(partitionCompare)) { - nsamplingunits <- size(c$rows, 1) - partitions <- partitionCompare$partitions - npartitions <- size(partitions, 2) - partitionLogml <- zeros(1, npartitions) - for (i in seq_len(npartitions)) { - # number of unique partition lables - npops <- length(unique(partitions[, i])) + # if (!is.null(partitionCompare)) { + # nsamplingunits <- size(c$rows, 1) + # partitions <- partitionCompare$partitions + # npartitions <- size(partitions, 2) + # partitionLogml <- zeros(1, npartitions) + # for (i in seq_len(npartitions)) { + # # number of unique partition lables + # npops <- length(unique(partitions[, i])) - partitionInd <- zeros(ninds * rowsFromInd, 1) - partitionSample <- partitions[, i] - for (j in seq_len(nsamplingunits)) { - partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j] - } - # partitionLogml[i] = initialCounts( - # partitionInd, - # data[, seq_len(end - 1)], - # npops, - # c$rows, - # noalle, - # adjprior - # ) #TODO translate - } - # return the logml result - partitionCompare$logmls <- partitionLogml - # set(h1, 'userdata', partitionCompare) # ASK remove? - return() - } - - # ASK remove (graphical part)? - # if (fixedK) { - # #logml_npops_partitionSummary <- indMix_fixK(c) # ASK translate? - # } else { - # #logml_npops_partitionSummary <- indMix(c) # ASK translate? + # partitionInd <- zeros(ninds * rowsFromInd, 1) + # partitionSample <- partitions[, i] + # for (j in seq_len(nsamplingunits)) { + # partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j] + # } + # # partitionLogml[i] = initialCounts( + # # partitionInd, + # # data[, seq_len(end - 1)], + # # npops, + # # c$rows, + # # noalle, + # # adjprior + # # ) #TODO translate + # } + # # return the logml result + # partitionCompare$logmls <- partitionLogml + # # set(h1, 'userdata', partitionCompare) + # return() # } - # if (logml_npops_partitionSummary$logml == 1) return() + + if (fixedK) { + # logml_npops_partitionSummary <- indMix_fixK(c) # TODO: translate + # logml <- logml_npops_partitionSummary$logml + # npops <- logml_npops_partitionSummary$npops + # partitionSummary <- logml_npops_partitionSummary$partitionSummary + } else { + logml_npops_partitionSummary <- indMix(c) # TODO: translate + logml <- logml_npops_partitionSummary$logml + npops <- logml_npops_partitionSummary$npops + partitionSummary <- logml_npops_partitionSummary$partitionSummary + } + if (logml_npops_partitionSummary$logml == 1) return() data <- data[, seq_len(ncol(data) - 1)] @@ -298,11 +321,13 @@ greedyMix <- function( # inp = get(h0,'String'); # h0 = findobj('Tag','filename2_text') # outp = get(h0,'String'); + inp <- vector() + outp <- vector() changesInLogml <- writeMixtureInfo( logml, rowsFromInd, data, adjprior, priorTerm, outp, inp, popnames, fixedK - ) # FIXMEL depends on get function above + ) # FIXME: broken # viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder @@ -356,583 +381,3 @@ greedyMix <- function( if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps') } } - -# %------------------------------------------------------------------------------------- - -# function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -# % Tiedet��n, ett?annettu logml on isompi kuin huonoin arvo -# % partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -# % annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja -# % nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mit��n. - -# apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -# if isempty(apu) -# % Nyt l�ydetty partitio ei ole viel?kirjattuna summaryyn. -# global PARTITION; -# npops = length(unique(PARTITION)); -# partitionSummary(worstIndex,1) = npops; -# partitionSummary(worstIndex,2) = logml; -# added = 1; -# else -# added = 0; -# end - - -# %-------------------------------------------------------------------------- - - -# function [suurin, i2] = arvoSeuraavaTila(muutokset, logml) -# % Suorittaa yksil�n seuraavan tilan arvonnan - -# y = logml + muutokset; % siirron j�lkeiset logml:t -# y = y - max(y); -# y = exp(y); -# summa = sum(y); -# y = y/summa; -# y = cumsum(y); - -# i2 = rand_disc(y); % uusi kori -# suurin = muutokset(i2); - - -# %-------------------------------------------------------------------------------------- - - -# function svar=rand_disc(CDF) -# %returns an index of a value from a discrete distribution using inversion method -# slump=rand; -# har=find(CDF>slump); -# svar=har(1); - - -# %------------------------------------------------------------------------------------- - - -# function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ... -# adjprior, priorTerm) -# % Suorittaa globaalien muuttujien muutokset, kun yksil?ind -# % on siirret��n koriin i2. - -# global PARTITION; -# global COUNTS; -# global SUMCOUNTS; -# global POP_LOGML; - -# i1 = PARTITION(ind); -# PARTITION(ind)=i2; - -# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -# %--------------------------------------------------------------------------------- - - -# function updateGlobalVariables2( ... -# i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm); -# % Suorittaa globaalien muuttujien muutokset, kun kaikki -# % korissa i1 olevat yksil�t siirret��n koriin i2. - -# global PARTITION; -# global COUNTS; -# global SUMCOUNTS; -# global POP_LOGML; - -# inds = find(PARTITION==i1); -# PARTITION(inds) = i2; - -# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -# POP_LOGML(i1) = 0; -# POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm); - - -# %------------------------------------------------------------------------------------ - - -# function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ... -# adjprior, priorTerm, i2); -# % Suorittaa globaalien muuttujien p�ivitykset, kun yksil�t 'muuttuvat' -# % siirret��n koriin i2. Ennen siirtoa yksil�iden on kuuluttava samaan -# % koriin. - -# global PARTITION; -# global COUNTS; -# global SUMCOUNTS; -# global POP_LOGML; - -# i1 = PARTITION(muuttuvat(1)); -# PARTITION(muuttuvat) = i2; - -# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -# %---------------------------------------------------------------------- - - -# function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm) -# % Palauttaa yksil�t j�rjestyksess?siten, ett?ensimm�isen?on -# % se, jonka poistaminen populaatiosta pop nostaisi logml:n -# % arvoa eniten. - -# global COUNTS; global SUMCOUNTS; -# ninds = length(inds); -# apuTaulu = [inds, zeros(ninds,1)]; - -# for i=1:ninds -# ind = inds(i); -# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; -# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; -# apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); -# COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; -# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -# end -# apuTaulu = sortrows(apuTaulu,2); -# inds = apuTaulu(ninds:-1:1,1); - -# %------------------------------------------------------------------------------------ - - -# function [muutokset, diffInCounts] = ... -# laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm) -# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -# % muutos logml:ss? mik�li yksil?ind siirret��n koriin i. -# % diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v? -# % COUNTS:in siivuun i2, mik�li muutos toteutetaan. - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; -# npops = size(COUNTS,3); -# muutokset = zeros(npops,1); - -# i1 = PARTITION(ind); -# i1_logml = POP_LOGML(i1); - -# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -# i2 = [1:i1-1 , i1+1:npops]; -# i2_logml = POP_LOGML(i2); - -# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -# muutokset(i2) = new_i1_logml - i1_logml ... -# + new_i2_logml - i2_logml; - - -# %------------------------------------------------------------------------------------ - - -# function [muutokset, diffInCounts] = laskeMuutokset2( ... -# i1, rowsFromInd, data, adjprior, priorTerm); -# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -# % muutos logml:ss? mik�li korin i1 kaikki yksil�t siirret��n -# % koriin i. - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; -# npops = size(COUNTS,3); -# muutokset = zeros(npops,1); - -# i1_logml = POP_LOGML(i1); - -# inds = find(PARTITION==i1); -# ninds = length(inds); - -# if ninds==0 -# diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); -# return; -# end - -# rows = computeRows(rowsFromInd, inds, ninds); - -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -# i2 = [1:i1-1 , i1+1:npops]; -# i2_logml = POP_LOGML(i2); - -# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -# muutokset(i2) = new_i1_logml - i1_logml ... -# + new_i2_logml - i2_logml; - - - -# %------------------------------------------------------------------------------------ - - -# function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ... -# data, adjprior, priorTerm, i1) -# % Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -# % kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -# % inds2(find(T2==i)) siirret��n koriin j. - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; -# npops = size(COUNTS,3); -# npops2 = length(unique(T2)); -# muutokset = zeros(npops2, npops); - -# i1_logml = POP_LOGML(i1); - -# for pop2 = 1:npops2 -# inds = inds2(find(T2==pop2)); -# ninds = length(inds); -# if ninds>0 -# rows = computeRows(rowsFromInd, inds, ninds); -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -# i2 = [1:i1-1 , i1+1:npops]; -# i2_logml = POP_LOGML(i2)'; - -# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; -# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -# muutokset(pop2,i2) = new_i1_logml - i1_logml ... -# + new_i2_logml - i2_logml; -# end -# end - - -# %------------------------------------------------------------------------------------ - -# function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ... -# priorTerm, i1, i2) - -# % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -# % muutos logml:ss? mik�li yksil?i vaihtaisi koria i1:n ja i2:n v�lill? - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; - -# ninds = length(inds); -# muutokset = zeros(ninds,1); - -# i1_logml = POP_LOGML(i1); -# i2_logml = POP_LOGML(i2); - -# for i = 1:ninds -# ind = inds(i); -# if PARTITION(ind)==i1 -# pop1 = i1; %mist? -# pop2 = i2; %mihin -# else -# pop1 = i2; -# pop2 = i1; -# end -# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; -# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; -# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; -# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - -# PARTITION(ind) = pop2; - -# new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); - -# muutokset(i) = sum(new_logmls); - -# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; -# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; -# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; -# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - -# PARTITION(ind) = pop1; -# end - -# muutokset = muutokset - i1_logml - i2_logml; - -# %-------------------------------------------------------------------------- - - - -# function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -# % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -# % lukum��r�t (vastaavasti kuin COUNTS:issa), jotka ovat data:n -# % riveill?rows. - -# diffInCounts = zeros(max_noalle, nloci); -# for i=rows -# row = data(i,:); -# notEmpty = find(row>=0); - -# if length(notEmpty)>0 -# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... -# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; -# end -# end - - - -# %------------------------------------------------------------------------------------ - - -# function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -# % Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -# % logml:t koreille, jotka on m��ritelty pops-muuttujalla. - -# global COUNTS; -# global SUMCOUNTS; -# x = size(COUNTS,1); -# y = size(COUNTS,2); -# z = length(pops); - -# popLogml = ... -# squeeze(sum(sum(reshape(... -# gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... -# ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - - -# %----------------------------------------------------------------------------------- - - -# function npops = poistaTyhjatPopulaatiot(npops) -# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja -# % SUMCOUNTS:ista. P�ivitt�� npops:in ja PARTITION:in. - -# global COUNTS; -# global SUMCOUNTS; -# global PARTITION; - -# notEmpty = find(any(SUMCOUNTS,2)); -# COUNTS = COUNTS(:,:,notEmpty); -# SUMCOUNTS = SUMCOUNTS(notEmpty,:); - -# for n=1:length(notEmpty) -# apu = find(PARTITION==notEmpty(n)); -# PARTITION(apu)=n; -# end -# npops = length(notEmpty); - - -# %---------------------------------------------------------------------------------- -# %Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -# function initial_partition=admixture_initialization(data_matrix,nclusters,Z) -# size_data=size(data_matrix); -# nloci=size_data(2)-1; -# n=max(data_matrix(:,end)); -# T=cluster_own(Z,nclusters); -# initial_partition=zeros(size_data(1),1); -# for i=1:n -# kori=T(i); -# here=find(data_matrix(:,end)==i); -# for j=1:length(here) -# initial_partition(here(j),1)=kori; -# end -# end - -# function T = cluster_own(Z,nclust) -# true=logical(1); -# false=logical(0); -# maxclust = nclust; -# % Start of algorithm -# m = size(Z,1)+1; -# T = zeros(m,1); -# % maximum number of clusters based on inconsistency -# if m <= maxclust -# T = (1:m)'; -# elseif maxclust==1 -# T = ones(m,1); -# else -# clsnum = 1; -# for k = (m-maxclust+1):(m-1) -# i = Z(k,1); % left tree -# if i <= m % original node, no leafs -# T(i) = clsnum; -# clsnum = clsnum + 1; -# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree -# T = clusternum(Z, T, i-m, clsnum); -# clsnum = clsnum + 1; -# end -# i = Z(k,2); % right tree -# if i <= m % original node, no leafs -# T(i) = clsnum; -# clsnum = clsnum + 1; -# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree -# T = clusternum(Z, T, i-m, clsnum); -# clsnum = clsnum + 1; -# end -# end -# end - -# function T = clusternum(X, T, k, c) -# m = size(X,1)+1; -# while(~isempty(k)) -# % Get the children of nodes at this level -# children = X(k,1:2); -# children = children(:); - -# % Assign this node number to leaf children -# t = (children<=m); -# T(children(t)) = c; - -# % Move to next level -# k = children(~t) - m; -# end - -# %---------------------------------------------------------------------------------------- - - -# function [Z, distances]=getDistances(data_matrix,nclusters) - -# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance -# %gives partition in 8-bit format -# %allocates all alleles of a single individual into the same basket -# %data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row, -# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row -# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix. - -# size_data=size(data_matrix); -# nloci=size_data(2)-1; -# n=max(data_matrix(:,end)); -# distances=zeros(nchoosek(n,2),1); -# pointer=1; -# for i=1:n-1 -# i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci); -# for j=i+1:n -# d_ij=0; -# j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci); -# vertailuja = 0; -# for k=1:size(i_data,1) -# for l=1:size(j_data,1) -# here_i=find(i_data(k,:)>=0); -# here_j=find(j_data(l,:)>=0); -# here_joint=intersect(here_i,here_j); -# vertailuja = vertailuja + length(here_joint); -# d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint))); -# end -# end -# d_ij = d_ij / vertailuja; -# distances(pointer)=d_ij; -# pointer=pointer+1; -# end -# end - -# Z=linkage(distances'); - - - -# %---------------------------------------------------------------------------------------- - -# function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd) -# nloci = size(counts,2); -# npops = size(counts,3); -# adjnoalle = zeros(max(noalle),nloci); -# for j=1:nloci -# adjnoalle(1:noalle(j),j)=noalle(j); -# if (noalle(j) 1)); -# end \ No newline at end of file diff --git a/R/handleData.R b/R/handleData.R index 40d39e7..5bfd072 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -20,12 +20,11 @@ handleData <- function(raw_data) { # koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi. # T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j # koodit saavat arvoja v?lill?1,...,noalle(j). - data <- raw_data nloci <- size(raw_data, 2) - 1 dataApu <- data[, 1:nloci] - nollat <- find(dataApu==0) + nollat <- find(dataApu == 0) if (!isempty(nollat)) { isoinAlleeli <- max(max(dataApu)) dataApu[nollat] <- isoinAlleeli + 1 @@ -39,9 +38,12 @@ handleData <- function(raw_data) { alleelitLokuksessa <- cell(nloci, 1) for (i in 1:nloci) { alleelitLokuksessaI <- unique(data[, i]) - alleelitLokuksessa[i, 1] <- alleelitLokuksessaI[ - find(alleelitLokuksessaI >= 0) - ] + alleelitLokuksessaI_pos <- find(alleelitLokuksessaI >= 0) + alleelitLokuksessa[i, 1] <- ifelse( + test = length(alleelitLokuksessaI_pos) > 0, + yes = alleelitLokuksessaI[alleelitLokuksessaI_pos], + no = 0 + ) noalle[i] <- length(alleelitLokuksessa[i, 1]) } alleleCodes <- zeros(max(noalle), nloci) @@ -65,10 +67,10 @@ handleData <- function(raw_data) { emptyRow <- repmat(a, c(1, ncols)) lessThanMax <- find(rowsFromInd < maxRowsFromInd) missingRows <- maxRowsFromInd * nind - nrows - data <- as.matrix(c(data, zeros(missingRows, ncols))) + data <- rbind(data, zeros(missingRows, ncols)) pointer <- 1 for (ind in t(lessThanMax)) { #K?y l?pi ne yksil?t, joilta puuttuu rivej? - miss = maxRowsFromInd-rowsFromInd(ind); # T?lt?yksil?lt?puuttuvien lkm. + miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm. } data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti newData <- data @@ -84,12 +86,12 @@ handleData <- function(raw_data) { priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j]) } out <- list( - newData = newData, + newData = newData, rowsFromInd = rowsFromInd, alleleCodes = alleleCodes, - noalle = noalle, - adjprior = adjprior, - priorTerm = priorTerm + noalle = noalle, + adjprior = adjprior, + priorTerm = priorTerm ) return(out) } \ No newline at end of file diff --git a/R/indMix.R b/R/indMix.R new file mode 100644 index 0000000..3fc2359 --- /dev/null +++ b/R/indMix.R @@ -0,0 +1,567 @@ +indMix <- function(c, npops, dispText) { + # Greedy search algorithm with unknown number of classes for regular + # clustering. + # Input npops is not used if called by greedyMix or greedyPopMix. + + logml <- 1 + clearGlobalVars() + + noalle <- c$noalle + rows <- c$rows + data <- c$data + + adjprior <- c$adjprior + priorTerm <- c$priorTerm + rowsFromInd <- c$rowsFromInd + + if (isfield(c, 'dist')) { + dist <- c$dist + Z <- c$Z + } + + rm(c) + nargin <- length(as.list(match.call())) - 1 + + if (nargin < 2) { + dispText <- 1 + npopstext <- matrix() + ready <- FALSE + teksti <- 'Input upper bound to the number of populations (possibly multiple values)' + while (!ready) { + npopstextExtra <- inputdlg( + teksti, + 1, + '20' + ) + if (isempty(npopstextExtra)) { # Painettu Cancel:ia + return() + } + npopstextExtra <- npopstextExtra[1] + if (length(npopstextExtra)>=255) { + npopstextExtra <- npopstextExtra[1:255] + npopstext <- c(npopstext, ' ', npopstextExtra) + teksti <- 'The input field length limit (255 characters) was reached. Input more values: ' + } else { + npopstext <- c(npopstext, ' ', npopstextExtra) + ready <- TRUE + } + } + rm(ready, teksti) + if (isempty(npopstext) | length(npopstext) == 1) { + return() + } else { + npopsTaulu <- as.numeric(npopstext) + ykkoset <- find(npopsTaulu == 1) + npopsTaulu[ykkoset] <- list() # Mik�li ykk�si� annettu yl�rajaksi, ne poistetaan. + if (isempty(npopsTaulu)) { + logml <- 1 + partitionSummary <- 1 + npops <- 1 + return() + } + rm(ykkoset) + } + } else { + npopsTaulu <- npops + } + + nruns <- length(npopsTaulu) + + initData <- data + data <- data[,1:(ncol(data) - 1)] + + logmlBest <- -1e50 + partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml) + partitionSummary[,1] <- zeros(30, 1) + worstLogml <- -1e50 + worstIndex <- 1 + for (run in 1:nruns) { + npops <- npopsTaulu[run] + if (dispText) { + dispLine() + print( + paste0( + 'Run ', as.character(run), '/', as.character(nruns), + ', maximum number of populations ', as.character(npops), '.' + ) + ) + } + ninds <- size(rows, 1) + + initialPartition <- admixture_initialization(initData, npops, Z) # TODO: translate + sumcounts_counts_logml = initialCounts( + initialPartition, data, npops, rows, noalle, adjprior + ) # TODO: translate + sumcounts <- sumcounts_counts_logml$sumcounts + counts <- sumcounts_counts_logml$counts + logml <- sumcounts_counts_logml$logml + + PARTITION <- zeros(ninds, 1) + for (i in 1:ninds) { + apu <- rows[i] + PARTITION[i] <- initialPartition(apu[1]) + } + + COUNTS <- counts + SUMCOUNTS <- sumcounts + POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) # TODO: translate + LOGDIFF <- repmat(-Inf, c(ninds, npops)) + rm(initialPartition, counts, sumcounts) + + # PARHAAN MIXTURE-PARTITION ETSIMINEN + nRoundTypes <- 7 + kokeiltu <- zeros(nRoundTypes, 1) + roundTypes <- c(1, 1) # Ykk�svaiheen sykli kahteen kertaan. + ready <- 0 + vaihe <- 1 + + if (dispText) { + print(' ') + print( + paste0( + 'Mixture analysis started with initial', + as.character(npops), + 'populations.' + ) + ) + } + + while (ready != 1) { + muutoksia <- 0 + + if (dispText) { + print(paste('Performing steps:', as.character(roundTypes))) + } + + for (n in 1:length(roundTypes)) { + + round <- roundTypes[n] + kivaluku <- 0 + + if (kokeiltu(round) == 1) { #Askelta kokeiltu viime muutoksen j�lkeen + + } else if (round == 0 | round == 1) { #Yksil�n siirt�minen toiseen populaatioon. + inds <- 1:ninds + aputaulu <- c(t(inds), rand(ninds, 1)) + aputaulu <- sortrows(aputaulu, 2) + inds <- t(aputaulu[, 1]) + muutosNyt <- 0 + + for (ind in inds) { + i1 <- PARTITION[ind] + muutokset_diffInCounts = laskeMuutokset( + ind, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + + if (round == 1) { + maxMuutos <- max_MATLAB(muutokset)[[1]] + i2 <- max_MATLAB(muutokset)[[2]] + } + + if (i1 != i2 & maxMuutos > 1e-5) { + # Tapahtui muutos + muutoksia <- 1 + if (muutosNyt == 0) { + muutosNyt <- 1 + if (dispText) { + print('Action 1') + } + } + kokeiltu <- zeros(nRoundTypes, 1) + kivaluku <- kivaluku + 1 + updateGlobalVariables( + ind, i2, diffInCounts, adjprior, priorTerm + ) + logml <- logml+maxMuutos + if (logml > worstLogml) { + partitionSummary_added = addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary_added <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added == 1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } + } + + if (muutosNyt == 0) { + kokeiltu[round] <- 1 + } + + } else if (round == 2) { # Populaation yhdist�minen toiseen. + maxMuutos <- 0 + for (pop in 1:npops) { + muutokset_diffInCounts <- laskeMuutokset2( + pop, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + isoin <- max_MATLAB(muutokset)[[1]] + indeksi <- max_MATLAB(muutokset)[[2]] + if (isoin > maxMuutos) { + maxMuutos <- isoin + i1 <- pop + i2 <- indeksi + diffInCountsBest <- diffInCounts + } + } + + if (maxMuutos > 1e-5) { + muutoksia <- 1 + kokeiltu <- zeros(nRoundTypes, 1) + updateGlobalVariables2( + i1, i2, diffInCountsBest, adjprior, priorTerm + ) + logml <- logml + maxMuutos + if (dispText) { + print('Action 2') + } + if (logml > worstLogml) { + partitionSummary_added <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added==1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } else { + kokeiltu[round] <- 1 + } + + + } else if (round == 3 || round == 4) { #Populaation jakaminen osiin. + maxMuutos <- 0 + ninds <- size(rows, 1) + for (pop in 1:npops) { + inds2 <- find(PARTITION == pop) + ninds2 <- length(inds2) + if (ninds2 > 2) { + dist2 <- laskeOsaDist(inds2, dist, ninds) + Z2 <- linkage(t(dist2)) + if (round == 3) { + npops2 <- max(min(20, floor(ninds2 / 5)), 2) + } else if (round == 4) { + npops2 <- 2 # Moneenko osaan jaetaan + } + T2 <- cluster_own(Z2, npops2) + muutokset <- laskeMuutokset3( + T2, inds2, rows, data, adjprior, priorTerm, pop + ) + isoin <- max_MATLAB(muutokset)[[1]] + indeksi <- max_MATLAB(muutokset)[[2]] + if (isoin > maxMuutos) { + maxMuutos <- isoin + muuttuvaPop2 <- indeksi %% npops2 + if (muuttuvaPop2==0) muuttuvaPop2 <- npops2 + muuttuvat <- inds2[find(T2 == muuttuvaPop2)] + i2 <- ceiling(indeksi / npops2) + } + } + } + if (maxMuutos > 1e-5) { + muutoksia <- 1 + kokeiltu <- zeros(nRoundTypes, 1) + rivit <- list() + for (i in 1:length(muuttuvat)) { + ind <- muuttuvat[i] + lisa <- rows[ind, 1]:rows[ind, 2] + rivit <- rbind(rivit, t(lisa)) + } + diffInCounts <- computeDiffInCounts( + t(rivit), size(COUNTS, 1), size(COUNTS, 2), data + ) + i1 <- PARTITION(muuttuvat[1]) + updateGlobalVariables3( + muuttuvat, diffInCounts, adjprior, priorTerm, i2 + ) + logml <- logml + maxMuutos + if (dispText) { + if (round == 3) { + print('Action 3') + } else { + print('Action 4') + } + } + if (logml > worstLogml) { + partitionSummary_added <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added==1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + + } else { + kokeiltu[round] <- 1 + } + } else if (round == 5 || round == 6) { + j <- 0 + muutettu <- 0 + poplogml <- POP_LOGML + partition <- PARTITION + counts <- COUNTS + sumcounts <- SUMCOUNTS + logdiff <- LOGDIFF + + pops <- sample(npops) + while (j < npops & muutettu == 0) { + j <- j + 1 + pop <- pops[j] + totalMuutos <- 0 + inds <- find(PARTITION == pop) + if (round == 5) { + aputaulu <- c(inds, rand(length(inds), 1)) + aputaulu <- sortrows(aputaulu, 2) + inds <- t(aputaulu[, 1]) + } else if (round == 6) { + inds <- returnInOrder( + inds, pop, rows, data, adjprior, priorTerm + ) + } + + i <- 0 + + while (length(inds) > 0 & i < length(inds)) { + i <- i + 1 + ind <- inds[i] + + muutokset_diffInCounts <- laskeMuutokset( + ind, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + + muutokset[pop] <- -1e50 # Varmasti ei suurin!!! + maxMuutos <- max_MATLAB(muutokset)[[1]] + i2 <- max_MATLAB(muutokset)[[2]] + updateGlobalVariables( + ind, i2, diffInCounts, adjprior, priorTerm + ) + + totalMuutos <- totalMuutos+maxMuutos + logml <- logml + maxMuutos + if (round == 6) { + # Lopetetaan heti kun muutos on positiivinen. + if (totalMuutos > 1e-5) { + i <- length(inds) + } + } + } + + if (totalMuutos > 1e-5) { + kokeiltu <- zeros(nRoundTypes, 1) + muutettu <- 1 + if (muutoksia == 0) { + muutoksia <- 1 # Ulompi kirjanpito. + if (dispText) { + if (round == 5) { + print('Action 5') + } else { + print('Action 6') + } + } + } + if (logml > worstLogml) { + partitionSummary_added <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added==1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } else { + # Miss��n vaiheessa tila ei parantunut. + # Perutaan kaikki muutokset. + PARTITION <- partition + SUMCOUNTS <- sumcounts + POP_LOGML <- poplogml + COUNTS <- counts + logml <- logml - totalMuutos + LOGDIFF <- logdiff + kokeiltu[round] <- 1 + } + } + rm(partition, sumcounts, counts, poplogml) + + } else if (round == 7) { + emptyPop <- findEmptyPop(npops) + j <- 0 + pops <- sample(npops) + muutoksiaNyt <- 0 + if (emptyPop == -1) { + j <- npops + } + while (j < npops) { + j <- j + 1 + pop <- pops[j] + inds2 <- find(PARTITION == pop) + ninds2 <- length(inds2) + if (ninds2 > 5) { + partition <- PARTITION + sumcounts <- SUMCOUNTS + counts <- COUNTS + poplogml <- POP_LOGML + logdiff <- LOGDIFF + + dist2 <- laskeOsaDist(inds2, dist, ninds); + Z2 <- linkage(t(dist2)) + T2 <- cluster_own(Z2, 2) + muuttuvat <- inds2[find(T2 == 1)] + + muutokset <- laskeMuutokset3( + T2, inds2, rows, data, adjprior, priorTerm, pop + ) + totalMuutos <- muutokset(1, emptyPop) + + rivit <- list() + for (i in 1:length(muuttuvat)) { + ind <- muuttuvat[i] + lisa <- rows[ind, 1]:rows[ind, 2] + rivit <- c(rivit, lisa) + } + diffInCounts <- computeDiffInCounts( + rivit, size(COUNTS, 1), size(COUNTS, 2), data + ) + + updateGlobalVariables3( + muuttuvat, diffInCounts, adjprior, priorTerm, + emptyPop + ) + + muutettu <- 1 + while (muutettu == 1) { + muutettu <- 0 + # Siirret��n yksil�it� populaatioiden v�lill� + muutokset <- laskeMuutokset5( + inds2, rows, data, adjprior, priorTerm, + pop, emptyPop + ) + + maxMuutos <- indeksi <- max_MATLAB(muutokset) + + muuttuva <- inds2(indeksi) + if (PARTITION(muuttuva) == pop) { + i2 <- emptyPop + } else { + i2 <- pop + } + + if (maxMuutos > 1e-5) { + rivit <- rows[muuttuva, 1]:rows[muuttuva, 2] + diffInCounts <- computeDiffInCounts( + rivit, size(COUNTS, 1), size(COUNTS, 2), + data + ) + updateGlobalVariables3( + muuttuva,diffInCounts, adjprior, + priorTerm, i2 + ) + muutettu <- 1 + totalMuutos <- totalMuutos + maxMuutos + } + } + if (totalMuutos > 1e-5) { + muutoksia <- 1 + logml <- logml + totalMuutos + if (logml > worstLogml) { + partitionSummary_added = addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary_added <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added == 1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + if (muutoksiaNyt == 0) { + if (dispText) { + print('Action 7') + } + muutoksiaNyt <- 1 + } + kokeiltu <- zeros(nRoundTypes, 1) + j <- npops + } else { + # palutetaan vanhat arvot + PARTITION <- partition + SUMCOUNTS <- sumcounts + COUNTS <- counts + POP_LOGML <- poplogml + LOGDIFF <- logdiff + } + } + + } + + if (muutoksiaNyt == 0) { + kokeiltu[round] <- 1 + } + + } + } + + if (muutoksia == 0) { + if (vaihe <= 4) { + vaihe <= vaihe + 1 + } else if (vaihe == 5) { + ready <- 1 + } + } else { + muutoksia <- 0 + } + + if (ready == 0) { + if (vaihe == 1) { + roundTypes <- c(1) + } else if (vaihe == 2) { + roundTypes <- c(2, 1) + } else if (vaihe == 3) { + roundTypes <- c(5, 5, 7) + } else if (vaihe == 4) { + roundTypes = c(4, 3, 1) + } else if (vaihe == 5) { + roundTypes <- c(6, 7, 2, 3, 4, 1) + } + } + } + + # TALLENNETAAN + + npops <- poistaTyhjatPopulaatiot(npops) + POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) + if (dispText) { + print(paste('Found partition with', as.character(npops), 'populations.')) + print(paste('Log(ml) =', as.character(logml))) + print(' ') + } + + if (logml > logmlBest) { + # P�ivitet��n parasta l�ydetty� partitiota. + logmlBest <- logml + npopsBest <- npops + partitionBest <- PARTITION + countsBest <- COUNTS + sumCountsBest <- SUMCOUNTS + pop_logmlBest <- POP_LOGML + logdiffbest <- LOGDIFF + } + } + return( + list(logml = logml, npops = npops, partitionSummary = partitionSummary) + ) +} diff --git a/R/initialPopCounts.R b/R/initialPopCounts.R new file mode 100644 index 0000000..2bd08ed --- /dev/null +++ b/R/initialPopCounts.R @@ -0,0 +1,20 @@ +initialPopCounts <- function(data, npops, rows, noalle, adjprior) { + nloci <- size(data, 2) + counts <- zeros(max(noalle), nloci, npops) + sumcounts <- zeros(npops, nloci) + + for (i in 1:npops) { + for (j in 1:nloci) { + i_rivit <- rows(i, 1):rows(i, 2) + havainnotLokuksessa <- find(data[i_rivit, j] >= 0) + sumcounts[i, j] <- length(havainnotLokuksessa) + for (k in 1:noalle[j]) { + alleleCode <- k + N_ijk <- length(find(data[i_rivit, j] == alleleCode)) + counts[k, j, i] <- N_ijk + } + } + } + logml <- laskeLoggis(counts, sumcounts, adjprior) + return(sumcounts = sumcounts, counts = counts, logml = logml) +} diff --git a/R/initializeGammaln.R b/R/initializeGammaln.R new file mode 100644 index 0000000..ac1864f --- /dev/null +++ b/R/initializeGammaln.R @@ -0,0 +1,9 @@ +initializeGammaln <- function(ninds, rowsFromInd, maxAlleles) { + #Alustaa GAMMALN muuttujan s.e. GAMMALN(i, j)=gammaln((i - 1) + 1/j) + GAMMA_LN <- zeros((1 + ninds) * rowsFromInd, maxAlleles) + for (i in 1:(ninds + 1) * rowsFromInd) { + for (j in 1:maxAlleles) { + GAMMA_LN[i, j] <- log_gamma((i - 1) + 1/j) + } + } +} diff --git a/R/laskeLoggis.R b/R/laskeLoggis.R index 7bcd26f..672dec9 100644 --- a/R/laskeLoggis.R +++ b/R/laskeLoggis.R @@ -1,7 +1,7 @@ laskeLoggis <- function(counts, sumcounts, adjprior) { npops <- size(counts, 3) - sum1 <- sum(sum(sum(gammaln(counts + repmat(adjprior, c(1, 1, npops)))))) + sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops)))))) sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts))) logml2 <- sum1 - npops * sum3 loggis <- logml2 diff --git a/R/laskeMuutokset12345.R b/R/laskeMuutokset12345.R new file mode 100644 index 0000000..bfb4d59 --- /dev/null +++ b/R/laskeMuutokset12345.R @@ -0,0 +1,226 @@ +#' @title Calculate changes (?) +#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on +#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran +#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään +#' siirrettävää, on vastaavassa kohdassa rivi nollia. +#' @param osuus Percentages? +#' @param omaFreqs own Freqs? +#' @param osuusTaulu Percentage table? +#' @param logml log maximum likelihood +#' @param COUNTS COUNTS +#' @export +laskeMuutokset4 <- function ( + osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0) +) { + npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) + notEmpty <- which(osuusTaulu > 0.005) + muutokset <- zeros(npops) + empties <- !notEmpty + + for (i1 in notEmpty) { + osuusTaulu[i1] <- osuusTaulu[i1] - osuus + for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) { + osuusTaulu[i2] <- osuusTaulu[i2] + osuus + loggis <- computeIndLogml(omaFreqs, osuusTaulu) + + # Work around Matlab OOB bug + if (i1 > nrow(muutokset)) { + muutokset <- rbind(muutokset, muutokset * 0) + } + if (i2 > ncol(muutokset)) { + muutokset <- cbind(muutokset, muutokset * 0) + } + + muutokset[i1, i2] <- loggis - logml + osuusTaulu[i2] <- osuusTaulu[i2] - osuus + } + osuusTaulu[i1] <- osuusTaulu[i1] + osuus + } + return (muutokset) +} + +# Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi +# muutos logml:ss�, mik�li yksil� ind siirret��n koriin i. +# diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v� +# COUNTS:in siivuun i2, mik�li muutos toteutetaan. +# +# Lis�ys 25.9.2007: +# Otettu k�ytt��n globaali muuttuja LOGDIFF, johon on tallennettu muutokset +# logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. +laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) { + npops <- size(COUNTS, 3) + muutokset <- LOGDIFF[ind, ] + + i1 <- PARTITION[ind] + i1_logml <- POP_LOGML[i1] + muutokset[i1] <- 0 + + rows <- globalRows[ind, 1]:globalRows[ind, 2] + diffInCounts <- computeDiffInCounts( + rows, size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) + + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + + i2 <- find(muutokset == -Inf) # Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. + i2 <- setdiff(i2, i1) + i2_logml <- POP_LOGML[i2] + + ni2 <- length(i2) + + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1)) + new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1)) + + muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + LOGDIFF[ind, ] = muutokset + return(list(muutokset = muutokset, diffInCounts = diffInCounts)) +} + +laskeMuutokset2 <- function(i1, globalRows, data, adjprior, priorTerm) { + # % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi + # % muutos logml:ss�, mik�li korin i1 kaikki yksil�t siirret��n + # % koriin i. + + npops <- size(COUNTS, 3) + muutokset <- zeros(npops, 1) + + i1_logml <- POP_LOGML[i1] + + inds <- find(PARTITION == i1) + ninds <- length(inds) + + if (ninds == 0) { + diffInCounts <- zeros(size(COUNTS, 1), size(COUNTS, 2)) + return() + } + + rows = list() + for (i in 1:ninds) { + ind <- inds(i) + lisa <- globalRows(ind, 1):globalRows(ind, 2) + rows <- c(rows, t(lisa)) + } + + diffInCounts <- computeDiffInCounts( + t(rows), size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) + + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + + i2 <- c(1:i1-1, i1+1:npops) + i2_logml <- POP_LOGML[i2] + + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) + new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) + + muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + return(list(muutokset = muutokset, diffInCounts = diffInCounts)) +} + + +laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1) { + # Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio + # kertoo, mik� olisi muutos logml:ss�, jos populaation i1 osapopulaatio + # inds2(find(T2==i)) siirret��n koriin j. + + npops <- size(COUNTS, 3) + npops2 <- length(unique(T2)) + muutokset <- zeros(npops2, npops) + + i1_logml = POP_LOGML[i1] + for (pop2 in 1:npops2) { + inds <- inds2[find(T2==pop2)] + ninds <- length(inds); + if (ninds > 0) { + rows <- list() + for (i in 1:ninds) { + ind <- inds[i] + lisa <- globalRows[ind, 1]:globalRows[ind, 2] + rows <- c(rows, t(lisa)) + } + diffInCounts <- computeDiffInCounts( + t(rows), size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) + + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts + + i2 <- c(1:i1-1, i1+1:npops) + i2_logml <- t(POP_LOGML[i2]) + + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) + new_i2_logml <- t(computePopulationLogml(i2, adjprior, priorTerm)) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) + + muutokset[pop2, i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + } + } + return(muutokset) +} + +laskeMuutokset5 <- function(inds, globalRows, data, adjprior, priorTerm, i1, i2) { + # Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik� olisi + # muutos logml:ss�, mik�li yksil� i vaihtaisi koria i1:n ja i2:n v�lill�. + + ninds <- length(inds) + muutokset <- zeros(ninds, 1) + + i1_logml <- POP_LOGML[i1] + i2_logml <- POP_LOGML[i2] + + for (i in 1:ninds) { + ind <- inds[i] + if (PARTITION[ind] == i1) { + pop1 <- i1 #mist� + pop2 <- i2 #mihin + } else { + pop1 <- i2 + pop2 <- i1 + } + rows <- globalRows[ind, 1]:globalRows[ind, 2] + diffInCounts <- computeDiffInCounts( + rows, size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) + + + + COUNTS[, , pop1] <- COUNTS[, , pop1] - diffInCounts + SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] - diffInSumCounts + COUNTS[, , pop2] <- COUNTS[, , pop2] + diffInCounts + SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] + diffInSumCounts + + new_logmls <- computePopulationLogml(c(i1, i2), adjprior, priorTerm) + muutokset[i] <- sum(new_logmls) + + COUNTS[, , pop1] <- COUNTS[, , pop1] + diffInCounts + SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] + diffInSumCounts + COUNTS[, , pop2] <- COUNTS[, , pop2] - diffInCounts + SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] - diffInSumCounts + } + + muutokset <- muutokset - i1_logml - i2_logml + return(muutokset) +} \ No newline at end of file diff --git a/R/laskeMuutokset4.R b/R/laskeMuutokset4.R deleted file mode 100644 index 56bcef3..0000000 --- a/R/laskeMuutokset4.R +++ /dev/null @@ -1,39 +0,0 @@ -#' @title Calculate changes? -#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on -#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran -#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään -#' siirrettävää, on vastaavassa kohdassa rivi nollia. -#' @param osuus Percentages? -#' @param omaFreqs own Freqs? -#' @param osuusTaulu Percentage table? -#' @param logml log maximum likelihood -#' @param COUNTS COUNTS -#' @export -laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml, - COUNTS = matrix(0)) { - npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) - notEmpty <- which(osuusTaulu > 0.005) - muutokset <- zeros(npops) - empties <- !notEmpty - - for (i1 in notEmpty) { - osuusTaulu[i1] <- osuusTaulu[i1] - osuus - for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) { - osuusTaulu[i2] <- osuusTaulu[i2] + osuus - loggis <- computeIndLogml(omaFreqs, osuusTaulu) - - # Work around Matlab OOB bug - if (i1 > nrow(muutokset)) { - muutokset <- rbind(muutokset, muutokset * 0) - } - if (i2 > ncol(muutokset)) { - muutokset <- cbind(muutokset, muutokset * 0) - } - - muutokset[i1, i2] <- loggis - logml - osuusTaulu[i2] <- osuusTaulu[i2] - osuus - } - osuusTaulu[i1] <- osuusTaulu[i1] + osuus - } - return (muutokset) -} \ No newline at end of file diff --git a/R/laskeOsaDist.R b/R/laskeOsaDist.R new file mode 100644 index 0000000..018614d --- /dev/null +++ b/R/laskeOsaDist.R @@ -0,0 +1,25 @@ +#' @title Lower part of the dist +#' @description Constructs from the dist vector a subvector containing the individual inds2, Forms dist sub-vectors the vector, which includes yksiliden inds2 +#' @param inds2 inds2 +#' @param dist dist +#' @param ninds ninds +#' @author Waldir Leoncio +laskeOsaDist <- function(inds2, dist, ninds) { + # % Muodostaa dist vektorista osavektorin, joka sis�lt�� yksil�iden inds2 + # % v�liset et�isyydet. ninds=kaikkien yksil�iden lukum��r�. + + ninds2 <- length(inds2) + apu <- zeros(choose(ninds2, 2), 2) + rivi <- 1 + for (i in 1:ninds2-1) { + for (j in i+1:ninds2) { + apu[rivi, 1] <- inds2[i] + apu[rivi, 2] <- inds2[j] + rivi <- rivi + 1 + } + } + apu <- (apu[, 1]-1) * ninds - apu[, 1] / 2 * + (apu[, 1]-1) + (apu[, 2] - apu[, 1]) + dist2 <- dist(apu) + return(dist2) +} \ No newline at end of file diff --git a/R/linkage.R b/R/linkage.R index 80d6b30..ccaf9b5 100644 --- a/R/linkage.R +++ b/R/linkage.R @@ -4,10 +4,14 @@ #' linkage algorithm. The input Y is a distance matrix such as is generated by #' PDIST. Y may also be a more general dissimilarity matrix conforming to the #' output format of PDIST. -#' @param Y data +#' +#' Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X. +#' @param Y matrix #' @param method either 'si', 'av', 'co' 'ce' or 'wa' +#' @note This is also a base Matlab function. The reason why the source code is also present here is unclear. #' @export linkage <- function(Y, method = 'co') { + #TODO: compare R output with MATLAB output k <- size(Y)[1] n <- size(Y)[2] m <- (1 + sqrt(1 + 8 * n)) / 2 @@ -24,18 +28,33 @@ linkage <- function(Y, method = 'co') { N[1:m] <- 1 n <- m; # since m is changing, we need to save m in n. R <- 1:n - for (s in 1:(n-1)) { - X <- Y - v <- min(X)[1] - k <- min(X)[2] + for (s in 1:(n - 1)) { + X <- as.matrix(as.vector(Y), ncol=1) + + v <- min_MATLAB(X)$mins + k <- min_MATLAB(X)$idx + i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1))) j <- k - (i - 1) * (m - i / 2) + i - Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A - # Temp variables - I1 <- 1:(i - 1) - I2 <- (i + 1):(j - 1) - I3 <- (j + 1):m + Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A + + # Temp variables + if (i > 1) { + I1 <- 1:(i - 1) + } else { + I1 <- NULL + } + if (i + 1 <= j - 1) { + I2 <- (i + 1):(j - 1) + } else { + I2 <- NULL + } + if (j + 1 <= m) { + I3 <- (j + 1):m + } else { + I3 <- NULL + } U <- c(I1, I2, I3) I <- c( I1 * (m - (I1 + 1) / 2) - m + i, @@ -47,11 +66,13 @@ linkage <- function(Y, method = 'co') { I2 * (m - (I2 + 1) / 2) - m + j, j * (m - (j + 1) / 2) - m + I3 ) - + # Workaround in R for negative values in I and J + # I <- I[I > 0 & I <= length(Y)] + # J <- J[J > 0 & J <= length(Y)] switch(method, - 'si' = Y[I] <- min(Y[I], Y[J]), # single linkage + 'si' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, min), # single linkage 'av' = Y[I] <- Y[I] + Y[J], # average linkage - 'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage + 'co' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, max), #complete linkage 'ce' = { K <- N[R[i]] + N[R[j]] # centroid linkage Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] - @@ -61,8 +82,7 @@ linkage <- function(Y, method = 'co') { Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]]) ) J <- c(J, i * (m - (i + 1) / 2) - m + j) - Y[J] <- vector() # no need for the cluster information about j - + Y <- Y[-J] # no need for the cluster information about j # update m, N, R m <- m - 1 N[n + s] <- N[R[i]] + N[R[j]] diff --git a/R/lueGenePopData.R b/R/lueGenePopData.R index 2c79d03..7276537 100644 --- a/R/lueGenePopData.R +++ b/R/lueGenePopData.R @@ -4,17 +4,16 @@ #' @return list containing data and popnames #' @export lueGenePopData <- function (tiedostonNimi) { - - fid <- load(tiedostonNimi) - line1 <- readLines(fid)[1] # ensimmäinen rivi - line2 <- readLines(fid)[2] # toinen rivi + fid <- readLines(tiedostonNimi) + line <- fid[1] # ensimmäinen rivi + line <- fid[2] # toinen rivi count <- rivinSisaltamienMjonojenLkm(line) - line <- readLines(fid)[3] + line <- fid[3] lokusRiveja <- 1 while (testaaPop(line) == 0) { lokusRiveja <- lokusRiveja + 1 # locus row - line <- readLines(fid)[3 + lokusRiveja] + line <- fid[2 + lokusRiveja] } if (lokusRiveja > 1) { @@ -29,38 +28,34 @@ lueGenePopData <- function (tiedostonNimi) { ninds <- 0 poimiNimi <- 1 digitFormat <- -1 - while (line != -1) { - line <- readLines(fid)[lokusRiveja + 1] - lokusRiveja <- lokusRiveja + 1 - + while (lokusRiveja < length(fid) - 2) { + lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along + line <- fid[lokusRiveja + 2] if (poimiNimi == 1) { - # Edellinen rivi oli 'pop' + # Edellinen rivi oli 'pop' (previous line was pop) nimienLkm <- nimienLkm + 1 ninds <- ninds + 1 if (nimienLkm > size(popnames, 1)) { - popnames <- c(popnames, cell(10, 2)) + popnames <- rbind(popnames, cell(10, 2)) } nimi <- lueNimi(line) if (digitFormat == -1) { digitFormat <- selvitaDigitFormat(line) divider <- 10 ^ digitFormat } - popnames[nimienLkm, 1] <- nimi #N�in se on greedyMix:iss�kin?!? + popnames[nimienLkm, 1] <- nimi #N�in se on greedyMix:iss�kin?!? popnames[nimienLkm, 2] <- ninds poimiNimi <- 0 - data <- addAlleles(data, ninds, line, divider) - } else if (testaaPop(line)) { poimiNimi <- 1 - - } else if (line != -1) { + } else if (!is.na(line)) { ninds <- ninds + 1 data <- addAlleles(data, ninds, line, divider) } } - data <- data[1:(ninds * 2),] - popnames <- popnames[seq_len(nimienLkm),] + data <- data[1:(ninds * 2), ] + popnames <- popnames[seq_len(nimienLkm), ] return(list(data = data, popnames = popnames)) } \ No newline at end of file diff --git a/R/lueNimi.R b/R/lueNimi.R index a5bbb3f..377e791 100644 --- a/R/lueNimi.R +++ b/R/lueNimi.R @@ -1,17 +1,23 @@ #' @title Read the Name -#' @description Reads the line name +#' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area #' @param line line #' @return nimi #' @export lueNimi <- function(line) { + # ========================================================================== + # Validation + # ========================================================================== + if (!grepl(",", line)) { + stop("There are no commas in this line") + } # Palauttaa line:n alusta sen osan, joka on ennen pilkkua. n <- 1 - merkki <- line[n] + merkki <- substring(line, n, n) nimi <- '' while (merkki != ',') { nimi <- c(nimi, merkki) n <- n + 1 - merkki <- line[n] + merkki <- substring(line, n, n) } - return(nimi) + return(paste(nimi, collapse="")) } \ No newline at end of file diff --git a/R/matlab2r.R b/R/matlab2r.R new file mode 100644 index 0000000..092897e --- /dev/null +++ b/R/matlab2r.R @@ -0,0 +1,160 @@ +#' @title Convert Matlab function to R +#' @description Performs basic syntax conversion from Matlab to R +#' @param filename name of the file +#' @param output can be "asis", "clean", "save" or "diff" +#' @param improve_formatting if `TRUE` (default), makes minor changes +#' to conform to best-practice formatting conventions +#' @param change_assignment if `TRUE` (default), uses `<-` as the assignment operator +#' @param append if `FALSE` (default), overwrites file; otherwise, append +#' output to input +#' @return text converted to R, printed to screen or replacing input file +#' @author Waldir Leoncio +#' @importFrom utils write.table +#' @export +#' @note This function is intended to expedite the process of converting a +#' Matlab function to R by making common replacements. It does not have the +#' immediate goal of outputting a ready-to-use function. In other words, +#' after using this function you should go back to it and make minor changes. +#' +#' It is also advised to do a dry-run with `output = "clean"` and only switching +#' to `output = "save"` when you are confident that no important code will be +#' lost (for shorter functions, a careful visual inspection should suffice). +matlab2r <- function( + filename, output = "diff", improve_formatting=TRUE, change_assignment=TRUE, + append=FALSE +) { + # TODO: this function is too long! Split into subfunctions + # (say, by rule and/or section) + # ======================================================== # + # Verification # + # ======================================================== # + if (!file.exists(filename)) stop("File not found") + + # ======================================================== # + # Reading file into R # + # ======================================================== # + txt <- readLines(filename) + original <- txt + + # ======================================================== # + # Replacing text # + # ======================================================== # + + # Uncommenting ------------------------------------------- # + txt <- gsub("^#\\s?(.+)", "\\1", txt) + + # Output variable ---------------------------------------- # + out <- gsub( + pattern = "\\t*function ((\\S|\\,\\s)+)\\s?=\\s?(\\w+)\\((.+)\\)", + replacement = "\\1", + x = txt[1] + ) # TODO: improve by detecting listed outputs + if (substring(out, 1, 1) == "[") { + out <- strsplit(out, "(\\,|\\[|\\]|\\s)")[[1]] + out <- out[which(out != "")] + out <- sapply(seq_along(out), function(x) paste(out[x], "=", out[x])) + out <- paste0("list(", paste(out, collapse=", "), ")") + } + + # Function header ---------------------------------------- # + txt <- gsub( + pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)", + replacement = "\\2 <- function(\\3) {", + x = txt + ) + txt <- gsub( + pattern = "function (.+)\\((.+)\\)", + replacement = "\\1 <- function(\\2) {", + x = txt + ) + + # Function body ------------------------------------------ # + txt <- gsub("(.+)\\.\\.\\.", "\\1", txt) + txt <- gsub(";", "", txt) + + # Loops and if-statements + txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt) + txt <- gsub("end$", "}", txt) + txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after { + txt <- gsub("else$", "} else {", txt) + txt <- gsub("elseif", "} else if", txt) + txt <- gsub("while (.+)", "while \\1 {", txt) + + # MATLAB-equivalent functions in R + txt <- gsub("gamma_ln", "log_gamma", txt) + txt <- gsub("nchoosek", "choose", txt) + txt <- gsub("isempty", "is.null", txt) + # txt <- gsub("(.+)\\'", "t(\\1)", txt) + + # Subsets ------------------------------------------------ # + ass_op <- ifelse(change_assignment, "<-", "=") + txt <- gsub( + pattern = "([^\\(]+)\\(([^\\(]+)\\)=(.+)", + replacement = paste0("\\1[\\2] ", ass_op, "\\3"), + x = txt + ) + txt <- gsub("\\(:\\)", "[, ]", txt) + txt <- gsub("(.+)(\\[|\\():,end(\\]|\\()", "\\1[, ncol()]", txt) + + # Formatting --------------------------------------------- # + if (improve_formatting) { + txt <- gsub("(.),(\\S)", "\\1, \\2", txt) + # Math operators + txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt) + txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt) + txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt) + txt <- gsub("(\\S)\\/(\\S)", "\\1 / \\2", txt) + # Logic operators + txt <- gsub("~", "!", txt) + txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt) + txt <- gsub("(\\S)<=(\\S)", "\\1 <= \\2", txt) + txt <- gsub("(\\S)==(\\S)", "\\1 == \\2", txt) + # Assignment + txt <- gsub( + pattern = "(\\w)(\\s?)=(\\s?)(\\w)", + replacement = paste0("\\1 ", ass_op, " \\4"), + x = txt + ) + # txt <- gsub( + # pattern = "(\\s+(.|\\_|\\[|\\])+)(\\s?)=(\\s?)(.+)", + # replacement = paste0("\\1 ", ass_op, "\\5"), + # x = txt + # ) + txt <- gsub("%(\\s?)(\\w)", "# \\2", txt) + } + + # Adding output and end-of-file brace -------------------- # + txt <- append(txt, paste0("\treturn(", out, ")\n}")) + + # Returning converted code ------------------------------- # + if (output == "asis") { + return(txt) + } else if (output == "clean") { + return(cat(txt, sep="\n")) + } else if (output == "save") { + return( + write.table( + x = txt, + file = filename, + quote = FALSE, + row.names = FALSE, + col.names = FALSE, + append = append + ) + ) + } else if (output == "diff") { + diff_text <- vector(mode="character", length=(2 * length(original) + 1)) + for (i in seq_along(txt)) { + new_i <- (2 * i) + i - 2 + diff_text[new_i] <- paste( + "-----------------------", "line", i, "-----------------------" + ) + diff_text[new_i + 1] <- original[i] + diff_text[new_i + 2] <- txt[i] + } + message("Displaying line number, original content and modified content") + return(cat(diff_text, sep="\n")) + } else { + stop ("Invalid output argument") + } +} diff --git a/R/min_max_MATLAB.R b/R/min_max_MATLAB.R new file mode 100644 index 0000000..7ad9d0e --- /dev/null +++ b/R/min_max_MATLAB.R @@ -0,0 +1,31 @@ +#' @title Minimum (MATLAB version) +#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead +#' @param X matrix +#' @param indices return indices? +#' @return Either a list or a vector +#' @author Waldir Leoncio +min_MATLAB <- function(X, indices = TRUE) { + mins <- apply(X, 2, min) + idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) + if (indices) { + return(list(mins = mins, idx = idx)) + } else { + return(mins) + } +} + +#' @title Maximum (MATLAB version) +#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead +#' @param X matrix +#' @param indices return indices? +#' @return Either a list or a vector +#' @author Waldir Leoncio +max_MATLAB <- function(X, indices = TRUE) { + maxs <- apply(X, 2, max) + idx <- sapply(seq_len(ncol(X)), function(x) match(maxs[x], X[, x])) + if (indices) { + return(list(maxs = maxs, idx = idx)) + } else { + return(maxs) + } +} \ No newline at end of file diff --git a/R/nargin.R b/R/nargin.R new file mode 100644 index 0000000..6ff20cc --- /dev/null +++ b/R/nargin.R @@ -0,0 +1,10 @@ +#' @title Number of function input arguments +#' @description Returns the number of arguments passed to the parent function +#' @return An integer +#' @author Waldir Leoncio +#' @note This function only makes sense inside another function +#' @references https://stackoverflow.com/q/64422780/1169233 +nargin <- function() { + if(sys.nframe() < 2) stop("must be called from inside a function") + length(as.list(sys.call(-1))) - 1 +} diff --git a/R/newGetDistances.R b/R/newGetDistances.R index 896b1a3..b386afa 100644 --- a/R/newGetDistances.R +++ b/R/newGetDistances.R @@ -5,14 +5,14 @@ newGetDistances <- function(data, rowsFromInd) { empties <- find(data < 0) data[empties] <- 0 - data <- as.integer(data) # max(noalle) oltava <256 + data <- apply(data, 2, as.numeric) # max(noalle) oltava <256 pariTaulu <- zeros(riviLkm, 2) aPointer <- 1 - for (a in (1:ninds) - 1) { - pariTaulu[aPointer:(aPointer + ninds - 1 - a), 1] <- - ones(ninds - a, 1) * a - pariTaulu[aPointer:aPointer + ninds - 1 - a, 2] <- t(a + 1:ninds) + for (a in 1:(ninds - 1)) { + pariTaulu_rows <- aPointer:(aPointer + ninds - 1 - a) + pariTaulu[pariTaulu_rows, 1] <- ones(ninds - a, 1) * a + pariTaulu[pariTaulu_rows, 2] <- t((a + 1):ninds) aPointer <- aPointer + ninds - a } @@ -31,9 +31,9 @@ newGetDistances <- function(data, rowsFromInd) { rm(pariTaulu, miinus) x <- zeros(size(eka)) - x <- as.integer(x) + x <- apply(x, 2, as.integer) y <- zeros(size(toka)) - y <- as.integer(y) + y <- apply(y, 2, as.integer) for (j in 1:nloci) { for (k in 1:rowsFromInd) { @@ -57,7 +57,6 @@ newGetDistances <- function(data, rowsFromInd) { muut <- find(vertailuja > 0) dist[muut] <- summa[muut] / vertailuja[muut] rm(summa, vertailuja) - - Z = linkage(t(dist)) + Z <- linkage(t(dist)) return(list(Z = Z, dist = dist)) } \ No newline at end of file diff --git a/R/poistaLiianPienet.R b/R/poistaLiianPienet.R index 4ff581d..19eabea 100644 --- a/R/poistaLiianPienet.R +++ b/R/poistaLiianPienet.R @@ -41,7 +41,7 @@ poistaLiianPienet <- function (npops, rowsFromInd, alaraja, PARTITION[yksilot] == n } - # TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <<- + # TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <- COUNTS[, , miniPops] <- NA SUMCOUNTS[miniPops, ] <- NA diff --git a/R/poistaTyhjatPopulaatiot.R b/R/poistaTyhjatPopulaatiot.R new file mode 100644 index 0000000..badef3e --- /dev/null +++ b/R/poistaTyhjatPopulaatiot.R @@ -0,0 +1,15 @@ +poistaTyhjatPopulaatiot <- function(npops) { + # % Poistaa tyhjentyneet populaatiot COUNTS:ista ja + # % SUMCOUNTS:ista. P�ivitt�� npops:in ja PARTITION:in. + notEmpty <- find(any(SUMCOUNTS, 2)) + COUNTS <- COUNTS[, , notEmpty] + SUMCOUNTS <- SUMCOUNTS[notEmpty, ] + LOGDIFF <- LOGDIFF[, notEmpty] + + for (n in 1:length(notEmpty)) { + apu <- find(PARTITION == notEmpty(n)) + PARTITION[apu] <- n + } + npops <- length(notEmpty) + return(npops) +} \ No newline at end of file diff --git a/R/rand_disc.R b/R/rand_disc.R new file mode 100644 index 0000000..e75f1c4 --- /dev/null +++ b/R/rand_disc.R @@ -0,0 +1,7 @@ +rand_disc <- function(CDF) { + # %returns an index of a value from a discrete distribution using inversion method + slump <- rand + har <- find(CDF > slump) + svar <- har(1) + return(svar) +} diff --git a/R/returnInOrder.R b/R/returnInOrder.R new file mode 100644 index 0000000..c6a48d8 --- /dev/null +++ b/R/returnInOrder.R @@ -0,0 +1,26 @@ +returnInOrder <- function(inds, pop, globalRows, data, adjprior, priorTerm) { + # % Palauttaa yksil�t j�rjestyksess� siten, ett� ensimm�isen� on + # % se, jonka poistaminen populaatiosta pop nostaisi logml:n + # % arvoa eniten. + + ninds <- length(inds) + apuTaulu <- c(inds, zeros(ninds, 1)) + + for (i in 1:ninds) { + ind <- inds[i] + rows <- globalRows[i, 1]:globalRows[i, 2] + diffInCounts <- computeDiffInCounts( + rows, size[COUNTS, 1], size[COUNTS, 2], data + ) + diffInSumCounts <- sum(diffInCounts) + + COUNTS[ , ,pop] <- COUNTS[ , ,pop] - diffInCounts + SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] - diffInSumCounts + apuTaulu[i, 2] <- computePopulationLogml(pop, adjprior, priorTerm) + COUNTS[ , ,pop] <- COUNTS[ , ,pop] + diffInCounts + SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] + diffInSumCounts + } + apuTaulu <- sortrows(apuTaulu, 2) + inds <- apuTaulu[ninds:1, 1] + return(inds) +} \ No newline at end of file diff --git a/R/selvitaDigitFormat.R b/R/selvitaDigitFormat.R index 4492ea8..32af592 100644 --- a/R/selvitaDigitFormat.R +++ b/R/selvitaDigitFormat.R @@ -7,23 +7,22 @@ selvitaDigitFormat <- function(line) { # Genepop-formaatissa olevasta datasta. funktio selvitt�� # rivin muodon perusteella, ovatko datan alleelit annettu # 2 vai 3 numeron avulla. - n <- 1 - merkki <- line[n] + merkki <- substring(line, n, n) while (merkki != ',') { n <- n + 1 - merkki <- line[n] + merkki <- substring(line, n, n) } - while (!any(merkki == '0123456789')) { + while (!any(merkki %in% as.character(0:9))) { n <- n + 1 - merkki <- line[n] + merkki <- substring(line, n, n) } numeroja <- 0 - while (any(merkki == '0123456789')) { + while (any(merkki %in% as.character(0:9))) { numeroja <- numeroja + 1 n <- n + 1 - merkki <- line[n] + merkki <- substring(line, n, n) } df <- numeroja / 2 diff --git a/R/setdiff_MATLAB.R b/R/setdiff_MATLAB.R new file mode 100644 index 0000000..3c2324b --- /dev/null +++ b/R/setdiff_MATLAB.R @@ -0,0 +1,16 @@ +#' @title Set differences of two arrays +#' @description Loosely replicates the behavior of the homonym Matlab function +#' @param A first array +#' @param B second array +#' @param legacy if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported) +#' @author Waldir Leoncio +setdiff_MATLAB <- function(A, B, legacy = FALSE) { + if (legacy) message("legacy=TRUE not supported. Ignoring.") + if (is(A, "numeric") & is(B, "numeric")) { + values <- sort(unique(A[is.na(match(A, B))])) + } else if (is(A, "data.frame") & is(B, "data.frame")) { + stop("Not implemented for data frames") + } + # TODO: add support for indices (if necessary) + return(values) +} \ No newline at end of file diff --git a/R/updateGlobalVariables.R b/R/updateGlobalVariables.R new file mode 100644 index 0000000..1602faf --- /dev/null +++ b/R/updateGlobalVariables.R @@ -0,0 +1,64 @@ +updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) { + # % Suorittaa globaalien muuttujien muutokset, kun yksil� ind + # % on siirret��n koriin i2. + + i1 <- PARTITION[ind] + PARTITION[ind] <- i2 + + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts] + + POP_LOGML[c(i1, i2)] <- computePopulationLogml( + c(i1, i2), adjprior, priorTerm + ) + + LOGDIFF[, c(i1, i2)] <- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION==i2)) + LOGDIFF[inx, ] <- -Inf +} + +updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) { + # % Suorittaa globaalien muuttujien muutokset, kun kaikki + # % korissa i1 olevat yksil�t siirret��n koriin i2. + + inds <- find(PARTITION == i1) + PARTITION[inds] <- i2 + + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts] + + POP_LOGML[i1] <- 0 + POP_LOGML[i2] <- computePopulationLogml(i2, adjprior, priorTerm) + + LOGDIFF[, c(i1, i2)] <- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION == i2)) + LOGDIFF[inx, ] <- -Inf +} + +updateGlobalVariables3 <- function( + muuttuvat, diffInCounts, adjprior, priorTerm, i2 +) { + # % Suorittaa globaalien muuttujien p�ivitykset, kun yksil�t 'muuttuvat' + # % siirret��n koriin i2. Ennen siirtoa yksil�iden on kuuluttava samaan + # % koriin. + + i1 <- PARTITION[muuttuvat(1)] + PARTITION[muuttuvat] <- i2 + + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts] + + POP_LOGML[c(i1, i2)] <- computePopulationLogml( + c(i1, i2), adjprior, priorTerm + ) + + LOGDIFF[, c(i1, i2)] <- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION == i2)) + LOGDIFF[inx, ] <- -Inf +} diff --git a/R/writeMixtureInfo.R b/R/writeMixtureInfo.R index ca8efea..ec5120b 100644 --- a/R/writeMixtureInfo.R +++ b/R/writeMixtureInfo.R @@ -10,16 +10,10 @@ #' @param partitionSummary partitionSummary #' @param popnames popnames #' @param fixedK fixedK -#' @param PARTITION PARTITION -#' @param COUNTS COUNTS -#' @param SUMCOUNTS SUMCOUNTS -#' @param LOGDIFF LOGDIFF #' @export writeMixtureInfo <- function( - logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS, - LOGDIFF + logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK ) { - changesInLogml <- list() ninds <- size(data, 1) / rowsFromInd npops <- size(COUNTS, 3) @@ -30,7 +24,6 @@ writeMixtureInfo <- function( fid <- load(outPutFile) } else { fid <- -1 - message('Diverting output to baps4_output.baps') # TODO: replace sink with option that will record input and output sink('baps4_output.baps', split=TRUE) # save in text anyway. } diff --git a/data/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls b/inst/ext/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls similarity index 100% rename from data/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls rename to inst/ext/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls diff --git a/data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt b/inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt rename to inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt diff --git a/data/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt b/inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt similarity index 100% rename from data/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt rename to inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt diff --git a/data/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt b/inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt rename to inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta b/inst/ext/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta similarity index 100% rename from data/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta rename to inst/ext/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta diff --git a/data/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt b/inst/ext/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt rename to inst/ext/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt diff --git a/data/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt b/inst/ext/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt rename to inst/ext/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt diff --git a/data/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt b/inst/ext/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt rename to inst/ext/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt diff --git a/data/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt b/inst/ext/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt rename to inst/ext/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt diff --git a/data/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt b/inst/ext/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt rename to inst/ext/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt diff --git a/data/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt b/inst/ext/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt rename to inst/ext/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt diff --git a/data/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt b/inst/ext/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt rename to inst/ext/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt diff --git a/data/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa b/inst/ext/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa similarity index 100% rename from data/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa rename to inst/ext/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa diff --git a/data/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls b/inst/ext/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls similarity index 100% rename from data/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls rename to inst/ext/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls diff --git a/data/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt b/inst/ext/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt rename to inst/ext/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt diff --git a/data/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt b/inst/ext/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt rename to inst/ext/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt diff --git a/data/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt b/inst/ext/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt rename to inst/ext/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt diff --git a/data/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt b/inst/ext/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt rename to inst/ext/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt diff --git a/data/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls b/inst/ext/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls similarity index 100% rename from data/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls rename to inst/ext/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls diff --git a/data/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls b/inst/ext/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls similarity index 100% rename from data/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls rename to inst/ext/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls diff --git a/data/ExamplesDataFormatting/Notes.md b/inst/ext/ExamplesDataFormatting/Notes.md similarity index 100% rename from data/ExamplesDataFormatting/Notes.md rename to inst/ext/ExamplesDataFormatting/Notes.md diff --git a/man/admixture_initialization.Rd b/man/admixture_initialization.Rd new file mode 100644 index 0000000..c10e4f8 --- /dev/null +++ b/man/admixture_initialization.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/admixture_initialization.R +\name{admixture_initialization} +\alias{admixture_initialization} +\title{Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.} +\usage{ +admixture_initialization(data_matrix, nclusters, Z) +} +\arguments{ +\item{data_matrix}{data_matrix} + +\item{nclusters}{ncluster} + +\item{Z}{Z} +} +\description{ +Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. +} diff --git a/man/fgetl.Rd b/man/fgetl.Rd new file mode 100644 index 0000000..be0116b --- /dev/null +++ b/man/fgetl.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fgetl-fopen.R +\name{fgetl} +\alias{fgetl} +\title{Read line from file, removing newline characters} +\usage{ +fgetl(file) +} +\arguments{ +\item{file}{character vector to be read, usually an output of `fopen()`} +} +\value{ +If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1. +} +\description{ +Equivalent function to its homonymous Matlab equivalent. +} +\seealso{ +fopen +} +\author{ +Waldir Leoncio +} diff --git a/man/fopen.Rd b/man/fopen.Rd new file mode 100644 index 0000000..e1806eb --- /dev/null +++ b/man/fopen.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fgetl-fopen.R +\name{fopen} +\alias{fopen} +\title{Open file} +\usage{ +fopen(filename) +} +\arguments{ +\item{filename}{Path and name of file to be open} +} +\value{ +The same as `readLines(filename)` +} +\description{ +Open a text file +} +\seealso{ +fgetl +} +\author{ +Waldir Leoncio +} diff --git a/man/laskeMuutokset4.Rd b/man/laskeMuutokset4.Rd index 7c16a19..e3382de 100644 --- a/man/laskeMuutokset4.Rd +++ b/man/laskeMuutokset4.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/laskeMuutokset4.R +% Please edit documentation in R/laskeMuutokset12345.R \name{laskeMuutokset4} \alias{laskeMuutokset4} -\title{Calculate changes?} +\title{Calculate changes (?)} \usage{ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)) } @@ -20,6 +20,6 @@ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)) \description{ Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran -todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään +todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään siirrettävää, on vastaavassa kohdassa rivi nollia. } diff --git a/man/laskeOsaDist.Rd b/man/laskeOsaDist.Rd new file mode 100644 index 0000000..f577c89 --- /dev/null +++ b/man/laskeOsaDist.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/laskeOsaDist.R +\name{laskeOsaDist} +\alias{laskeOsaDist} +\title{Lower part of the dist} +\usage{ +laskeOsaDist(inds2, dist, ninds) +} +\arguments{ +\item{inds2}{inds2} + +\item{dist}{dist} + +\item{ninds}{ninds} +} +\description{ +Constructs from the dist vector a subvector containing the individual inds2, Forms dist sub-vectors the vector, which includes yksiliden inds2 +} +\author{ +Waldir Leoncio +} diff --git a/man/linkage.Rd b/man/linkage.Rd index 4f75835..5014a8b 100644 --- a/man/linkage.Rd +++ b/man/linkage.Rd @@ -7,7 +7,7 @@ linkage(Y, method = "co") } \arguments{ -\item{Y}{data} +\item{Y}{matrix} \item{method}{either 'si', 'av', 'co' 'ce' or 'wa'} } @@ -19,4 +19,9 @@ Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single linkage algorithm. The input Y is a distance matrix such as is generated by PDIST. Y may also be a more general dissimilarity matrix conforming to the output format of PDIST. + +Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X. +} +\note{ +This is also a base Matlab function. The reason why the source code is also present here is unclear. } diff --git a/man/lueNimi.Rd b/man/lueNimi.Rd index 74cd665..38b9940 100644 --- a/man/lueNimi.Rd +++ b/man/lueNimi.Rd @@ -13,5 +13,5 @@ lueNimi(line) nimi } \description{ -Reads the line name +Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area } diff --git a/man/matlab2r.Rd b/man/matlab2r.Rd new file mode 100644 index 0000000..e809709 --- /dev/null +++ b/man/matlab2r.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matlab2r.R +\name{matlab2r} +\alias{matlab2r} +\title{Convert Matlab function to R} +\usage{ +matlab2r( + filename, + output = "diff", + improve_formatting = TRUE, + change_assignment = TRUE, + append = FALSE +) +} +\arguments{ +\item{filename}{name of the file} + +\item{output}{can be "asis", "clean", "save" or "diff"} + +\item{improve_formatting}{if `TRUE` (default), makes minor changes +to conform to best-practice formatting conventions} + +\item{change_assignment}{if `TRUE` (default), uses `<-` as the assignment operator} + +\item{append}{if `FALSE` (default), overwrites file; otherwise, append +output to input} +} +\value{ +text converted to R, printed to screen or replacing input file +} +\description{ +Performs basic syntax conversion from Matlab to R +} +\note{ +This function is intended to expedite the process of converting a +Matlab function to R by making common replacements. It does not have the +immediate goal of outputting a ready-to-use function. In other words, +after using this function you should go back to it and make minor changes. + +It is also advised to do a dry-run with `output = "clean"` and only switching +to `output = "save"` when you are confident that no important code will be +lost (for shorter functions, a careful visual inspection should suffice). +} +\author{ +Waldir Leoncio +} diff --git a/man/max_MATLAB.Rd b/man/max_MATLAB.Rd new file mode 100644 index 0000000..d7fbdf7 --- /dev/null +++ b/man/max_MATLAB.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/min_max_MATLAB.R +\name{max_MATLAB} +\alias{max_MATLAB} +\title{Maximum (MATLAB version)} +\usage{ +max_MATLAB(X, indices = TRUE) +} +\arguments{ +\item{X}{matrix} + +\item{indices}{return indices?} +} +\value{ +Either a list or a vector +} +\description{ +Finds the minimum value for each column of a matrix, potentially returning the indices instead +} +\author{ +Waldir Leoncio +} diff --git a/man/min_MATLAB.Rd b/man/min_MATLAB.Rd new file mode 100644 index 0000000..9825e90 --- /dev/null +++ b/man/min_MATLAB.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/min_max_MATLAB.R +\name{min_MATLAB} +\alias{min_MATLAB} +\title{Minimum (MATLAB version)} +\usage{ +min_MATLAB(X, indices = TRUE) +} +\arguments{ +\item{X}{matrix} + +\item{indices}{return indices?} +} +\value{ +Either a list or a vector +} +\description{ +Finds the minimum value for each column of a matrix, potentially returning the indices instead +} +\author{ +Waldir Leoncio +} diff --git a/man/nargin.Rd b/man/nargin.Rd new file mode 100644 index 0000000..a7922a6 --- /dev/null +++ b/man/nargin.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nargin.R +\name{nargin} +\alias{nargin} +\title{Number of function input arguments} +\usage{ +nargin() +} +\value{ +An integer +} +\description{ +Returns the number of arguments passed to the parent function +} +\note{ +This function only makes sense inside another function +} +\references{ +https://stackoverflow.com/q/64422780/1169233 +} +\author{ +Waldir Leoncio +} diff --git a/man/setdiff_MATLAB.Rd b/man/setdiff_MATLAB.Rd new file mode 100644 index 0000000..dcc6c00 --- /dev/null +++ b/man/setdiff_MATLAB.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setdiff_MATLAB.R +\name{setdiff_MATLAB} +\alias{setdiff_MATLAB} +\title{Set differences of two arrays} +\usage{ +setdiff_MATLAB(A, B, legacy = FALSE) +} +\arguments{ +\item{A}{first array} + +\item{B}{second array} + +\item{legacy}{if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported)} +} +\description{ +Loosely replicates the behavior of the homonym Matlab function +} +\author{ +Waldir Leoncio +} diff --git a/man/writeMixtureInfo.Rd b/man/writeMixtureInfo.Rd index e973db9..f30bde0 100644 --- a/man/writeMixtureInfo.Rd +++ b/man/writeMixtureInfo.Rd @@ -14,11 +14,7 @@ writeMixtureInfo( inputFile, partitionSummary, popnames, - fixedK, - PARTITION, - COUNTS, - SUMCOUNTS, - LOGDIFF + fixedK ) } \arguments{ @@ -41,14 +37,6 @@ writeMixtureInfo( \item{popnames}{popnames} \item{fixedK}{fixedK} - -\item{PARTITION}{PARTITION} - -\item{COUNTS}{COUNTS} - -\item{SUMCOUNTS}{SUMCOUNTS} - -\item{LOGDIFF}{LOGDIFF} } \description{ Writes information about the mixture diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index bfc3fe8..52d14f8 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -158,18 +158,18 @@ test_that("find works as expected", { }) test_that("sortrows works as expected", { - mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4) - expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4)) - expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4)) - expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) + mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4) + expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4)) + expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4)) + expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) }) test_that("cell works as expected", { - expect_equal(cell(0), array(dim = c(0, 0))) - expect_equal(cell(1), array(dim = c(1, 1))) - expect_equal(cell(2), array(dim = c(2, 2))) - expect_equal(cell(3, 4), array(dim = c(3, 4))) - expect_equal(cell(5, 7, 6), array(dim = c(5, 7, 6))) + expect_equivalent(cell(0), array(0, dim = c(0, 0))) + expect_equivalent(cell(1), array(0, dim = c(1, 1))) + expect_equivalent(cell(2), array(0, dim = c(2, 2))) + expect_equivalent(cell(3, 4), array(0, dim = c(3, 4))) + expect_equivalent(cell(5, 7, 6), array(0, dim = c(5, 7, 6))) }) test_that("blanks works as expected", { @@ -201,4 +201,44 @@ test_that("isspace works as expected", { X <- '\t a b\tcde f' expect_identical(isspace(chr), c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0)) expect_identical(isspace(X), c(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0)) +}) + +test_that("nargin works correctly", { + addme <- function(a, b) { + if (nargin() == 2) { + c <- a + b + } else if (nargin() == 1) { + c <- a + a + } else { + c <- 0 + } + return(c) + } + expect_equal(addme(13, 42), 55) + expect_equal(addme(13), 26) + expect_equal(addme(), 0) +}) + +test_that("setdiff works as expected", { + A <- c(3, 6, 2, 1, 5, 1, 1) + B <- c(2, 4, 6) + C <- c(1, 3, 5) + # expect_equal(setdiff_MATLAB(A, B), C) # TODO: export setdiff_MATLAB + A <- data.frame( + Var1 = 1:5, + Var2 = LETTERS[1:5], + Var3 = c(FALSE, TRUE, FALSE, TRUE, FALSE) + ) + B <- data.frame( + Var1 = seq(1, 9, by = 2), + Var2 = LETTERS[seq(1, 9, by = 2)], + Var3 = rep(FALSE, 5) + ) + C <- data.frame( + Var1 = c(2, 4), + Var2 = c('B', 'D'), + Var3 = c(TRUE, TRUE) + ) + # expect_equal(setdiff_MATLAB(A, B), C) # TODO: implement for data frames + # TODO: add more examples from https://se.mathworks.com/help/matlab/ref/double.setdiff.html;jsessionid=0d8d42582d4d299b8224403899f1 }) \ No newline at end of file diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index 18e72e7..f1faf53 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -1,11 +1,17 @@ -# library(devtools)#TEMP -# library(testthat)#TEMP -# library(rBAPS)#TEMP - context("Opening files on greedyMix") # greedyMix( -# tietue = "data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt", +# tietue = "inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt", # format = "GenePop", # savePreProcessed = FALSE -# ) \ No newline at end of file +# ) + +context("Linkage") + +test_that("Linkages are properly calculated", { + Y <- c(0.5, 0.3, 0.6, 0.3, 0.3, 0.2, 0.3, 0.3, 0.3, 0.5) + expect_equal( + object = linkage(Y), + expected = matrix(c(2, 1, 7, 8, 4, 3, 5, 6, .2, .3, .3, .6), ncol=3) + ) +}) \ No newline at end of file