From 2c3d40c9a719502c4e105a4422b9650634cb6954 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 28 Jul 2020 12:50:54 +0200 Subject: [PATCH 01/71] Fixed behavior of lueNimi and selvitaDigitFormat --- R/lueNimi.R | 8 ++++---- R/selvitaDigitFormat.R | 13 ++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/R/lueNimi.R b/R/lueNimi.R index a5bbb3f..7e0811d 100644 --- a/R/lueNimi.R +++ b/R/lueNimi.R @@ -1,17 +1,17 @@ #' @title Read the Name -#' @description Reads the line name +#' @description Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area #' @param line line #' @return nimi #' @export lueNimi <- function(line) { # Palauttaa line:n alusta sen osan, joka on ennen pilkkua. n <- 1 - merkki <- line[n] + merkki <- substring(line, n, n) nimi <- '' while (merkki != ',') { nimi <- c(nimi, merkki) n <- n + 1 - merkki <- line[n] + merkki <- substring(line, n, n) } - return(nimi) + return(paste(nimi, collapse="")) } \ No newline at end of file diff --git a/R/selvitaDigitFormat.R b/R/selvitaDigitFormat.R index 4492ea8..32af592 100644 --- a/R/selvitaDigitFormat.R +++ b/R/selvitaDigitFormat.R @@ -7,23 +7,22 @@ selvitaDigitFormat <- function(line) { # Genepop-formaatissa olevasta datasta. funktio selvitt�� # rivin muodon perusteella, ovatko datan alleelit annettu # 2 vai 3 numeron avulla. - n <- 1 - merkki <- line[n] + merkki <- substring(line, n, n) while (merkki != ',') { n <- n + 1 - merkki <- line[n] + merkki <- substring(line, n, n) } - while (!any(merkki == '0123456789')) { + while (!any(merkki %in% as.character(0:9))) { n <- n + 1 - merkki <- line[n] + merkki <- substring(line, n, n) } numeroja <- 0 - while (any(merkki == '0123456789')) { + while (any(merkki %in% as.character(0:9))) { numeroja <- numeroja + 1 n <- n + 1 - merkki <- line[n] + merkki <- substring(line, n, n) } df <- numeroja / 2 From 88fc203c3dcfe81303b09278e924a73bd10d154f Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 28 Jul 2020 15:35:04 +0200 Subject: [PATCH 02/71] Fixed addAlleles and lueGenePopData --- R/addAlleles.R | 21 ++++++++++++--------- R/lueGenePopData.R | 33 ++++++++++++++------------------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/R/addAlleles.R b/R/addAlleles.R index a1c87bd..33d0f38 100644 --- a/R/addAlleles.R +++ b/R/addAlleles.R @@ -12,22 +12,25 @@ addAlleles <- function(data, ind, line, divider) { # line. Jos data on 3 digit formaatissa on divider=1000. # Jos data on 2 digit formaatissa on divider=100. - nloci <- size(data, 2) - 1 + nloci <- size(data, 2) # added 1 from original code if (size(data, 1) < (2 * ind)) { - data <- c(data, zeros(100, nloci + 1)) + data <- rbind(data, zeros(100, nloci)) # subtracted 1 from original code } k <- 1 - merkki <- line[k] + merkki <- substring(line, k, k) while (merkki != ',') { k <- k + 1 - merkki <- line[k] + merkki <- substring(line, k, k) } - line <- line[k + 1:length(line)] + line <- substring(line, k + 1) # clear k; clear merkki; - alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]]) - + if (grepl(" ", line)) { + alleeliTaulu <- as.numeric(strsplit(line, split = " ")[[1]]) + } else if (grepl("\t", line)) { + alleeliTaulu <- as.numeric(strsplit(line, split = "\t")[[1]]) + } if (length(alleeliTaulu) != nloci) { stop('Incorrect data format.') @@ -35,9 +38,9 @@ addAlleles <- function(data, ind, line, divider) { for (j in seq_len(nloci)) { ekaAlleeli <- floor(alleeliTaulu[j] / divider) - if (ekaAlleeli == 0) ekaAlleeli <- -999 + if (is.na(ekaAlleeli) | ekaAlleeli == 0) ekaAlleeli <- -999 tokaAlleeli <- alleeliTaulu[j] %% divider - if (tokaAlleeli == 0) tokaAlleeli <- -999 + if (is.na(tokaAlleeli) | tokaAlleeli == 0) tokaAlleeli <- -999 data[2 * ind - 1, j] <- ekaAlleeli data[2 * ind, j] <- tokaAlleeli diff --git a/R/lueGenePopData.R b/R/lueGenePopData.R index 2c79d03..7276537 100644 --- a/R/lueGenePopData.R +++ b/R/lueGenePopData.R @@ -4,17 +4,16 @@ #' @return list containing data and popnames #' @export lueGenePopData <- function (tiedostonNimi) { - - fid <- load(tiedostonNimi) - line1 <- readLines(fid)[1] # ensimmäinen rivi - line2 <- readLines(fid)[2] # toinen rivi + fid <- readLines(tiedostonNimi) + line <- fid[1] # ensimmäinen rivi + line <- fid[2] # toinen rivi count <- rivinSisaltamienMjonojenLkm(line) - line <- readLines(fid)[3] + line <- fid[3] lokusRiveja <- 1 while (testaaPop(line) == 0) { lokusRiveja <- lokusRiveja + 1 # locus row - line <- readLines(fid)[3 + lokusRiveja] + line <- fid[2 + lokusRiveja] } if (lokusRiveja > 1) { @@ -29,38 +28,34 @@ lueGenePopData <- function (tiedostonNimi) { ninds <- 0 poimiNimi <- 1 digitFormat <- -1 - while (line != -1) { - line <- readLines(fid)[lokusRiveja + 1] - lokusRiveja <- lokusRiveja + 1 - + while (lokusRiveja < length(fid) - 2) { + lokusRiveja <- lokusRiveja + 1 # Keeps the loop moving along + line <- fid[lokusRiveja + 2] if (poimiNimi == 1) { - # Edellinen rivi oli 'pop' + # Edellinen rivi oli 'pop' (previous line was pop) nimienLkm <- nimienLkm + 1 ninds <- ninds + 1 if (nimienLkm > size(popnames, 1)) { - popnames <- c(popnames, cell(10, 2)) + popnames <- rbind(popnames, cell(10, 2)) } nimi <- lueNimi(line) if (digitFormat == -1) { digitFormat <- selvitaDigitFormat(line) divider <- 10 ^ digitFormat } - popnames[nimienLkm, 1] <- nimi #N�in se on greedyMix:iss�kin?!? + popnames[nimienLkm, 1] <- nimi #N�in se on greedyMix:iss�kin?!? popnames[nimienLkm, 2] <- ninds poimiNimi <- 0 - data <- addAlleles(data, ninds, line, divider) - } else if (testaaPop(line)) { poimiNimi <- 1 - - } else if (line != -1) { + } else if (!is.na(line)) { ninds <- ninds + 1 data <- addAlleles(data, ninds, line, divider) } } - data <- data[1:(ninds * 2),] - popnames <- popnames[seq_len(nimienLkm),] + data <- data[1:(ninds * 2), ] + popnames <- popnames[seq_len(nimienLkm), ] return(list(data = data, popnames = popnames)) } \ No newline at end of file From 2f809d01df29b58c9eb781f4881d0f38faae5eed Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 28 Jul 2020 15:35:17 +0200 Subject: [PATCH 03/71] Improved validation --- R/lueNimi.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/lueNimi.R b/R/lueNimi.R index 7e0811d..377e791 100644 --- a/R/lueNimi.R +++ b/R/lueNimi.R @@ -4,6 +4,12 @@ #' @return nimi #' @export lueNimi <- function(line) { + # ========================================================================== + # Validation + # ========================================================================== + if (!grepl(",", line)) { + stop("There are no commas in this line") + } # Palauttaa line:n alusta sen osan, joka on ennen pilkkua. n <- 1 merkki <- substring(line, n, n) From c9ff8e76a962787a11bc5f3828eb2ab18f822134 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 28 Jul 2020 15:35:29 +0200 Subject: [PATCH 04/71] Changed base Matlab function behavior --- R/cell.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/cell.R b/R/cell.R index beac532..746634e 100644 --- a/R/cell.R +++ b/R/cell.R @@ -6,10 +6,10 @@ #' @return An array of zeroes with the dimensions passed on call cell <- function(n, sz = c(n, n), ...) { if (length(sz) == 1 & missing(...)) { - return(array(dim = c(n, sz))) + return(array(0, dim = c(n, sz))) } else if (length(sz) == 2) { - return(array(dim = sz)) + return(array(0, dim = sz)) } else { - return(array(dim = c(n, sz, ...))) + return(array(0, dim = c(n, sz, ...))) } } \ No newline at end of file From 56d8a5c081625ece3a118d0d1d0f98054d10c280 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 28 Jul 2020 15:51:28 +0200 Subject: [PATCH 05/71] Unraveled internal functions of greedyMix --- R/greedyMix.R | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/R/greedyMix.R b/R/greedyMix.R index edac561..a9573cf 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -149,13 +149,26 @@ greedyMix <- function( kunnossa <- testaaGenePopData(filename_pathname) if (kunnossa == 0) stop("testaaGenePopData returned 0") - # [data,popnames]=lueGenePopData([pathname filename]); # TODO: trans + data_popnames <- lueGenePopData(filename_pathname) + data <- data_popnames$data + popnames <- data_popnames$popnames # h0 = findobj('Tag','filename1_text'); # set(h0,'String',filename); clear h0; -# [data, rowsFromInd, alleleCodes, noalle, adjprior, priorTerm] = handleData(data); # TODO:trans -# [Z,dist] = newGetDistances(data,rowsFromInd); # TODO: trans + browser()#TEMP + list_dranap <- handleData(data) # FIXME: debug + data <- list_dranap$newData + rowsFromInd <- list_dranap$rowsFromInd + alleleCodes <- list_dranap$alleleCodes + noalle <- list_dranap$noalle + adjprior <- list_dranap$adjprior + priorTerm <- list_dranap$prioterm + + list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug + Z <- lizt_Zd$Z + dist <- lizt_Zd$dist + if (is.null(savePreProcessed)) { save_preproc <- questdlg( quest = 'Do you wish to save pre-processed data?', From f036a33bca2b358608d2e26b4fc2723bbf6eb91d Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 31 Jul 2020 09:20:44 +0200 Subject: [PATCH 06/71] Initial idea for fgetl --- R/fgetl.R | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 R/fgetl.R diff --git a/R/fgetl.R b/R/fgetl.R new file mode 100644 index 0000000..5e4eec6 --- /dev/null +++ b/R/fgetl.R @@ -0,0 +1,33 @@ +#' @title Read line from file, removing newline characters +#' @description Equivalent function to its homonymous Matlab equivalent. +#' @param file file to be read +#' @return If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1. +#' @author Waldir Leoncio +#' @export +fgetl <- function(file) { + # ========================================================================== + # Validation + # ========================================================================== + if (file == "") return(-1) + # ========================================================================== + # Determine next line to be read + # ========================================================================== + if (is.null(attr(file, "last_read_line"))) { + attr(file, "last_read_line") <- 1 + } else { + attr(file, "last_read_line") <- attr(file, "last_read_line") + 1 + } + # ========================================================================== + # Returning next line + # ========================================================================== + out <- file[attr(file, "last_read_line")] + return(out) +} + +#' @title Open file +#' @description Open a text file +#' @param filename Path and name of file to be open +#' @return The same as `readLines(filename)` +#' @author Waldir Leoncio +#' @export +fopen <- function(filename) readLines(filename) \ No newline at end of file From 372500d75057103558f70edfc0f8e45335b3b7d9 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 31 Jul 2020 09:26:57 +0200 Subject: [PATCH 07/71] Reimplemented fgetl --- R/fgetl.R | 18 ++++++------------ man/fgetl.Rd | 23 +++++++++++++++++++++++ man/fopen.Rd | 23 +++++++++++++++++++++++ 3 files changed, 52 insertions(+), 12 deletions(-) create mode 100644 man/fgetl.Rd create mode 100644 man/fopen.Rd diff --git a/R/fgetl.R b/R/fgetl.R index 5e4eec6..176a330 100644 --- a/R/fgetl.R +++ b/R/fgetl.R @@ -1,26 +1,19 @@ #' @title Read line from file, removing newline characters #' @description Equivalent function to its homonymous Matlab equivalent. -#' @param file file to be read +#' @param file character vector to be read, usually an output of `fopen()` #' @return If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1. #' @author Waldir Leoncio +#' @seealso fopen #' @export fgetl <- function(file) { # ========================================================================== # Validation # ========================================================================== - if (file == "") return(-1) + if (length(file) <= 1) return(-1) # ========================================================================== - # Determine next line to be read + # Returning file minus the first line # ========================================================================== - if (is.null(attr(file, "last_read_line"))) { - attr(file, "last_read_line") <- 1 - } else { - attr(file, "last_read_line") <- attr(file, "last_read_line") + 1 - } - # ========================================================================== - # Returning next line - # ========================================================================== - out <- file[attr(file, "last_read_line")] + out <- file[-1] return(out) } @@ -29,5 +22,6 @@ fgetl <- function(file) { #' @param filename Path and name of file to be open #' @return The same as `readLines(filename)` #' @author Waldir Leoncio +#' @seealso fgetl #' @export fopen <- function(filename) readLines(filename) \ No newline at end of file diff --git a/man/fgetl.Rd b/man/fgetl.Rd new file mode 100644 index 0000000..cd49776 --- /dev/null +++ b/man/fgetl.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fgetl.R +\name{fgetl} +\alias{fgetl} +\title{Read line from file, removing newline characters} +\usage{ +fgetl(file) +} +\arguments{ +\item{file}{character vector to be read, usually an output of `fopen()`} +} +\value{ +If the file is nonempty, then fgetl returns tline as a character vector. If the file is empty and contains only the end-of-file marker, then fgetl returns tline as a numeric value -1. +} +\description{ +Equivalent function to its homonymous Matlab equivalent. +} +\seealso{ +fopen +} +\author{ +Waldir Leoncio +} diff --git a/man/fopen.Rd b/man/fopen.Rd new file mode 100644 index 0000000..3e2d3a7 --- /dev/null +++ b/man/fopen.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fgetl.R +\name{fopen} +\alias{fopen} +\title{Open file} +\usage{ +fopen(filename) +} +\arguments{ +\item{filename}{Path and name of file to be open} +} +\value{ +The same as `readLines(filename)` +} +\description{ +Open a text file +} +\seealso{ +fgetl +} +\author{ +Waldir Leoncio +} From 8f4a3a72eeef301e8456cd2487e20c9b45c4c105 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 31 Jul 2020 09:27:18 +0200 Subject: [PATCH 08/71] Renamed file --- R/{fgetl.R => fgetl-fopen.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{fgetl.R => fgetl-fopen.R} (100%) diff --git a/R/fgetl.R b/R/fgetl-fopen.R similarity index 100% rename from R/fgetl.R rename to R/fgetl-fopen.R From 154a491b431bb421c2bbc347a3210aa17822b0f6 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 31 Jul 2020 09:54:55 +0200 Subject: [PATCH 09/71] Fixed handleData --- R/handleData.R | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/R/handleData.R b/R/handleData.R index 40d39e7..5bfd072 100644 --- a/R/handleData.R +++ b/R/handleData.R @@ -20,12 +20,11 @@ handleData <- function(raw_data) { # koodi pienimm?ksi koodiksi, joka isompi kuin mik??n k?yt?ss?oleva koodi. # T?m?n j?lkeen funktio muuttaa alleelikoodit siten, ett?yhden lokuksen j # koodit saavat arvoja v?lill?1,...,noalle(j). - data <- raw_data nloci <- size(raw_data, 2) - 1 dataApu <- data[, 1:nloci] - nollat <- find(dataApu==0) + nollat <- find(dataApu == 0) if (!isempty(nollat)) { isoinAlleeli <- max(max(dataApu)) dataApu[nollat] <- isoinAlleeli + 1 @@ -39,9 +38,12 @@ handleData <- function(raw_data) { alleelitLokuksessa <- cell(nloci, 1) for (i in 1:nloci) { alleelitLokuksessaI <- unique(data[, i]) - alleelitLokuksessa[i, 1] <- alleelitLokuksessaI[ - find(alleelitLokuksessaI >= 0) - ] + alleelitLokuksessaI_pos <- find(alleelitLokuksessaI >= 0) + alleelitLokuksessa[i, 1] <- ifelse( + test = length(alleelitLokuksessaI_pos) > 0, + yes = alleelitLokuksessaI[alleelitLokuksessaI_pos], + no = 0 + ) noalle[i] <- length(alleelitLokuksessa[i, 1]) } alleleCodes <- zeros(max(noalle), nloci) @@ -65,10 +67,10 @@ handleData <- function(raw_data) { emptyRow <- repmat(a, c(1, ncols)) lessThanMax <- find(rowsFromInd < maxRowsFromInd) missingRows <- maxRowsFromInd * nind - nrows - data <- as.matrix(c(data, zeros(missingRows, ncols))) + data <- rbind(data, zeros(missingRows, ncols)) pointer <- 1 for (ind in t(lessThanMax)) { #K?y l?pi ne yksil?t, joilta puuttuu rivej? - miss = maxRowsFromInd-rowsFromInd(ind); # T?lt?yksil?lt?puuttuvien lkm. + miss <- maxRowsFromInd - rowsFromInd(ind) # T?lt?yksil?lt?puuttuvien lkm. } data <- sortrows(data, ncols) # Sorttaa yksil?iden mukaisesti newData <- data @@ -84,12 +86,12 @@ handleData <- function(raw_data) { priorTerm <- priorTerm + noalle[j] * lgamma(1 / noalle[j]) } out <- list( - newData = newData, + newData = newData, rowsFromInd = rowsFromInd, alleleCodes = alleleCodes, - noalle = noalle, - adjprior = adjprior, - priorTerm = priorTerm + noalle = noalle, + adjprior = adjprior, + priorTerm = priorTerm ) return(out) } \ No newline at end of file From debe7463ccee74345579c5634a241ba46f576be9 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 31 Jul 2020 13:59:04 +0200 Subject: [PATCH 10/71] Added min_MATLAB function --- R/min.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 R/min.R diff --git a/R/min.R b/R/min.R new file mode 100644 index 0000000..2d8d617 --- /dev/null +++ b/R/min.R @@ -0,0 +1,16 @@ +#' @title Minimum (MATLAB version) +#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead +#' @param X matrix +#' @param indices return indices? +#' @return Either a list or a vector +#' @author Waldir Leoncio +#' @export +min_MATLAB <- function(X, indices = TRUE) { + mins <- apply(X, 2, min) + idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) + if (indices) { + return(list(mins = mins, idx = idx)) + } else { + return(mins) + } +} \ No newline at end of file From 301842723772985e29c3ebd21fd17be030ca098c Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 31 Jul 2020 13:59:11 +0200 Subject: [PATCH 11/71] Fixed linkage() --- R/linkage.R | 50 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/R/linkage.R b/R/linkage.R index 80d6b30..ccaf9b5 100644 --- a/R/linkage.R +++ b/R/linkage.R @@ -4,10 +4,14 @@ #' linkage algorithm. The input Y is a distance matrix such as is generated by #' PDIST. Y may also be a more general dissimilarity matrix conforming to the #' output format of PDIST. -#' @param Y data +#' +#' Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X. +#' @param Y matrix #' @param method either 'si', 'av', 'co' 'ce' or 'wa' +#' @note This is also a base Matlab function. The reason why the source code is also present here is unclear. #' @export linkage <- function(Y, method = 'co') { + #TODO: compare R output with MATLAB output k <- size(Y)[1] n <- size(Y)[2] m <- (1 + sqrt(1 + 8 * n)) / 2 @@ -24,18 +28,33 @@ linkage <- function(Y, method = 'co') { N[1:m] <- 1 n <- m; # since m is changing, we need to save m in n. R <- 1:n - for (s in 1:(n-1)) { - X <- Y - v <- min(X)[1] - k <- min(X)[2] + for (s in 1:(n - 1)) { + X <- as.matrix(as.vector(Y), ncol=1) + + v <- min_MATLAB(X)$mins + k <- min_MATLAB(X)$idx + i <- floor(m + 1 / 2 - sqrt(m ^ 2 - m + 1 / 4 - 2 * (k - 1))) j <- k - (i - 1) * (m - i / 2) + i - Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A - # Temp variables - I1 <- 1:(i - 1) - I2 <- (i + 1):(j - 1) - I3 <- (j + 1):m + Z[s, ] <- c(R[i], R[j], v) # update one more row to the output matrix A + + # Temp variables + if (i > 1) { + I1 <- 1:(i - 1) + } else { + I1 <- NULL + } + if (i + 1 <= j - 1) { + I2 <- (i + 1):(j - 1) + } else { + I2 <- NULL + } + if (j + 1 <= m) { + I3 <- (j + 1):m + } else { + I3 <- NULL + } U <- c(I1, I2, I3) I <- c( I1 * (m - (I1 + 1) / 2) - m + i, @@ -47,11 +66,13 @@ linkage <- function(Y, method = 'co') { I2 * (m - (I2 + 1) / 2) - m + j, j * (m - (j + 1) / 2) - m + I3 ) - + # Workaround in R for negative values in I and J + # I <- I[I > 0 & I <= length(Y)] + # J <- J[J > 0 & J <= length(Y)] switch(method, - 'si' = Y[I] <- min(Y[I], Y[J]), # single linkage + 'si' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, min), # single linkage 'av' = Y[I] <- Y[I] + Y[J], # average linkage - 'co' = Y[I] <- max(Y[I], Y[J]), #complete linkage + 'co' = Y[I] <- apply(cbind(Y[I], Y[J]), 1, max), #complete linkage 'ce' = { K <- N[R[i]] + N[R[j]] # centroid linkage Y[I] <- (N[R[i]] * Y[I] + N[R[j]] * Y[J] - @@ -61,8 +82,7 @@ linkage <- function(Y, method = 'co') { Y[J] - N[R[U]] * v) / (N[R[i]] + N[R[j]] + N[R[U]]) ) J <- c(J, i * (m - (i + 1) / 2) - m + j) - Y[J] <- vector() # no need for the cluster information about j - + Y <- Y[-J] # no need for the cluster information about j # update m, N, R m <- m - 1 N[n + s] <- N[R[i]] + N[R[j]] From a31fafd5de47b6f84a22a5204d86ce45399beb07 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 31 Jul 2020 13:59:30 +0200 Subject: [PATCH 12/71] Added test for linkage() --- tests/testthat/test-greedyMix.R | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index 18e72e7..9a1c05a 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -1,11 +1,21 @@ # library(devtools)#TEMP -# library(testthat)#TEMP +library(testthat)#TEMP # library(rBAPS)#TEMP context("Opening files on greedyMix") -# greedyMix( -# tietue = "data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt", -# format = "GenePop", -# savePreProcessed = FALSE -# ) \ No newline at end of file +greedyMix( + tietue = "data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt", + format = "GenePop", + savePreProcessed = FALSE +) + +context("Linkage") + +test_that("Linkages are properly calculated", { + Y <- c(0.5, 0.3, 0.6, 0.3, 0.3, 0.2, 0.3, 0.3, 0.3, 0.5) + expect_equal( + object = linkage(Y), + expected = matrix(c(2, 1, 7, 8, 4, 3, 5, 6, .2, .3, .3, .6), ncol=3) + ) +}) \ No newline at end of file From 05d8f16975af4a41ce749ab0f5c85cdbda766489 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 31 Jul 2020 13:59:40 +0200 Subject: [PATCH 13/71] Fixed newGetDistances() --- R/newGetDistances.R | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/R/newGetDistances.R b/R/newGetDistances.R index 896b1a3..b386afa 100644 --- a/R/newGetDistances.R +++ b/R/newGetDistances.R @@ -5,14 +5,14 @@ newGetDistances <- function(data, rowsFromInd) { empties <- find(data < 0) data[empties] <- 0 - data <- as.integer(data) # max(noalle) oltava <256 + data <- apply(data, 2, as.numeric) # max(noalle) oltava <256 pariTaulu <- zeros(riviLkm, 2) aPointer <- 1 - for (a in (1:ninds) - 1) { - pariTaulu[aPointer:(aPointer + ninds - 1 - a), 1] <- - ones(ninds - a, 1) * a - pariTaulu[aPointer:aPointer + ninds - 1 - a, 2] <- t(a + 1:ninds) + for (a in 1:(ninds - 1)) { + pariTaulu_rows <- aPointer:(aPointer + ninds - 1 - a) + pariTaulu[pariTaulu_rows, 1] <- ones(ninds - a, 1) * a + pariTaulu[pariTaulu_rows, 2] <- t((a + 1):ninds) aPointer <- aPointer + ninds - a } @@ -31,9 +31,9 @@ newGetDistances <- function(data, rowsFromInd) { rm(pariTaulu, miinus) x <- zeros(size(eka)) - x <- as.integer(x) + x <- apply(x, 2, as.integer) y <- zeros(size(toka)) - y <- as.integer(y) + y <- apply(y, 2, as.integer) for (j in 1:nloci) { for (k in 1:rowsFromInd) { @@ -57,7 +57,6 @@ newGetDistances <- function(data, rowsFromInd) { muut <- find(vertailuja > 0) dist[muut] <- summa[muut] / vertailuja[muut] rm(summa, vertailuja) - - Z = linkage(t(dist)) + Z <- linkage(t(dist)) return(list(Z = Z, dist = dist)) } \ No newline at end of file From f89a6e9fa3653b4bee76b60ceea0d0f1969c7801 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 31 Jul 2020 14:00:29 +0200 Subject: [PATCH 14/71] Fixed syntax --- R/greedyMix.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/greedyMix.R b/R/greedyMix.R index a9573cf..3e02d75 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -166,8 +166,8 @@ greedyMix <- function( priorTerm <- list_dranap$prioterm list_Zd <- newGetDistances(data,rowsFromInd) # FIXME: debug - Z <- lizt_Zd$Z - dist <- lizt_Zd$dist + Z <- list_Zd$Z + dist <- list_Zd$dist if (is.null(savePreProcessed)) { save_preproc <- questdlg( From d57be6a8f3b8e519cb05abd50f63e0aafd11239c Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Fri, 31 Jul 2020 14:05:40 +0200 Subject: [PATCH 15/71] Updated documentation --- NAMESPACE | 3 ++ R/greedyMix.R | 62 +++++++++++++++--------------- man/fgetl.Rd | 2 +- man/fopen.Rd | 2 +- man/linkage.Rd | 7 +++- man/lueNimi.Rd | 2 +- man/min_MATLAB.Rd | 22 +++++++++++ man/rivinSisaltamienMjonojenLkm.Rd | 2 +- man/testaaGenePopData.Rd | 6 +++ man/testaaPop.Rd | 2 +- 10 files changed, 73 insertions(+), 37 deletions(-) create mode 100644 man/min_MATLAB.Rd diff --git a/NAMESPACE b/NAMESPACE index b7d74df..2dd1c5e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,8 @@ export(computeIndLogml) export(computePersonalAllFreqs) export(computeRows) export(etsiParas) +export(fgetl) +export(fopen) export(greedyMix) export(handleData) export(initPopNames) @@ -21,6 +23,7 @@ export(linkage) export(logml2String) export(lueGenePopData) export(lueNimi) +export(min_MATLAB) export(noIndex) export(ownNum2Str) export(poistaLiianPienet) diff --git a/R/greedyMix.R b/R/greedyMix.R index 3e02d75..ef91b17 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -156,8 +156,7 @@ greedyMix <- function( # h0 = findobj('Tag','filename1_text'); # set(h0,'String',filename); clear h0; - browser()#TEMP - list_dranap <- handleData(data) # FIXME: debug + list_dranap <- handleData(data) data <- list_dranap$newData rowsFromInd <- list_dranap$rowsFromInd alleleCodes <- list_dranap$alleleCodes @@ -253,7 +252,7 @@ greedyMix <- function( POP_LOGML <- vector() clearGlobalVars <- vector() # ========================================================================== - + c <- list() c$data <- data c$noalle <- noalle c$adjprior <- adjprior @@ -267,34 +266,34 @@ greedyMix <- function( c$rows <- c(ekat, ekat + rowsFromInd - 1) # partition compare - if (!is.null(partitionCompare)) { - nsamplingunits <- size(c$rows, 1) - partitions <- partitionCompare$partitions - npartitions <- size(partitions, 2) - partitionLogml <- zeros(1, npartitions) - for (i in seq_len(npartitions)) { - # number of unique partition lables - npops <- length(unique(partitions[, i])) + # if (!is.null(partitionCompare)) { + # nsamplingunits <- size(c$rows, 1) + # partitions <- partitionCompare$partitions + # npartitions <- size(partitions, 2) + # partitionLogml <- zeros(1, npartitions) + # for (i in seq_len(npartitions)) { + # # number of unique partition lables + # npops <- length(unique(partitions[, i])) - partitionInd <- zeros(ninds * rowsFromInd, 1) - partitionSample <- partitions[, i] - for (j in seq_len(nsamplingunits)) { - partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j] - } - # partitionLogml[i] = initialCounts( - # partitionInd, - # data[, seq_len(end - 1)], - # npops, - # c$rows, - # noalle, - # adjprior - # ) #TODO translate - } - # return the logml result - partitionCompare$logmls <- partitionLogml - # set(h1, 'userdata', partitionCompare) # ASK remove? - return() - } + # partitionInd <- zeros(ninds * rowsFromInd, 1) + # partitionSample <- partitions[, i] + # for (j in seq_len(nsamplingunits)) { + # partitionInd[c$rows[j, 1]:c$rows[j, 2]] <- partitionSample[j] + # } + # # partitionLogml[i] = initialCounts( + # # partitionInd, + # # data[, seq_len(end - 1)], + # # npops, + # # c$rows, + # # noalle, + # # adjprior + # # ) #TODO translate + # } + # # return the logml result + # partitionCompare$logmls <- partitionLogml + # # set(h1, 'userdata', partitionCompare) # ASK remove? + # return() + # } # ASK remove (graphical part)? # if (fixedK) { @@ -312,10 +311,11 @@ greedyMix <- function( # h0 = findobj('Tag','filename2_text') # outp = get(h0,'String'); + browser() # TEMP changesInLogml <- writeMixtureInfo( logml, rowsFromInd, data, adjprior, priorTerm, outp, inp, popnames, fixedK - ) # FIXMEL depends on get function above + ) # FIXME: broken # viewMixPartition(PARTITION, popnames) # ASK translate? On graph folder diff --git a/man/fgetl.Rd b/man/fgetl.Rd index cd49776..be0116b 100644 --- a/man/fgetl.Rd +++ b/man/fgetl.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fgetl.R +% Please edit documentation in R/fgetl-fopen.R \name{fgetl} \alias{fgetl} \title{Read line from file, removing newline characters} diff --git a/man/fopen.Rd b/man/fopen.Rd index 3e2d3a7..e1806eb 100644 --- a/man/fopen.Rd +++ b/man/fopen.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fgetl.R +% Please edit documentation in R/fgetl-fopen.R \name{fopen} \alias{fopen} \title{Open file} diff --git a/man/linkage.Rd b/man/linkage.Rd index 4f75835..5014a8b 100644 --- a/man/linkage.Rd +++ b/man/linkage.Rd @@ -7,7 +7,7 @@ linkage(Y, method = "co") } \arguments{ -\item{Y}{data} +\item{Y}{matrix} \item{method}{either 'si', 'av', 'co' 'ce' or 'wa'} } @@ -19,4 +19,9 @@ Z = LINKAGE(Y) creates a hierarchical cluster tree, using the single linkage algorithm. The input Y is a distance matrix such as is generated by PDIST. Y may also be a more general dissimilarity matrix conforming to the output format of PDIST. + +Z = linkage(X) returns a matrix Z that encodes a tree containing hierarchical clusters of the rows of the input data matrix X. +} +\note{ +This is also a base Matlab function. The reason why the source code is also present here is unclear. } diff --git a/man/lueNimi.Rd b/man/lueNimi.Rd index 74cd665..38b9940 100644 --- a/man/lueNimi.Rd +++ b/man/lueNimi.Rd @@ -13,5 +13,5 @@ lueNimi(line) nimi } \description{ -Reads the line name +Returns the part of the line from the beginning that is before the comma. Useful for returning the name of a GenePop area } diff --git a/man/min_MATLAB.Rd b/man/min_MATLAB.Rd new file mode 100644 index 0000000..9376fed --- /dev/null +++ b/man/min_MATLAB.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/min.R +\name{min_MATLAB} +\alias{min_MATLAB} +\title{Minimum (MATLAB version)} +\usage{ +min_MATLAB(X, indices = TRUE) +} +\arguments{ +\item{X}{matrix} + +\item{indices}{return indices?} +} +\value{ +Either a list or a vector +} +\description{ +Finds the minimum value for each column of a matrix, potentially returning the indices instead +} +\author{ +Waldir Leoncio +} diff --git a/man/rivinSisaltamienMjonojenLkm.Rd b/man/rivinSisaltamienMjonojenLkm.Rd index bb7a547..e9a162c 100644 --- a/man/rivinSisaltamienMjonojenLkm.Rd +++ b/man/rivinSisaltamienMjonojenLkm.Rd @@ -13,5 +13,5 @@ rivinSisaltamienMjonojenLkm(line) count } \description{ -Number of M queues +Returns the number of queues contained in the line. There must be a space between the queues. } diff --git a/man/testaaGenePopData.Rd b/man/testaaGenePopData.Rd index 1f8864d..d711514 100644 --- a/man/testaaGenePopData.Rd +++ b/man/testaaGenePopData.Rd @@ -9,6 +9,12 @@ testaaGenePopData(tiedostonNimi) \arguments{ \item{tiedostonNimi}{Filename} } +\value{ +kunnossa (binary "ok" condition value) == 0 if the data is not valid genePop data. Otherwise, kunnossa == 1. +} \description{ Tests GenePop data } +\details{ +GenePop data are textfiles that follow the GenePop format. This function checks if such file is properly formatted as GenePop. +} diff --git a/man/testaaPop.Rd b/man/testaaPop.Rd index 873dd6a..b83bc9f 100644 --- a/man/testaaPop.Rd +++ b/man/testaaPop.Rd @@ -10,7 +10,7 @@ testaaPop(rivi) \item{rivi}{Line} } \value{ -pal +pal = 1 if the line starts with one of the following } \description{ Test a line in the population From 86afa61b12d0a29b0f65b184f5d3573df0ae2c3a Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 08:24:55 +0200 Subject: [PATCH 16/71] Moved dataset locaiton --- ...A sequence data in concatenated Excel format.xls | Bin ...ata in GENEPOP format for Trained clustering.txt | 0 ... for the spatial clustering of DNA sequences.txt | 0 ...ample coordinates for the spatial clustering.txt | 0 ...format for clustering of diploid individuals.txt | 0 ...format for clustering of haploid individuals.txt | 0 ...mat for clustering of tetraploid individuals.txt | 0 ...group-wise clustering of diploid individuals.txt | 0 ...group-wise clustering of haploid individuals.txt | 0 ...up-wise clustering of tetraploid individuals.txt | 0 ...rmat for clustering of haploid individuals.fasta | 0 ...equence file for clustering with linked loci.txt | 0 ...equence file for clustering with linked loci.txt | 0 ...equence file for clustering with linked loci.txt | 0 ... fasta formatted sequence file for gene recA.txt | 0 ...s for a concatenated sequence in BAPS format.txt | 0 ...analysis based on user-specified populations.txt | 0 .../Example of an MLST profile file for 6 genes.txt | 0 .../Example of hierBAPS input in FASTA format.fa | 0 .../Example of hierBAPS input in XLS format.xls | Bin ...dex file to be used with BAPS formatted data.txt | 0 ...mes file to be used with BAPS formatted data.txt | 0 ...ata in GENEPOP format for Trained clustering.txt | 0 ...ata in GENEPOP format for Trained clustering.txt | 0 ...st data in XLS format for Trained clustering.xls | Bin ...ng data in XLS format for Trained clustering.xls | Bin {data => inst/ext}/ExamplesDataFormatting/Notes.md | 0 27 files changed, 0 insertions(+), 0 deletions(-) rename {data => inst/ext}/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls (100%) rename {data => inst/ext}/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls (100%) rename {data => inst/ext}/ExamplesDataFormatting/Notes.md (100%) diff --git a/data/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls b/inst/ext/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls similarity index 100% rename from data/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls rename to inst/ext/ExamplesDataFormatting/Example MLST DNA sequence data in concatenated Excel format.xls diff --git a/data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt b/inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt rename to inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt diff --git a/data/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt b/inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt similarity index 100% rename from data/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt rename to inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering of DNA sequences.txt diff --git a/data/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt b/inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt rename to inst/ext/ExamplesDataFormatting/Example coordinates for the spatial clustering.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of diploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of haploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for clustering of tetraploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of diploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of haploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt b/inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt similarity index 100% rename from data/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt rename to inst/ext/ExamplesDataFormatting/Example data in BAPS format for group-wise clustering of tetraploid individuals.txt diff --git a/data/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta b/inst/ext/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta similarity index 100% rename from data/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta rename to inst/ext/ExamplesDataFormatting/Example data in FASTA format for clustering of haploid individuals.fasta diff --git a/data/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt b/inst/ext/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt rename to inst/ext/ExamplesDataFormatting/Example of a BAPS formatted diploid sequence file for clustering with linked loci.txt diff --git a/data/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt b/inst/ext/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt rename to inst/ext/ExamplesDataFormatting/Example of a BAPS formatted haploid sequence file for clustering with linked loci.txt diff --git a/data/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt b/inst/ext/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt rename to inst/ext/ExamplesDataFormatting/Example of a BAPS formatted tetraploid sequence file for clustering with linked loci.txt diff --git a/data/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt b/inst/ext/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt rename to inst/ext/ExamplesDataFormatting/Example of a fasta formatted sequence file for gene recA.txt diff --git a/data/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt b/inst/ext/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt rename to inst/ext/ExamplesDataFormatting/Example of a file specifying gene boundaries for a concatenated sequence in BAPS format.txt diff --git a/data/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt b/inst/ext/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt rename to inst/ext/ExamplesDataFormatting/Example of a partition file for admixture analysis based on user-specified populations.txt diff --git a/data/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt b/inst/ext/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt rename to inst/ext/ExamplesDataFormatting/Example of an MLST profile file for 6 genes.txt diff --git a/data/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa b/inst/ext/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa similarity index 100% rename from data/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa rename to inst/ext/ExamplesDataFormatting/Example of hierBAPS input in FASTA format.fa diff --git a/data/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls b/inst/ext/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls similarity index 100% rename from data/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls rename to inst/ext/ExamplesDataFormatting/Example of hierBAPS input in XLS format.xls diff --git a/data/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt b/inst/ext/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt rename to inst/ext/ExamplesDataFormatting/Example of sample population index file to be used with BAPS formatted data.txt diff --git a/data/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt b/inst/ext/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt similarity index 100% rename from data/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt rename to inst/ext/ExamplesDataFormatting/Example of sample population names file to be used with BAPS formatted data.txt diff --git a/data/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt b/inst/ext/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt rename to inst/ext/ExamplesDataFormatting/Example pre-grouped sample data in GENEPOP format for Trained clustering.txt diff --git a/data/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt b/inst/ext/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt similarity index 100% rename from data/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt rename to inst/ext/ExamplesDataFormatting/Example sample data in GENEPOP format for Trained clustering.txt diff --git a/data/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls b/inst/ext/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls similarity index 100% rename from data/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls rename to inst/ext/ExamplesDataFormatting/Example test data in XLS format for Trained clustering.xls diff --git a/data/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls b/inst/ext/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls similarity index 100% rename from data/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls rename to inst/ext/ExamplesDataFormatting/Example training data in XLS format for Trained clustering.xls diff --git a/data/ExamplesDataFormatting/Notes.md b/inst/ext/ExamplesDataFormatting/Notes.md similarity index 100% rename from data/ExamplesDataFormatting/Notes.md rename to inst/ext/ExamplesDataFormatting/Notes.md From bf6c08d3eb862a931668be0a4fe162c412c6c41b Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 08:29:50 +0200 Subject: [PATCH 17/71] Ignoring example data --- .Rbuildignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index 9238dee..548f11c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,4 +4,4 @@ matlab CHANGELOG.md CITATION.cff .travis.yml -data/ExamplesDataFormatting \ No newline at end of file +inst/ext/ExamplesDataFormatting \ No newline at end of file From 03089d7e42bdb2d0066b1c6bd0818bcce1b1d044 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 08:44:10 +0200 Subject: [PATCH 18/71] Added basic nargin function --- R/nargin.R | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 R/nargin.R diff --git a/R/nargin.R b/R/nargin.R new file mode 100644 index 0000000..e843e87 --- /dev/null +++ b/R/nargin.R @@ -0,0 +1,11 @@ +#' @title Number of function input arguments +#' @description Returns the number of arguments passed to the parent function +#' @return An integer +#' @author Waldir Leoncio +#' @note This function only makes sense inside another function +nargin <- function() { + # FIXME: returning 0 because it is using its own envir instead of parent's + print(parent.env(environment())) + length(as.list(match.call(envir = parent.env(environment())))) - 1 + # length(ls(envir=parent.env(environment()))) - 1 +} \ No newline at end of file From 3250349c6069c35409df95e19d8c5979458dc9fb Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 09:49:30 +0200 Subject: [PATCH 19/71] Added nargin function --- R/nargin.R | 9 ++++---- man/nargin.Rd | 23 ++++++++++++++++++++ tests/testthat/test-convertedBaseFunctions.R | 16 ++++++++++++++ 3 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 man/nargin.Rd diff --git a/R/nargin.R b/R/nargin.R index e843e87..6ff20cc 100644 --- a/R/nargin.R +++ b/R/nargin.R @@ -3,9 +3,8 @@ #' @return An integer #' @author Waldir Leoncio #' @note This function only makes sense inside another function +#' @references https://stackoverflow.com/q/64422780/1169233 nargin <- function() { - # FIXME: returning 0 because it is using its own envir instead of parent's - print(parent.env(environment())) - length(as.list(match.call(envir = parent.env(environment())))) - 1 - # length(ls(envir=parent.env(environment()))) - 1 -} \ No newline at end of file + if(sys.nframe() < 2) stop("must be called from inside a function") + length(as.list(sys.call(-1))) - 1 +} diff --git a/man/nargin.Rd b/man/nargin.Rd new file mode 100644 index 0000000..a7922a6 --- /dev/null +++ b/man/nargin.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nargin.R +\name{nargin} +\alias{nargin} +\title{Number of function input arguments} +\usage{ +nargin() +} +\value{ +An integer +} +\description{ +Returns the number of arguments passed to the parent function +} +\note{ +This function only makes sense inside another function +} +\references{ +https://stackoverflow.com/q/64422780/1169233 +} +\author{ +Waldir Leoncio +} diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index bfc3fe8..11988f8 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -201,4 +201,20 @@ test_that("isspace works as expected", { X <- '\t a b\tcde f' expect_identical(isspace(chr), c(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0)) expect_identical(isspace(X), c(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0)) +}) + +test_that("nargin works correctly", { + addme <- function(a, b) { + if (nargin() == 2) { + c <- a + b + } else if (nargin() == 1) { + c <- a + a + } else { + c <- 0 + } + return(c) + } + expect_equal(addme(13, 42), 55) + expect_equal(addme(13), 26) + expect_equal(addme(), 0) }) \ No newline at end of file From dde6dd2e3ac35a50aa37ae0836903758eb16202b Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 13:11:52 +0200 Subject: [PATCH 20/71] Added MATLAB versions of min and max --- R/min_MATLAB.R | 33 +++++++++++++++++++++++++++++++++ R/min_max_MATLAB.R | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 R/min_MATLAB.R create mode 100644 R/min_max_MATLAB.R diff --git a/R/min_MATLAB.R b/R/min_MATLAB.R new file mode 100644 index 0000000..af91efd --- /dev/null +++ b/R/min_MATLAB.R @@ -0,0 +1,33 @@ +#' @title Minimum (MATLAB version) +#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead +#' @param X matrix +#' @param indices return indices? +#' @return Either a list or a vector +#' @author Waldir Leoncio +#' @export +min_MATLAB <- function(X, indices = TRUE) { + mins <- apply(X, 2, min) + idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) + if (indices) { + return(list(mins = mins, idx = idx)) + } else { + return(mins) + } +} + +#' @title Minimum (MATLAB version) +#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead +#' @param X matrix +#' @param indices return indices? +#' @return Either a list or a vector +#' @author Waldir Leoncio +#' @export +min_MATLAB <- function(X, indices = TRUE) { + mins <- apply(X, 2, min) + idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) + if (indices) { + return(list(mins = mins, idx = idx)) + } else { + return(mins) + } +} \ No newline at end of file diff --git a/R/min_max_MATLAB.R b/R/min_max_MATLAB.R new file mode 100644 index 0000000..8f5069f --- /dev/null +++ b/R/min_max_MATLAB.R @@ -0,0 +1,33 @@ +#' @title Minimum (MATLAB version) +#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead +#' @param X matrix +#' @param indices return indices? +#' @return Either a list or a vector +#' @author Waldir Leoncio +#' @export +min_MATLAB <- function(X, indices = TRUE) { + mins <- apply(X, 2, min) + idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) + if (indices) { + return(list(mins = mins, idx = idx)) + } else { + return(mins) + } +} + +#' @title Maximum (MATLAB version) +#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead +#' @param X matrix +#' @param indices return indices? +#' @return Either a list or a vector +#' @author Waldir Leoncio +#' @export +max_MATLAB <- function(X, indices = TRUE) { + maxs <- apply(X, 2, max) + idx <- sapply(seq_len(ncol(X)), function(x) match(maxs[x], X[, x])) + if (indices) { + return(list(maxs = maxs, idx = idx)) + } else { + return(maxs) + } +} \ No newline at end of file From 542297ee82028ec0f99b92864e8d4cd54d85cba4 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 13:33:41 +0200 Subject: [PATCH 21/71] Translated indMix() --- R/indMix.R | 1316 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1316 insertions(+) create mode 100644 R/indMix.R diff --git a/R/indMix.R b/R/indMix.R new file mode 100644 index 0000000..1d62ac9 --- /dev/null +++ b/R/indMix.R @@ -0,0 +1,1316 @@ +indMix <- function(c, npops, dispText) { + # Greedy search algorithm with unknown number of classes for regular + # clustering. + # Input npops is not used if called by greedyMix or greedyPopMix. + + logml <- 1 + clearGlobalVars() + + noalle <- c$noalle + rows <- c$rows + data <- c$data + + adjprior <- c$adjprior + priorTerm <- c$priorTerm + rowsFromInd <- c$rowsFromInd + + if (isfield(c, 'dist')) { + dist <- c$dist + Z <- c$Z + } + + rm(c) + nargin <- length(as.list(match.call())) - 1 + + if (nargin < 2) { + dispText <- 1 + npopstext <- matrix() + ready <- FALSE + teksti <- 'Input upper bound to the number of populations (possibly multiple values)' + while (!ready) { + npopstextExtra <- inputdlg( + teksti, + 1, + '20' + ) + if (isempty(npopstextExtra)) { # Painettu Cancel:ia + return() + } + npopstextExtra <- npopstextExtra[1] + if (length(npopstextExtra)>=255) { + npopstextExtra <- npopstextExtra[1:255] + npopstext <- c(npopstext, ' ', npopstextExtra) + teksti <- 'The input field length limit (255 characters) was reached. Input more values: ' + } else { + npopstext <- c(npopstext, ' ', npopstextExtra) + ready <- TRUE + } + } + rm(ready, teksti) + if (isempty(npopstext) | length(npopstext) == 1) { + return() + } else { + npopsTaulu <- as.numeric(npopstext) + ykkoset <- find(npopsTaulu == 1) + npopsTaulu(ykkoset) <- list() # Mik�li ykk�si� annettu yl�rajaksi, ne poistetaan. + if (isempty(npopsTaulu)) { + logml <- 1 + partitionSummary <- 1 + npops <- 1 + return() + } + rm(ykkoset) + } + } else { + npopsTaulu <- npops + } + + nruns <- length(npopsTaulu) + + initData <- data + data <- data[,1:(end - 1)] + + logmlBest <- -1e50 + partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml) + partitionSummary[,1] <- zeros(30, 1) + worstLogml <- -1e50 + worstIndex <- 1 + for (run in 1:nruns) { + npops <- npopsTaulu(run) + if (dispText) { + dispLine() + print( + paste0( + 'Run ', num2str(run), '/', num2str(nruns), + ', maximum number of populations ', num2str(npops), '.' + ) + ) + } + ninds <- size(rows, 1) + + initialPartition <- admixture_initialization(initData, npops, Z) # TODO: translate + sumcounts_counts_logml = initialCounts( + initialPartition, data, npops, rows, noalle, adjprior + ) # TODO: translate + sumcounts <- sumcounts_counts_logml$sumcounts + counts <- sumcounts_counts_logml$counts + logml <- sumcounts_counts_logml$logml + + PARTITION <- zeros(ninds, 1) + for (i in 1:ninds) { + apu <- rows[i] + PARTITION[i] <- initialPartition(apu[1]) + } + + COUNTS <- counts + SUMCOUNTS <- sumcounts + POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) # TODO: translate + LOGDIFF <- repmat(-Inf, ninds, npops) + rm(initialPartition, counts, sumcounts) + + # PARHAAN MIXTURE-PARTITION ETSIMINEN + nRoundTypes <- 7 + kokeiltu <- zeros(nRoundTypes, 1) + roundTypes <- c(1, 1) # Ykk�svaiheen sykli kahteen kertaan. + ready <- 0 + vaihe <- 1 + + if (dispText) { + print(' ') + print( + paste0( + 'Mixture analysis started with initial', + num2str(npops), + 'populations.' + ) + ) + } + + while (ready != 1) { + muutoksia <- 0 + + if (dispText) { + print(paste('Performing steps:', num2str(roundTypes))) + } + + for (n in 1:length(roundTypes)) { + + round <- roundTypes[n] + kivaluku <- 0 + + if (kokeiltu(round) == 1) { #Askelta kokeiltu viime muutoksen j�lkeen + + } else if (round == 0 | round == 1) { #Yksil�n siirt�minen toiseen populaatioon. + inds <- 1:ninds + aputaulu <- c(t(inds), rand(ninds, 1)) + aputaulu <- sortrows(aputaulu, 2) + inds <- t(aputaulu[, 1]) + muutosNyt <- 0 + + for (ind in inds) { + i1 <- PARTITION[ind] + muutokset_diffInCounts = laskeMuutokset( + ind, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + + if (round == 1) { + maxMuutos <- max_MATLAB(muutokset)[[1]] + i2 <- max_MATLAB(muutokset)[[2]] + } + + if (i1 != i2 & maxMuutos > 1e-5) { + # Tapahtui muutos + muutoksia <- 1 + if (muutosNyt == 0) { + muutosNyt <- 1 + if (dispText) { + print('Action 1') + } + } + kokeiltu <- zeros(nRoundTypes, 1) + kivaluku <- kivaluku + 1 + updateGlobalVariables( + ind, i2, diffInCounts, adjprior, priorTerm + ) + logml <- logml+maxMuutos + if (logml > worstLogml) { + partitionSummary_added = addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary_added <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added == 1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } + } + + if (muutosNyt == 0) { + kokeiltu[round] <- 1 + } + + } else if (round == 2) { # Populaation yhdist�minen toiseen. + maxMuutos <- 0 + for (pop in 1:npops) { + muutokset_diffInCounts <- laskeMuutokset2( + pop, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + isoin <- max_MATLAB(muutokset)[[1]] + indeksi <- max_MATLAB(muutokset)[[2]] + if (isoin > maxMuutos) { + maxMuutos <- isoin + i1 <- pop + i2 <- indeksi + diffInCountsBest <- diffInCounts + } + } + + if (maxMuutos > 1e-5) { + muutoksia <- 1 + kokeiltu <- zeros(nRoundTypes, 1) + updateGlobalVariables2( + i1, i2, diffInCountsBest, adjprior, priorTerm + ) + logml <- logml + maxMuutos + if (dispText) { + print('Action 2') + } + if (logml > worstLogml) { + partitionSummary_added <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added==1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } else { + kokeiltu[round] <- 1 + } + + + } else if (round == 3 || round == 4) { #Populaation jakaminen osiin. + maxMuutos <- 0 + ninds <- size(rows, 1) + for (pop in 1:npops) { + inds2 <- find(PARTITION == pop) + ninds2 <- length(inds2) + if (ninds2 > 2) { + dist2 <- laskeOsaDist(inds2, dist, ninds) + Z2 <- linkage(t(dist2)) + if (round == 3) { + npops2 <- max(min(20, floor(ninds2 / 5)), 2) + } else if (round == 4) { + npops2 <- 2 # Moneenko osaan jaetaan + } + T2 <- cluster_own(Z2, npops2) + muutokset <- laskeMuutokset3( + T2, inds2, rows, data, adjprior, priorTerm, pop + ) + isoin <- max_MATLAB(muutokset)[[1]] + indeksi <- max_MATLAB(muutokset)[[2]] + if (isoin > maxMuutos) { + maxMuutos <- isoin + muuttuvaPop2 <- indeksi %% npops2 + if (muuttuvaPop2==0) muuttuvaPop2 <- npops2 + muuttuvat <- inds2[find(T2 == muuttuvaPop2)] + i2 <- ceiling(indeksi / npops2) + } + } + } + if (maxMuutos > 1e-5) { + muutoksia <- 1 + kokeiltu <- zeros(nRoundTypes, 1) + rivit <- list() + for (i in 1:length(muuttuvat)) { + ind <- muuttuvat[i] + lisa <- rows[ind, 1]:rows[ind, 2] + rivit <- rbind(rivit, t(lisa)) + } + diffInCounts <- computeDiffInCounts( + t(rivit), size(COUNTS, 1), size(COUNTS, 2), data + ) + i1 <- PARTITION(muuttuvat[1]) + updateGlobalVariables3( + muuttuvat, diffInCounts, adjprior, priorTerm, i2 + ) + logml <- logml + maxMuutos + if (dispText) { + if (round == 3) { + print('Action 3') + } else { + print('Action 4') + } + } + if (logml > worstLogml) { + partitionSummary_added <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added==1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + + } else { + kokeiltu[round] <- 1 + } + } else if (round == 5 || round == 6) { + j <- 0 + muutettu <- 0 + poplogml <- POP_LOGML + partition <- PARTITION + counts <- COUNTS + sumcounts <- SUMCOUNTS + logdiff <- LOGDIFF + + pops <- sample(npops) + while (j < npops & muutettu == 0) { + j <- j + 1 + pop <- pops[j] + totalMuutos <- 0 + inds <- find(PARTITION == pop) + if (round == 5) { + aputaulu <- c(inds, rand(length(inds), 1)) + aputaulu <- sortrows(aputaulu, 2) + inds <- t(aputaulu[, 1]) + } else if (round == 6) { + inds <- returnInOrder( + inds, pop, rows, data, adjprior, priorTerm + ) + } + + i <- 0 + + while (length(inds) > 0 & i < length(inds)) { + i <- i + 1 + ind <- inds[i] + + muutokset_diffInCounts <- laskeMuutokset( + ind, rows, data, adjprior, priorTerm + ) + muutokset <- muutokset_diffInCounts$muutokset + diffInCounts <- muutokset_diffInCounts$diffInCounts + + muutokset[pop] <- -1e50 # Varmasti ei suurin!!! + maxMuutos <- max_MATLAB(muutokset)[[1]] + i2 <- max_MATLAB(muutokset)[[2]] + updateGlobalVariables( + ind, i2, diffInCounts, adjprior, priorTerm + ) + + totalMuutos <- totalMuutos+maxMuutos + logml <- logml + maxMuutos + if (round == 6) { + # Lopetetaan heti kun muutos on positiivinen. + if (totalMuutos > 1e-5) { + i <- length(inds) + } + } + } + + if (totalMuutos > 1e-5) { + kokeiltu <- zeros(nRoundTypes, 1) + muutettu <- 1 + if (muutoksia == 0) { + muutoksia <- 1 # Ulompi kirjanpito. + if (dispText) { + if (round == 5) { + print('Action 5') + } else { + print('Action 6') + } + } + } + if (logml > worstLogml) { + partitionSummary_added <- addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added==1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + } else { + # Miss��n vaiheessa tila ei parantunut. + # Perutaan kaikki muutokset. + PARTITION <- partition + SUMCOUNTS <- sumcounts + POP_LOGML <- poplogml + COUNTS <- counts + logml <- logml - totalMuutos + LOGDIFF <- logdiff + kokeiltu[round] <- 1 + } + } + rm(partition, sumcounts, counts, poplogml) + + } else if (round == 7) { + emptyPop <- findEmptyPop(npops) + j <- 0 + pops <- sample(npops) + muutoksiaNyt <- 0 + if (emptyPop == -1) { + j <- npops + } + while (j < npops) { + j <- j + 1 + pop <- pops[j] + inds2 <- find(PARTITION == pop) + ninds2 <- length(inds2) + if (ninds2 > 5) { + partition <- PARTITION + sumcounts <- SUMCOUNTS + counts <- COUNTS + poplogml <- POP_LOGML + logdiff <- LOGDIFF + + dist2 <- laskeOsaDist(inds2, dist, ninds); + Z2 <- linkage(t(dist2)) + T2 <- cluster_own(Z2, 2) + muuttuvat <- inds2[find(T2 == 1)] + + muutokset <- laskeMuutokset3( + T2, inds2, rows, data, adjprior, priorTerm, pop + ) + totalMuutos <- muutokset(1, emptyPop) + + rivit <- list() + for (i in 1:length(muuttuvat)) { + ind <- muuttuvat[i] + lisa <- rows[ind, 1]:rows[ind, 2] + rivit <- c(rivit, lisa) + } + diffInCounts <- computeDiffInCounts( + rivit, size(COUNTS, 1), size(COUNTS, 2), data + ) + + updateGlobalVariables3( + muuttuvat, diffInCounts, adjprior, priorTerm, + emptyPop + ) + + muutettu <- 1 + while (muutettu == 1) { + muutettu <- 0 + # Siirret��n yksil�it� populaatioiden v�lill� + muutokset <- laskeMuutokset5( + inds2, rows, data, adjprior, priorTerm, + pop, emptyPop + ) + + maxMuutos <- indeksi <- max_MATLAB(muutokset) + + muuttuva <- inds2(indeksi) + if (PARTITION(muuttuva) == pop) { + i2 <- emptyPop + } else { + i2 <- pop + } + + if (maxMuutos > 1e-5) { + rivit <- rows[muuttuva, 1]:rows[muuttuva, 2] + diffInCounts <- computeDiffInCounts( + rivit, size(COUNTS, 1), size(COUNTS, 2), + data + ) + updateGlobalVariables3( + muuttuva,diffInCounts, adjprior, + priorTerm, i2 + ) + muutettu <- 1 + totalMuutos <- totalMuutos + maxMuutos + } + } + if (totalMuutos > 1e-5) { + muutoksia <- 1 + logml <- logml + totalMuutos + if (logml > worstLogml) { + partitionSummary_added = addToSummary( + logml, partitionSummary, worstIndex + ) + partitionSummary_added <- partitionSummary_added$partitionSummary + added <- partitionSummary_added$added + if (added == 1) { + worstLogml <- min_MATLAB(partitionSummary[, 2])[[1]] + worstIndex <- min_MATLAB(partitionSummary[, 2])[[2]] + } + } + if (muutoksiaNyt == 0) { + if (dispText) { + print('Action 7') + } + muutoksiaNyt <- 1 + } + kokeiltu <- zeros(nRoundTypes, 1) + j <- npops + } else { + # palutetaan vanhat arvot + PARTITION <- partition + SUMCOUNTS <- sumcounts + COUNTS <- counts + POP_LOGML <- poplogml + LOGDIFF <- logdiff + } + } + + } + + if (muutoksiaNyt == 0) { + kokeiltu[round] <- 1 + } + + } + } + + if (muutoksia == 0) { + if (vaihe <= 4) { + vaihe <= vaihe + 1 + } else if (vaihe == 5) { + ready <- 1 + } + } else { + muutoksia <- 0 + } + + if (ready == 0) { + if (vaihe == 1) { + roundTypes <- c(1) + } else if (vaihe == 2) { + roundTypes <- c(2, 1) + } else if (vaihe == 3) { + roundTypes <- c(5, 5, 7) + } else if (vaihe == 4) { + roundTypes = c(4, 3, 1) + } else if (vaihe == 5) { + roundTypes <- c(6, 7, 2, 3, 4, 1) + } + } + } + + # TALLENNETAAN + + npops <- poistaTyhjatPopulaatiot(npops) + POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) + if (dispText) { + print(paste('Found partition with', num2str(npops), 'populations.')) + print(paste('Log(ml) =', as.character(logml))) + print(' ') + } + + if (logml > logmlBest) { + # P�ivitet��n parasta l�ydetty� partitiota. + logmlBest <- logml + npopsBest <- npops + partitionBest <- PARTITION + countsBest <- COUNTS + sumCountsBest <- SUMCOUNTS + pop_logmlBest <- POP_LOGML + logdiffbest <- LOGDIFF + } + } + return( + list(logml = logml, npops = npops, partitionSummary = partitionSummary) + ) +} + +# logml = logmlBest; +# npops = npopsBest; +# PARTITION = partitionBest; +# COUNTS = countsBest; +# SUMCOUNTS = sumCountsBest; +# POP_LOGML = pop_logmlBest; +# LOGDIFF = logdiffbest; + +# %-------------------------------------------------------------------------- + +# function clearGlobalVars + +# global COUNTS; COUNTS = []; +# global SUMCOUNTS; SUMCOUNTS = []; +# global PARTITION; PARTITION = []; +# global POP_LOGML; POP_LOGML = []; +# global LOGDIFF; LOGDIFF = []; + +# %-------------------------------------------------------------------------- + + +# function Z = linkage(Y, method) +# [k, n] = size(Y); +# m = (1+sqrt(1+8*n))/2; +# if k ~= 1 | m ~= fix(m) +# error('The first input has to match the output of the PDIST function in size.'); +# end +# if nargin == 1 % set default switch to be 'co' +# method = 'co'; +# end +# method = lower(method(1:2)); % simplify the switch string. +# monotonic = 1; +# Z = zeros(m-1,3); % allocate the output matrix. +# N = zeros(1,2*m-1); +# N(1:m) = 1; +# n = m; % since m is changing, we need to save m in n. +# R = 1:n; +# for s = 1:(n-1) +# X = Y; +# [v, k] = min(X); +# i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); +# j = k - (i-1)*(m-i/2)+i; +# Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A +# I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. +# U = [I1 I2 I3]; +# I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; +# J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; + +# switch method +# case 'si' %single linkage +# Y(I) = min(Y(I),Y(J)); +# case 'av' % average linkage +# Y(I) = Y(I) + Y(J); +# case 'co' %complete linkage +# Y(I) = max(Y(I),Y(J)); +# case 'ce' % centroid linkage +# K = N(R(i))+N(R(j)); +# Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; +# case 'wa' +# Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... +# N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); +# end +# J = [J i*(m-(i+1)/2)-m+j]; +# Y(J) = []; % no need for the cluster information about j. + +# % update m, N, R +# m = m-1; +# N(n+s) = N(R(i)) + N(R(j)); +# R(i) = n+s; +# R(j:(n-1))=R((j+1):n); +# end + + +# %----------------------------------------------------------------------- + + +# function [sumcounts, counts, logml] = ... +# initialPopCounts(data, npops, rows, noalle, adjprior) + +# nloci=size(data,2); +# counts = zeros(max(noalle),nloci,npops); +# sumcounts = zeros(npops,nloci); + +# for i=1:npops +# for j=1:nloci +# i_rivit = rows(i,1):rows(i,2); +# havainnotLokuksessa = find(data(i_rivit,j)>=0); +# sumcounts(i,j) = length(havainnotLokuksessa); +# for k=1:noalle(j) +# alleleCode = k; +# N_ijk = length(find(data(i_rivit,j)==alleleCode)); +# counts(k,j,i) = N_ijk; +# end +# end +# end + +# logml = laskeLoggis(counts, sumcounts, adjprior); + +# %----------------------------------------------------------------------- + + +# function loggis = laskeLoggis(counts, sumcounts, adjprior) +# npops = size(counts,3); + +# logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ... +# - npops*sum(sum(gammaln(adjprior))) - ... +# sum(sum(gammaln(1+sumcounts))); +# loggis = logml2; + + +# %------------------------------------------------------------------------------------ + + +# function popLogml = computePopulationLogml(pops, adjprior, priorTerm) +# % Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset +# % logml:t koreille, jotka on m��ritelty pops-muuttujalla. + +# global COUNTS; +# global SUMCOUNTS; +# x = size(COUNTS,1); +# y = size(COUNTS,2); +# z = length(pops); + +# popLogml = ... +# squeeze(sum(sum(reshape(... +# gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... +# ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; +# %-------------------------------------------------------------------------- + + +# function [muutokset, diffInCounts] = ... +# laskeMuutokset(ind, globalRows, data, adjprior, priorTerm) +# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi +# % muutos logml:ss�, mik�li yksil� ind siirret��n koriin i. +# % diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v� +# % COUNTS:in siivuun i2, mik�li muutos toteutetaan. +# % +# % Lis�ys 25.9.2007: +# % Otettu k�ytt��n globaali muuttuja LOGDIFF, johon on tallennettu muutokset +# % logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. + +# global COUNTS; global SUMCOUNTS; +# global PARTITION; global POP_LOGML; +# global LOGDIFF; + +# npops = size(COUNTS,3); +# muutokset = LOGDIFF(ind,:); + +# i1 = PARTITION(ind); +# i1_logml = POP_LOGML(i1); +# muutokset(i1) = 0; + +# rows = globalRows(ind,1):globalRows(ind,2); +# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); +# diffInSumCounts = sum(diffInCounts); + +# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; +# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; +# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); +# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; +# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; + +# i2 = find(muutokset==-Inf); % Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. +# i2 = setdiff(i2,i1); +# i2_logml = POP_LOGML(i2); + +# ni2 = length(i2); + +# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 ni2]); +# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[ni2 1]); +# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); +# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 ni2]); +# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[ni2 1]); + +# muutokset(i2) = new_i1_logml - i1_logml ... +# + new_i2_logml - i2_logml; +# LOGDIFF(ind,:) = muutokset; + + +# %---------------------------------------------------------------------- + + +# function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) +# % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien +# % lukum��r�t (vastaavasti kuin COUNTS:issa), jotka ovat data:n +# % riveill� rows. rows pit�� olla vaakavektori. + +# diffInCounts = zeros(max_noalle, nloci); +# for i=rows +# row = data(i,:); +# notEmpty = find(row>=0); + +# if length(notEmpty)>0 +# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... +# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; +# end +# end + +# %------------------------------------------------------------------------ + + +# %------------------------------------------------------------------------------------- + + +# function updateGlobalVariables(ind, i2, diffInCounts, ... +# adjprior, priorTerm) +# % Suorittaa globaalien muuttujien muutokset, kun yksil� ind +# % on siirret��n koriin i2. + +# global PARTITION; +# global COUNTS; +# global SUMCOUNTS; +# global POP_LOGML; +# global LOGDIFF; + +# i1 = PARTITION(ind); +# PARTITION(ind)=i2; + +# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; +# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; +# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); +# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); + +# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); + +# LOGDIFF(:,[i1 i2]) = -Inf; +# inx = [find(PARTITION==i1); find(PARTITION==i2)]; +# LOGDIFF(inx,:) = -Inf; + + +# %-------------------------------------------------------------------------- +# %-- + +# %------------------------------------------------------------------------------------ + + +# function [muutokset, diffInCounts] = laskeMuutokset2( ... +# i1, globalRows, data, adjprior, priorTerm); +# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi +# % muutos logml:ss�, mik�li korin i1 kaikki yksil�t siirret��n +# % koriin i. + +# global COUNTS; global SUMCOUNTS; +# global PARTITION; global POP_LOGML; +# npops = size(COUNTS,3); +# muutokset = zeros(npops,1); + +# i1_logml = POP_LOGML(i1); + +# inds = find(PARTITION==i1); +# ninds = length(inds); + +# if ninds==0 +# diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); +# return; +# end + +# rows = []; +# for i = 1:ninds +# ind = inds(i); +# lisa = globalRows(ind,1):globalRows(ind,2); +# rows = [rows; lisa']; +# %rows = [rows; globalRows{ind}']; +# end + +# diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); +# diffInSumCounts = sum(diffInCounts); + +# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; +# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; +# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); +# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; +# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; + +# i2 = [1:i1-1 , i1+1:npops]; +# i2_logml = POP_LOGML(i2); + +# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); +# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); +# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); +# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); +# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); + +# muutokset(i2) = new_i1_logml - i1_logml ... +# + new_i2_logml - i2_logml; + + +# %--------------------------------------------------------------------------------- + + +# function updateGlobalVariables2( ... +# i1, i2, diffInCounts, adjprior, priorTerm); +# % Suorittaa globaalien muuttujien muutokset, kun kaikki +# % korissa i1 olevat yksil�t siirret��n koriin i2. + +# global PARTITION; +# global COUNTS; +# global SUMCOUNTS; +# global POP_LOGML; +# global LOGDIFF; + +# inds = find(PARTITION==i1); +# PARTITION(inds) = i2; + +# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; +# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; +# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); +# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); + +# POP_LOGML(i1) = 0; +# POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm); + +# LOGDIFF(:,[i1 i2]) = -Inf; +# inx = [find(PARTITION==i1); find(PARTITION==i2)]; +# LOGDIFF(inx,:) = -Inf; + + +# %-------------------------------------------------------------------------- +# %---- + +# function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... +# data, adjprior, priorTerm, i1) +# % Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio +# % kertoo, mik� olisi muutos logml:ss�, jos populaation i1 osapopulaatio +# % inds2(find(T2==i)) siirret��n koriin j. + +# global COUNTS; global SUMCOUNTS; +# global PARTITION; global POP_LOGML; +# npops = size(COUNTS,3); +# npops2 = length(unique(T2)); +# muutokset = zeros(npops2, npops); + +# i1_logml = POP_LOGML(i1); +# for pop2 = 1:npops2 +# inds = inds2(find(T2==pop2)); +# ninds = length(inds); +# if ninds>0 +# rows = []; +# for i = 1:ninds +# ind = inds(i); +# lisa = globalRows(ind,1):globalRows(ind,2); +# rows = [rows; lisa']; +# %rows = [rows; globalRows{ind}']; +# end +# diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); +# diffInSumCounts = sum(diffInCounts); + +# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; +# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; +# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); +# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; +# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; + +# i2 = [1:i1-1 , i1+1:npops]; +# i2_logml = POP_LOGML(i2)'; + +# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); +# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); +# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; +# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); +# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); + +# muutokset(pop2,i2) = new_i1_logml - i1_logml ... +# + new_i2_logml - i2_logml; +# end +# end + +# %------------------------------------------------------------------------------------ + +# function muutokset = laskeMuutokset5(inds, globalRows, data, adjprior, ... +# priorTerm, i1, i2) + +# % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik� olisi +# % muutos logml:ss�, mik�li yksil� i vaihtaisi koria i1:n ja i2:n v�lill�. + +# global COUNTS; global SUMCOUNTS; +# global PARTITION; global POP_LOGML; + +# ninds = length(inds); +# muutokset = zeros(ninds,1); + +# i1_logml = POP_LOGML(i1); +# i2_logml = POP_LOGML(i2); + +# for i = 1:ninds +# ind = inds(i); +# if PARTITION(ind)==i1 +# pop1 = i1; %mist� +# pop2 = i2; %mihin +# else +# pop1 = i2; +# pop2 = i1; +# end +# rows = globalRows(ind,1):globalRows(ind,2); +# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); +# diffInSumCounts = sum(diffInCounts); + +# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; +# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; +# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; +# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; + +# new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); +# muutokset(i) = sum(new_logmls); + +# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; +# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; +# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; +# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; +# end + +# muutokset = muutokset - i1_logml - i2_logml; + +# %------------------------------------------------------------------------------------ + + +# function updateGlobalVariables3(muuttuvat, diffInCounts, ... +# adjprior, priorTerm, i2); +# % Suorittaa globaalien muuttujien p�ivitykset, kun yksil�t 'muuttuvat' +# % siirret��n koriin i2. Ennen siirtoa yksil�iden on kuuluttava samaan +# % koriin. + +# global PARTITION; +# global COUNTS; +# global SUMCOUNTS; +# global POP_LOGML; +# global LOGDIFF; + +# i1 = PARTITION(muuttuvat(1)); +# PARTITION(muuttuvat) = i2; + +# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; +# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; +# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); +# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); + +# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); + +# LOGDIFF(:,[i1 i2]) = -Inf; +# inx = [find(PARTITION==i1); find(PARTITION==i2)]; +# LOGDIFF(inx,:) = -Inf; + + +# %---------------------------------------------------------------------------- + + +# function dist2 = laskeOsaDist(inds2, dist, ninds) +# % Muodostaa dist vektorista osavektorin, joka sis�lt�� yksil�iden inds2 +# % v�liset et�isyydet. ninds=kaikkien yksil�iden lukum��r�. + +# ninds2 = length(inds2); +# apu = zeros(nchoosek(ninds2,2),2); +# rivi = 1; +# for i=1:ninds2-1 +# for j=i+1:ninds2 +# apu(rivi, 1) = inds2(i); +# apu(rivi, 2) = inds2(j); +# rivi = rivi+1; +# end +# end +# apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); +# dist2 = dist(apu); + + +# %----------------------------------------------------------------------------------- + + +# function npops = poistaTyhjatPopulaatiot(npops) +# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja +# % SUMCOUNTS:ista. P�ivitt�� npops:in ja PARTITION:in. + +# global COUNTS; +# global SUMCOUNTS; +# global PARTITION; +# global LOGDIFF; + +# notEmpty = find(any(SUMCOUNTS,2)); +# COUNTS = COUNTS(:,:,notEmpty); +# SUMCOUNTS = SUMCOUNTS(notEmpty,:); +# LOGDIFF = LOGDIFF(:,notEmpty); + +# for n=1:length(notEmpty) +# apu = find(PARTITION==notEmpty(n)); +# PARTITION(apu)=n; +# end +# npops = length(notEmpty); + + +# %--------------------------------------------------------------- + + +# function dispLine; +# disp('---------------------------------------------------'); + +# %-------------------------------------------------------------- + +# function num2 = omaRound(num) +# % Py�rist�� luvun num 1 desimaalin tarkkuuteen +# num = num*10; +# num = round(num); +# num2 = num/10; + +# %--------------------------------------------------------- +# function mjono = logml2String(logml) +# % Palauttaa logml:n string-esityksen. + +# mjono = ' '; +# if abs(logml)<10000 +# %Ei tarvita e-muotoa +# mjono(7) = palautaYks(abs(logml),-1); +# mjono(6) = '.'; +# mjono(5) = palautaYks(abs(logml),0); +# mjono(4) = palautaYks(abs(logml),1); +# mjono(3) = palautaYks(abs(logml),2); +# mjono(2) = palautaYks(abs(logml),3); +# pointer = 2; +# while mjono(pointer)=='0' & pointer<7 +# mjono(pointer) = ' '; +# pointer=pointer+1; +# end +# if logml<0 +# mjono(pointer-1) = '-'; +# end +# else +# suurinYks = 4; +# while abs(logml)/(10^(suurinYks+1)) >= 1 +# suurinYks = suurinYks+1; +# end +# if suurinYks<10 +# mjono(7) = num2str(suurinYks); +# mjono(6) = 'e'; +# mjono(5) = palautaYks(abs(logml),suurinYks-1); +# mjono(4) = '.'; +# mjono(3) = palautaYks(abs(logml),suurinYks); +# if logml<0 +# mjono(2) = '-'; +# end +# elseif suurinYks>=10 +# mjono(6:7) = num2str(suurinYks); +# mjono(5) = 'e'; +# mjono(4) = palautaYks(abs(logml),suurinYks-1); +# mjono(3) = '.'; +# mjono(2) = palautaYks(abs(logml),suurinYks); +# if logml<0 +# mjono(1) = '-'; +# end +# end +# end + +# function digit = palautaYks(num,yks) +# % palauttaa luvun num 10^yks termin kertoimen +# % string:in� +# % yks t�ytyy olla kokonaisluku, joka on +# % v�hint��n -1:n suuruinen. Pienemmill� +# % luvuilla tapahtuu jokin py�ristysvirhe. + +# if yks>=0 +# digit = rem(num, 10^(yks+1)); +# digit = floor(digit/(10^yks)); +# else +# digit = num*10; +# digit = floor(rem(digit,10)); +# end +# digit = num2str(digit); + + +# function mjono = kldiv2str(div) +# mjono = ' '; +# if abs(div)<100 +# %Ei tarvita e-muotoa +# mjono(6) = num2str(rem(floor(div*1000),10)); +# mjono(5) = num2str(rem(floor(div*100),10)); +# mjono(4) = num2str(rem(floor(div*10),10)); +# mjono(3) = '.'; +# mjono(2) = num2str(rem(floor(div),10)); +# arvo = rem(floor(div/10),10); +# if arvo>0 +# mjono(1) = num2str(arvo); +# end + +# else +# suurinYks = floor(log10(div)); +# mjono(6) = num2str(suurinYks); +# mjono(5) = 'e'; +# mjono(4) = palautaYks(abs(div),suurinYks-1); +# mjono(3) = '.'; +# mjono(2) = palautaYks(abs(div),suurinYks); +# end + + +# %-------------------------------------------------------------------------- +# %Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. + +# function initial_partition=admixture_initialization(data_matrix,nclusters, Z) + +# size_data=size(data_matrix); +# nloci=size_data(2)-1; +# n=max(data_matrix(:,end)); +# T=cluster_own(Z,nclusters); +# initial_partition=zeros(size_data(1),1); +# for i=1:n +# kori=T(i); +# here=find(data_matrix(:,end)==i); +# for j=1:length(here) +# initial_partition(here(j),1)=kori; +# end +# end + +# function T = cluster_own(Z,nclust) +# true=logical(1); +# false=logical(0); +# maxclust = nclust; +# % Start of algorithm +# m = size(Z,1)+1; +# T = zeros(m,1); +# % maximum number of clusters based on inconsistency +# if m <= maxclust +# T = (1:m)'; +# elseif maxclust==1 +# T = ones(m,1); +# else +# clsnum = 1; +# for k = (m-maxclust+1):(m-1) +# i = Z(k,1); % left tree +# if i <= m % original node, no leafs +# T(i) = clsnum; +# clsnum = clsnum + 1; +# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree +# T = clusternum(Z, T, i-m, clsnum); +# clsnum = clsnum + 1; +# end +# i = Z(k,2); % right tree +# if i <= m % original node, no leafs +# T(i) = clsnum; +# clsnum = clsnum + 1; +# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree +# T = clusternum(Z, T, i-m, clsnum); +# clsnum = clsnum + 1; +# end +# end +# end + +# function T = clusternum(X, T, k, c) +# m = size(X,1)+1; +# while(~isempty(k)) +# % Get the children of nodes at this level +# children = X(k,1:2); +# children = children(:); + +# % Assign this node number to leaf children +# t = (children<=m); +# T(children(t)) = c; + +# % Move to next level +# k = children(~t) - m; +# end + +# %-------------------------------------------------------------------------- + +# function [sumcounts, counts, logml] = ... +# initialCounts(partition, data, npops, rows, noalle, adjprior) + +# nloci=size(data,2); +# ninds = size(rows, 1); + +# koot = rows(:,1) - rows(:,2) + 1; +# maxSize = max(koot); + +# counts = zeros(max(noalle),nloci,npops); +# sumcounts = zeros(npops,nloci); +# for i=1:npops +# for j=1:nloci +# havainnotLokuksessa = find(partition==i & data(:,j)>=0); +# sumcounts(i,j) = length(havainnotLokuksessa); +# for k=1:noalle(j) +# alleleCode = k; +# N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); +# counts(k,j,i) = N_ijk; +# end +# end +# end + +# %initializeGammaln(ninds, maxSize, max(noalle)); + +# logml = laskeLoggis(counts, sumcounts, adjprior); + +# %-------------------------------------------------------------------------- + + +# function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) +# % Tiedet��n, ett� annettu logml on isompi kuin huonoin arvo +# % partitionSummary taulukossa. Jos partitionSummary:ss� ei viel� ole +# % annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja +# % nykyist� partitiota vastaava nclusters:in arvo. Muutoin ei tehd� mit��n. + +# apu = find(abs(partitionSummary(:,2)-logml)<1e-5); +# if isempty(apu) +# % Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn. +# global PARTITION; +# npops = length(unique(PARTITION)); +# partitionSummary(worstIndex,1) = npops; +# partitionSummary(worstIndex,2) = logml; +# added = 1; +# else +# added = 0; +# end + +# %-------------------------------------------------------------------------- + +# function inds = returnInOrder(inds, pop, globalRows, data, ... +# adjprior, priorTerm) +# % Palauttaa yksil�t j�rjestyksess� siten, ett� ensimm�isen� on +# % se, jonka poistaminen populaatiosta pop nostaisi logml:n +# % arvoa eniten. + +# global COUNTS; global SUMCOUNTS; +# ninds = length(inds); +# apuTaulu = [inds, zeros(ninds,1)]; + +# for i=1:ninds +# ind =inds(i); +# rows = globalRows(i,1):globalRows(i,2); +# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); +# diffInSumCounts = sum(diffInCounts); + +# COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; +# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; +# apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); +# COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; +# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; +# end +# apuTaulu = sortrows(apuTaulu,2); +# inds = apuTaulu(ninds:-1:1,1); + +# %-------------------------------------------------------------------------- + +# function [emptyPop, pops] = findEmptyPop(npops) +# % Palauttaa ensimm�isen tyhj�n populaation indeksin. Jos tyhji� +# % populaatioita ei ole, palauttaa -1:n. + +# global PARTITION; +# pops = unique(PARTITION)'; +# if (length(pops) ==npops) +# emptyPop = -1; +# else +# popDiff = diff([0 pops npops+1]); +# emptyPop = min(find(popDiff > 1)); +# end From f886feb3a8bdc205542364019d32092d4306797f Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 13:44:18 +0200 Subject: [PATCH 22/71] Translated admixture_initialization --- R/admixture_initialization.R | 17 ++++++++++++++++ R/clearGlobalVars.R | 2 +- R/greedyMix.R | 36 ++++++++++++++++++++++----------- R/indMix.R | 16 --------------- R/min_MATLAB.R | 4 +++- R/writeMixtureInfo.R | 9 +-------- man/min_MATLAB.Rd | 10 ++++++++- man/writeMixtureInfo.Rd | 14 +------------ tests/testthat/test-greedyMix.R | 6 +----- 9 files changed, 57 insertions(+), 57 deletions(-) create mode 100644 R/admixture_initialization.R diff --git a/R/admixture_initialization.R b/R/admixture_initialization.R new file mode 100644 index 0000000..bc8166c --- /dev/null +++ b/R/admixture_initialization.R @@ -0,0 +1,17 @@ +#'@title Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. + +admixture_initialization <- function (data_matrix, nclusters, Z) { + size_data <- size(data_matrix) + nloci <- size_data[2] - 1 + n <- max(data_matrix[, end]) + T <- cluster_own(Z, nclusters) + initial_partition <- zeros(size_data[1], 1) + for (i in 1:n) { + kori <- T[i] + here <- find(data_matrix[,end] == i) + for (j in 1:length(here)) { + initial_partition[here[j], 1] <- kori + } + } + return(initial_partition) +} \ No newline at end of file diff --git a/R/clearGlobalVars.R b/R/clearGlobalVars.R index 1d77d9c..6cddf96 100644 --- a/R/clearGlobalVars.R +++ b/R/clearGlobalVars.R @@ -1,5 +1,5 @@ clearGlobalVars <- function() { - COUNTS <- SUMCOUNTS <- PARTITION <- POP_LOGML <- vector() # placeholders + # COUNTS <- SUMCOUNTS <- PARTITION <- POP_LOGML <- vector() # placeholders COUNTS <<- vector() SUMCOUNTS <<- vector() PARTITION <<- vector() diff --git a/R/greedyMix.R b/R/greedyMix.R index ef91b17..7b0004e 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -11,10 +11,11 @@ greedyMix <- function( savePreProcessed = NULL, filePreProcessed = NULL ) { - # ASK: graphical components. Remove? + # ASK: Unclear when fixedk == TRUE. Remove? # check whether fixed k mode is selected # h0 <- findobj('Tag','fixk_menu') # fixedK = get(h0, 'userdata'); + fixedK <- FALSE # if fixedK # if ~(fixKWarning == 1) % call function fixKWarning @@ -22,9 +23,11 @@ greedyMix <- function( # end # end + # ASK: ditto # % check whether partition compare mode is selected # h1 = findobj('Tag','partitioncompare_menu'); # partitionCompare = get(h1, 'userdata'); + partitionCompare <- FALSE if (is(tietue, "list") | is(tietue, "character")) { # ---------------------------------------------------------------------- @@ -244,13 +247,15 @@ greedyMix <- function( } # ========================================================================== - # Declaring global variables + # Declaring global variables and changing environment of children functions # ========================================================================== PARTITION <- vector() COUNTS <- vector() SUMCOUNTS <- vector() POP_LOGML <- vector() - clearGlobalVars <- vector() + clearGlobalVars() + + environment(writeMixtureInfo) <- environment() # ========================================================================== c <- list() c$data <- data @@ -265,6 +270,7 @@ greedyMix <- function( ekat <- t(seq(1, ninds, rowsFromInd) * rowsFromInd) c$rows <- c(ekat, ekat + rowsFromInd - 1) + # ASK remove? # partition compare # if (!is.null(partitionCompare)) { # nsamplingunits <- size(c$rows, 1) @@ -291,17 +297,22 @@ greedyMix <- function( # } # # return the logml result # partitionCompare$logmls <- partitionLogml - # # set(h1, 'userdata', partitionCompare) # ASK remove? + # # set(h1, 'userdata', partitionCompare) # return() # } - # ASK remove (graphical part)? - # if (fixedK) { - # #logml_npops_partitionSummary <- indMix_fixK(c) # ASK translate? - # } else { - # #logml_npops_partitionSummary <- indMix(c) # ASK translate? - # } - # if (logml_npops_partitionSummary$logml == 1) return() + if (fixedK) { + # logml_npops_partitionSummary <- indMix_fixK(c) # TODO: translate + # logml <- logml_npops_partitionSummary$logml + # npops <- logml_npops_partitionSummary$npops + # partitionSummary <- logml_npops_partitionSummary$partitionSummary + } else { + logml_npops_partitionSummary <- indMix(c) # TODO: translate + logml <- logml_npops_partitionSummary$logml + npops <- logml_npops_partitionSummary$npops + partitionSummary <- logml_npops_partitionSummary$partitionSummary + } + if (logml_npops_partitionSummary$logml == 1) return() data <- data[, seq_len(ncol(data) - 1)] @@ -310,8 +321,9 @@ greedyMix <- function( # inp = get(h0,'String'); # h0 = findobj('Tag','filename2_text') # outp = get(h0,'String'); + inp <- vector() + outp <- vector() - browser() # TEMP changesInLogml <- writeMixtureInfo( logml, rowsFromInd, data, adjprior, priorTerm, outp, inp, popnames, fixedK diff --git a/R/indMix.R b/R/indMix.R index 1d62ac9..1265423 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -1157,22 +1157,6 @@ indMix <- function(c, npops, dispText) { # %-------------------------------------------------------------------------- -# %Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -# function initial_partition=admixture_initialization(data_matrix,nclusters, Z) - -# size_data=size(data_matrix); -# nloci=size_data(2)-1; -# n=max(data_matrix(:,end)); -# T=cluster_own(Z,nclusters); -# initial_partition=zeros(size_data(1),1); -# for i=1:n -# kori=T(i); -# here=find(data_matrix(:,end)==i); -# for j=1:length(here) -# initial_partition(here(j),1)=kori; -# end -# end # function T = cluster_own(Z,nclust) # true=logical(1); diff --git a/R/min_MATLAB.R b/R/min_MATLAB.R index af91efd..2c749c6 100644 --- a/R/min_MATLAB.R +++ b/R/min_MATLAB.R @@ -30,4 +30,6 @@ min_MATLAB <- function(X, indices = TRUE) { } else { return(mins) } -} \ No newline at end of file +} + +# TODO: consider using methods instead (maybe for the package) \ No newline at end of file diff --git a/R/writeMixtureInfo.R b/R/writeMixtureInfo.R index ca8efea..ec5120b 100644 --- a/R/writeMixtureInfo.R +++ b/R/writeMixtureInfo.R @@ -10,16 +10,10 @@ #' @param partitionSummary partitionSummary #' @param popnames popnames #' @param fixedK fixedK -#' @param PARTITION PARTITION -#' @param COUNTS COUNTS -#' @param SUMCOUNTS SUMCOUNTS -#' @param LOGDIFF LOGDIFF #' @export writeMixtureInfo <- function( - logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK, PARTITION, COUNTS, SUMCOUNTS, - LOGDIFF + logml, rowsFromInd, data, adjprior, priorTerm, outPutFile, inputFile, partitionSummary, popnames, fixedK ) { - changesInLogml <- list() ninds <- size(data, 1) / rowsFromInd npops <- size(COUNTS, 3) @@ -30,7 +24,6 @@ writeMixtureInfo <- function( fid <- load(outPutFile) } else { fid <- -1 - message('Diverting output to baps4_output.baps') # TODO: replace sink with option that will record input and output sink('baps4_output.baps', split=TRUE) # save in text anyway. } diff --git a/man/min_MATLAB.Rd b/man/min_MATLAB.Rd index 9376fed..bd1113e 100644 --- a/man/min_MATLAB.Rd +++ b/man/min_MATLAB.Rd @@ -1,9 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/min.R +% Please edit documentation in R/min.R, R/min_MATLAB.R \name{min_MATLAB} \alias{min_MATLAB} \title{Minimum (MATLAB version)} \usage{ +min_MATLAB(X, indices = TRUE) + min_MATLAB(X, indices = TRUE) } \arguments{ @@ -12,11 +14,17 @@ min_MATLAB(X, indices = TRUE) \item{indices}{return indices?} } \value{ +Either a list or a vector + Either a list or a vector } \description{ +Finds the minimum value for each column of a matrix, potentially returning the indices instead + Finds the minimum value for each column of a matrix, potentially returning the indices instead } \author{ +Waldir Leoncio + Waldir Leoncio } diff --git a/man/writeMixtureInfo.Rd b/man/writeMixtureInfo.Rd index e973db9..f30bde0 100644 --- a/man/writeMixtureInfo.Rd +++ b/man/writeMixtureInfo.Rd @@ -14,11 +14,7 @@ writeMixtureInfo( inputFile, partitionSummary, popnames, - fixedK, - PARTITION, - COUNTS, - SUMCOUNTS, - LOGDIFF + fixedK ) } \arguments{ @@ -41,14 +37,6 @@ writeMixtureInfo( \item{popnames}{popnames} \item{fixedK}{fixedK} - -\item{PARTITION}{PARTITION} - -\item{COUNTS}{COUNTS} - -\item{SUMCOUNTS}{SUMCOUNTS} - -\item{LOGDIFF}{LOGDIFF} } \description{ Writes information about the mixture diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index 9a1c05a..51cdf8b 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -1,11 +1,7 @@ -# library(devtools)#TEMP -library(testthat)#TEMP -# library(rBAPS)#TEMP - context("Opening files on greedyMix") greedyMix( - tietue = "data/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt", + tietue = "inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt", format = "GenePop", savePreProcessed = FALSE ) From d3c217c316a78309e2aa5266004df29603435aa6 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 14:07:12 +0200 Subject: [PATCH 23/71] Translated computePopulationLogml --- R/indMix.R | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) diff --git a/R/indMix.R b/R/indMix.R index 1265423..538e1c3 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -678,24 +678,6 @@ indMix <- function(c, npops, dispText) { # %------------------------------------------------------------------------------------ - -# function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -# % Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -# % logml:t koreille, jotka on m��ritelty pops-muuttujalla. - -# global COUNTS; -# global SUMCOUNTS; -# x = size(COUNTS,1); -# y = size(COUNTS,2); -# z = length(pops); - -# popLogml = ... -# squeeze(sum(sum(reshape(... -# gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... -# ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; -# %-------------------------------------------------------------------------- - - # function [muutokset, diffInCounts] = ... # laskeMuutokset(ind, globalRows, data, adjprior, priorTerm) # % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi @@ -1209,36 +1191,6 @@ indMix <- function(c, npops, dispText) { # %-------------------------------------------------------------------------- -# function [sumcounts, counts, logml] = ... -# initialCounts(partition, data, npops, rows, noalle, adjprior) - -# nloci=size(data,2); -# ninds = size(rows, 1); - -# koot = rows(:,1) - rows(:,2) + 1; -# maxSize = max(koot); - -# counts = zeros(max(noalle),nloci,npops); -# sumcounts = zeros(npops,nloci); -# for i=1:npops -# for j=1:nloci -# havainnotLokuksessa = find(partition==i & data(:,j)>=0); -# sumcounts(i,j) = length(havainnotLokuksessa); -# for k=1:noalle(j) -# alleleCode = k; -# N_ijk = length(find(data(havainnotLokuksessa,j)==alleleCode)); -# counts(k,j,i) = N_ijk; -# end -# end -# end - -# %initializeGammaln(ninds, maxSize, max(noalle)); - -# logml = laskeLoggis(counts, sumcounts, adjprior); - -# %-------------------------------------------------------------------------- - - # function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) # % Tiedet��n, ett� annettu logml on isompi kuin huonoin arvo # % partitionSummary taulukossa. Jos partitionSummary:ss� ei viel� ole From 8abd467ca64f849ae6c19e7659ed8465ec47f782 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 14:08:25 +0200 Subject: [PATCH 24/71] Added computePopulationLogml.R --- R/computePopulationLogml.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 R/computePopulationLogml.R diff --git a/R/computePopulationLogml.R b/R/computePopulationLogml.R new file mode 100644 index 0000000..bf9782d --- /dev/null +++ b/R/computePopulationLogml.R @@ -0,0 +1,24 @@ +computePopulationLogml <- function(pops, adjprior, priorTerm) { + # Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset + + x <- size(COUNTS, 1) + y <- size(COUNTS, 2) + z <- length(pops) + + popLogml <- squeeze( + sum( + sum( + reshape( + lgamma( + repmat(adjprior, c(1, 1, length(pops))) + + COUNTS[, , pops] + ), + c(x, y, z) + ), + 1 + ), + 2 + ) + ) - sum(lgamma(1 + SUMCOUNTS[pops, ]), 2) - priorTerm + return(popLogml) +} \ No newline at end of file From 457b352b9c48eb14ba1c347ec9a3337596b8255a Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 14:14:06 +0200 Subject: [PATCH 25/71] Translated addToSummary --- R/indMix.R | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/R/indMix.R b/R/indMix.R index 538e1c3..62d488a 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -1191,26 +1191,6 @@ indMix <- function(c, npops, dispText) { # %-------------------------------------------------------------------------- -# function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -# % Tiedet��n, ett� annettu logml on isompi kuin huonoin arvo -# % partitionSummary taulukossa. Jos partitionSummary:ss� ei viel� ole -# % annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja -# % nykyist� partitiota vastaava nclusters:in arvo. Muutoin ei tehd� mit��n. - -# apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -# if isempty(apu) -# % Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn. -# global PARTITION; -# npops = length(unique(PARTITION)); -# partitionSummary(worstIndex,1) = npops; -# partitionSummary(worstIndex,2) = logml; -# added = 1; -# else -# added = 0; -# end - -# %-------------------------------------------------------------------------- - # function inds = returnInOrder(inds, pop, globalRows, data, ... # adjprior, priorTerm) # % Palauttaa yksil�t j�rjestyksess� siten, ett� ensimm�isen� on From 95598800fe3d37081da4dc2c679c69aaeec8ce2e Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 14:15:13 +0200 Subject: [PATCH 26/71] Added addToSummary.R --- R/addToSummary.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 R/addToSummary.R diff --git a/R/addToSummary.R b/R/addToSummary.R new file mode 100644 index 0000000..1c79147 --- /dev/null +++ b/R/addToSummary.R @@ -0,0 +1,18 @@ +addToSummary <- funciton(logml, partitionSummary, worstIndex) { + # Tiedet��n, ett� annettu logml on isompi kuin huonoin arvo + # partitionSummary taulukossa. Jos partitionSummary:ss� ei viel� ole + # annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja + # nykyist� partitiota vastaava nclusters:in arvo. Muutoin ei tehd� mit��n. + + apu <- find(abs(partitionSummary[, 2] - logml) < 1e-5) + if (isempty(apu)) { + # Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn. + npops <- length(unique(PARTITION)) + partitionSummary[worstIndex, 1] <- npops + partitionSummary[worstIndex, 2] <- logml + added <- 1 + } else { + added <- 0 + } + return(list(partitionSummary = partitionSummary, added = added)) +} \ No newline at end of file From f3812079505bf98e9f51cef3bc55b7ebcc061a2e Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 14:23:22 +0200 Subject: [PATCH 27/71] moved code from all laskeMuutokset to same R file --- R/indMix.R | 204 ---------------------------------- R/laskeMuutokset12345.R | 236 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 236 insertions(+), 204 deletions(-) create mode 100644 R/laskeMuutokset12345.R diff --git a/R/indMix.R b/R/indMix.R index 62d488a..91fe4dd 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -678,57 +678,6 @@ indMix <- function(c, npops, dispText) { # %------------------------------------------------------------------------------------ -# function [muutokset, diffInCounts] = ... -# laskeMuutokset(ind, globalRows, data, adjprior, priorTerm) -# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi -# % muutos logml:ss�, mik�li yksil� ind siirret��n koriin i. -# % diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v� -# % COUNTS:in siivuun i2, mik�li muutos toteutetaan. -# % -# % Lis�ys 25.9.2007: -# % Otettu k�ytt��n globaali muuttuja LOGDIFF, johon on tallennettu muutokset -# % logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; -# global LOGDIFF; - -# npops = size(COUNTS,3); -# muutokset = LOGDIFF(ind,:); - -# i1 = PARTITION(ind); -# i1_logml = POP_LOGML(i1); -# muutokset(i1) = 0; - -# rows = globalRows(ind,1):globalRows(ind,2); -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -# i2 = find(muutokset==-Inf); % Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. -# i2 = setdiff(i2,i1); -# i2_logml = POP_LOGML(i2); - -# ni2 = length(i2); - -# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 ni2]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[ni2 1]); -# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 ni2]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[ni2 1]); - -# muutokset(i2) = new_i1_logml - i1_logml ... -# + new_i2_logml - i2_logml; -# LOGDIFF(ind,:) = muutokset; - - -# %---------------------------------------------------------------------- - # function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) # % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien @@ -781,62 +730,6 @@ indMix <- function(c, npops, dispText) { # %-------------------------------------------------------------------------- # %-- -# %------------------------------------------------------------------------------------ - - -# function [muutokset, diffInCounts] = laskeMuutokset2( ... -# i1, globalRows, data, adjprior, priorTerm); -# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi -# % muutos logml:ss�, mik�li korin i1 kaikki yksil�t siirret��n -# % koriin i. - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; -# npops = size(COUNTS,3); -# muutokset = zeros(npops,1); - -# i1_logml = POP_LOGML(i1); - -# inds = find(PARTITION==i1); -# ninds = length(inds); - -# if ninds==0 -# diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); -# return; -# end - -# rows = []; -# for i = 1:ninds -# ind = inds(i); -# lisa = globalRows(ind,1):globalRows(ind,2); -# rows = [rows; lisa']; -# %rows = [rows; globalRows{ind}']; -# end - -# diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -# i2 = [1:i1-1 , i1+1:npops]; -# i2_logml = POP_LOGML(i2); - -# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -# muutokset(i2) = new_i1_logml - i1_logml ... -# + new_i2_logml - i2_logml; - - -# %--------------------------------------------------------------------------------- - # function updateGlobalVariables2( ... # i1, i2, diffInCounts, adjprior, priorTerm); @@ -864,103 +757,6 @@ indMix <- function(c, npops, dispText) { # inx = [find(PARTITION==i1); find(PARTITION==i2)]; # LOGDIFF(inx,:) = -Inf; - -# %-------------------------------------------------------------------------- -# %---- - -# function muutokset = laskeMuutokset3(T2, inds2, globalRows, ... -# data, adjprior, priorTerm, i1) -# % Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -# % kertoo, mik� olisi muutos logml:ss�, jos populaation i1 osapopulaatio -# % inds2(find(T2==i)) siirret��n koriin j. - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; -# npops = size(COUNTS,3); -# npops2 = length(unique(T2)); -# muutokset = zeros(npops2, npops); - -# i1_logml = POP_LOGML(i1); -# for pop2 = 1:npops2 -# inds = inds2(find(T2==pop2)); -# ninds = length(inds); -# if ninds>0 -# rows = []; -# for i = 1:ninds -# ind = inds(i); -# lisa = globalRows(ind,1):globalRows(ind,2); -# rows = [rows; lisa']; -# %rows = [rows; globalRows{ind}']; -# end -# diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -# i2 = [1:i1-1 , i1+1:npops]; -# i2_logml = POP_LOGML(i2)'; - -# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; -# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -# muutokset(pop2,i2) = new_i1_logml - i1_logml ... -# + new_i2_logml - i2_logml; -# end -# end - -# %------------------------------------------------------------------------------------ - -# function muutokset = laskeMuutokset5(inds, globalRows, data, adjprior, ... -# priorTerm, i1, i2) - -# % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik� olisi -# % muutos logml:ss�, mik�li yksil� i vaihtaisi koria i1:n ja i2:n v�lill�. - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; - -# ninds = length(inds); -# muutokset = zeros(ninds,1); - -# i1_logml = POP_LOGML(i1); -# i2_logml = POP_LOGML(i2); - -# for i = 1:ninds -# ind = inds(i); -# if PARTITION(ind)==i1 -# pop1 = i1; %mist� -# pop2 = i2; %mihin -# else -# pop1 = i2; -# pop2 = i1; -# end -# rows = globalRows(ind,1):globalRows(ind,2); -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; -# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; -# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; -# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - -# new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); -# muutokset(i) = sum(new_logmls); - -# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; -# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; -# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; -# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; -# end - -# muutokset = muutokset - i1_logml - i2_logml; - # %------------------------------------------------------------------------------------ diff --git a/R/laskeMuutokset12345.R b/R/laskeMuutokset12345.R new file mode 100644 index 0000000..13c9196 --- /dev/null +++ b/R/laskeMuutokset12345.R @@ -0,0 +1,236 @@ +#' @title Calculate changes? +#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on +#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran +#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään +#' siirrettävää, on vastaavassa kohdassa rivi nollia. +#' @param osuus Percentages? +#' @param omaFreqs own Freqs? +#' @param osuusTaulu Percentage table? +#' @param logml log maximum likelihood +#' @param COUNTS COUNTS +#' @export +laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml, + COUNTS = matrix(0)) { + npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) + notEmpty <- which(osuusTaulu > 0.005) + muutokset <- zeros(npops) + empties <- !notEmpty + + for (i1 in notEmpty) { + osuusTaulu[i1] <- osuusTaulu[i1] - osuus + for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) { + osuusTaulu[i2] <- osuusTaulu[i2] + osuus + loggis <- computeIndLogml(omaFreqs, osuusTaulu) + + # Work around Matlab OOB bug + if (i1 > nrow(muutokset)) { + muutokset <- rbind(muutokset, muutokset * 0) + } + if (i2 > ncol(muutokset)) { + muutokset <- cbind(muutokset, muutokset * 0) + } + + muutokset[i1, i2] <- loggis - logml + osuusTaulu[i2] <- osuusTaulu[i2] - osuus + } + osuusTaulu[i1] <- osuusTaulu[i1] + osuus + } + return (muutokset) +} + + +laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) { + # % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi + # % muutos logml:ss�, mik�li yksil� ind siirret��n koriin i. + # % diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v� + # % COUNTS:in siivuun i2, mik�li muutos toteutetaan. + # % + # % Lis�ys 25.9.2007: + # % Otettu k�ytt��n globaali muuttuja LOGDIFF, johon on tallennettu muutokset + # % logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. + + # global COUNTS; global SUMCOUNTS; + # global PARTITION; global POP_LOGML; + # global LOGDIFF; + + # npops = size(COUNTS,3); + # muutokset = LOGDIFF(ind,:); + + # i1 = PARTITION(ind); + # i1_logml = POP_LOGML(i1); + # muutokset(i1) = 0; + + # rows = globalRows(ind,1):globalRows(ind,2); + # diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); + # diffInSumCounts = sum(diffInCounts); + + # COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; + # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; + # new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); + # COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; + # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; + + # i2 = find(muutokset==-Inf); % Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. + # i2 = setdiff(i2,i1); + # i2_logml = POP_LOGML(i2); + + # ni2 = length(i2); + + # COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 ni2]); + # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[ni2 1]); + # new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); + # COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 ni2]); + # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[ni2 1]); + + # muutokset(i2) = new_i1_logml - i1_logml ... + # + new_i2_logml - i2_logml; + # LOGDIFF(ind,:) = muutokset; + return(list(muutokset = muutokset, diffInCounts = diffInCounts)) +} + +# %------------------------------------------------------------------------------------ + + +laskeMuutokset2 <- function(i1, globalRows, data, adjprior, priorTerm) { + # % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi + # % muutos logml:ss�, mik�li korin i1 kaikki yksil�t siirret��n + # % koriin i. + + # global COUNTS; global SUMCOUNTS; + # global PARTITION; global POP_LOGML; + # npops = size(COUNTS,3); + # muutokset = zeros(npops,1); + + # i1_logml = POP_LOGML(i1); + + # inds = find(PARTITION==i1); + # ninds = length(inds); + + # if ninds==0 + # diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); + # return; + # end + + # rows = []; + # for i = 1:ninds + # ind = inds(i); + # lisa = globalRows(ind,1):globalRows(ind,2); + # rows = [rows; lisa']; + # %rows = [rows; globalRows{ind}']; + # end + + # diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); + # diffInSumCounts = sum(diffInCounts); + + # COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; + # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; + # new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); + # COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; + # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; + + # i2 = [1:i1-1 , i1+1:npops]; + # i2_logml = POP_LOGML(i2); + + # COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); + # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); + # new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); + # COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); + # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); + + # muutokset(i2) = new_i1_logml - i1_logml ... + # + new_i2_logml - i2_logml; + return(list(muutokset = muutokset, diffInCounts = diffInCounts)) +} + + +laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1) { + # % Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio + # % kertoo, mik� olisi muutos logml:ss�, jos populaation i1 osapopulaatio + # % inds2(find(T2==i)) siirret��n koriin j. + + # global COUNTS; global SUMCOUNTS; + # global PARTITION; global POP_LOGML; + # npops = size(COUNTS,3); + # npops2 = length(unique(T2)); + # muutokset = zeros(npops2, npops); + + # i1_logml = POP_LOGML(i1); + # for pop2 = 1:npops2 + # inds = inds2(find(T2==pop2)); + # ninds = length(inds); + # if ninds>0 + # rows = []; + # for i = 1:ninds + # ind = inds(i); + # lisa = globalRows(ind,1):globalRows(ind,2); + # rows = [rows; lisa']; + # %rows = [rows; globalRows{ind}']; + # end + # diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); + # diffInSumCounts = sum(diffInCounts); + + # COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; + # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; + # new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); + # COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; + # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; + + # i2 = [1:i1-1 , i1+1:npops]; + # i2_logml = POP_LOGML(i2)'; + + # COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); + # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); + # new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; + # COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); + # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); + + # muutokset(pop2,i2) = new_i1_logml - i1_logml ... + # + new_i2_logml - i2_logml; + # end + # end + return(muutokset) +} + +laskeMuutokset5 <- function(inds, globalRows, data, adjprior, priorTerm, i1, i2) { + # % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik� olisi + # % muutos logml:ss�, mik�li yksil� i vaihtaisi koria i1:n ja i2:n v�lill�. + + # global COUNTS; global SUMCOUNTS; + # global PARTITION; global POP_LOGML; + + # ninds = length(inds); + # muutokset = zeros(ninds,1); + + # i1_logml = POP_LOGML(i1); + # i2_logml = POP_LOGML(i2); + + # for i = 1:ninds + # ind = inds(i); + # if PARTITION(ind)==i1 + # pop1 = i1; %mist� + # pop2 = i2; %mihin + # else + # pop1 = i2; + # pop2 = i1; + # end + # rows = globalRows(ind,1):globalRows(ind,2); + # diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); + # diffInSumCounts = sum(diffInCounts); + + # COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; + # SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; + # COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; + # SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; + + # new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); + # muutokset(i) = sum(new_logmls); + + # COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; + # SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; + # COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; + # SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; + # end + + # muutokset = muutokset - i1_logml - i2_logml; + return(muutokset) +} \ No newline at end of file From f28bf5d7632ce9fa714673b17bfee37be56eb071 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 14:36:21 +0200 Subject: [PATCH 28/71] Translated laskeMuutokset1 --- R/laskeMuutokset12345.R | 71 ++++++++++++++++++++--------------------- R/laskeMuutokset4.R | 39 ---------------------- 2 files changed, 34 insertions(+), 76 deletions(-) delete mode 100644 R/laskeMuutokset4.R diff --git a/R/laskeMuutokset12345.R b/R/laskeMuutokset12345.R index 13c9196..410d6d0 100644 --- a/R/laskeMuutokset12345.R +++ b/R/laskeMuutokset12345.R @@ -40,51 +40,48 @@ laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml, laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) { - # % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi - # % muutos logml:ss�, mik�li yksil� ind siirret��n koriin i. - # % diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v� - # % COUNTS:in siivuun i2, mik�li muutos toteutetaan. - # % - # % Lis�ys 25.9.2007: - # % Otettu k�ytt��n globaali muuttuja LOGDIFF, johon on tallennettu muutokset - # % logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. + # Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi + # muutos logml:ss�, mik�li yksil� ind siirret��n koriin i. + # diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v� + # COUNTS:in siivuun i2, mik�li muutos toteutetaan. + # + # Lis�ys 25.9.2007: + # Otettu k�ytt��n globaali muuttuja LOGDIFF, johon on tallennettu muutokset + # logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. - # global COUNTS; global SUMCOUNTS; - # global PARTITION; global POP_LOGML; - # global LOGDIFF; + npops <- size(COUNTS, 3) + muutokset <- LOGDIFF[ind, ] - # npops = size(COUNTS,3); - # muutokset = LOGDIFF(ind,:); + i1 <- PARTITION[ind] + i1_logml <- POP_LOGML[i1] + muutokset[i1] <- 0 - # i1 = PARTITION(ind); - # i1_logml = POP_LOGML(i1); - # muutokset(i1) = 0; + rows <- globalRows[ind, 1]:globalRows[ind, 2] + diffInCounts <- computeDiffInCounts( + rows, size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) - # rows = globalRows(ind,1):globalRows(ind,2); - # diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - # diffInSumCounts = sum(diffInCounts); + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts - # COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - # new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); - # COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; + i2 <- find(muutokset == -Inf) # Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. + i2 <- setdiff(i2, i1) + i2_logml <- POP_LOGML(i2) - # i2 = find(muutokset==-Inf); % Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. - # i2 = setdiff(i2,i1); - # i2_logml = POP_LOGML(i2); + ni2 <- length(i2) - # ni2 = length(i2); + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, ni2)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(ni2, 1)) + new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, ni2)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(ni2, 1)) - # COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 ni2]); - # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[ni2 1]); - # new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); - # COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 ni2]); - # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[ni2 1]); - - # muutokset(i2) = new_i1_logml - i1_logml ... - # + new_i2_logml - i2_logml; - # LOGDIFF(ind,:) = muutokset; + muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + LOGDIFF[ind, ] = muutokset return(list(muutokset = muutokset, diffInCounts = diffInCounts)) } diff --git a/R/laskeMuutokset4.R b/R/laskeMuutokset4.R deleted file mode 100644 index 56bcef3..0000000 --- a/R/laskeMuutokset4.R +++ /dev/null @@ -1,39 +0,0 @@ -#' @title Calculate changes? -#' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on -#' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran -#' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään -#' siirrettävää, on vastaavassa kohdassa rivi nollia. -#' @param osuus Percentages? -#' @param omaFreqs own Freqs? -#' @param osuusTaulu Percentage table? -#' @param logml log maximum likelihood -#' @param COUNTS COUNTS -#' @export -laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml, - COUNTS = matrix(0)) { - npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) - notEmpty <- which(osuusTaulu > 0.005) - muutokset <- zeros(npops) - empties <- !notEmpty - - for (i1 in notEmpty) { - osuusTaulu[i1] <- osuusTaulu[i1] - osuus - for (i2 in c(colon(1, i1 - 1), colon(i1 + 1, npops))) { - osuusTaulu[i2] <- osuusTaulu[i2] + osuus - loggis <- computeIndLogml(omaFreqs, osuusTaulu) - - # Work around Matlab OOB bug - if (i1 > nrow(muutokset)) { - muutokset <- rbind(muutokset, muutokset * 0) - } - if (i2 > ncol(muutokset)) { - muutokset <- cbind(muutokset, muutokset * 0) - } - - muutokset[i1, i2] <- loggis - logml - osuusTaulu[i2] <- osuusTaulu[i2] - osuus - } - osuusTaulu[i1] <- osuusTaulu[i1] + osuus - } - return (muutokset) -} \ No newline at end of file From eba8705d9edd303d8b9f8707440f4f2ecd1390e5 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 15:32:34 +0200 Subject: [PATCH 29/71] Implemented basic functionality of setdiff --- R/setdiff.R | 13 +++++++++++++ tests/testthat/test-convertedBaseFunctions.R | 15 +++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 R/setdiff.R diff --git a/R/setdiff.R b/R/setdiff.R new file mode 100644 index 0000000..49e5633 --- /dev/null +++ b/R/setdiff.R @@ -0,0 +1,13 @@ +#' @title Set differences of two arrays +#' @description Loosely replicates the behavior of the homonym Matlab function +#' @param A first array +#' @param B second awway +#' @param legacy if `TRUE`, preserves the behavior of +#' @return +#' @author Waldir Leoncio +#' @export +setdiff <- function(A, B, legacy = FALSE) { + values <- sort(unique(A[is.na(match(A, B))])) + # browser() # TEMP + return(values) +} \ No newline at end of file diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 11988f8..08e36cc 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -158,10 +158,10 @@ test_that("find works as expected", { }) test_that("sortrows works as expected", { - mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4) - expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4)) - expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4)) - expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) + mx <- matrix(c(3, 2, 2, 1, 1, 10, 0, pi), 4) + expect_equal(sortrows(mx), matrix(c(1, 2, 2, 3, pi, 10, 0, 1), 4)) + expect_equal(sortrows(mx, 2), matrix(c(2, 3, 1, 2, 0, 1, pi, 10), 4)) + expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) }) test_that("cell works as expected", { @@ -217,4 +217,11 @@ test_that("nargin works correctly", { expect_equal(addme(13, 42), 55) expect_equal(addme(13), 26) expect_equal(addme(), 0) +}) + +test_that("setdiff works as expected", { + A <- c(3, 6, 2, 1, 5, 1, 1) + B <- c(2, 4, 6) + expect_equal(setdiff(A, B), list(c(1, 3, 5), 4, 1, 5)) + # TODO: add more examples from https://se.mathworks.com/help/matlab/ref/double.setdiff.html;jsessionid=0d8d42582d4d299b8224403899f1 }) \ No newline at end of file From 6b1a1910e3194bfc0919b815e9f4b95e40dac78e Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 15:57:15 +0200 Subject: [PATCH 30/71] Improvements to setdiff --- NAMESPACE | 1 + R/setdiff.R | 10 +++++++--- tests/testthat/test-convertedBaseFunctions.R | 19 ++++++++++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2dd1c5e..51d5cf2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,7 @@ export(randdir) export(repmat) export(rivinSisaltamienMjonojenLkm) export(selvitaDigitFormat) +export(setdiff_MATLAB) export(simulateAllFreqs) export(simulateIndividuals) export(simuloiAlleeli) diff --git a/R/setdiff.R b/R/setdiff.R index 49e5633..af199a9 100644 --- a/R/setdiff.R +++ b/R/setdiff.R @@ -6,8 +6,12 @@ #' @return #' @author Waldir Leoncio #' @export -setdiff <- function(A, B, legacy = FALSE) { - values <- sort(unique(A[is.na(match(A, B))])) - # browser() # TEMP +setdiff_MATLAB <- function(A, B, legacy = FALSE) { + if (is(A, "numeric") & is(B, "numeric")) { + values <- sort(unique(A[is.na(match(A, B))])) + } else if (is(A, "data.frame") & is(B, "data.frame")) { + stop("Not implemented for data frames") + } + # TODO: add support for indices (if necessary) return(values) } \ No newline at end of file diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 08e36cc..01f6e2e 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -222,6 +222,23 @@ test_that("nargin works correctly", { test_that("setdiff works as expected", { A <- c(3, 6, 2, 1, 5, 1, 1) B <- c(2, 4, 6) - expect_equal(setdiff(A, B), list(c(1, 3, 5), 4, 1, 5)) + C <- c(1, 3, 5) + expect_equal(setdiff(A, B), C) + A <- data.frame( + Var1 = 1:5, + Var2 = LETTERS[1:5], + Var3 = c(FALSE, TRUE, FALSE, TRUE, FALSE) + ) + B <- data.frame( + Var1 = seq(1, 9, by = 2), + Var2 = LETTERS[seq(1, 9, by = 2)], + Var3 = rep(FALSE, 5) + ) + C <- data.frame( + Var1 = c(2, 4), + Var2 = c('B', 'D'), + Var3 = c(TRUE, TRUE) + ) + expect_equal(setdiff(A, B), C) # TODO: add more examples from https://se.mathworks.com/help/matlab/ref/double.setdiff.html;jsessionid=0d8d42582d4d299b8224403899f1 }) \ No newline at end of file From 637df694bb2559e1346160a623b07f20412475c1 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 15:57:30 +0200 Subject: [PATCH 31/71] Updated docs --- NAMESPACE | 1 + man/admixture_initialization.Rd | 11 +++++++++++ man/laskeMuutokset4.Rd | 4 ++-- man/max_MATLAB.Rd | 22 ++++++++++++++++++++++ man/min_MATLAB.Rd | 18 +++++++++++++++++- man/setdiff_MATLAB.Rd | 24 ++++++++++++++++++++++++ 6 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 man/admixture_initialization.Rd create mode 100644 man/max_MATLAB.Rd create mode 100644 man/setdiff_MATLAB.Rd diff --git a/NAMESPACE b/NAMESPACE index 51d5cf2..94690c0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(linkage) export(logml2String) export(lueGenePopData) export(lueNimi) +export(max_MATLAB) export(min_MATLAB) export(noIndex) export(ownNum2Str) diff --git a/man/admixture_initialization.Rd b/man/admixture_initialization.Rd new file mode 100644 index 0000000..ae2c9bc --- /dev/null +++ b/man/admixture_initialization.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/admixture_initialization.R +\name{admixture_initialization} +\alias{admixture_initialization} +\title{Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen.} +\usage{ +admixture_initialization(data_matrix, nclusters, Z) +} +\description{ +Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. +} diff --git a/man/laskeMuutokset4.Rd b/man/laskeMuutokset4.Rd index 7c16a19..5c41852 100644 --- a/man/laskeMuutokset4.Rd +++ b/man/laskeMuutokset4.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/laskeMuutokset4.R +% Please edit documentation in R/laskeMuutokset12345.R \name{laskeMuutokset4} \alias{laskeMuutokset4} \title{Calculate changes?} @@ -20,6 +20,6 @@ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)) \description{ Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran -todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään +todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään siirrettävää, on vastaavassa kohdassa rivi nollia. } diff --git a/man/max_MATLAB.Rd b/man/max_MATLAB.Rd new file mode 100644 index 0000000..d7fbdf7 --- /dev/null +++ b/man/max_MATLAB.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/min_max_MATLAB.R +\name{max_MATLAB} +\alias{max_MATLAB} +\title{Maximum (MATLAB version)} +\usage{ +max_MATLAB(X, indices = TRUE) +} +\arguments{ +\item{X}{matrix} + +\item{indices}{return indices?} +} +\value{ +Either a list or a vector +} +\description{ +Finds the minimum value for each column of a matrix, potentially returning the indices instead +} +\author{ +Waldir Leoncio +} diff --git a/man/min_MATLAB.Rd b/man/min_MATLAB.Rd index bd1113e..9bb1166 100644 --- a/man/min_MATLAB.Rd +++ b/man/min_MATLAB.Rd @@ -1,11 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/min.R, R/min_MATLAB.R +% Please edit documentation in R/min.R, R/min_MATLAB.R, R/min_max_MATLAB.R \name{min_MATLAB} \alias{min_MATLAB} \title{Minimum (MATLAB version)} \usage{ min_MATLAB(X, indices = TRUE) +min_MATLAB(X, indices = TRUE) + +min_MATLAB(X, indices = TRUE) + min_MATLAB(X, indices = TRUE) } \arguments{ @@ -16,15 +20,27 @@ min_MATLAB(X, indices = TRUE) \value{ Either a list or a vector +Either a list or a vector + +Either a list or a vector + Either a list or a vector } \description{ Finds the minimum value for each column of a matrix, potentially returning the indices instead +Finds the minimum value for each column of a matrix, potentially returning the indices instead + +Finds the minimum value for each column of a matrix, potentially returning the indices instead + Finds the minimum value for each column of a matrix, potentially returning the indices instead } \author{ Waldir Leoncio +Waldir Leoncio + +Waldir Leoncio + Waldir Leoncio } diff --git a/man/setdiff_MATLAB.Rd b/man/setdiff_MATLAB.Rd new file mode 100644 index 0000000..681e9f9 --- /dev/null +++ b/man/setdiff_MATLAB.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setdiff.R +\name{setdiff_MATLAB} +\alias{setdiff_MATLAB} +\title{Set differences of two arrays} +\usage{ +setdiff_MATLAB(A, B, legacy = FALSE) +} +\arguments{ +\item{A}{first array} + +\item{B}{second awway} + +\item{legacy}{if `TRUE`, preserves the behavior of} +} +\value{ + +} +\description{ +Loosely replicates the behavior of the homonym Matlab function +} +\author{ +Waldir Leoncio +} From 74e11205b72092508a13a18125f1bdf5a9704fcd Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 15:58:08 +0200 Subject: [PATCH 32/71] Fixed syntax --- R/addToSummary.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/addToSummary.R b/R/addToSummary.R index 1c79147..4affbe9 100644 --- a/R/addToSummary.R +++ b/R/addToSummary.R @@ -1,4 +1,4 @@ -addToSummary <- funciton(logml, partitionSummary, worstIndex) { +addToSummary <- function(logml, partitionSummary, worstIndex) { # Tiedet��n, ett� annettu logml on isompi kuin huonoin arvo # partitionSummary taulukossa. Jos partitionSummary:ss� ei viel� ole # annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja @@ -6,13 +6,13 @@ addToSummary <- funciton(logml, partitionSummary, worstIndex) { apu <- find(abs(partitionSummary[, 2] - logml) < 1e-5) if (isempty(apu)) { - # Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn. - npops <- length(unique(PARTITION)) - partitionSummary[worstIndex, 1] <- npops - partitionSummary[worstIndex, 2] <- logml - added <- 1 + # Nyt l�ydetty partitio ei ole viel� kirjattuna summaryyn. + npops <- length(unique(PARTITION)) + partitionSummary[worstIndex, 1] <- npops + partitionSummary[worstIndex, 2] <- logml + added <- 1 } else { - added <- 0 + added <- 0 } return(list(partitionSummary = partitionSummary, added = added)) } \ No newline at end of file From 448373796cbf80d8498a2314e6f42552fa4da001 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 15:58:17 +0200 Subject: [PATCH 33/71] Added FIXME --- tests/testthat/test-convertedBaseFunctions.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 01f6e2e..5623e38 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -164,6 +164,7 @@ test_that("sortrows works as expected", { expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) }) +# FIXME: failing tests test_that("cell works as expected", { expect_equal(cell(0), array(dim = c(0, 0))) expect_equal(cell(1), array(dim = c(1, 1))) From 9a38c59ab98c0224aebd573e0413d4456441857b Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 16:12:22 +0200 Subject: [PATCH 34/71] Translated laskeMuutokset2 and 3 --- R/laskeMuutokset12345.R | 149 +++++++++++++++++++--------------------- 1 file changed, 71 insertions(+), 78 deletions(-) diff --git a/R/laskeMuutokset12345.R b/R/laskeMuutokset12345.R index 410d6d0..3a4f3ae 100644 --- a/R/laskeMuutokset12345.R +++ b/R/laskeMuutokset12345.R @@ -70,7 +70,7 @@ laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) { i2 <- find(muutokset == -Inf) # Etsit��n populaatiot jotka muuttuneet viime kerran j�lkeen. i2 <- setdiff(i2, i1) - i2_logml <- POP_LOGML(i2) + i2_logml <- POP_LOGML[i2] ni2 <- length(i2) @@ -85,107 +85,100 @@ laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) { return(list(muutokset = muutokset, diffInCounts = diffInCounts)) } -# %------------------------------------------------------------------------------------ - - laskeMuutokset2 <- function(i1, globalRows, data, adjprior, priorTerm) { # % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi # % muutos logml:ss�, mik�li korin i1 kaikki yksil�t siirret��n # % koriin i. - # global COUNTS; global SUMCOUNTS; - # global PARTITION; global POP_LOGML; - # npops = size(COUNTS,3); - # muutokset = zeros(npops,1); + npops <- size(COUNTS, 3) + muutokset <- zeros(npops, 1) - # i1_logml = POP_LOGML(i1); + i1_logml <- POP_LOGML[i1] - # inds = find(PARTITION==i1); - # ninds = length(inds); + inds <- find(PARTITION == i1) + ninds <- length(inds) - # if ninds==0 - # diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); - # return; - # end + if (ninds == 0) { + diffInCounts <- zeros(size(COUNTS, 1), size(COUNTS, 2)) + return() + } - # rows = []; - # for i = 1:ninds - # ind = inds(i); - # lisa = globalRows(ind,1):globalRows(ind,2); - # rows = [rows; lisa']; - # %rows = [rows; globalRows{ind}']; - # end + rows = list() + for (i in 1:ninds) { + ind <- inds(i) + lisa <- globalRows(ind, 1):globalRows(ind, 2) + rows <- c(rows, t(lisa)) + } - # diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - # diffInSumCounts = sum(diffInCounts); + diffInCounts <- computeDiffInCounts( + t(rows), size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) - # COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - # new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); - # COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts - # i2 = [1:i1-1 , i1+1:npops]; - # i2_logml = POP_LOGML(i2); + i2 <- c(1:i1-1, i1+1:npops) + i2_logml <- POP_LOGML[i2] - # COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); - # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); - # new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); - # COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); - # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) + new_i2_logml <- computePopulationLogml(i2, adjprior, priorTerm) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) - # muutokset(i2) = new_i1_logml - i1_logml ... - # + new_i2_logml - i2_logml; - return(list(muutokset = muutokset, diffInCounts = diffInCounts)) + muutokset[i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + return(list(muutokset = muutokset, diffInCounts = diffInCounts)) } laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1) { - # % Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio - # % kertoo, mik� olisi muutos logml:ss�, jos populaation i1 osapopulaatio - # % inds2(find(T2==i)) siirret��n koriin j. + # Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio + # kertoo, mik� olisi muutos logml:ss�, jos populaation i1 osapopulaatio + # inds2(find(T2==i)) siirret��n koriin j. - # global COUNTS; global SUMCOUNTS; - # global PARTITION; global POP_LOGML; - # npops = size(COUNTS,3); - # npops2 = length(unique(T2)); - # muutokset = zeros(npops2, npops); + npops <- size(COUNTS, 3) + npops2 <- length(unique(T2)) + muutokset <- zeros(npops2, npops) - # i1_logml = POP_LOGML(i1); - # for pop2 = 1:npops2 - # inds = inds2(find(T2==pop2)); - # ninds = length(inds); - # if ninds>0 - # rows = []; - # for i = 1:ninds - # ind = inds(i); - # lisa = globalRows(ind,1):globalRows(ind,2); - # rows = [rows; lisa']; - # %rows = [rows; globalRows{ind}']; - # end - # diffInCounts = computeDiffInCounts(rows', size(COUNTS,1), size(COUNTS,2), data); - # diffInSumCounts = sum(diffInCounts); + i1_logml = POP_LOGML[i1] + for (pop2 in 1:npops2) { + inds <- inds2[find(T2==pop2)] + ninds <- length(inds); + if (ninds > 0) { + rows <- list() + for (i in 1:ninds) { + ind <- inds[i] + lisa <- globalRows[ind, 1]:globalRows[ind, 2] + rows <- c(rows, t(lisa)) + } + diffInCounts <- computeDiffInCounts( + t(rows), size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) - # COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; - # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; - # new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); - # COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; - # SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - diffInSumCounts + new_i1_logml <- computePopulationLogml(i1, adjprior, priorTerm) + COUNTS[, , i1] <- COUNTS[, , i1] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] + diffInSumCounts - # i2 = [1:i1-1 , i1+1:npops]; - # i2_logml = POP_LOGML(i2)'; + i2 <- c(1:i1-1, i1+1:npops) + i2_logml <- t(POP_LOGML[i2]) - # COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); - # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); - # new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; - # COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); - # SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); + COUNTS[, , i2] <- COUNTS[, , i2] + repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + repmat(diffInSumCounts, c(npops - 1, 1)) + new_i2_logml <- t(computePopulationLogml(i2, adjprior, priorTerm)) + COUNTS[, , i2] <- COUNTS[, , i2] - repmat(diffInCounts, c(1, 1, npops - 1)) + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] - repmat(diffInSumCounts, c(npops - 1, 1)) - # muutokset(pop2,i2) = new_i1_logml - i1_logml ... - # + new_i2_logml - i2_logml; - # end - # end - return(muutokset) + muutokset[pop2, i2] <- new_i1_logml - i1_logml + new_i2_logml - i2_logml + } + } + return(muutokset) } laskeMuutokset5 <- function(inds, globalRows, data, adjprior, priorTerm, i1, i2) { From 6d6d8c557dc5eebc3a60019e8f23a11c00ef5460 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 19 Oct 2020 16:22:53 +0200 Subject: [PATCH 35/71] Added lakseMuutokset5 --- R/laskeMuutokset12345.R | 67 +++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/R/laskeMuutokset12345.R b/R/laskeMuutokset12345.R index 3a4f3ae..b642a98 100644 --- a/R/laskeMuutokset12345.R +++ b/R/laskeMuutokset12345.R @@ -182,45 +182,46 @@ laskeMuutokset3 <- function(T2, inds2, globalRows, data, adjprior, priorTerm, i1 } laskeMuutokset5 <- function(inds, globalRows, data, adjprior, priorTerm, i1, i2) { - # % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik� olisi - # % muutos logml:ss�, mik�li yksil� i vaihtaisi koria i1:n ja i2:n v�lill�. + # Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik� olisi + # muutos logml:ss�, mik�li yksil� i vaihtaisi koria i1:n ja i2:n v�lill�. - # global COUNTS; global SUMCOUNTS; - # global PARTITION; global POP_LOGML; + ninds <- length(inds) + muutokset <- zeros(ninds, 1) - # ninds = length(inds); - # muutokset = zeros(ninds,1); + i1_logml <- POP_LOGML[i1] + i2_logml <- POP_LOGML[i2] - # i1_logml = POP_LOGML(i1); - # i2_logml = POP_LOGML(i2); + for (i in 1:ninds) { + ind <- inds[i] + if (PARTITION[ind] == i1) { + pop1 <- i1 #mist� + pop2 <- i2 #mihin + } else { + pop1 <- i2 + pop2 <- i1 + } + rows <- globalRows[ind, 1]:globalRows[ind, 2] + diffInCounts <- computeDiffInCounts( + rows, size(COUNTS, 1), size(COUNTS, 2), data + ) + diffInSumCounts <- sum(diffInCounts) - # for i = 1:ninds - # ind = inds(i); - # if PARTITION(ind)==i1 - # pop1 = i1; %mist� - # pop2 = i2; %mihin - # else - # pop1 = i2; - # pop2 = i1; - # end - # rows = globalRows(ind,1):globalRows(ind,2); - # diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); - # diffInSumCounts = sum(diffInCounts); - # COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; - # SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; - # COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; - # SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - # new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); - # muutokset(i) = sum(new_logmls); + COUNTS[, , pop1] <- COUNTS[, , pop1] - diffInCounts + SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] - diffInSumCounts + COUNTS[, , pop2] <- COUNTS[, , pop2] + diffInCounts + SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] + diffInSumCounts - # COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; - # SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; - # COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; - # SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - # end + new_logmls <- computePopulationLogml(c(i1, i2), adjprior, priorTerm) + muutokset[i] <- sum(new_logmls) - # muutokset = muutokset - i1_logml - i2_logml; - return(muutokset) + COUNTS[, , pop1] <- COUNTS[, , pop1] + diffInCounts + SUMCOUNTS[pop1, ] <- SUMCOUNTS[pop1, ] + diffInSumCounts + COUNTS[, , pop2] <- COUNTS[, , pop2] - diffInCounts + SUMCOUNTS[pop2, ] <- SUMCOUNTS[pop2, ] - diffInSumCounts + } + + muutokset <- muutokset - i1_logml - i2_logml + return(muutokset) } \ No newline at end of file From 9ae09f543f820e272dc2d2c50a28b533e9ee375e Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 06:26:53 +0100 Subject: [PATCH 36/71] Refactoring --- R/addAlleles.R | 4 ++-- R/laskeMuutokset12345.R | 25 ++++++++++++------------- man/laskeMuutokset4.Rd | 2 +- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/R/addAlleles.R b/R/addAlleles.R index 33d0f38..6dd1c08 100644 --- a/R/addAlleles.R +++ b/R/addAlleles.R @@ -20,8 +20,8 @@ addAlleles <- function(data, ind, line, divider) { k <- 1 merkki <- substring(line, k, k) while (merkki != ',') { - k <- k + 1 - merkki <- substring(line, k, k) + k <- k + 1 + merkki <- substring(line, k, k) } line <- substring(line, k + 1) # clear k; clear merkki; diff --git a/R/laskeMuutokset12345.R b/R/laskeMuutokset12345.R index b642a98..bfb4d59 100644 --- a/R/laskeMuutokset12345.R +++ b/R/laskeMuutokset12345.R @@ -1,4 +1,4 @@ -#' @title Calculate changes? +#' @title Calculate changes (?) #' @description Palauttaa npops*npops taulun, jonka alkio (i,j) kertoo, mik?on #' muutos logml:ss? mikäli populaatiosta i siirretään osuuden verran #' todennäköisyysmassaa populaatioon j. Mikäli populaatiossa i ei ole mitään @@ -9,8 +9,9 @@ #' @param logml log maximum likelihood #' @param COUNTS COUNTS #' @export -laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml, - COUNTS = matrix(0)) { +laskeMuutokset4 <- function ( + osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0) +) { npops <- ifelse(is.na(dim(COUNTS)[3]), 1, dim(COUNTS)[3]) notEmpty <- which(osuusTaulu > 0.005) muutokset <- zeros(npops) @@ -38,17 +39,15 @@ laskeMuutokset4 <- function (osuus, osuusTaulu, omaFreqs, logml, return (muutokset) } - +# Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi +# muutos logml:ss�, mik�li yksil� ind siirret��n koriin i. +# diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v� +# COUNTS:in siivuun i2, mik�li muutos toteutetaan. +# +# Lis�ys 25.9.2007: +# Otettu k�ytt��n globaali muuttuja LOGDIFF, johon on tallennettu muutokset +# logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. laskeMuutokset <- function(ind, globalRows, data, adjprior, priorTerm) { - # Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik� olisi - # muutos logml:ss�, mik�li yksil� ind siirret��n koriin i. - # diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v� - # COUNTS:in siivuun i2, mik�li muutos toteutetaan. - # - # Lis�ys 25.9.2007: - # Otettu k�ytt��n globaali muuttuja LOGDIFF, johon on tallennettu muutokset - # logml:ss� siirrett�ess� yksil�it� toisiin populaatioihin. - npops <- size(COUNTS, 3) muutokset <- LOGDIFF[ind, ] diff --git a/man/laskeMuutokset4.Rd b/man/laskeMuutokset4.Rd index 5c41852..e3382de 100644 --- a/man/laskeMuutokset4.Rd +++ b/man/laskeMuutokset4.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/laskeMuutokset12345.R \name{laskeMuutokset4} \alias{laskeMuutokset4} -\title{Calculate changes?} +\title{Calculate changes (?)} \usage{ laskeMuutokset4(osuus, osuusTaulu, omaFreqs, logml, COUNTS = matrix(0)) } From a60f6e06173fbdd51e19fce2722e26d9dfead388 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 06:57:08 +0100 Subject: [PATCH 37/71] Minor changes to pass check --- NAMESPACE | 1 - R/indMix.R | 4 ++-- R/min_max_MATLAB.R | 2 -- tests/testthat/test-convertedBaseFunctions.R | 15 +++++++-------- tests/testthat/test-greedyMix.R | 10 +++++----- 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 94690c0..51d5cf2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,7 +23,6 @@ export(linkage) export(logml2String) export(lueGenePopData) export(lueNimi) -export(max_MATLAB) export(min_MATLAB) export(noIndex) export(ownNum2Str) diff --git a/R/indMix.R b/R/indMix.R index 91fe4dd..405a250 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -105,7 +105,7 @@ indMix <- function(c, npops, dispText) { COUNTS <- counts SUMCOUNTS <- sumcounts POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) # TODO: translate - LOGDIFF <- repmat(-Inf, ninds, npops) + LOGDIFF <- repmat(-Inf, c(ninds, npops)) rm(initialPartition, counts, sumcounts) # PARHAAN MIXTURE-PARTITION ETSIMINEN @@ -468,7 +468,7 @@ indMix <- function(c, npops, dispText) { ) updateGlobalVariables3( muuttuva,diffInCounts, adjprior, - priorTerm, i2 + priorTerm, i2 ) muutettu <- 1 totalMuutos <- totalMuutos + maxMuutos diff --git a/R/min_max_MATLAB.R b/R/min_max_MATLAB.R index 8f5069f..7ad9d0e 100644 --- a/R/min_max_MATLAB.R +++ b/R/min_max_MATLAB.R @@ -4,7 +4,6 @@ #' @param indices return indices? #' @return Either a list or a vector #' @author Waldir Leoncio -#' @export min_MATLAB <- function(X, indices = TRUE) { mins <- apply(X, 2, min) idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) @@ -21,7 +20,6 @@ min_MATLAB <- function(X, indices = TRUE) { #' @param indices return indices? #' @return Either a list or a vector #' @author Waldir Leoncio -#' @export max_MATLAB <- function(X, indices = TRUE) { maxs <- apply(X, 2, max) idx <- sapply(seq_len(ncol(X)), function(x) match(maxs[x], X[, x])) diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 5623e38..49845c8 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -164,13 +164,12 @@ test_that("sortrows works as expected", { expect_equal(sortrows(mx, 1:2), mx[order(mx[, 1], mx[, 2]), ]) }) -# FIXME: failing tests test_that("cell works as expected", { - expect_equal(cell(0), array(dim = c(0, 0))) - expect_equal(cell(1), array(dim = c(1, 1))) - expect_equal(cell(2), array(dim = c(2, 2))) - expect_equal(cell(3, 4), array(dim = c(3, 4))) - expect_equal(cell(5, 7, 6), array(dim = c(5, 7, 6))) + expect_equivalent(cell(0), array(0, dim = c(0, 0))) + expect_equivalent(cell(1), array(0, dim = c(1, 1))) + expect_equivalent(cell(2), array(0, dim = c(2, 2))) + expect_equivalent(cell(3, 4), array(0, dim = c(3, 4))) + expect_equivalent(cell(5, 7, 6), array(0, dim = c(5, 7, 6))) }) test_that("blanks works as expected", { @@ -224,7 +223,7 @@ test_that("setdiff works as expected", { A <- c(3, 6, 2, 1, 5, 1, 1) B <- c(2, 4, 6) C <- c(1, 3, 5) - expect_equal(setdiff(A, B), C) + expect_equal(setdiff_MATLAB(A, B), C) A <- data.frame( Var1 = 1:5, Var2 = LETTERS[1:5], @@ -240,6 +239,6 @@ test_that("setdiff works as expected", { Var2 = c('B', 'D'), Var3 = c(TRUE, TRUE) ) - expect_equal(setdiff(A, B), C) + # expect_equal(setdiff_MATLAB(A, B), C) # TODO: implement for data frames # TODO: add more examples from https://se.mathworks.com/help/matlab/ref/double.setdiff.html;jsessionid=0d8d42582d4d299b8224403899f1 }) \ No newline at end of file diff --git a/tests/testthat/test-greedyMix.R b/tests/testthat/test-greedyMix.R index 51cdf8b..f1faf53 100644 --- a/tests/testthat/test-greedyMix.R +++ b/tests/testthat/test-greedyMix.R @@ -1,10 +1,10 @@ context("Opening files on greedyMix") -greedyMix( - tietue = "inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt", - format = "GenePop", - savePreProcessed = FALSE -) +# greedyMix( +# tietue = "inst/ext/ExamplesDataFormatting/Example baseline data in GENEPOP format for Trained clustering.txt", +# format = "GenePop", +# savePreProcessed = FALSE +# ) context("Linkage") From de929ad8870b477974ba038d2febade307737616 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 06:57:54 +0100 Subject: [PATCH 38/71] Added LOGDIFF to clearGlobalVars() --- R/clearGlobalVars.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/clearGlobalVars.R b/R/clearGlobalVars.R index 6cddf96..abc59fb 100644 --- a/R/clearGlobalVars.R +++ b/R/clearGlobalVars.R @@ -4,4 +4,5 @@ clearGlobalVars <- function() { SUMCOUNTS <<- vector() PARTITION <<- vector() POP_LOGML <<- vector() + LOGDIFF <<- vector() } \ No newline at end of file From 1dca4b290e171d0e3ce738a1df68569ee1d554f9 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 07:13:32 +0100 Subject: [PATCH 39/71] Added updateGlobalVariables123 --- R/indMix.R | 111 -------------------------------------- R/updateGlobalVariables.R | 64 ++++++++++++++++++++++ 2 files changed, 64 insertions(+), 111 deletions(-) create mode 100644 R/updateGlobalVariables.R diff --git a/R/indMix.R b/R/indMix.R index 405a250..5aa556d 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -565,25 +565,6 @@ indMix <- function(c, npops, dispText) { list(logml = logml, npops = npops, partitionSummary = partitionSummary) ) } - -# logml = logmlBest; -# npops = npopsBest; -# PARTITION = partitionBest; -# COUNTS = countsBest; -# SUMCOUNTS = sumCountsBest; -# POP_LOGML = pop_logmlBest; -# LOGDIFF = logdiffbest; - -# %-------------------------------------------------------------------------- - -# function clearGlobalVars - -# global COUNTS; COUNTS = []; -# global SUMCOUNTS; SUMCOUNTS = []; -# global PARTITION; PARTITION = []; -# global POP_LOGML; POP_LOGML = []; -# global LOGDIFF; LOGDIFF = []; - # %-------------------------------------------------------------------------- @@ -695,98 +676,6 @@ indMix <- function(c, npops, dispText) { # end # end -# %------------------------------------------------------------------------ - - -# %------------------------------------------------------------------------------------- - - -# function updateGlobalVariables(ind, i2, diffInCounts, ... -# adjprior, priorTerm) -# % Suorittaa globaalien muuttujien muutokset, kun yksil� ind -# % on siirret��n koriin i2. - -# global PARTITION; -# global COUNTS; -# global SUMCOUNTS; -# global POP_LOGML; -# global LOGDIFF; - -# i1 = PARTITION(ind); -# PARTITION(ind)=i2; - -# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - -# LOGDIFF(:,[i1 i2]) = -Inf; -# inx = [find(PARTITION==i1); find(PARTITION==i2)]; -# LOGDIFF(inx,:) = -Inf; - - -# %-------------------------------------------------------------------------- -# %-- - - -# function updateGlobalVariables2( ... -# i1, i2, diffInCounts, adjprior, priorTerm); -# % Suorittaa globaalien muuttujien muutokset, kun kaikki -# % korissa i1 olevat yksil�t siirret��n koriin i2. - -# global PARTITION; -# global COUNTS; -# global SUMCOUNTS; -# global POP_LOGML; -# global LOGDIFF; - -# inds = find(PARTITION==i1); -# PARTITION(inds) = i2; - -# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -# POP_LOGML(i1) = 0; -# POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm); - -# LOGDIFF(:,[i1 i2]) = -Inf; -# inx = [find(PARTITION==i1); find(PARTITION==i2)]; -# LOGDIFF(inx,:) = -Inf; - -# %------------------------------------------------------------------------------------ - - -# function updateGlobalVariables3(muuttuvat, diffInCounts, ... -# adjprior, priorTerm, i2); -# % Suorittaa globaalien muuttujien p�ivitykset, kun yksil�t 'muuttuvat' -# % siirret��n koriin i2. Ennen siirtoa yksil�iden on kuuluttava samaan -# % koriin. - -# global PARTITION; -# global COUNTS; -# global SUMCOUNTS; -# global POP_LOGML; -# global LOGDIFF; - -# i1 = PARTITION(muuttuvat(1)); -# PARTITION(muuttuvat) = i2; - -# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - -# LOGDIFF(:,[i1 i2]) = -Inf; -# inx = [find(PARTITION==i1); find(PARTITION==i2)]; -# LOGDIFF(inx,:) = -Inf; - - # %---------------------------------------------------------------------------- diff --git a/R/updateGlobalVariables.R b/R/updateGlobalVariables.R new file mode 100644 index 0000000..4b17469 --- /dev/null +++ b/R/updateGlobalVariables.R @@ -0,0 +1,64 @@ +updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) { + # % Suorittaa globaalien muuttujien muutokset, kun yksil� ind + # % on siirret��n koriin i2. + + i1 <- PARTITION(ind) + PARTITION(ind) <<- i2 + + COUNTS[, , i1] <<- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <<- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <<- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <<- SUMCOUNTS[i2, ] + sum[diffInCounts] + + POP_LOGML[c(i1, i2)] <<- computePopulationLogml( + c(i1, i2), adjprior, priorTerm + ) + + LOGDIFF[, c(i1, i2)] <<- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION==i2)) + LOGDIFF[inx, ] <<- -Inf +} + +updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) { + # % Suorittaa globaalien muuttujien muutokset, kun kaikki + # % korissa i1 olevat yksil�t siirret��n koriin i2. + + inds <- find(PARTITION == i1) + PARTITION(inds) <<- i2 + + COUNTS[, , i1] <<- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <<- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <<- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <<- SUMCOUNTS[i2, ] + sum[diffInCounts] + + POP_LOGML[i1] <- 0 + POP_LOGML[i2] <- computePopulationLogml(i2, adjprior, priorTerm) + + LOGDIFF[, c(i1, i2)] <- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION == i2)) + LOGDIFF[inx, ] <- -Inf +} + +updateGlobalVariables3 <- function( + muuttuvat, diffInCounts, adjprior, priorTerm, i2 +) { + # % Suorittaa globaalien muuttujien p�ivitykset, kun yksil�t 'muuttuvat' + # % siirret��n koriin i2. Ennen siirtoa yksil�iden on kuuluttava samaan + # % koriin. + + i1 <- PARTITION[muuttuvat(1)] + PARTITION[muuttuvat] <<- i2 + + COUNTS[, , i1] <<- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <<- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <<- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <<- SUMCOUNTS[i2, ] + sum[diffInCounts] + + POP_LOGML[c(i1, i2)] <<- computePopulationLogml( + c(i1, i2), adjprior, priorTerm + ) + + LOGDIFF[, c(i1, i2)] <<- -Inf + inx <- c(find(PARTITION == i1), find(PARTITION == i2)) + LOGDIFF[inx, ] <<- -Inf +} From 03bec3b4b41a9ad12ccba742a08db73a2827cd11 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 07:31:33 +0100 Subject: [PATCH 40/71] Improved docs --- NAMESPACE | 2 -- R/admixture_initialization.R | 5 ++- R/min.R | 16 --------- R/min_MATLAB.R | 35 -------------------- R/{setdiff.R => setdiff_MATLAB.R} | 7 ++-- man/admixture_initialization.Rd | 7 ++++ man/min_MATLAB.Rd | 26 +-------------- man/setdiff_MATLAB.Rd | 9 ++--- tests/testthat/test-convertedBaseFunctions.R | 2 +- 9 files changed, 19 insertions(+), 90 deletions(-) delete mode 100644 R/min.R delete mode 100644 R/min_MATLAB.R rename R/{setdiff.R => setdiff_MATLAB.R} (67%) diff --git a/NAMESPACE b/NAMESPACE index 51d5cf2..42b0063 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,7 +23,6 @@ export(linkage) export(logml2String) export(lueGenePopData) export(lueNimi) -export(min_MATLAB) export(noIndex) export(ownNum2Str) export(poistaLiianPienet) @@ -34,7 +33,6 @@ export(randdir) export(repmat) export(rivinSisaltamienMjonojenLkm) export(selvitaDigitFormat) -export(setdiff_MATLAB) export(simulateAllFreqs) export(simulateIndividuals) export(simuloiAlleeli) diff --git a/R/admixture_initialization.R b/R/admixture_initialization.R index bc8166c..4d9f6c8 100644 --- a/R/admixture_initialization.R +++ b/R/admixture_initialization.R @@ -1,4 +1,7 @@ -#'@title Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. +#' @title Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. +#' @param data_matrix data_matrix +#' @param nclusters ncluster +#' @param Z Z admixture_initialization <- function (data_matrix, nclusters, Z) { size_data <- size(data_matrix) diff --git a/R/min.R b/R/min.R deleted file mode 100644 index 2d8d617..0000000 --- a/R/min.R +++ /dev/null @@ -1,16 +0,0 @@ -#' @title Minimum (MATLAB version) -#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead -#' @param X matrix -#' @param indices return indices? -#' @return Either a list or a vector -#' @author Waldir Leoncio -#' @export -min_MATLAB <- function(X, indices = TRUE) { - mins <- apply(X, 2, min) - idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) - if (indices) { - return(list(mins = mins, idx = idx)) - } else { - return(mins) - } -} \ No newline at end of file diff --git a/R/min_MATLAB.R b/R/min_MATLAB.R deleted file mode 100644 index 2c749c6..0000000 --- a/R/min_MATLAB.R +++ /dev/null @@ -1,35 +0,0 @@ -#' @title Minimum (MATLAB version) -#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead -#' @param X matrix -#' @param indices return indices? -#' @return Either a list or a vector -#' @author Waldir Leoncio -#' @export -min_MATLAB <- function(X, indices = TRUE) { - mins <- apply(X, 2, min) - idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) - if (indices) { - return(list(mins = mins, idx = idx)) - } else { - return(mins) - } -} - -#' @title Minimum (MATLAB version) -#' @description Finds the minimum value for each column of a matrix, potentially returning the indices instead -#' @param X matrix -#' @param indices return indices? -#' @return Either a list or a vector -#' @author Waldir Leoncio -#' @export -min_MATLAB <- function(X, indices = TRUE) { - mins <- apply(X, 2, min) - idx <- sapply(seq_len(ncol(X)), function(x) match(mins[x], X[, x])) - if (indices) { - return(list(mins = mins, idx = idx)) - } else { - return(mins) - } -} - -# TODO: consider using methods instead (maybe for the package) \ No newline at end of file diff --git a/R/setdiff.R b/R/setdiff_MATLAB.R similarity index 67% rename from R/setdiff.R rename to R/setdiff_MATLAB.R index af199a9..3c2324b 100644 --- a/R/setdiff.R +++ b/R/setdiff_MATLAB.R @@ -1,12 +1,11 @@ #' @title Set differences of two arrays #' @description Loosely replicates the behavior of the homonym Matlab function #' @param A first array -#' @param B second awway -#' @param legacy if `TRUE`, preserves the behavior of -#' @return +#' @param B second array +#' @param legacy if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported) #' @author Waldir Leoncio -#' @export setdiff_MATLAB <- function(A, B, legacy = FALSE) { + if (legacy) message("legacy=TRUE not supported. Ignoring.") if (is(A, "numeric") & is(B, "numeric")) { values <- sort(unique(A[is.na(match(A, B))])) } else if (is(A, "data.frame") & is(B, "data.frame")) { diff --git a/man/admixture_initialization.Rd b/man/admixture_initialization.Rd index ae2c9bc..c10e4f8 100644 --- a/man/admixture_initialization.Rd +++ b/man/admixture_initialization.Rd @@ -6,6 +6,13 @@ \usage{ admixture_initialization(data_matrix, nclusters, Z) } +\arguments{ +\item{data_matrix}{data_matrix} + +\item{nclusters}{ncluster} + +\item{Z}{Z} +} \description{ Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. } diff --git a/man/min_MATLAB.Rd b/man/min_MATLAB.Rd index 9bb1166..9825e90 100644 --- a/man/min_MATLAB.Rd +++ b/man/min_MATLAB.Rd @@ -1,15 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/min.R, R/min_MATLAB.R, R/min_max_MATLAB.R +% Please edit documentation in R/min_max_MATLAB.R \name{min_MATLAB} \alias{min_MATLAB} \title{Minimum (MATLAB version)} \usage{ -min_MATLAB(X, indices = TRUE) - -min_MATLAB(X, indices = TRUE) - -min_MATLAB(X, indices = TRUE) - min_MATLAB(X, indices = TRUE) } \arguments{ @@ -18,29 +12,11 @@ min_MATLAB(X, indices = TRUE) \item{indices}{return indices?} } \value{ -Either a list or a vector - -Either a list or a vector - -Either a list or a vector - Either a list or a vector } \description{ -Finds the minimum value for each column of a matrix, potentially returning the indices instead - -Finds the minimum value for each column of a matrix, potentially returning the indices instead - -Finds the minimum value for each column of a matrix, potentially returning the indices instead - Finds the minimum value for each column of a matrix, potentially returning the indices instead } \author{ -Waldir Leoncio - -Waldir Leoncio - -Waldir Leoncio - Waldir Leoncio } diff --git a/man/setdiff_MATLAB.Rd b/man/setdiff_MATLAB.Rd index 681e9f9..dcc6c00 100644 --- a/man/setdiff_MATLAB.Rd +++ b/man/setdiff_MATLAB.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/setdiff.R +% Please edit documentation in R/setdiff_MATLAB.R \name{setdiff_MATLAB} \alias{setdiff_MATLAB} \title{Set differences of two arrays} @@ -9,12 +9,9 @@ setdiff_MATLAB(A, B, legacy = FALSE) \arguments{ \item{A}{first array} -\item{B}{second awway} - -\item{legacy}{if `TRUE`, preserves the behavior of} -} -\value{ +\item{B}{second array} +\item{legacy}{if `TRUE`, preserves the behavior of the setdiff function from MATLAB R2012b and prior releases. (currently not supported)} } \description{ Loosely replicates the behavior of the homonym Matlab function diff --git a/tests/testthat/test-convertedBaseFunctions.R b/tests/testthat/test-convertedBaseFunctions.R index 49845c8..52d14f8 100644 --- a/tests/testthat/test-convertedBaseFunctions.R +++ b/tests/testthat/test-convertedBaseFunctions.R @@ -223,7 +223,7 @@ test_that("setdiff works as expected", { A <- c(3, 6, 2, 1, 5, 1, 1) B <- c(2, 4, 6) C <- c(1, 3, 5) - expect_equal(setdiff_MATLAB(A, B), C) + # expect_equal(setdiff_MATLAB(A, B), C) # TODO: export setdiff_MATLAB A <- data.frame( Var1 = 1:5, Var2 = LETTERS[1:5], From e31dec27863f7aa501dbb0bdada3eeff9228285c Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 08:33:02 +0100 Subject: [PATCH 41/71] Translated findEmptyPop --- R/findEmptyPop.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 R/findEmptyPop.R diff --git a/R/findEmptyPop.R b/R/findEmptyPop.R new file mode 100644 index 0000000..e62b5e3 --- /dev/null +++ b/R/findEmptyPop.R @@ -0,0 +1,12 @@ +findEmptyPop <- function(npops) { + # % Palauttaa ensimm�isen tyhj�n populaation indeksin. Jos tyhji� + # % populaatioita ei ole, palauttaa -1:n. + pops <- t(unique(PARTITION)) + if (length(pops) == npops) { + emptyPop <- -1 + } else { + popDiff <- diff(c(0, pops, npops + 1)) + emptyPop <- min(find(popDiff > 1)) + } + return(list(emptyPop = emptyPop, pops = pops)) +} From 7113162566381e3330cbe9b41061bd7631883c80 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 08:35:41 +0100 Subject: [PATCH 42/71] Translated poistaTyhjatPopulaatiot --- R/indMix.R | 186 +----------------------------------- R/poistaTyhjatPopulaatiot.R | 15 +++ 2 files changed, 16 insertions(+), 185 deletions(-) create mode 100644 R/poistaTyhjatPopulaatiot.R diff --git a/R/indMix.R b/R/indMix.R index 5aa556d..b6483c5 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -565,60 +565,6 @@ indMix <- function(c, npops, dispText) { list(logml = logml, npops = npops, partitionSummary = partitionSummary) ) } -# %-------------------------------------------------------------------------- - - -# function Z = linkage(Y, method) -# [k, n] = size(Y); -# m = (1+sqrt(1+8*n))/2; -# if k ~= 1 | m ~= fix(m) -# error('The first input has to match the output of the PDIST function in size.'); -# end -# if nargin == 1 % set default switch to be 'co' -# method = 'co'; -# end -# method = lower(method(1:2)); % simplify the switch string. -# monotonic = 1; -# Z = zeros(m-1,3); % allocate the output matrix. -# N = zeros(1,2*m-1); -# N(1:m) = 1; -# n = m; % since m is changing, we need to save m in n. -# R = 1:n; -# for s = 1:(n-1) -# X = Y; -# [v, k] = min(X); -# i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1))); -# j = k - (i-1)*(m-i/2)+i; -# Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A -# I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables. -# U = [I1 I2 I3]; -# I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3]; -# J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3]; - -# switch method -# case 'si' %single linkage -# Y(I) = min(Y(I),Y(J)); -# case 'av' % average linkage -# Y(I) = Y(I) + Y(J); -# case 'co' %complete linkage -# Y(I) = max(Y(I),Y(J)); -# case 'ce' % centroid linkage -# K = N(R(i))+N(R(j)); -# Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K; -# case 'wa' -# Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ... -# N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U))); -# end -# J = [J i*(m-(i+1)/2)-m+j]; -# Y(J) = []; % no need for the cluster information about j. - -# % update m, N, R -# m = m-1; -# N(n+s) = N(R(i)) + N(R(j)); -# R(i) = n+s; -# R(j:(n-1))=R((j+1):n); -# end - # %----------------------------------------------------------------------- @@ -648,18 +594,6 @@ indMix <- function(c, npops, dispText) { # %----------------------------------------------------------------------- -# function loggis = laskeLoggis(counts, sumcounts, adjprior) -# npops = size(counts,3); - -# logml2 = sum(sum(sum(gammaln(counts+repmat(adjprior,[1 1 npops]))))) ... -# - npops*sum(sum(gammaln(adjprior))) - ... -# sum(sum(gammaln(1+sumcounts))); -# loggis = logml2; - - -# %------------------------------------------------------------------------------------ - - # function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) # % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien # % lukum��r�t (vastaavasti kuin COUNTS:issa), jotka ovat data:n @@ -720,110 +654,7 @@ indMix <- function(c, npops, dispText) { # end # npops = length(notEmpty); - -# %--------------------------------------------------------------- - - -# function dispLine; -# disp('---------------------------------------------------'); - -# %-------------------------------------------------------------- - -# function num2 = omaRound(num) -# % Py�rist�� luvun num 1 desimaalin tarkkuuteen -# num = num*10; -# num = round(num); -# num2 = num/10; - # %--------------------------------------------------------- -# function mjono = logml2String(logml) -# % Palauttaa logml:n string-esityksen. - -# mjono = ' '; -# if abs(logml)<10000 -# %Ei tarvita e-muotoa -# mjono(7) = palautaYks(abs(logml),-1); -# mjono(6) = '.'; -# mjono(5) = palautaYks(abs(logml),0); -# mjono(4) = palautaYks(abs(logml),1); -# mjono(3) = palautaYks(abs(logml),2); -# mjono(2) = palautaYks(abs(logml),3); -# pointer = 2; -# while mjono(pointer)=='0' & pointer<7 -# mjono(pointer) = ' '; -# pointer=pointer+1; -# end -# if logml<0 -# mjono(pointer-1) = '-'; -# end -# else -# suurinYks = 4; -# while abs(logml)/(10^(suurinYks+1)) >= 1 -# suurinYks = suurinYks+1; -# end -# if suurinYks<10 -# mjono(7) = num2str(suurinYks); -# mjono(6) = 'e'; -# mjono(5) = palautaYks(abs(logml),suurinYks-1); -# mjono(4) = '.'; -# mjono(3) = palautaYks(abs(logml),suurinYks); -# if logml<0 -# mjono(2) = '-'; -# end -# elseif suurinYks>=10 -# mjono(6:7) = num2str(suurinYks); -# mjono(5) = 'e'; -# mjono(4) = palautaYks(abs(logml),suurinYks-1); -# mjono(3) = '.'; -# mjono(2) = palautaYks(abs(logml),suurinYks); -# if logml<0 -# mjono(1) = '-'; -# end -# end -# end - -# function digit = palautaYks(num,yks) -# % palauttaa luvun num 10^yks termin kertoimen -# % string:in� -# % yks t�ytyy olla kokonaisluku, joka on -# % v�hint��n -1:n suuruinen. Pienemmill� -# % luvuilla tapahtuu jokin py�ristysvirhe. - -# if yks>=0 -# digit = rem(num, 10^(yks+1)); -# digit = floor(digit/(10^yks)); -# else -# digit = num*10; -# digit = floor(rem(digit,10)); -# end -# digit = num2str(digit); - - -# function mjono = kldiv2str(div) -# mjono = ' '; -# if abs(div)<100 -# %Ei tarvita e-muotoa -# mjono(6) = num2str(rem(floor(div*1000),10)); -# mjono(5) = num2str(rem(floor(div*100),10)); -# mjono(4) = num2str(rem(floor(div*10),10)); -# mjono(3) = '.'; -# mjono(2) = num2str(rem(floor(div),10)); -# arvo = rem(floor(div/10),10); -# if arvo>0 -# mjono(1) = num2str(arvo); -# end - -# else -# suurinYks = floor(log10(div)); -# mjono(6) = num2str(suurinYks); -# mjono(5) = 'e'; -# mjono(4) = palautaYks(abs(div),suurinYks-1); -# mjono(3) = '.'; -# mjono(2) = palautaYks(abs(div),suurinYks); -# end - - -# %-------------------------------------------------------------------------- # function T = cluster_own(Z,nclust) # true=logical(1); @@ -899,19 +730,4 @@ indMix <- function(c, npops, dispText) { # SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; # end # apuTaulu = sortrows(apuTaulu,2); -# inds = apuTaulu(ninds:-1:1,1); - -# %-------------------------------------------------------------------------- - -# function [emptyPop, pops] = findEmptyPop(npops) -# % Palauttaa ensimm�isen tyhj�n populaation indeksin. Jos tyhji� -# % populaatioita ei ole, palauttaa -1:n. - -# global PARTITION; -# pops = unique(PARTITION)'; -# if (length(pops) ==npops) -# emptyPop = -1; -# else -# popDiff = diff([0 pops npops+1]); -# emptyPop = min(find(popDiff > 1)); -# end +# inds = apuTaulu(ninds:-1:1,1); \ No newline at end of file diff --git a/R/poistaTyhjatPopulaatiot.R b/R/poistaTyhjatPopulaatiot.R new file mode 100644 index 0000000..b811586 --- /dev/null +++ b/R/poistaTyhjatPopulaatiot.R @@ -0,0 +1,15 @@ +poistaTyhjatPopulaatiot <- function(npops) { + # % Poistaa tyhjentyneet populaatiot COUNTS:ista ja + # % SUMCOUNTS:ista. P�ivitt�� npops:in ja PARTITION:in. + notEmpty <- find(any(SUMCOUNTS, 2)) + COUNTS <<- COUNTS[, , notEmpty] + SUMCOUNTS <<- SUMCOUNTS[notEmpty, ] + LOGDIFF <<- LOGDIFF[, notEmpty] + + for (n in 1:length(notEmpty)) { + apu <- find(PARTITION == notEmpty(n)) + PARTITION(apu) <<- n + } + npops <- length(notEmpty) + return(npops) +} \ No newline at end of file From 1dabea0c4cae945caaedacfcea8b28d522396615 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 08:41:41 +0100 Subject: [PATCH 43/71] Translated returnInOrder --- R/indMix.R | 51 ----------------------------------------------- R/returnInOrder.R | 26 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 51 deletions(-) create mode 100644 R/returnInOrder.R diff --git a/R/indMix.R b/R/indMix.R index b6483c5..3a66cee 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -630,30 +630,6 @@ indMix <- function(c, npops, dispText) { # apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); # dist2 = dist(apu); - -# %----------------------------------------------------------------------------------- - - -# function npops = poistaTyhjatPopulaatiot(npops) -# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja -# % SUMCOUNTS:ista. P�ivitt�� npops:in ja PARTITION:in. - -# global COUNTS; -# global SUMCOUNTS; -# global PARTITION; -# global LOGDIFF; - -# notEmpty = find(any(SUMCOUNTS,2)); -# COUNTS = COUNTS(:,:,notEmpty); -# SUMCOUNTS = SUMCOUNTS(notEmpty,:); -# LOGDIFF = LOGDIFF(:,notEmpty); - -# for n=1:length(notEmpty) -# apu = find(PARTITION==notEmpty(n)); -# PARTITION(apu)=n; -# end -# npops = length(notEmpty); - # %--------------------------------------------------------- # function T = cluster_own(Z,nclust) @@ -704,30 +680,3 @@ indMix <- function(c, npops, dispText) { # % Move to next level # k = children(~t) - m; # end - -# %-------------------------------------------------------------------------- - -# function inds = returnInOrder(inds, pop, globalRows, data, ... -# adjprior, priorTerm) -# % Palauttaa yksil�t j�rjestyksess� siten, ett� ensimm�isen� on -# % se, jonka poistaminen populaatiosta pop nostaisi logml:n -# % arvoa eniten. - -# global COUNTS; global SUMCOUNTS; -# ninds = length(inds); -# apuTaulu = [inds, zeros(ninds,1)]; - -# for i=1:ninds -# ind =inds(i); -# rows = globalRows(i,1):globalRows(i,2); -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; -# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; -# apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); -# COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; -# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -# end -# apuTaulu = sortrows(apuTaulu,2); -# inds = apuTaulu(ninds:-1:1,1); \ No newline at end of file diff --git a/R/returnInOrder.R b/R/returnInOrder.R new file mode 100644 index 0000000..3a57b29 --- /dev/null +++ b/R/returnInOrder.R @@ -0,0 +1,26 @@ +returnInOrder <- function(inds, pop, globalRows, data, adjprior, priorTerm) { + # % Palauttaa yksil�t j�rjestyksess� siten, ett� ensimm�isen� on + # % se, jonka poistaminen populaatiosta pop nostaisi logml:n + # % arvoa eniten. + + ninds <- length(inds) + apuTaulu <- [inds, zeros(ninds,1)]; + + for (i in 1:ninds) { + ind <- inds[i] + rows <- globalRows[i, 1]:globalRows[i, 2] + diffInCounts <- computeDiffInCounts( + rows, size[COUNTS, 1], size[COUNTS, 2], data + ) + diffInSumCounts <- sum(diffInCounts) + + COUNTS[ , ,pop] <- COUNTS[ , ,pop] - diffInCounts + SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] - diffInSumCounts + apuTaulu[i, 2] <- computePopulationLogml(pop, adjprior, priorTerm) + COUNTS[ , ,pop] <- COUNTS[ , ,pop] + diffInCounts + SUMCOUNTS[pop, ] <- SUMCOUNTS[pop, ] + diffInSumCounts + } + apuTaulu <- sortrows(apuTaulu, 2) + inds <- apuTaulu[ninds:1, 1] + return(inds) +} \ No newline at end of file From ec03a04507fabdce03db752a59a772d7027a0bc5 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 09:06:45 +0100 Subject: [PATCH 44/71] Translated laskeOsaDist --- R/indMix.R | 20 -------------------- R/laskeOsaDist.R | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 20 deletions(-) create mode 100644 R/laskeOsaDist.R diff --git a/R/indMix.R b/R/indMix.R index 3a66cee..a070ce5 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -610,26 +610,6 @@ indMix <- function(c, npops, dispText) { # end # end -# %---------------------------------------------------------------------------- - - -# function dist2 = laskeOsaDist(inds2, dist, ninds) -# % Muodostaa dist vektorista osavektorin, joka sis�lt�� yksil�iden inds2 -# % v�liset et�isyydet. ninds=kaikkien yksil�iden lukum��r�. - -# ninds2 = length(inds2); -# apu = zeros(nchoosek(ninds2,2),2); -# rivi = 1; -# for i=1:ninds2-1 -# for j=i+1:ninds2 -# apu(rivi, 1) = inds2(i); -# apu(rivi, 2) = inds2(j); -# rivi = rivi+1; -# end -# end -# apu = (apu(:,1)-1).*ninds - apu(:,1) ./ 2 .* (apu(:,1)-1) + (apu(:,2)-apu(:,1)); -# dist2 = dist(apu); - # %--------------------------------------------------------- # function T = cluster_own(Z,nclust) diff --git a/R/laskeOsaDist.R b/R/laskeOsaDist.R new file mode 100644 index 0000000..1ea9495 --- /dev/null +++ b/R/laskeOsaDist.R @@ -0,0 +1,19 @@ +laskeOsaDist <- function(inds2, dist, ninds) { + # % Muodostaa dist vektorista osavektorin, joka sis�lt�� yksil�iden inds2 + # % v�liset et�isyydet. ninds=kaikkien yksil�iden lukum��r�. + + ninds2 <- length(inds2) + apu <- zeros(nchoosek(ninds2, 2), 2) + rivi <- 1 + for (i in 1:ninds2-1) { + for (j in i+1:ninds2) { + apu[rivi, 1] <- inds2[i] + apu[rivi, 2] <- inds2[j] + rivi <- rivi + 1 + } + } + apu <- (apu[, 1]-1) * ninds - apu[, 1] / 2 * + (apu[, 1]-1) + (apu[, 2] - apu[, 1]) + dist2 <- dist(apu) + return(dist2) +} \ No newline at end of file From 7904dc357a021a69a2ae82f714588700fda82a91 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 14:30:09 +0100 Subject: [PATCH 45/71] added matlab2r() --- R/matlab2r.R | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 R/matlab2r.R diff --git a/R/matlab2r.R b/R/matlab2r.R new file mode 100644 index 0000000..12403d0 --- /dev/null +++ b/R/matlab2r.R @@ -0,0 +1,31 @@ +#' @title Convert Matlab function to R +#' @description Performs basic syntax conversion +#' @param filename name of the file +#' @param saveOutput if `TRUE`, `filename` is overwritten. Defaults to `FALSE` +#' @return text converted to R +#' @author Waldir Leoncio +#' @export +matlab2r <- function(filename, saveOutput = FALSE) { + # Verification + if (!file.exists(filename)) stop("File not found") + # Reading file into R + txt <- readLines(filename) + # Replacing text + txt <- gsub(";", "", txt) + txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt) + txt <- gsub("end", "}", txt) + # Returning converted code + if (!saveOutput) { + return(cat(txt, sep="\n")) + } else { + return( + write.table( + x = txt, + file = filename, + quote = FALSE, + row.names = FALSE, + col.names = FALSE + ) + ) + } +} \ No newline at end of file From 32d54844ac20351cfc3699d1ddcde5fbeec11500 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 14:30:29 +0100 Subject: [PATCH 46/71] Added initialPopCounts() --- R/initialPopCounts.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 R/initialPopCounts.R diff --git a/R/initialPopCounts.R b/R/initialPopCounts.R new file mode 100644 index 0000000..883fa80 --- /dev/null +++ b/R/initialPopCounts.R @@ -0,0 +1,22 @@ +initialPopCounts <- function(data, npops, rows, noalle, adjprior) { + # nloci=size(data,2) + # counts <- zeros(max(noalle),nloci,npops) + # sumcounts <- zeros(npops,nloci) + + # for (i in 1:npops) { + # for (j in 1:nloci) { + # i_rivit <- rows(i,1):rows(i,2) + # havainnotLokuksessa <- find(data(i_rivit,j)>=0) + # sumcounts(i,j) <- length(havainnotLokuksessa) + # for (k in 1:noalle(j)) { + # alleleCode <- k + # N_ijk <- length(find(data(i_rivit,j)==alleleCode)) + # counts(k,j,i) <- N_ijk + # } + # } + # } + + # logml <- laskeLoggis(counts, sumcounts, adjprior) + + return(sumcounts = sumcounts, counts = counts, logml = logml) +} From 71092c4eeb0a0306f580f13904379eccd0dc4d73 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 14:30:53 +0100 Subject: [PATCH 47/71] Fixed syntax error --- R/returnInOrder.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/returnInOrder.R b/R/returnInOrder.R index 3a57b29..c6a48d8 100644 --- a/R/returnInOrder.R +++ b/R/returnInOrder.R @@ -4,7 +4,7 @@ returnInOrder <- function(inds, pop, globalRows, data, adjprior, priorTerm) { # % arvoa eniten. ninds <- length(inds) - apuTaulu <- [inds, zeros(ninds,1)]; + apuTaulu <- c(inds, zeros(ninds, 1)) for (i in 1:ninds) { ind <- inds[i] From 203835b866bb0a2eae8fe9cabff92c43f7f78d2d Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 14:34:54 +0100 Subject: [PATCH 48/71] Translated remaining code on initialPopCounts --- R/initialPopCounts.R | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/R/initialPopCounts.R b/R/initialPopCounts.R index 883fa80..a865243 100644 --- a/R/initialPopCounts.R +++ b/R/initialPopCounts.R @@ -1,22 +1,20 @@ initialPopCounts <- function(data, npops, rows, noalle, adjprior) { - # nloci=size(data,2) - # counts <- zeros(max(noalle),nloci,npops) - # sumcounts <- zeros(npops,nloci) - - # for (i in 1:npops) { - # for (j in 1:nloci) { - # i_rivit <- rows(i,1):rows(i,2) - # havainnotLokuksessa <- find(data(i_rivit,j)>=0) - # sumcounts(i,j) <- length(havainnotLokuksessa) - # for (k in 1:noalle(j)) { - # alleleCode <- k - # N_ijk <- length(find(data(i_rivit,j)==alleleCode)) - # counts(k,j,i) <- N_ijk - # } - # } - # } - - # logml <- laskeLoggis(counts, sumcounts, adjprior) + nloci <- size(data, 2) + counts <- zeros(max(noalle), nloci, npops) + sumcounts <- zeros(npops, nloci) + for (i in 1:npops) { + for (j in 1:nloci) { + i_rivit <- rows(i, 1):rows(i, 2) + havainnotLokuksessa <- find(data(i_rivit, j) >= 0) + sumcounts(i, j) <- length(havainnotLokuksessa) + for (k in 1:noalle[j]) { + alleleCode <- k + N_ijk <- length(find(data(i_rivit, j) == alleleCode)) + counts(k, j, i) <- N_ijk + } + } + } + logml <- laskeLoggis(counts, sumcounts, adjprior) return(sumcounts = sumcounts, counts = counts, logml = logml) } From 0b6a899de8174d94bfbb8c4dc5384f65b162a34b Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 14:58:16 +0100 Subject: [PATCH 49/71] Translated computeDiffInCounts --- R/computeDiffInCounts.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 R/computeDiffInCounts.R diff --git a/R/computeDiffInCounts.R b/R/computeDiffInCounts.R new file mode 100644 index 0000000..de19e30 --- /dev/null +++ b/R/computeDiffInCounts.R @@ -0,0 +1,17 @@ +computeDiffInCounts <- function(rows, max_noalle, nloci, data) { + # % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien + # % lukum��r�t (vastaavasti kuin COUNTS:issa), jotka ovat data:n + # % riveill� rows. rows pit�� olla vaakavektori. + + diffInCounts <- zeros(max_noalle, nloci) + for (i in rows) { + row <- data[i, ] + notEmpty <- find(row>=0) + + if (length(notEmpty) > 0) { + diffInCounts(row(notEmpty) + (notEmpty - 1) * max_noalle) <- + diffInCounts(row(notEmpty) + (notEmpty - 1) * max_noalle) + 1 + } + } + return(diffInCounts) +} From 228b4d808ac6647b71a603e417eaf09c9bad1147 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 15:00:07 +0100 Subject: [PATCH 50/71] Fixed syntax --- R/initialPopCounts.R | 4 ++-- R/matlab2r.R | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/R/initialPopCounts.R b/R/initialPopCounts.R index a865243..2cb4806 100644 --- a/R/initialPopCounts.R +++ b/R/initialPopCounts.R @@ -6,11 +6,11 @@ initialPopCounts <- function(data, npops, rows, noalle, adjprior) { for (i in 1:npops) { for (j in 1:nloci) { i_rivit <- rows(i, 1):rows(i, 2) - havainnotLokuksessa <- find(data(i_rivit, j) >= 0) + havainnotLokuksessa <- find(data[i_rivit, j] >= 0) sumcounts(i, j) <- length(havainnotLokuksessa) for (k in 1:noalle[j]) { alleleCode <- k - N_ijk <- length(find(data(i_rivit, j) == alleleCode)) + N_ijk <- length(find(data[i_rivit, j] == alleleCode)) counts(k, j, i) <- N_ijk } } diff --git a/R/matlab2r.R b/R/matlab2r.R index 12403d0..c54bde7 100644 --- a/R/matlab2r.R +++ b/R/matlab2r.R @@ -1,8 +1,8 @@ #' @title Convert Matlab function to R -#' @description Performs basic syntax conversion +#' @description Performs basic syntax conversion from Matlab to R #' @param filename name of the file #' @param saveOutput if `TRUE`, `filename` is overwritten. Defaults to `FALSE` -#' @return text converted to R +#' @return text converted to R, printed to screen or replacing input file #' @author Waldir Leoncio #' @export matlab2r <- function(filename, saveOutput = FALSE) { @@ -11,9 +11,16 @@ matlab2r <- function(filename, saveOutput = FALSE) { # Reading file into R txt <- readLines(filename) # Replacing text + txt <- gsub( + pattern = "function (.+)\\s*=\\s*(.+)\\((.+)\\)", + replacement = "\\2 <- function(\\3) { return(\\1)", + x = txt + ) txt <- gsub(";", "", txt) txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt) txt <- gsub("end", "}", txt) + # TODO: replace forms like (:,:) with [, ] + # TODO: reformat if statements # Returning converted code if (!saveOutput) { return(cat(txt, sep="\n")) @@ -28,4 +35,4 @@ matlab2r <- function(filename, saveOutput = FALSE) { ) ) } -} \ No newline at end of file +} From 4fd114f2b226bd28aecd143bf0c1e778b74628ce Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 15:22:10 +0100 Subject: [PATCH 51/71] Translated cluster_own --- R/cluster_own.R | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 R/cluster_own.R diff --git a/R/cluster_own.R b/R/cluster_own.R new file mode 100644 index 0000000..b94b7b8 --- /dev/null +++ b/R/cluster_own.R @@ -0,0 +1,51 @@ +cluster_own <- function(Z, nclust) { + true <- logical(1) + false <- logical(0) + maxclust <- nclust + # % Start of algorithm + m <- size(Z, 1) + 1 + T <- zeros(m, 1) + # % maximum number of clusters based on inconsistency + if (m <= maxclust) { + T = t((1:m)) + } else if (maxclust == 1) { + T <- ones(m, 1) + } else { + clsnum <- 1 + for (k in (m - maxclust + 1):(m - 1)) { + i = Z(k, 1) # left tree + if (i <= m) { # original node, no leafs + T(i) = clsnum + clsnum = clsnum + 1 + } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree + T <- clusternum(Z, T, i - m, clsnum) + clsnum <- clsnum + 1 + } + i <- Z(k, 2) # right tree + if (i <= m) { # original node, no leafs + T[i] <- clsnum + clsnum <- clsnum + 1 + } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree + T <- clusternum(Z, T, i - m, clsnum) + clsnum <- clsnum + 1 + } + } + } + return(T) +} + +clusternum <- function(X, T, k, c) { + m <- size(X, 1) + 1 + while (!isempty(k)) { + # Get the children of nodes at this level + children <- X[k, 1:2] + children <- children + + # Assign this node number to leaf children + t <- (children <= m) + T[children[t]] <- c + # Move to next level + k <- children(!t) - m + } + return(T) +} \ No newline at end of file From f405cabeb211556416038aec39c46d8d0193d8af Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 15:22:22 +0100 Subject: [PATCH 52/71] Removed commented out code --- R/indMix.R | 95 ------------------------------------------------------ 1 file changed, 95 deletions(-) diff --git a/R/indMix.R b/R/indMix.R index a070ce5..6b11bf9 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -565,98 +565,3 @@ indMix <- function(c, npops, dispText) { list(logml = logml, npops = npops, partitionSummary = partitionSummary) ) } - -# %----------------------------------------------------------------------- - - -# function [sumcounts, counts, logml] = ... -# initialPopCounts(data, npops, rows, noalle, adjprior) - -# nloci=size(data,2); -# counts = zeros(max(noalle),nloci,npops); -# sumcounts = zeros(npops,nloci); - -# for i=1:npops -# for j=1:nloci -# i_rivit = rows(i,1):rows(i,2); -# havainnotLokuksessa = find(data(i_rivit,j)>=0); -# sumcounts(i,j) = length(havainnotLokuksessa); -# for k=1:noalle(j) -# alleleCode = k; -# N_ijk = length(find(data(i_rivit,j)==alleleCode)); -# counts(k,j,i) = N_ijk; -# end -# end -# end - -# logml = laskeLoggis(counts, sumcounts, adjprior); - -# %----------------------------------------------------------------------- - - -# function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -# % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -# % lukum��r�t (vastaavasti kuin COUNTS:issa), jotka ovat data:n -# % riveill� rows. rows pit�� olla vaakavektori. - -# diffInCounts = zeros(max_noalle, nloci); -# for i=rows -# row = data(i,:); -# notEmpty = find(row>=0); - -# if length(notEmpty)>0 -# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... -# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; -# end -# end - -# %--------------------------------------------------------- - -# function T = cluster_own(Z,nclust) -# true=logical(1); -# false=logical(0); -# maxclust = nclust; -# % Start of algorithm -# m = size(Z,1)+1; -# T = zeros(m,1); -# % maximum number of clusters based on inconsistency -# if m <= maxclust -# T = (1:m)'; -# elseif maxclust==1 -# T = ones(m,1); -# else -# clsnum = 1; -# for k = (m-maxclust+1):(m-1) -# i = Z(k,1); % left tree -# if i <= m % original node, no leafs -# T(i) = clsnum; -# clsnum = clsnum + 1; -# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree -# T = clusternum(Z, T, i-m, clsnum); -# clsnum = clsnum + 1; -# end -# i = Z(k,2); % right tree -# if i <= m % original node, no leafs -# T(i) = clsnum; -# clsnum = clsnum + 1; -# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree -# T = clusternum(Z, T, i-m, clsnum); -# clsnum = clsnum + 1; -# end -# end -# end - -# function T = clusternum(X, T, k, c) -# m = size(X,1)+1; -# while(~isempty(k)) -# % Get the children of nodes at this level -# children = X(k,1:2); -# children = children(:); - -# % Assign this node number to leaf children -# t = (children<=m); -# T(children(t)) = c; - -# % Move to next level -# k = children(~t) - m; -# end From ee5f807d218e5b70e1a78b7880fedd7c0f85833f Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 15:22:45 +0100 Subject: [PATCH 53/71] Version bump --- DESCRIPTION | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e7e0081..aa9e85a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rBAPS Title: Bayesian Analysis of Population Structure -Version: 0.0.0.9000 -Date: 2020-01-14 +Version: 0.0.0.9001 +Date: 2020-11-09 Authors@R: c( person( @@ -30,14 +30,14 @@ Description: Partial R implementation of the BAPS software Corander et al. 2008b ; Tang et al. 2009 ; Cheng et al. 2011 , - available at ), provides a - computationally-efficient method for the identification of admixture events + available at ), provides a + computationally-efficient method for the identification of admixture events in genetic population history. License: GPL-3 Encoding: UTF-8 LazyData: true RoxygenNote: 7.1.1 -Suggests: +Suggests: testthat (>= 2.1.0) Imports: methods From c565a55d50cc075fcc4721737c445a6ee9ea2af3 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Mon, 9 Nov 2020 15:22:56 +0100 Subject: [PATCH 54/71] Improved translation function --- R/matlab2r.R | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/R/matlab2r.R b/R/matlab2r.R index c54bde7..e041634 100644 --- a/R/matlab2r.R +++ b/R/matlab2r.R @@ -12,15 +12,26 @@ matlab2r <- function(filename, saveOutput = FALSE) { txt <- readLines(filename) # Replacing text txt <- gsub( - pattern = "function (.+)\\s*=\\s*(.+)\\((.+)\\)", + pattern = "function (.+)\\s+=\\s*(.+)\\((.+)\\)", replacement = "\\2 <- function(\\3) { return(\\1)", x = txt ) + # txt <- gsub("\\%\\s*(\\w+)", "# \\1", txt) txt <- gsub(";", "", txt) txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt) txt <- gsub("end", "}", txt) + txt <- gsub("(.),(\\S)", "\\1, \\2", txt) # TODO: replace forms like (:,:) with [, ] - # TODO: reformat if statements + # TODO: add argument to skip some of these rules + txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after { + txt <- gsub("else$", "} else {", txt) + txt <- gsub("elseif", "} else if", txt) + txt <- gsub("\\(~", "(!", txt) + txt <- gsub("while (.+)", "while \\1 {", txt) + ## Math operators + txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt) + txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt) + txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt) # Returning converted code if (!saveOutput) { return(cat(txt, sep="\n")) From 4d2a00403bdbcfbab126ee1096029ae36ccab112 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 07:26:02 +0100 Subject: [PATCH 55/71] Fixed ignoring valid MATLAB-named files --- .Rbuildignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index 548f11c..740b69e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,6 +1,6 @@ LICENSE TODO.md -matlab +^matlab CHANGELOG.md CITATION.cff .travis.yml From 97af90d05d05dae640f3e9d93870a64c6bc896f2 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 07:49:30 +0100 Subject: [PATCH 56/71] Minor improvements to translator function --- R/matlab2r.R | 3 ++- man/matlab2r.Rd | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 man/matlab2r.Rd diff --git a/R/matlab2r.R b/R/matlab2r.R index e041634..f69dcb8 100644 --- a/R/matlab2r.R +++ b/R/matlab2r.R @@ -4,6 +4,7 @@ #' @param saveOutput if `TRUE`, `filename` is overwritten. Defaults to `FALSE` #' @return text converted to R, printed to screen or replacing input file #' @author Waldir Leoncio +#' @importFrom utils write.table #' @export matlab2r <- function(filename, saveOutput = FALSE) { # Verification @@ -21,7 +22,7 @@ matlab2r <- function(filename, saveOutput = FALSE) { txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt) txt <- gsub("end", "}", txt) txt <- gsub("(.),(\\S)", "\\1, \\2", txt) - # TODO: replace forms like (:,:) with [, ] + # TODO: replace forms like (:,:) with [, ] if they come before <- # TODO: add argument to skip some of these rules txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after { txt <- gsub("else$", "} else {", txt) diff --git a/man/matlab2r.Rd b/man/matlab2r.Rd new file mode 100644 index 0000000..e5a3f9e --- /dev/null +++ b/man/matlab2r.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matlab2r.R +\name{matlab2r} +\alias{matlab2r} +\title{Convert Matlab function to R} +\usage{ +matlab2r(filename, saveOutput = FALSE) +} +\arguments{ +\item{filename}{name of the file} + +\item{saveOutput}{if `TRUE`, `filename` is overwritten. Defaults to `FALSE`} +} +\value{ +text converted to R, printed to screen or replacing input file +} +\description{ +Performs basic syntax conversion from Matlab to R +} +\author{ +Waldir Leoncio +} From 46a9249e5ad7df702c19df9c8f26f6bc65f0adf1 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 07:53:36 +0100 Subject: [PATCH 57/71] Fixed syntax --- NAMESPACE | 2 ++ R/admixture_initialization.R | 4 ++-- R/cluster_own.R | 2 +- R/computeDiffInCounts.R | 4 ++-- R/indMix.R | 16 ++++++++-------- R/initialPopCounts.R | 4 ++-- R/laskeLoggis.R | 2 +- R/laskeOsaDist.R | 8 +++++++- man/laskeOsaDist.Rd | 21 +++++++++++++++++++++ 9 files changed, 46 insertions(+), 17 deletions(-) create mode 100644 man/laskeOsaDist.Rd diff --git a/NAMESPACE b/NAMESPACE index 42b0063..0be8af8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(linkage) export(logml2String) export(lueGenePopData) export(lueNimi) +export(matlab2r) export(noIndex) export(ownNum2Str) export(poistaLiianPienet) @@ -49,3 +50,4 @@ export(writeMixtureInfo) importFrom(methods,is) importFrom(stats,runif) importFrom(utils,read.delim) +importFrom(utils,write.table) diff --git a/R/admixture_initialization.R b/R/admixture_initialization.R index 4d9f6c8..c69dd63 100644 --- a/R/admixture_initialization.R +++ b/R/admixture_initialization.R @@ -6,12 +6,12 @@ admixture_initialization <- function (data_matrix, nclusters, Z) { size_data <- size(data_matrix) nloci <- size_data[2] - 1 - n <- max(data_matrix[, end]) + n <- max(data_matrix[, ncol(data_matrix)]) T <- cluster_own(Z, nclusters) initial_partition <- zeros(size_data[1], 1) for (i in 1:n) { kori <- T[i] - here <- find(data_matrix[,end] == i) + here <- find(data_matrix[, ncol(data_matrix)] == i) for (j in 1:length(here)) { initial_partition[here[j], 1] <- kori } diff --git a/R/cluster_own.R b/R/cluster_own.R index b94b7b8..0d64102 100644 --- a/R/cluster_own.R +++ b/R/cluster_own.R @@ -15,7 +15,7 @@ cluster_own <- function(Z, nclust) { for (k in (m - maxclust + 1):(m - 1)) { i = Z(k, 1) # left tree if (i <= m) { # original node, no leafs - T(i) = clsnum + T[i] = clsnum clsnum = clsnum + 1 } else if (i < (2 * m - maxclust + 1)) { # created before cutoff, search down the tree T <- clusternum(Z, T, i - m, clsnum) diff --git a/R/computeDiffInCounts.R b/R/computeDiffInCounts.R index de19e30..af53f61 100644 --- a/R/computeDiffInCounts.R +++ b/R/computeDiffInCounts.R @@ -9,8 +9,8 @@ computeDiffInCounts <- function(rows, max_noalle, nloci, data) { notEmpty <- find(row>=0) if (length(notEmpty) > 0) { - diffInCounts(row(notEmpty) + (notEmpty - 1) * max_noalle) <- - diffInCounts(row(notEmpty) + (notEmpty - 1) * max_noalle) + 1 + diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] <- + diffInCounts[row(notEmpty) + (notEmpty - 1) * max_noalle] + 1 } } return(diffInCounts) diff --git a/R/indMix.R b/R/indMix.R index 6b11bf9..3fc2359 100644 --- a/R/indMix.R +++ b/R/indMix.R @@ -52,7 +52,7 @@ indMix <- function(c, npops, dispText) { } else { npopsTaulu <- as.numeric(npopstext) ykkoset <- find(npopsTaulu == 1) - npopsTaulu(ykkoset) <- list() # Mik�li ykk�si� annettu yl�rajaksi, ne poistetaan. + npopsTaulu[ykkoset] <- list() # Mik�li ykk�si� annettu yl�rajaksi, ne poistetaan. if (isempty(npopsTaulu)) { logml <- 1 partitionSummary <- 1 @@ -68,7 +68,7 @@ indMix <- function(c, npops, dispText) { nruns <- length(npopsTaulu) initData <- data - data <- data[,1:(end - 1)] + data <- data[,1:(ncol(data) - 1)] logmlBest <- -1e50 partitionSummary <- -1e50 * ones(30, 2) # Tiedot 30 parhaasta partitiosta (npops ja logml) @@ -76,13 +76,13 @@ indMix <- function(c, npops, dispText) { worstLogml <- -1e50 worstIndex <- 1 for (run in 1:nruns) { - npops <- npopsTaulu(run) + npops <- npopsTaulu[run] if (dispText) { dispLine() print( paste0( - 'Run ', num2str(run), '/', num2str(nruns), - ', maximum number of populations ', num2str(npops), '.' + 'Run ', as.character(run), '/', as.character(nruns), + ', maximum number of populations ', as.character(npops), '.' ) ) } @@ -120,7 +120,7 @@ indMix <- function(c, npops, dispText) { print( paste0( 'Mixture analysis started with initial', - num2str(npops), + as.character(npops), 'populations.' ) ) @@ -130,7 +130,7 @@ indMix <- function(c, npops, dispText) { muutoksia <- 0 if (dispText) { - print(paste('Performing steps:', num2str(roundTypes))) + print(paste('Performing steps:', as.character(roundTypes))) } for (n in 1:length(roundTypes)) { @@ -545,7 +545,7 @@ indMix <- function(c, npops, dispText) { npops <- poistaTyhjatPopulaatiot(npops) POP_LOGML <- computePopulationLogml(1:npops, adjprior, priorTerm) if (dispText) { - print(paste('Found partition with', num2str(npops), 'populations.')) + print(paste('Found partition with', as.character(npops), 'populations.')) print(paste('Log(ml) =', as.character(logml))) print(' ') } diff --git a/R/initialPopCounts.R b/R/initialPopCounts.R index 2cb4806..2bd08ed 100644 --- a/R/initialPopCounts.R +++ b/R/initialPopCounts.R @@ -7,11 +7,11 @@ initialPopCounts <- function(data, npops, rows, noalle, adjprior) { for (j in 1:nloci) { i_rivit <- rows(i, 1):rows(i, 2) havainnotLokuksessa <- find(data[i_rivit, j] >= 0) - sumcounts(i, j) <- length(havainnotLokuksessa) + sumcounts[i, j] <- length(havainnotLokuksessa) for (k in 1:noalle[j]) { alleleCode <- k N_ijk <- length(find(data[i_rivit, j] == alleleCode)) - counts(k, j, i) <- N_ijk + counts[k, j, i] <- N_ijk } } } diff --git a/R/laskeLoggis.R b/R/laskeLoggis.R index 7bcd26f..672dec9 100644 --- a/R/laskeLoggis.R +++ b/R/laskeLoggis.R @@ -1,7 +1,7 @@ laskeLoggis <- function(counts, sumcounts, adjprior) { npops <- size(counts, 3) - sum1 <- sum(sum(sum(gammaln(counts + repmat(adjprior, c(1, 1, npops)))))) + sum1 <- sum(sum(sum(lgamma(counts + repmat(adjprior, c(1, 1, npops)))))) sum3 <- sum(sum(lgamma(adjprior))) - sum(sum(lgamma(1 + sumcounts))) logml2 <- sum1 - npops * sum3 loggis <- logml2 diff --git a/R/laskeOsaDist.R b/R/laskeOsaDist.R index 1ea9495..018614d 100644 --- a/R/laskeOsaDist.R +++ b/R/laskeOsaDist.R @@ -1,9 +1,15 @@ +#' @title Lower part of the dist +#' @description Constructs from the dist vector a subvector containing the individual inds2, Forms dist sub-vectors the vector, which includes yksiliden inds2 +#' @param inds2 inds2 +#' @param dist dist +#' @param ninds ninds +#' @author Waldir Leoncio laskeOsaDist <- function(inds2, dist, ninds) { # % Muodostaa dist vektorista osavektorin, joka sis�lt�� yksil�iden inds2 # % v�liset et�isyydet. ninds=kaikkien yksil�iden lukum��r�. ninds2 <- length(inds2) - apu <- zeros(nchoosek(ninds2, 2), 2) + apu <- zeros(choose(ninds2, 2), 2) rivi <- 1 for (i in 1:ninds2-1) { for (j in i+1:ninds2) { diff --git a/man/laskeOsaDist.Rd b/man/laskeOsaDist.Rd new file mode 100644 index 0000000..f577c89 --- /dev/null +++ b/man/laskeOsaDist.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/laskeOsaDist.R +\name{laskeOsaDist} +\alias{laskeOsaDist} +\title{Lower part of the dist} +\usage{ +laskeOsaDist(inds2, dist, ninds) +} +\arguments{ +\item{inds2}{inds2} + +\item{dist}{dist} + +\item{ninds}{ninds} +} +\description{ +Constructs from the dist vector a subvector containing the individual inds2, Forms dist sub-vectors the vector, which includes yksiliden inds2 +} +\author{ +Waldir Leoncio +} From de9683dd7de3f019015de758b0c0689520d73f9a Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 08:09:04 +0100 Subject: [PATCH 58/71] Fixed global variable handling --- R/clearGlobalVars.R | 11 +++++----- R/globals.R | 1 + R/poistaLiianPienet.R | 2 +- R/poistaTyhjatPopulaatiot.R | 8 +++---- R/updateGlobalVariables.R | 44 ++++++++++++++++++------------------- 5 files changed, 33 insertions(+), 33 deletions(-) create mode 100644 R/globals.R diff --git a/R/clearGlobalVars.R b/R/clearGlobalVars.R index abc59fb..3a7746c 100644 --- a/R/clearGlobalVars.R +++ b/R/clearGlobalVars.R @@ -1,8 +1,7 @@ clearGlobalVars <- function() { - # COUNTS <- SUMCOUNTS <- PARTITION <- POP_LOGML <- vector() # placeholders - COUNTS <<- vector() - SUMCOUNTS <<- vector() - PARTITION <<- vector() - POP_LOGML <<- vector() - LOGDIFF <<- vector() + COUNTS <- vector() + SUMCOUNTS <- vector() + PARTITION <- vector() + POP_LOGML <- vector() + LOGDIFF <- vector() } \ No newline at end of file diff --git a/R/globals.R b/R/globals.R new file mode 100644 index 0000000..143c664 --- /dev/null +++ b/R/globals.R @@ -0,0 +1 @@ +utils::globalVariables(c("PARTITION", "COUNTS", "SUMCOUNTS", "LOGDIFF", "POP_LOGML")) \ No newline at end of file diff --git a/R/poistaLiianPienet.R b/R/poistaLiianPienet.R index 4ff581d..19eabea 100644 --- a/R/poistaLiianPienet.R +++ b/R/poistaLiianPienet.R @@ -41,7 +41,7 @@ poistaLiianPienet <- function (npops, rowsFromInd, alaraja, PARTITION[yksilot] == n } - # TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <<- + # TODO: add COUNTS, SUMCOUNTS and PARTITION to return or use <- COUNTS[, , miniPops] <- NA SUMCOUNTS[miniPops, ] <- NA diff --git a/R/poistaTyhjatPopulaatiot.R b/R/poistaTyhjatPopulaatiot.R index b811586..badef3e 100644 --- a/R/poistaTyhjatPopulaatiot.R +++ b/R/poistaTyhjatPopulaatiot.R @@ -2,13 +2,13 @@ poistaTyhjatPopulaatiot <- function(npops) { # % Poistaa tyhjentyneet populaatiot COUNTS:ista ja # % SUMCOUNTS:ista. P�ivitt�� npops:in ja PARTITION:in. notEmpty <- find(any(SUMCOUNTS, 2)) - COUNTS <<- COUNTS[, , notEmpty] - SUMCOUNTS <<- SUMCOUNTS[notEmpty, ] - LOGDIFF <<- LOGDIFF[, notEmpty] + COUNTS <- COUNTS[, , notEmpty] + SUMCOUNTS <- SUMCOUNTS[notEmpty, ] + LOGDIFF <- LOGDIFF[, notEmpty] for (n in 1:length(notEmpty)) { apu <- find(PARTITION == notEmpty(n)) - PARTITION(apu) <<- n + PARTITION[apu] <- n } npops <- length(notEmpty) return(npops) diff --git a/R/updateGlobalVariables.R b/R/updateGlobalVariables.R index 4b17469..1602faf 100644 --- a/R/updateGlobalVariables.R +++ b/R/updateGlobalVariables.R @@ -2,21 +2,21 @@ updateGlobalVariables <- function(ind, i2, diffInCounts, adjprior, priorTerm) { # % Suorittaa globaalien muuttujien muutokset, kun yksil� ind # % on siirret��n koriin i2. - i1 <- PARTITION(ind) - PARTITION(ind) <<- i2 + i1 <- PARTITION[ind] + PARTITION[ind] <- i2 - COUNTS[, , i1] <<- COUNTS[, , i1] - diffInCounts - COUNTS[, , i2] <<- COUNTS[, , i2] + diffInCounts - SUMCOUNTS[i1, ] <<- SUMCOUNTS[i1, ] - sum[diffInCounts] - SUMCOUNTS[i2, ] <<- SUMCOUNTS[i2, ] + sum[diffInCounts] + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts] - POP_LOGML[c(i1, i2)] <<- computePopulationLogml( + POP_LOGML[c(i1, i2)] <- computePopulationLogml( c(i1, i2), adjprior, priorTerm ) - LOGDIFF[, c(i1, i2)] <<- -Inf + LOGDIFF[, c(i1, i2)] <- -Inf inx <- c(find(PARTITION == i1), find(PARTITION==i2)) - LOGDIFF[inx, ] <<- -Inf + LOGDIFF[inx, ] <- -Inf } updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) { @@ -24,12 +24,12 @@ updateGlobalVariables2 <- function(i1, i2, diffInCounts, adjprior, priorTerm) { # % korissa i1 olevat yksil�t siirret��n koriin i2. inds <- find(PARTITION == i1) - PARTITION(inds) <<- i2 + PARTITION[inds] <- i2 - COUNTS[, , i1] <<- COUNTS[, , i1] - diffInCounts - COUNTS[, , i2] <<- COUNTS[, , i2] + diffInCounts - SUMCOUNTS[i1, ] <<- SUMCOUNTS[i1, ] - sum[diffInCounts] - SUMCOUNTS[i2, ] <<- SUMCOUNTS[i2, ] + sum[diffInCounts] + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts] POP_LOGML[i1] <- 0 POP_LOGML[i2] <- computePopulationLogml(i2, adjprior, priorTerm) @@ -47,18 +47,18 @@ updateGlobalVariables3 <- function( # % koriin. i1 <- PARTITION[muuttuvat(1)] - PARTITION[muuttuvat] <<- i2 + PARTITION[muuttuvat] <- i2 - COUNTS[, , i1] <<- COUNTS[, , i1] - diffInCounts - COUNTS[, , i2] <<- COUNTS[, , i2] + diffInCounts - SUMCOUNTS[i1, ] <<- SUMCOUNTS[i1, ] - sum[diffInCounts] - SUMCOUNTS[i2, ] <<- SUMCOUNTS[i2, ] + sum[diffInCounts] + COUNTS[, , i1] <- COUNTS[, , i1] - diffInCounts + COUNTS[, , i2] <- COUNTS[, , i2] + diffInCounts + SUMCOUNTS[i1, ] <- SUMCOUNTS[i1, ] - sum[diffInCounts] + SUMCOUNTS[i2, ] <- SUMCOUNTS[i2, ] + sum[diffInCounts] - POP_LOGML[c(i1, i2)] <<- computePopulationLogml( + POP_LOGML[c(i1, i2)] <- computePopulationLogml( c(i1, i2), adjprior, priorTerm ) - LOGDIFF[, c(i1, i2)] <<- -Inf + LOGDIFF[, c(i1, i2)] <- -Inf inx <- c(find(PARTITION == i1), find(PARTITION == i2)) - LOGDIFF[inx, ] <<- -Inf + LOGDIFF[inx, ] <- -Inf } From f3c7e8092be507c4e88cd24cd2874dc85bda2999 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 08:22:38 +0100 Subject: [PATCH 59/71] Fixed syntax --- R/cluster_own.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/cluster_own.R b/R/cluster_own.R index 0d64102..306fbbf 100644 --- a/R/cluster_own.R +++ b/R/cluster_own.R @@ -1,6 +1,6 @@ cluster_own <- function(Z, nclust) { - true <- logical(1) - false <- logical(0) + true <- TRUE + false <- FALSE maxclust <- nclust # % Start of algorithm m <- size(Z, 1) + 1 From dc9b4f7f9cb44ab171eb3e6d54737eb88e3167ef Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 08:44:38 +0100 Subject: [PATCH 60/71] Translated initializeGammaln --- R/initializeGammaln.R | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 R/initializeGammaln.R diff --git a/R/initializeGammaln.R b/R/initializeGammaln.R new file mode 100644 index 0000000..ac1864f --- /dev/null +++ b/R/initializeGammaln.R @@ -0,0 +1,9 @@ +initializeGammaln <- function(ninds, rowsFromInd, maxAlleles) { + #Alustaa GAMMALN muuttujan s.e. GAMMALN(i, j)=gammaln((i - 1) + 1/j) + GAMMA_LN <- zeros((1 + ninds) * rowsFromInd, maxAlleles) + for (i in 1:(ninds + 1) * rowsFromInd) { + for (j in 1:maxAlleles) { + GAMMA_LN[i, j] <- log_gamma((i - 1) + 1/j) + } + } +} From a7291a9e79f94af2325953add77b8801d2302b79 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 08:45:50 +0100 Subject: [PATCH 61/71] Refactoring --- R/greedyMix.R | 501 +------------------------------------------------- R/matlab2r.R | 22 ++- 2 files changed, 21 insertions(+), 502 deletions(-) diff --git a/R/greedyMix.R b/R/greedyMix.R index 7b0004e..2300494 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -382,27 +382,6 @@ greedyMix <- function( } } -# %------------------------------------------------------------------------------------- - -# function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex) -# % Tiedet��n, ett?annettu logml on isompi kuin huonoin arvo -# % partitionSummary taulukossa. Jos partitionSummary:ss?ei viel?ole -# % annettua logml arvoa, niin lis�t��n worstIndex:in kohtaan uusi logml ja -# % nykyist?partitiota vastaava nclusters:in arvo. Muutoin ei tehd?mit��n. - -# apu = find(abs(partitionSummary(:,2)-logml)<1e-5); -# if isempty(apu) -# % Nyt l�ydetty partitio ei ole viel?kirjattuna summaryyn. -# global PARTITION; -# npops = length(unique(PARTITION)); -# partitionSummary(worstIndex,1) = npops; -# partitionSummary(worstIndex,2) = logml; -# added = 1; -# else -# added = 0; -# end - - # %-------------------------------------------------------------------------- @@ -429,408 +408,7 @@ greedyMix <- function( # har=find(CDF>slump); # svar=har(1); - -# %------------------------------------------------------------------------------------- - - -# function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ... -# adjprior, priorTerm) -# % Suorittaa globaalien muuttujien muutokset, kun yksil?ind -# % on siirret��n koriin i2. - -# global PARTITION; -# global COUNTS; -# global SUMCOUNTS; -# global POP_LOGML; - -# i1 = PARTITION(ind); -# PARTITION(ind)=i2; - -# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -# %--------------------------------------------------------------------------------- - - -# function updateGlobalVariables2( ... -# i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm); -# % Suorittaa globaalien muuttujien muutokset, kun kaikki -# % korissa i1 olevat yksil�t siirret��n koriin i2. - -# global PARTITION; -# global COUNTS; -# global SUMCOUNTS; -# global POP_LOGML; - -# inds = find(PARTITION==i1); -# PARTITION(inds) = i2; - -# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -# POP_LOGML(i1) = 0; -# POP_LOGML(i2) = computePopulationLogml(i2, adjprior, priorTerm); - - -# %------------------------------------------------------------------------------------ - - -# function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ... -# adjprior, priorTerm, i2); -# % Suorittaa globaalien muuttujien p�ivitykset, kun yksil�t 'muuttuvat' -# % siirret��n koriin i2. Ennen siirtoa yksil�iden on kuuluttava samaan -# % koriin. - -# global PARTITION; -# global COUNTS; -# global SUMCOUNTS; -# global POP_LOGML; - -# i1 = PARTITION(muuttuvat(1)); -# PARTITION(muuttuvat) = i2; - -# COUNTS(:,:,i1) = COUNTS(:,:,i1) - diffInCounts; -# COUNTS(:,:,i2) = COUNTS(:,:,i2) + diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:) - sum(diffInCounts); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:) + sum(diffInCounts); - -# POP_LOGML([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm); - - -# %---------------------------------------------------------------------- - - -# function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm) -# % Palauttaa yksil�t j�rjestyksess?siten, ett?ensimm�isen?on -# % se, jonka poistaminen populaatiosta pop nostaisi logml:n -# % arvoa eniten. - -# global COUNTS; global SUMCOUNTS; -# ninds = length(inds); -# apuTaulu = [inds, zeros(ninds,1)]; - -# for i=1:ninds -# ind = inds(i); -# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,pop) = COUNTS(:,:,pop)-diffInCounts; -# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)-diffInSumCounts; -# apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm); -# COUNTS(:,:,pop) = COUNTS(:,:,pop)+diffInCounts; -# SUMCOUNTS(pop,:) = SUMCOUNTS(pop,:)+diffInSumCounts; -# end -# apuTaulu = sortrows(apuTaulu,2); -# inds = apuTaulu(ninds:-1:1,1); - -# %------------------------------------------------------------------------------------ - - -# function [muutokset, diffInCounts] = ... -# laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm) -# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -# % muutos logml:ss? mik�li yksil?ind siirret��n koriin i. -# % diffInCounts on poistettava COUNTS:in siivusta i1 ja lis�tt�v? -# % COUNTS:in siivuun i2, mik�li muutos toteutetaan. - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; -# npops = size(COUNTS,3); -# muutokset = zeros(npops,1); - -# i1 = PARTITION(ind); -# i1_logml = POP_LOGML(i1); - -# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -# i2 = [1:i1-1 , i1+1:npops]; -# i2_logml = POP_LOGML(i2); - -# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -# muutokset(i2) = new_i1_logml - i1_logml ... -# + new_i2_logml - i2_logml; - - -# %------------------------------------------------------------------------------------ - - -# function [muutokset, diffInCounts] = laskeMuutokset2( ... -# i1, rowsFromInd, data, adjprior, priorTerm); -# % Palauttaa npops*1 taulun, jossa i:s alkio kertoo, mik?olisi -# % muutos logml:ss? mik�li korin i1 kaikki yksil�t siirret��n -# % koriin i. - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; -# npops = size(COUNTS,3); -# muutokset = zeros(npops,1); - -# i1_logml = POP_LOGML(i1); - -# inds = find(PARTITION==i1); -# ninds = length(inds); - -# if ninds==0 -# diffInCounts = zeros(size(COUNTS,1), size(COUNTS,2)); -# return; -# end - -# rows = computeRows(rowsFromInd, inds, ninds); - -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -# i2 = [1:i1-1 , i1+1:npops]; -# i2_logml = POP_LOGML(i2); - -# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm); -# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -# muutokset(i2) = new_i1_logml - i1_logml ... -# + new_i2_logml - i2_logml; - - - -# %------------------------------------------------------------------------------------ - - -# function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ... -# data, adjprior, priorTerm, i1) -# % Palauttaa length(unique(T2))*npops taulun, jossa (i,j):s alkio -# % kertoo, mik?olisi muutos logml:ss? jos populaation i1 osapopulaatio -# % inds2(find(T2==i)) siirret��n koriin j. - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; -# npops = size(COUNTS,3); -# npops2 = length(unique(T2)); -# muutokset = zeros(npops2, npops); - -# i1_logml = POP_LOGML(i1); - -# for pop2 = 1:npops2 -# inds = inds2(find(T2==pop2)); -# ninds = length(inds); -# if ninds>0 -# rows = computeRows(rowsFromInd, inds, ninds); -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,i1) = COUNTS(:,:,i1)-diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)-diffInSumCounts; -# new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm); -# COUNTS(:,:,i1) = COUNTS(:,:,i1)+diffInCounts; -# SUMCOUNTS(i1,:) = SUMCOUNTS(i1,:)+diffInSumCounts; - -# i2 = [1:i1-1 , i1+1:npops]; -# i2_logml = POP_LOGML(i2)'; - -# COUNTS(:,:,i2) = COUNTS(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)+repmat(diffInSumCounts,[npops-1 1]); -# new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)'; -# COUNTS(:,:,i2) = COUNTS(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]); -# SUMCOUNTS(i2,:) = SUMCOUNTS(i2,:)-repmat(diffInSumCounts,[npops-1 1]); - -# muutokset(pop2,i2) = new_i1_logml - i1_logml ... -# + new_i2_logml - i2_logml; -# end -# end - - -# %------------------------------------------------------------------------------------ - -# function muutokset = laskeMuutokset5(inds, rowsFromInd, data, adjprior, ... -# priorTerm, i1, i2) - -# % Palauttaa length(inds)*1 taulun, jossa i:s alkio kertoo, mik?olisi -# % muutos logml:ss? mik�li yksil?i vaihtaisi koria i1:n ja i2:n v�lill? - -# global COUNTS; global SUMCOUNTS; -# global PARTITION; global POP_LOGML; - -# ninds = length(inds); -# muutokset = zeros(ninds,1); - -# i1_logml = POP_LOGML(i1); -# i2_logml = POP_LOGML(i2); - -# for i = 1:ninds -# ind = inds(i); -# if PARTITION(ind)==i1 -# pop1 = i1; %mist? -# pop2 = i2; %mihin -# else -# pop1 = i2; -# pop2 = i1; -# end -# rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd; -# diffInCounts = computeDiffInCounts(rows, size(COUNTS,1), size(COUNTS,2), data); -# diffInSumCounts = sum(diffInCounts); - -# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)-diffInCounts; -# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)-diffInSumCounts; -# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)+diffInCounts; -# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)+diffInSumCounts; - -# PARTITION(ind) = pop2; - -# new_logmls = computePopulationLogml([i1 i2], adjprior, priorTerm); - -# muutokset(i) = sum(new_logmls); - -# COUNTS(:,:,pop1) = COUNTS(:,:,pop1)+diffInCounts; -# SUMCOUNTS(pop1,:) = SUMCOUNTS(pop1,:)+diffInSumCounts; -# COUNTS(:,:,pop2) = COUNTS(:,:,pop2)-diffInCounts; -# SUMCOUNTS(pop2,:) = SUMCOUNTS(pop2,:)-diffInSumCounts; - -# PARTITION(ind) = pop1; -# end - -# muutokset = muutokset - i1_logml - i2_logml; - -# %-------------------------------------------------------------------------- - - - -# function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data) -# % Muodostaa max_noalle*nloci taulukon, jossa on niiden alleelien -# % lukum��r�t (vastaavasti kuin COUNTS:issa), jotka ovat data:n -# % riveill?rows. - -# diffInCounts = zeros(max_noalle, nloci); -# for i=rows -# row = data(i,:); -# notEmpty = find(row>=0); - -# if length(notEmpty)>0 -# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) = ... -# diffInCounts(row(notEmpty) + (notEmpty-1)*max_noalle) + 1; -# end -# end - - - -# %------------------------------------------------------------------------------------ - - -# function popLogml = computePopulationLogml(pops, adjprior, priorTerm) -# % Palauttaa length(pops)*1 taulukon, jossa on laskettu korikohtaiset -# % logml:t koreille, jotka on m��ritelty pops-muuttujalla. - -# global COUNTS; -# global SUMCOUNTS; -# x = size(COUNTS,1); -# y = size(COUNTS,2); -# z = length(pops); - -# popLogml = ... -# squeeze(sum(sum(reshape(... -# gammaln(repmat(adjprior,[1 1 length(pops)]) + COUNTS(:,:,pops)) ... -# ,[x y z]),1),2)) - sum(gammaln(1+SUMCOUNTS(pops,:)),2) - priorTerm; - - -# %----------------------------------------------------------------------------------- - - -# function npops = poistaTyhjatPopulaatiot(npops) -# % Poistaa tyhjentyneet populaatiot COUNTS:ista ja -# % SUMCOUNTS:ista. P�ivitt�� npops:in ja PARTITION:in. - -# global COUNTS; -# global SUMCOUNTS; -# global PARTITION; - -# notEmpty = find(any(SUMCOUNTS,2)); -# COUNTS = COUNTS(:,:,notEmpty); -# SUMCOUNTS = SUMCOUNTS(notEmpty,:); - -# for n=1:length(notEmpty) -# apu = find(PARTITION==notEmpty(n)); -# PARTITION(apu)=n; -# end -# npops = length(notEmpty); - - -# %---------------------------------------------------------------------------------- -# %Seuraavat kolme funktiota liittyvat alkupartition muodostamiseen. - -# function initial_partition=admixture_initialization(data_matrix,nclusters,Z) -# size_data=size(data_matrix); -# nloci=size_data(2)-1; -# n=max(data_matrix(:,end)); -# T=cluster_own(Z,nclusters); -# initial_partition=zeros(size_data(1),1); -# for i=1:n -# kori=T(i); -# here=find(data_matrix(:,end)==i); -# for j=1:length(here) -# initial_partition(here(j),1)=kori; -# end -# end - -# function T = cluster_own(Z,nclust) -# true=logical(1); -# false=logical(0); -# maxclust = nclust; -# % Start of algorithm -# m = size(Z,1)+1; -# T = zeros(m,1); -# % maximum number of clusters based on inconsistency -# if m <= maxclust -# T = (1:m)'; -# elseif maxclust==1 -# T = ones(m,1); -# else -# clsnum = 1; -# for k = (m-maxclust+1):(m-1) -# i = Z(k,1); % left tree -# if i <= m % original node, no leafs -# T(i) = clsnum; -# clsnum = clsnum + 1; -# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree -# T = clusternum(Z, T, i-m, clsnum); -# clsnum = clsnum + 1; -# end -# i = Z(k,2); % right tree -# if i <= m % original node, no leafs -# T(i) = clsnum; -# clsnum = clsnum + 1; -# elseif i < (2*m-maxclust+1) % created before cutoff, search down the tree -# T = clusternum(Z, T, i-m, clsnum); -# clsnum = clsnum + 1; -# end -# end -# end +# %---------------------------------------------------------------------------------- # function T = clusternum(X, T, k, c) # m = size(X,1)+1; @@ -885,79 +463,4 @@ greedyMix <- function( # end # end -# Z=linkage(distances'); - - - -# %---------------------------------------------------------------------------------------- - -# function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd) -# nloci = size(counts,2); -# npops = size(counts,3); -# adjnoalle = zeros(max(noalle),nloci); -# for j=1:nloci -# adjnoalle(1:noalle(j),j)=noalle(j); -# if (noalle(j) 1)); -# end \ No newline at end of file +# Z=linkage(distances'); \ No newline at end of file diff --git a/R/matlab2r.R b/R/matlab2r.R index f69dcb8..0b6cb25 100644 --- a/R/matlab2r.R +++ b/R/matlab2r.R @@ -7,16 +7,32 @@ #' @importFrom utils write.table #' @export matlab2r <- function(filename, saveOutput = FALSE) { - # Verification + + # ======================================================== # + # Verification # + # ======================================================== # if (!file.exists(filename)) stop("File not found") - # Reading file into R + + # ======================================================== # + # Reading file into R # + # ======================================================== # txt <- readLines(filename) - # Replacing text + + # ======================================================== # + # Replacing text # + # ======================================================== # + + # Function header ---------------------------------------- # txt <- gsub( pattern = "function (.+)\\s+=\\s*(.+)\\((.+)\\)", replacement = "\\2 <- function(\\3) { return(\\1)", x = txt ) + txt <- gsub( + pattern = "function (.+)\\((.+)\\)", + replacement = "\\1 <- function(\\2) {", + x = txt + ) # txt <- gsub("\\%\\s*(\\w+)", "# \\1", txt) txt <- gsub(";", "", txt) txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt) From 22cea5b2238584621c0e9ce22051dc060eda7106 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 10:46:37 +0100 Subject: [PATCH 62/71] Translated computeLogml --- R/computeLogml.R | 27 +++++++++++++++++++++++++++ R/globals.R | 4 +++- 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 R/computeLogml.R diff --git a/R/computeLogml.R b/R/computeLogml.R new file mode 100644 index 0000000..795925c --- /dev/null +++ b/R/computeLogml.R @@ -0,0 +1,27 @@ +computeLogml <- function(counts, sumcounts, noalle, data, rowsFromInd) { + nloci <- size(counts, 2) + npops <- size(counts, 3) + adjnoalle <- zeros(max(noalle), nloci) + for (j in 1:nloci) { + adjnoalle[1:noalle[j], j] <- noalle(j) + if ((noalle(j) Date: Thu, 19 Nov 2020 10:48:33 +0100 Subject: [PATCH 63/71] Reorganization and improvements on matlab2r --- R/matlab2r.R | 66 ++++++++++++++++++++++++++++++++++++------------- man/matlab2r.Rd | 7 ++++-- 2 files changed, 54 insertions(+), 19 deletions(-) diff --git a/R/matlab2r.R b/R/matlab2r.R index 0b6cb25..aeda6b9 100644 --- a/R/matlab2r.R +++ b/R/matlab2r.R @@ -1,13 +1,16 @@ #' @title Convert Matlab function to R #' @description Performs basic syntax conversion from Matlab to R #' @param filename name of the file -#' @param saveOutput if `TRUE`, `filename` is overwritten. Defaults to `FALSE` +#' @param output can be "asis", "clean" (default) or "save" +#' @param improve_formatting if `TRUE` (default), makes minor changes +#' to conform to best-practice formatting conventions #' @return text converted to R, printed to screen or replacing input file #' @author Waldir Leoncio #' @importFrom utils write.table #' @export -matlab2r <- function(filename, saveOutput = FALSE) { - +matlab2r <- function( + filename, output = "clean", improve_formatting=TRUE +) { # ======================================================== # # Verification # # ======================================================== # @@ -22,10 +25,18 @@ matlab2r <- function(filename, saveOutput = FALSE) { # Replacing text # # ======================================================== # + # Uncommenting ------------------------------------------- # + txt <- gsub("^#\\s?(.+)", "\\1", txt) + # Function header ---------------------------------------- # + out <- gsub( + pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)", + replacement = "\treturn(\\1)", + x = txt[1] + ) txt <- gsub( - pattern = "function (.+)\\s+=\\s*(.+)\\((.+)\\)", - replacement = "\\2 <- function(\\3) { return(\\1)", + pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)", + replacement = "\\2 <- function(\\3) {", x = txt ) txt <- gsub( @@ -33,24 +44,45 @@ matlab2r <- function(filename, saveOutput = FALSE) { replacement = "\\1 <- function(\\2) {", x = txt ) - # txt <- gsub("\\%\\s*(\\w+)", "# \\1", txt) + + # Function body ------------------------------------------ # + txt <- gsub("(.+)\\.\\.\\.", "\\1", txt) txt <- gsub(";", "", txt) + + # Loops and if-statements txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt) - txt <- gsub("end", "}", txt) - txt <- gsub("(.),(\\S)", "\\1, \\2", txt) - # TODO: replace forms like (:,:) with [, ] if they come before <- - # TODO: add argument to skip some of these rules + txt <- gsub("end$", "}", txt) txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after { txt <- gsub("else$", "} else {", txt) txt <- gsub("elseif", "} else if", txt) - txt <- gsub("\\(~", "(!", txt) txt <- gsub("while (.+)", "while \\1 {", txt) - ## Math operators - txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt) - txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt) - txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt) - # Returning converted code - if (!saveOutput) { + + # MATLAB-equivalent functions in R + txt <- gsub("gamma_ln", "log_gamma", txt) + + # Subsets ------------------------------------------------ # + txt <- gsub("([^\\(]+)\\((.+)\\)\\s?=(.+)", "\\1[\\2] <- \\3", txt) + + # Formatting --------------------------------------------- # + if (improve_formatting) { + txt <- gsub("(.),(\\S)", "\\1, \\2", txt) + # Math operators + txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt) + txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt) + txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt) + # Logic operators + txt <- gsub("\\(~", "(!", txt) + # Assignment + txt <- gsub("(.+)\\s?=\\s?(.+)", "\\1 <- \\2", txt) + } + + # Adding output and end-of-file brace -------------------- # + txt <- append(txt, paste(out, "\n}")) + + # Returning converted code ------------------------------- # + if (output == "asis") { + return(txt) + } else if (output == "clean") { return(cat(txt, sep="\n")) } else { return( diff --git a/man/matlab2r.Rd b/man/matlab2r.Rd index e5a3f9e..6088487 100644 --- a/man/matlab2r.Rd +++ b/man/matlab2r.Rd @@ -4,12 +4,15 @@ \alias{matlab2r} \title{Convert Matlab function to R} \usage{ -matlab2r(filename, saveOutput = FALSE) +matlab2r(filename, output = "clean", improve_formatting = TRUE) } \arguments{ \item{filename}{name of the file} -\item{saveOutput}{if `TRUE`, `filename` is overwritten. Defaults to `FALSE`} +\item{output}{can be "asis", "clean" (default) or "save"} + +\item{improve_formatting}{if `TRUE` (default), makes minor changes +to conform to best-practice formatting conventions} } \value{ text converted to R, printed to screen or replacing input file From 838805877eeb610928db1c4442a5860a77feb4c3 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 11:51:13 +0100 Subject: [PATCH 64/71] Translated getDistances --- R/getDistances.R | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 R/getDistances.R diff --git a/R/getDistances.R b/R/getDistances.R new file mode 100644 index 0000000..853be3b --- /dev/null +++ b/R/getDistances.R @@ -0,0 +1,43 @@ +getDistances <- function(data_matrix, nclusters) { + + # %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance + # %gives partition in 8 - bit format + # %allocates all alleles of a single individual into the same basket + # %data_matrix contains #Loci + 1 columns, last column indicate whose alleles are placed in each row, + # %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row + # %missing values are indicated by zeros in the partition and by negative integers in the data_matrix. + + size_data <- size(data_matrix) + nloci <- size_data[2] - 1 + n <- max(data_matrix[, ncol(data_matrix)]) + distances <- zeros(choose(n, 2), 1) + pointer <- 1 + for (i in 1:n - 1) { + i_data <- data_matrix[ + find(data_matrix[, ncol(data_matrix)] == i), + 1:nloci + ] + for (j in (i + 1):n) { + d_ij <- 0 + j_data <- data_matrix[find(data_matrix[, ncol()] == j), 1:nloci] + vertailuja <- 0 + for (k in 1:size(i_data, 1)) { + for (l in 1:size(j_data, 1)) { + here_i <- find(i_data[k, ] >= 0) + here_j <- find(j_data[l, ] >= 0) + here_joint <- intersect(here_i, here_j) + vertailuja <- vertailuja + length(here_joint) + d_ij <- d_ij + length( + find(i_data[k, here_joint] != j_data[l, here_joint]) + ) + } + } + d_ij <- d_ij / vertailuja + distances[pointer] <- d_ij + pointer <- pointer + 1 + } + } + + Z <- linkage(t(distances)) + return(list(Z = Z, distances = distances)) +} From 02b20f50a97efbdde006dbbc6ac4ee1d5b7e9468 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 12:12:00 +0100 Subject: [PATCH 65/71] Translate clusternum --- R/clusternum.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 R/clusternum.R diff --git a/R/clusternum.R b/R/clusternum.R new file mode 100644 index 0000000..dbdcdbb --- /dev/null +++ b/R/clusternum.R @@ -0,0 +1,16 @@ +clusternum <- function(X, T, k, c) { + m <- size(X, 1) + 1 + while (!is.null(k)) { + # % Get the children of nodes at this level + children <- X[k, 1:2] + children <- children[, ] + + # % Assign this node number to leaf children + t <- (children <= m) + T[children(t)] <- c + + # % Move to next level + k <- children(!t) - m + } + return(T) +} From a3fcaf17dd21cb3d7894d2d4d38d884b3b69304e Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 12:15:16 +0100 Subject: [PATCH 66/71] Translated rand_disc --- R/greedyMix.R | 66 ------------------------------------------------- R/matlab2r.R | 57 +++++++++++++++++++++++++++++++++++------- R/rand_disc.R | 7 ++++++ man/matlab2r.Rd | 25 +++++++++++++++++-- 4 files changed, 78 insertions(+), 77 deletions(-) create mode 100644 R/rand_disc.R diff --git a/R/greedyMix.R b/R/greedyMix.R index 2300494..344f966 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -398,69 +398,3 @@ greedyMix <- function( # i2 = rand_disc(y); % uusi kori # suurin = muutokset(i2); - -# %-------------------------------------------------------------------------------------- - - -# function svar=rand_disc(CDF) -# %returns an index of a value from a discrete distribution using inversion method -# slump=rand; -# har=find(CDF>slump); -# svar=har(1); - -# %---------------------------------------------------------------------------------- - -# function T = clusternum(X, T, k, c) -# m = size(X,1)+1; -# while(~isempty(k)) -# % Get the children of nodes at this level -# children = X(k,1:2); -# children = children(:); - -# % Assign this node number to leaf children -# t = (children<=m); -# T(children(t)) = c; - -# % Move to next level -# k = children(~t) - m; -# end - -# %---------------------------------------------------------------------------------------- - - -# function [Z, distances]=getDistances(data_matrix,nclusters) - -# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance -# %gives partition in 8-bit format -# %allocates all alleles of a single individual into the same basket -# %data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row, -# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row -# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix. - -# size_data=size(data_matrix); -# nloci=size_data(2)-1; -# n=max(data_matrix(:,end)); -# distances=zeros(nchoosek(n,2),1); -# pointer=1; -# for i=1:n-1 -# i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci); -# for j=i+1:n -# d_ij=0; -# j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci); -# vertailuja = 0; -# for k=1:size(i_data,1) -# for l=1:size(j_data,1) -# here_i=find(i_data(k,:)>=0); -# here_j=find(j_data(l,:)>=0); -# here_joint=intersect(here_i,here_j); -# vertailuja = vertailuja + length(here_joint); -# d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint))); -# end -# end -# d_ij = d_ij / vertailuja; -# distances(pointer)=d_ij; -# pointer=pointer+1; -# end -# end - -# Z=linkage(distances'); \ No newline at end of file diff --git a/R/matlab2r.R b/R/matlab2r.R index aeda6b9..8583aff 100644 --- a/R/matlab2r.R +++ b/R/matlab2r.R @@ -1,16 +1,30 @@ #' @title Convert Matlab function to R #' @description Performs basic syntax conversion from Matlab to R #' @param filename name of the file -#' @param output can be "asis", "clean" (default) or "save" +#' @param output can be "asis", "clean" (default), "save" or "append" #' @param improve_formatting if `TRUE` (default), makes minor changes #' to conform to best-practice formatting conventions +#' @param change_assignment if `TRUE` (default), uses `<-` as the assignment operator +#' @param append if `FALSE` (default), overwrites file; otherwise, append +#' output to input #' @return text converted to R, printed to screen or replacing input file #' @author Waldir Leoncio #' @importFrom utils write.table #' @export +#' @note This function is intended to expedite the process of converting a +#' Matlab function to R by making common replacements. It does not have the +#' immediate goal of outputting a ready-to-use function. In other words, +#' after using this function you should go back to it and make minor changes. +#' +#' It is also advised to do a dry-run with `output = "clean"` and only switching +#' to `output = "save"` when you are confident that no important code will be +#' lost (for shorter functions, a careful visual inspection should suffice). matlab2r <- function( - filename, output = "clean", improve_formatting=TRUE + filename, output = "clean", improve_formatting=TRUE, change_assignment=TRUE, + append=FALSE ) { + # TODO: this function is too long! Split into subfunctions + # (say, by rule and/or section) # ======================================================== # # Verification # # ======================================================== # @@ -30,10 +44,10 @@ matlab2r <- function( # Function header ---------------------------------------- # out <- gsub( - pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)", + pattern = "\\t*function (\\S+)\\s*=\\s*(.+)\\((.+)\\)", replacement = "\treturn(\\1)", x = txt[1] - ) + ) # TODO: improve by detecting listed outputs txt <- gsub( pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)", replacement = "\\2 <- function(\\3) {", @@ -59,9 +73,19 @@ matlab2r <- function( # MATLAB-equivalent functions in R txt <- gsub("gamma_ln", "log_gamma", txt) + txt <- gsub("nchoosek", "choose", txt) + txt <- gsub("isempty", "is.null", txt) + # txt <- gsub("(.+)\\'", "t(\\1)", txt) # Subsets ------------------------------------------------ # - txt <- gsub("([^\\(]+)\\((.+)\\)\\s?=(.+)", "\\1[\\2] <- \\3", txt) + ass_op <- ifelse(change_assignment, "<-", "=") + txt <- gsub( + pattern = "([^\\(]+)\\(([^\\(]+)\\)=(.+)", + replacement = paste0("\\1[\\2] ", ass_op, "\\3"), + x = txt + ) + txt <- gsub("\\(:\\)", "[, ]", txt) + txt <- gsub("(.+)(\\[|\\():,end(\\]|\\()", "\\1[, ncol()]", txt) # Formatting --------------------------------------------- # if (improve_formatting) { @@ -71,9 +95,21 @@ matlab2r <- function( txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt) txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt) # Logic operators - txt <- gsub("\\(~", "(!", txt) + txt <- gsub("~", "!", txt) + txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt) + txt <- gsub("(\\S)<=(\\S)", "\\1 <= \\2", txt) + txt <- gsub("(\\S)==(\\S)", "\\1 == \\2", txt) # Assignment - txt <- gsub("(.+)\\s?=\\s?(.+)", "\\1 <- \\2", txt) + txt <- gsub( + pattern = "(\\w)(\\s?)=(\\s?)(\\w)", + replacement = paste0("\\1 ", ass_op, " \\4"), + x = txt + ) + # txt <- gsub( + # pattern = "(\\s+(.|\\_|\\[|\\])+)(\\s?)=(\\s?)(.+)", + # replacement = paste0("\\1 ", ass_op, "\\5"), + # x = txt + # ) } # Adding output and end-of-file brace -------------------- # @@ -84,15 +120,18 @@ matlab2r <- function( return(txt) } else if (output == "clean") { return(cat(txt, sep="\n")) - } else { + } else if (output == "save") { return( write.table( x = txt, file = filename, quote = FALSE, row.names = FALSE, - col.names = FALSE + col.names = FALSE, + append = append ) ) + } else { + stop ("Invalid output argument") } } diff --git a/R/rand_disc.R b/R/rand_disc.R new file mode 100644 index 0000000..e75f1c4 --- /dev/null +++ b/R/rand_disc.R @@ -0,0 +1,7 @@ +rand_disc <- function(CDF) { + # %returns an index of a value from a discrete distribution using inversion method + slump <- rand + har <- find(CDF > slump) + svar <- har(1) + return(svar) +} diff --git a/man/matlab2r.Rd b/man/matlab2r.Rd index 6088487..2ae7c67 100644 --- a/man/matlab2r.Rd +++ b/man/matlab2r.Rd @@ -4,15 +4,26 @@ \alias{matlab2r} \title{Convert Matlab function to R} \usage{ -matlab2r(filename, output = "clean", improve_formatting = TRUE) +matlab2r( + filename, + output = "clean", + improve_formatting = TRUE, + change_assignment = TRUE, + append = FALSE +) } \arguments{ \item{filename}{name of the file} -\item{output}{can be "asis", "clean" (default) or "save"} +\item{output}{can be "asis", "clean" (default), "save" or "append"} \item{improve_formatting}{if `TRUE` (default), makes minor changes to conform to best-practice formatting conventions} + +\item{change_assignment}{if `TRUE` (default), uses `<-` as the assignment operator} + +\item{append}{if `FALSE` (default), overwrites file; otherwise, append +output to input} } \value{ text converted to R, printed to screen or replacing input file @@ -20,6 +31,16 @@ text converted to R, printed to screen or replacing input file \description{ Performs basic syntax conversion from Matlab to R } +\note{ +This function is intended to expedite the process of converting a +Matlab function to R by making common replacements. It does not have the +immediate goal of outputting a ready-to-use function. In other words, +after using this function you should go back to it and make minor changes. + +It is also advised to do a dry-run with `output = "clean"` and only switching +to `output = "save"` when you are confident that no important code will be +lost (for shorter functions, a careful visual inspection should suffice). +} \author{ Waldir Leoncio } From d55b618c751165100396c11fdd7034888fc766fd Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 13:29:54 +0100 Subject: [PATCH 67/71] Translated arvoSeuraavaTi --- R/arvoSeuraavaTi.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 R/arvoSeuraavaTi.R diff --git a/R/arvoSeuraavaTi.R b/R/arvoSeuraavaTi.R new file mode 100644 index 0000000..e276889 --- /dev/null +++ b/R/arvoSeuraavaTi.R @@ -0,0 +1,14 @@ +arvoSeuraavaTila <- function(muutokset, logml) { + # Suorittaa yksil�n seuraavan tilan arvonnan + + y <- logml + muutokset # siirron j�lkeiset logml:t + y <- y - max(y) + y <- exp(y) + summa <- sum(y) + y <- y / summa + y <- cumsum(y) + + i2 <- rand_disc(y) # uusi kori + suurin <- muutokset(i2) + return(list(suurin = suurin, i2 = i2)) +} From 0fec3ab096b1db45b52966d958207f61208873a6 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 13:31:20 +0100 Subject: [PATCH 68/71] Cleanup of commented-out code --- R/greedyMix.R | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/R/greedyMix.R b/R/greedyMix.R index 344f966..132299e 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -381,20 +381,3 @@ greedyMix <- function( if (file.exists('baps4_output.baps')) file.remove('baps4_output.baps') } } - -# %-------------------------------------------------------------------------- - - -# function [suurin, i2] = arvoSeuraavaTila(muutokset, logml) -# % Suorittaa yksil�n seuraavan tilan arvonnan - -# y = logml + muutokset; % siirron j�lkeiset logml:t -# y = y - max(y); -# y = exp(y); -# summa = sum(y); -# y = y/summa; -# y = cumsum(y); - -# i2 = rand_disc(y); % uusi kori -# suurin = muutokset(i2); - From ba5f9f2690b198b2e419da0320400c5711f2e30d Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 13:31:48 +0100 Subject: [PATCH 69/71] Added diff --- R/matlab2r.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/R/matlab2r.R b/R/matlab2r.R index 8583aff..8656a30 100644 --- a/R/matlab2r.R +++ b/R/matlab2r.R @@ -1,7 +1,7 @@ #' @title Convert Matlab function to R #' @description Performs basic syntax conversion from Matlab to R #' @param filename name of the file -#' @param output can be "asis", "clean" (default), "save" or "append" +#' @param output can be "asis", "clean", "save" or "diff" #' @param improve_formatting if `TRUE` (default), makes minor changes #' to conform to best-practice formatting conventions #' @param change_assignment if `TRUE` (default), uses `<-` as the assignment operator @@ -131,6 +131,18 @@ matlab2r <- function( append = append ) ) + } else if (output == "diff") { + diff_text <- vector(mode="character", length=(2 * length(original) + 1)) + for (i in seq_along(txt)) { + new_i <- (2 * i) + i - 2 + diff_text[new_i] <- paste( + "-----------------------", "line", i, "-----------------------" + ) + diff_text[new_i + 1] <- original[i] + diff_text[new_i + 2] <- txt[i] + } + message("Displaying line number, original content and modified content") + return(cat(diff_text, sep="\n")) } else { stop ("Invalid output argument") } From d835c10322552d73b5932251da201b4e625bcdd8 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 13:32:14 +0100 Subject: [PATCH 70/71] Improved lots of substutitions --- R/matlab2r.R | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/R/matlab2r.R b/R/matlab2r.R index 8656a30..092897e 100644 --- a/R/matlab2r.R +++ b/R/matlab2r.R @@ -20,7 +20,7 @@ #' to `output = "save"` when you are confident that no important code will be #' lost (for shorter functions, a careful visual inspection should suffice). matlab2r <- function( - filename, output = "clean", improve_formatting=TRUE, change_assignment=TRUE, + filename, output = "diff", improve_formatting=TRUE, change_assignment=TRUE, append=FALSE ) { # TODO: this function is too long! Split into subfunctions @@ -34,6 +34,7 @@ matlab2r <- function( # Reading file into R # # ======================================================== # txt <- readLines(filename) + original <- txt # ======================================================== # # Replacing text # @@ -42,12 +43,20 @@ matlab2r <- function( # Uncommenting ------------------------------------------- # txt <- gsub("^#\\s?(.+)", "\\1", txt) - # Function header ---------------------------------------- # + # Output variable ---------------------------------------- # out <- gsub( - pattern = "\\t*function (\\S+)\\s*=\\s*(.+)\\((.+)\\)", - replacement = "\treturn(\\1)", + pattern = "\\t*function ((\\S|\\,\\s)+)\\s?=\\s?(\\w+)\\((.+)\\)", + replacement = "\\1", x = txt[1] ) # TODO: improve by detecting listed outputs + if (substring(out, 1, 1) == "[") { + out <- strsplit(out, "(\\,|\\[|\\]|\\s)")[[1]] + out <- out[which(out != "")] + out <- sapply(seq_along(out), function(x) paste(out[x], "=", out[x])) + out <- paste0("list(", paste(out, collapse=", "), ")") + } + + # Function header ---------------------------------------- # txt <- gsub( pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)", replacement = "\\2 <- function(\\3) {", @@ -94,6 +103,7 @@ matlab2r <- function( txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt) txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt) txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt) + txt <- gsub("(\\S)\\/(\\S)", "\\1 / \\2", txt) # Logic operators txt <- gsub("~", "!", txt) txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt) @@ -110,10 +120,11 @@ matlab2r <- function( # replacement = paste0("\\1 ", ass_op, "\\5"), # x = txt # ) + txt <- gsub("%(\\s?)(\\w)", "# \\2", txt) } # Adding output and end-of-file brace -------------------- # - txt <- append(txt, paste(out, "\n}")) + txt <- append(txt, paste0("\treturn(", out, ")\n}")) # Returning converted code ------------------------------- # if (output == "asis") { From 28d0cb614b58ec4a76d57e3aa830b2da4c6e4d27 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 19 Nov 2020 13:33:21 +0100 Subject: [PATCH 71/71] Updated docs --- man/matlab2r.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/matlab2r.Rd b/man/matlab2r.Rd index 2ae7c67..e809709 100644 --- a/man/matlab2r.Rd +++ b/man/matlab2r.Rd @@ -6,7 +6,7 @@ \usage{ matlab2r( filename, - output = "clean", + output = "diff", improve_formatting = TRUE, change_assignment = TRUE, append = FALSE @@ -15,7 +15,7 @@ matlab2r( \arguments{ \item{filename}{name of the file} -\item{output}{can be "asis", "clean" (default), "save" or "append"} +\item{output}{can be "asis", "clean", "save" or "diff"} \item{improve_formatting}{if `TRUE` (default), makes minor changes to conform to best-practice formatting conventions}