ourMELONS/R/matlab2r.R

161 lines
5.7 KiB
R
Raw Normal View History

2020-11-09 14:30:09 +01:00
#' @title Convert Matlab function to R
#' @description Performs basic syntax conversion from Matlab to R
#' @param filename name of the file
#' @param output one of `"asis"` (return the converted lines as a character
#' vector), `"clean"` (print the converted lines), `"save"` (write the
#' converted lines to `filename`) or `"diff"` (default; print, for each line,
#' a header with the line number, the original text and the converted text)
#' @param improve_formatting if `TRUE` (default), makes minor changes
#' to conform to best-practice formatting conventions
#' @param change_assignment if `TRUE` (default), uses `<-` as the assignment
#' operator
#' @param append only used when `output = "save"`: if `FALSE` (default),
#' overwrites file; otherwise, append output to input
#' @return For `output = "asis"`, a character vector with the text converted
#' to R. For the other modes, `NULL` (invisibly); the converted text is
#' printed to screen or written to the input file.
#' @author Waldir Leoncio
#' @importFrom utils write.table
#' @export
#' @note This function is intended to expedite the process of converting a
#' Matlab function to R by making common replacements. It does not have the
#' immediate goal of outputting a ready-to-use function. In other words,
#' after using this function you should go back to it and make minor changes.
#'
#' It is also advised to do a dry-run with `output = "clean"` and only switch
#' to `output = "save"` when you are confident that no important code will be
#' lost (for shorter functions, a careful visual inspection should suffice).
matlab2r <- function(
  filename, output = "diff", improve_formatting = TRUE,
  change_assignment = TRUE, append = FALSE
) {
  # TODO: this function is too long! Split into subfunctions
  # (say, by rule and/or section)
  # ======================================================== #
  # Verification                                             #
  # ======================================================== #
  if (!file.exists(filename)) stop("File not found")
  # Reject unknown modes up front instead of after doing all the work
  valid_outputs <- c("asis", "clean", "save", "diff")
  if (length(output) != 1L || !output %in% valid_outputs) {
    stop("Invalid output argument")
  }
  # ======================================================== #
  # Reading file into R                                      #
  # ======================================================== #
  txt <- readLines(filename)
  # The header rules below inspect txt[1], which is NA for an empty file
  if (length(txt) == 0L) stop("File is empty")
  original <- txt
  # ======================================================== #
  # Replacing text                                           #
  # ======================================================== #
  # Uncommenting ------------------------------------------- #
  txt <- gsub("^#\\s?(.+)", "\\1", txt)
  # Output variable ---------------------------------------- #
  # NOTE: if the first line is not of the form
  # `function out = name(args)`, gsub() leaves it untouched and the whole
  # line ends up inside the appended return() statement.
  out <- gsub(
    pattern = "\\t*function ((\\S|\\,\\s)+)\\s?=\\s?(\\w+)\\((.+)\\)",
    replacement = "\\1",
    x = txt[1]
  ) # TODO: improve by detecting listed outputs
  if (substring(out, 1, 1) == "[") {
    # Multiple outputs `[a, b]` become `list(a = a, b = b)`
    out <- strsplit(out, "(\\,|\\[|\\]|\\s)")[[1]]
    out <- out[out != ""]
    out <- paste(out, "=", out)
    out <- paste0("list(", paste(out, collapse = ", "), ")")
  }
  # Function header ---------------------------------------- #
  txt <- gsub(
    pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)",
    replacement = "\\2 <- function(\\3) {",
    x = txt
  )
  txt <- gsub(
    pattern = "function (.+)\\((.+)\\)",
    replacement = "\\1 <- function(\\2) {",
    x = txt
  )
  # Function body ------------------------------------------ #
  txt <- gsub("(.+)\\.\\.\\.", "\\1", txt)
  txt <- gsub(";", "", txt)
  # Loops and if-statements
  txt <- gsub("for (.+)=(.+)", "for (\\1 in \\2) {", txt)
  # Only a standalone trailing `end` closes a block; identifiers that merely
  # end in "end" (e.g. `iend`) must be left alone
  txt <- gsub("(^|[^[:alnum:]_])end$", "\\1}", txt)
  txt <- gsub("if (.+)", "if (\\1) {", txt) # FIXME: paste comments after {
  txt <- gsub("else$", "} else {", txt)
  txt <- gsub("elseif", "} else if", txt)
  # R requires the while condition to be parenthesised
  txt <- gsub("while (.+)", "while (\\1) {", txt)
  # MATLAB-equivalent functions in R
  txt <- gsub("gamma_ln", "log_gamma", txt)
  txt <- gsub("nchoosek", "choose", txt)
  txt <- gsub("isempty", "is.null", txt)
  # txt <- gsub("(.+)\\'", "t(\\1)", txt)
  # Subsets ------------------------------------------------ #
  ass_op <- if (change_assignment) "<-" else "="
  txt <- gsub(
    pattern = "([^\\(]+)\\(([^\\(]+)\\)=(.+)",
    replacement = paste0("\\1[\\2] ", ass_op, "\\3"),
    x = txt
  )
  txt <- gsub("\\(:\\)", "[, ]", txt)
  # Accept either bracket style on both sides of `:,end`
  txt <- gsub("(.+)(\\[|\\():,end(\\]|\\))", "\\1[, ncol()]", txt)
  # Formatting --------------------------------------------- #
  if (improve_formatting) {
    txt <- gsub("(.),(\\S)", "\\1, \\2", txt)
    # Math operators
    txt <- gsub("(\\S)\\+(\\S)", "\\1 + \\2", txt)
    txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt)
    txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt)
    txt <- gsub("(\\S)\\/(\\S)", "\\1 / \\2", txt)
    # Logic operators
    txt <- gsub("~", "!", txt)
    txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt)
    txt <- gsub("(\\S)<=(\\S)", "\\1 <= \\2", txt)
    txt <- gsub("(\\S)==(\\S)", "\\1 == \\2", txt)
    # Assignment
    txt <- gsub(
      pattern = "(\\w)(\\s?)=(\\s?)(\\w)",
      replacement = paste0("\\1 ", ass_op, " \\4"),
      x = txt
    )
    # txt <- gsub(
    #   pattern = "(\\s+(.|\\_|\\[|\\])+)(\\s?)=(\\s?)(.+)",
    #   replacement = paste0("\\1 ", ass_op, "\\5"),
    #   x = txt
    # )
    txt <- gsub("%(\\s?)(\\w)", "# \\2", txt)
  }
  # Adding output and end-of-file brace -------------------- #
  txt <- append(txt, paste0("\treturn(", out, ")\n}"))
  # Returning converted code ------------------------------- #
  if (output == "asis") {
    return(txt)
  }
  if (output == "clean") {
    cat(txt, sep = "\n")
  } else if (output == "save") {
    write.table(
      x = txt,
      file = filename,
      quote = FALSE,
      row.names = FALSE,
      col.names = FALSE,
      append = append
    )
  } else {
    # output == "diff": three entries per converted line (header, original,
    # converted). The appended return statement has no original counterpart,
    # so the original text is padded with empty strings.
    n_lines <- length(txt)
    padded_original <- c(original, rep("", n_lines - length(original)))
    headers <- paste(
      "-----------------------", "line", seq_len(n_lines),
      "-----------------------"
    )
    # rbind() + as.vector() interleaves the three vectors column by column
    diff_text <- as.vector(rbind(headers, padded_original, txt))
    message("Displaying line number, original content and modified content")
    cat(diff_text, sep = "\n")
  }
  invisible(NULL)
}