diff --git a/R/greedyMix.R b/R/greedyMix.R index 2300494..344f966 100644 --- a/R/greedyMix.R +++ b/R/greedyMix.R @@ -398,69 +398,3 @@ greedyMix <- function( # i2 = rand_disc(y); % uusi kori # suurin = muutokset(i2); - -# %-------------------------------------------------------------------------------------- - - -# function svar=rand_disc(CDF) -# %returns an index of a value from a discrete distribution using inversion method -# slump=rand; -# har=find(CDF>slump); -# svar=har(1); - -# %---------------------------------------------------------------------------------- - -# function T = clusternum(X, T, k, c) -# m = size(X,1)+1; -# while(~isempty(k)) -# % Get the children of nodes at this level -# children = X(k,1:2); -# children = children(:); - -# % Assign this node number to leaf children -# t = (children<=m); -# T(children(t)) = c; - -# % Move to next level -# k = children(~t) - m; -# end - -# %---------------------------------------------------------------------------------------- - - -# function [Z, distances]=getDistances(data_matrix,nclusters) - -# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance -# %gives partition in 8-bit format -# %allocates all alleles of a single individual into the same basket -# %data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row, -# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row -# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix. - -# size_data=size(data_matrix); -# nloci=size_data(2)-1; -# n=max(data_matrix(:,end)); -# distances=zeros(nchoosek(n,2),1); -# pointer=1; -# for i=1:n-1 -# i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci); -# for j=i+1:n -# d_ij=0; -# j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci); -# vertailuja = 0; -# for k=1:size(i_data,1) -# for l=1:size(j_data,1) -# here_i=find(i_data(k,:)>=0); -# here_j=find(j_data(l,:)>=0); -# here_joint=intersect(here_i,here_j); -# vertailuja = vertailuja + length(here_joint); -# d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint))); -# end -# end -# d_ij = d_ij / vertailuja; -# distances(pointer)=d_ij; -# pointer=pointer+1; -# end -# end - -# Z=linkage(distances'); \ No newline at end of file diff --git a/R/matlab2r.R b/R/matlab2r.R index aeda6b9..8583aff 100644 --- a/R/matlab2r.R +++ b/R/matlab2r.R @@ -1,16 +1,30 @@ #' @title Convert Matlab function to R #' @description Performs basic syntax conversion from Matlab to R #' @param filename name of the file -#' @param output can be "asis", "clean" (default) or "save" +#' @param output can be "asis", "clean" (default), "save" or "append" #' @param improve_formatting if `TRUE` (default), makes minor changes #' to conform to best-practice formatting conventions +#' @param change_assignment if `TRUE` (default), uses `<-` as the assignment operator +#' @param append if `FALSE` (default), overwrites file; otherwise, append +#' output to input #' @return text converted to R, printed to screen or replacing input file #' @author Waldir Leoncio #' @importFrom utils write.table #' @export +#' @note This function is intended to expedite the process of converting a +#' Matlab function to R by making common replacements. It does not have the +#' immediate goal of outputting a ready-to-use function. In other words, +#' after using this function you should go back to it and make minor changes. +#' +#' It is also advised to do a dry-run with `output = "clean"` and only switching +#' to `output = "save"` when you are confident that no important code will be +#' lost (for shorter functions, a careful visual inspection should suffice). matlab2r <- function( - filename, output = "clean", improve_formatting=TRUE + filename, output = "clean", improve_formatting=TRUE, change_assignment=TRUE, + append=FALSE ) { + # TODO: this function is too long! Split into subfunctions + # (say, by rule and/or section) # ======================================================== # # Verification # # ======================================================== # @@ -30,10 +44,10 @@ matlab2r <- function( # Function header ---------------------------------------- # out <- gsub( - pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)", + pattern = "\\t*function (\\S+)\\s*=\\s*(.+)\\((.+)\\)", replacement = "\treturn(\\1)", x = txt[1] - ) + ) # TODO: improve by detecting listed outputs txt <- gsub( pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)", replacement = "\\2 <- function(\\3) {", @@ -59,9 +73,19 @@ matlab2r <- function( # MATLAB-equivalent functions in R txt <- gsub("gamma_ln", "log_gamma", txt) + txt <- gsub("nchoosek", "choose", txt) + txt <- gsub("isempty", "is.null", txt) + # txt <- gsub("(.+)\\'", "t(\\1)", txt) # Subsets ------------------------------------------------ # - txt <- gsub("([^\\(]+)\\((.+)\\)\\s?=(.+)", "\\1[\\2] <- \\3", txt) + ass_op <- ifelse(change_assignment, "<-", "=") + txt <- gsub( + pattern = "([^\\(]+)\\(([^\\(]+)\\)=(.+)", + replacement = paste0("\\1[\\2] ", ass_op, "\\3"), + x = txt + ) + txt <- gsub("\\(:\\)", "[, ]", txt) + txt <- gsub("(.+)(\\[|\\():,end(\\]|\\()", "\\1[, ncol()]", txt) # Formatting --------------------------------------------- # if (improve_formatting) { @@ -71,9 +95,21 @@ matlab2r <- function( txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt) txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt) # Logic operators - txt <- gsub("\\(~", "(!", txt) + txt <- gsub("~", "!", txt) + txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt) + txt <- gsub("(\\S)<=(\\S)", "\\1 <= \\2", txt) + txt <- gsub("(\\S)==(\\S)", "\\1 == \\2", txt) # Assignment - txt <- gsub("(.+)\\s?=\\s?(.+)", "\\1 <- \\2", txt) + txt <- gsub( + pattern = "(\\w)(\\s?)=(\\s?)(\\w)", + replacement = paste0("\\1 ", ass_op, " \\4"), + x = txt + ) + # txt <- gsub( + # pattern = "(\\s+(.|\\_|\\[|\\])+)(\\s?)=(\\s?)(.+)", + # replacement = paste0("\\1 ", ass_op, "\\5"), + # x = txt + # ) } # Adding output and end-of-file brace -------------------- # @@ -84,15 +120,18 @@ matlab2r <- function( return(txt) } else if (output == "clean") { return(cat(txt, sep="\n")) - } else { + } else if (output == "save") { return( write.table( x = txt, file = filename, quote = FALSE, row.names = FALSE, - col.names = FALSE + col.names = FALSE, + append = append ) ) + } else { + stop ("Invalid output argument") } } diff --git a/R/rand_disc.R b/R/rand_disc.R new file mode 100644 index 0000000..e75f1c4 --- /dev/null +++ b/R/rand_disc.R @@ -0,0 +1,7 @@ +rand_disc <- function(CDF) { + # %returns an index of a value from a discrete distribution using inversion method + slump <- rand + har <- find(CDF > slump) + svar <- har(1) + return(svar) +} diff --git a/man/matlab2r.Rd b/man/matlab2r.Rd index 6088487..2ae7c67 100644 --- a/man/matlab2r.Rd +++ b/man/matlab2r.Rd @@ -4,15 +4,26 @@ \alias{matlab2r} \title{Convert Matlab function to R} \usage{ -matlab2r(filename, output = "clean", improve_formatting = TRUE) +matlab2r( + filename, + output = "clean", + improve_formatting = TRUE, + change_assignment = TRUE, + append = FALSE +) } \arguments{ \item{filename}{name of the file} -\item{output}{can be "asis", "clean" (default) or "save"} +\item{output}{can be "asis", "clean" (default), "save" or "append"} \item{improve_formatting}{if `TRUE` (default), makes minor changes to conform to best-practice formatting conventions} + +\item{change_assignment}{if `TRUE` (default), uses `<-` as the assignment operator} + +\item{append}{if `FALSE` (default), overwrites file; otherwise, append +output to input} } \value{ text converted to R, printed to screen or replacing input file @@ -20,6 +31,16 @@ text converted to R, printed to screen or replacing input file \description{ Performs basic syntax conversion from Matlab to R } +\note{ +This function is intended to expedite the process of converting a +Matlab function to R by making common replacements. It does not have the +immediate goal of outputting a ready-to-use function. In other words, +after using this function you should go back to it and make minor changes. + +It is also advised to do a dry-run with `output = "clean"` and only switching +to `output = "save"` when you are confident that no important code will be +lost (for shorter functions, a careful visual inspection should suffice). +} \author{ Waldir Leoncio }