Translated rand_disc

This commit is contained in:
Waldir Leoncio 2020-11-19 12:15:16 +01:00
parent 02b20f50a9
commit a3fcaf17dd
4 changed files with 78 additions and 77 deletions

View file

@ -398,69 +398,3 @@ greedyMix <- function(
# i2 = rand_disc(y); % uusi kori
# suurin = muutokset(i2);
# %--------------------------------------------------------------------------------------
# function svar=rand_disc(CDF)
# %returns an index of a value from a discrete distribution using inversion method
# slump=rand;
# har=find(CDF>slump);
# svar=har(1);
# %----------------------------------------------------------------------------------
# function T = clusternum(X, T, k, c)
# m = size(X,1)+1;
# while(~isempty(k))
# % Get the children of nodes at this level
# children = X(k,1:2);
# children = children(:);
# % Assign this node number to leaf children
# t = (children<=m);
# T(children(t)) = c;
# % Move to next level
# k = children(~t) - m;
# end
# %----------------------------------------------------------------------------------------
# function [Z, distances]=getDistances(data_matrix,nclusters)
# %finds initial admixture clustering solution with nclusters clusters, uses simple mean Hamming distance
# %gives partition in 8-bit format
# %allocates all alleles of a single individual into the same basket
# %data_matrix contains #Loci+1 columns, last column indicate whose alleles are placed in each row,
# %i.e. ranges from 1 to #individuals. For diploids there are 2 rows per individual, for haploids only a single row
# %missing values are indicated by zeros in the partition and by negative integers in the data_matrix.
# size_data=size(data_matrix);
# nloci=size_data(2)-1;
# n=max(data_matrix(:,end));
# distances=zeros(nchoosek(n,2),1);
# pointer=1;
# for i=1:n-1
# i_data=data_matrix(find(data_matrix(:,end)==i),1:nloci);
# for j=i+1:n
# d_ij=0;
# j_data=data_matrix(find(data_matrix(:,end)==j),1:nloci);
# vertailuja = 0;
# for k=1:size(i_data,1)
# for l=1:size(j_data,1)
# here_i=find(i_data(k,:)>=0);
# here_j=find(j_data(l,:)>=0);
# here_joint=intersect(here_i,here_j);
# vertailuja = vertailuja + length(here_joint);
# d_ij = d_ij + length(find(i_data(k,here_joint)~=j_data(l,here_joint)));
# end
# end
# d_ij = d_ij / vertailuja;
# distances(pointer)=d_ij;
# pointer=pointer+1;
# end
# end
# Z=linkage(distances');

View file

@ -1,16 +1,30 @@
#' @title Convert Matlab function to R
#' @description Performs basic syntax conversion from Matlab to R
#' @param filename name of the file
#' @param output can be "asis", "clean" (default) or "save"
#' @param output can be "asis", "clean" (default), "save" or "append"
#' @param improve_formatting if `TRUE` (default), makes minor changes
#' to conform to best-practice formatting conventions
#' @param change_assignment if `TRUE` (default), uses `<-` as the assignment operator
#' @param append if `FALSE` (default), overwrites file; otherwise, append
#' output to input
#' @return text converted to R, printed to screen or replacing input file
#' @author Waldir Leoncio
#' @importFrom utils write.table
#' @export
#' @note This function is intended to expedite the process of converting a
#' Matlab function to R by making common replacements. It does not have the
#' immediate goal of outputting a ready-to-use function. In other words,
#' after using this function you should go back to it and make minor changes.
#'
#' It is also advised to do a dry-run with `output = "clean"` and only switching
#' to `output = "save"` when you are confident that no important code will be
#' lost (for shorter functions, a careful visual inspection should suffice).
matlab2r <- function(
filename, output = "clean", improve_formatting=TRUE
filename, output = "clean", improve_formatting=TRUE, change_assignment=TRUE,
append=FALSE
) {
# TODO: this function is too long! Split into subfunctions
# (say, by rule and/or section)
# ======================================================== #
# Verification #
# ======================================================== #
@ -30,10 +44,10 @@ matlab2r <- function(
# Function header ---------------------------------------- #
out <- gsub(
pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)",
pattern = "\\t*function (\\S+)\\s*=\\s*(.+)\\((.+)\\)",
replacement = "\treturn(\\1)",
x = txt[1]
)
) # TODO: improve by detecting listed outputs
txt <- gsub(
pattern = "\\t*function (.+)\\s*=\\s*(.+)\\((.+)\\)",
replacement = "\\2 <- function(\\3) {",
@ -59,9 +73,19 @@ matlab2r <- function(
# MATLAB-equivalent functions in R
txt <- gsub("gamma_ln", "log_gamma", txt)
txt <- gsub("nchoosek", "choose", txt)
txt <- gsub("isempty", "is.null", txt)
# txt <- gsub("(.+)\\'", "t(\\1)", txt)
# Subsets ------------------------------------------------ #
txt <- gsub("([^\\(]+)\\((.+)\\)\\s?=(.+)", "\\1[\\2] <- \\3", txt)
ass_op <- ifelse(change_assignment, "<-", "=")
txt <- gsub(
pattern = "([^\\(]+)\\(([^\\(]+)\\)=(.+)",
replacement = paste0("\\1[\\2] ", ass_op, "\\3"),
x = txt
)
txt <- gsub("\\(:\\)", "[, ]", txt)
txt <- gsub("(.+)(\\[|\\():,end(\\]|\\()", "\\1[, ncol()]", txt)
# Formatting --------------------------------------------- #
if (improve_formatting) {
@ -71,9 +95,21 @@ matlab2r <- function(
txt <- gsub("(\\S)\\-(\\S)", "\\1 - \\2", txt)
txt <- gsub("(\\S)\\*(\\S)", "\\1 * \\2", txt)
# Logic operators
txt <- gsub("\\(~", "(!", txt)
txt <- gsub("~", "!", txt)
txt <- gsub("(\\S)>=(\\S)", "\\1 >= \\2", txt)
txt <- gsub("(\\S)<=(\\S)", "\\1 <= \\2", txt)
txt <- gsub("(\\S)==(\\S)", "\\1 == \\2", txt)
# Assignment
txt <- gsub("(.+)\\s?=\\s?(.+)", "\\1 <- \\2", txt)
txt <- gsub(
pattern = "(\\w)(\\s?)=(\\s?)(\\w)",
replacement = paste0("\\1 ", ass_op, " \\4"),
x = txt
)
# txt <- gsub(
# pattern = "(\\s+(.|\\_|\\[|\\])+)(\\s?)=(\\s?)(.+)",
# replacement = paste0("\\1 ", ass_op, "\\5"),
# x = txt
# )
}
# Adding output and end-of-file brace -------------------- #
@ -84,15 +120,18 @@ matlab2r <- function(
return(txt)
} else if (output == "clean") {
return(cat(txt, sep="\n"))
} else {
} else if (output == "save") {
return(
write.table(
x = txt,
file = filename,
quote = FALSE,
row.names = FALSE,
col.names = FALSE
col.names = FALSE,
append = append
)
)
} else {
stop ("Invalid output argument")
}
}

7
R/rand_disc.R Normal file
View file

@ -0,0 +1,7 @@
rand_disc <- function(CDF) {
# %returns an index of a value from a discrete distribution using inversion method
slump <- rand
har <- find(CDF > slump)
svar <- har(1)
return(svar)
}

View file

@ -4,15 +4,26 @@
\alias{matlab2r}
\title{Convert Matlab function to R}
\usage{
matlab2r(filename, output = "clean", improve_formatting = TRUE)
matlab2r(
filename,
output = "clean",
improve_formatting = TRUE,
change_assignment = TRUE,
append = FALSE
)
}
\arguments{
\item{filename}{name of the file}
\item{output}{can be "asis", "clean" (default) or "save"}
\item{output}{can be "asis", "clean" (default), "save" or "append"}
\item{improve_formatting}{if `TRUE` (default), makes minor changes
to conform to best-practice formatting conventions}
\item{change_assignment}{if `TRUE` (default), uses `<-` as the assignment operator}
\item{append}{if `FALSE` (default), overwrites file; otherwise, append
output to input}
}
\value{
text converted to R, printed to screen or replacing input file
@ -20,6 +31,16 @@ text converted to R, printed to screen or replacing input file
\description{
Performs basic syntax conversion from Matlab to R
}
\note{
This function is intended to expedite the process of converting a
Matlab function to R by making common replacements. It does not have the
immediate goal of outputting a ready-to-use function. In other words,
after using this function you should go back to it and make minor changes.
It is also advised to do a dry-run with `output = "clean"` and only switching
to `output = "save"` when you are confident that no important code will be
lost (for shorter functions, a careful visual inspection should suffice).
}
\author{
Waldir Leoncio
}