Merge branch 'issue-3' into develop

2022-12-22 15:06:13 +01:00 · 2022-12-22 15:06:13 +01:00 · ffca7ea3c4
commit ffca7ea3c4
parent b0eccc5e6b a3e51c1577
19 changed files with 478 additions and 2 deletions
--- a/4
+++ b/4
@ -36,8 +36,8 @@ Description: Partial R implementation of the BAPS software
 License: GPL-3
 BugReports: https://github.com/ocbe-uio/rBAPS/issues
 Encoding: UTF-8
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.3
 Suggests:
    testthat (>= 2.1.0)
 Imports:
-    methods, ape, vcfR, Rsamtools, adegenet, matlab2r, R6
+    methods, ape, vcfR, Rsamtools, adegenet, matlab2r, R6, zeallot
--- a/1
+++ b/1
@ -74,3 +74,4 @@ importFrom(stats,runif)
 importFrom(stats,sd)
 importFrom(utils,read.delim)
 importFrom(vcfR,read.vcfR)
 importFrom(zeallot,"%<-%")
--- a/R/argmin.R
+++ b/R/argmin.R
@ -0,0 +1,12 @@
 argmin <- function(v) {
  #  ARGMIN Return as a subscript vector the location of the smallest element of a multidimensional array v.
  #  indices <- argmin(v)
  #  Returns the first minimum in the case of ties.
  #  Example:
  #  X = [2 8 4 7 3 9]
  #  argmin[X] <- [1 1], i.e., row 1 column 1
  m <- i <- NA
  c(m, i) %<-% matlab2r::min(v)
  indices <- ind2subv(mysize(v), i)
  return(indices)
 }
--- a/R/cliques_to_jtree.R
+++ b/R/cliques_to_jtree.R
@ -0,0 +1,49 @@
 cliques_to_jtree <- function(cliques, ns) {
  #  MK_JTREE Make an optimal junction tree.
  #  [jtree, root, B, w] = mk_jtree(cliques, ns)
  #  A junction tree is a tree that satisfies the jtree property, which says:
  #  for each pair of cliques U, V with intersection S, all cliques on the path between U and V
  #  contain S. (This ensures that local propagation leads to # global consistency.)
  #  We can create a junction tree by computing the maximal spanning tree of the junction graph.
  #  (The junction graph connects all cliques, and the weight of an edge (i, j) is
  #  |C(i) intersect C(j)|, where C(i) is the i'th clique.)
  #  The best jtree is the maximal spanning tree which minimizes the sum of the costs on each edge,
  #  where cost[i, j] <- w(C(i)) + w(C(j)), and w(C) is the weight of clique C,
  #  which is the total number of values C can take on.
  #  For details, see
  #   - Jensen and Jensen, "Optimal Junction Trees", UAI 94.
  #  Input:
  #   cliques{i} = nodes in clique i
  #   ns[i] <- number of values node i can take on
  #  Output:
  #   jtree[i, j] <- 1 iff cliques i and j aer connected
  #   root <- the clique that should be used as root
  #   B[i, j] <- 1 iff node j occurs in clique i
  #   w[i] <- weight of clique i
  num_cliques <- length(cliques)
  w <- zeros(num_cliques, 1)
  B <- zeros(num_cliques, 1)
  for (i in 1:num_cliques) {
    B[i, cliques[[i]]] <- 1
    w[i] <- prod(ns(cliques[[i]]))
  }
  #  C1[i, j] <- length(intersect(cliques{i}, cliques{j}))
  #  The length of the intersection of two sets is the dot product of their bit vector representation.
  C1 <- B %*% t(B)
  C1 <- setdiag(C1, 0)
  #  C2[i, j] <- w(i) + w(j)
  num_cliques <- length(w)
  W <- repmat(w, c(1, num_cliques))
  C2 <- W + t(W)
  C2 <- setdiag(C2, 0)
  jtree <- zeros(minimum_spanning_tree(-C1, C2))# Using - C1 gives * maximum * spanning tree
  #  The root is arbitrary, but since the first pass is towards the root,
  #  we would like this to correspond to going forward in time in a DBN.
  root <- num_cliques
  return(list("jtree" = jtree, "root" = root, "B" = B, "w" = w))
 }
--- a/R/elim_order.R
+++ b/R/elim_order.R
@ -0,0 +1,67 @@
 elim_order <- function(G, node_sizes) {
  #  BEST_FIRST_ELIM_ORDER Greedily search for an optimal elimination order.
  #  order <- best_first_elim_order(moral_graph, node_sizes)
  #  Find an order in which to eliminate nodes from the graph in such a way as to try and minimize the
  #  weight of the resulting triangulated graph.  The weight of a graph is the sum of the weights of each
  #  of its cliques the weight of a clique is the product of the weights of each of its members the
  #  weight of a node is the number of values it can take on.
  #  Since this is an NP - hard problem, we use the following greedy heuristic:
  #  at each step, eliminate that node which will result in the addition of the least
  #  number of fill - in edges, breaking ties by choosing the node that induces the lighest clique.
  #  For details, see
  #  - Kjaerulff, "Triangulation of graphs  - - algorithms giving small total state space",
  #       Univ. Aalborg tech report, 1990 (www.cs.auc.dk/!uk)
  #  - C. Huang and A. Darwiche, "Inference in Belief Networks: A procedural guide",
  #       Intl. J. Approx. Reasoning, 11, 1994
  #  Warning: This code is pretty old and could probably be made faster.
  n <- length(G)
  # if (nargin < 3, stage = { 1:n } end# no constraints) {
  #  For long DBNs, it may be useful to eliminate all the nodes in slice t before slice t + 1.
  #  This will ensure that the jtree has a repeating structure (at least away from both edges).
  #  This is why we have stages.
  #  See the discussion of splicing jtrees on p68 of
  #  Geoff Zweig's PhD thesis, Dept. Comp. Sci., UC Berkeley, 1998.
  #  This constraint can increase the clique size significantly.
  MG <- G# copy the original graph
  uneliminated <- ones(1, n)
  order <- zeros(1, n)
  # t <- 1 # Counts which time slice we are on
  for (i in 1:n) {
    U <- find(uneliminated)
    # valid <- myintersect(U, stage{t})
    valid <- U
    # Choose the best node from the set of valid candidates
    min_fill <- zeros(1, length(valid))
    min_weight <- zeros(1, length(valid))
    for (j in 1:length(valid)) {
      k <- valid(j)
      nbrs <- myintersect(neighbors(G, k), U)
      l <- length(nbrs)
      M <- MG[nbrs, nbrs]
      min_fill[j] <- l^2 - sum(M)# num. added edges
      min_weight[j] <- prod(node_sizes[k, nbrs])# weight of clique
    }
    lightest_nbrs <- find(min_weight == min(min_weight))
    # break ties using min - fill heuristic
    best_nbr_ndx <- argmin(min_fill[lightest_nbrs])
    j <- lightest_nbrs[best_nbr_ndx] # we will eliminate the j'th element of valid
    # j1s <- find(score1 == min(score1))
    # j <- j1s(argmin(score2(j1s)))
    k <- valid(j)
    uneliminated[k] <- 0
    order[i] <- k
    ns <- myintersect(neighbors(G, k), U)
    if (!is.null(ns)) {
      G[ns, ns] <- 1
      G <- setdiag(G, 0)
    }
    # if (!any(as.logical(uneliminated(stage{t})))# are we allowed to the next slice?) {
    #   t <- t + 1
    # }
  }
  return(order)
 }
--- a/R/findCliques.R
+++ b/R/findCliques.R
@ -0,0 +1,48 @@
 findCliques <- function(M) {
  # Muuttaa graafin M kolmioituvaksi ja laskee siitה klikit ja
  # separaattorit.
  # Hyצdynnetההn Kevin Murphyn algoritmeja Graph Theory toolboxista.
  # Pהivitetty 12.8.2005
  order <- elim_order(M, ones(length(M)))
  G <- cliques <- root <- NULL
  c(G, cliques) %<-% triangulate(M, order)
  c(jtree, root) %<-% cliques_to_jtree(cliques, ones(length(M)))
  ncliq <- length(cliques)
  separators <- cell(ncliq - 1, 1)    # n - solmuisessa puussa n - 1 viivaa
  jono <- zeros(length(ncliq))
  jono[1] <- root
  i <- 1
  pointer <- 2                     # Seuraava tyhjה paikka
  while (!is.null(find(jono != 0))) {  # Puun leveyssuuntainen lהpikהynti) {
    lapset <- find(jtree[jono[i], ] != 0)
    jtree[, jono[i]] <- 0         # Klikki kהsitelty
    jono[pointer:(pointer + length(lapset) - 1)] <- lapset
    for (j in 1:length(lapset)) {
      ehdokas <- myintersect(cliques[[jono[i]]], cliques[[lapset[j]]])
      kelpaa <- 1
      for (k in 1:(pointer + j - 3)) {
        # Tutkitaan, ettה separaattoriehdokasta ei vielה kהsitelty
        if (ehdokas == separators[[k]]) {
          kelpaa <- 0
        }
      }
      if (kelpaa) {
        separators[[pointer + j - 2]] <- ehdokas
      }
    }
    jono[i] <- 0
    pointer <- pointer + length(lapset)
    i <- i + 1
  }
  notEmpty <- zeros(ncliq - 1, 1)
  for (i in 1:(ncliq - 1)) {
    if (!is.null(separators[[i]])) {
      notEmpty[i] <- 1
    }
  }
  notEmpty <- find(notEmpty == 1)
  separators <- separators(notEmpty)
  return(list("cliques" = cliques, "separators" = separators, "G" = G))
 }
--- a/R/ind2subv.R
+++ b/R/ind2subv.R
@ -0,0 +1,38 @@
 ind2subv <- function(siz, ndx) stop("Needs translation")
 # function sub = ind2subv(siz, ndx)
 # % IND2SUBV Like the built-in ind2sub, but returns the answer as a row vector.
 # % sub = ind2subv(siz, ndx)
 # %
 # % siz and ndx can be row or column vectors.
 # % sub will be of size length(ndx) * length(siz).
 # %
 # % Example
 # % ind2subv([2 2 2], 1:8) returns
 # %  [1 1 1
 # %   2 1 1
 # %   ...
 # %   2 2 2]
 # % That is, the leftmost digit toggle fastest.
 # %
 # % See also SUBV2IND
 # n = length(siz);
 # if n==0
 #   sub = ndx;
 #   return;
 # end
 # if all(siz==2)
 #   sub = dec2bitv(ndx-1, n);
 #   sub = sub(:,n:-1:1)+1;
 #   return;
 # end
 # cp = [1 cumprod(siz(:)')];
 # ndx = ndx(:) - 1;
 # sub = zeros(length(ndx), n);
 # for i = n:-1:1 % i'th digit
 #   sub(:,i) = floor(ndx/cp(i))+1;
 #   ndx = rem(ndx,cp(i));
 # end
--- a/R/laskeKlitik.R
+++ b/R/laskeKlitik.R
@ -0,0 +1,28 @@
 laskeKlikit <- function(M, maxCliqSize, maxSepSize) {
  # Laskee samankokoisten klikkien mההrהn verkosta M
  # ncliques(i)=kokoa i olevien klikkien mההr?
  # nseparators vastaavasti
  ncliques <- zeros(1, maxCliqSize)
  nseparators <- zeros(1, maxSepSize)
  if (M == c()) {
    return()
  }
  cliques_separators <- findCliques(M)
  cliques <- cliques_separators$cliques
  separators <- cliques_separators$separators
  rm(cliques_separators)
  for (i in 1:length(cliques)) {
    ncliques[length[cliques[[i]]]] <- ncliques[length(cliques[[i]])] + 1
  }
  # cliqmax=max(find(ncliques!=0))
  # ncliques=ncliques(1:cliqmax)
  for (i in 1:length(separators)) {
    nseparators[length[separators[[i]]]] <- nseparators[length(separators[[i]])] + 1
  }
  return(
    list(
      ncliques = ncliques, nseparators = nseparators, cliques = cliques,
      separators = separators
    )
  )
 }
--- a/R/minimum_spanning_tree.R
+++ b/R/minimum_spanning_tree.R
@ -0,0 +1,51 @@
 minimum_spanning_tree <- function(C1, C2) stop("needs translation")
 # function A = minimum_spanning_tree(C1, C2)
 # %
 # % Find the minimum spanning tree using Prim's algorithm.
 # % C1(i,j) is the primary cost of connecting i to j.
 # % C2(i,j) is the (optional) secondary cost of connecting i to j, used to break ties.
 # % We assume that absent edges have 0 cost.
 # % To find the maximum spanning tree, used -1*C.
 # % See Aho, Hopcroft & Ullman 1983, "Data structures and algorithms", p 237.
 # % Prim's is O(V^2). Kruskal's algorithm is O(E log E) and hence is more efficient
 # % for sparse graphs, but is implemented in terms of a priority queue.
 # % We partition the nodes into those in U and those not in U.
 # % closest(i) is the vertex in U that is closest to i in V-U.
 # % lowcost(i) is the cost of the edge (i, closest(i)), or infinity is i has been used.
 # % In Aho, they say C(i,j) should be "some appropriate large value" if the edge is missing.
 # % We set it to infinity.
 # % However, since lowcost is initialized from C, we must distinguish absent edges from used nodes.
 # n = length(C1);
 # if nargin==1, C2 = zeros(n); end
 # A = zeros(n);
 # closest = ones(1,n);
 # used = zeros(1,n); % contains the members of U
 # used(1) = 1; % start with node 1
 # C1(find(C1==0))=inf;
 # C2(find(C2==0))=inf;
 # lowcost1 = C1(1,:);
 # lowcost2 = C2(1,:);
 # for i=2:n
 #   ks = find(lowcost1==min(lowcost1));
 #   k = ks(argmin(lowcost2(ks)));
 #   A(k, closest(k)) = 1;
 #   A(closest(k), k) = 1;
 #   lowcost1(k) = inf;
 #   lowcost2(k) = inf;
 #   used(k) = 1;
 #   NU = find(used==0);
 #   for ji=1:length(NU)
 #     for j=NU(ji)
 #       if C1(k,j) < lowcost1(j)
 # 	lowcost1(j) = C1(k,j);
 # 	lowcost2(j) = C2(k,j);
 # 	closest(j) = k;
 #       end
 #     end
 #   end
 # end
--- a/R/myintersect.R
+++ b/R/myintersect.R
@ -0,0 +1,29 @@
 myintersect <- function(A, B) {
  #  MYINTERSECT Intersection of two sets of positive integers (much faster than built - in intersect)
  #  C <- myintersect(A, B)
  A <- t(A)
  B <- t(B)
  if (is.null(A)) {
    ma <- 0
  } else {
    ma <- max(A)
  }
  if (is.null(B)) {
    mb <- 0
  } else {
    mb <- max(B)
  }
  if (ma == 0 | mb == 0) {
    C <- vector()
  } else {
    # bits <- sparse(1, max(ma, mb))
    bits <- zeros(1, max(ma, mb))
    bits[A] <- 1
    C <- B[as.logical(bits[B])]
  }
  return(C)
 }
--- a/R/myisvector.R
+++ b/R/myisvector.R
@ -0,0 +1,7 @@
 myisvector <- function(V) {
  # Kuten isvector(V)
  A <- size(V)
  r <- (length(A) == 2) & (min(A) == 1)
  return(r)
 }
--- a/R/mysize.R
+++ b/R/mysize.R
@ -0,0 +1,17 @@
 mysize <- function(M) {
  #  MYSIZE Like the built - in size, except it returns n if (M is a vector of length n, and 1 if M is a scalar.) {
  #  sz <- mysize(M)
  #  The behavior is best explained by examples
  #  - M <- rand(1, 1),   mysize[M] <- 1,      size(M) = [1 1]
  #  - M <- rand(2, 1),   mysize[M] <- 2,      size(M) = [2 1]
  #  - M <- rand(1, 2),   mysize[M] <- 2,      size(M) = [1 2]
  #  - M <- rand(2, 2,1), mysize[M] <- [2 2],  size(M) = [2 2]
  #  - M <- rand(1, 2,1), mysize[M] <- 2,      size(M) = [1 2]
  if (myisvector(M)) {
    sz <- length(M)
  } else {
    sz <- size(M)
  }
  return(sz)
 }
--- a/R/mysubset.R
+++ b/R/mysubset.R
@ -0,0 +1,13 @@
 mysubset <- function(small, large) {
  #  MYSUBSET Is the small set of + ve integers a subset of the large set?
  #  p <- mysubset(small, large)
  #  Surprisingly, this is not built - in.
  if (is.null(small)) {
    p <- 1# is.null(large)
  } else {
    p <- length(myintersect(small, large)) == length(small)
  }
  return(p)
 }
--- a/R/myunion.R
+++ b/R/myunion.R
@ -0,0 +1,31 @@
 myunion <- function(A, B) {
  #  MYUNION Union of two sets of positive integers (much faster than built - in union)
  #  C <- myunion(A, B)
  if (is.null(A)) {
    ma <- 0
  } else {
    ma <- max(A)
  }
  if (is.null(B)) {
    mb <- 0
  } else {
    mb <- max(B)
  }
  if (ma == 0 & mb == 0) {
    C <- vector()
  } else if (ma == 0 & mb > 0) {
    C <- B
  } else if (ma > 0 & mb == 0) {
    C <- A
  } else {
    # bits <- sparse(1, max(ma, mb))
    bits <- zeros(1, max(c(ma, mb)))
    bits[A] <- 1
    bits[B] <- 1
    C <- find(bits)
  }
  return(C)
 }
--- a/R/neighbors.R
+++ b/R/neighbors.R
@ -0,0 +1,8 @@
 neighbors <- function(adj_mat, i) {
  #  NEIGHBORS Find the parents and children of a node in a graph.
  #  ns <- neighbors(adj_mat, i)
  # ns <- myunion(children(adj_mat, i), parents(adj_mat, i))
  ns <- find(adj_mat[i, ])
  return(ns)
 }
--- a/R/rBAPS-package.R
+++ b/R/rBAPS-package.R
@ -9,4 +9,5 @@
 #' isempty isfield isspace max min ones questdlg rand repmat reshape
 #' size sortrows squeeze strcmp times zeros disp
 #' @importFrom stats runif
 #' @importFrom zeallot %<-%
 NULL
--- a/R/setdiag.R
+++ b/R/setdiag.R
@ -0,0 +1,19 @@
 setdiag <- function(M, v) {
  #  SETDIAG Set the diagonal of a matrix to a specified scalar/vector.
  #  M <- set_diag(M, v)
  n <- length(M)
  if (length(v) == 1) {
    v <- repmat(v, c(1, n))
  }
  #  e.g., for 3x3 matrix,  elements are numbered
  #  1 4 7
  #  2 5 8
  #  3 6 9
  #  so diagnoal = [1 5 9]
  J <- seq(1, n ^ 2, n + 1)
  M[J] <- v
  return(M)
 }
--- a/R/triangulate.R
+++ b/R/triangulate.R
@ -0,0 +1,39 @@
 triangulate <- function(G, order) {
  #  TRIANGULATE Ensure G is triangulated (chordal), i.e., every cycle of length > 3 has a chord.
  #  [G, cliques, fill_ins, cliques_containing_node] = triangulate(G, order)
  #  cliques{i} is the i'th maximal complete subgraph of the triangulated graph.
  #  fill_ins[i, j] <- 1 iff we add a fill - in arc between i and j.
  #  To find the maximal cliques, we save each induced cluster (created by adding connecting
  #  neighbors) that is not a subset of any previously saved cluster. (A cluster is a complete,
  #  but not necessarily maximal, set of nodes.)
  MG <- G
  n <- length(G)
  eliminated <- zeros(1, n)
  cliques = list()
  for (i in 1:n) {
    u <- order[i]
    U <- find(!eliminated)# uneliminated
    nodes <- myintersect(neighbors(G, u), U)# look up neighbors in the partially filled - in graph
    nodes <- myunion(nodes, u)# the clique will always contain at least u
    G[nodes, nodes] <- 1# make them all connected to each other
    G <- setdiag(G, 0)
    eliminated[u] <- 1
    exclude <- 0
    for (c in 1:length(cliques)) {
      if (mysubset(nodes, cliques[[c]])) { # not maximal)
        exclude <- 1
        break
      }
    }
    if (!exclude) {
      cnum <- length(cliques) + 1
      cliques[[cnum]] <- nodes
    }
  }
  # fill_ins <- sparse(triu(max(0, G - MG), 1))
  fill_ins <- 1
  return(list("G" = G, "cliques" = cliques, "fill_ins" = fill_ins))
 }
--- a/tests/testthat/test-spatialMixture.R
+++ b/tests/testthat/test-spatialMixture.R
@ -33,3 +33,21 @@ test_that("testaaKoordinaatit works as expected", {
      )
  )
 })
 test_that("lakseKlitik() and subfunctions produce expected output", {
  # TODO: test elim_order()
  # TODO: test triangulate()
  # TODO: test neighbors()
  # TODO: test myintersect()
  # TODO: test mysubset()
  # TODO: test findCliques()
  # TODO: test cliques_to_jtree()
  # TODO: test minimum_spanning_tree()
  # TODO: test myunion()
  # TODO: test argmin()
  # TODO: test mysize()
  # TODO: test ind2subv()
  # TODO: test myisvector()
  # TODO: ... and anythin left from findCliques.m
  # TODO: test lakseKlitik()
 })