Added source Matlab code for reference
This commit is contained in:
parent
b8af977117
commit
b5d99903d2
186 changed files with 61405 additions and 1 deletions
86
matlab/spatial/addPoints.m
Normal file
86
matlab/spatial/addPoints.m
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
function uusiData=addPoints(data)
% ADDPOINTS Append helper points so that every data point gets a bounded Voronoi cell.
%   uusiData = addPoints(data)
%
%   data      n-by-2 matrix of coordinates (column 1 = x, column 2 = y).
%   uusiData  [data; helpers], the input followed by the helper points.
%
% A regular nstep-by-nstep grid is laid over the bounding box of the data,
% extending three grid steps beyond it on every side. The Voronoi
% tessellation of the grid is computed, and those grid points whose
% (bounded) cell contains no data point are kept as helper points.
%
% Edge cases handled here:
%   - empty input is returned unchanged;
%   - if all points share the same x (or y) value the grid step along that
%     axis would be zero and the grid would collapse onto a line; the step
%     of the other axis (or 1 if both are zero) is used instead.

if isempty(data)
    uusiData = data;
    return
end

x = data(:,1);
y = data(:,2);

xmin = min(x);
xmax = max(x);
ymin = min(y);
ymax = max(y);

npist = size(unique(data, 'rows'),1);   % number of distinct points
nstep = ceil(npist^0.4) + 7;            % grid points per axis
xstep = (xmax-xmin)/(nstep-7);
ystep = (ymax-ymin)/(nstep-7);

% Guard against a degenerate (zero-width) bounding box.
if xstep == 0 && ystep == 0
    xstep = 1;
    ystep = 1;
elseif xstep == 0
    xstep = ystep;
elseif ystep == 0
    ystep = xstep;
end

% Grid offsets run from -3 to nstep-4 steps relative to the minimum corner.
% Rows are ordered with y varying fastest within each block of constant x.
offsets = (1:nstep)' - 4;
[yOff, xOff] = ndgrid(offsets, offsets);
apuPisteet = [xmin + xOff(:)*xstep, ymin + yOff(:)*ystep];

[V,C] = voronoin(apuPisteet,{'Qt','Qbb','Qc','Qz'});

% Keep the grid points whose bounded cell holds no data point. Cells that
% reference vertex 1 are skipped (only bounded cells are examined).
keep = false(nstep^2,1);
for i = 1:length(C)
    if ~any(C{i} == 1)
        vx = V(C{i},1);
        vy = V(C{i},2);
        keep(i) = ~any(inpolygon(x,y,vx,vy));
    end
end

uusiData = [data; apuPisteet(keep, :)];
|
||||
|
||||
|
||||
509
matlab/spatial/findCliques.m
Normal file
509
matlab/spatial/findCliques.m
Normal file
|
|
@ -0,0 +1,509 @@
|
|||
function [cliques, separators, G] = findCliques(M)
% FINDCLIQUES Triangulate the graph M and extract its cliques and separators.
%   [cliques, separators, G] = findCliques(M)
%
%   M           adjacency matrix of the graph.
%   cliques     cell array; cliques{i} lists the nodes of the i'th clique of
%               the triangulated graph.
%   separators  column cell array of the distinct, non-empty intersections
%               of cliques joined by an edge of the junction tree.
%   G           adjacency matrix of the triangulated graph.
%
% Uses Kevin Murphy's Graph Theory toolbox routines (included below).
% Updated 12.8.2005

n = length(M);
% Every node has weight 1. Only entries 1..n are ever read, so a vector is
% enough (the original allocated a full n-by-n matrix of ones).
nodeSizes = ones(1, n);

order = elim_order(M, nodeSizes);
[G, cliques] = triangulate(M, order);
[jtree, root] = cliques_to_jtree(cliques, nodeSizes);
ncliq = length(cliques);
separators = cell(ncliq-1, 1);   % a tree with n nodes has n-1 edges

% Queue for the breadth-first traversal; one slot per clique.
% (The original zeros(length(ncliq)) allocated a single slot only.)
jono = zeros(1, ncliq);
jono(1) = root;
i = 1;
pointer = 2;                     % next free slot in the queue

while any(jono)                  % breadth-first traversal of the tree
    lapset = find(jtree(jono(i),:) ~= 0);   % children of the current clique
    jtree(:,jono(i)) = 0;                   % mark the clique as processed
    jono(pointer:pointer+length(lapset)-1) = lapset;
    for j = 1:length(lapset)
        ehdokas = myintersect(cliques{jono(i)}, cliques{lapset(j)});
        kelpaa = 1;
        for k = 1:(pointer+j-3)
            % Reject a separator candidate that has been stored already.
            if isequal(ehdokas, separators{k})
                kelpaa = 0;
            end
        end
        if kelpaa
            separators{pointer+j-2} = ehdokas;
        end
    end
    jono(i) = 0;
    pointer = pointer + length(lapset);
    i = i + 1;
end

% Drop the slots that were left empty (duplicates or empty intersections).
separators = separators(~cellfun('isempty', separators));
|
||||
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function order = elim_order(G, node_sizes)
% ELIM_ORDER Greedily choose a node elimination order for triangulation.
%   order = elim_order(G, node_sizes)
%
%   G           adjacency matrix of the (moral) graph.
%   node_sizes  node_sizes(i) = number of values node i can take on.
%   order       1-by-n vector; order(i) is the i'th node to eliminate.
%
% Finding the order that minimises the weight of the triangulated graph is
% NP-hard, so a greedy heuristic is used. At each step, among the nodes not
% yet eliminated, this implementation picks the node whose induced clique
% (the node plus its uneliminated neighbours) is lightest, and breaks ties
% by the smallest fill-in count. The fill-in count is measured against the
% ORIGINAL graph, while neighbourhoods come from the partially filled-in
% graph.
%
% For background see
%   - Kjaerulff, "Triangulation of graphs -- algorithms giving small total
%     state space", Univ. Aalborg tech report, 1990
%   - C. Huang and A. Darwiche, "Inference in Belief Networks: A procedural
%     guide", Intl. J. Approx. Reasoning, 11, 1994

n = length(G);
original = G;               % untouched copy used for the fill-in count
alive = true(1,n);          % alive(k) is true until node k is eliminated
order = zeros(1,n);

for step = 1:n
    candidates = find(alive);
    nCand = length(candidates);

    % Score every candidate.
    fillCost = zeros(1,nCand);
    cliqueWeight = zeros(1,nCand);
    for c = 1:nCand
        node = candidates(c);
        nbrs = myintersect(neighbors(G, node), candidates);
        sub = original(nbrs,nbrs);
        fillCost(c) = length(nbrs)^2 - sum(sub(:));        % number of added edges
        cliqueWeight(c) = prod(node_sizes([node nbrs]));   % weight of the clique
    end

    % Lightest clique first; the min-fill score breaks ties.
    lightest = find(cliqueWeight == min(cliqueWeight));
    pick = lightest(argmin(fillCost(lightest)));
    node = candidates(pick);

    alive(node) = false;
    order(step) = node;

    % Connect the neighbours of the eliminated node to each other.
    ns = myintersect(neighbors(G, node), candidates);
    if ~isempty(ns)
        G(ns,ns) = 1;
        G = setdiag(G,0);
    end
end
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function [G, cliques, fill_ins] = triangulate(G, order)
% TRIANGULATE Make G chordal by eliminating nodes in the given order.
%   [G, cliques, fill_ins] = triangulate(G, order)
%
%   G         adjacency matrix; returned with the fill-in edges added.
%   order     elimination order (order(i) is the i'th node to eliminate).
%   cliques   cliques{i} is the i'th maximal complete subgraph found.
%   fill_ins  placeholder, always 1 (the fill-in matrix is not computed).
%
% Each elimination step induces a cluster: the node together with its
% not-yet-eliminated neighbours in the partially filled-in graph. A cluster
% is stored as a clique unless it is a subset of one stored earlier.

n = length(G);
done = false(1,n);
cliques = {};

for step = 1:n
    u = order(step);
    remaining = find(~done);

    % The cluster always contains at least u itself.
    cluster = myunion(myintersect(neighbors(G,u), remaining), u);
    G(cluster,cluster) = 1;      % connect all members to each other
    G = setdiag(G,0);
    done(u) = true;

    % Keep the cluster only if no stored clique already covers it.
    isMaximal = true;
    for c = 1:length(cliques)
        if mysubset(cluster, cliques{c})
            isMaximal = false;
            break;
        end
    end
    if isMaximal
        cliques{length(cliques)+1} = cluster;
    end
end

fill_ins = 1;
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function [jtree, root, B, w] = cliques_to_jtree(cliques, ns)
% CLIQUES_TO_JTREE Build a junction tree from a list of cliques.
%   [jtree, root, B, w] = cliques_to_jtree(cliques, ns)
%
% Input:
%   cliques{i} = nodes in clique i
%   ns(i)      = number of values node i can take on
% Output:
%   jtree(i,j) = 1 iff cliques i and j are connected
%   root       = the clique to use as root (the last clique)
%   B(i,j)     = 1 iff node j occurs in clique i
%   w(i)       = weight of clique i (product of ns over its nodes)
%
% The tree is the maximum spanning tree of the junction graph, in which the
% weight of edge (i,j) is the size of the intersection of cliques i and j.
% Ties are broken with the secondary cost w(i) + w(j).
% See Jensen and Jensen, "Optimal Junction Trees", UAI 94.

num_cliques = length(cliques);
w = zeros(num_cliques, 1);
B = sparse(num_cliques, 1);
for c = 1:num_cliques
    members = cliques{c};
    B(c, members) = 1;
    w(c) = prod(ns(members));
end

% Intersection sizes: the dot product of two membership bit vectors.
overlap = setdiag(B*B', 0);

% Secondary cost: pairCost(i,j) = w(i) + w(j).
W = repmat(w, 1, num_cliques);
pairCost = setdiag(W + W', 0);

% Negating the primary cost turns the minimum spanning tree into a maximum one.
jtree = sparse(minimum_spanning_tree(-overlap, pairCost));

% The root is arbitrary; the last clique is used.
root = num_cliques;
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
|
||||
function C = myintersect(A,B)
% MYINTERSECT Intersection of two sets of positive integers.
%   C = myintersect(A,B)
%
% Returns the elements of B (in B's order, as a row vector) that also occur
% in A. Returns [] when either input is empty.

A = A(:)';
B = B(:)';

% Largest element of each set; 0 stands for "empty".
hiA = 0;
if ~isempty(A)
    hiA = max(A);
end
hiB = 0;
if ~isempty(B)
    hiB = max(B);
end

if hiA == 0 || hiB == 0
    C = [];
    return
end

% Mark the members of A in a lookup table, then filter B through it.
member = false(1, max(hiA,hiB));
member(A) = true;
C = B(member(B));
|
||||
|
||||
%sum( bitget( bitand( cliquesb(i), cliquesb(j) ), 1:52 ) );
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function ns = neighbors(adj_mat, i)
% NEIGHBORS Indices of the non-zero entries in row i of an adjacency matrix.
%   ns = neighbors(adj_mat, i)

row = adj_mat(i,:);
ns = find(row);
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function C = myunion(A,B)
% MYUNION Union of two sets of positive integers.
%   C = myunion(A,B)
%
% When both sets are non-empty the result is a sorted row vector without
% duplicates. When exactly one set is empty the other one is returned
% unchanged; when both are empty the result is [].

% Largest element of each set; 0 stands for "empty".
hiA = 0;
if ~isempty(A)
    hiA = max(A);
end
hiB = 0;
if ~isempty(B)
    hiB = max(B);
end

if hiA==0 & hiB==0
    C = [];
elseif hiA==0 & hiB>0
    C = B;
elseif hiA>0 & hiB==0
    C = A;
else
    % Mark the members of both sets in one table and read it back.
    present = zeros(1, max(hiA,hiB));
    present(A) = 1;
    present(B) = 1;
    C = find(present);
end
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
|
||||
function ps = parents(adj_mat, i)
% PARENTS Indices of the non-zero entries in column i, as a row vector.
%   ps = parents(adj_mat, i)

column = adj_mat(:,i);
ps = find(column)';
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function cs = children(adj_mat, i, t)
% CHILDREN Indices of a node's children in sorted order.
%   cs = children(adj_mat, i, t)
%
% t is optional. Without it, or with t == 1, the children are the non-zero
% entries of row i. Otherwise adj_mat is treated as a 2-slice DBN: row
% i + ss is read (ss = half the matrix length) and the indices are shifted
% by (t-2)*ss.

slice = 1;
if nargin >= 3
    slice = t;
end

if slice==1
    cs = find(adj_mat(i,:));
else
    ss = length(adj_mat)/2;      % nodes per slice
    shifted = adj_mat(i+ss,:);
    cs = find(shifted) + (slice-2)*ss;
end
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function p=mysubset(small,large)
% MYSUBSET Is the small set of positive integers a subset of the large set?
%   p = mysubset(small, large)
%
% The empty set counts as a subset of anything.

p = 1;
if ~isempty(small)
    common = myintersect(small,large);
    p = length(common) == length(small);
end
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function A = minimum_spanning_tree(C1, C2)
% MINIMUM_SPANNING_TREE Minimum spanning tree by Prim's algorithm.
%   A = minimum_spanning_tree(C1, C2)
%
%   C1(i,j)  primary cost of connecting i to j.
%   C2(i,j)  optional secondary cost, used to break ties.
%   A        symmetric 0/1 adjacency matrix of the tree.
%
% Entries equal to 0 are treated as absent edges and get infinite cost.
% To obtain a maximum spanning tree, pass -C1.
% See Aho, Hopcroft & Ullman 1983, "Data structures and algorithms", p 237.
%
% Bookkeeping: nearest(j) is the tree vertex currently closest to j, and
% cost1(j)/cost2(j) are the primary/secondary costs of that edge; both are
% set to infinity once j has joined the tree.

n = length(C1);
if nargin==1
    C2 = zeros(n);
end
A = zeros(n);

nearest = ones(1,n);
inTree = false(1,n);
inTree(1) = true;            % start from node 1
C1(C1==0) = inf;
C2(C2==0) = inf;
cost1 = C1(1,:);
cost2 = C2(1,:);

for step = 2:n
    % Cheapest primary cost; the secondary cost breaks ties.
    ties = find(cost1 == min(cost1));
    k = ties(argmin(cost2(ties)));

    A(k, nearest(k)) = 1;
    A(nearest(k), k) = 1;
    cost1(k) = inf;
    cost2(k) = inf;
    inTree(k) = true;

    % Relax the costs of the vertices still outside the tree.
    for j = find(~inTree)
        if C1(k,j) < cost1(j)
            cost1(j) = C1(k,j);
            cost2(j) = C2(k,j);
            nearest(j) = k;
        end
    end
end
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function indices = argmin(v)
% ARGMIN Subscript vector of the smallest element of an array.
%   indices = argmin(v)
%
% Returns the first minimum in the case of ties.
% Example:
%   X = [2 8 4; 7 3 9];
%   argmin(X) = [1 1], i.e., row 1 column 1

flat = v(:);
[smallest, pos] = min(flat);
indices = ind2subv(mysize(v), pos);
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function M = setdiag(M, v)
% SETDIAG Set the diagonal of a matrix to a specified scalar/vector.
%   M = setdiag(M, v)
%
% A scalar v is replicated along the whole diagonal.

n = length(M);
if length(v) == 1
    v = repmat(v, 1, n);
end

% In column-major numbering the diagonal of an n-by-n matrix sits at linear
% indices 1, n+2, 2n+3, ... (e.g. [1 5 9] for a 3-by-3 matrix).
diagIdx = 1:(n+1):n^2;
M(diagIdx) = v;
|
||||
|
||||
%-------------------------------------------------------------------------
|
||||
|
||||
function sz = mysize(M)
% MYSIZE Like size, but a vector of length n yields n and a scalar yields 1.
%   sz = mysize(M)
%
% Examples
%   M = rand(1,1),   mysize(M) = 1,     size(M) = [1 1]
%   M = rand(2,1),   mysize(M) = 2,     size(M) = [2 1]
%   M = rand(1,2),   mysize(M) = 2,     size(M) = [1 2]
%   M = rand(2,2,1), mysize(M) = [2 2], size(M) = [2 2]
%   M = rand(1,2,1), mysize(M) = 2,     size(M) = [1 2]

sz = size(M);
if myisvector(M)
    sz = length(M);
end
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function sub = ind2subv(siz, ndx)
% IND2SUBV Like the built-in ind2sub, but returns the answer as a row vector.
%   sub = ind2subv(siz, ndx)
%
% siz and ndx can be row or column vectors.
% sub will be of size length(ndx) * length(siz).
%
% Example
%   ind2subv([2 2 2], 1:8) returns
%     [1 1 1
%      2 1 1
%      ...
%      2 2 2]
% That is, the leftmost digit toggles fastest.

nDims = length(siz);

if nDims == 0
    sub = ndx;
    return;
end

% All-binary dimensions: the subscripts are the bits of ndx-1, reversed.
if all(siz == 2)
    bits = dec2bitv(ndx-1, nDims);
    sub = bits(:, nDims:-1:1) + 1;
    return;
end

% General case: peel off the digits from the most significant dimension down.
stride = [1 cumprod(siz(:)')];
rest = ndx(:) - 1;
sub = zeros(length(rest), nDims);
for d = nDims:-1:1
    sub(:,d) = floor(rest/stride(d)) + 1;
    rest = rem(rest, stride(d));
end
|
||||
|
||||
%%%%%%%%%%
|
||||
|
||||
function bits = dec2bitv(d,n)
% DEC2BITV Convert a decimal integer to a bit vector.
%   bits = dec2bitv(d,n) works like the built-in dec2bin, except that the
%   answer is a numeric vector, not a string.
%   n is an optional minimum length of the bit vector.
%   If d is a vector, each row of the output is one bit vector.

if nargin < 2
    n = 1;   % at least one digit is needed, even for 0
end
d = d(:);

% The exponent returned by log2 is the number of binary digits required.
[mantissa, nDigits] = log2(max(d));
width = max(n, nDigits);
scale = pow2((1-width):0);
bits = rem(floor(d*scale), 2);
|
||||
|
||||
|
||||
%------------------------------------------------------------------------
|
||||
|
||||
function r = myisvector(V)
% MYISVECTOR Like isvector(V): true for a 2-D array with a dimension of length 1.

dims = size(V);
is2d = (length(dims) == 2);
hasUnitDim = (min(dims) == 1);
r = is2d & hasUnitDim;
|
||||
307
matlab/spatial/handleIndiFastaCase.m
Normal file
307
matlab/spatial/handleIndiFastaCase.m
Normal file
|
|
@ -0,0 +1,307 @@
|
|||
function handleIndiFastaCase(cc,dist,Z)
% HANDLEINDIFASTACASE Individual-level clustering for FASTA input.
%   handleIndiFastaCase(cc,dist,Z)
%
%   cc    struct describing the data set. The fields read here are popnames,
%         pointers, vorPoints, vorCells, coordinates and heds; the fields
%         PARTITION, npops, logml, partitionSummary and logmldiff are set
%         from the search result before the results are reported and saved.
%   dist  passed on to model_search_parallel and saved with the results.
%   Z     passed on to cluster_own (to build the initial partition) and
%         saved with the results.
%
% Steps: ask for the maximum number of populations, run the model search,
% print/plot the result and optionally save it (.mat file, a copy of the
% text report and a tab-separated map file).
%
% Lu Cheng, 15.12.2012

OUTPUT_FILE = 'baps6_output.baps';

teksti = 'Input upper bound to the number of populations (only one value): ';
npopstextExtra = inputdlg(teksti ,'Input maximum number of populations',1,{'20'});
if isempty(npopstextExtra)   % Cancel was pressed
    return
end
nMaxPops = str2num(npopstextExtra{1});
if isempty(nMaxPops)
    % The text could not be parsed as a number; indexing it would fail.
    errordlg('The maximum number of populations must be a number.', 'Invalid input');
    return
end
nMaxPops = nMaxPops(1);      % only the first value is used

initPart = cluster_own(Z,nMaxPops);

roundTypes = [2*ones(1,nMaxPops) ...
    1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
    1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
    3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 ...
    3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 1 1 1 1 ...
    1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 ...
    1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 ...
    1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4];

[partition, logml, partitionSummary, logmldiff] = model_search_parallel(cc, initPart, dist, roundTypes);
cc.PARTITION = partition;
cc.npops = length(unique(partition));
cc.logml = logml;
cc.partitionSummary = partitionSummary;
cc.logmldiff = logmldiff;

if cc.npops==nMaxPops
    % The search hit the upper bound, so the bound may have been too small.
    % (The original call was missing the dialog title and a comma, which
    % turned 'Yes''No' into the title string.)
    choice = questdlg(sprintf('%d populations discovered, which is the same as input. We suggest you to set a larger number. Do you want to quit?', cc.npops),...
        'Upper bound reached','Yes','No','Yes');
    if strcmp(choice,'Yes')
        return
    end
end

writeMixtureInfo(cc);

popnames = cc.popnames;
pointers = cc.pointers;
vorPoints = cc.vorPoints;
vorCells = cc.vorCells;
coordinates = cc.coordinates;
heds = cc.heds;

viewMixPartition(partition, popnames);

% Build the label of every cell in pointers for the Voronoi plot.
if isequal(popnames, [])
    names = pointers;
else
    names = cell(size(pointers));
    indices = zeros(size(popnames(:,2)));
    for i=1:length(popnames(:,2))
        indices(i) = popnames{i,2};
    end
    for i = 1:length(pointers)
        inds = pointers{i};
        namesInCell = [];
        for j = 1:length(inds)
            ind = inds(j);
            % Pick the entry just before the first index greater than ind,
            % or the last entry if there is none.
            I = find(indices > ind);
            if isempty(I)
                nameIndex = length(indices);
            else
                nameIndex = min(I) -1;
            end
            name = popnames{nameIndex};
            namesInCell = [namesInCell name];
        end
        names{i} = namesInCell;
    end
end
vorPlot(vorPoints, vorCells, partition, pointers, coordinates, names);

talle = questdlg(['Do you want to save the mixture populations ' ...
    'so that you can use them later in admixture analysis or plot ' ...
    'additional images?'], ...
    'Save results?','Yes','No','Yes');
if isequal(talle,'Yes')
    [filename, pathname] = uiputfile('*.mat','Save results as');

    % isequal avoids comparing the returned character arrays elementwise.
    if isequal(filename,0) || isequal(pathname,0)
        % Cancel was pressed
        return
    end

    % Copy the text report next to the .mat file, then remove the original.
    if exist(OUTPUT_FILE,'file')
        copyfile(OUTPUT_FILE,[pathname filename '.txt'])
        delete(OUTPUT_FILE)
    end

    % Tab-separated map file, added by Lu Cheng, 05.12.2012
    tmpFile = [pathname filename '.mapfile.txt'];
    fid = fopen(tmpFile,'w+');
    if fid == -1
        warning('handleIndiFastaCase:mapFile', ...
            'Could not open %s for writing; the map file was not written.', tmpFile);
    else
        fprintf(fid,'Name\tLatitude\tLongitude\tDescription\tLabel\n');
        if ~isempty(heds)
            for i=1:length(heds)
                fprintf(fid,'%s\t%.10f\t%.10f\t%s_%d\t%d\n',heds{i},coordinates(i,1),coordinates(i,2),...
                    heds{i},partition(i),partition(i));
            end
        else
            % No headers available: use the running index as the name.
            ninds = length(partition);
            for i=1:ninds
                fprintf(fid,'%d\t%.10f\t%.10f\t%d_%d\t%d\n',i,coordinates(i,1),coordinates(i,2),...
                    i,partition(i),partition(i));
            end
        end
        fclose(fid);
    end

    format_type = 'FASTA';
    save([pathname filename], 'cc','dist','Z','format_type','-v7.3');
else
    if exist(OUTPUT_FILE,'file')
        delete(OUTPUT_FILE)
    end
end
|
||||
|
||||
|
||||
|
||||
%%%%%%%%%%%%%
|
||||
function writeMixtureInfo(c)
% WRITEMIXTUREINFO Print the mixture-analysis results and mirror them to a file.
%   writeMixtureInfo(c)
%
%   c is a struct; the fields read here are PARTITION, npops, popnames,
%   heds (used as names when popnames is empty), logml, logmldiff and
%   partitionSummary.
%
% Everything is shown with disp and, if the output file 'baps6_output.baps'
% can be opened, also written there. On screen only the first 100 rows of
% the log(ml) change table are shown; the file receives all rows.
%
% modified by Lu Cheng, 28.03.2010

outputFile = 'baps6_output.baps';

ninds = length(c.PARTITION);
npops = c.npops;
popnames = c.popnames;
logml = c.logml;
partition = c.PARTITION;
partitionSummary = c.partitionSummary;

if isempty(popnames)
    popnames = c.heds;
end

fid = fopen(outputFile,'w+');
toFile = (fid ~= -1);        % if the file cannot be opened, print only

dispLine;
disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:');
disp(['Number of clustered individuals: ' ownNum2Str(ninds)]);
disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]);
disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]);
disp(' ');
if toFile
    fprintf(fid,'%10s\n', 'RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:');
    fprintf(fid,'%20s\n', ['Number of clustered individuals: ' ownNum2Str(ninds)]);
    fprintf(fid,'%20s\n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]);
    fprintf(fid,'%20s\n\n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]);
end

disp('Best Partition: ');
if toFile
    fprintf(fid,'%s \n','Best Partition: ');
end
for m=1:npops
    indsInM = find(partition==m);
    if isempty(indsInM)
        continue;
    end

    % Continuation lines are indented to line up after 'Cluster m: {'.
    length_of_beginning = 11 + floor(log10(m));
    cluster_size = length(indsInM);

    rivi = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})];
    for k = 2:cluster_size
        rivi = [rivi ', ' char(popnames{indsInM(k)})];
    end
    rivi = [rivi '}'];

    % Wrap the member list at roughly 58 characters per line.
    while length(rivi)>58
        new_line = takeLine(rivi,58);
        rivi = rivi(length(new_line)+1:end);
        disp(new_line);
        if toFile
            fprintf(fid,'%s \n',new_line);
        end
        if ~isempty(rivi)
            rivi = [blanks(length_of_beginning) rivi];
        else
            rivi = [];
        end
    end

    if ~isempty(rivi)
        disp(rivi);
        if toFile
            fprintf(fid,'%s \n',rivi);
        end
    end
end

if npops ~= 1
    logmldiff = c.logmldiff;

    disp(' ');
    disp(' ');
    disp('Changes in log(marginal likelihood) if indvidual i is moved to cluster j:');
    if toFile
        fprintf(fid, '%s \n', ' '); fprintf(fid, '\n');
        fprintf(fid, '%s \n', 'Changes in log(marginal likelihood) if indvidual i is moved to cluster j:'); fprintf(fid, '\n');
    end

    % Header row: one column per cluster.
    rivi = sprintf('%10s','ind');
    for ii = 1:npops
        rivi = [rivi sprintf('\t%10s',num2str(ii))];
    end
    disp(rivi);
    if toFile
        fprintf(fid, '%s \n', rivi);
    end

    for ii = 1:ninds
        % char() matches the way names are read in the cluster listing above.
        rivi = sprintf('%10s',char(popnames{ii}));
        for jj = 1:npops
            rivi = [rivi sprintf('\t%10s',num2str(logmldiff(ii,jj),'%10.6f'))];
        end

        % Show the first 100 rows on screen, then a single pointer to the
        % file. (The original test ii<100 silently skipped row 100.)
        if ii<=100
            disp(rivi);
        elseif ii==101
            disp('.......................................');
            disp('..........see output file..............');
        end
        if toFile
            fprintf(fid, '%s \n', rivi);
        end
    end
end

disp(' ');
disp(' ');
disp('List of sizes of 10 best visited partitions and corresponding log(ml) values');

if toFile
    fprintf(fid, '%s \n\n', ' ');
    fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n');
end

% Stack the pages of the summary on top of each other.
partitionSummaryKaikki = partitionSummary;
partitionSummary = [];
for i=1:size(partitionSummaryKaikki,3)
    partitionSummary = [partitionSummary; partitionSummaryKaikki(:,:,i)];
end

vikaPartitio = 0;
if ~isempty(partitionSummary)
    % Sort by column 2 (log(ml)) in descending order and drop the rows whose
    % value is not above -1e49.
    partitionSummary = sortrows(partitionSummary,2);
    partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :);
    partitionSummary = partitionSummary(partitionSummary(:,2)>-1e49,:);
    vikaPartitio = min(10, size(partitionSummary,1));
end

for part = 1:vikaPartitio
    rivi = [num2str(partitionSummary(part,1),'%20d') '    ' num2str(partitionSummary(part,2),'%20.6f')];
    disp(rivi);
    if toFile
        fprintf(fid, '%s \n', rivi);
    end
end

if toFile
    fclose(fid);
end
|
||||
|
||||
|
||||
%%%%%%%%%%
|
||||
|
||||
|
||||
%--------------------------------------------------------------
|
||||
function newline = takeLine(description,width)
% TAKELINE Return one line from the beginning of description.
%   newline = takeLine(description,width)
%
% The line ends at the first whitespace character at or after position
% width+1, and includes that character. If there is no such character, or
% description is shorter than width+1, the whole description is returned.
% An empty description is returned unchanged.

len = length(description);
if len == 0
    newline = description;
    return
end

% Clamp the start position so that indexing stays inside the text.
n = min(max(width+1, 1), len);
while n < len && ~isspace(description(n))
    n = n+1;
end
newline = description(1:n);
|
||||
|
||||
302
matlab/spatial/handlePopFastaCase.m
Normal file
302
matlab/spatial/handlePopFastaCase.m
Normal file
|
|
@ -0,0 +1,302 @@
|
|||
function handlePopFastaCase(cc,pgPart,pgDist)
% HANDLEPOPFASTACASE Cluster pre-grouped FASTA data and report the result.
% Specifically written to handle the FASTA file format of individual
% clustering. Lu Cheng, 15.12.2012
%
% Inputs (as used in this function):
%   cc     - struct describing the data set. The fields popnames, pointers,
%            vorPoints, vorCells and coordinates are read here; PARTITION,
%            npops, logml, partitionSummary and logmldiff are written
%            before cc is handed to writeMixtureInfo and saved.
%   pgPart - pregroup labels; the number of distinct labels is used here
%            and the vector is passed on to model_search_pregroup.
%   pgDist - passed through to model_search_pregroup and saved unchanged.
%
% Steps: ask for the upper bound of the number of populations, run
% model_search_pregroup, write the text report, show the partition plots
% and optionally save the results (.mat, .txt report and .mapfile.txt).

OUTPUT_FILE = 'baps6_output.baps';

teksti = 'Input upper bound to the number of populations (only one value): ';
npopstextExtra = inputdlg(teksti ,'Input maximum number of populations',1,{'20'});
if isempty(npopstextExtra)  % Cancel was pressed
    return
end

% Parse the answer without evaluating it as code (str2num uses eval) and
% reject anything that is not a positive integer instead of crashing on an
% empty parse result. Only the first value is used, as before.
parsedValues = sscanf(npopstextExtra{1}, '%f');
if isempty(parsedValues) || ~isfinite(parsedValues(1)) ...
        || parsedValues(1) < 1 || parsedValues(1) ~= fix(parsedValues(1))
    disp('Incorrect value for the upper bound of the number of populations.');
    return
end
nMaxPops = parsedValues(1);

nPregroup = length(unique(pgPart));

% Sequence of search-operator codes handed to model_search_pregroup.
roundTypes = [2*ones(1,nMaxPops) ...
    1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
    1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
    3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 ...
    3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 1 1 1 1 ...
    1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 ...
    1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 ...
    1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4];

[partition, logml, partitionSummary, logmldiff] = model_search_pregroup(cc, pgPart, pgDist, roundTypes, nMaxPops);

cc.PARTITION = partition;  % note that the partition only contains nPregroup elements
cc.npops = length(unique(partition));
cc.logml = logml;
cc.partitionSummary = partitionSummary;
cc.logmldiff = logmldiff;

if cc.npops==nMaxPops
    % The button list used to be written as 'Yes''No' (missing comma), which
    % MATLAB reads as a single string and used as the dialog title. Give the
    % dialog an explicit title and explicit Yes/No buttons.
    choice = questdlg(sprintf('%d populations discovered, which is the same as input. We suggest you to set a larger number. Do you want to quit?', cc.npops),...
        'Warning','Yes','No','Yes');
    if strcmp(choice,'Yes')
        return
    end
end

writeMixtureInfo(cc);

popnames = cc.popnames;
pointers = cc.pointers;
vorPoints = cc.vorPoints;
vorCells = cc.vorCells;
coordinates = cc.coordinates;

if isequal(popnames, [])
    names = pointers;
else
    % Second column of popnames holds the starting index of each name.
    names = cell(size(pointers));
    indices = zeros(size(popnames(:,2)));
    for i = 1:length(popnames(:,2))
        indices(i) = popnames{i,2};
    end
    for i = 1:length(pointers)
        inds = pointers{i};
        namesInCell = [];
        for j = 1:length(inds)
            ind = inds(j);
            % The name of ind is the last one whose start index is <= ind.
            I = find(indices > ind);
            if isempty(I)
                nameIndex = length(indices);
            else
                nameIndex = min(I) - 1;
            end
            name = popnames{nameIndex};
            namesInCell = [namesInCell name];
        end
        names{i} = namesInCell;
    end
end
viewMixPartition(partition, popnames);
vorPlot(vorPoints, vorCells, partition, pointers, coordinates, names);

talle = questdlg(['Do you want to save the mixture populations ' ...
    'so that you can use them later in admixture analysis or plot ' ...
    'additional images?'], ...
    'Save results?','Yes','No','Yes');
if isequal(talle,'Yes')
    [filename, pathname] = uiputfile('*.mat','Save results as');

    % uiputfile returns numeric 0 for both outputs when cancelled; isequal
    % avoids comparing character arrays element-wise against 0.
    if isequal(filename,0) || isequal(pathname,0)
        % Cancel was pressed
        return
    end

    % Copy the text report next to the .mat file under the same name.
    if exist(OUTPUT_FILE,'file')
        copyfile(OUTPUT_FILE,[pathname filename '.txt'])
        delete(OUTPUT_FILE)
    end

    % added by Lu Cheng, 05.12.2012
    tmpFile = [pathname filename '.mapfile.txt'];
    fid = fopen(tmpFile,'w+');
    if fid == -1
        % Do not lose the .mat results just because the map file cannot be
        % created.
        warning('handlePopFastaCase:mapFile', ...
            'Could not open %s for writing; map file was not written.', tmpFile);
    else
        fprintf(fid,'GroupLabel\tLatitude\tLongitude\tDescription\tLabel\n');
        for i = 1:nPregroup
            fprintf(fid,'%d\t%.10f\t%.10f\t%d_%d\t%d\n',i,coordinates(i,1),coordinates(i,2),...
                i,partition(i),partition(i));
        end
        fclose(fid);
    end

    format_type = 'FASTA';
    save([pathname filename], 'cc','partition','pgDist','pgPart','format_type','-v7.3');
else
    if exist(OUTPUT_FILE,'file')
        delete(OUTPUT_FILE)
    end
end
|
||||
|
||||
|
||||
|
||||
%%%%%%%%%%%%%
|
||||
function writeMixtureInfo(c)
% WRITEMIXTUREINFO Print the clustering result and write it to a text file.
% Outputs the semi-supervised clustering results to 'baps6_output.baps'.
% modified by Lu Cheng, 28.03.2010
%
% Fields of c read here: PARTITION, npops, popnames, logml,
% partitionSummary, nPregroup, groupPartition and logmldiff.
%
% Three sections are written both to the console and, if the file could be
% opened, to the output file:
%   1. summary and the members of every cluster of the best partition,
%   2. the table of log(marginal likelihood) changes (when npops ~= 1),
%   3. the 10 best visited partitions.
% On the console the table in section 2 is cut after 99 rows and replaced
% by a "see output file" notice; the file always gets every row.

outputFile = 'baps6_output.baps';

ninds = length(c.PARTITION);
npops = c.npops;
popnames = c.popnames;
logml = c.logml;
partition = c.PARTITION;
partitionSummary = c.partitionSummary;

if isempty(popnames)
    % No names given: label the pregroups 1..nPregroup.
    popnames = cell(c.nPregroup,1);
    for i = 1:c.nPregroup
        popnames{i} = num2str(i);
    end
end

% fid is -1 when the file cannot be opened; every write below is guarded.
fid = fopen(outputFile,'w+');
toFile = (fid ~= -1);

dispLine;
disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:');
disp(['Number of clustered individuals: ' ownNum2Str(ninds)]);
disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]);
disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]);
disp(' ');
if toFile
    fprintf(fid,'%10s\n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']);
    fprintf(fid,'%20s\n', ['Number of clustered individuals: ' ownNum2Str(ninds)]);
    fprintf(fid,'%20s\n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]);
    fprintf(fid,'%20s\n\n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]);
end

disp('Best Partition: ');
if toFile
    fprintf(fid,'%s \n','Best Partition: ');
end
for m = 1:npops
    indsInM = unique(c.groupPartition(partition==m));
    if isempty(indsInM)
        continue;
    end

    % Continuation lines are indented to line up after "Cluster m: {".
    length_of_beginning = 11 + floor(log10(m));
    cluster_size = length(indsInM);

    txt = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})];
    for k = 2:cluster_size
        txt = [txt ', ' char(popnames{indsInM(k)})];
    end
    txt = [txt '}'];

    while length(txt)>58
        % Take one line and display it.
        new_line = takeLine(txt,58);
        txt = txt(length(new_line)+1:end);
        disp(new_line);
        if toFile
            fprintf(fid,'%s \n',new_line);
        end
        if ~isempty(txt)
            txt = [blanks(length_of_beginning) txt];
        else
            txt = [];
        end
    end

    if ~isempty(txt)
        disp(txt);
        if toFile
            fprintf(fid,'%s \n',txt);
        end
    end
end

if npops ~= 1
    logmldiff = c.logmldiff;

    disp(' ');
    disp(' ');
    disp('Changes in log(marginal likelihood) if pregroup i is moved to cluster j:');

    % NOTE: the heading written to the file differs from the console one
    % ("indvidual" vs "pregroup"); it is kept byte-for-byte in case the
    % report is parsed elsewhere.
    if toFile
        fprintf(fid, '%s \n', ' '); fprintf(fid, '\n');
        fprintf(fid, '%s \n', 'Changes in log(marginal likelihood) if indvidual i is moved to cluster j:'); fprintf(fid, '\n');
    end

    header = sprintf('%10s','ind');
    for ii = 1:npops
        header = [header sprintf('\t%10s',num2str(ii))];
    end

    disp(header);
    if toFile
        fprintf(fid, '%s \n', header);
    end

    for ii = 1:c.nPregroup
        row = sprintf('%10s',popnames{ii});
        for jj = 1:npops
            row = [row sprintf('\t%10s',num2str(logmldiff(ii,jj),'%10.6f'))];
        end

        if ii<100
            disp(row);
        elseif ii==100
            % Was ii==101, which silently dropped row 100 from the console
            % and printed the notice one row late.
            disp('.......................................');
            disp('..........see output file..............');
        end
        if toFile
            fprintf(fid, '%s \n', row);
        end
    end
end

disp(' ');
disp(' ');
disp('List of sizes of 10 best visited partitions and corresponding log(ml) values');

if toFile
    fprintf(fid, '%s \n\n', ' ');
    fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n');
end

% Stack the pages of the 3-D summary on top of each other.
partitionSummaryKaikki = partitionSummary;
partitionSummary = [];
for i = 1:size(partitionSummaryKaikki,3)
    partitionSummary = [partitionSummary; partitionSummaryKaikki(:,:,i)];
end

% Best log(ml) first; rows at or below -1e49 are dropped.
partitionSummary = sortrows(partitionSummary,2);
partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :);
partitionSummary = partitionSummary(logical(partitionSummary(:,2)>-1e49),:);
vikaPartitio = min(10, size(partitionSummary,1));
for part = 1:vikaPartitio
    summaryLine = [num2str(partitionSummary(part,1),'%20d') '    ' num2str(partitionSummary(part,2),'%20.6f')];
    disp(summaryLine);
    if toFile
        fprintf(fid, '%s \n', summaryLine);
    end
end

if toFile
    fclose(fid);
else
    diary off
end
|
||||
|
||||
|
||||
%%%%%%%%%%
|
||||
|
||||
|
||||
%--------------------------------------------------------------
|
||||
function newline = takeLine(description,width)
% TAKELINE Cut one display line off the front of a text.
%   newline = takeLine(description, width) returns description(1:n), where
%   n is the position of the first whitespace character found at or after
%   position width+1. When no whitespace is found the whole description is
%   returned.
%
%   A description with width or fewer characters is returned unchanged;
%   previously this case indexed past the end of the string and raised an
%   error.

total = length(description);
if width >= total
    % Nothing to cut: the text already fits on one line.
    newline = description;
    return
end

n = width+1;
% Bounds test first so description(n) is never evaluated out of range.
while n < total && ~isspace(description(n))
    n = n+1;
end
newline = description(1:n);
|
||||
|
||||
718
matlab/spatial/initSpatialMixture.m
Normal file
718
matlab/spatial/initSpatialMixture.m
Normal file
|
|
@ -0,0 +1,718 @@
|
|||
function [partition, counts, sumcounts] = initSpatialMixture(initData, ...
    npops, Z, rowsFromInd, noalle, dist, adjprior, priorTerm)
% INITSPATIALMIXTURE Build the initial state for the spatial mixture.
%
% The initial partition is obtained by cutting the linkage tree Z into
% npops clusters (admixture_initialization); the allele counts for that
% partition come from initialCounts.
%
% Inputs:
%   initData    - data matrix; every column except the last is passed on as
%                 allele data, the last column is used by
%                 admixture_initialization as the individual index.
%   npops       - number of clusters for the initial partition.
%   Z           - linkage tree passed to admixture_initialization.
%   rowsFromInd - number of data rows per individual.
%   noalle      - number of alleles per locus.
%   dist, adjprior, priorTerm - not used; kept so existing callers do not
%                 have to change.
%
% Outputs:
%   partition - initial partition with one entry per individual.
%   counts    - counts array returned by initialCounts.
%   sumcounts - sumcounts array returned by initialCounts.
%
% Side effects: sets the globals PARTITION_IN, COUNTS_IN and SUMCOUNTS_IN.
% POP_LOGML_IN is declared but not assigned here.
%
% NOTE: this function used to contain a greedy search (described in the
% original header as the BAPS 3.1 greedy algorithm) after an unconditional
% "return". That code could never run and has been removed; the helper
% subfunctions it called are still defined further down in this file.

global PARTITION_IN; global COUNTS_IN;
global SUMCOUNTS_IN; global POP_LOGML_IN;

data = initData(:,1:end-1);
initialPartition = admixture_initialization(initData, npops, Z);

[sumcounts, counts] = ...
    initialCounts(initialPartition, data, npops, rowsFromInd, noalle);

% initialPartition has one entry per data row; keep one per individual.
PARTITION_IN = initialPartition(1:rowsFromInd:end);
COUNTS_IN = counts;
SUMCOUNTS_IN = sumcounts;

partition = PARTITION_IN;
|
||||
|
||||
|
||||
|
||||
%-------------------------------------------------------------------------------------
|
||||
|
||||
function [sumcounts, counts, logml] = ...
    initialCounts(partition, data, npops, rowsFromInd, noalle)
% INITIALCOUNTS Allele counts and logml for a given partition.
%   counts(k,j,i)  - number of data rows assigned to cluster i whose value
%                    at locus j equals allele code k.
%   sumcounts(i,j) - number of data rows assigned to cluster i with a
%                    non-negative value at locus j.
%   logml          - value returned by computeLogml for these counts.
% partition holds one cluster label per data row. The GAMMA_LN lookup
% table is (re)initialised as a side effect.

nloci = size(data,2);
ninds = size(data,1)/rowsFromInd;
maxAlleles = max(noalle);

counts = zeros(maxAlleles,nloci,npops);
sumcounts = zeros(npops,nloci);
for pop = 1:npops
    inPop = (partition==pop);
    for locus = 1:nloci
        % Values observed (non-negative) at this locus within the cluster.
        observed = data(inPop & data(:,locus)>=0, locus);
        sumcounts(pop,locus) = length(observed);
        for allele = 1:noalle(locus)
            counts(allele,locus,pop) = nnz(observed==allele);
        end
    end
end

initializeGammaln(ninds, rowsFromInd, maxAlleles);

logml = computeLogml(counts, sumcounts, noalle, data, rowsFromInd);
|
||||
|
||||
|
||||
%-----------------------------------------------------------------------
|
||||
|
||||
|
||||
function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd)
% COMPUTELOGML Evaluate logml from count tables using the GAMMA_LN table.
%   Reads the global lookup table GAMMA_LN, where
%   GAMMA_LN(i,j) = gammaln((i-1) + 1/j) (see initializeGammaln), so that
%   every gammaln value needed is fetched by indexing.
%
%   counts    - (max alleles) x nloci x npops array of allele counts.
%   sumcounts - npops x nloci array of per-locus totals.
%   noalle    - number of alleles per locus.
%   data, rowsFromInd - only used to derive the number of rows of GAMMA_LN.

global GAMMA_LN;

nloci = size(counts,2);
npops = size(counts,3);

% adjnoalle(k,j) is noalle(j) for the allele rows in use at locus j and 1
% for the unused padding rows.
adjnoalle = ones(max(noalle), nloci);
for locus = 1:nloci
    adjnoalle(1:noalle(locus), locus) = noalle(locus);
end

% Number of rows in GAMMA_LN; used to turn (row, column) into a linear
% index: row = count+1, column = adjnoalle.
rowsInG = size(data,1)+rowsFromInd;
columnOffset = repmat(rowsInG*(adjnoalle-1), [1 1 npops]);

countTerm = sum(sum(sum(GAMMA_LN(counts+1 + columnOffset))));
priorPart = npops*sum(sum(GAMMA_LN(1, adjnoalle)));
totalPart = sum(sum(GAMMA_LN(sumcounts+1,1)));

logml = countTerm - priorPart - totalPart;
|
||||
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
|
||||
function initializeGammaln(ninds, rowsFromInd, maxAlleles)
% INITIALIZEGAMMALN Fill the global lookup table GAMMA_LN.
%   After the call GAMMA_LN has (1+ninds)*rowsFromInd rows and maxAlleles
%   columns, with GAMMA_LN(i,j) = gammaln((i-1) + 1/j).

global GAMMA_LN;

nRows = (1+ninds)*rowsFromInd;
% rowOffset(i,j) = i-1 and alleleCount(i,j) = j, both nRows x maxAlleles.
[alleleCount, rowOffset] = meshgrid(1:maxAlleles, 0:nRows-1);
GAMMA_LN = gammaln(rowOffset + 1./alleleCount);
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
% The following three functions are used to build the initial partition.
|
||||
|
||||
function initial_partition=admixture_initialization(data_matrix,nclusters,Z)
% ADMIXTURE_INITIALIZATION Initial cluster label for every data row.
%   The last column of data_matrix is read as the index of the individual
%   each row belongs to. The tree Z is cut into nclusters clusters with
%   cluster_own, and every row receives the cluster label of its
%   individual. Rows whose index is not in 1..max(index) keep the label 0.

rowOwner = data_matrix(:,end);
nIndividuals = max(rowOwner);
T = cluster_own(Z,nclusters);

initial_partition = zeros(size(data_matrix,1),1);
for ind = 1:nIndividuals
    label = T(ind);
    initial_partition(rowOwner==ind) = label;
end
|
||||
|
||||
function T = cluster_own(Z,nclust)
% CLUSTER_OWN Cut a linkage tree into at most nclust clusters.
%   Z is a linkage matrix with m-1 rows for m leaves; columns 1 and 2 hold
%   the two nodes merged at each step (leaves are numbered 1..m, the node
%   created by row r is numbered m+r). T is an m x 1 vector of cluster
%   labels.
%
%   If there are no more leaves than requested clusters every leaf gets its
%   own label; if nclust is 1 every leaf gets label 1. Otherwise the last
%   nclust-1 merges are undone and each resulting subtree is labelled via
%   clusternum.
%
%   (The original version assigned local variables named true and false,
%   shadowing the builtins without using them; they have been removed.)

maxclust = nclust;
m = size(Z,1)+1;
T = zeros(m,1);

if m <= maxclust
    T = (1:m)';
elseif maxclust==1
    T = ones(m,1);
else
    clsnum = 1;
    % Node numbers below this value were created before the cut.
    cutoff = 2*m-maxclust+1;
    for k = (m-maxclust+1):(m-1)
        % Column 1 is the left subtree, column 2 the right subtree.
        for side = 1:2
            node = Z(k,side);
            if node <= m
                % Original leaf: it forms a cluster of its own.
                T(node) = clsnum;
                clsnum = clsnum + 1;
            elseif node < cutoff
                % Created before the cutoff: label everything below it.
                T = clusternum(Z, T, node-m, clsnum);
                clsnum = clsnum + 1;
            end
        end
    end
end
|
||||
|
||||
function T = clusternum(X, T, k, c)
% CLUSTERNUM Give label c to every leaf below the tree node(s) k.
%   X is a linkage matrix (m-1 rows for m leaves), k holds row indices of
%   X, i.e. internal nodes, and T is the label vector to update. The tree
%   is walked level by level until only leaves remain.

nLeaves = size(X,1)+1;
pending = k;
while ~isempty(pending)
    % Children of all nodes on the current level, as one column.
    kids = X(pending,1:2);
    kids = kids(:);

    % Leaf children receive the label ...
    isLeaf = (kids<=nLeaves);
    T(kids(isLeaf)) = c;

    % ... internal children form the next level (converted to row indices).
    pending = kids(~isLeaf) - nLeaves;
end
|
||||
|
||||
%--------------------------------------------------------------------------
|
||||
|
||||
function rows = computeRows(rowsFromInd, inds, ninds)
% COMPUTEROWS Data row numbers belonging to the given individuals.
%   inds is a column vector of ninds individual indices. Individual i owns
%   the data rows (i-1)*rowsFromInd+1 .. i*rowsFromInd. The result is a
%   1 x (rowsFromInd*ninds) row vector listing those rows individual by
%   individual, in the order given by inds.

% Last row of every individual, repeated across rowsFromInd columns.
lastRows = repmat(inds*rowsFromInd, 1, rowsFromInd);
% Distance of each column from that last row: rowsFromInd-1, ..., 1, 0.
backSteps = repmat((rowsFromInd-1):-1:0, [ninds 1]);
rowMatrix = lastRows - backSteps;
rows = reshape(rowMatrix', [1,rowsFromInd*ninds]);
|
||||
|
||||
%-------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ...
    adjprior, priorTerm)
% UPDATEGLOBALVARIABLES Apply the move of individual ind to cluster i2.
%   Updates the globals PARTITION_IN, COUNTS_IN, SUMCOUNTS_IN and
%   POP_LOGML_IN. diffInCounts holds the allele counts of the individual;
%   they are subtracted from its current cluster and added to i2.
%   rowsFromInd is not used.

global PARTITION_IN;
global COUNTS_IN;
global SUMCOUNTS_IN;
global POP_LOGML_IN;

source = PARTITION_IN(ind);
PARTITION_IN(ind) = i2;

COUNTS_IN(:,:,source) = COUNTS_IN(:,:,source) - diffInCounts;
COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2) + diffInCounts;

locusTotals = sum(diffInCounts);
SUMCOUNTS_IN(source,:) = SUMCOUNTS_IN(source,:) - locusTotals;
SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:) + locusTotals;

% Only the two clusters involved need their logml recomputed.
touched = [source i2];
POP_LOGML_IN(touched) = computePopulationLogml(touched, adjprior, priorTerm);
|
||||
|
||||
|
||||
%---------------------------------------------------------------------------------
|
||||
|
||||
|
||||
function updateGlobalVariables2( ...
    i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm)
% UPDATEGLOBALVARIABLES2 Apply the merge of cluster i1 into cluster i2.
%   Every individual currently in i1 is relabelled to i2 and the globals
%   PARTITION_IN, COUNTS_IN, SUMCOUNTS_IN and POP_LOGML_IN are updated.
%   diffInCounts holds the allele counts being moved. rowsFromInd is not
%   used.

global PARTITION_IN;
global COUNTS_IN;
global SUMCOUNTS_IN;
global POP_LOGML_IN;

PARTITION_IN(PARTITION_IN==i1) = i2;

COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1) - diffInCounts;
COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2) + diffInCounts;

locusTotals = sum(diffInCounts);
SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:) - locusTotals;
SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:) + locusTotals;

% The emptied cluster gets logml 0; the receiving one is recomputed.
POP_LOGML_IN(i1) = 0;
POP_LOGML_IN(i2) = computePopulationLogml(i2, adjprior, priorTerm);
|
||||
|
||||
|
||||
%------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ...
    adjprior, priorTerm, i2)
% UPDATEGLOBALVARIABLES3 Apply the move of a set of individuals to i2.
%   The individuals 'muuttuvat' are moved to cluster i2 and the globals
%   PARTITION_IN, COUNTS_IN, SUMCOUNTS_IN and POP_LOGML_IN are updated.
%   Before the move all of them must belong to the same cluster; the
%   source cluster is read from the first one. diffInCounts holds the
%   allele counts being moved. rowsFromInd is not used.

global PARTITION_IN;
global COUNTS_IN;
global SUMCOUNTS_IN;
global POP_LOGML_IN;

source = PARTITION_IN(muuttuvat(1));
PARTITION_IN(muuttuvat) = i2;

COUNTS_IN(:,:,source) = COUNTS_IN(:,:,source) - diffInCounts;
COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2) + diffInCounts;

locusTotals = sum(diffInCounts);
SUMCOUNTS_IN(source,:) = SUMCOUNTS_IN(source,:) - locusTotals;
SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:) + locusTotals;

touched = [source i2];
POP_LOGML_IN(touched) = computePopulationLogml(touched, adjprior, priorTerm);
|
||||
|
||||
|
||||
%----------------------------------------------------------------------
|
||||
|
||||
|
||||
function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm)
% RETURNINORDER Order individuals by the effect of removing them from pop.
%   For every individual in the column vector inds, the logml of cluster
%   pop is computed as if that individual had been removed. The
%   individuals are returned in descending order of that value, so the
%   first one is the one whose removal would raise the logml of pop most.
%   COUNTS_IN and SUMCOUNTS_IN are modified temporarily and restored
%   before returning.

global COUNTS_IN; global SUMCOUNTS_IN;

ninds = length(inds);
nAlleleRows = size(COUNTS_IN,1);
nLoci = size(COUNTS_IN,2);
logmlWithout = zeros(ninds,1);

for idx = 1:ninds
    ind = inds(idx);
    rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
    diffInCounts = computeDiffInCounts(rows, nAlleleRows, nLoci, data);
    diffInSumCounts = sum(diffInCounts);

    % Take the individual out, evaluate, and put it back.
    COUNTS_IN(:,:,pop) = COUNTS_IN(:,:,pop)-diffInCounts;
    SUMCOUNTS_IN(pop,:) = SUMCOUNTS_IN(pop,:)-diffInSumCounts;
    logmlWithout(idx) = computePopulationLogml(pop, adjprior, priorTerm);
    COUNTS_IN(:,:,pop) = COUNTS_IN(:,:,pop)+diffInCounts;
    SUMCOUNTS_IN(pop,:) = SUMCOUNTS_IN(pop,:)+diffInSumCounts;
end

% Ascending sort on the logml column, then reverse to get descending.
ranked = sortrows([inds, logmlWithout], 2);
inds = ranked(ninds:-1:1,1);
|
||||
|
||||
%------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
function [muutokset, diffInCounts] = ...
    laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm)
% LASKEMUUTOKSET Change in logml for moving one individual.
%   Returns an npops x 1 table whose i-th element is the change in logml
%   if individual ind were moved to cluster i (0 for its current cluster).
%   diffInCounts must be removed from slice i1 of COUNTS_IN and added to
%   slice i2 if the move is carried out.
%   COUNTS_IN and SUMCOUNTS_IN are modified temporarily and restored.

global COUNTS_IN; global SUMCOUNTS_IN;
global PARTITION_IN; global POP_LOGML_IN;

npops = size(COUNTS_IN,3);
muutokset = zeros(npops,1);

home = PARTITION_IN(ind);
home_logml = POP_LOGML_IN(home);

rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
diffInCounts = computeDiffInCounts(rows, size(COUNTS_IN,1), size(COUNTS_IN,2), data);
diffInSumCounts = sum(diffInCounts);

% logml of the current cluster without the individual.
COUNTS_IN(:,:,home) = COUNTS_IN(:,:,home)-diffInCounts;
SUMCOUNTS_IN(home,:) = SUMCOUNTS_IN(home,:)-diffInSumCounts;
new_home_logml = computePopulationLogml(home, adjprior, priorTerm);
COUNTS_IN(:,:,home) = COUNTS_IN(:,:,home)+diffInCounts;
SUMCOUNTS_IN(home,:) = SUMCOUNTS_IN(home,:)+diffInSumCounts;

% logml of every other cluster with the individual added.
others = [1:home-1 , home+1:npops];
others_logml = POP_LOGML_IN(others);
addedCounts = repmat(diffInCounts, [1 1 npops-1]);
addedSums = repmat(diffInSumCounts,[npops-1 1]);

COUNTS_IN(:,:,others) = COUNTS_IN(:,:,others)+addedCounts;
SUMCOUNTS_IN(others,:) = SUMCOUNTS_IN(others,:)+addedSums;
new_others_logml = computePopulationLogml(others, adjprior, priorTerm);
COUNTS_IN(:,:,others) = COUNTS_IN(:,:,others)-addedCounts;
SUMCOUNTS_IN(others,:) = SUMCOUNTS_IN(others,:)-addedSums;

muutokset(others) = new_home_logml - home_logml ...
    + new_others_logml - others_logml;
|
||||
|
||||
|
||||
%------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
function [muutokset, diffInCounts] = laskeMuutokset2( ...
    i1, rowsFromInd, data, adjprior, priorTerm)
% LASKEMUUTOKSET2 Change in logml for merging cluster i1 into another.
%   Returns an npops x 1 table whose i-th element is the change in logml
%   if all individuals of cluster i1 were moved to cluster i (0 for i1
%   itself). diffInCounts holds the allele counts of cluster i1's
%   individuals. For an empty cluster both outputs are all zeros.
%   COUNTS_IN and SUMCOUNTS_IN are modified temporarily and restored.

global COUNTS_IN; global SUMCOUNTS_IN;
global PARTITION_IN; global POP_LOGML_IN;

nAlleleRows = size(COUNTS_IN,1);
nLoci = size(COUNTS_IN,2);
npops = size(COUNTS_IN,3);
muutokset = zeros(npops,1);

i1_logml = POP_LOGML_IN(i1);

members = find(PARTITION_IN==i1);
nMembers = length(members);
if nMembers==0
    diffInCounts = zeros(nAlleleRows, nLoci);
    return;
end

rows = computeRows(rowsFromInd, members, nMembers);
diffInCounts = computeDiffInCounts(rows, nAlleleRows, nLoci, data);
diffInSumCounts = sum(diffInCounts);

% logml of i1 once its members have been taken out.
COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)-diffInCounts;
SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)-diffInSumCounts;
new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)+diffInCounts;
SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)+diffInSumCounts;

% logml of every other cluster with those members added.
others = [1:i1-1 , i1+1:npops];
others_logml = POP_LOGML_IN(others);
addedCounts = repmat(diffInCounts, [1 1 npops-1]);
addedSums = repmat(diffInSumCounts,[npops-1 1]);

COUNTS_IN(:,:,others) = COUNTS_IN(:,:,others)+addedCounts;
SUMCOUNTS_IN(others,:) = SUMCOUNTS_IN(others,:)+addedSums;
new_others_logml = computePopulationLogml(others, adjprior, priorTerm);
COUNTS_IN(:,:,others) = COUNTS_IN(:,:,others)-addedCounts;
SUMCOUNTS_IN(others,:) = SUMCOUNTS_IN(others,:)-addedSums;

muutokset(others) = new_i1_logml - i1_logml ...
    + new_others_logml - others_logml;
|
||||
|
||||
|
||||
|
||||
%------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ...
    data, adjprior, priorTerm, i1)
% LASKEMUUTOKSET3 Change in logml for moving sub-clusters of cluster i1.
%   Returns a length(unique(T2)) x npops table whose (i,j) element is the
%   change in logml if the sub-cluster inds2(T2==i) of cluster i1 were
%   moved to cluster j. Column i1 and the rows of empty sub-clusters stay
%   zero. COUNTS_IN and SUMCOUNTS_IN are modified temporarily and restored.

global COUNTS_IN; global SUMCOUNTS_IN;
global PARTITION_IN; global POP_LOGML_IN;

nAlleleRows = size(COUNTS_IN,1);
nLoci = size(COUNTS_IN,2);
npops = size(COUNTS_IN,3);
npops2 = length(unique(T2));
muutokset = zeros(npops2, npops);

i1_logml = POP_LOGML_IN(i1);
others = [1:i1-1 , i1+1:npops];

for pop2 = 1:npops2
    members = inds2(T2==pop2);
    nMembers = length(members);
    if nMembers == 0
        continue;
    end

    rows = computeRows(rowsFromInd, members, nMembers);
    diffInCounts = computeDiffInCounts(rows, nAlleleRows, nLoci, data);
    diffInSumCounts = sum(diffInCounts);

    % logml of i1 without this sub-cluster.
    COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)-diffInCounts;
    SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)-diffInSumCounts;
    new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
    COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)+diffInCounts;
    SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)+diffInSumCounts;

    % logml of every other cluster with this sub-cluster added; both
    % vectors are transposed to fit one row of the result table.
    others_logml = POP_LOGML_IN(others)';
    addedCounts = repmat(diffInCounts, [1 1 npops-1]);
    addedSums = repmat(diffInSumCounts,[npops-1 1]);

    COUNTS_IN(:,:,others) = COUNTS_IN(:,:,others)+addedCounts;
    SUMCOUNTS_IN(others,:) = SUMCOUNTS_IN(others,:)+addedSums;
    new_others_logml = computePopulationLogml(others, adjprior, priorTerm)';
    COUNTS_IN(:,:,others) = COUNTS_IN(:,:,others)-addedCounts;
    SUMCOUNTS_IN(others,:) = SUMCOUNTS_IN(others,:)-addedSums;

    muutokset(pop2,others) = new_i1_logml - i1_logml ...
        + new_others_logml - others_logml;
end
|
||||
|
||||
|
||||
%------------------------------------------------------------------------------------
|
||||
|
||||
function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
% COMPUTEDIFFINCOUNTS Allele counts of the given data rows.
%   Builds a max_noalle x nloci table (laid out like one slice of
%   COUNTS_IN) in which element (k,j) is the number of the data rows
%   listed in 'rows' whose value at locus j equals k. Negative values in
%   data are skipped. 'rows' must be a row vector.

diffInCounts = zeros(max_noalle, nloci);
for r = rows
    alleles = data(r,:);
    observedLoci = find(alleles>=0);
    if ~isempty(observedLoci)
        % Linear index of element (allele, locus) in the table.
        cellIndex = alleles(observedLoci) + (observedLoci-1)*max_noalle;
        diffInCounts(cellIndex) = diffInCounts(cellIndex) + 1;
    end
end
|
||||
|
||||
|
||||
|
||||
%------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
function popLogml = computePopulationLogml(pops, adjprior, priorTerm)
% COMPUTEPOPULATIONLOGML  Per-cluster log marginal likelihoods.
%   Returns a length(pops)-by-1 array with the logml value of every
%   cluster listed in pops, computed from the global tables COUNTS_IN and
%   SUMCOUNTS_IN.
%
%   pops      - indices of the clusters (pages of COUNTS_IN) to evaluate
%   adjprior  - prior table added to every selected page of COUNTS_IN
%   priorTerm - constant subtracted from every cluster's value

global COUNTS_IN;
global SUMCOUNTS_IN;

nRows  = size(COUNTS_IN,1);
nCols  = size(COUNTS_IN,2);
nPages = length(pops);

% gammaln of (prior + counts) for every cell of every selected page.
posterior = repmat(adjprior,[1 1 nPages]) + COUNTS_IN(:,:,pops);
cellTerms = reshape(gammaln(posterior), [nRows nCols nPages]);

% Sum each page down to one number, then drop the singleton dimensions.
pageTotals = squeeze(sum(sum(cellTerms,1),2));

% gammaln of (1 + total count) summed over the columns of SUMCOUNTS_IN.
normTerms = sum(gammaln(1+SUMCOUNTS_IN(pops,:)),2);

popLogml = pageTotals - normTerms - priorTerm;
|
||||
|
||||
|
||||
%----------------------------------------------------------------------------
|
||||
|
||||
|
||||
function dist2 = laskeOsaDist(inds2, dist, ninds)
% LASKEOSADIST  Extract the pairwise distances among a subset of individuals.
%   dist  - vector of pairwise distances between all ninds individuals,
%           stored in the order (1,2),(1,3),...,(1,ninds),(2,3),...
%   inds2 - indices of the individuals of interest
%           (assumed to be in ascending order - TODO confirm with callers)
%   ninds - total number of individuals covered by dist
%
%   dist2 holds the entries of dist for the pairs
%   (inds2(i), inds2(j)), i < j, in the same pair ordering.
%   With fewer than two individuals the result is empty.

ninds2 = length(inds2);

% Number of unordered pairs. Computed arithmetically rather than with
% nchoosek(ninds2,2), which raises an error when ninds2 < 2.
npairs = ninds2*(ninds2-1)/2;
apu = zeros(npairs,2);
rivi = 1;
for i=1:ninds2-1
    for j=i+1:ninds2
        apu(rivi,:) = [inds2(i) inds2(j)];
        rivi = rivi+1;
    end
end

% Position of pair (a,b), a<b, inside the distance vector.
first  = apu(:,1);
second = apu(:,2);
pos = (first-1).*ninds - first ./ 2 .* (first-1) + (second-first);
dist2 = dist(pos);
|
||||
|
||||
|
||||
%----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
function Z = linkage(Y, method)
% LINKAGE  Hierarchical cluster tree from a vector of pairwise distances.
%   Z = linkage(Y, method)
%
%   Y      - 1-by-m*(m-1)/2 row vector of distances between m observations,
%            ordered (1,2),(1,3),...,(1,m),(2,3),...,(m-1,m)
%   method - only the first two letters are inspected (case-insensitive):
%              'si' single, 'av' average, 'co' complete (default),
%              'ce' centroid, 'wa' the fifth update rule of this file
%   Z      - (m-1)-by-3 matrix; row s is [label1 label2 distance] of the
%            two clusters merged at step s. Observations are labelled
%            1..m and the cluster created at step s gets label m+s.
%
%   An error is raised when Y is not a row vector of a valid length.
%
%   NOTE(review): the 'ce' and 'wa' update formulas are kept exactly as
%   they were; whether they expect squared distances is not visible here.

[k, n] = size(Y);
m = (1+sqrt(1+8*n))/2;
if k ~= 1 || m ~= fix(m)
    error('The first input has to match the output of the PDIST function in size.');
end
if nargin == 1 % set default switch to be 'co'
    method = 'co';
end
method = lower(method(1:2)); % simplify the switch string.

Z = zeros(m-1,3); % allocate the output matrix.
N = zeros(1,2*m-1); % N(label) = number of observations in that cluster
N(1:m) = 1;
n = m; % since m is changing, we need to save m in n.
R = 1:n; % R(position) = label of the cluster currently at that position

for s = 1:(n-1)
    if strcmp(method, 'av')
        % For average linkage Y stores the SUM of the pairwise distances
        % between two clusters; dividing by the product of the cluster
        % sizes gives the average that must be minimised and reported.
        W = zeros(1, m*(m-1)/2);
        pos = 0;
        for a = 1:(m-1)
            span = m - a;
            W(pos+(1:span)) = N(R(a)) .* N(R((a+1):m));
            pos = pos + span;
        end
        X = Y ./ W;
    else
        X = Y;
    end

    [v, k] = min(X);

    % Convert the linear position k into the pair (i,j), i < j.
    i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1)));
    j = k - (i-1)*(m-i/2)+i;

    Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix

    % Positions of all other clusters, split around i and j.
    I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m;
    U = [I1 I2 I3];
    % Linear positions of the distances (other,i) and (other,j).
    I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3];
    J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3];

    % The merged cluster takes over position i; update its distances.
    switch method
        case 'si' % single linkage
            Y(I) = min(Y(I),Y(J));
        case 'av' % average linkage (sums; normalised when selecting)
            Y(I) = Y(I) + Y(J);
        case 'co' % complete linkage
            Y(I) = max(Y(I),Y(J));
        case 'ce' % centroid linkage
            K = N(R(i))+N(R(j));
            Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K;
        case 'wa'
            Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ...
                N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U)));
    end

    J = [J i*(m-(i+1)/2)-m+j];
    Y(J) = []; % no need for the cluster information about j.

    % update m, N, R
    m = m-1;
    N(n+s) = N(R(i)) + N(R(j));
    R(i) = n+s;
    R(j:(n-1))=R((j+1):n);
end
|
||||
18
matlab/spatial/initSpatialMultiMixture.m
Normal file
18
matlab/spatial/initSpatialMultiMixture.m
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
function [partition, counts, sumcounts] = initSpatialMultiMixture(initData, ...
    npops, Z, rows, noalle, dist, adjprior, priorTerm, fixedK)
% INITSPATIALMULTIMIXTURE  Initial state for the spatial multimixture search.
%   Finds a starting partition with the greedy algorithm of BAPS 3.1 by
%   packing the inputs into a struct and calling indMix_fixK(c,npops,1,0).
%   The results are read back from the globals PARTITION, COUNTS and
%   SUMCOUNTS after that call.
%
%   fixedK is accepted but not used by this function.

global PARTITION;
global COUNTS;
global SUMCOUNTS;
global POP_LOGML;

% Input structure handed to indMix_fixK.
c.data = initData;
c.Z = Z;
c.rows = rows;
c.rowsFromInd = 0;
c.noalle = noalle;
c.dist = dist;
c.adjprior = adjprior;
c.priorTerm = priorTerm;

indMix_fixK(c,npops,1,0);

partition = PARTITION;
counts = COUNTS;
sumcounts = SUMCOUNTS;
|
||||
|
||||
|
||||
|
||||
|
||||
82
matlab/spatial/private/calcLogmlChanges.m
Normal file
82
matlab/spatial/private/calcLogmlChanges.m
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
function changes = calcLogmlChanges(inds, cqData, nCqCodes, spData, nSpCodes, locCliques, locSeparators, logml)
% CALCLOGMLCHANGES  Change in logml if inds are moved to each other cluster.
%   The individuals in inds are expected to belong to the same cluster;
%   the cluster of inds(1) is used as the source cluster.
%
%   changes is an npops-by-1 vector; changes(k) is
%   computeTotalLogml() - logml evaluated with inds temporarily placed in
%   cluster k. The entry of the source cluster is left at 0. When inds is
%   empty, changes is all zeros.
%
%   The global count tables are modified while the candidates are being
%   evaluated and restored before returning.
% Lu Cheng, 15.12.2012

global CQ_COUNTS; global SUM_CQ_COUNTS;
global SP_COUNTS; global SUM_SP_COUNTS;
global PARTITION;
global LOC_CQ_COUNTS;
global LOC_SP_COUNTS;

npops = size(CQ_COUNTS,3);
changes = zeros(npops,1);

if isempty(inds)
    return
end

srcPop = PARTITION(inds(1));

% Counts contributed by the individuals that are being moved.
[dCq, dCqSum] = computeDiffInCounts(inds, cqData, nCqCodes);
[dSp, dSpSum] = computeDiffInCounts(inds, spData, nSpCodes);
dLocCq = computeDiffInCliqCounts(locCliques, inds);
dLocSp = computeDiffInCliqCounts(locSeparators, inds);

% Take the individuals out of their current cluster.
shiftClusterCounts(srcPop, false, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp);

for dstPop = 1:npops
    if dstPop == srcPop
        continue
    end
    % Try the individuals in dstPop, score, and undo.
    shiftClusterCounts(dstPop, true, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp);
    logml_new = computeTotalLogml();
    changes(dstPop) = logml_new - logml;
    shiftClusterCounts(dstPop, false, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp);
end

% Put the individuals back where they came from.
shiftClusterCounts(srcPop, true, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp);


function shiftClusterCounts(pop, adding, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp)
% Adds (adding = true) or subtracts (adding = false) the given count
% differences to/from the global tables of cluster pop, then calls
% updateLogmlTable(pop).

global CQ_COUNTS; global SUM_CQ_COUNTS;
global SP_COUNTS; global SUM_SP_COUNTS;
global LOC_CQ_COUNTS;
global LOC_SP_COUNTS;

if adding
    CQ_COUNTS(:,:,pop)   = CQ_COUNTS(:,:,pop)   + dCq;
    SP_COUNTS(:,:,pop)   = SP_COUNTS(:,:,pop)   + dSp;
    SUM_CQ_COUNTS(:,pop) = SUM_CQ_COUNTS(:,pop) + dCqSum;
    SUM_SP_COUNTS(:,pop) = SUM_SP_COUNTS(:,pop) + dSpSum;
    LOC_CQ_COUNTS(:,pop) = LOC_CQ_COUNTS(:,pop) + dLocCq;
    LOC_SP_COUNTS(:,pop) = LOC_SP_COUNTS(:,pop) + dLocSp;
else
    CQ_COUNTS(:,:,pop)   = CQ_COUNTS(:,:,pop)   - dCq;
    SP_COUNTS(:,:,pop)   = SP_COUNTS(:,:,pop)   - dSp;
    SUM_CQ_COUNTS(:,pop) = SUM_CQ_COUNTS(:,pop) - dCqSum;
    SUM_SP_COUNTS(:,pop) = SUM_SP_COUNTS(:,pop) - dSpSum;
    LOC_CQ_COUNTS(:,pop) = LOC_CQ_COUNTS(:,pop) - dLocCq;
    LOC_SP_COUNTS(:,pop) = LOC_SP_COUNTS(:,pop) - dLocSp;
end

updateLogmlTable(pop);
|
||||
|
||||
|
||||
%---------------------------------------------------------------------
|
||||
18
matlab/spatial/private/clearGlobalVars.m
Normal file
18
matlab/spatial/private/clearGlobalVars.m
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
function clearGlobalVars
% CLEARGLOBALVARS  Reset the global state variables listed below to [].
% Lu Cheng, 15.12.2012

% Partition and count tables
global COUNTS SUMCOUNTS PARTITION LOGML_TABLE
COUNTS = [];
SUMCOUNTS = [];
PARTITION = [];
LOGML_TABLE = [];

% Cached logml difference tables
global ADDITION_DIFFERENCE REMOVAL_DIFFERENCE JOIN_DIFFERENCE
ADDITION_DIFFERENCE = [];
REMOVAL_DIFFERENCE = [];
JOIN_DIFFERENCE = [];

% Clique (CQ) and separator (SP) count tables and priors
global CQ_COUNTS SP_COUNTS SUM_CQ_COUNTS SUM_SP_COUNTS
CQ_COUNTS = [];
SP_COUNTS = [];
SUM_CQ_COUNTS = [];
SUM_SP_COUNTS = [];

global CQ_PRIOR SP_PRIOR
CQ_PRIOR = [];
SP_PRIOR = [];

global LOC_SP_COUNTS LOC_CQ_COUNTS
LOC_SP_COUNTS = [];
LOC_CQ_COUNTS = [];
|
||||
52
matlab/spatial/private/cluster_own.m
Normal file
52
matlab/spatial/private/cluster_own.m
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
function T = cluster_own(Z,nclust)
% CLUSTER_OWN  Cut a cluster tree into at most nclust clusters.
%   Searches down the dendrogram from the root until nclust clusters are
%   found.
%
%   Z      - (m-1)-by-3 tree; columns 1 and 2 hold the labels of the two
%            merged nodes (labels <= m are original observations, label
%            m+s is the node created on row s)
%   nclust - requested number of clusters
%   T      - m-by-1 vector of cluster numbers of the observations
% comments added by Lu Cheng
% 04.01.2011

m = size(Z,1)+1;

% No more observations than requested clusters: every one on its own.
if m <= nclust
    T = (1:m)';
    return
end

% A single cluster holds everything.
if nclust == 1
    T = ones(m,1);
    return
end

T = zeros(m,1);
cutoff = 2*m-nclust+1;   % nodes with a label below this were created before the cut
label = 1;
for k = (m-nclust+1):(m-1)
    for side = 1:2       % 1 = left tree, 2 = right tree
        node = Z(k,side);
        if node <= m
            % original observation, no leaves below it
            T(node) = label;
            label = label + 1;
        elseif node < cutoff
            % created before the cutoff: label every leaf below it
            T = clusternum(Z, T, node-m, label);
            label = label + 1;
        end
    end
end


function T = clusternum(X, T, k, c)
% Assigns cluster number c in T to all leaves below the tree rows k of X,
% walking the tree one level at a time.
m = size(X,1)+1;
while ~isempty(k)
    % Children of the nodes at this level
    kids = X(k,1:2);
    kids = kids(:);

    % Leaf children receive the cluster number
    isLeaf = (kids <= m);
    T(kids(isLeaf)) = c;

    % Internal children form the next level
    k = kids(~isLeaf) - m;
end
|
||||
|
||||
27
matlab/spatial/private/computeCounts.m
Normal file
27
matlab/spatial/private/computeCounts.m
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
function [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops)
% COMPUTECOUNTS  Per-cluster member counts of every clique and separator.
%   cliques, separators - matrices whose rows list individual indices;
%                         entries <= 0 are ignored
%   npops               - number of clusters
%
%   cliqcounts(r,k) is the number of individuals on row r of cliques that
%   the global PARTITION assigns to cluster k; sepcounts is the same for
%   separators.

global PARTITION;

cliqcounts = countMembersPerPop(cliques, PARTITION, npops);
sepcounts  = countMembersPerPop(separators, PARTITION, npops);


function counts = countMembersPerPop(members, partition, npops)
% Replaces every positive entry of members by its cluster label and counts
% the occurrences of each label 1..npops on every row.
nGroups = size(members,1);

labels = zeros(nGroups, size(members,2));
used = find(members > 0);
labels(used) = partition(members(used));

counts = zeros(nGroups, npops);
for p = 1:npops
    counts(:,p) = sum(labels == p, 2);
end
|
||||
|
||||
%-------------------------------------------------------------------------
|
||||
19
matlab/spatial/private/computeDiffInCliqCounts.m
Normal file
19
matlab/spatial/private/computeDiffInCliqCounts.m
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
function diffInCliqCounts = computeDiffInCliqCounts(cliques, inds)
% COMPUTEDIFFINCLIQCOUNTS  Per-clique number of occurrences of inds.
%   Computes the change in CLIQCOUNTS (or in SEPCOUNTS, when separators
%   are passed in) caused by moving the individuals inds.
%   diffInCliqCounts is an ncliq-by-1 array: it is the amount to subtract
%   from the CLIQCOUNTS column of the cluster the individuals leave and to
%   add to the column of the cluster they join.
%
% taken from spatial model of Jukka Siren's code
% Lu Cheng
% 15.12.2012

diffInCliqCounts = zeros(size(cliques,1),1);

for k = 1:length(inds)
    % Rows (cliques) in which this individual appears.
    hits = (cliques == inds(k));
    diffInCliqCounts = diffInCliqCounts + sum(hits,2);
end
|
||||
7
matlab/spatial/private/computeDiffInCounts.m
Normal file
7
matlab/spatial/private/computeDiffInCounts.m
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
function [counts, sumcounts] = computeDiffInCounts(rows, data, nLetters)
% COMPUTEDIFFINCOUNTS  Symbol counts of the selected rows of data.
%   rows     - indices of the rows of data (ninds-by-nLoci) to count
%   nLetters - the maximum number of different symbols over all loci
%
%   counts    - nLetters-by-nLoci histogram of the symbol codes
%               1..nLetters in every column of data(rows,:)
%   sumcounts - nLoci-by-1 column totals of counts
% Lu Cheng, 25.05.2011

binEdges = 1:nLetters;
selected = data(rows,:);

% The explicit dimension keeps the histogram column-wise even when a
% single row is selected.
counts = histc(selected, binEdges, 1);
sumcounts = sum(counts,1)';
|
||||
42
matlab/spatial/private/computeTotalLogml.m
Normal file
42
matlab/spatial/private/computeTotalLogml.m
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
function logml = computeTotalLogml
% COMPUTETOTALLOGML  Log marginal likelihood of the data.
%   Returns sum(LOGML_TABLE) plus a spatial prior term computed from the
%   global tables LOC_CQ_COUNTS (cliques) and LOC_SP_COUNTS (separators).
%   Only clusters with a non-zero column in LOC_CQ_COUNTS take part.
% Lu Cheng, 15.12.2012

global LOGML_TABLE;
global LOC_CQ_COUNTS;
global LOC_SP_COUNTS;

usedPops = any(LOC_CQ_COUNTS,1);
npops = sum(usedPops);

% Added by Lu Cheng, 15.12.2012: some rows might be all zero if some
% sequence is deleted, so those rows are left out.
cliqRows = find(any(LOC_CQ_COUNTS,2));
sepRows  = find(any(LOC_SP_COUNTS,2));

cliqTerm = spatialTerm(LOC_CQ_COUNTS(cliqRows,usedPops), npops);
sepTerm  = spatialTerm(LOC_SP_COUNTS(sepRows,usedPops), npops);

spatialPrior = (cliqTerm - sepTerm);

logml = sum(LOGML_TABLE) + spatialPrior;


function term = spatialTerm(counts, npops)
% Evaluates the gammaln expression of the spatial prior for one count
% table (rows = cliques or separators, columns = non-empty clusters).
rowTotals = sum(counts, 2);
nRows = size(counts,1);

% Per-row size, capped at the number of non-empty clusters.
sizes = min([rowTotals'; npops*ones(1,nRows)])';

term = sum(sum(gammaln(counts + repmat(1./sizes, [1 npops])))) ...
    - sum(npops*(gammaln(1./sizes))) ...
    - sum(gammaln(rowTotals + 1));
|
||||
17
matlab/spatial/private/initialCounts2.m
Normal file
17
matlab/spatial/private/initialCounts2.m
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
function [sumcounts, counts] = initialCounts2(partition, data, npops, nLetters)
% INITIALCOUNTS2  Count tables for an initial partition.
%   partition - cluster label of every row of data
%   data      - matrix of symbol codes, one row per sequence
%   npops     - number of populations in the partition
%   nLetters  - the maximum number of different symbols over all loci
%
%   counts    - nLetters-by-nLoci-by-npops; page k is the column-wise
%               histogram of codes 1..nLetters over the rows in cluster k
%   sumcounts - nLoci-by-npops; column k holds the column totals of page k
% Lu Cheng, 25.05.2011

nLoci = size(data,2);
binEdges = 1:nLetters;

counts = zeros(nLetters,nLoci,npops);
sumcounts = zeros(nLoci,npops);

for pop = 1:npops
    members = (partition==pop);
    counts(:,:,pop) = histc(data(members,:), binEdges, 1);
    sumcounts(:,pop) = sum(counts(:,:,pop),1);
end
|
||||
|
||||
366
matlab/spatial/private/model_search_parallel.m
Normal file
366
matlab/spatial/private/model_search_parallel.m
Normal file
|
|
@ -0,0 +1,366 @@
|
|||
function [partition, logml, partitionSummary, logmldiff] = model_search_parallel(c, partition, orig_dist, roundTypes)
% MODEL_SEARCH_PARALLEL  Greedy search for the best partition.
%   This function clusters DNA alignment using "codon" model in Corander
%   and Tang's paper: Bayesian analysis of population structure based on
%   linked molecular information (2007), Mathematical Biosciences
%
%   Inputs
%     c          - preprocessed data for the sequence alignment; the fields
%                  read here are cqPrior, spPrior, cqData, spData,
%                  nMaxCqCodes, nMaxSpCodes, locCliques, locSeparators, nSeq
%     partition  - initial partition of the individuals
%     orig_dist  - hamming distance between individuals, indexed by
%                  (1,2)(1,3)(1,4)...(2,3)(2,4).....(3,4)...(n-1,n)
%     roundTypes - array of operation types, executed in order:
%                    1    move single individuals to another cluster
%                    2    join two clusters
%                    3, 4 split a cluster and move one part to another
%                         cluster (3: up to 20 parts, 4: two parts)
%
%   Outputs
%     partition        - final value of the global PARTITION
%     logml            - total logml of the last accepted state
%     partitionSummary - 30-by-2-by-K array (K = number of clusters in the
%                        initial partition); page k stores up to 30
%                        [nclusters logml] records of visited partitions
%                        that had k non-empty clusters
%     logmldiff        - nINDS-by-nPOPS; change of logml if individual i
%                        is moved to cluster j (NaN entries replaced by 0)
%
%   The search state lives in global variables, which are reset at entry.
%
% Lu Cheng
% 15.12.2012

interactive = false;

global PARTITION;
global CQ_COUNTS; global SUM_CQ_COUNTS;
global SP_COUNTS; global SUM_SP_COUNTS;
global CQ_PRIOR; global SP_PRIOR;
global LOGML_TABLE;
global ADDITION_DIFFERENCE;
global REMOVAL_DIFFERENCE;
global JOIN_DIFFERENCE;

global LOC_SP_COUNTS;
global LOC_CQ_COUNTS;

clearGlobalVars;

nPOPS = length(unique(partition));

% PRIOR VALUES:
CQ_PRIOR = c.cqPrior;
SP_PRIOR = c.spPrior;

% Initialize PARTITION, **_COUNTS, SUM_**_COUNTS
[SUM_CQ_COUNTS, CQ_COUNTS] = initialCounts2(partition, c.cqData, nPOPS, c.nMaxCqCodes);
[SUM_SP_COUNTS, SP_COUNTS] = initialCounts2(partition, c.spData, nPOPS, c.nMaxSpCodes);

PARTITION = partition;
[LOC_CQ_COUNTS, LOC_SP_COUNTS] = computeCounts(c.locCliques, c.locSeparators, nPOPS);

% Records of the 30 best partitions (npops and logml) per cluster count.
partitionSummary = -Inf*ones(30,2,nPOPS);
partitionSummary(:,1,:) = zeros(30,1,nPOPS);
worstLogml = -Inf*ones(1, nPOPS);
worstIndex = ones(1, nPOPS);

% Initialize LOGML_TABLE:
nINDS = c.nSeq;
LOGML_TABLE = zeros(nPOPS,1);
updateLogmlTable(1:nPOPS);

% ***********Doc:********************
% REMOVAL_DIFFERENCE(ind) tells the change in logml if ind is removed from
% its cluster. nan, if the cluster has changed, since the value was last
% calculated.
%
% ADDITION_DIFFERENCE(ind, pop) tells the change in logml if ind is added
% to cluster pop. nan, if the cluster has changed since the value was last
% calculated. Always nan, if pop is ind's own cluster.
%
% JOIN_DIFFERENCE(pop1,pop2) = tells the change in logml if pop1 and pop2
% are combined. nan, if either cluster has changed since the value was last
% calculated.
% ***********Doc end*****************
REMOVAL_DIFFERENCE = NaN(nINDS,1);
ADDITION_DIFFERENCE = NaN(nINDS,nPOPS);
JOIN_DIFFERENCE = NaN(nPOPS,nPOPS);

logml = computeTotalLogml;

disp('The beginning:');
disp(['Nclusters: ' num2str(length(unique(PARTITION)))]);
disp(['Log(ml*prior): ' num2str(logml)]);
disp(' ');

[partitionSummary, worstLogml, worstIndex] = ...
    recordPartition(logml, partitionSummary, worstLogml, worstIndex);

% START SEARCH OF THE BEST PARTITION:

% vipu(r) == 1 marks that step type r produced no change the last time it
% ran; all flags are cleared whenever any step changes the partition.
vipu = zeros(1,14);
if interactive
    roundTypes = input('Input steps: ');
    if ischar(roundTypes), roundTypes = str2num(roundTypes); end %#ok<ST2NM>
end
ready = 0;

while ready ~= 1

    for n = 1:length(roundTypes)
        roundType = roundTypes(n);
        moveCounter = 0;

        if roundType==1 && vipu(1)==0 % move an individual to another population

            inds = getMoveInds(orig_dist,nINDS); % get inds to be moved

            for ind = inds(:)'
                update_difference_tables(ind, c.cqData, c.nMaxCqCodes, ...
                    c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
                tmpDiff = REMOVAL_DIFFERENCE(ind) + ADDITION_DIFFERENCE(ind,:);
                tmpDiff(PARTITION(ind)) = 0; % staying put changes nothing
                [maxChange, maxIndex] = max(tmpDiff);
                if maxChange>1e-5
                    updateGlobalVariables(ind, maxIndex, c.cqData, c.nMaxCqCodes, ...
                        c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators);
                    logml = computeTotalLogml();
                    moveCounter = moveCounter+1;
                    vipu = zeros(1,14);

                    [partitionSummary, worstLogml, worstIndex] = ...
                        recordPartition(logml, partitionSummary, worstLogml, worstIndex);
                end
            end
            if moveCounter==0, vipu(1)=1; end
            disp(['Step 1: ' num2str(moveCounter) ' individuals were moved.']);

        elseif roundType==2 && vipu(2)==0 % join two populations

            update_join_difference(c.cqData, c.nMaxCqCodes, ...
                c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
            [maxChange, aux] = max(JOIN_DIFFERENCE(:));
            [i1, i2] = ind2sub([nPOPS,nPOPS],aux);

            if maxChange>1e-5
                tmpInds = find(PARTITION==i1);
                updateGlobalVariables(tmpInds, i2, c.cqData, c.nMaxCqCodes, ...
                    c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators);
                logml = computeTotalLogml;

                disp(['Step 2: Clusters ' num2str(i1) ' and ' num2str(i2) ' combined.']);
                vipu = zeros(1,14);

                [partitionSummary, worstLogml, worstIndex] = ...
                    recordPartition(logml, partitionSummary, worstLogml, worstIndex);
            else
                disp('Step 2: no changes.');
                vipu(2)=1;
            end

        elseif ismember(roundType, 3:4) && vipu(roundType)==0 % Split a population, and move one subpopulation to another population

            pops = randperm(nPOPS);

            splitFlags = zeros(nPOPS,1);
            for pop = pops(:)'

                maxChange = 0;
                indsToBeMoved = [];

                inds2 = find(PARTITION==pop);
                ninds2 = length(inds2);
                if ninds2>4

                    dist3 = getDistance(inds2, orig_dist, nINDS);
                    if roundType==3
                        npops2 = min(20, floor(ninds2 / 5)); % number of parts to split into
                    else
                        npops2 = 2;
                    end

                    Z3 = linkage(dist3);
                    T3 = cluster_own(Z3, npops2);

                    % Find the sub-cluster whose move improves logml most.
                    for i = 1:npops2
                        indsX = inds2(T3==i); indsX = indsX';
                        tmpChanges = calcLogmlChanges(indsX, c.cqData, c.nMaxCqCodes, ...
                            c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
                        [tmpMaxChange, tmpMaxPop] = max(tmpChanges);
                        if tmpMaxChange>maxChange
                            maxChange = tmpMaxChange;
                            i2 = tmpMaxPop;
                            indsToBeMoved = indsX;
                        end
                    end
                    if maxChange>1e-5
                        updateGlobalVariables(indsToBeMoved, i2, c.cqData, c.nMaxCqCodes, ...
                            c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators);
                        logml = computeTotalLogml;
                        splitFlags(pop)=1;

                        [partitionSummary, worstLogml, worstIndex] = ...
                            recordPartition(logml, partitionSummary, worstLogml, worstIndex);
                    end
                end
            end
            if any(splitFlags)
                disp(['Step ' num2str(roundType) ': ' num2str(sum(splitFlags)) ' populations were split.']);
                vipu = zeros(1,14);
            else
                disp(['Step ' num2str(roundType) ': no changes.']);
                vipu(roundType)=1;
            end
        end
    end

    if interactive
        roundTypes = input('Input extra steps: ');
        if ischar(roundTypes), roundTypes = str2num(roundTypes); end %#ok<ST2NM>
    else
        roundTypes = [];
    end

    if isempty(roundTypes)
        ready = 1;
    end
end

disp('BEST PARTITION: ');
disp(['Nclusters: ' num2str(length(unique(PARTITION)))]);
disp(['Log(ml): ' num2str(logml)]);
disp(' ');

nPOPS = rmEmptyPopulation(c.locCliques, c.locSeparators);
ADDITION_DIFFERENCE(:) = NaN;
REMOVAL_DIFFERENCE(:) = NaN;

logmldiff = zeros(nINDS,nPOPS); % the change of logml if individual i is moved to group j
for i=1:nINDS
    update_difference_tables(i, c.cqData, c.nMaxCqCodes, ...
        c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
    logmldiff(i,:) = REMOVAL_DIFFERENCE(i)+ADDITION_DIFFERENCE(i,:);
    if all(isnan(logmldiff(i,:)))
        % Previously a "keyboard" breakpoint, which stops the run and
        % waits for console input; report the anomaly and carry on.
        warning('model_search_parallel:allNaN', ...
            'All logml differences are NaN for individual %d.', i);
    end
end
logmldiff(isnan(logmldiff))=0;
partition = PARTITION;


function [partitionSummary, worstLogml, worstIndex] = recordPartition(logml, partitionSummary, worstLogml, worstIndex)
% Offers the current partition (global PARTITION) with value logml to the
% summary page of its number of non-empty clusters, and refreshes that
% page's worst record when the partition was added.
global PARTITION;

nnotEmptyPops = length(unique(PARTITION));
if logml>worstLogml(nnotEmptyPops)
    [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ...
        partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops));
    if (added==1)
        [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ...
            min(partitionSummary(:,2,nnotEmptyPops));
    end
end
|
||||
|
||||
|
||||
|
||||
|
||||
%----------------------------------------------------------------------------
|
||||
|
||||
|
||||
function [dist2, dind1, dind2] = getDistance(inds2, dist_orig, ninds)
% GETDISTANCE  Pick the distances between the samples in inds2.
%   dist_orig specifies the distances of
%   (1,2),(1,3),(1,4)......(ninds-1,ninds)
%
%   inds2 - sample indices in ascending order (an error is raised
%           otherwise)
%   ninds - total number of samples covered by dist_orig
%
%   dist2        - distances of the pairs (inds2(i), inds2(j)), i < j
%   dind1, dind2 - column vectors with the first and second sample index
%                  of every returned pair
%   With fewer than two samples all three outputs are empty.
% Lu Cheng, 22.06.2011

if ~issorted(inds2)
    error('inds2 is not in ascending order!');
end

ninds2 = length(inds2);

% Number of unordered pairs. Computed arithmetically rather than with
% nchoosek(ninds2,2), which raises an error when ninds2 < 2.
npairs = ninds2*(ninds2-1)/2;
apu = zeros(npairs,2);
irow = 1;
for i=1:ninds2-1
    for j=i+1:ninds2
        apu(irow,:) = [inds2(i) inds2(j)];
        irow = irow+1;
    end
end

dind1 = apu(:,1);
dind2 = apu(:,2);

% Position of pair (a,b), a<b, inside the distance vector.
pos = (dind1-1).*ninds - dind1 ./ 2 .* (dind1-1) + (dind2-dind1);
dist2 = dist_orig(pos);
|
||||
|
||||
|
||||
%---------------------------------------------------------------
|
||||
|
||||
|
||||
function inds = getMoveInds(dist_orig, ninds)
% GETMOVEINDS  Individuals that are candidates for moving to another cluster.
%   Goes through the clusters of the global PARTITION. A cluster with
%   fewer than 20 members contributes all of its members. For a larger
%   cluster the pairwise distances of its members are sorted in descending
%   order, the value at 30% of that list is used as a threshold, and every
%   member that takes part in a pair with a strictly larger distance is
%   selected.
%
%   dist_orig - pairwise distances of all individuals (see getDistance)
%   ninds     - total number of individuals
%   inds      - row vector of the selected individual indices
% Lu Cheng, 25.05.2011

global PARTITION;

inds = [];
clusterIds = unique(PARTITION);

for popId = clusterIds(:)'
    members = find(PARTITION == popId)';

    if length(members) >= 20
        [pairDist, firstInd, secondInd] = getDistance(members,dist_orig,ninds);
        ranked = sort(pairDist,'Descend');
        threshold = ranked(round(length(ranked)*0.3));
        farPairs = find(pairDist>threshold);
        picked = union(unique(firstInd(farPairs)), unique(secondInd(farPairs)));
    else
        % small cluster: take everybody
        picked = members;
    end

    inds = [inds picked(:)']; %#ok<AGROW>
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
% ------------------------------------------------------------
|
||||
|
||||
function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex)
% ADDTOSUMMARY  Store the current partition's logml in the summary table.
%   The caller has already established that logml is larger than the
%   worst value in partitionSummary. If the table does not yet hold this
%   logml value (within 1e-5), the row worstIndex is overwritten with the
%   number of clusters of the current partition (global PARTITION) and
%   logml. Otherwise nothing is done.
%
%   partitionSummary - table with rows [nclusters logml]
%   added            - 1 if the row was written, 0 otherwise

global PARTITION;

alreadyListed = find(abs(partitionSummary(:,2)-logml)<1e-5);

if ~isempty(alreadyListed)
    added = 0;
    return
end

% The partition found now is not yet recorded in the summary.
partitionSummary(worstIndex,1) = length(unique(PARTITION));
partitionSummary(worstIndex,2) = logml;
added = 1;
|
||||
385
matlab/spatial/private/model_search_pregroup.m
Normal file
385
matlab/spatial/private/model_search_pregroup.m
Normal file
|
|
@ -0,0 +1,385 @@
|
|||
function [partition, logml, partitionSummary, logmldiff] = model_search_pregroup(c, pgPart, pgDist, roundTypes, nMaxPops)
% MODEL_SEARCH_PREGROUP  Greedy search for the best clustering of pregroups.
%
% This function clusters a DNA alignment using the "codon" model in Corander
% and Tang's paper: Bayesian analysis of population structure based on
% linked molecular information (2007), Mathematical Biosciences.
%
% Inputs
%   c          - preprocessed data for the sequence alignment (fields read
%                here: nSeq, cqPrior, spPrior, cqData, spData, nMaxCqCodes,
%                nMaxSpCodes, locCliques, locSeparators)
%   pgPart     - partition which assigns sequences to pregroups
%   pgDist     - distances between the pregroups, ordered
%                (1,2)(1,3)(1,4)...(2,3)(2,4).....(3,4)...(n-1,n)
%   roundTypes - array of operation types (1: move pregroups, 2: join two
%                populations, 3/4: split a population)
%   nMaxPops   - maximum number of populations
%
% Outputs
%   partition        - nPregroup-by-1, final cluster label of each pregroup
%   logml            - log marginal likelihood of the final partition
%   partitionSummary - 30-by-2-by-nMaxPops table; page k stores [npops logml]
%                      of the best partitions found with k non-empty clusters
%   logmldiff        - nPregroup-by-npops, change of logml if pregroup i is
%                      moved to group j (NaN entries are replaced by 0)
%
% The search state lives in global variables that are shared with the other
% helper functions.
%
% Lu Cheng
% 21.03.2012

interactive = false;

global PARTITION;
global CQ_COUNTS;global SUM_CQ_COUNTS;
global SP_COUNTS;global SUM_SP_COUNTS;
global CQ_PRIOR; global SP_PRIOR;
global LOGML_TABLE;
global ADDITION_DIFFERENCE;
global REMOVAL_DIFFERENCE;
global JOIN_DIFFERENCE;

global LOC_SP_COUNTS;
global LOC_CQ_COUNTS;

clearGlobalVars;

nINDS = c.nSeq;
nPOPS = nMaxPops;

% load pregroup information
nPregroup = length(unique(pgPart));
if nPregroup<nMaxPops
    error('#pregroup: %d, nMaxPops: %d. Number of pregroups should be higher than maximum number of population. \n',nPregroup,nMaxPops);
end

pregroups = cell(nPregroup,1);
for i=1:nPregroup
    pregroups{i} = find(pgPart==i);
end

% Initial partition: complete-linkage clustering of the pregroups.
pgZ = linkage(pgDist(:)','complete');
initPart = cluster(pgZ,'maxclust',nPOPS);
partition = zeros(nINDS,1);
for i=1:nPregroup
    partition(pregroups{i}) = initPart(i);
end
clear i pgZ initPart

% PRIOR VALUES:
CQ_PRIOR = c.cqPrior;
SP_PRIOR = c.spPrior;

% Initialize PARTITION, **_COUNTS, SUM_**_COUNTS
[sumCqCounts, cqCounts] = initialCounts2(partition, c.cqData, nPOPS, c.nMaxCqCodes);
[sumSpCounts, spCounts] = initialCounts2(partition, c.spData, nPOPS, c.nMaxSpCodes);

CQ_COUNTS = cqCounts; SUM_CQ_COUNTS = sumCqCounts;
SP_COUNTS = spCounts; SUM_SP_COUNTS = sumSpCounts;

PARTITION = partition;
[cliqcounts, sepcounts] = computeCounts(c.locCliques, c.locSeparators, nPOPS);
LOC_CQ_COUNTS = cliqcounts;
LOC_SP_COUNTS = sepcounts;

% Information about the 30 best partitions (npops and logml)
partitionSummary = -Inf*ones(30,2,nPOPS);
partitionSummary(:,1,:) = zeros(30,1,nPOPS);
worstLogml = -Inf*ones(1, nPOPS); worstIndex = ones(1, nPOPS);

clear partition cqCounts sumCqCounts spCounts sumSpCounts;

% Initialize LOGML_TABLE:
LOGML_TABLE = zeros(nPOPS,1);
updateLogmlTable(1:nPOPS);

REMOVAL_DIFFERENCE = nan(nINDS,1);
ADDITION_DIFFERENCE = nan(nINDS,nPOPS);
JOIN_DIFFERENCE = nan(nPOPS, nPOPS);

% ***********Doc:********************
% REMOVAL_DIFFERENCE(ind) tells the change in logml if ind is removed from
% its cluster. nan, if the cluster has changed, since the value was last
% calculated.
%
% ADDITION_DIFFERENCE(ind, pop) tells the change in logml if ind is added
% to cluster pop. nan, if the cluster has changed since the value was last
% calculated. Always nan, if pop is ind's own cluster.
%
% JOIN_DIFFERENCE(pop1,pop2) = tells the change in logml if pop1 and pop2
% are combined. nan, if either cluster has changed since the value was last
% calculated.
% ***********Doc end*****************

logml = computeTotalLogml;

disp('The beginning:');
% disp(['Partition: ' num2str(PARTITION')]);
disp(['Nclusters: ' num2str(length(unique(PARTITION)))]);
disp(['Log(ml*prior): ' num2str(logml)]);
disp(' ');

[partitionSummary, worstLogml, worstIndex] = ...
    recordPartition(logml, partitionSummary, worstLogml, worstIndex);

% START SEARCH OF THE BEST PARTITION:

% vipu(k)==1 means step type k produced no change since the last accepted
% change, so it is skipped until something changes again.
vipu = zeros(1,14);
if interactive
    roundTypes = input('Input steps: ');
    if ischar(roundTypes), roundTypes = str2num(roundTypes); end %#ok<ST2NM>
end
ready = 0;

while ready ~= 1

    % disp(['Performing steps: ' num2str(roundTypes)]);

    for n = 1:length(roundTypes)
        roundType = roundTypes(n);   % not called "round": that shadows the builtin
        moveCounter = 0;

        if roundType==1 && vipu(1)==0  % move pregroups to another population

            pgInds = getMoveInds(pgPart,pgDist,nPregroup);  % get pregroup inds to be moved

            for pgind = pgInds(:)'
                tmpInds = pregroups{pgind};
                tmpChanges = calcLogmlChanges(tmpInds, c.cqData, c.nMaxCqCodes, ...
                    c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);

                [maxChange, maxIndex] = max(tmpChanges);
                if maxChange>1e-5
                    updateGlobalVariables(tmpInds, maxIndex, c.cqData, c.nMaxCqCodes, ...
                        c.spData, c.nMaxSpCodes,c.locCliques, c.locSeparators);
                    logml = computeTotalLogml();
                    % One pregroup was moved.  (Adding length(pgInds) here
                    % would count the whole candidate list for every move.)
                    moveCounter = moveCounter+1;
                    vipu = zeros(1,14);

                    [partitionSummary, worstLogml, worstIndex] = ...
                        recordPartition(logml, partitionSummary, worstLogml, worstIndex);
                end
            end
            if moveCounter==0, vipu(1)=1; end
            disp(['Step 1: ' num2str(moveCounter) ' pregroups were moved.']);

        elseif roundType==2 && vipu(2)==0  % join two populations

            update_join_difference(c.cqData, c.nMaxCqCodes, ...
                c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
            [maxChange, aux] = max(JOIN_DIFFERENCE(:));
            [i1, i2] = ind2sub([nPOPS,nPOPS],aux);

            if maxChange>1e-5
                tmpInds = find(PARTITION==i1);
                updateGlobalVariables(tmpInds, i2, c.cqData, c.nMaxCqCodes, ...
                    c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators);
                logml = computeTotalLogml;

                disp(['Step 2: Clusters ' num2str(i1) ' and ' num2str(i2) ' combined.']);
                vipu = zeros(1,14);

                [partitionSummary, worstLogml, worstIndex] = ...
                    recordPartition(logml, partitionSummary, worstLogml, worstIndex);
            else
                disp('Step 2: no changes.');
                vipu(2)=1;
            end

        elseif ismember(roundType, 3:4) && vipu(roundType)==0
            % Split a population, and move one subpopulation to another population

            pops = randperm(nPOPS);

            splitFlags = zeros(nPOPS,1);
            for pop = pops(:)'

                maxChange = 0;
                indsToBeMoved = [];

                inds2 = find(PARTITION==pop);
                pgInds2 = unique(pgPart(inds2));
                nPgInds2 = length(pgInds2);
                if nPgInds2>4

                    dist3 = getDistance(pgInds2,pgDist,nPregroup);
                    if roundType==3
                        npops2 = min(20, floor(nPgInds2 / 5));
                    else
                        npops2 = 2;
                    end

                    Z3 = linkage(dist3(:)','complete');
                    T3 = cluster(Z3, 'maxclust', npops2);

                    % Find the sub-cluster whose relocation improves logml most.
                    for i = 1:npops2
                        indsX = pgInds2(T3==i);
                        indsX = cell2mat(pregroups(indsX));
                        tmpChanges = calcLogmlChanges(indsX, c.cqData, c.nMaxCqCodes, ...
                            c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
                        [tmpMaxChange, tmpMaxPop] = max(tmpChanges);
                        if tmpMaxChange>maxChange
                            maxChange = tmpMaxChange;
                            i2 = tmpMaxPop;
                            indsToBeMoved = indsX;
                        end
                    end
                    if maxChange>1e-5
                        updateGlobalVariables(indsToBeMoved, i2, c.cqData, c.nMaxCqCodes, ...
                            c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators);
                        logml = computeTotalLogml;
                        splitFlags(pop)=1;

                        [partitionSummary, worstLogml, worstIndex] = ...
                            recordPartition(logml, partitionSummary, worstLogml, worstIndex);
                    end
                end
            end
            if any(splitFlags)
                disp(['Step ' num2str(roundType) ': ' num2str(sum(splitFlags)) ' populations were split.']);
                vipu = zeros(1,14);
            else
                disp(['Step ' num2str(roundType) ': no changes.']);
                vipu(roundType)=1;
            end
        end
    end

    if interactive
        roundTypes = input('Input extra steps: ');
        if ischar(roundTypes), roundTypes = str2num(roundTypes); end %#ok<ST2NM>
    else
        roundTypes = [];
    end

    if isempty(roundTypes)
        ready = 1;
    end
end

% disp(' ');
disp('BEST PARTITION: ');
% disp(['Partition: ' num2str(PARTITION')]);
disp(['Nclusters: ' num2str(length(unique(PARTITION)))]);
disp(['Log(ml): ' num2str(logml)]);
disp(' ');

nPOPS = rmEmptyPopulation(c.locCliques, c.locSeparators);

logmldiff = zeros(nPregroup,nPOPS);  % the change of logml if pregroup i is moved to group j
for i=1:nPregroup
    tmpInds = pregroups{i};
    tmpChanges = calcLogmlChanges(tmpInds, c.cqData, c.nMaxCqCodes, ...
        c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
    logmldiff(i,:) = tmpChanges';
end
logmldiff(isnan(logmldiff))=0;

% Report one label per pregroup; unique() must return a single value here,
% i.e. all sequences of a pregroup are expected to share one cluster.
partition = zeros(nPregroup,1);
for i=1:nPregroup
    partition(i)=unique(PARTITION(pgPart==i));
end

%----------------------------------------------------------------------------

function [partitionSummary, worstLogml, worstIndex] = recordPartition(logml, partitionSummary, worstLogml, worstIndex)
% Store the current partition in the summary page that belongs to its
% number of non-empty clusters, if logml beats the worst entry kept there,
% and refresh the worst value/position of that page.

global PARTITION;

nnotEmptyPops = length(unique(PARTITION));
if logml>worstLogml(nnotEmptyPops)
    [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ...
        partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops));
    if (added==1)
        [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ...
            min(partitionSummary(:,2,nnotEmptyPops));
    end
end
|
||||
|
||||
%----------------------------------------------------------------------------
|
||||
|
||||
|
||||
function [dist2, dind1, dind2] = getDistance(inds2, origDist, ninds)
% GETDISTANCE  Pick out the distances between the samples in "inds2" from
% "origDist".
%
%   inds2    - sample indices in ascending order (an error is raised
%              otherwise)
%   origDist - distances of the pairs (1,2),(1,3),(1,4)......(ninds-1,ninds)
%              stored in that order
%   ninds    - total number of samples that origDist refers to
%
%   dist2    - distances of all pairs (inds2(i), inds2(j)), i<j, in the same
%              pair ordering
%   dind1    - column vector with the first sample index of every pair
%   dind2    - column vector with the second sample index of every pair
%
% With fewer than two indices there are no pairs and all outputs are empty.
%
% Lu Cheng, 22.06.2011

if ~issorted(inds2)
    error('inds2 is not in ascending order!');
end

ninds2 = length(inds2);

% Number of pairs; computed directly so that 0 or 1 indices are accepted
% (nchoosek(n,2) rejects n < 2).
npairs = ninds2*(ninds2-1)/2;
pairs = zeros(npairs,2);

% Fill the pair list one block per first index: (i,i+1) ... (i,ninds2).
filled = 0;
for i=1:ninds2-1
    blockLen = ninds2-i;
    rows = filled+1 : filled+blockLen;
    pairs(rows,1) = inds2(i);
    pairs(rows,2) = inds2(i+1:ninds2);
    filled = filled+blockLen;
end

dind1 = pairs(:,1);
dind2 = pairs(:,2);

% Position of pair (a,b), a<b, in the condensed distance vector.
pos = (dind1-1).*ninds - dind1 ./ 2 .* (dind1-1) + (dind2-dind1);
dist2 = origDist(pos);
|
||||
|
||||
%---------------------------------------------------------------
|
||||
|
||||
|
||||
function inds = getMoveInds(pgPart, pgDist, nPregroup)
% GETMOVEINDS  Pick the pregroups that are candidates for being moved to
% another cluster.
%
% The clusters are read from the global PARTITION.  A cluster made of fewer
% than 20 pregroups contributes all of them.  For a larger cluster only the
% pregroups taking part in a pair whose distance is strictly greater than
% the 0.65 quantile of the cluster's pairwise pregroup distances are
% returned (i.e. the pregroups that are most distant to each other).
%
%   pgPart    - pregroup label of every sequence
%   pgDist    - pairwise pregroup distances, handed on to getDistance
%   nPregroup - total number of pregroups, handed on to getDistance
%   inds      - row vector with the selected pregroup indices
%
% Lu Cheng, 22.06.2011

global PARTITION;

inds = [];
clusterLabels = unique(PARTITION);

for label = clusterLabels(:)'
    groupIds = unique(pgPart(PARTITION==label));

    if length(groupIds) >= 20
        % Large cluster: keep only pregroups of the most distant pairs.
        [pairDist, firstInd, secondInd] = getDistance(groupIds,pgDist,nPregroup);
        cutoff = quantile(pairDist,0.65);
        farPairs = find(pairDist > cutoff);
        groupIds = union(unique(firstInd(farPairs)), unique(secondInd(farPairs)));
    end

    inds = [inds groupIds(:)']; %#ok<AGROW>
end
|
||||
|
||||
|
||||
% ------------------------------------------------------------
|
||||
|
||||
function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex)
% ADDTOSUMMARY  Record the current partition in the summary table.
%
% The caller has already established that logml is larger than the worst
% value stored in partitionSummary.  If partitionSummary does not yet hold
% this logml value (compared with a tolerance of 1e-5), row worstIndex is
% overwritten with the number of clusters of the current partition
% (column 1) and the new logml (column 2).  Otherwise nothing is changed.
%
%   added - 1 when the row was written, 0 when logml was already listed

global PARTITION;

alreadyListed = any(abs(partitionSummary(:,2) - logml) < 1e-5);

if alreadyListed
    added = 0;
    return;
end

% The partition found now has not been recorded in the summary yet.
partitionSummary(worstIndex,1:2) = [length(unique(PARTITION)) logml];
added = 1;
|
||||
|
||||
|
||||
|
||||
|
||||
97
matlab/spatial/private/preprocAln.m
Normal file
97
matlab/spatial/private/preprocAln.m
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
function c = preprocAln(alnMat)
% PREPROCALN  Preprocess the alignment matrix into cliques and separators.
%
% A clique is a window of three consecutive loci (i, i+1, i+2); a separator
% is the two-locus window (i, i+1) for i = 2 .. nLoci-2.  For every window
% the possible codes are the Cartesian product of the symbols observed at
% its loci; each sequence is then encoded by the row number of its code.
%
%   alnMat - nSeq-by-nLoci alignment matrix
%   c      - struct with fields
%              nSeq                      number of sequences
%              arrUniqBase               observed symbols per locus (cell)
%              arrUniqBaseNum            number of observed symbols per locus
%              nMaxCqCodes, nMaxSpCodes  largest number of codes of any
%                                        clique / separator
%              cqCodes, spCodes          code tables per clique / separator
%              cqData, spData            code index of every sequence
%              cqCounts, spCounts        number of sequences per code
%              cqPrior, spPrior          1/(number of codes) for the codes
%                                        of a window, 1 in the unused rows
%
% Lu Cheng, 24.05.2011

[nSeq, nLoci] = size(alnMat);

% unique base at each locus
arrUniqBase = cellfun(@unique, mat2cell(alnMat,nSeq,ones(1,nLoci)), 'UniformOutput',false);
arrUniqBaseNum = cellfun(@length,arrUniqBase);

% Largest code table over all cliques and over all separators.
nMaxCqCodes = max(arrUniqBaseNum(1:end-2).*arrUniqBaseNum(2:end-1).*arrUniqBaseNum(3:end));
nMaxSpCodes = max(arrUniqBaseNum(2:end-2).*arrUniqBaseNum(3:end-1));

cqCodes = cellfun(@myProd,arrUniqBase(1:end-2),arrUniqBase(2:end-1),arrUniqBase(3:end), ...
    'UniformOutput',false);
spCodes = cellfun(@myProd,arrUniqBase(2:end-2),arrUniqBase(3:end-1), ...
    'UniformOutput',false);

nCq = length(cqCodes);
nSp = length(spCodes);

cqData = zeros(nSeq,nCq);
spData = zeros(nSeq,nSp);

cqCounts = zeros(nMaxCqCodes,nCq);
spCounts = zeros(nMaxSpCodes,nSp);

cqPrior = ones(nMaxCqCodes,nCq);
spPrior = ones(nMaxSpCodes,nSp);

% Cliques: clique i covers loci i .. i+2.
for i=1:nLoci-2
    nCode = size(cqCodes{i},1);
    [found, codeIdx] = ismember(alnMat(:,i:i+2),cqCodes{i},'rows');
    cqData(:,i) = codeIdx;                       % 0 where no code matches
    cqCounts(1:nCode,i) = accumarray(codeIdx(found),1,[nCode 1]);
    cqPrior(1:nCode,i) = 1/nCode;
end

% Separators: separator k covers loci k+1 .. k+2.
for k=1:nLoci-3
    nCode = size(spCodes{k},1);
    [found, codeIdx] = ismember(alnMat(:,k+1:k+2),spCodes{k},'rows');
    spData(:,k) = codeIdx;
    spCounts(1:nCode,k) = accumarray(codeIdx(found),1,[nCode 1]);
    spPrior(1:nCode,k) = 1/nCode;
end

c.nSeq = nSeq;
% c.alnMat = alnMat;

c.arrUniqBase = arrUniqBase;
c.arrUniqBaseNum = arrUniqBaseNum;

c.nMaxCqCodes = nMaxCqCodes;
c.nMaxSpCodes = nMaxSpCodes;

c.cqCodes = cqCodes;
c.spCodes = spCodes;

c.cqData = cqData;
c.spData = spData;

c.cqCounts = cqCounts;
c.spCounts = spCounts;

c.cqPrior = cqPrior;
c.spPrior = spPrior;
|
||||
|
||||
|
||||
function y = myProd(varargin)
% MYPROD  Cartesian product of two or three sets.
%
% Every row of y is one combination of elements, column k holding the
% element taken from the k-th input.  The rows follow the linearised
% meshgrid order: the second input varies fastest, then the first, then the
% third.  Any other number of inputs gives y = [].
%
% Lu Cheng, 24.05.2011

switch nargin
    case 2
        [g1, g2] = meshgrid(varargin{1},varargin{2});
        y = [g1(:) g2(:)];
    case 3
        [g1, g2, g3] = meshgrid(varargin{1},varargin{2},varargin{3});
        y = [g1(:) g2(:) g3(:)];
    otherwise
        y = [];
end
|
||||
41
matlab/spatial/private/rmEmptyPopulation.m
Normal file
41
matlab/spatial/private/rmEmptyPopulation.m
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
function [npops, notEmpty] = rmEmptyPopulation(locCliques,locSeparators)
% RMEMPTYPOPULATION  Drop empty populations from the global search state.
%
% A population is kept when it has a non-zero entry both in SUM_CQ_COUNTS
% and in SUM_SP_COUNTS.  CQ_COUNTS, SP_COUNTS, SUM_CQ_COUNTS, SUM_SP_COUNTS,
% LOGML_TABLE, ADDITION_DIFFERENCE and JOIN_DIFFERENCE are cut down to the
% kept populations, the labels in PARTITION are renumbered to 1..npops, and
% LOC_CQ_COUNTS / LOC_SP_COUNTS are recomputed with computeCounts.
%
%   npops    - number of populations that remain
%   notEmpty - original indices of the populations that remain
%
% Lu Cheng, 15.12.2012

global CQ_COUNTS SUM_CQ_COUNTS;
global SP_COUNTS SUM_SP_COUNTS;
global PARTITION;

global LOGML_TABLE;
global ADDITION_DIFFERENCE;
global JOIN_DIFFERENCE;

global LOC_CQ_COUNTS;
global LOC_SP_COUNTS;

notEmpty = find(any(SUM_CQ_COUNTS,1) & any(SUM_SP_COUNTS,1));
npops = length(notEmpty);

% Keep only the slices that belong to non-empty populations.
CQ_COUNTS = CQ_COUNTS(:,:,notEmpty);
SUM_CQ_COUNTS = SUM_CQ_COUNTS(:,notEmpty);

SP_COUNTS = SP_COUNTS(:,:,notEmpty);
SUM_SP_COUNTS = SUM_SP_COUNTS(:,notEmpty);

LOGML_TABLE = LOGML_TABLE(notEmpty);
ADDITION_DIFFERENCE = ADDITION_DIFFERENCE(:,notEmpty);
JOIN_DIFFERENCE = JOIN_DIFFERENCE(notEmpty,notEmpty);

% Renumber the labels: old label notEmpty(k) becomes k.  Labels that are
% not listed in notEmpty are left untouched.
[isKept, newLabel] = ismember(PARTITION, notEmpty);
PARTITION(isKept) = newLabel(isKept);

[LOC_CQ_COUNTS, LOC_SP_COUNTS] = computeCounts(locCliques, locSeparators, npops);
|
||||
|
||||
61
matlab/spatial/private/updateGlobalVariables.m
Normal file
61
matlab/spatial/private/updateGlobalVariables.m
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
function updateGlobalVariables(inds, i2, cqData, nCqCodes, spData, nSpCodes, locCliques, locSeparators)
% UPDATEGLOBALVARIABLES  Move the samples "inds" to cluster i2 and bring all
% global variables up to date.
%
% "inds" are supposed to come from the same cluster; the source cluster is
% taken from the first of them.  The count tables of both clusters are
% adjusted, their LOGML_TABLE entries are recomputed and the cached
% difference tables touching either cluster are invalidated (set to nan).
% If the source cluster became empty, its JOIN_DIFFERENCE row and column
% are set to 0 instead (diagonal entry nan).
%
% Lu Cheng, 15.12.2012

global PARTITION;
global CQ_COUNTS SUM_CQ_COUNTS;
global SP_COUNTS SUM_SP_COUNTS;
global ADDITION_DIFFERENCE;
global REMOVAL_DIFFERENCE;
global JOIN_DIFFERENCE;

global LOC_SP_COUNTS;
global LOC_CQ_COUNTS;

% Source cluster must be read before the labels are overwritten.
i1 = PARTITION(inds(1));
PARTITION(inds)=i2;

[dCq, dCqSum] = computeDiffInCounts(inds, cqData, nCqCodes);
[dSp, dSpSum] = computeDiffInCounts(inds, spData, nSpCodes);

dLocCq = computeDiffInCliqCounts(locCliques, inds);
dLocSp = computeDiffInCliqCounts(locSeparators, inds);

% Take the samples out of i1 and put them into i2, table by table.
CQ_COUNTS(:,:,i1) = CQ_COUNTS(:,:,i1) - dCq;
CQ_COUNTS(:,:,i2) = CQ_COUNTS(:,:,i2) + dCq;

SP_COUNTS(:,:,i1) = SP_COUNTS(:,:,i1) - dSp;
SP_COUNTS(:,:,i2) = SP_COUNTS(:,:,i2) + dSp;

SUM_CQ_COUNTS(:,i1) = SUM_CQ_COUNTS(:,i1) - dCqSum;
SUM_CQ_COUNTS(:,i2) = SUM_CQ_COUNTS(:,i2) + dCqSum;

SUM_SP_COUNTS(:,i1) = SUM_SP_COUNTS(:,i1) - dSpSum;
SUM_SP_COUNTS(:,i2) = SUM_SP_COUNTS(:,i2) + dSpSum;

LOC_CQ_COUNTS(:,i1) = LOC_CQ_COUNTS(:,i1) - dLocCq;
LOC_CQ_COUNTS(:,i2) = LOC_CQ_COUNTS(:,i2) + dLocCq;

LOC_SP_COUNTS(:,i1) = LOC_SP_COUNTS(:,i1) - dLocSp;
LOC_SP_COUNTS(:,i2) = LOC_SP_COUNTS(:,i2) + dLocSp;

updateLogmlTable([i1 i2]);

% Cached differences involving either cluster are no longer valid.
REMOVAL_DIFFERENCE(PARTITION==i1 | PARTITION==i2) = nan;
ADDITION_DIFFERENCE(:,[i1 i2]) = nan;

JOIN_DIFFERENCE(:,i2) = nan;
JOIN_DIFFERENCE(i2,:) = nan;

if any(PARTITION==i1)
    fillValue = nan;
else
    % i1 became empty
    fillValue = 0;
end
JOIN_DIFFERENCE(:,i1) = fillValue;
JOIN_DIFFERENCE(i1,:) = fillValue;
JOIN_DIFFERENCE(i1,i1) = nan;
|
||||
37
matlab/spatial/private/updateLogmlTable.m
Normal file
37
matlab/spatial/private/updateLogmlTable.m
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
function updateLogmlTable(pops)
% UPDATELOGMLTABLE  Recompute LOGML_TABLE for the populations in "pops".
%
% LOGML_TABLE is a global npops*1 array holding one logml value per
% population.  After the update, the entries listed in "pops" are based on
% the current values of the global variables CQ_COUNTS, SUM_CQ_COUNTS,
% SP_COUNTS, SUM_SP_COUNTS and the priors CQ_PRIOR, SP_PRIOR.  The value of
% a population is the sum of its clique terms minus the sum of its
% separator terms.
%
% The per-population terms are arranged as (locus x population) matrices
% and summed explicitly along the locus dimension, so the result is also
% correct when there is only a single clique or separator locus.
%
% Lu Cheng, 25.05.2011

global CQ_COUNTS; global SUM_CQ_COUNTS;
global SP_COUNTS; global SUM_SP_COUNTS;
global CQ_PRIOR; global SP_PRIOR;

global LOGML_TABLE;

tmpN = length(pops);
tmpCqPrior = repmat(CQ_PRIOR,[1 1 tmpN]);
tmpSpPrior = repmat(SP_PRIOR,[1 1 tmpN]);

% Clique part: one column per population, one row per clique locus.
term1 = 0-gammaln(1+SUM_CQ_COUNTS(:,pops));
term2 = sum(gammaln(tmpCqPrior+CQ_COUNTS(:,:,pops))-gammaln(tmpCqPrior) , 1);
% 1 x nLoci x tmpN  ->  nLoci x tmpN (squeeze would flip a 1x1xN result)
term2 = reshape(term2, [], tmpN);

% Separator part, arranged the same way.
term3 = 0-gammaln(1+SUM_SP_COUNTS(:,pops));
term4 = sum(gammaln(tmpSpPrior+SP_COUNTS(:,:,pops))-gammaln(tmpSpPrior) , 1);
term4 = reshape(term4, [], tmpN);

LOGML_TABLE(pops) = sum(term1+term2, 1) - sum(term3+term4, 1);
|
||||
|
||||
%----------------------------------------------------------------------
|
||||
92
matlab/spatial/private/update_difference_tables.m
Normal file
92
matlab/spatial/private/update_difference_tables.m
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
function update_difference_tables(ind, cqData, nCqLetter, ...
    spData, nSpLetter, locCliques, locSeparators,logml)
% UPDATE_DIFFERENCE_TABLES  Refresh ADDITION_DIFFERENCE and
% REMOVAL_DIFFERENCE for individual "ind".
%
% Only entries that are currently nan are recomputed.  Each value is
% obtained by temporarily changing the global count tables, evaluating
% computeTotalLogml and subtracting "logml" (the value for the current
% partition); the count tables are restored afterwards.
%
%   REMOVAL_DIFFERENCE(ind)     - ind taken out of its own cluster (without
%                                 adding it anywhere else)
%   ADDITION_DIFFERENCE(ind,p)  - ind added to cluster p (without removing
%                                 it from its own cluster); the own cluster
%                                 is never computed
%
% Lu Cheng, 15.12.2012

global PARTITION;
global ADDITION_DIFFERENCE;
global REMOVAL_DIFFERENCE;

% Work on copies and write them back once everything is computed.
removal = REMOVAL_DIFFERENCE;
addition = ADDITION_DIFFERENCE;

[dCq, dCqSum] = computeDiffInCounts(ind, cqData, nCqLetter);
[dSp, dSpSum] = computeDiffInCounts(ind, spData, nSpLetter);
dLocCq = computeDiffInCliqCounts(locCliques, ind);
dLocSp = computeDiffInCliqCounts(locSeparators, ind);
diffs = {dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp};

ownPop = PARTITION(ind);

if isnan(removal(ind))
    % Removal difference: take the individual out, evaluate, put it back.
    shiftPopCounts(ownPop, false, diffs{:});
    removal(ind) = computeTotalLogml()-logml;
    shiftPopCounts(ownPop, true, diffs{:});
end

stale = isnan(addition(ind,:));
stale(ownPop) = 0;  % Own cluster needs never be calculated.
targets = find(stale);

for target = targets(:)'
    % Addition difference: add the individual, evaluate, take it out again.
    shiftPopCounts(target, true, diffs{:});
    addition(ind,target) = computeTotalLogml() - logml;
    shiftPopCounts(target, false, diffs{:});
end

REMOVAL_DIFFERENCE = removal;
ADDITION_DIFFERENCE = addition;

%---------------------------------------------------------------------

function shiftPopCounts(pop, doAdd, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp)
% Add (doAdd true) or subtract (doAdd false) the given count differences
% to/from population "pop" in all global count tables, then recompute the
% LOGML_TABLE entry of that population.

global CQ_COUNTS SUM_CQ_COUNTS;
global SP_COUNTS SUM_SP_COUNTS;
global LOC_CQ_COUNTS;
global LOC_SP_COUNTS;

if doAdd
    CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop) + dCq;
    SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop) + dSp;
    SUM_CQ_COUNTS(:,pop) = SUM_CQ_COUNTS(:,pop) + dCqSum;
    SUM_SP_COUNTS(:,pop) = SUM_SP_COUNTS(:,pop) + dSpSum;
    LOC_CQ_COUNTS(:,pop) = LOC_CQ_COUNTS(:,pop) + dLocCq;
    LOC_SP_COUNTS(:,pop) = LOC_SP_COUNTS(:,pop) + dLocSp;
else
    CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop) - dCq;
    SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop) - dSp;
    SUM_CQ_COUNTS(:,pop) = SUM_CQ_COUNTS(:,pop) - dCqSum;
    SUM_SP_COUNTS(:,pop) = SUM_SP_COUNTS(:,pop) - dSpSum;
    LOC_CQ_COUNTS(:,pop) = LOC_CQ_COUNTS(:,pop) - dLocCq;
    LOC_SP_COUNTS(:,pop) = LOC_SP_COUNTS(:,pop) - dLocSp;
end

updateLogmlTable(pop);
|
||||
|
||||
%---------------------------------------------------------------------
|
||||
83
matlab/spatial/private/update_join_difference.m
Normal file
83
matlab/spatial/private/update_join_difference.m
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
function update_join_difference(cqData, nCqCodes, spData, nSpCodes, locCliques, locSeparators, logml)
% UPDATE_JOIN_DIFFERENCE  Refresh the nan entries of JOIN_DIFFERENCE.
%
% For every pair of populations i1 < i2 whose entry is nan, the members of
% i1 are temporarily moved into i2 in the global count tables,
% computeTotalLogml is evaluated and the difference to "logml" (the value
% for the current partition) is stored symmetrically in
% JOIN_DIFFERENCE(i1,i2) and JOIN_DIFFERENCE(i2,i1).  The count tables are
% restored afterwards.  For an empty population i1 the entries towards all
% higher-numbered populations are set to 0.
%
% Lu Cheng, 15.12.2012

global CQ_COUNTS;
global PARTITION;
global JOIN_DIFFERENCE;

npops = size(CQ_COUNTS,3);

for i1 = 1:npops-1
    members = find(PARTITION==i1);
    higher = (i1+1):npops;

    if isempty(members)
        % Cluster i1 is empty
        JOIN_DIFFERENCE(i1,higher) = 0;
        JOIN_DIFFERENCE(higher,i1) = 0;
        continue;
    end

    [dCq, dCqSum] = computeDiffInCounts(members, cqData, nCqCodes);
    [dSp, dSpSum] = computeDiffInCounts(members, spData, nSpCodes);
    dLocCq = computeDiffInCliqCounts(locCliques, members);
    dLocSp = computeDiffInCliqCounts(locSeparators, members);
    diffs = {dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp};

    % Partners whose join value has to be recomputed.
    unknown_pops = find(isnan(JOIN_DIFFERENCE(i1,(i1+1):end))) + i1;

    % Empty i1 while its joins are evaluated.
    adjustPopCounts(i1, false, diffs{:});

    for i2 = unknown_pops
        adjustPopCounts(i2, true, diffs{:});

        joinChange = computeTotalLogml() - logml;
        JOIN_DIFFERENCE(i1,i2) = joinChange;
        JOIN_DIFFERENCE(i2,i1) = joinChange;

        adjustPopCounts(i2, false, diffs{:});
    end

    % Put the members back into i1.
    adjustPopCounts(i1, true, diffs{:});
end

%---------------------------------------------------------------------

function adjustPopCounts(pop, doAdd, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp)
% Add (doAdd true) or subtract (doAdd false) the given count differences
% to/from population "pop" in all global count tables, then recompute the
% LOGML_TABLE entry of that population.

global CQ_COUNTS SUM_CQ_COUNTS;
global SP_COUNTS SUM_SP_COUNTS;
global LOC_CQ_COUNTS;
global LOC_SP_COUNTS;

if doAdd
    CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop) + dCq;
    SUM_CQ_COUNTS(:,pop) = SUM_CQ_COUNTS(:,pop) + dCqSum;
    SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop) + dSp;
    SUM_SP_COUNTS(:,pop) = SUM_SP_COUNTS(:,pop) + dSpSum;
    LOC_CQ_COUNTS(:,pop) = LOC_CQ_COUNTS(:,pop) + dLocCq;
    LOC_SP_COUNTS(:,pop) = LOC_SP_COUNTS(:,pop) + dLocSp;
else
    CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop) - dCq;
    SUM_CQ_COUNTS(:,pop) = SUM_CQ_COUNTS(:,pop) - dCqSum;
    SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop) - dSp;
    SUM_SP_COUNTS(:,pop) = SUM_SP_COUNTS(:,pop) - dSpSum;
    LOC_CQ_COUNTS(:,pop) = LOC_CQ_COUNTS(:,pop) - dLocCq;
    LOC_SP_COUNTS(:,pop) = LOC_SP_COUNTS(:,pop) - dLocSp;
end

updateLogmlTable(pop);
|
||||
1512
matlab/spatial/spatialMix.m
Normal file
1512
matlab/spatial/spatialMix.m
Normal file
File diff suppressed because it is too large
Load diff
1621
matlab/spatial/spatialMix_fixK.m
Normal file
1621
matlab/spatial/spatialMix_fixK.m
Normal file
File diff suppressed because it is too large
Load diff
2609
matlab/spatial/spatialMixture.m
Normal file
2609
matlab/spatial/spatialMixture.m
Normal file
File diff suppressed because it is too large
Load diff
2645
matlab/spatial/spatialPopMixture.m
Normal file
2645
matlab/spatial/spatialPopMixture.m
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue