Added source Matlab code for reference

This commit is contained in:
Waldir Leoncio 2019-12-16 16:47:21 +01:00
parent b8af977117
commit b5d99903d2
186 changed files with 61405 additions and 1 deletions

View file

@ -0,0 +1,86 @@
function uusiData=addPoints(data)
% ADDPOINTS Append auxiliary grid points to a set of 2-D coordinates.
% uusiData = addPoints(data)
%
% Adds points to the coordinate set so that every data point falls inside
% a bounded Voronoi cell once the tessellation of the returned set is
% formed. A regular grid is laid over the data, extended three grid steps
% past the data range on each side, and the Voronoi tessellation of the
% grid is computed. Grid points whose bounded cell contains no data point
% are kept as auxiliary points.
%
% Input:
%   data     - matrix whose first two columns are read as x and y.
% Output:
%   uusiData - [data; auxiliary points], auxiliary points as (x, y) rows.

x = data(:,1);
y = data(:,2);
xmin = min(x);
xmax = max(x);
ymin = min(y);
ymax = max(y);

% Grid resolution grows with the number of distinct data points; the "+7"
% supplies the margin rows/columns placed outside the data range.
npist = size(unique(data, 'rows'),1);
nstep = ceil(npist^0.4) + 7;
xstep = (xmax-xmin)/(nstep-7);
ystep = (ymax-ymin)/(nstep-7);

% Build the nstep-by-nstep grid, one block of nstep rows per x value.
apuPisteet = zeros(nstep^2,2);
for i=1:nstep
    rivit = (i-1)*nstep+1 : i*nstep;
    apuPisteet(rivit,1) = xmin + (i-4)*xstep;
    apuPisteet(rivit,2) = ymin + ((1:nstep)-4)*ystep;
end

[V,C] = voronoin(apuPisteet,{'Qt','Qbb','Qc','Qz'});

% Keep the grid points whose cell is bounded and holds no data point.
% Only cells that do not reference vertex 1 are examined (the original
% comment calls these the bounded cells).
sailyta = false(nstep^2,1);
for i = 1:length(C)
    if ~any(C{i} == 1)
        vx = V(C{i},1);
        vy = V(C{i},2);
        sailyta(i) = ~any(inpolygon(x,y,vx,vy));
    end
end

uusiData = [data; apuPisteet(sailyta, :)];

View file

@ -0,0 +1,509 @@
function [cliques, separators, G] = findCliques(M)
% FINDCLIQUES Triangulate graph M and return its cliques and separators.
% [cliques, separators, G] = findCliques(M)
%
% Makes the graph M chordal and computes the cliques and separators from
% it, using Kevin Murphy's algorithms from the Graph Theory toolbox
% (local functions of this file). Updated 12.8.2005.
%
% Input:
%   M          - adjacency matrix of the graph.
% Output:
%   cliques    - cell array of cliques of the triangulated graph.
%   separators - cell array (column) of the distinct non-empty
%                intersections between cliques adjacent in the junction
%                tree.
%   G          - the triangulated adjacency matrix.

order = elim_order(M, ones(length(M)));
[G, cliques] = triangulate(M, order);
[jtree, root] = cliques_to_jtree(cliques, ones(length(M)));
ncliq = length(cliques);
separators = cell(ncliq-1,1);  % a tree with n nodes has n-1 edges

% Breadth-first queue of clique indices. It used to be created with
% zeros(length(ncliq)), which is always 1x1 because ncliq is a scalar, so
% the queue grew on every insertion; it is now pre-allocated to full size.
jono = zeros(1,ncliq);
jono(1) = root;
i = 1;
pointer = 2;  % next free slot in the queue

while any(jono ~= 0)  % breadth-first traversal of the junction tree
    lapset = find(jtree(jono(i),:) ~= 0);  % children of the current clique
    jtree(:,jono(i)) = 0;                  % mark the clique as processed
    jono(pointer:pointer+length(lapset)-1) = lapset;
    for j = 1:length(lapset)
        ehdokas = myintersect(cliques{jono(i)}, cliques{lapset(j)});
        kelpaa = 1;
        for k = 1:(pointer+j-3)
            % Skip the candidate if an identical separator is stored
            if isequal(ehdokas, separators{k})
                kelpaa = 0;
            end
        end
        if kelpaa
            separators{pointer+j-2} = ehdokas;
        end
    end
    jono(i) = 0;
    pointer = pointer + length(lapset);
    i = i + 1;
end

% Drop the slots that were left empty (duplicates or empty intersections).
notEmpty = zeros(ncliq-1,1);
for i = 1:ncliq-1
    if ~isempty(separators{i})
        notEmpty(i) = 1;
    end
end
separators = separators(find(notEmpty==1));
%--------------------------------------------------------------------------
%--------------------------------------------------------------------------
function order = elim_order(G, node_sizes)
% ELIM_ORDER Greedily choose a node elimination order for triangulation.
% order = elim_order(moral_graph, node_sizes)
%
% Finding the order that minimises the weight of the triangulated graph is
% NP-hard, so a greedy heuristic is used. At every step, for each
% uneliminated node, the code computes
%   - the weight of the clique it would induce (product of node_sizes over
%     the node and its uneliminated neighbours), and
%   - a fill-in score: l^2 minus the number of edges already present, in
%     the ORIGINAL graph, among its l uneliminated neighbours.
% The node with the smallest clique weight is eliminated; ties are broken
% by the smallest fill-in score (first minimum wins).
%
% For background see
%  - Kjaerulff, "Triangulation of graphs -- algorithms giving small total
%    state space", Univ. Aalborg tech report, 1990
%  - C. Huang and A. Darwiche, "Inference in Belief Networks: A procedural
%    guide", Intl. J. Approx. Reasoning, 11, 1994
%
% Input:
%   G          - adjacency matrix.
%   node_sizes - node weights, indexed linearly by node number.
% Output:
%   order      - 1-by-n vector; order(i) is the i'th eliminated node.

n = length(G);
MG = G;                    % untouched copy; fill-in is scored against it
uneliminated = ones(1,n);
order = zeros(1,n);

for step = 1:n
    U = find(uneliminated);
    valid = U;             % no stage constraints: every live node is a candidate
    nvalid = length(valid);

    fillScore = zeros(1,nvalid);
    cliqueWeight = zeros(1,nvalid);
    for j = 1:nvalid
        cand = valid(j);
        nbrs = myintersect(neighbors(G, cand), U);
        sub = MG(nbrs,nbrs);
        fillScore(j) = length(nbrs)^2 - sum(sub(:));
        cliqueWeight(j) = prod(node_sizes([cand nbrs]));
    end

    % Lightest clique first, then the min-fill score among the lightest.
    lightest = find(cliqueWeight == min(cliqueWeight));
    pick = lightest(argmin(fillScore(lightest)));

    k = valid(pick);
    uneliminated(k) = 0;
    order(step) = k;

    % Connect the neighbours of the eliminated node to each other.
    ns = myintersect(neighbors(G, k), U);
    if ~isempty(ns)
        G(ns,ns) = 1;
        G = setdiag(G,0);
    end
end
%--------------------------------------------------------------------------
function [G, cliques, fill_ins] = triangulate(G, order)
% TRIANGULATE Make G chordal by eliminating nodes in the given order.
% [G, cliques, fill_ins] = triangulate(G, order)
%
% Nodes are eliminated one by one in 'order'. Each elimination fully
% connects the node with its uneliminated neighbours (looked up in the
% partially filled-in graph). The resulting cluster is stored as a clique
% unless it is a subset of a clique stored earlier.
%
% Output:
%   G        - the filled-in adjacency matrix (zero diagonal).
%   cliques  - cell array; cliques{i} is the i'th stored cluster.
%   fill_ins - placeholder, always 1 (the fill-in matrix is not computed).

n = length(G);
done = zeros(1,n);
cliques = {};

for step = 1:n
    u = order(step);
    remaining = find(~done);

    % The cluster always contains u itself.
    cluster = myunion(myintersect(neighbors(G,u), remaining), u);
    G(cluster,cluster) = 1;
    G = setdiag(G,0);
    done(u) = 1;

    isMaximal = 1;
    for c = 1:length(cliques)
        if mysubset(cluster, cliques{c})
            isMaximal = 0;
            break;
        end
    end
    if isMaximal
        cliques{length(cliques)+1} = cluster;
    end
end

fill_ins = 1;
%--------------------------------------------------------------------------
function [jtree, root, B, w] = cliques_to_jtree(cliques, ns)
% CLIQUES_TO_JTREE Build a junction tree over a set of cliques.
% [jtree, root, B, w] = cliques_to_jtree(cliques, ns)
%
% The junction graph connects all cliques; the primary edge score is the
% size of the intersection of the two cliques and the secondary score is
% the sum of the two clique weights. The tree is obtained by passing the
% negated intersection sizes (maximum spanning tree) and the weight sums
% (tie-break) to minimum_spanning_tree.
%
% Reference: Jensen and Jensen, "Optimal Junction Trees", UAI 94.
%
% Input:
%   cliques{i} - nodes in clique i
%   ns(i)      - number of values node i can take on
% Output:
%   jtree(i,j) - 1 iff cliques i and j are connected (sparse)
%   root       - clique used as root: the last clique
%   B(i,j)     - 1 iff node j occurs in clique i (sparse)
%   w(i)       - weight of clique i, prod(ns(cliques{i}))

nc = length(cliques);
w = zeros(nc, 1);
B = sparse(nc, 1);
for c = 1:nc
    members = cliques{c};
    B(c, members) = 1;
    w(c) = prod(ns(members));
end

% |C(i) intersect C(j)| is the dot product of the membership bit vectors.
overlap = setdiag(B*B', 0);

% pairCost(i,j) = w(i) + w(j), with a zeroed diagonal.
W = repmat(w, 1, length(w));
pairCost = setdiag(W + W', 0);

% Negating the overlap turns the minimum spanning tree into a maximum one.
jtree = sparse(minimum_spanning_tree(-overlap, pairCost));

% The root is arbitrary; the last clique is used.
root = length(w);
%--------------------------------------------------------------------------
function C = myintersect(A,B)
% MYINTERSECT Intersection of two sets of positive integers.
% C = myintersect(A,B)
%
% Uses a membership bit vector instead of the built-in intersect.
% Returns the elements of B (as a row, in B's order) that occur in A,
% or [] when either input is empty.

rowA = A(:)';
rowB = B(:)';

hiA = 0;
if ~isempty(rowA)
    hiA = max(rowA);
end
hiB = 0;
if ~isempty(rowB)
    hiB = max(rowB);
end

if hiA==0 | hiB==0
    C = [];
    return
end

% Mark the members of A, then pick the marked entries of B.
mask = zeros(1, max(hiA,hiB));
mask(rowA) = 1;
C = rowB(logical(mask(rowB)));
%--------------------------------------------------------------------------
function ns = neighbors(adj_mat, i)
% NEIGHBORS Indices of the nonzero entries in row i of the adjacency matrix.
% ns = neighbors(adj_mat, i)
row = adj_mat(i,:);
ns = find(row);
%--------------------------------------------------------------------------
function C = myunion(A,B)
% MYUNION Union of two sets of positive integers.
% C = myunion(A,B)
%
% Uses a membership bit vector instead of the built-in union.
% When exactly one input is empty the other is returned unchanged; when
% both are empty the result is []. Otherwise the result is the sorted row
% vector of distinct members.

hiA = 0;
if ~isempty(A)
    hiA = max(A);
end
hiB = 0;
if ~isempty(B)
    hiB = max(B);
end

if hiA==0 & hiB==0
    C = [];
elseif hiA==0 & hiB>0
    C = B;
elseif hiA>0 & hiB==0
    C = A;
else
    mask = zeros(1, max(hiA,hiB));
    mask(A) = 1;
    mask(B) = 1;
    C = find(mask);
end
%--------------------------------------------------------------------------
function ps = parents(adj_mat, i)
% PARENTS Return the list of parents of node i as a row vector.
% ps = parents(adj_mat, i)
%
% The parents are the rows with a nonzero entry in column i.
col = adj_mat(:,i);
ps = find(col).';
%--------------------------------------------------------------------------
function cs = children(adj_mat, i, t)
% CHILDREN Return the indices of a node's children in sorted order.
% cs = children(adj_mat, i, t)
%
% Without t (or with t==1) the children are the nonzero columns of row i.
% With any other t the matrix is treated as a 2-slice DBN: the row of the
% node's copy in the second slice is read and the result is shifted by
% (t-2) slice sizes.

firstSlice = true;
if nargin >= 3
    firstSlice = false;
    if t==1
        firstSlice = true;
    end
end

if firstSlice
    cs = find(adj_mat(i,:));
else
    ss = length(adj_mat)/2;      % slice size
    cs = find(adj_mat(i+ss,:)) + (t-2)*ss;
end
%--------------------------------------------------------------------------
function p=mysubset(small,large)
% MYSUBSET Is the small set of positive integers a subset of the large set?
% p = mysubset(small, large)
%
% An empty 'small' is a subset of anything (p = 1). Otherwise p is true
% when the intersection has as many elements as 'small'.
if isempty(small)
    p = 1;
    return
end
common = myintersect(small, large);
p = (length(common) == length(small));
%--------------------------------------------------------------------------
function A = minimum_spanning_tree(C1, C2)
% MINIMUM_SPANNING_TREE Minimum spanning tree by Prim's algorithm.
% A = minimum_spanning_tree(C1, C2)
%
% C1(i,j) is the primary cost of connecting i to j.
% C2(i,j) is the (optional) secondary cost, used to break ties.
% Zero entries are treated as absent edges and replaced by infinity.
% To obtain a maximum spanning tree pass -C1.
% See Aho, Hopcroft & Ullman 1983, "Data structures and algorithms", p 237.
%
% The nodes are split into the set U already in the tree and the rest.
%   closest(i)  - the node of U currently closest to i
%   lowcost1(i) - primary cost of edge (i, closest(i)); inf once i is used
%   lowcost2(i) - secondary cost of that edge
%
% Output:
%   A - symmetric n-by-n 0/1 matrix of the chosen tree edges.

n = length(C1);
if nargin==1
    C2 = zeros(n);
end

A = zeros(n);
closest = ones(1,n);
used = zeros(1,n);   % membership flags of U
used(1) = 1;         % the tree starts from node 1

C1(C1==0) = inf;
C2(C2==0) = inf;
lowcost1 = C1(1,:);
lowcost2 = C2(1,:);

for step = 2:n
    % Cheapest primary cost; ties resolved by the secondary cost.
    ks = find(lowcost1 == min(lowcost1));
    k = ks(argmin(lowcost2(ks)));

    A(k, closest(k)) = 1;
    A(closest(k), k) = 1;
    lowcost1(k) = inf;
    lowcost2(k) = inf;
    used(k) = 1;

    % Relax the remaining nodes against the newly added node k.
    for j = find(used==0)
        if C1(k,j) < lowcost1(j)
            lowcost1(j) = C1(k,j);
            lowcost2(j) = C2(k,j);
            closest(j) = k;
        end
    end
end
%--------------------------------------------------------------------------
function indices = argmin(v)
% ARGMIN Subscript vector of the smallest element of an array.
% indices = argmin(v)
%
% The first minimum wins in the case of ties. The linear index returned by
% min is converted with ind2subv using the size reported by mysize.
% Example:
%   X = [2 8 4; 7 3 9];
%   argmin(X) = [1 1], i.e., row 1 column 1
[smallest, pos] = min(v(:));
dims = mysize(v);
indices = ind2subv(dims, pos);
%--------------------------------------------------------------------------
function M = setdiag(M, v)
% SETDIAG Set the diagonal of a matrix to a specified scalar/vector.
% M = setdiag(M, v)
%
% The diagonal is addressed by linear index: in an n-by-n matrix stored
% column by column the diagonal entries are 1, n+2, 2n+3, ..., n^2
% (e.g. [1 5 9] for a 3x3 matrix).
n = length(M);
diagIdx = 1:(n+1):n^2;
if length(v)==1
    % A scalar is replicated along the whole diagonal.
    M(diagIdx) = repmat(v, 1, n);
else
    M(diagIdx) = v;
end
%-------------------------------------------------------------------------
function sz = mysize(M)
% MYSIZE Like size, but returns n for a vector of length n and 1 for a scalar.
% sz = mysize(M)
%
% Examples
% - M = rand(1,1),   mysize(M) = 1,     size(M) = [1 1]
% - M = rand(2,1),   mysize(M) = 2,     size(M) = [2 1]
% - M = rand(1,2),   mysize(M) = 2,     size(M) = [1 2]
% - M = rand(2,2,1), mysize(M) = [2 2], size(M) = [2 2]
% - M = rand(1,2,1), mysize(M) = 2,     size(M) = [1 2]
dims = size(M);
% Vector test: two dimensions, the smaller of which is exactly 1.
looksLikeVector = (length(dims) == 2) & (min(dims) == 1);
if looksLikeVector
    sz = length(M);
else
    sz = dims;
end
%--------------------------------------------------------------------------
function sub = ind2subv(siz, ndx)
% IND2SUBV Like the built-in ind2sub, but returns the answer as a row vector.
% sub = ind2subv(siz, ndx)
%
% siz and ndx can be row or column vectors.
% sub has one row per index and one column per dimension.
%
% Example
%   ind2subv([2 2 2], 1:8) returns
%    [1 1 1
%     2 1 1
%     ...
%     2 2 2]
% That is, the leftmost digit toggles fastest.

nd = length(siz);

% No dimensions: the index is returned unchanged.
if nd==0
    sub = ndx;
    return;
end

% All-binary dimensions: read the subscripts off the bit pattern.
if all(siz==2)
    bitRows = dec2bitv(ndx-1, nd);
    sub = bitRows(:, nd:-1:1) + 1;
    return;
end

% General case: peel off one mixed-radix digit at a time, slowest first.
stride = [1 cumprod(siz(:)')];
rest = ndx(:) - 1;
sub = zeros(length(rest), nd);
for d = nd:-1:1
    sub(:,d) = floor(rest/stride(d)) + 1;
    rest = rem(rest, stride(d));
end
%%%%%%%%%%
function bits = dec2bitv(d,n)
% DEC2BITV Convert a decimal integer to a bit vector.
% bits = dec2bitv(d,n) is like the built-in dec2bin, except that the
% answer is a numeric vector, not a string.
% n is an optional minimum length of the bit vector (default 1, so that
% even 0 gets one digit).
% If d is a vector, each row of the output is the bit vector of one entry.
if (nargin<2)
    n = 1;
end
col = d(:);
% The binary exponent of the largest value gives the digits required.
[mantissa, expo] = log2(max(col));
width = max(n, expo);
scale = pow2((1-width):0);
bits = rem(floor(col*scale), 2);
%------------------------------------------------------------------------
function r = myisvector(V)
% MYISVECTOR Like isvector(V): true for a 2-D array whose smaller
% dimension is exactly 1 (scalars included).
dims = size(V);
isTwoD = (length(dims) == 2);
hasUnitDim = (min(dims) == 1);
r = isTwoD & hasUnitDim;

View file

@ -0,0 +1,307 @@
function handleIndiFastaCase(cc,dist,Z)
% HANDLEINDIFASTACASE Individual-level clustering workflow for FASTA input.
% handleIndiFastaCase(cc,dist,Z)
%
% Specifically written to handle the FASTA file format of individual
% clustering. Lu Cheng, 15.12.2012
%
% Steps: ask for the maximum number of populations, build the initial
% partition with cluster_own, run model_search_parallel, store the results
% in cc, write the text report, show the partition and Voronoi plots and
% optionally save the results (.mat, .txt report and .mapfile.txt).
%
% Input (as used below):
%   cc   - struct; fields popnames, pointers, vorPoints, vorCells,
%          coordinates and heds are read, and PARTITION, npops, logml,
%          partitionSummary and logmldiff are written.
%   dist - passed to model_search_parallel and saved with the results.
%   Z    - passed to cluster_own and saved with the results.

OUTPUT_FILE = 'baps6_output.baps';

teksti = 'Input upper bound to the number of populations (only one value): ';
npopstextExtra = inputdlg(teksti ,'Input maximum number of populations',1,{'20'});
if isempty(npopstextExtra)  % Cancel was pressed
    return
end
% NOTE(review): str2num evaluates its argument as MATLAB code. The text
% comes straight from the dialog; consider str2double/sscanf if expression
% input does not need to be supported.
nMaxPops = str2num(npopstextExtra{1});
if isempty(nMaxPops)
    % Non-numeric or empty input used to crash on nMaxPops(1).
    disp('Incorrect value for the maximum number of populations.');
    return
end
nMaxPops = nMaxPops(1);

initPart = cluster_own(Z,nMaxPops);

% Schedule of search operators handed to model_search_parallel.
roundTypes = [2*ones(1,nMaxPops) ...
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 ...
3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 1 1 1 1 ...
1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 ...
1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 ...
1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4];

[partition, logml, partitionSummary, logmldiff] = model_search_parallel(cc, initPart, dist, roundTypes);
cc.PARTITION = partition;
cc.npops = length(unique(partition));
cc.logml = logml;
cc.partitionSummary = partitionSummary;
cc.logmldiff = logmldiff;

if cc.npops==nMaxPops
    % The upper bound was reached: offer to stop so that the user can
    % rerun with a larger bound. The call previously passed 'Yes''No' (one
    % string, missing comma) as the dialog title.
    choice = questdlg(sprintf('%d populations discovered, which is the same as input. We suggest you to set a larger number. Do you want to quit?', cc.npops),...
        'Maximum number of populations reached','Yes','No','Yes');
    if strcmp(choice,'Yes')
        return
    end
end

writeMixtureInfo(cc);

popnames = cc.popnames;
pointers = cc.pointers;
vorPoints = cc.vorPoints;
vorCells = cc.vorCells;
coordinates = cc.coordinates;
heds = cc.heds;

viewMixPartition(partition, popnames);

% Build the label shown for every entry of 'pointers'.
if isequal(popnames, [])
    names = pointers;
else
    names = cell(size(pointers));
    % Second column of popnames: an index for each name; the lookup below
    % uses the last name whose index does not exceed the individual's.
    indices = zeros(size(popnames(:,2)));
    for i=1:length(popnames(:,2))
        indices(i) = popnames{i,2};
    end
    for i = 1:length(pointers)
        inds = pointers{i};
        namesInCell = [];
        for j = 1:length(inds)
            ind = inds(j);
            I = find(indices > ind);
            if isempty(I)
                nameIndex = length(indices);
            else
                nameIndex = min(I) -1;
            end
            name = popnames{nameIndex};
            namesInCell = [namesInCell name];
        end
        names{i} = namesInCell;
    end
end

vorPlot(vorPoints, vorCells, partition, pointers, coordinates, names);

talle = questdlg(['Do you want to save the mixture populations ' ...
    'so that you can use them later in admixture analysis or plot ' ...
    'additional images?'], ...
    'Save results?','Yes','No','Yes');

if isequal(talle,'Yes')
    [filename, pathname] = uiputfile('*.mat','Save results as');
    if isequal(filename,0) || isequal(pathname,0)
        % Cancel was pressed
        return
    end

    % Copy the text report next to the saved results and remove the
    % temporary report file.
    if exist(OUTPUT_FILE,'file')
        copyfile(OUTPUT_FILE,[pathname filename '.txt'])
        delete(OUTPUT_FILE)
    end

    % added by Lu Cheng, 05.12.2012
    tmpFile = [pathname filename '.mapfile.txt'];
    fid = fopen(tmpFile,'w+');
    fprintf(fid,'Name\tLatitude\tLongitude\tDescription\tLabel\n');
    if ~isempty(heds)
        for i=1:length(heds)
            fprintf(fid,'%s\t%.10f\t%.10f\t%s_%d\t%d\n',heds{i},coordinates(i,1),coordinates(i,2),...
                heds{i},partition(i),partition(i));
        end
    else
        % No headers available: label the individuals by running number.
        % ('ninds' was undefined here and the branch could never run.)
        ninds = length(partition);
        for i=1:ninds
            fprintf(fid,'%d\t%.10f\t%.10f\t%d_%d\t%d\n',i,coordinates(i,1),coordinates(i,2),...
                i,partition(i),partition(i));
        end
    end
    fclose(fid);

    format_type = 'FASTA';
    save([pathname filename], 'cc','dist','Z','format_type','-v7.3');
else
    if exist(OUTPUT_FILE,'file')
        delete(OUTPUT_FILE)
    end
end
%%%%%%%%%%%%%
function writeMixtureInfo(c)
% WRITEMIXTUREINFO Print the mixture clustering report and write it to file.
% writeMixtureInfo(c)
%
% Outputs the clustering results to the command window and to
% 'baps6_output.baps'. Modified by Lu Cheng, 28.03.2010
%
% Fields read from c: PARTITION, npops, popnames (c.heds is used when
% popnames is empty), logml, partitionSummary and logmldiff.
%
% The report contains the summary figures, the members of each cluster
% (wrapped at about 58 characters), the table of log(ml) changes when
% there is more than one cluster, and up to 10 best visited partitions.

outputFile = 'baps6_output.baps';

ninds = length(c.PARTITION);
npops =  c.npops;
popnames = c.popnames;
logml = c.logml;
partition = c.PARTITION;
partitionSummary = c.partitionSummary;

if isempty(popnames)
    popnames = c.heds;
end

if ~isempty(outputFile)
    fid = fopen(outputFile,'w+');
else
    fid = -1;
end

dispLine;
disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:');
disp(['Number of clustered individuals: ' ownNum2Str(ninds)]);
disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]);
disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]);
disp(' ');
if (fid ~= -1)
    fprintf(fid,'%10s\n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']);
    fprintf(fid,'%20s\n', ['Number of clustered individuals: ' ownNum2Str(ninds)]);
    fprintf(fid,'%20s\n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]);
    fprintf(fid,'%20s\n\n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]);
end

disp('Best Partition: ');
if (fid ~= -1)
    fprintf(fid,'%s \n','Best Partition: ');
end
for m=1:npops
    indsInM = find(partition==m);
    if isempty(indsInM)
        continue;
    end

    % Width of the "Cluster m: {" prefix, used to indent wrapped lines.
    length_of_beginning = 11 + floor(log10(m));
    cluster_size = length(indsInM);

    txt = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})];
    for k = 2:cluster_size
        txt = [txt ', ' char(popnames{indsInM(k)})];
    end
    txt = [txt '}'];

    while length(txt)>58
        % Take one line and display it.
        new_line = takeLine(txt,58);
        txt = txt(length(new_line)+1:end);
        disp(new_line);
        if (fid ~= -1)
            fprintf(fid,'%s \n',new_line);
        end
        if length(txt)>0
            txt = [blanks(length_of_beginning) txt];
        else
            txt = [];
        end
    end
    if ~isempty(txt)
        disp(txt);
        if (fid ~= -1)
            fprintf(fid,'%s \n',txt);
        end
    end
end

logmldiff = c.logmldiff;
if npops ~= 1
    disp(' ');
    disp(' ');
    disp('Changes in log(marginal likelihood) if indvidual i is moved to cluster j:');
    if (fid ~= -1)
        fprintf(fid, '%s \n', ' '); fprintf(fid, '\n');
        fprintf(fid, '%s \n', 'Changes in log(marginal likelihood) if indvidual i is moved to cluster j:'); fprintf(fid, '\n');
    end

    % Header row of the table.
    txt = sprintf('%10s','ind');
    for ii = 1:npops
        tmpstr = sprintf('\t%10s',num2str(ii));
        txt = [txt tmpstr];
    end
    disp(txt);
    if (fid ~= -1)
        fprintf(fid, '%s \n', txt);
    end

    for ii = 1:ninds
        txt = sprintf('%10s',popnames{ii});
        for jj = 1:npops
            tmpstr = sprintf('\t%10s',num2str(logmldiff(ii,jj),'%10.6f'));
            txt = [txt tmpstr];
        end

        % Only the first 100 rows go to the screen; all rows go to the
        % file. The test used to be ii<100, which silently dropped row 100
        % from the screen before the notice printed at row 101.
        if ii<=100
            disp(txt);
        elseif ii==101
            disp('.......................................');
            disp('..........see output file..............');
        end
        if (fid ~= -1)
            fprintf(fid, '%s \n', txt);
        end
    end
end

disp(' ');
disp(' ');
disp('List of sizes of 10 best visited partitions and corresponding log(ml) values');

if (fid ~= -1)
    fprintf(fid, '%s \n\n', ' ');
    fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n');
end

% Stack the pages of the summary, sort by log(ml) in descending order and
% drop the placeholder rows (log(ml) <= -1e49).
partitionSummaryKaikki = partitionSummary;
partitionSummary =[];
for i=1:size(partitionSummaryKaikki,3)
    partitionSummary = [partitionSummary; partitionSummaryKaikki(:,:,i)];
end
partitionSummary = sortrows(partitionSummary,2);
partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :);
partitionSummary = partitionSummary(logical(partitionSummary(:,2)>-1e49),:);

vikaPartitio = min(10, size(partitionSummary,1));
for part = 1:vikaPartitio
    row = [num2str(partitionSummary(part,1),'%20d') ' ' num2str(partitionSummary(part,2),'%20.6f')];
    disp(row);
    if (fid ~= -1)
        fprintf(fid, '%s \n', row);
    end
end

if (fid ~= -1)
    fclose(fid);
else
    diary off
end
%%%%%%%%%%
%--------------------------------------------------------------
function newline = takeLine(description,width)
% TAKELINE Cut one display line off the front of a text.
% newline = takeLine(description,width)
%
% Returns description(1:n), where n is the position of the first
% whitespace character at or after position width+1, or the end of the
% text when there is none. A text that is not longer than 'width' is
% returned whole (this case used to fail with an index error).
n = width+1;
if n > length(description)
    newline = description;
    return
end
while ~isspace(description(n)) && n<length(description)
    n = n+1;
end
newline = description(1:n);

View file

@ -0,0 +1,302 @@
function handlePopFastaCase(cc,pgPart,pgDist)
% HANDLEPOPFASTACASE Pregroup-level clustering workflow for FASTA input.
% handlePopFastaCase(cc,pgPart,pgDist)
%
% Specifically written to handle the FASTA file format. Lu Cheng, 15.12.2012
%
% Steps: ask for the maximum number of populations, run
% model_search_pregroup, store the results in cc, write the text report,
% show the partition and Voronoi plots and optionally save the results
% (.mat, .txt report and .mapfile.txt).
%
% Input (as used below):
%   cc     - struct; fields popnames, pointers, vorPoints, vorCells and
%            coordinates are read, and PARTITION, npops, logml,
%            partitionSummary and logmldiff are written.
%   pgPart - pregroup partition; its number of distinct values is the
%            number of pregroups.
%   pgDist - passed to model_search_pregroup and saved with the results.

OUTPUT_FILE = 'baps6_output.baps';

teksti = 'Input upper bound to the number of populations (only one value): ';
npopstextExtra = inputdlg(teksti ,'Input maximum number of populations',1,{'20'});
if isempty(npopstextExtra)  % Cancel was pressed
    return
end
% NOTE(review): str2num evaluates its argument as MATLAB code. The text
% comes straight from the dialog; consider str2double/sscanf if expression
% input does not need to be supported.
nMaxPops = str2num(npopstextExtra{1});
if isempty(nMaxPops)
    % Non-numeric or empty input used to crash on nMaxPops(1).
    disp('Incorrect value for the maximum number of populations.');
    return
end
nMaxPops = nMaxPops(1);

nPregroup = length(unique(pgPart));

% Schedule of search operators handed to model_search_pregroup.
roundTypes = [2*ones(1,nMaxPops) ...
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...
3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 ...
3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 1 1 1 1 ...
1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 ...
1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 ...
1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4];

[partition, logml, partitionSummary, logmldiff] = model_search_pregroup(cc, pgPart, pgDist, roundTypes, nMaxPops);
cc.PARTITION = partition;  % note that the partition only contains nPregroup elements
cc.npops = length(unique(partition));
cc.logml = logml;
cc.partitionSummary = partitionSummary;
cc.logmldiff = logmldiff;

if cc.npops==nMaxPops
    % The upper bound was reached: offer to stop so that the user can
    % rerun with a larger bound. The call previously passed 'Yes''No' (one
    % string, missing comma) as the dialog title.
    choice = questdlg(sprintf('%d populations discovered, which is the same as input. We suggest you to set a larger number. Do you want to quit?', cc.npops),...
        'Maximum number of populations reached','Yes','No','Yes');
    if strcmp(choice,'Yes')
        return
    end
end

writeMixtureInfo(cc);

popnames = cc.popnames;
pointers = cc.pointers;
vorPoints = cc.vorPoints;
vorCells = cc.vorCells;
coordinates = cc.coordinates;

% Build the label shown for every entry of 'pointers'.
if isequal(popnames, [])
    names = pointers;
else
    names = cell(size(pointers));
    % Second column of popnames: an index for each name; the lookup below
    % uses the last name whose index does not exceed the individual's.
    indices = zeros(size(popnames(:,2)));
    for i=1:length(popnames(:,2))
        indices(i) = popnames{i,2};
    end
    for i = 1:length(pointers)
        inds = pointers{i};
        namesInCell = [];
        for j = 1:length(inds)
            ind = inds(j);
            I = find(indices > ind);
            if isempty(I)
                nameIndex = length(indices);
            else
                nameIndex = min(I) -1;
            end
            name = popnames{nameIndex};
            namesInCell = [namesInCell name];
        end
        names{i} = namesInCell;
    end
end

viewMixPartition(partition, popnames);
vorPlot(vorPoints, vorCells, partition, pointers, coordinates, names);

talle = questdlg(['Do you want to save the mixture populations ' ...
    'so that you can use them later in admixture analysis or plot ' ...
    'additional images?'], ...
    'Save results?','Yes','No','Yes');

if isequal(talle,'Yes')
    [filename, pathname] = uiputfile('*.mat','Save results as');
    if isequal(filename,0) || isequal(pathname,0)
        % Cancel was pressed
        return
    end

    % Copy the text report next to the saved results and remove the
    % temporary report file.
    if exist(OUTPUT_FILE,'file')
        copyfile(OUTPUT_FILE,[pathname filename '.txt'])
        delete(OUTPUT_FILE)
    end

    % added by Lu Cheng, 05.12.2012
    tmpFile = [pathname filename '.mapfile.txt'];
    fid = fopen(tmpFile,'w+');
    fprintf(fid,'GroupLabel\tLatitude\tLongitude\tDescription\tLabel\n');
    for i=1:nPregroup
        fprintf(fid,'%d\t%.10f\t%.10f\t%d_%d\t%d\n',i,coordinates(i,1),coordinates(i,2),...
            i,partition(i),partition(i));
    end
    fclose(fid);

    format_type = 'FASTA';
    save([pathname filename], 'cc','partition','pgDist','pgPart','format_type','-v7.3');
else
    if exist(OUTPUT_FILE,'file')
        delete(OUTPUT_FILE)
    end
end
%%%%%%%%%%%%%
function writeMixtureInfo(c)
% WRITEMIXTUREINFO Print the pregroup clustering report and write it to file.
% writeMixtureInfo(c)
%
% Outputs the clustering results to the command window and to
% 'baps6_output.baps'. Modified by Lu Cheng, 28.03.2010
%
% Fields read from c: PARTITION, npops, popnames, logml, partitionSummary,
% logmldiff, groupPartition and nPregroup. When popnames is empty the
% pregroups are named '1', '2', ..., up to c.nPregroup.
% NOTE(review): nPregroup and groupPartition are not set in this file's
% main function; presumably the caller supplies them in cc - confirm.
%
% The report contains the summary figures, the members of each cluster
% (wrapped at about 58 characters), the table of log(ml) changes when
% there is more than one cluster, and up to 10 best visited partitions.

outputFile = 'baps6_output.baps';

ninds = length(c.PARTITION);
npops =  c.npops;
popnames = c.popnames;
logml = c.logml;
partition = c.PARTITION;
partitionSummary = c.partitionSummary;

if isempty(popnames)
    popnames = cell(c.nPregroup,1);
    for i=1:c.nPregroup
        popnames{i} = num2str(i);
    end
end

if ~isempty(outputFile)
    fid = fopen(outputFile,'w+');
else
    fid = -1;
end

dispLine;
disp('RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:');
disp(['Number of clustered individuals: ' ownNum2Str(ninds)]);
disp(['Number of groups in optimal partition: ' ownNum2Str(npops)]);
disp(['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]);
disp(' ');
if (fid ~= -1)
    fprintf(fid,'%10s\n', ['RESULTS OF INDIVIDUAL LEVEL MIXTURE ANALYSIS:']);
    fprintf(fid,'%20s\n', ['Number of clustered individuals: ' ownNum2Str(ninds)]);
    fprintf(fid,'%20s\n', ['Number of groups in optimal partition: ' ownNum2Str(npops)]);
    fprintf(fid,'%20s\n\n', ['Log(marginal likelihood) of optimal partition: ' ownNum2Str(logml)]);
end

disp('Best Partition: ');
if (fid ~= -1)
    fprintf(fid,'%s \n','Best Partition: ');
end
for m=1:npops
    indsInM = unique(c.groupPartition(partition==m));
    if isempty(indsInM)
        continue;
    end

    % Width of the "Cluster m: {" prefix, used to indent wrapped lines.
    length_of_beginning = 11 + floor(log10(m));
    cluster_size = length(indsInM);

    txt = ['Cluster ' num2str(m) ': {' char(popnames{indsInM(1)})];
    for k = 2:cluster_size
        txt = [txt ', ' char(popnames{indsInM(k)})];
    end
    txt = [txt '}'];

    while length(txt)>58
        % Take one line and display it.
        new_line = takeLine(txt,58);
        txt = txt(length(new_line)+1:end);
        disp(new_line);
        if (fid ~= -1)
            fprintf(fid,'%s \n',new_line);
        end
        if length(txt)>0
            txt = [blanks(length_of_beginning) txt];
        else
            txt = [];
        end
    end
    if ~isempty(txt)
        disp(txt);
        if (fid ~= -1)
            fprintf(fid,'%s \n',txt);
        end
    end
end

logmldiff = c.logmldiff;
if npops ~= 1
    disp(' ');
    disp(' ');
    disp('Changes in log(marginal likelihood) if pregroup i is moved to cluster j:');
    if (fid ~= -1)
        fprintf(fid, '%s \n', ' '); fprintf(fid, '\n');
        fprintf(fid, '%s \n', 'Changes in log(marginal likelihood) if indvidual i is moved to cluster j:'); fprintf(fid, '\n');
    end

    % Header row of the table.
    txt = sprintf('%10s','ind');
    for ii = 1:npops
        tmpstr = sprintf('\t%10s',num2str(ii));
        txt = [txt tmpstr];
    end
    disp(txt);
    if (fid ~= -1)
        fprintf(fid, '%s \n', txt);
    end

    for ii = 1:c.nPregroup
        txt = sprintf('%10s',popnames{ii});
        for jj = 1:npops
            tmpstr = sprintf('\t%10s',num2str(logmldiff(ii,jj),'%10.6f'));
            txt = [txt tmpstr];
        end

        % Only the first 100 rows go to the screen; all rows go to the
        % file. The test used to be ii<100, which silently dropped row 100
        % from the screen before the notice printed at row 101.
        if ii<=100
            disp(txt);
        elseif ii==101
            disp('.......................................');
            disp('..........see output file..............');
        end
        if (fid ~= -1)
            fprintf(fid, '%s \n', txt);
        end
    end
end

disp(' ');
disp(' ');
disp('List of sizes of 10 best visited partitions and corresponding log(ml) values');

if (fid ~= -1)
    fprintf(fid, '%s \n\n', ' ');
    fprintf(fid, '%s \n', 'List of sizes of 10 best visited partitions and corresponding log(ml) values'); fprintf(fid, '\n');
end

% Stack the pages of the summary, sort by log(ml) in descending order and
% drop the placeholder rows (log(ml) <= -1e49).
partitionSummaryKaikki = partitionSummary;
partitionSummary =[];
for i=1:size(partitionSummaryKaikki,3)
    partitionSummary = [partitionSummary; partitionSummaryKaikki(:,:,i)];
end
partitionSummary = sortrows(partitionSummary,2);
partitionSummary = partitionSummary(size(partitionSummary,1):-1:1 , :);
partitionSummary = partitionSummary(logical(partitionSummary(:,2)>-1e49),:);

vikaPartitio = min(10, size(partitionSummary,1));
for part = 1:vikaPartitio
    row = [num2str(partitionSummary(part,1),'%20d') ' ' num2str(partitionSummary(part,2),'%20.6f')];
    disp(row);
    if (fid ~= -1)
        fprintf(fid, '%s \n', row);
    end
end

if (fid ~= -1)
    fclose(fid);
else
    diary off
end
%%%%%%%%%%
%--------------------------------------------------------------
function newline = takeLine(description,width)
% TAKELINE Cut one display line off the front of a text.
% newline = takeLine(description,width)
%
% Returns description(1:n), where n is the position of the first
% whitespace character at or after position width+1, or the end of the
% text when there is none. A text that is not longer than 'width' is
% returned whole (this case used to fail with an index error).
n = width+1;
if n > length(description)
    newline = description;
    return
end
while ~isspace(description(n)) && n<length(description)
    n = n+1;
end
newline = description(1:n);

View file

@ -0,0 +1,718 @@
function [partition, counts, sumcounts] = initSpatialMixture(initData, ...
npops, Z, rowsFromInd, noalle, dist, adjprior, priorTerm);
% Finds an initial state for the spatial mixture with the greedy algorithm
% of BAPS 3.1.
%
% What the code below does as written:
%   - drops the last column of initData to obtain the allele data,
%   - obtains a row-level initial partition from admixture_initialization,
%   - tabulates counts/sumcounts with initialCounts,
%   - stores the per-individual partition (every rowsFromInd-th row) and the
%     count tables in the globals PARTITION_IN, COUNTS_IN, SUMCOUNTS_IN,
%   - returns partition, counts and sumcounts at the unconditional 'return'.
%
% NOTE(review): everything after that 'return' is unreachable as written,
% so the greedy search below never runs and dist, adjprior and priorTerm
% are never used. Confirm whether the early return is intentional.
global PARTITION_IN; global COUNTS_IN;
global SUMCOUNTS_IN; global POP_LOGML_IN;
% The last column of initData is not part of the allele data; it is read
% as an individual index by admixture_initialization.
data = initData(:,1:end-1);
initialPartition = admixture_initialization(initData, npops, Z);
[sumcounts, counts, logml] = ...
initialCounts(initialPartition, data, npops, rowsFromInd, noalle);
% Keep one partition entry per individual (one per rowsFromInd data rows).
PARTITION_IN = initialPartition(1:rowsFromInd:end);
COUNTS_IN = counts; SUMCOUNTS_IN = sumcounts;
partition = PARTITION_IN;
% Unconditional exit: the function result is the initial partition only.
return
% ---- Unreachable from here on (see NOTE(review) in the header) ----
POP_LOGML_IN = computePopulationLogml(1:npops, adjprior, priorTerm);
clear initialPartition; clear counts; clear sumcounts;
% SEARCH FOR THE BEST MIXTURE PARTITION (PARTITION_IN)
roundTypes = [1 1]; %The phase-one cycle, run twice.
ready = 0; vaihe = 1;
ninds = size(data,1)/rowsFromInd;
% Outer loop: repeat rounds of the current phase ('vaihe') until a full
% pass produces no change, then advance to the next phase.
while ready ~= 1
muutoksia = 0;
for n = 1:length(roundTypes)
round = roundTypes(n);
kivaluku=0;
if round==0 | round==1 %Moving an individual to another population.
% Visit the individuals in random order.
inds = 1:ninds;
aputaulu = [inds' rand(ninds,1)];
aputaulu = sortrows(aputaulu,2);
inds = aputaulu(:,1)';
muutosNyt = 0;
for ind = inds
i1 = PARTITION_IN(ind);
[muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, ...
data, adjprior, priorTerm);
% NOTE(review): maxMuutos and i2 are assigned only when round==1; with
% round==0 the test below would use values left over from earlier code.
if round==1, [maxMuutos, i2] = max(muutokset); end
if (i1~=i2 & maxMuutos>1e-5)
% A change took place
muutoksia = 1;
kivaluku = kivaluku+1;
updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts,...
adjprior, priorTerm);
logml = logml+maxMuutos;
end
end
elseif round==2 %Merging a population into another one.
maxMuutos = 0;
% Find the single merge (i1 into i2) with the largest logml gain.
for pop = 1:npops
[muutokset, diffInCounts] = laskeMuutokset2(pop, rowsFromInd, ...
data, adjprior, priorTerm);
[isoin, indeksi] = max(muutokset);
if isoin>maxMuutos
maxMuutos = isoin;
i1 = pop;
i2 = indeksi;
diffInCountsBest = diffInCounts;
end
end
if maxMuutos>1e-5
muutoksia = 1;
updateGlobalVariables2(i1,i2,rowsFromInd, diffInCountsBest, ...
adjprior, priorTerm);
logml = logml + maxMuutos;
end
elseif round==3 | round==4 %Splitting a population into parts.
maxMuutos = 0;
ninds = size(data,1)/rowsFromInd;
for pop = 1:npops
inds2 = find(PARTITION_IN==pop);
ninds2 = length(inds2);
% Only populations with more than 5 individuals are split.
if ninds2>5
% Cluster the members of this population using their pairwise distances.
dist2 = laskeOsaDist(inds2, dist, ninds);
Z2 = linkage(dist2');
if round==3
npops2 = min(20, floor(ninds2 / 5)); %Number of parts to split into
elseif round==4
npops2 = 2;
end
T2 = cluster_own(Z2, npops2);
muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, data, ...
adjprior, priorTerm, pop);
% muutokset is (sub-cluster x target population); take the best entry
% via its linear index and decode that into sub-cluster and target.
[isoin, indeksi] = max(muutokset(1:end));
if isoin>maxMuutos
maxMuutos = isoin;
muuttuvaPop2 = rem(indeksi,npops2);
if muuttuvaPop2==0, muuttuvaPop2 = npops2; end
muuttuvat = inds2(find(T2==muuttuvaPop2));
i2 = ceil(indeksi/npops2);
end
end
end
if maxMuutos>1e-5
muutoksia = 1;
rows = computeRows(rowsFromInd, muuttuvat, length(muuttuvat));
diffInCounts = computeDiffInCounts(rows, size(COUNTS_IN,1), ...
size(COUNTS_IN,2), data);
i1 = PARTITION_IN(muuttuvat(1));
updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ...
adjprior, priorTerm, i2);
logml = logml + maxMuutos;
end
elseif round == 5 | round == 6
% Try to empty one population at a time by moving its individuals to
% their best other population; keep the result only if logml improved.
pop=0;
muutettu = 0;
% Snapshot of the global state so a failed attempt can be undone.
poplogml = POP_LOGML_IN;
partition = PARTITION_IN;
counts = COUNTS_IN;
sumcounts = SUMCOUNTS_IN;
while (pop < npops & muutettu == 0)
pop = pop+1;
totalMuutos = 0;
inds = find(PARTITION_IN==pop);
if round == 5
% Random visiting order.
aputaulu = [inds rand(length(inds),1)];
aputaulu = sortrows(aputaulu,2);
inds = aputaulu(:,1)';
elseif round == 6
% Visiting order decided by returnInOrder.
inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm);
end
i=0;
while (length(inds)>0 & i<length(inds))
i = i+1;
ind = inds(i);
[muutokset, diffInCounts] = laskeMuutokset(ind, rowsFromInd, ...
data, adjprior, priorTerm);
muutokset(pop) = -1e50; % Certainly not the largest!
[maxMuutos, i2] = max(muutokset);
updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts,...
adjprior, priorTerm);
totalMuutos = totalMuutos+maxMuutos;
logml = logml+maxMuutos;
if round == 6
% Stop as soon as the accumulated change is positive.
if totalMuutos > 1e-5
i=length(inds);
end
end
end
if totalMuutos>1e-5
muutettu=1;
muutoksia = 1; % Outer bookkeeping.
else
% The state did not improve at any point.
% Undo all changes.
PARTITION_IN = partition;
SUMCOUNTS_IN = sumcounts;
POP_LOGML_IN = poplogml;
COUNTS_IN = counts;
logml = logml - totalMuutos;
end
end
clear partition; clear sumcounts; clear counts; clear poplogml;
end
end
% No change during the whole pass: advance the phase (1..5), or finish
% after phase 5.
if muutoksia == 0
if vaihe==1
vaihe = 2;
elseif vaihe==2
vaihe = 3;
elseif vaihe==3
vaihe = 4;
elseif vaihe==4;
vaihe = 5;
elseif vaihe==5
ready = 1;
end
else
muutoksia = 0;
end
% Select the round types to run for the current phase.
if ready==0
if vaihe==1
roundTypes=[1];
elseif vaihe==2
roundTypes = [2];
elseif vaihe==3
roundTypes=[5];
elseif vaihe==4
roundTypes=[4 3 1];
% NOTE(review): 'elseif vaihe' is true for any nonzero vaihe; presumably
% 'vaihe==5' was intended - confirm.
elseif vaihe
roundTypes=[6 2 3 4 1];
end
end
end
partition = PARTITION_IN;
counts = COUNTS_IN;
sumcounts = SUMCOUNTS_IN;
%-------------------------------------------------------------------------------------
function [sumcounts, counts, logml] = ...
    initialCounts(partition, data, npops, rowsFromInd, noalle)
% INITIALCOUNTS Tabulate allele counts per population for a partition.
%   counts(k,j,i)  : number of rows of data assigned to population i whose
%                    value in column (locus) j equals allele code k
%   sumcounts(i,j) : number of rows assigned to population i that have a
%                    non-negative value in column j
%   logml          : value returned by computeLogml for these tables
%   Also rebuilds the global GAMMA_LN table through initializeGammaln.
numLoci = size(data,2);
numInds = size(data,1)/rowsFromInd;
maxAlleles = max(noalle);
counts = zeros(maxAlleles, numLoci, npops);
sumcounts = zeros(npops, numLoci);
for pop = 1:npops
    inPop = (partition == pop);
    for locus = 1:numLoci
        % Rows with a negative value in this column are left out.
        observedRows = find(inPop & data(:,locus) >= 0);
        sumcounts(pop,locus) = length(observedRows);
        observedAlleles = data(observedRows, locus);
        for allele = 1:noalle(locus)
            counts(allele,locus,pop) = nnz(observedAlleles == allele);
        end
    end
end
initializeGammaln(numInds, rowsFromInd, maxAlleles);
logml = computeLogml(counts, sumcounts, noalle, data, rowsFromInd);
%-----------------------------------------------------------------------
function logml=computeLogml(counts, sumcounts, noalle, data, rowsFromInd)
% COMPUTELOGML Log marginal likelihood looked up from the GAMMA_LN table.
%   counts    : (max alleles x loci x populations) allele count table
%   sumcounts : per-population, per-locus totals
%   noalle    : number of alleles at each locus
%   data, rowsFromInd : only used to derive the row count of GAMMA_LN
%   Reads the global GAMMA_LN; assumes it has size(data,1)+rowsFromInd
%   rows, because that value is used as the column stride of the linear
%   index below.
global GAMMA_LN;
numLoci = size(counts,2);
numPops = size(counts,3);
% Column of GAMMA_LN to use for each (allele, locus) cell: the allele
% count of the locus for its real alleles, and 1 for the padding cells.
gammaCol = ones(max(noalle), numLoci);
for locus = 1:numLoci
    gammaCol(1:noalle(locus), locus) = noalle(locus);
end
rowsInG = size(data,1)+rowsFromInd;
% Linear index into GAMMA_LN: row counts+1, column gammaCol.
colOffset = repmat(rowsInG*(gammaCol-1), [1 1 numPops]);
countPart = sum(sum(sum(GAMMA_LN(counts+1 + colOffset))));
priorPart = numPops*sum(sum(GAMMA_LN(1, gammaCol)));
totalPart = sum(sum(GAMMA_LN(sumcounts+1,1)));
logml = countPart - priorPart - totalPart;
%--------------------------------------------------------------------------
function initializeGammaln(ninds, rowsFromInd, maxAlleles)
% INITIALIZEGAMMALN Fill the global lookup table GAMMA_LN.
%   After the call GAMMA_LN(i,j) = gammaln((i-1) + 1/j) for
%   i = 1..(ninds+1)*rowsFromInd and j = 1..maxAlleles.
global GAMMA_LN;
numRows = (1+ninds)*rowsFromInd;
rowShift = (0:numRows-1)';        % the (i-1) part, one entry per row
colShift = 1 ./ (1:maxAlleles);   % the 1/j part, one entry per column
GAMMA_LN = gammaln(bsxfun(@plus, rowShift, colShift));
%--------------------------------------------------------------------------
%The following three functions are used to build the initial partition.
function initial_partition=admixture_initialization(data_matrix,nclusters,Z)
% ADMIXTURE_INITIALIZATION Row-level initial partition from a cluster tree.
%   The last column of data_matrix holds, for every row, the index of the
%   individual the row belongs to. The tree Z is cut into nclusters
%   clusters with cluster_own, and every row receives the cluster label of
%   its individual.
%   Returns a column vector with one label per row of data_matrix.
indOfRow = data_matrix(:,end);
numInds = max(indOfRow);
labelOfInd = cluster_own(Z,nclusters);
initial_partition = zeros(size(data_matrix,1),1);
for ind = 1:numInds
    initial_partition(indOfRow == ind) = labelOfInd(ind);
end
function T = cluster_own(Z,nclust)
% CLUSTER_OWN Cut a linkage tree into at most nclust flat clusters.
%   Z      : linkage matrix with m-1 rows; columns 1 and 2 hold the two
%            nodes merged at each step. Node ids <= m are original
%            observations; id m+r refers to the cluster created in row r.
%   nclust : requested maximum number of clusters
%   T      : m x 1 vector of cluster labels
%
%   If there are no more observations than requested clusters, every
%   observation gets its own label; if one cluster is requested, all
%   labels are 1. Otherwise the last nclust-1 merges are undone and each
%   resulting subtree is labelled through clusternum.
maxclust = nclust;
m = size(Z,1)+1;
T = zeros(m,1);
if m <= maxclust
    T = (1:m)';
elseif maxclust==1
    T = ones(m,1);
else
    clsnum = 1;
    % Nodes with an id below this value were created before the cut.
    cutoffNode = 2*m-maxclust+1;
    for k = (m-maxclust+1):(m-1)
        % Handle the left and the right child of merge k identically.
        for child = Z(k,1:2)
            if child <= m
                % Original observation: it forms a cluster on its own.
                T(child) = clsnum;
                clsnum = clsnum + 1;
            elseif child < cutoffNode
                % Cluster created before the cut: label its whole subtree.
                T = clusternum(Z, T, child-m, clsnum);
                clsnum = clsnum + 1;
            end
        end
    end
end
function T = clusternum(X, T, k, c)
% CLUSTERNUM Label every observation below the given tree rows with c.
%   X : linkage matrix; columns 1 and 2 hold the children of each row
%   T : label vector to update
%   k : row indices of X whose subtrees are to be labelled
%   c : label to assign
%   Children with an id <= size(X,1)+1 are observations and are labelled
%   directly; larger ids refer to rows of X and are expanded in turn.
numLeaves = size(X,1)+1;
pending = k;
while ~isempty(pending)
    % Children of all rows on the current level, as one column.
    kids = X(pending,1:2);
    kids = kids(:);
    leafMask = (kids <= numLeaves);
    T(kids(leafMask)) = c;
    % Remaining children are internal nodes: descend into their rows.
    pending = kids(~leafMask) - numLeaves;
end
%--------------------------------------------------------------------------
function rows = computeRows(rowsFromInd, inds, ninds)
% COMPUTEROWS Data row numbers that belong to the given individuals.
%   rowsFromInd : number of consecutive data rows per individual
%   inds        : column vector of individual indices
%   ninds       : number of individuals in inds
%   Returns a 1 x (rowsFromInd*ninds) row vector; the rows of each
%   individual appear consecutively, in the order of inds.
lastRowOfInd = rowsFromInd * inds(:, ones(1,rowsFromInd));
stepsBack = repmat(rowsFromInd-1 : -1 : 0, [ninds 1]);
rowsPerInd = lastRowOfInd - stepsBack;
rows = reshape(rowsPerInd', [1,rowsFromInd*ninds]);
%-------------------------------------------------------------------------------------
function updateGlobalVariables(ind, i2, rowsFromInd, diffInCounts, ...
    adjprior, priorTerm)
% Applies the changes to the global variables when individual ind is
% moved to cluster i2.
%   ind          : index of the individual being moved
%   i2           : target cluster
%   rowsFromInd  : not used in this function
%   diffInCounts : (alleles x loci) counts of the individual; subtracted
%                  from its current cluster and added to i2
%   adjprior, priorTerm : passed on to computePopulationLogml
% Updates PARTITION_IN, COUNTS_IN, SUMCOUNTS_IN and POP_LOGML_IN.
global PARTITION_IN;
global COUNTS_IN;
global SUMCOUNTS_IN;
global POP_LOGML_IN;

i1 = PARTITION_IN(ind);
PARTITION_IN(ind)=i2;

% Sum over alleles explicitly along dimension 1: without the dimension
% argument a single-row count table would collapse to one scalar.
diffInSumCounts = sum(diffInCounts, 1);

COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1) - diffInCounts;
COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2) + diffInCounts;
SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:) - diffInSumCounts;
SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:) + diffInSumCounts;

POP_LOGML_IN([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);
%---------------------------------------------------------------------------------
function updateGlobalVariables2( ...
    i1, i2, rowsFromInd, diffInCounts, adjprior, priorTerm);
% Applies the changes to the global variables when all individuals in
% cluster i1 are moved to cluster i2.
%   i1, i2       : source and target cluster
%   rowsFromInd  : not used in this function
%   diffInCounts : (alleles x loci) counts of the moved individuals
%   adjprior, priorTerm : passed on to computePopulationLogml
% Updates PARTITION_IN, COUNTS_IN, SUMCOUNTS_IN and POP_LOGML_IN; the
% logml entry of the emptied cluster i1 is set to 0.
global PARTITION_IN;
global COUNTS_IN;
global SUMCOUNTS_IN;
global POP_LOGML_IN;

inds = find(PARTITION_IN==i1);
PARTITION_IN(inds) = i2;

% Sum over alleles explicitly along dimension 1: without the dimension
% argument a single-row count table would collapse to one scalar.
diffInSumCounts = sum(diffInCounts, 1);

COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1) - diffInCounts;
COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2) + diffInCounts;
SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:) - diffInSumCounts;
SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:) + diffInSumCounts;

POP_LOGML_IN(i1) = 0;
POP_LOGML_IN(i2) = computePopulationLogml(i2, adjprior, priorTerm);
%------------------------------------------------------------------------------------
function updateGlobalVariables3(muuttuvat, rowsFromInd, diffInCounts, ...
    adjprior, priorTerm, i2);
% Applies the updates to the global variables when the individuals
% 'muuttuvat' are moved to cluster i2. Before the move all of these
% individuals must belong to the same cluster.
%   muuttuvat    : indices of the individuals being moved
%   rowsFromInd  : not used in this function
%   diffInCounts : (alleles x loci) counts of the moved individuals
%   adjprior, priorTerm : passed on to computePopulationLogml
%   i2           : target cluster
% Updates PARTITION_IN, COUNTS_IN, SUMCOUNTS_IN and POP_LOGML_IN.
global PARTITION_IN;
global COUNTS_IN;
global SUMCOUNTS_IN;
global POP_LOGML_IN;

% The source cluster is read from the first moved individual.
i1 = PARTITION_IN(muuttuvat(1));
PARTITION_IN(muuttuvat) = i2;

% Sum over alleles explicitly along dimension 1: without the dimension
% argument a single-row count table would collapse to one scalar.
diffInSumCounts = sum(diffInCounts, 1);

COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1) - diffInCounts;
COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2) + diffInCounts;
SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:) - diffInSumCounts;
SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:) + diffInSumCounts;

POP_LOGML_IN([i1 i2]) = computePopulationLogml([i1 i2], adjprior, priorTerm);
%----------------------------------------------------------------------
function inds = returnInOrder(inds, pop, rowsFromInd, data, adjprior, priorTerm)
% Returns the individuals in order so that the first one is the
% individual whose removal from population pop would raise the logml
% value of pop the most.
%   inds        : column vector of individual indices (members of pop)
%   pop         : population the individuals belong to
%   rowsFromInd : number of data rows per individual
%   data        : allele data, one column per locus
%   adjprior, priorTerm : passed on to computePopulationLogml
% COUNTS_IN and SUMCOUNTS_IN are modified temporarily and restored
% before the next individual is examined.
global COUNTS_IN; global SUMCOUNTS_IN;

ninds = length(inds);
apuTaulu = [inds, zeros(ninds,1)];

for i=1:ninds
    ind = inds(i);
    rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
    diffInCounts = computeDiffInCounts(rows, size(COUNTS_IN,1), size(COUNTS_IN,2), data);
    % Explicit dimension: a single-row count table must still give one
    % total per locus rather than a single scalar.
    diffInSumCounts = sum(diffInCounts, 1);

    % Remove the individual, evaluate pop without it, then put it back.
    COUNTS_IN(:,:,pop) = COUNTS_IN(:,:,pop)-diffInCounts;
    SUMCOUNTS_IN(pop,:) = SUMCOUNTS_IN(pop,:)-diffInSumCounts;
    apuTaulu(i, 2) = computePopulationLogml(pop, adjprior, priorTerm);
    COUNTS_IN(:,:,pop) = COUNTS_IN(:,:,pop)+diffInCounts;
    SUMCOUNTS_IN(pop,:) = SUMCOUNTS_IN(pop,:)+diffInSumCounts;
end

% Ascending sort on the logml column, then reversed: largest first.
apuTaulu = sortrows(apuTaulu,2);
inds = apuTaulu(ninds:-1:1,1);
%------------------------------------------------------------------------------------
function [muutokset, diffInCounts] = ...
    laskeMuutokset(ind, rowsFromInd, data, adjprior, priorTerm)
% Returns an npops*1 table whose i:th element tells what the change in
% logml would be if individual ind were moved to cluster i.
% diffInCounts has to be removed from slice i1 of COUNTS_IN and added to
% slice i2 of COUNTS_IN if the change is carried out.
%   ind         : index of the individual
%   rowsFromInd : number of data rows per individual
%   data        : allele data, one column per locus
%   adjprior, priorTerm : passed on to computePopulationLogml
% The entry for the individual's current cluster stays 0. COUNTS_IN and
% SUMCOUNTS_IN are modified temporarily and restored before returning.
global COUNTS_IN; global SUMCOUNTS_IN;
global PARTITION_IN; global POP_LOGML_IN;
npops = size(COUNTS_IN,3);
muutokset = zeros(npops,1);

i1 = PARTITION_IN(ind);
i1_logml = POP_LOGML_IN(i1);

rows = (ind-1)*rowsFromInd+1 : ind*rowsFromInd;
diffInCounts = computeDiffInCounts(rows, size(COUNTS_IN,1), size(COUNTS_IN,2), data);
% Explicit dimension: a single-row count table must still give one total
% per locus rather than a single scalar.
diffInSumCounts = sum(diffInCounts, 1);

% logml of the current cluster without the individual.
COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)-diffInCounts;
SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)-diffInSumCounts;
new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)+diffInCounts;
SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)+diffInSumCounts;

% logml of every other cluster with the individual added.
i2 = [1:i1-1 , i1+1:npops];
i2_logml = POP_LOGML_IN(i2);

COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:)-repmat(diffInSumCounts,[npops-1 1]);

muutokset(i2) = new_i1_logml - i1_logml ...
    + new_i2_logml - i2_logml;
%------------------------------------------------------------------------------------
function [muutokset, diffInCounts] = laskeMuutokset2( ...
    i1, rowsFromInd, data, adjprior, priorTerm);
% Returns an npops*1 table whose i:th element tells what the change in
% logml would be if all individuals of cluster i1 were moved to
% cluster i.
%   i1          : source cluster
%   rowsFromInd : number of data rows per individual
%   data        : allele data, one column per locus
%   adjprior, priorTerm : passed on to computePopulationLogml
% diffInCounts holds the (alleles x loci) counts of the individuals of
% i1; it is all zeros, and muutokset is all zeros, when i1 is empty.
% COUNTS_IN and SUMCOUNTS_IN are modified temporarily and restored.
global COUNTS_IN; global SUMCOUNTS_IN;
global PARTITION_IN; global POP_LOGML_IN;
npops = size(COUNTS_IN,3);
muutokset = zeros(npops,1);

i1_logml = POP_LOGML_IN(i1);

inds = find(PARTITION_IN==i1);
ninds = length(inds);

if ninds==0
    diffInCounts = zeros(size(COUNTS_IN,1), size(COUNTS_IN,2));
    return;
end

rows = computeRows(rowsFromInd, inds, ninds);

diffInCounts = computeDiffInCounts(rows, size(COUNTS_IN,1), size(COUNTS_IN,2), data);
% Explicit dimension: a single-row count table must still give one total
% per locus rather than a single scalar.
diffInSumCounts = sum(diffInCounts, 1);

% logml of i1 after all of its individuals have been removed.
COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)-diffInCounts;
SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)-diffInSumCounts;
new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)+diffInCounts;
SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)+diffInSumCounts;

% logml of every other cluster after receiving those individuals.
i2 = [1:i1-1 , i1+1:npops];
i2_logml = POP_LOGML_IN(i2);

COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm);
COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:)-repmat(diffInSumCounts,[npops-1 1]);

muutokset(i2) = new_i1_logml - i1_logml ...
    + new_i2_logml - i2_logml;
%------------------------------------------------------------------------------------
function muutokset = laskeMuutokset3(T2, inds2, rowsFromInd, ...
    data, adjprior, priorTerm, i1)
% Returns a length(unique(T2))*npops table whose (i,j):th element tells
% what the change in logml would be if the sub-population
% inds2(find(T2==i)) of population i1 were moved to cluster j.
%   T2          : sub-cluster label of each individual in inds2
%   inds2       : individuals of population i1
%   rowsFromInd : number of data rows per individual
%   data        : allele data, one column per locus
%   adjprior, priorTerm : passed on to computePopulationLogml
%   i1          : population being split
% Column i1 and the rows of empty sub-clusters stay 0. COUNTS_IN and
% SUMCOUNTS_IN are modified temporarily and restored.
global COUNTS_IN; global SUMCOUNTS_IN;
global PARTITION_IN; global POP_LOGML_IN;
npops = size(COUNTS_IN,3);
npops2 = length(unique(T2));
muutokset = zeros(npops2, npops);

i1_logml = POP_LOGML_IN(i1);
for pop2 = 1:npops2
    inds = inds2(find(T2==pop2));
    ninds = length(inds);
    if ninds>0
        rows = computeRows(rowsFromInd, inds, ninds);
        diffInCounts = computeDiffInCounts(rows, size(COUNTS_IN,1), size(COUNTS_IN,2), data);
        % Explicit dimension: a single-row count table must still give
        % one total per locus rather than a single scalar.
        diffInSumCounts = sum(diffInCounts, 1);

        % logml of i1 without this sub-cluster.
        COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)-diffInCounts;
        SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)-diffInSumCounts;
        new_i1_logml = computePopulationLogml(i1, adjprior, priorTerm);
        COUNTS_IN(:,:,i1) = COUNTS_IN(:,:,i1)+diffInCounts;
        SUMCOUNTS_IN(i1,:) = SUMCOUNTS_IN(i1,:)+diffInSumCounts;

        % logml of every other cluster with this sub-cluster added;
        % transposed to rows so they fit one row of muutokset.
        i2 = [1:i1-1 , i1+1:npops];
        i2_logml = POP_LOGML_IN(i2)';

        COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2)+repmat(diffInCounts, [1 1 npops-1]);
        SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:)+repmat(diffInSumCounts,[npops-1 1]);
        new_i2_logml = computePopulationLogml(i2, adjprior, priorTerm)';
        COUNTS_IN(:,:,i2) = COUNTS_IN(:,:,i2)-repmat(diffInCounts, [1 1 npops-1]);
        SUMCOUNTS_IN(i2,:) = SUMCOUNTS_IN(i2,:)-repmat(diffInSumCounts,[npops-1 1]);

        muutokset(pop2,i2) = new_i1_logml - i1_logml ...
            + new_i2_logml - i2_logml;
    end
end
%------------------------------------------------------------------------------------
function diffInCounts = computeDiffInCounts(rows, max_noalle, nloci, data)
% Builds a max_noalle*nloci table holding the counts of the alleles
% (laid out like a slice of COUNTS_IN) found on the rows 'rows' of data.
%   rows       : row vector of data row numbers
%   max_noalle : number of rows of the result
%   nloci      : number of columns of the result
%   data       : allele data; negative entries are skipped
diffInCounts = zeros(max_noalle, nloci);
for r = rows
    alleles = data(r,:);
    observedLoci = find(alleles >= 0);
    if ~isempty(observedLoci)
        % Linear index of cell (allele, locus) in the result table.
        cellIdx = alleles(observedLoci) + (observedLoci-1)*max_noalle;
        diffInCounts(cellIdx) = diffInCounts(cellIdx) + 1;
    end
end
%------------------------------------------------------------------------------------
function popLogml = computePopulationLogml(pops, adjprior, priorTerm)
% Returns a length(pops)*1 table holding the cluster-wise logml values
% of the clusters listed in pops.
%   pops      : indices of the clusters to evaluate
%   adjprior  : (alleles x loci) table added to every count slice
%   priorTerm : term subtracted from every cluster's value
% Reads the globals COUNTS_IN and SUMCOUNTS_IN.
global COUNTS_IN;
global SUMCOUNTS_IN;
numPops = length(pops);
sliceDims = [size(COUNTS_IN,1), size(COUNTS_IN,2), numPops];
posterior = repmat(adjprior,[1 1 numPops]) + COUNTS_IN(:,:,pops);
% Sum gammaln over alleles and loci, leaving one value per cluster.
alleleTerm = squeeze(sum(sum(reshape(gammaln(posterior), sliceDims),1),2));
totalTerm = sum(gammaln(1+SUMCOUNTS_IN(pops,:)),2);
popLogml = alleleTerm - totalTerm - priorTerm;
%----------------------------------------------------------------------------
function dist2 = laskeOsaDist(inds2, dist, ninds)
% Extracts from the vector dist the sub-vector that holds the distances
% between the individuals inds2. ninds = total number of individuals.
%   dist is indexed pair by pair in the order (1,2),(1,3),...,(2,3),...;
%   the pairs of inds2 are produced in the same order.
ninds2 = length(inds2);
numPairs = nchoosek(ninds2,2);
first = zeros(numPairs,1);
second = zeros(numPairs,1);
pairNo = 0;
for a = 1:ninds2-1
    for b = a+1:ninds2
        pairNo = pairNo+1;
        first(pairNo) = inds2(a);
        second(pairNo) = inds2(b);
    end
end
% Position of pair (first, second) in the full pairwise vector.
pairIdx = (first-1).*ninds - first ./ 2 .* (first-1) + (second-first);
dist2 = dist(pairIdx);
%----------------------------------------------------------------------------------------
function Z = linkage(Y, method)
% LINKAGE Build a hierarchical cluster tree from pairwise distances.
%   Y      : 1 x n row vector of pairwise distances, n = m*(m-1)/2 for m
%            observations (the size produced by PDIST, per the error text)
%   method : string; only its first two characters (lower-cased) are
%            used: 'si', 'av', 'co' (default), 'ce' or 'wa'
%   Z      : (m-1) x 3 matrix; row s holds the ids of the two clusters
%            merged at step s and their distance. Ids 1..m are the
%            observations, id m+s is the cluster created at step s.
[k, n] = size(Y);
% Solve n = m*(m-1)/2 for the number of observations m.
m = (1+sqrt(1+8*n))/2;
if k ~= 1 | m ~= fix(m)
error('The first input has to match the output of the PDIST function in size.');
end
if nargin == 1 % set default switch to be 'co'
method = 'co';
end
method = lower(method(1:2)); % simplify the switch string.
% NOTE(review): monotonic is never read again in this function.
monotonic = 1;
Z = zeros(m-1,3); % allocate the output matrix.
% N(id) = number of observations in cluster id (1 for each observation).
N = zeros(1,2*m-1);
N(1:m) = 1;
n = m; % since m is changing, we need to save m in n.
% R maps a current position 1..m to the cluster id stored there.
R = 1:n;
for s = 1:(n-1)
X = Y;
% Closest remaining pair: v is the distance, k its position in Y.
[v, k] = min(X);
% Recover the pair (i,j), i<j, from the linear position k.
i = floor(m+1/2-sqrt(m^2-m+1/4-2*(k-1)));
j = k - (i-1)*(m-i/2)+i;
Z(s,:) = [R(i) R(j) v]; % update one more row to the output matrix A
I1 = 1:(i-1); I2 = (i+1):(j-1); I3 = (j+1):m; % these are temp variables.
% U lists all positions other than i and j; I and J are the positions
% in Y of the distances from those clusters to i and to j.
U = [I1 I2 I3];
I = [I1.*(m-(I1+1)/2)-m+i i*(m-(i+1)/2)-m+I2 i*(m-(i+1)/2)-m+I3];
J = [I1.*(m-(I1+1)/2)-m+j I2.*(m-(I2+1)/2)-m+j j*(m-(j+1)/2)-m+I3];
% Overwrite the distances to i with the distances to the merged cluster.
switch method
case 'si' %single linkage
Y(I) = min(Y(I),Y(J));
% NOTE(review): 'av' only accumulates the sum of the two distances; no
% division by cluster sizes is visible in this function - confirm.
case 'av' % average linkage
Y(I) = Y(I) + Y(J);
case 'co' %complete linkage
Y(I) = max(Y(I),Y(J));
case 'ce' % centroid linkage
K = N(R(i))+N(R(j));
Y(I) = (N(R(i)).*Y(I)+N(R(j)).*Y(J)-(N(R(i)).*N(R(j))*v^2)./K)./K;
% presumably Ward's method - verify against the intended formula
case 'wa'
Y(I) = ((N(R(U))+N(R(i))).*Y(I) + (N(R(U))+N(R(j))).*Y(J) - ...
N(R(U))*v)./(N(R(i))+N(R(j))+N(R(U)));
end
% Also drop the distance between i and j themselves.
J = [J i*(m-(i+1)/2)-m+j];
Y(J) = []; % no need for the cluster information about j.
% update m, N, R
m = m-1;
N(n+s) = N(R(i)) + N(R(j));
% Position i now holds the new cluster; positions after j shift down.
R(i) = n+s;
R(j:(n-1))=R((j+1):n);
end

View file

@ -0,0 +1,18 @@
function [partition, counts, sumcounts] = initSpatialMultiMixture(initData, ...
    npops, Z, rows, noalle, dist, adjprior, priorTerm, fixedK);
% Finds an initial state for the spatial multi-mixture with the greedy
% algorithm of BAPS 3.1.
%   The inputs are packed into a struct and handed to indMix_fixK; the
%   outputs are then read from the globals PARTITION, COUNTS and
%   SUMCOUNTS. fixedK is not used in this function.
global PARTITION; global COUNTS;
global SUMCOUNTS; global POP_LOGML;

searchInput.data = initData;
searchInput.Z = Z;
searchInput.rows = rows;
searchInput.rowsFromInd = 0;
searchInput.noalle = noalle;
searchInput.dist = dist;
searchInput.adjprior = adjprior;
searchInput.priorTerm = priorTerm;

indMix_fixK(searchInput,npops,1,0);

partition = PARTITION;
counts = COUNTS;
sumcounts = SUMCOUNTS;

View file

@ -0,0 +1,82 @@
function changes = calcLogmlChanges(inds, cqData, nCqCodes, spData, nSpCodes, locCliques, locSeparators, logml)
% compute the logml change if the given inds are moved to another cluster
% the input inds are supposed to come from the same cluster
% changes is a npops*1 vector; the entry of the source cluster stays 0
% Lu Cheng, 15.12.2012
%
% The global count tables are modified while the candidate moves are
% evaluated and are restored before the function returns; PARTITION is
% only read.
global CQ_COUNTS;
global PARTITION;

npops = size(CQ_COUNTS,3);
changes = zeros(npops,1);

if isempty(inds)
    return
end

sourcePop = PARTITION(inds(1));

% Counts contributed by the individuals that would be moved.
[dCq, dCqSum] = computeDiffInCounts(inds, cqData, nCqCodes);
[dSp, dSpSum] = computeDiffInCounts(inds, spData, nSpCodes);
dLocCq = computeDiffInCliqCounts(locCliques, inds);
dLocSp = computeDiffInCliqCounts(locSeparators, inds);

% Take the individuals out of their current cluster.
shiftPopCounts(sourcePop, -1, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp);
updateLogmlTable(sourcePop);

for targetPop = 1:npops
    if targetPop == sourcePop
        continue
    end

    % Put them into the candidate cluster and evaluate the total logml.
    shiftPopCounts(targetPop, 1, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp);
    updateLogmlTable(targetPop);

    changes(targetPop) = computeTotalLogml() - logml;

    % Take them out again so the next candidate starts from the same state.
    shiftPopCounts(targetPop, -1, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp);
    updateLogmlTable(targetPop);
end

% Restore the original cluster.
shiftPopCounts(sourcePop, 1, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp);
updateLogmlTable(sourcePop);

function shiftPopCounts(pop, direction, dCq, dCqSum, dSp, dSpSum, dLocCq, dLocSp)
% Adds (direction = 1) or subtracts (direction = -1) the given count
% differences to/from the entries of cluster pop in the global tables.
global CQ_COUNTS; global SUM_CQ_COUNTS;
global SP_COUNTS; global SUM_SP_COUNTS;
global LOC_CQ_COUNTS;
global LOC_SP_COUNTS;

CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop) + direction*dCq;
SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop) + direction*dSp;
SUM_CQ_COUNTS(:,pop) = SUM_CQ_COUNTS(:,pop) + direction*dCqSum;
SUM_SP_COUNTS(:,pop) = SUM_SP_COUNTS(:,pop) + direction*dSpSum;
LOC_CQ_COUNTS(:,pop) = LOC_CQ_COUNTS(:,pop) + direction*dLocCq;
LOC_SP_COUNTS(:,pop) = LOC_SP_COUNTS(:,pop) + direction*dLocSp;
%---------------------------------------------------------------------

View file

@ -0,0 +1,18 @@
function clearGlobalVars
% Resets every global variable used by the model search to the empty
% matrix.
% Lu Cheng, 15.12.2012
global COUNTS SUMCOUNTS PARTITION LOGML_TABLE
global ADDITION_DIFFERENCE REMOVAL_DIFFERENCE JOIN_DIFFERENCE
global CQ_COUNTS SP_COUNTS SUM_CQ_COUNTS SUM_SP_COUNTS
global CQ_PRIOR SP_PRIOR
global LOC_SP_COUNTS LOC_CQ_COUNTS

[COUNTS, SUMCOUNTS, PARTITION, LOGML_TABLE] = deal([]);
[ADDITION_DIFFERENCE, REMOVAL_DIFFERENCE, JOIN_DIFFERENCE] = deal([]);
[CQ_COUNTS, SP_COUNTS, SUM_CQ_COUNTS, SUM_SP_COUNTS] = deal([]);
[CQ_PRIOR, SP_PRIOR] = deal([]);
[LOC_SP_COUNTS, LOC_CQ_COUNTS] = deal([]);

View file

@ -0,0 +1,52 @@
function T = cluster_own(Z,nclust)
% search down the dendrogram from the root, until nclust clusters are found
% comments added by Lu Cheng
% 04.01.2011
%
%   Z      : linkage matrix with m-1 rows; columns 1 and 2 hold the two
%            nodes merged at each step (ids <= m are observations, id
%            m+r is the cluster created in row r)
%   nclust : requested maximum number of clusters
%   T      : m x 1 vector of cluster labels
maxclust = nclust;
m = size(Z,1)+1;
T = zeros(m,1);
if m <= maxclust
    % No more observations than clusters: one label per observation.
    T = (1:m)';
elseif maxclust==1
    T = ones(m,1);
else
    clsnum = 1;
    % Nodes with an id below this value were created before the cut.
    cutoffNode = 2*m-maxclust+1;
    for k = (m-maxclust+1):(m-1)
        % Left child first, then right child.
        for child = Z(k,1:2)
            if child <= m
                % original node, no leafs
                T(child) = clsnum;
                clsnum = clsnum + 1;
            elseif child < cutoffNode
                % created before cutoff, search down the tree
                T = clusternum(Z, T, child-m, clsnum);
                clsnum = clsnum + 1;
            end
        end
    end
end
function T = clusternum(X, T, k, c)
% Assigns label c to every observation found below the rows k of the
% linkage matrix X and returns the updated label vector T.
%   Children with an id <= size(X,1)+1 are observations; larger ids
%   refer to rows of X and are expanded level by level.
numLeaves = size(X,1)+1;
pending = k;
while ~isempty(pending)
    % Get the children of nodes at this level
    kids = X(pending,1:2);
    kids = kids(:);
    % Assign this node number to leaf children
    leafMask = (kids <= numLeaves);
    T(kids(leafMask)) = c;
    % Move to next level
    pending = kids(~leafMask) - numLeaves;
end

View file

@ -0,0 +1,27 @@
function [cliqcounts, sepcounts] = computeCounts(cliques, separators, npops)
% COMPUTECOUNTS Count, per clique and per separator, members per cluster.
%   cliques, separators : matrices with one clique/separator per row;
%                         entries > 0 are individual indices, other
%                         entries are ignored
%   npops               : number of clusters
%   cliqcounts(r,i) / sepcounts(r,i) : number of members of row r that
%                         belong to cluster i according to the global
%                         PARTITION
global PARTITION;
ncliq = size(cliques,1);
nsep = size(separators,1);

% Replace every individual index by the cluster of that individual.
cliqPartition = zeros(ncliq, size(cliques,2));
usedCliq = (cliques > 0);
cliqPartition(usedCliq) = PARTITION(cliques(usedCliq));

sepPartition = zeros(nsep, size(separators, 2));
usedSep = (separators > 0);
sepPartition(usedSep) = PARTITION(separators(usedSep));

cliqcounts = zeros(ncliq, npops);
sepcounts = zeros(nsep, npops);
for pop = 1:npops
    cliqcounts(:,pop) = sum(cliqPartition == pop, 2);
    sepcounts(:,pop) = sum(sepPartition == pop, 2);
end
%-------------------------------------------------------------------------

View file

@ -0,0 +1,19 @@
function diffInCliqCounts = computeDiffInCliqCounts(cliques, inds)
% Computes the change in the clique counts (or in the separator counts,
% if separators are given as input) when the individuals inds are moved.
% diffInCliqCounts is an ncliq*1 table that is to be subtracted from the
% column of the cluster the individuals are moved from and added to the
% column of the cluster they are moved to.
% taken from spatial model of Jukka Siren's code
% Lu Cheng
% 15.12.2012
diffInCliqCounts = zeros(size(cliques,1),1);
for idx = 1:length(inds)
    % Number of times this individual occurs on each row.
    isMember = (cliques == inds(idx));
    diffInCliqCounts = diffInCliqCounts + sum(isMember,2);
end

View file

@ -0,0 +1,7 @@
function [counts sumcounts] = computeDiffInCounts(rows, data, nLetters)
% calculate the counts of the given rows of the data (ninds*nLoci)
% nLetters is the maximum number of different symbols over all loci
% Lu Cheng, 25.05.2011
%   counts    : nLetters x nLoci, occurrences of each symbol per column
%   sumcounts : nLoci x 1, column totals of counts
selected = data(rows,:);
symbolEdges = 1:nLetters;
counts = histc(selected, symbolEdges, 1);
sumcounts = transpose(sum(counts,1));

View file

@ -0,0 +1,42 @@
function logml = computeTotalLogml
% compute the log marginal likelihood of the data
% Lu Cheng, 15.12.2012
%   Result = sum(LOGML_TABLE) + (clique term - separator term), where
%   both terms are computed from the global LOC_CQ_COUNTS and
%   LOC_SP_COUNTS restricted to the non-empty clusters.
global LOGML_TABLE;
global LOC_CQ_COUNTS;
global LOC_SP_COUNTS;

% Clusters (columns) that hold at least one clique member.
occupied = any(LOC_CQ_COUNTS,1);
npops = nnz(occupied);

% some lines might all be zero if some sequence is deleted
% (added by Lu Cheng, 15.12.2012): such rows are left out.
cqRows = find(any(LOC_CQ_COUNTS,2));
spRows = find(any(LOC_SP_COUNTS,2));

cliqueTerm = localCountTerm(LOC_CQ_COUNTS(cqRows,occupied), npops);
separatorTerm = localCountTerm(LOC_SP_COUNTS(spRows,occupied), npops);

spatialPrior = (cliqueTerm - separatorTerm);
logml = sum(LOGML_TABLE) + spatialPrior;

function term = localCountTerm(locCounts, npops)
% Gammaln-based term for one table of per-row, per-cluster counts.
rowTotals = sum(locCounts, 2);
% Each row uses 1/min(row total, npops) as its pseudo-count.
rowSizes = min(rowTotals, npops);
term = sum(sum(gammaln(locCounts + repmat(1./rowSizes, [1 npops])))) ...
    - sum(npops*(gammaln(1./rowSizes))) ...
    - sum(gammaln(rowTotals + 1));

View file

@ -0,0 +1,17 @@
function [sumcounts, counts] = initialCounts2(partition, data, npops, nLetters)
% initialize counts and sumcounts for the initial partition
% npops: number of populations in the partition
% nLetters: the maximum number of different symbols over all loci
% Lu Cheng, 25.05.2011
%   counts    : nLetters x nLoci x npops, symbol occurrences per column
%               among the rows assigned to each population
%   sumcounts : nLoci x npops, column totals of each counts slice
[unusedRows, nLoci] = size(data);
counts = zeros(nLetters,nLoci,npops);
sumcounts = zeros(nLoci,npops);
symbolEdges = 1:nLetters;
for pop = 1:npops
    members = (partition==pop);
    popCounts = histc(data(members,:), symbolEdges, 1);
    counts(:,:,pop) = popCounts;
    sumcounts(:,pop) = sum(popCounts,1);
end

View file

@ -0,0 +1,366 @@
function [partition, logml, partitionSummary, logmldiff] = model_search_parallel(c, partition, orig_dist, roundTypes)
% MODEL_SEARCH_PARALLEL Greedy search for the best partition of the sequences.
% This function clusters DNA alignment using "codon" model in Corander and Tang's
% paper: Bayesian analysis of population structure based on linked
% molecular information (2007), Mathematical Biosciences
%
% Inputs:
%   c          - preprocessed data for the sequence alignment; the fields
%                read here are cqPrior, spPrior, cqData, spData,
%                nMaxCqCodes, nMaxSpCodes, locCliques, locSeparators, nSeq
%   partition  - initial partition of the individuals
%   orig_dist  - hamming distance between individuals, indexed by
%                (1,2)(1,3)(1,4)...(2,3)(2,4).....(3,4)...(n-1,n)
%   roundTypes - array of operation types: 1 = move single individuals,
%                2 = join two populations, 3 and 4 = split a population
%                and move one part elsewhere
% Outputs:
%   partition        - final value of the global PARTITION
%   logml            - value of computeTotalLogml for the final partition
%   partitionSummary - 30-by-2-by-nPOPS table, column 1 = number of
%                      clusters, column 2 = logml, third index = number of
%                      non-empty populations
%   logmldiff        - nINDS-by-nPOPS change of logml if individual i is
%                      moved to group j (0 where not computed)
%
% Lu Cheng
% 15.12.2012

interactive = false;

global PARTITION;
global CQ_COUNTS;global SUM_CQ_COUNTS;
global SP_COUNTS;global SUM_SP_COUNTS;
global CQ_PRIOR; global SP_PRIOR;
global LOGML_TABLE;
global ADDITION_DIFFERENCE;
global REMOVAL_DIFFERENCE;
global JOIN_DIFFERENCE;
global LOC_SP_COUNTS;
global LOC_CQ_COUNTS;

clearGlobalVars;

nPOPS = length(unique(partition));

% PRIOR VALUES:
CQ_PRIOR = c.cqPrior;
SP_PRIOR = c.spPrior;

% Initialize PARTITION, **_COUNTS, SUM_**_COUNTS
[sumCqCounts, cqCounts] = initialCounts2(partition, c.cqData, nPOPS, c.nMaxCqCodes);
[sumSpCounts, spCounts] = initialCounts2(partition, c.spData, nPOPS, c.nMaxSpCodes);
CQ_COUNTS = cqCounts; SUM_CQ_COUNTS = sumCqCounts;
SP_COUNTS = spCounts; SUM_SP_COUNTS = sumSpCounts;
PARTITION = partition;
[cliqcounts, sepcounts] = computeCounts(c.locCliques, c.locSeparators, nPOPS);
LOC_CQ_COUNTS = cliqcounts;
LOC_SP_COUNTS = sepcounts;

% Information on the 30 best partitions (npops and logml)
partitionSummary = -Inf*ones(30,2,nPOPS);
partitionSummary(:,1,:) = zeros(30,1,nPOPS);
worstLogml = -Inf*ones(1, nPOPS); worstIndex = ones(1, nPOPS);

clear partition cqCounts sumCqCounts spCounts sumSpCounts

% Initialize LOGML_TABLE:
nINDS = c.nSeq;
LOGML_TABLE = zeros(nPOPS,1);
updateLogmlTable(1:nPOPS);

REMOVAL_DIFFERENCE = zeros(nINDS,1);
REMOVAL_DIFFERENCE(:,:) = nan;
ADDITION_DIFFERENCE = zeros(nINDS,nPOPS);
ADDITION_DIFFERENCE(:,:) = nan;
JOIN_DIFFERENCE = zeros(nPOPS, nPOPS);
JOIN_DIFFERENCE(:,:) = nan;

% ***********Doc:********************
% REMOVAL_DIFFERENCE(ind) tells the change in logml if ind is removed from
% its cluster. nan, if the cluster has changed, since the value was last
% calculated.
%
% ADDITION_DIFFERENCE(ind, pop) tells the change in logml if ind is added
% to cluster pop. nan, if the cluster has changed since the value was last
% calculated. Always nan, if pop is ind's own cluster.
%
% JOIN_DIFFERENCE(pop1,pop2) = tells the change in logml if pop1 and pop2
% are combined. nan, if either cluster has changed since the value was last
% calculated.
% ***********Doc end*****************

logml = computeTotalLogml;

disp('The beginning:');
% disp(['Partition: ' num2str(PARTITION')]);
disp(['Nclusters: ' num2str(length(unique(PARTITION)))]);
disp(['Log(ml*prior): ' num2str(logml)]);
disp(' ');

[partitionSummary, worstLogml, worstIndex] = ...
    recordPartition(logml, partitionSummary, worstLogml, worstIndex);

% START SEARCH OF THE BEST PARTITION:
% vipu(k)==1 marks that step type k produced no change since the last
% successful move, so it is skipped until something changes again.
vipu = zeros(1,14);
if interactive
    roundTypes = input('Input steps: ');
    if ischar(roundTypes), roundTypes = str2num(roundTypes); end
end
ready = 0;

while ready ~= 1

    % disp(['Performing steps: ' num2str(roundTypes)]);

    for n = 1:length(roundTypes)
        % Named roundType (not round) so the builtin round() is not shadowed.
        roundType = roundTypes(n);
        moveCounter = 0;

        if roundType==1 && vipu(1)==0 % move an individual to another population

            % inds = randperm(nINDS);
            inds = getMoveInds(orig_dist,nINDS); % get inds to be moved

            for ind = inds(:)'
                update_difference_tables(ind, c.cqData, c.nMaxCqCodes, ...
                    c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
                tmpDiff = REMOVAL_DIFFERENCE(ind) + ADDITION_DIFFERENCE(ind,:);
                tmpDiff(PARTITION(ind)) = 0; % staying put changes nothing
                [maxChange, maxIndex] = max(tmpDiff);
                if maxChange>1e-5
                    updateGlobalVariables(ind, maxIndex, c.cqData, c.nMaxCqCodes, ...
                        c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators);
                    % fprintf('moving from %d to %d.\n',PARTITION(ind),maxIndex)
                    logml = computeTotalLogml();
                    moveCounter = moveCounter+1;
                    vipu = zeros(1,14);
                    [partitionSummary, worstLogml, worstIndex] = ...
                        recordPartition(logml, partitionSummary, worstLogml, worstIndex);
                end
            end
            if moveCounter==0, vipu(1)=1; end
            disp(['Step 1: ' num2str(moveCounter) ' individuals were moved.']);

        elseif roundType==2 && vipu(2)==0 % join two populations

            update_join_difference(c.cqData, c.nMaxCqCodes, ...
                c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
            [maxChange, aux] = max(JOIN_DIFFERENCE(:));
            [i1, i2] = ind2sub([nPOPS,nPOPS],aux);

            if maxChange>1e-5
                tmpInds = find(PARTITION==i1);
                updateGlobalVariables(tmpInds, i2, c.cqData, c.nMaxCqCodes, ...
                    c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators);
                logml = computeTotalLogml;
                disp(['Step 2: Clusters ' num2str(i1) ' and ' num2str(i2) ' combined.']);
                vipu = zeros(1,14);
                [partitionSummary, worstLogml, worstIndex] = ...
                    recordPartition(logml, partitionSummary, worstLogml, worstIndex);
            else
                disp('Step 2: no changes.');
                vipu(2)=1;
            end

        elseif ismember(roundType, 3:4) && vipu(roundType)==0 % Split a population, and move one subpopulation to another population

            pops = randperm(nPOPS);
            splitFlags = zeros(nPOPS,1);
            for pop = pops(:)'
                maxChange = 0;
                indsToBeMoved = [];
                inds2 = find(PARTITION==pop);
                ninds2 = length(inds2);
                if ninds2>4
                    dist3 = getDistance(inds2, orig_dist, nINDS);
                    if roundType==3
                        npops2 = min(20, floor(ninds2 / 5)); % how many parts to split into
                    else
                        npops2 = 2;
                    end

                    Z3 = linkage(dist3);
                    T3 = cluster_own(Z3, npops2);

                    for i = 1:npops2
                        indsX = inds2(T3==i); indsX = indsX';
                        tmpChanges = calcLogmlChanges(indsX, c.cqData, c.nMaxCqCodes, ...
                            c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
                        [tmpMaxChange, tmpMaxPop] = max(tmpChanges);
                        if tmpMaxChange>maxChange
                            maxChange = tmpMaxChange;
                            % i1 = pop;
                            i2 = tmpMaxPop;
                            indsToBeMoved = indsX;
                        end
                    end

                    if maxChange>1e-5
                        updateGlobalVariables(indsToBeMoved, i2, c.cqData, c.nMaxCqCodes, ...
                            c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators);
                        logml = computeTotalLogml;
                        splitFlags(pop)=1;
                        [partitionSummary, worstLogml, worstIndex] = ...
                            recordPartition(logml, partitionSummary, worstLogml, worstIndex);
                    end
                end
            end
            if any(splitFlags)
                disp(['Step ' num2str(roundType) ': ' num2str(sum(splitFlags)) ' populations were split.']);
                vipu = zeros(1,14);
            else
                disp(['Step ' num2str(roundType) ': no changes.']);
                vipu(roundType)=1;
            end
        end
    end

    if interactive
        roundTypes = input('Input extra steps: ');
        if ischar(roundTypes), roundTypes = str2num(roundTypes); end
    else
        roundTypes = [];
    end

    if isempty(roundTypes)
        ready = 1;
    end
end

%disp(' ');
disp('BEST PARTITION: ');
%disp(['Partition: ' num2str(PARTITION')]);
disp(['Nclusters: ' num2str(length(unique(PARTITION)))]);
disp(['Log(ml): ' num2str(logml)]);
disp(' ');

nPOPS= rmEmptyPopulation(c.locCliques, c.locSeparators);

ADDITION_DIFFERENCE(:) = NaN;
REMOVAL_DIFFERENCE(:) = NaN;

logmldiff = zeros(nINDS,nPOPS);  % the change of logml if individual i is moved to group j
for i=1:nINDS
    update_difference_tables(i, c.cqData, c.nMaxCqCodes, ...
        c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
    logmldiff(i,:) = REMOVAL_DIFFERENCE(i)+ADDITION_DIFFERENCE(i,:);
    % A row can legitimately be all NaN: the own-cluster entry of
    % ADDITION_DIFFERENCE is never computed, so with a single remaining
    % population every row is NaN. The former `keyboard` debugging trap
    % here halted non-interactive runs; NaNs are zeroed below instead.
end
logmldiff(isnan(logmldiff))=0;
partition = PARTITION;

%----------------------------------------------------------------------------
function [partitionSummary, worstLogml, worstIndex] = recordPartition(logml, partitionSummary, worstLogml, worstIndex)
% Offer the current partition (global PARTITION) with score logml to the
% summary page for its number of non-empty populations, and refresh the
% cached worst entry of that page when the summary was changed.
global PARTITION;
nnotEmptyPops = length(unique(PARTITION));
if logml>worstLogml(nnotEmptyPops)
    [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ...
        partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops));
    if (added==1)
        [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ...
            min(partitionSummary(:,2,nnotEmptyPops));
    end
end
%----------------------------------------------------------------------------
function [dist2, dind1, dind2] = getDistance(inds2, dist_orig, ninds)
% GETDISTANCE Pick out the distances between the samples in inds2.
%   inds2     - ascending sample indices
%   dist_orig - distances of (1,2),(1,3),(1,4)......(ninds-1,ninds)
%   ninds     - total number of samples that dist_orig was built for
%   dist2     - distances of all pairs within inds2, in the same pair order
%   dind1     - column with the first sample index of every pair
%   dind2     - column with the second sample index of every pair
% With fewer than two indices there are no pairs and all outputs are empty
% (the earlier nchoosek-based preallocation raised an error in that case).
% Lu Cheng, 22.06.2011

if ~issorted(inds2)
    error('inds2 is not in ascending order!');
end

ninds2 = length(inds2);
members = double(inds2(:));

% Entries below the diagonal, enumerated column by column, give the pairs
% (i,j) with i<j in the order (1,2),(1,3),...,(2,3),...
[jPos, iPos] = find(tril(true(ninds2), -1));
dind1 = reshape(members(iPos), [], 1);
dind2 = reshape(members(jPos), [], 1);

% Position of the pair (a,b), a<b, in the condensed distance vector.
pairPos = (dind1-1).*ninds - dind1 ./ 2 .* (dind1-1) + (dind2-dind1);
dist2 = dist_orig(pairPos);
%---------------------------------------------------------------
function inds = getMoveInds(dist_orig, ninds)
% GETMOVEINDS Individual indices to be tried for a move to another cluster.
% Clusters (taken from the global PARTITION) with fewer than 20 members
% contribute all their members. For larger clusters only the members that
% take part in a pair whose distance exceeds the value found 30% of the way
% down the descending list of within-cluster distances are returned.
% Lu Cheng, 25.05.2011

global PARTITION;

clusterIds = unique(PARTITION);
inds = [];

for clusterId = clusterIds(:)'
    members = find(PARTITION == clusterId)';

    if length(members) >= 20
        [pairDist, first, second] = getDistance(members, dist_orig, ninds);
        descending = sort(pairDist, 'Descend');
        cutoff = descending(round(length(descending)*0.3));
        farPairs = find(pairDist > cutoff);
        members = union(first(farPairs), second(farPairs));
    end

    inds = [inds members(:)']; %#ok<AGROW>
end
% ------------------------------------------------------------
function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex)
% ADDTOSUMMARY Record the current partition in a summary table if it is new.
% The caller has established that logml is larger than the worst value in
% partitionSummary. If no row already holds this logml (within 1e-5), the
% row worstIndex is overwritten with the number of clusters of the current
% partition (column 1) and logml (column 2), and added is 1. Otherwise
% nothing is changed and added is 0.

global PARTITION;

alreadyListed = any(abs(partitionSummary(:,2)-logml) < 1e-5);
if alreadyListed
    added = 0;
    return;
end

% The partition found now is not yet recorded in the summary.
partitionSummary(worstIndex,1) = length(unique(PARTITION));
partitionSummary(worstIndex,2) = logml;
added = 1;

View file

@ -0,0 +1,385 @@
function [partition, logml, partitionSummary, logmldiff] = model_search_pregroup(c, pgPart, pgDist, roundTypes, nMaxPops)
% MODEL_SEARCH_PREGROUP Greedy partition search that moves whole pregroups.
% This function clusters DNA alignment using "codon" model in Corander and Tang's
% paper: Bayesian analysis of population structure based on linked
% molecular information (2007), Mathematical Biosciences
%
% Inputs:
%   c          - preprocessed data for the sequence alignment; the fields
%                read here are cqPrior, spPrior, cqData, spData,
%                nMaxCqCodes, nMaxSpCodes, locCliques, locSeparators, nSeq
%   pgPart     - partition which assigns sequences to pregroups
%   pgDist     - distances between the pregroups
%                (1,2)(1,3)(1,4)...(2,3)(2,4).....(3,4)...(n-1,n)
%   roundTypes - array of operation types: 1 = move single pregroups,
%                2 = join two populations, 3 and 4 = split a population
%                and move one part elsewhere
%   nMaxPops   - number of populations in the initial clustering
% Outputs:
%   partition        - nPregroup-by-1 population label of every pregroup
%   logml            - value of computeTotalLogml for the final partition
%   partitionSummary - 30-by-2-by-nMaxPops table, column 1 = number of
%                      clusters, column 2 = logml, third index = number of
%                      non-empty populations
%   logmldiff        - nPregroup-by-nPOPS change of logml if pregroup i is
%                      moved to group j (NaN entries replaced by 0)
%
% Lu Cheng
% 21.03.2012

interactive = false;

global PARTITION;
global CQ_COUNTS;global SUM_CQ_COUNTS;
global SP_COUNTS;global SUM_SP_COUNTS;
global CQ_PRIOR; global SP_PRIOR;
global LOGML_TABLE;
global ADDITION_DIFFERENCE;
global REMOVAL_DIFFERENCE;
global JOIN_DIFFERENCE;
global LOC_SP_COUNTS;
global LOC_CQ_COUNTS;

clearGlobalVars;

nINDS = c.nSeq;
nPOPS = nMaxPops;

% load pregroup information
nPregroup = length(unique(pgPart));
if nPregroup<nMaxPops
    error('#pregroup: %d, nMaxPops: %d. Number of pregroups should be higher than maximum number of population. \n',nPregroup,nMaxPops);
end

pregroups = cell(nPregroup,1);
pgSize = zeros(nPregroup,1);
for i=1:nPregroup
    pregroups{i} = find(pgPart==i);
    pgSize(i) = length(pregroups{i});
end

% Initial partition: complete-linkage clustering of the pregroups, every
% sequence inheriting the cluster of its pregroup.
pgZ = linkage(pgDist(:)','complete');
initPart = cluster(pgZ,'maxclust',nPOPS);
partition = zeros(nINDS,1);
for i=1:nPregroup
    partition(pregroups{i}) = initPart(i);
end
clear i pgZ initPart

% PRIOR VALUES:
CQ_PRIOR = c.cqPrior;
SP_PRIOR = c.spPrior;

% Initialize PARTITION, **_COUNTS, SUM_**_COUNTS
[sumCqCounts, cqCounts] = initialCounts2(partition, c.cqData, nPOPS, c.nMaxCqCodes);
[sumSpCounts, spCounts] = initialCounts2(partition, c.spData, nPOPS, c.nMaxSpCodes);
CQ_COUNTS = cqCounts; SUM_CQ_COUNTS = sumCqCounts;
SP_COUNTS = spCounts; SUM_SP_COUNTS = sumSpCounts;
PARTITION = partition;
[cliqcounts, sepcounts] = computeCounts(c.locCliques, c.locSeparators, nPOPS);
LOC_CQ_COUNTS = cliqcounts;
LOC_SP_COUNTS = sepcounts;

% Information on the 30 best partitions (npops and logml)
partitionSummary = -Inf*ones(30,2,nPOPS);
partitionSummary(:,1,:) = zeros(30,1,nPOPS);
worstLogml = -Inf*ones(1, nPOPS); worstIndex = ones(1, nPOPS);

clear partition cqCounts sumCqCounts spCounts sumSpCounts;

% Initialize LOGML_TABLE:
nINDS = c.nSeq;
LOGML_TABLE = zeros(nPOPS,1);
updateLogmlTable(1:nPOPS);

REMOVAL_DIFFERENCE = zeros(nINDS,1);
REMOVAL_DIFFERENCE(:,:) = nan;
ADDITION_DIFFERENCE = zeros(nINDS,nPOPS);
ADDITION_DIFFERENCE(:,:) = nan;
JOIN_DIFFERENCE = zeros(nPOPS, nPOPS);
JOIN_DIFFERENCE(:,:) = nan;

% ***********Doc:********************
% REMOVAL_DIFFERENCE(ind) tells the change in logml if ind is removed from
% its cluster. nan, if the cluster has changed, since the value was last
% calculated.
%
% ADDITION_DIFFERENCE(ind, pop) tells the change in logml if ind is added
% to cluster pop. nan, if the cluster has changed since the value was last
% calculated. Always nan, if pop is ind's own cluster.
%
% JOIN_DIFFERENCE(pop1,pop2) = tells the change in logml if pop1 and pop2
% are combined. nan, if either cluster has changed since the value was last
% calculated.
% ***********Doc end*****************

logml = computeTotalLogml;

disp('The beginning:');
% disp(['Partition: ' num2str(PARTITION')]);
disp(['Nclusters: ' num2str(length(unique(PARTITION)))]);
disp(['Log(ml*prior): ' num2str(logml)]);
disp(' ');

[partitionSummary, worstLogml, worstIndex] = ...
    recordPartition(logml, partitionSummary, worstLogml, worstIndex);

% START SEARCH OF THE BEST PARTITION:
% vipu(k)==1 marks that step type k produced no change since the last
% successful move, so it is skipped until something changes again.
vipu = zeros(1,14);
if interactive
    roundTypes = input('Input steps: ');
    if ischar(roundTypes), roundTypes = str2num(roundTypes); end
end
ready = 0;

while ready ~= 1

    % disp(['Performing steps: ' num2str(roundTypes)]);

    for n = 1:length(roundTypes)
        % Named roundType (not round) so the builtin round() is not shadowed.
        roundType = roundTypes(n);
        moveCounter = 0;

        if roundType==1 && vipu(1)==0 % move a pregroup to another population

            pgInds = getMoveInds(pgPart,pgDist,nPregroup); % get pregroup inds to be moved

            for pgind = pgInds(:)'
                % inds = cell2mat(pregroups(pgInds));
                tmpInds = pregroups{pgind};
                tmpChanges = calcLogmlChanges(tmpInds, c.cqData, c.nMaxCqCodes, ...
                    c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);

                [maxChange, maxIndex] = max(tmpChanges);
                if maxChange>1e-5
                    updateGlobalVariables(tmpInds, maxIndex, c.cqData, c.nMaxCqCodes, ...
                        c.spData, c.nMaxSpCodes,c.locCliques, c.locSeparators);
                    % fprintf('moving from %d to %d.\n',PARTITION(ind),maxIndex)
                    logml = computeTotalLogml();
                    % Exactly one pregroup is moved per accepted change;
                    % adding length(pgInds) here counted every candidate.
                    moveCounter = moveCounter+1;
                    vipu = zeros(1,14);
                    [partitionSummary, worstLogml, worstIndex] = ...
                        recordPartition(logml, partitionSummary, worstLogml, worstIndex);
                end
            end
            if moveCounter==0, vipu(1)=1; end
            disp(['Step 1: ' num2str(moveCounter) ' pregroups were moved.']);

        elseif roundType==2 && vipu(2)==0 % join two populations

            update_join_difference(c.cqData, c.nMaxCqCodes, ...
                c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
            [maxChange, aux] = max(JOIN_DIFFERENCE(:));
            [i1, i2] = ind2sub([nPOPS,nPOPS],aux);

            if maxChange>1e-5
                tmpInds = find(PARTITION==i1);
                updateGlobalVariables(tmpInds, i2, c.cqData, c.nMaxCqCodes, ...
                    c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators);
                logml = computeTotalLogml;
                disp(['Step 2: Clusters ' num2str(i1) ' and ' num2str(i2) ' combined.']);
                vipu = zeros(1,14);
                [partitionSummary, worstLogml, worstIndex] = ...
                    recordPartition(logml, partitionSummary, worstLogml, worstIndex);
            else
                disp('Step 2: no changes.');
                vipu(2)=1;
            end

        elseif ismember(roundType, 3:4) && vipu(roundType)==0 % Split a population, and move one subpopulation to another population

            pops = randperm(nPOPS);
            splitFlags = zeros(nPOPS,1);
            for pop = pops(:)'
                maxChange = 0;
                indsToBeMoved = [];
                inds2 = find(PARTITION==pop);
                pgInds2 = unique(pgPart(inds2));
                nPgInds2 = length(pgInds2);
                if nPgInds2>4
                    dist3 = getDistance(pgInds2,pgDist,nPregroup);
                    if roundType==3
                        npops2 = min(20, floor(nPgInds2 / 5));
                    else
                        npops2 = 2;
                    end

                    Z3 = linkage(dist3(:)','complete');
                    T3 = cluster(Z3, 'maxclust', npops2);

                    for i = 1:npops2
                        indsX = pgInds2(T3==i);
                        indsX = cell2mat(pregroups(indsX));
                        tmpChanges = calcLogmlChanges(indsX, c.cqData, c.nMaxCqCodes, ...
                            c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
                        [tmpMaxChange, tmpMaxPop] = max(tmpChanges);
                        if tmpMaxChange>maxChange
                            maxChange = tmpMaxChange;
                            % i1 = pop;
                            i2 = tmpMaxPop;
                            indsToBeMoved = indsX;
                        end
                    end

                    if maxChange>1e-5
                        updateGlobalVariables(indsToBeMoved, i2, c.cqData, c.nMaxCqCodes, ...
                            c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators);
                        logml = computeTotalLogml;
                        splitFlags(pop)=1;
                        [partitionSummary, worstLogml, worstIndex] = ...
                            recordPartition(logml, partitionSummary, worstLogml, worstIndex);
                    end
                end
            end
            if any(splitFlags)
                disp(['Step ' num2str(roundType) ': ' num2str(sum(splitFlags)) ' populations were split.']);
                vipu = zeros(1,14);
            else
                disp(['Step ' num2str(roundType) ': no changes.']);
                vipu(roundType)=1;
            end
        end
    end

    if interactive
        roundTypes = input('Input extra steps: ');
        if ischar(roundTypes), roundTypes = str2num(roundTypes); end
    else
        roundTypes = [];
    end

    if isempty(roundTypes)
        ready = 1;
    end
end

% disp(' ');
disp('BEST PARTITION: ');
% disp(['Partition: ' num2str(PARTITION')]);
disp(['Nclusters: ' num2str(length(unique(PARTITION)))]);
disp(['Log(ml): ' num2str(logml)]);
disp(' ');

nPOPS = rmEmptyPopulation(c.locCliques, c.locSeparators);

logmldiff = zeros(nPregroup,nPOPS);  % the change of logml if pregroup i is moved to group j
for i=1:nPregroup
    tmpInds = pregroups{i};
    tmpChanges = calcLogmlChanges(tmpInds, c.cqData, c.nMaxCqCodes, ...
        c.spData, c.nMaxSpCodes, c.locCliques, c.locSeparators, logml);
    logmldiff(i,:) = tmpChanges';
end
logmldiff(isnan(logmldiff))=0;

% Report one label per pregroup; all members of a pregroup share it
% because pregroups are only ever moved as a whole.
partition = zeros(nPregroup,1);
for i=1:nPregroup
    partition(i)=unique(PARTITION(pgPart==i));
end

%----------------------------------------------------------------------------
function [partitionSummary, worstLogml, worstIndex] = recordPartition(logml, partitionSummary, worstLogml, worstIndex)
% Offer the current partition (global PARTITION) with score logml to the
% summary page for its number of non-empty populations, and refresh the
% cached worst entry of that page when the summary was changed.
global PARTITION;
nnotEmptyPops = length(unique(PARTITION));
if logml>worstLogml(nnotEmptyPops)
    [partitionSummary(:,:,nnotEmptyPops), added] = addToSummary(logml, ...
        partitionSummary(:,:,nnotEmptyPops), worstIndex(nnotEmptyPops));
    if (added==1)
        [worstLogml(nnotEmptyPops), worstIndex(nnotEmptyPops)] = ...
            min(partitionSummary(:,2,nnotEmptyPops));
    end
end
%----------------------------------------------------------------------------
function [dist2, dind1, dind2] = getDistance(inds2, origDist, ninds)
% GETDISTANCE Pick out the distances between the samples in inds2.
%   inds2    - ascending sample indices
%   origDist - distances of (1,2),(1,3),(1,4)......(ninds-1,ninds)
%   ninds    - total number of samples that origDist was built for
%   dist2    - distances of all pairs within inds2, in the same pair order
%   dind1    - column with the first sample index of every pair
%   dind2    - column with the second sample index of every pair
% With fewer than two indices there are no pairs and all outputs are empty
% (the earlier nchoosek-based preallocation raised an error in that case).
% Lu Cheng, 22.06.2011

if ~issorted(inds2)
    error('inds2 is not in ascending order!');
end

ninds2 = length(inds2);
members = double(inds2(:));

% Entries below the diagonal, enumerated column by column, give the pairs
% (i,j) with i<j in the order (1,2),(1,3),...,(2,3),...
[jPos, iPos] = find(tril(true(ninds2), -1));
dind1 = reshape(members(iPos), [], 1);
dind2 = reshape(members(jPos), [], 1);

% Position of the pair (a,b), a<b, in the condensed distance vector.
pairPos = (dind1-1).*ninds - dind1 ./ 2 .* (dind1-1) + (dind2-dind1);
dist2 = origDist(pairPos);
%---------------------------------------------------------------
function inds = getMoveInds(pgPart, pgDist, nPregroup)
% GETMOVEINDS Pregroup indices to be tried for a move to another cluster.
% For every cluster of the global PARTITION the pregroups present in it are
% collected. Clusters with fewer than 20 pregroups contribute all of them;
% for larger clusters only the pregroups taking part in a pair whose
% distance exceeds the 0.65 quantile of the within-cluster distances are
% returned.
% Lu Cheng, 22.06.2011

global PARTITION;

clusterIds = unique(PARTITION);
inds = [];

for clusterId = clusterIds(:)'
    members = unique(pgPart(PARTITION==clusterId));

    if length(members) >= 20
        [pairDist, first, second] = getDistance(members, pgDist, nPregroup);
        cutoff = quantile(pairDist, 0.65);
        farPairs = find(pairDist > cutoff);
        members = union(first(farPairs), second(farPairs));
    end

    inds = [inds members(:)']; %#ok<AGROW>
end
% ------------------------------------------------------------
function [partitionSummary, added] = addToSummary(logml, partitionSummary, worstIndex)
% ADDTOSUMMARY Record the current partition in a summary table if it is new.
% It is known that the given logml is larger than the worst value in the
% partitionSummary table. If partitionSummary does not yet contain the
% given logml value (within 1e-5), the new logml and the number of clusters
% of the current partition are written to row worstIndex. Otherwise
% nothing is done.

global PARTITION;

matches = abs(partitionSummary(:,2)-logml) < 1e-5;
added = double(~any(matches));
if added
    % The partition found now is not yet recorded in the summary.
    nClusters = length(unique(PARTITION));
    partitionSummary(worstIndex,:) = [nClusters logml];
end

View file

@ -0,0 +1,97 @@
function c = preprocAln(alnMat)
% PREPROCALN Preprocess the alignment matrix into cliques and separators.
% A clique is a window of three consecutive loci (i, i+1, i+2); a separator
% is the pair of loci (i, i+1) for i = 2..nLoci-2. For every window the
% candidate codes are the cartesian product of the unique bases at its
% loci (built by myProd); each sequence is recoded by the row number of
% its code.
%
% Fields of the returned struct c:
%   nSeq                     - number of sequences (rows of alnMat)
%   arrUniqBase              - cell array, unique bases at each locus
%   arrUniqBaseNum           - number of unique bases at each locus
%   nMaxCqCodes, nMaxSpCodes - largest number of candidate codes
%   cqCodes, spCodes         - candidate codes per clique / separator
%   cqData, spData           - code number of every sequence per window
%   cqCounts, spCounts       - occurrences of every code per window
%   cqPrior, spPrior         - 1/nCodes for the codes of a window, 1 in the
%                              unused rows
% Lu Cheng, 24.05.2011

[nSeq, nLoci] = size(alnMat);

% unique bases at each locus
arrUniqBase = arrayfun(@(locus) unique(alnMat(:,locus)), 1:nLoci, 'UniformOutput', false);
arrUniqBaseNum = cellfun(@length, arrUniqBase);

arrCqNum = arrUniqBaseNum(1:end-2).*arrUniqBaseNum(2:end-1).*arrUniqBaseNum(3:end);
arrSpNum = arrUniqBaseNum(2:end-2).*arrUniqBaseNum(3:end-1);

nMaxCqCodes = max(arrCqNum);
nMaxSpCodes = max(arrSpNum);

cqCodes = cellfun(@myProd,arrUniqBase(1:end-2),arrUniqBase(2:end-1),arrUniqBase(3:end), ...
    'UniformOutput',false);
spCodes = cellfun(@myProd,arrUniqBase(2:end-2),arrUniqBase(3:end-1), ...
    'UniformOutput',false);

nCliques = length(cqCodes);
nSeparators = length(spCodes);

cqData = zeros(nSeq, nCliques);
spData = zeros(nSeq, nSeparators);
cqCounts = zeros(nMaxCqCodes, nCliques);
spCounts = zeros(nMaxSpCodes, nSeparators);
cqPrior = ones(nMaxCqCodes, nCliques);
spPrior = ones(nMaxSpCodes, nSeparators);

% Cliques: clique q covers the loci q, q+1, q+2.
for q = 1:nLoci-2
    codes = cqCodes{q};
    nCodes = size(codes,1);
    window = alnMat(:,q:q+2);
    for code = 1:nCodes
        hits = ismember(window, codes(code,:), 'rows');
        cqData(hits,q) = code;
        cqCounts(code,q) = sum(hits);
    end
    cqPrior(1:nCodes,q) = 1/nCodes;
end

% Separators: separator s covers the loci s+1, s+2.
for s = 1:nLoci-3
    codes = spCodes{s};
    nCodes = size(codes,1);
    window = alnMat(:,s+1:s+2);
    for code = 1:nCodes
        hits = ismember(window, codes(code,:), 'rows');
        spData(hits,s) = code;
        spCounts(code,s) = sum(hits);
    end
    spPrior(1:nCodes,s) = 1/nCodes;
end

c.nSeq = nSeq;
% c.alnMat = alnMat;
c.arrUniqBase = arrUniqBase;
c.arrUniqBaseNum = arrUniqBaseNum;
c.nMaxCqCodes = nMaxCqCodes;
c.nMaxSpCodes = nMaxSpCodes;
c.cqCodes = cqCodes;
c.spCodes = spCodes;
c.cqData = cqData;
c.spData = spData;
c.cqCounts = cqCounts;
c.spCounts = spCounts;
c.cqPrior = cqPrior;
c.spPrior = spPrior;
function y = myProd(varargin)
% MYPROD Cartesian product of two or three input sets.
% Returns one row per combination, with one column per input set in the
% order the sets were given. Any other number of inputs yields [].
% Lu Cheng, 24.05.2011

switch nargin
    case 2
        [first, second] = meshgrid(varargin{1}, varargin{2});
        y = [first(:) second(:)];
    case 3
        [first, second, third] = meshgrid(varargin{1}, varargin{2}, varargin{3});
        y = [first(:) second(:) third(:)];
    otherwise
        y = [];
end

View file

@ -0,0 +1,41 @@
function [npops notEmpty] = rmEmptyPopulation(locCliques,locSeparators)
% RMEMPTYPOPULATION Drop empty populations from the global tables.
% A population is kept when it has at least one non-zero entry in both
% SUM_CQ_COUNTS and SUM_SP_COUNTS. The kept populations are selected in
% CQ_COUNTS, SP_COUNTS, SUM_CQ_COUNTS, SUM_SP_COUNTS, LOGML_TABLE,
% ADDITION_DIFFERENCE and JOIN_DIFFERENCE, PARTITION is relabelled to
% 1..npops, and LOC_CQ_COUNTS / LOC_SP_COUNTS are rebuilt by computeCounts.
%   npops    - number of populations kept
%   notEmpty - original indices of the populations kept
% Lu Cheng, 15.12.2012

global CQ_COUNTS; global SUM_CQ_COUNTS;
global SP_COUNTS; global SUM_SP_COUNTS;
global PARTITION;
global LOGML_TABLE;
global ADDITION_DIFFERENCE;
global JOIN_DIFFERENCE;
global LOC_CQ_COUNTS;
global LOC_SP_COUNTS;

hasCliqueData = any(SUM_CQ_COUNTS,1);
hasSeparatorData = any(SUM_SP_COUNTS,1);
notEmpty = find(hasCliqueData & hasSeparatorData);
npops = length(notEmpty);

CQ_COUNTS = CQ_COUNTS(:,:,notEmpty);
SUM_CQ_COUNTS = SUM_CQ_COUNTS(:,notEmpty);
SP_COUNTS = SP_COUNTS(:,:,notEmpty);
SUM_SP_COUNTS = SUM_SP_COUNTS(:,notEmpty);
LOGML_TABLE = LOGML_TABLE(notEmpty);
ADDITION_DIFFERENCE = ADDITION_DIFFERENCE(:,notEmpty);
JOIN_DIFFERENCE = JOIN_DIFFERENCE(notEmpty,notEmpty);

% Relabel: the k-th kept population becomes label k. Labels that are not
% among the kept populations are left as they are.
[isKept, newLabel] = ismember(PARTITION, notEmpty);
PARTITION(isKept) = newLabel(isKept);

[LOC_CQ_COUNTS, LOC_SP_COUNTS] = computeCounts(locCliques, locSeparators, npops);

View file

@ -0,0 +1,61 @@
function updateGlobalVariables(inds, i2, cqData, nCqCodes, spData, nSpCodes, locCliques, locSeparators)
% UPDATEGLOBALVARIABLES Move the samples in inds to cluster i2.
% All samples in inds are supposed to come from the same cluster; the
% source cluster is read from the first of them. The count tables, the
% LOGML_TABLE entries of both clusters and the cached difference tables
% are updated accordingly.
% Lu Cheng, 15.12.2012

global PARTITION;
global CQ_COUNTS; global SUM_CQ_COUNTS;
global SP_COUNTS; global SUM_SP_COUNTS;
global ADDITION_DIFFERENCE;
global REMOVAL_DIFFERENCE;
global JOIN_DIFFERENCE;
global LOC_SP_COUNTS;
global LOC_CQ_COUNTS;

source = PARTITION(inds(1));
target = i2;
PARTITION(inds) = target;

% Contribution of the moved samples to each count table.
[cqDelta cqSumDelta] = computeDiffInCounts(inds, cqData, nCqCodes);
[spDelta spSumDelta] = computeDiffInCounts(inds, spData, nSpCodes);
locCqDelta = computeDiffInCliqCounts(locCliques, inds);
locSpDelta = computeDiffInCliqCounts(locSeparators, inds);

% Take the contribution out of the source cluster ...
CQ_COUNTS(:,:,source) = CQ_COUNTS(:,:,source) - cqDelta;
SP_COUNTS(:,:,source) = SP_COUNTS(:,:,source) - spDelta;
SUM_CQ_COUNTS(:,source) = SUM_CQ_COUNTS(:,source) - cqSumDelta;
SUM_SP_COUNTS(:,source) = SUM_SP_COUNTS(:,source) - spSumDelta;
LOC_CQ_COUNTS(:,source) = LOC_CQ_COUNTS(:,source) - locCqDelta;
LOC_SP_COUNTS(:,source) = LOC_SP_COUNTS(:,source) - locSpDelta;

% ... and put it into the target cluster.
CQ_COUNTS(:,:,target) = CQ_COUNTS(:,:,target) + cqDelta;
SP_COUNTS(:,:,target) = SP_COUNTS(:,:,target) + spDelta;
SUM_CQ_COUNTS(:,target) = SUM_CQ_COUNTS(:,target) + cqSumDelta;
SUM_SP_COUNTS(:,target) = SUM_SP_COUNTS(:,target) + spSumDelta;
LOC_CQ_COUNTS(:,target) = LOC_CQ_COUNTS(:,target) + locCqDelta;
LOC_SP_COUNTS(:,target) = LOC_SP_COUNTS(:,target) + locSpDelta;

updateLogmlTable([source target]);

% Cached differences that involve either cluster are now stale.
REMOVAL_DIFFERENCE(PARTITION==source | PARTITION==target) = nan;
ADDITION_DIFFERENCE(:,[source target]) = nan;

JOIN_DIFFERENCE(:,target) = nan;
JOIN_DIFFERENCE(target,:) = nan;

if any(PARTITION==source)
    JOIN_DIFFERENCE(:,source) = nan;
    JOIN_DIFFERENCE(source,:) = nan;
else
    % the source cluster became empty
    JOIN_DIFFERENCE(:,source) = 0;
    JOIN_DIFFERENCE(source,:) = 0;
    JOIN_DIFFERENCE(source,source) = nan;
end

View file

@ -0,0 +1,37 @@
function updateLogmlTable(pops)
% UPDATELOGMLTABLE Recompute LOGML_TABLE for the populations in pops.
% Updates the global LOGML_TABLE (npops*1 array) for each population given
% in "pops". After the update the values are based on the current values of
% the global variables CQ_COUNTS, SUM_CQ_COUNTS, SP_COUNTS, SUM_SP_COUNTS
% together with CQ_PRIOR and SP_PRIOR. The entry of a population is its
% clique term minus its separator term, each summed over the loci.
%
% The per-locus sums are reshaped explicitly to nLoci-by-npops and summed
% along dimension 1, so tables with a single locus column are handled
% correctly (squeeze and sum without a dimension argument flipped or
% collapsed the result in that case).
% Lu Cheng, 25.05.2011

global CQ_COUNTS; global SUM_CQ_COUNTS;
global SP_COUNTS; global SUM_SP_COUNTS;
global CQ_PRIOR; global SP_PRIOR;
global LOGML_TABLE;

nPops = length(pops);

cqPrior = repmat(CQ_PRIOR,[1 1 nPops]);
spPrior = repmat(SP_PRIOR,[1 1 nPops]);

% Sum over the codes (dimension 1): 1-by-nLoci-by-nPops, then flattened
% to nLoci-by-nPops so it lines up with SUM_*_COUNTS(:,pops).
cqCodeSum = sum(gammaln(cqPrior+CQ_COUNTS(:,:,pops))-gammaln(cqPrior) , 1);
cqCodeSum = reshape(cqCodeSum, size(cqCodeSum,2), nPops);
cqTerm = cqCodeSum - gammaln(1+SUM_CQ_COUNTS(:,pops));

spCodeSum = sum(gammaln(spPrior+SP_COUNTS(:,:,pops))-gammaln(spPrior) , 1);
spCodeSum = reshape(spCodeSum, size(spCodeSum,2), nPops);
spTerm = spCodeSum - gammaln(1+SUM_SP_COUNTS(:,pops));

LOGML_TABLE(pops) = sum(cqTerm, 1) - sum(spTerm, 1);
%----------------------------------------------------------------------

View file

@ -0,0 +1,92 @@
function update_difference_tables(ind, cqData, nCqLetter, ...
    spData, nSpLetter, locCliques, locSeparators,logml)
% UPDATE_DIFFERENCE_TABLES Refresh ADDITION_DIFFERENCE and REMOVAL_DIFFERENCE for one individual.
% Entries that are currently NaN are recomputed by temporarily changing the
% global count tables, evaluating computeTotalLogml, and restoring the
% tables:
%   REMOVAL_DIFFERENCE(ind)      - logml change when ind's counts are taken
%                                  out of its own cluster (they are NOT
%                                  added to any other cluster)
%   ADDITION_DIFFERENCE(ind,pop) - logml change when ind's counts are added
%                                  to cluster pop (they are NOT removed from
%                                  its own cluster); the own cluster is
%                                  never calculated
% logml is the reference value the changes are measured against.
% Lu Cheng, 15.12.2012

global PARTITION;
global ADDITION_DIFFERENCE;
global REMOVAL_DIFFERENCE;

removalDiff = REMOVAL_DIFFERENCE;
additionDiff = ADDITION_DIFFERENCE;

% Contribution of the individual to each count table, in the order
% expected by shiftPopulationCounts.
[cqDelta cqSumDelta] = computeDiffInCounts(ind, cqData, nCqLetter);
[spDelta spSumDelta] = computeDiffInCounts(ind, spData, nSpLetter);
locCqDelta = computeDiffInCliqCounts(locCliques, ind);
locSpDelta = computeDiffInCliqCounts(locSeparators, ind);
delta = {cqDelta, spDelta, cqSumDelta, spSumDelta, locCqDelta, locSpDelta};

ownPop = PARTITION(ind);

if isnan(removalDiff(ind))
    % Trial removal from the own cluster, then undo.
    shiftPopulationCounts(ownPop, -1, delta);
    removalDiff(ind) = computeTotalLogml() - logml;
    shiftPopulationCounts(ownPop, 1, delta);
end

stale = isnan(additionDiff(ind,:));
stale(ownPop) = 0;   % Own cluster needs never be calculated.
targets = find(stale);

for target = targets(:)'
    % Trial addition to the target cluster, then undo.
    shiftPopulationCounts(target, 1, delta);
    additionDiff(ind,target) = computeTotalLogml() - logml;
    shiftPopulationCounts(target, -1, delta);
end

REMOVAL_DIFFERENCE = removalDiff;
ADDITION_DIFFERENCE = additionDiff;

%---------------------------------------------------------------------
function shiftPopulationCounts(pop, direction, delta)
% Add (direction = 1) or subtract (direction = -1) the count contributions
% in delta to/from population pop in all global count tables, then refresh
% the LOGML_TABLE entry of that population.
global CQ_COUNTS; global SUM_CQ_COUNTS;
global SP_COUNTS; global SUM_SP_COUNTS;
global LOC_CQ_COUNTS;
global LOC_SP_COUNTS;

CQ_COUNTS(:,:,pop) = CQ_COUNTS(:,:,pop) + direction*delta{1};
SP_COUNTS(:,:,pop) = SP_COUNTS(:,:,pop) + direction*delta{2};
SUM_CQ_COUNTS(:,pop) = SUM_CQ_COUNTS(:,pop) + direction*delta{3};
SUM_SP_COUNTS(:,pop) = SUM_SP_COUNTS(:,pop) + direction*delta{4};
LOC_CQ_COUNTS(:,pop) = LOC_CQ_COUNTS(:,pop) + direction*delta{5};
LOC_SP_COUNTS(:,pop) = LOC_SP_COUNTS(:,pop) + direction*delta{6};

updateLogmlTable(pop);
%---------------------------------------------------------------------

View file

@ -0,0 +1,83 @@
function update_join_difference(cqData, nCqCodes, spData, nSpCodes, locCliques, locSeparators, logml)
% UPDATE_JOIN_DIFFERENCE  Fill in the NaN entries of the global JOIN_DIFFERENCE.
%
% For every cluster index src in 1:npops-1 and every later cluster dst
% whose entry JOIN_DIFFERENCE(src,dst) is NaN, the count contribution of
% the individuals currently assigned to src is subtracted from src's
% slices of the global count tables and added to dst's slices. The
% resulting total logml minus the input logml is written to both
% JOIN_DIFFERENCE(src,dst) and JOIN_DIFFERENCE(dst,src). Every change to
% the count tables is reversed again, and updateLogmlTable is called after
% each change/reversal for the affected cluster.
%
% If no individual has PARTITION == src, the entries linking src with all
% later clusters are set to 0 instead.
%
% Inputs:
%   cqData, nCqCodes  - passed to computeDiffInCounts for the CQ tables
%   spData, nSpCodes  - passed to computeDiffInCounts for the SP tables
%   locCliques        - passed to computeDiffInCliqCounts (LOC_CQ_COUNTS)
%   locSeparators     - passed to computeDiffInCliqCounts (LOC_SP_COUNTS)
%   logml             - reference value subtracted from each recomputed
%                       total logml (presumably the logml of the current
%                       partition -- confirm against callers)
%
% PARTITION itself is never modified here; only the count tables are
% perturbed temporarily.
%
% Lu Cheng, 15.12.2012

global CQ_COUNTS SUM_CQ_COUNTS
global SP_COUNTS SUM_SP_COUNTS
global LOC_CQ_COUNTS LOC_SP_COUNTS
global PARTITION JOIN_DIFFERENCE

npops = size(CQ_COUNTS,3);

for src = 1:npops-1
    members = find(PARTITION==src);

    % Guard: nothing is assigned to this cluster.
    if isempty(members)
        JOIN_DIFFERENCE(src,(src+1):npops) = 0;
        JOIN_DIFFERENCE((src+1):npops,src) = 0;
        continue
    end

    % Contribution of the members of src to each count table.
    [dCq, dCqSum] = computeDiffInCounts(members, cqData, nCqCodes);
    [dSp, dSpSum] = computeDiffInCounts(members, spData, nSpCodes);
    dLocCq = computeDiffInCliqCounts(locCliques, members);
    dLocSp = computeDiffInCliqCounts(locSeparators, members);

    % Later clusters whose join difference is still unknown; find()
    % returns positions relative to column src+1, hence the offset.
    targets = src + find(isnan(JOIN_DIFFERENCE(src,(src+1):end)));

    % Take the members' contribution out of src.
    CQ_COUNTS(:,:,src)     = CQ_COUNTS(:,:,src)     - dCq;
    SP_COUNTS(:,:,src)     = SP_COUNTS(:,:,src)     - dSp;
    SUM_CQ_COUNTS(:,src)   = SUM_CQ_COUNTS(:,src)   - dCqSum;
    SUM_SP_COUNTS(:,src)   = SUM_SP_COUNTS(:,src)   - dSpSum;
    LOC_CQ_COUNTS(:,src)   = LOC_CQ_COUNTS(:,src)   - dLocCq;
    LOC_SP_COUNTS(:,src)   = LOC_SP_COUNTS(:,src)   - dLocSp;
    updateLogmlTable(src);

    for dst = targets
        % Put the contribution into dst ...
        CQ_COUNTS(:,:,dst)   = CQ_COUNTS(:,:,dst)   + dCq;
        SP_COUNTS(:,:,dst)   = SP_COUNTS(:,:,dst)   + dSp;
        SUM_CQ_COUNTS(:,dst) = SUM_CQ_COUNTS(:,dst) + dCqSum;
        SUM_SP_COUNTS(:,dst) = SUM_SP_COUNTS(:,dst) + dSpSum;
        LOC_CQ_COUNTS(:,dst) = LOC_CQ_COUNTS(:,dst) + dLocCq;
        LOC_SP_COUNTS(:,dst) = LOC_SP_COUNTS(:,dst) + dLocSp;
        updateLogmlTable(dst);

        % ... record the change in total logml symmetrically ...
        delta = computeTotalLogml() - logml;
        JOIN_DIFFERENCE(src,dst) = delta;
        JOIN_DIFFERENCE(dst,src) = delta;

        % ... and take it out of dst again.
        CQ_COUNTS(:,:,dst)   = CQ_COUNTS(:,:,dst)   - dCq;
        SP_COUNTS(:,:,dst)   = SP_COUNTS(:,:,dst)   - dSp;
        SUM_CQ_COUNTS(:,dst) = SUM_CQ_COUNTS(:,dst) - dCqSum;
        SUM_SP_COUNTS(:,dst) = SUM_SP_COUNTS(:,dst) - dSpSum;
        LOC_CQ_COUNTS(:,dst) = LOC_CQ_COUNTS(:,dst) - dLocCq;
        LOC_SP_COUNTS(:,dst) = LOC_SP_COUNTS(:,dst) - dLocSp;
        updateLogmlTable(dst);
    end

    % Restore src to its original counts.
    CQ_COUNTS(:,:,src)     = CQ_COUNTS(:,:,src)     + dCq;
    SP_COUNTS(:,:,src)     = SP_COUNTS(:,:,src)     + dSp;
    SUM_CQ_COUNTS(:,src)   = SUM_CQ_COUNTS(:,src)   + dCqSum;
    SUM_SP_COUNTS(:,src)   = SUM_SP_COUNTS(:,src)   + dSpSum;
    LOC_CQ_COUNTS(:,src)   = LOC_CQ_COUNTS(:,src)   + dLocCq;
    LOC_SP_COUNTS(:,src)   = LOC_SP_COUNTS(:,src)   + dLocSp;
    updateLogmlTable(src);
end

1512
matlab/spatial/spatialMix.m Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff