Added source Matlab code for reference
This commit is contained in:
parent
b8af977117
commit
b5d99903d2
186 changed files with 61405 additions and 1 deletions
235
matlab/linkage/transform4.m
Normal file
235
matlab/linkage/transform4.m
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
function [data_clique, data_separator, noalle_clique, noalle_separator] = transform4(data, component_mat, linkage_model)
|
||||
% Filename: transform4.m
|
||||
% [data_clique,data_clique,noalle_clique, noalle_separator] = tranform4(data, component_mat, linkage_model)
|
||||
|
||||
% DATA SHRINKAGE STEP IN THE END TO SAVE MEMORY USE
|
||||
% Further shrink the data using uint8 format.
|
||||
% Make sure that allele numbers smaller than 256.
|
||||
|
||||
|
||||
if strcmp(linkage_model,'linear')
|
||||
nindividuals = size(data,1);
|
||||
nloci = size(data,2)-1;
|
||||
% if ~all(diag(adj_mat)), adj_mat = adj_mat + diag(ones(nloci,1));, end;
|
||||
% [p,p,r]=dmperm(adj_mat);
|
||||
|
||||
% UNSOLVED --- Need to find a better algorithm to count the numbers of
|
||||
% separators ,cliques and singletons.
|
||||
ncomponents = size(component_mat,1);
|
||||
cardinality = sum(component_mat>0,2)';
|
||||
ncliques = sum(cardinality-1);
|
||||
nsingletons = sum(cardinality==1);
|
||||
nseparators = sum(cardinality(find(cardinality>1))-2);
|
||||
|
||||
data_clique = zeros(nindividuals, ncliques+nsingletons,'uint16');
|
||||
noalle_clique = zeros(ncliques+nsingletons,1,'uint16');
|
||||
data_separator = zeros(nindividuals, nseparators,'uint16');
|
||||
noalle_separator = zeros(nseparators,1,'uint16');
|
||||
% data_separator = []; % used for vectorization
|
||||
k = 1;
|
||||
for i = 1:ncomponents
|
||||
if (cardinality(i)==1) % singleton
|
||||
data_clique(:,k) = data(:,component_mat(i,1));
|
||||
temp = unique(data(:,component_mat(i,1)));
|
||||
if temp(1) == 0 % missing data found
|
||||
noalle_clique(k) = size(temp,1) -1;
|
||||
else
|
||||
noalle_clique(k) = size(temp,1);
|
||||
end
|
||||
k = k+1;
|
||||
else
|
||||
for j = 1:cardinality(i)-1
|
||||
|
||||
% dealing with missing data
|
||||
% nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1)]),2));
|
||||
% [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1)]),'rows');
|
||||
% data_clique(nonmissing,k) = n;
|
||||
% noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*...
|
||||
% size(unique(data(nonmissing,component_mat(i,j+1))),1);
|
||||
|
||||
[b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1)]),'rows');
|
||||
data_clique(:,k) = n;
|
||||
|
||||
noalle_clique(k) = size(unique(data(:,component_mat(i,j))),1)*...
|
||||
size(unique(data(:,component_mat(i,j+1))),1);
|
||||
k = k+1;
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
k = 1;
|
||||
for i = 1:ncomponents
|
||||
if (cardinality(i)>2)
|
||||
for j = 2:cardinality(i)-1
|
||||
data_separator(:,k) = data(:,component_mat(i,j));
|
||||
k = k+1;
|
||||
end
|
||||
end
|
||||
end
|
||||
if (k-1)~=nseparators
|
||||
error('ERROR in transform.m');
|
||||
return
|
||||
end
|
||||
for i = 1:nseparators
|
||||
% nonmissing = find(all(data_separator(:,i),2));
|
||||
% noalle_separator(i) = length(unique(data_separator(nonmissing,i)));
|
||||
noalle_separator(i) = length(unique(data_separator(:,i)));
|
||||
end
|
||||
|
||||
elseif strcmp(linkage_model,'codon')
|
||||
nindividuals = size(data,1);
|
||||
nloci = size(data,2)-1;
|
||||
ncomponents = size(component_mat,1);
|
||||
cardinality = sum(component_mat>0,2)';
|
||||
|
||||
ncliques = sum(cardinality(find(cardinality>2))-2);
|
||||
nsingletons = sum(cardinality==1 | cardinality ==2);
|
||||
nseparators = sum(cardinality(find(cardinality>2))-3);
|
||||
|
||||
data_clique = zeros(nindividuals,ncliques+nsingletons,'uint16');
|
||||
noalle_clique = zeros(ncliques+nsingletons,1,'uint16');
|
||||
data_separator = zeros(nindividuals,nseparators,'uint16');
|
||||
noalle_separator = zeros(nseparators,1,'uint16');
|
||||
k = 1;
|
||||
for i = 1:ncomponents
|
||||
if (cardinality(i)==1) % singleton
|
||||
data_clique(:,k) = data(:,component_mat(i,1));
|
||||
temp = unique(data(:,component_mat(i,1)));
|
||||
if temp(1) == 0 % missing data found
|
||||
noalle_clique(k) = size(temp,1) -1;
|
||||
else
|
||||
noalle_clique(k) = size(temp,1);
|
||||
end
|
||||
k = k+1;
|
||||
elseif (cardinality(i)==2) % transform to linear linkage
|
||||
% dealing with missing data
|
||||
% nonmissing = find(all(data(:,[component_mat(i,1) component_mat(i,2)]),2));
|
||||
% [b,m,n] = unique(data(nonmissing,[component_mat(i,1) component_mat(i,2)]),'rows');
|
||||
|
||||
[b,m,n] = unique(data(:,[component_mat(i,1) component_mat(i,2)]),'rows');
|
||||
data_clique(:,k) = uint16(n);
|
||||
% data_clique(nonmissing,k) = n;
|
||||
|
||||
% noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,1))),1)*...
|
||||
% size(unique(data(nonmissing,component_mat(i,2))),1);
|
||||
noalle_clique(k) = size(unique(data(:,component_mat(i,1))),1)*...
|
||||
size(unique(data(:,component_mat(i,2))),1);
|
||||
k = k+1;
|
||||
else
|
||||
for j = 1:cardinality(i)-2
|
||||
% dealing with missing data
|
||||
% nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),2));
|
||||
% [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),'rows');
|
||||
% data_clique(nonmissing,k) = n;
|
||||
|
||||
[b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1) component_mat(i,j+2)]),'rows');
|
||||
data_clique(:,k) = uint16(n);
|
||||
|
||||
% noalle_clique(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*...
|
||||
% size(unique(data(nonmissing,component_mat(i,j+1))),1)*size(unique(data(nonmissing,component_mat(i,j+2))),1);
|
||||
noalle_clique(k) = size(unique(data(:,component_mat(i,j))),1)*...
|
||||
size(unique(data(:,component_mat(i,j+1))),1)*size(unique(data(:,component_mat(i,j+2))),1);
|
||||
k = k+1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
k = 1;
|
||||
for i = 1:ncomponents
|
||||
if (cardinality(i)>2)
|
||||
for j = 2:cardinality(i)-2
|
||||
% dealing with missing data
|
||||
% nonmissing = find(all(data(:,[component_mat(i,j) component_mat(i,j+1)]),2));
|
||||
% [b,m,n] = unique(data(nonmissing,[component_mat(i,j) component_mat(i,j+1)]),'rows');
|
||||
[b,m,n] = unique(data(:,[component_mat(i,j) component_mat(i,j+1)]),'rows');
|
||||
% data_separator(nonmissing,k) = n;
|
||||
data_separator(:,k) = uint16(n);
|
||||
% noalle_separator(k) = size(unique(data(nonmissing,component_mat(i,j))),1)*...
|
||||
% size(unique(data(nonmissing,component_mat(i,j+1))),1);
|
||||
noalle_separator(k) = size(unique(data(:,component_mat(i,j))),1)*...
|
||||
size(unique(data(:,component_mat(i,j+1))),1);
|
||||
k = k+1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
elseif strcmpi(linkage_model,'Independent')
|
||||
data_clique = data(:,[1:end-1]);
|
||||
data_separator = [];
|
||||
noalle_clique = max(data_clique); % Be sure the data is normalized.
|
||||
noalle_separator = [];
|
||||
|
||||
elseif strcmp(linkage_model,'random')
|
||||
adj_mat = mk_adjmat(component_mat);
|
||||
nloci = size(adj_mat,1);
|
||||
ns = 4*ones(1,nloci);
|
||||
porder = [];
|
||||
stages = {[1:nloci]};
|
||||
clusters = {};
|
||||
|
||||
[jtree,root2,cliques,B,w]=graph_to_jtree(adj_mat,ns,porder,stages,clusters);
|
||||
C = length(cliques);
|
||||
[is,js] = find(jtree > 0);
|
||||
separator = cell(C,C);
|
||||
for k=1:length(is)
|
||||
i = is(k); j = js(k);
|
||||
separator{i,j} = find(B(i,:) & B(j,:)); % intersect(cliques{i}, cliques{j});
|
||||
end
|
||||
|
||||
|
||||
nindividuals = size(data,1);
|
||||
if nloci ~= size(data,2)-1 error('*** ERROR: in transform3.m');
|
||||
return
|
||||
end
|
||||
ncliques = length(cliques);
|
||||
[i,j] = find(~cellfun('isempty',triu(separator)));
|
||||
ij = [i j];
|
||||
nseparators = length(i);
|
||||
|
||||
data_clique = zeros(nindividuals, ncliques);
|
||||
noalle_clique = zeros(ncliques,1);
|
||||
data_separator = zeros(nindividuals, nseparators);
|
||||
noalle_separator = zeros(nseparators,1);
|
||||
|
||||
k = 1;
|
||||
for i = 1:ncliques
|
||||
[b,m,n] = unique(data(:,cliques{i}),'rows');
|
||||
data_clique(:,k) = n;
|
||||
% for j = 1:length(cliques{i})
|
||||
% sz(j) = size(unique(data(:,cliques{i}(j))),1);
|
||||
% end
|
||||
% noalle_clique(k) = prod(sz);
|
||||
noalle_clique(k) = 4^length(cliques{i});
|
||||
k = k+1;
|
||||
% clear sz;
|
||||
end
|
||||
k = 1;
|
||||
for i = 1:nseparators
|
||||
[b,m,n] = unique(data(:,separator{ij(i,1),ij(i,2)}),'rows');
|
||||
data_separator(:,k) = n;
|
||||
|
||||
% for j = 1:length(separator{ij(i,1),ij(i,2)})
|
||||
% sz(j) = size(unique(data(:,separator{ij(i,1),ij(i,2)}(j))),1);
|
||||
% end
|
||||
% noalle_separator(k) = prod(sz);
|
||||
noalle_separator(k) = 4^length(separator{ij(i,1),ij(i,2)});
|
||||
k = k+1;
|
||||
% clear sz;
|
||||
end
|
||||
end
|
||||
|
||||
% Data shrinkage step
|
||||
informative_clique = find(noalle_clique~=1);
|
||||
informative_separator = find(noalle_separator~=1);
|
||||
data_clique = data_clique(:,informative_clique);
|
||||
data_separator = data_separator(:,informative_separator);
|
||||
noalle_clique = noalle_clique(informative_clique);
|
||||
noalle_separator = noalle_separator(informative_separator);
|
||||
clear data;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue