Added source Matlab code for reference
This commit is contained in:
parent
b8af977117
commit
b5d99903d2
186 changed files with 61405 additions and 1 deletions
70
matlab/linkage/makecomplete.m
Normal file
70
matlab/linkage/makecomplete.m
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
function [data, totalmissing] = makecomplete(data)
|
||||
%MAKECOMPLETE ESTIMATING missing alleles in a loci.
|
||||
% Input:
|
||||
% data: a baps data format, e.g. last column is index. Missing values
|
||||
% are denoted as any non-positive integers, commonly as 0 or -999.
|
||||
|
||||
|
||||
[nrows, ncols] = size(data);
|
||||
id = all(data>0);
|
||||
if all(id) % no missing values
|
||||
return
|
||||
else
|
||||
fprintf(1,'Estimating the missing values...');
|
||||
|
||||
% remove the totally missing loci
|
||||
missingloci = find(id==0);
|
||||
totalmissing = find(all(data(:,missingloci)<=0));
|
||||
if ~isempty(totalmissing)
|
||||
disp('Totally missing loci were found.');
|
||||
data(:,missingloci(totalmissing)) = ones(nrows,length(totalmissing));
|
||||
end
|
||||
% goodloci = setdiff([1:ncols],missingloci(totalmissing));
|
||||
% data = data(:,goodloci);
|
||||
% missingloci = find(all(data>0)==0);
|
||||
|
||||
data_in = data(:,[missingloci end]);
|
||||
[counts,noalle,prior,adjprior,rawalleles,data_in] = allfreqsnew3(data_in);
|
||||
data(:,missingloci) = data_in(:,[1:end-1]);
|
||||
|
||||
s_counts = sum(counts,3);
|
||||
% m_counts = size(counts,3)*ones(1, size(counts,2)) - sum(s_counts); % counts of missing values
|
||||
m_counts = nrows*ones(1, size(counts,2)) - sum(s_counts); % counts of missing values
|
||||
m = repmat(m_counts,size(s_counts,1),1);
|
||||
a = s_counts + prior;
|
||||
a = a./repmat(sum(a),size(a,1),1);
|
||||
c = round(m.*a);
|
||||
|
||||
% Be sure that the sum matches
|
||||
reassign = find(sum(c)~=m_counts);
|
||||
for i = reassign
|
||||
dif = sum(c(:,i)) - m_counts(:,i);
|
||||
if dif > 0
|
||||
remove = find(c(:,i)==max(c(:,i)));
|
||||
c(remove(1),i)=c(remove(1),i) - dif; % decrease by the diff
|
||||
else
|
||||
add = find(c(:,i)==min(c(:,i)));
|
||||
c(add(1),i)=c(add(1),i) - dif; % increase by the diff
|
||||
end
|
||||
end
|
||||
try
|
||||
% Fill in the missing values
|
||||
for i = 1:length(missingloci)
|
||||
missingrows = find(data(:,missingloci(i))<=0);
|
||||
p = randperm(m_counts(i));
|
||||
cum = [0 cumsum(c(:,i))'];
|
||||
for j = 1:size(c,1)
|
||||
data(missingrows(p([cum(j)+1:cum(j+1)])),missingloci(i)) = j;
|
||||
end
|
||||
end
|
||||
catch
|
||||
% disp('*** ERROR: in completing the missing data.');
|
||||
data = [];
|
||||
totalmissing = [];
|
||||
return
|
||||
end
|
||||
fprintf(1,'Finished.\n');
|
||||
end
|
||||
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue