ourMELONS/matlab/independent/processxls.m
2019-12-16 16:47:21 +01:00

81 lines
No EOL
2.1 KiB
Matlab

function [data, component_mat, popnames] = processxls(filename)
%
% a bug in line 64-68 was fixed
data = [];
component_mat = [];
popnames = [];
try
if ispc
[A,B] = xlsread(filename);
else
[A,B] = myxlsread(filename);
end
catch
display('*** ERROR: Wrong Excel format');
return
end
if size(A,2)~=1 % more than one columns containing numeric ST values
display('*** ERROR: multiple columns of numeric values');
data = []; component_mat = []; popnames = [];
return
end
if size(A,1)~=size(B,1)-1
display('*** ERROR: Wrong format');
data = []; component_mat = []; popnames = [];
return
end
B = deblank(B); % remove any trailing blanks
nstrains = size(B,1)-1;
nheader = size(B,2);
for i = 1:nheader
if strcmpi('ST',B{1,i}) ix_ST = i; end
if strcmpi('Strain', B{1,i}) || strcmpi('Isolate',B{1,i})
ix_Strain = i;
end
end
if ~exist('ix_ST')
display('*** ERROR: ST column needed');
data = []; component_mat = []; popnames = [];
return
end
if ~exist('ix_Strain')
ix_gene = setdiff([1:nheader],ix_ST);
else
ix_gene = setdiff([1:nheader],[ix_ST ix_Strain]);
end
ngenes = length(ix_gene);
C = cell(nstrains,ngenes);
if ~isempty(A)
for i=1:nstrains
B{i+1,ix_ST}=num2str(A(i));
for j=1:ngenes
C{i,j}=uint16(i_encode_n(B{i+1,ix_gene(j)})); % save the memory.
end
end
end
genesize=cellfun('size',C(1,:),2);
data=cell2mat(C);
data=[data uint16([1:nstrains]')];
component_mat = zeros(ngenes,max(genesize));
cum = cumsum(genesize);
component_mat(1,[1:genesize(1)]) = [1:cum(1)];
for i=2:ngenes
component_mat(i,[1:genesize(i)]) = [(cum(i-1)+1):cum(i)];
end
if ~exist('ix_Strain')
popnames = num2cell(B([2:end],ix_ST));
else % store the strain names only
popnames = num2cell(B([2:end],ix_Strain));
end
popnames(:,2)=num2cell([1:nstrains]');
display('---------------------------------------------------');
display(['# of strains: ', num2str(nstrains)]);
display(['# of genes: ', num2str(ngenes)]);