81 lines
2.1 KiB
Mathematica
81 lines
2.1 KiB
Mathematica
|
|
function [data, component_mat, popnames] = processxls(filename)
%PROCESSXLS Read a spreadsheet of strains and encode its gene columns.
%
%   [data, component_mat, popnames] = processxls(filename)
%
%   The sheet is read with xlsread on Windows (ispc) and with myxlsread
%   otherwise. The reader must return exactly one numeric column and a
%   text array with one more row than the numeric data (the header row).
%   A header cell equal to 'ST' (case-insensitive) is required; a header
%   equal to 'Strain' or 'Isolate' is optional. Every other column is
%   treated as a gene column and each of its cells is passed through
%   i_encode_n.
%
%   Outputs (all three are [] whenever an error message is displayed):
%     data          - uint16 matrix with one row per strain: the encoded
%                     gene columns side by side, followed by one final
%                     column holding the row index 1..nstrains.
%     component_mat - ngenes-by-max(gene width) matrix; row i lists the
%                     column indices of data that belong to gene i, padded
%                     with zeros on the right.
%     popnames      - nstrains-by-2 cell array; column 1 is
%                     num2cell of the Strain/Isolate column when present,
%                     otherwise of the ST column (as text), column 2 is
%                     the row index.
%
%   Note from the original author: a bug in lines 64-68 (original line
%   numbering, the data/component_mat construction) was fixed.

% Outputs stay empty on every early return below.
data = [];
component_mat = [];
popnames = [];

% Any failure of the reader is reported as a format problem.
try
    if ispc
        [A,B] = xlsread(filename);
    else
        [A,B] = myxlsread(filename);
    end
catch
    display('*** ERROR: Wrong Excel format');
    return
end

if size(A,2)~=1 % more than one columns containing numeric ST values
    display('*** ERROR: multiple columns of numeric values');
    return
end

% B carries the header row in addition to one row per numeric entry.
if size(A,1)~=size(B,1)-1
    display('*** ERROR: Wrong format');
    return
end

B = deblank(B); % remove any trailing blanks
nstrains = size(B,1)-1;
nheader = size(B,2);

% Locate the special columns. Explicit [] initialisation replaces the
% former exist('name') test, which also matches files/functions on the
% MATLAB path and could therefore report a column that was never found.
ix_ST = [];
ix_Strain = [];
for i = 1:nheader
    if strcmpi('ST',B{1,i})
        ix_ST = i;
    end
    if strcmpi('Strain',B{1,i}) || strcmpi('Isolate',B{1,i})
        ix_Strain = i;
    end
end
if isempty(ix_ST)
    display('*** ERROR: ST column needed');
    return
end

% Everything that is neither ST nor Strain/Isolate is a gene column.
ix_gene = setdiff(1:nheader,[ix_ST ix_Strain]);
ngenes = length(ix_gene);

% Without at least one strain and one gene the indexing further down
% (C(1,:), genesize(1)) would fail with an out-of-bounds error.
if nstrains < 1
    display('*** ERROR: no strains found');
    return
end
if ngenes < 1
    display('*** ERROR: no gene columns found');
    return
end

C = cell(nstrains,ngenes);
for i = 1:nstrains
    % The ST column is numeric in the sheet; store it back as text.
    B{i+1,ix_ST} = num2str(A(i));
    for j = 1:ngenes
        C{i,j} = uint16(i_encode_n(B{i+1,ix_gene(j)})); % save the memory.
    end
end

% cell2mat needs every strain to have the same encoded width per gene;
% check it here so the user gets a readable message instead of a
% concatenation error.
widths = cellfun('size',C,2);
genesize = widths(1,:);
if any(any(widths ~= repmat(genesize,nstrains,1)))
    display('*** ERROR: encoded gene columns have inconsistent lengths across strains');
    return
end

data = cell2mat(C);
% Append the row index as the last column.
% NOTE: uint16 saturates at 65535, so larger row indices are not
% representable in this column.
data = [data uint16((1:nstrains)')];

% Row i of component_mat holds the data columns occupied by gene i.
component_mat = zeros(ngenes,max(genesize));
cum = cumsum(genesize);
first = [1 cum(1:end-1)+1];
for i = 1:ngenes
    component_mat(i,1:genesize(i)) = first(i):cum(i);
end

% Prefer the strain names; fall back to the ST values (already text).
if isempty(ix_Strain)
    ix_name = ix_ST;
else % store the strain names only
    ix_name = ix_Strain;
end
popnames = num2cell(B(2:end,ix_name));
popnames(:,2) = num2cell((1:nstrains)');

display('---------------------------------------------------');
display(['# of strains: ', num2str(nstrains)]);
display(['# of genes: ', num2str(ngenes)]);
|