function [D]=chooseDistance(aln) % CHOOSEDISTANCE Display a quest dialog for choosing a genetic distance % input: a structure containing part of variables from the mixture result. % ouput: a symmetric distance matrix if (isempty(aln)) error('Need input.'); end if aln.npops == 1 disp('*** ERROR: no population structure is found.'); return end ButtonName=questdlg('Choose a type of genetic distance?', ... 'Select genetic distance', ... 'KL','Nei','Hamming','KL'); switch ButtonName, case 'KL' disp('Using KL distance.'); D = dn_kl(aln); case 'Nei' disp('Using Nei distance.'); D = dn_nei(aln); case 'Hamming' disp('Using Hamming distance. Please wait...'); D = dn_hamming(aln); % case 'LogDet' % disp('Using LogDet distance'); % D= dn_logdet(aln); otherwise D = []; end D = D + D'; % make it symmetric %-------------------------------------------------------------------------- % SUBFUNCTIONS %-------------------------------------------------------------------------- function [dist_mat] = dn_kl(aln) npops = aln.npops; COUNTS = aln.COUNTS; adjprior = aln.adjprior; % data = noIndex(aln.data, aln.noalle); % partition = aln.partition; dist_mat = zeros(npops, npops); maxnoalle = size(COUNTS,1); nloci = size(COUNTS,2); d = zeros(maxnoalle, nloci, npops); prior = adjprior; prior(find(prior==1))=0; nollia = find(all(prior==0)); %Lokukset, joissa oli havaittu vain yhtä alleelia. prior(1,nollia)=1; for pop1 = 1:npops d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))+prior) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))+prior),maxnoalle,1); %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); end for pop1 = 1:npops % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; for pop2 = 1:pop1-1 dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); div12 = sum(sum(dist1.*log2((dist1+10^-10) ./ (dist2+10^-10))))/nloci; div21 = sum(sum(dist2.*log2((dist2+10^-10) ./ (dist1+10^-10))))/nloci; div = (div12+div21)/2; dist_mat(pop1,pop2) = div; end end %-------------------------------------------------------------------------- function [dist_mat] = dn_nei(aln) npops = aln.npops; COUNTS = aln.COUNTS; % adjprior = aln.adjprior; % data = aln.data; % partition = aln.partition; dist_mat = zeros(npops, npops); maxnoalle = size(COUNTS,1); nloci = size(COUNTS,2); d = zeros(maxnoalle, nloci, npops); for pop1 = 1:npops d(:,:,pop1) = (squeeze(COUNTS(:,:,pop1))) ./ repmat(sum(squeeze(COUNTS(:,:,pop1))),maxnoalle,1); %dist1(pop1) = (squeeze(COUNTS(:,:,pop1))+adjprior) ./ repmat((SUMCOUNTS(pop1,:)+adjprior), maxnoalle, 1); end for pop1 = 1:npops % rivi = [blanks(2-floor(log10(pop1))) num2str(pop1) ' ']; for pop2 = 1:pop1-1 dist1 = d(:,:,pop1); dist2 = d(:,:,pop2); div1 = sum(sum(dist1.*dist2)); div2 = sqrt(sum(sum(dist1.^2)))*sqrt(sum(sum(dist2.^2))); div = -log(div1/div2); dist_mat(pop1,pop2) = div; end end %-------------------------------------------------------------------------- function [dist_mat] = dn_hamming(aln) npops = aln.npops; data = noIndex(aln.data, aln.noalle); partition = aln.partition; dist_mat = zeros(npops, npops); for pop1 = 1:npops for pop2 = 1:pop1-1 dist_mat(pop1,pop2) = hamming_dist(data(logical(partition==pop1),[1:end-1]),... data(logical(partition==pop2),[1:end-1])); end end function dist = hamming_dist(data1,data2) [length1,nloci] = size(data1); length2 = size(data2,1); dist1 = 0; for i = 1:length1 dist2 = 0; for j = 1:length2 dist2 = dist2 + sum(data1(i,:)~=data2(j,:))/nloci; end dist1 = dist1 + dist2/length2; end dist = dist1/length1; %-------------------------------------------------------------------------- function [D]=dn_logdet(aln) %DN_LOGDET - Log-det (paralinear) distance %The LogDet model computes the distance from the determinant of the matrix of %co-occurrence of nucleotides in the two species, according to the formula % % D = - 1/4(loge(|F|) - 1/2loge(fA1 fC1 fG1 fT1 fA2 fC2 fG2 fT2)) % %Where F is a matrix whose (i,j) element is the fraction of sites at which base %i occurs in one species and base j occurs in the other. fji is the fraction of %sites at which species i has base j. The LogDet distance cannot cope with %ambiguity codes. It must have completely defined sequences. One limitation of %the LogDet distance is that it may be infinite sometimes, if there are too many %changes between certain pairs of nucleotides. This can be particularly %noticeable with distances computed from bootstrapped sequences. % % Syntax: [D]=dn_logdet(aln) % % Inputs: % aln - Alignment structure % % Outputs: % D - Distance matrix % % See also: % Molecular Biology & Evolution Toolbox, (C) 2006 % Author: James J. Cai % Email: jamescai@hku.hk % Website: http://bioinformatics.org/mbetoolbox/ % Last revision: 5/3/2006 if (isstruct(aln)), S=aln.data; else S=aln; end [n,m] = size(S); D = zeros(n,n); for i=1:n-1 for j=i+1:n D(i,j) = d_logdet(S(i,:), S(j,:)); D(j,i) = D(i,j); end end %-------------------------------------------------------------------------- function d=d_logdet(seq1, seq2) [S,gap] = countntchange(seq1, seq2); % S=count_ntchanges(seq1, seq2); if (det(S)<=0) d=inf; else f1=sum(S); f2=sum(S'); d=(-1/4)*(log(det(S))-(1/2)*log( prod(f1)*prod(f2))); end %-------------------------------------------------------------------------- function [D,gap]=countntchange(s1,s2) %COUNTNTCHANGE - Count nucleotide changes in two DNA sequences %D is a 4x4 array, with bases in seq1 along top, seq2 along side, %in order A,C,G,T. % % Syntax: [D,gap]=countntchange(s1,s2) % % Inputs: % s1 - Sequence 1 vector % s2 - Sequence 2 vector % % Outputs: % D - Codon Adaptation Index value % gap - Codon Adaptation Index value % % % See also: COUNTAACHANGE COUNTCDCHANGE % Molecular Biology & Evolution Toolbox, (C) 2006 % Author: James J. Cai % Email: jamescai@hku.hk % Website: http://bioinformatics.org/mbetoolbox/ % Last revision: 3/28/2006 if (nargout>1) [D,gap]=countchange(s1,s2,4); else [D]=countchange(s1,s2,4); end % ------------------------------------------------------------------------- function [D,gap] = countchange(s1,s2,nword) if ~(ismember(nword, [4 20 61])), error('Wrong NWORD') end if (length(s1)~=length(s2)), error('Sequences are not of same length.') end D=zeros(nword); WORD=1:nword; for j=1:nword s1sites=(s1==WORD(j)); for i=1:nword D(i,j)=sum(s1sites & (s2==WORD(i))); end end if (nargout>1), gap = length(s1)-sum(D(:)); % gaps end