ourMELONS/matlab/general/kstest2.m
2019-12-16 16:47:21 +01:00

178 lines
5.8 KiB
Matlab

function [H, pValue, KSstatistic] = kstest2(x1, x2, alpha, tail)
%KSTEST2 Two-sample Kolmogorov-Smirnov goodness-of-fit hypothesis test.
% H = KSTEST2(X1,X2,ALPHA,TAIL) performs a Kolmogorov-Smirnov (K-S) test
% to determine if independent random samples, X1 and X2, are drawn from
% the same underlying continuous population. ALPHA and TAIL are optional
% scalar inputs: ALPHA is the desired significance level (default = 0.05);
% TAIL indicates the type of test (default = 0). H indicates the result of
% the hypothesis test:
% H = 0 => Do not reject the null hypothesis at significance level ALPHA.
% H = 1 => Reject the null hypothesis at significance level ALPHA.
%
% Let S1(x) and S2(x) be the empirical distribution functions from the
% sample vectors X1 and X2, respectively, and F1(x) and F2(x) be the
% corresponding true (but unknown) population CDFs. The two-sample K-S
% test tests the null hypothesis that F1(x) = F2(x) for all x, against the
% alternative specified by TAIL:
% 'unequal' -- "F1(x) not equal to F2(x)" (two-sided test)
% 'larger' -- "F1(x) > F2(x)" (one-sided test)
% 'smaller' -- "F1(x) < F2(x)" (one-sided test)
%
% For TAIL = 'unequal', 'larger', and 'smaller', the test statistics are
% max|S1(x) - S2(x)|, max[S1(x) - S2(x)], and max[S2(x) - S1(x)],
% respectively.
%
% The decision to reject the null hypothesis occurs when the significance
% level, ALPHA, equals or exceeds the P-value.
%
% X1 and X2 are vectors of lengths N1 and N2, respectively, and represent
% random samples from some underlying distribution(s). Missing
% observations, indicated by NaNs (Not-a-Number), are ignored.
%
% [H,P] = KSTEST2(...) also returns the asymptotic P-value P.
%
% [H,P,KSSTAT] = KSTEST2(...) also returns the K-S test statistic KSSTAT
% defined above for the test type indicated by TAIL.
%
% The asymptotic P-value becomes very accurate for large sample sizes, and
% is believed to be reasonably accurate for sample sizes N1 and N2 such
% that (N1*N2)/(N1 + N2) >= 4.
%
% See also KSTEST, LILLIETEST, CDFPLOT.
%
% Copyright 1993-2007 The MathWorks, Inc.
% $Revision: 1.5.2.5 $ $ Date: 1998/01/30 13:45:34 $
% References:
% Massey, F.J., (1951) "The Kolmogorov-Smirnov Test for Goodness of Fit",
% Journal of the American Statistical Association, 46(253):68-78.
% Miller, L.H., (1956) "Table of Percentage Points of Kolmogorov Statistics",
% Journal of the American Statistical Association, 51(273):111-121.
% Stephens, M.A., (1970) "Use of the Kolmogorov-Smirnov, Cramer-Von Mises and
% Related Statistics Without Extensive Tables", Journal of the Royal
% Statistical Society. Series B, 32(1):115-122.
% Conover, W.J., (1980) Practical Nonparametric Statistics, Wiley.
% Press, W.H., et. al., (1992) Numerical Recipes in C, Cambridge Univ. Press.
if nargin < 2
error('stats:kstest2:TooFewInputs','At least 2 inputs are required.');
end
%
% Ensure each sample is a VECTOR.
%
if ~isvector(x1) || ~isvector(x2)
error('stats:kstest2:VectorRequired','The samples X1 and X2 must be vectors.');
end
%
% Remove missing observations indicated by NaN's, and
% ensure that valid observations remain.
%
x1 = x1(~isnan(x1));
x2 = x2(~isnan(x2));
x1 = x1(:);
x2 = x2(:);
if isempty(x1)
error('stats:kstest2:NotEnoughData', 'Sample vector X1 contains no data.');
end
if isempty(x2)
error('stats:kstest2:NotEnoughData', 'Sample vector X2 contains no data.');
end
%
% Ensure the significance level, ALPHA, is a scalar
% between 0 and 1 and set default if necessary.
%
if (nargin >= 3) && ~isempty(alpha)
if ~isscalar(alpha) || (alpha <= 0 || alpha >= 1)
error('stats:kstest2:BadAlpha',...
'Significance level ALPHA must be a scalar between 0 and 1.');
end
else
alpha = 0.05;
end
%
% Ensure the type-of-test indicator, TAIL, is a scalar integer from
% the allowable set, and set default if necessary.
%
if (nargin >= 4) && ~isempty(tail)
if ischar(tail)
tail = strmatch(lower(tail), {'smaller','unequal','larger'}) - 2;
if isempty(tail)
error('stats:kstest2:BadTail',...
'Type-of-test indicator TAIL must be ''unequal'', ''smaller'', or ''larger''.');
end
elseif ~isscalar(tail) || ~((tail==-1) || (tail==0) || (tail==1))
error('stats:kstest2:BadTail',...
'Type-of-test indicator TAIL must be ''unequal'', ''smaller'', or ''larger''.');
end
else
tail = 0;
end
%
% Calculate F1(x) and F2(x), the empirical (i.e., sample) CDFs.
%
binEdges = [-inf ; sort([x1;x2]) ; inf];
binCounts1 = histc (x1 , binEdges, 1);
binCounts2 = histc (x2 , binEdges, 1);
sumCounts1 = cumsum(binCounts1)./sum(binCounts1);
sumCounts2 = cumsum(binCounts2)./sum(binCounts2);
sampleCDF1 = sumCounts1(1:end-1);
sampleCDF2 = sumCounts2(1:end-1);
%
% Compute the test statistic of interest.
%
switch tail
case 0 % 2-sided test: T = max|F1(x) - F2(x)|.
deltaCDF = abs(sampleCDF1 - sampleCDF2);
case -1 % 1-sided test: T = max[F2(x) - F1(x)].
deltaCDF = sampleCDF2 - sampleCDF1;
case 1 % 1-sided test: T = max[F1(x) - F2(x)].
deltaCDF = sampleCDF1 - sampleCDF2;
end
KSstatistic = max(deltaCDF);
%
% Compute the asymptotic P-value approximation and accept or
% reject the null hypothesis on the basis of the P-value.
%
n1 = length(x1);
n2 = length(x2);
n = n1 * n2 /(n1 + n2);
lambda = max((sqrt(n) + 0.12 + 0.11/sqrt(n)) * KSstatistic , 0);
if tail ~= 0 % 1-sided test.
pValue = exp(-2 * lambda * lambda);
else % 2-sided test (default).
%
% Use the asymptotic Q-function to approximate the 2-sided P-value.
%
j = (1:101)';
pValue = 2 * sum((-1).^(j-1).*exp(-2*lambda*lambda*j.^2));
pValue = min(max(pValue, 0), 1);
end
H = (alpha >= pValue);