/// Student's distribution probability function S(t,n), for n degrees of freedom. /// Student's distribution probability is used in the test of whether two observed /// distributions have the same mean. S(t,n) is the probability, for n degrees of /// freedom, that a statistic t (measuring the observed difference of means) /// would be smaller than the observed value if the means were in fact the same. /// Two means are significantly different if, e.g. S(t,n) > 0.99; /// in other words 1-S(t,n) is the significance level at which the hypothesis /// that the means are equal is disproved. /// @param double t input statistic value /// @param int n degrees of freedom, n > 0 /// @return Student's distribution probability P(t,n) double StudentsDistProbability(const double& t, const int& n) throw(Exception) { if(n <= 0) { Exception e("Non-positive degrees of freedom in StudentsDistribution()"); GPSTK_THROW(e); } return (1.0 - incompleteBeta(double(n)/(t*t+double(n)),double(n)/2,0.5)); }
/// F-distribution cumulative distribution function FDistCDF(F,n1,n2) F>=0 n1,n2>0. /// This function occurs in the statistical test of whether two observed samples /// have the same variance. If F is the ratio of the observed dispersion (variance) /// of the first sample to that of the second, where the first sample has n1 /// degrees of freedom and the second has n2 degrees of freedom, then this function /// returns the probability that F would be as large as it is if the first /// sample's distribution has smaller variance than the second's. In other words, /// FDistCDF(f,n1,n2) is the significance level at which the hypothesis /// "sample 1 has smaller variance than sample 2" can be rejected. /// A small numerical value implies a significant rejection, in turn implying /// high confidence in the hypothesis "sample 1 has variance greater than or equal /// to that of sample 2". /// Ref http://www.itl.nist.gov/div898/handbook/ 1.3.6.6.5 /// @param F input statistic value, the ratio variance1/variance2, F >= 0 /// @param n1 degrees of freedom of first sample, n1 > 0 /// @param n2 degrees of freedom of second sample, n2 > 0 /// @return probability that the sample is less than F. double FDistCDF(const double& F, const int& n1, const int& n2) throw(Exception) { if(F < 0) GPSTK_THROW(Exception("Negative statistic")); if(n1 <= 0 || n2 <= 0) GPSTK_THROW(Exception("Non-positive degree of freedom")); try { return (1.0 - incompleteBeta(double(n2)/(double(n2)+double(n1)*F), double(n2)/2.0,double(n1)/2.0)); } catch(Exception& e) { GPSTK_RETHROW(e); } }
/// Cumulative Distribution Function CDF() for Student-t-distribution CDF. /// If X is a random variable following a normal distribution with mean zero and /// variance unity, and chisq is a random variable following an independent /// chi-square distribution with n degrees of freedom, then the distribution of /// the ratio X/sqrt(chisq/n) is called Student's t-distribution with n degrees /// of freedom. The probability that |X/sqrt(chisq/n)| will be less than a fixed /// constant t is StudentCDF(t,n); /// Ref http://www.itl.nist.gov/div898/handbook/ 1.3.6.6.4 /// Abramowitz and Stegun 26.7.1 /// @param t input statistic value /// @param n degrees of freedom of first sample, n > 0 /// @return probability that the sample is less than X. double StudentsCDF(const double& t, const int& n) throw(Exception) { if(n <= 0) GPSTK_THROW(Exception("Non-positive degree of freedom")); try { // NB StudentsCDF(-t,n) = 1.0-StudentsCDF(t,n); double x = 0.5*incompleteBeta(double(n)/(t*t+double(n)),double(n)/2,0.5); if(t >= 0.0) return (1.0 - x); return (x); } catch(Exception& e) { GPSTK_RETHROW(e); } }
/// F distribution probability function F(f,n1,n2), f>=0, n1,n2>0 /// This function occurs in the statistical test of whether two observed samples /// have the same variance. If f is the ratio of the observed dispersion of the /// first sample to that of the second one, where the first sample has n1 degrees /// of freedom and the second has n2 degrees of freedom, then this function /// returns the probability that f would be as large as it is if the first /// sample's distribution has smaller variance than the second's. In other words, /// FDistribution(f,n1,n2) is the significance level at which the hypothesis /// "sample 1 has smaller variance than sample 2" can be rejected. /// A small numerical value implies a significant rejection, in turn implying /// high confidence in the hypothesis "sample 1 has variance greater than or equal /// to that of sample 2". /// @param double f input statistic value, the ratio variance1/variance2, f >= 0 /// @param int n1 degrees of freedom of first sample, n1 > 0 /// @param int n2 degrees of freedom of second sample, n2 > 0 /// @return F distribution F(f,n1,n2) double FDistProbability(const double& f, const int& n1, const int& n2) throw(Exception) { if(f < 0) { Exception e("Negative statistic in FDistribution()"); GPSTK_THROW(e); } if(n1 <= 0 || n2 <= 0) { Exception e("Non-positive degrees of freedom in FDistribution()"); GPSTK_THROW(e); } return incompleteBeta(double(n2)/(double(n2)+double(n1)*f), double(n2)/2.0,double(n1)/2.0); }
/******************************
Computes the inverse of the beta CDF: given a prob. value, calculates the x
for which the integral over 0 to x of beta CDF = prob.
Adapted from:
1. Majumder and Bhattacharjee (1973) App. Stat. 22(3) 411-414
and the corrections:
2. Cran et al. (1977) App. Stat. 26(1) 111-114
3. Berry et al. (1990) App. Stat. 39(2) 309-310
and another adaptation made in the code of Yang (tools.c)

Parameters: a, b - shape parameters of the beta distribution (must be >= 0)
            prob - target cumulative probability, must lie in [0,1]
Returns: x in (0,1) such that incompleteBeta(a,b,x) ~= prob;
         prob itself is returned when prob is exactly 0 or 1.
****************************/
MDOUBLE inverseCDFBeta(MDOUBLE a, MDOUBLE b, MDOUBLE prob){
	// reject impossible inputs (errorMsg::reportError is defined elsewhere;
	// presumably it aborts or throws -- execution does not continue meaningfully)
	if(a<0 || b<0 || prob<0 || prob>1) {
		errorMsg::reportError("error in inverseCDFBeta,illegal parameter");
	}
	// degenerate cases: the CDF endpoints map to themselves
	if (prob == 0 || prob == 1)
		return prob;

	int maxIter=100;            // cap on Newton iterations and on step-damping tries
	MDOUBLE epsilonLow=1e-300;  // floor for the convergence tolerance eps
	MDOUBLE fpu=3e-308;         // smallest usable positive double (near DBL_MIN)

	/****** changing the tail direction (prob=1-prob) */
	// Work in the lower tail: if prob > 0.5, solve the mirrored problem with the
	// shape parameters swapped, and flip the result back (1-x) on every return.
	bool tail=false;
	MDOUBLE probA=prob;  // original prob, kept for the tolerance formula below
	if (prob > 0.5) {
		prob = 1.0 - prob;
		tail = true;
		MDOUBLE tmp=a; a=b; b=tmp;
	}

	MDOUBLE lnBetaVal=betaln(a,b);  // log Beta(a,b), normalizes the density in the Newton step
	MDOUBLE x;

	/****** calculating chi square evaluator */
	// Seed the iteration with a chi-square (Wilson-Hilferty cube) approximation.
	MDOUBLE r = sqrt(-log(prob * prob));
	// rational approximation to the normal deviate of prob (A&S 26.2.22 constants)
	MDOUBLE y = r - (2.30753+0.27061*r)/(1.+ (0.99229+0.04481*r) * r);
	MDOUBLE chiSquare = 1.0/(9.0 * b);
	chiSquare = b*2 * pow(1.0 - chiSquare + y * sqrt(chiSquare), 3.0);
	// MDOUBLE chiSquare2=gammq(b,prob/2.0); //chi square valued of prob with 2q df
	MDOUBLE T=(4.0*a+2.0*b-2)/chiSquare;

	/****** initializing x0 */
	if (a > 1.0 && b > 1.0) {
		// starting approximation for both shape parameters > 1
		r = (y * y - 3.) / 6.;
		MDOUBLE s = 1. / (a*2. - 1.);
		MDOUBLE t = 1. / (b*2. - 1.);
		MDOUBLE h = 2. / (s + t);
		MDOUBLE w = y * sqrt(h + r) / h - (t - s) * (r + 5./6. - 2./(3.*h));
		x = a / (a + b * exp(w + w));
	} else {
		// small-shape starting guesses, chosen by the sign/size of the
		// chi-square evaluator (as in the AS 64/AS 109 initialization)
		if (chiSquare<0){
			x=exp((log(b*(1-prob))+lnBetaVal)/b);
		} else if (T<1){
			x=exp((log(prob*a)+lnBetaVal)/a);
		} else {
			x=(T-1.0)/(T+1.0);
		}
	}
	// force the start strictly inside (0,1); 1-2.22e-16 is 1 minus machine epsilon
	if(x<=fpu || x>=1-2.22e-16) x=(prob+0.5)/2; // 0<x<1 but to avoid underflow a little smaller

	/****** iterating with a modified version of newton-raphson */
	MDOUBLE adj, newX=x, prev=0;
	MDOUBLE yprev = 0.;
	adj = 1.;
	// convergence tolerance, tightened as the original prob approaches 0
	// NOTE(review): the published algorithm's tolerance uses the shape parameter
	// in one of these exponent terms; here both terms use probA -- TODO confirm
	// this adaptation is intentional.
	MDOUBLE eps = pow(10., -13. - 2.5/(probA * probA) - 0.5/(probA *probA));
	eps = (eps>epsilonLow?eps:epsilonLow);

	for (int i=0; i<maxIter; i++) {
		// NB: this incompleteBeta takes (a,b,x) -- argument order differs from
		// the GPSTk-style functions above, which call incompleteBeta(x,a,b).
		y = incompleteBeta(a,b,x);
		// Newton residual divided by the density: (CDF(x)-prob)/pdf(x), with the
		// pdf expressed via exp/log to avoid overflow for extreme shapes
		y = (y - prob) * exp(lnBetaVal + (1.0-a) * log(x) + (1.0-b) * log(1.0 - x)); //the classical newton-raphson formula
		// residual changed sign: record the last step size as a bracketing bound
		if (y * yprev <= 0)
			prev = (fabs(adj)>fpu?fabs(adj):fpu);
		MDOUBLE g = 1;
		// damp the step by factors of 3 until it is small enough and keeps x in [0,1]
		for (int j=0; j<maxIter; j++) {
			adj = g * y;
			if (fabs(adj) < prev) {
				newX = x - adj; // new x
				if (newX >= 0. && newX <= 1.) {
					// converged: bracketing bound or residual below tolerance
					if (prev <= eps || fabs(y) <= eps) return(tail?1.0-x:x);;
					if (newX != 0. && newX != 1.0) break;
				}
			}
			g /= 3.;
		}
		// step smaller than machine resolution: no further progress possible
		if (fabs(newX-x)<fpu) return (tail?1.0-x:x);;
		x = newX;
		yprev = y;
	}
	// iteration budget exhausted; return the best estimate found
	return (tail?1.0-x:x);
}
/******************************
Computes the average r value in percentile k whose boundaries are
leftBound and rightBound, using the incomplete beta function with
shape parameters (alpha+1, beta) scaled by the Beta(alpha,beta) mean.
****************************/
MDOUBLE computeAverage_r(MDOUBLE leftBound, MDOUBLE rightBound, MDOUBLE alpha, MDOUBLE beta, int k){
	const MDOUBLE massInBin = incompleteBeta(alpha+1,beta,rightBound)
	                        - incompleteBeta(alpha+1,beta,leftBound);
	return (massInBin*alpha/(alpha+beta))*k;
}