int DiscreteGamma (double freqK[], double rK[], double alfa, double beta, int K, int median) { /* discretization of gamma distribution with equal proportions in each category */ int i; double gap05=1.0/(2.0*K), t, factor=alfa/beta*K, lnga1; if (median) { for (i=0; i<K; i++) rK[i]=PointGamma((i*2.0+1)*gap05, alfa, beta); for (i=0,t=0; i<K; i++) t+=rK[i]; for (i=0; i<K; i++) rK[i]*=factor/t; } else { lnga1=LnGamma(alfa+1); for (i=0; i<K-1; i++) freqK[i]=PointGamma((i+1.0)/K, alfa, beta); for (i=0; i<K-1; i++) freqK[i]=IncompleteGamma(freqK[i]*beta, alfa+1, lnga1); rK[0] = freqK[0]*factor; rK[K-1] = (1-freqK[K-2])*factor; for (i=1; i<K-1; i++) rK[i] = (freqK[i]-freqK[i-1])*factor; } for (i=0; i<K; i++) freqK[i]=1.0/K; return (0); }
static double PointChi2 (double prob, double v) { /* returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v returns -1 if in error. 0.000002<prob<0.999998 RATNEST FORTRAN by Best DJ & Roberts DE (1975) The percentage points of the Chi2 distribution. Applied Statistics 24: 385-388. (AS91) Converted into C by Ziheng Yang, Oct. 1993. */ double e=.5e-6, aa=.6931471805, p=prob, g; double xx, c, ch, a=0,q=0,p1=0,p2=0,t=0,x=0,b=0,s1,s2,s3,s4,s5,s6; if (p<.000002 || p>.999998 || v<=0) return (-1); g = LnGamma (v/2); xx=v/2; c=xx-1; if (v >= -1.24*log(p)) goto l1; ch=pow((p*xx*exp(g+xx*aa)), 1/xx); if (ch-e<0) return (ch); goto l4; l1: if (v>.32) goto l3; ch=0.4; a=log(1-p); l2: q=ch; p1=1+ch*(4.67+ch); p2=ch*(6.73+ch*(6.66+ch)); t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2; ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t; if (fabs(q/ch-1)-.01 <= 0) goto l4; else goto l2; l3: x=PointNormal (p); p1=0.222222/v; ch=v*pow((x*sqrt(p1)+1-p1), 3.0); if (ch>2.2*v+6) ch=-2*(log(1-p)-c*log(.5*ch)+g); l4: do { q=ch; p1=.5*ch; if ((t=IncompleteGamma (p1, xx, g))<0) { return (-1); } p2=p-t; t=p2*exp(xx*aa+g+p1-c*log(ch)); b=t/ch; a=0.5*t-b*c; s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420; s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520; s3=(210+a*(462+a*(707+932*a)))/2520; s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040; s5=(84+264*a+c*(175+606*a))/2520; s6=(120+c*(346+127*c))/5040; ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6)))))); } while (fabs(q/ch-1) > e); return (ch); }
/* Gamma cdf */ double cdfGamma (double x, double shape) { double result; result = IncompleteGamma (shape*x, shape, LnGamma(shape)); return result; }
void DGamRateProcess::UpdateDiscreteCategories() { if (withpinv) { double* x = new double[GetNcat()-1]; double* y = new double[GetNcat()-1]; double lg = rnd::GetRandom().logGamma(alpha+1.0); for (int i=0; i<GetNcat()-1; i++) { x[i] = PointGamma((i+1.0)/(GetNcat()-1),alpha,alpha); } for (int i=0; i<GetNcat()-2; i++) { y[i] = IncompleteGamma(alpha*x[i],alpha+1,lg); } y[GetNcat()-2] = 1.0; rate[0] = 0; rate[1] = (GetNcat()-1) * y[0]; for (int i=1; i<(GetNcat()-1); i++) { rate[i+1] = (GetNcat()-1) * (y[i] - y[i-1]); } delete[] x; delete[] y; } else { double* x = new double[GetNcat()]; double* y = new double[GetNcat()]; double lg = rnd::GetRandom().logGamma(alpha+1.0); for (int i=0; i<GetNcat(); i++) { x[i] = PointGamma((i+1.0)/GetNcat(),alpha,alpha); } for (int i=0; i<GetNcat()-1; i++) { y[i] = IncompleteGamma(alpha*x[i],alpha+1,lg); } y[GetNcat()-1] = 1.0; rate[0] = GetNcat() * y[0]; for (int i=1; i<GetNcat(); i++) { rate[i] = GetNcat() * (y[i] - y[i-1]); } delete[] x; delete[] y; } }
void GaussianSetHistogram(Histogram * h, float mean, float sd) { int sc; int hsize, idx; int nbins; float delta; UnfitHistogram(h); h->fit_type = HISTFIT_GAUSSIAN; h->param[GAUSS_MEAN] = mean; h->param[GAUSS_SD] = sd; /* Calculate the expected values for the histogram. */ hsize = h->max - h->min + 1; h->expect = (float *) ckalloc(sizeof(float) * hsize); if( h->expect == NULL ) { fatal("Unable to allocate expect size in expected histogram..."); } for (idx = 0; idx < hsize; idx++) h->expect[idx] = 0.; /* Note: ideally we'd use the Gaussian distribution function * to find the histogram occupancy in the window sc..sc+1. * However, the distribution function is hard to calculate. * Instead, estimate the histogram by taking the density at sc+0.5. */ for (sc = h->min; sc <= h->max; sc++) { delta = ((float)sc + 0.5) - h->param[GAUSS_MEAN]; h->expect[sc - h->min] = (float) h->total * ((1. / (h->param[GAUSS_SD] * sqrt(2.*3.14159))) * (exp(-1.*delta*delta / (2. * h->param[GAUSS_SD] * h->param[GAUSS_SD])))); } /* Calculate the goodness-of-fit (within whole region) */ h->chisq = 0.; nbins = 0; for (sc = h->lowscore; sc <= h->highscore; sc++) if (h->expect[sc-h->min] >= 5. && h->histogram[sc-h->min] >= 5) { delta = (float) h->histogram[sc-h->min] - h->expect[sc-h->min]; h->chisq += delta * delta / h->expect[sc-h->min]; nbins++; } /* -1 d.f. for normalization */ if (nbins > 1) h->chip = (float) IncompleteGamma((double)(nbins-1)/2., (double) h->chisq/2.); else h->chip = 0.; }
void ExtremeValueSetHistogram(Histogram * h, float mu, float lambda, float lowbound, float highbound, float wonka, int ndegrees) { int sc; int hsize, idx; int nbins; float delta; UnfitHistogram(h); h->fit_type = HISTFIT_EVD; h->param[EVD_LAMBDA] = lambda; h->param[EVD_MU] = mu; h->param[EVD_WONKA] = wonka; hsize = h->max - h->min + 1; h->expect = (float *) ckalloc(sizeof(float) * hsize); if( h->expect == NULL ) { fatal("Cannot make memory for expect thing... "); } for (idx = 0; idx < hsize; idx++) h->expect[idx] = 0.; /* Calculate the expected values for the histogram. */ for (sc = h->min; sc <= h->max; sc++) h->expect[sc - h->min] = ExtremeValueE((float)(sc), h->param[EVD_MU], h->param[EVD_LAMBDA], h->total) - ExtremeValueE((float)(sc+1), h->param[EVD_MU], h->param[EVD_LAMBDA], h->total); /* Calculate the goodness-of-fit (within whole region) */ h->chisq = 0.; nbins = 0; for (sc = lowbound; sc <= highbound; sc++) if (h->expect[sc-h->min] >= 5. && h->histogram[sc-h->min] >= 5) { delta = (float) h->histogram[sc-h->min] - h->expect[sc-h->min]; h->chisq += delta * delta / h->expect[sc-h->min]; nbins++; } /* Since we fit the whole histogram, there is at least * one constraint on chi-square: the normalization to h->total. */ if (nbins > 1 + ndegrees) h->chip = (float) IncompleteGamma((double)(nbins-1-ndegrees)/2., (double) h->chisq/2.); else h->chip = 0.; }
int DiscreteGamma(double freqK[], double rK[], double alpha, double beta, int K, int UseMedian) { /* discretization of G(alpha, beta) with equal proportions in each category. */ int i; double t, mean = alpha / beta, lnga1; if (UseMedian) { /* median */ for (i = 0; i < K; i++) { rK[i] = QuantileGamma((i * 2. + 1) / (2. * K), alpha, beta); } for (i = 0, t = 0; i < K; i++) { t += rK[i]; } for (i = 0; i < K; i++) { rK[i] *= mean * K / t; /* rescale so that the mean is alpha/beta. */ } } else { /* mean */ lnga1 = LnGamma(alpha + 1); for (i = 0; i < K - 1; i++) { /* cutting points, Eq. 9 */ freqK[i] = QuantileGamma((i + 1.0) / K, alpha, beta); } for (i = 0; i < K - 1; i++) { /* Eq. 10 */ freqK[i] = IncompleteGamma(freqK[i] * beta, alpha + 1, lnga1); } rK[0] = freqK[0] * mean * K; for (i = 1; i < K - 1; i++) { rK[i] = (freqK[i] - freqK[i - 1]) * mean * K; } rK[K - 1] = (1 - freqK[K - 2]) * mean * K; } for (i = 0; i < K; i++) freqK[i] = 1.0 / K; return (0); }
/* Function: GaussianFitHistogram() * * Purpose: Fit a score histogram to a Gaussian distribution. * Set the parameters mean and sd in the histogram * structure, as well as a chi-squared test for * goodness of fit. * * Args: h - histogram to fit * high_hint - score cutoff; above this are `real' hits that aren't fit * * Return: 1 if fit is judged to be valid. * else 0 if fit is invalid (too few seqs.) */ int GaussianFitHistogram(struct histogram_s *h, float high_hint) { float sum; float sqsum; float delta; int sc; int nbins; int hsize, idx; /* Clear any previous fitting from the histogram. */ UnfitHistogram(h); /* Determine if we have enough hits to fit the histogram; * arbitrarily require 1000. */ if (h->total < 1000) { h->fit_type = HISTFIT_NONE; return 0; } /* Simplest algorithm for mean and sd; * no outlier detection yet (not even using high_hint) * * Magic 0.5 correction is because our histogram is for * scores between x and x+1; we estimate the expectation * (roughly) as x + 0.5. */ sum = sqsum = 0.; for (sc = h->lowscore; sc <= h->highscore; sc++) { delta = (float) sc + 0.5; sum += (float) h->histogram[sc-h->min] * delta; sqsum += (float) h->histogram[sc-h->min] * delta * delta; } h->fit_type = HISTFIT_GAUSSIAN; h->param[GAUSS_MEAN] = sum / (float) h->total; h->param[GAUSS_SD] = sqrt((sqsum - (sum*sum/(float)h->total)) / (float)(h->total-1)); /* Calculate the expected values for the histogram. * Note that the magic 0.5 correction appears again. * Calculating difference between distribution functions for Gaussian * would be correct but hard. */ hsize = h->max - h->min + 1; h->expect = (float *) MallocOrDie(sizeof(float) * hsize); for (idx = 0; idx < hsize; idx++) h->expect[idx] = 0.; for (sc = h->min; sc <= h->max; sc++) { delta = (float) sc + 0.5 - h->param[GAUSS_MEAN]; h->expect[sc - h->min] = (float) h->total * ((1. / (h->param[GAUSS_SD] * sqrt(2.*3.14159))) * (exp(-1.* delta*delta / (2. * h->param[GAUSS_SD] * h->param[GAUSS_SD])))); } /* Calculate the goodness-of-fit (within region that was fitted) */ h->chisq = 0.; nbins = 0; for (sc = h->lowscore; sc <= h->highscore; sc++) if (h->expect[sc-h->min] >= 5. && h->histogram[sc-h->min] >= 5) { delta = (float) h->histogram[sc-h->min] - h->expect[sc-h->min]; h->chisq += delta * delta / h->expect[sc-h->min]; nbins++; } /* -1 d.f. for normalization; -2 d.f. for two free parameters */ if (nbins > 3) h->chip = (float) IncompleteGamma((double)(nbins-3)/2., (double) h->chisq/2.); else h->chip = 0.; return 1; }
/* Incomplete Gamma function Q(a,x) - this is a cleanroom implementation of NRs gammq(a,x) */ double IncompleteGammaQ (double a, double x) { return 1.0-IncompleteGamma (x, a, LnGamma(a)); }