double dwilcox(double x, double m, double n, int give_log) { double d; #ifdef IEEE_754 /* NaNs propagated correctly */ if (ISNAN(x) || ISNAN(m) || ISNAN(n)) return(x + m + n); #endif m = R_forceint(m); n = R_forceint(n); if (m <= 0 || n <= 0) ML_ERR_return_NAN; if (fabs(x - R_forceint(x)) > 1e-7) return(R_D__0); x = R_forceint(x); if ((x < 0) || (x > m * n)) return(R_D__0); int mm = (int) m, nn = (int) n, xx = (int) x; w_init_maybe(mm, nn); d = give_log ? log(cwilcox(xx, mm, nn)) - lchoose(m + n, n) : cwilcox(xx, mm, nn) / choose(m + n, n); return(d); }
double rwilcox(double m, double n) { int i, j, k, *x; double r; #ifdef IEEE_754 /* NaNs propagated correctly */ if (ISNAN(m) || ISNAN(n)) return(m + n); #endif m = R_forceint(m); n = R_forceint(n); if ((m < 0) || (n < 0)) ML_ERR_return_NAN; if ((m == 0) || (n == 0)) return(0); r = 0.0; k = (int) (m + n); x = (int *) calloc((size_t) k, sizeof(int)); #ifdef MATHLIB_STANDALONE if (!x) MATHLIB_ERROR(_("wilcox allocation error %d"), 4); #endif for (i = 0; i < k; i++) x[i] = i; for (i = 0; i < n; i++) { j = (int) floor(k * unif_rand()); r += x[j]; x[j] = x[--k]; } free(x); return(r - n * (n - 1) / 2); }
double qwilcox(double x, double m, double n, int lower_tail, int log_p) { double c, p; #ifdef IEEE_754 if (ISNAN(x) || ISNAN(m) || ISNAN(n)) return(x + m + n); #endif if(!R_FINITE(x) || !R_FINITE(m) || !R_FINITE(n)) ML_ERR_return_NAN; R_Q_P01_check(x); m = R_forceint(m); n = R_forceint(n); if (m <= 0 || n <= 0) ML_ERR_return_NAN; if (x == R_DT_0) return(0); if (x == R_DT_1) return(m * n); if(log_p || !lower_tail) x = R_DT_qIv(x); /* lower_tail,non-log "p" */ int mm = (int) m, nn = (int) n; w_init_maybe(mm, nn); c = choose(m + n, n); p = 0; int q = 0; if (x <= 0.5) { x = x - 10 * DBL_EPSILON; for (;;) { p += cwilcox(q, mm, nn) / c; if (p >= x) break; q++; } } else { x = 1 - x + 10 * DBL_EPSILON; for (;;) { p += cwilcox(q, mm, nn) / c; if (p > x) { q = (int) (m * n - q); break; } q++; } } return(q); }
double dbinom(double x, double n, double p, int give_log) { #ifdef IEEE_754 /* NaNs propagated correctly */ if (ISNAN(x) || ISNAN(n) || ISNAN(p)) return x + n + p; #endif if (p < 0 || p > 1 || R_D_negInonint(n)) ML_ERR_return_NAN; R_D_nonint_check(x); if (x < 0 || !R_FINITE(x)) return R_D__0; n = R_forceint(n); x = R_forceint(x); return dbinom_raw(x, n, p, 1-p, give_log); }
/* 30 is somewhat arbitrary: it is on the *safe* side: * both speed and precision are clearly improved for k < 30. */ double choose(double n, double k) { double r, k0 = k; k = R_forceint(k); #ifdef IEEE_754 /* NaNs propagated correctly */ if(ISNAN(n) || ISNAN(k)) return n + k; #endif #ifndef MATHLIB_STANDALONE R_CheckStack(); #endif if (fabs(k - k0) > 1e-7) MATHLIB_WARNING2(_("'k' (%.2f) must be integer, rounded to %.0f"), k0, k); if (k < k_small_max) { int j; if(n-k < k && n >= 0 && R_IS_INT(n)) k = n-k; /* <- Symmetry */ if (k < 0) return 0.; if (k == 0) return 1.; /* else: k >= 1 */ r = n; for(j = 2; j <= k; j++) r *= (n-j+1)/j; return R_IS_INT(n) ? R_forceint(r) : r; /* might have got rounding errors */ } /* else: k >= k_small_max */ if (n < 0) { r = choose(-n+ k-1, k); if (ODD(k)) r = -r; return r; } else if (R_IS_INT(n)) { n = R_forceint(n); if(n < k) return 0.; if(n - k < k_small_max) return choose(n, n-k); /* <- Symmetry */ return R_forceint(exp(lfastchoose(n, k))); } /* else non-integer n >= 0 : */ if (n < k-1) { int s_choose; r = lfastchoose2(n, k, /* -> */ &s_choose); return s_choose * exp(r); } return exp(lfastchoose(n, k)); }
/* args have the same meaning as R function pwilcox */ double pwilcox(double q, double m, double n, int lower_tail, int log_p) { int i; double c, p; #ifdef IEEE_754 if (ISNAN(q) || ISNAN(m) || ISNAN(n)) return(q + m + n); #endif if (!R_FINITE(m) || !R_FINITE(n)) ML_ERR_return_NAN; m = R_forceint(m); n = R_forceint(n); if (m <= 0 || n <= 0) ML_ERR_return_NAN; q = floor(q + 1e-7); if (q < 0.0) return(R_DT_0); if (q >= m * n) return(R_DT_1); int mm = (int) m, nn = (int) n; w_init_maybe(mm, nn); c = choose(m + n, n); p = 0; /* Use summation of probs over the shorter range */ if (q <= (m * n / 2)) { for (i = 0; i <= q; i++) p += cwilcox(i, mm, nn) / c; } else { q = m * n - q; for (i = 0; i < q; i++) p += cwilcox(i, mm, nn) / c; lower_tail = !lower_tail; /* p = 1 - p; */ } return(R_DT_val(p)); } /* pwilcox */
double lchoose(double n, double k) { double k0 = k; k = R_forceint(k); #ifdef IEEE_754 /* NaNs propagated correctly */ if(ISNAN(n) || ISNAN(k)) return n + k; #endif #ifndef MATHLIB_STANDALONE R_CheckStack(); #endif if (fabs(k - k0) > 1e-7) MATHLIB_WARNING2(_("'k' (%.2f) must be integer, rounded to %.0f"), k0, k); if (k < 2) { if (k < 0) return ML_NEGINF; if (k == 0) return 0.; /* else: k == 1 */ return log(fabs(n)); } /* else: k >= 2 */ if (n < 0) { return lchoose(-n+ k-1, k); } else if (R_IS_INT(n)) { n = R_forceint(n); if(n < k) return ML_NEGINF; /* k <= n :*/ if(n - k < 2) return lchoose(n, n-k); /* <- Symmetry */ /* else: n >= k+2 */ return lfastchoose(n, k); } /* else non-integer n >= 0 : */ if (n < k-1) { int s; return lfastchoose2(n, k, &s); } return lfastchoose(n, k); }
double dnbinom(double x, double size, double prob, int give_log) { double ans, p; #ifdef IEEE_754 if (ISNAN(x) || ISNAN(size) || ISNAN(prob)) return x + size + prob; #endif if (prob <= 0 || prob > 1 || size < 0) ML_ERR_return_NAN; R_D_nonint_check(x); if (x < 0 || !R_FINITE(x)) return R_D__0; /* limiting case as size approaches zero is point mass at zero */ if (x == 0 && size==0) return R_D__1; x = R_forceint(x); ans = dbinom_raw(size, x+size, prob, 1-prob, give_log); p = ((double)size)/(size+x); return((give_log) ? log(p) + ans : p * ans); }
double pbinom(double x, double n, double p, int lower_tail, int log_p) { #ifdef IEEE_754 if (ISNAN(x) || ISNAN(n) || ISNAN(p)) return x + n + p; if (!R_FINITE(n) || !R_FINITE(p)) ML_ERR_return_NAN; #endif if(R_nonint(n)) { MATHLIB_WARNING(_("non-integer n = %f"), n); ML_ERR_return_NAN; } n = R_forceint(n); /* PR#8560: n=0 is a valid value */ if(n < 0 || p < 0 || p > 1) ML_ERR_return_NAN; if (x < 0) return R_DT_0; x = floor(x + 1e-7); if (n <= x) return R_DT_1; return pbeta(p, x + 1, n - x, !lower_tail, log_p); }
double psigamma(double x, double deriv) { /* n-th derivative of psi(x); e.g., psigamma(x,0) == digamma(x) */ double ans; int nz, ierr, k, n; if(ISNAN(x)) return x; deriv = R_forceint(deriv); n = (int)deriv; if(n > n_max) { MATHLIB_WARNING2(_("deriv = %d > %d (= n_max)\n"), n, n_max); return ML_NAN; } dpsifn(x, n, 1, 1, &ans, &nz, &ierr); ML_TREAT_psigam(ierr); /* Now, ans == A := (-1)^(n+1) / gamma(n+1) * psi(n, x) */ ans = -ans; /* = (-1)^(0+1) * gamma(0+1) * A */ for(k = 1; k <= n; k++) ans *= (-k);/* = (-1)^(k+1) * gamma(k+1) * A */ return ans;/* = psi(n, x) */ }
double dnbinom_mu(double x, double size, double mu, int give_log) { /* originally, just set prob := size / (size + mu) and called dbinom_raw(), * but that suffers from cancellation when mu << size */ #ifdef IEEE_754 if (ISNAN(x) || ISNAN(size) || ISNAN(mu)) return x + size + mu; #endif if (mu < 0 || size < 0) ML_ERR_return_NAN; R_D_nonint_check(x); if (x < 0 || !R_FINITE(x)) return R_D__0; /* limiting case as size approaches zero is point mass at zero, * even if mu is kept constant. limit distribution does not * have mean mu, though. */ if (x == 0 && size == 0) return R_D__1; x = R_forceint(x); if(!R_FINITE(size)) // limit case: Poisson return(dpois_raw(x, mu, give_log)); if(x == 0)/* be accurate, both for n << mu, and n >> mu :*/ return R_D_exp(size * (size < mu ? log(size/(size+mu)) : log1p(- mu/(size+mu)))); if(x < 1e-10 * size) { /* don't use dbinom_raw() but MM's formula: */ /* FIXME --- 1e-8 shows problem; rather use algdiv() from ./toms708.c */ double p = (size < mu ? log(size/(1 + size/mu)) : log(mu / (1 + mu/size))); return R_D_exp(x * p - mu - lgamma(x+1) + log1p(x*(x-1)/(2*size))); } else { /* no unnecessary cancellation inside dbinom_raw, when * x_ = size and n_ = x+size are so close that n_ - x_ loses accuracy */ double p = ((double)size)/(size+x), ans = dbinom_raw(size, x+size, size/(size+mu), mu/(size+mu), give_log); return((give_log) ? log(p) + ans : p * ans); } }
// rhyper(NR, NB, n) -- NR 'red', NB 'blue', n drawn, how many are 'red' double rhyper(double nn1in, double nn2in, double kkin) { /* extern double afc(int); */ int nn1, nn2, kk; int ix; // return value (coerced to double at the very end) Rboolean setup1, setup2; /* These should become 'thread_local globals' : */ static int ks = -1, n1s = -1, n2s = -1; static int m, minjx, maxjx; static int k, n1, n2; // <- not allowing larger integer par static double tn; // II : static double w; // III: static double a, d, s, xl, xr, kl, kr, lamdl, lamdr, p1, p2, p3; /* check parameter validity */ if(!R_FINITE(nn1in) || !R_FINITE(nn2in) || !R_FINITE(kkin)) ML_ERR_return_NAN; nn1in = R_forceint(nn1in); nn2in = R_forceint(nn2in); kkin = R_forceint(kkin); if (nn1in < 0 || nn2in < 0 || kkin < 0 || kkin > nn1in + nn2in) ML_ERR_return_NAN; if (nn1in >= INT_MAX || nn2in >= INT_MAX || kkin >= INT_MAX) { /* large n -- evade integer overflow (and inappropriate algorithms) -------- */ // FIXME: Much faster to give rbinom() approx when appropriate; -> see Kuensch(1989) // Johnson, Kotz,.. p.258 (top) mention the *four* different binomial approximations if(kkin == 1.) { // Bernoulli return rbinom(kkin, nn1in / (nn1in + nn2in)); } // Slow, but safe: return F^{-1}(U) where F(.) = phyper(.) and U ~ U[0,1] return qhyper(unif_rand(), nn1in, nn2in, kkin, FALSE, FALSE); } nn1 = (int)nn1in; nn2 = (int)nn2in; kk = (int)kkin; /* if new parameter values, initialize */ if (nn1 != n1s || nn2 != n2s) { setup1 = TRUE; setup2 = TRUE; } else if (kk != ks) { setup1 = FALSE; setup2 = TRUE; } else { setup1 = FALSE; setup2 = FALSE; } if (setup1) { n1s = nn1; n2s = nn2; tn = nn1 + nn2; if (nn1 <= nn2) { n1 = nn1; n2 = nn2; } else { n1 = nn2; n2 = nn1; } } if (setup2) { ks = kk; if (kk + kk >= tn) { k = (int)(tn - kk); } else { k = kk; } } if (setup1 || setup2) { m = (int) ((k + 1.) * (n1 + 1.) / (tn + 2.)); minjx = imax2(0, k - n2); maxjx = imin2(n1, k); #ifdef DEBUG_rhyper REprintf("rhyper(nn1=%d, nn2=%d, kk=%d), setup: floor(mean)= m=%d, jx in (%d..%d)\n", nn1, nn2, kk, m, minjx, maxjx); #endif } /* generate random variate --- Three basic cases */ if (minjx == maxjx) { /* I: degenerate distribution ---------------- */ #ifdef DEBUG_rhyper REprintf("rhyper(), branch I (degenerate)\n"); #endif ix = maxjx; goto L_finis; // return appropriate variate } else if (m - minjx < 10) { // II: (Scaled) algorithm HIN (inverse transformation) ---- const static double scale = 1e25; // scaling factor against (early) underflow const static double con = 57.5646273248511421; // 25*log(10) = log(scale) { <==> exp(con) == scale } if (setup1 || setup2) { double lw; // log(w); w = exp(lw) * scale = exp(lw + log(scale)) = exp(lw + con) if (k < n2) { lw = afc(n2) + afc(n1 + n2 - k) - afc(n2 - k) - afc(n1 + n2); } else { lw = afc(n1) + afc( k ) - afc(k - n2) - afc(n1 + n2); } w = exp(lw + con); } double p, u; #ifdef DEBUG_rhyper REprintf("rhyper(), branch II; w = %g > 0\n", w); #endif L10: p = w; ix = minjx; u = unif_rand() * scale; #ifdef DEBUG_rhyper REprintf(" _new_ u = %g\n", u); #endif while (u > p) { u -= p; p *= ((double) n1 - ix) * (k - ix); ix++; p = p / ix / (n2 - k + ix); #ifdef DEBUG_rhyper REprintf(" ix=%3d, u=%11g, p=%20.14g (u-p=%g)\n", ix, u, p, u-p); #endif if (ix > maxjx) goto L10; // FIXME if(p == 0.) we also "have lost" => goto L10 } } else { /* III : H2PE Algorithm --------------------------------------- */ double u,v; if (setup1 || setup2) { s = sqrt((tn - k) * k * n1 * n2 / (tn - 1) / tn / tn); /* remark: d is defined in reference without int. */ /* the truncation centers the cell boundaries at 0.5 */ d = (int) (1.5 * s) + .5; xl = m - d + .5; xr = m + d + .5; a = afc(m) + afc(n1 - m) + afc(k - m) + afc(n2 - k + m); kl = exp(a - afc((int) (xl)) - afc((int) (n1 - xl)) - afc((int) (k - xl)) - afc((int) (n2 - k + xl))); kr = exp(a - afc((int) (xr - 1)) - afc((int) (n1 - xr + 1)) - afc((int) (k - xr + 1)) - afc((int) (n2 - k + xr - 1))); lamdl = -log(xl * (n2 - k + xl) / (n1 - xl + 1) / (k - xl + 1)); lamdr = -log((n1 - xr + 1) * (k - xr + 1) / xr / (n2 - k + xr)); p1 = d + d; p2 = p1 + kl / lamdl; p3 = p2 + kr / lamdr; } #ifdef DEBUG_rhyper REprintf("rhyper(), branch III {accept/reject}: (xl,xr)= (%g,%g); (lamdl,lamdr)= (%g,%g)\n", xl, xr, lamdl,lamdr); REprintf("-------- p123= c(%g,%g,%g)\n", p1,p2, p3); #endif int n_uv = 0; L30: u = unif_rand() * p3; v = unif_rand(); n_uv++; if(n_uv >= 10000) { REprintf("rhyper() branch III: giving up after %d rejections", n_uv); ML_ERR_return_NAN; } #ifdef DEBUG_rhyper REprintf(" ... L30: new (u=%g, v ~ U[0,1])[%d]\n", u, n_uv); #endif if (u < p1) { /* rectangular region */ ix = (int) (xl + u); } else if (u <= p2) { /* left tail */ ix = (int) (xl + log(v) / lamdl); if (ix < minjx) goto L30; v = v * (u - p1) * lamdl; } else { /* right tail */ ix = (int) (xr - log(v) / lamdr); if (ix > maxjx) goto L30; v = v * (u - p2) * lamdr; } /* acceptance/rejection test */ Rboolean reject = TRUE; if (m < 100 || ix <= 50) { /* explicit evaluation */ /* The original algorithm (and TOMS 668) have f = f * i * (n2 - k + i) / (n1 - i) / (k - i); in the (m > ix) case, but the definition of the recurrence relation on p134 shows that the +1 is needed. */ int i; double f = 1.0; if (m < ix) { for (i = m + 1; i <= ix; i++) f = f * (n1 - i + 1) * (k - i + 1) / (n2 - k + i) / i; } else if (m > ix) { for (i = ix + 1; i <= m; i++) f = f * i * (n2 - k + i) / (n1 - i + 1) / (k - i + 1); } if (v <= f) { reject = FALSE; } } else { const static double deltal = 0.0078; const static double deltau = 0.0034; double e, g, r, t, y; double de, dg, dr, ds, dt, gl, gu, nk, nm, ub; double xk, xm, xn, y1, ym, yn, yk, alv; #ifdef DEBUG_rhyper REprintf(" ... accept/reject 'large' case v=%g\n", v); #endif /* squeeze using upper and lower bounds */ y = ix; y1 = y + 1.0; ym = y - m; yn = n1 - y + 1.0; yk = k - y + 1.0; nk = n2 - k + y1; r = -ym / y1; s = ym / yn; t = ym / yk; e = -ym / nk; g = yn * yk / (y1 * nk) - 1.0; dg = 1.0; if (g < 0.0) dg = 1.0 + g; gu = g * (1.0 + g * (-0.5 + g / 3.0)); gl = gu - .25 * (g * g * g * g) / dg; xm = m + 0.5; xn = n1 - m + 0.5; xk = k - m + 0.5; nm = n2 - k + xm; ub = y * gu - m * gl + deltau + xm * r * (1. + r * (-0.5 + r / 3.0)) + xn * s * (1. + s * (-0.5 + s / 3.0)) + xk * t * (1. + t * (-0.5 + t / 3.0)) + nm * e * (1. + e * (-0.5 + e / 3.0)); /* test against upper bound */ alv = log(v); if (alv > ub) { reject = TRUE; } else { /* test against lower bound */ dr = xm * (r * r * r * r); if (r < 0.0) dr /= (1.0 + r); ds = xn * (s * s * s * s); if (s < 0.0) ds /= (1.0 + s); dt = xk * (t * t * t * t); if (t < 0.0) dt /= (1.0 + t); de = nm * (e * e * e * e); if (e < 0.0) de /= (1.0 + e); if (alv < ub - 0.25 * (dr + ds + dt + de) + (y + m) * (gl - gu) - deltal) { reject = FALSE; } else { /* * Stirling's formula to machine accuracy */ if (alv <= (a - afc(ix) - afc(n1 - ix) - afc(k - ix) - afc(n2 - k + ix))) { reject = FALSE; } else { reject = TRUE; } } } } // else if (reject) goto L30; } L_finis: /* return appropriate variate */ if (kk + kk >= tn) { if (nn1 > nn2) { ix = kk - nn2 + ix; } else { ix = nn1 - ix; } } else { if (nn1 > nn2) ix = kk - ix; } return ix; }
double dnorm4(double x, double mu, double sigma, int give_log) { #ifdef IEEE_754 if (ISNAN(x) || ISNAN(mu) || ISNAN(sigma)) return x + mu + sigma; #endif if(!R_FINITE(sigma)) return R_D__0; if(!R_FINITE(x) && mu == x) return ML_NAN;/* x-mu is NaN */ if (sigma <= 0) { if (sigma < 0) ML_ERR_return_NAN; /* sigma == 0 */ return (x == mu) ? ML_POSINF : R_D__0; } x = (x - mu) / sigma; if(!R_FINITE(x)) return R_D__0; x = fabs (x); if (x >= 2 * sqrt(DBL_MAX)) return R_D__0; if (give_log) return -(M_LN_SQRT_2PI + 0.5 * x * x + log(sigma)); // M_1_SQRT_2PI = 1 / sqrt(2 * pi) #ifdef MATHLIB_FAST_dnorm // and for R <= 3.0.x and R-devel upto 2014-01-01: return M_1_SQRT_2PI * exp(-0.5 * x * x) / sigma; #else // more accurate, less fast : if (x < 5) return M_1_SQRT_2PI * exp(-0.5 * x * x) / sigma; /* ELSE: * x*x may lose upto about two digits accuracy for "large" x * Morten Welinder's proposal for PR#15620 * https://bugs.r-project.org/bugzilla/show_bug.cgi?id=15620 * -- 1 -- No hoop jumping when we underflow to zero anyway: * -x^2/2 < log(2)*.Machine$double.min.exp <==> * x > sqrt(-2*log(2)*.Machine$double.min.exp) =IEEE= 37.64031 * but "thanks" to denormalized numbers, underflow happens a bit later, * effective.D.MIN.EXP <- with(.Machine, double.min.exp + double.ulp.digits) * for IEEE, DBL_MIN_EXP is -1022 but "effective" is -1074 * ==> boundary = sqrt(-2*log(2)*(.Machine$double.min.exp + .Machine$double.ulp.digits)) * =IEEE= 38.58601 * [on one x86_64 platform, effective boundary a bit lower: 38.56804] */ if (x > sqrt(-2*M_LN2*(DBL_MIN_EXP + 1-DBL_MANT_DIG))) return 0.; /* Now, to get full accurary, split x into two parts, * x = x1+x2, such that |x2| <= 2^-16. * Assuming that we are using IEEE doubles, that means that * x1*x1 is error free for x<1024 (but we have x < 38.6 anyway). * If we do not have IEEE this is still an improvement over the naive formula. */ double x1 = // R_forceint(x * 65536) / 65536 = ldexp( R_forceint(ldexp(x, 16)), -16); double x2 = x - x1; return M_1_SQRT_2PI / sigma * (exp(-0.5 * x1 * x1) * exp( (-0.5 * x2 - x1) * x2 ) ); #endif }
double rhyper(double nn1in, double nn2in, double kkin) { const static double deltal = 0.0078; const static double deltau = 0.0034; /* extern double afc(int); */ int nn1, nn2, kk; int i, ix; Rboolean reject, setup1, setup2; double e, f, g, r, t, y; /* These should become 'thread_local globals' : */ static int ks = -1, n1s = -1, n2s = -1; static int k, m, minjx, maxjx, n1, n2; static double tn; // II : static double w; // III: static double a, d, s, xl, xr, kl, kr, lamdl, lamdr, p1, p2, p3; /* check parameter validity */ if(!R_FINITE(nn1in) || !R_FINITE(nn2in) || !R_FINITE(kkin)) ML_ERR_return_NAN; // Disabling large (nn1, nn2, kk) { =^= rhyper (m,n,k) }: nn1 = (int) R_forceint(nn1in); nn2 = (int) R_forceint(nn2in); kk = (int) R_forceint(kkin); if (nn1 < 0 || nn2 < 0 || kk < 0 || kk > nn1 + nn2) ML_ERR_return_NAN; /* if new parameter values, initialize */ reject = TRUE; if (nn1 != n1s || nn2 != n2s) { setup1 = TRUE; setup2 = TRUE; } else if (kk != ks) { setup1 = FALSE; setup2 = TRUE; } else { setup1 = FALSE; setup2 = FALSE; } if (setup1) { n1s = nn1; n2s = nn2; tn = nn1 + nn2; if (nn1 <= nn2) { n1 = nn1; n2 = nn2; } else { n1 = nn2; n2 = nn1; } } if (setup2) { ks = kk; if (kk + kk >= tn) { k = (int)(tn - kk); } else { k = kk; } } if (setup1 || setup2) { m = (int) ((k + 1.0) * (n1 + 1.0) / (tn + 2.0)); minjx = imax2(0, k - n2); maxjx = imin2(n1, k); } /* generate random variate --- Three basic cases */ if (minjx == maxjx) { /* I: degenerate distribution ---------------- */ ix = maxjx; /* return ix; No, need to unmangle <TSL>*/ /* return appropriate variate */ if (kk + kk >= tn) { if (nn1 > nn2) { ix = kk - nn2 + ix; } else { ix = nn1 - ix; } } else { if (nn1 > nn2) ix = kk - ix; } return ix; } else if (m - minjx < 10) { // II: (Scaled) algorithm HIN (inverse transformation) ---- const static double scale = 1e25; // scaling factor against (early) underflow const static double con = 57.5646273248511421; // 25*log(10) = log(scale) { <==> exp(con) == scale } if (setup1 || setup2) { double lw; // log(w); w = exp(lw) * scale = exp(lw + log(scale)) = exp(lw + con) if (k < n2) { lw = afc(n2) + afc(n1 + n2 - k) - afc(n2 - k) - afc(n1 + n2); } else { lw = afc(n1) + afc( k ) - afc(k - n2) - afc(n1 + n2); } w = exp(lw + con); } double p, u; L10: p = w; ix = minjx; u = unif_rand() * scale; while (u > p) { u -= p; p *= ((double) n1 - ix) * (k - ix); ix++; p = p / ix / (n2 - k + ix); if (ix > maxjx) goto L10; } } else { /* III : H2PE Algorithm --------------------------------------- */ double u,v; if (setup1 || setup2) { s = sqrt((tn - k) * k * n1 * n2 / (tn - 1) / tn / tn); /* remark: d is defined in reference without int. */ /* the truncation centers the cell boundaries at 0.5 */ d = (int) (1.5 * s) + .5; xl = m - d + .5; xr = m + d + .5; a = afc(m) + afc(n1 - m) + afc(k - m) + afc(n2 - k + m); kl = exp(a - afc((int) (xl)) - afc((int) (n1 - xl)) - afc((int) (k - xl)) - afc((int) (n2 - k + xl))); kr = exp(a - afc((int) (xr - 1)) - afc((int) (n1 - xr + 1)) - afc((int) (k - xr + 1)) - afc((int) (n2 - k + xr - 1))); lamdl = -log(xl * (n2 - k + xl) / (n1 - xl + 1) / (k - xl + 1)); lamdr = -log((n1 - xr + 1) * (k - xr + 1) / xr / (n2 - k + xr)); p1 = d + d; p2 = p1 + kl / lamdl; p3 = p2 + kr / lamdr; } L30: u = unif_rand() * p3; v = unif_rand(); if (u < p1) { /* rectangular region */ ix = (int) (xl + u); } else if (u <= p2) { /* left tail */ ix = (int) (xl + log(v) / lamdl); if (ix < minjx) goto L30; v = v * (u - p1) * lamdl; } else { /* right tail */ ix = (int) (xr - log(v) / lamdr); if (ix > maxjx) goto L30; v = v * (u - p2) * lamdr; } /* acceptance/rejection test */ if (m < 100 || ix <= 50) { /* explicit evaluation */ /* The original algorithm (and TOMS 668) have f = f * i * (n2 - k + i) / (n1 - i) / (k - i); in the (m > ix) case, but the definition of the recurrence relation on p134 shows that the +1 is needed. */ f = 1.0; if (m < ix) { for (i = m + 1; i <= ix; i++) f = f * (n1 - i + 1) * (k - i + 1) / (n2 - k + i) / i; } else if (m > ix) { for (i = ix + 1; i <= m; i++) f = f * i * (n2 - k + i) / (n1 - i + 1) / (k - i + 1); } if (v <= f) { reject = FALSE; } } else { double de, dg, dr, ds, dt, gl, gu, nk, nm, ub; double xk, xm, xn, y1, ym, yn, yk, alv; /* squeeze using upper and lower bounds */ y = ix; y1 = y + 1.0; ym = y - m; yn = n1 - y + 1.0; yk = k - y + 1.0; nk = n2 - k + y1; r = -ym / y1; s = ym / yn; t = ym / yk; e = -ym / nk; g = yn * yk / (y1 * nk) - 1.0; dg = 1.0; if (g < 0.0) dg = 1.0 + g; gu = g * (1.0 + g * (-0.5 + g / 3.0)); gl = gu - .25 * (g * g * g * g) / dg; xm = m + 0.5; xn = n1 - m + 0.5; xk = k - m + 0.5; nm = n2 - k + xm; ub = y * gu - m * gl + deltau + xm * r * (1. + r * (-0.5 + r / 3.0)) + xn * s * (1. + s * (-0.5 + s / 3.0)) + xk * t * (1. + t * (-0.5 + t / 3.0)) + nm * e * (1. + e * (-0.5 + e / 3.0)); /* test against upper bound */ alv = log(v); if (alv > ub) { reject = TRUE; } else { /* test against lower bound */ dr = xm * (r * r * r * r); if (r < 0.0) dr /= (1.0 + r); ds = xn * (s * s * s * s); if (s < 0.0) ds /= (1.0 + s); dt = xk * (t * t * t * t); if (t < 0.0) dt /= (1.0 + t); de = nm * (e * e * e * e); if (e < 0.0) de /= (1.0 + e); if (alv < ub - 0.25 * (dr + ds + dt + de) + (y + m) * (gl - gu) - deltal) { reject = FALSE; } else { /* * Stirling's formula to machine accuracy */ if (alv <= (a - afc(ix) - afc(n1 - ix) - afc(k - ix) - afc(n2 - k + ix))) { reject = FALSE; } else { reject = TRUE; } } } } // else if (reject) goto L30; } /* return appropriate variate */ if (kk + kk >= tn) { if (nn1 > nn2) { ix = kk - nn2 + ix; } else { ix = nn1 - ix; } } else { if (nn1 > nn2) ix = kk - ix; } return ix; }