/* Function: esl_gumbel_Sample() * Synopsis: Return a Gumbel-distributed random sample $x$. * Incept: SRE, Thu Jun 23 11:38:39 2005 [St. Louis] * * Purpose: Sample a Gumbel-distributed random variate * by the transformation method. */ double esl_gumbel_Sample(ESL_RANDOMNESS *r, double mu, double lambda) { double p; p = esl_rnd_UniformPositive(r); return esl_gumbel_invcdf(p, mu, lambda); }
/* Function: p7_Tau() * Synopsis: Determine Forward tau by brief simulation. * Incept: SRE, Thu Aug 9 15:08:39 2007 [Janelia] * * Purpose: Determine the <tau> parameter for an exponential tail fit * to the Forward score distribution for model <om>, on * random sequences with the composition of the background * model <bg>. This <tau> parameter is for an exponential * distribution anchored from $P=1.0$, so it's not really a * tail per se; but it's only an accurate fit in the tail * of the Forward score distribution, from about $P=0.001$ * or so. * * The determination of <tau> is done by a brief simulation * in which we fit a Gumbel distribution to a small number * of Forward scores of random sequences, and use that to * predict the location of the tail at probability <tailp>. * * The Gumbel is of course inaccurate, but we can use it * here solely as an empirical distribution to determine * the location of a reasonable <tau> more accurately on a * smaller number of samples than we could do with raw * order statistics. * * Typical choices are L=100, N=200, tailp=0.04, which * typically yield estimates $\hat{\mu}$ with a precision * (standard deviation) of $\pm$ 0.2 bits, corresponding to * a $\pm$ 15\% error in E-values. See [J1/135]. * * The use of Gumbel fitting to a small number of $N$ * samples and the extrapolation of $\hat{\mu}$ from the * estimated location of the 0.04 tail mass are both * empirical and carefully optimized against several * tradeoffs. Most importantly, around this choice of tail * probability, a systematic error introduced by the use of * the Gumbel fit is being cancelled by systematic error * introduced by the use of a higher tail probability than * the regime in which the exponential tail is a valid * approximation. See [J1/135] for discussion. * * This function changes the length configuration of both * <om> and <bg>. The caller must remember to reconfigure * both of their length models appropriately for any * subsequent alignments. * * Args: r : source of randomness * om : configured profile to sample sequences from * bg : null model (for background residue frequencies) * L : mean length model for seq emission from profile * N : number of sequences to generate * lambda : expected slope of the exponential tail (from p7_Lambda()) * tailp : tail mass from which we will extrapolate mu * ret_mu : RETURN: estimate for the Forward mu (base of exponential tail) * * Returns: <eslOK> on success, and <*ret_fv> is the score difference * in bits. * * Throws: <eslEMEM> on allocation error, and <*ret_fv> is 0. */ int p7_Tau(ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double tailp, double *ret_tau) { P7_OMX *ox = p7_omx_Create(om->M, 0, L); /* DP matrix: for ForwardParser, L rows */ ESL_DSQ *dsq = NULL; double *xv = NULL; float fsc, nullsc; double gmu, glam; int status; int i; ESL_ALLOC(xv, sizeof(double) * N); ESL_ALLOC(dsq, sizeof(ESL_DSQ) * (L+2)); if (ox == NULL) { status = eslEMEM; goto ERROR; } p7_oprofile_ReconfigLength(om, L); p7_bg_SetLength(bg, L); for (i = 0; i < N; i++) { if ((status = esl_rsq_xfIID(r, bg->f, om->abc->K, L, dsq)) != eslOK) goto ERROR; if ((status = p7_ForwardParser(dsq, L, om, ox, &fsc)) != eslOK) goto ERROR; if ((status = p7_bg_NullOne(bg, dsq, L, &nullsc)) != eslOK) goto ERROR; xv[i] = (fsc - nullsc) / eslCONST_LOG2; } if ((status = esl_gumbel_FitComplete(xv, N, &gmu, &glam)) != eslOK) goto ERROR; /* Explanation of the eqn below: first find the x at which the Gumbel tail * mass is predicted to be equal to tailp. Then back up from that x * by log(tailp)/lambda to set the origin of the exponential tail to 1.0 * instead of tailp. */ *ret_tau = esl_gumbel_invcdf(1.0-tailp, gmu, glam) + (log(tailp) / lambda); free(xv); free(dsq); p7_omx_Destroy(ox); return eslOK; ERROR: *ret_tau = 0.; if (xv != NULL) free(xv); if (dsq != NULL) free(dsq); if (ox != NULL) p7_omx_Destroy(ox); return status; }
/* Function: esl_gumbel_generic_invcdf() * Incept: SRE, Sun Aug 21 12:12:27 2005 [St. Louis] * * Purpose: Generic-API version of inverse CDF. */ double esl_gumbel_generic_invcdf(double p, void *params) { double *v = (double *) params; return esl_gumbel_invcdf(p, v[0], v[1]); }