/* M(i,k) is reached from B(i-1), M(i-1,k-1), D(i-1,k-1), or I(i-1,k-1). */
static inline int
select_m(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int           Q  = p7O_NQF(ox->M);
  int           q  = (k-1) % Q;        /* (q,r) is position of the current DP cell M(i,k) */
  int           r  = (k-1) / Q;
  vector float *tp = om->tfv + 7*q;    /* *tp now at start of transitions to cur cell M(i,k) */
  vector float  xBv;
  vector float  zerov;
  vector float  mpv, dpv, ipv;
  union { vector float v; float p[4]; } u, tv;
  float         path[4];
  int           state[4] = { p7T_M, p7T_I, p7T_D, p7T_B };

  xBv   = esl_vmx_set_float(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);
  zerov = (vector float) vec_splat_u32(0);

  if (q > 0) {
    mpv = ox->dpf[i-1][(q-1)*3 + p7X_M];
    dpv = ox->dpf[i-1][(q-1)*3 + p7X_D];
    ipv = ox->dpf[i-1][(q-1)*3 + p7X_I];
  } else {
    mpv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_M], 12);
    dpv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_D], 12);
    ipv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_I], 12);
  }

  /* paths are numbered so that the most desirable choice in case of a tie is first. */
  u.v = xBv;  tv.v = *tp;  path[3] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]);  tp++;
  u.v = mpv;  tv.v = *tp;  path[0] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]);  tp++;
  u.v = ipv;  tv.v = *tp;  path[1] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]);  tp++;
  u.v = dpv;  tv.v = *tp;  path[2] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]);
  return state[esl_vec_FArgMax(path, 4)];
}
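/* Illustration (hypothetical helper, not part of the library): the striped
 * coordinate arithmetic used by select_m() above. A model position k (1..M)
 * lives in vector q = (k-1) % Q at lane r = (k-1) / Q, and is recovered as
 * k = r*Q + q + 1 (the inverse map used in select_e() below). A minimal
 * scalar sketch, assuming only the C standard library; Q is computed here as
 * ceil(M/4), ignoring any minimum clamp that p7O_NQF() may apply.
 */
#include <assert.h>

static void
example_striped_coords(int M)
{
  int Q = (M + 3) / 4;            /* vectors per DP row, 4 floats per vector */
  int k, q, r;

  for (k = 1; k <= M; k++)
    {
      q = (k-1) % Q;              /* which vector holds cell k               */
      r = (k-1) / Q;              /* which lane of that vector               */
      assert(r*Q + q + 1 == k);   /* round-trips back to the model position  */
    }
}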
/* M(i,k) is reached from B(i-1), M(i-1,k-1), D(i-1,k-1), or I(i-1,k-1). */
static inline int
select_m(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int           Q  = p7O_NQF(ox->M);
  int           q  = (k-1) % Q;        /* (q,r) is position of the current DP cell M(i,k) */
  int           r  = (k-1) / Q;
  vector float *tp = om->tfv + 7*q;    /* *tp now at start of transitions to cur cell M(i,k) */
  vector float  xBv;
  vector float  zerov;
  vector float  mpv, dpv, ipv;
  union { vector float v; float p[4]; } u;
  float         path[4];
  int           state[4] = { p7T_B, p7T_M, p7T_I, p7T_D };

  xBv   = esl_vmx_set_float(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);
  zerov = (vector float) vec_splat_u32(0);

  if (q > 0) {
    mpv = ox->dpf[i-1][(q-1)*3 + p7X_M];
    dpv = ox->dpf[i-1][(q-1)*3 + p7X_D];
    ipv = ox->dpf[i-1][(q-1)*3 + p7X_I];
  } else {
    mpv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_M], 12);
    dpv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_D], 12);
    ipv = vec_sld(zerov, ox->dpf[i-1][(Q-1)*3 + p7X_I], 12);
  }

  u.v = vec_madd(xBv, *tp, zerov); tp++;  path[0] = u.p[r];
  u.v = vec_madd(mpv, *tp, zerov); tp++;  path[1] = u.p[r];
  u.v = vec_madd(ipv, *tp, zerov); tp++;  path[2] = u.p[r];
  u.v = vec_madd(dpv, *tp, zerov);        path[3] = u.p[r];
  esl_vec_FNorm(path, 4);
  return state[esl_rnd_FChoose(rng, path, 4)];
}
/* Using FChoose() here would mean allocating tmp space for 2M-1 paths;
 * instead we use the fact that E(i) is itself the necessary normalization
 * factor, and implement FChoose's algorithm here for an on-the-fly
 * calculation.
 */
static inline int
select_e(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k)
{
  int    Q    = p7O_NQF(ox->M);
  double sum  = 0.0;
  double roll = esl_random(rng);
  double norm = 1.0 / ox->xmx[i*p7X_NXCELLS+p7X_E];   /* all M, D already scaled exactly the same */
  vector float xEv   = esl_vmx_set_float(norm);
  vector float zerov = (vector float) vec_splat_u32(0);
  union { vector float v; float p[4]; } u;
  int    q, r;

  while (1) {
    for (q = 0; q < Q; q++)
      {
        u.v = vec_madd(ox->dpf[i][q*3 + p7X_M], xEv, zerov);
        for (r = 0; r < 4; r++) {
          sum += u.p[r];
          if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_M; }
        }

        u.v = vec_madd(ox->dpf[i][q*3 + p7X_D], xEv, zerov);
        for (r = 0; r < 4; r++) {
          sum += u.p[r];
          if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_D; }
        }
      }
    ESL_DASSERT1((sum > 0.99));
  }
  /*UNREACHED*/
  ESL_EXCEPTION(-1, "unreached code was reached. universe collapses.");
}
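/* Illustration (hypothetical helper, not part of the library): the on-the-fly
 * categorical sampling that select_e() performs, and that select_m() performs
 * via esl_vec_FNorm() + esl_rnd_FChoose(). Given weights w[0..n-1] with known
 * total Z, draw one uniform variate and walk the cumulative sum of w[j]/Z
 * until it exceeds the draw; no normalized copy of the weights is ever
 * materialized. A minimal sketch, assuming only the C standard library
 * (rand() stands in for esl_random()).
 */
#include <stdlib.h>

static int
example_choose_on_the_fly(const float *w, int n, float Z)
{
  double roll = (double) rand() / ((double) RAND_MAX + 1.0);  /* in [0,1) */
  double sum  = 0.0;
  int    j;

  for (j = 0; j < n; j++)
    {
      sum += w[j] / Z;            /* normalize each term as we go            */
      if (roll < sum) return j;
    }
  return n-1;                     /* roundoff guard, like the DASSERT above  */
}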
/* Function:  p7_Null2_ByExpectation()
 * Synopsis:  Calculate null2 model from posterior probabilities.
 * Incept:    SRE, Mon Aug 18 08:32:55 2008 [Janelia]
 *
 * Purpose:   Identical to <p7_GNull2_ByExpectation()> except that
 *            <om>, <pp> are VMX optimized versions of the profile
 *            and the residue posterior probability matrix. See
 *            <p7_GNull2_ByExpectation()> documentation.
 *
 * Args:      om    - profile, in any mode, target length model set to <L>
 *            pp    - posterior prob matrix, for <om> against domain envelope <dsq+i-1> (offset)
 *            null2 - RETURN: null2 log odds scores per residue; <0..Kp-1>; caller allocated space
 */
int
p7_Null2_ByExpectation(const P7_OPROFILE *om, const P7_OMX *pp, float *null2)
{
  int    M   = om->M;
  int    Ld  = pp->L;
  int    Q   = p7O_NQF(M);
  float *xmx = pp->xmx;   /* enables use of XMXo(i,s) macro */
  float  norm;
  float  xfactor;
  int    i,q,x;
  vector float *rp;
  vector float  sv;
  vector float  zerov;

  zerov = (vector float) vec_splat_u32(0);

  /* Calculate expected # of times that each emitting state was used
   * in generating the Ld residues in this domain.
   * The 0 row in <pp> is used to hold these numbers.
   */
  memcpy(pp->dpf[0], pp->dpf[1], sizeof(vector float) * 3 * Q);
  XMXo(0,p7X_N) = XMXo(1,p7X_N);
  XMXo(0,p7X_C) = XMXo(1,p7X_C);   /* 0.0 */
  XMXo(0,p7X_J) = XMXo(1,p7X_J);   /* 0.0 */

  for (i = 2; i <= Ld; i++)
    {
      for (q = 0; q < Q; q++)
        {
          pp->dpf[0][q*3 + p7X_M] = vec_add(pp->dpf[i][q*3 + p7X_M], pp->dpf[0][q*3 + p7X_M]);
          pp->dpf[0][q*3 + p7X_I] = vec_add(pp->dpf[i][q*3 + p7X_I], pp->dpf[0][q*3 + p7X_I]);
        }
      XMXo(0,p7X_N) += XMXo(i,p7X_N);
      XMXo(0,p7X_C) += XMXo(i,p7X_C);
      XMXo(0,p7X_J) += XMXo(i,p7X_J);
    }

  /* Convert those expected #'s to frequencies, to use as posterior weights. */
  norm = 1.0 / (float) Ld;
  sv   = esl_vmx_set_float(norm);
  for (q = 0; q < Q; q++)
    {
      pp->dpf[0][q*3 + p7X_M] = vec_madd(pp->dpf[0][q*3 + p7X_M], sv, zerov);
      pp->dpf[0][q*3 + p7X_I] = vec_madd(pp->dpf[0][q*3 + p7X_I], sv, zerov);
    }
  XMXo(0,p7X_N) *= norm;
  XMXo(0,p7X_C) *= norm;
  XMXo(0,p7X_J) *= norm;

  /* Calculate null2's emission odds, by taking posterior weighted sum
   * over all emission vectors used in paths explaining the domain.
   */
  xfactor = XMXo(0, p7X_N) + XMXo(0, p7X_C) + XMXo(0, p7X_J);
  for (x = 0; x < om->abc->K; x++)
    {
      sv = (vector float) vec_splat_u32(0);
      rp = om->rfv[x];
      for (q = 0; q < Q; q++)
        {
          sv = vec_madd(pp->dpf[0][q*3 + p7X_M], *rp, sv); rp++;
          sv = vec_add(sv, pp->dpf[0][q*3 + p7X_I]);   /* insert odds implicitly 1.0 */
        }
      null2[x]  = esl_vmx_hsum_float(sv);
      null2[x] += xfactor;
    }
  /* now null2[x] = \frac{f_d(x)}{f_0(x)} for all x in alphabet,
   * 0..K-1, where f_d(x) are the ad hoc "null2" residue frequencies
   * for this envelope.
   */

  /* make valid scores for all degeneracies, by averaging the odds ratios. */
  esl_abc_FAvgScVec(om->abc, null2);
  null2[om->abc->K]    = 1.0;   /* gap character    */
  null2[om->abc->Kp-2] = 1.0;   /* nonresidue "*"   */
  null2[om->abc->Kp-1] = 1.0;   /* missing data "~" */

  /* ta-da */
  return eslOK;
}
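/* Illustration (hypothetical helper, not part of the library): the scalar
 * logic behind p7_Null2_ByExpectation(), with the striping stripped away.
 * Expected usage of each emitting state is summed over the Ld envelope rows,
 * normalized by 1/Ld into posterior weights, then used to weight the match
 * emission odds; insert and N/C/J emissions contribute odds of 1.0. A minimal
 * sketch under those assumptions; mxM[i][k], mxI[i][k], ncj_usage[i], and
 * rodds[k][x] are hypothetical stand-ins for the decoded posteriors and
 * om->rfv.
 */
static void
example_null2_expectation(int M, int Ld, int K,
                          float **mxM, float **mxI,   /* [1..Ld][1..M] posteriors    */
                          float  *ncj_usage,          /* [1..Ld] summed N+C+J posterior per row */
                          float **rodds,              /* [1..M][0..K-1] match odds   */
                          float  *null2)              /* [0..K-1] result             */
{
  int   i, k, x;
  float xfactor = 0.0;

  for (x = 0; x < K; x++)  null2[x] = 0.0;
  for (i = 1; i <= Ld; i++) xfactor += ncj_usage[i];
  xfactor /= (float) Ld;                              /* N/C/J usage as a frequency  */

  for (k = 1; k <= M; k++)
    {
      float mw = 0.0, iw = 0.0;
      for (i = 1; i <= Ld; i++) { mw += mxM[i][k]; iw += mxI[i][k]; }
      mw /= (float) Ld;  iw /= (float) Ld;            /* expected counts -> weights  */
      for (x = 0; x < K; x++)
        null2[x] += mw * rodds[k][x] + iw * 1.0f;     /* insert odds implicitly 1.0  */
    }
  for (x = 0; x < K; x++) null2[x] += xfactor;        /* N/C/J emissions, odds 1.0   */
}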
/* Function:  p7_Null2_ByTrace()
 * Synopsis:  Assign null2 scores to an envelope by the sampling method.
 * Incept:    SRE, Mon Aug 18 10:22:49 2008 [Janelia]
 *
 * Purpose:   Identical to <p7_GNull2_ByTrace()> except that
 *            <om>, <wrk> are VMX optimized versions of the profile
 *            and the residue posterior probability matrix. See
 *            <p7_GNull2_ByTrace()> documentation.
 */
int
p7_Null2_ByTrace(const P7_OPROFILE *om, const P7_TRACE *tr, int zstart, int zend, P7_OMX *wrk, float *null2)
{
  union { vector float v; float p[4]; } u;
  int    Q   = p7O_NQF(om->M);
  int    Ld  = 0;
  float *xmx = wrk->xmx;   /* enables use of XMXo macro */
  float  norm;
  float  xfactor;
  int    q, r, s;
  int    x;
  int    z;
  vector float  sv;
  vector float *rp;
  vector float  zerov;

  zerov = (vector float) vec_splat_u32(0);

  /* We'll use the i=0 row in wrk for working space: dp[0][] and xmx[][0]. */
  for (q = 0; q < Q; q++)
    {
      wrk->dpf[0][q*3 + p7X_M] = (vector float) vec_splat_u32(0);
      wrk->dpf[0][q*3 + p7X_I] = (vector float) vec_splat_u32(0);
    }
  XMXo(0,p7X_N) = 0.0;
  XMXo(0,p7X_C) = 0.0;
  XMXo(0,p7X_J) = 0.0;

  /* Calculate emitting state usage in this particular trace segment */
  for (z = zstart; z <= zend; z++)
    {
      if (tr->i[z] == 0) continue;   /* quick test for whether this trace elem emitted or not */
      Ld++;
      if (tr->k[z] > 0)              /* must be an M or I */
        {
          /* surely there's an easier way? but our workspace is striped, interleaved quads... */
          s = ( (tr->st[z] == p7T_M) ? p7X_M : p7X_I);
          q = p7X_NSCELLS * ( (tr->k[z] - 1) % Q) + s;
          r = (tr->k[z] - 1) / Q;
          u.v            = wrk->dpf[0][q];
          u.p[r]        += 1.0;      /* all this to increment a count by one! */
          wrk->dpf[0][q] = u.v;
        }
      else                           /* emitted an x_i with no k; must be an N,C,J */
        {
          switch (tr->st[z]) {
          case p7T_N: XMXo(0,p7X_N) += 1.0; break;
          case p7T_C: XMXo(0,p7X_C) += 1.0; break;
          case p7T_J: XMXo(0,p7X_J) += 1.0; break;
          }
        }
    }

  norm = 1.0 / (float) Ld;
  sv   = esl_vmx_set_float(norm);
  for (q = 0; q < Q; q++)
    {
      wrk->dpf[0][q*3 + p7X_M] = vec_madd(wrk->dpf[0][q*3 + p7X_M], sv, zerov);
      wrk->dpf[0][q*3 + p7X_I] = vec_madd(wrk->dpf[0][q*3 + p7X_I], sv, zerov);
    }
  XMXo(0,p7X_N) *= norm;
  XMXo(0,p7X_C) *= norm;
  XMXo(0,p7X_J) *= norm;

  /* Calculate null2's emission odds, by taking posterior weighted sum
   * over all emission vectors used in paths explaining the domain.
   */
  xfactor = XMXo(0,p7X_N) + XMXo(0,p7X_C) + XMXo(0,p7X_J);
  for (x = 0; x < om->abc->K; x++)
    {
      sv = (vector float) vec_splat_u32(0);
      rp = om->rfv[x];
      for (q = 0; q < Q; q++)
        {
          sv = vec_madd(wrk->dpf[0][q*3 + p7X_M], *rp, sv); rp++;
          sv = vec_add(sv, wrk->dpf[0][q*3 + p7X_I]);   /* insert emission odds implicitly 1.0 */
          /* leftover from the SSE port, kept for reference:
           * sv = _mm_add_ps(sv, _mm_mul_ps(wrk->dpf[0][q*3 + p7X_I], *rp)); rp++;
           */
        }
      null2[x]  = esl_vmx_hsum_float(sv);
      null2[x] += xfactor;
    }
  /* now null2[x] = \frac{f_d(x)}{f_0(x)} for all x in alphabet,
   * 0..K-1, where f_d(x) are the ad hoc "null2" residue frequencies
   * for this envelope.
   */

  /* make valid scores for all degeneracies, by averaging the odds ratios. */
  esl_abc_FAvgScVec(om->abc, null2);
  null2[om->abc->K]    = 1.0;   /* gap character    */
  null2[om->abc->Kp-2] = 1.0;   /* nonresidue "*"   */
  null2[om->abc->Kp-1] = 1.0;   /* missing data "~" */

  return eslOK;
}
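/* Illustration (hypothetical helper, not part of the library): the striped
 * read-modify-write that p7_Null2_ByTrace() performs through a union to bump
 * one lane of one count vector ("all this to increment a count by one!"). A
 * scalar sketch viewing the row as flat floats, 4 lanes per vector slot;
 * nscells plays the role of p7X_NSCELLS and scell of p7X_M or p7X_I.
 */
static void
example_increment_striped_count(float *row, int Q, int nscells, int scell, int k)
{
  int q = nscells * ((k-1) % Q) + scell;  /* which vector slot in the row      */
  int r = (k-1) / Q;                      /* which lane within that vector     */

  row[q*4 + r] += 1.0f;                   /* the union in p7_Null2_ByTrace()
                                             does exactly this, routed through
                                             a vector register */
}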
/* Function:  p7_OptimalAccuracy()
 * Synopsis:  DP fill of an optimal accuracy alignment calculation.
 * Incept:    SRE, Mon Aug 18 11:04:48 2008 [Janelia]
 *
 * Purpose:   Calculates the fill step of the optimal accuracy decoding
 *            algorithm \citep{Kall05}.
 *
 *            Caller provides the posterior decoding matrix <pp>,
 *            which was calculated by Forward/Backward on a target sequence
 *            of length <pp->L> using the query model <om>.
 *
 *            Caller also provides a DP matrix <ox>, allocated for a full
 *            <om->M> by <L> comparison. The routine fills this in
 *            with OA scores.
 *
 * Args:      om    - query profile
 *            pp    - posterior decoding matrix created by <p7_GPosteriorDecoding()>
 *            ox    - RESULT: caller provided DP matrix for <om->M> by <L>
 *            ret_e - RETURN: expected number of correctly decoded positions
 *
 * Returns:   <eslOK> on success, and <*ret_e> contains the final OA
 *            score, which is the expected number of correctly decoded
 *            positions in the target sequence (up to <L>).
 *
 * Throws:    (no abnormal error conditions)
 */
int
p7_OptimalAccuracy(const P7_OPROFILE *om, const P7_OMX *pp, P7_OMX *ox, float *ret_e)
{
  vector float  mpv, dpv, ipv;      /* previous row values                                       */
  vector float  sv;                 /* temp storage of 1 curr row value in progress              */
  vector float  xEv;                /* E state: keeps max for Mk->E as we go                     */
  vector float  xBv;                /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector float  dcv;
  float        *xmx = ox->xmx;
  vector float *dpc = ox->dpf[0];   /* current row, for use in {MDI}MO(dpp,q) access macro  */
  vector float *dpp;                /* previous row, for use in {MDI}MO(dpp,q) access macro */
  vector float *ppp;                /* quads in the <pp> posterior probability matrix       */
  vector float *tp;                 /* quads in the <om->tfv> transition scores             */
  vector float  zerov;
  vector float  infv;
  int           M = om->M;
  int           Q = p7O_NQF(M);
  int           q;
  int           j;
  int           i;
  float         t1, t2;

  zerov = (vector float) vec_splat_u32(0);
  infv  = esl_vmx_set_float(-eslINFINITY);

  ox->M = om->M;
  ox->L = pp->L;
  for (q = 0; q < Q; q++) MMO(dpc,q) = IMO(dpc,q) = DMO(dpc,q) = infv;
  XMXo(0, p7X_E) = -eslINFINITY;
  XMXo(0, p7X_N) = 0.;
  XMXo(0, p7X_J) = -eslINFINITY;
  XMXo(0, p7X_B) = 0.;
  XMXo(0, p7X_C) = -eslINFINITY;

  for (i = 1; i <= pp->L; i++)
    {
      dpp = dpc;          /* previous DP row in OA matrix */
      dpc = ox->dpf[i];   /* current DP row in OA matrix  */
      ppp = pp->dpf[i];   /* current row in the posterior probabilities per position */
      tp  = om->tfv;      /* transition probabilities     */
      dcv = infv;
      xEv = infv;
      xBv = esl_vmx_set_float(XMXo(i-1, p7X_B));

      mpv = vec_sld(infv, MMO(dpp,Q-1), 12);  /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12. */
      dpv = vec_sld(infv, DMO(dpp,Q-1), 12);
      ipv = vec_sld(infv, IMO(dpp,Q-1), 12);

      for (q = 0; q < Q; q++)
        {
          sv  =             vec_and(vec_cmpgt(*tp, zerov), xBv);  tp++;
          sv  = vec_max(sv, vec_and(vec_cmpgt(*tp, zerov), mpv)); tp++;
          sv  = vec_max(sv, vec_and(vec_cmpgt(*tp, zerov), ipv)); tp++;
          sv  = vec_max(sv, vec_and(vec_cmpgt(*tp, zerov), dpv)); tp++;
          sv  = vec_add(sv, *ppp);  ppp += 2;
          xEv = vec_max(xEv, sv);

          mpv = MMO(dpp,q);
          dpv = DMO(dpp,q);
          ipv = IMO(dpp,q);

          MMO(dpc,q) = sv;
          DMO(dpc,q) = dcv;

          dcv = vec_and(vec_cmpgt(*tp, zerov), sv); tp++;

          sv         = vec_and(vec_cmpgt(*tp, zerov), mpv);              tp++;
          sv         = vec_max(sv, vec_and(vec_cmpgt(*tp, zerov), ipv)); tp++;
          IMO(dpc,q) = vec_add(sv, *ppp); ppp++;
        }

      /* dcv has carried through from end of q loop above; store it:
       * in the first pass, we add M->D and D->D paths into DMX.
       */
      dcv = vec_sld(infv, dcv, 12);
      tp  = om->tfv + 7*Q;   /* set tp to start of the DD's */
      for (q = 0; q < Q; q++)
        {
          DMO(dpc,q) = vec_max(dcv, DMO(dpc,q));
          dcv        = vec_and(vec_cmpgt(*tp, zerov), DMO(dpc,q)); tp++;
        }

      /* fully serialized D->D; can optimize later */
      for (j = 1; j < 4; j++)
        {
          dcv = vec_sld(infv, dcv, 12);
          tp  = om->tfv + 7*Q;
          for (q = 0; q < Q; q++)
            {
              DMO(dpc,q) = vec_max(dcv, DMO(dpc,q));
              dcv        = vec_and(vec_cmpgt(*tp, zerov), dcv); tp++;
            }
        }

      /* D->E paths */
      for (q = 0; q < Q; q++) xEv = vec_max(xEv, DMO(dpc,q));

      /* Specials */
      XMXo(i,p7X_E) = esl_vmx_hmax_float(xEv);

      t1 = ( (om->xf[p7O_J][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_J] + pp->xmx[i*p7X_NXCELLS+p7X_J]);
      t2 = ( (om->xf[p7O_E][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[ i   *p7X_NXCELLS+p7X_E]);
      ox->xmx[i*p7X_NXCELLS+p7X_J] = ESL_MAX(t1, t2);

      t1 = ( (om->xf[p7O_C][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_C] + pp->xmx[i*p7X_NXCELLS+p7X_C]);
      t2 = ( (om->xf[p7O_E][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[ i   *p7X_NXCELLS+p7X_E]);
      ox->xmx[i*p7X_NXCELLS+p7X_C] = ESL_MAX(t1, t2);

      ox->xmx[i*p7X_NXCELLS+p7X_N] = ((om->xf[p7O_N][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_N] + pp->xmx[i*p7X_NXCELLS+p7X_N]);

      t1 = ( (om->xf[p7O_N][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[i*p7X_NXCELLS+p7X_N]);
      t2 = ( (om->xf[p7O_J][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[i*p7X_NXCELLS+p7X_J]);
      ox->xmx[i*p7X_NXCELLS+p7X_B] = ESL_MAX(t1, t2);
    }

  *ret_e = ox->xmx[pp->L*p7X_NXCELLS+p7X_C];
  return eslOK;
}
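/* Illustration (hypothetical helper, not part of the library): the branch-free
 * gating used in the optimal-accuracy fill above. Instead of branching on
 * "is this transition possible?", the vector code builds an all-ones/all-zeros
 * mask with vec_cmpgt(t, 0) and ANDs it against the candidate value, so
 * zero-probability transitions contribute nothing to the running max. A scalar
 * sketch of the same idea using integer bit masks:
 */
#include <string.h>
#include <stdint.h>

static float
example_masked_max(float cur_max, float value, float tprob)
{
  uint32_t vbits, mask;
  float    gated;

  memcpy(&vbits, &value, sizeof vbits);
  mask   = (tprob > 0.0f) ? 0xFFFFFFFFu : 0u;  /* vec_cmpgt(*tp, zerov)        */
  vbits &= mask;                               /* vec_and(mask, value)         */
  memcpy(&gated, &vbits, sizeof gated);        /* gated is value, or +0.0      */
  return (gated > cur_max) ? gated : cur_max;  /* vec_max(xEv, sv) and friends */
}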
/* Function:  p7_ViterbiScore()
 * Synopsis:  Calculates Viterbi score, correctly, and vewy vewy fast.
 * Incept:    SRE, Tue Nov 27 09:15:24 2007 [Janelia]
 *
 * Purpose:   Calculates the Viterbi score for sequence <dsq> of length <L>
 *            residues, using optimized profile <om>, and a preallocated
 *            one-row DP matrix <ox>. Return the Viterbi score (in nats)
 *            in <ret_sc>.
 *
 *            The model <om> must be configured specially to have
 *            lspace float scores, not its usual pspace float scores for
 *            <p7_ForwardFilter()>.
 *
 *            As with all <*Score()> implementations, the score is
 *            accurate (full range and precision) and can be
 *            calculated on models in any mode, not only local modes.
 *
 * Args:      dsq    - digital target sequence, 1..L
 *            L      - length of dsq in residues
 *            om     - optimized profile
 *            ox     - DP matrix
 *            ret_sc - RETURN: Viterbi score (in nats)
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small.
 */
int
p7_ViterbiScore(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc)
{
  vector float  mpv, dpv, ipv;     /* previous row values                                       */
  vector float  sv;                /* temp storage of 1 curr row value in progress              */
  vector float  dcv;               /* delayed storage of D(i,q+1)                               */
  vector float  xEv;               /* E state: keeps max for Mk->E as we go                     */
  vector float  xBv;               /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector float  Dmaxv;             /* keeps track of maximum D cell on row                      */
  vector float  infv;              /* -eslINFINITY in a vector                                  */
  float         xN, xE, xB, xC, xJ;/* special states' scores                                    */
  float         Dmax;              /* maximum D cell on row                                     */
  int           i;                 /* counter over sequence positions 1..L                      */
  int           q;                 /* counter over vectors 0..nq-1                              */
  int           Q = p7O_NQF(om->M);/* segment length: # of vectors                              */
  vector float *dp = ox->dpf[0];   /* using {MDI}MXo(q) macro requires initialization of <dp>   */
  vector float *rsc;               /* will point at om->rfv[x] for residue x[i]                 */
  vector float *tsc;               /* will point into (and step thru) om->tfv                   */

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  ox->M = om->M;

  /* Initialization. */
  infv = esl_vmx_set_float(-eslINFINITY);
  for (q = 0; q < Q; q++) MMXo(q) = IMXo(q) = DMXo(q) = infv;
  xN = 0.;
  xB = om->xf[p7O_N][p7O_MOVE];
  xE = -eslINFINITY;
  xJ = -eslINFINITY;
  xC = -eslINFINITY;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpFloatRow(ox, FALSE, 0, 5, 2, xE, xN, xJ, xB, xC);  /* logify=FALSE, <rowi>=0, width=5, precision=2 */
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rfv[dsq[i]];
      tsc   = om->tfv;
      dcv   = infv;
      xEv   = infv;
      Dmaxv = infv;
      xBv   = esl_vmx_set_float(xB);

      mpv = vec_sld(infv, MMXo(Q-1), 12);  /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12. */
      dpv = vec_sld(infv, DMXo(Q-1), 12);
      ipv = vec_sld(infv, IMXo(Q-1), 12);

      for (q = 0; q < Q; q++)
        {
          /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
          sv  =             vec_add(xBv, *tsc);  tsc++;
          sv  = vec_max(sv, vec_add(mpv, *tsc)); tsc++;
          sv  = vec_max(sv, vec_add(ipv, *tsc)); tsc++;
          sv  = vec_max(sv, vec_add(dpv, *tsc)); tsc++;
          sv  = vec_add(sv, *rsc); rsc++;
          xEv = vec_max(xEv, sv);

          /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
           * {MDI}MXo(q) is then the current, not the prev row.
           */
          mpv = MMXo(q);
          dpv = DMXo(q);
          ipv = IMXo(q);

          /* Do the delayed stores of {MD}(i,q) now that memory is usable */
          MMXo(q) = sv;
          DMXo(q) = dcv;

          /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv.
           */
          dcv   = vec_add(sv, *tsc); tsc++;
          Dmaxv = vec_max(dcv, Dmaxv);

          /* Calculate and store I(i,q) */
          sv      =             vec_add(mpv, *tsc);  tsc++;
          sv      = vec_max(sv, vec_add(ipv, *tsc)); tsc++;
          IMXo(q) = vec_add(sv, *rsc); rsc++;
        }

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_float(xEv);
      xN = xN + om->xf[p7O_N][p7O_LOOP];
      xC = ESL_MAX(xC + om->xf[p7O_C][p7O_LOOP], xE + om->xf[p7O_E][p7O_MOVE]);
      xJ = ESL_MAX(xJ + om->xf[p7O_J][p7O_LOOP], xE + om->xf[p7O_E][p7O_LOOP]);
      xB = ESL_MAX(xJ + om->xf[p7O_J][p7O_MOVE], xN + om->xf[p7O_N][p7O_MOVE]);
      /* and now xB will carry over into next i, and xC carries over after i=L */

      /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
       * prove that we don't need to evaluate any D->D paths at all.
       *
       * The observation is that if we can show that on the next row,
       * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
       * for all k, then we don't need any D->D calculations.
       *
       * The test condition is:
       *   max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
       * So:
       *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is precalc'ed in om->ddbound_f;
       *   max_k D(i,k) is why we tracked Dmaxv;
       *   xB(i) was just calculated above.
       */
      Dmax = esl_vmx_hmax_float(Dmaxv);
      if (Dmax + om->ddbound_f > xB)
        {
          /* Now we're obligated to do at least one complete DD path to be sure. */
          /* dcv has carried through from end of q loop above */
          dcv = vec_sld(infv, dcv, 12);
          tsc = om->tfv + 7*Q;   /* set tsc to start of the DD's */
          for (q = 0; q < Q; q++)
            {
              DMXo(q) = vec_max(dcv, DMXo(q));
              dcv     = vec_add(DMXo(q), *tsc); tsc++;
            }

          /* We may have to do up to three more passes; the check
           * is for whether crossing a segment boundary can improve
           * our score.
           */
          do {
            dcv = vec_sld(infv, dcv, 12);
            tsc = om->tfv + 7*Q;   /* set tsc to start of the DD's */
            for (q = 0; q < Q; q++)
              {
                if (! vec_any_gt(dcv, DMXo(q))) break;
                DMXo(q) = vec_max(dcv, DMXo(q));
                dcv     = vec_add(DMXo(q), *tsc); tsc++;
              }
          } while (q == Q);
        }
      else  /* not calculating DD? then just store that last MD vector we calc'ed. */
        {
          dcv     = vec_sld(infv, dcv, 12);
          DMXo(0) = dcv;
        }

#if p7_DEBUGGING
      if (ox->debugging) p7_omx_DumpFloatRow(ox, FALSE, i, 5, 2, xE, xN, xJ, xB, xC);  /* logify=FALSE, <rowi>=i, width=5, precision=2 */
#endif
    } /* end loop over sequence residues 1..L */

  /* finally C->T */
  *ret_sc = xC + om->xf[p7O_C][p7O_MOVE];
  return eslOK;
}
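/* Illustration (hypothetical helper, not part of the library): the "lazy F"
 * test from p7_ViterbiScore() in scalar form. If even the best D cell on the
 * row, extended through the best possible D->...->D->M continuation (the
 * precomputed bound), cannot beat a fresh B->M entry, every D->D evaluation on
 * this row can be skipped. A minimal sketch; dd_bound plays the role of
 * om->ddbound_f, and Drow[1..M] the row's D scores in log space.
 */
static int
example_lazy_F_needed(const float *Drow, int M, float dd_bound, float xB)
{
  float Dmax = -1e30f;   /* stands in for -eslINFINITY */
  int   k;

  for (k = 1; k <= M; k++)          /* Dmaxv tracks this max, lane-wise  */
    if (Drow[k] > Dmax) Dmax = Drow[k];

  return (Dmax + dd_bound > xB);    /* only then run the D->D pass(es)   */
}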
static int
backward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc)
{
  vector float  mpv, ipv, dpv;      /* previous row values                                        */
  vector float  mcv, dcv;           /* current row values                                         */
  vector float  tmmv, timv, tdmv;   /* tmp vars for accessing rotated transition scores           */
  vector float  xBv;                /* collects B->Mk components of B(i)                          */
  vector float  xEv;                /* splatted E(i)                                              */
  vector float  zerov;              /* splatted 0.0's in a vector                                 */
  float         xN, xE, xB, xC, xJ; /* special states' scores                                     */
  int           i;                  /* counter over sequence positions 0,1..L                     */
  int           q;                  /* counter over quads 0..Q-1                                  */
  int           Q = p7O_NQF(om->M); /* segment length: # of vectors                               */
  int           j;                  /* DD segment iteration counter (4 = full serialization)      */
  vector float *dpc;                /* current DP row                                             */
  vector float *dpp;                /* next ("previous") DP row                                   */
  vector float *rp;                 /* will point into om->rfv[x] for residue x[i+1]              */
  vector float *tp;                 /* will point into (and step thru) om->tfv transition scores  */

  /* initialize the L row. */
  bck->M = om->M;
  bck->L = L;
  bck->has_own_scales = FALSE;   /* backwards scale factors are *usually* given by <fwd> */
  dpc    = bck->dpf[L * do_full];
  xJ     = 0.0;
  xB     = 0.0;
  xN     = 0.0;
  xC     = om->xf[p7O_C][p7O_MOVE];        /* C<-T          */
  xE     = xC * om->xf[p7O_E][p7O_MOVE];   /* E<-C, no tail */
  xEv    = esl_vmx_set_float(xE);
  zerov  = (vector float) vec_splat_u32(0);
  dcv    = (vector float) vec_splat_u32(0);   /* solely to silence a compiler warning */
  for (q = 0; q < Q; q++) MMO(dpc,q) = DMO(dpc,q) = xEv;
  for (q = 0; q < Q; q++) IMO(dpc,q) = zerov;

  /* init row L's DD paths, 1) first segment includes xE, from DMO(q) */
  tp  = om->tfv + 8*Q - 1;                  /* <*tp> now the [4 8 12 x] TDD quad */
  dpv = vec_sld(DMO(dpc,Q-1), zerov, 4);
  for (q = Q-1; q >= 1; q--)
    {
      DMO(dpc,q) = vec_madd(dpv, *tp, DMO(dpc,q)); tp--;
      dpv        = DMO(dpc,q);
    }
  dcv        = vec_madd(dpv, *tp, zerov);
  DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);    /* q==0 after falling out of the loop above */

  /* 2) three more passes, only extending DD component (dcv only; no xE contrib from DMO(q)) */
  for (j = 1; j < 4; j++)
    {
      tp  = om->tfv + 8*Q - 1;              /* <*tp> now the [4 8 12 x] TDD quad */
      dcv = vec_sld(dcv, zerov, 4);
      for (q = Q-1; q >= 0; q--)
        {
          dcv        = vec_madd(dcv, *tp, zerov); tp--;
          DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
        }
    }

  /* now MD init */
  tp  = om->tfv + 7*Q - 3;                  /* <*tp> now the [4 8 12 x] Mk->Dk+1 quad */
  dcv = vec_sld(DMO(dpc,0), zerov, 4);
  for (q = Q-1; q >= 0; q--)
    {
      MMO(dpc,q) = vec_madd(dcv, *tp, MMO(dpc,q)); tp -= 7;
      dcv        = DMO(dpc,q);
    }

  /* Sparse rescaling: same scale factors as fwd matrix */
  if (fwd->xmx[L*p7X_NXCELLS+p7X_SCALE] > 1.0)
    {
      xE  = xE / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
      xN  = xN / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
      xC  = xC / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
      xJ  = xJ / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
      xB  = xB / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
      xEv = esl_vmx_set_float(1.0 / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE]);
      for (q = 0; q < Q; q++)
        {
          MMO(dpc,q) = vec_madd(MMO(dpc,q), xEv, zerov);
          DMO(dpc,q) = vec_madd(DMO(dpc,q), xEv, zerov);
          IMO(dpc,q) = vec_madd(IMO(dpc,q), xEv, zerov);
        }
    }
  bck->xmx[L*p7X_NXCELLS+p7X_SCALE] = fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
  bck->totscale                     = log(bck->xmx[L*p7X_NXCELLS+p7X_SCALE]);

  /* Stores */
  bck->xmx[L*p7X_NXCELLS+p7X_E] = xE;
  bck->xmx[L*p7X_NXCELLS+p7X_N] = xN;
  bck->xmx[L*p7X_NXCELLS+p7X_J] = xJ;
  bck->xmx[L*p7X_NXCELLS+p7X_B] = xB;
  bck->xmx[L*p7X_NXCELLS+p7X_C] = xC;

#if p7_DEBUGGING
  if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, L, 9, 4, xE, xN, xJ, xB, xC);  /* logify=TRUE, <rowi>=L, width=9, precision=4 */
#endif

  /* main recursion */
  for (i = L-1; i >= 1; i--)    /* backwards stride */
    {
      /* phase 1. B(i) collected. Old row destroyed, new row contains
       *          complete I(i,k), partial {MD}(i,k) w/ no {MD}->{DE} paths yet.
       */
      dpc = bck->dpf[i     * do_full];
      dpp = bck->dpf[(i+1) * do_full];
      rp  = om->rfv[dsq[i+1]] + Q-1;   /* <*rp> is now the [4 8 12 x] match emission quad */
      tp  = om->tfv + 7*Q - 1;         /* <*tp> is now the [4 8 12 x] TII transition quad */

      /* leftshift the first transition quads */
      tmmv = vec_sld(om->tfv[1], zerov, 4);
      timv = vec_sld(om->tfv[2], zerov, 4);
      tdmv = vec_sld(om->tfv[3], zerov, 4);

      mpv = vec_madd(MMO(dpp,0), om->rfv[dsq[i+1]][0], zerov);   /* precalc M(i+1,k+1) * e(M_k+1, x_{i+1}) */
      mpv = vec_sld(mpv, zerov, 4);

      xBv = zerov;
      for (q = Q-1; q >= 0; q--)       /* backwards stride */
        {
          vector float t1;

          ipv        = IMO(dpp,q);     /* assumes emission odds ratio of 1.0; i+1's IMO(q) now free */
          t1         = vec_madd(mpv, timv, zerov);
          IMO(dpc,q) = vec_madd(ipv, *tp, t1);    tp--;
          DMO(dpc,q) = vec_madd(mpv, tdmv, zerov);
          t1         = vec_madd(mpv, tmmv, zerov);
          mcv        = vec_madd(ipv, *tp, t1);    tp -= 2;

          /* obtain mpv for next q. i+1's MMO(q) is freed */
          mpv        = vec_madd(MMO(dpp,q), *rp, zerov); rp--;
          MMO(dpc,q) = mcv;

          tdmv = *tp; tp--;
          timv = *tp; tp--;
          tmmv = *tp; tp--;

          xBv = vec_madd(mpv, *tp, xBv); tp--;
        }

      /* phase 2: now that we have accumulated the B->Mk transitions in xBv, we can do the specials */
      xB  = esl_vmx_hsum_float(xBv);
      xC  = xC * om->xf[p7O_C][p7O_LOOP];
      xJ  = (xB * om->xf[p7O_J][p7O_MOVE]) + (xJ * om->xf[p7O_J][p7O_LOOP]);   /* must come after xB     */
      xN  = (xB * om->xf[p7O_N][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_LOOP]);   /* must come after xB     */
      xE  = (xC * om->xf[p7O_E][p7O_MOVE]) + (xJ * om->xf[p7O_E][p7O_LOOP]);   /* must come after xJ, xC */
      xEv = esl_vmx_set_float(xE);   /* splat */

      /* phase 3: {MD}->E paths and one step of the D->D paths */
      tp  = om->tfv + 8*Q - 1;       /* <*tp> now the [4 8 12 x] TDD quad */
      dpv = vec_add(DMO(dpc,0), xEv);
      dpv = vec_sld(dpv, zerov, 4);
      for (q = Q-1; q >= 1; q--)
        {
          dcv        = vec_madd(dpv, *tp, xEv); tp--;
          DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
          dpv        = DMO(dpc,q);
          MMO(dpc,q) = vec_add(MMO(dpc,q), xEv);
        }
      dcv        = vec_madd(dpv, *tp, zerov);
      DMO(dpc,q) = vec_add(DMO(dpc,q), vec_add(dcv, xEv));   /* q==0 here */
      MMO(dpc,q) = vec_add(MMO(dpc,q), xEv);

      /* phase 4: finish extending the DD paths */
      /* fully serialized for now */
      for (j = 1; j < 4; j++)   /* three passes: we've already done 1 segment, we need 4 total */
        {
          dcv = vec_sld(dcv, zerov, 4);
          tp  = om->tfv + 8*Q - 1;   /* <*tp> now the [4 8 12 x] TDD quad */
          for (q = Q-1; q >= 0; q--)
            {
              dcv        = vec_madd(dcv, *tp, zerov); tp--;
              DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
            }
        }

      /* phase 5: add M->D paths */
      dcv = vec_sld(DMO(dpc,0), zerov, 4);
      tp  = om->tfv + 7*Q - 3;       /* <*tp> is now the [4 8 12 x] Mk->Dk+1 quad */
      for (q = Q-1; q >= 0; q--)
        {
          MMO(dpc,q) = vec_madd(dcv, *tp, MMO(dpc,q)); tp -= 7;
          dcv        = DMO(dpc,q);
        }

      /* Sparse rescaling */
      /* In rare cases [J3/119] scale factors from <fwd> are
       * insufficient and backwards will overflow. In this case, we
       * switch on the fly to using our own scale factors, different
       * from those in <fwd>. This will complicate subsequent
       * posterior decoding routines.
       */
      if (xB > 1.0e16) bck->has_own_scales = TRUE;
      if      (bck->has_own_scales) bck->xmx[i*p7X_NXCELLS+p7X_SCALE] = (xB > 1.0e4) ? xB : 1.0;
      else                          bck->xmx[i*p7X_NXCELLS+p7X_SCALE] = fwd->xmx[i*p7X_NXCELLS+p7X_SCALE];

      if (bck->xmx[i*p7X_NXCELLS+p7X_SCALE] > 1.0)
        {
          xE /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
          xN /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
          xJ /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
          xB /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
          xC /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
          xBv = esl_vmx_set_float(1.0 / bck->xmx[i*p7X_NXCELLS+p7X_SCALE]);
          for (q = 0; q < Q; q++)
            {
              MMO(dpc,q) = vec_madd(MMO(dpc,q), xBv, zerov);
              DMO(dpc,q) = vec_madd(DMO(dpc,q), xBv, zerov);
              IMO(dpc,q) = vec_madd(IMO(dpc,q), xBv, zerov);
            }
          bck->totscale += log(bck->xmx[i*p7X_NXCELLS+p7X_SCALE]);
        }

      /* Stores are separate only for pedagogical reasons: easy to
       * turn this into a more memory efficient version just by
       * deleting the stores.
       */
      bck->xmx[i*p7X_NXCELLS+p7X_E] = xE;
      bck->xmx[i*p7X_NXCELLS+p7X_N] = xN;
      bck->xmx[i*p7X_NXCELLS+p7X_J] = xJ;
      bck->xmx[i*p7X_NXCELLS+p7X_B] = xB;
      bck->xmx[i*p7X_NXCELLS+p7X_C] = xC;

#if p7_DEBUGGING
      if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, i, 9, 4, xE, xN, xJ, xB, xC);  /* logify=TRUE, <rowi>=i, width=9, precision=4 */
#endif
    } /* thus ends the loop over sequence positions i */

  /* Termination at i=0, where we can only reach N,B states. */
  dpp = bck->dpf[1 * do_full];
  tp  = om->tfv;             /* <*tp> is now the [1 5 9 13] TBMk transition quad */
  rp  = om->rfv[dsq[1]];     /* <*rp> is now the [1 5 9 13] match emission quad  */
  xBv = (vector float) vec_splat_u32(0);
  for (q = 0; q < Q; q++)
    {
      mpv = vec_madd(MMO(dpp,q), *rp, zerov); rp++;
      xBv = vec_madd(mpv, *tp, xBv);          tp += 7;
    }
  /* horizontal sum of xBv */
  xB = esl_vmx_hsum_float(xBv);
  xN = (xB * om->xf[p7O_N][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_LOOP]);

  bck->xmx[p7X_B]     = xB;
  bck->xmx[p7X_C]     = 0.0;
  bck->xmx[p7X_J]     = 0.0;
  bck->xmx[p7X_N]     = xN;
  bck->xmx[p7X_E]     = 0.0;
  bck->xmx[p7X_SCALE] = 1.0;

#if p7_DEBUGGING
  dpc = bck->dpf[0];
  for (q = 0; q < Q; q++)    /* Not strictly necessary, but if someone's looking at DP matrices, this is nice to do: */
    MMO(dpc,q) = DMO(dpc,q) = IMO(dpc,q) = zerov;
  if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, 0, 9, 4, bck->xmx[p7X_E], bck->xmx[p7X_N], bck->xmx[p7X_J], bck->xmx[p7X_B], bck->xmx[p7X_C]);  /* logify=TRUE, <rowi>=0, width=9, precision=4 */
#endif

  if      (isnan(xN))          ESL_EXCEPTION(eslERANGE, "backward score is NaN");
  else if (L>0 && xN == 0.0)   ESL_EXCEPTION(eslERANGE, "backward score underflow (is 0.0)");   /* [J5/118] */
  else if (isinf(xN) == 1)     ESL_EXCEPTION(eslERANGE, "backward score overflow (is infinity)");

  if (opt_sc != NULL) *opt_sc = bck->totscale + log(xN);
  return eslOK;
}
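/* Illustration (hypothetical helper, not part of the library): why
 * backward_engine() normally divides each row by the *forward* matrix's scale
 * factor. If forward row i was multiplied by 1/s_i, multiplying backward row i
 * by the same 1/s_i keeps fwd[i]*bck[i] products directly usable in posterior
 * decoding, and the true log score is recovered by summing log(s_i). A minimal
 * sketch of that bookkeeping, assuming scales[1..L] holds the per-row factors:
 */
#include <math.h>

static double
example_rescaled_total(const double *scales, int L, double raw_final)
{
  double totscale = 0.0;
  int    i;

  for (i = 1; i <= L; i++)
    totscale += log(scales[i]);       /* accumulate each row's scale factor  */
  return totscale + log(raw_final);   /* mirrors bck->totscale + log(xN)     */
}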
static int
forward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc)
{
  vector float  mpv, dpv, ipv;      /* previous row values                                       */
  vector float  sv;                 /* temp storage of 1 curr row value in progress              */
  vector float  dcv;                /* delayed storage of D(i,q+1)                               */
  vector float  xEv;                /* E state: keeps sum of Mk->E as we go                      */
  vector float  xBv;                /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector float  zerov;              /* splatted 0.0's in a vector                                */
  float         xN, xE, xB, xC, xJ; /* special states' scores                                    */
  int           i;                  /* counter over sequence positions 1..L                      */
  int           q;                  /* counter over quads 0..nq-1                                */
  int           j;                  /* counter over DD iterations (4 is full serialization)      */
  int           Q = p7O_NQF(om->M); /* segment length: # of vectors                              */
  vector float *dpc = ox->dpf[0];   /* current row, for use in {MDI}MO(dpp,q) access macro       */
  vector float *dpp;                /* previous row, for use in {MDI}MO(dpp,q) access macro      */
  vector float *rp;                 /* will point at om->rfv[x] for residue x[i]                 */
  vector float *tp;                 /* will point into (and step thru) om->tfv                   */

  /* Initialization. */
  ox->M = om->M;
  ox->L = L;
  ox->has_own_scales = TRUE;   /* all forward matrices control their own scalefactors */
  zerov = (vector float) vec_splat_u32(0);
  for (q = 0; q < Q; q++) MMO(dpc,q) = IMO(dpc,q) = DMO(dpc,q) = zerov;
  xE = ox->xmx[p7X_E] = 0.;
  xN = ox->xmx[p7X_N] = 1.;
  xJ = ox->xmx[p7X_J] = 0.;
  xB = ox->xmx[p7X_B] = om->xf[p7O_N][p7O_MOVE];
  xC = ox->xmx[p7X_C] = 0.;
  ox->xmx[p7X_SCALE]  = 1.0;
  ox->totscale        = 0.0;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, 0, 9, 5, xE, xN, xJ, xB, xC);  /* logify=TRUE, <rowi>=0, width=9, precision=5 */
#endif

  for (i = 1; i <= L; i++)
    {
      dpp = dpc;
      dpc = ox->dpf[do_full * i];   /* avoid conditional, use do_full as kronecker delta */
      rp  = om->rfv[dsq[i]];
      tp  = om->tfv;
      dcv = (vector float) vec_splat_u32(0);
      xEv = (vector float) vec_splat_u32(0);
      xBv = esl_vmx_set_float(xB);

      /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12. Shift zeros on. */
      mpv = vec_sld(zerov, MMO(dpp,Q-1), 12);
      dpv = vec_sld(zerov, DMO(dpp,Q-1), 12);
      ipv = vec_sld(zerov, IMO(dpp,Q-1), 12);

      for (q = 0; q < Q; q++)
        {
          /* Calculate new MMO(i,q); don't store it yet, hold it in sv. */
          sv  = (vector float) vec_splat_u32(0);
          sv  = vec_madd(xBv, *tp, sv);   tp++;
          sv  = vec_madd(mpv, *tp, sv);   tp++;
          sv  = vec_madd(ipv, *tp, sv);   tp++;
          sv  = vec_madd(dpv, *tp, sv);   tp++;
          sv  = vec_madd(sv, *rp, zerov); rp++;
          xEv = vec_add(xEv, sv);

          /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
           * {MDI}MX(q) is then the current, not the prev row.
           */
          mpv = MMO(dpp,q);
          dpv = DMO(dpp,q);
          ipv = IMO(dpp,q);

          /* Do the delayed stores of {MD}(i,q) now that memory is usable */
          MMO(dpc,q) = sv;
          DMO(dpc,q) = dcv;

          /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv.
           */
          dcv = vec_madd(sv, *tp, zerov); tp++;

          /* Calculate and store I(i,q); assumes odds ratio for emission is 1.0 */
          sv         = vec_madd(mpv, *tp, zerov); tp++;
          IMO(dpc,q) = vec_madd(ipv, *tp, sv);    tp++;
        }

      /* Now the DD paths. We would rather not serialize them, but
       * in an accurate Forward calculation, we have few options.
       */
      /* dcv has carried through from end of q loop above; store it:
       * in the first pass, we add M->D and D->D paths into DMX.
       */
      /* We're almost certainly obligated to do at least one complete
       * DD path to be sure:
       */
      dcv        = vec_sld(zerov, dcv, 12);
      DMO(dpc,0) = (vector float) vec_splat_u32(0);
      tp         = om->tfv + 7*Q;   /* set tp to start of the DD's */
      for (q = 0; q < Q; q++)
        {
          DMO(dpc,q) = vec_add(dcv, DMO(dpc,q));
          dcv        = vec_madd(DMO(dpc,q), *tp, zerov); tp++;   /* extend DMO(q), so we include M->D and D->D paths */
        }

      /* now. on small models, it seems best (empirically) to just go
       * ahead and serialize. on large models, we can do a bit better,
       * by testing for when dcv (DD path) accrued to DMO(q) is below
       * machine epsilon for all q, in which case we know DMO(q) are all
       * at their final values. The tradeoff point is (empirically) somewhere
       * around M=100, at least on my desktop. We don't worry about the
       * conditional here; it's outside any inner loops.
       */
      if (om->M < 100)
        { /* Fully serialized version */
          for (j = 1; j < 4; j++)
            {
              dcv = vec_sld(zerov, dcv, 12);
              tp  = om->tfv + 7*Q;   /* set tp to start of the DD's */
              for (q = 0; q < Q; q++)
                { /* note, extend dcv, not DMO(q); only adding DD paths now */
                  DMO(dpc,q) = vec_add(dcv, DMO(dpc,q));
                  dcv        = vec_madd(dcv, *tp, zerov); tp++;
                }
            }
        }
      else
        { /* Slightly parallelized version, but which incurs some overhead */
          for (j = 1; j < 4; j++)
            {
              vector bool int cv;   /* keeps track of whether any DD's change DMO(q) */

              dcv = vec_sld(zerov, dcv, 12);
              tp  = om->tfv + 7*Q;  /* set tp to start of the DD's */
              cv  = (vector bool int) vec_splat_u32(0);
              for (q = 0; q < Q; q++)
                { /* using cmpgt below tests if DD changed any DMO(q) *without* a conditional branch */
                  sv         = vec_add(dcv, DMO(dpc,q));
                  cv         = vec_or(cv, vec_cmpgt(sv, DMO(dpc,q)));
                  DMO(dpc,q) = sv;                                /* store new DMO(q)             */
                  dcv        = vec_madd(dcv, *tp, zerov); tp++;   /* note, extend dcv, not DMO(q) */
                }
              /* DD's didn't change any DMO(q)? Then done, break out. */
              if (vec_all_eq(cv, (vector bool int) zerov)) break;
            }
        }

      /* Add D's to xEv */
      for (q = 0; q < Q; q++) xEv = vec_add(DMO(dpc,q), xEv);

      /* Finally the "special" states, which start from Mk->E (->C, ->J->B) */
      /* The following incantation is a horizontal sum of xEv's elements. */
      /* These must follow DD calculations, because D's contribute to E in Forward
       * (as opposed to Viterbi).
       */
      xE = esl_vmx_hsum_float(xEv);

      xN = xN * om->xf[p7O_N][p7O_LOOP];
      xC = (xC * om->xf[p7O_C][p7O_LOOP]) + (xE * om->xf[p7O_E][p7O_MOVE]);
      xJ = (xJ * om->xf[p7O_J][p7O_LOOP]) + (xE * om->xf[p7O_E][p7O_LOOP]);
      xB = (xJ * om->xf[p7O_J][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_MOVE]);
      /* and now xB will carry over into next i, and xC carries over after i=L */

      /* Sparse rescaling. xE above threshold? trigger a rescaling event. */
      if (xE > 1.0e4)   /* that's a little less than e^10, ~10% of our dynamic range */
        {
          xN  = xN / xE;
          xC  = xC / xE;
          xJ  = xJ / xE;
          xB  = xB / xE;
          xEv = esl_vmx_set_float(1.0 / xE);
          for (q = 0; q < Q; q++)
            {
              MMO(dpc,q) = vec_madd(MMO(dpc,q), xEv, zerov);
              DMO(dpc,q) = vec_madd(DMO(dpc,q), xEv, zerov);
              IMO(dpc,q) = vec_madd(IMO(dpc,q), xEv, zerov);
            }
          ox->xmx[i*p7X_NXCELLS+p7X_SCALE] = xE;
          ox->totscale += log(xE);
          xE = 1.0;
        }
      else ox->xmx[i*p7X_NXCELLS+p7X_SCALE] = 1.0;

      /* Storage of the specials. We could've stored these already,
       * but using xE, etc. variables makes it easy to convert this
       * code to O(M) memory versions just by deleting storage steps.
       */
      ox->xmx[i*p7X_NXCELLS+p7X_E] = xE;
      ox->xmx[i*p7X_NXCELLS+p7X_N] = xN;
      ox->xmx[i*p7X_NXCELLS+p7X_J] = xJ;
      ox->xmx[i*p7X_NXCELLS+p7X_B] = xB;
      ox->xmx[i*p7X_NXCELLS+p7X_C] = xC;

#if p7_DEBUGGING
      if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, i, 9, 5, xE, xN, xJ, xB, xC);  /* logify=TRUE, <rowi>=i, width=9, precision=5 */
#endif
    } /* end loop over sequence residues 1..L */

  /* finally C->T, and flip total score back to log space (nats) */
  /* On overflow, xC is inf or nan (nan arises because inf*0 = nan). */
  /* On an underflow (which shouldn't happen), we counterintuitively return infinity:
   * the effect of this is to force the caller to rescore us with full range.
   */
  if      (isnan(xC))          ESL_EXCEPTION(eslERANGE, "forward score is NaN");
  else if (L>0 && xC == 0.0)   ESL_EXCEPTION(eslERANGE, "forward score underflow (is 0.0)");   /* [J5/118] */
  else if (isinf(xC) == 1)     ESL_EXCEPTION(eslERANGE, "forward score overflow (is infinity)");

  if (opt_sc != NULL) *opt_sc = ox->totscale + log(xC * om->xf[p7O_C][p7O_MOVE]);
  return eslOK;
}
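/* Illustration (hypothetical helper, not part of the library): the sparse
 * rescaling step of forward_engine() in scalar form. When a row's E value
 * exceeds ~1e4, every cell and special on the row is divided by xE, the factor
 * is recorded in the row's SCALE cell, and log(xE) is added to a running total
 * so the final score can be reported in log space (nats). A minimal sketch;
 * row[] stands in for one row of striped M/D/I cells plus the specials.
 */
#include <math.h>

static void
example_sparse_rescale(float *row, int ncells, float *xE,
                       float *row_scale, double *totscale)
{
  int j;

  if (*xE > 1.0e4)                       /* ~10% of float dynamic range used    */
    {
      float scale = *xE;
      for (j = 0; j < ncells; j++) row[j] /= scale;
      *row_scale = scale;                /* remembered for Backward / decoding  */
      *totscale += log(scale);           /* recovered in the final log score    */
      *xE        = 1.0;
    }
  else *row_scale = 1.0;
}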