/* Function:  p7_Null2_ByTrace()
 * Synopsis:  Assign null2 scores to an envelope by the sampling method.
 * Incept:    SRE, Mon Aug 18 10:22:49 2008 [Janelia]
 *
 * Purpose:   Identical to <p7_GNull2_ByTrace()> except that
 *            <om>, <wrk> are SSE optimized versions of the profile
 *            and the residue posterior probability matrix. See
 *            <p7_GNull2_ByTrace()> documentation.
 *
 *            Trace elements <zstart..zend> (inclusive) are scanned.
 *            Each element with <tr->i[z] != 0> counts as one emitted
 *            residue and is tallied against the state that emitted
 *            it (match or insert at node k, or N/C/J). The tallies
 *            are normalized to frequencies and used as weights on
 *            the emission odds to build <null2>.
 *
 * Args:      om     - optimized profile; <om->rfv> supplies match emission odds
 *            tr     - trace to count state usage from
 *            zstart - first trace position to consider
 *            zend   - last trace position to consider (inclusive)
 *            wrk    - workspace; row 0 of <dpf> and <xmx> is overwritten
 *            null2  - RETURN: null2 odds per residue; caller allocated;
 *                     indices <0..K-1> are computed, <K>, <Kp-2>, <Kp-1>
 *                     are set to 1.0, the rest filled by <esl_abc_FAvgScVec()>
 *
 * Returns:   <eslOK>.
 *
 * Note:      If no element in <zstart..zend> emitted a residue, the
 *            normalization divides by zero; the caller is presumably
 *            expected to pass a segment with at least one emission.
 */
int
p7_Null2_ByTrace(const P7_OPROFILE *om, const P7_TRACE *tr, int zstart, int zend, P7_OMX *wrk, float *null2)
{
  union { __m128 v; float p[4]; } u;
  int     Q   = p7O_NQF(om->M);
  int     Ld  = 0;
  float  *xmx = wrk->xmx;	/* enables use of XMXo macro */
  float   norm;
  float   xfactor;
  __m128  sv;
  __m128 *rp;
  int     q, r, s;
  int     x;
  int     z;

  /* We'll use the i=0 row in wrk for working space: dp[0][] and xmx[][0]. */
  for (q = 0; q < Q; q++)
    {
      wrk->dpf[0][q*3 + p7X_M] = _mm_setzero_ps();
      wrk->dpf[0][q*3 + p7X_I] = _mm_setzero_ps();
    }
  XMXo(0,p7X_N) = 0.0;
  XMXo(0,p7X_C) = 0.0;
  XMXo(0,p7X_J) = 0.0;

  /* Calculate emitting state usage in this particular trace segment */
  for (z = zstart; z <= zend; z++)
    {
      if (tr->i[z] == 0) continue; /* quick test for whether this trace elem emitted or not */
      Ld++;
      if (tr->k[z] > 0)	/* must be an M or I */
	{
	  /* The workspace is striped: node k lives in vector (k-1)%Q,
	   * lane (k-1)/Q. Select the M or I cell with <s>, so that
	   * insert emissions are counted in the insert cell and later
	   * get the implicit 1.0 odds rather than match emission odds.
	   */
	  s = ( (tr->st[z] == p7T_M) ? p7X_M : p7X_I);
	  q = p7X_NSCELLS * ( (tr->k[z] - 1) % Q) + s;
	  r = (tr->k[z] - 1) / Q;
	  u.v            = wrk->dpf[0][q];
	  u.p[r]        += 1.0;	/* all this to increment a count by one! */
	  wrk->dpf[0][q] = u.v;
	}
      else /* emitted an x_i with no k; must be an N,C,J */
	{
	  switch (tr->st[z]) {
	  case p7T_N: XMXo(0,p7X_N) += 1.0; break;
	  case p7T_C: XMXo(0,p7X_C) += 1.0; break;
	  case p7T_J: XMXo(0,p7X_J) += 1.0; break;
	  default:                          break; /* other state types are not counted */
	  }
	}
    }

  /* Convert counts to frequencies, to use as weights. */
  norm = 1.0 / (float) Ld;
  sv = _mm_set1_ps(norm);
  for (q = 0; q < Q; q++)
    {
      wrk->dpf[0][q*3 + p7X_M] = _mm_mul_ps(wrk->dpf[0][q*3 + p7X_M], sv);
      wrk->dpf[0][q*3 + p7X_I] = _mm_mul_ps(wrk->dpf[0][q*3 + p7X_I], sv);
    }
  XMXo(0,p7X_N) *= norm;
  XMXo(0,p7X_C) *= norm;
  XMXo(0,p7X_J) *= norm;

  /* Calculate null2's emission odds, by taking posterior weighted sum
   * over all emission vectors used in paths explaining the domain.
   */
  xfactor = XMXo(0,p7X_N) + XMXo(0,p7X_C) + XMXo(0,p7X_J);
  for (x = 0; x < om->abc->K; x++)
    {
      sv = _mm_setzero_ps();
      rp = om->rfv[x];
      for (q = 0; q < Q; q++)
	{
	  sv = _mm_add_ps(sv, _mm_mul_ps(wrk->dpf[0][q*3 + p7X_M], *rp)); rp++;
	  sv = _mm_add_ps(sv, wrk->dpf[0][q*3 + p7X_I]); /* insert emission odds implicitly 1.0 */
	}
      esl_sse_hsum_ps(sv, &(null2[x]));
      null2[x] += xfactor;	/* N,C,J emission odds implicitly 1.0 as well */
    }
  /* now null2[x] = \frac{f_d(x)}{f_0(x)} for all x in alphabet,
   * 0..K-1, where f_d(x) are the ad hoc "null2" residue frequencies
   * for this envelope.
   */

  /* make valid scores for all degeneracies, by averaging the odds ratios. */
  esl_abc_FAvgScVec(om->abc, null2);
  null2[om->abc->K]    = 1.0;	/* gap character    */
  null2[om->abc->Kp-2] = 1.0;	/* nonresidue "*"   */
  null2[om->abc->Kp-1] = 1.0;	/* missing data "~" */

  return eslOK;
}
/* Function: p7_Null2_ByExpectation() * Synopsis: Calculate null2 model from posterior probabilities. * Incept: SRE, Mon Aug 18 08:32:55 2008 [Janelia] * * Purpose: Identical to <p7_GNull2_ByExpectation()> except that * <om>, <pp> are SSE optimized versions of the profile * and the residue posterior probability matrix. See * <p7_GNull2_ByExpectation()> documentation. * * Args: om - profile, in any mode, target length model set to <L> * pp - posterior prob matrix, for <om> against domain envelope <dsq+i-1> (offset) * null2 - RETURN: null2 log odds scores per residue; <0..Kp-1>; caller allocated space */ int p7_Null2_ByExpectation(const P7_OPROFILE *om, const P7_OMX *pp, float *null2) { int M = om->M; int Ld = pp->L; int Q = p7O_NQF(M); float *xmx = pp->xmx; /* enables use of XMXo(i,s) macro */ float norm; __m128 *rp; __m128 sv; float xfactor; int i,q,x; /* Calculate expected # of times that each emitting state was used * in generating the Ld residues in this domain. * The 0 row in <wrk> is used to hold these numbers. */ memcpy(pp->dpf[0], pp->dpf[1], sizeof(__m128) * 3 * Q); XMXo(0,p7X_N) = XMXo(1,p7X_N); XMXo(0,p7X_C) = XMXo(1,p7X_C); /* 0.0 */ XMXo(0,p7X_J) = XMXo(1,p7X_J); /* 0.0 */ for (i = 2; i <= Ld; i++) { for (q = 0; q < Q; q++) { pp->dpf[0][q*3 + p7X_M] = _mm_add_ps(pp->dpf[i][q*3 + p7X_M], pp->dpf[0][q*3 + p7X_M]); pp->dpf[0][q*3 + p7X_I] = _mm_add_ps(pp->dpf[i][q*3 + p7X_I], pp->dpf[0][q*3 + p7X_I]); } XMXo(0,p7X_N) += XMXo(i,p7X_N); XMXo(0,p7X_C) += XMXo(i,p7X_C); XMXo(0,p7X_J) += XMXo(i,p7X_J); } /* Convert those expected #'s to frequencies, to use as posterior weights. 
*/ norm = 1.0 / (float) Ld; sv = _mm_set1_ps(norm); for (q = 0; q < Q; q++) { pp->dpf[0][q*3 + p7X_M] = _mm_mul_ps(pp->dpf[0][q*3 + p7X_M], sv); pp->dpf[0][q*3 + p7X_I] = _mm_mul_ps(pp->dpf[0][q*3 + p7X_I], sv); } XMXo(0,p7X_N) *= norm; XMXo(0,p7X_C) *= norm; XMXo(0,p7X_J) *= norm; /* Calculate null2's emission odds, by taking posterior weighted sum * over all emission vectors used in paths explaining the domain. */ xfactor = XMXo(0, p7X_N) + XMXo(0, p7X_C) + XMXo(0, p7X_J); for (x = 0; x < om->abc->K; x++) { sv = _mm_setzero_ps(); rp = om->rfv[x]; for (q = 0; q < Q; q++) { sv = _mm_add_ps(sv, _mm_mul_ps(pp->dpf[0][q*3 + p7X_M], *rp)); rp++; sv = _mm_add_ps(sv, pp->dpf[0][q*3 + p7X_I]); /* insert odds implicitly 1.0 */ // sv = _mm_add_ps(sv, _mm_mul_ps(pp->dpf[0][q*3 + p7X_I], *rp)); rp++; } esl_sse_hsum_ps(sv, &(null2[x])); null2[x] += xfactor; } /* now null2[x] = \frac{f_d(x)}{f_0(x)} for all x in alphabet, * 0..K-1, where f_d(x) are the ad hoc "null2" residue frequencies * for this envelope. */ /* make valid scores for all degeneracies, by averaging the odds ratios. */ esl_abc_FAvgScVec(om->abc, null2); null2[om->abc->K] = 1.0; /* gap character */ null2[om->abc->Kp-2] = 1.0; /* nonresidue "*" */ null2[om->abc->Kp-1] = 1.0; /* missing data "~" */ return eslOK; }
/* Function:  p7_OptimalAccuracy()
 * Synopsis:  DP fill of an optimal accuracy alignment calculation.
 * Incept:    SRE, Mon Aug 18 11:04:48 2008 [Janelia]
 *
 * Purpose:   Calculates the fill step of the optimal accuracy decoding
 *            algorithm \citep{Kall05}.
 *
 *            Caller provides the posterior decoding matrix <pp>,
 *            which was calculated by Forward/Backward on a target sequence
 *            of length <pp->L> using the query model <om>.
 *
 *            Caller also provides a DP matrix <ox>, allocated for a full
 *            <om->M> by <L> comparison. The routine fills this in
 *            with OA scores.
 *
 *            Transition parameters in <om> are used only as masks:
 *            a path is allowed where the transition value is > 0 and
 *            contributes 0 otherwise. Scores themselves are sums of
 *            posterior probabilities from <pp>.
 *
 * Args:      om    - query profile (vectorized)
 *            pp    - posterior decoding matrix for <om> against the target
 *            ox    - RESULT: caller provided DP matrix for <om->M> by <L>;
 *                    <ox->M> and <ox->L> are set here
 *            ret_e - RETURN: expected number of correctly decoded positions
 *
 * Returns:   <eslOK> on success, and <*ret_e> contains the final OA
 *            score, which is the expected number of correctly decoded
 *            positions in the target sequence (up to <L>).
 *
 * Throws:    (no abnormal error conditions)
 */
int
p7_OptimalAccuracy(const P7_OPROFILE *om, const P7_OMX *pp, P7_OMX *ox, float *ret_e)
{
  vector float mpv, dpv, ipv;     /* previous row values                                       */
  vector float sv;                /* temp storage of 1 curr row value in progress              */
  vector float xEv;               /* E state: keeps max for Mk->E as we go                     */
  vector float xBv;               /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector float dcv;               /* delete value carried from one q to the next               */
  float *xmx = ox->xmx;           /* enables use of the XMXo(i,s) macro on <ox>                */
  vector float *dpc = ox->dpf[0]; /* current row, for use in {MDI}MO(dpp,q) access macro       */
  vector float *dpp;              /* previous row, for use in {MDI}MO(dpp,q) access macro      */
  vector float *ppp;              /* quads in the <pp> posterior probability matrix            */
  vector float *tp;               /* quads in the <om->tfv> transition scores                  */
  vector float zerov;             /* all-zero vector, comparison threshold for the masks       */
  vector float infv;              /* vector with every lane set to -eslINFINITY                */
  int M = om->M;
  int Q = p7O_NQF(M);
  int q;
  int j;
  int i;
  float t1, t2;

  zerov = (vector float) vec_splat_u32(0);
  infv = esl_vmx_set_float(-eslINFINITY);

  ox->M = om->M;
  ox->L = pp->L;

  /* Initialization of row 0: all main cells -inf; only N and B start at 0. */
  for (q = 0; q < Q; q++)
    MMO(dpc, q) = IMO(dpc,q) = DMO(dpc,q) = infv;
  XMXo(0, p7X_E) = -eslINFINITY;
  XMXo(0, p7X_N) = 0.;
  XMXo(0, p7X_J) = -eslINFINITY;
  XMXo(0, p7X_B) = 0.;
  XMXo(0, p7X_C) = -eslINFINITY;

  for (i = 1; i <= pp->L; i++)
    {
      dpp = dpc;           /* previous DP row in OA matrix */
      dpc = ox->dpf[i];    /* current DP row in OA matrix  */
      ppp = pp->dpf[i];    /* current row in the posterior probabilities per position */
      tp = om->tfv;        /* transition probabilities */
      dcv = infv;
      xEv = infv;
      xBv = esl_vmx_set_float(XMXo(i-1, p7X_B));

      mpv = vec_sld(infv, MMO(dpp,Q-1), 12); /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12. */
      dpv = vec_sld(infv, DMO(dpp,Q-1), 12);
      ipv = vec_sld(infv, IMO(dpp,Q-1), 12);
      for (q = 0; q < Q; q++)
        {
          /* Match cell: best of the B, M, I, D predecessors whose
           * transition is nonzero; each consumes one <tp> vector.
           */
          sv = vec_and(vec_cmpgt(*tp, zerov), xBv); tp++;
          sv = vec_max(sv, vec_and(vec_cmpgt(*tp, zerov), mpv)); tp++;
          sv = vec_max(sv, vec_and(vec_cmpgt(*tp, zerov), ipv)); tp++;
          sv = vec_max(sv, vec_and(vec_cmpgt(*tp, zerov), dpv)); tp++;
          sv = vec_add(sv, *ppp); ppp += 2; /* add match posterior; step two quads ahead to the one added to the insert cell below */
          xEv = vec_max(xEv, sv);

          /* Load previous-row values for the next q before they are needed. */
          mpv = MMO(dpp,q);
          dpv = DMO(dpp,q);
          ipv = IMO(dpp,q);

          MMO(dpc,q) = sv;
          DMO(dpc,q) = dcv;

          /* Delete value for the next q, from the match cell just computed. */
          dcv = vec_and(vec_cmpgt(*tp, zerov), sv); tp++;

          /* Insert cell: best of the M and I predecessors on the previous row. */
          sv = vec_and(vec_cmpgt(*tp, zerov), mpv); tp++;
          sv = vec_max(sv, vec_and(vec_cmpgt(*tp, zerov), ipv)); tp++;
          IMO(dpc,q) = vec_add(sv, *ppp); ppp++;
        }

      /* dcv has carried through from end of q loop above; store it
       * in first pass, we add M->D and D->D path into DMX
       */
      dcv = vec_sld(infv, dcv, 12);
      tp = om->tfv + 7*Q; /* set tp to start of the DD's (the loop above used 7 vectors per q) */
      for (q = 0; q < Q; q++)
        {
          DMO(dpc, q) = vec_max(dcv, DMO(dpc, q));
          dcv = vec_and(vec_cmpgt(*tp, zerov), DMO(dpc,q)); tp++;
        }

      /* fully serialized D->D; can optimize later */
      for (j = 1; j < 4; j++)
        {
          dcv = vec_sld(infv, dcv, 12);
          tp = om->tfv + 7*Q;
          for (q = 0; q < Q; q++)
            {
              DMO(dpc, q) = vec_max(dcv, DMO(dpc, q));
              dcv = vec_and(vec_cmpgt(*tp, zerov), dcv); tp++;
            }
        }

      /* D->E paths */
      for (q = 0; q < Q; q++)
        xEv = vec_max(xEv, DMO(dpc,q));

      /* Specials */
      XMXo(i,p7X_E) = esl_vmx_hmax_float(xEv);

      /* For each special state below, a path whose <om->xf> parameter
       * is exactly 0.0 is disabled by substituting 0.0 for its score.
       */

      /* J: stay in J (adds J posterior at i) vs. enter from E on this row */
      t1 = ( (om->xf[p7O_J][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_J] + pp->xmx[i*p7X_NXCELLS+p7X_J]);
      t2 = ( (om->xf[p7O_E][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[ i *p7X_NXCELLS+p7X_E]);
      ox->xmx[i*p7X_NXCELLS+p7X_J] = ESL_MAX(t1, t2);

      /* C: stay in C (adds C posterior at i) vs. enter from E on this row */
      t1 = ( (om->xf[p7O_C][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_C] + pp->xmx[i*p7X_NXCELLS+p7X_C]);
      t2 = ( (om->xf[p7O_E][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[ i *p7X_NXCELLS+p7X_E]);
      ox->xmx[i*p7X_NXCELLS+p7X_C] = ESL_MAX(t1, t2);

      /* N: can only be reached by staying in N (adds N posterior at i) */
      ox->xmx[i*p7X_NXCELLS+p7X_N] = ((om->xf[p7O_N][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_N] + pp->xmx[i*p7X_NXCELLS+p7X_N]);

      /* B: best of N and J on this row */
      t1 = ( (om->xf[p7O_N][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[i*p7X_NXCELLS+p7X_N]);
      t2 = ( (om->xf[p7O_J][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[i*p7X_NXCELLS+p7X_J]);
      ox->xmx[i*p7X_NXCELLS+p7X_B] = ESL_MAX(t1, t2);
    }

  /* Final OA score is the C state on the last row. */
  *ret_e = ox->xmx[pp->L*p7X_NXCELLS+p7X_C];
  return eslOK;
}