/* Function:  p7_ViterbiFilter()
 * Synopsis:  Calculates Viterbi score, vewy vewy fast, in limited precision.
 * Incept:    SRE, Tue Nov 27 09:15:24 2007 [Janelia]
 *
 * Purpose:   Calculates an approximation of the Viterbi score for sequence
 *            <dsq> of length <L> residues, using optimized profile <om>,
 *            and a preallocated one-row DP matrix <ox>. Return the
 *            estimated Viterbi score (in nats) in <ret_sc>.
 *
 *            Score may overflow (and will, on high-scoring
 *            sequences), but will not underflow.
 *
 *            The model must be in a local alignment mode; other modes
 *            cannot provide the necessary guarantee of no underflow.
 *
 *            This is a striped SIMD Viterbi implementation using Intel
 *            VMX integer intrinsics \citep{Farrar07}, in reduced
 *            precision (signed words, 16 bits).
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues
 *            om      - optimized profile
 *            ox      - DP matrix
 *            ret_sc  - RETURN: Viterbi score (in nats)
 *
 * Returns:   <eslOK> on success;
 *            <eslERANGE> if the score overflows; in this case
 *            <*ret_sc> is <eslINFINITY>, and the sequence can
 *            be treated as a high-scoring hit.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small, or if
 *            profile isn't in a local alignment mode. (Must be in local
 *            alignment mode because that's what helps us guarantee
 *            limited dynamic range.)
 *
 * Xref:      [Farrar07] for ideas behind striped SIMD DP.
 *            J2/46-47 for layout of HMMER's striped SIMD DP.
 *            J2/50 for single row DP.
 *            J2/60 for reduced precision (epu8)
 *            J2/65 for initial benchmarking
 *            J2/66 for precision maximization
 *            J4/138-140 for reimplementation in 16-bit precision
 */
int
p7_ViterbiFilter(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc)
{
  vector signed short mpv, dpv, ipv; /* previous row values                                       */
  vector signed short sv;            /* temp storage of 1 curr row value in progress              */
  vector signed short dcv;           /* delayed storage of D(i,q+1)                               */
  vector signed short xEv;           /* E state: keeps max for Mk->E as we go                     */
  vector signed short xBv;           /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector signed short Dmaxv;         /* keeps track of maximum D cell on row                      */
  int16_t  xE, xB, xC, xJ, xN;       /* special states' scores                                    */
  int16_t  Dmax;                     /* maximum D cell score on row                               */
  int i;                             /* counter over sequence positions 1..L                      */
  int q;                             /* counter over vectors 0..nq-1                              */
  int Q;                             /* segment length: # of vectors                              */
  vector signed short *dp;           /* using {MDI}MX(q) macro requires initialization of <dp>    */
  vector signed short *rsc;          /* will point at om->ru[x] for residue x[i]                  */
  vector signed short *tsc;          /* will point into (and step thru) om->tu                    */
  vector signed short negInfv;       /* -32768 in all eight lanes: the "-infinity" sentinel       */

  Q  = p7O_NQW(om->M);
  dp = ox->dpw[0];

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ8)                                 ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  if (om->mode != p7_LOCAL && om->mode != p7_UNILOCAL) ESL_EXCEPTION(eslEINVAL, "Fast filter only works for local alignment");
  ox->M = om->M;

  negInfv = esl_vmx_set_s16((signed short)-32768);

  /* Initialization. In this saturated 16-bit arithmetic, -infinity is -32768. */
  for (q = 0; q < Q; q++)
    MMXo(q) = IMXo(q) = DMXo(q) = negInfv;
  xN = om->base_w;                      /* scores are offset by base_w to stay in signed range */
  xB = xN + om->xw[p7O_N][p7O_MOVE];
  xJ = -32768;
  xC = -32768;
  xE = -32768;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpVFRow(ox, 0, xE, 0, xJ, xB, xC); /* first 0 is <rowi>: do header. second 0 is xN: always 0 here. */
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rwv[dsq[i]];          /* match emission scores for residue dsq[i] */
      tsc   = om->twv;                  /* transition scores, stepped through in order */
      dcv   = negInfv;                  /* "-infinity" */
      xEv   = negInfv;
      Dmaxv = negInfv;
      xBv   = esl_vmx_set_s16(xB);

      /* Right shifts by 1 value (2 bytes). 4,8,12,x becomes x,4,8,12.
       * Because ia32 is littlendian, this means a left bit shift.
       * Zeros shift on automatically; replace it with -32768.
       */
      mpv = MMXo(Q-1);  mpv = vec_sld(negInfv, mpv, 14);
      dpv = DMXo(Q-1);  dpv = vec_sld(negInfv, dpv, 14);
      ipv = IMXo(Q-1);  ipv = vec_sld(negInfv, ipv, 14);

      for (q = 0; q < Q; q++)
        {
          /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
          sv   = vec_adds(xBv, *tsc);  tsc++;
          sv   = vec_max (sv, vec_adds(mpv, *tsc)); tsc++;
          sv   = vec_max (sv, vec_adds(ipv, *tsc)); tsc++;
          sv   = vec_max (sv, vec_adds(dpv, *tsc)); tsc++;
          sv   = vec_adds(sv, *rsc);   rsc++;
          xEv  = vec_max(xEv, sv);

          /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
           * {MDI}MX(q) is then the current, not the prev row
           */
          mpv = MMXo(q);
          dpv = DMXo(q);
          ipv = IMXo(q);

          /* Do the delayed stores of {MD}(i,q) now that memory is usable */
          MMXo(q) = sv;
          DMXo(q) = dcv;

          /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv
           */
          dcv   = vec_adds(sv, *tsc);  tsc++;
          Dmaxv = vec_max(dcv, Dmaxv);

          /* Calculate and store I(i,q)
           * (no insert emission score in reduced precision: it is hardcoded 0)
           */
          sv     = vec_adds(mpv, *tsc);  tsc++;
          IMXo(q)= vec_max(sv, vec_adds(ipv, *tsc)); tsc++;
        }

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_s16(xEv);
      if (xE >= 32767) { *ret_sc = eslINFINITY; return eslERANGE; } /* immediately detect overflow */

      xN = xN + om->xw[p7O_N][p7O_LOOP];
      xC = ESL_MAX(xC + om->xw[p7O_C][p7O_LOOP], xE + om->xw[p7O_E][p7O_MOVE]);
      xJ = ESL_MAX(xJ + om->xw[p7O_J][p7O_LOOP], xE + om->xw[p7O_E][p7O_LOOP]);
      xB = ESL_MAX(xJ + om->xw[p7O_J][p7O_MOVE], xN + om->xw[p7O_N][p7O_MOVE]);
      /* and now xB will carry over into next i, and xC carries over after i=L */

      /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
       * prove that we don't need to evaluate any D->D paths at all.
       *
       * The observation is that if we can show that on the next row,
       * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
       * for all k, then we don't need any D->D calculations.
       *
       * The test condition is:
       *      max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
       * So:
       *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is precalc'ed in om->dd_bound;
       *   max_k D(i,k) is why we tracked Dmaxv;
       *   xB(i) was just calculated above.
       */
      Dmax = esl_vmx_hmax_s16(Dmaxv);
      if (Dmax + om->ddbound_w > xB)
        {
          /* Now we're obligated to do at least one complete DD path to be sure. */
          /* dcv has carried through from end of q loop above */
          dcv = vec_sld(negInfv, dcv, 14);
          tsc = om->twv + 7*Q;          /* set tsc to start of the DD's */
          for (q = 0; q < Q; q++)
            {
              DMXo(q) = vec_max(dcv, DMXo(q));
              dcv     = vec_adds(DMXo(q), *tsc); tsc++;
            }

          /* We may have to do up to three more passes; the check
           * is for whether crossing a segment boundary can improve
           * our score.
           */
          do {
            dcv = vec_sld(negInfv, dcv, 14);
            tsc = om->twv + 7*Q;        /* set tsc to start of the DD's */
            for (q = 0; q < Q; q++)
              {
                if (! vec_any_gt(dcv, DMXo(q))) break; /* no lane improves: segment is converged */
                DMXo(q) = vec_max(dcv, DMXo(q));
                dcv     = vec_adds(DMXo(q), *tsc); tsc++;
              }
          } while (q == Q);             /* full pass completed => boundary crossing may still help */
        }
      else  /* not calculating DD? then just store the last M->D vector calc'ed.*/
        DMXo(0) = vec_sld(negInfv, dcv, 14);

#if p7_DEBUGGING
      if (ox->debugging) p7_omx_DumpVFRow(ox, i, xE, 0, xJ, xB, xC);
#endif
    } /* end loop over sequence residues 1..L */

  /* finally C->T: convert the scaled/offset int score back to nats */
  if (xC > -32768)
    {
      *ret_sc = (float) xC + (float) om->xw[p7O_C][p7O_MOVE] - (float) om->base_w;
      /* *ret_sc += L * om->ncj_roundoff;  see J4/150 for rationale: superceded by -3.0nat approximation*/
      *ret_sc /= om->scale_w;
      *ret_sc -= 3.0; /* the NN/CC/JJ=0,-3nat approximation: see J5/36. That's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ contrib */
    }
  else *ret_sc = -eslINFINITY;
  return eslOK;
}
/* Function:  p7_omx_DumpFBRow()
 * Synopsis:  Dump one row from float part of a DP matrix.
 * Incept:    SRE, Wed Jul 30 16:45:16 2008 [Janelia]
 *
 * Purpose:   Write the current Forward/Backward (float) row of DP matrix
 *            <ox> to <ox->dfp> for diagnostics, together with the special
 *            state values <xE>, <xN>, <xJ>, <xB>, <xC>. The index <rowi>
 *            labels the row; if <rowi> is 0, a column header is printed
 *            first.
 *
 *            Float output format is controlled by <width>, <precision>;
 *            8,5 is good for pspace, 5,2 is fine for lspace.
 *
 *            If <logify> is TRUE, scores are printed as log(score), which
 *            is useful for comparing pspace DP matrices to other DP
 *            matrices (like generic P7_GMX ones) that hold log-odds
 *            scores; a score of 0 prints as -inf.
 *
 *            The output format is coordinated with <p7_gmx_Dump()> to
 *            facilitate comparison to a known answer.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_omx_DumpFBRow(P7_OMX *ox, int logify, int rowi, int width, int precision, float xE, float xN, float xJ, float xB, float xC)
{
  vector float *dp;                        /* the {MDI}MXo() macros require <dp> to be set */
  int           M = ox->M;
  int           Q = p7O_NQF(M);
  float        *v = NULL;                  /* unstriped scores, 1..M; v[0] is a dummy 0 entry */
  int           j, s, k;
  union { vector float v; float x[4]; } u; /* scratch for unpacking one striped vector */
  double        sp[5];                     /* the five specials, possibly logified */
  int           status;

  /* One-row matrices keep everything in row 0; full matrices have per-row storage. */
  dp = (ox->allocR == 1) ? ox->dpf[0] : ox->dpf[rowi];
  ESL_ALLOC(v, sizeof(float) * ((Q*4)+1));
  v[0] = 0.;

  /* Print the column header once, on the 0th row. */
  if (rowi == 0)
    {
      fprintf(ox->dfp, " ");
      for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*d ", width, k);
      fprintf(ox->dfp, "%*s %*s %*s %*s %*s\n", width, "E", width, "N", width, "J", width, "B", width, "C");
      fprintf(ox->dfp, " ");
      for (k = 0; k <= M+5; k++) fprintf(ox->dfp, "%*s ", width, "--------");
      fprintf(ox->dfp, "\n");
    }

  /* Unpack, unstripe, then print M's. */
  for (j = 0; j < Q; j++)
    {
      u.v = MMXo(j);
      for (s = 0; s < 4; s++) v[j+Q*s+1] = u.x[s];
    }
  fprintf(ox->dfp, "%3d M ", rowi);
  for (k = 0; k <= M; k++)
    {
      double val = v[k];
      if (logify) val = (val == 0. ? -eslINFINITY : log(val));
      fprintf(ox->dfp, "%*.*f ", width, precision, val);
    }

  /* The specials. */
  sp[0] = xE; sp[1] = xN; sp[2] = xJ; sp[3] = xB; sp[4] = xC;
  if (logify)
    for (s = 0; s < 5; s++) sp[s] = (sp[s] == 0. ? -eslINFINITY : log(sp[s]));
  fprintf(ox->dfp, "%*.*f %*.*f %*.*f %*.*f %*.*f\n",
          width, precision, sp[0],
          width, precision, sp[1],
          width, precision, sp[2],
          width, precision, sp[3],
          width, precision, sp[4]);

  /* Unpack, unstripe, then print I's. */
  for (j = 0; j < Q; j++)
    {
      u.v = IMXo(j);
      for (s = 0; s < 4; s++) v[j+Q*s+1] = u.x[s];
    }
  fprintf(ox->dfp, "%3d I ", rowi);
  for (k = 0; k <= M; k++)
    {
      double val = v[k];
      if (logify) val = (val == 0. ? -eslINFINITY : log(val));
      fprintf(ox->dfp, "%*.*f ", width, precision, val);
    }
  fprintf(ox->dfp, "\n");

  /* Unpack, unstripe, then print D's. */
  for (j = 0; j < Q; j++)
    {
      u.v = DMXo(j);
      for (s = 0; s < 4; s++) v[j+Q*s+1] = u.x[s];
    }
  fprintf(ox->dfp, "%3d D ", rowi);
  for (k = 0; k <= M; k++)
    {
      double val = v[k];
      if (logify) val = (val == 0. ? -eslINFINITY : log(val));
      fprintf(ox->dfp, "%*.*f ", width, precision, val);
    }
  fprintf(ox->dfp, "\n\n");

  free(v);
  return eslOK;

ERROR:
  free(v);
  return status;
}
/* Function:  p7_ViterbiScore()
 * Synopsis:  Calculates Viterbi score, correctly, and vewy vewy fast.
 * Incept:    SRE, Tue Nov 27 09:15:24 2007 [Janelia]
 *
 * Purpose:   Calculates the Viterbi score for sequence <dsq> of length <L>
 *            residues, using optimized profile <om>, and a preallocated
 *            one-row DP matrix <ox>. Return the Viterbi score (in nats)
 *            in <ret_sc>.
 *
 *            The model <om> must be configured specially to have
 *            lspace float scores, not its usual pspace float scores for
 *            <p7_ForwardFilter()>.
 *
 *            As with all <*Score()> implementations, the score is
 *            accurate (full range and precision) and can be
 *            calculated on models in any mode, not only local modes.
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues
 *            om      - optimized profile
 *            ox      - DP matrix
 *            ret_sc  - RETURN: Viterbi score (in nats)
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small.
 */
int
p7_ViterbiScore(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc)
{
  vector float mpv, dpv, ipv;      /* previous row values                                       */
  vector float sv;                 /* temp storage of 1 curr row value in progress              */
  vector float dcv;                /* delayed storage of D(i,q+1)                               */
  vector float xEv;                /* E state: keeps max for Mk->E as we go                     */
  vector float xBv;                /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector float Dmaxv;              /* keeps track of maximum D cell on row                      */
  vector float infv;               /* -eslINFINITY in a vector                                  */
  float    xN, xE, xB, xC, xJ;     /* special states' scores                                    */
  float    Dmax;                   /* maximum D cell on row                                     */
  int i;                           /* counter over sequence positions 1..L                      */
  int q;                           /* counter over vectors 0..nq-1                              */
  int Q        = p7O_NQF(om->M);   /* segment length: # of vectors                              */
  vector float *dp  = ox->dpf[0];  /* using {MDI}MX(q) macro requires initialization of <dp>    */
  vector float *rsc;               /* will point at om->rf[x] for residue x[i]                  */
  vector float *tsc;               /* will point into (and step thru) om->tf                    */

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  ox->M = om->M;

  /* Initialization. */
  infv = esl_vmx_set_float(-eslINFINITY);
  for (q = 0; q < Q; q++)
    MMXo(q) = IMXo(q) = DMXo(q) = infv;
  xN = 0.;
  xB = om->xf[p7O_N][p7O_MOVE];
  xE = -eslINFINITY;
  xJ = -eslINFINITY;
  xC = -eslINFINITY;

#if p7_DEBUGGING
  /* NOTE(review): this calls p7_omx_DumpFloatRow(), but the dump routine
   * defined in this file is p7_omx_DumpFBRow(); presumably an alias or
   * older name defined elsewhere — confirm it exists when p7_DEBUGGING
   * builds are enabled. */
  if (ox->debugging) p7_omx_DumpFloatRow(ox, FALSE, 0, 5, 2, xE, xN, xJ, xB, xC); /* logify=FALSE, <rowi>=0, width=5, precision=2*/
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rf[dsq[i]];      /* match/insert emission scores for residue dsq[i] */
      tsc   = om->tf;              /* transition scores, stepped through in order */
      dcv   = infv;
      xEv   = infv;
      Dmaxv = infv;
      xBv   = esl_vmx_set_float(xB);

      /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12.
       * -infinity shifts in to fill the vacated lane.
       */
      mpv = vec_sld(infv, MMXo(Q-1), 12);
      dpv = vec_sld(infv, DMXo(Q-1), 12);
      ipv = vec_sld(infv, IMXo(Q-1), 12);

      for (q = 0; q < Q; q++)
        {
          /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
          sv   = vec_add(xBv, *tsc);  tsc++;
          sv   = vec_max(sv, vec_add(mpv, *tsc)); tsc++;
          sv   = vec_max(sv, vec_add(ipv, *tsc)); tsc++;
          sv   = vec_max(sv, vec_add(dpv, *tsc)); tsc++;
          sv   = vec_add(sv, *rsc);   rsc++;
          xEv  = vec_max(xEv, sv);

          /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
           * {MDI}MX(q) is then the current, not the prev row
           */
          mpv = MMXo(q);
          dpv = DMXo(q);
          ipv = IMXo(q);

          /* Do the delayed stores of {MD}(i,q) now that memory is usable */
          MMXo(q) = sv;
          DMXo(q) = dcv;

          /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv
           */
          dcv   = vec_add(sv, *tsc); tsc++;
          Dmaxv = vec_max(dcv, Dmaxv);

          /* Calculate and store I(i,q); unlike the 16-bit filter, the
           * full-precision model includes an insert emission score here. */
          sv      = vec_add(mpv, *tsc);  tsc++;
          sv      = vec_max(sv, vec_add(ipv, *tsc)); tsc++;
          IMXo(q) = vec_add(sv, *rsc);   rsc++;
        }

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_float(xEv);
      xN = xN + om->xf[p7O_N][p7O_LOOP];
      xC = ESL_MAX(xC + om->xf[p7O_C][p7O_LOOP], xE + om->xf[p7O_E][p7O_MOVE]);
      xJ = ESL_MAX(xJ + om->xf[p7O_J][p7O_LOOP], xE + om->xf[p7O_E][p7O_LOOP]);
      xB = ESL_MAX(xJ + om->xf[p7O_J][p7O_MOVE], xN + om->xf[p7O_N][p7O_MOVE]);
      /* and now xB will carry over into next i, and xC carries over after i=L */

      /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
       * prove that we don't need to evaluate any D->D paths at all.
       *
       * The observation is that if we can show that on the next row,
       * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
       * for all k, then we don't need any D->D calculations.
       *
       * The test condition is:
       *      max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
       * So:
       *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is precalc'ed in om->dd_bound;
       *   max_k D(i,k) is why we tracked Dmaxv;
       *   xB(i) was just calculated above.
       */
      Dmax = esl_vmx_hmax_float(Dmaxv);
      if (Dmax + om->ddbound_f > xB)
        {
          /* Now we're obligated to do at least one complete DD path to be sure. */
          /* dcv has carried through from end of q loop above */
          dcv = vec_sld(infv, dcv, 12);
          tsc = om->tf + 7*Q;           /* set tsc to start of the DD's */
          for (q = 0; q < Q; q++)
            {
              DMXo(q) = vec_max(dcv, DMXo(q));
              dcv     = vec_add(DMXo(q), *tsc); tsc++;
            }

          /* We may have to do up to three more passes; the check
           * is for whether crossing a segment boundary can improve
           * our score.
           */
          do {
            dcv = vec_sld(infv, dcv, 12);
            tsc = om->tf + 7*Q;         /* set tsc to start of the DD's */
            for (q = 0; q < Q; q++)
              {
                if (! vec_any_gt(dcv, DMXo(q))) break; /* no lane improves: converged */
                DMXo(q) = vec_max(dcv, DMXo(q));
                dcv     = vec_add(DMXo(q), *tsc); tsc++;
              }
          } while (q == Q);
        }
      else
        {  /* not calculating DD? then just store that last MD vector we calc'ed. */
          dcv     = vec_sld(infv, dcv, 12);
          DMXo(0) = dcv;
        }

#if p7_DEBUGGING
      if (ox->debugging) p7_omx_DumpFloatRow(ox, FALSE, i, 5, 2, xE, xN, xJ, xB, xC); /* logify=FALSE, <rowi>=i, width=5, precision=2*/
#endif
    } /* end loop over sequence residues 1..L */

  /* finally C->T */
  *ret_sc = xC + om->xf[p7O_C][p7O_MOVE];
  return eslOK;
}
/* Function:  p7_omx_DumpVFRow()
 * Synopsis:  Dump current row of ViterbiFilter (int16) part of <ox> matrix.
 * Incept:    SRE, Wed Jul 30 16:43:21 2008 [Janelia]
 *
 * Purpose:   Write the current ViterbiFilter (int16) row of DP matrix
 *            <ox> to <ox->dfp> for diagnostics, together with the special
 *            state values <xE>, <xN>, <xJ>, <xB>, <xC>. The index <rowi>
 *            labels the row; if <rowi> is 0, a column header is printed
 *            first.
 *
 *            The output format is coordinated with <p7_gmx_Dump()> to
 *            facilitate comparison to a known answer.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure.
 */
int
p7_omx_DumpVFRow(P7_OMX *ox, int rowi, int16_t xE, int16_t xN, int16_t xJ, int16_t xB, int16_t xC)
{
  vector signed short *dp = ox->dpw[0];  /* must set <dp> before using {MDI}MXo macros */
  int      M = ox->M;
  int      Q = p7O_NQW(M);
  int16_t *v = NULL;                     /* unstriped scores, 1..M; v[0] is a dummy 0 entry */
  int      j, s, k;
  union { vector signed short v; int16_t i[8]; } u;  /* scratch for unpacking one striped vector */
  int      status;

  ESL_ALLOC(v, sizeof(int16_t) * ((Q*8)+1));
  v[0] = 0;

  /* Print the column header once, on the 0th row. */
  if (rowi == 0)
    {
      fprintf(ox->dfp, " ");
      for (k = 0; k <= M; k++) fprintf(ox->dfp, "%6d ", k);
      fprintf(ox->dfp, "%6s %6s %6s %6s %6s\n", "E", "N", "J", "B", "C");
      fprintf(ox->dfp, " ");
      for (k = 0; k <= M+5; k++) fprintf(ox->dfp, "%6s ", "------");
      fprintf(ox->dfp, "\n");
    }

  /* Unpack and unstripe, then print M's. */
  for (j = 0; j < Q; j++)
    {
      u.v = MMXo(j);
      for (s = 0; s < 8; s++) v[j+Q*s+1] = u.i[s];
    }
  fprintf(ox->dfp, "%4d M ", rowi);
  for (k = 0; k <= M; k++) fprintf(ox->dfp, "%6d ", v[k]);

  /* The specials. */
  fprintf(ox->dfp, "%6d %6d %6d %6d %6d\n", xE, xN, xJ, xB, xC);

  /* Unpack and unstripe, then print I's. */
  for (j = 0; j < Q; j++)
    {
      u.v = IMXo(j);
      for (s = 0; s < 8; s++) v[j+Q*s+1] = u.i[s];
    }
  fprintf(ox->dfp, "%4d I ", rowi);
  for (k = 0; k <= M; k++) fprintf(ox->dfp, "%6d ", v[k]);
  fprintf(ox->dfp, "\n");

  /* Unpack and unstripe, then print D's. */
  for (j = 0; j < Q; j++)
    {
      u.v = DMXo(j);
      for (s = 0; s < 8; s++) v[j+Q*s+1] = u.i[s];
    }
  fprintf(ox->dfp, "%4d D ", rowi);
  for (k = 0; k <= M; k++) fprintf(ox->dfp, "%6d ", v[k]);
  fprintf(ox->dfp, "\n\n");

  free(v);
  return eslOK;

ERROR:
  free(v);
  return status;
}
/* Function:  p7_ViterbiFilter_longtarget()
 * Synopsis:  Finds windows within potentially long sequence blocks with Viterbi
 *            scores above threshold (vewy vewy fast, in limited precision)
 *
 * Purpose:   Calculates an approximation of the Viterbi score for regions
 *            of sequence <dsq>, using optimized profile <om>, and a pre-
 *            allocated one-row DP matrix <ox>, and captures the positions
 *            at which such regions exceed the score required to be
 *            significant in the eyes of the calling function (usually
 *            p=0.001).
 *
 *            The resulting landmarks are converted to subsequence
 *            windows by the calling function
 *
 *            The model must be in a local alignment mode; other modes
 *            cannot provide the necessary guarantee of no underflow.
 *
 *            This is a striped SIMD Viterbi implementation using Intel
 *            VMX integer intrinsics \citep{Farrar07}, in reduced
 *            precision (signed words, 16 bits).
 *
 * Args:      dsq        - digital target sequence, 1..L
 *            L          - length of dsq in residues
 *            om         - optimized profile
 *            ox         - DP matrix
 *            filtersc   - null or bias correction, required for translating a P-value threshold into a score threshold
 *            P          - p-value below which a region is captured as being above threshold
 *            windowlist - RETURN: array of hit windows (start and end of diagonal) for the above-threshold areas
 *
 * Returns:   <eslOK> on success;
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small, or if
 *            profile isn't in a local alignment mode. (Must be in local
 *            alignment mode because that's what helps us guarantee
 *            limited dynamic range.)
 *
 * Xref:      See p7_ViterbiFilter()
 */
int
p7_ViterbiFilter_longtarget(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float filtersc, double P, P7_HMM_WINDOWLIST *windowlist)
{
  vector signed short mpv, dpv, ipv; /* previous row values                                       */
  vector signed short sv;            /* temp storage of 1 curr row value in progress              */
  vector signed short dcv;           /* delayed storage of D(i,q+1)                               */
  vector signed short xEv;           /* E state: keeps max for Mk->E as we go                     */
  vector signed short xBv;           /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector signed short Dmaxv;         /* keeps track of maximum D cell on row                      */
  int16_t  xE, xB, xC, xJ, xN;       /* special states' scores                                    */
  int16_t  Dmax;                     /* maximum D cell score on row                               */
  int i;                             /* counter over sequence positions 1..L                      */
  int q;                             /* counter over vectors 0..nq-1                              */
  int Q = p7O_NQW(om->M);            /* segment length: # of vectors                              */
  vector signed short *dp = ox->dpw[0]; /* using {MDI}MX(q) macro requires initialization of <dp> */
  vector signed short *rsc;          /* will point at om->ru[x] for residue x[i]                  */
  vector signed short *tsc;          /* will point into (and step thru) om->tu                    */
  vector signed short negInfv;       /* -32768 in all eight lanes: the "-infinity" sentinel       */
  int16_t sc_thresh;                 /* the scaled int score a row must reach to be captured      */
  float   invP;                      /* bit-score threshold implied by P-value <P>                */
  int z;                             /* counter over lanes when unstriping a hit vector           */
  union { vector signed short v; int16_t i[8]; } tmp; /* scratch for unpacking one striped vector */

  windowlist->count = 0;

  /*
   *  In p7_ViterbiFilter, converting from a scaled int Viterbi score
   *  S (aka xE the score getting to state E) to a probability
   *  goes like this:
   *    vsc =  S + om->xw[p7O_E][p7O_MOVE] + om->xw[p7O_C][p7O_MOVE] - om->base_w
   *    ret_sc /= om->scale_w;
   *    vsc -= 3.0;
   *    P = esl_gumbel_surv((vfsc - filtersc) / eslCONST_LOG2  ,  om->evparam[p7_VMU],  om->evparam[p7_VLAMBDA]);
   *  and we're computing the threshold vsc, so invert it:
   *    (vsc - filtersc) / eslCONST_LOG2 = esl_gumbel_invsurv( P, om->evparam[p7_VMU],  om->evparam[p7_VLAMBDA])
   *    vsc = filtersc + eslCONST_LOG2 * esl_gumbel_invsurv( P, om->evparam[p7_VMU],  om->evparam[p7_VLAMBDA])
   *    vsc += 3.0
   *    vsc *= om->scale_w
   *    S = vsc - (float)om->xw[p7O_E][p7O_MOVE] - (float)om->xw[p7O_C][p7O_MOVE] + (float)om->base_w
   */
  invP = esl_gumbel_invsurv(P, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA]);
  sc_thresh = (int) ceil ( ( (filtersc + (eslCONST_LOG2 * invP) + 3.0) * om->scale_w )
                           - (float)om->xw[p7O_E][p7O_MOVE] - (float)om->xw[p7O_C][p7O_MOVE] + (float)om->base_w );

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ8)                                 ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  if (om->mode != p7_LOCAL && om->mode != p7_UNILOCAL) ESL_EXCEPTION(eslEINVAL, "Fast filter only works for local alignment");
  ox->M = om->M;

  negInfv = esl_vmx_set_s16((signed short)-32768);

  /* Initialization. In this saturated 16-bit arithmetic, -infinity is -32768. */
  for (q = 0; q < Q; q++)
    MMXo(q) = IMXo(q) = DMXo(q) = negInfv;
  xN = om->base_w;
  xB = xN + om->xw[p7O_N][p7O_MOVE];
  xJ = -32768;
  xC = -32768;
  xE = -32768;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpVFRow(ox, 0, xE, 0, xJ, xB, xC); /* first 0 is <rowi>: do header. second 0 is xN: always 0 here. */
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rwv[dsq[i]];
      tsc   = om->twv;
      dcv   = negInfv;      /* "-infinity" */
      xEv   = negInfv;
      Dmaxv = negInfv;
      xBv   = esl_vmx_set_s16(xB);

      /* Right shifts by 1 value (2 bytes). 4,8,12,x becomes x,4,8,12.
       * Because ia32 is littlendian, this means a left bit shift.
       * Zeros shift on automatically; replace it with -32768.
       */
      mpv = MMXo(Q-1);  mpv = vec_sld(negInfv, mpv, 14);
      dpv = DMXo(Q-1);  dpv = vec_sld(negInfv, dpv, 14);
      ipv = IMXo(Q-1);  ipv = vec_sld(negInfv, ipv, 14);

      for (q = 0; q < Q; q++)
        {
          /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
          sv   = vec_adds(xBv, *tsc);  tsc++;
          sv   = vec_max (sv, vec_adds(mpv, *tsc)); tsc++;
          sv   = vec_max (sv, vec_adds(ipv, *tsc)); tsc++;
          sv   = vec_max (sv, vec_adds(dpv, *tsc)); tsc++;
          sv   = vec_adds(sv, *rsc);   rsc++;
          xEv  = vec_max(xEv, sv);

          /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
           * {MDI}MX(q) is then the current, not the prev row
           */
          mpv = MMXo(q);
          dpv = DMXo(q);
          ipv = IMXo(q);

          /* Do the delayed stores of {MD}(i,q) now that memory is usable */
          MMXo(q) = sv;
          DMXo(q) = dcv;

          /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv
           */
          dcv   = vec_adds(sv, *tsc);  tsc++;
          Dmaxv = vec_max(dcv, Dmaxv);

          /* Calculate and store I(i,q) */
          sv     = vec_adds(mpv, *tsc);  tsc++;
          IMXo(q)= vec_max(sv, vec_adds(ipv, *tsc)); tsc++;
        }

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_s16(xEv);

      if (xE >= sc_thresh)
        { //hit score threshold. Add a window to the list, then reset scores.

          /* Unpack and unstripe, then find the position responsible for the hit */
          for (q = 0; q < Q; q++)
            {
              tmp.v = MMXo(q);
              for (z = 0; z < 8; z++)
                { // unstripe
                  if ( tmp.i[z] == xE && (q+Q*z+1) <= om->M)
                    { // (q+Q*z+1) is the model position k at which the xE score is found
                      p7_hmmwindow_new(windowlist, 0, i, 0, (q+Q*z+1), 1, 0.0, p7_NOCOMPLEMENT );
                    }
                }
              MMXo(q) = IMXo(q) = DMXo(q) = negInfv; //reset score to start search for next vit window.
            }
        }
      else
        {
          xN = xN + om->xw[p7O_N][p7O_LOOP];
          xC = ESL_MAX(xC + om->xw[p7O_C][p7O_LOOP], xE + om->xw[p7O_E][p7O_MOVE]);
          xJ = ESL_MAX(xJ + om->xw[p7O_J][p7O_LOOP], xE + om->xw[p7O_E][p7O_LOOP]);
          xB = ESL_MAX(xJ + om->xw[p7O_J][p7O_MOVE], xN + om->xw[p7O_N][p7O_MOVE]);
          /* and now xB will carry over into next i, and xC carries over after i=L */

          /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
           * prove that we don't need to evaluate any D->D paths at all.
           *
           * The observation is that if we can show that on the next row,
           * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
           * for all k, then we don't need any D->D calculations.
           *
           * The test condition is:
           *      max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
           * So:
           *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is precalc'ed in om->dd_bound;
           *   max_k D(i,k) is why we tracked Dmaxv;
           *   xB(i) was just calculated above.
           */
          Dmax = esl_vmx_hmax_s16(Dmaxv);
          if (Dmax + om->ddbound_w > xB)
            {
              /* Now we're obligated to do at least one complete DD path to be sure. */
              /* dcv has carried through from end of q loop above */
              dcv = vec_sld(negInfv, dcv, 14);
              tsc = om->twv + 7*Q;      /* set tsc to start of the DD's */
              for (q = 0; q < Q; q++)
                {
                  DMXo(q) = vec_max(dcv, DMXo(q));
                  dcv     = vec_adds(DMXo(q), *tsc); tsc++;
                }

              /* We may have to do up to three more passes; the check
               * is for whether crossing a segment boundary can improve
               * our score.
               */
              do {
                dcv = vec_sld(negInfv, dcv, 14);
                tsc = om->twv + 7*Q;    /* set tsc to start of the DD's */
                for (q = 0; q < Q; q++)
                  {
                    if (! vec_any_gt(dcv, DMXo(q))) break; /* no lane improves: converged */
                    DMXo(q) = vec_max(dcv, DMXo(q));
                    dcv     = vec_adds(DMXo(q), *tsc); tsc++;
                  }
              } while (q == Q);
            }
          else  /* not calculating DD? then just store the last M->D vector calc'ed.*/
            DMXo(0) = vec_sld(negInfv, dcv, 14);

#if p7_DEBUGGING
          if (ox->debugging) p7_omx_DumpVFRow(ox, i, xE, 0, xJ, xB, xC);
#endif
        }
    } /* end loop over sequence residues 1..L */

  return eslOK;
}