/* Function:  p7_ViterbiFilter()
 * Synopsis:  Calculates Viterbi score, vewy vewy fast, in limited precision.
 * Incept:    SRE, Tue Nov 27 09:15:24 2007 [Janelia]
 *
 * Purpose:   Calculates an approximation of the Viterbi score for sequence
 *            <dsq> of length <L> residues, using optimized profile <om>,
 *            and a preallocated one-row DP matrix <ox>. Return the
 *            estimated Viterbi score (in nats) in <ret_sc>.
 *
 *            Score may overflow (and will, on high-scoring
 *            sequences), but will not underflow.
 *
 *            The model must be in a local alignment mode; other modes
 *            cannot provide the necessary guarantee of no underflow.
 *
 *            This is a striped SIMD Viterbi implementation using
 *            AltiVec/VMX integer intrinsics \citep{Farrar07}, in reduced
 *            precision (signed words, 16 bits), with saturating adds
 *            (vec_adds).
 *
 *            The single DP row in <ox> is updated in place: for each
 *            vector q the previous-row M/D/I values are read out before
 *            the new M and (delayed) D values are written back, so the
 *            order of loads and stores inside the q loop matters.
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues
 *            om      - optimized profile
 *            ox      - DP matrix
 *            ret_sc  - RETURN: Viterbi score (in nats)
 *
 * Returns:   <eslOK> on success;
 *            <eslERANGE> if the score overflows (row maximum xE reaches
 *            32767); in this case <*ret_sc> is <eslINFINITY>, and the
 *            sequence can be treated as a high-scoring hit.
 *            On <eslOK>, <*ret_sc> is <-eslINFINITY> if the C state was
 *            never reached (xC still at -32768).
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small, or if
 *            profile isn't in a local alignment mode. (Must be in local
 *            alignment mode because that's what helps us guarantee
 *            limited dynamic range.)
 *
 * Xref:      [Farrar07] for ideas behind striped SIMD DP.
 *            J2/46-47 for layout of HMMER's striped SIMD DP.
 *            J2/50 for single row DP.
 *            J2/60 for reduced precision (epu8)
 *            J2/65 for initial benchmarking
 *            J2/66 for precision maximization
 *            J4/138-140 for reimplementation in 16-bit precision
 */
int
p7_ViterbiFilter(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc)
{
  vector signed short mpv, dpv, ipv; /* previous row values                                       */
  vector signed short sv;            /* temp storage of 1 curr row value in progress              */
  vector signed short dcv;           /* delayed storage of D(i,q+1)                               */
  vector signed short xEv;           /* E state: keeps max for Mk->E as we go                     */
  vector signed short xBv;           /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector signed short Dmaxv;         /* keeps track of maximum D cell on row                      */
  int16_t  xE, xB, xC, xJ, xN;       /* special states' scores                                    */
  int16_t  Dmax;                     /* maximum D cell score on row                               */
  int i;                             /* counter over sequence positions 1..L                      */
  int q;                             /* counter over vectors 0..nq-1                              */
  int Q;                             /* segment length: # of vectors                              */
  vector signed short *dp;           /* using {MDI}MX(q) macro requires initialization of <dp>    */
  vector signed short *rsc;          /* will point at om->rwv[x] for residue x[i]                 */
  vector signed short *tsc;          /* will point into (and step thru) om->twv                   */
  vector signed short negInfv;       /* every element is -32768, our "-infinity"                  */

  Q  = p7O_NQW(om->M);
  dp = ox->dpw[0];

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ8) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  if (om->mode != p7_LOCAL && om->mode != p7_UNILOCAL) ESL_EXCEPTION(eslEINVAL, "Fast filter only works for local alignment");
  ox->M = om->M;

  negInfv = esl_vmx_set_s16((signed short)-32768);

  /* Initialization. In signed 16-bit arithmetic, -infinity is -32768.
   * (The comment inherited from the original said "unsigned"; the values
   * used here are signed.)
   */
  for (q = 0; q < Q; q++)
    MMXo(q) = IMXo(q) = DMXo(q) = negInfv;
  xN = om->base_w;
  xB = xN + om->xw[p7O_N][p7O_MOVE];
  xJ = -32768;
  xC = -32768;
  xE = -32768;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpVFRow(ox, 0, xE, 0, xJ, xB, xC); /* first 0 is <rowi>: do header. second 0 is passed in the xN position. */
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rwv[dsq[i]];
      tsc   = om->twv;
      dcv   = negInfv;               /* "-infinity" */
      xEv   = negInfv;
      Dmaxv = negInfv;
      xBv   = esl_vmx_set_s16(xB);

      /* Shift the last vector of the previous row by one value (2 bytes):
       * vec_sld(negInfv, v, 14) brings in one element from negInfv ahead of
       * the leading elements of v, so -32768 takes the vacated position
       * rather than zero.
       * NOTE(review): the inherited comment described this in terms of ia32
       * little-endian bit shifts and a 4-element example (4,8,12,x becomes
       * x,4,8,12); that wording appears to come from the SSE version.
       */
      mpv = MMXo(Q-1);  mpv = vec_sld(negInfv, mpv, 14);
      dpv = DMXo(Q-1);  dpv = vec_sld(negInfv, dpv, 14);
      ipv = IMXo(Q-1);  ipv = vec_sld(negInfv, ipv, 14);

      /* Each pass of this loop consumes 7 transition vectors from tsc
       * (4 into M, 1 M->D, 2 into I) and 1 emission vector from rsc.
       */
      for (q = 0; q < Q; q++)
        {
          /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
          sv   =              vec_adds(xBv, *tsc);  tsc++;
          sv   = vec_max (sv, vec_adds(mpv, *tsc)); tsc++;
          sv   = vec_max (sv, vec_adds(ipv, *tsc)); tsc++;
          sv   = vec_max (sv, vec_adds(dpv, *tsc)); tsc++;
          sv   = vec_adds(sv, *rsc);                rsc++;
          xEv  = vec_max(xEv, sv);

          /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
           * {MDI}MX(q) is then the current, not the prev row
           */
          mpv = MMXo(q);
          dpv = DMXo(q);
          ipv = IMXo(q);

          /* Do the delayed stores of {MD}(i,q) now that memory is usable */
          MMXo(q) = sv;
          DMXo(q) = dcv;

          /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv
           */
          dcv   = vec_adds(sv, *tsc);  tsc++;
          Dmaxv = vec_max(dcv, Dmaxv);

          /* Calculate and store I(i,q) */
          sv     =             vec_adds(mpv, *tsc);  tsc++;
          IMXo(q)= vec_max(sv, vec_adds(ipv, *tsc)); tsc++;
        }

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_s16(xEv);
      if (xE >= 32767) { *ret_sc = eslINFINITY; return eslERANGE; } /* immediately detect overflow */
      xN = xN + om->xw[p7O_N][p7O_LOOP];
      xC = ESL_MAX(xC + om->xw[p7O_C][p7O_LOOP], xE + om->xw[p7O_E][p7O_MOVE]);
      xJ = ESL_MAX(xJ + om->xw[p7O_J][p7O_LOOP], xE + om->xw[p7O_E][p7O_LOOP]);
      xB = ESL_MAX(xJ + om->xw[p7O_J][p7O_MOVE], xN + om->xw[p7O_N][p7O_MOVE]);
      /* and now xB will carry over into next i, and xC carries over after i=L */

      /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
       * prove that we don't need to evaluate any D->D paths at all.
       *
       * The observation is that if we can show that on the next row,
       * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
       * for all k, then we don't need any D->D calculations.
       *
       * The test condition is:
       *      max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
       * So:
       *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is read from om->ddbound_w;
       *   max_k D(i,k) is why we tracked Dmaxv;
       *   xB(i) was just calculated above.
       */
      Dmax = esl_vmx_hmax_s16(Dmaxv);
      if (Dmax + om->ddbound_w > xB)
        {
          /* Now we're obligated to do at least one complete DD path to be sure. */
          /* dcv has carried through from end of q loop above */
          dcv = vec_sld(negInfv, dcv, 14);
          tsc = om->twv + 7*Q;  /* set tsc to start of the DD's (past the 7*Q vectors stepped through above) */
          for (q = 0; q < Q; q++)
            {
              DMXo(q) = vec_max(dcv, DMXo(q));
              dcv     = vec_adds(DMXo(q), *tsc); tsc++;
            }

          /* We may have to do more passes; the check is for whether
           * crossing a segment boundary can improve our score. A pass that
           * breaks out early (q < Q) ends the do-while; a pass that runs
           * all the way through (q == Q) triggers another.
           * NOTE(review): the inherited comment said "up to three more
           * passes", which matches 4 elements per vector; with 16-bit
           * elements the bound is presumably larger -- confirm.
           */
          do {
            dcv = vec_sld(negInfv, dcv, 14);
            tsc = om->twv + 7*Q;  /* set tsc to start of the DD's */
            for (q = 0; q < Q; q++)
              {
                if (! vec_any_gt(dcv, DMXo(q))) break;
                DMXo(q) = vec_max(dcv, DMXo(q));
                dcv     = vec_adds(DMXo(q), *tsc);   tsc++;
              }
          } while (q == Q);
        }
      else  /* not calculating DD? then just store the last M->D vector calc'ed.*/
        DMXo(0) = vec_sld(negInfv, dcv, 14);

#if p7_DEBUGGING
      if (ox->debugging) p7_omx_DumpVFRow(ox, i, xE, 0, xJ, xB, xC);
#endif
    } /* end loop over sequence residues 1..L */

  /* finally C->T */
  if (xC > -32768)
    {
      /* Undo the base_w offset that xN started from, then convert to nats below. */
      *ret_sc = (float) xC + (float) om->xw[p7O_C][p7O_MOVE] - (float) om->base_w;
      /* *ret_sc += L * om->ncj_roundoff;  see J4/150 for rationale: superceded by -3.0nat approximation*/
      *ret_sc /= om->scale_w;
      *ret_sc -= 3.0; /* the NN/CC/JJ=0,-3nat approximation: see J5/36. That's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ contrib */
    }
  else
    *ret_sc = -eslINFINITY;   /* xC never rose above the -32768 sentinel */
  return eslOK;
}
/* Exercises a range of vec_* vector builtins across several element types
 * (float, double, signed/unsigned/bool long long, int, short, char).
 * Every result is stored in a local that is never read, and the function
 * always returns 0, so the value of this code is presumably in getting each
 * call/type combination through the compiler rather than in any runtime
 * check.
 *
 * NOTE(review): <y> and <z> used by vec_xl/vec_xst below are not declared in
 * this function; presumably they are file-scope vector double objects --
 * confirm against the rest of the file.
 */
int main ()
{
  /* vec_cpsgn on float vectors */
  vector float fa = {1.0, 2.0, 3.0, -4.0};
  vector float fb = {-2.0, -3.0, -4.0, -5.0};
  vector float fc = vec_cpsgn (fa, fb);

  /* vec_and: signed long long, including a bool operand on either side */
  vector long long la = {5L, 14L};
  vector long long lb = {3L, 86L};
  vector long long lc = vec_and (la, lb);
  vector bool long long ld = {0, -1};
  vector long long le = vec_and (la, ld);
  vector long long lf = vec_and (ld, lb);

  /* vec_and: unsigned long long, including a bool operand on either side */
  vector unsigned long long ua = {5L, 14L};
  vector unsigned long long ub = {3L, 86L};
  vector unsigned long long uc = vec_and (ua, ub);
  vector bool long long ud = {0, -1};
  vector unsigned long long ue = vec_and (ua, ud);
  vector unsigned long long uf = vec_and (ud, ub);

  /* vec_andc: signed, then unsigned */
  vector long long lg = vec_andc (la, lb);
  vector long long lh = vec_andc (la, ld);
  vector long long li = vec_andc (ld, lb);

  vector unsigned long long ug = vec_andc (ua, ub);
  vector unsigned long long uh = vec_andc (ua, ud);
  vector unsigned long long ui = vec_andc (ud, ub);

  /* vec_cpsgn on double vectors */
  vector double da = {1.0, -4.0};
  vector double db = {-2.0, 5.0};
  vector double dc = vec_cpsgn (da, db);

  /* vec_mergeh: signed, then unsigned */
  vector long long lj = vec_mergeh (la, lb);
  vector long long lk = vec_mergeh (la, ld);
  vector long long ll = vec_mergeh (ld, la);

  vector unsigned long long uj = vec_mergeh (ua, ub);
  vector unsigned long long uk = vec_mergeh (ua, ud);
  vector unsigned long long ul = vec_mergeh (ud, ua);

  /* vec_mergel: signed, then unsigned */
  vector long long lm = vec_mergel (la, lb);
  vector long long ln = vec_mergel (la, ld);
  vector long long lo = vec_mergel (ld, la);

  vector unsigned long long um = vec_mergel (ua, ub);
  vector unsigned long long un = vec_mergel (ua, ud);
  vector unsigned long long uo = vec_mergel (ud, ua);

  /* vec_nor: signed, then unsigned */
  vector long long lp = vec_nor (la, lb);
  vector long long lq = vec_nor (la, ld);
  vector long long lr = vec_nor (ld, la);

  vector unsigned long long up = vec_nor (ua, ub);
  vector unsigned long long uq = vec_nor (ua, ud);
  vector unsigned long long ur = vec_nor (ud, ua);

  /* vec_or: signed, then unsigned */
  vector long long ls = vec_or (la, lb);
  vector long long lt = vec_or (la, ld);
  vector long long lu = vec_or (ld, la);

  vector unsigned long long us = vec_or (ua, ub);
  vector unsigned long long ut = vec_or (ua, ud);
  vector unsigned long long uu = vec_or (ud, ua);

  /* vec_perm with an unsigned char control vector */
  vector unsigned char ca = {0,4,8,1,5,9,2,6,10,3,7,11,15,12,14,13};
  vector long long lv = vec_perm (la, lb, ca);
  vector unsigned long long uv = vec_perm (ua, ub, ca);

  /* vec_sel with signed, unsigned and bool selector vectors */
  vector long long lw = vec_sel (la, lb, lc);
  vector long long lx = vec_sel (la, lb, uc);
  vector long long ly = vec_sel (la, lb, ld);

  vector unsigned long long uw = vec_sel (ua, ub, lc);
  vector unsigned long long ux = vec_sel (ua, ub, uc);
  vector unsigned long long uy = vec_sel (ua, ub, ld);

  /* vec_xor: signed, then unsigned */
  vector long long lz = vec_xor (la, lb);
  vector long long l0 = vec_xor (la, ld);
  vector long long l1 = vec_xor (ld, la);

  vector unsigned long long uz = vec_xor (ua, ub);
  vector unsigned long long u0 = vec_xor (ua, ud);
  vector unsigned long long u1 = vec_xor (ud, ua);

  /* vec_all_* predicates on unsigned long long */
  int ia = vec_all_eq (ua, ub);
  int ib = vec_all_ge (ua, ub);
  int ic = vec_all_gt (ua, ub);
  int id = vec_all_le (ua, ub);
  int ie = vec_all_lt (ua, ub);
  int ig = vec_all_ne (ua, ub);

  /* vec_any_* predicates on unsigned long long */
  int ih = vec_any_eq (ua, ub);
  int ii = vec_any_ge (ua, ub);
  int ij = vec_any_gt (ua, ub);
  int ik = vec_any_le (ua, ub);
  int il = vec_any_lt (ua, ub);
  int im = vec_any_ne (ua, ub);

  /* vec_mergee / vec_mergeo on signed, unsigned and bool int vectors */
  vector int sia = {9, 16, 25, 36};
  vector int sib = {-8, -27, -64, -125};
  vector int sic = vec_mergee (sia, sib);
  vector int sid = vec_mergeo (sia, sib);

  vector unsigned int uia = {9, 16, 25, 36};
  vector unsigned int uib = {8, 27, 64, 125};
  vector unsigned int uic = vec_mergee (uia, uib);
  vector unsigned int uid = vec_mergeo (uia, uib);

  vector bool int bia = {0, -1, -1, 0};
  vector bool int bib = {-1, -1, 0, -1};
  vector bool int bic = vec_mergee (bia, bib);
  vector bool int bid = vec_mergeo (bia, bib);

  /* vec_packsu: two unsigned long long vectors into one unsigned int vector */
  vector unsigned int uie = vec_packsu (ua, ub);

  /* vec_cntlz on every integer element width, signed and unsigned */
  vector long long l2 = vec_cntlz (la);
  vector unsigned long long u2 = vec_cntlz (ua);
  vector int sie = vec_cntlz (sia);
  vector unsigned int uif = vec_cntlz (uia);
  vector short ssa = {20, -40, -60, 80, 100, -120, -140, 160};
  vector short ssb = vec_cntlz (ssa);
  vector unsigned short usa = {81, 72, 63, 54, 45, 36, 27, 18};
  vector unsigned short usb = vec_cntlz (usa);
  vector signed char sca = {-4, 3, -9, 15, -31, 31, 0, 0,
                            1, 117, -36, 99, 98, 97, 96, 95};
  vector signed char scb = vec_cntlz (sca);
  vector unsigned char cb = vec_cntlz (ca);

  /* vec_xl / vec_xst through &y and &z (declared outside this function) */
  vector double dd = vec_xl (0, &y);
  vec_xst (dd, 0, &z);

  /* vec_round on double */
  vector double de = vec_round (dd);

  /* vec_splat of element 0 and element 1 for each 2-element vector type */
  vector double df = vec_splat (de, 0);
  vector double dg = vec_splat (de, 1);
  vector long long l3 = vec_splat (l2, 0);
  vector long long l4 = vec_splat (l2, 1);
  vector unsigned long long u3 = vec_splat (u2, 0);
  vector unsigned long long u4 = vec_splat (u2, 1);
  vector bool long long l5 = vec_splat (ld, 0);
  vector bool long long l6 = vec_splat (ld, 1);

  /* vec_div and vec_mul on signed and unsigned long long */
  vector long long l7 = vec_div (l3, l4);
  vector unsigned long long u5 = vec_div (u3, u4);

  vector long long l8 = vec_mul (l3, l4);
  vector unsigned long long u6 = vec_mul (u3, u4);

  /* vec_ctf / vec_cts / vec_ctu conversions with an explicit second argument */
  vector double dh = vec_ctf (la, -2);
  vector double di = vec_ctf (ua, 2);
  vector long long l9 = vec_cts (dh, -2);
  vector unsigned long long u7 = vec_ctu (di, 2);

  return 0;
}
/* Function:  p7_ViterbiScore()
 * Synopsis:  Calculates Viterbi score, correctly, and vewy vewy fast.
 * Incept:    SRE, Tue Nov 27 09:15:24 2007 [Janelia]
 *
 * Purpose:   Calculates the Viterbi score for sequence <dsq> of length <L>
 *            residues, using optimized profile <om>, and a preallocated
 *            one-row DP matrix <ox>. Return the Viterbi score (in nats)
 *            in <ret_sc>.
 *
 *            The model <om> must be configured specially to have
 *            lspace float scores, not its usual pspace float scores for
 *            <p7_ForwardFilter()>.
 *
 *            As with all <*Score()> implementations, the score is
 *            accurate (full range and precision) and can be
 *            calculated on models in any mode, not only local modes.
 *
 *            The single DP row in <ox> is updated in place: for each
 *            vector q the previous-row M/D/I values are read out before
 *            the new M and (delayed) D values are written back, so the
 *            order of loads and stores inside the q loop matters.
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues
 *            om      - optimized profile
 *            ox      - DP matrix
 *            ret_sc  - RETURN: Viterbi score (in nats)
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small.
 */
int
p7_ViterbiScore(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc)
{
  vector float mpv, dpv, ipv;      /* previous row values                                       */
  vector float sv;                 /* temp storage of 1 curr row value in progress              */
  vector float dcv;                /* delayed storage of D(i,q+1)                               */
  vector float xEv;                /* E state: keeps max for Mk->E as we go                     */
  vector float xBv;                /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector float Dmaxv;              /* keeps track of maximum D cell on row                      */
  vector float infv;               /* -eslINFINITY in a vector                                  */
  float    xN, xE, xB, xC, xJ;     /* special states' scores                                    */
  float    Dmax;                   /* maximum D cell on row                                     */
  int i;                           /* counter over sequence positions 1..L                      */
  int q;                           /* counter over vectors 0..nq-1                              */
  int Q = p7O_NQF(om->M);          /* segment length: # of vectors                              */
  vector float *dp = ox->dpf[0];   /* using {MDI}MX(q) macro requires initialization of <dp>    */
  vector float *rsc;               /* will point at om->rf[x] for residue x[i]                  */
  vector float *tsc;               /* will point into (and step thru) om->tf                    */

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  ox->M = om->M;

  /* Initialization: every M/D/I cell starts at -infinity. */
  infv = esl_vmx_set_float(-eslINFINITY);
  for (q = 0; q < Q; q++)
    MMXo(q) = IMXo(q) = DMXo(q) = infv;
  xN = 0.;
  xB = om->xf[p7O_N][p7O_MOVE];
  xE = -eslINFINITY;
  xJ = -eslINFINITY;
  xC = -eslINFINITY;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpFloatRow(ox, FALSE, 0, 5, 2, xE, xN, xJ, xB, xC); /* logify=FALSE, <rowi>=0, width=5, precision=2*/
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rf[dsq[i]];
      tsc   = om->tf;
      dcv   = infv;
      xEv   = infv;
      Dmaxv = infv;
      xBv   = esl_vmx_set_float(xB);

      mpv = vec_sld(infv, MMXo(Q-1), 12); /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12; x comes from infv. */
      dpv = vec_sld(infv, DMXo(Q-1), 12);
      ipv = vec_sld(infv, IMXo(Q-1), 12);

      /* Each pass of this loop consumes 7 transition vectors from tsc
       * (4 into M, 1 M->D, 2 into I) and 2 vectors from rsc (one added to
       * M, one added to I).
       */
      for (q = 0; q < Q; q++)
        {
          /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
          sv   =             vec_add(xBv, *tsc);  tsc++;
          sv   = vec_max(sv, vec_add(mpv, *tsc)); tsc++;
          sv   = vec_max(sv, vec_add(ipv, *tsc)); tsc++;
          sv   = vec_max(sv, vec_add(dpv, *tsc)); tsc++;
          sv   = vec_add(sv, *rsc);               rsc++;
          xEv  = vec_max(xEv, sv);

          /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
           * {MDI}MX(q) is then the current, not the prev row
           */
          mpv = MMXo(q);
          dpv = DMXo(q);
          ipv = IMXo(q);

          /* Do the delayed stores of {MD}(i,q) now that memory is usable */
          MMXo(q) = sv;
          DMXo(q) = dcv;

          /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv
           */
          dcv   = vec_add(sv, *tsc);  tsc++;
          Dmaxv = vec_max(dcv, Dmaxv);

          /* Calculate and store I(i,q). The second rsc vector for this q is
           * added here; presumably om->rf interleaves match and insert
           * emission scores -- confirm against the profile layout.
           */
          sv      =             vec_add(mpv, *tsc);  tsc++;
          sv      = vec_max(sv, vec_add(ipv, *tsc)); tsc++;
          IMXo(q) = vec_add(sv, *rsc);               rsc++;
        }

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_float(xEv);
      xN = xN + om->xf[p7O_N][p7O_LOOP];
      xC = ESL_MAX(xC + om->xf[p7O_C][p7O_LOOP], xE + om->xf[p7O_E][p7O_MOVE]);
      xJ = ESL_MAX(xJ + om->xf[p7O_J][p7O_LOOP], xE + om->xf[p7O_E][p7O_LOOP]);
      xB = ESL_MAX(xJ + om->xf[p7O_J][p7O_MOVE], xN + om->xf[p7O_N][p7O_MOVE]);
      /* and now xB will carry over into next i, and xC carries over after i=L */

      /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
       * prove that we don't need to evaluate any D->D paths at all.
       *
       * The observation is that if we can show that on the next row,
       * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
       * for all k, then we don't need any D->D calculations.
       *
       * The test condition is:
       *      max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
       * So:
       *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is read from om->ddbound_f;
       *   max_k D(i,k) is why we tracked Dmaxv;
       *   xB(i) was just calculated above.
       */
      Dmax = esl_vmx_hmax_float(Dmaxv);
      if (Dmax + om->ddbound_f > xB)
        {
          /* Now we're obligated to do at least one complete DD path to be sure. */
          /* dcv has carried through from end of q loop above */
          dcv = vec_sld(infv, dcv, 12);
          tsc = om->tf + 7*Q;  /* set tsc to start of the DD's (past the 7*Q vectors stepped through above) */
          for (q = 0; q < Q; q++)
            {
              DMXo(q) = vec_max(dcv, DMXo(q));
              dcv     = vec_add(DMXo(q), *tsc); tsc++;
            }

          /* We may have to do up to three more passes; the check
           * is for whether crossing a segment boundary can improve
           * our score. A pass that breaks out early (q < Q) ends the
           * do-while; a pass that runs all the way through (q == Q)
           * triggers another.
           */
          do {
            dcv = vec_sld(infv, dcv, 12);
            tsc = om->tf + 7*Q;  /* set tsc to start of the DD's */
            for (q = 0; q < Q; q++)
              {
                if (! vec_any_gt(dcv, DMXo(q))) break;
                DMXo(q) = vec_max(dcv, DMXo(q));
                dcv     = vec_add(DMXo(q), *tsc);   tsc++;
              }
          } while (q == Q);
        }
      else
        { /* not calculating DD? then just store that last MD vector we calc'ed. */
          dcv     = vec_sld(infv, dcv, 12);
          DMXo(0) = dcv;
        }

#if p7_DEBUGGING
      if (ox->debugging) p7_omx_DumpFloatRow(ox, FALSE, i, 5, 2, xE, xN, xJ, xB, xC); /* logify=FALSE, <rowi>=i, width=5, precision=2*/
#endif
    } /* end loop over sequence residues 1..L */

  /* finally C->T */
  *ret_sc = xC + om->xf[p7O_C][p7O_MOVE];
  return eslOK;
}
// CHECK-LABEL: define void @test1 void test1() { /* vec_cmpeq */ res_vbll = vec_cmpeq(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpequd // CHECK-LE: @llvm.ppc.altivec.vcmpequd // CHECK-PPC: error: call to 'vec_cmpeq' is ambiguous res_vbll = vec_cmpeq(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpequd // CHECK-LE: @llvm.ppc.altivec.vcmpequd // CHECK-PPC: error: call to 'vec_cmpeq' is ambiguous /* vec_cmpgt */ res_vbll = vec_cmpgt(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtsd // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd // CHECK-PPC: error: call to 'vec_cmpgt' is ambiguous res_vbll = vec_cmpgt(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud // CHECK-LE: @llvm.ppc.altivec.vcmpgtud // CHECK-PPC: error: call to 'vec_cmpgt' is ambiguous /* ----------------------- predicates --------------------------- */ /* vec_all_eq */ res_i = vec_all_eq(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_eq' is ambiguous res_i = vec_all_eq(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_eq' is ambiguous res_i = vec_all_eq(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_eq' is ambiguous res_i = vec_all_eq(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_eq' is ambiguous res_i = vec_all_eq(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_eq' is ambiguous res_i = vec_all_eq(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_eq' is ambiguous res_i = vec_all_eq(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_eq' is 
ambiguous /* vec_all_ne */ res_i = vec_all_ne(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_ne' is ambiguous res_i = vec_all_ne(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_ne' is ambiguous res_i = vec_all_ne(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_ne' is ambiguous res_i = vec_all_ne(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_ne' is ambiguous res_i = vec_all_ne(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_ne' is ambiguous res_i = vec_all_ne(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_ne' is ambiguous res_i = vec_all_ne(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_all_ne' is ambiguous /* vec_any_eq */ res_i = vec_any_eq(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_eq' is ambiguous res_i = vec_any_eq(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_eq' is ambiguous res_i = vec_any_eq(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_eq' is ambiguous res_i = vec_any_eq(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_eq' is ambiguous res_i = vec_any_eq(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: 
@llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_eq' is ambiguous res_i = vec_any_eq(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_eq' is ambiguous res_i = vec_any_eq(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_eq' is ambiguous /* vec_any_ne */ res_i = vec_any_ne(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_ne' is ambiguous res_i = vec_any_ne(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_ne' is ambiguous res_i = vec_any_ne(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_ne' is ambiguous res_i = vec_any_ne(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_ne' is ambiguous res_i = vec_any_ne(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_ne' is ambiguous res_i = vec_any_ne(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_ne' is ambiguous res_i = vec_any_ne(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpequd.p // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p // CHECK-PPC: error: call to 'vec_any_ne' is ambiguous /* vec_all_ge */ res_i = vec_all_ge(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_all_ge' is ambiguous res_i = vec_all_ge(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_all_ge' is ambiguous res_i = vec_all_ge(vull, vull); 
// CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_ge' is ambiguous res_i = vec_all_ge(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_ge' is ambiguous res_i = vec_all_ge(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_ge' is ambiguous res_i = vec_all_ge(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_ge' is ambiguous res_i = vec_all_ge(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_ge' is ambiguous /* vec_all_gt */ res_i = vec_all_gt(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_all_gt' is ambiguous res_i = vec_all_gt(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_all_gt' is ambiguous res_i = vec_all_gt(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_gt' is ambiguous res_i = vec_all_gt(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_gt' is ambiguous res_i = vec_all_gt(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_gt' is ambiguous res_i = vec_all_gt(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_gt' is ambiguous res_i = vec_all_gt(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_gt' is 
ambiguous /* vec_all_le */ res_i = vec_all_le(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_all_le' is ambiguous res_i = vec_all_le(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_all_le' is ambiguous res_i = vec_all_le(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_le' is ambiguous res_i = vec_all_le(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_le' is ambiguous res_i = vec_all_le(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_le' is ambiguous res_i = vec_all_le(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_le' is ambiguous res_i = vec_all_le(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_le' is ambiguous /* vec_all_lt */ res_i = vec_all_lt(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_all_lt' is ambiguous res_i = vec_all_lt(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_all_lt' is ambiguous res_i = vec_all_lt(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_lt' is ambiguous res_i = vec_all_lt(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_lt' is ambiguous res_i = vec_all_lt(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: 
@llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_lt' is ambiguous res_i = vec_all_lt(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_lt' is ambiguous res_i = vec_all_lt(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_all_lt' is ambiguous /* vec_any_ge */ res_i = vec_any_ge(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_any_ge' is ambiguous res_i = vec_any_ge(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_any_ge' is ambiguous res_i = vec_any_ge(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_ge' is ambiguous res_i = vec_any_ge(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_ge' is ambiguous res_i = vec_any_ge(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_ge' is ambiguous res_i = vec_any_ge(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_ge' is ambiguous res_i = vec_any_ge(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_ge' is ambiguous /* vec_any_gt */ res_i = vec_any_gt(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_any_gt' is ambiguous res_i = vec_any_gt(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_any_gt' is ambiguous res_i = vec_any_gt(vull, vull); 
// CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_gt' is ambiguous res_i = vec_any_gt(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_gt' is ambiguous res_i = vec_any_gt(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_gt' is ambiguous res_i = vec_any_gt(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_gt' is ambiguous res_i = vec_any_gt(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_gt' is ambiguous /* vec_any_le */ res_i = vec_any_le(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_any_le' is ambiguous res_i = vec_any_le(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_any_le' is ambiguous res_i = vec_any_le(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_le' is ambiguous res_i = vec_any_le(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_le' is ambiguous res_i = vec_any_le(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_le' is ambiguous res_i = vec_any_le(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_le' is ambiguous res_i = vec_any_le(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_le' is 
ambiguous /* vec_any_lt */ res_i = vec_any_lt(vsll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_any_lt' is ambiguous res_i = vec_any_lt(vsll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p // CHECK-PPC: error: call to 'vec_any_lt' is ambiguous res_i = vec_any_lt(vull, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_lt' is ambiguous res_i = vec_any_lt(vull, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_lt' is ambiguous res_i = vec_any_lt(vbll, vsll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_lt' is ambiguous res_i = vec_any_lt(vbll, vull); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_lt' is ambiguous res_i = vec_any_lt(vbll, vbll); // CHECK: @llvm.ppc.altivec.vcmpgtud.p // CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p // CHECK-PPC: error: call to 'vec_any_lt' is ambiguous /* vec_max */ res_vsll = vec_max(vsll, vsll); // CHECK: @llvm.ppc.altivec.vmaxsd // CHECK-LE: @llvm.ppc.altivec.vmaxsd // CHECK-PPC: error: call to 'vec_max' is ambiguous res_vsll = vec_max(vbll, vsll); // CHECK: @llvm.ppc.altivec.vmaxsd // CHECK-LE: @llvm.ppc.altivec.vmaxsd // CHECK-PPC: error: call to 'vec_max' is ambiguous res_vsll = vec_max(vsll, vbll); // CHECK: @llvm.ppc.altivec.vmaxsd // CHECK-LE: @llvm.ppc.altivec.vmaxsd // CHECK-PPC: error: call to 'vec_max' is ambiguous res_vull = vec_max(vull, vull); // CHECK: @llvm.ppc.altivec.vmaxud // CHECK-LE: @llvm.ppc.altivec.vmaxud // CHECK-PPC: error: call to 'vec_max' is ambiguous res_vull = vec_max(vbll, vull); // CHECK: @llvm.ppc.altivec.vmaxud // CHECK-LE: @llvm.ppc.altivec.vmaxud // CHECK-PPC: error: call to 'vec_max' is 
ambiguous res_vull = vec_max(vull, vbll); // CHECK: @llvm.ppc.altivec.vmaxud // CHECK-LE: @llvm.ppc.altivec.vmaxud // CHECK-PPC: error: call to 'vec_max' is ambiguous /* vec_min */ res_vsll = vec_min(vsll, vsll); // CHECK: @llvm.ppc.altivec.vminsd // CHECK-LE: @llvm.ppc.altivec.vminsd // CHECK-PPC: error: call to 'vec_min' is ambiguous res_vsll = vec_min(vbll, vsll); // CHECK: @llvm.ppc.altivec.vminsd // CHECK-LE: @llvm.ppc.altivec.vminsd // CHECK-PPC: error: call to 'vec_min' is ambiguous res_vsll = vec_min(vsll, vbll); // CHECK: @llvm.ppc.altivec.vminsd // CHECK-LE: @llvm.ppc.altivec.vminsd // CHECK-PPC: error: call to 'vec_min' is ambiguous res_vull = vec_min(vull, vull); // CHECK: @llvm.ppc.altivec.vminud // CHECK-LE: @llvm.ppc.altivec.vminud // CHECK-PPC: error: call to 'vec_min' is ambiguous res_vull = vec_min(vbll, vull); // CHECK: @llvm.ppc.altivec.vminud // CHECK-LE: @llvm.ppc.altivec.vminud // CHECK-PPC: error: call to 'vec_min' is ambiguous res_vull = vec_min(vull, vbll); // CHECK: @llvm.ppc.altivec.vminud // CHECK-LE: @llvm.ppc.altivec.vminud // CHECK-PPC: error: call to 'vec_min' is ambiguous /* vec_mule */ res_vsll = vec_mule(vi, vi); // CHECK: @llvm.ppc.altivec.vmulesw // CHECK-LE: @llvm.ppc.altivec.vmulosw // CHECK-PPC: error: call to 'vec_mule' is ambiguous res_vull = vec_mule(vui , vui); // CHECK: @llvm.ppc.altivec.vmuleuw // CHECK-LE: @llvm.ppc.altivec.vmulouw // CHECK-PPC: error: call to 'vec_mule' is ambiguous /* vec_mulo */ res_vsll = vec_mulo(vi, vi); // CHECK: @llvm.ppc.altivec.vmulosw // CHECK-LE: @llvm.ppc.altivec.vmulesw // CHECK-PPC: error: call to 'vec_mulo' is ambiguous res_vull = vec_mulo(vui, vui); // CHECK: @llvm.ppc.altivec.vmulouw // CHECK-LE: @llvm.ppc.altivec.vmuleuw // CHECK-PPC: error: call to 'vec_mulo' is ambiguous /* vec_packs */ res_vi = vec_packs(vsll, vsll); // CHECK: @llvm.ppc.altivec.vpksdss // CHECK-LE: @llvm.ppc.altivec.vpksdss // CHECK-PPC: error: call to 'vec_packs' is ambiguous res_vui = vec_packs(vull, vull); // 
CHECK: @llvm.ppc.altivec.vpkudus // CHECK-LE: @llvm.ppc.altivec.vpkudus // CHECK-PPC: error: call to 'vec_packs' is ambiguous /* vec_packsu */ res_vui = vec_packsu(vsll, vsll); // CHECK: @llvm.ppc.altivec.vpksdus // CHECK-LE: @llvm.ppc.altivec.vpksdus // CHECK-PPC: error: call to 'vec_packsu' is ambiguous res_vui = vec_packsu(vull, vull); // CHECK: @llvm.ppc.altivec.vpkudus // CHECK-LE: @llvm.ppc.altivec.vpkudus // CHECK-PPC: error: call to 'vec_packsu' is ambiguous /* vec_rl */ res_vsll = vec_rl(vsll, vull); // CHECK: @llvm.ppc.altivec.vrld // CHECK-LE: @llvm.ppc.altivec.vrld // CHECK-PPC: error: call to 'vec_rl' is ambiguous res_vull = vec_rl(vull, vull); // CHECK: @llvm.ppc.altivec.vrld // CHECK-LE: @llvm.ppc.altivec.vrld // CHECK-PPC: error: call to 'vec_rl' is ambiguous /* vec_sl */ res_vsll = vec_sl(vsll, vull); // CHECK: shl <2 x i64> // CHECK-LE: shl <2 x i64> // CHECK-PPC: error: call to 'vec_sl' is ambiguous res_vull = vec_sl(vull, vull); // CHECK: shl <2 x i64> // CHECK-LE: shl <2 x i64> // CHECK-PPC: error: call to 'vec_sl' is ambiguous /* vec_sr */ res_vsll = vec_sr(vsll, vull); // CHECK: ashr <2 x i64> // CHECK-LE: ashr <2 x i64> // CHECK-PPC: error: call to 'vec_sr' is ambiguous res_vull = vec_sr(vull, vull); // CHECK: lshr <2 x i64> // CHECK-LE: lshr <2 x i64> // CHECK-PPC: error: call to 'vec_sr' is ambiguous /* vec_sra */ res_vsll = vec_sra(vsll, vull); // CHECK: ashr <2 x i64> // CHECK-LE: ashr <2 x i64> // CHECK-PPC: error: call to 'vec_sra' is ambiguous res_vull = vec_sra(vull, vull); // CHECK: ashr <2 x i64> // CHECK-LE: ashr <2 x i64> // CHECK-PPC: error: call to 'vec_sra' is ambiguous /* vec_unpackh */ res_vsll = vec_unpackh(vi); // CHECK: llvm.ppc.altivec.vupkhsw // CHECK-LE: llvm.ppc.altivec.vupklsw // CHECK-PPC: error: call to 'vec_unpackh' is ambiguous res_vbll = vec_unpackh(vbi); // CHECK: llvm.ppc.altivec.vupkhsw // CHECK-LE: llvm.ppc.altivec.vupklsw // CHECK-PPC: error: call to 'vec_unpackh' is ambiguous /* vec_unpackl */ res_vsll = 
vec_unpackl(vi); // CHECK: llvm.ppc.altivec.vupklsw // CHECK-LE: llvm.ppc.altivec.vupkhsw // CHECK-PPC: error: call to 'vec_unpackl' is ambiguous res_vbll = vec_unpackl(vbi); // CHECK: llvm.ppc.altivec.vupklsw // CHECK-LE: llvm.ppc.altivec.vupkhsw // CHECK-PPC: error: call to 'vec_unpackl' is ambiguous /* vec_vpksdss */ res_vi = vec_vpksdss(vsll, vsll); // CHECK: llvm.ppc.altivec.vpksdss // CHECK-LE: llvm.ppc.altivec.vpksdss // CHECK-PPC: warning: implicit declaration of function 'vec_vpksdss' /* vec_vpksdus */ res_vui = vec_vpksdus(vsll, vsll); // CHECK: llvm.ppc.altivec.vpksdus // CHECK-LE: llvm.ppc.altivec.vpksdus // CHECK-PPC: warning: implicit declaration of function 'vec_vpksdus' /* vec_vpkudum */ res_vi = vec_vpkudum(vsll, vsll); // CHECK: vperm // CHECK-LE: vperm // CHECK-PPC: warning: implicit declaration of function 'vec_vpkudum' res_vui = vec_vpkudum(vull, vull); // CHECK: vperm // CHECK-LE: vperm res_vui = vec_vpkudus(vull, vull); // CHECK: llvm.ppc.altivec.vpkudus // CHECK-LE: llvm.ppc.altivec.vpkudus // CHECK-PPC: warning: implicit declaration of function 'vec_vpkudus' /* vec_vupkhsw */ res_vsll = vec_vupkhsw(vi); // CHECK: llvm.ppc.altivec.vupkhsw // CHECK-LE: llvm.ppc.altivec.vupklsw // CHECK-PPC: warning: implicit declaration of function 'vec_vupkhsw' res_vbll = vec_vupkhsw(vbi); // CHECK: llvm.ppc.altivec.vupkhsw // CHECK-LE: llvm.ppc.altivec.vupklsw /* vec_vupklsw */ res_vsll = vec_vupklsw(vi); // CHECK: llvm.ppc.altivec.vupklsw // CHECK-LE: llvm.ppc.altivec.vupkhsw // CHECK-PPC: warning: implicit declaration of function 'vec_vupklsw' res_vbll = vec_vupklsw(vbi); // CHECK: llvm.ppc.altivec.vupklsw // CHECK-LE: llvm.ppc.altivec.vupkhsw /* vec_max */ res_vsll = vec_max(vsll, vsll); // CHECK: @llvm.ppc.altivec.vmaxsd // CHECK-LE: @llvm.ppc.altivec.vmaxsd res_vsll = vec_max(vbll, vsll); // CHECK: @llvm.ppc.altivec.vmaxsd // CHECK-LE: @llvm.ppc.altivec.vmaxsd res_vsll = vec_max(vsll, vbll); // CHECK: @llvm.ppc.altivec.vmaxsd // CHECK-LE: 
@llvm.ppc.altivec.vmaxsd res_vull = vec_max(vull, vull); // CHECK: @llvm.ppc.altivec.vmaxud // CHECK-LE: @llvm.ppc.altivec.vmaxud res_vull = vec_max(vbll, vull); // CHECK: @llvm.ppc.altivec.vmaxud // CHECK-LE: @llvm.ppc.altivec.vmaxud /* vec_min */ res_vsll = vec_min(vsll, vsll); // CHECK: @llvm.ppc.altivec.vminsd // CHECK-LE: @llvm.ppc.altivec.vminsd res_vsll = vec_min(vbll, vsll); // CHECK: @llvm.ppc.altivec.vminsd // CHECK-LE: @llvm.ppc.altivec.vminsd res_vsll = vec_min(vsll, vbll); // CHECK: @llvm.ppc.altivec.vminsd // CHECK-LE: @llvm.ppc.altivec.vminsd res_vull = vec_min(vull, vull); // CHECK: @llvm.ppc.altivec.vminud // CHECK-LE: @llvm.ppc.altivec.vminud res_vull = vec_min(vbll, vull); // CHECK: @llvm.ppc.altivec.vminud // CHECK-LE: @llvm.ppc.altivec.vminud }
/* Function:  p7_MSVFilter()
 * Synopsis:  Fast, limited-precision MSV score of one target sequence.
 * Incept:    SRE, Wed Dec 26 15:12:25 2007 [Janelia]
 *
 * Purpose:   Computes an approximate MSV score for digital sequence
 *            <dsq> (residues 1..<L>) against optimized profile <om>,
 *            using the byte-wide row <ox->dpb[0]> of the preallocated
 *            DP matrix <ox> as working storage. The score, in nats,
 *            is returned through <ret_sc>.
 *
 *            All arithmetic is saturating unsigned 8-bit. High
 *            scoring sequences saturate at the top of the range and
 *            are reported as an overflow; the bottom of the range (0)
 *            plays the role of -infinity.
 *
 *            Only the match emission scores <om->rbv> and the byte
 *            scalars <bias_b>, <base_b>, <tec_b>, <tjb_b>, <tbm_b>
 *            and <scale_b> of the profile are read, so the profile's
 *            configured alignment mode is not consulted.
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues
 *            om      - optimized profile
 *            ox      - DP matrix; only dpb[0][0..Q-1] is used
 *            ret_sc  - RETURN: MSV score (in nats)
 *
 * Note:      Only M-state values are kept, so the row is addressed
 *            directly as a flat array of Q vectors instead of through
 *            the {MDI}MX(q) triplet macros.
 *
 * Returns:   <eslOK> on success.
 *            <eslERANGE> if the row maximum reaches the overflow
 *            ceiling; <*ret_sc> is then set to <eslINFINITY>.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small.
 */
int
p7_MSVFilter(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc)
{
  const int             nvec = p7O_NQB(om->M); /* number of striped vectors per row      */
  vector unsigned char *row;                   /* the single DP row, row[0..nvec-1]      */
  vector unsigned char *emit;                  /* om->rbv[x] for the current residue x   */
  vector unsigned char  diagv;                 /* M(i-1,q-1): value arriving diagonally  */
  vector unsigned char  cellv;                 /* M(i,q) under construction              */
  vector unsigned char  rowmaxv;               /* E state: running max of M(i,*)         */
  vector unsigned char  beginv;                /* B state score, same in every lane      */
  vector unsigned char  jumpv;                 /* J/C state score, same in every lane    */
  vector unsigned char  zerov;                 /* all lanes 0 (our -infinity)            */
  vector unsigned char  onev;                  /* all lanes 1                            */
  vector unsigned char  biasv;                 /* emission bias                          */
  vector unsigned char  basev;                 /* score offset                           */
  vector unsigned char  tecv;                  /* E->C cost                              */
  vector unsigned char  tjbmv;                 /* combined J->B and B->Mk cost           */
  vector unsigned char  ceilingv;              /* largest row max that is not overflow   */
  uint8_t               xJ = 0;                /* scalar copy of one lane of jumpv       */
  float                 sc;                    /* final score, assembled before storing  */
  int                   pos;                   /* sequence position 1..L                 */
  int                   v;                     /* vector index 0..nvec-1                 */

  /* The byte row must have room for nvec vectors. */
  if (nvec > ox->allocQ16) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  ox->M = om->M;

  /* Constant vectors. */
  zerov = vec_splat_u8(0);
  onev  = vec_splat_u8(1);
  biasv = esl_vmx_set_u8(om->bias_b);
  basev = esl_vmx_set_u8((int8_t) om->base_b);
  tecv  = esl_vmx_set_u8((int8_t) om->tec_b);
  tjbmv = esl_vmx_set_u8((int8_t) om->tjb_b + (int8_t) om->tbm_b);

  /* Overflow ceiling: all-ones (255) minus the bias, minus one more. */
  ceilingv = vec_subs(vec_subs((vector unsigned char) vec_cmpeq(biasv, biasv), biasv), onev);

  /* Row 0: every M cell is -infinity (0 in this offset arithmetic). */
  row = ox->dpb[0];
  for (v = 0; v < nvec; v++)
    row[v] = zerov;

  jumpv  = zerov;                    /* J starts at -infinity          */
  beginv = vec_subs(basev, tjbmv);   /* B starts at base minus J->B->M */

#if p7_DEBUGGING
  if (ox->debugging)
    {
      unsigned char xB;
      vec_ste(beginv, 0, &xB);
      vec_ste(jumpv,  0, &xJ);
      p7_omx_DumpMFRow(ox, 0, 0, 0, xJ, xB, xJ);
    }
#endif

  pos = 1;
  while (pos <= L)
    {
      emit    = om->rbv[dsq[pos]];
      rowmaxv = zerov;

      /* Seed the diagonal input from the last vector of the previous
       * row, moved up by one lane: lane 0 receives a zero byte from
       * zerov and lanes 1..15 receive old lanes 0..14.
       */
      diagv = vec_sld(zerov, row[nvec-1], 15);

      for (v = 0; v < nvec; v++)
        {
          /* M(i,q) = max(M(i-1,q-1), B) + bias - emission cost, saturating. */
          cellv   = vec_subs(vec_adds(vec_max(diagv, beginv), biasv), emit[v]);
          rowmaxv = vec_max(rowmaxv, cellv);

          diagv  = row[v];   /* previous-row value feeds the next vector  */
          row[v] = cellv;    /* slot is free now, so store the new value  */
        }

      /* Horizontal max by rotating the register onto itself by 1, 2, 4
       * and 8 bytes; afterwards every lane holds the row maximum.
       */
      rowmaxv = vec_max(rowmaxv, vec_sld(rowmaxv, rowmaxv, 1));
      rowmaxv = vec_max(rowmaxv, vec_sld(rowmaxv, rowmaxv, 2));
      rowmaxv = vec_max(rowmaxv, vec_sld(rowmaxv, rowmaxv, 4));
      rowmaxv = vec_max(rowmaxv, vec_sld(rowmaxv, rowmaxv, 8));

      /* Report overflow as soon as it is seen. */
      if (vec_any_gt(rowmaxv, ceilingv))
        {
          *ret_sc = eslINFINITY;
          return eslERANGE;
        }

      /* Special states: E->C, then J = max(J, E), then B for the next row. */
      rowmaxv = vec_subs(rowmaxv, tecv);
      jumpv   = vec_max(jumpv, rowmaxv);
      beginv  = vec_subs(vec_max(basev, jumpv), tjbmv);

#if p7_DEBUGGING
      if (ox->debugging)
        {
          unsigned char xB, xE;
          vec_ste(beginv,  0, &xB);
          vec_ste(rowmaxv, 0, &xE);
          vec_ste(jumpv,   0, &xJ);
          p7_omx_DumpMFRow(ox, pos, xE, 0, xJ, xB, xJ);
        }
#endif
      pos++;
    }

  /* C->T, undo the offset and scaling, then add back the fixed
   * -3.0 nat correction for the NN, CC, JJ contributions.
   */
  vec_ste(jumpv, 0, &xJ);
  sc  = (float) (xJ - om->tjb_b) - (float) om->base_b;
  sc /= om->scale_b;
  sc -= 3.0;
  *ret_sc = sc;

  return eslOK;
}
/* Function: p7_SSVFilter_longtarget() * Synopsis: Finds windows with SSV scores above some threshold (vewy vewy fast, in limited precision) * * Purpose: Calculates an approximation of the SSV (single ungapped diagonal) * score for regions of sequence <dsq> of length <L> residues, using * optimized profile <om>, and a preallocated one-row DP matrix <ox>, * and captures the positions at which such regions exceed the score * required to be significant in the eyes of the calling function, * which depends on the <bg> and <p> (usually p=0.02 for nhmmer). * Note that this variant performs only SSV computations, never * passing through the J state - the score required to pass SSV at * the default threshold (or less restrictive) is sufficient to * pass MSV in essentially all DNA models we've tested. * * Above-threshold diagonals are captured into a preallocated list * <windowlist>. Rather than simply capturing positions at which a * score threshold is reached, this function establishes windows * around those high-scoring positions, using scores in <msvdata>. * These windows can be merged by the calling function. * * * Args: dsq - digital target sequence, 1..L * L - length of dsq in residues * om - optimized profile * ox - DP matrix * msvdata - compact representation of substitution scores, for backtracking diagonals * bg - the background model, required for translating a P-value threshold into a score threshold * P - p-value below which a region is captured as being above threshold * windowlist - preallocated container for all hits (resized if necessary) * * * Note: We misuse the matrix <ox> here, using only a third of the * first dp row, accessing it as <dp[0..Q-1]> rather than * in triplets via <{MDI}MX(q)> macros, since we only need * to store M state values. We know that if <ox> was big * enough for normal DP calculations, it must be big enough * to hold the MSVFilter calculation. * * Returns: <eslOK> on success. * * Throws: <eslEINVAL> if <ox> allocation is too small. 
*/ int p7_SSVFilter_longtarget(const ESL_DSQ *dsq, int L, P7_OPROFILE *om, P7_OMX *ox, const P7_SCOREDATA *ssvdata, P7_BG *bg, double P, P7_HMM_WINDOWLIST *windowlist) { vector unsigned char mpv; /* previous row values */ vector unsigned char xEv; /* E state: keeps max for Mk->E as we go */ vector unsigned char xBv; /* B state: splatted vector of B[i-1] for B->Mk calculations */ vector unsigned char sv; /* temp storage of 1 curr row value in progress */ vector unsigned char biasv; /* emission bias in a vector */ uint8_t xJ; /* special states' scores */ int i; /* counter over sequence positions 1..L */ int q; /* counter over vectors 0..nq-1 */ int Q = p7O_NQB(om->M); /* segment length: # of vectors */ vector unsigned char *dp = ox->dpb[0]; /* we're going to use dp[0][0..q..Q-1], not {MDI}MX(q) macros*/ vector unsigned char *rsc; /* will point at om->rbv[x] for residue x[i] */ vector unsigned char zerov; /* vector of zeros */ vector unsigned char tecv; /* vector for E->C cost */ vector unsigned char tjbmv; /* vector for [JN]->B->M move cost */ vector unsigned char basev; /* offset for scores */ int status; int k; int n; int end; int rem_sc; int start; int target_end; int target_start; int max_end; int max_sc; int sc; int pos_since_max; float ret_sc; union { vector unsigned char v; uint8_t b[16]; } u; /* * Computing the score required to let P meet the F1 prob threshold * In original code, converting from a scaled int MSV * score S (the score getting to state E) to a probability goes like this: * usc = S - om->tec_b - om->tjb_b - om->base_b; * usc /= om->scale_b; * usc -= 3.0; * P = f ( (usc - nullsc) / eslCONST_LOG2 , mu, lambda) * and we're computing the threshold usc, so reverse it: * (usc - nullsc) / eslCONST_LOG2 = inv_f( P, mu, lambda) * usc = nullsc + eslCONST_LOG2 * inv_f( P, mu, lambda) * usc += 3 * usc *= om->scale_b * S = usc + om->tec_b + om->tjb_b + om->base_b * * Here, I compute threshold with length model based on max_length. 
Doesn't * matter much - in any case, both the bg and om models will change with roughly * 1 bit for each doubling of the length model, so they offset. */ float nullsc; float invP = esl_gumbel_invsurv(P, om->evparam[p7_MMU], om->evparam[p7_MLAMBDA]); vector unsigned char sc_threshv; /* pushes value to saturation if it's above pthresh */ int sc_thresh; /* Check that the DP matrix is ok for us. */ if (Q > ox->allocQ16) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small"); ox->M = om->M; p7_bg_SetLength(bg, om->max_length); p7_oprofile_ReconfigMSVLength(om, om->max_length); p7_bg_NullOne (bg, dsq, om->max_length, &nullsc); sc_thresh = (int) ceil( ( ( nullsc + (invP * eslCONST_LOG2) + 3.0 ) * om->scale_b ) + om->base_b + om->tec_b + om->tjb_b ); sc_threshv = esl_vmx_set_u8( (int8_t)sc_thresh - 1); /* Initialization. In offset unsigned arithmetic, -infinity is 0, and 0 is om->base. */ biasv = esl_vmx_set_u8(om->bias_b); for (q = 0; q < Q; q++) dp[q] = vec_splat_u8(0); xJ = 0; zerov = vec_splat_u8(0); basev = esl_vmx_set_u8((int8_t) om->base_b); tecv = esl_vmx_set_u8((int8_t) om->tec_b); tjbmv = esl_vmx_set_u8((int8_t) om->tjb_b + (int8_t) om->tbm_b); xBv = vec_subs(basev, tjbmv); for (i = 1; i <= L; i++) { rsc = om->rbv[dsq[i]]; xEv = vec_splat_u8(0); /* Right shifts by 1 byte. 4,8,12,x becomes x,4,8,12. * Because ia32 is littlendian, this means a left bit shift. * Zeros shift on automatically, which is our -infinity. */ mpv = vec_sld(zerov, dp[Q-1], 15); for (q = 0; q < Q; q++) { /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. 
*/ sv = vec_max(mpv, xBv); sv = vec_adds(sv, biasv); sv = vec_subs(sv, *rsc); rsc++; xEv = vec_max(xEv, sv); mpv = dp[q]; /* Load {MDI}(i-1,q) into mpv */ dp[q] = sv; /* Do delayed store of M(i,q) now that memory is usable */ } if (vec_any_gt(xEv, sc_threshv) ) { //hit pthresh, so add position to list and reset values //figure out which model state hit threshold end = -1; rem_sc = -1; for (q = 0; q < Q; q++) { /// Unpack and unstripe, so we can find the state that exceeded pthresh u.v = dp[q]; for (k = 0; k < 16; k++) { // unstripe //(q+Q*k+1) is the model position k at which the xE score is found if (u.b[k] >= sc_thresh && u.b[k] > rem_sc && (q+Q*k+1) <= om->M) { end = (q+Q*k+1); rem_sc = u.b[k]; } } dp[q] = vec_splat_u8(0); // while we're here ... this will cause values to get reset to xB in next dp iteration } //recover the diagonal that hit threshold start = end; target_end = target_start = i; sc = rem_sc; while (rem_sc > om->base_b - om->tjb_b - om->tbm_b) { rem_sc -= om->bias_b - ssvdata->ssv_scores[start*om->abc->Kp + dsq[target_start]]; --start; --target_start; //if ( start == 0 || target_start==0) break; } start++; target_start++; //extend diagonal further with single diagonal extension k = end+1; n = target_end+1; max_end = target_end; max_sc = sc; pos_since_max = 0; while (k<om->M && n<=L) { sc += om->bias_b - ssvdata->ssv_scores[k*om->abc->Kp + dsq[n]]; if (sc >= max_sc) { max_sc = sc; max_end = n; pos_since_max=0; } else { pos_since_max++; if (pos_since_max == 5) break; } k++; n++; } end += (max_end - target_end); target_end = max_end; ret_sc = ((float) (max_sc - om->tjb_b) - (float) om->base_b); ret_sc /= om->scale_b; ret_sc -= 3.0; // that's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ p7_hmmwindow_new( windowlist, 0, // sequence_id; used in the FM-based filter, but not here target_start, // position in the target at which the diagonal starts 0, // position in the target fm_index at which diagonal starts; not used here, just in FM-based filter end, // 
position in the model at which the diagonal ends end-start+1 , // length of diagonal ret_sc, // score of diagonal p7_NOCOMPLEMENT, // always p7_NOCOMPLEMENT here; varies in FM-based filter L ); i = target_end; // skip forward } } /* end loop over sequence residues 1..L */ return eslOK; ERROR: ESL_EXCEPTION(eslEMEM, "Error allocating memory for hit list\n"); }
/* Function:  p7_ViterbiFilter_longtarget()
 * Synopsis:  Finds windows within potentially long sequence blocks with Viterbi
 *            scores above threshold (vewy vewy fast, in limited precision)
 *
 * Purpose:   Calculates an approximation of the Viterbi score for regions
 *            of sequence <dsq>, using optimized profile <om>, and a pre-
 *            allocated one-row DP matrix <ox>, and captures the positions
 *            at which such regions exceed the score required to be
 *            significant in the eyes of the calling function (usually
 *            p=0.001).
 *
 *            The resulting landmarks are converted to subsequence
 *            windows by the calling function.
 *
 *            <windowlist->count> is reset to 0 on entry, before any
 *            argument validation.
 *
 *            The model must be in a local alignment mode; other modes
 *            cannot provide the necessary guarantee of no underflow.
 *
 *            This is a striped SIMD Viterbi implementation using
 *            AltiVec/VMX integer intrinsics \citep{Farrar07}, in reduced
 *            precision (signed words, 16 bits).
 *
 * Args:      dsq        - digital target sequence, 1..L
 *            L          - length of dsq in residues
 *            om         - optimized profile
 *            ox         - DP matrix
 *            filtersc   - null or bias correction, required for translating a P-value threshold into a score threshold
 *            P          - p-value below which a region is captured as being above threshold
 *            windowlist - RETURN: array of hit windows (start and end of diagonal) for the above-threshold areas
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small, or if
 *            profile isn't in a local alignment mode. (Must be in local
 *            alignment mode because that's what helps us guarantee
 *            limited dynamic range.)
 *
 * Xref:      See p7_ViterbiFilter()
 */
int
p7_ViterbiFilter_longtarget(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox,
                            float filtersc, double P, P7_HMM_WINDOWLIST *windowlist)
{
  vector signed short mpv, dpv, ipv;   /* previous row values                                       */
  vector signed short sv;              /* temp storage of 1 curr row value in progress              */
  vector signed short dcv;             /* delayed storage of D(i,q+1)                               */
  vector signed short xEv;             /* E state: keeps max for Mk->E as we go                     */
  vector signed short xBv;             /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector signed short Dmaxv;           /* keeps track of maximum D cell on row                      */
  int16_t xE, xB, xC, xJ, xN;          /* special states' scores                                    */
  int16_t Dmax;                        /* maximum D cell score on row                               */
  int i;                               /* counter over sequence positions 1..L                      */
  int q;                               /* counter over vectors 0..nq-1                              */
  int Q = p7O_NQW(om->M);              /* segment length: # of vectors                              */
  vector signed short *dp = ox->dpw[0];/* using {MDI}MX(q) macro requires initialization of <dp>    */
  vector signed short *rsc;            /* will point at om->rwv[x] for residue x[i]                 */
  vector signed short *tsc;            /* will point into (and step thru) om->twv                   */
  vector signed short negInfv;         /* -32768 in every lane: our -infinity                       */
  int16_t sc_thresh;                   /* scaled integer xE score that counts as a hit              */
  float invP;                          /* Gumbel inverse survival of P                              */
  int z;                               /* lane index while unstriping                               */
  union { vector signed short v; int16_t i[8]; } tmp;  /* scalar view of one vector */

  windowlist->count = 0;

  /*
   *  In p7_ViterbiFilter, converting from a scaled int Viterbi score
   *  S (aka xE the score getting to state E) to a probability
   *  goes like this:
   *    vsc =  S + om->xw[p7O_E][p7O_MOVE] + om->xw[p7O_C][p7O_MOVE] - om->base_w
   *    vsc /= om->scale_w;
   *    vsc -= 3.0;
   *    P  = esl_gumbel_surv((vsc - filtersc) / eslCONST_LOG2 , om->evparam[p7_VMU], om->evparam[p7_VLAMBDA]);
   *  and we're computing the threshold vsc, so invert it:
   *    (vsc - filtersc) / eslCONST_LOG2 = esl_gumbel_invsurv( P, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA])
   *    vsc = filtersc + eslCONST_LOG2 * esl_gumbel_invsurv( P, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA])
   *    vsc += 3.0
   *    vsc *= om->scale_w
   *    S = vsc - (float)om->xw[p7O_E][p7O_MOVE] - (float)om->xw[p7O_C][p7O_MOVE] + (float)om->base_w
   *
   *  NOTE(review): the result is narrowed to int16_t without a range
   *  check; confirm callers never request a threshold outside 16 bits.
   */
  invP      = esl_gumbel_invsurv(P, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA]);
  sc_thresh = (int) ceil ( ( (filtersc + (eslCONST_LOG2 * invP) + 3.0) * om->scale_w )
                           - (float)om->xw[p7O_E][p7O_MOVE] - (float)om->xw[p7O_C][p7O_MOVE] + (float)om->base_w );

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ8) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  if (om->mode != p7_LOCAL && om->mode != p7_UNILOCAL) ESL_EXCEPTION(eslEINVAL, "Fast filter only works for local alignment");
  ox->M = om->M;

  negInfv = esl_vmx_set_s16((signed short)-32768);

  /* Initialization. In signed 16-bit arithmetic, -infinity is -32768 */
  for (q = 0; q < Q; q++)
    MMXo(q) = IMXo(q) = DMXo(q) = negInfv;
  xN = om->base_w;
  xB = xN + om->xw[p7O_N][p7O_MOVE];
  xJ = -32768;
  xC = -32768;
  xE = -32768;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpVFRow(ox, 0, xE, 0, xJ, xB, xC); /* first 0 is <rowi>: do header. second 0 is xN: always 0 here. */
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rwv[dsq[i]];
      tsc   = om->twv;
      dcv   = negInfv;               /* "-infinity" */
      xEv   = negInfv;
      Dmaxv = negInfv;
      xBv   = esl_vmx_set_s16(xB);

      /* Move the last vector of the previous row up by one 16-bit
       * lane: lane 0 receives -32768 from negInfv and lanes 1..7
       * receive old lanes 0..6.
       */
      mpv = MMXo(Q-1); mpv = vec_sld(negInfv, mpv, 14);
      dpv = DMXo(Q-1); dpv = vec_sld(negInfv, dpv, 14);
      ipv = IMXo(Q-1); ipv = vec_sld(negInfv, ipv, 14);

      for (q = 0; q < Q; q++)
        {
          /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
          sv  = vec_adds(xBv, *tsc);               tsc++;
          sv  = vec_max (sv, vec_adds(mpv, *tsc)); tsc++;
          sv  = vec_max (sv, vec_adds(ipv, *tsc)); tsc++;
          sv  = vec_max (sv, vec_adds(dpv, *tsc)); tsc++;
          sv  = vec_adds(sv, *rsc);                rsc++;
          xEv = vec_max(xEv, sv);

          /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
           * {MDI}MX(q) is then the current, not the prev row
           */
          mpv = MMXo(q);
          dpv = DMXo(q);
          ipv = IMXo(q);

          /* Do the delayed stores of {MD}(i,q) now that memory is usable */
          MMXo(q) = sv;
          DMXo(q) = dcv;

          /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv
           */
          dcv   = vec_adds(sv, *tsc); tsc++;
          Dmaxv = vec_max(dcv, Dmaxv);

          /* Calculate and store I(i,q) */
          sv      = vec_adds(mpv, *tsc);               tsc++;
          IMXo(q) = vec_max(sv, vec_adds(ipv, *tsc));  tsc++;
        }

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_s16(xEv);

      if (xE >= sc_thresh)
        {
          /* Hit the score threshold. Add a window for every model
           * position whose M score equals xE, then reset the row.
           */
          for (q = 0; q < Q; q++)
            {
              /* Unpack and unstripe, then find the position responsible for the hit */
              tmp.v = MMXo(q);
              for (z = 0; z < 8; z++)
                {
                  /* (q+Q*z+1) is the model position k at which the xE score is found */
                  if (tmp.i[z] == xE && (q+Q*z+1) <= om->M)
                    {
                      /* Trailing L is the target length, matching the
                       * call made by p7_SSVFilter_longtarget().
                       */
                      p7_hmmwindow_new(windowlist, 0, i, 0, (q+Q*z+1), 1, 0.0, p7_NOCOMPLEMENT, L);
                    }
                }
              MMXo(q) = IMXo(q) = DMXo(q) = negInfv; /* reset scores to start the search for the next window */
            }
        }
      else
        {
          xN = xN + om->xw[p7O_N][p7O_LOOP];
          xC = ESL_MAX(xC + om->xw[p7O_C][p7O_LOOP], xE + om->xw[p7O_E][p7O_MOVE]);
          xJ = ESL_MAX(xJ + om->xw[p7O_J][p7O_LOOP], xE + om->xw[p7O_E][p7O_LOOP]);
          xB = ESL_MAX(xJ + om->xw[p7O_J][p7O_MOVE], xN + om->xw[p7O_N][p7O_MOVE]);
          /* and now xB will carry over into next i, and xC carries over after i=L */

          /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
           * prove that we don't need to evaluate any D->D paths at all.
           *
           * The observation is that if we can show that on the next row,
           * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
           * for all k, then we don't need any D->D calculations.
           *
           * The test condition is:
           *      max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
           * So:
           *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is precalc'ed in om->ddbound_w;
           *   max_k D(i,k) is why we tracked Dmaxv;
           *   xB(i) was just calculated above.
           */
          Dmax = esl_vmx_hmax_s16(Dmaxv);
          if (Dmax + om->ddbound_w > xB)
            {
              /* Now we're obligated to do at least one complete DD path to be sure. */
              /* dcv has carried through from end of q loop above */
              dcv = vec_sld(negInfv, dcv, 14);
              tsc = om->twv + 7*Q;  /* set tsc to start of the DD's */
              for (q = 0; q < Q; q++)
                {
                  DMXo(q) = vec_max(dcv, DMXo(q));
                  dcv     = vec_adds(DMXo(q), *tsc); tsc++;
                }

              /* We may have to do more passes; the check is for whether
               * crossing a segment boundary can improve our score. A
               * pass that breaks out early (q < Q) ends the loop.
               */
              do {
                dcv = vec_sld(negInfv, dcv, 14);
                tsc = om->twv + 7*Q;  /* set tsc to start of the DD's */
                for (q = 0; q < Q; q++)
                  {
                    if (! vec_any_gt(dcv, DMXo(q))) break;
                    DMXo(q) = vec_max(dcv, DMXo(q));
                    dcv     = vec_adds(DMXo(q), *tsc); tsc++;
                  }
              } while (q == Q);
            }
          else
            { /* not calculating DD? then just store the last M->D vector calc'ed. */
              DMXo(0) = vec_sld(negInfv, dcv, 14);
            }

#if p7_DEBUGGING
          if (ox->debugging) p7_omx_DumpVFRow(ox, i, xE, 0, xJ, xB, xC);
#endif
        }
    } /* end loop over sequence residues 1..L */

  return eslOK;
}