Example #1
/* Function:  p7_ViterbiFilter()
 * Synopsis:  Calculates Viterbi score, vewy vewy fast, in limited precision.
 * Incept:    SRE, Tue Nov 27 09:15:24 2007 [Janelia]
 *
 * Purpose:   Calculates an approximation of the Viterbi score for sequence
 *            <dsq> of length <L> residues, using optimized profile <om>,
 *            and a preallocated one-row DP matrix <ox>. Return the 
 *            estimated Viterbi score (in nats) in <ret_sc>.
 *            
 *            Score may overflow (and will, on high-scoring
 *            sequences), but will not underflow. 
 *            
 *            The model must be in a local alignment mode; other modes
 *            cannot provide the necessary guarantee of no underflow.
 *            
 *            This is a striped SIMD Viterbi implementation using
 *            PowerPC Altivec/VMX integer intrinsics \citep{Farrar07},
 *            in reduced precision (signed words, 16 bits).
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues          
 *            om      - optimized profile
 *            ox      - DP matrix
 *            ret_sc  - RETURN: Viterbi score (in nats)          
 *
 * Returns:   <eslOK> on success;
 *            <eslERANGE> if the score overflows; in this case
 *            <*ret_sc> is <eslINFINITY>, and the sequence can 
 *            be treated as a high-scoring hit.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small, or if
 *            profile isn't in a local alignment mode. (Must be in local
 *            alignment mode because that's what helps us guarantee 
 *            limited dynamic range.)
 *
 * Xref:      [Farrar07] for ideas behind striped SIMD DP.
 *            J2/46-47 for layout of HMMER's striped SIMD DP.
 *            J2/50 for single row DP.
 *            J2/60 for reduced precision (epu8)
 *            J2/65 for initial benchmarking
 *            J2/66 for precision maximization
 *            J4/138-140 for reimplementation in 16-bit precision
 */
int
p7_ViterbiFilter(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc)
{
  vector signed short mpv, dpv, ipv; /* previous row values                                       */
  vector signed short sv;	     /* temp storage of 1 curr row value in progress              */
  vector signed short dcv;	     /* delayed storage of D(i,q+1)                               */
  vector signed short xEv;	     /* E state: keeps max for Mk->E as we go                     */
  vector signed short xBv;	     /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector signed short Dmaxv;         /* keeps track of maximum D cell on row                      */
  int16_t  xE, xB, xC, xJ, xN;	     /* special states' scores                                    */
  int16_t  Dmax;		     /* maximum D cell score on row                               */
  int i;			     /* counter over sequence positions 1..L                      */
  int q;			     /* counter over vectors 0..nq-1                              */
  int Q;                             /* segment length: # of vectors                              */
  vector signed short *dp;           /* using {MDI}MX(q) macro requires initialization of <dp>    */
  vector signed short *rsc;	     /* will point at om->ru[x] for residue x[i]                  */
  vector signed short *tsc;	     /* will point into (and step thru) om->tu                    */

  vector signed short negInfv;

  Q = p7O_NQW(om->M);
  dp = ox->dpw[0];

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ8)                                 ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  if (om->mode != p7_LOCAL && om->mode != p7_UNILOCAL) ESL_EXCEPTION(eslEINVAL, "Fast filter only works for local alignment");
  ox->M   = om->M;

  negInfv = esl_vmx_set_s16((signed short)-32768);
  
  /* Initialization. In signed 16-bit arithmetic, -infinity is -32768.
   */
  for (q = 0; q < Q; q++)
    MMXo(q) = IMXo(q) = DMXo(q) = negInfv;
  xN   = om->base_w;
  xB   = xN + om->xw[p7O_N][p7O_MOVE];
  xJ   = -32768;
  xC   = -32768;
  xE   = -32768;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpVFRow(ox, 0, xE, 0, xJ, xB, xC); /* first 0 is <rowi>: do header. second 0 is xN: always 0 here. */
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rwv[dsq[i]];
      tsc   = om->twv;
      dcv   = negInfv;               /* "-infinity" */
      xEv   = negInfv;
      Dmaxv = negInfv;
      xBv   = esl_vmx_set_s16(xB);

      /* Right shifts by one value (2 bytes): elements 4,8,12,x become x,4,8,12.
       * vec_sld concatenates its two operands and extracts 16 bytes at the
       * given byte offset, so passing negInfv first shifts -32768 (not zeros)
       * into the vacated element.
       */
      mpv = MMXo(Q-1);  mpv = vec_sld(negInfv, mpv, 14);
      dpv = DMXo(Q-1);  dpv = vec_sld(negInfv, dpv, 14);
      ipv = IMXo(Q-1);  ipv = vec_sld(negInfv, ipv, 14);

      for (q = 0; q < Q; q++)
	{
	  /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
	  sv   =              vec_adds(xBv, *tsc);  tsc++;
	  sv   = vec_max (sv, vec_adds(mpv, *tsc)); tsc++;
	  sv   = vec_max (sv, vec_adds(ipv, *tsc)); tsc++;
	  sv   = vec_max (sv, vec_adds(dpv, *tsc)); tsc++;
	  sv   = vec_adds(sv, *rsc);                rsc++;
	  xEv  = vec_max(xEv, sv);
	  
	  /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
	   * {MDI}MX(q) is then the current, not the prev row
	   */
	  mpv = MMXo(q);
	  dpv = DMXo(q);
	  ipv = IMXo(q);

	  /* Do the delayed stores of {MD}(i,q) now that memory is usable */
	  MMXo(q) = sv;
	  DMXo(q) = dcv;

	  /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv
	   */
	  dcv   = vec_adds(sv, *tsc);  tsc++;
	  Dmaxv = vec_max(dcv, Dmaxv);

	  /* Calculate and store I(i,q) */
	  sv     =             vec_adds(mpv, *tsc);  tsc++;
	  IMXo(q)= vec_max(sv, vec_adds(ipv, *tsc)); tsc++;
	}	  

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_s16(xEv);
      if (xE >= 32767) { *ret_sc = eslINFINITY; return eslERANGE; }	/* immediately detect overflow */
      xN = xN + om->xw[p7O_N][p7O_LOOP];
      xC = ESL_MAX(xC + om->xw[p7O_C][p7O_LOOP], xE + om->xw[p7O_E][p7O_MOVE]);
      xJ = ESL_MAX(xJ + om->xw[p7O_J][p7O_LOOP], xE + om->xw[p7O_E][p7O_LOOP]);
      xB = ESL_MAX(xJ + om->xw[p7O_J][p7O_MOVE], xN + om->xw[p7O_N][p7O_MOVE]);
      /* and now xB will carry over into next i, and xC carries over after i=L */

      /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
       * prove that we don't need to evaluate any D->D paths at all.
       *
       * The observation is that if we can show that on the next row,
       * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
       * for all k, then we don't need any D->D calculations.
       * 
       * The test condition is:
       *      max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
       * So:
       *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is precalc'ed in om->ddbound_w;
       *   max_k D(i,k) is why we tracked Dmaxv;
       *   xB(i) was just calculated above.
       */
      Dmax = esl_vmx_hmax_s16(Dmaxv);
      if (Dmax + om->ddbound_w > xB) 
	{
	  /* Now we're obligated to do at least one complete DD path to be sure. */
	  /* dcv has carried through from end of q loop above */
	  dcv = vec_sld(negInfv, dcv, 14); 
	  tsc = om->twv + 7*Q;	/* set tsc to start of the DD's */
	  for (q = 0; q < Q; q++) 
	    {
	      DMXo(q) = vec_max(dcv, DMXo(q));	
	      dcv     = vec_adds(DMXo(q), *tsc); tsc++;
	    }

	  /* We may have to do up to three more passes; the check
	   * is for whether crossing a segment boundary can improve
	   * our score. 
	   */
	  do {
	    dcv = vec_sld(negInfv, dcv, 14); 
	    tsc = om->twv + 7*Q;	/* set tsc to start of the DD's */
	    for (q = 0; q < Q; q++) 
	      {
		if (! vec_any_gt(dcv, DMXo(q))) break;
		DMXo(q) = vec_max(dcv, DMXo(q));	
		dcv     = vec_adds(DMXo(q), *tsc);   tsc++;
	      }	    
	  } while (q == Q);
	}
      else  /* not calculating DD? then just store the last M->D vector calc'ed.*/
	DMXo(0) = vec_sld(negInfv, dcv, 14);
	  
#if p7_DEBUGGING
      if (ox->debugging) p7_omx_DumpVFRow(ox, i, xE, 0, xJ, xB, xC);   
#endif
    } /* end loop over sequence residues 1..L */

  /* finally C->T */
  if (xC > -32768) 
    {
      *ret_sc = (float) xC + (float) om->xw[p7O_C][p7O_MOVE] - (float) om->base_w;
      /* *ret_sc += L * om->ncj_roundoff;  see J4/150 for rationale: superseded by -3.0 nat approximation */
      *ret_sc /= om->scale_w;
      *ret_sc -= 3.0; /* the NN/CC/JJ=0,-3nat approximation: see J5/36. That's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ contrib */
    }
  else *ret_sc = -eslINFINITY;
  return eslOK;
}
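The MMXo/DMXo/IMXo macros above address a striped DP layout (sensu [Farrar07]): vector q holds model positions q+1, q+1+Q, q+1+2Q, ..., which is why a one-element vec_sld aligns row i-1 with row i. Below is a minimal stand-alone sketch of that coordinate mapping, assuming this convention; striped_coord is a hypothetical helper, not part of HMMER, and the real p7O_NQW macro additionally enforces a minimum vector count.

#include <stdio.h>

/* Map model position k (1..M) to its striped coordinate: vector index q
 * and lane r, for Q vectors of W lanes each (W = 8 for 16-bit scores).
 * Hypothetical helper, for illustration only. */
static void striped_coord(int k, int Q, int *q, int *r)
{
  *q = (k-1) % Q;   /* which vector */
  *r = (k-1) / Q;   /* which lane   */
}

int main(void)
{
  int M = 20;                /* model length: example value              */
  int W = 8;                 /* 16-bit lanes per 128-bit vector          */
  int Q = (M + W - 1) / W;   /* number of vectors, as in p7O_NQW(om->M)  */
  int q, r, k;

  for (k = 1; k <= M; k++) {
    striped_coord(k, Q, &q, &r);
    printf("k=%2d -> vector %d, lane %d\n", k, q, r);
  }
  return 0;
}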
Example #2
#include <altivec.h>

/* Global operands for the vec_xl/vec_xst calls below; initializer values
 * are arbitrary stand-ins, since the original declarations of <y> and <z>
 * are not shown in this excerpt. */
vector double y = { 0.1234, 0.5678 };
vector double z;

int main ()
{
  vector float fa = {1.0, 2.0, 3.0, -4.0};
  vector float fb = {-2.0, -3.0, -4.0, -5.0};
  vector float fc = vec_cpsgn (fa, fb);

  vector long long la = {5L, 14L};
  vector long long lb = {3L, 86L};
  vector long long lc = vec_and (la, lb);
  vector bool long long ld = {0, -1};
  vector long long le = vec_and (la, ld);
  vector long long lf = vec_and (ld, lb);

  vector unsigned long long ua = {5L, 14L};
  vector unsigned long long ub = {3L, 86L};
  vector unsigned long long uc = vec_and (ua, ub);
  vector bool long long ud = {0, -1};
  vector unsigned long long ue = vec_and (ua, ud);
  vector unsigned long long uf = vec_and (ud, ub);

  vector long long lg = vec_andc (la, lb);
  vector long long lh = vec_andc (la, ld);
  vector long long li = vec_andc (ld, lb);

  vector unsigned long long ug = vec_andc (ua, ub);
  vector unsigned long long uh = vec_andc (ua, ud);
  vector unsigned long long ui = vec_andc (ud, ub);

  vector double da = {1.0, -4.0};
  vector double db = {-2.0, 5.0};
  vector double dc = vec_cpsgn (da, db);

  vector long long lj = vec_mergeh (la, lb);
  vector long long lk = vec_mergeh (la, ld);
  vector long long ll = vec_mergeh (ld, la);

  vector unsigned long long uj = vec_mergeh (ua, ub);
  vector unsigned long long uk = vec_mergeh (ua, ud);
  vector unsigned long long ul = vec_mergeh (ud, ua);

  vector long long lm = vec_mergel (la, lb);
  vector long long ln = vec_mergel (la, ld);
  vector long long lo = vec_mergel (ld, la);

  vector unsigned long long um = vec_mergel (ua, ub);
  vector unsigned long long un = vec_mergel (ua, ud);
  vector unsigned long long uo = vec_mergel (ud, ua);

  vector long long lp = vec_nor (la, lb);
  vector long long lq = vec_nor (la, ld);
  vector long long lr = vec_nor (ld, la);

  vector unsigned long long up = vec_nor (ua, ub);
  vector unsigned long long uq = vec_nor (ua, ud);
  vector unsigned long long ur = vec_nor (ud, ua);

  vector long long ls = vec_or (la, lb);
  vector long long lt = vec_or (la, ld);
  vector long long lu = vec_or (ld, la);

  vector unsigned long long us = vec_or (ua, ub);
  vector unsigned long long ut = vec_or (ua, ud);
  vector unsigned long long uu = vec_or (ud, ua);

  vector unsigned char ca = {0,4,8,1,5,9,2,6,10,3,7,11,15,12,14,13};
  vector long long lv = vec_perm (la, lb, ca);
  vector unsigned long long uv = vec_perm (ua, ub, ca);

  vector long long lw = vec_sel (la, lb, lc);
  vector long long lx = vec_sel (la, lb, uc);
  vector long long ly = vec_sel (la, lb, ld);

  vector unsigned long long uw = vec_sel (ua, ub, lc);
  vector unsigned long long ux = vec_sel (ua, ub, uc);
  vector unsigned long long uy = vec_sel (ua, ub, ld);

  vector long long lz = vec_xor (la, lb);
  vector long long l0 = vec_xor (la, ld);
  vector long long l1 = vec_xor (ld, la);

  vector unsigned long long uz = vec_xor (ua, ub);
  vector unsigned long long u0 = vec_xor (ua, ud);
  vector unsigned long long u1 = vec_xor (ud, ua);

  int ia = vec_all_eq (ua, ub);
  int ib = vec_all_ge (ua, ub);
  int ic = vec_all_gt (ua, ub);
  int id = vec_all_le (ua, ub);
  int ie = vec_all_lt (ua, ub);
  int ig = vec_all_ne (ua, ub);

  int ih = vec_any_eq (ua, ub);
  int ii = vec_any_ge (ua, ub);
  int ij = vec_any_gt (ua, ub);
  int ik = vec_any_le (ua, ub);
  int il = vec_any_lt (ua, ub);
  int im = vec_any_ne (ua, ub);

  vector int sia = {9, 16, 25, 36};
  vector int sib = {-8, -27, -64, -125};
  vector int sic = vec_mergee (sia, sib);
  vector int sid = vec_mergeo (sia, sib);

  vector unsigned int uia = {9, 16, 25, 36};
  vector unsigned int uib = {8, 27, 64, 125};
  vector unsigned int uic = vec_mergee (uia, uib);
  vector unsigned int uid = vec_mergeo (uia, uib);

  vector bool int bia = {0, -1, -1, 0};
  vector bool int bib = {-1, -1, 0, -1};
  vector bool int bic = vec_mergee (bia, bib);
  vector bool int bid = vec_mergeo (bia, bib);

  vector unsigned int uie = vec_packsu (ua, ub);

  vector long long l2 = vec_cntlz (la);
  vector unsigned long long u2 = vec_cntlz (ua);
  vector int sie = vec_cntlz (sia);
  vector unsigned int uif = vec_cntlz (uia);
  vector short ssa = {20, -40, -60, 80, 100, -120, -140, 160};
  vector short ssb = vec_cntlz (ssa);
  vector unsigned short usa = {81, 72, 63, 54, 45, 36, 27, 18};
  vector unsigned short usb = vec_cntlz (usa);
  vector signed char sca = {-4, 3, -9, 15, -31, 31, 0, 0,
		            1, 117, -36, 99, 98, 97, 96, 95};
  vector signed char scb = vec_cntlz (sca);
  vector unsigned char cb = vec_cntlz (ca);

  vector double dd = vec_xl (0, &y);
  vec_xst (dd, 0, &z);

  vector double de = vec_round (dd);

  vector double df = vec_splat (de, 0);
  vector double dg = vec_splat (de, 1);
  vector long long l3 = vec_splat (l2, 0);
  vector long long l4 = vec_splat (l2, 1);
  vector unsigned long long u3 = vec_splat (u2, 0);
  vector unsigned long long u4 = vec_splat (u2, 1);
  vector bool long long l5 = vec_splat (ld, 0);
  vector bool long long l6 = vec_splat (ld, 1);

  vector long long l7 = vec_div (l3, l4);
  vector unsigned long long u5 = vec_div (u3, u4);

  vector long long l8 = vec_mul (l3, l4);
  vector unsigned long long u6 = vec_mul (u3, u4);

  vector double dh = vec_ctf (la, -2);
  vector double di = vec_ctf (ua, 2);
  vector long long l9 = vec_cts (dh, -2);
  vector unsigned long long u7 = vec_ctu (di, 2);

  return 0;
}
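The HMMER kernels in Examples #1, #3, and #5 lean heavily on one idiom from this intrinsic family: vec_sld(a, b, n) concatenates a and b (a first) and extracts 16 bytes starting at byte offset n. Thus vec_sld(negInfv, mpv, 14) yields negInfv's last 16-bit element followed by mpv's first seven, i.e. a right shift by one element with -32768 filled in. A stand-alone demonstration, assuming big-endian AltiVec element order:

#include <altivec.h>
#include <stdio.h>

int main(void)
{
  vector signed short a = {-1,-1,-1,-1,-1,-1,-1,-1};  /* stands in for negInfv */
  vector signed short b = { 1, 2, 3, 4, 5, 6, 7, 8};

  /* Bytes 14..29 of a||b: a's last element, then b's first seven. */
  vector signed short c = vec_sld(a, b, 14);

  union { vector signed short v; short s[8]; } u = { .v = c };
  for (int i = 0; i < 8; i++) printf("%d ", u.s[i]);  /* -1 1 2 3 4 5 6 7 */
  printf("\n");
  return 0;
}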
Example #3
/* Function:  p7_ViterbiScore()
 * Synopsis:  Calculates Viterbi score, correctly, and vewy vewy fast.
 * Incept:    SRE, Tue Nov 27 09:15:24 2007 [Janelia]
 *
 * Purpose:   Calculates the Viterbi score for sequence <dsq> of length <L> 
 *            residues, using optimized profile <om>, and a preallocated
 *            one-row DP matrix <ox>. Return the Viterbi score (in nats)
 *            in <ret_sc>.
 *            
 *            The model <om> must be configured specially to have
 *            lspace float scores, not its usual pspace float scores for
 *            <p7_ForwardFilter()>.
 *            
 *            As with all <*Score()> implementations, the score is
 *            accurate (full range and precision) and can be
 *            calculated on models in any mode, not only local modes.
 *            
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues          
 *            om      - optimized profile
 *            ox      - DP matrix
 *            ret_sc  - RETURN: Viterbi score (in nats)          
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small.
 */
int
p7_ViterbiScore(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc)
{
  vector float mpv, dpv, ipv;      /* previous row values                                       */
  vector float sv;		   /* temp storage of 1 curr row value in progress              */
  vector float dcv;		   /* delayed storage of D(i,q+1)                               */
  vector float xEv;		   /* E state: keeps max for Mk->E as we go                     */
  vector float xBv;		   /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector float Dmaxv;              /* keeps track of maximum D cell on row                      */
  vector float infv;		   /* -eslINFINITY in a vector                                  */
  float    xN, xE, xB, xC, xJ;	   /* special states' scores                                    */
  float    Dmax;		   /* maximum D cell on row                                     */
  int i;			   /* counter over sequence positions 1..L                      */
  int q;			   /* counter over vectors 0..nq-1                              */
  int Q       = p7O_NQF(om->M);	   /* segment length: # of vectors                              */
  vector float *dp  = ox->dpf[0];  /* using {MDI}MX(q) macro requires initialization of <dp>    */
  vector float *rsc;		   /* will point at om->rf[x] for residue x[i]                  */
  vector float *tsc;		   /* will point into (and step thru) om->tf                    */

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  ox->M  = om->M;

  /* Initialization. */
  infv = esl_vmx_set_float(-eslINFINITY);
  for (q = 0; q < Q; q++)
    MMXo(q) = IMXo(q) = DMXo(q) = infv;
  xN   = 0.;
  xB   = om->xf[p7O_N][p7O_MOVE];
  xE   = -eslINFINITY;
  xJ   = -eslINFINITY;
  xC   = -eslINFINITY;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpFloatRow(ox, FALSE, 0, 5, 2, xE, xN, xJ, xB, xC); /* logify=FALSE, <rowi>=0, width=5, precision=2*/
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rf[dsq[i]];
      tsc   = om->tf;
      dcv   = infv;
      xEv   = infv;
      Dmaxv = infv;
      xBv   = esl_vmx_set_float(xB);

      mpv = vec_sld(infv, MMXo(Q-1), 12);  /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12. */
      dpv = vec_sld(infv, DMXo(Q-1), 12);
      ipv = vec_sld(infv, IMXo(Q-1), 12);
      for (q = 0; q < Q; q++)
	{
	  /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
	  sv   =                vec_add(xBv, *tsc);  tsc++;
	  sv   = vec_max(sv, vec_add(mpv, *tsc));    tsc++;
	  sv   = vec_max(sv, vec_add(ipv, *tsc));    tsc++;
	  sv   = vec_max(sv, vec_add(dpv, *tsc));    tsc++;
	  sv   = vec_add(sv, *rsc);                  rsc++;
	  xEv  = vec_max(xEv, sv);

	  /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
	   * {MDI}MX(q) is then the current, not the prev row
	   */
	  mpv = MMXo(q);
	  dpv = DMXo(q);
	  ipv = IMXo(q);

	  /* Do the delayed stores of {MD}(i,q) now that memory is usable */
	  MMXo(q) = sv;
	  DMXo(q) = dcv;

	  /* Calculate the next D(i,q+1) partially: M->D only;
           * delay storage, holding it in dcv
	   */
	  dcv   = vec_add(sv, *tsc); tsc++;
	  Dmaxv = vec_max(dcv, Dmaxv);

	  /* Calculate and store I(i,q) */
	  sv      =             vec_add(mpv, *tsc);  tsc++;
	  sv      = vec_max(sv, vec_add(ipv, *tsc)); tsc++;
	  IMXo(q) = vec_add(sv, *rsc);               rsc++;
	}	  

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_float(xEv);
      xN = xN +  om->xf[p7O_N][p7O_LOOP];
      xC = ESL_MAX(xC + om->xf[p7O_C][p7O_LOOP],  xE + om->xf[p7O_E][p7O_MOVE]);
      xJ = ESL_MAX(xJ + om->xf[p7O_J][p7O_LOOP],  xE + om->xf[p7O_E][p7O_LOOP]);
      xB = ESL_MAX(xJ + om->xf[p7O_J][p7O_MOVE],  xN + om->xf[p7O_N][p7O_MOVE]);
      /* and now xB will carry over into next i, and xC carries over after i=L */

      /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
       * prove that we don't need to evaluate any D->D paths at all.
       *
       * The observation is that if we can show that on the next row,
       * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
       * for all k, then we don't need any D->D calculations.
       * 
       * The test condition is:
       *      max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
       * So:
       *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is precalc'ed in om->ddbound_f;
       *   max_k D(i,k) is why we tracked Dmaxv;
       *   xB(i) was just calculated above.
       */
      Dmax = esl_vmx_hmax_float(Dmaxv);
      if (Dmax + om->ddbound_f > xB) 
	{
	  /* Now we're obligated to do at least one complete DD path to be sure. */
	  /* dcv has carried through from end of q loop above */
	  dcv = vec_sld(infv, dcv, 12);
	  tsc = om->tf + 7*Q;	/* set tsc to start of the DD's */
	  for (q = 0; q < Q; q++) 
	    {
	      DMXo(q) = vec_max(dcv, DMXo(q));	
	      dcv     = vec_add(DMXo(q), *tsc); tsc++;
	    }

	  /* We may have to do up to three more passes; the check
	   * is for whether crossing a segment boundary can improve
	   * our score. 
	   */
	  do {
	    dcv = vec_sld(infv, dcv, 12);
	    tsc = om->tf + 7*Q;	/* set tsc to start of the DD's */
	    for (q = 0; q < Q; q++) 
	      {
		if (! vec_any_gt(dcv, DMXo(q))) break;
		DMXo(q) = vec_max(dcv, DMXo(q));	
		dcv     = vec_add(DMXo(q), *tsc);   tsc++;
	      }	    
	  } while (q == Q);
	}
      else
	{ /* not calculating DD? then just store that last MD vector we calc'ed. */
	  dcv     = vec_sld(infv, dcv, 12);
	  DMXo(0) = dcv;
	}

#if p7_DEBUGGING
      if (ox->debugging) p7_omx_DumpFloatRow(ox, FALSE, i, 5, 2, xE, xN, xJ, xB, xC); /* logify=FALSE, <rowi>=i, width=5, precision=2*/
#endif
    } /* end loop over sequence residues 1..L */

  /* finally C->T */
  *ret_sc = xC + om->xf[p7O_C][p7O_MOVE];
  return eslOK;
}
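Both Viterbi kernels finish each row with esl_vmx_hmax_s16()/esl_vmx_hmax_float() horizontal max reductions, and the MSV filter in Example #5 inlines the same rotate-and-max idiom for bytes. A sketch of that reduction for eight signed 16-bit lanes; illustrative only, not Easel's actual implementation:

#include <altivec.h>
#include <stdio.h>

/* Horizontal max of eight signed 16-bit lanes via three rotate-and-max
 * steps (vec_sld of a vector with itself is a byte rotation). */
static short hmax_s16(vector signed short v)
{
  v = vec_max(v, vec_sld(v, v, 8));  /* max with lanes rotated by 4 elements */
  v = vec_max(v, vec_sld(v, v, 4));  /* ... rotated by 2 elements            */
  v = vec_max(v, vec_sld(v, v, 2));  /* ... rotated by 1 element             */
  union { vector signed short v; short s[8]; } u = { .v = v };
  return u.s[0];                     /* every lane now holds the maximum     */
}

int main(void)
{
  vector signed short x = { 3, -9, 42, 7, -1, 0, 12, 5 };
  printf("hmax = %d\n", hmax_s16(x));   /* prints 42 */
  return 0;
}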
Example #4
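This excerpt omits the test's RUN driver lines and the global operands it assigns into. A plausible reconstruction of that scaffolding follows; the flags and initializer values are assumptions modeled on clang's builtins-ppc-p8vector.c, with the types inferred from the calls below:

// RUN: %clang_cc1 -faltivec -target-feature +power8-vector -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -faltivec -target-feature +power8-vector -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-LE
// RUN: not %clang_cc1 -faltivec -triple powerpc-unknown-unknown -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PPC

#include <altivec.h>

vector signed long long   vsll = { 1, 2 };
vector unsigned long long vull = { 1, 2 };
vector bool long long     vbll = { 0, 0 };
vector signed int         vi   = { -1, 2, -3, 4 };
vector unsigned int       vui  = { 1, 2, 3, 4 };
vector bool int           vbi  = { 0, 0, 0, 0 };

int                       res_i;
vector signed int         res_vi;
vector unsigned int       res_vui;
vector signed long long   res_vsll;
vector unsigned long long res_vull;
vector bool long long     res_vbll;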
// CHECK-LABEL: define void @test1
void test1() {

  /* vec_cmpeq */
  res_vbll = vec_cmpeq(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd
// CHECK-LE: @llvm.ppc.altivec.vcmpequd
// CHECK-PPC: error: call to 'vec_cmpeq' is ambiguous

  res_vbll = vec_cmpeq(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd
// CHECK-LE: @llvm.ppc.altivec.vcmpequd
// CHECK-PPC: error: call to 'vec_cmpeq' is ambiguous

  /* vec_cmpgt */
  res_vbll = vec_cmpgt(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd
// CHECK-PPC: error: call to 'vec_cmpgt' is ambiguous

  res_vbll = vec_cmpgt(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud
// CHECK-PPC: error: call to 'vec_cmpgt' is ambiguous

  /* ----------------------- predicates --------------------------- */
  /* vec_all_eq */
  res_i = vec_all_eq(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  res_i = vec_all_eq(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_eq' is ambiguous

  /* vec_all_ne */
  res_i = vec_all_ne(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  res_i = vec_all_ne(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_all_ne' is ambiguous

  /* vec_any_eq */
  res_i = vec_any_eq(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  res_i = vec_any_eq(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_eq' is ambiguous

  /* vec_any_ne */
  res_i = vec_any_ne(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  res_i = vec_any_ne(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpequd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
// CHECK-PPC: error: call to 'vec_any_ne' is ambiguous

  /* vec_all_ge */
  res_i = vec_all_ge(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  res_i = vec_all_ge(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_ge' is ambiguous

  /* vec_all_gt */
  res_i = vec_all_gt(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  res_i = vec_all_gt(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_gt' is ambiguous

  /* vec_all_le */
  res_i = vec_all_le(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  res_i = vec_all_le(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_le' is ambiguous

  /* vec_all_lt */
  res_i = vec_all_lt(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  res_i = vec_all_lt(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_all_lt' is ambiguous

  /* vec_any_ge */
  res_i = vec_any_ge(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  res_i = vec_any_ge(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_ge' is ambiguous

  /* vec_any_gt */
  res_i = vec_any_gt(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  res_i = vec_any_gt(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_gt' is ambiguous

  /* vec_any_le */
  res_i = vec_any_le(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  res_i = vec_any_le(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_le' is ambiguous

  /* vec_any_lt */
  res_i = vec_any_lt(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vull, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vull, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vbll, vull);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  res_i = vec_any_lt(vbll, vbll);
// CHECK: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-LE: @llvm.ppc.altivec.vcmpgtud.p
// CHECK-PPC: error: call to 'vec_any_lt' is ambiguous

  /* vec_max */
  res_vsll = vec_max(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  res_vsll = vec_max(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  res_vsll = vec_max(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  res_vull = vec_max(vull, vull);
// CHECK: @llvm.ppc.altivec.vmaxud
// CHECK-LE: @llvm.ppc.altivec.vmaxud
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  res_vull = vec_max(vbll, vull);
// CHECK: @llvm.ppc.altivec.vmaxud
// CHECK-LE: @llvm.ppc.altivec.vmaxud
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  res_vull = vec_max(vull, vbll);
// CHECK: @llvm.ppc.altivec.vmaxud
// CHECK-LE: @llvm.ppc.altivec.vmaxud
// CHECK-PPC: error: call to 'vec_max' is ambiguous

  /* vec_min */
  res_vsll = vec_min(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  res_vsll = vec_min(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  res_vsll = vec_min(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  res_vull = vec_min(vull, vull);
// CHECK: @llvm.ppc.altivec.vminud
// CHECK-LE: @llvm.ppc.altivec.vminud
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  res_vull = vec_min(vbll, vull);
// CHECK: @llvm.ppc.altivec.vminud
// CHECK-LE: @llvm.ppc.altivec.vminud
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  res_vull = vec_min(vull, vbll);
// CHECK: @llvm.ppc.altivec.vminud
// CHECK-LE: @llvm.ppc.altivec.vminud
// CHECK-PPC: error: call to 'vec_min' is ambiguous

  /* vec_mule */
  res_vsll = vec_mule(vi, vi);
// CHECK: @llvm.ppc.altivec.vmulesw
// CHECK-LE: @llvm.ppc.altivec.vmulosw
// CHECK-PPC: error: call to 'vec_mule' is ambiguous

  res_vull = vec_mule(vui, vui);
// CHECK: @llvm.ppc.altivec.vmuleuw
// CHECK-LE: @llvm.ppc.altivec.vmulouw
// CHECK-PPC: error: call to 'vec_mule' is ambiguous

  /* vec_mulo */
  res_vsll = vec_mulo(vi, vi);
// CHECK: @llvm.ppc.altivec.vmulosw
// CHECK-LE: @llvm.ppc.altivec.vmulesw
// CHECK-PPC: error: call to 'vec_mulo' is ambiguous

  res_vull = vec_mulo(vui, vui);
// CHECK: @llvm.ppc.altivec.vmulouw
// CHECK-LE: @llvm.ppc.altivec.vmuleuw
// CHECK-PPC: error: call to 'vec_mulo' is ambiguous

  /* vec_packs */
  res_vi = vec_packs(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vpksdss
// CHECK-LE: @llvm.ppc.altivec.vpksdss
// CHECK-PPC: error: call to 'vec_packs' is ambiguous

  res_vui = vec_packs(vull, vull);
// CHECK: @llvm.ppc.altivec.vpkudus
// CHECK-LE: @llvm.ppc.altivec.vpkudus
// CHECK-PPC: error: call to 'vec_packs' is ambiguous

  /* vec_packsu */
  res_vui = vec_packsu(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vpksdus
// CHECK-LE: @llvm.ppc.altivec.vpksdus
// CHECK-PPC: error: call to 'vec_packsu' is ambiguous

  res_vui = vec_packsu(vull, vull);
// CHECK: @llvm.ppc.altivec.vpkudus
// CHECK-LE: @llvm.ppc.altivec.vpkudus
// CHECK-PPC: error: call to 'vec_packsu' is ambiguous

  /* vec_rl */
  res_vsll = vec_rl(vsll, vull);
// CHECK: @llvm.ppc.altivec.vrld
// CHECK-LE: @llvm.ppc.altivec.vrld
// CHECK-PPC: error: call to 'vec_rl' is ambiguous

  res_vull = vec_rl(vull, vull);
// CHECK: @llvm.ppc.altivec.vrld
// CHECK-LE: @llvm.ppc.altivec.vrld
// CHECK-PPC: error: call to 'vec_rl' is ambiguous

  /* vec_sl */
  res_vsll = vec_sl(vsll, vull);
// CHECK: shl <2 x i64>
// CHECK-LE: shl <2 x i64>
// CHECK-PPC: error: call to 'vec_sl' is ambiguous

  res_vull = vec_sl(vull, vull);
// CHECK: shl <2 x i64>
// CHECK-LE: shl <2 x i64>
// CHECK-PPC: error: call to 'vec_sl' is ambiguous

  /* vec_sr */
  res_vsll = vec_sr(vsll, vull);
// CHECK: ashr <2 x i64>
// CHECK-LE: ashr <2 x i64>
// CHECK-PPC: error: call to 'vec_sr' is ambiguous

  res_vull = vec_sr(vull, vull);
// CHECK: lshr <2 x i64>
// CHECK-LE: lshr <2 x i64>
// CHECK-PPC: error: call to 'vec_sr' is ambiguous

  /* vec_sra */
  res_vsll = vec_sra(vsll, vull);
// CHECK: ashr <2 x i64>
// CHECK-LE: ashr <2 x i64>
// CHECK-PPC: error: call to 'vec_sra' is ambiguous

  res_vull = vec_sra(vull, vull);
// CHECK: ashr <2 x i64>
// CHECK-LE: ashr <2 x i64>
// CHECK-PPC: error: call to 'vec_sra' is ambiguous

  /* vec_unpackh */
  res_vsll = vec_unpackh(vi);
// CHECK: llvm.ppc.altivec.vupkhsw
// CHECK-LE: llvm.ppc.altivec.vupklsw
// CHECK-PPC: error: call to 'vec_unpackh' is ambiguous

  res_vbll = vec_unpackh(vbi);
// CHECK: llvm.ppc.altivec.vupkhsw
// CHECK-LE: llvm.ppc.altivec.vupklsw
// CHECK-PPC: error: call to 'vec_unpackh' is ambiguous

  /* vec_unpackl */
  res_vsll = vec_unpackl(vi);
// CHECK: llvm.ppc.altivec.vupklsw
// CHECK-LE: llvm.ppc.altivec.vupkhsw
// CHECK-PPC: error: call to 'vec_unpackl' is ambiguous

  res_vbll = vec_unpackl(vbi);
// CHECK: llvm.ppc.altivec.vupklsw
// CHECK-LE: llvm.ppc.altivec.vupkhsw
// CHECK-PPC: error: call to 'vec_unpackl' is ambiguous

  /* vec_vpksdss */
  res_vi = vec_vpksdss(vsll, vsll);
// CHECK: llvm.ppc.altivec.vpksdss
// CHECK-LE: llvm.ppc.altivec.vpksdss
// CHECK-PPC: warning: implicit declaration of function 'vec_vpksdss'

  /* vec_vpksdus */
  res_vui = vec_vpksdus(vsll, vsll);
// CHECK: llvm.ppc.altivec.vpksdus
// CHECK-LE: llvm.ppc.altivec.vpksdus
// CHECK-PPC: warning: implicit declaration of function 'vec_vpksdus'

  /* vec_vpkudum */
  res_vi = vec_vpkudum(vsll, vsll);
// CHECK: vperm
// CHECK-LE: vperm
// CHECK-PPC: warning: implicit declaration of function 'vec_vpkudum'

  res_vui = vec_vpkudum(vull, vull);
// CHECK: vperm
// CHECK-LE: vperm

  res_vui = vec_vpkudus(vull, vull);
// CHECK: llvm.ppc.altivec.vpkudus
// CHECK-LE: llvm.ppc.altivec.vpkudus
// CHECK-PPC: warning: implicit declaration of function 'vec_vpkudus'

  /* vec_vupkhsw */
  res_vsll = vec_vupkhsw(vi);
// CHECK: llvm.ppc.altivec.vupkhsw
// CHECK-LE: llvm.ppc.altivec.vupklsw
// CHECK-PPC: warning: implicit declaration of function 'vec_vupkhsw'

  res_vbll = vec_vupkhsw(vbi);
// CHECK: llvm.ppc.altivec.vupkhsw
// CHECK-LE: llvm.ppc.altivec.vupklsw

  /* vec_vupklsw */
  res_vsll = vec_vupklsw(vi);
// CHECK: llvm.ppc.altivec.vupklsw
// CHECK-LE: llvm.ppc.altivec.vupkhsw
// CHECK-PPC: warning: implicit declaration of function 'vec_vupklsw'

  res_vbll = vec_vupklsw(vbi);
// CHECK: llvm.ppc.altivec.vupklsw
// CHECK-LE: llvm.ppc.altivec.vupkhsw

  /* vec_max */
  res_vsll = vec_max(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd

  res_vsll = vec_max(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd

  res_vsll = vec_max(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vmaxsd
// CHECK-LE: @llvm.ppc.altivec.vmaxsd

  res_vull = vec_max(vull, vull);
// CHECK: @llvm.ppc.altivec.vmaxud
// CHECK-LE: @llvm.ppc.altivec.vmaxud

  res_vull = vec_max(vbll, vull);
// CHECK: @llvm.ppc.altivec.vmaxud
// CHECK-LE: @llvm.ppc.altivec.vmaxud

  /* vec_min */
  res_vsll = vec_min(vsll, vsll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd

  res_vsll = vec_min(vbll, vsll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd

  res_vsll = vec_min(vsll, vbll);
// CHECK: @llvm.ppc.altivec.vminsd
// CHECK-LE: @llvm.ppc.altivec.vminsd

  res_vull = vec_min(vull, vull);
// CHECK: @llvm.ppc.altivec.vminud
// CHECK-LE: @llvm.ppc.altivec.vminud

  res_vull = vec_min(vbll, vull);
// CHECK: @llvm.ppc.altivec.vminud
// CHECK-LE: @llvm.ppc.altivec.vminud

}
Example #5
/* Function:  p7_MSVFilter()
 * Synopsis:  Calculates MSV score, vewy vewy fast, in limited precision.
 * Incept:    SRE, Wed Dec 26 15:12:25 2007 [Janelia]
 *
 * Purpose:   Calculates an approximation of the MSV score for sequence
 *            <dsq> of length <L> residues, using optimized profile <om>,
 *            and a preallocated one-row DP matrix <ox>. Return the 
 *            estimated MSV score (in nats) in <ret_sc>.
 *            
 *            Score may overflow (and will, on high-scoring
 *            sequences), but will not underflow.
 *            
 *            The model may be in any mode, because only its match
 *            emission scores will be used. The MSV filter inherently
 *            assumes a multihit local mode, and uses its own special
 *            state transition scores, not the scores in the profile.
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues          
 *            om      - optimized profile
 *            ox      - DP matrix
 *            ret_sc  - RETURN: MSV score (in nats)          
 *                      
 * Note:      We misuse the matrix <ox> here, using only a third of the
 *            first dp row, accessing it as <dp[0..Q-1]> rather than
 *            in triplets via <{MDI}MX(q)> macros, since we only need
 *            to store M state values. We know that if <ox> was big
 *            enough for normal DP calculations, it must be big enough
 *            to hold the MSVFilter calculation.
 *
 * Returns:   <eslOK> on success.
 *            <eslERANGE> if the score overflows the limited range; in
 *            this case, this is a high-scoring hit.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small.
 */
int
p7_MSVFilter(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc)
{
  vector unsigned char mpv;        /* previous row values                                       */
  vector unsigned char xEv;	   /* E state: keeps max for Mk->E as we go                     */
  vector unsigned char xBv;	   /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector unsigned char sv;	   /* temp storage of 1 curr row value in progress              */
  vector unsigned char biasv;	   /* emission bias in a vector                                 */
  uint8_t xJ;                      /* special states' scores                                    */
  int i;			   /* counter over sequence positions 1..L                      */
  int q;			   /* counter over vectors 0..nq-1                              */
  int Q        = p7O_NQB(om->M);   /* segment length: # of vectors                              */
  vector unsigned char *dp;	   /* we're going to use dp[0][0..q..Q-1], not {MDI}MX(q) macros*/
  vector unsigned char *rsc;	   /* will point at om->rbv[x] for residue x[i]                 */

  vector unsigned char zerov;	   /* vector of zeros                                           */
  vector unsigned char xJv;        /* vector for states score                                   */
  vector unsigned char tjbmv;       /* vector for B->Mk cost                                     */
  vector unsigned char tecv;       /* vector for E->C  cost                                     */
  vector unsigned char basev;      /* offset for scores                                         */
  vector unsigned char ceilingv;   /* saturated simd value used to test for overflow            */
  vector unsigned char tempv;

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ16)  ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  ox->M   = om->M;

  /* Initialization. In offset unsigned arithmetic, -infinity is 0, and 0 is om->base.
   */
  dp  = ox->dpb[0];
  for (q = 0; q < Q; q++) dp[q] = vec_splat_u8(0);
  xJ   = 0;

  biasv = esl_vmx_set_u8(om->bias_b);
  zerov = vec_splat_u8(0);

  /* saturate simd register for overflow test */
  tempv = vec_splat_u8(1);
  ceilingv = (vector unsigned char)vec_cmpeq(biasv, biasv);
  ceilingv = vec_subs(ceilingv, biasv);
  ceilingv = vec_subs(ceilingv, tempv);

  basev = esl_vmx_set_u8((int8_t) om->base_b);

  tecv = esl_vmx_set_u8((int8_t) om->tec_b);
  tjbmv = esl_vmx_set_u8((int8_t) om->tjb_b + (int8_t) om->tbm_b);

  xJv = vec_subs(biasv, biasv);
  xBv = vec_subs(basev, tjbmv);

#if p7_DEBUGGING
  if (ox->debugging)
	{
	  unsigned char xB;
	  vec_ste(xBv, 0, &xB);
	  vec_ste(xJv, 0, &xJ);
	  p7_omx_DumpMFRow(ox, 0, 0, 0, xJ, xB, xJ);
	}
#endif

  for (i = 1; i <= L; i++)
  {
      rsc = om->rbv[dsq[i]];
      xEv = vec_splat_u8(0);
//      xBv = vec_sub(xBv, tbmv);

      /* Right shifts by 1 byte: elements 4,8,12,x become x,4,8,12.
       * vec_sld concatenates its operands and extracts 16 bytes at the given
       * byte offset; passing zerov first shifts in zeros, which is our
       * -infinity in this offset unsigned scoring scheme.
       */
      mpv = vec_sld(zerov, dp[Q-1], 15);   
      for (q = 0; q < Q; q++)
      {
        /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
        sv   = vec_max(mpv, xBv);
        sv   = vec_adds(sv, biasv);
        sv   = vec_subs(sv, *rsc);   rsc++;
        xEv  = vec_max(xEv, sv);

        mpv   = dp[q];   	  /* Load {MDI}(i-1,q) into mpv */
        dp[q] = sv;       	  /* Do delayed store of M(i,q) now that memory is usable */
      }

      /* Now the "special" states, which start from Mk->E (->C, ->J->B)
       * Use rotates instead of shifts so when the last max has completed,
       * all elements of the simd register will contain the max value.
       */
      tempv = vec_sld(xEv, xEv, 1);
      xEv = vec_max(xEv, tempv);
      tempv = vec_sld(xEv, xEv, 2);
      xEv = vec_max(xEv, tempv);
      tempv = vec_sld(xEv, xEv, 4);
      xEv = vec_max(xEv, tempv);
      tempv = vec_sld(xEv, xEv, 8);
      xEv = vec_max(xEv, tempv);

      /* immediately detect overflow */
      if (vec_any_gt(xEv, ceilingv))
      {
        *ret_sc = eslINFINITY;
        return eslERANGE;
      }

      xEv = vec_subs(xEv, tecv);
      xJv = vec_max(xJv,xEv);

      xBv = vec_max(basev, xJv);
      xBv = vec_subs(xBv, tjbmv);
	  
#if p7_DEBUGGING
      if (ox->debugging)
      {
        unsigned char xB, xE;
        vec_ste(xBv, 0, &xB);
        vec_ste(xEv, 0, &xE);
        vec_ste(xJv, 0, &xJ);
        p7_omx_DumpMFRow(ox, i, xE, 0, xJ, xB, xJ);
      }
#endif
  } /* end loop over sequence residues 1..L */

  /* finally C->T, and add our missing precision on the NN,CC,JJ back */
  vec_ste(xJv, 0, &xJ);
  *ret_sc = ((float) (xJ - om->tjb_b) - (float) om->base_b);
  *ret_sc /= om->scale_b;
  *ret_sc -= 3.0; /* that's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ */

  return eslOK;
}
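The closing arithmetic above undoes the filter's offset/scale encoding in three steps: remove the scaled tjb move cost and base offset, divide by the per-nat scale, and apply the -3.0 nat NN/CC/JJ length approximation. A tiny worked example of the same arithmetic; every numeric value here is made up for illustration:

#include <stdio.h>

int main(void)
{
  /* Hypothetical parameter values, for illustration only. */
  unsigned char xJ      = 187;    /* best score reaching C, offset + scaled       */
  unsigned char tjb_b   = 3;      /* scaled NN/JJ->B move cost (om->tjb_b)        */
  unsigned char base_b  = 190;    /* offset keeping scores unsigned (om->base_b)  */
  float         scale_b = 3.0f;   /* scaled-score units per nat (om->scale_b)     */

  float sc = ((float) (xJ - tjb_b) - (float) base_b);  /* remove offsets */
  sc /= scale_b;                                       /* back to nats   */
  sc -= 3.0f;            /* ~ L log(L/(L+3)) NN/CC/JJ approximation      */

  printf("MSV score = %.2f nats\n", sc);               /* prints -5.00   */
  return 0;
}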
Example #6
/* Function:  p7_SSVFilter_longtarget()
 * Synopsis:  Finds windows with SSV scores above some threshold (vewy vewy fast, in limited precision)
 *
 * Purpose:   Calculates an approximation of the SSV (single ungapped diagonal)
 *            score for regions of sequence <dsq> of length <L> residues, using
 *            optimized profile <om>, and a preallocated one-row DP matrix <ox>,
 *            and captures the positions at which such regions exceed the score
 *            required to be significant in the eyes of the calling function,
 *            which depends on the <bg> and <P> (usually P=0.02 for nhmmer).
 *            Note that this variant performs only SSV computations, never
 *            passing through the J state; the score required to pass SSV at
 *            the default threshold (or less restrictive) is sufficient to
 *            pass MSV in essentially all DNA models we've tested.
 *
 *            Above-threshold diagonals are captured into a preallocated list
 *            <windowlist>. Rather than simply capturing positions at which a
 *            score threshold is reached, this function establishes windows
 *            around those high-scoring positions, using scores in <ssvdata>.
 *            These windows can be merged by the calling function.
 *
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues
 *            om      - optimized profile
 *            ox      - DP matrix
 *            ssvdata    - compact representation of substitution scores, for backtracking diagonals
 *            bg         - the background model, required for translating a P-value threshold into a score threshold
 *            P          - p-value below which a region is captured as being above threshold
 *            windowlist - preallocated container for all hits (resized if necessary)
 *
 *
 * Note:      We misuse the matrix <ox> here, using only a third of the
 *            first dp row, accessing it as <dp[0..Q-1]> rather than
 *            in triplets via <{MDI}MX(q)> macros, since we only need
 *            to store M state values. We know that if <ox> was big
 *            enough for normal DP calculations, it must be big enough
 *            to hold the MSVFilter calculation.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small.
 */
int
p7_SSVFilter_longtarget(const ESL_DSQ *dsq, int L, P7_OPROFILE *om, P7_OMX *ox, const P7_SCOREDATA *ssvdata,
                        P7_BG *bg, double P, P7_HMM_WINDOWLIST *windowlist)
{

  vector unsigned char mpv;        /* previous row values                                       */
  vector unsigned char xEv;		   /* E state: keeps max for Mk->E as we go                     */
  vector unsigned char xBv;		   /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector unsigned char sv;		   /* temp storage of 1 curr row value in progress              */
  vector unsigned char biasv;	   /* emission bias in a vector                                 */
  uint8_t  xJ;                 /* special states' scores                                    */
  int i;			           /* counter over sequence positions 1..L                      */
  int q;			           /* counter over vectors 0..nq-1                              */
  int Q        = p7O_NQB(om->M);   /* segment length: # of vectors                              */
  vector unsigned char *dp  = ox->dpb[0];	   /* we're going to use dp[0][0..q..Q-1], not {MDI}MX(q) macros*/
  vector unsigned char *rsc;			        /* will point at om->rbv[x] for residue x[i]                 */

  vector unsigned char zerov;	   /* vector of zeros                                           */
  vector unsigned char tecv;                    /* vector for E->C  cost                                     */
  vector unsigned char tjbmv;                    /* vector for [JN]->B->M move cost                                  */
  vector unsigned char basev;                   /* offset for scores                                         */

  int status;

  int k;
  int n;
  int end;
  int rem_sc;
  int start;
  int target_end;
  int target_start;
  int max_end;
  int max_sc;
  int sc;
  int pos_since_max;
  float ret_sc;

  union { vector unsigned char v; uint8_t b[16]; } u;


  /*
   * Computing the score required to let P meet the F1 prob threshold
   * In original code, converting from a scaled int MSV
   * score S (the score getting to state E) to a probability goes like this:
   *  usc =  S - om->tec_b - om->tjb_b - om->base_b;
   *  usc /= om->scale_b;
   *  usc -= 3.0;
   *  P = f ( (usc - nullsc) / eslCONST_LOG2 , mu, lambda)
   * and we're computing the threshold usc, so reverse it:
   *  (usc - nullsc) /  eslCONST_LOG2 = inv_f( P, mu, lambda)
   *  usc = nullsc + eslCONST_LOG2 * inv_f( P, mu, lambda)
   *  usc += 3
   *  usc *= om->scale_b
   *  S = usc + om->tec_b + om->tjb_b + om->base_b
   *
   *  Here, I compute the threshold with the length model based on max_length.
   *  It doesn't matter much; in any case, both the bg and om models will change
   *  by roughly 1 bit for each doubling of the length model, so they offset.
   */
  float nullsc;
  float invP = esl_gumbel_invsurv(P, om->evparam[p7_MMU],  om->evparam[p7_MLAMBDA]);
  vector unsigned char sc_threshv;               /* pushes value to saturation if it's above pthresh  */
  int sc_thresh;

  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ16)  ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  ox->M   = om->M;


  p7_bg_SetLength(bg, om->max_length);
  p7_oprofile_ReconfigMSVLength(om, om->max_length);
  p7_bg_NullOne  (bg, dsq, om->max_length, &nullsc);

  sc_thresh = (int) ceil( ( ( nullsc  + (invP * eslCONST_LOG2) + 3.0 )  * om->scale_b ) + om->base_b +  om->tec_b  + om->tjb_b  );
  sc_threshv = esl_vmx_set_u8( (int8_t)sc_thresh - 1);


  /* Initialization. In offset unsigned arithmetic, -infinity is 0, and 0 is om->base.
   */
  biasv = esl_vmx_set_u8(om->bias_b);
  for (q = 0; q < Q; q++) dp[q] = vec_splat_u8(0);
  xJ   = 0;
  zerov = vec_splat_u8(0);


  basev = esl_vmx_set_u8((int8_t) om->base_b);
  tecv = esl_vmx_set_u8((int8_t) om->tec_b);
  tjbmv = esl_vmx_set_u8((int8_t) om->tjb_b + (int8_t) om->tbm_b);

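  /* xB = base - tJB - tBM in this offset system; computed once here because
   * it stays constant across all rows of the SSV recursion                 */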
  xBv = vec_subs(basev, tjbmv);

  for (i = 1; i <= L; i++) {
    rsc = om->rbv[dsq[i]];
    xEv = vec_splat_u8(0);

    /* Shift the previous row's last vector right by one byte:
     * 4,8,12,x becomes x,4,8,12 (vec_sld with a 15-byte offset).
     * Zeros shift in automatically, which is our -infinity.
     */
    mpv = vec_sld(zerov, dp[Q-1], 15);
    for (q = 0; q < Q; q++)
    {
      /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
      sv   = vec_max(mpv, xBv);
      sv   = vec_adds(sv, biasv);
      sv   = vec_subs(sv, *rsc);   rsc++;
      xEv  = vec_max(xEv, sv);

      mpv   = dp[q];      /* Load {MDI}(i-1,q) into mpv */
      dp[q] = sv;         /* Do delayed store of M(i,q) now that memory is usable */
    }

    if (vec_any_gt(xEv, sc_threshv)) { // hit pthresh, so add position to list and reset values
      //figure out which model state hit threshold
      end = -1;
      rem_sc = -1;
      for (q = 0; q < Q; q++) {  // unpack and unstripe, so we can find the state that exceeded pthresh
          u.v = dp[q];
          for (k = 0; k < 16; k++) { // unstripe
            //(q+Q*k+1) is the model position k at which the xE score is found
            if (u.b[k] >= sc_thresh && u.b[k] > rem_sc && (q+Q*k+1) <= om->M) {
              end = (q+Q*k+1);
              rem_sc = u.b[k];
            }
          }
          dp[q] = vec_splat_u8(0); // while we're here ... this will cause values to get reset to xB in next dp iteration
      }

      //recover the diagonal that hit threshold
      start = end;
      target_end = target_start = i;
      sc = rem_sc;
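      /* walk back along the diagonal, removing one residue's contribution
       * (bias_b minus its stored SSV score) per step, until the remaining
       * score drops to the B->M entry baseline                             */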
      while (rem_sc > om->base_b - om->tjb_b - om->tbm_b) {
        rem_sc -= om->bias_b -  ssvdata->ssv_scores[start*om->abc->Kp + dsq[target_start]];
        --start;
        --target_start;
        //if ( start == 0 || target_start==0)    break;
      }
      start++;
      target_start++;
      //extend diagonal further with single diagonal extension
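      // greedy extension: keep the best-scoring endpoint seen so far, and
      // stop after 5 consecutive positions fail to improve on it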
      k = end+1;
      n = target_end+1;
      max_end = target_end;
      max_sc = sc;
      pos_since_max = 0;
      while (k<om->M && n<=L) {
        sc += om->bias_b -  ssvdata->ssv_scores[k*om->abc->Kp + dsq[n]];
        if (sc >= max_sc) {
          max_sc = sc;
          max_end = n;
          pos_since_max=0;
        } else {
          pos_since_max++;
          if (pos_since_max == 5)
            break;
        }
        k++;
        n++;
      }

      end  +=  (max_end - target_end);
      target_end = max_end;

      ret_sc = ((float) (max_sc - om->tjb_b) - (float) om->base_b);
      ret_sc /= om->scale_b;
      ret_sc -= 3.0; // that's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ

      p7_hmmwindow_new(  windowlist,
                         0,                  // sequence_id; used in the FM-based filter, but not here
                         target_start,       // position in the target at which the diagonal starts
                         0,                  // position in the target fm_index at which diagonal starts;  not used here, just in FM-based filter
                         end,                // position in the model at which the diagonal ends
                         end-start+1 ,       // length of diagonal
                         ret_sc,             // score of diagonal
                         p7_NOCOMPLEMENT,    // always p7_NOCOMPLEMENT here;  varies in FM-based filter
                         L
                       );

      i = target_end; // skip forward

    }

  } /* end loop over sequence residues 1..L */

  return eslOK;

  ERROR:
  ESL_EXCEPTION(eslEMEM, "Error allocating memory for hit list\n");
}
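
/* The inversion derived in the comment inside p7_SSVFilter_longtarget() can
 * be read as a small pure function. A minimal sketch under the snippet's own
 * field names (the helper and its factoring are mine, not HMMER's API;
 * assumes the usual hmmer.h/easel headers plus <math.h> for ceil()):
 */
static int
ssv_sc_thresh_sketch(const P7_OPROFILE *om, float nullsc, double P)
{
  /* invert P = surv( (usc - nullsc) / log 2, mu, lambda ) for the score usc */
  double usc = nullsc + eslCONST_LOG2 *
               esl_gumbel_invsurv(P, om->evparam[p7_MMU], om->evparam[p7_MLAMBDA]);
  usc += 3.0;          /* restore the NN/CC/JJ correction subtracted in scoring */
  usc *= om->scale_b;  /* rescale into the offset uint8 score space             */
  return (int) ceil(usc + om->base_b + om->tec_b + om->tjb_b);
}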
Example #7
/* Function:  p7_ViterbiFilter_longtarget()
 * Synopsis:  Finds windows within potentially long sequence blocks with Viterbi
 *            scores above threshold (vewy vewy fast, in limited precision)
 *
 * Purpose:   Calculates an approximation of the Viterbi score for regions
 *            of sequence <dsq>, using optimized profile <om>, and a pre-
 *            allocated one-row DP matrix <ox>, and captures the positions
 *            at which such regions exceed the score required to be
 *            significant in the eyes of the calling function (usually
 *            p=0.001).
 *
 *            The resulting landmarks are converted to subsequence
 *            windows by the calling function.
 *
 *            The model must be in a local alignment mode; other modes
 *            cannot provide the necessary guarantee of no underflow.
 *
 *            This is a striped SIMD Viterbi implementation using PowerPC
 *            VMX/Altivec integer intrinsics \citep{Farrar07}, in reduced
 *            precision (signed words, 16 bits).
 *
 * Args:      dsq     - digital target sequence, 1..L
 *            L       - length of dsq in residues
 *            om      - optimized profile
 *            ox      - DP matrix
 *            filtersc   - null or bias correction, required for translating a P-value threshold into a score threshold
 *            P          - p-value below which a region is captured as being above threshold
 *            windowlist - RETURN: array of hit windows (start and end of diagonal) for the above-threshold areas
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEINVAL> if <ox> allocation is too small, or if
 *            profile isn't in a local alignment mode. (Must be in local
 *            alignment mode because that's what helps us guarantee
 *            limited dynamic range.)
 *
 * Xref:      See p7_ViterbiFilter()
 */
int
p7_ViterbiFilter_longtarget(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox,
                            float filtersc, double P, P7_HMM_WINDOWLIST *windowlist)
{
  vector signed short mpv, dpv, ipv;    /* previous row values                                       */
  vector signed short sv;               /* temp storage of 1 curr row value in progress              */
  vector signed short dcv;              /* delayed storage of D(i,q+1)                               */
  vector signed short xEv;              /* E state: keeps max for Mk->E as we go                     */
  vector signed short xBv;              /* B state: splatted vector of B[i-1] for B->Mk calculations */
  vector signed short Dmaxv;            /* keeps track of maximum D cell on row                      */
  int16_t  xE, xB, xC, xJ, xN;          /* special states' scores                                    */
  int16_t  Dmax;                        /* maximum D cell score on row                               */
  int i;                                /* counter over sequence positions 1..L                      */
  int q;                                /* counter over vectors 0..nq-1                              */
  int Q = p7O_NQW(om->M);               /* segment length: # of vectors                              */
  vector signed short *dp = ox->dpw[0]; /* using {MDI}MX(q) macro requires initialization of <dp>    */
  vector signed short *rsc;             /* will point at om->rwv[x] for residue x[i]                 */
  vector signed short *tsc;             /* will point into (and step thru) om->twv                   */

  vector signed short negInfv;

  int16_t sc_thresh;
  float invP;

  int z;
  union { vector signed short v; int16_t i[8]; } tmp;
  windowlist->count = 0;

  /*
   *  In p7_ViterbiFilter, converting a scaled int Viterbi score S (aka xE,
   *  the score on reaching state E) to a probability goes like this:
   *    vsc  = S + om->xw[p7O_E][p7O_MOVE] + om->xw[p7O_C][p7O_MOVE] - om->base_w
   *    vsc /= om->scale_w
   *    vsc -= 3.0
   *    P    = esl_gumbel_surv( (vsc - filtersc) / eslCONST_LOG2, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA] )
   *  We're computing the threshold vsc, so invert it:
   *    (vsc - filtersc) / eslCONST_LOG2 = esl_gumbel_invsurv( P, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA] )
   *    vsc  = filtersc + eslCONST_LOG2 * esl_gumbel_invsurv( P, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA] )
   *    vsc += 3.0
   *    vsc *= om->scale_w
   *    S    = vsc - (float)om->xw[p7O_E][p7O_MOVE] - (float)om->xw[p7O_C][p7O_MOVE] + (float)om->base_w
   */
  invP = esl_gumbel_invsurv(P, om->evparam[p7_VMU],  om->evparam[p7_VLAMBDA]);
  sc_thresh =   (int) ceil ( ( (filtersc + (eslCONST_LOG2 * invP) + 3.0) * om->scale_w )
                - (float)om->xw[p7O_E][p7O_MOVE] - (float)om->xw[p7O_C][p7O_MOVE] + (float)om->base_w );


  /* Check that the DP matrix is ok for us. */
  if (Q > ox->allocQ8)                                 ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
  if (om->mode != p7_LOCAL && om->mode != p7_UNILOCAL) ESL_EXCEPTION(eslEINVAL, "Fast filter only works for local alignment");
  ox->M   = om->M;

  negInfv = esl_vmx_set_s16((signed short)-32768);

  /* Initialization. In signed 16-bit arithmetic, -infinity is -32768.
   */
  for (q = 0; q < Q; q++)
    MMXo(q) = IMXo(q) = DMXo(q) = negInfv;
  xN   = om->base_w;
  xB   = xN + om->xw[p7O_N][p7O_MOVE];
  xJ   = -32768;
  xC   = -32768;
  xE   = -32768;

#if p7_DEBUGGING
  if (ox->debugging) p7_omx_DumpVFRow(ox, 0, xE, 0, xJ, xB, xC); /* first 0 is <rowi>: do header. second 0 is xN: always 0 here. */
#endif

  for (i = 1; i <= L; i++)
    {
      rsc   = om->rwv[dsq[i]];
      tsc   = om->twv;
      dcv   = negInfv;               /* "-infinity" */
      xEv   = negInfv;
      Dmaxv = negInfv;
      xBv   = esl_vmx_set_s16(xB);

      /* Shift each previous-row vector right by one 16-bit value (2 bytes):
       * 4,8,12,x becomes x,4,8,12 (vec_sld with a 14-byte offset).
       * Zeros would shift in automatically; replace them with -32768.
       */
      mpv = MMXo(Q-1);  mpv = vec_sld(negInfv, mpv, 14);
      dpv = DMXo(Q-1);  dpv = vec_sld(negInfv, dpv, 14);
      ipv = IMXo(Q-1);  ipv = vec_sld(negInfv, ipv, 14);

      for (q = 0; q < Q; q++)
      {
        /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
        sv   =              vec_adds(xBv, *tsc);  tsc++;
        sv   = vec_max (sv, vec_adds(mpv, *tsc)); tsc++;
        sv   = vec_max (sv, vec_adds(ipv, *tsc)); tsc++;
        sv   = vec_max (sv, vec_adds(dpv, *tsc)); tsc++;
        sv   = vec_adds(sv, *rsc);                rsc++;
        xEv  = vec_max(xEv, sv);

        /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
         * {MDI}MX(q) is then the current, not the prev row
         */
        mpv = MMXo(q);
        dpv = DMXo(q);
        ipv = IMXo(q);

        /* Do the delayed stores of {MD}(i,q) now that memory is usable */
        MMXo(q) = sv;
        DMXo(q) = dcv;

        /* Calculate the next D(i,q+1) partially: M->D only;
         * delay storage, holding it in dcv
         */
        dcv   = vec_adds(sv, *tsc);  tsc++;
        Dmaxv = vec_max(dcv, Dmaxv);

        /* Calculate and store I(i,q) */
        sv     =             vec_adds(mpv, *tsc);  tsc++;
        IMXo(q)= vec_max(sv, vec_adds(ipv, *tsc)); tsc++;
      }

      /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
      xE = esl_vmx_hmax_s16(xEv);

      if (xE >= sc_thresh) {
        //hit score threshold. Add a window to the list, then reset scores.

        /* Unpack and unstripe, then find the position responsible for the hit */

        for (q = 0; q < Q; q++) {
          tmp.v = MMXo(q);
          for (z = 0; z < 8; z++)  { // unstripe
            if ( tmp.i[z] == xE && (q+Q*z+1) <= om->M) {
              // (q+Q*z+1) is the model position k at which the xE score is found
              p7_hmmwindow_new(windowlist, 0, i, 0, (q+Q*z+1), 1, 0.0, p7_NOCOMPLEMENT );
            }
          }
          MMXo(q) = IMXo(q) = DMXo(q) = negInfv; //reset score to start search for next vit window.
        }

      } else {

          xN = xN + om->xw[p7O_N][p7O_LOOP];
          xC = ESL_MAX(xC + om->xw[p7O_C][p7O_LOOP], xE + om->xw[p7O_E][p7O_MOVE]);
          xJ = ESL_MAX(xJ + om->xw[p7O_J][p7O_LOOP], xE + om->xw[p7O_E][p7O_LOOP]);
          xB = ESL_MAX(xJ + om->xw[p7O_J][p7O_MOVE], xN + om->xw[p7O_N][p7O_MOVE]);
          /* and now xB will carry over into next i, and xC carries over after i=L */

          /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
           * prove that we don't need to evaluate any D->D paths at all.
           *
           * The observation is that if we can show that on the next row,
           * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
           * for all k, then we don't need any D->D calculations.
           *
           * The test condition is:
           *      max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
           * So:
           *   max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is precalc'ed in om->dd_bound;
           *   max_k D(i,k) is why we tracked Dmaxv;
           *   xB(i) was just calculated above.
           */
          Dmax = esl_vmx_hmax_s16(Dmaxv);
          if (Dmax + om->ddbound_w > xB)
          {
            /* Now we're obligated to do at least one complete DD path to be sure. */
            /* dcv has carried through from end of q loop above */
            dcv = vec_sld(negInfv, dcv, 14);
            tsc = om->twv + 7*Q;  /* set tsc to start of the DD's */
            for (q = 0; q < Q; q++)
              {
                DMXo(q) = vec_max(dcv, DMXo(q));
                dcv     = vec_adds(DMXo(q), *tsc); tsc++;
              }

            /* We may have to do up to three more passes; the check
             * is for whether crossing a segment boundary can improve
             * our score.
             */
            do {
              dcv = vec_sld(negInfv, dcv, 14);
              tsc = om->twv + 7*Q;  /* set tsc to start of the DD's */
              for (q = 0; q < Q; q++)
                {
                  if (! vec_any_gt(dcv, DMXo(q))) break;
                  DMXo(q) = vec_max(dcv, DMXo(q));
                  dcv     = vec_adds(DMXo(q), *tsc);   tsc++;
                }
            } while (q == Q);
          }
          else  /* not calculating DD? then just store the last M->D vector calc'ed.*/
            DMXo(0) = vec_sld(negInfv, dcv, 14);

#if p7_DEBUGGING
          if (ox->debugging) p7_omx_DumpVFRow(ox, i, xE, 0, xJ, xB, xC);
#endif

      }
    } /* end loop over sequence residues 1..L */
  return eslOK;
}
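
/* Both long-target filters above unstripe hits with the same index transform:
 * striped DP cell (vector q, lane z) holds model position k = q + Q*z + 1,
 * and any k > M is padding. A tiny self-contained illustration of that layout
 * (a sketch in plain C, independent of HMMER; V=8 mimics the 8 int16 lanes
 * of a VMX vector):
 */
#include <stdio.h>

int main(void)
{
  int M = 20;               /* model length                                    */
  int V = 8;                /* lanes per vector                                */
  int Q = (M + V - 1) / V;  /* segment length: # of striped vectors, ceil(M/V) */
  for (int q = 0; q < Q; q++)
    for (int z = 0; z < V; z++) {
      int k = q + Q*z + 1;  /* unstripe: striped (q,z) -> model position k     */
      if (k <= M) printf("vector %d, lane %d -> k = %2d\n", q, z, k);
      else        printf("vector %d, lane %d -> padding\n", q, z);
    }
  return 0;
}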