Example #1
0
inline static unsigned int
count_leading_zeros (UTItype x)
{
  qword c = si_clz (*(qword *) & x);
  qword cmp0 = si_cgti (c, 31);
  qword cmp1 = si_and (cmp0, si_shlqbyi (cmp0, 4));
  qword cmp2 = si_and (cmp1, si_shlqbyi (cmp0, 8));
  qword s = si_a (c, si_and (cmp0, si_shlqbyi (c, 4)));
  s = si_a (s, si_and (cmp1, si_shlqbyi (c, 8)));
  s = si_a (s, si_and (cmp2, si_shlqbyi (c, 12)));
  return si_to_uint (s);
}
Example #2
0
UTItype
__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
{
  qword shift =
    si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
  qword n0 = si_from_UTItype (num);
  qword d0 = si_from_UTItype (den);
  qword bit = si_andi (si_fsmbi (1), 1);
  qword r0 = si_il (0);
  qword m1 = si_fsmbi (0x000f);
  qword mask, r1, n1;

  d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
  bit = si_shlqbybi (si_shlqbi (bit, shift), shift);

  do
    {
      r1 = si_or (r0, bit);

      // n1 = n0 - d0 in TImode
      n1 = si_bg (d0, n0);
      n1 = si_shlqbyi (n1, 4);
      n1 = si_sf (m1, n1);
      n1 = si_bgx (d0, n0, n1);
      n1 = si_shlqbyi (n1, 4);
      n1 = si_sf (m1, n1);
      n1 = si_bgx (d0, n0, n1);
      n1 = si_shlqbyi (n1, 4);
      n1 = si_sf (m1, n1);
      n1 = si_sfx (d0, n0, n1);

      mask = si_fsm (si_cgti (n1, -1));
      r0 = si_selb (r0, r1, mask);
      n0 = si_selb (n0, n1, mask);
      bit = si_rotqmbii (bit, -1);
      d0 = si_rotqmbii (d0, -1);
    }
  while (si_to_uint (si_orx (bit)));
  if (rp)
    *rp = si_to_UTItype (n0);
  return si_to_UTItype (r0);
}
Example #3
0
/**
 * Sort vertices from top to bottom.
 * Compute area and determine front vs. back facing.
 * Do coarse clip test against tile bounds
 * \return  FALSE if tri is totally outside tile, TRUE otherwise
 */
static boolean
setup_sort_vertices(const qword vs)
{
   float area, sign;

#if DEBUG_VERTS
   if (spu.init.id==0) {
      fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
      print_vertex(v0);
      print_vertex(v1);
      print_vertex(v2);
   }
#endif

   {
      /* Load the float values for various processing... */
      const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]);
      const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]);
      const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]);

      /* Check if triangle is completely outside the tile bounds
       * Find the min and max x and y positions of the three poits */
      const qword minf = min3fq(f0, f1, f2);
      const qword maxf = max3fq(f0, f1, f2);

      /* Compare min and max against cliprect vals */
      const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b));
      const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0));

      /* Use a little magic to work out of the tri is visible or not */
      if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE;

      /* determine bottom to top order of vertices */
      /* A table of shuffle patterns for putting vertex_header pointers into
         correct order.  Quite magical. */
      const qword sort_order_patterns[] = {
         SHUFB4(A,B,C,C),
         SHUFB4(C,A,B,C),
         SHUFB4(A,C,B,C),
         SHUFB4(B,C,A,C),
         SHUFB4(B,A,C,C),
         SHUFB4(C,B,A,C) };

      /* Collate y values into two vectors for comparison.
         Using only one shuffle constant! ;) */
      const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C));
      const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C));
      const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C));
      const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C));

      /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
      const qword compare = si_fcgt(y_012, y_120);
      /* Compress the result of the comparison into 4 bits */
      const qword gather = si_gb(compare);
      /* Subtract one to attain the index into the LUT.  Magical. */
      const unsigned int index = si_to_uint(gather) - 1;

      /* Load the appropriate pattern and construct the desired vector. */
      setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]);

      /* Using the result of the comparison, set sign.
         Very magical. */
      sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f);
   }

   setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
   setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
   setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);

   /*
    * Compute triangle's area.  Use 1/area to compute partial
    * derivatives of attributes later.
    */
   area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;

   setup.oneOverArea = 1.0f / area;

   /* The product of area * sign indicates front/back orientation (0/1).
    * Just in case someone gets the bright idea of switching the front
    * and back constants without noticing that we're assuming their
    * values in this operation, also assert that the values are
    * what we think they are.
    */
   ASSERT(CELL_FACING_FRONT == 0);
   ASSERT(CELL_FACING_BACK == 1);
   setup.facing = (area * sign > 0.0f)
      ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);

   return TRUE;
}