/* Return the number of leading zero bits in the 128-bit value X.

   The quadword is examined as four 32-bit words.  si_clz yields one
   count per word; a word's count is only added to the running total
   when every more-significant word produced a count above 31.  The
   total is read back from the word that si_to_uint extracts.  */
inline static unsigned int
count_leading_zeros (UTItype x)
{
  /* Per-word leading-zero counts.  */
  qword word_clz = si_clz (*(qword *) & x);

  /* All-ones in each word whose count is greater than 31.  */
  qword word_is_zero = si_cgti (word_clz, 31);

  /* Chain the flags: this word and the next one (two_zero), then this
     word and the next two (three_zero).  The byte shifts by 4 and 8
     bring the neighbouring words' flags into position.  */
  qword two_zero = si_and (word_is_zero, si_shlqbyi (word_is_zero, 4));
  qword three_zero = si_and (two_zero, si_shlqbyi (word_is_zero, 8));

  /* Counts of the following words, shifted into place and masked so
     each one contributes only when all words before it were flagged.  */
  qword add_second = si_and (word_is_zero, si_shlqbyi (word_clz, 4));
  qword add_third = si_and (two_zero, si_shlqbyi (word_clz, 8));
  qword add_fourth = si_and (three_zero, si_shlqbyi (word_clz, 12));

  qword total = si_a (si_a (si_a (word_clz, add_second), add_third),
		      add_fourth);

  return si_to_uint (total);
}
/* Unsigned 128-bit division with remainder.

   Divides NUM by DEN with a shift-and-subtract loop carried out on
   SPU quadwords.  Returns the quotient; when RP is non-null the
   remainder is stored through it.

   NOTE(review): DEN == 0 is not checked here; the result in that case
   is whatever the loop happens to produce.  */
UTItype
__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
{
  /* Difference of the leading-zero counts: how far DEN is shifted left
     before the loop starts.  */
  qword shift =
    si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
  qword n0 = si_from_UTItype (num);	/* running remainder */
  qword d0 = si_from_UTItype (den);	/* shifted divisor */
  /* Quotient bit under test.  Presumably a single 1 in the lowest bit
     of the quadword -- confirm against the SPU intrinsics reference.  */
  qword bit = si_andi (si_fsmbi (1), 1);
  qword r0 = si_il (0);			/* quotient accumulated so far */
  qword m1 = si_fsmbi (0x000f);		/* constant used by the borrow chain */
  qword mask, r1, n1;

  /* Shift the divisor and the quotient bit left by SHIFT; the shift is
     issued as a bit-granular part followed by a byte-granular part.  */
  d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
  bit = si_shlqbybi (si_shlqbi (bit, shift), shift);

  do
    {
      /* Candidate quotient with the current bit set.  */
      r1 = si_or (r0, bit);

      /* n1 = n0 - d0 in TImode.  The borrow is generated, moved up one
	 word (4 bytes) and fed into the next step three times so that
	 it travels across all four 32-bit words.  */
      n1 = si_bg (d0, n0);
      n1 = si_shlqbyi (n1, 4);
      n1 = si_sf (m1, n1);

      n1 = si_bgx (d0, n0, n1);
      n1 = si_shlqbyi (n1, 4);
      n1 = si_sf (m1, n1);

      n1 = si_bgx (d0, n0, n1);
      n1 = si_shlqbyi (n1, 4);
      n1 = si_sf (m1, n1);

      n1 = si_sfx (d0, n0, n1);

      /* Keep the candidate quotient and the reduced remainder only
	 when the difference compares greater than -1, i.e. the
	 subtraction did not go negative.  */
      mask = si_fsm (si_cgti (n1, -1));
      r0 = si_selb (r0, r1, mask);
      n0 = si_selb (n0, n1, mask);

      /* Advance to the next lower quotient bit.  */
      bit = si_rotqmbii (bit, -1);
      d0 = si_rotqmbii (d0, -1);
    }
  while (si_to_uint (si_orx (bit)));	/* stop once the bit has been shifted out */

  if (rp)
    *rp = si_to_UTItype (n0);
  return si_to_UTItype (r0);
}
/** * Sort vertices from top to bottom. * Compute area and determine front vs. back facing. * Do coarse clip test against tile bounds * \return FALSE if tri is totally outside tile, TRUE otherwise */ static boolean setup_sort_vertices(const qword vs) { float area, sign; #if DEBUG_VERTS if (spu.init.id==0) { fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); print_vertex(v0); print_vertex(v1); print_vertex(v2); } #endif { /* Load the float values for various processing... */ const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]); const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]); const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]); /* Check if triangle is completely outside the tile bounds * Find the min and max x and y positions of the three poits */ const qword minf = min3fq(f0, f1, f2); const qword maxf = max3fq(f0, f1, f2); /* Compare min and max against cliprect vals */ const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b)); const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0)); /* Use a little magic to work out of the tri is visible or not */ if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE; /* determine bottom to top order of vertices */ /* A table of shuffle patterns for putting vertex_header pointers into correct order. Quite magical. */ const qword sort_order_patterns[] = { SHUFB4(A,B,C,C), SHUFB4(C,A,B,C), SHUFB4(A,C,B,C), SHUFB4(B,C,A,C), SHUFB4(B,A,C,C), SHUFB4(C,B,A,C) }; /* Collate y values into two vectors for comparison. Using only one shuffle constant! 
;) */ const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C)); const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C)); const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C)); const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C)); /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */ const qword compare = si_fcgt(y_012, y_120); /* Compress the result of the comparison into 4 bits */ const qword gather = si_gb(compare); /* Subtract one to attain the index into the LUT. Magical. */ const unsigned int index = si_to_uint(gather) - 1; /* Load the appropriate pattern and construct the desired vector. */ setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]); /* Using the result of the comparison, set sign. Very magical. */ sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f); } setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]); setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]); setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]); /* * Compute triangle's area. Use 1/area to compute partial * derivatives of attributes later. */ area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy; setup.oneOverArea = 1.0f / area; /* The product of area * sign indicates front/back orientation (0/1). * Just in case someone gets the bright idea of switching the front * and back constants without noticing that we're assuming their * values in this operation, also assert that the values are * what we think they are. */ ASSERT(CELL_FACING_FRONT == 0); ASSERT(CELL_FACING_BACK == 1); setup.facing = (area * sign > 0.0f) ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); return TRUE; }