/**
 * Per-lane step (threshold) function.
 *
 * @param v    input values, one per vector lane
 * @param vth  threshold values, one per vector lane
 * @return     a vector holding 1.0 in every lane where (v - vth) > 0 and
 *             0.0 in every other lane
 */
inline vec_float4 updateF(vec_float4 v,vec_float4 vth)
{
   const vec_float4 zeros = {0.0f, 0.0f, 0.0f, 0.0f};
   const vec_float4 ones = {1.0f, 1.0f, 1.0f, 1.0f};

   /*
    * The comparison yields an all-ones lane where the difference is
    * strictly positive.  spu_sel() then takes the bits of its second
    * operand in those lanes and the bits of its first operand elsewhere,
    * so every lane of the result is fully defined by a single select.
    */
   return spu_sel(zeros, ones, spu_cmpgt(spu_sub(v, vth), zeros));
}
/** * Setup fragment shader inputs by evaluating triangle's vertex * attribute coefficient info. * \param x quad x pos * \param y quad y pos * \param fragZ returns quad Z values * \param fragInputs returns fragment program inputs * Note: this code could be incorporated into the fragment program * itself to avoid the loop and switch. */ static void eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[]) { static const vector float deltaX = (const vector float) {0, 1, 0, 1}; static const vector float deltaY = (const vector float) {0, 0, 1, 1}; const uint posSlot = 0; const vector float pos = setup.coef[posSlot].a0; const vector float dposdx = setup.coef[posSlot].dadx; const vector float dposdy = setup.coef[posSlot].dady; const vector float fragX = spu_splats(x) + deltaX; const vector float fragY = spu_splats(y) + deltaY; vector float fragW, wInv; uint i; *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy); fragW = splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy); wInv = spu_re(fragW); /* 1 / w */ /* loop over fragment program inputs */ for (i = 0; i < spu.vertex_info.num_attribs; i++) { uint attr = i + 1; enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode; /* constant term */ vector float a0 = setup.coef[attr].a0; vector float r0 = splatx(a0); vector float r1 = splaty(a0); vector float r2 = splatz(a0); vector float r3 = splatw(a0); if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) { /* linear term */ vector float dadx = setup.coef[attr].dadx; vector float dady = setup.coef[attr].dady; /* Use SPU intrinsics here to get slightly better code. 
* originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady); */ r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0)); r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1)); r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2)); r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3)); if (interp == INTERP_PERSPECTIVE) { /* perspective term */ r0 *= wInv; r1 *= wInv; r2 *= wInv; r3 *= wInv; } } fragInputs[CHAN0] = r0; fragInputs[CHAN1] = r1; fragInputs[CHAN2] = r2; fragInputs[CHAN3] = r3; fragInputs += 4; } } /** * Emit a quad (pass to next stage). No clipping is done. * Note: about 1/5 to 1/7 of the time, mask is zero and this function * should be skipped. But adding the test for that slows things down * overall. */ static INLINE void emit_quad( int x, int y, mask_t mask) { /* If any bits in mask are set... */ if (spu_extract(spu_orx(mask), 0)) { const int ix = x - setup.cliprect_minx; const int iy = y - setup.cliprect_miny; spu.cur_ctile_status = TILE_STATUS_DIRTY; spu.cur_ztile_status = TILE_STATUS_DIRTY; { /* * Run fragment shader, execute per-fragment ops, update fb/tile. */ vector float inputs[4*4], outputs[2*4]; vector unsigned int kill_mask; vector float fragZ; eval_inputs((float) x, (float) y, &fragZ, inputs); ASSERT(spu.fragment_program); ASSERT(spu.fragment_ops); /* Execute the current fragment program */ kill_mask = spu.fragment_program(inputs, outputs, spu.constants); mask = spu_andc(mask, kill_mask); /* Execute per-fragment/quad operations, including: * alpha test, z test, stencil test, blend and framebuffer writing. * Note that there are two different fragment operations functions * that can be called, one for front-facing fragments, and one * for back-facing fragments. (Often the two are the same; * but in some cases, like two-sided stenciling, they can be * very different.) So choose the correct function depending * on the calculated facing. 
*/ spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile, fragZ, outputs[0*4+0], outputs[0*4+1], outputs[0*4+2], outputs[0*4+3], mask); } } } /** * Given an X or Y coordinate, return the block/quad coordinate that it * belongs to. */ static INLINE int block(int x) { return x & ~1; } /** * Render a horizontal span of quads */ static void flush_spans(void) { int minleft, maxright; const int l0 = spu_extract(setup.span.quad, 0); const int l1 = spu_extract(setup.span.quad, 1); const int r0 = spu_extract(setup.span.quad, 2); const int r1 = spu_extract(setup.span.quad, 3); switch (setup.span.y_flags) { case 0x3: /* both odd and even lines written (both quad rows) */ minleft = MIN2(l0, l1); maxright = MAX2(r0, r1); break; case 0x1: /* only even line written (quad top row) */ minleft = l0; maxright = r0; break; case 0x2: /* only odd line written (quad bottom row) */ minleft = l1; maxright = r1; break; default: return; } /* OK, we're very likely to need the tile data now. * clear or finish waiting if needed. 
*/ if (spu.cur_ctile_status == TILE_STATUS_GETTING) { /* wait for mfc_get() to complete */ //printf("SPU: %u: waiting for ctile\n", spu.init.id); wait_on_mask(1 << TAG_READ_TILE_COLOR); spu.cur_ctile_status = TILE_STATUS_CLEAN; } else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty); clear_c_tile(&spu.ctile); spu.cur_ctile_status = TILE_STATUS_DIRTY; } ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); if (spu.read_depth_stencil) { if (spu.cur_ztile_status == TILE_STATUS_GETTING) { /* wait for mfc_get() to complete */ //printf("SPU: %u: waiting for ztile\n", spu.init.id); wait_on_mask(1 << TAG_READ_TILE_Z); spu.cur_ztile_status = TILE_STATUS_CLEAN; } else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) { //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty); clear_z_tile(&spu.ztile); spu.cur_ztile_status = TILE_STATUS_DIRTY; } ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); } /* XXX this loop could be moved into the above switch cases... */ /* Setup for mask calculation */ const vec_int4 quad_LlRr = setup.span.quad; const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8); const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B)); const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B)); const vec_int4 twos = spu_splats(2); const int x = block(minleft); vec_int4 xs = {x, x+1, x, x+1}; for (; spu_extract(xs, 0) <= block(maxright); xs += twos) { /** * Computes mask to indicate which pixels in the 2x2 quad are actually * inside the triangle's bounds. 
*/ /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */ const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs); const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs); /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */ const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs); /* Combine results to create mask */ const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs); emit_quad(spu_extract(xs, 0), setup.span.y, mask); } setup.span.y = 0; setup.span.y_flags = 0; /* Zero right elements */ setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); } #if DEBUG_VERTS static void print_vertex(const struct vertex_header *v) { uint i; fprintf(stderr, " Vertex: (%p)\n", v); for (i = 0; i < spu.vertex_info.num_attribs; i++) { fprintf(stderr, " %d: %f %f %f %f\n", i, spu_extract(v->data[i], 0), spu_extract(v->data[i], 1), spu_extract(v->data[i], 2), spu_extract(v->data[i], 3)); } }