static void merge_edgeflags( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; const struct brw_context *brw = p->brw; struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, tmp0, brw_imm_ud(_3DPRIM_POLYGON)); /* Get away with using reg.vertex because we know that this is not * a _3DPRIM_TRISTRIP_REVERSE: */ brw_IF(p, BRW_EXECUTE_1); { brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_EQ); brw_MOV(p, byte_offset(c->reg.vertex[0], brw_varying_to_offset(&c->vue_map, VARYING_SLOT_EDGE)), brw_imm_f(0)); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_EQ); brw_MOV(p, byte_offset(c->reg.vertex[2], brw_varying_to_offset(&c->vue_map, VARYING_SLOT_EDGE)), brw_imm_f(0)); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); } brw_ENDIF(p); }
void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) { struct brw_codegen *p = &c->func; /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) */ brw_ADD(p, c->reg.loopcount, c->reg.nr_verts, brw_imm_d(-2)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_G); brw_IF(p, BRW_EXECUTE_1); { struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect vptr = brw_indirect(1, 0); brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START)); brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); brw_DO(p, BRW_EXECUTE_1); { brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT)); brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); } brw_WHILE(p); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_clip_emit_vue(c, v0, BRW_URB_WRITE_EOT_COMPLETE, ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_END)); } brw_ENDIF(p); }
static void emit_points(struct brw_clip_compile *c, bool do_offset ) { struct brw_compile *p = &c->func; const struct brw_context *brw = p->brw; struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect v0ptr = brw_indirect(2, 0); brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); brw_DO(p, BRW_EXECUTE_1); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); /* draw if edgeflag != 0 */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, deref_1f(v0, brw_varying_to_offset(&c->vue_map, VARYING_SLOT_EDGE)), brw_imm_f(0)); brw_IF(p, BRW_EXECUTE_1); { if (do_offset) apply_one_offset(c, v0); brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); } brw_ENDIF(p); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_NZ); } brw_WHILE(p); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); }
/* GLfloat iz = 1.0 / dir.z; GLfloat ac = dir.x * iz; GLfloat bc = dir.y * iz; offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; offset *= MRD; */ static void compute_offset( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; const struct brw_context *brw = p->brw; struct brw_reg off = c->reg.offset; struct brw_reg dir = c->reg.dir; brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); brw_MUL(p, vec2(off), dir, get_element(off, 2)); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_GE, brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); }
/*********************************************************************** * Output clipped polygon as an unfilled primitive: */ static void emit_lines(struct brw_clip_compile *c, bool do_offset) { struct brw_compile *p = &c->func; const struct brw_context *brw = p->brw; struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect v1 = brw_indirect(1, 0); struct brw_indirect v0ptr = brw_indirect(2, 0); struct brw_indirect v1ptr = brw_indirect(3, 0); /* Need a seperate loop for offset: */ if (do_offset) { brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); brw_DO(p, BRW_EXECUTE_1); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); apply_one_offset(c, v0); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_G); } brw_WHILE(p); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); } /* v1ptr = &inlist[nr_verts] * *v1ptr = v0 */ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); brw_DO(p, BRW_EXECUTE_1); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); /* draw edge if edgeflag != 0 */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, deref_1f(v0, brw_varying_to_offset(&c->vue_map, VARYING_SLOT_EDGE)), brw_imm_f(0)); brw_IF(p, BRW_EXECUTE_1); { brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START); brw_clip_emit_vue(c, v1, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_END); } brw_ENDIF(p); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_NZ); } brw_WHILE(p); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); }
/* Line clipping, more or less following the following algorithm: * * for (p=0;p<MAX_PLANES;p++) { * if (clipmask & (1 << p)) { * GLfloat dp0 = DOTPROD( vtx0, plane[p] ); * GLfloat dp1 = DOTPROD( vtx1, plane[p] ); * * if (dp1 < 0.0f) { * GLfloat t = dp1 / (dp1 - dp0); * if (t > t1) t1 = t; * } else { * GLfloat t = dp0 / (dp0 - dp1); * if (t > t0) t0 = t; * } * * if (t0 + t1 >= 1.0) * return; * } * } * * interp( ctx, newvtx0, vtx0, vtx1, t0 ); * interp( ctx, newvtx1, vtx1, vtx0, t1 ); * */ static void clip_and_emit_line( struct brw_clip_compile *c ) { struct brw_codegen *p = &c->func; struct brw_indirect vtx0 = brw_indirect(0, 0); struct brw_indirect vtx1 = brw_indirect(1, 0); struct brw_indirect newvtx0 = brw_indirect(2, 0); struct brw_indirect newvtx1 = brw_indirect(3, 0); struct brw_indirect plane_ptr = brw_indirect(4, 0); struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); GLint clipdist0_offset = c->key.nr_userclip ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0) : 0; brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); /* Note: init t0, t1 together: */ brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); brw_clip_init_planes(c); brw_clip_init_clipmask(c); /* -ve rhw workaround */ if (p->devinfo->has_negative_rhw_bug) { brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); } /* Set the initial vertex source mask: The first 6 planes are the bounds * of the view volume; the next 8 planes are the user clipping planes. */ brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0)); /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0]. * We'll increment 6 times before we start hitting actual user clipping. */ brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float))); brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) */ brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { brw_AND(p, v1_null_ud, c->reg.vertex_src_mask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { /* user clip distance: just fetch the correct float from each vertex */ struct brw_indirect temp_ptr = brw_indirect(7, 0); brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx0), c->reg.clipdistance_offset); brw_MOV(p, c->reg.dp0, deref_1f(temp_ptr, 0)); brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx1), c->reg.clipdistance_offset); brw_MOV(p, c->reg.dp1, deref_1f(temp_ptr, 0)); } brw_ELSE(p); { /* fixed plane: fetch the hpos, dp4 against the plane. */ if (c->key.nr_userclip) brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); else brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, hpos_offset), c->reg.plane_equation); brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, hpos_offset), c->reg.plane_equation); } brw_ENDIF(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, vec1(c->reg.dp1), brw_imm_f(0.0f)); brw_IF(p, BRW_EXECUTE_1); { /* * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. */ if (p->devinfo->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); brw_IF(p, BRW_EXECUTE_1); { brw_clip_kill_thread(c); } brw_ENDIF(p); } brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); brw_MOV(p, c->reg.t1, c->reg.t); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); } brw_ELSE(p); { /* Coming back in. We know that both cannot be negative * because the line would have been culled in that case. */ /* If both are positive, do nothing */ /* Only on GM965/G965 */ if (p->devinfo->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); brw_IF(p, BRW_EXECUTE_1); } { brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); brw_MOV(p, c->reg.t0, c->reg.t); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); } if (p->devinfo->has_negative_rhw_bug) { brw_ENDIF(p); } } brw_ENDIF(p); } brw_ENDIF(p); /* plane_ptr++; */ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); /* while (planemask>>=1) != 0 */ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1)); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); } brw_WHILE(p); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); brw_IF(p, BRW_EXECUTE_1); { brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, false); brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, false); brw_clip_emit_vue(c, newvtx0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START); brw_clip_emit_vue(c, newvtx1, BRW_URB_WRITE_EOT_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_END); } brw_ENDIF(p); brw_clip_kill_thread(c); }
/** * Generate assembly for a Vec4 IR instruction. * * \param instruction The Vec4 IR instruction to generate code for. * \param dst The destination register. * \param src An array of up to three source registers. */ void vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, struct brw_reg dst, struct brw_reg *src) { vec4_instruction *inst = (vec4_instruction *) instruction; if (dst.width == BRW_WIDTH_4) { /* This happens in attribute fixups for "dual instanced" geometry * shaders, since they use attributes that are vec4's. Since the exec * width is only 4, it's essential that the caller set * force_writemask_all in order to make sure the instruction is executed * regardless of which channels are enabled. */ assert(inst->force_writemask_all); /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy * the following register region restrictions (from Graphics BSpec: * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions * > Register Region Restrictions) * * 1. ExecSize must be greater than or equal to Width. * * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set * to Width * HorzStride." */ for (int i = 0; i < 3; i++) { if (src[i].file == BRW_GENERAL_REGISTER_FILE) src[i] = stride(src[i], 4, 4, 1); } } switch (inst->opcode) { case BRW_OPCODE_MOV: brw_MOV(p, dst, src[0]); break; case BRW_OPCODE_ADD: brw_ADD(p, dst, src[0], src[1]); break; case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; case BRW_OPCODE_MACH: brw_MACH(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAD: assert(brw->gen >= 6); brw_MAD(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; case BRW_OPCODE_RNDD: brw_RNDD(p, dst, src[0]); break; case BRW_OPCODE_RNDE: brw_RNDE(p, dst, src[0]); break; case BRW_OPCODE_RNDZ: brw_RNDZ(p, dst, src[0]); break; case BRW_OPCODE_AND: brw_AND(p, dst, src[0], src[1]); break; case BRW_OPCODE_OR: brw_OR(p, dst, src[0], src[1]); break; case BRW_OPCODE_XOR: brw_XOR(p, dst, src[0], src[1]); break; case BRW_OPCODE_NOT: brw_NOT(p, dst, src[0]); break; case BRW_OPCODE_ASR: brw_ASR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHR: brw_SHR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHL: brw_SHL(p, dst, src[0], src[1]); break; case BRW_OPCODE_CMP: brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); break; case BRW_OPCODE_SEL: brw_SEL(p, dst, src[0], src[1]); break; case BRW_OPCODE_DPH: brw_DPH(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP4: brw_DP4(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP3: brw_DP3(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP2: brw_DP2(p, dst, src[0], src[1]); break; case BRW_OPCODE_F32TO16: assert(brw->gen >= 7); brw_F32TO16(p, dst, src[0]); break; case BRW_OPCODE_F16TO32: assert(brw->gen >= 7); brw_F16TO32(p, dst, src[0]); break; case BRW_OPCODE_LRP: assert(brw->gen >= 6); brw_LRP(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFREV: assert(brw->gen >= 7); /* BFREV only supports UD type for src and dst. */ brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; case BRW_OPCODE_FBH: assert(brw->gen >= 7); /* FBH only supports UD type for dst. */ brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_FBL: assert(brw->gen >= 7); /* FBL only supports UD type for dst. */ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_CBIT: assert(brw->gen >= 7); /* CBIT only supports UD type for dst. */ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_ADDC: assert(brw->gen >= 7); brw_ADDC(p, dst, src[0], src[1]); break; case BRW_OPCODE_SUBB: assert(brw->gen >= 7); brw_SUBB(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAC: brw_MAC(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFE: assert(brw->gen >= 7); brw_BFE(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFI1: assert(brw->gen >= 7); brw_BFI1(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFI2: assert(brw->gen >= 7); brw_BFI2(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ assert(brw->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8); brw_inst_set_pred_control(brw, if_inst, inst->predicate); } break; case BRW_OPCODE_ELSE: brw_ELSE(p); break; case BRW_OPCODE_ENDIF: brw_ENDIF(p); break; case BRW_OPCODE_DO: brw_DO(p, BRW_EXECUTE_8); break; case BRW_OPCODE_BREAK: brw_BREAK(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: brw_CONT(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_WHILE: brw_WHILE(p); break; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: case SHADER_OPCODE_SQRT: case SHADER_OPCODE_EXP2: case SHADER_OPCODE_LOG2: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: if (brw->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], brw_null_reg()); } else if (brw->gen == 6) { generate_math_gen6(inst, dst, src[0], brw_null_reg()); } else { generate_math1_gen4(inst, dst, src[0]); } break; case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: if (brw->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); } else if (brw->gen == 6) { generate_math_gen6(inst, dst, src[0], src[1]); } else { generate_math2_gen4(inst, dst, src[0], src[1]); } break; case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: generate_tex(inst, dst, src[0], src[1]); break; case VS_OPCODE_URB_WRITE: generate_vs_urb_write(inst); break; case SHADER_OPCODE_GEN4_SCRATCH_READ: generate_scratch_read(inst, dst, src[0]); break; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: generate_scratch_write(inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD: generate_pull_constant_load(inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); break; case GS_OPCODE_URB_WRITE: generate_gs_urb_write(inst); break; case GS_OPCODE_THREAD_END: generate_gs_thread_end(inst); break; case GS_OPCODE_SET_WRITE_OFFSET: generate_gs_set_write_offset(dst, src[0], src[1]); break; case GS_OPCODE_SET_VERTEX_COUNT: generate_gs_set_vertex_count(dst, src[0]); break; case GS_OPCODE_SET_DWORD_2_IMMED: generate_gs_set_dword_2_immed(dst, src[0]); break; case GS_OPCODE_PREPARE_CHANNEL_MASKS: generate_gs_prepare_channel_masks(dst); break; case GS_OPCODE_SET_CHANNEL_MASKS: generate_gs_set_channel_masks(dst, src[0]); break; case GS_OPCODE_GET_INSTANCE_ID: generate_gs_get_instance_id(dst); break; case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], prog_data->base.binding_table.shader_time_start); brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.shader_time_start); break; case SHADER_OPCODE_UNTYPED_ATOMIC: generate_untyped_atomic(inst, dst, src[0], src[1]); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: generate_untyped_surface_read(inst, dst, src[0]); break; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: generate_unpack_flags(inst, dst); break; default: if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n", opcode_descs[inst->opcode].name); } else { _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode); } abort(); } }
static void brw_clip_test( struct brw_clip_compile *c ) { struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg v0 = get_tmp(c); struct brw_reg v1 = get_tmp(c); struct brw_reg v2 = get_tmp(c); struct brw_indirect vt0 = brw_indirect(0, 0); struct brw_indirect vt1 = brw_indirect(1, 0); struct brw_indirect vt2 = brw_indirect(2, 0); struct brw_codegen *p = &c->func; struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); brw_MOV(p, v0, deref_4f(vt0, hpos_offset)); brw_MOV(p, v1, deref_4f(vt1, hpos_offset)); brw_MOV(p, v2, deref_4f(vt2, hpos_offset)); brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* test nearz, xmin, ymin plane */ /* clip.xyz < -clip.w */ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); /* All vertices are outside of a plane, rejected */ brw_AND(p, t, t1, t2); brw_AND(p, t, t, t3); brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); brw_OR(p, tmp0, tmp0, get_element(t, 2)); brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { brw_clip_kill_thread(c); } brw_ENDIF(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); brw_AND(p, t, t, brw_imm_ud(0x1)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); /* test farz, xmax, ymax plane */ /* clip.xyz > clip.w */ brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); /* All vertices are outside of a plane, rejected */ brw_AND(p, t, t1, t2); brw_AND(p, t, t, t3); brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); brw_OR(p, tmp0, tmp0, get_element(t, 2)); brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { brw_clip_kill_thread(c); } brw_ENDIF(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); brw_AND(p, t, t, brw_imm_ud(0x1)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); release_tmps(c); }
/* Use mesa's clipping algorithms, translated to GEN4 assembly. */ void brw_clip_tri( struct brw_clip_compile *c ) { struct brw_codegen *p = &c->func; struct brw_indirect vtx = brw_indirect(0, 0); struct brw_indirect vtxPrev = brw_indirect(1, 0); struct brw_indirect vtxOut = brw_indirect(2, 0); struct brw_indirect plane_ptr = brw_indirect(3, 0); struct brw_indirect inlist_ptr = brw_indirect(4, 0); struct brw_indirect outlist_ptr = brw_indirect(5, 0); struct brw_indirect freelist_ptr = brw_indirect(6, 0); GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); GLint clipdist0_offset = c->key.nr_userclip ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0) : 0; brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); /* Set the initial vertex source mask: The first 6 planes are the bounds * of the view volume; the next 8 planes are the user clipping planes. */ brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0)); /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0]. * We'll increment 6 times before we start hitting actual user clipping. */ brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float))); brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) */ brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { /* vtxOut = freelist_ptr++ */ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); if (c->key.nr_userclip) brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); else brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); brw_DO(p, BRW_EXECUTE_1); { /* vtx = *input_ptr; */ brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); load_clip_distance(c, vtxPrev, c->reg.dpPrev, hpos_offset, BRW_CONDITIONAL_L); /* (prev < 0.0f) */ brw_IF(p, BRW_EXECUTE_1); { load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_GE); /* IS_POSITIVE(next) */ brw_IF(p, BRW_EXECUTE_1); { /* Coming back in. */ brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); /* If (vtxOut == 0) vtxOut = vtxPrev */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev)); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, false); /* *outlist_ptr++ = vtxOut; * nr_verts++; * vtxOut = 0; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); } brw_ENDIF(p); } brw_ELSE(p); { /* *outlist_ptr++ = vtxPrev; * nr_verts++; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_L); /* (next < 0.0f) */ brw_IF(p, BRW_EXECUTE_1); { /* Going out of bounds. Avoid division by zero as we * know dp != dpPrev from DIFFERENT_SIGNS, above. */ brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); /* If (vtxOut == 0) vtxOut = vtx */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx)); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, true); /* *outlist_ptr++ = vtxOut; * nr_verts++; * vtxOut = 0; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); } brw_ENDIF(p); } brw_ENDIF(p); /* vtxPrev = vtx; * inlist_ptr++; */ brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); /* while (--loopcount != 0) */ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); } brw_WHILE(p); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] * inlist = outlist * inlist_ptr = &inlist[0] * outlist_ptr = &outlist[0] */ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); } brw_ENDIF(p); /* plane_ptr++; */ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); /* nr_verts >= 3 */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_GE, c->reg.nr_verts, brw_imm_ud(3)); brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); /* && (planemask>>=1) != 0 */ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1)); brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float))); } brw_WHILE(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); }
/** * Generate the geometry shader program used on Gen6 to perform stream output * (transform feedback). */ void gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key, unsigned num_verts, bool check_edge_flags) { struct brw_codegen *p = &c->func; brw_inst *inst; c->prog_data.svbi_postincrement_value = num_verts; brw_ff_gs_alloc_regs(c, num_verts, true); brw_ff_gs_initialize_header(c); if (key->num_transform_feedback_bindings > 0) { unsigned vertex, binding; struct brw_reg destination_indices_uw = vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW)); /* Note: since we use the binding table to keep track of buffer offsets * and stride, the GS doesn't need to keep track of a separate pointer * into each buffer; it uses a single pointer which increments by 1 for * each vertex. So we use SVBI0 for this pointer, regardless of whether * transform feedback is in interleaved or separate attribs mode. * * Make sure that the buffers have enough room for all the vertices. */ brw_ADD(p, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts)); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.SVBI, 4)); brw_IF(p, BRW_EXECUTE_1); /* Compute the destination indices to write to. Usually we use SVBI[0] * + (0, 1, 2). However, for odd-numbered triangles in tristrips, the * vertices come down the pipeline in reversed winding order, so we need * to flip the order when writing to the transform feedback buffer. To * ensure that flatshading accuracy is preserved, we need to write them * in order SVBI[0] + (0, 2, 1) if we're using the first provoking * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using * the last provoking vertex convention. * * Note: since brw_imm_v can only be used in instructions in * packed-word execution mode, and SVBI is a double-word, we need to * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1), * or (1, 0, 2)) to the destination_indices register, and then add SVBI * using a separate instruction. Also, since the immediate constant is * expressed as packed words, and we need to load double-words into * destination_indices, we need to intersperse zeros to fill the upper * halves of each double-word. */ brw_MOV(p, destination_indices_uw, brw_imm_v(0x00020100)); /* (0, 1, 2) */ if (num_verts == 3) { /* Get primitive type into temp register. */ brw_AND(p, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f)); /* Test if primitive type is TRISTRIP_REVERSE. We need to do this as * an 8-wide comparison so that the conditional MOV that follows * moves all 8 words correctly. */ brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ, get_element_ud(c->reg.temp, 0), brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); /* If so, then overwrite destination_indices_uw with the appropriate * reordering. */ inst = brw_MOV(p, destination_indices_uw, brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */ : 0x00020001)); /* (1, 0, 2) */ brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL); } assert(c->reg.destination_indices.width == BRW_EXECUTE_4); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_4); brw_ADD(p, c->reg.destination_indices, c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0)); brw_pop_insn_state(p); /* For each vertex, generate code to output each varying using the * appropriate binding table entry. */ for (vertex = 0; vertex < num_verts; ++vertex) { /* Set up the correct destination index for this vertex */ brw_MOV(p, get_element_ud(c->reg.header, 5), get_element_ud(c->reg.destination_indices, vertex)); for (binding = 0; binding < key->num_transform_feedback_bindings; ++binding) { unsigned char varying = key->transform_feedback_bindings[binding]; unsigned char slot = c->vue_map.varying_to_slot[varying]; /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1: * * "Prior to End of Thread with a URB_WRITE, the kernel must * ensure that all writes are complete by sending the final * write as a committed write." */ bool final_write = binding == key->num_transform_feedback_bindings - 1 && vertex == num_verts - 1; struct brw_reg vertex_slot = c->reg.vertex[vertex]; vertex_slot.nr += slot / 2; vertex_slot.subnr = (slot % 2) * 16; /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */ vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding]; brw_set_default_access_mode(p, BRW_ALIGN_16); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_4); brw_MOV(p, stride(c->reg.header, 4, 4, 1), retype(vertex_slot, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_svb_write(p, final_write ? c->reg.temp : brw_null_reg(), /* dest */ 1, /* msg_reg_nr */ c->reg.header, /* src0 */ SURF_INDEX_GEN6_SOL_BINDING(binding), /* binding_table_index */ final_write); /* send_commit_msg */ } } brw_ENDIF(p); /* Now, reinitialize the header register from R0 to restore the parts of * the register that we overwrote while streaming out transform feedback * data. */ brw_ff_gs_initialize_header(c); /* Finally, wait for the write commit to occur so that we can proceed to * other things safely. * * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3: * * The write commit does not modify the destination register, but * merely clears the dependency associated with the destination * register. Thus, a simple “mov” instruction using the register as a * source is sufficient to wait for the write commit to occur. */ brw_MOV(p, c->reg.temp, c->reg.temp); } brw_ff_gs_ff_sync(c, 1); brw_ff_gs_overwrite_header_dw2_from_r0(c); switch (num_verts) { case 1: brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); brw_ff_gs_emit_vue(c, c->reg.vertex[0], true); break; case 2: brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[1], true); break; case 3: if (check_edge_flags) { /* Only emit vertices 0 and 1 if this is the first triangle of the * polygon. Otherwise they are redundant. */ brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), get_element_ud(c->reg.R0, 2), brw_imm_ud(BRW_GS_EDGE_INDICATOR_0)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); } brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[1], false); if (check_edge_flags) { brw_ENDIF(p); /* Only emit vertex 2 in PRIM_END mode if this is the last triangle * of the polygon. Otherwise leave the primitive incomplete because * there are more polygon vertices coming. */ brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), get_element_ud(c->reg.R0, 2), brw_imm_ud(BRW_GS_EDGE_INDICATOR_1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); } brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); brw_ff_gs_emit_vue(c, c->reg.vertex[2], true); break; } }
void brw_set_default_predicate_control( struct brw_compile *p, unsigned pc ) { brw_inst_set_pred_control(p->brw, p->current, pc); }
void brw_set_default_predicate_control( struct brw_codegen *p, unsigned pc ) { brw_inst_set_pred_control(p->devinfo, p->current, pc); }