void brw_emit_anyprim_setup( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_context *brw = p->brw; struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; int jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); c->nr_verts = 3; alloc_regs(c); primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); brw_MOV(p, primmask, brw_imm_ud(1)); brw_SHL(p, primmask, primmask, payload_prim); brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | (1<<_3DPRIM_TRISTRIP) | (1<<_3DPRIM_TRIFAN) | (1<<_3DPRIM_TRISTRIP_REVERSE) | (1<<_3DPRIM_POLYGON) | (1<<_3DPRIM_RECTLIST) | (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_Z); jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; brw_emit_tri_setup(c, false); brw_land_fwd_jump(p, jmp); brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | (1<<_3DPRIM_LINESTRIP) | (1<<_3DPRIM_LINELOOP) | (1<<_3DPRIM_LINESTRIP_CONT) | (1<<_3DPRIM_LINESTRIP_BF) | (1<<_3DPRIM_LINESTRIP_CONT_BF))); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_Z); jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; brw_emit_line_setup(c, false); brw_land_fwd_jump(p, jmp); brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_Z); jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; brw_emit_point_sprite_setup(c, false); brw_land_fwd_jump(p, jmp); brw_emit_point_setup( c, false ); }
void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) { struct brw_codegen *p = &c->func; /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) */ brw_ADD(p, c->reg.loopcount, c->reg.nr_verts, brw_imm_d(-2)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_G); brw_IF(p, BRW_EXECUTE_1); { struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect vptr = brw_indirect(1, 0); brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START)); brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); brw_DO(p, BRW_EXECUTE_1); { brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT)); brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); } brw_WHILE(p); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_clip_emit_vue(c, v0, BRW_URB_WRITE_EOT_COMPLETE, ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_END)); } brw_ENDIF(p); }
static void merge_edgeflags( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; const struct brw_context *brw = p->brw; struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, tmp0, brw_imm_ud(_3DPRIM_POLYGON)); /* Get away with using reg.vertex because we know that this is not * a _3DPRIM_TRISTRIP_REVERSE: */ brw_IF(p, BRW_EXECUTE_1); { brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_EQ); brw_MOV(p, byte_offset(c->reg.vertex[0], brw_varying_to_offset(&c->vue_map, VARYING_SLOT_EDGE)), brw_imm_f(0)); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_EQ); brw_MOV(p, byte_offset(c->reg.vertex[2], brw_varying_to_offset(&c->vue_map, VARYING_SLOT_EDGE)), brw_imm_f(0)); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); } brw_ENDIF(p); }
void vec4_generator::generate_pull_constant_load(vec4_instruction *inst, struct brw_reg dst, struct brw_reg index, struct brw_reg offset) { assert(brw->gen <= 7); assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); uint32_t surf_index = index.dw1.ud; struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D), offset); uint32_t msg_type; if (brw->gen >= 6) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else if (brw->gen == 5 || brw->is_g4x) msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; /* Each of the 8 channel enables is considered for whether each * dword is written. */ brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); if (brw->gen < 6) brw_inst_set_cond_modifier(brw, send, inst->base_mrf); brw_set_dp_read_message(p, send, surf_index, BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, BRW_DATAPORT_READ_TARGET_DATA_CACHE, 2, /* mlen */ true, /* header_present */ 1 /* rlen */); brw_mark_surface_used(&prog_data->base, surf_index); }
static void emit_points(struct brw_clip_compile *c, bool do_offset ) { struct brw_compile *p = &c->func; const struct brw_context *brw = p->brw; struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect v0ptr = brw_indirect(2, 0); brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); brw_DO(p, BRW_EXECUTE_1); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); /* draw if edgeflag != 0 */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, deref_1f(v0, brw_varying_to_offset(&c->vue_map, VARYING_SLOT_EDGE)), brw_imm_f(0)); brw_IF(p, BRW_EXECUTE_1); { if (do_offset) apply_one_offset(c, v0); brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); } brw_ENDIF(p); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_NZ); } brw_WHILE(p); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); }
void brw_clip_ff_sync(struct brw_clip_compile *c) { struct brw_codegen *p = &c->func; if (p->devinfo->gen == 5) { brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); brw_IF(p, BRW_EXECUTE_1); { brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); brw_ff_sync(p, c->reg.R0, 0, c->reg.R0, 1, /* allocate */ 1, /* response length */ 0 /* eot */); } brw_ENDIF(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); } }
void brw_emit_tri_clip( struct brw_clip_compile *c ) { struct brw_codegen *p = &c->func; brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); brw_clip_init_clipmask(c); brw_clip_init_ff_sync(c); /* if -ve rhw workaround bit is set, do cliptest */ if (p->devinfo->has_negative_rhw_bug) { brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { brw_clip_test(c); } brw_ENDIF(p); } /* Can't push into do_clip_tri because with polygon (or quad) * flatshading, need to apply the flatshade here because we don't * respect the PV when converting to trifan for emit: */ if (c->has_flat_shading) brw_clip_tri_flat_shade(c); if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) || (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP)) do_clip_tri(c); else maybe_do_clip_tri(c); brw_clip_tri_emit_polygon(c); /* Send an empty message to kill the thread: */ brw_clip_kill_thread(c); }
void vec4_generator::generate_scratch_read(vec4_instruction *inst, struct brw_reg dst, struct brw_reg index) { struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), index); uint32_t msg_type; if (brw->gen >= 6) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else if (brw->gen == 5 || brw->is_g4x) msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; /* Each of the 8 channel enables is considered for whether each * dword is written. */ brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); if (brw->gen < 6) brw_inst_set_cond_modifier(brw, send, inst->base_mrf); brw_set_dp_read_message(p, send, 255, /* binding table index: stateless access */ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, BRW_DATAPORT_READ_TARGET_RENDER_CACHE, 2, /* mlen */ true, /* header_present */ 1 /* rlen */); }
/** * Loads the clip distance for a vertex into `dst`, and ends with * a comparison of it to zero with the condition `cond`. * * - If using a fixed plane, the distance is dot(hpos, plane). * - If using a user clip plane, the distance is directly available in the vertex. */ static inline void load_clip_distance(struct brw_clip_compile *c, struct brw_indirect vtx, struct brw_reg dst, GLuint hpos_offset, int cond) { struct brw_codegen *p = &c->func; dst = vec4(dst); brw_AND(p, vec1(brw_null_reg()), c->reg.vertex_src_mask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { struct brw_indirect temp_ptr = brw_indirect(7, 0); brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx), c->reg.clipdistance_offset); brw_MOV(p, vec1(dst), deref_1f(temp_ptr, 0)); } brw_ELSE(p); { brw_MOV(p, dst, deref_4f(vtx, hpos_offset)); brw_DP4(p, dst, dst, c->reg.plane_equation); } brw_ENDIF(p); brw_CMP(p, brw_null_reg(), cond, vec1(dst), brw_imm_f(0.0f)); }
/*********************************************************************** * Output clipped polygon as an unfilled primitive: */ static void emit_lines(struct brw_clip_compile *c, bool do_offset) { struct brw_compile *p = &c->func; const struct brw_context *brw = p->brw; struct brw_indirect v0 = brw_indirect(0, 0); struct brw_indirect v1 = brw_indirect(1, 0); struct brw_indirect v0ptr = brw_indirect(2, 0); struct brw_indirect v1ptr = brw_indirect(3, 0); /* Need a seperate loop for offset: */ if (do_offset) { brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); brw_DO(p, BRW_EXECUTE_1); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); apply_one_offset(c, v0); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_G); } brw_WHILE(p); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); } /* v1ptr = &inlist[nr_verts] * *v1ptr = v0 */ brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); brw_DO(p, BRW_EXECUTE_1); { brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); /* draw edge if edgeflag != 0 */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, deref_1f(v0, brw_varying_to_offset(&c->vue_map, VARYING_SLOT_EDGE)), brw_imm_f(0)); brw_IF(p, BRW_EXECUTE_1); { brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START); brw_clip_emit_vue(c, v1, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_END); } brw_ENDIF(p); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_NZ); } brw_WHILE(p); brw_inst_set_pred_control(brw, brw_last_inst, BRW_PREDICATE_NORMAL); }
/* Line clipping, more or less following the following algorithm: * * for (p=0;p<MAX_PLANES;p++) { * if (clipmask & (1 << p)) { * GLfloat dp0 = DOTPROD( vtx0, plane[p] ); * GLfloat dp1 = DOTPROD( vtx1, plane[p] ); * * if (dp1 < 0.0f) { * GLfloat t = dp1 / (dp1 - dp0); * if (t > t1) t1 = t; * } else { * GLfloat t = dp0 / (dp0 - dp1); * if (t > t0) t0 = t; * } * * if (t0 + t1 >= 1.0) * return; * } * } * * interp( ctx, newvtx0, vtx0, vtx1, t0 ); * interp( ctx, newvtx1, vtx1, vtx0, t1 ); * */ static void clip_and_emit_line( struct brw_clip_compile *c ) { struct brw_codegen *p = &c->func; struct brw_indirect vtx0 = brw_indirect(0, 0); struct brw_indirect vtx1 = brw_indirect(1, 0); struct brw_indirect newvtx0 = brw_indirect(2, 0); struct brw_indirect newvtx1 = brw_indirect(3, 0); struct brw_indirect plane_ptr = brw_indirect(4, 0); struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); GLint clipdist0_offset = c->key.nr_userclip ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0) : 0; brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); /* Note: init t0, t1 together: */ brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); brw_clip_init_planes(c); brw_clip_init_clipmask(c); /* -ve rhw workaround */ if (p->devinfo->has_negative_rhw_bug) { brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); } /* Set the initial vertex source mask: The first 6 planes are the bounds * of the view volume; the next 8 planes are the user clipping planes. */ brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0)); /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0]. * We'll increment 6 times before we start hitting actual user clipping. */ brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float))); brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) */ brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { brw_AND(p, v1_null_ud, c->reg.vertex_src_mask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { /* user clip distance: just fetch the correct float from each vertex */ struct brw_indirect temp_ptr = brw_indirect(7, 0); brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx0), c->reg.clipdistance_offset); brw_MOV(p, c->reg.dp0, deref_1f(temp_ptr, 0)); brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx1), c->reg.clipdistance_offset); brw_MOV(p, c->reg.dp1, deref_1f(temp_ptr, 0)); } brw_ELSE(p); { /* fixed plane: fetch the hpos, dp4 against the plane. */ if (c->key.nr_userclip) brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); else brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, hpos_offset), c->reg.plane_equation); brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, hpos_offset), c->reg.plane_equation); } brw_ENDIF(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, vec1(c->reg.dp1), brw_imm_f(0.0f)); brw_IF(p, BRW_EXECUTE_1); { /* * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. */ if (p->devinfo->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); brw_IF(p, BRW_EXECUTE_1); { brw_clip_kill_thread(c); } brw_ENDIF(p); } brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); brw_MOV(p, c->reg.t1, c->reg.t); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); } brw_ELSE(p); { /* Coming back in. We know that both cannot be negative * because the line would have been culled in that case. */ /* If both are positive, do nothing */ /* Only on GM965/G965 */ if (p->devinfo->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); brw_IF(p, BRW_EXECUTE_1); } { brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); brw_MOV(p, c->reg.t0, c->reg.t); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); } if (p->devinfo->has_negative_rhw_bug) { brw_ENDIF(p); } } brw_ENDIF(p); } brw_ENDIF(p); /* plane_ptr++; */ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); /* while (planemask>>=1) != 0 */ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1)); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); } brw_WHILE(p); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); brw_IF(p, BRW_EXECUTE_1); { brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, false); brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, false); brw_clip_emit_vue(c, newvtx0, BRW_URB_WRITE_ALLOCATE_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_START); brw_clip_emit_vue(c, newvtx1, BRW_URB_WRITE_EOT_COMPLETE, (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) | URB_WRITE_PRIM_END); } brw_ENDIF(p); brw_clip_kill_thread(c); }
void vec4_generator::generate_scratch_write(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src, struct brw_reg index) { struct brw_reg header = brw_vec8_grf(0, 0); bool write_commit; /* If the instruction is predicated, we'll predicate the send, not * the header setup. */ brw_set_default_predicate_control(p, false); gen6_resolve_implied_move(p, &header, inst->base_mrf); generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), index); brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), retype(src, BRW_REGISTER_TYPE_D)); uint32_t msg_type; if (brw->gen >= 7) msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; else if (brw->gen == 6) msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; else msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; brw_set_default_predicate_control(p, inst->predicate); /* Pre-gen6, we have to specify write commits to ensure ordering * between reads and writes within a thread. Afterwards, that's * guaranteed and write commits only matter for inter-thread * synchronization. */ if (brw->gen >= 6) { write_commit = false; } else { /* The visitor set up our destination register to be g0. This * means that when the next read comes along, we will end up * reading from g0 and causing a block on the write commit. For * write-after-read, we are relying on the value of the previous * read being used (and thus blocking on completion) before our * write is executed. This means we have to be careful in * instruction scheduling to not violate this assumption. */ write_commit = true; } /* Each of the 8 channel enables is considered for whether each * dword is written. */ brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); if (brw->gen < 6) brw_inst_set_cond_modifier(brw, send, inst->base_mrf); brw_set_dp_write_message(p, send, 255, /* binding table index: stateless access */ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, 3, /* mlen */ true, /* header present */ false, /* not a render target write */ write_commit, /* rlen */ false, /* eot */ write_commit); }
void vec4_generator::generate_code(const cfg_t *cfg) { struct annotation_info annotation; memset(&annotation, 0, sizeof(annotation)); foreach_block_and_inst (block, vec4_instruction, inst, cfg) { struct brw_reg src[3], dst; if (unlikely(debug_flag)) annotate(brw, &annotation, cfg, inst, p->next_insn_offset); for (unsigned int i = 0; i < 3; i++) { src[i] = inst->get_src(this->prog_data, i); } dst = inst->get_dst(); brw_set_default_predicate_control(p, inst->predicate); brw_set_default_predicate_inverse(p, inst->predicate_inverse); brw_set_default_saturate(p, inst->saturate); brw_set_default_mask_control(p, inst->force_writemask_all); brw_set_default_acc_write_control(p, inst->writes_accumulator); unsigned pre_emit_nr_insn = p->nr_insn; generate_vec4_instruction(inst, dst, src); if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) { assert(p->nr_insn == pre_emit_nr_insn + 1 || !"conditional_mod, no_dd_check, or no_dd_clear set for IR " "emitting more than 1 instruction"); brw_inst *last = &p->store[pre_emit_nr_insn]; brw_inst_set_cond_modifier(brw, last, inst->conditional_mod); brw_inst_set_no_dd_clear(brw, last, inst->no_dd_clear); brw_inst_set_no_dd_check(brw, last, inst->no_dd_check); } } brw_set_uip_jip(p); annotation_finalize(&annotation, p->next_insn_offset); int before_size = p->next_insn_offset; brw_compact_instructions(p, 0, annotation.ann_count, annotation.ann); int after_size = p->next_insn_offset; if (unlikely(debug_flag)) { if (shader_prog) { fprintf(stderr, "Native code for %s vertex shader %d:\n", shader_prog->Label ? shader_prog->Label : "unnamed", shader_prog->Name); } else { fprintf(stderr, "Native code for vertex program %d:\n", prog->Id); } fprintf(stderr, "vec4 shader: %d instructions. Compacted %d to %d" " bytes (%.0f%%)\n", before_size / 16, before_size, after_size, 100.0f * (before_size - after_size) / before_size); dump_assembly(p->store, annotation.ann_count, annotation.ann, brw, prog); ralloc_free(annotation.ann); } }
static void brw_clip_test( struct brw_clip_compile *c ) { struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg v0 = get_tmp(c); struct brw_reg v1 = get_tmp(c); struct brw_reg v2 = get_tmp(c); struct brw_indirect vt0 = brw_indirect(0, 0); struct brw_indirect vt1 = brw_indirect(1, 0); struct brw_indirect vt2 = brw_indirect(2, 0); struct brw_codegen *p = &c->func; struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); brw_MOV(p, v0, deref_4f(vt0, hpos_offset)); brw_MOV(p, v1, deref_4f(vt1, hpos_offset)); brw_MOV(p, v2, deref_4f(vt2, hpos_offset)); brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* test nearz, xmin, ymin plane */ /* clip.xyz < -clip.w */ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); /* All vertices are outside of a plane, rejected */ brw_AND(p, t, t1, t2); brw_AND(p, t, t, t3); brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); brw_OR(p, tmp0, tmp0, get_element(t, 2)); brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { brw_clip_kill_thread(c); } brw_ENDIF(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); brw_AND(p, t, t, brw_imm_ud(0x1)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); /* test farz, xmax, ymax plane */ /* clip.xyz > clip.w */ brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); /* All vertices are outside of a plane, rejected */ brw_AND(p, t, t1, t2); brw_AND(p, t, t, t3); brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); brw_OR(p, tmp0, tmp0, get_element(t, 2)); brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { brw_clip_kill_thread(c); } brw_ENDIF(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); brw_AND(p, t, t, brw_imm_ud(0x1)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); release_tmps(c); }
/* Use mesa's clipping algorithms, translated to GEN4 assembly. */ void brw_clip_tri( struct brw_clip_compile *c ) { struct brw_codegen *p = &c->func; struct brw_indirect vtx = brw_indirect(0, 0); struct brw_indirect vtxPrev = brw_indirect(1, 0); struct brw_indirect vtxOut = brw_indirect(2, 0); struct brw_indirect plane_ptr = brw_indirect(3, 0); struct brw_indirect inlist_ptr = brw_indirect(4, 0); struct brw_indirect outlist_ptr = brw_indirect(5, 0); struct brw_indirect freelist_ptr = brw_indirect(6, 0); GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); GLint clipdist0_offset = c->key.nr_userclip ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0) : 0; brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); /* Set the initial vertex source mask: The first 6 planes are the bounds * of the view volume; the next 8 planes are the user clipping planes. */ brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0)); /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0]. * We'll increment 6 times before we start hitting actual user clipping. */ brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float))); brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) */ brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { /* vtxOut = freelist_ptr++ */ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); if (c->key.nr_userclip) brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); else brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); brw_DO(p, BRW_EXECUTE_1); { /* vtx = *input_ptr; */ brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); load_clip_distance(c, vtxPrev, c->reg.dpPrev, hpos_offset, BRW_CONDITIONAL_L); /* (prev < 0.0f) */ brw_IF(p, BRW_EXECUTE_1); { load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_GE); /* IS_POSITIVE(next) */ brw_IF(p, BRW_EXECUTE_1); { /* Coming back in. */ brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); /* If (vtxOut == 0) vtxOut = vtxPrev */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev)); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, false); /* *outlist_ptr++ = vtxOut; * nr_verts++; * vtxOut = 0; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); } brw_ENDIF(p); } brw_ELSE(p); { /* *outlist_ptr++ = vtxPrev; * nr_verts++; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_L); /* (next < 0.0f) */ brw_IF(p, BRW_EXECUTE_1); { /* Going out of bounds. Avoid division by zero as we * know dp != dpPrev from DIFFERENT_SIGNS, above. */ brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); /* If (vtxOut == 0) vtxOut = vtx */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx)); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, true); /* *outlist_ptr++ = vtxOut; * nr_verts++; * vtxOut = 0; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); } brw_ENDIF(p); } brw_ENDIF(p); /* vtxPrev = vtx; * inlist_ptr++; */ brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); /* while (--loopcount != 0) */ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); } brw_WHILE(p); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] * inlist = outlist * inlist_ptr = &inlist[0] * outlist_ptr = &outlist[0] */ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); } brw_ENDIF(p); /* plane_ptr++; */ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); /* nr_verts >= 3 */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_GE, c->reg.nr_verts, brw_imm_ud(3)); brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); /* && (planemask>>=1) != 0 */ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1)); brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float))); } brw_WHILE(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); }
/** * Generate the geometry shader program used on Gen6 to perform stream output * (transform feedback). */ void gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key, unsigned num_verts, bool check_edge_flags) { struct brw_codegen *p = &c->func; brw_inst *inst; c->prog_data.svbi_postincrement_value = num_verts; brw_ff_gs_alloc_regs(c, num_verts, true); brw_ff_gs_initialize_header(c); if (key->num_transform_feedback_bindings > 0) { unsigned vertex, binding; struct brw_reg destination_indices_uw = vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW)); /* Note: since we use the binding table to keep track of buffer offsets * and stride, the GS doesn't need to keep track of a separate pointer * into each buffer; it uses a single pointer which increments by 1 for * each vertex. So we use SVBI0 for this pointer, regardless of whether * transform feedback is in interleaved or separate attribs mode. * * Make sure that the buffers have enough room for all the vertices. */ brw_ADD(p, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts)); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.SVBI, 4)); brw_IF(p, BRW_EXECUTE_1); /* Compute the destination indices to write to. Usually we use SVBI[0] * + (0, 1, 2). However, for odd-numbered triangles in tristrips, the * vertices come down the pipeline in reversed winding order, so we need * to flip the order when writing to the transform feedback buffer. To * ensure that flatshading accuracy is preserved, we need to write them * in order SVBI[0] + (0, 2, 1) if we're using the first provoking * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using * the last provoking vertex convention. * * Note: since brw_imm_v can only be used in instructions in * packed-word execution mode, and SVBI is a double-word, we need to * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1), * or (1, 0, 2)) to the destination_indices register, and then add SVBI * using a separate instruction. Also, since the immediate constant is * expressed as packed words, and we need to load double-words into * destination_indices, we need to intersperse zeros to fill the upper * halves of each double-word. */ brw_MOV(p, destination_indices_uw, brw_imm_v(0x00020100)); /* (0, 1, 2) */ if (num_verts == 3) { /* Get primitive type into temp register. */ brw_AND(p, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f)); /* Test if primitive type is TRISTRIP_REVERSE. We need to do this as * an 8-wide comparison so that the conditional MOV that follows * moves all 8 words correctly. */ brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ, get_element_ud(c->reg.temp, 0), brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); /* If so, then overwrite destination_indices_uw with the appropriate * reordering. */ inst = brw_MOV(p, destination_indices_uw, brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */ : 0x00020001)); /* (1, 0, 2) */ brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL); } assert(c->reg.destination_indices.width == BRW_EXECUTE_4); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_4); brw_ADD(p, c->reg.destination_indices, c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0)); brw_pop_insn_state(p); /* For each vertex, generate code to output each varying using the * appropriate binding table entry. */ for (vertex = 0; vertex < num_verts; ++vertex) { /* Set up the correct destination index for this vertex */ brw_MOV(p, get_element_ud(c->reg.header, 5), get_element_ud(c->reg.destination_indices, vertex)); for (binding = 0; binding < key->num_transform_feedback_bindings; ++binding) { unsigned char varying = key->transform_feedback_bindings[binding]; unsigned char slot = c->vue_map.varying_to_slot[varying]; /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1: * * "Prior to End of Thread with a URB_WRITE, the kernel must * ensure that all writes are complete by sending the final * write as a committed write." */ bool final_write = binding == key->num_transform_feedback_bindings - 1 && vertex == num_verts - 1; struct brw_reg vertex_slot = c->reg.vertex[vertex]; vertex_slot.nr += slot / 2; vertex_slot.subnr = (slot % 2) * 16; /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */ vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding]; brw_set_default_access_mode(p, BRW_ALIGN_16); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_4); brw_MOV(p, stride(c->reg.header, 4, 4, 1), retype(vertex_slot, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_svb_write(p, final_write ? c->reg.temp : brw_null_reg(), /* dest */ 1, /* msg_reg_nr */ c->reg.header, /* src0 */ SURF_INDEX_GEN6_SOL_BINDING(binding), /* binding_table_index */ final_write); /* send_commit_msg */ } } brw_ENDIF(p); /* Now, reinitialize the header register from R0 to restore the parts of * the register that we overwrote while streaming out transform feedback * data. */ brw_ff_gs_initialize_header(c); /* Finally, wait for the write commit to occur so that we can proceed to * other things safely. * * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3: * * The write commit does not modify the destination register, but * merely clears the dependency associated with the destination * register. Thus, a simple “mov” instruction using the register as a * source is sufficient to wait for the write commit to occur. */ brw_MOV(p, c->reg.temp, c->reg.temp); } brw_ff_gs_ff_sync(c, 1); brw_ff_gs_overwrite_header_dw2_from_r0(c); switch (num_verts) { case 1: brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); brw_ff_gs_emit_vue(c, c->reg.vertex[0], true); break; case 2: brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[1], true); break; case 3: if (check_edge_flags) { /* Only emit vertices 0 and 1 if this is the first triangle of the * polygon. Otherwise they are redundant. */ brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), get_element_ud(c->reg.R0, 2), brw_imm_ud(BRW_GS_EDGE_INDICATOR_0)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); } brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[1], false); if (check_edge_flags) { brw_ENDIF(p); /* Only emit vertex 2 in PRIM_END mode if this is the last triangle * of the polygon. Otherwise leave the primitive incomplete because * there are more polygon vertices coming. */ brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), get_element_ud(c->reg.R0, 2), brw_imm_ud(BRW_GS_EDGE_INDICATOR_1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); } brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); brw_ff_gs_emit_vue(c, c->reg.vertex[2], true); break; } }