void brw_emit_anyprim_setup( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_context *brw = p->brw; struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; int jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); c->nr_verts = 3; alloc_regs(c); primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); brw_MOV(p, primmask, brw_imm_ud(1)); brw_SHL(p, primmask, primmask, payload_prim); brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | (1<<_3DPRIM_TRISTRIP) | (1<<_3DPRIM_TRIFAN) | (1<<_3DPRIM_TRISTRIP_REVERSE) | (1<<_3DPRIM_POLYGON) | (1<<_3DPRIM_RECTLIST) | (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_Z); jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; brw_emit_tri_setup(c, false); brw_land_fwd_jump(p, jmp); brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | (1<<_3DPRIM_LINESTRIP) | (1<<_3DPRIM_LINELOOP) | (1<<_3DPRIM_LINESTRIP_CONT) | (1<<_3DPRIM_LINESTRIP_BF) | (1<<_3DPRIM_LINESTRIP_CONT_BF))); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_Z); jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; brw_emit_line_setup(c, false); brw_land_fwd_jump(p, jmp); brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); brw_inst_set_cond_modifier(brw, brw_last_inst, BRW_CONDITIONAL_Z); jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store; brw_emit_point_sprite_setup(c, false); brw_land_fwd_jump(p, jmp); brw_emit_point_setup( c, false ); }
/**
 * Post-fragment-program processing: load the message registers with the
 * colour and depth payload and send the results to the framebuffer.
 *
 * \param c       WM compile state; its encoder (c->func) receives the
 *                emitted instructions
 * \param arg0    the fragment color, one register per channel (indices 0..3)
 * \param arg1    the pass-through depth value
 * \param arg2    the shader-computed depth value
 * \param target  passed through to fire_fb_write(); on gen6+ a non-zero
 *                value is also written into sub-register 2 of m0
 * \param eot     passed through to fire_fb_write()
 */
void emit_fb_write(struct brw_wm_compile *c,
                   struct brw_reg *arg0,
                   struct brw_reg *arg1,
                   struct brw_reg *arg2,
                   GLuint target,
                   GLuint eot)
{
   struct brw_compile *p = &c->func;
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   const GLuint simd16 = (c->dispatch_width == 16);
   /* The payload starts at m2; one more register is reserved in front of
    * the colour data for AA, which may turn out not to be needed.
    */
   GLuint nr = c->aa_dest_stencil_reg ? 3 : 2;
   GLuint channel;

   /* The original author notes it is unclear how this achieves the color
    * interleave (ie RGBARGBA) in the result.  Saturation is applied here.
    */
   brw_push_insn_state(p);

   if (c->key.clamp_fragment_color)
      brw_set_saturate(p, 1);

   for (channel = 0; channel < 4; channel++) {
      if (intel->gen >= 6) {
         /* gen6 SIMD16 single source DP write looks like:
          * m + 0: r0
          * m + 1: r1
          * m + 2: g0
          * m + 3: g1
          * m + 4: b0
          * m + 5: b1
          * m + 6: a0
          * m + 7: a1
          *
          * so each channel takes two registers in SIMD16 and one otherwise.
          */
         const GLuint stride = simd16 ? 2 : 1;

         brw_MOV(p, brw_message_reg(nr + channel * stride), arg0[channel]);
         continue;
      }

      if (simd16 && brw->has_compr4) {
         /* pre-gen6 SIMD16 single source DP write looks like:
          * m + 0: r0
          * m + 1: g0
          * m + 2: b0
          * m + 3: a0
          * m + 4: r1
          * m + 5: g1
          * m + 6: b1
          * m + 7: a1
          *
          * By setting the high bit of the MRF register number, we indicate
          * that we want COMPR4 mode - instead of doing the usual destination
          * + 1 for the second half we get destination + 4.
          */
         brw_MOV(p,
                 brw_message_reg(nr + channel + BRW_MRF_COMPR4),
                 arg0[channel]);
         continue;
      }

      /* No COMPR4: emit the halves as separate uncompressed moves.
       *
       *    mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 }
       *    mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf }
       */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]);

      if (simd16) {
         brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
         brw_MOV(p,
                 brw_message_reg(nr + channel + 4),
                 sechalf(arg0[channel]));
      }
   }

   brw_set_saturate(p, 0);

   /* Step past the colour registers populated above. */
   nr += simd16 ? 8 : 4;

   brw_pop_insn_state(p);

   if (c->source_depth_to_render_target) {
      /* Shader-computed depth when there is one, pass-through otherwise
       * (the original marks the pass-through index with a "?").
       */
      brw_MOV(p, brw_message_reg(nr),
              c->computes_depth ? arg2[2] : arg1[1]);
      nr += 2;
   }

   if (c->dest_depth_reg) {
      GLuint slot = c->dest_depth_reg / 2;
      GLuint odd = c->dest_depth_reg % 2;

      if (!odd) {
         brw_MOV(p, brw_message_reg(nr), arg1[slot]);
      } else {
         /* Odd register number: copy with two uncompressed moves. */
         brw_push_insn_state(p);
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         brw_MOV(p, brw_message_reg(nr), offset(arg1[slot], 1));
         /* 2nd half? */
         brw_MOV(p, brw_message_reg(nr + 1), arg1[slot + 1]);
         brw_pop_insn_state(p);
      }
      nr += 2;
   }

   if (intel->gen >= 6) {
      /* Load the message header.  There's no implied move from src0
       * to the base mrf on gen6.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p,
              retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);

      if (target != 0) {
         brw_MOV(p,
                 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
                        BRW_REGISTER_TYPE_UD),
                 brw_imm_ud(target));
      }
   }

   if (c->runtime_check_aads_emit) {
      /* Decide at run time whether the AA data is sent: test bit 26 of
       * element 6 of g1 and emit a JMPI whose landing point is just past
       * the AA variant of the write.
       */
      struct brw_reg null_ud =
         vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
      struct brw_reg ip = brw_ip_reg();
      struct brw_instruction *skip_aa;

      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
      brw_AND(p,
              null_ud,
              get_element_ud(brw_vec8_grf(1,0), 6),
              brw_imm_ud(1<<26));

      skip_aa = brw_JMPI(p, ip, ip, brw_imm_w(0));

      emit_aa(c, arg1, 2);
      fire_fb_write(c, 0, nr, target, eot);
      /* note - thread killed in subroutine */

      brw_land_fwd_jump(p, skip_aa);

      /* ELSE: Shuffle up one register to fill in the hole left for AA: */
      fire_fb_write(c, 1, nr-1, target, eot);
   } else {
      if (c->aa_dest_stencil_reg)
         emit_aa(c, arg1, 2);

      fire_fb_write(c, 0, nr, target, eot);
   }
}
void brw_emit_anyprim_setup( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; struct brw_instruction *jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); GLuint saveflag; c->nr_verts = 3; alloc_regs(c); primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); brw_MOV(p, primmask, brw_imm_ud(1)); brw_SHL(p, primmask, primmask, payload_prim); brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | (1<<_3DPRIM_TRISTRIP) | (1<<_3DPRIM_TRIFAN) | (1<<_3DPRIM_TRISTRIP_REVERSE) | (1<<_3DPRIM_POLYGON) | (1<<_3DPRIM_RECTLIST) | (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); brw_emit_tri_setup( c, GL_FALSE ); brw_pop_insn_state(p); p->flag_value = saveflag; /* note - thread killed in subroutine, so must * restore the flag which is changed when building * the subroutine. 
fix #13240 */ } brw_land_fwd_jump(p, jmp); brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | (1<<_3DPRIM_LINESTRIP) | (1<<_3DPRIM_LINELOOP) | (1<<_3DPRIM_LINESTRIP_CONT) | (1<<_3DPRIM_LINESTRIP_BF) | (1<<_3DPRIM_LINESTRIP_CONT_BF))); jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); brw_emit_line_setup( c, GL_FALSE ); brw_pop_insn_state(p); p->flag_value = saveflag; /* note - thread killed in subroutine */ } brw_land_fwd_jump(p, jmp); brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); brw_emit_point_sprite_setup( c, GL_FALSE ); brw_pop_insn_state(p); p->flag_value = saveflag; } brw_land_fwd_jump(p, jmp); brw_emit_point_setup( c, GL_FALSE ); }
/**
 * Emit the sampler SEND for a vec4 texturing instruction.
 *
 * Picks the sampler message type from the opcode (and, where the opcode
 * has one, its shadow-comparison variant), builds the message header in
 * the base MRF when ir->header_present is set, then emits a SIMD4x2
 * sampler message with a response length of 1 and marks the surface used.
 *
 * \param ir   the texturing instruction being generated
 * \param dst  destination register for the SEND
 */
void
gen8_vec4_generator::generate_tex(vec4_instruction *ir, struct brw_reg dst)
{
   const bool shadow = ir->shadow_compare;
   int msg_type = 0;

   switch (ir->opcode) {
   case SHADER_OPCODE_TEX:
   case SHADER_OPCODE_TXL:
      msg_type = shadow ? GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
                        : GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
      break;
   case SHADER_OPCODE_TXD:
      msg_type = shadow ? HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE
                        : GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
      break;
   case SHADER_OPCODE_TXF:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
      break;
   case SHADER_OPCODE_TXF_CMS:
      msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
      break;
   case SHADER_OPCODE_TXF_MCS:
      msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
      break;
   case SHADER_OPCODE_TXS:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
      break;
   case SHADER_OPCODE_TG4:
      msg_type = shadow ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C
                        : GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
      break;
   case SHADER_OPCODE_TG4_OFFSET:
      msg_type = shadow ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C
                        : GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
      break;
   default:
      assert(!"should not get here: invalid VS texture opcode");
      break;
   }

   if (ir->header_present) {
      /* Start the header as a copy of g0. */
      MOV_RAW(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* The single-dword header patches below are emitted in Align1 mode;
       * Align16 is selected again once they are done.
       */
      default_state.access_mode = BRW_ALIGN_1;

      if (ir->texture_offset) {
         /* Set the offset bits in DWord 2. */
         MOV_RAW(retype(brw_vec1_reg(MRF, ir->base_mrf, 2),
                        BRW_REGISTER_TYPE_UD),
                 brw_imm_ud(ir->texture_offset));
      }

      if (ir->sampler >= 16) {
         /* The "Sampler Index" field can only store values between 0 and 15.
          * However, we can add an offset to the "Sampler State Pointer"
          * field, effectively selecting a different set of 16 samplers.
          *
          * The "Sampler State Pointer" needs to be aligned to a 32-byte
          * offset, and each sampler state is only 16-bytes, so we can't
          * exclusively use the offset - we have to use both.
          */
         gen8_instruction *ptr_adjust =
            ADD(get_element_ud(brw_message_reg(ir->base_mrf), 3),
                get_element_ud(brw_vec8_grf(0, 0), 3),
                brw_imm_ud(16 * (ir->sampler / 16) *
                           sizeof(gen7_sampler_state)));
         gen8_set_mask_control(ptr_adjust, BRW_MASK_DISABLE);
      }

      default_state.access_mode = BRW_ALIGN_16;
   }

   uint32_t surface =
      prog_data->base.binding_table.texture_start + ir->sampler;

   gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, send, dst);
   gen8_set_src0(brw, send, brw_message_reg(ir->base_mrf));
   gen8_set_sampler_message(brw, send,
                            surface,
                            ir->sampler % 16,   /* index within the group of 16 */
                            msg_type,
                            1,                  /* response length */
                            ir->mlen,
                            ir->header_present,
                            BRW_SAMPLER_SIMD_MODE_SIMD4X2);

   mark_surface_used(surface);
}
/**
 * Emit the sampler message for a vec4 texturing instruction.
 *
 * The message type is chosen from the opcode, using the GEN5_* message
 * numbers when intel->gen >= 5 and the BRW_*_SIMD4X2_* ones (with their
 * expected message lengths asserted) otherwise.  The return format is
 * derived from the type of \p dst.
 *
 * \param inst  the texturing instruction being generated
 * \param dst   destination register; its type selects the return format
 * \param src   source operand for the message; replaced by g0 when a
 *              header is present but no texture offset is set
 */
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src)
{
   int msg_type = -1;

   if (intel->gen < 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
         if (!inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
            assert(inst->mlen == 2);
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
            assert(inst->mlen == 3);
         }
         break;
      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually. */
         msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
         assert(inst->mlen == 4);
         break;
      case SHADER_OPCODE_TXF:
         msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
         assert(inst->mlen == 2);
         break;
      case SHADER_OPCODE_TXS:
         msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
         assert(inst->mlen == 2);
         break;
      default:
         assert(!"should not get here: invalid VS texture opcode");
         break;
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
         msg_type = inst->shadow_compare
                       ? GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
                       : GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
         break;
      case SHADER_OPCODE_TXD:
         /* There is no sample_d_c message; comparisons are done manually. */
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         break;
      case SHADER_OPCODE_TXF:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXS:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
         break;
      default:
         assert(!"should not get here: invalid VS texture opcode");
         break;
      }
   }

   assert(msg_type != -1);

   /* Load the message header if present.  With a texture offset the header
    * has to be built explicitly so the offset bitfield can be written into
    * it; without one, an implied move from g0 to the first message register
    * is enough.
    */
   if (!inst->texture_offset) {
      if (inst->header_present) {
         /* Set up an implied move from g0 to the MRF. */
         src = brw_vec8_grf(0, 0);
      }
   } else {
      /* Explicitly set up the message header by copying g0 to the MRF. */
      brw_MOV(p,
              retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* Then set the offset bits in DWord 2 (emitted in Align1 mode, with
       * Align16 selected again afterwards).
       */
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  inst->base_mrf, 2),
                     BRW_REGISTER_TYPE_UD),
              brw_imm_uw(inst->texture_offset));
      brw_set_access_mode(p, BRW_ALIGN_16);
   }

   /* Signed and unsigned integer destinations get integer return formats;
    * every other destination type is returned as float.
    */
   uint32_t return_format;
   if (dst.type == BRW_REGISTER_TYPE_D)
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
   else if (dst.type == BRW_REGISTER_TYPE_UD)
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
   else
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;

   brw_SAMPLE(p,
              dst,
              inst->base_mrf,
              src,
              SURF_INDEX_VS_TEXTURE(inst->sampler),
              inst->sampler,
              WRITEMASK_XYZW,
              msg_type,
              1, /* response length */
              inst->mlen,
              inst->header_present,
              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
              return_format);
}