void vec4_generator::generate_pull_constant_load(vec4_instruction *inst, struct brw_reg dst, struct brw_reg index, struct brw_reg offset) { assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); uint32_t surf_index = index.dw1.ud; if (intel->gen == 7) { gen6_resolve_implied_move(p, &offset, inst->base_mrf); brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, offset); brw_set_sampler_message(p, insn, surf_index, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ 1, /* mlen */ false, /* no header */ BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); return; } struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D), offset); uint32_t msg_type; if (intel->gen >= 6) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else if (intel->gen == 5 || intel->is_g4x) msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; /* Each of the 8 channel enables is considered for whether each * dword is written. */ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); if (intel->gen < 6) send->header.destreg__conditionalmod = inst->base_mrf; brw_set_dp_read_message(p, send, surf_index, BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, BRW_DATAPORT_READ_TARGET_DATA_CACHE, 2, /* mlen */ 1 /* rlen */); }
void vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, struct brw_reg dst, struct brw_reg surf_index, struct brw_reg offset) { assert(surf_index.file == BRW_IMMEDIATE_VALUE && surf_index.type == BRW_REGISTER_TYPE_UD); brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, offset); brw_set_sampler_message(p, insn, surf_index.dw1.ud, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ 1, /* mlen */ false, /* no header */ BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); }
void vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, struct brw_reg dst, struct brw_reg surf_index, struct brw_reg offset) { assert(surf_index.type == BRW_REGISTER_TYPE_UD); if (surf_index.file == BRW_IMMEDIATE_VALUE) { brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, offset); brw_set_sampler_message(p, insn, surf_index.dw1.ud, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ 1, /* mlen */ false, /* no header */ BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); } else { struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); /* a0.0 = surf_index & 0xff */ brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); brw_inst_set_exec_size(p->brw, insn_and, BRW_EXECUTE_1); brw_set_dest(p, insn_and, addr); brw_set_src0(p, insn_and, vec1(retype(surf_index, BRW_REGISTER_TYPE_UD))); brw_set_src1(p, insn_and, brw_imm_ud(0x0ff)); /* a0.0 |= <descriptor> */ brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR); brw_set_sampler_message(p, insn_or, 0 /* surface */, 0 /* sampler */, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1 /* rlen */, 1 /* mlen */, false /* header */, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1); brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD); brw_set_src0(p, insn_or, addr); brw_set_dest(p, insn_or, addr); /* dst = send(offset, a0.0) */ brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn_send, dst); brw_set_src0(p, insn_send, offset); brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr); brw_pop_insn_state(p); /* visitor knows more than we do about the surface limit required, * so has already done marking. */ } }
void vec4_generator::generate_tex(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src, struct brw_reg sampler_index) { int msg_type = -1; if (brw->gen >= 5) { switch (inst->opcode) { case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXL: if (inst->shadow_compare) { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; case SHADER_OPCODE_TXD: if (inst->shadow_compare) { /* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */ assert(brw->gen >= 8 || brw->is_haswell); msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; } break; case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_CMS: if (brw->gen >= 7) msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; else msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_MCS: assert(brw->gen >= 7); msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; break; case SHADER_OPCODE_TXS: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; break; case SHADER_OPCODE_TG4: if (inst->shadow_compare) { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; } else { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; } break; case SHADER_OPCODE_TG4_OFFSET: if (inst->shadow_compare) { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; } else { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; } break; default: unreachable("should not get here: invalid vec4 texture opcode"); } } else { switch (inst->opcode) { case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXL: if (inst->shadow_compare) { msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE; assert(inst->mlen == 3); } else { msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD; assert(inst->mlen == 2); } break; case SHADER_OPCODE_TXD: /* There is no sample_d_c message; comparisons are done manually. */ msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS; assert(inst->mlen == 4); break; case SHADER_OPCODE_TXF: msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD; assert(inst->mlen == 2); break; case SHADER_OPCODE_TXS: msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO; assert(inst->mlen == 2); break; default: unreachable("should not get here: invalid vec4 texture opcode"); } } assert(msg_type != -1); assert(sampler_index.type == BRW_REGISTER_TYPE_UD); /* Load the message header if present. If there's a texture offset, we need * to set it up explicitly and load the offset bitfield. Otherwise, we can * use an implied move from g0 to the first message register. */ if (inst->header_present) { if (brw->gen < 6 && !inst->texture_offset) { /* Set up an implied move from g0 to the MRF. */ src = brw_vec8_grf(0, 0); } else { struct brw_reg header = retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD); /* Explicitly set up the message header by copying g0 to the MRF. */ brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); brw_set_default_access_mode(p, BRW_ALIGN_1); if (inst->texture_offset) { /* Set the texel offset bits in DWord 2. */ brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(inst->texture_offset)); } brw_adjust_sampler_state_pointer(p, header, sampler_index, dst); brw_pop_insn_state(p); } } uint32_t return_format; switch (dst.type) { case BRW_REGISTER_TYPE_D: return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32; break; case BRW_REGISTER_TYPE_UD: return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; break; default: return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; break; } uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 || inst->opcode == SHADER_OPCODE_TG4_OFFSET) ? prog_data->base.binding_table.gather_texture_start : prog_data->base.binding_table.texture_start; if (sampler_index.file == BRW_IMMEDIATE_VALUE) { uint32_t sampler = sampler_index.dw1.ud; brw_SAMPLE(p, dst, inst->base_mrf, src, sampler + base_binding_table_index, sampler % 16, msg_type, 1, /* response length */ inst->mlen, inst->header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, return_format); brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index); } else { /* Non-constant sampler index. */ /* Note: this clobbers `dst` as a temporary before emitting the send */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); /* Some care required: `sampler` and `temp` may alias: * addr = sampler & 0xff * temp = (sampler << 8) & 0xf00 * addr = addr | temp */ brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index)); brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u)); brw_AND(p, temp, temp, brw_imm_ud(0x0f00)); brw_AND(p, addr, addr, brw_imm_ud(0x0ff)); brw_OR(p, addr, addr, temp); /* a0.0 |= <descriptor> */ brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR); brw_set_sampler_message(p, insn_or, 0 /* surface */, 0 /* sampler */, msg_type, 1 /* rlen */, inst->mlen /* mlen */, inst->header_present /* header */, BRW_SAMPLER_SIMD_MODE_SIMD4X2, return_format); brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1); brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD); brw_set_src0(p, insn_or, addr); brw_set_dest(p, insn_or, addr); /* dst = send(offset, a0.0) */ brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn_send, dst); brw_set_src0(p, insn_send, src); brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr); brw_pop_insn_state(p); /* visitor knows more than we do about the surface limit required, * so has already done marking. */ } }
/** * Texture sample instruction. * Note: the msg_type plus msg_length values determine exactly what kind * of sampling operation is performed. See volume 4, page 161 of docs. */ void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, GLuint binding_table_index, GLuint sampler, GLuint writemask, GLuint msg_type, GLuint response_length, GLuint msg_length, GLboolean eot, GLuint header_present, GLuint simd_mode) { GLboolean need_stall = 0; if (writemask == 0) { /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } /* Hardware doesn't do destination dependency checking on send * instructions properly. Add a workaround which generates the * dependency by other means. In practice it seems like this bug * only crops up for texture samples, and only where registers are * written by the send and then written again later without being * read in between. Luckily for us, we already track that * information and use it to modify the writemask for the * instruction, so that is a guide for whether a workaround is * needed. */ if (writemask != WRITEMASK_XYZW) { GLuint dst_offset = 0; GLuint i, newmask = 0, len = 0; for (i = 0; i < 4; i++) { if (writemask & (1<<i)) break; dst_offset += 2; } for (; i < 4; i++) { if (!(writemask & (1<<i))) break; newmask |= 1<<i; len++; } if (newmask != writemask) { need_stall = 1; /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); newmask = ~newmask & WRITEMASK_XYZW; brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, m1, brw_vec8_grf(0,0)); brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); brw_pop_insn_state(p); src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); dest = offset(dest, dst_offset); response_length = len * 2; } } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_sampler_message(p->brw, insn, binding_table_index, sampler, msg_type, response_length, msg_length, eot, header_present, simd_mode); } if (need_stall) { struct brw_reg reg = vec8(offset(dest, response_length-1)); /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, reg, reg); brw_pop_insn_state(p); } }