void gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst, struct brw_reg src0, struct brw_reg src1) { /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message * Header: M0.3): * * Slot 0 Offset. This field, after adding to the Global Offset field * in the message descriptor, specifies the offset (in 256-bit units) * from the start of the URB entry, as referenced by URB Handle 0, at * which the data will be accessed. * * Similar text describes DWORD M0.4, which is slot 1 offset. * * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components * of the register for geometry shader invocations 0 and 1) by the * immediate value in src1, and store the result in DWORDs 3 and 4 of dst. * * We can do this with the following EU instruction: * * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all } */ default_state.access_mode = BRW_ALIGN_1; gen8_instruction *inst = MUL(suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), src1); gen8_set_mask_control(inst, BRW_MASK_DISABLE); default_state.access_mode = BRW_ALIGN_16; }
gen8_instruction * gen8_generator::next_inst(unsigned opcode) { gen8_instruction *inst; if (nr_inst + 1 > unsigned(store_size)) { store_size <<= 1; store = reralloc(mem_ctx, store, gen8_instruction, store_size); assert(store); } next_inst_offset += 16; inst = &store[nr_inst++]; memset(inst, 0, sizeof(gen8_instruction)); gen8_set_opcode(inst, opcode); gen8_set_exec_size(inst, default_state.exec_size); gen8_set_access_mode(inst, default_state.access_mode); gen8_set_mask_control(inst, default_state.mask_control); gen8_set_qtr_control(inst, default_state.qtr_control); gen8_set_cond_modifier(inst, default_state.conditional_mod); gen8_set_pred_control(inst, default_state.predicate); gen8_set_pred_inv(inst, default_state.predicate_inverse); gen8_set_saturate(inst, default_state.saturate); gen8_set_flag_subreg_nr(inst, default_state.flag_subreg_nr); return inst; }
void gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs) { struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0); /* Copy g0. */ if (vs) MOV_RAW(header, brw_vec8_grf(0, 0)); gen8_instruction *inst; if (!(ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) { /* Enable Channel Masks in the URB_WRITE_OWORD message header */ default_state.access_mode = BRW_ALIGN_1; inst = OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5), BRW_REGISTER_TYPE_UD), retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), brw_imm_ud(0xff00)); gen8_set_mask_control(inst, BRW_MASK_DISABLE); default_state.access_mode = BRW_ALIGN_16; } inst = next_inst(BRW_OPCODE_SEND); gen8_set_urb_message(brw, inst, ir->urb_write_flags, ir->mlen, 0, ir->offset, true); gen8_set_dst(brw, inst, brw_null_reg()); gen8_set_src0(brw, inst, header); }
gen8_instruction * gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0) { gen8_instruction *inst = next_inst(BRW_OPCODE_MOV); gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD)); gen8_set_src0(brw, inst, retype(src0, BRW_REGISTER_TYPE_UD)); gen8_set_mask_control(inst, BRW_MASK_DISABLE); return inst; }
void gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src) { assert(src.file == BRW_IMMEDIATE_VALUE); default_state.access_mode = BRW_ALIGN_1; gen8_instruction *inst = MOV(suboffset(vec1(dst), 2), src); gen8_set_mask_control(inst, BRW_MASK_DISABLE); default_state.access_mode = BRW_ALIGN_16; }
void gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst) { /* We want to left shift just DWORD 4 (the x component belonging to the * second geometry shader invocation) by 4 bits. So generate the * instruction: * * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all } */ dst = suboffset(vec1(dst), 4); default_state.access_mode = BRW_ALIGN_1; gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4)); gen8_set_mask_control(inst, BRW_MASK_DISABLE); default_state.access_mode = BRW_ALIGN_16; }
void gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir) { struct brw_reg src = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0); gen8_instruction *inst; /* Enable Channel Masks in the URB_WRITE_HWORD message header */ default_state.access_mode = BRW_ALIGN_1; inst = OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5), BRW_REGISTER_TYPE_UD), retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */ gen8_set_mask_control(inst, BRW_MASK_DISABLE); default_state.access_mode = BRW_ALIGN_16; /* mlen = 2: g0 header + vertex count */ inst = next_inst(BRW_OPCODE_SEND); gen8_set_urb_message(brw, inst, BRW_URB_WRITE_EOT, 2, 0, 0, true); gen8_set_dst(brw, inst, brw_null_reg()); gen8_set_src0(brw, inst, src); }
void gen8_vec4_generator::generate_tex(vec4_instruction *ir, struct brw_reg dst) { int msg_type = 0; switch (ir->opcode) { case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXL: if (ir->shadow_compare) { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; case SHADER_OPCODE_TXD: if (ir->shadow_compare) { msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; } break; case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_CMS: msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; break; case SHADER_OPCODE_TXF_MCS: msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; break; case SHADER_OPCODE_TXS: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; break; case SHADER_OPCODE_TG4: if (ir->shadow_compare) { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; } else { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; } break; case SHADER_OPCODE_TG4_OFFSET: if (ir->shadow_compare) { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; } else { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; } break; default: assert(!"should not get here: invalid VS texture opcode"); break; } if (ir->header_present) { MOV_RAW(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); default_state.access_mode = BRW_ALIGN_1; if (ir->texture_offset) { /* Set the offset bits in DWord 2. */ MOV_RAW(retype(brw_vec1_reg(MRF, ir->base_mrf, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(ir->texture_offset)); } if (ir->sampler >= 16) { /* The "Sampler Index" field can only store values between 0 and 15. * However, we can add an offset to the "Sampler State Pointer" * field, effectively selecting a different set of 16 samplers. * * The "Sampler State Pointer" needs to be aligned to a 32-byte * offset, and each sampler state is only 16-bytes, so we can't * exclusively use the offset - we have to use both. */ gen8_instruction *add = ADD(get_element_ud(brw_message_reg(ir->base_mrf), 3), get_element_ud(brw_vec8_grf(0, 0), 3), brw_imm_ud(16 * (ir->sampler / 16) * sizeof(gen7_sampler_state))); gen8_set_mask_control(add, BRW_MASK_DISABLE); } default_state.access_mode = BRW_ALIGN_16; } uint32_t surf_index = prog_data->base.binding_table.texture_start + ir->sampler; gen8_instruction *inst = next_inst(BRW_OPCODE_SEND); gen8_set_dst(brw, inst, dst); gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf)); gen8_set_sampler_message(brw, inst, surf_index, ir->sampler % 16, msg_type, 1, ir->mlen, ir->header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2); mark_surface_used(surf_index); }
void gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src) { /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message * Header: M0.5): * * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask * * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1 * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls * Vertex 0 DATA[7]. This bit is ANDed with the corresponding * channel enable to determine the final channel enable. For the * URB_READ_OWORD & URB_READ_HWORD messages, when final channel * enable is 1 it indicates that Vertex 1 DATA [3] will be included * in the writeback message. For the URB_WRITE_OWORD & * URB_WRITE_HWORD messages, when final channel enable is 1 it * indicates that Vertex 1 DATA [3] will be written to the surface. * * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included * * 14 Vertex 1 DATA [2] Channel Mask * 13 Vertex 1 DATA [1] Channel Mask * 12 Vertex 1 DATA [0] Channel Mask * 11 Vertex 0 DATA [3] Channel Mask * 10 Vertex 0 DATA [2] Channel Mask * 9 Vertex 0 DATA [1] Channel Mask * 8 Vertex 0 DATA [0] Channel Mask * * (This is from a section of the PRM that is agnostic to the particular * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to * geometry shader invocations 0 and 1, respectively). Since we have the * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0, * and the enable flags for geometry shader invocation 1 in bits 7:0 of * DWORD 4, we just need to OR them together and store the result in bits * 15:8 of DWORD 5. * * It's easier to get the EU to do this if we think of the src and dst * registers as composed of 32 bytes each; then, we want to pick up the * contents of bytes 0 and 16 from src, OR them together, and store them in * byte 21. * * We can do that by the following EU instruction: * * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all } * * Note: this relies on the source register having zeros in (a) bits 7:4 of * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to * contain valid channel mask values (which are in the range 0x0-0xf). */ dst = retype(dst, BRW_REGISTER_TYPE_UB); src = retype(src, BRW_REGISTER_TYPE_UB); default_state.access_mode = BRW_ALIGN_1; gen8_instruction *inst = OR(suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16)); gen8_set_mask_control(inst, BRW_MASK_DISABLE); default_state.access_mode = BRW_ALIGN_16; }