void vec4_generator::generate_gs_set_write_offset(struct brw_reg dst, struct brw_reg src0, struct brw_reg src1) { /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message * Header: M0.3): * * Slot 0 Offset. This field, after adding to the Global Offset field * in the message descriptor, specifies the offset (in 256-bit units) * from the start of the URB entry, as referenced by URB Handle 0, at * which the data will be accessed. * * Similar text describes DWORD M0.4, which is slot 1 offset. * * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components * of the register for geometry shader invocations 0 and 1) by the * immediate value in src1, and store the result in DWORDs 3 and 4 of dst. * * We can do this with the following EU instruction: * * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all } */ brw_push_insn_state(p); brw_set_access_mode(p, BRW_ALIGN_1); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), src1); brw_set_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); }
static void do_flatshade_line( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; if (!nr) return; /* Already done in clip program: */ if (c->key.primitive == SF_UNFILLED_TRIS) return; if (intel->gen == 5) jmpi = 2; brw_push_insn_state(p); brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); copy_colors(c, c->vert[0], c->vert[1]); brw_pop_insn_state(p); }
static void emit_min(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg src0, src1, dst; int i; brw_push_insn_state(p); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { dst = get_dst_reg(c, inst, i, 1); src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MOV(p, dst, src0); brw_set_saturate(p, 0); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_MOV(p, dst, src1); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); } } brw_pop_insn_state(p); }
/* Kill pixel - set execution mask to zero for those pixels which * fail. */ static void emit_kil( struct brw_wm_compile *c, struct brw_reg *arg0) { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; struct brw_reg pixelmask; GLuint i, j; if (intel->gen >= 6) pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); else pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); for (i = 0; i < 4; i++) { /* Check if we've already done the comparison for this reg * -- common when someone does KIL TEMP.wwww. */ for (j = 0; j < i; j++) { if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0) break; } if (j != i) continue; brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_AND(p, pixelmask, brw_flag_reg(), pixelmask); brw_pop_insn_state(p); } }
/*
 * Copy g1 into the message register that follows base_reg, then emit the
 * framebuffer write message itself.
 *
 * base_reg: first message register of the payload.
 * nr:       forwarded to brw_fb_WRITE() after the target.
 * target:   forwarded to brw_fb_WRITE().
 * eot:      forwarded to brw_fb_WRITE() as its last argument.
 */
static void fire_fb_write( struct brw_wm_compile *c,
                           GLuint base_reg,
                           GLuint nr,
                           GLuint target,
                           GLuint eot )
{
   struct brw_compile *p = &c->func;

   /* Pass through control information:
    *
    * mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask }
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
           brw_message_reg(base_reg + 1),
           brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   /* Send framebuffer write message:
    *
    * send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT }
    */
   brw_fb_WRITE(p,
                retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                base_reg,
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
                target,
                nr,
                0,
                eot);
}
/* Need to use a computed jump to copy flatshaded attributes as the * vertices are ordered according to y-coordinate before reaching this * point, so the PV could be anywhere. */ static void do_flatshade_triangle( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); if (!nr) return; /* Already done in clip program: */ if (c->key.primitive == SF_UNFILLED_TRIS) return; brw_push_insn_state(p); brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1)); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); copy_colors(c, c->vert[2], c->vert[0]); brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1)); copy_colors(c, c->vert[0], c->vert[1]); copy_colors(c, c->vert[2], c->vert[1]); brw_JMPI(p, ip, ip, brw_imm_ud(nr*2)); copy_colors(c, c->vert[0], c->vert[2]); copy_colors(c, c->vert[1], c->vert[2]); brw_pop_insn_state(p); }
/*
 * Emit a one-operand extended math operation into the single destination
 * channel selected by mask.
 *
 * function: math function code handed to brw_math().
 * dst:      per-channel destination registers.
 * mask:     write mask (exactly one of XYZW may be set) plus the optional
 *           SATURATE bit.
 * arg0:     source operand; only arg0[0] is read.
 *
 * One brw_math() is always emitted; a second one covering the second half
 * is added when the dispatch width is 16.
 */
void emit_math1(struct brw_wm_compile *c,
                GLuint function,
                const struct brw_reg *dst,
                GLuint mask,
                const struct brw_reg *arg0)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   const GLuint chan_mask = mask & WRITEMASK_XYZW;
   const GLuint saturate = (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE
                                             : BRW_MATH_SATURATE_NONE;
   struct brw_reg src;
   int dst_chan;

   if (!chan_mask)
      return; /* Do not emit dead code */

   assert(is_power_of_two(chan_mask));
   dst_chan = _mesa_ffs(chan_mask) - 1;

   src = arg0[0];
   if (intel->gen >= 6 &&
       (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
        arg0[0].file != BRW_GENERAL_REGISTER_FILE ||
        arg0[0].negate ||
        arg0[0].abs)) {
      /* Gen6 math requires that source and dst horizontal stride be 1,
       * and that the argument be in the GRF.
       *
       * The hardware ignores source modifiers (negate and abs) on math
       * instructions, so we also move to a temp to set those up.
       */
      src = dst[dst_chan];
      brw_MOV(p, src, arg0[0]);
   }

   /* Send two messages to perform all 16 operations: */
   brw_push_insn_state(p);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p,
            dst[dst_chan],
            function,
            saturate,
            2,
            src,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_math(p,
               offset(dst[dst_chan],1),
               function,
               saturate,
               3,
               sechalf(src),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);
   }

   brw_pop_insn_state(p);
}
void vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst, struct brw_reg src) { brw_push_insn_state(p); brw_set_access_mode(p, BRW_ALIGN_1); brw_set_mask_control(p, BRW_MASK_DISABLE); /* If we think of the src and dst registers as composed of 8 DWORDs each, * we want to pick up the contents of DWORDs 0 and 4 from src, truncate * them to WORDs, and then pack them into DWORD 2 of dst. * * It's easier to get the EU to do this if we think of the src and dst * registers as composed of 16 WORDS each; then, we want to pick up the * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5 of * dst. * * We can do that by the following EU instruction: * * mov (2) dst.4<1>:uw src<8;1,0>:uw { Align1, Q1, NoMask } */ brw_MOV(p, suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4), stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0)); brw_set_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); }
/**
 * Emit a two-operand math instruction.
 *
 * One operand is first copied into message register base_mrf + 1 (with
 * saturation and predication switched off); the other is handed to
 * brw_math() directly.  Which is which depends on the opcode.
 *
 * From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
 * "Message Payload":
 *
 *     "Operand0[7].  For the INT DIV functions, this operand is the
 *      denominator."
 *       ...
 *     "Operand1[7].  For the INT DIV functions, this operand is the
 *      numerator."
 *
 * so every opcode other than SHADER_OPCODE_POW gets its sources swapped.
 */
void
vec4_generator::generate_math2_gen4(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src0,
                                    struct brw_reg src1)
{
   const bool is_pow = inst->opcode == SHADER_OPCODE_POW;
   const struct brw_reg &operand0 = is_pow ? src0 : src1;
   const struct brw_reg &operand1 = is_pow ? src1 : src0;

   /* Stage the second operand in the MRF following base_mrf. */
   brw_push_insn_state(p);
   brw_set_saturate(p, false);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 1), operand1.type),
           operand1);
   brw_pop_insn_state(p);

   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            inst->base_mrf,
            operand0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}
static void emit_kil(struct brw_wm_compile *c) { struct brw_compile *p = &c->func; struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK brw_AND(p, depth, c->emit_mask_reg, depth); brw_pop_insn_state(p); }
static void gen_f0_0_MOV_GRF_GRF(struct brw_codegen *p) { struct brw_reg g0 = brw_vec8_grf(0, 0); struct brw_reg g2 = brw_vec8_grf(2, 0); brw_push_insn_state(p); brw_set_default_predicate_control(p, true); brw_MOV(p, g0, g2); brw_pop_insn_state(p); }
/* The handling of f0.1 vs f0.0 changes between gen6 and gen7. Explicitly test * it, so that we run the fuzzing can run over all the other bits that might * interact with it. */ static void gen_f0_1_MOV_GRF_GRF(struct brw_compile *p) { struct brw_reg g0 = brw_vec8_grf(0, 0); struct brw_reg g2 = brw_vec8_grf(2, 0); brw_push_insn_state(p); brw_set_predicate_control(p, true); struct brw_instruction *mov = brw_MOV(p, g0, g2); mov->bits2.da1.flag_subreg_nr = 1; brw_pop_insn_state(p); }
/* The handling of f0.1 vs f0.0 changes between gen6 and gen7. Explicitly test * it, so that we run the fuzzing can run over all the other bits that might * interact with it. */ static void gen_f0_1_MOV_GRF_GRF(struct brw_codegen *p) { struct brw_reg g0 = brw_vec8_grf(0, 0); struct brw_reg g2 = brw_vec8_grf(2, 0); brw_push_insn_state(p); brw_set_default_predicate_control(p, true); brw_inst *mov = brw_MOV(p, g0, g2); brw_inst_set_flag_subreg_nr(p->devinfo, mov, 1); brw_pop_insn_state(p); }
void vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src) { assert(src.file == BRW_IMMEDIATE_VALUE); brw_push_insn_state(p); brw_set_access_mode(p, BRW_ALIGN_1); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, suboffset(vec1(dst), 2), src); brw_set_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); }
/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 *
 * The first SEND targets message register msg_reg_nr and writes dest; the
 * second is marked as the compressed second half, targets msg_reg_nr + 1
 * and writes the register following dest.  Both are given the same src and
 * the same math message.  The message is two registers long for POW and one
 * otherwise; the response is two registers long for SINCOS and one
 * otherwise.
 */
void brw_math_16( struct brw_compile *p,
                  struct brw_reg dest,
                  GLuint function,
                  GLuint saturate,
                  GLuint msg_reg_nr,
                  struct brw_reg src,
                  GLuint precision )
{
   struct brw_instruction *first_half;
   struct brw_instruction *second_half;
   GLuint msg_length = 1;
   GLuint response_length = 1;

   if (function == BRW_MATH_FUNCTION_POW)
      msg_length = 2;
   if (function == BRW_MATH_FUNCTION_SINCOS)
      response_length = 2;

   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   /* First instruction: */
   first_half = next_insn(p, BRW_OPCODE_SEND);
   first_half->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(first_half, dest);
   brw_set_src0(first_half, src);
   brw_set_math_message(p->brw,
                        first_half,
                        msg_length, response_length,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        saturate,
                        BRW_MATH_DATA_VECTOR);

   /* Second instruction: */
   second_half = next_insn(p, BRW_OPCODE_SEND);
   second_half->header.compression_control = BRW_COMPRESSION_2NDHALF;
   second_half->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(second_half, offset(dest,1));
   brw_set_src0(second_half, src);
   brw_set_math_message(p->brw,
                        second_half,
                        msg_length, response_length,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        saturate,
                        BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}
/*
 * Copy the AA / dest-stencil value into message register 'reg'.
 *
 * c->aa_dest_stencil_reg is split into an index into arg1 (value / 2) and a
 * register offset from that entry (value % 2); the register found there is
 * the source of the MOV.
 */
static void emit_aa( struct brw_wm_compile *c,
                     struct brw_reg *arg1,
                     GLuint reg )
{
   struct brw_compile *p = &c->func;
   const GLuint arg_index = c->aa_dest_stencil_reg / 2;
   const GLuint reg_offset = c->aa_dest_stencil_reg % 2;
   struct brw_reg aa_src = offset(arg1[arg_index], reg_offset);

   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
   brw_MOV(p, brw_message_reg(reg), aa_src);
   brw_pop_insn_state(p);
}
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 * If relAddr is true, we'll do an indirect fetch using the address register.
 *
 * XXX: relAddr not implemented; the parameter is currently ignored.
 */
void brw_dp_READ_4( struct brw_compile *p,
                    struct brw_reg dest,
                    GLboolean relAddr,
                    GLuint location,
                    GLuint bind_table_index )
{
   const GLuint msg_reg_nr = 1;
   struct brw_reg header;
   struct brw_instruction *send;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   header = retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD);

   /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
    */
   brw_MOV(p, header, brw_imm_ud(location));
   brw_pop_insn_state(p);

   /* The read message itself. */
   send = next_insn(p, BRW_OPCODE_SEND);

   send->header.predicate_control = BRW_PREDICATE_NONE;
   send->header.compression_control = BRW_COMPRESSION_NONE;
   send->header.destreg__conditionalmod = msg_reg_nr;
   send->header.mask_control = BRW_MASK_DISABLE;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(send, dest);
   brw_set_src0(send, brw_null_reg());

   brw_set_dp_read_message(p->brw,
                           send,
                           bind_table_index,
                           0,  /* msg_control (0 means 1 Oword) */
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                           0,  /* source cache = data cache */
                           1,  /* msg_length */
                           1,  /* response_length (1 Oword) */
                           0); /* eot */
}
/*
 * For each of the c->nr_verts vertices, copy two consecutive scalars from
 * c->z[] into the vertex register, starting at element 2.
 */
static void copy_z_inv_w( struct brw_sf_compile *c )
{
   struct brw_compile *p = &c->func;
   GLuint v;

   brw_push_insn_state(p);

   /* Copy both scalars with a single MOV: */
   for (v = 0; v < c->nr_verts; v++) {
      brw_MOV(p,
              vec2(suboffset(c->vert[v], 2)),
              vec2(c->z[v]));
   }

   brw_pop_insn_state(p);
}
/*
 * Emit a two-operand extended math operation; only a write mask of exactly
 * X is accepted (asserted).
 *
 * The operands are staged in message registers: arg0 in m2 (first half) and
 * m4 (second half), arg1 in m3 and m5.  Two brw_math() calls follow, one per
 * half, using message registers 2 and 4 and writing dst[0] and the register
 * after it.
 */
static void emit_math2( struct brw_compile *p,
                        GLuint function,
                        const struct brw_reg *dst,
                        GLuint mask,
                        const struct brw_reg *arg0,
                        const struct brw_reg *arg1)
{
   const GLuint saturate = (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE
                                             : BRW_MATH_SATURATE_NONE;

   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code*/

   assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);

   brw_push_insn_state(p);

   /* First operand: */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p, brw_message_reg(2), arg0[0]);
   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
   brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));

   /* Second operand: */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p, brw_message_reg(3), arg1[0]);
   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
   brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));

   /* Send two messages to perform all 16 operations: */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p,
            dst[0],
            function,
            saturate,
            2,
            brw_null_reg(),
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
   brw_math(p,
            offset(dst[0],1),
            function,
            saturate,
            4,
            brw_null_reg(),
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);

   brw_pop_insn_state(p);
}
static void brw_fb_write(struct brw_compile *p, int dw) { struct brw_instruction *insn; unsigned msg_control, msg_type, msg_len; struct brw_reg src0; bool header; if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; msg_len = 8; } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; msg_len = 4; } if (p->gen < 060) { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0)); brw_pop_insn_state(p); msg_len += 2; } /* The execution mask is ignored for render target writes. */ insn = brw_next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; insn->header.compression_control = BRW_COMPRESSION_NONE; if (p->gen >= 060) { msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; src0 = brw_message_reg(2); header = false; } else { insn->header.destreg__conditionalmod = 0; msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; src0 = __retype_uw(brw_vec8_grf(0, 0)); header = true; } brw_set_dest(p, insn, null_result(dw)); brw_set_src0(p, insn, src0); brw_set_dp_write_message(p, insn, 0, msg_control, msg_type, msg_len, header, true, 0, true, false); }
/*
 * Select predication for subsequent instructions based on a flag value.
 *
 * A value of 0xff turns predication off.  Any other value is loaded into
 * the flag register (only when it differs from the cached p->flag_value)
 * and normal predication is switched on.
 *
 * How does predicate control work when execution_size != 8?  Do I
 * need to test/set for 0xffff when execution_size is 16?
 */
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
{
   /* Predication is cleared first, so it is already off when the state is
    * pushed for the flag-loading MOV below.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   if (value == 0xff)
      return;

   if (p->flag_value != value) {
      brw_push_insn_state(p);
      brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
      p->flag_value = value;
      brw_pop_insn_state(p);
   }

   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
void vec4_generator::generate_gs_get_instance_id(struct brw_reg dst) { /* We want to right shift R0.0 & R0.1 by GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT * and store into dst.0 & dst.4. So generate the instruction: * * shr(8) dst<1> R0<1,4,0> GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT { align1 WE_normal 1Q } */ brw_push_insn_state(p); brw_set_access_mode(p, BRW_ALIGN_1); dst = retype(dst, BRW_REGISTER_TYPE_UD); struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); brw_SHR(p, dst, stride(r0, 1, 4, 0), brw_imm_ud(GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT)); brw_pop_insn_state(p); }
void vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst) { /* We want to left shift just DWORD 4 (the x component belonging to the * second geometry shader invocation) by 4 bits. So generate the * instruction: * * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all } */ dst = suboffset(vec1(dst), 4); brw_push_insn_state(p); brw_set_access_mode(p, BRW_ALIGN_1); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_SHL(p, dst, dst, brw_imm_ud(4)); brw_pop_insn_state(p); }
/**
 * Split the low byte of flag register f0.0 into two nibbles.
 *
 * Element 0 of dst receives (flags & 0x0f); element 4 receives
 * (flags & 0xf0) shifted right by four.  All three instructions are emitted
 * in Align1 mode with mask control disabled.  'inst' is not consulted.
 */
void
vec4_generator::generate_unpack_flags(vec4_instruction *inst,
                                      struct brw_reg dst)
{
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_access_mode(p, BRW_ALIGN_1);

   const struct brw_reg f0 = brw_flag_reg(0, 0);
   const struct brw_reg low_nibble = suboffset(vec1(dst), 0);
   const struct brw_reg high_nibble = suboffset(vec1(dst), 4);

   brw_AND(p, low_nibble, f0, brw_imm_ud(0x0f));

   brw_AND(p, high_nibble, f0, brw_imm_ud(0xf0));
   brw_SHR(p, high_nibble, high_nibble, brw_imm_ud(4));

   brw_pop_insn_state(p);
}
/*
 * Emit the framebuffer write message, preceded on pre-gen6 hardware by a
 * copy of g1 into the message register following base_reg.
 *
 * The message control is the SIMD16 single-source variant when the dispatch
 * width is 16 and the SIMD8 single-source subspan01 variant otherwise.
 */
static void fire_fb_write( struct brw_wm_compile *c,
                           GLuint base_reg,
                           GLuint nr,
                           GLuint target,
                           GLuint eot )
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;

   /* Pass through control information:
    *
    * Gen6 has done m1 mov in emit_fb_write() for current SIMD16 case.
    *
    * mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask }
    */
   if (intel->gen < 6) {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p,
              brw_message_reg(base_reg + 1),
              brw_vec8_grf(1, 0));
      brw_pop_insn_state(p);
   }

   msg_control = (c->dispatch_width == 16)
      ? BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
      : BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   /* Send framebuffer write message:
    *
    * send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT }
    */
   brw_fb_WRITE(p,
                c->dispatch_width,
                base_reg,
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
                msg_control,
                target,
                nr,
                0,
                eot,
                true);
}
/* Kill pixel - set execution mask to zero for those pixels which * fail. */ static void emit_kil( struct brw_wm_compile *c, struct brw_reg *arg0) { struct brw_compile *p = &c->func; struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); GLuint i; /* XXX - usually won't need 4 compares! */ for (i = 0; i < 4; i++) { brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_AND(p, r0uw, brw_flag_reg(), r0uw); brw_pop_insn_state(p); } }
static void emit_fb_write(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; int channel; GLuint target, eot; struct brw_reg src0; /* Reserve a space for AA - may not be needed: */ if (c->key.aa_dest_stencil_reg) nr += 1; { brw_push_insn_state(p); for (channel = 0; channel < 4; channel++) { src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ brw_MOV(p, brw_message_reg(nr + channel), src0); } /* skip over the regs populated above: */ nr += 8; brw_pop_insn_state(p); } if (c->key.source_depth_to_render_target) { if (c->key.computes_depth) { src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); brw_MOV(p, brw_message_reg(nr), src0); } else { src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); brw_MOV(p, brw_message_reg(nr), src0); } nr += 2; } target = inst->Sampler >> 1; eot = inst->Sampler & 1; fire_fb_write(c, 0, nr, target, eot); }
/**
 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 *
 * The scratch offset is first stored in element 2 of g0, then a three-
 * register SEND is emitted with a null destination and 'src' as its source.
 */
void brw_dp_WRITE_16( struct brw_compile *p,
                      struct brw_reg src,
                      GLuint scratch_offset )
{
   const GLuint msg_reg_nr = 1;
   const GLuint msg_length = 3;
   struct brw_reg null_dst;
   struct brw_instruction *send;

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
           retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
           brw_imm_d(scratch_offset));

   brw_pop_insn_state(p);

   null_dst = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
   send = next_insn(p, BRW_OPCODE_SEND);

   send->header.predicate_control = 0; /* XXX */
   send->header.compression_control = BRW_COMPRESSION_NONE;
   send->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(send, null_dst);
   brw_set_src0(send, src);

   brw_set_dp_write_message(p->brw,
                            send,
                            255, /* binding table index (255=stateless) */
                            BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
                            BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
                            msg_length,
                            0, /* pixel scoreboard */
                            0, /* response_length */
                            0); /* eot */
}
static void wm_src_sample_argb(struct brw_compile *p) { static const uint32_t fragment[][4] = { #include "exa_wm_src_affine.g6b" #include "exa_wm_src_sample_argb.g6b" #include "exa_wm_write.g6b" }; int n; brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); brw_pop_insn_state(p); brw_SAMPLE(p, retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW), 1, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD), 1, 0, WRITEMASK_XYZW, GEN5_SAMPLER_MESSAGE_SAMPLE, 8, 5, true, BRW_SAMPLER_SIMD_MODE_SIMD16); for (n = 0; n < p->nr_insn; n++) { brw_disasm(stdout, &p->store[n], 60); } printf("\n\n"); for (n = 0; n < ARRAY_SIZE(fragment); n++) { brw_disasm(stdout, (const struct brw_instruction *)&fragment[n][0], 60); } }
/*
 * Emit a "set on condition" for every channel enabled in mask.
 *
 * Per channel: compare arg0 against arg1 with 'cond', write 0.0 to dst
 * unconditionally, then write 1.0 to dst under normal predication.
 */
void emit_sop(struct brw_compile *p,
              const struct brw_reg *dst,
              GLuint mask,
              GLuint cond,
              const struct brw_reg *arg0,
              const struct brw_reg *arg1)
{
   GLuint chan;

   for (chan = 0; chan < 4; chan++) {
      if (!(mask & (1<<chan)))
         continue;

      brw_push_insn_state(p);

      brw_CMP(p, brw_null_reg(), cond, arg0[chan], arg1[chan]);

      /* dst = 0.0, unpredicated */
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_MOV(p, dst[chan], brw_imm_f(0));

      /* dst = 1.0, predicated */
      brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
      brw_MOV(p, dst[chan], brw_imm_f(1.0));

      brw_pop_insn_state(p);
   }
}