/**
 * Emit a Gen8 vec4 scratch write.
 *
 * Assembles a three-register message based at ir->base_mrf -- a raw copy of
 * g0 as the header, the OWord dual-block offsets generated from \p index,
 * and \p src moved as type D -- and then emits the SEND for an OWord dual
 * block write, predicated by ir->predicate.
 */
void
gen8_vec4_generator::generate_scratch_write(vec4_instruction *ir,
                                            struct brw_reg dst,
                                            struct brw_reg src,
                                            struct brw_reg index)
{
   /* Header register: seeded with a raw copy of g0. */
   struct brw_reg msg_header =
      brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
   MOV_RAW(msg_header, brw_vec8_grf(0, 0));

   /* Second register: block offsets generated from the index. */
   struct brw_reg offsets_payload = brw_message_reg(ir->base_mrf + 1);
   generate_oword_dual_block_offsets(offsets_payload, index);

   /* Third register: the data to be written, moved as D. */
   struct brw_reg data_payload =
      retype(brw_message_reg(ir->base_mrf + 2), BRW_REGISTER_TYPE_D);
   MOV(data_payload, retype(src, BRW_REGISTER_TYPE_D));

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   gen8_instruction *write = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, write, dst);
   gen8_set_src0(brw, write, msg_header);
   gen8_set_pred_control(write, ir->predicate);
   gen8_set_dp_message(brw, write, GEN7_SFID_DATAPORT_DATA_CACHE,
                       255,    /* binding table index: stateless access */
                       GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE,
                       BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                       3,      /* mlen */
                       0,      /* rlen */
                       true,   /* header present */
                       false); /* EOT */
}
/*
 * Emit four PLN instructions writing message registers m2..m5.
 *
 * The first source is a scalar taken from g6 (sub-register 0 for m2/m3,
 * sub-register 4 for m4/m5); the second source alternates between the
 * vec8 registers g2 and g4.
 */
static void wm_src_affine(struct brw_compile *p)
{
	int msg = 2;
	int coef, delta;

	for (coef = 0; coef <= 4; coef += 4) {
		for (delta = 2; delta <= 4; delta += 2) {
			brw_PLN(p,
				brw_message_reg(msg),
				brw_vec1_grf(6, coef),
				brw_vec8_grf(delta, 0));
			msg++;
		}
	}
}
/*
 * Emit a two-operand math operation as two messages.
 *
 * Only a scalar result in X is supported (asserted).  The operands are
 * staged in message registers: arg0 into m2/m4 and arg1 into m3/m5, where
 * the second register of each pair receives the second half of the operand.
 * One math message is then sent per half.
 */
static void emit_math2( struct brw_compile *p,
			GLuint function,
			const struct brw_reg *dst,
			GLuint mask,
			const struct brw_reg *arg0,
			const struct brw_reg *arg1)
{
   GLuint saturate;
   GLuint i;

   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code */

   assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);

   saturate = (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE
				: BRW_MATH_SATURATE_NONE;

   brw_push_insn_state(p);

   /* Stage the operands: i == 0 handles arg0 (m2, m4), i == 1 handles
    * arg1 (m3, m5).
    */
   for (i = 0; i < 2; i++) {
      const struct brw_reg operand = (i == 0) ? arg0[0] : arg1[0];

      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, brw_message_reg(2 + i), operand);

      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_MOV(p, brw_message_reg(4 + i), sechalf(operand));
   }

   /* Send two messages to perform all 16 operations: */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p,
	    dst[0],
	    function,
	    saturate,
	    2,
	    brw_null_reg(),
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);

   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
   brw_math(p,
	    offset(dst[0], 1),
	    function,
	    saturate,
	    4,
	    brw_null_reg(),
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);

   brw_pop_insn_state(p);
}
static void brw_fb_write(struct brw_compile *p, int dw) { struct brw_instruction *insn; unsigned msg_control, msg_type, msg_len; struct brw_reg src0; bool header; if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; msg_len = 8; } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; msg_len = 4; } if (p->gen < 060) { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0)); brw_pop_insn_state(p); msg_len += 2; } /* The execution mask is ignored for render target writes. */ insn = brw_next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; insn->header.compression_control = BRW_COMPRESSION_NONE; if (p->gen >= 060) { msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; src0 = brw_message_reg(2); header = false; } else { insn->header.destreg__conditionalmod = 0; msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; src0 = __retype_uw(brw_vec8_grf(0, 0)); header = true; } brw_set_dest(p, insn, null_result(dw)); brw_set_src0(p, insn, src0); brw_set_dp_write_message(p, insn, 0, msg_control, msg_type, msg_len, header, true, 0, true, false); }
/**
 * Emit a two-operand math message.
 *
 * One operand is handed to brw_math() directly and the other is moved into
 * the message register at inst->base_mrf + 1.  For every opcode other than
 * SHADER_OPCODE_POW the two sources are swapped first.
 */
void
vec4_generator::generate_math2_gen4(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src0,
                                    struct brw_reg src1)
{
   /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
    * "Message Payload":
    *
    * "Operand0[7].  For the INT DIV functions, this operand is the
    *  denominator."
    *  ...
    * "Operand1[7].  For the INT DIV functions, this operand is the
    *  numerator."
    */
   struct brw_reg direct_operand;   /* passed to brw_math() */
   struct brw_reg payload_operand;  /* written to the message register */

   if (inst->opcode == SHADER_OPCODE_POW) {
      direct_operand = src0;
      payload_operand = src1;
   } else {
      /* Everything else is treated as an integer division: swap. */
      direct_operand = src1;
      payload_operand = src0;
   }

   /* The payload move must be neither saturated nor predicated. */
   brw_push_insn_state(p);
   brw_set_saturate(p, false);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 1), payload_operand.type),
           payload_operand);
   brw_pop_insn_state(p);

   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            inst->base_mrf,
            direct_operand,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}
/*
 * Emit a sampler message that requests only the W channel.
 *
 * Returns the register index of the result: result + 3 for dw == 8,
 * otherwise result unchanged.
 */
static int brw_wm_sample__alpha(struct brw_compile *p, int dw,
				int channel, int msg, int result)
{
	const int simd8 = (dw == 8);
	/* SIMD8 sample return is not masked */
	const int mlen = simd8 ? 3 : 5;
	const int rlen = simd8 ? 4 : 2;
	struct brw_reg src0;

	src0 = (p->gen >= 060) ? brw_message_reg(msg) : brw_vec8_grf(0, 0);

	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
		   channel+1, channel, WRITEMASK_W, 0,
		   rlen, mlen, true, simd(dw));

	return simd8 ? result + 3 : result;
}
/**
 * Emit the SEND for a SIMD4x2 untyped atomic operation.
 *
 * Both \p atomic_op and \p surf_index must be UD immediates, and the
 * atomic operation must fit in the low four bits.  The surface is marked
 * as used once the message has been emitted.
 */
void
gen8_vec4_generator::generate_untyped_atomic(vec4_instruction *ir,
                                             struct brw_reg dst,
                                             struct brw_reg atomic_op,
                                             struct brw_reg surf_index)
{
   assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
          atomic_op.type == BRW_REGISTER_TYPE_UD &&
          surf_index.file == BRW_IMMEDIATE_VALUE &&
          surf_index.type == BRW_REGISTER_TYPE_UD);
   assert((atomic_op.dw1.ud & ~0xf) == 0);

   unsigned msg_control =
      atomic_op.dw1.ud | /* Atomic Operation Type: BRW_AOP_* */
      (1 << 5);          /* Return data expected */

   struct brw_reg result = retype(dst, BRW_REGISTER_TYPE_UD);
   struct brw_reg payload = brw_message_reg(ir->base_mrf);

   gen8_instruction *atomic = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, atomic, result);
   gen8_set_src0(brw, atomic, payload);
   gen8_set_dp_message(brw, atomic, HSW_SFID_DATAPORT_DATA_CACHE_1,
                       surf_index.dw1.ud,
                       HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2,
                       msg_control,
                       ir->mlen,
                       1,
                       ir->header_present,
                       false);

   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
/*
 * Copy the control information from g1 into m(base_reg + 1) and then issue
 * the framebuffer write message.
 */
static void fire_fb_write( struct brw_wm_compile *c,
			   GLuint base_reg,
			   GLuint nr,
			   GLuint target,
			   GLuint eot )
{
   struct brw_compile *p = &c->func;
   struct brw_reg null_dst;
   struct brw_reg r0_uw;

   /* Pass through control information:
    *
    *   mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask }
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
	   brw_message_reg(base_reg + 1),
	   brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   /* Send framebuffer write message:
    *
    *   send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT }
    */
   null_dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   r0_uw = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

   brw_fb_WRITE(p,
		null_dst,
		base_reg,
		r0_uw,
		target,
		nr,
		0,
		eot);
}
/*
 * Compute w for the pixels into dst[3].
 *
 * Nothing is emitted unless WRITEMASK_W is set in mask; this is not needed
 * if all you are doing is interpolating color, for instance.
 */
static void emit_pixel_w( struct brw_compile *p,
			  const struct brw_reg *dst,
			  GLuint mask,
			  const struct brw_reg *arg0,
			  const struct brw_reg *deltas)
{
   struct brw_reg interp3;

   if (!(mask & WRITEMASK_W))
      return;

   interp3 = brw_vec1_grf(arg0[0].nr+1, 4);

   /* Calc 1/w - just linterp wpos[3] optimized by putting the
    * result straight into a message reg.
    */
   brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
   brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);

   /* Calc w: invert the value just placed in m2. */
   brw_math_16( p, dst[3],
		BRW_MATH_FUNCTION_INV,
		BRW_MATH_SATURATE_NONE,
		2, brw_null_reg(),
		BRW_MATH_PRECISION_FULL);
}
static void emit_pixel_w( struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; if (mask & WRITEMASK_W) { struct brw_reg dst, src0, delta0, delta1; struct brw_reg interp3; dst = get_dst_reg(c, inst, 3, 1); src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); interp3 = brw_vec1_grf(src0.nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ brw_LINE(p, brw_null_reg(), interp3, delta0); brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); /* Calc w */ brw_math_16( p, dst, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_PRECISION_FULL); } }
void vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst, struct brw_reg src) { brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); if (brw->gen >= 8) { /* Move the vertex count into the second MRF for the EOT write. */ brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD), src); } else { /* If we think of the src and dst registers as composed of 8 DWORDs each, * we want to pick up the contents of DWORDs 0 and 4 from src, truncate * them to WORDs, and then pack them into DWORD 2 of dst. * * It's easier to get the EU to do this if we think of the src and dst * registers as composed of 16 WORDS each; then, we want to pick up the * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5 * of dst. * * We can do that by the following EU instruction: * * mov (2) dst.4<1>:uw src<8;1,0>:uw { Align1, Q1, NoMask } */ brw_set_default_access_mode(p, BRW_ALIGN_1); brw_MOV(p, suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4), stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0)); brw_set_default_access_mode(p, BRW_ALIGN_16); } brw_pop_insn_state(p); }
/*
 * Emit a SIMD16 sample-with-bias message.
 *
 * The coordinates go to m2, m4 and m6; coordinates the texture target does
 * not use are written as 0.0f.  arg[3] goes to m8.  Shadow is ignored for
 * txb.
 */
static void emit_txb( struct brw_wm_compile *c,
		      const struct brw_wm_instruction *inst,
		      struct brw_reg *dst,
		      GLuint dst_flags,
		      struct brw_reg *arg )
{
   struct brw_compile *p = &c->func;
   const GLboolean one_dim = (inst->tex_idx == TEXTURE_1D_INDEX);
   const GLboolean two_dim = (inst->tex_idx == TEXTURE_2D_INDEX ||
			      inst->tex_idx == TEXTURE_RECT_INDEX);

   /* First coordinate is always supplied by the caller. */
   brw_MOV(p, brw_message_reg(2), arg[0]);

   /* Second coordinate: zero for 1D textures. */
   brw_MOV(p, brw_message_reg(4), one_dim ? brw_imm_f(0) : arg[1]);

   /* Third coordinate: zero for 1D, 2D and RECT textures. */
   brw_MOV(p, brw_message_reg(6),
	   (one_dim || two_dim) ? brw_imm_f(0) : arg[2]);

   brw_MOV(p, brw_message_reg(8), arg[3]);

   brw_SAMPLE(p,
	      retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
	      1,
	      retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
	      inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
	      inst->tex_unit,	  /* sampler */
	      inst->writemask,
	      BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
	      8,		/* responseLength */
	      9,		/* msgLength */
	      0);
}
/**
 * Emit a pull-constant load.
 *
 * \p index must be a UD immediate holding the surface index.  Gen7 uses a
 * headerless SIMD4x2 sampler LD message; every other generation uses an
 * OWord dual block read through the data port, with the message type
 * chosen per generation.
 */
void
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
                                            struct brw_reg dst,
                                            struct brw_reg index,
                                            struct brw_reg offset)
{
   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   const uint32_t surf_index = index.dw1.ud;

   if (intel->gen == 7) {
      gen6_resolve_implied_move(p, &offset, inst->base_mrf);

      brw_instruction *ld = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, ld, dst);
      brw_set_src0(p, ld, offset);
      brw_set_sampler_message(p, ld,
                              surf_index,
                              0, /* LD message ignores sampler unit */
                              GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                              1, /* rlen */
                              1, /* mlen */
                              false, /* no header */
                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                              0);
      return;
   }

   /* Data port message type for this generation. */
   const uint32_t msg_type =
      (intel->gen >= 6)
         ? GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
         : (intel->gen == 5 || intel->is_g4x)
              ? G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
              : BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   /* Payload: g0 as the header, followed by the offset moved as D. */
   struct brw_reg msg_header = brw_vec8_grf(0, 0);
   gen6_resolve_implied_move(p, &msg_header, inst->base_mrf);

   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D),
           offset);

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *read = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, read, dst);
   brw_set_src0(p, read, msg_header);
   if (intel->gen < 6)
      read->header.destreg__conditionalmod = inst->base_mrf;
   brw_set_dp_read_message(p, read,
                           surf_index,
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           2, /* mlen */
                           1 /* rlen */);
}
/**
 * Spill a GPR to scratch memory: stage it in m2, then write it out to the
 * given scratch slot.
 */
static void emit_spill( struct brw_wm_compile *c,
			struct brw_reg reg,
			GLuint slot )
{
   struct brw_compile *p = &c->func;
   struct brw_reg staging = brw_message_reg(2);
   struct brw_reg msg_base = brw_message_reg(1);

   /*
     mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
   */
   brw_MOV(p, staging, reg);

   /*
     mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
     send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
   */
   brw_oword_block_write_scratch(p, msg_base, 2, slot);
}
/*
 * Compute w for the pixels into dst[3].
 *
 * 1/w is interpolated into a temporary (dst[3] on gen >= 6, m2 otherwise)
 * and then inverted by a math instruction sized by c->dispatch_width.
 * Nothing is emitted unless WRITEMASK_W is set in mask.
 *
 * Note that the function asserts gen < 6 unconditionally, so the gen >= 6
 * selections below are only reachable when assertions are compiled out.
 */
void emit_pixel_w(struct brw_wm_compile *c,
		  const struct brw_reg *dst,
		  GLuint mask,
		  const struct brw_reg *arg0,
		  const struct brw_reg *deltas)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg temp_dst = (intel->gen >= 6) ? dst[3] : brw_message_reg(2);
   struct brw_reg interp3;
   struct brw_reg src;

   assert(intel->gen < 6);

   /* Don't need this if all you are doing is interpolating color, for
    * instance.
    */
   if (!(mask & WRITEMASK_W))
      return;

   interp3 = brw_vec1_grf(arg0[0].nr+1, 4);

   /* Calc 1/w - just linterp wpos[3] optimized by putting the
    * result straight into a message reg.
    */
   if (!can_do_pln(intel, deltas)) {
      brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
      brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]);
   } else {
      brw_PLN(p, temp_dst, interp3, deltas[0]);
   }

   /* Calc w */
   src = (intel->gen >= 6) ? temp_dst : brw_null_reg();

   if (c->dispatch_width != 16) {
      brw_math(p, dst[3],
	       BRW_MATH_FUNCTION_INV,
	       BRW_MATH_SATURATE_NONE,
	       2, src,
	       BRW_MATH_DATA_VECTOR,
	       BRW_MATH_PRECISION_FULL);
   } else {
      brw_math_16(p, dst[3],
		  BRW_MATH_FUNCTION_INV,
		  BRW_MATH_SATURATE_NONE,
		  2, src,
		  BRW_MATH_PRECISION_FULL);
   }
}
static void emit_fb_write(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; int channel; GLuint target, eot; struct brw_reg src0; /* Reserve a space for AA - may not be needed: */ if (c->key.aa_dest_stencil_reg) nr += 1; { brw_push_insn_state(p); for (channel = 0; channel < 4; channel++) { src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ brw_MOV(p, brw_message_reg(nr + channel), src0); } /* skip over the regs populated above: */ nr += 8; brw_pop_insn_state(p); } if (c->key.source_depth_to_render_target) { if (c->key.computes_depth) { src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); brw_MOV(p, brw_message_reg(nr), src0); } else { src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); brw_MOV(p, brw_message_reg(nr), src0); } nr += 2; } target = inst->Sampler >> 1; eot = inst->Sampler & 1; fire_fb_write(c, 0, nr, target, eot); }
/*
 * Emit one vertex to the URB from the clipper.
 *
 * The vertex is copied from the indirect location into the message
 * registers following c->last_mrf, DWORD 2 of R0 is overwritten with
 * header, and a URB write is issued.  allocate and eot must not both be
 * set.
 */
void brw_clip_emit_vue(struct brw_clip_compile *c,
		       struct brw_indirect vert,
		       GLboolean allocate,
		       GLboolean eot,
		       GLuint header)
{
   struct brw_compile *p = &c->func;
   GLuint first_mrf = c->last_mrf;
   struct brw_reg response;

   brw_clip_ff_sync(c);

   assert(!(allocate && eot));

   /* Cycle through mrf regs - probably futile as we have to wait for
    * the allocation response anyway.  Also, the order this function
    * is invoked doesn't correspond to the order the instructions will
    * be executed, so it won't have any effect in many cases.
    */
#if 0
   if (first_mrf + c->nr_regs + 1 >= MAX_MRF)
      first_mrf = 0;

   c->last_mrf = first_mrf + c->nr_regs + 1;
#endif

   /* Copy the vertex from vertn into m1..mN+1:
    */
   brw_copy_from_indirect(p, brw_message_reg(first_mrf+1), vert, c->nr_regs);

   /* Overwrite PrimType and PrimStart in the message header, for
    * each vertex in turn:
    */
   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));

   /* Send each vertex as a separate write to the urb.  This
    * is different to the concept in brw_sf_emit.c, where
    * subsequent writes are used to build up a single urb
    * entry.  Each of these writes instantiates a separate
    * urb entry - (I think... what about 'allocate'?)
    */
   response = allocate ? c->reg.R0
		       : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD);

   brw_urb_WRITE(p,
		 response,
		 first_mrf,
		 c->reg.R0,
		 allocate,
		 1,		/* used */
		 c->nr_regs + 1, /* msg length */
		 allocate ? 1 : 0, /* response_length */
		 eot,		/* eot */
		 1,		/* writes_complete */
		 0,		/* urb offset */
		 BRW_URB_SWIZZLE_NONE);
}
static void emit_pow(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); brw_MOV(p, brw_message_reg(2), src0); brw_MOV(p, brw_message_reg(3), src1); brw_math(p, dst, BRW_MATH_FUNCTION_POW, (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); }
/**
 * Emit an untyped surface read whose payload starts at inst->base_mrf.
 *
 * \p surf_index must be a UD immediate; the surface it names is marked as
 * used after the message is emitted.
 */
void
vec4_generator::generate_untyped_surface_read(vec4_instruction *inst,
                                              struct brw_reg dst,
                                              struct brw_reg surf_index)
{
   assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
          surf_index.type == BRW_REGISTER_TYPE_UD);

   struct brw_reg payload = brw_message_reg(inst->base_mrf);

   brw_untyped_surface_read(p, dst, payload,
                            surf_index.dw1.ud,
                            inst->mlen, 1);

   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
/*
 * Copy the antialiasing value into message register 'reg'.
 *
 * c->aa_dest_stencil_reg selects the source: the quotient by two indexes
 * arg1, and the remainder is the register offset applied to that entry.
 */
static void emit_aa( struct brw_wm_compile *c,
		     struct brw_reg *arg1,
		     GLuint reg )
{
   struct brw_compile *p = &c->func;
   GLuint which = c->aa_dest_stencil_reg / 2;
   GLuint half = c->aa_dest_stencil_reg % 2;
   struct brw_reg aa_src = offset(arg1[which], half);
   struct brw_reg aa_dst = brw_message_reg(reg);

   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
   brw_MOV(p, aa_dst, aa_src);
   brw_pop_insn_state(p);
}
/**
 * Emit the one-register URB write, flagged EOT, that ends a geometry
 * shader thread.  The payload is the message register at inst->base_mrf.
 */
void
vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
{
   struct brw_reg payload = brw_message_reg(inst->base_mrf);

   brw_urb_WRITE(p,
                 brw_null_reg(),      /* dest */
                 inst->base_mrf,      /* starting mrf reg nr */
                 payload,
                 BRW_URB_WRITE_EOT,
                 1,                   /* message len */
                 0,                   /* response len */
                 0,                   /* urb destination offset */
                 BRW_URB_SWIZZLE_INTERLEAVE);
}
/**
 * Emit a geometry shader URB write.  The flags, message length and URB
 * offset come from the instruction; no response is requested.
 */
void
vec4_generator::generate_gs_urb_write(vec4_instruction *inst)
{
   struct brw_reg payload = brw_message_reg(inst->base_mrf);

   brw_urb_WRITE(p,
                 brw_null_reg(),         /* dest */
                 inst->base_mrf,         /* starting mrf reg nr */
                 payload,
                 inst->urb_write_flags,
                 inst->mlen,
                 0,                      /* response len */
                 inst->offset,           /* urb destination offset */
                 BRW_URB_SWIZZLE_INTERLEAVE);
}
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 * If relAddr is true, we'll do an indirect fetch using the address register.
 * (Not implemented: relAddr is currently ignored.)
 */
void brw_dp_READ_4( struct brw_compile *p,
                    struct brw_reg dest,
                    GLboolean relAddr,
                    GLuint location,
                    GLuint bind_table_index )
{
   /* XXX: relAddr not implemented */
   const GLuint msg_reg_nr = 1;
   struct brw_reg location_mrf;
   struct brw_instruction *send;

   /* Setup MRF[1] with location/offset into const buffer, with
    * predication, compression and masking all turned off.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   location_mrf = retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD);

   /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
    */
   brw_MOV(p, location_mrf, brw_imm_ud(location));
   brw_pop_insn_state(p);

   /* Now the read itself. */
   send = next_insn(p, BRW_OPCODE_SEND);

   send->header.predicate_control = BRW_PREDICATE_NONE;
   send->header.compression_control = BRW_COMPRESSION_NONE;
   send->header.destreg__conditionalmod = msg_reg_nr;
   send->header.mask_control = BRW_MASK_DISABLE;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(send, dest);
   brw_set_src0(send, brw_null_reg());

   brw_set_dp_read_message(p->brw,
                           send,
                           bind_table_index,
                           0,  /* msg_control (0 means 1 Oword) */
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                           0, /* source cache = data cache */
                           1, /* msg_length */
                           1, /* response_length (1 Oword) */
                           0); /* eot */
}
static void brw_wm_affine_st(struct brw_compile *p, int dw, int channel, int msg) { int uv; if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); uv = p->gen >= 060 ? 6 : 3; } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); uv = p->gen >= 060 ? 4 : 3; } uv += 2*channel; msg++; if (p->gen >= 060) { brw_PLN(p, brw_message_reg(msg), brw_vec1_grf(uv, 0), brw_vec8_grf(2, 0)); msg += dw/8; brw_PLN(p, brw_message_reg(msg), brw_vec1_grf(uv, 4), brw_vec8_grf(2, 0)); } else { struct brw_reg r = brw_vec1_grf(uv, 0); brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); msg += dw/8; brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); } }
/*
 * Emit one vertex to the URB from the geometry shader.
 *
 * Unless this is the last vertex, the write also allocates a new URB
 * entry; the last vertex's write is flagged EOT instead.  On gen >= 6 the
 * header is built in c->reg.temp (a copy of R0), and DWORD 0 of the
 * response is copied back into R0 after an allocating write.
 */
static void brw_gs_emit_vue(struct brw_gs_compile *c,
			    struct brw_reg vert,
			    GLboolean last,
			    GLuint header)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &c->func.brw->intel;
   const GLboolean allocate = !last;
   const GLboolean gen6_plus = intel->gen >= 6;
   struct brw_reg hdr;
   struct brw_reg response;

   if (gen6_plus) {
      hdr = c->reg.temp;
      brw_MOV(p, retype(hdr, BRW_REGISTER_TYPE_UD),
	      retype(c->reg.R0, BRW_REGISTER_TYPE_UD));
   } else {
      hdr = c->reg.R0;
   }

   /* Overwrite PrimType and PrimStart in the message header, for
    * each vertex in turn:
    */
   brw_MOV(p, get_element_ud(hdr, 2), brw_imm_ud(header));

   /* Copy the vertex from vertn into m1..mN+1:
    */
   brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);

   /* Send each vertex as a separate write to the urb.  This is
    * different to the concept in brw_sf_emit.c, where subsequent
    * writes are used to build up a single urb entry.  Each of these
    * writes instantiates a separate urb entry, and a new one must be
    * allocated each time.
    */
   response = allocate ? hdr : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD);

   brw_urb_WRITE(p,
		 response,
		 0,
		 hdr,
		 allocate,
		 1,		/* used */
		 c->nr_regs + 1, /* msg length */
		 allocate ? 1 : 0, /* response length */
		 allocate ? 0 : 1, /* eot */
		 1,		/* writes_complete */
		 0,		/* urb offset */
		 BRW_URB_SWIZZLE_NONE);

   if (gen6_plus && allocate)
      brw_MOV(p, get_element_ud(c->reg.R0, 0), get_element_ud(hdr, 0));
}
/**
 * Emit a vertex using the URB_WRITE message.  Use the contents of
 * c->reg.header for the message header, and the registers starting at \c vert
 * for the vertex data.
 *
 * The vertex is written in chunks of at most 14 registers; only the final
 * chunk is marked complete.
 *
 * If \c last is true, then this is the last vertex, so no further URB space
 * should be allocated, and this message should end the thread.
 *
 * If \c last is false, then a new URB entry will be allocated, and its handle
 * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE
 * message.
 */
static void brw_ff_gs_emit_vue(struct brw_ff_gs_compile *c,
                               struct brw_reg vert,
                               bool last)
{
   struct brw_codegen *p = &c->func;
   int write_offset = 0;
   bool complete;

   do {
      /* We can't write more than 14 registers at a time to the URB */
      const int write_len = MIN2(c->nr_regs - write_offset, 14);
      enum brw_urb_write_flags flags;
      bool allocating;

      /* This is the final chunk once it covers everything left over. */
      complete = (write_len == c->nr_regs - write_offset);

      /* Copy the vertex from vertn into m1..mN+1:
       */
      brw_copy8(p, brw_message_reg(1), offset(vert, write_offset), write_len);

      /* Send the vertex data to the URB.  If this is the last write for this
       * vertex, then we mark it as complete, and either end the thread or
       * allocate another vertex URB entry (depending whether this is the last
       * vertex).
       */
      flags = !complete ? BRW_URB_WRITE_NO_FLAGS
            : last      ? BRW_URB_WRITE_EOT_COMPLETE
                        : BRW_URB_WRITE_ALLOCATE_COMPLETE;
      allocating = (flags & BRW_URB_WRITE_ALLOCATE) != 0;

      brw_urb_WRITE(p,
                    allocating ? c->reg.temp
                               : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                    0,
                    c->reg.header,
                    flags,
                    write_len + 1, /* msg length */
                    allocating ? 1 : 0, /* response length */
                    write_offset,  /* urb offset */
                    BRW_URB_SWIZZLE_NONE);

      write_offset += write_len;
   } while (!complete);

   if (last)
      return;

   /* Keep the newly allocated handle for the next URB_WRITE. */
   brw_MOV(p, get_element_ud(c->reg.header, 0),
           get_element_ud(c->reg.temp, 0));
}
void brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst, enum opcode op, unsigned base_mrf, unsigned msg_length) { fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf), brw_imm_ud(0u), brw_imm_ud(0u)); inst->base_mrf = base_mrf; inst->mlen = msg_length; inst->header_size = 0; insts.push_tail(inst); }
static void brw_wm_write(struct brw_compile *p, int dw, int src) { int n; if (dw == 8 && p->gen >= 060) { /* XXX pixel execution mask? */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0)); brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0)); brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0)); brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0)); goto done; } brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); for (n = 0; n < 4; n++) { if (p->gen >= 060) { brw_MOV(p, brw_message_reg(2 + 2*n), brw_vec8_grf(src + 2*n, 0)); } else if (p->gen >= 045 && dw == 16) { brw_MOV(p, brw_message_reg(2 + n + BRW_MRF_COMPR4), brw_vec8_grf(src + 2*n, 0)); } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(2 + n), brw_vec8_grf(src + 2*n, 0)); if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_MOV(p, brw_message_reg(2 + n + 4), brw_vec8_grf(src + 2*n+1, 0)); } } } done: brw_fb_write(p, dw); }
/**
 * Emit an untyped atomic operation whose payload starts at inst->base_mrf.
 *
 * Both \p atomic_op and \p surf_index must be UD immediates; the surface is
 * marked as used after the message is emitted.
 */
void
vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
                                        struct brw_reg dst,
                                        struct brw_reg atomic_op,
                                        struct brw_reg surf_index)
{
   assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
          atomic_op.type == BRW_REGISTER_TYPE_UD &&
          surf_index.file == BRW_IMMEDIATE_VALUE &&
          surf_index.type == BRW_REGISTER_TYPE_UD);

   struct brw_reg payload = brw_message_reg(inst->base_mrf);

   brw_untyped_atomic(p, dst, payload,
                      atomic_op.dw1.ud,
                      surf_index.dw1.ud,
                      inst->mlen, 1);

   mark_surface_used(surf_index.dw1.ud);
}
/**
 * Emit a pull-constant load as an OWord dual block read (gen <= 7 only).
 *
 * \p index must be a UD immediate holding the surface index.  The message
 * is g0 as the header followed by \p offset moved as type D; the surface is
 * marked as used after the SEND has been set up.
 */
void
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
                                            struct brw_reg dst,
                                            struct brw_reg index,
                                            struct brw_reg offset)
{
   assert(brw->gen <= 7);
   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   const uint32_t surf_index = index.dw1.ud;

   /* Data port message type for this generation. */
   const uint32_t msg_type =
      (brw->gen >= 6)
         ? GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
         : (brw->gen == 5 || brw->is_g4x)
              ? G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ
              : BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   /* Payload: g0 as the header, followed by the offset moved as D. */
   struct brw_reg msg_header = brw_vec8_grf(0, 0);
   gen6_resolve_implied_move(p, &msg_header, inst->base_mrf);

   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D),
           offset);

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *read = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, read, dst);
   brw_set_src0(p, read, msg_header);
   if (brw->gen < 6)
      read->header.destreg__conditionalmod = inst->base_mrf;
   brw_set_dp_read_message(p, read,
                           surf_index,
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           2, /* mlen */
                           true, /* header_present */
                           1 /* rlen */);

   brw_mark_surface_used(&prog_data->base, surf_index);
}