/*
 * Emit the ADD instructions that turn the packed unsigned-word coordinates
 * held in r1 into per-pixel X and/or Y values.
 *
 *   p     - instruction emitter
 *   dst   - destination registers: dst[0] receives X, dst[1] receives Y
 *   mask  - WRITEMASK_X / WRITEMASK_Y bits selecting which ADDs are emitted
 *   arg0  - not referenced by this function
 *
 * Compression is switched off for the ADDs and is unconditionally left at
 * BRW_COMPRESSION_COMPRESSED on return (the previous setting is not saved).
 */
static void emit_pixel_xy(struct brw_compile *p,
                          const struct brw_reg *dst,
                          GLuint mask,
                          const struct brw_reg *arg0)
{
   struct brw_reg payload_uw =
      retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   /* Calculate pixel centers by adding 1 or 0 to each of the
    * micro-tile coordinates passed in r1.
    */
   if (mask & WRITEMASK_X) {
      struct brw_reg x_dst = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
      struct brw_reg x_src = stride(suboffset(payload_uw, 4), 2, 4, 0);

      brw_ADD(p, x_dst, x_src, brw_imm_v(0x10101010));
   }

   if (mask & WRITEMASK_Y) {
      struct brw_reg y_dst = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
      struct brw_reg y_src = stride(suboffset(payload_uw, 5), 2, 4, 0);

      brw_ADD(p, y_dst, y_src, brw_imm_v(0x11001100));
   }

   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
/*
 * Emit the two instructions that make up a framebuffer write:
 * a MOV of r1 into message register (base_reg + 1), then the
 * brw_fb_WRITE send itself.
 *
 *   c        - WM compile context; instructions go to c->func
 *   base_reg - first message register of the payload
 *   nr, target, eot - forwarded unchanged to brw_fb_WRITE()
 */
static void fire_fb_write(struct brw_wm_compile *c,
                          GLuint base_reg,
                          GLuint nr,
                          GLuint target,
                          GLuint eot)
{
   struct brw_compile *p = &c->func;
   struct brw_reg null_dst_uw;
   struct brw_reg r0_uw;

   /* Pass through control information:
    *
    * mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask }
    *
    * The mask/compression overrides only apply to this MOV; the
    * surrounding instruction state is restored by the pop.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p, brw_message_reg(base_reg + 1), brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   /* Send framebuffer write message:
    *
    * send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT }
    */
   null_dst_uw = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   r0_uw = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

   brw_fb_WRITE(p, null_dst_uw, base_reg, r0_uw, target, nr, 0, eot);
}
/**
 * Append a 16-wide CMP of \p x against \p y to the instruction list.
 *
 * The destination is the null register and \p op is stored as the
 * instruction's conditional modifier.
 *
 * \return the newly allocated instruction (allocated from mem_ctx).
 */
fs_inst *
brw_blorp_eu_emitter::emit_cmp(enum brw_conditional_mod op,
                               const struct brw_reg &x,
                               const struct brw_reg &y)
{
   const struct brw_reg null_dst = vec16(brw_null_reg());

   fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_CMP, 16, null_dst, x, y);
   inst->conditional_mod = op;

   insts.push_tail(inst);
   return inst;
}
/**
 * Computes the screen-space x,y position of the pixels.
 *
 * This will be used by emit_delta_xy() or emit_wpos_xy() for
 * interpolation of attributes.
 *
 * Payload R0:
 *
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 * R0.1..8 -- ?
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 * R1.6 -- ?
 * R1.7 -- ?
 * R1.8 -- ?
 *
 * \param c     WM compile context; instructions are emitted into c->func
 * \param dst   dst[0] receives X, dst[1] receives Y (both are read here
 *              regardless of \p mask)
 * \param mask  WRITEMASK_X / WRITEMASK_Y bits selecting which ADDs to emit
 */
void emit_pixel_xy(struct brw_wm_compile *c,
                   const struct brw_reg *dst,
                   GLuint mask)
{
   struct brw_compile *p = &c->func;
   struct brw_reg payload_uw =
      retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   struct brw_reg x_uw = retype(dst[0], BRW_REGISTER_TYPE_UW);
   struct brw_reg y_uw = retype(dst[1], BRW_REGISTER_TYPE_UW);

   /* The compression override below is scoped by this push/pop pair. */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   /* Destination width follows the dispatch width. */
   if (c->dispatch_width == 16) {
      x_uw = vec16(x_uw);
      y_uw = vec16(y_uw);
   } else {
      x_uw = vec8(x_uw);
      y_uw = vec8(y_uw);
   }

   /* Calculate pixel centers by adding 1 or 0 to each of the
    * micro-tile coordinates passed in r1.
    */
   if (mask & WRITEMASK_X) {
      brw_ADD(p,
              x_uw,
              stride(suboffset(payload_uw, 4), 2, 4, 0),
              brw_imm_v(0x10101010));
   }

   if (mask & WRITEMASK_Y) {
      brw_ADD(p,
              y_uw,
              stride(suboffset(payload_uw, 5), 2, 4, 0),
              brw_imm_v(0x11001100));
   }

   brw_pop_insn_state(p);
}
/*
 * Emit a biased texture sample (BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS).
 *
 * The payload is built in message registers 2, 4, 6 and 8: the three
 * coordinate slots are filled from arg[0..2] as far as the texture
 * target uses them (remaining slots get 0.0f), and arg[3] always goes
 * into register 8.
 *
 *   c         - WM compile context; instructions are emitted into c->func
 *   inst      - supplies tex_idx, tex_unit and writemask
 *   dst       - dst[0] is the start of the result
 *   dst_flags - not referenced by this function
 *   arg       - source operands, arg[0..3]
 */
static void emit_txb(struct brw_wm_compile *c,
                     const struct brw_wm_instruction *inst,
                     struct brw_reg *dst,
                     GLuint dst_flags,
                     struct brw_reg *arg)
{
   struct brw_compile *p = &c->func;
   GLuint msgLength;
   GLuint nr_coords;
   GLuint i;

   /* Shadow ignored for txb.
    */
   switch (inst->tex_idx) {
   case TEXTURE_1D_INDEX:
      nr_coords = 1;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_RECT_INDEX:
      nr_coords = 2;
      break;
   default:
      nr_coords = 3;
      break;
   }

   /* Coordinate i lives in message register 2 + 2*i; unused slots are
    * zero-filled.
    */
   for (i = 0; i < 3; i++) {
      if (i < nr_coords)
         brw_MOV(p, brw_message_reg(2 + 2 * i), arg[i]);
      else
         brw_MOV(p, brw_message_reg(2 + 2 * i), brw_imm_f(0));
   }

   brw_MOV(p, brw_message_reg(8), arg[3]);
   msgLength = 9;

   brw_SAMPLE(p,
              retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
              1,
              retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
              inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
              inst->tex_unit,                    /* sampler */
              inst->writemask,
              BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
              8,                                 /* responseLength */
              msgLength,
              0);
}
/**
 * Load a GPR from scratch memory.
 *
 * \param c     WM compile context; instructions are emitted into c->func
 * \param reg   register to fill
 * \param slot  scratch slot to read; slot 0 is the undef value and is
 *              materialized as an immediate 0.0f instead of a read
 */
static void emit_unspill(struct brw_wm_compile *c,
                         struct brw_reg reg,
                         GLuint slot)
{
   struct brw_compile *p = &c->func;

   if (slot != 0) {
      /*
        mov (1) r0.2<1>:d    0x000000c0:d { Align1 NoMask }
        send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud { Align1 }
      */
      brw_oword_block_read(p, vec16(reg), brw_message_reg(1), 2, slot);
   } else {
      /* Slot 0 is the undef value. */
      brw_MOV(p, reg, brw_imm_f(0));
   }
}
/*
 * Emit the instructions that write a register out to a slot: a MOV of
 * `reg` into message register 2, followed by brw_dp_WRITE_16() for `slot`.
 *
 *   c    - WM compile context; instructions are emitted into c->func
 *   reg  - register whose contents are copied into the message payload
 *   slot - slot identifier handed to brw_dp_WRITE_16()
 */
static void emit_spill(struct brw_wm_compile *c,
                       struct brw_reg reg,
                       GLuint slot)
{
   struct brw_compile *p = &c->func;
   struct brw_reg r0_uw =
      retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW);

   /*
     mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
   */
   brw_MOV(p, brw_message_reg(2), reg);

   /*
     mov (1) r0.2<1>:d    0x00000080:d { Align1 NoMask }
     send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud { Align1 }
   */
   brw_dp_WRITE_16(p, r0_uw, 1, slot);
}
static void wm_src_sample_argb(struct brw_compile *p) { static const uint32_t fragment[][4] = { #include "exa_wm_src_affine.g6b" #include "exa_wm_src_sample_argb.g6b" #include "exa_wm_write.g6b" }; int n; brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); brw_pop_insn_state(p); brw_SAMPLE(p, retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW), 1, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD), 1, 0, WRITEMASK_XYZW, GEN5_SAMPLER_MESSAGE_SAMPLE, 8, 5, true, BRW_SAMPLER_SIMD_MODE_SIMD16); for (n = 0; n < p->nr_insn; n++) { brw_disasm(stdout, &p->store[n], 60); } printf("\n\n"); for (n = 0; n < ARRAY_SIZE(fragment); n++) { brw_disasm(stdout, (const struct brw_instruction *)&fragment[n][0], 60); } }
void brw_blorp_eu_emitter::emit_scattered_read(const struct brw_reg &dst, enum opcode opcode, const struct brw_reg &src0, unsigned msg_reg_nr, unsigned msg_length, int dispatch_width, bool use_header) { assert(opcode == SHADER_OPCODE_DWORD_SCATTERED_READ || (brw_ctx->gen >= 7 && opcode == SHADER_OPCODE_BYTE_SCATTERED_READ)); fs_inst *inst = new (mem_ctx) fs_inst(opcode); switch (dispatch_width) { case 1: default: inst->dst = vec1(dst); break; case 2: inst->dst = vec2(dst); break; case 4: inst->dst = vec4(dst); break; case 8: inst->dst = vec8(dst); break; case 16: inst->dst = vec16(dst); break; } inst->src[0] = src0; inst->base_mrf = msg_reg_nr; inst->mlen = msg_length; inst->header_present = use_header; inst->target = BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX; insts.push_tail(inst); }
/*
 * Emit the instructions that refill `reg` from `slot`.
 *
 *   c    - WM compile context; instructions are emitted into c->func
 *   reg  - register to fill
 *   slot - slot to read via brw_dp_READ_16(); slot 0 is the undef value
 *          and is materialized as an immediate 0.0f instead of a read
 */
static void emit_unspill(struct brw_wm_compile *c,
                         struct brw_reg reg,
                         GLuint slot)
{
   struct brw_compile *p = &c->func;

   if (slot != 0) {
      /*
        mov (1) r0.2<1>:d    0x000000c0:d { Align1 NoMask }
        send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud { Align1 }
      */
      struct brw_reg dst_uw = retype(vec16(reg), BRW_REGISTER_TYPE_UW);

      brw_dp_READ_16(p, dst_uw, 1, slot);
   } else {
      /* Slot 0 is the undef value. */
      brw_MOV(p, reg, brw_imm_f(0));
   }
}
/**
 * Emit a LOD-biased texture sample.
 *
 * The payload starts at message register 2. Each of the three coordinate
 * slots and the arg[3] slot occupies mrf_per_channel registers
 * (2 for the 16-wide layout, 1 for the 8-wide layout). Coordinate slots
 * the texture target does not use are zero-filled.
 *
 * \param c              WM compile context; instructions go to c->func
 * \param dst            dst[0] is the start of the result
 * \param dst_flags      only the WRITEMASK_XYZW bits are forwarded
 * \param arg            source operands, arg[0..3]
 * \param depth_payload  passed (retyped to UW) as the send's source
 * \param tex_idx        texture target; anything other than 1D, 2D, RECT,
 *                       3D or CUBE aborts before any MOV is emitted
 * \param sampler        sampler index; also selects the surface through
 *                       SURF_INDEX_TEXTURE()
 */
void emit_txb(struct brw_wm_compile *c,
              struct brw_reg *dst,
              GLuint dst_flags,
              struct brw_reg *arg,
              struct brw_reg depth_payload,
              GLuint tex_idx,
              GLuint sampler)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   GLuint msgLength;
   GLuint msg_type;
   GLuint mrf_per_channel;
   GLuint response_length;
   GLuint nr_coords;
   GLuint slot;
   struct brw_reg dst_retyped;

   /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
    * samples, so we'll use the 16-wide instruction, leave the second halves
    * undefined, and trust the execution mask to keep the undefined pixels
    * from mattering.
    */
   const int use_wide_layout = (c->dispatch_width == 16 || intel->gen < 5);

   /* The narrow layout is only reachable with gen >= 5, so the message
    * type depends on the generation alone.
    */
   if (intel->gen >= 5)
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
   else
      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;

   if (use_wide_layout) {
      mrf_per_channel = 2;
      response_length = 8;
      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
   } else {
      mrf_per_channel = 1;
      response_length = 4;
      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
   }

   /* Shadow ignored for txb.
    */
   switch (tex_idx) {
   case TEXTURE_1D_INDEX:
      nr_coords = 1;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_RECT_INDEX:
      nr_coords = 2;
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_CUBE_INDEX:
      nr_coords = 3;
      break;
   default:
      /* unexpected target */
      abort();
   }

   /* Three coordinate slots, zero-filled past the target's last one. */
   for (slot = 0; slot < 3; slot++) {
      if (slot < nr_coords)
         brw_MOV(p, brw_message_reg(2 + slot * mrf_per_channel), arg[slot]);
      else
         brw_MOV(p, brw_message_reg(2 + slot * mrf_per_channel),
                 brw_imm_f(0));
   }

   /* Fourth slot always carries arg[3]. */
   brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
   msgLength = 2 + 4 * mrf_per_channel - 1;

   /* NOTE(review): BRW_SAMPLER_SIMD_MODE_SIMD16 is passed here even when
    * the narrow (vec8) layout was selected above -- confirm this is
    * intended.
    */
   brw_SAMPLE(p,
              dst_retyped,
              1,
              retype(depth_payload, BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(sampler),
              sampler,
              dst_flags & WRITEMASK_XYZW,
              msg_type,
              response_length,
              msgLength,
              1,
              BRW_SAMPLER_SIMD_MODE_SIMD16,
              BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
/**
 * Emit a texture sample, optionally with shadow comparison.
 *
 * The message payload is built starting at message register 2; every
 * payload entry advances the register cursor by mrf_per_channel
 * (2 when dispatch_width is 16, otherwise 1). The order in which the
 * MOVs below are emitted defines the payload layout.
 *
 * \param c              WM compile context; instructions go to c->func
 * \param dst            dst[0] is the start of the result
 * \param dst_flags      only the WRITEMASK_XYZW bits are forwarded
 * \param arg            source operands; arg[2] doubles as the shadow
 *                       comparison value when \p shadow is set
 * \param depth_payload  passed (retyped to UW) as the send's source
 * \param tex_idx        texture target; unknown targets abort
 * \param sampler        sampler index; also selects the surface through
 *                       SURF_INDEX_TEXTURE() and the gl_clamp_mask bit
 * \param shadow         true for a shadow-compare sample
 */
void emit_tex(struct brw_wm_compile *c,
              struct brw_reg *dst,
              GLuint dst_flags,
              struct brw_reg *arg,
              struct brw_reg depth_payload,
              GLuint tex_idx,
              GLuint sampler,
              bool shadow)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg dst_retyped;
   GLuint next_mrf = 2;
   GLuint response_length;
   GLuint coord, nr_texcoords;
   GLuint coord_mask;
   GLuint msg_type;
   GLuint mrf_per_channel;
   GLuint simd_mode;

   if (c->dispatch_width == 16) {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
      mrf_per_channel = 2;
      response_length = 8;
   } else {
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
      mrf_per_channel = 1;
      response_length = 4;
   }

   /* How many input regs are there?
    */
   switch (tex_idx) {
   case TEXTURE_1D_INDEX:
      coord_mask = WRITEMASK_X;
      nr_texcoords = 1;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_1D_ARRAY_INDEX:
   case TEXTURE_RECT_INDEX:
      coord_mask = WRITEMASK_XY;
      nr_texcoords = 2;
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_2D_ARRAY_INDEX:
   case TEXTURE_CUBE_INDEX:
      coord_mask = WRITEMASK_XYZ;
      nr_texcoords = 3;
      break;
   default:
      /* unexpected target */
      abort();
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5 && c->dispatch_width == 8)
      nr_texcoords = 3;

   if (shadow) {
      if (intel->gen >= 7) {
         /* On Ivybridge, the shadow comparitor comes first. Just load it. */
         brw_MOV(p, brw_message_reg(next_mrf), arg[2]);
         next_mrf += mrf_per_channel;
      } else {
         /* For shadow comparisons, we have to supply u,v,r. */
         nr_texcoords = 3;
      }
   }

   /* Emit the texcoords. Slots beyond what the target uses are
    * zero-filled; saturate is enabled only for the MOV of a clamped
    * coordinate and is always cleared again afterwards.
    */
   for (coord = 0; coord < nr_texcoords; coord++) {
      if (c->key.tex.gl_clamp_mask[coord] & (1 << sampler))
         brw_set_saturate(p, true);

      brw_MOV(p,
              brw_message_reg(next_mrf),
              (coord_mask & (1 << coord)) ? arg[coord] : brw_imm_f(0));
      next_mrf += mrf_per_channel;

      brw_set_saturate(p, false);
   }

   /* Fill in the shadow comparison reference value. */
   if (shadow && intel->gen < 7) {
      /* One zero entry precedes the reference value in two cases:
       * on gen >= 5 it is the cube map array index slot, and on older
       * parts with 8-wide dispatch it is the LOD bias slot.
       */
      if (intel->gen >= 5 || c->dispatch_width == 8) {
         brw_MOV(p, brw_message_reg(next_mrf), brw_imm_f(0));
         next_mrf += mrf_per_channel;
      }

      brw_MOV(p, brw_message_reg(next_mrf), arg[2]);
      next_mrf += mrf_per_channel;
   }

   if (intel->gen >= 5) {
      msg_type = shadow ? GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
                        : GEN5_SAMPLER_MESSAGE_SAMPLE;
   } else {
      /* Note that G45 and older determines shadow compare and dispatch width
       * from message length for most messages.
       */
      msg_type = (c->dispatch_width == 16 && shadow)
                    ? BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
                    : BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
   }

   /* Message length is the number of registers consumed so far. */
   brw_SAMPLE(p,
              dst_retyped,
              1,
              retype(depth_payload, BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(sampler),
              sampler,
              dst_flags & WRITEMASK_XYZW,
              msg_type,
              response_length,
              next_mrf - 1,
              1,
              simd_mode,
              BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
/**
 * Generate the native program for a BLORP constant-color clear.
 *
 * prog_data is zeroed, registers are allocated, the clear color is
 * moved into the message payload (either one replicated-data register
 * or four register pairs, depending on key->use_simd16_replicated_data)
 * and a single end-of-thread render target write is emitted.
 *
 * \param brw           not referenced by this function
 * \param program_size  filled in by brw_get_program()
 * \return the program returned by brw_get_program()
 */
const GLuint *
brw_blorp_const_color_program::compile(struct brw_context *brw,
                                       GLuint *program_size)
{
   /* Set up prog_data */
   memset(&prog_data, 0, sizeof(prog_data));
   prog_data.persample_msaa_dispatch = false;

   alloc_regs();

   brw_set_compression_control(&func, BRW_COMPRESSION_NONE);

   struct brw_reg mrf_rt_write =
      retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);

   uint32_t mlen, msg_type;
   if (!key->use_simd16_replicated_data) {
      /* The message payload is pairs of registers for 16 pixels each of r,
       * g, b, and a.
       */
      for (int chan = 0; chan < 4; chan++) {
         brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(&func,
                 brw_message_reg(base_mrf + chan * 2),
                 brw_vec1_grf(clear_rgba.nr, chan));
         brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
      }

      mlen = 8;
      msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   } else {
      /* The message payload is a single register with the low 4 floats/ints
       * filled with the constant clear color.
       */
      brw_set_mask_control(&func, BRW_MASK_DISABLE);
      brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
      brw_set_mask_control(&func, BRW_MASK_ENABLE);

      mlen = 1;
      msg_type =
         BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
   }

   /* Now write to the render target and terminate the thread */
   brw_fb_WRITE(&func,
                16 /* dispatch_width */,
                base_mrf /* msg_reg_nr */,
                mrf_rt_write /* src0 */,
                msg_type,
                BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
                mlen,
                0 /* response_length */,
                true /* eot */,
                false /* header present */);

   if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
      fprintf(stderr, "Native code for BLORP clear:\n");
      brw_dump_compile(&func, stderr, 0, func.next_insn_offset);
      fprintf(stderr, "\n");
   }

   return brw_get_program(&func, program_size);
}
/*
 * Emit a 16-wide texture sample, with shadow comparison when the
 * instruction's texture unit is flagged in c->key.shadowtex_mask.
 *
 * Payload entry i is written to message register 2 + 2*i. Entries the
 * texture target does not use are zero-filled. For shadow samples the
 * payload is extended to four entries and the fourth one is loaded
 * from arg[2].
 *
 *   c         - WM compile context; instructions are emitted into c->func
 *   inst      - supplies tex_idx, tex_unit and writemask
 *   dst       - dst[0] is the start of the result
 *   dst_flags - not referenced by this function
 *   arg       - source operands, arg[0..2]
 */
static void emit_tex(struct brw_wm_compile *c,
                     const struct brw_wm_instruction *inst,
                     struct brw_reg *dst,
                     GLuint dst_flags,
                     struct brw_reg *arg)
{
   /* Which arg[] element feeds each payload entry; entry 3 reuses arg[2]. */
   static const GLuint swz[4] = {0,1,2,2};

   struct brw_compile *p = &c->func;
   GLuint msgLength, responseLength;
   GLboolean shadow = (c->key.shadowtex_mask & (1 << inst->tex_unit)) != 0;
   GLuint entry, nr;
   GLuint emit;
   GLuint msg_type;

   /* How many input regs are there?
    */
   switch (inst->tex_idx) {
   case TEXTURE_1D_INDEX:
      nr = 1;
      emit = WRITEMASK_X;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_RECT_INDEX:
      nr = 2;
      emit = WRITEMASK_XY;
      break;
   default:
      nr = 3;
      emit = WRITEMASK_XYZ;
      break;
   }

   if (shadow) {
      nr = 4;
      emit |= WRITEMASK_W;
   }

   for (entry = 0; entry < nr; entry++) {
      const GLuint mrf = 2 + 2 * entry;

      if (emit & (1 << entry))
         brw_MOV(p, brw_message_reg(mrf), arg[swz[entry]]);
      else
         brw_MOV(p, brw_message_reg(mrf), brw_imm_f(0));
   }

   /* One leading register plus two per payload entry. */
   msgLength = 1 + 2 * nr;

   responseLength = 8;		/* always */

   if (shadow)
      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
   else
      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;

   brw_SAMPLE(p,
              retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
              1,
              retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
              inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
              inst->tex_unit,                    /* sampler */
              inst->writemask,
              msg_type,
              responseLength,
              msgLength,
              0);
}