/** * Read float[4] constant(s) from VS constant buffer. * For relative addressing, two float[4] constants will be read into 'dest'. * Otherwise, one float[4] constant will be read into the lower half of 'dest'. */ void brw_dp_READ_4_vs(struct brw_compile *p, struct brw_reg dest, GLuint oword, GLboolean relAddr, struct brw_reg addrReg, GLuint location, GLuint bind_table_index) { GLuint msg_reg_nr = 1; assert(oword < 2); /* printf("vs const read msg, location %u, msg_reg_nr %d\n", location, msg_reg_nr); */ /* Setup MRF[1] with location/offset into const buffer */ { struct brw_reg b; brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); /*brw_set_access_mode(p, BRW_ALIGN_16);*/ /* XXX I think we're setting all the dwords of MRF[1] to 'location'. * when the docs say only dword[2] should be set. Hmmm. But it works. */ b = brw_message_reg(msg_reg_nr); b = retype(b, BRW_REGISTER_TYPE_UD); /*b = get_element_ud(b, 2);*/ if (relAddr) { brw_ADD(p, b, addrReg, brw_imm_ud(location)); } else { brw_MOV(p, b, brw_imm_ud(location)); } brw_pop_insn_state(p); } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; /*insn->header.access_mode = BRW_ALIGN_16;*/ brw_set_dest(insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, insn, bind_table_index, oword, /* 0 = lower Oword, 1 = upper Oword */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 0, /* source cache = data cache */ 1, /* msg_length */ 1, /* response_length (1 Oword) */ 0); /* eot */ } }
/** * Texture sample instruction. * Note: the msg_type plus msg_length values determine exactly what kind * of sampling operation is performed. See volume 4, page 161 of docs. */ void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, GLuint binding_table_index, GLuint sampler, GLuint writemask, GLuint msg_type, GLuint response_length, GLuint msg_length, GLboolean eot, GLuint header_present, GLuint simd_mode) { GLboolean need_stall = 0; if (writemask == 0) { /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } /* Hardware doesn't do destination dependency checking on send * instructions properly. Add a workaround which generates the * dependency by other means. In practice it seems like this bug * only crops up for texture samples, and only where registers are * written by the send and then written again later without being * read in between. Luckily for us, we already track that * information and use it to modify the writemask for the * instruction, so that is a guide for whether a workaround is * needed. */ if (writemask != WRITEMASK_XYZW) { GLuint dst_offset = 0; GLuint i, newmask = 0, len = 0; for (i = 0; i < 4; i++) { if (writemask & (1<<i)) break; dst_offset += 2; } for (; i < 4; i++) { if (!(writemask & (1<<i))) break; newmask |= 1<<i; len++; } if (newmask != writemask) { need_stall = 1; /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); newmask = ~newmask & WRITEMASK_XYZW; brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, m1, brw_vec8_grf(0,0)); brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); brw_pop_insn_state(p); src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); dest = offset(dest, dst_offset); response_length = len * 2; } } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_sampler_message(p->brw, insn, binding_table_index, sampler, msg_type, response_length, msg_length, eot, header_present, simd_mode); } if (need_stall) { struct brw_reg reg = vec8(offset(dest, response_length-1)); /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, reg, reg); brw_pop_insn_state(p); } }
const GLuint * brw_blorp_const_color_program::compile(struct brw_context *brw, GLuint *program_size) { /* Set up prog_data */ memset(&prog_data, 0, sizeof(prog_data)); prog_data.persample_msaa_dispatch = false; alloc_regs(); brw_set_compression_control(&func, BRW_COMPRESSION_NONE); struct brw_reg mrf_rt_write = retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F); uint32_t mlen, msg_type; if (key->use_simd16_replicated_data) { /* The message payload is a single register with the low 4 floats/ints * filled with the constant clear color. */ brw_set_mask_control(&func, BRW_MASK_DISABLE); brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba); brw_set_mask_control(&func, BRW_MASK_ENABLE); msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; mlen = 1; } else { for (int i = 0; i < 4; i++) { /* The message payload is pairs of registers for 16 pixels each of r, * g, b, and a. */ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED); brw_MOV(&func, brw_message_reg(base_mrf + i * 2), brw_vec1_grf(clear_rgba.nr, i)); brw_set_compression_control(&func, BRW_COMPRESSION_NONE); } msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; mlen = 8; } /* Now write to the render target and terminate the thread */ brw_fb_WRITE(&func, 16 /* dispatch_width */, base_mrf /* msg_reg_nr */, mrf_rt_write /* src0 */, msg_type, BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX, mlen, 0 /* response_length */, true /* eot */, false /* header present */); if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) { printf("Native code for BLORP clear:\n"); brw_dump_compile(&func, stdout, 0, func.next_insn_offset); printf("\n"); } return brw_get_program(&func, program_size); }
static void compile_gs_prog( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct intel_context *intel = &brw->intel; struct brw_gs_compile c; const GLuint *program; void *mem_ctx; GLuint program_size; memset(&c, 0, sizeof(c)); c.key = *key; /* The geometry shader needs to access the entire VUE. */ brw_compute_vue_map(&c.vue_map, intel, c.key.userclip_active, c.key.attrs); c.nr_regs = (c.vue_map.num_slots + 1)/2; mem_ctx = NULL; /* Begin the compilation: */ brw_init_compile(brw, &c.func, mem_ctx); c.func.single_program_flow = 1; /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); if (intel->gen >= 6) { unsigned num_verts; bool check_edge_flag; /* On Sandybridge, we use the GS for implementing transform feedback * (called "Stream Out" in the PRM). */ switch (key->primitive) { case _3DPRIM_POINTLIST: num_verts = 1; check_edge_flag = false; break; case _3DPRIM_LINELIST: case _3DPRIM_LINESTRIP: case _3DPRIM_LINELOOP: num_verts = 2; check_edge_flag = false; break; case _3DPRIM_TRILIST: case _3DPRIM_TRIFAN: case _3DPRIM_TRISTRIP: case _3DPRIM_RECTLIST: num_verts = 3; check_edge_flag = false; break; case _3DPRIM_QUADLIST: case _3DPRIM_QUADSTRIP: case _3DPRIM_POLYGON: num_verts = 3; check_edge_flag = true; break; default: assert(!"Unexpected primitive type in Gen6 SOL program."); return; } gen6_sol_program(&c, key, num_verts, check_edge_flag); } else { /* On Gen4-5, we use the GS to decompose certain types of primitives. * Note that primitives which don't require a GS program have already * been weeded out by now. */ switch (key->primitive) { case _3DPRIM_QUADLIST: brw_gs_quads( &c, key ); break; case _3DPRIM_QUADSTRIP: brw_gs_quad_strip( &c, key ); break; case _3DPRIM_LINELOOP: brw_gs_lines( &c ); break; default: ralloc_free(mem_ctx); return; } } /* get the program */ program = brw_get_program(&c.func, &program_size); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { int i; printf("gs:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], intel->gen); printf("\n"); } brw_upload_cache(&brw->cache, BRW_GS_PROG, &c.key, sizeof(c.key), program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->gs.prog_offset, &brw->gs.prog_data); ralloc_free(mem_ctx); }
void vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src) { /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message * Header: M0.5): * * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask * * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1 * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls * Vertex 0 DATA[7]. This bit is ANDed with the corresponding * channel enable to determine the final channel enable. For the * URB_READ_OWORD & URB_READ_HWORD messages, when final channel * enable is 1 it indicates that Vertex 1 DATA [3] will be included * in the writeback message. For the URB_WRITE_OWORD & * URB_WRITE_HWORD messages, when final channel enable is 1 it * indicates that Vertex 1 DATA [3] will be written to the surface. * * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included * * 14 Vertex 1 DATA [2] Channel Mask * 13 Vertex 1 DATA [1] Channel Mask * 12 Vertex 1 DATA [0] Channel Mask * 11 Vertex 0 DATA [3] Channel Mask * 10 Vertex 0 DATA [2] Channel Mask * 9 Vertex 0 DATA [1] Channel Mask * 8 Vertex 0 DATA [0] Channel Mask * * (This is from a section of the PRM that is agnostic to the particular * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to * geometry shader invocations 0 and 1, respectively). Since we have the * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0, * and the enable flags for geometry shader invocation 1 in bits 7:0 of * DWORD 4, we just need to OR them together and store the result in bits * 15:8 of DWORD 5. * * It's easier to get the EU to do this if we think of the src and dst * registers as composed of 32 bytes each; then, we want to pick up the * contents of bytes 0 and 16 from src, OR them together, and store them in * byte 21. * * We can do that by the following EU instruction: * * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all } * * Note: this relies on the source register having zeros in (a) bits 7:4 of * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to * contain valid channel mask values (which are in the range 0x0-0xf). */ dst = retype(dst, BRW_REGISTER_TYPE_UB); src = retype(src, BRW_REGISTER_TYPE_UB); brw_push_insn_state(p); brw_set_access_mode(p, BRW_ALIGN_1); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16)); brw_pop_insn_state(p); }
void vec4_generator::generate_tex(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src) { int msg_type = -1; if (brw->gen >= 5) { switch (inst->opcode) { case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXL: if (inst->shadow_compare) { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; case SHADER_OPCODE_TXD: if (inst->shadow_compare) { /* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */ assert(brw->is_haswell); msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; } break; case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_CMS: if (brw->gen >= 7) msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; else msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_MCS: assert(brw->gen >= 7); msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; break; case SHADER_OPCODE_TXS: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; break; case SHADER_OPCODE_TG4: if (inst->shadow_compare) { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; } else { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; } break; case SHADER_OPCODE_TG4_OFFSET: if (inst->shadow_compare) { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; } else { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; } break; default: assert(!"should not get here: invalid vec4 texture opcode"); break; } } else { switch (inst->opcode) { case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXL: if (inst->shadow_compare) { msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE; assert(inst->mlen == 3); } else { msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD; assert(inst->mlen == 2); } break; case SHADER_OPCODE_TXD: /* There is no sample_d_c message; comparisons are done manually. */ msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS; assert(inst->mlen == 4); break; case SHADER_OPCODE_TXF: msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD; assert(inst->mlen == 2); break; case SHADER_OPCODE_TXS: msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO; assert(inst->mlen == 2); break; default: assert(!"should not get here: invalid vec4 texture opcode"); break; } } assert(msg_type != -1); /* Load the message header if present. If there's a texture offset, we need * to set it up explicitly and load the offset bitfield. Otherwise, we can * use an implied move from g0 to the first message register. */ if (inst->header_present) { if (brw->gen < 6 && !inst->texture_offset) { /* Set up an implied move from g0 to the MRF. */ src = brw_vec8_grf(0, 0); } else { struct brw_reg header = retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD); /* Explicitly set up the message header by copying g0 to the MRF. */ brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); brw_set_access_mode(p, BRW_ALIGN_1); if (inst->texture_offset) { /* Set the texel offset bits in DWord 2. */ brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(inst->texture_offset)); } if (inst->sampler >= 16) { /* The "Sampler Index" field can only store values between 0 and 15. * However, we can add an offset to the "Sampler State Pointer" * field, effectively selecting a different set of 16 samplers. * * The "Sampler State Pointer" needs to be aligned to a 32-byte * offset, and each sampler state is only 16-bytes, so we can't * exclusively use the offset - we have to use both. */ assert(brw->is_haswell); /* field only exists on Haswell */ brw_ADD(p, get_element_ud(header, 3), get_element_ud(brw_vec8_grf(0, 0), 3), brw_imm_ud(16 * (inst->sampler / 16) * sizeof(gen7_sampler_state))); } brw_pop_insn_state(p); } } uint32_t return_format; switch (dst.type) { case BRW_REGISTER_TYPE_D: return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32; break; case BRW_REGISTER_TYPE_UD: return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; break; default: return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; break; } uint32_t surface_index = ((inst->opcode == SHADER_OPCODE_TG4 || inst->opcode == SHADER_OPCODE_TG4_OFFSET) ? prog_data->base.binding_table.gather_texture_start : prog_data->base.binding_table.texture_start) + inst->sampler; brw_SAMPLE(p, dst, inst->base_mrf, src, surface_index, inst->sampler % 16, msg_type, 1, /* response length */ inst->mlen, inst->header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, return_format); mark_surface_used(surface_index); }
void vec4_generator::generate_code(exec_list *instructions) { int last_native_insn_offset = 0; const char *last_annotation_string = NULL; const void *last_annotation_ir = NULL; if (unlikely(debug_flag)) { if (shader_prog) { printf("Native code for vertex shader %d:\n", shader_prog->Name); } else { printf("Native code for vertex program %d:\n", prog->Id); } } foreach_list(node, instructions) { vec4_instruction *inst = (vec4_instruction *)node; struct brw_reg src[3], dst; if (unlikely(debug_flag)) { if (last_annotation_ir != inst->ir) { last_annotation_ir = inst->ir; if (last_annotation_ir) { printf(" "); if (shader_prog) { ((ir_instruction *) last_annotation_ir)->print(); } else { const prog_instruction *vpi; vpi = (const prog_instruction *) inst->ir; printf("%d: ", (int)(vpi - prog->Instructions)); _mesa_fprint_instruction_opt(stdout, vpi, 0, PROG_PRINT_DEBUG, NULL); } printf("\n"); } } if (last_annotation_string != inst->annotation) { last_annotation_string = inst->annotation; if (last_annotation_string) printf(" %s\n", last_annotation_string); } } for (unsigned int i = 0; i < 3; i++) { src[i] = inst->get_src(this->prog_data, i); } dst = inst->get_dst(); brw_set_conditionalmod(p, inst->conditional_mod); brw_set_predicate_control(p, inst->predicate); brw_set_predicate_inverse(p, inst->predicate_inverse); brw_set_saturate(p, inst->saturate); brw_set_mask_control(p, inst->force_writemask_all); unsigned pre_emit_nr_insn = p->nr_insn; generate_vec4_instruction(inst, dst, src); if (inst->no_dd_clear || inst->no_dd_check) { assert(p->nr_insn == pre_emit_nr_insn + 1 || !"no_dd_check or no_dd_clear set for IR emitting more " "than 1 instruction"); struct brw_instruction *last = &p->store[pre_emit_nr_insn]; if (inst->no_dd_clear) last->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED; if (inst->no_dd_check) last->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED; } if (unlikely(debug_flag)) { brw_dump_compile(p, stdout, last_native_insn_offset, p->next_insn_offset); } last_native_insn_offset = p->next_insn_offset; }
static void compile_clip_prog( struct brw_context *brw, struct brw_clip_prog_key *key ) { struct intel_context *intel = &brw->intel; struct brw_clip_compile c; const GLuint *program; GLuint program_size; GLuint delta; GLuint i; GLuint header_regs; memset(&c, 0, sizeof(c)); /* Begin the compilation: */ brw_init_compile(brw, &c.func); c.func.single_program_flow = 1; c.key = *key; /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; if (intel->gen == 5) header_regs = 3; else header_regs = 1; delta = header_regs * REG_SIZE; for (i = 0; i < VERT_RESULT_MAX; i++) { if (c.key.attrs & BITFIELD64_BIT(i)) { c.offset[i] = delta; delta += ATTR_SIZE; c.idx_to_attr[c.nr_attrs] = i; c.nr_attrs++; } } /* The vertex attributes start at a URB row-aligned offset after * the 8-20 dword vertex header, and continue for a URB row-aligned * length. nr_regs determines the urb_read_length from the start * of the header to the end of the vertex data. */ c.nr_regs = header_regs + (c.nr_attrs + 1) / 2; c.nr_bytes = c.nr_regs * REG_SIZE; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ /* For some reason the thread is spawned with only 4 channels * unmasked. */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); /* Would ideally have the option of producing a program which could * do all three: */ switch (key->primitive) { case GL_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; case GL_LINES: brw_emit_line_clip( &c ); break; case GL_POINTS: brw_emit_point_clip( &c ); break; default: assert(0); return; } /* get the program */ program = brw_get_program(&c.func, &program_size); if (INTEL_DEBUG & DEBUG_CLIP) { printf("clip:\n"); for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) brw_disasm(stdout, &((struct brw_instruction *)program)[i], intel->gen); printf("\n"); } /* Upload */ drm_intel_bo_unreference(brw->clip.prog_bo); brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_CLIP_PROG, &c.key, sizeof(c.key), NULL, 0, program, program_size, &c.prog_data, sizeof(c.prog_data), &brw->clip.prog_data); }
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { #define MAX_IFSN 32 #define MAX_LOOP_DEPTH 32 struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; struct brw_instruction *inst0, *inst1; int i, if_insn = 0, loop_insn = 0; struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); c->reg_index = 0; prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (i = 0; i < c->nr_fp_insns; i++) { struct prog_instruction *inst = &c->prog_instructions[i]; struct prog_instruction *orig_inst; if ((orig_inst = inst->Data) != 0) orig_inst->Data = current_insn(p); if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); else brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, inst); break; case WM_DELTAXY: emit_delta_xy(c, inst); break; case WM_PIXELW: emit_pixel_w(c, inst); break; case WM_LINTERP: emit_linterp(c, inst); break; case WM_PINTERP: emit_pinterp(c, inst); break; case WM_CINTERP: emit_cinterp(c, inst); break; case WM_WPOSXY: emit_wpos_xy(c, inst); break; case WM_FB_WRITE: emit_fb_write(c, inst); break; case OPCODE_ABS: emit_abs(c, inst); break; case OPCODE_ADD: emit_add(c, inst); break; case OPCODE_SUB: emit_sub(c, inst); break; case OPCODE_FRC: emit_frc(c, inst); break; case OPCODE_FLR: emit_flr(c, inst); break; case OPCODE_LRP: emit_lrp(c, inst); break; case OPCODE_INT: emit_int(c, inst); break; case OPCODE_MOV: emit_mov(c, inst); break; case OPCODE_DP3: emit_dp3(c, inst); break; case OPCODE_DP4: emit_dp4(c, inst); break; case OPCODE_XPD: emit_xpd(c, inst); break; case OPCODE_DPH: emit_dph(c, inst); break; case OPCODE_RCP: emit_rcp(c, inst); break; case OPCODE_RSQ: emit_rsq(c, inst); break; case OPCODE_SIN: emit_sin(c, inst); break; case OPCODE_COS: emit_cos(c, inst); break; case OPCODE_EX2: emit_ex2(c, inst); break; case OPCODE_LG2: emit_lg2(c, inst); break; case OPCODE_MAX: emit_max(c, inst); break; case OPCODE_MIN: emit_min(c, inst); break; case OPCODE_DDX: emit_ddx(c, inst); break; case OPCODE_DDY: emit_ddy(c, inst); break; case OPCODE_SLT: emit_slt(c, inst); break; case OPCODE_SLE: emit_sle(c, inst); break; case OPCODE_SGT: emit_sgt(c, inst); break; case OPCODE_SGE: emit_sge(c, inst); break; case OPCODE_SEQ: emit_seq(c, inst); break; case OPCODE_SNE: emit_sne(c, inst); break; case OPCODE_MUL: emit_mul(c, inst); break; case OPCODE_POW: emit_pow(c, inst); break; case OPCODE_MAD: emit_mad(c, inst); break; case OPCODE_TEX: emit_tex(c, inst); break; case OPCODE_TXB: emit_txb(c, inst); break; case OPCODE_KIL_NV: emit_kil(c); break; case OPCODE_IF: assert(if_insn < MAX_IFSN); if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); break; case OPCODE_ENDIF: assert(if_insn > 0); brw_ENDIF(p, if_inst[--if_insn]); break; case OPCODE_BGNSUB: case OPCODE_ENDSUB: break; case OPCODE_CAL: brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_access_mode(p, BRW_ALIGN_1); brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); orig_inst = inst->Data; orig_inst->Data = &p->store[p->nr_insn]; brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_pop_insn_state(p); break; case OPCODE_RET: brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(-4)); brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); break; case OPCODE_BGNLOOP: loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: brw_BREAK(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CONT: brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: loop_insn--; inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); /* patch all the BREAK instructions from last BEGINLOOP */ while (inst0 > loop_inst[loop_insn]) { inst0--; if (inst0->header.opcode == BRW_OPCODE_BREAK) { inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; inst0->bits3.if_else.pop_count = 0; } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { inst0->bits3.if_else.jump_count = inst1 - inst0; inst0->bits3.if_else.pop_count = 0; } } break; default: _mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); for (i = 0; i < c->fp->program.Base.NumInstructions; i++) c->fp->program.Base.Instructions[i].Data = NULL; }