/* OPCODE_MIN for the brw_wm_compile path: per enabled channel, emit
 * dst = min(src0, src1) via an unconditional MOV of src0 followed by a
 * predicated MOV of src1.
 */
static void emit_min(struct brw_wm_compile *c, struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg src0, src1, dst;
    int i;

    brw_push_insn_state(p);
    for (i = 0; i < 4; i++) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
	    /* Start with src0, honoring the instruction's saturate mode. */
	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    brw_MOV(p, dst, src0);
	    brw_set_saturate(p, 0);
	    /* Set the flag register where src1 < src0 ... */
	    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    /* ... and overwrite dst with src1 only in those channels. */
	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
	    brw_MOV(p, dst, src1);
	    brw_set_saturate(p, 0);
	    /* Return to unpredicated execution for following instructions. */
	    brw_set_predicate_control_flag_value(p, 0xff);
	}
    }
    brw_pop_insn_state(p);
}
/* OPCODE_LIT: dst.y = arg0.x, dst.z = arg0.y ** arg0.w, with both forced to
 * zero when arg0.x <= 0.  The constant X/W result channels (1.0) are the
 * caller's responsibility.
 */
static void emit_lit( struct brw_compile *p,
		      const struct brw_reg *dst,
		      GLuint mask,
		      const struct brw_reg *arg0 )
{
   /* X and W must have been handled elsewhere; only Y/Z are computed here. */
   assert((mask & WRITEMASK_XW) == 0);

   if (mask & WRITEMASK_Y) {
      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
      brw_MOV(p, dst[1], arg0[0]);
      brw_set_saturate(p, 0);
   }

   if (mask & WRITEMASK_Z) {
      emit_math2(p, BRW_MATH_FUNCTION_POW,
		 &dst[2],
		 WRITEMASK_X | (mask & SATURATE),
		 &arg0[1],
		 &arg0[3]);
   }

   /* Ordinarily you'd use an iff statement to skip or shortcircuit
    * some of the POW calculations above, but 16-wide iff statements
    * seem to lock c1 hardware, so this is a nasty workaround:
    *
    * NOTE(review): the zeroing MOVs below are presumed to be predicated
    * because brw_CMP to the null register enables normal predication for
    * subsequent instructions — confirm against brw_CMP's implementation,
    * since nothing here sets BRW_PREDICATE_NORMAL explicitly.
    */
   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
   {
      if (mask & WRITEMASK_Y)
	 brw_MOV(p, dst[1], brw_imm_f(0));

      if (mask & WRITEMASK_Z)
	 brw_MOV(p, dst[2], brw_imm_f(0));
   }
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
/* OPCODE_DDY for the brw_wm_compile path: read the d/dy coefficient of each
 * channel's linear interpolation setup and scale it by SrcReg[1].w.
 *
 * NOTE(review): assumes the per-channel setup data for src0 lives in GRFs
 * nr/nr+1, 16 bytes apart, with the dy coefficient one float past the base
 * (suboffset 1) — confirm against this compiler's interpolation layout.
 * The meaning of SrcReg[1].w (w vs. 1/w) is not visible here; verify at the
 * call site.
 */
static void emit_ddy(struct brw_wm_compile *c, struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg interp[4];
    struct brw_reg dst;
    struct brw_reg src0, w;
    GLuint nr, i;

    /* Only the GRF number of src0 is used; it locates the setup data. */
    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
    nr = src0.nr;
    /* Scale factor comes in as the second source's W channel. */
    w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
    interp[0] = brw_vec1_grf(nr, 0);
    interp[1] = brw_vec1_grf(nr, 4);
    interp[2] = brw_vec1_grf(nr+1, 0);
    interp[3] = brw_vec1_grf(nr+1, 4);
    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
    for(i = 0; i < 4; i++ ) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    /* dy coefficient is one float past the setup base. */
	    brw_MOV(p, dst, suboffset(interp[i], 1));
	    brw_MUL(p, dst, dst, w);
	}
    }
    brw_set_saturate(p, 0);
}
/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *           DDX                    DDY
 * dst: (ss0.tr - ss0.tl)    (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)    (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)    (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)    (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)    (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)    (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)    (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)    (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But for DDY, it's harder, as we want to produce the pairs swizzled
 * between each other.  We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 *
 * The negate_value boolean is used to negate the d/dy computation for FBOs,
 * since they place the origin at the upper left instead of the lower left.
 */
void emit_ddxy(struct brw_compile *p,
	       const struct brw_reg *dst,
	       GLuint mask,
	       bool is_ddx,
	       const struct brw_reg *arg0,
	       bool negate_value)
{
   int i;
   struct brw_reg src0, src1;

   if (mask & SATURATE)
      brw_set_saturate(p, 1);

   for (i = 0; i < 4; i++ ) {
      if (mask & (1<<i)) {
	 if (is_ddx) {
	    /* Right pixel minus left pixel within each 2-pixel pair; see
	     * the region parameters discussed in the comment above.
	     */
	    src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
			   BRW_REGISTER_TYPE_F,
			   BRW_VERTICAL_STRIDE_2,
			   BRW_WIDTH_2,
			   BRW_HORIZONTAL_STRIDE_0,
			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
	    src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
			   BRW_REGISTER_TYPE_F,
			   BRW_VERTICAL_STRIDE_2,
			   BRW_WIDTH_2,
			   BRW_HORIZONTAL_STRIDE_0,
			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
	 } else {
	    /* Top-left minus bottom-left, replicated across the subspan. */
	    src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
			   BRW_REGISTER_TYPE_F,
			   BRW_VERTICAL_STRIDE_4,
			   BRW_WIDTH_4,
			   BRW_HORIZONTAL_STRIDE_0,
			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
	    src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
			   BRW_REGISTER_TYPE_F,
			   BRW_VERTICAL_STRIDE_4,
			   BRW_WIDTH_4,
			   BRW_HORIZONTAL_STRIDE_0,
			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
	 }
	 /* negate_value flips the subtraction order (FBO y-flip). */
	 if (negate_value)
	    brw_ADD(p, dst[i], src1, negate(src0));
	 else
	    brw_ADD(p, dst[i], src0, negate(src1));
      }
   }

   if (mask & SATURATE)
      brw_set_saturate(p, 0);
}
void brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) { p->brw = brw; /* * Set the initial instruction store array size to 1024, if found that * isn't enough, then it will double the store size at brw_next_insn() * until out of memory. */ p->store_size = 1024; p->store = rzalloc_array(mem_ctx, struct brw_instruction, p->store_size); p->nr_insn = 0; p->current = p->stack; p->compressed = false; memset(p->current, 0, sizeof(p->current[0])); p->mem_ctx = mem_ctx; /* Some defaults? */ brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ brw_set_saturate(p, 0); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_predicate_control_flag_value(p, 0xff); /* Set up control flow stack */ p->if_stack_depth = 0; p->if_stack_array_size = 16; p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size); p->loop_stack_depth = 0; p->loop_stack_array_size = 16; p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); }
static void emit_abs( struct brw_wm_compile *c, struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for (i = 0; i < 4; i++) { if (inst->DstReg.WriteMask & (1<<i)) { struct brw_reg src, dst; dst = get_dst_reg(c, inst, i, 1); src = get_src_reg(c, &inst->SrcReg[0], i, 1); brw_MOV(p, dst, brw_abs(src)); } } brw_set_saturate(p, 0); }
/* Emit a two-operand math instruction on pre-gen6 hardware, where math is a
 * message to the shared math unit: the second logical operand is loaded into
 * the message payload at base_mrf + 1 and the first is sent as src0.
 */
void
vec4_generator::generate_math2_gen4(vec4_instruction *inst,
				    struct brw_reg dst,
				    struct brw_reg src0,
				    struct brw_reg src1)
{
   /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
    * "Message Payload":
    *
    * "Operand0[7].  For the INT DIV functions, this operand is the
    *  denominator."
    * ...
    * "Operand1[7].  For the INT DIV functions, this operand is the
    *  numerator."
    */
   bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
   /* INT DIV wants (denominator, numerator), i.e. the operands swapped. */
   struct brw_reg &op0 = is_int_div ? src1 : src0;
   struct brw_reg &op1 = is_int_div ? src0 : src1;

   /* Load op1 into the payload MRF with clean (unsaturated, unpredicated)
    * state so this helper MOV doesn't inherit the instruction's modifiers.
    */
   brw_push_insn_state(p);
   brw_set_saturate(p, false);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
   brw_pop_insn_state(p);

   brw_math(p,
	    dst,
	    brw_math_function(inst->opcode),
	    inst->base_mrf,
	    op0,
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);
}
static void emit_flr(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; GLuint mask = inst->DstReg.WriteMask; int i; brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { dst = get_dst_reg(c, inst, i, 1); src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); brw_RNDD(p, dst, src0); } } brw_set_saturate(p, 0); }
/* Emit a 3-component dot product:
 * dst = arg0.x*arg1.x + arg0.y*arg1.y + arg0.z*arg1.z.
 *
 * The sum is built through the implicit accumulator (MUL, then two MACs);
 * only the final MAC writes the GRF, so saturation applies to the completed
 * sum only.
 *
 * Generalized to accept any single enabled destination channel (matching
 * emit_dp2/emit_math2 in this file) instead of requiring WRITEMASK_X.
 */
static void emit_dp3( struct brw_compile *p,
		      const struct brw_reg *dst,
		      GLuint mask,
		      const struct brw_reg *arg0,
		      const struct brw_reg *arg1 )
{
   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;

   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code */

   /* A dot product writes a scalar: exactly one channel may be enabled. */
   assert(is_power_of_two(mask & WRITEMASK_XYZW));

   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);

   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
   brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
   brw_set_saturate(p, 0);
}
static void emit_dph(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); } dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_ADD(p, dst, src0[3], src1[3]); brw_set_saturate(p, 0); }
/* MAD: dst = arg0 * arg1 + arg2 per enabled channel.  Saturation is applied
 * only to the final ADD, leaving the intermediate product unclamped.
 */
void emit_mad(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1,
	      const struct brw_reg *arg2)
{
   GLuint chan;

   for (chan = 0; chan < 4; chan++) {
      if (!(mask & (1 << chan)))
	 continue;

      brw_MUL(p, dst[chan], arg0[chan], arg1[chan]);

      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
      brw_ADD(p, dst[chan], dst[chan], arg2[chan]);
      brw_set_saturate(p, 0);
   }
}
/* MIN: per enabled channel, compare then SEL so dst receives arg0 where
 * arg0 < arg1 and arg1 elsewhere.
 */
void emit_min(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1)
{
   GLuint chan;

   for (chan = 0; chan < 4; chan++) {
      if (!(mask & (1 << chan)))
	 continue;

      /* CMP to the null register updates the flag for the SEL below. */
      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[chan], arg1[chan]);

      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
      brw_SEL(p, dst[chan], arg0[chan], arg1[chan]);
      brw_set_saturate(p, 0);

      /* Return to unpredicated execution. */
      brw_set_predicate_control_flag_value(p, 0xff);
   }
}
/* 2-component dot product written to the single enabled destination
 * channel: dst = arg0.x*arg1.x + arg0.y*arg1.y.
 */
void emit_dp2(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1)
{
   int chan;

   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code */

   /* A dot product result is scalar: exactly one channel may be written. */
   assert(is_power_of_two(mask & WRITEMASK_XYZW));
   chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;

   /* First product goes to the accumulator; the MAC adds the second and
    * writes the (optionally saturated) sum to the GRF.
    */
   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);

   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
   brw_MAC(p, dst[chan], arg0[1], arg1[1]);
   brw_set_saturate(p, 0);
}
/* Minimal brw_compile setup: reset the instruction counter and establish a
 * known default execution state for subsequent emission.
 */
void brw_init_compile( struct brw_compile *p )
{
   p->nr_insn = 0;

   /* Current state lives at the bottom of the insn-state stack; start it
    * zeroed.
    */
   p->current = p->stack;
   memset(p->current, 0, sizeof(p->current[0]));

   /* Some defaults? */
   brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
   brw_set_saturate(p, 0);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_predicate_control_flag_value(p, 0xff);
}
static void emit_mad(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg dst, src0, src1, src2; int i; for (i = 0; i < 4; i++) { if (mask & (1<<i)) { dst = get_dst_reg(c, inst, i, 1); src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); brw_MUL(p, dst, src0, src1); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_ADD(p, dst, dst, src2); brw_set_saturate(p, 0); } } }
/* Emit a single-source ALU instruction (whatever func emits) for every
 * enabled channel, with saturation applied around the whole group when
 * requested.
 */
void emit_alu1(struct brw_compile *p,
	       struct brw_instruction *(*func)(struct brw_compile *,
					       struct brw_reg,
					       struct brw_reg),
	       const struct brw_reg *dst,
	       GLuint mask,
	       const struct brw_reg *arg0)
{
   GLuint chan;

   if (mask & SATURATE)
      brw_set_saturate(p, 1);

   for (chan = 0; chan < 4; chan++) {
      if (mask & (1 << chan))
	 func(p, dst[chan], arg0[chan]);
   }

   if (mask & SATURATE)
      brw_set_saturate(p, 0);
}
/* Cross product: dst[i] = arg0[next]*arg1[prev] - arg0[prev]*arg1[next],
 * built as a MUL into the accumulator with one operand negated, then a MAC.
 * The W channel must not be requested.
 */
void emit_xpd(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1)
{
   GLuint axis;

   assert((mask & WRITEMASK_W) != WRITEMASK_W);

   for (axis = 0; axis < 3; axis++) {
      GLuint next, prev;

      if (!(mask & (1 << axis)))
	 continue;

      next = (axis + 1) % 3;
      prev = (axis + 2) % 3;

      brw_MUL(p, brw_null_reg(), negate(arg0[prev]), arg1[next]);

      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
      brw_MAC(p, dst[axis], arg0[next], arg1[prev]);
      brw_set_saturate(p, 0);
   }
}
/* LRP: dst = arg0 * arg1 + (1 - arg0) * arg2, per enabled channel.
 *
 * dst[i] doubles as scratch for (1 - arg0): the ADD computes it, the MUL
 * pushes (1 - arg0) * arg2 into the accumulator, and the final MAC adds
 * arg0 * arg1 on top while writing the (optionally saturated) result.
 */
void emit_lrp(struct brw_compile *p,
	      const struct brw_reg *dst,
	      GLuint mask,
	      const struct brw_reg *arg0,
	      const struct brw_reg *arg1,
	      const struct brw_reg *arg2)
{
   GLuint i;

   /* Uses dst as a temporary:
    */
   for (i = 0; i < 4; i++) {
      if (mask & (1<<i)) {
	 /* Can I use the LINE instruction for this?
	  */
	 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
	 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);

	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
	 brw_MAC(p, dst[i], arg0[i], arg1[i]);
	 brw_set_saturate(p, 0);
      }
   }
}
/* OPCODE_XPD (cross product) for the brw_wm_compile path:
 * dst.i = src0[i1]*src1[i2] - src0[i2]*src1[i1], i1=(i+1)%3, i2=(i+2)%3.
 *
 * NOTE(review): the loop runs over all four channels, so an enabled W
 * channel would be computed with the X-axis formula (indices wrap mod 3).
 * Presumably the instruction lowering never enables W here — confirm.
 */
static void emit_xpd(struct brw_wm_compile *c, struct prog_instruction *inst)
{
    int i;
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;

    for (i = 0; i < 4; i++) {
	GLuint i2 = (i+2)%3;
	GLuint i1 = (i+1)%3;
	if (mask & (1<<i)) {
	    struct brw_reg src0, src1, dst;
	    dst = get_dst_reg(c, inst, i, 1);
	    /* First term goes to the accumulator with one operand negated. */
	    src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1));
	    src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1);
	    brw_MUL(p, brw_null_reg(), src0, src1);
	    /* Second term is accumulated on top; saturate the final sum. */
	    src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1);
	    src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1);
	    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
	    brw_MAC(p, dst, src0, src1);
	    brw_set_saturate(p, 0);
	}
    }
    /* Redundant with the per-channel clear above, but harmless. */
    brw_set_saturate(p, 0);
}
/* OPCODE_LRP for the brw_wm_compile path:
 * dst = src0 * src1 + (1 - src0) * src2, per enabled channel.
 *
 * dst is used as scratch for (1 - src0), so src1/src2 are first copied to
 * temporaries whenever they live in the destination register.  Temporaries
 * are released once per loop iteration.
 */
static void emit_lrp(struct brw_wm_compile *c, struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    GLuint mask = inst->DstReg.WriteMask;
    struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
    int i;

    for (i = 0; i < 4; i++) {
	if (mask & (1<<i)) {
	    dst = get_dst_reg(c, inst, i, 1);
	    src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
	    src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
	    /* Copy src1 out of the way if it aliases the destination. */
	    if (src1.nr == dst.nr) {
		tmp1 = alloc_tmp(c);
		brw_MOV(p, tmp1, src1);
	    } else
		tmp1 = src1;
	    src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
	    /* Likewise for src2. */
	    if (src2.nr == dst.nr) {
		tmp2 = alloc_tmp(c);
		brw_MOV(p, tmp2, src2);
	    } else
		tmp2 = src2;
	    /* dst = 1 - src0; acc = dst * src2; dst = acc + src0 * src1. */
	    brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
	    brw_MUL(p, brw_null_reg(), dst, tmp2);
	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
	    brw_MAC(p, dst, src0, tmp1);
	    brw_set_saturate(p, 0);
	}
	release_tmps(c);
    }
}
void brw_compile_init(struct brw_compile *p, int gen, void *store) { assert(gen); p->gen = gen; p->store = store; p->nr_insn = 0; p->current = p->stack; p->compressed = false; memset(p->current, 0, sizeof(p->current[0])); /* Some defaults? */ brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ brw_set_saturate(p, 0); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_predicate_control_flag_value(p, 0xff); p->if_stack_depth = 0; p->if_stack_array_size = 0; p->if_stack = NULL; }
/* Emit a two-source math operation (e.g. POW) for the single enabled
 * destination channel.
 *
 * On gen6+ the math instruction executes inline but cannot take scalar
 * (hstride 0) sources, so immediates/uniforms are first copied to stride-1
 * temporaries.  Pre-gen6 math is a message send: the second operand goes
 * into the message registers, and SIMD16 requires a second, 2nd-half
 * message pair.
 */
void emit_math2(struct brw_wm_compile *c,
		GLuint function,
		const struct brw_reg *dst,
		GLuint mask,
		const struct brw_reg *arg0,
		const struct brw_reg *arg1)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;

   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code */

   /* Math results are scalar: exactly one channel may be enabled. */
   assert(is_power_of_two(mask & WRITEMASK_XYZW));

   brw_push_insn_state(p);

   /* math can only operate on up to a vec8 at a time, so in
    * dispatch_width==16 we have to do the second half manually.
    */
   if (intel->gen >= 6) {
      struct brw_reg src0 = arg0[0];
      struct brw_reg src1 = arg1[0];
      struct brw_reg temp_dst = dst[dst_chan];

      /* gen6 math can't take a scalar source; stage it in the destination. */
      if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
	 brw_MOV(p, temp_dst, src0);
	 src0 = temp_dst;
      }

      if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
	 /* This is a heinous hack to get a temporary register for use
	  * in case both arg0 and arg1 are constants.  Why you're
	  * doing exponentiation on constant values in the shader, we
	  * don't know.
	  *
	  * max_wm_grf is almost surely less than the maximum GRF, and
	  * gen6 doesn't care about the number of GRFs used in a
	  * shader like pre-gen6 did.
	  */
	 struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0);
	 brw_MOV(p, temp, src1);
	 src1 = temp;
      }

      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_math2(p, temp_dst, function, src0, src1);
      /* Second half of the SIMD16 computation. */
      if (c->dispatch_width == 16) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_math2(p, sechalf(temp_dst), function,
		   sechalf(src0), sechalf(src1));
      }
   } else {
      GLuint saturate = ((mask & SATURATE) ?
			 BRW_MATH_SATURATE_SATURATE :
			 BRW_MATH_SATURATE_NONE);

      /* Load the second operand into the message registers (m3, plus m5
       * for the second half in SIMD16).
       */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, brw_message_reg(3), arg1[0]);
      if (c->dispatch_width == 16) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
      }

      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_math(p,
	       dst[dst_chan],
	       function,
	       saturate,
	       2,
	       arg0[0],
	       BRW_MATH_DATA_VECTOR,
	       BRW_MATH_PRECISION_FULL);

      /* Send two messages to perform all 16 operations:
       */
      if (c->dispatch_width == 16) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	 brw_math(p,
		  offset(dst[dst_chan],1),
		  function,
		  saturate,
		  4,
		  sechalf(arg0[0]),
		  BRW_MATH_DATA_VECTOR,
		  BRW_MATH_PRECISION_FULL);
      }
   }

   brw_pop_insn_state(p);
}
void vec4_generator::generate_code(exec_list *instructions) { int last_native_insn_offset = 0; const char *last_annotation_string = NULL; const void *last_annotation_ir = NULL; if (unlikely(INTEL_DEBUG & DEBUG_VS)) { if (shader) { printf("Native code for vertex shader %d:\n", prog->Name); } else { printf("Native code for vertex program %d:\n", c->vp->program.Base.Id); } } foreach_list(node, instructions) { vec4_instruction *inst = (vec4_instruction *)node; struct brw_reg src[3], dst; if (unlikely(INTEL_DEBUG & DEBUG_VS)) { if (last_annotation_ir != inst->ir) { last_annotation_ir = inst->ir; if (last_annotation_ir) { printf(" "); if (shader) { ((ir_instruction *) last_annotation_ir)->print(); } else { const prog_instruction *vpi; vpi = (const prog_instruction *) inst->ir; printf("%d: ", (int)(vpi - vp->Base.Instructions)); _mesa_fprint_instruction_opt(stdout, vpi, 0, PROG_PRINT_DEBUG, NULL); } printf("\n"); } } if (last_annotation_string != inst->annotation) { last_annotation_string = inst->annotation; if (last_annotation_string) printf(" %s\n", last_annotation_string); } } for (unsigned int i = 0; i < 3; i++) { src[i] = inst->get_src(i); } dst = inst->get_dst(); brw_set_conditionalmod(p, inst->conditional_mod); brw_set_predicate_control(p, inst->predicate); brw_set_predicate_inverse(p, inst->predicate_inverse); brw_set_saturate(p, inst->saturate); switch (inst->opcode) { case BRW_OPCODE_MOV: brw_MOV(p, dst, src[0]); break; case BRW_OPCODE_ADD: brw_ADD(p, dst, src[0], src[1]); break; case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; case BRW_OPCODE_MACH: brw_set_acc_write_control(p, 1); brw_MACH(p, dst, src[0], src[1]); brw_set_acc_write_control(p, 0); break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; case BRW_OPCODE_RNDD: brw_RNDD(p, dst, src[0]); break; case BRW_OPCODE_RNDE: brw_RNDE(p, dst, src[0]); break; case BRW_OPCODE_RNDZ: brw_RNDZ(p, dst, src[0]); break; case BRW_OPCODE_AND: brw_AND(p, dst, src[0], src[1]); break; case BRW_OPCODE_OR: 
brw_OR(p, dst, src[0], src[1]); break; case BRW_OPCODE_XOR: brw_XOR(p, dst, src[0], src[1]); break; case BRW_OPCODE_NOT: brw_NOT(p, dst, src[0]); break; case BRW_OPCODE_ASR: brw_ASR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHR: brw_SHR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHL: brw_SHL(p, dst, src[0], src[1]); break; case BRW_OPCODE_CMP: brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); break; case BRW_OPCODE_SEL: brw_SEL(p, dst, src[0], src[1]); break; case BRW_OPCODE_DPH: brw_DPH(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP4: brw_DP4(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP3: brw_DP3(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP2: brw_DP2(p, dst, src[0], src[1]); break; case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ assert(intel->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8); brw_inst->header.predicate_control = inst->predicate; } break; case BRW_OPCODE_ELSE: brw_ELSE(p); break; case BRW_OPCODE_ENDIF: brw_ENDIF(p); break; case BRW_OPCODE_DO: brw_DO(p, BRW_EXECUTE_8); break; case BRW_OPCODE_BREAK: brw_BREAK(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: /* FINISHME: We need to write the loop instruction support still. */ if (intel->gen >= 6) gen6_CONT(p); else brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_WHILE: brw_WHILE(p); break; default: generate_vs_instruction(inst, dst, src); break; } if (unlikely(INTEL_DEBUG & DEBUG_VS)) { brw_dump_compile(p, stdout, last_native_insn_offset, p->next_insn_offset); } last_native_insn_offset = p->next_insn_offset; }
/* Post-fragment-program processing.  Send the results to the
 * framebuffer.
 * \param arg0 the fragment color
 * \param arg1 the pass-through depth value
 * \param arg2 the shader-computed depth value
 * \param target render target index
 * \param eot non-zero for the final (end-of-thread) FB write
 */
void emit_fb_write(struct brw_wm_compile *c,
		   struct brw_reg *arg0,
		   struct brw_reg *arg1,
		   struct brw_reg *arg2,
		   GLuint target,
		   GLuint eot)
{
   struct brw_compile *p = &c->func;
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   GLuint nr = 2;
   GLuint channel;

   /* Reserve a space for AA - may not be needed:
    */
   if (c->aa_dest_stencil_reg)
      nr += 1;

   /* I don't really understand how this achieves the color interleave
    * (ie RGBARGBA) in the result:  [Do the saturation here]
    */
   brw_push_insn_state(p);

   if (c->key.clamp_fragment_color)
      brw_set_saturate(p, 1);

   for (channel = 0; channel < 4; channel++) {
      if (intel->gen >= 6) {
	 /* gen6 SIMD16 single source DP write looks like:
	  * m + 0: r0
	  * m + 1: r1
	  * m + 2: g0
	  * m + 3: g1
	  * m + 4: b0
	  * m + 5: b1
	  * m + 6: a0
	  * m + 7: a1
	  */
	 if (c->dispatch_width == 16) {
	    brw_MOV(p, brw_message_reg(nr + channel * 2), arg0[channel]);
	 } else {
	    brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]);
	 }
      } else if (c->dispatch_width == 16 && brw->has_compr4) {
	 /* pre-gen6 SIMD16 single source DP write looks like:
	  * m + 0: r0
	  * m + 1: g0
	  * m + 2: b0
	  * m + 3: a0
	  * m + 4: r1
	  * m + 5: g1
	  * m + 6: b1
	  * m + 7: a1
	  *
	  * By setting the high bit of the MRF register number, we indicate
	  * that we want COMPR4 mode - instead of doing the usual destination
	  * + 1 for the second half we get destination + 4.
	  */
	 brw_MOV(p,
		 brw_message_reg(nr + channel + BRW_MRF_COMPR4),
		 arg0[channel]);
      } else {
	 /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
	 /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_MOV(p,
		 brw_message_reg(nr + channel),
		 arg0[channel]);

	 if (c->dispatch_width == 16) {
	    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
	    brw_MOV(p,
		    brw_message_reg(nr + channel + 4),
		    sechalf(arg0[channel]));
	 }
      }
   }

   brw_set_saturate(p, 0);

   /* skip over the regs populated above:
    */
   if (c->dispatch_width == 16)
      nr += 8;
   else
      nr += 4;

   brw_pop_insn_state(p);

   if (c->source_depth_to_render_target) {
      if (c->computes_depth)
	 brw_MOV(p, brw_message_reg(nr), arg2[2]);
      else
	 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */

      nr += 2;
   }

   if (c->dest_depth_reg) {
      GLuint comp = c->dest_depth_reg / 2;
      GLuint off = c->dest_depth_reg % 2;

      /* An odd starting register needs the value re-packed across two MRFs. */
      if (off != 0) {
	 brw_push_insn_state(p);
	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);

	 brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
	 /* 2nd half? */
	 brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
	 brw_pop_insn_state(p);
      } else {
	 brw_MOV(p, brw_message_reg(nr), arg1[comp]);
      }
      nr += 2;
   }

   if (intel->gen >= 6) {
      /* Load the message header.  There's no implied move from src0
       * to the base mrf on gen6.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD),
	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);

      if (target != 0) {
	 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
					0,
					2), BRW_REGISTER_TYPE_UD),
		 brw_imm_ud(target));
      }
   }

   if (!c->runtime_check_aads_emit) {
      if (c->aa_dest_stencil_reg)
	 emit_aa(c, arg1, 2);

      fire_fb_write(c, 0, nr, target, eot);
   } else {
      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
      struct brw_reg ip = brw_ip_reg();
      struct brw_instruction *jmp;

      /* Test bit 26 of r1.6 at runtime; skip the AA path when it's clear. */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
      brw_AND(p,
	      v1_null_ud,
	      get_element_ud(brw_vec8_grf(1,0), 6),
	      brw_imm_ud(1<<26));

      jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
      {
	 emit_aa(c, arg1, 2);
	 fire_fb_write(c, 0, nr, target, eot);
	 /* note - thread killed in subroutine */
      }
      brw_land_fwd_jump(p, jmp);

      /* ELSE: Shuffle up one register to fill in the hole left for AA:
       */
      fire_fb_write(c, 1, nr-1, target, eot);
   }
}
/* Emit the sampler message for a texture lookup and retrieve the four
 * result channels.
 *
 * Builds the MRF payload starting at m2: on Ivybridge the shadow reference
 * comes first, then the texcoords (zero-padded to the count the hardware
 * expects for this gen/dispatch width), then for older gens the legacy
 * shadow-reference slot.
 */
void emit_tex(struct brw_wm_compile *c,
	      struct brw_reg *dst,
	      GLuint dst_flags,
	      struct brw_reg *arg,
	      struct brw_reg depth_payload,
	      GLuint tex_idx,
	      GLuint sampler,
	      bool shadow)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg dst_retyped;
   GLuint cur_mrf = 2, response_length;
   GLuint i, nr_texcoords;
   GLuint emit;
   GLuint msg_type;
   GLuint mrf_per_channel;
   GLuint simd_mode;

   /* SIMD16 needs two MRFs per channel and reads back two regs per channel. */
   if (c->dispatch_width == 16) {
      mrf_per_channel = 2;
      response_length = 8;
      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   } else {
      mrf_per_channel = 1;
      response_length = 4;
      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   }

   /* How many input regs are there?
    */
   switch (tex_idx) {
   case TEXTURE_1D_INDEX:
      emit = WRITEMASK_X;
      nr_texcoords = 1;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_1D_ARRAY_INDEX:
   case TEXTURE_RECT_INDEX:
      emit = WRITEMASK_XY;
      nr_texcoords = 2;
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_2D_ARRAY_INDEX:
   case TEXTURE_CUBE_INDEX:
      emit = WRITEMASK_XYZ;
      nr_texcoords = 3;
      break;
   default:
      /* unexpected target */
      abort();
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5 && c->dispatch_width == 8)
      nr_texcoords = 3;

   if (shadow) {
      if (intel->gen < 7) {
	 /* For shadow comparisons, we have to supply u,v,r. */
	 nr_texcoords = 3;
      } else {
	 /* On Ivybridge, the shadow comparitor comes first. Just load it. */
	 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
	 cur_mrf += mrf_per_channel;
      }
   }

   /* Emit the texcoords, padding unused ones with 0. */
   for (i = 0; i < nr_texcoords; i++) {
      /* GL_CLAMP requires clamping the coordinate into [0,1] ourselves. */
      if (c->key.tex.gl_clamp_mask[i] & (1 << sampler))
	 brw_set_saturate(p, true);

      if (emit & (1<<i))
	 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
      else
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
      cur_mrf += mrf_per_channel;

      brw_set_saturate(p, false);
   }

   /* Fill in the shadow comparison reference value.
    */
   if (shadow && intel->gen < 7) {
      if (intel->gen >= 5) {
	 /* Fill in the cube map array index value. */
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
	 cur_mrf += mrf_per_channel;
      } else if (c->dispatch_width == 8) {
	 /* Fill in the LOD bias value. */
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
	 cur_mrf += mrf_per_channel;
      }
      brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
      cur_mrf += mrf_per_channel;
   }

   if (intel->gen >= 5) {
      if (shadow)
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
      else
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
   } else {
      /* Note that G45 and older determines shadow compare and dispatch width
       * from message length for most messages.
       */
      if (c->dispatch_width == 16 && shadow)
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
      else
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
   }

   brw_SAMPLE(p,
	      dst_retyped,
	      1,
	      retype(depth_payload, BRW_REGISTER_TYPE_UW),
	      SURF_INDEX_TEXTURE(sampler),
	      sampler,
	      dst_flags & WRITEMASK_XYZW,
	      msg_type,
	      response_length,
	      cur_mrf - 1,
	      1,
	      simd_mode,
	      BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
void vec4_generator::generate_code(exec_list *instructions) { int last_native_insn_offset = 0; const char *last_annotation_string = NULL; const void *last_annotation_ir = NULL; if (unlikely(debug_flag)) { if (shader_prog) { fprintf(stderr, "Native code for %s vertex shader %d:\n", shader_prog->Label ? shader_prog->Label : "unnamed", shader_prog->Name); } else { fprintf(stderr, "Native code for vertex program %d:\n", prog->Id); } } foreach_list(node, instructions) { vec4_instruction *inst = (vec4_instruction *)node; struct brw_reg src[3], dst; if (unlikely(debug_flag)) { if (last_annotation_ir != inst->ir) { last_annotation_ir = inst->ir; if (last_annotation_ir) { fprintf(stderr, " "); if (shader_prog) { ((ir_instruction *) last_annotation_ir)->fprint(stderr); } else { const prog_instruction *vpi; vpi = (const prog_instruction *) inst->ir; fprintf(stderr, "%d: ", (int)(vpi - prog->Instructions)); _mesa_fprint_instruction_opt(stderr, vpi, 0, PROG_PRINT_DEBUG, NULL); } fprintf(stderr, "\n"); } } if (last_annotation_string != inst->annotation) { last_annotation_string = inst->annotation; if (last_annotation_string) fprintf(stderr, " %s\n", last_annotation_string); } } for (unsigned int i = 0; i < 3; i++) { src[i] = inst->get_src(this->prog_data, i); } dst = inst->get_dst(); brw_set_conditionalmod(p, inst->conditional_mod); brw_set_predicate_control(p, inst->predicate); brw_set_predicate_inverse(p, inst->predicate_inverse); brw_set_saturate(p, inst->saturate); brw_set_mask_control(p, inst->force_writemask_all); unsigned pre_emit_nr_insn = p->nr_insn; generate_vec4_instruction(inst, dst, src); if (inst->no_dd_clear || inst->no_dd_check) { assert(p->nr_insn == pre_emit_nr_insn + 1 || !"no_dd_check or no_dd_clear set for IR emitting more " "than 1 instruction"); struct brw_instruction *last = &p->store[pre_emit_nr_insn]; if (inst->no_dd_clear) last->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED; if (inst->no_dd_check) last->header.dependency_control |= 
BRW_DEPENDENCY_NOTCHECKED; } if (unlikely(debug_flag)) { brw_dump_compile(p, stderr, last_native_insn_offset, p->next_insn_offset); } last_native_insn_offset = p->next_insn_offset; }