void vec4_generator::generate_math2_gen7(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, struct brw_reg src1) { brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1); }
void vec4_generator::generate_math2_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, struct brw_reg src1) { /* Can't do writemask because math can't be align16. */ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); /* Source swizzles are ignored. */ check_gen6_math_src_arg(src0); check_gen6_math_src_arg(src1); brw_set_access_mode(p, BRW_ALIGN_1); brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1); brw_set_access_mode(p, BRW_ALIGN_16); }
void emit_math2(struct brw_wm_compile *c, GLuint function, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1) { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_push_insn_state(p); /* math can only operate on up to a vec8 at a time, so in * dispatch_width==16 we have to do the second half manually. */ if (intel->gen >= 6) { struct brw_reg src0 = arg0[0]; struct brw_reg src1 = arg1[0]; struct brw_reg temp_dst = dst[dst_chan]; if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { brw_MOV(p, temp_dst, src0); src0 = temp_dst; } if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { /* This is a heinous hack to get a temporary register for use * in case both arg0 and arg1 are constants. Why you're * doing exponentiation on constant values in the shader, we * don't know. * * max_wm_grf is almost surely less than the maximum GRF, and * gen6 doesn't care about the number of GRFs used in a * shader like pre-gen6 did. */ struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0); brw_MOV(p, temp, src1); src1 = temp; } brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math2(p, temp_dst, function, src0, src1); if (c->dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math2(p, sechalf(temp_dst), function, sechalf(src0), sechalf(src1)); } } else { GLuint saturate = ((mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(3), arg1[0]); if (c->dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); } brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst[dst_chan], function, saturate, 2, arg0[0], BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); /* Send two messages to perform all 16 operations: */ if (c->dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math(p, offset(dst[dst_chan],1), function, saturate, 4, sechalf(arg0[0]), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } } brw_pop_insn_state(p); }