void emit_math1(struct brw_wm_compile *c, GLuint function, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0) { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; GLuint saturate = ((mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE); struct brw_reg src; if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || arg0[0].file != BRW_GENERAL_REGISTER_FILE) || arg0[0].negate || arg0[0].abs)) { /* Gen6 math requires that source and dst horizontal stride be 1, * and that the argument be in the GRF. * * The hardware ignores source modifiers (negate and abs) on math * instructions, so we also move to a temp to set those up. */ src = dst[dst_chan]; brw_MOV(p, src, arg0[0]); } else { src = arg0[0]; } /* Send two messages to perform all 16 operations: */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst[dst_chan], function, saturate, 2, src, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); if (c->dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math(p, offset(dst[dst_chan],1), function, saturate, 3, sechalf(src), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } brw_pop_insn_state(p); }
static void emit_math2( struct brw_compile *p, GLuint function, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1) { if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code*/ assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(2), arg0[0]); brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(3), arg1[0]); brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); /* Send two messages to perform all 16 operations: */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst[0], function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math(p, offset(dst[0],1), function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 4, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); brw_pop_insn_state(p); }
void vec4_generator::generate_math2_gen4(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, struct brw_reg src1) { /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 * "Message Payload": * * "Operand0[7]. For the INT DIV functions, this operand is the * denominator." * ... * "Operand1[7]. For the INT DIV functions, this operand is the * numerator." */ bool is_int_div = inst->opcode != SHADER_OPCODE_POW; struct brw_reg &op0 = is_int_div ? src1 : src0; struct brw_reg &op1 = is_int_div ? src0 : src1; brw_push_insn_state(p); brw_set_saturate(p, false); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1); brw_pop_insn_state(p); brw_math(p, dst, brw_math_function(inst->opcode), inst->base_mrf, op0, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); }
void emit_pixel_w(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *deltas) { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; struct brw_reg src; struct brw_reg temp_dst; if (intel->gen >= 6) temp_dst = dst[3]; else temp_dst = brw_message_reg(2); assert(intel->gen < 6); /* Don't need this if all you are doing is interpolating color, for * instance. */ if (mask & WRITEMASK_W) { struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ if (can_do_pln(intel, deltas)) { brw_PLN(p, temp_dst, interp3, deltas[0]); } else { brw_LINE(p, brw_null_reg(), interp3, deltas[0]); brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]); } /* Calc w */ if (intel->gen >= 6) src = temp_dst; else src = brw_null_reg(); if (c->dispatch_width == 16) { brw_math_16(p, dst[3], BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 2, src, BRW_MATH_PRECISION_FULL); } else { brw_math(p, dst[3], BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 2, src, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } } }
void brw_math_invert( struct brw_compile *p, struct brw_reg dst, struct brw_reg src) { brw_math( p, dst, BRW_MATH_FUNCTION_INV, 0, src, BRW_MATH_PRECISION_FULL, BRW_MATH_DATA_VECTOR ); }
void vec4_generator::generate_math1_gen4(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src) { brw_math(p, dst, brw_math_function(inst->opcode), inst->base_mrf, src, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); }
static void invert_det( struct brw_sf_compile *c) { /* Looks like we invert all 8 elements just to get 1/det in * position 2 !?! */ brw_math(&c->func, c->inv_det, BRW_MATH_FUNCTION_INV, 0, c->det, BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); }
static void emit_math1(struct brw_wm_compile *c, struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); brw_MOV(p, brw_message_reg(2), src0); brw_math(p, dst, func, (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); }
void vec4_generator::generate_math1_gen6(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src) { /* Can't do writemask because math can't be align16. */ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); check_gen6_math_src_arg(src); brw_set_access_mode(p, BRW_ALIGN_1); brw_math(p, dst, brw_math_function(inst->opcode), inst->base_mrf, src, BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); brw_set_access_mode(p, BRW_ALIGN_16); }
void emit_math2(struct brw_wm_compile *c, GLuint function, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1) { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_push_insn_state(p); /* math can only operate on up to a vec8 at a time, so in * dispatch_width==16 we have to do the second half manually. */ if (intel->gen >= 6) { struct brw_reg src0 = arg0[0]; struct brw_reg src1 = arg1[0]; struct brw_reg temp_dst = dst[dst_chan]; if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { brw_MOV(p, temp_dst, src0); src0 = temp_dst; } if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { /* This is a heinous hack to get a temporary register for use * in case both arg0 and arg1 are constants. Why you're * doing exponentiation on constant values in the shader, we * don't know. * * max_wm_grf is almost surely less than the maximum GRF, and * gen6 doesn't care about the number of GRFs used in a * shader like pre-gen6 did. */ struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0); brw_MOV(p, temp, src1); src1 = temp; } brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math2(p, temp_dst, function, src0, src1); if (c->dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math2(p, sechalf(temp_dst), function, sechalf(src0), sechalf(src1)); } } else { GLuint saturate = ((mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(3), arg1[0]); if (c->dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); } brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst[dst_chan], function, saturate, 2, arg0[0], BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); /* Send two messages to perform all 16 operations: */ if (c->dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math(p, offset(dst[dst_chan],1), function, saturate, 4, sechalf(arg0[0]), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } } brw_pop_insn_state(p); }
void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) { struct brw_compile *p = &c->func; GLuint i; c->nr_verts = 1; if (allocate) alloc_regs(c); copy_z_inv_w(c); for (i = 0; i < c->nr_setup_regs; i++) { struct brw_reg a0 = offset(c->vert[0], i); GLushort pc, pc_persp, pc_linear, pc_coord_replace; GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); pc_coord_replace = calculate_point_sprite_mask(c, i); pc_persp &= ~pc_coord_replace; if (pc_persp) { brw_set_predicate_control_flag_value(p, pc_persp); brw_MUL(p, a0, a0, c->inv_w[0]); } /* Point sprite coordinate replacement: A texcoord with this * enabled gets replaced with the value (x, y, 0, 1) where x and * y vary from 0 to 1 across the horizontal and vertical of the * point. */ if (pc_coord_replace) { brw_set_predicate_control_flag_value(p, pc_coord_replace); /* Caculate 1.0/PointWidth */ brw_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 0, c->dx0, BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); brw_set_access_mode(p, BRW_ALIGN_16); /* dA/dx, dA/dy */ brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); if (c->key.sprite_origin_lower_left) { brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); } else { brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); } /* attribute constant offset */ brw_MOV(p, c->m3C0, brw_imm_f(0.0)); if (c->key.sprite_origin_lower_left) { brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); } else { brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); } brw_set_access_mode(p, BRW_ALIGN_1); } if (pc & ~pc_coord_replace) { brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace); brw_MOV(p, c->m1Cx, brw_imm_ud(0)); brw_MOV(p, c->m2Cy, brw_imm_ud(0)); brw_MOV(p, c->m3C0, a0); /* constant value */ } brw_set_predicate_control_flag_value(p, pc); /* Copy m0..m3 to URB. */ brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0), 0, /* allocate */ 1, /* used */ 4, /* msg len */ 0, /* response len */ last, /* eot */ last, /* writes complete */ i*4, /* urb destination offset */ BRW_URB_SWIZZLE_TRANSPOSE); } }
void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) { struct brw_compile *p = &c->func; GLuint i; c->nr_verts = 1; if (allocate) alloc_regs(c); copy_z_inv_w(c); for (i = 0; i < c->nr_setup_regs; i++) { struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; struct brw_reg a0 = offset(c->vert[0], i); GLushort pc, pc_persp, pc_linear; GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); if (pc_persp) { if (!tex->CoordReplace) { brw_set_predicate_control_flag_value(p, pc_persp); brw_MUL(p, a0, a0, c->inv_w[0]); } } if (tex->CoordReplace) { /* Caculate 1.0/PointWidth */ brw_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 0, c->dx0, BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); if (c->key.SpriteOrigin == GL_LOWER_LEFT) { brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); } else { brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); } } else { brw_MOV(p, c->m1Cx, brw_imm_ud(0)); brw_MOV(p, c->m2Cy, brw_imm_ud(0)); } { brw_set_predicate_control_flag_value(p, pc); if (tex->CoordReplace) { if (c->key.SpriteOrigin == GL_LOWER_LEFT) { brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); } else brw_MOV(p, c->m3C0, brw_imm_f(0.0)); } else { brw_MOV(p, c->m3C0, a0); /* constant value */ } /* Copy m0..m3 to URB. */ brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0), 0, /* allocate */ 1, /* used */ 4, /* msg len */ 0, /* response len */ last, /* eot */ last, /* writes complete */ i*4, /* urb destination offset */ BRW_URB_SWIZZLE_TRANSPOSE); } } }