static void emit_pixel_w( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *deltas) { /* Don't need this if all you are doing is interpolating color, for * instance. */ if (mask & WRITEMASK_W) { struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ brw_LINE(p, brw_null_reg(), interp3, deltas[0]); brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); /* Calc w */ brw_math_16( p, dst[3], BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_PRECISION_FULL); } }
void emit_linterp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *deltas) { struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); interp[3] = brw_vec1_grf(nr+1, 4); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { if (intel->gen >= 6) { brw_PLN(p, dst[i], interp[i], brw_vec8_grf(2, 0)); } else if (can_do_pln(intel, deltas)) { brw_PLN(p, dst[i], interp[i], deltas[0]); } else { brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); } } } }
static void emit_pinterp( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *deltas, const struct brw_reg *w) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); interp[3] = brw_vec1_grf(nr+1, 4); for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); } } for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { brw_MUL(p, dst[i], dst[i], w[3]); } } }
/* Interpolate between two vertices and put the result into a0.0. * Increment a0.0 accordingly. */ void brw_clip_interp_vertex( struct brw_clip_compile *c, struct brw_indirect dest_ptr, struct brw_indirect v0_ptr, /* from */ struct brw_indirect v1_ptr, /* to */ struct brw_reg t0, GLboolean force_edgeflag) { struct brw_compile *p = &c->func; struct brw_reg tmp = get_tmp(c); GLuint i; /* Just copy the vertex header: */ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); /* Iterate over each attribute (could be done in pairs?) */ for (i = 0; i < c->nr_attrs; i++) { GLuint delta = i*16 + 32; if (delta == c->offset[VERT_RESULT_EDGE]) { if (force_edgeflag) brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); else brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); } else { /* Interpolate: * * New = attr0 + t*attr1 - t*attr0 */ brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0); brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0); brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp); } } if (i & 1) { GLuint delta = i*16 + 32; brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); } release_tmp(c, tmp); /* Recreate the projected (NDC) coordinate in the new vertex * header: */ brw_clip_project_vertex(c, dest_ptr ); }
static void emit_pinterp(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0, w; src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); w = get_src_reg(c, &inst->SrcReg[2], 3, 1); GLuint nr = src0.nr; int i; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); interp[3] = brw_vec1_grf(nr+1, 4); for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { dst = get_dst_reg(c, inst, i, 1); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); brw_MUL(p, dst, dst, w); } } }
static void emit_pixel_w( struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; if (mask & WRITEMASK_W) { struct brw_reg dst, src0, delta0, delta1; struct brw_reg interp3; dst = get_dst_reg(c, inst, 3, 1); src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); interp3 = brw_vec1_grf(src0.nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ brw_LINE(p, brw_null_reg(), interp3, delta0); brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); /* Calc w */ brw_math_16( p, dst, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_PRECISION_FULL); } }
static void emit_dp3( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code*/ assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); brw_MAC(p, dst[0], arg0[2], arg1[2]); brw_set_saturate(p, 0); }
static void emit_dph(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); } dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_ADD(p, dst, src0[3], src1[3]); brw_set_saturate(p, 0); }
void emit_pixel_w(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *deltas) { struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; struct brw_reg src; struct brw_reg temp_dst; if (intel->gen >= 6) temp_dst = dst[3]; else temp_dst = brw_message_reg(2); assert(intel->gen < 6); /* Don't need this if all you are doing is interpolating color, for * instance. */ if (mask & WRITEMASK_W) { struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ if (can_do_pln(intel, deltas)) { brw_PLN(p, temp_dst, interp3, deltas[0]); } else { brw_LINE(p, brw_null_reg(), interp3, deltas[0]); brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]); } /* Calc w */ if (intel->gen >= 6) src = temp_dst; else src = brw_null_reg(); if (c->dispatch_width == 16) { brw_math_16(p, dst[3], BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 2, src, BRW_MATH_PRECISION_FULL); } else { brw_math(p, dst[3], BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 2, src, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); } } }
void emit_dp3(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1) { int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); brw_set_saturate(p, 0); }
static void brw_wm_affine_st(struct brw_compile *p, int dw, int channel, int msg) { int uv; if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); uv = p->gen >= 060 ? 6 : 3; } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); uv = p->gen >= 060 ? 4 : 3; } uv += 2*channel; msg++; if (p->gen >= 060) { brw_PLN(p, brw_message_reg(msg), brw_vec1_grf(uv, 0), brw_vec8_grf(2, 0)); msg += dw/8; brw_PLN(p, brw_message_reg(msg), brw_vec1_grf(uv, 4), brw_vec8_grf(2, 0)); } else { struct brw_reg r = brw_vec1_grf(uv, 0); brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); msg += dw/8; brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); } }
/* This is performed against the original triangles, so no indirection * required: BZZZT! */ static void compute_tri_direction( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg e = c->reg.tmp0; struct brw_reg f = c->reg.tmp1; GLuint hpos_offset = brw_vert_result_to_offset(&c->vue_map, VARYING_SLOT_POS); struct brw_reg v0 = byte_offset(c->reg.vertex[0], hpos_offset); struct brw_reg v1 = byte_offset(c->reg.vertex[1], hpos_offset); struct brw_reg v2 = byte_offset(c->reg.vertex[2], hpos_offset); struct brw_reg v0n = get_tmp(c); struct brw_reg v1n = get_tmp(c); struct brw_reg v2n = get_tmp(c); /* Convert to NDC. * NOTE: We can't modify the original vertex coordinates, * as it may impact further operations. * So, we have to keep normalized coordinates in temp registers. * * TBD-KC * Try to optimize unnecessary MOV's. */ brw_MOV(p, v0n, v0); brw_MOV(p, v1n, v1); brw_MOV(p, v2n, v2); brw_clip_project_position(c, v0n); brw_clip_project_position(c, v1n); brw_clip_project_position(c, v2n); /* Calculate the vectors of two edges of the triangle: */ brw_ADD(p, e, v0n, negate(v2n)); brw_ADD(p, f, v1n, negate(v2n)); /* Take their crossproduct: */ brw_set_access_mode(p, BRW_ALIGN_16); brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); brw_set_access_mode(p, BRW_ALIGN_1); brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); }
void emit_xpd(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1) { GLuint i; assert((mask & WRITEMASK_W) != WRITEMASK_W); for (i = 0 ; i < 3; i++) { if (mask & (1<<i)) { GLuint i2 = (i+2)%3; GLuint i1 = (i+1)%3; brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); brw_MAC(p, dst[i], arg0[i1], arg1[i2]); brw_set_saturate(p, 0); } } }
static void emit_xpd(struct brw_wm_compile *c, struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; for (i = 0; i < 4; i++) { GLuint i2 = (i+2)%3; GLuint i1 = (i+1)%3; if (mask & (1<<i)) { struct brw_reg src0, src1, dst; dst = get_dst_reg(c, inst, i, 1); src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1)); src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); brw_MUL(p, brw_null_reg(), src0, src1); src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); brw_MAC(p, dst, src0, src1); brw_set_saturate(p, 0); } } brw_set_saturate(p, 0); }
static void emit_lrp(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg dst, tmp1, tmp2, src0, src1, src2; int i; for (i = 0; i < 4; i++) { if (mask & (1<<i)) { dst = get_dst_reg(c, inst, i, 1); src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); if (src1.nr == dst.nr) { tmp1 = alloc_tmp(c); brw_MOV(p, tmp1, src1); } else tmp1 = src1; src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); if (src2.nr == dst.nr) { tmp2 = alloc_tmp(c); brw_MOV(p, tmp2, src2); } else tmp2 = src2; brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); brw_MUL(p, brw_null_reg(), dst, tmp2); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MAC(p, dst, src0, tmp1); brw_set_saturate(p, 0); } release_tmps(c); } }
void emit_lrp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1, const struct brw_reg *arg2) { GLuint i; /* Uses dst as a temporary: */ for (i = 0; i < 4; i++) { if (mask & (1<<i)) { /* Can I use the LINE instruction for this? */ brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0)); brw_MUL(p, brw_null_reg(), dst[i], arg2[i]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); brw_MAC(p, dst[i], arg0[i], arg1[i]); brw_set_saturate(p, 0); } } }
/* This is performed against the original triangles, so no indirection * required: BZZZT! */ static void compute_tri_direction( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg e = c->reg.tmp0; struct brw_reg f = c->reg.tmp1; struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); /* Calculate the vectors of two edges of the triangle: */ brw_ADD(p, e, v0, negate(v2)); brw_ADD(p, f, v1, negate(v2)); /* Take their crossproduct: */ brw_set_access_mode(p, BRW_ALIGN_16); brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); brw_set_access_mode(p, BRW_ALIGN_1); brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); }
/* Interpolate between two vertices and put the result into a0.0. * Increment a0.0 accordingly. */ void brw_clip_interp_vertex( struct brw_clip_compile *c, struct brw_indirect dest_ptr, struct brw_indirect v0_ptr, /* from */ struct brw_indirect v1_ptr, /* to */ struct brw_reg t0, bool force_edgeflag) { struct brw_compile *p = &c->func; struct brw_reg tmp = get_tmp(c); GLuint slot; /* Just copy the vertex header: */ /* * After CLIP stage, only first 256 bits of the VUE are read * back on Ironlake, so needn't change it */ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); /* Iterate over each attribute (could be done in pairs?) */ for (slot = 0; slot < c->vue_map.num_slots; slot++) { int varying = c->vue_map.slot_to_varying[slot]; GLuint delta = brw_vue_slot_to_offset(slot); if (varying == VARYING_SLOT_EDGE) { if (force_edgeflag) brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); else brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); } else if (varying == VARYING_SLOT_PSIZ || varying == VARYING_SLOT_CLIP_DIST0 || varying == VARYING_SLOT_CLIP_DIST1) { /* PSIZ doesn't need interpolation because it isn't used by the * fragment shader. CLIP_DIST0 and CLIP_DIST1 don't need * intepolation because on pre-GEN6, these are just placeholder VUE * slots that don't perform any action. */ } else if (varying < VARYING_SLOT_MAX) { /* This is a true vertex result (and not a special value for the VUE * header), so interpolate: * * New = attr0 + t*attr1 - t*attr0 */ brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0); brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0); brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp); } } if (c->vue_map.num_slots % 2) { GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots); brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); } release_tmp(c, tmp); /* Recreate the projected (NDC) coordinate in the new vertex * header: */ brw_clip_project_vertex(c, dest_ptr ); }
/* Interpolate between two vertices and put the result into a0.0. * Increment a0.0 accordingly. * * Beware that dest_ptr can be equal to v0_ptr! */ void brw_clip_interp_vertex( struct brw_clip_compile *c, struct brw_indirect dest_ptr, struct brw_indirect v0_ptr, /* from */ struct brw_indirect v1_ptr, /* to */ struct brw_reg t0, bool force_edgeflag) { struct brw_codegen *p = &c->func; struct brw_reg t_nopersp, v0_ndc_copy; GLuint slot; /* Just copy the vertex header: */ /* * After CLIP stage, only first 256 bits of the VUE are read * back on Ironlake, so needn't change it */ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); /* First handle the 3D and NDC interpolation, in case we * need noperspective interpolation. Doing it early has no * performance impact in any case. */ /* Take a copy of the v0 NDC coordinates, in case dest == v0. */ if (c->has_noperspective_shading) { GLuint offset = brw_varying_to_offset(&c->vue_map, BRW_VARYING_SLOT_NDC); v0_ndc_copy = get_tmp(c); brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset)); } /* Compute the new 3D position * * dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0 */ { GLuint delta = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); struct brw_reg tmp = get_tmp(c); brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0); brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0); brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp); release_tmp(c, tmp); } /* Recreate the projected (NDC) coordinate in the new vertex header */ brw_clip_project_vertex(c, dest_ptr); /* If we have noperspective attributes, * we need to compute the screen-space t */ if (c->has_noperspective_shading) { GLuint delta = brw_varying_to_offset(&c->vue_map, BRW_VARYING_SLOT_NDC); struct brw_reg tmp = get_tmp(c); t_nopersp = get_tmp(c); /* t_nopersp = vec4(v1.xy, dest.xy) */ brw_MOV(p, t_nopersp, deref_4f(v1_ptr, delta)); brw_MOV(p, tmp, deref_4f(dest_ptr, delta)); brw_set_default_access_mode(p, BRW_ALIGN_16); brw_MOV(p, brw_writemask(t_nopersp, WRITEMASK_ZW), brw_swizzle(tmp, 0, 1, 0, 1)); /* t_nopersp = vec4(v1.xy, dest.xy) - v0.xyxy */ brw_ADD(p, t_nopersp, t_nopersp, negate(brw_swizzle(v0_ndc_copy, 0, 1, 0, 1))); /* Add the absolute values of the X and Y deltas so that if * the points aren't in the same place on the screen we get * nonzero values to divide. * * After that, we have vert1 - vert0 in t_nopersp.x and * vertnew - vert0 in t_nopersp.y * * t_nopersp = vec2(|v1.x -v0.x| + |v1.y -v0.y|, * |dest.x-v0.x| + |dest.y-v0.y|) */ brw_ADD(p, brw_writemask(t_nopersp, WRITEMASK_XY), brw_abs(brw_swizzle(t_nopersp, 0, 2, 0, 0)), brw_abs(brw_swizzle(t_nopersp, 1, 3, 0, 0))); brw_set_default_access_mode(p, BRW_ALIGN_1); /* If the points are in the same place, just substitute a * value to avoid divide-by-zero */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, vec1(t_nopersp), brw_imm_f(0)); brw_IF(p, BRW_EXECUTE_1); brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0), brw_float_to_vf(0.0), brw_float_to_vf(0.0), brw_float_to_vf(0.0))); brw_ENDIF(p); /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */ brw_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0)); brw_MUL(p, vec1(t_nopersp), vec1(t_nopersp), vec1(suboffset(t_nopersp, 1))); brw_set_default_access_mode(p, BRW_ALIGN_16); brw_MOV(p, t_nopersp, brw_swizzle(t_nopersp, 0, 0, 0, 0)); brw_set_default_access_mode(p, BRW_ALIGN_1); release_tmp(c, tmp); release_tmp(c, v0_ndc_copy); } /* Now we can iterate over each attribute * (could be done in pairs?) */ for (slot = 0; slot < c->vue_map.num_slots; slot++) { int varying = c->vue_map.slot_to_varying[slot]; GLuint delta = brw_vue_slot_to_offset(slot); /* HPOS, NDC already handled above */ if (varying == VARYING_SLOT_POS || varying == BRW_VARYING_SLOT_NDC) continue; if (varying == VARYING_SLOT_EDGE) { if (force_edgeflag) brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); else brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); } else if (varying == VARYING_SLOT_PSIZ) { /* PSIZ doesn't need interpolation because it isn't used by the * fragment shader. */ } else if (varying < VARYING_SLOT_MAX) { /* This is a true vertex result (and not a special value for the VUE * header), so interpolate: * * New = attr0 + t*attr1 - t*attr0 * * Unless the attribute is flat shaded -- in which case just copy * from one of the sources (doesn't matter which; already copied from pv) */ GLuint interp = c->key.interpolation_mode.mode[slot]; if (interp != INTERP_QUALIFIER_FLAT) { struct brw_reg tmp = get_tmp(c); struct brw_reg t = interp == INTERP_QUALIFIER_NOPERSPECTIVE ? t_nopersp : t0; brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t); brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t); brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp); release_tmp(c, tmp); } else { brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); } } } if (c->vue_map.num_slots % 2) { GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots); brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); } if (c->has_noperspective_shading) release_tmp(c, t_nopersp); }
static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int msg) { int uv; if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); uv = p->gen >= 060 ? 6 : 3; } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); uv = p->gen >= 060 ? 4 : 3; } uv += 2*channel; msg++; if (p->gen >= 060) { /* First compute 1/z */ brw_PLN(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 0), brw_vec8_grf(2, 0)); if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); } else brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); brw_PLN(p, brw_vec8_grf(26, 0), brw_vec1_grf(uv, 0), brw_vec8_grf(2, 0)); brw_PLN(p, brw_vec8_grf(28, 0), brw_vec1_grf(uv, 4), brw_vec8_grf(2, 0)); brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(26, 0), brw_vec8_grf(30, 0)); brw_MUL(p, brw_message_reg(msg + dw/8), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); } else { struct brw_reg r = brw_vec1_grf(uv, 0); /* First compute 1/z */ brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0)); brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0)); if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); } else brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); /* Now compute the output s,t values */ brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); msg += dw/8; brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); } }
void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate) { struct brw_compile *p = &c->func; GLuint i; c->flag_value = 0xff; c->nr_verts = 3; if (allocate) alloc_regs(c); invert_det(c); copy_z_inv_w(c); if (c->key.do_twoside_color) do_twoside_color(c); if (c->has_flat_shading) do_flatshade_triangle(c); for (i = 0; i < c->nr_setup_regs; i++) { /* Pair of incoming attributes: */ struct brw_reg a0 = offset(c->vert[0], i); struct brw_reg a1 = offset(c->vert[1], i); struct brw_reg a2 = offset(c->vert[2], i); GLushort pc, pc_persp, pc_linear; bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); if (pc_persp) { set_predicate_control_flag_value(p, c, pc_persp); brw_MUL(p, a0, a0, c->inv_w[0]); brw_MUL(p, a1, a1, c->inv_w[1]); brw_MUL(p, a2, a2, c->inv_w[2]); } /* Calculate coefficients for interpolated values: */ if (pc_linear) { set_predicate_control_flag_value(p, c, pc_linear); brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); /* calculate dA/dx */ brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); /* calculate dA/dy */ brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); } { set_predicate_control_flag_value(p, c, pc); /* start point for interpolation */ brw_MOV(p, c->m3C0, a0); /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in * the send instruction: */ brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ last ? BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS, 4, /* msg len */ 0, /* response len */ i*4, /* offset */ BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ } } brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); }
/** * Generate assembly for a Vec4 IR instruction. * * \param instruction The Vec4 IR instruction to generate code for. * \param dst The destination register. * \param src An array of up to three source registers. */ void vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, struct brw_reg dst, struct brw_reg *src) { vec4_instruction *inst = (vec4_instruction *) instruction; if (dst.width == BRW_WIDTH_4) { /* This happens in attribute fixups for "dual instanced" geometry * shaders, since they use attributes that are vec4's. Since the exec * width is only 4, it's essential that the caller set * force_writemask_all in order to make sure the instruction is executed * regardless of which channels are enabled. */ assert(inst->force_writemask_all); /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy * the following register region restrictions (from Graphics BSpec: * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions * > Register Region Restrictions) * * 1. ExecSize must be greater than or equal to Width. * * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set * to Width * HorzStride." */ for (int i = 0; i < 3; i++) { if (src[i].file == BRW_GENERAL_REGISTER_FILE) src[i] = stride(src[i], 4, 4, 1); } } switch (inst->opcode) { case BRW_OPCODE_MOV: brw_MOV(p, dst, src[0]); break; case BRW_OPCODE_ADD: brw_ADD(p, dst, src[0], src[1]); break; case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; case BRW_OPCODE_MACH: brw_MACH(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAD: assert(brw->gen >= 6); brw_MAD(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; case BRW_OPCODE_RNDD: brw_RNDD(p, dst, src[0]); break; case BRW_OPCODE_RNDE: brw_RNDE(p, dst, src[0]); break; case BRW_OPCODE_RNDZ: brw_RNDZ(p, dst, src[0]); break; case BRW_OPCODE_AND: brw_AND(p, dst, src[0], src[1]); break; case BRW_OPCODE_OR: brw_OR(p, dst, src[0], src[1]); break; case BRW_OPCODE_XOR: brw_XOR(p, dst, src[0], src[1]); break; case BRW_OPCODE_NOT: brw_NOT(p, dst, src[0]); break; case BRW_OPCODE_ASR: brw_ASR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHR: brw_SHR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHL: brw_SHL(p, dst, src[0], src[1]); break; case BRW_OPCODE_CMP: brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); break; case BRW_OPCODE_SEL: brw_SEL(p, dst, src[0], src[1]); break; case BRW_OPCODE_DPH: brw_DPH(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP4: brw_DP4(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP3: brw_DP3(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP2: brw_DP2(p, dst, src[0], src[1]); break; case BRW_OPCODE_F32TO16: assert(brw->gen >= 7); brw_F32TO16(p, dst, src[0]); break; case BRW_OPCODE_F16TO32: assert(brw->gen >= 7); brw_F16TO32(p, dst, src[0]); break; case BRW_OPCODE_LRP: assert(brw->gen >= 6); brw_LRP(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFREV: assert(brw->gen >= 7); /* BFREV only supports UD type for src and dst. */ brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; case BRW_OPCODE_FBH: assert(brw->gen >= 7); /* FBH only supports UD type for dst. */ brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_FBL: assert(brw->gen >= 7); /* FBL only supports UD type for dst. */ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_CBIT: assert(brw->gen >= 7); /* CBIT only supports UD type for dst. */ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_ADDC: assert(brw->gen >= 7); brw_ADDC(p, dst, src[0], src[1]); break; case BRW_OPCODE_SUBB: assert(brw->gen >= 7); brw_SUBB(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAC: brw_MAC(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFE: assert(brw->gen >= 7); brw_BFE(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFI1: assert(brw->gen >= 7); brw_BFI1(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFI2: assert(brw->gen >= 7); brw_BFI2(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ assert(brw->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8); brw_inst_set_pred_control(brw, if_inst, inst->predicate); } break; case BRW_OPCODE_ELSE: brw_ELSE(p); break; case BRW_OPCODE_ENDIF: brw_ENDIF(p); break; case BRW_OPCODE_DO: brw_DO(p, BRW_EXECUTE_8); break; case BRW_OPCODE_BREAK: brw_BREAK(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: brw_CONT(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_WHILE: brw_WHILE(p); break; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: case SHADER_OPCODE_SQRT: case SHADER_OPCODE_EXP2: case SHADER_OPCODE_LOG2: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: if (brw->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], brw_null_reg()); } else if (brw->gen == 6) { generate_math_gen6(inst, dst, src[0], brw_null_reg()); } else { generate_math1_gen4(inst, dst, src[0]); } break; case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: if (brw->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); } else if (brw->gen == 6) { generate_math_gen6(inst, dst, src[0], src[1]); } else { generate_math2_gen4(inst, dst, src[0], src[1]); } break; case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: generate_tex(inst, dst, src[0], src[1]); break; case VS_OPCODE_URB_WRITE: generate_vs_urb_write(inst); break; case SHADER_OPCODE_GEN4_SCRATCH_READ: generate_scratch_read(inst, dst, src[0]); break; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: generate_scratch_write(inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD: generate_pull_constant_load(inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); break; case GS_OPCODE_URB_WRITE: generate_gs_urb_write(inst); break; case GS_OPCODE_THREAD_END: generate_gs_thread_end(inst); break; case GS_OPCODE_SET_WRITE_OFFSET: generate_gs_set_write_offset(dst, src[0], src[1]); break; case GS_OPCODE_SET_VERTEX_COUNT: generate_gs_set_vertex_count(dst, src[0]); break; case GS_OPCODE_SET_DWORD_2_IMMED: generate_gs_set_dword_2_immed(dst, src[0]); break; case GS_OPCODE_PREPARE_CHANNEL_MASKS: generate_gs_prepare_channel_masks(dst); break; case GS_OPCODE_SET_CHANNEL_MASKS: generate_gs_set_channel_masks(dst, src[0]); break; case GS_OPCODE_GET_INSTANCE_ID: generate_gs_get_instance_id(dst); break; case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], prog_data->base.binding_table.shader_time_start); brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.shader_time_start); break; case SHADER_OPCODE_UNTYPED_ATOMIC: generate_untyped_atomic(inst, dst, src[0], src[1]); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: generate_untyped_surface_read(inst, dst, src[0]); break; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: generate_unpack_flags(inst, dst); break; default: if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n", opcode_descs[inst->opcode].name); } else { _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode); } abort(); } }