static void emit_pixel_w( struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; if (mask & WRITEMASK_W) { struct brw_reg dst, src0, delta0, delta1; struct brw_reg interp3; dst = get_dst_reg(c, inst, 3, 1); src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); interp3 = brw_vec1_grf(src0.nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ brw_LINE(p, brw_null_reg(), interp3, delta0); brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); /* Calc w */ brw_math_16( p, dst, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_PRECISION_FULL); } }
static void brw_wm_xy(struct brw_compile *p, int dw) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = __retype_uw(r1); struct brw_reg x_uw, y_uw; brw_set_compression_control(p, BRW_COMPRESSION_NONE); if (dw == 16) { x_uw = brw_uw16_grf(30, 0); y_uw = brw_uw16_grf(28, 0); } else { x_uw = brw_uw8_grf(30, 0); y_uw = brw_uw8_grf(28, 0); } brw_ADD(p, x_uw, __stride(__suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); brw_ADD(p, y_uw, __stride(__suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1)); brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1))); }
static void emit_pixel_xy(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); struct brw_reg dst0, dst1; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; dst0 = get_dst_reg(c, inst, 0, 1); dst1 = get_dst_reg(c, inst, 1, 1); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } }
static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, const GLfloat *param_ptr ) { GLuint i = c->prog_data.nr_params++; if (i >= BRW_WM_MAX_PARAM) { printf("%s: out of params\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } else { struct brw_wm_ref *ref = get_ref(c); c->prog_data.param[i] = param_ptr; c->prog_data.param_convert[i] = PARAM_NO_CONVERT; c->nr_creg = (i+16)/16; /* Push the offsets into hw_reg. These will be added to the * real register numbers once one is allocated in pass2. */ ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8); ref->value = &c->creg[i/16]; ref->insn = 0; ref->prevuse = NULL; return ref; } }
/*
 * Emit the pixel X and/or Y coordinates selected by mask into dst[0] and
 * dst[1], viewed as 16-wide unsigned-word registers.
 *
 * The ADDs are emitted uncompressed; compression control is switched to
 * COMPRESSED again before returning.  arg0 is not used.
 */
static void emit_pixel_xy(struct brw_compile *p,
                          const struct brw_reg *dst,
                          GLuint mask,
                          const struct brw_reg *arg0)
{
    struct brw_reg payload_uw =
        retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

    brw_set_compression_control(p, BRW_COMPRESSION_NONE);

    /* Pixel centers: add 1 or 0 to each of the micro-tile coordinates
     * delivered in r1.
     */
    if (mask & WRITEMASK_X) {
        struct brw_reg x_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));

        brw_ADD(p,
                x_uw,
                stride(suboffset(payload_uw, 4), 2, 4, 0),
                brw_imm_v(0x10101010));
    }

    if (mask & WRITEMASK_Y) {
        struct brw_reg y_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));

        brw_ADD(p,
                y_uw,
                stride(suboffset(payload_uw, 5), 2, 4, 0),
                brw_imm_v(0x11001100));
    }

    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
/*
 * Sets the destination channels to 1.0 or 0.0 according to glFrontFacing.
 *
 * p    - code emitter.
 * dst  - destination registers, indexed by channel (0..3).
 * mask - write mask; only channels whose bit is set are written.  Nothing
 *        is emitted when no XYZW bit is set.
 *
 * Every selected channel is first loaded with 0.0.  A compare of g1.6
 * (as UD) against bit 31 follows, then a second round of MOVs of 1.0
 * (presumably predicated on the compare result -- confirm against
 * brw_CMP), and finally the predicate flag value is reset to 0xff.
 */
void emit_frontfacing(struct brw_compile *p,
                      const struct brw_reg *dst,
                      GLuint mask)
{
    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
    GLuint i;

    if (!(mask & WRITEMASK_XYZW))
        return;

    for (i = 0; i < 4; i++) {
        if (mask & (1u << i)) {
            brw_MOV(p, dst[i], brw_imm_f(0.0));
        }
    }

    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
     * us front face.  The constant is built from an unsigned 1: shifting
     * a signed int into its sign bit is undefined behaviour in C.
     */
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud,
            brw_imm_ud(1u << 31));

    for (i = 0; i < 4; i++) {
        if (mask & (1u << i)) {
            brw_MOV(p, dst[i], brw_imm_f(1.0));
        }
    }

    brw_set_predicate_control_flag_value(p, 0xff);
}
/*
 * Emit the X and/or Y deltas selected by mask: the pixel centers in
 * arg0[0] / arg0[1] (read as unsigned words) minus the origin held in
 * r1.0 / r1.1.  arg1 is not used.
 */
static void emit_delta_xy(struct brw_compile *p,
                          const struct brw_reg *dst,
                          GLuint mask,
                          const struct brw_reg *arg0,
                          const struct brw_reg *arg1)
{
    struct brw_reg origin = brw_vec1_grf(1, 0);

    if (mask & WRITEMASK_X) {
        struct brw_reg x_center = retype(arg0[0], BRW_REGISTER_TYPE_UW);

        brw_ADD(p, dst[0], x_center, negate(origin));
    }

    if (mask & WRITEMASK_Y) {
        struct brw_reg y_center = retype(arg0[1], BRW_REGISTER_TYPE_UW);

        brw_ADD(p, dst[1], y_center, negate(suboffset(origin, 1)));
    }
}
/*
 * Emit the W channel into dst[3] when mask selects it.
 *
 * 1/w is linearly interpolated from the coefficients at element 4 of the
 * register following arg0[0], with the MAC result written to message
 * register 2; a 16-wide INV math instruction then produces w.
 */
static void emit_pixel_w(struct brw_compile *p,
                         const struct brw_reg *dst,
                         GLuint mask,
                         const struct brw_reg *arg0,
                         const struct brw_reg *deltas)
{
    struct brw_reg w_coef;

    /* Not needed when, for instance, only color is being interpolated. */
    if (!(mask & WRITEMASK_W))
        return;

    w_coef = brw_vec1_grf(arg0[0].nr + 1, 4);

    /* 1/w: linear interpolation of wpos[3], result placed straight into
     * a message register.
     */
    brw_LINE(p, brw_null_reg(), w_coef, deltas[0]);
    brw_MAC(p, brw_message_reg(2), suboffset(w_coef, 1), deltas[1]);

    /* w = 1 / (1/w) */
    brw_math_16(p,
                dst[3],
                BRW_MATH_FUNCTION_INV,
                BRW_MATH_SATURATE_NONE,
                2,
                brw_null_reg(),
                BRW_MATH_PRECISION_FULL);
}
static void emit_delta_xy(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; dst0 = get_dst_reg(c, inst, 0, 1); dst1 = get_dst_reg(c, inst, 1, 1); src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ if (mask & WRITEMASK_X) { brw_ADD(p, dst0, retype(src0, BRW_REGISTER_TYPE_UW), negate(r1)); } if (mask & WRITEMASK_Y) { brw_ADD(p, dst1, retype(src1, BRW_REGISTER_TYPE_UW), negate(suboffset(r1,1))); } }
static void gen_PLN_MRF_GRF_GRF(struct brw_codegen *p) { struct brw_reg m6 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 6, 0); struct brw_reg interp = brw_vec1_grf(2, 0); struct brw_reg g4 = brw_vec8_grf(4, 0); brw_PLN(p, m6, interp, g4); }
/*
 * Emit the W channel into dst[3] when mask selects it.
 *
 * 1/w is interpolated with a single PLN when can_do_pln() allows it and
 * with a LINE + MAC pair otherwise; the result is then inverted with a
 * math instruction whose width follows c->dispatch_width.
 *
 * NOTE(review): the function asserts gen < 6 yet still carries gen >= 6
 * branches; those only run in builds where assert() is compiled out.
 */
void emit_pixel_w(struct brw_wm_compile *c,
                  const struct brw_reg *dst,
                  GLuint mask,
                  const struct brw_reg *arg0,
                  const struct brw_reg *deltas)
{
    struct brw_compile *p = &c->func;
    struct intel_context *intel = &p->brw->intel;
    struct brw_reg inv_w;      /* where the interpolated 1/w is written */
    struct brw_reg math_src;
    struct brw_reg w_coef;

    inv_w = (intel->gen >= 6) ? dst[3] : brw_message_reg(2);

    assert(intel->gen < 6);

    /* Not needed when, for instance, only color is being interpolated. */
    if (!(mask & WRITEMASK_W))
        return;

    w_coef = brw_vec1_grf(arg0[0].nr + 1, 4);

    /* 1/w: linear interpolation of wpos[3], result placed straight into
     * a message register.
     */
    if (can_do_pln(intel, deltas)) {
        brw_PLN(p, inv_w, w_coef, deltas[0]);
    } else {
        brw_LINE(p, brw_null_reg(), w_coef, deltas[0]);
        brw_MAC(p, inv_w, suboffset(w_coef, 1), deltas[1]);
    }

    /* w = 1 / (1/w) */
    math_src = (intel->gen >= 6) ? inv_w : brw_null_reg();

    if (c->dispatch_width == 16) {
        brw_math_16(p,
                    dst[3],
                    BRW_MATH_FUNCTION_INV,
                    BRW_MATH_SATURATE_NONE,
                    2,
                    math_src,
                    BRW_MATH_PRECISION_FULL);
    } else {
        brw_math(p,
                 dst[3],
                 BRW_MATH_FUNCTION_INV,
                 BRW_MATH_SATURATE_NONE,
                 2,
                 math_src,
                 BRW_MATH_DATA_VECTOR,
                 BRW_MATH_PRECISION_FULL);
    }
}
static void emit_kil(struct brw_wm_compile *c) { struct brw_compile *p = &c->func; struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK brw_AND(p, depth, c->emit_mask_reg, depth); brw_pop_insn_state(p); }
/*
 * Constant interpolation: for every channel selected by mask, copy
 * element 3 of that channel's coefficient group into dst[channel].
 *
 * The four coefficient groups start at arg0[0].nr: channels 0 and 1 use
 * elements 0 and 4 of the first register, channels 2 and 3 use elements
 * 0 and 4 of the next one.
 */
void emit_cinterp(struct brw_compile *p,
                  const struct brw_reg *dst,
                  GLuint mask,
                  const struct brw_reg *arg0)
{
    const GLuint base = arg0[0].nr;
    struct brw_reg coef[4];
    GLuint chan;

    for (chan = 0; chan < 4; chan++)
        coef[chan] = brw_vec1_grf(base + chan / 2, (chan % 2) * 4);

    for (chan = 0; chan < 4; chan++) {
        if (!(mask & (1 << chan)))
            continue;

        /* TODO: optimize away like other moves */
        brw_MOV(p, dst[chan], suboffset(coef[chan], 3));
    }
}
/**
 * Emit the end-of-thread URB write for a geometry shader.
 *
 * Element 5 of the message header register is first set to g0.5 | 0xff00
 * (with mask control disabled, in Align1 mode), then a SEND with the
 * BRW_URB_WRITE_EOT flag and a message length of 2 is emitted using the
 * header register as its source.
 */
void
gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir)
{
   const unsigned header_nr = GEN7_MRF_HACK_START + ir->base_mrf;
   struct brw_reg payload = brw_vec8_grf(header_nr, 0);

   /* Enable Channel Masks in the URB_WRITE_HWORD message header */
   default_state.access_mode = BRW_ALIGN_1;
   gen8_instruction *or_inst =
      OR(retype(brw_vec1_grf(header_nr, 5), BRW_REGISTER_TYPE_UD),
         retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
         brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
   gen8_set_mask_control(or_inst, BRW_MASK_DISABLE);
   default_state.access_mode = BRW_ALIGN_16;

   /* mlen = 2: g0 header + vertex count */
   gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
   gen8_set_urb_message(brw, send, BRW_URB_WRITE_EOT, 2, 0, 0, true);
   gen8_set_dst(brw, send, brw_null_reg());
   gen8_set_src0(brw, send, payload);
}
/*
 * Linear interpolation: for every channel selected by mask, emit a
 * LINE + MAC pair that combines the channel's coefficient group with
 * deltas[0] and deltas[1] and writes the result to dst[channel].
 *
 * The four coefficient groups start at arg0[0].nr: channels 0 and 1 use
 * elements 0 and 4 of the first register, channels 2 and 3 use elements
 * 0 and 4 of the next one.
 */
static void emit_linterp(struct brw_compile *p,
                         const struct brw_reg *dst,
                         GLuint mask,
                         const struct brw_reg *arg0,
                         const struct brw_reg *deltas)
{
    const GLuint base = arg0[0].nr;
    struct brw_reg coef[4];
    GLuint chan;

    for (chan = 0; chan < 4; chan++)
        coef[chan] = brw_vec1_grf(base + chan / 2, (chan % 2) * 4);

    for (chan = 0; chan < 4; chan++) {
        if (!(mask & (1 << chan)))
            continue;

        brw_LINE(p, brw_null_reg(), coef[chan], deltas[0]);
        brw_MAC(p, dst[chan], suboffset(coef[chan], 1), deltas[1]);
    }
}
/*
 * Perspective-correct interpolation.
 *
 * p      - code emitter.
 * dst    - destination registers, indexed by channel (0..3).
 * mask   - write mask selecting which channels are produced.
 * arg0   - arg0[0].nr is the first of the two registers holding the four
 *          per-channel coefficient groups.
 * deltas - interpolation deltas (deltas[0] and deltas[1]).
 * w      - w[3] is multiplied into every produced channel (gen < 6 only).
 *
 * On gen >= 6 this simply forwards to emit_linterp() with the caller's
 * deltas.  On older parts each selected channel is interpolated (one PLN
 * when can_do_pln() allows it, LINE + MAC otherwise) and then multiplied
 * by w[3] in a second pass.
 */
void emit_pinterp(struct brw_compile *p,
                  const struct brw_reg *dst,
                  GLuint mask,
                  const struct brw_reg *arg0,
                  const struct brw_reg *deltas,
                  const struct brw_reg *w)
{
    struct intel_context *intel = &p->brw->intel;
    struct brw_reg interp[4];
    GLuint nr = arg0[0].nr;
    GLuint i;

    if (intel->gen >= 6) {
        /* Pass the real deltas; the local interp[] table has not been
         * filled in yet and must not be handed over as the delta
         * registers.
         */
        emit_linterp(p, dst, mask, arg0, deltas);
        return;
    }

    interp[0] = brw_vec1_grf(nr, 0);
    interp[1] = brw_vec1_grf(nr, 4);
    interp[2] = brw_vec1_grf(nr + 1, 0);
    interp[3] = brw_vec1_grf(nr + 1, 4);

    for (i = 0; i < 4; i++) {
        if (mask & (1 << i)) {
            if (can_do_pln(intel, deltas)) {
                brw_PLN(p, dst[i], interp[i], deltas[0]);
            } else {
                brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
                brw_MAC(p, dst[i], suboffset(interp[i], 1), deltas[1]);
            }
        }
    }

    /* Second pass: scale every interpolated channel by w[3]. */
    for (i = 0; i < 4; i++) {
        if (mask & (1 << i)) {
            brw_MUL(p, dst[i], dst[i], w[3]);
        }
    }
}
static void brw_wm_affine_st(struct brw_compile *p, int dw, int channel, int msg) { int uv; if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); uv = p->gen >= 060 ? 6 : 3; } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); uv = p->gen >= 060 ? 4 : 3; } uv += 2*channel; msg++; if (p->gen >= 060) { brw_PLN(p, brw_message_reg(msg), brw_vec1_grf(uv, 0), brw_vec8_grf(2, 0)); msg += dw/8; brw_PLN(p, brw_message_reg(msg), brw_vec1_grf(uv, 4), brw_vec8_grf(2, 0)); } else { struct brw_reg r = brw_vec1_grf(uv, 0); brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); msg += dw/8; brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); } }
/*
 * Pre-assign hardware registers for the fixed parts of the fragment
 * program: depth payload, program parameters, interpolated inputs, the
 * emit-mask register and the stack.  c->reg_index is advanced as each
 * group is laid out, and the resulting read lengths are recorded in
 * c->prog_data.
 */
static void prealloc_reg(struct brw_wm_compile *c)
{
    int i, j;
    struct brw_reg reg;
    int nr_interp_regs = 0;
    /* WPOS is always treated as an input, in addition to whatever the
     * program interpolates or takes derivatives of.
     */
    GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;

    /* Depth payload: entry i maps to g(2*i) when that depth register is
     * present, and to g0 otherwise.
     */
    for (i = 0; i < 4; i++) {
        reg = (i < c->key.nr_depth_regs) ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0);
        set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
    }
    c->reg_index += 2*c->key.nr_depth_regs;

    /* Program parameters: four floats per parameter, packed eight floats
     * to a register starting at the current register index.
     */
    {
        int nr_params = c->fp->program.Base.Parameters->NumParameters;
        struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
        int index = 0;
        c->prog_data.nr_params = 4*nr_params;
        for (i = 0; i < nr_params; i++) {
            for (j = 0; j < 4; j++, index++) {
                reg = brw_vec1_grf(c->reg_index + index/8, index%8);
                c->prog_data.param[index] = &plist->ParameterValues[i][j];
                set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
            }
        }
        /* Register count: the float count rounded up to a multiple of 16,
         * i.e. always an even number of 8-float registers.
         */
        c->nr_creg = 2*((4*nr_params+15)/16);
        c->reg_index += c->nr_creg;
    }

    /* Interpolated inputs: each enabled attribute takes two registers and
     * all four of its channels are mapped to the same base register.
     */
    for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
        if (inputs & (1<<i)) {
            nr_interp_regs++;
            reg = brw_vec8_grf(c->reg_index, 0);
            for (j = 0; j < 4; j++)
                set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
            c->reg_index += 2;
        }
    }

    /* Record where the constants start and how much is read for each. */
    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
    c->prog_data.urb_read_length = nr_interp_regs * 2;
    c->prog_data.curb_read_length = c->nr_creg;

    /* One register for the emit mask, then two for the stack. */
    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
    c->reg_index++;
    c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
    c->reg_index += 2;
}
/* Kill pixel - set execution mask to zero for those pixels which * fail. */ static void emit_kil( struct brw_wm_compile *c, struct brw_reg *arg0) { struct brw_compile *p = &c->func; struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); GLuint i; /* XXX - usually won't need 4 compares! */ for (i = 0; i < 4; i++) { brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_AND(p, r0uw, brw_flag_reg(), r0uw); brw_pop_insn_state(p); } }
/** * Emit code that kills pixels whose X and Y coordinates are outside the * boundary of the rectangle defined by the push constants (dst_x0, dst_y0, * dst_x1, dst_y1). */ void brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct brw_reg &x, const struct brw_reg &y, const struct brw_reg &dst_x0, const struct brw_reg &dst_x1, const struct brw_reg &dst_y0, const struct brw_reg &dst_y1) { struct brw_reg f0 = brw_flag_reg(0, 0); struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); emit_cmp(BRW_CONDITIONAL_GE, x, dst_x0); emit_cmp(BRW_CONDITIONAL_GE, y, dst_y0)->predicate = BRW_PREDICATE_NORMAL; emit_cmp(BRW_CONDITIONAL_L, x, dst_x1)->predicate = BRW_PREDICATE_NORMAL; emit_cmp(BRW_CONDITIONAL_L, y, dst_y1)->predicate = BRW_PREDICATE_NORMAL; fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, 16, g1, f0, g1); inst->force_writemask_all = true; insts.push_tail(inst); }
/**
 * Write block of 16 dwords/floats to the data port Render Cache scratch
 * buffer.  Scratch offset should be a multiple of 64.
 * Used for register spilling.
 *
 * Two instructions are emitted: a MOV that stores scratch_offset in g0.2
 * (with mask control disabled and no compression), followed by an
 * uncompressed SEND that performs the 4-OWord block write of src.
 */
void brw_dp_WRITE_16(struct brw_compile *p,
                     struct brw_reg src,
                     GLuint scratch_offset)
{
    const GLuint msg_reg_nr = 1;
    const GLuint msg_length = 3;
    struct brw_reg null_uw;
    struct brw_instruction *send;

    /* set message header global offset field (reg 0, element 2) */
    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_MOV(p,
            retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
            brw_imm_d(scratch_offset));
    brw_pop_insn_state(p);

    null_uw = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
    send = next_insn(p, BRW_OPCODE_SEND);

    send->header.predicate_control = 0; /* XXX */
    send->header.compression_control = BRW_COMPRESSION_NONE;
    send->header.destreg__conditionalmod = msg_reg_nr;

    brw_set_dest(send, null_uw);
    brw_set_src0(send, src);

    brw_set_dp_write_message(p->brw,
                             send,
                             255, /* binding table index (255=stateless) */
                             BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
                             BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
                             msg_length,
                             0, /* pixel scoreboard */
                             0, /* response_length */
                             0); /* eot */
}
static void wm_src_sample_argb(struct brw_compile *p) { static const uint32_t fragment[][4] = { #include "exa_wm_src_affine.g6b" #include "exa_wm_src_sample_argb.g6b" #include "exa_wm_write.g6b" }; int n; brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); brw_pop_insn_state(p); brw_SAMPLE(p, retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW), 1, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD), 1, 0, WRITEMASK_XYZW, GEN5_SAMPLER_MESSAGE_SAMPLE, 8, 5, true, BRW_SAMPLER_SIMD_MODE_SIMD16); for (n = 0; n < p->nr_insn; n++) { brw_disasm(stdout, &p->store[n], 60); } printf("\n\n"); for (n = 0; n < ARRAY_SIZE(fragment); n++) { brw_disasm(stdout, (const struct brw_instruction *)&fragment[n][0], 60); } }
/**
 * Computes the screen-space x,y distance of the pixels from the start
 * vertex.
 *
 * This will be used in linterp or pinterp with the start vertex value
 * and the Cx, Cy, and C0 coefficients passed in from the setup engine
 * to produce interpolated attribute values.
 *
 * Nothing is emitted for an empty mask; any other mask must be exactly
 * WRITEMASK_XY.
 */
void emit_delta_xy(struct brw_compile *p,
                   const struct brw_reg *dst,
                   GLuint mask,
                   const struct brw_reg *arg0)
{
    struct intel_context *intel = &p->brw->intel;
    struct brw_reg x_start, y_start;

    if (mask == 0)
        return;

    assert(mask == WRITEMASK_XY);

    if (intel->gen >= 6) {
        /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1.
         * Just add a zero immediate to produce the dst regs.
         */
        x_start = brw_imm_v(0x00000000);
        y_start = x_start;
    } else {
        /* Subtract the origin in r1 from the pixel centers produced by
         * emit_pixel_xy().
         */
        struct brw_reg origin = brw_vec1_grf(1, 0);

        x_start = negate(origin);
        y_start = negate(suboffset(origin, 1));
    }

    brw_ADD(p, dst[0], retype(arg0[0], BRW_REGISTER_TYPE_UW), x_start);
    brw_ADD(p, dst[1], retype(arg0[1], BRW_REGISTER_TYPE_UW), y_start);
}
/**
 * Read block of 16 dwords/floats from the data port Render Cache scratch
 * buffer.  Scratch offset should be a multiple of 64.
 * Used for register spilling.
 *
 * Two instructions are emitted: a MOV that stores scratch_offset in g0.2
 * (with mask control disabled and no compression), followed by an
 * uncompressed SEND that reads 4 OWords back into dest.
 */
void brw_dp_READ_16(struct brw_compile *p,
                    struct brw_reg dest,
                    GLuint scratch_offset)
{
    const GLuint msg_reg_nr = 1;
    struct brw_instruction *send;

    /* set message header global offset field (reg 0, element 2) */
    brw_push_insn_state(p);
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_MOV(p,
            retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
            brw_imm_d(scratch_offset));
    brw_pop_insn_state(p);

    send = next_insn(p, BRW_OPCODE_SEND);

    send->header.predicate_control = 0; /* XXX */
    send->header.compression_control = BRW_COMPRESSION_NONE;
    send->header.destreg__conditionalmod = msg_reg_nr;

    brw_set_dest(send, dest); /* UW? */
    brw_set_src0(send, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));

    brw_set_dp_read_message(p->brw,
                            send,
                            255, /* binding table index (255=stateless) */
                            3, /* msg_control (3 means 4 Owords) */
                            BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                            1, /* target cache (render/scratch) */
                            1, /* msg_length */
                            2, /* response_length */
                            0); /* eot */
}
/**
 * Computes the screen-space x,y position of the pixels.
 *
 * This will be used by emit_delta_xy() or emit_wpos_xy() for
 * interpolation of attributes.
 *
 * Payload R0:
 *
 * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 *         corresponding to each of the 16 execution channels.
 * R0.1..8 -- ?
 * R1.0 -- triangle vertex 0.X
 * R1.1 -- triangle vertex 0.Y
 * R1.2 -- tile 0 x,y coords (2 packed uwords)
 * R1.3 -- tile 1 x,y coords (2 packed uwords)
 * R1.4 -- tile 2 x,y coords (2 packed uwords)
 * R1.5 -- tile 3 x,y coords (2 packed uwords)
 * R1.6 -- ?
 * R1.7 -- ?
 * R1.8 -- ?
 *
 * The ADDs are emitted uncompressed; the instruction state is saved on
 * entry and restored before returning.
 */
void emit_pixel_xy(struct brw_wm_compile *c,
                   const struct brw_reg *dst,
                   GLuint mask)
{
    struct brw_compile *p = &c->func;
    struct brw_reg payload_uw =
        retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
    struct brw_reg x_uw = retype(dst[0], BRW_REGISTER_TYPE_UW);
    struct brw_reg y_uw = retype(dst[1], BRW_REGISTER_TYPE_UW);

    brw_push_insn_state(p);
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);

    /* Widen the destinations to match the dispatch width. */
    if (c->dispatch_width == 16) {
        x_uw = vec16(x_uw);
        y_uw = vec16(y_uw);
    } else {
        x_uw = vec8(x_uw);
        y_uw = vec8(y_uw);
    }

    /* Pixel centers: add 1 or 0 to each of the micro-tile coordinates
     * delivered in r1.
     */
    if (mask & WRITEMASK_X) {
        brw_ADD(p,
                x_uw,
                stride(suboffset(payload_uw, 4), 2, 4, 0),
                brw_imm_v(0x10101010));
    }

    if (mask & WRITEMASK_Y) {
        brw_ADD(p,
                y_uw,
                stride(suboffset(payload_uw, 5), 2, 4, 0),
                brw_imm_v(0x11001100));
    }

    brw_pop_insn_state(p);
}
/**
 * Emit a URB write SEND for the given vec4 instruction.
 *
 * When vs is true, g0 is first copied into the message header register.
 * When the instruction's flags do not contain
 * BRW_URB_WRITE_USE_CHANNEL_MASKS, element 5 of the header is overwritten
 * with 0xff00 in Align1 mode.  The SEND itself uses the instruction's
 * flags, message length and offset, with a null destination.
 */
void
gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs)
{
   const unsigned header_nr = GEN7_MRF_HACK_START + ir->base_mrf;
   struct brw_reg header = brw_vec8_grf(header_nr, 0);

   /* Copy g0. */
   if (vs)
      MOV_RAW(header, brw_vec8_grf(0, 0));

   const bool caller_set_masks =
      (ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) != 0;

   if (!caller_set_masks) {
      /* Enable Channel Masks in the URB_WRITE_OWORD message header */
      default_state.access_mode = BRW_ALIGN_1;
      MOV_RAW(brw_vec1_grf(header_nr, 5), brw_imm_ud(0xff00));
      default_state.access_mode = BRW_ALIGN_16;
   }

   gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
   gen8_set_urb_message(brw, send, ir->urb_write_flags, ir->mlen, 0,
                        ir->offset, true);
   gen8_set_dst(brw, send, brw_null_reg());
   gen8_set_src0(brw, send, header);
}
static void alloc_regs( struct brw_sf_compile *c ) { GLuint reg, i; /* Values computed by fixed function unit: */ c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); c->det = brw_vec1_grf(1, 2); c->dx0 = brw_vec1_grf(1, 3); c->dx2 = brw_vec1_grf(1, 4); c->dy0 = brw_vec1_grf(1, 5); c->dy2 = brw_vec1_grf(1, 6); /* z and 1/w passed in seperately: */ c->z[0] = brw_vec1_grf(2, 0); c->inv_w[0] = brw_vec1_grf(2, 1); c->z[1] = brw_vec1_grf(2, 2); c->inv_w[1] = brw_vec1_grf(2, 3); c->z[2] = brw_vec1_grf(2, 4); c->inv_w[2] = brw_vec1_grf(2, 5); /* The vertices: */ reg = 3; for (i = 0; i < c->nr_verts; i++) { c->vert[i] = brw_vec8_grf(reg, 0); reg += c->nr_attr_regs; } /* Temporaries, allocated after last vertex reg. */ c->inv_det = brw_vec1_grf(reg, 0); reg++; c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; c->tmp = brw_vec8_grf(reg, 0); reg++; /* Note grf allocation: */ c->prog_data.total_grf = reg; /* Outputs of this program - interpolation coefficients for * rasterization: */ c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); }
/*
 * Emit the projective interpolation of the two texture coordinates of
 * one channel into message registers.
 *
 * The value interpolated from the register after the coordinate
 * coefficients is inverted in g30 (and g31 for dw == 16), each coordinate
 * is interpolated into a temporary, and the product of the two is written
 * to message register msg + 1 (first coordinate) and dw/8 registers after
 * it (second coordinate).  gen >= 060 interpolates with PLN against g2;
 * older parts use LINE + MAC against the X16 / Y16 registers.
 */
static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int msg)
{
    int uv;

    /* Pick the compression mode and the first coefficient register for
     * this dispatch width / generation.
     */
    if (dw == 16) {
        brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
        uv = p->gen >= 060 ? 6 : 3;
    } else {
        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
        uv = p->gen >= 060 ? 4 : 3;
    }
    /* Each channel occupies two coefficient registers. */
    uv += 2*channel;
    msg++;

    if (p->gen >= 060) {
        /* First compute 1/z */
        brw_PLN(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 0), brw_vec8_grf(2, 0));
        /* The invert is issued uncompressed, one 8-wide half at a time. */
        if (dw == 16) {
            brw_set_compression_control(p, BRW_COMPRESSION_NONE);
            brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
            brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
            brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
        } else
            brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));

        /* Interpolate both coordinates into g26 / g28, then scale each by
         * the inverted value while moving it into its message register.
         */
        brw_PLN(p, brw_vec8_grf(26, 0), brw_vec1_grf(uv, 0), brw_vec8_grf(2, 0));
        brw_PLN(p, brw_vec8_grf(28, 0), brw_vec1_grf(uv, 4), brw_vec8_grf(2, 0));
        brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(26, 0), brw_vec8_grf(30, 0));
        brw_MUL(p, brw_message_reg(msg + dw/8), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
    } else {
        struct brw_reg r = brw_vec1_grf(uv, 0);

        /* First compute 1/z */
        brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0));
        brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0));
        /* The invert is issued uncompressed, one 8-wide half at a time. */
        if (dw == 16) {
            brw_set_compression_control(p, BRW_COMPRESSION_NONE);
            brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
            brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
            brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
        } else
            brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));

        /* Now compute the output s,t values */
        brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
        brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
        brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
        msg += dw/8;

        /* g28 is reused as the temporary for the second coordinate. */
        brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
        brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
        brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
    }
}
static void brw_wm_write__opacity(struct brw_compile *p, int dw, int src, int mask) { int n; if (dw == 8 && p->gen >= 060) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MUL(p, brw_message_reg(2), brw_vec8_grf(src+0, 0), brw_vec1_grf(mask, 3)); brw_MUL(p, brw_message_reg(3), brw_vec8_grf(src+1, 0), brw_vec1_grf(mask, 3)); brw_MUL(p, brw_message_reg(4), brw_vec8_grf(src+2, 0), brw_vec1_grf(mask, 3)); brw_MUL(p, brw_message_reg(5), brw_vec8_grf(src+3, 0), brw_vec1_grf(mask, 3)); goto done; } brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); for (n = 0; n < 4; n++) { if (p->gen >= 060) { brw_MUL(p, brw_message_reg(2 + 2*n), brw_vec8_grf(src + 2*n, 0), brw_vec1_grf(mask, 3)); } else if (p->gen >= 045 && dw == 16) { brw_MUL(p, brw_message_reg(2 + n + BRW_MRF_COMPR4), brw_vec8_grf(src + 2*n, 0), brw_vec1_grf(mask, 3)); } else { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MUL(p, brw_message_reg(2 + n), brw_vec8_grf(src + 2*n, 0), brw_vec1_grf(mask, 3)); if (dw == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_MUL(p, brw_message_reg(2 + n + 4), brw_vec8_grf(src + 2*n+1, 0), brw_vec1_grf(mask, 3)); } } } done: brw_fb_write(p, dw); }
/*
 * Lay out the static register assignment used by the triangle clipper.
 *
 * c        - clip compile state; c->reg is filled in and
 *            c->prog_data.{curb_read_length, urb_read_length, total_grf}
 *            are set.
 * nr_verts - number of vertex slots to reserve (payload vertices plus
 *            room for generated ones), each c->nr_regs registers wide.
 *
 * Besides assigning registers, this emits MOV instructions that zero a
 * location past the attributes of the first three vertices when the
 * attribute count is odd.
 */
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, GLuint nr_verts )
{
    GLuint i = 0,j;

    /* Register usage is static, precompute here: */
    c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD);
    i++;

    /* With user clip planes the plane equations come right after R0,
     * two planes per register (6 fixed + user planes, rounded up).
     */
    if (c->key.nr_userclip) {
        c->reg.fixed_planes = brw_vec4_grf(i, 0);
        i += (6 + c->key.nr_userclip + 1) / 2;
        c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
    } else
        c->prog_data.curb_read_length = 0;

    /* Payload vertices plus space for more generated vertices: */
    for (j = 0; j < nr_verts; j++) {
        c->reg.vertex[j] = brw_vec4_grf(i, 0);
        i += c->nr_regs;
    }

    /* Odd attribute count: zero the location at byte offset `delta` in
     * each of the first three vertices; the offset differs on IGDNG.
     */
    if (c->key.nr_attrs & 1) {
        for (j = 0; j < 3; j++) {
            GLuint delta = c->key.nr_attrs*16 + 32;
            if (c->chipset.is_igdng)
                delta = c->key.nr_attrs * 16 + 32 * 3;
            brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
        }
    }

    /* Scalars share one register; the plane equation takes its upper
     * half.
     */
    c->reg.t = brw_vec1_grf(i, 0);
    c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
    c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
    c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
    c->reg.plane_equation = brw_vec4_grf(i, 4);
    i++;

    c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
    c->reg.dp = brw_vec1_grf(i, 4);
    i++;

    /* Three vertex lists, one 16 x UW register each. */
    c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
    i++;
    c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
    i++;
    c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
    i++;

    /* Without user clip planes, fixed_planes gets a register here
     * instead of the slot after R0.
     */
    if (!c->key.nr_userclip) {
        c->reg.fixed_planes = brw_vec8_grf(i, 0);
        i++;
    }

    /* Extra registers only needed for unfilled-triangle handling. */
    if (c->key.do_unfilled) {
        c->reg.dir = brw_vec4_grf(i, 0);
        c->reg.offset = brw_vec4_grf(i, 4);
        i++;
        c->reg.tmp0 = brw_vec4_grf(i, 0);
        c->reg.tmp1 = brw_vec4_grf(i, 4);
        i++;
    }

    if (c->need_ff_sync) {
        c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
        i++;
    }

    /* Temporaries start after everything allocated above. */
    c->first_tmp = i;
    c->last_tmp = i;

    c->prog_data.urb_read_length = c->nr_regs; /* ? */
    c->prog_data.total_grf = i;
}