/* Emit SF setup code that dispatches at runtime on the incoming primitive
 * type: try triangle setup, then line setup, then point-sprite setup,
 * finally falling through to plain point setup.  Each attempt is guarded
 * by a forward JMPI that is patched once the landing point is known.
 */
void brw_emit_anyprim_setup( struct brw_sf_compile *c )
{
   struct brw_codegen *p = &c->func;
   /* Primitive type is delivered in the thread payload (r1) as a UW. */
   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
   struct brw_reg primmask;
   int jmp;
   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));

   c->nr_verts = 3;
   alloc_regs(c);

   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);

   /* primmask = 1 << prim_type, so a single AND against a bitmask of
    * primitive types tests membership in a whole class at once.
    */
   brw_MOV(p, primmask, brw_imm_ud(1));
   brw_SHL(p, primmask, primmask, payload_prim);

   /* Triangle-class primitives: skip triangle setup when the type bit
    * is outside this set (AND result == 0 => predicate taken).
    */
   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
                                               (1<<_3DPRIM_TRISTRIP) |
                                               (1<<_3DPRIM_TRIFAN) |
                                               (1<<_3DPRIM_TRISTRIP_REVERSE) |
                                               (1<<_3DPRIM_POLYGON) |
                                               (1<<_3DPRIM_RECTLIST) |
                                               (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_tri_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   /* Line-class primitives: skip line setup otherwise. */
   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
                                               (1<<_3DPRIM_LINESTRIP) |
                                               (1<<_3DPRIM_LINELOOP) |
                                               (1<<_3DPRIM_LINESTRIP_CONT) |
                                               (1<<_3DPRIM_LINESTRIP_BF) |
                                               (1<<_3DPRIM_LINESTRIP_CONT_BF)));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_line_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   /* Point sprites: selected by an attribute-payload enable bit rather
    * than by primitive type.
    */
   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
   brw_emit_point_sprite_setup(c, false);
   brw_land_fwd_jump(p, jmp);

   /* Default: plain point setup. */
   brw_emit_point_setup( c, false );
}
/* Emit MOVs copying 'count' full GRFs (32 bytes each) from src to dst. */
void brw_copy8(struct brw_compile *p,
               struct brw_reg dst,
               struct brw_reg src,
               GLuint count)
{
   GLuint reg;

   /* Force full-register (vec8) regions on both operands. */
   dst = vec8(dst);
   src = vec8(src);

   for (reg = 0; reg < count; reg++) {
      const GLuint byte_delta = reg * 32;

      brw_MOV(p,
              byte_offset(dst, byte_delta),
              byte_offset(src, byte_delta));
   }
}
/* Emit a saturating-aware absolute value: per enabled write-mask channel,
 * MOV the source through the abs source modifier.
 */
static void emit_abs( struct brw_wm_compile *c,
                      struct prog_instruction *inst)
{
   struct brw_compile *p = &c->func;
   int chan;

   brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);

   for (chan = 0; chan < 4; chan++) {
      struct brw_reg dst, src;

      /* Skip channels the instruction does not write. */
      if (!(inst->DstReg.WriteMask & (1 << chan)))
         continue;

      dst = get_dst_reg(c, inst, chan, 1);
      src = get_src_reg(c, &inst->SrcReg[0], chan, 1);
      brw_MOV(p, dst, brw_abs(src));
   }

   brw_set_saturate(p, 0);
}
/* Select predication for subsequent instructions: 0xff means "all channels",
 * so predication is disabled; otherwise load f0.0 with 'value' (only when it
 * differs from the cached c->flag_value) and enable normal predication.
 */
static void set_predicate_control_flag_value(struct brw_compile *p,
                                             struct brw_sf_compile *c,
                                             unsigned value)
{
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

   if (value == 0xff)
      return;

   if (c->flag_value != value) {
      brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
      c->flag_value = value;
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
}
/* Select predication for subsequent instructions: 0xff means "all channels",
 * so predication stays off; otherwise load f0.0 with 'value' (only when it
 * differs from the cached c->flag_value) and enable normal predication.
 */
static void set_predicate_control_flag_value(struct brw_compile *p,
                                             struct brw_sf_compile *c,
                                             unsigned value)
{
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   if (value == 0xff)
      return;

   if (c->flag_value != value) {
      brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
      c->flag_value = value;
   }

   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
/* Emit MOVs copying 'count' whole GRFs (32 bytes apiece) from src to dst. */
void brw_copy8(struct brw_codegen *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count)
{
   unsigned n;

   /* Normalize both operands to full-register (vec8) regions. */
   dst = vec8(dst);
   src = vec8(src);

   for (n = 0; n < count; n++) {
      const unsigned offs = 32 * n;

      brw_MOV(p, byte_offset(dst, offs), byte_offset(src, offs));
   }
}
/* Copy the four shader color channels starting at GRF 'src' into the
 * framebuffer-write message payload and emit the FB write.  'dw' is the
 * dispatch width (8 or 16); at SIMD16 each channel occupies two GRFs.
 *
 * NOTE(review): the gen comparisons use octal literals (060, 045), which
 * suggests p->gen is encoded as major*010 + minor (060 == gen6.0,
 * 045 == gen4.5) — confirm against the rest of this driver.
 */
static void brw_wm_write(struct brw_compile *p, int dw, int src)
{
   int n;

   if (dw == 8 && p->gen >= 060) {
      /* XXX pixel execution mask? */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      /* SIMD8 on gen6+: one GRF per channel, payload in m2..m5. */
      brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0));
      brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0));
      brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0));
      brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0));
      goto done;
   }

   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);

   for (n = 0; n < 4; n++) {
      if (p->gen >= 060) {
         /* Gen6+: compressed MOVs write two MRFs at once. */
         brw_MOV(p,
                 brw_message_reg(2 + 2*n),
                 brw_vec8_grf(src + 2*n, 0));
      } else if (p->gen >= 045 && dw == 16) {
         /* Gen4.5/5 SIMD16: COMPR4 addressing writes the register pair. */
         brw_MOV(p,
                 brw_message_reg(2 + n + BRW_MRF_COMPR4),
                 brw_vec8_grf(src + 2*n, 0));
      } else {
         /* Oldest path: emit each half separately, switching the
          * compression control between the halves.
          */
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_MOV(p,
                 brw_message_reg(2 + n),
                 brw_vec8_grf(src + 2*n, 0));

         if (dw == 16) {
            brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
            brw_MOV(p,
                    brw_message_reg(2 + n + 4),
                    brw_vec8_grf(src + 2*n+1, 0));
         }
      }
   }

done:
   brw_fb_write(p, dw);
}
/**
 * Move a GPR to scratch memory.
 *
 * Copies \p reg into the message payload and emits an OWord block write
 * of 2 registers to scratch slot \p slot.
 */
static void emit_spill( struct brw_wm_compile *c,
                        struct brw_reg reg,
                        GLuint slot )
{
   struct brw_compile *p = &c->func;

   /*
     mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
   */
   brw_MOV(p, brw_message_reg(2), reg);

   /*
     mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
     send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
   */
   brw_oword_block_write_scratch(p, brw_message_reg(1), 2, slot);
}
/* Emit an FF_SYNC message announcing 'num_prim' primitives.  The primitive
 * count is written into DWORD 1 of R0 before the message is sent.
 */
static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
{
   struct brw_compile *p = &c->func;

   brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
   brw_ff_sync(p,
               c->reg.R0,
               0,
               c->reg.R0,
               1, /* allocate */
               1, /* used */
               1, /* msg length */
               1, /* response length */
               0, /* eot */
               1, /* write complete */
               0, /* urb offset */
               BRW_URB_SWIZZLE_NONE);
}
/* Emit an OWord dual-block read from the pull-constant buffer identified by
 * the immediate surface 'index', at the per-channel 'offset', into 'dst'.
 * Only valid on gens up to 7 (asserted).
 */
void
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
                                            struct brw_reg dst,
                                            struct brw_reg index,
                                            struct brw_reg offset)
{
   assert(brw->gen <= 7);
   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   /* The message header comes from g0; on gen6+ the implied move into the
    * MRF is resolved here.
    */
   struct brw_reg header = brw_vec8_grf(0, 0);

   gen6_resolve_implied_move(p, &header, inst->base_mrf);

   /* Per-channel read offsets go in the second message register. */
   brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D),
           offset);

   uint32_t msg_type;
   if (brw->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (brw->gen == 5 || brw->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, header);
   if (brw->gen < 6)
      send->header.destreg__conditionalmod = inst->base_mrf;
   brw_set_dp_read_message(p, send,
                           surf_index,
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           2, /* mlen */
                           true, /* header_present */
                           1 /* rlen */);

   /* Record the surface so state setup knows it is referenced. */
   brw_mark_surface_used(&prog_data->base, surf_index);
}
/* Initialize the clipper's input vertex list (c->reg.inlist) with pointers
 * to the three incoming vertices.  For TRISTRIP_REVERSE primitives the
 * first two vertices are swapped so winding order is consistent, and
 * c->reg.dir records the flip when a direction is needed.
 */
void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
   struct brw_instruction *is_rev;

   /* Initial list of indices for incoming vertexes:
    */
   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
   brw_CMP(p,
           vec1(brw_null_reg()),
           BRW_CONDITIONAL_EQ,
           tmp0,
           brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

   /* XXX: Is there an easier way to do this?  Need to reverse every
    * second tristrip element:  Can ignore sometimes?
    */
   is_rev = brw_IF(p, BRW_EXECUTE_1);
   {
      /* Reversed winding: vertex 1 first, then vertex 0. */
      brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) );
      brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) );
      if (c->need_direction)
         brw_MOV(p, c->reg.dir, brw_imm_f(-1));
   }
   is_rev = brw_ELSE(p, is_rev);
   {
      brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) );
      brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) );
      if (c->need_direction)
         brw_MOV(p, c->reg.dir, brw_imm_f(1));
   }
   brw_ENDIF(p, is_rev);

   /* Vertex 2 is never reordered. */
   brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) );
   /* Clear the output list and record the vertex count. */
   brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
   brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
}
/* Emit a single-operand math-unit operation ('func') for a scalar result:
 * the operand is staged in m2, and the result lands in the instruction's
 * scalar destination channel.
 */
static void emit_math1(struct brw_wm_compile *c,
                       struct prog_instruction *inst, GLuint func)
{
   struct brw_compile *p = &c->func;
   struct brw_reg arg = get_src_reg(c, &inst->SrcReg[0], 0, 1);
   struct brw_reg dest = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);

   /* The math unit reads its operand from a message register. */
   brw_MOV(p, brw_message_reg(2), arg);

   brw_math(p,
            dest,
            func,
            inst->SaturateMode == SATURATE_OFF
               ? BRW_MATH_SATURATE_NONE
               : BRW_MATH_SATURATE_SATURATE,
            2,
            brw_null_reg(),
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}
/* Emit the framebuffer-write SEND: forward the pixel header (pre-gen6 only),
 * pick the SIMD8/SIMD16 message control, and fire the render-target write
 * starting at message register 'base_reg' with 'nr' payload registers.
 */
static void fire_fb_write( struct brw_wm_compile *c,
                           GLuint base_reg,
                           GLuint nr,
                           GLuint target,
                           GLuint eot )
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;

   /* Pass through control information:
    *
    * Gen6 has done m1 mov in emit_fb_write() for current SIMD16 case.
    */
/*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
   if (intel->gen < 6)
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p,
              brw_message_reg(base_reg + 1),
              brw_vec8_grf(1, 0));
      brw_pop_insn_state(p);
   }

   if (c->dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   /* Send framebuffer write message: */
/*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
   brw_fb_WRITE(p,
                c->dispatch_width,
                base_reg,
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
                msg_control,
                target,
                nr,
                0,
                eot,
                true);
}
void brw_clip_init_planes( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; if (!c->key.nr_userclip) { brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); } }
/**
 * Move a GPR to scratch memory (register spilling).
 *
 * NOTE(review): the original leading comment ("Post-fragment-program
 * processing.  Send the results to the framebuffer.") described a different
 * function; this one writes \p reg to scratch slot \p slot via
 * brw_dp_WRITE_16.
 */
static void emit_spill( struct brw_wm_compile *c,
                        struct brw_reg reg,
                        GLuint slot )
{
   struct brw_compile *p = &c->func;

   /*
     mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
   */
   brw_MOV(p, brw_message_reg(2), reg);

   /*
     mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
     send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
   */
   brw_dp_WRITE_16(p,
                   retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
                   1,
                   slot);
}
/* Emit constant (flat) interpolation: for each enabled channel, copy the
 * constant term of the interpolation coefficients into dst.  The constant
 * lives at dword 3 of each channel's coefficient pair; channels 0/1 live in
 * GRF 'nr' at subregs 0 and 4, channels 2/3 in GRF 'nr'+1 likewise.
 */
void emit_cinterp(struct brw_compile *p,
                  const struct brw_reg *dst,
                  GLuint mask,
                  const struct brw_reg *arg0)
{
   const GLuint nr = arg0[0].nr;
   GLuint chan;

   for (chan = 0; chan < 4; chan++) {
      if (!(mask & (1 << chan)))
         continue;

      /* Coefficient slot for this channel, computed instead of tabulated. */
      struct brw_reg coef = brw_vec1_grf(nr + chan / 2, (chan % 2) * 4);

      /* TODO: optimize away like other moves */
      brw_MOV(p, dst[chan], suboffset(coef, 3));
   }
}
/**
 * Load a GPR from scratch memory.
 *
 * Slot 0 is reserved as the "undef" value: instead of reading scratch,
 * the register is simply zeroed.
 */
static void emit_unspill( struct brw_wm_compile *c,
                          struct brw_reg reg,
                          GLuint slot )
{
   struct brw_compile *p = &c->func;

   /* Slot 0 is the undef value.
    */
   if (slot == 0) {
      brw_MOV(p, reg, brw_imm_f(0));
      return;
   }

   /*
     mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
     send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
   */
   brw_oword_block_read(p, vec16(reg), brw_message_reg(1), 2, slot);
}
/* Emit a clipped vertex to the URB: copy the vertex payload into m1..mN,
 * patch PrimType/PrimStart into the message header, and send the URB write
 * with the caller-supplied flags.  When ALLOCATE is requested the returned
 * URB handle lands back in R0.
 */
void brw_clip_emit_vue(struct brw_clip_compile *c,
                       struct brw_indirect vert,
                       enum brw_urb_write_flags flags,
                       GLuint header)
{
   struct brw_codegen *p = &c->func;
   bool allocate = flags & BRW_URB_WRITE_ALLOCATE;

   brw_clip_ff_sync(c);

   /* Any URB entry that is allocated must subsequently be used or discarded,
    * so it doesn't make sense to mark EOT and ALLOCATE at the same time.
    */
   assert(!(allocate && (flags & BRW_URB_WRITE_EOT)));

   /* Copy the vertex from vertn into m1..mN+1:
    */
   brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);

   /* Overwrite PrimType and PrimStart in the message header, for
    * each vertex in turn:
    */
   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));

   /* Send each vertex as a separate write to the urb.  This
    * is different to the concept in brw_sf_emit.c, where
    * subsequent writes are used to build up a single urb
    * entry.  Each of these writes instantiates a separate
    * urb entry - (I think... what about 'allocate'?)
    */
   brw_urb_WRITE(p,
                 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 0,
                 c->reg.R0,
                 flags,
                 c->nr_regs + 1, /* msg length */
                 allocate ? 1 : 0, /* response_length */
                 0, /* urb offset */
                 BRW_URB_SWIZZLE_NONE);
}
/* Emit a clipped vertex to the URB: copy the vertex payload into m1..mN,
 * patch PrimType/PrimStart into the message header, and send the URB write.
 * 'allocate' requests a fresh URB handle (returned in R0); 'eot' ends the
 * thread — the two are mutually exclusive (asserted).
 */
void brw_clip_emit_vue(struct brw_clip_compile *c,
                       struct brw_indirect vert,
                       bool allocate,
                       bool eot,
                       GLuint header)
{
   struct brw_compile *p = &c->func;

   brw_clip_ff_sync(c);

   assert(!(allocate && eot));

   /* Copy the vertex from vertn into m1..mN+1:
    */
   brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);

   /* Overwrite PrimType and PrimStart in the message header, for
    * each vertex in turn:
    */
   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));

   /* Send each vertex as a separate write to the urb.  This
    * is different to the concept in brw_sf_emit.c, where
    * subsequent writes are used to build up a single urb
    * entry.  Each of these writes instantiates a separate
    * urb entry - (I think... what about 'allocate'?)
    */
   brw_urb_WRITE(p,
                 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 0,
                 c->reg.R0,
                 allocate,
                 1, /* used */
                 c->nr_regs + 1, /* msg length */
                 allocate ? 1 : 0, /* response_length */
                 eot, /* eot */
                 1, /* writes_complete */
                 0, /* urb offset */
                 BRW_URB_SWIZZLE_NONE);
}
/**
 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 *
 * Pokes the scratch offset into the message header (r0.2), then builds the
 * SEND instruction's fields directly.
 */
void brw_dp_WRITE_16( struct brw_compile *p,
                      struct brw_reg src,
                      GLuint scratch_offset )
{
   GLuint msg_reg_nr = 1;

   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
              brw_imm_d(scratch_offset));

      brw_pop_insn_state(p);
   }

   {
      GLuint msg_length = 3;
      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);

      brw_set_dp_write_message(p->brw,
                               insn,
                               255, /* binding table index (255=stateless) */
                               BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
                               BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
                               msg_length,
                               0, /* pixel scoreboard */
                               0, /* response_length */
                               0); /* eot */
   }
}
/* Hand-assemble the "affine source, sample ARGB, write" WM kernel, then
 * disassemble both the generated instructions and the prebuilt .g6b
 * reference fragments for side-by-side comparison on stdout.
 */
static void wm_src_sample_argb(struct brw_compile *p)
{
   /* Reference binary, built from the gen6 assembly fragments. */
   static const uint32_t fragment[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
   };
   int n;

   /* Zero the sampler header field (r0.2) without touching channel masks. */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
           retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(0));
   brw_pop_insn_state(p);

   brw_SAMPLE(p,
              retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW),
              1,
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD),
              1, 0,
              WRITEMASK_XYZW,
              GEN5_SAMPLER_MESSAGE_SAMPLE,
              8,
              5,
              true,
              BRW_SAMPLER_SIMD_MODE_SIMD16);

   /* Dump what we generated... */
   for (n = 0; n < p->nr_insn; n++) {
      brw_disasm(stdout, &p->store[n], 60);
   }

   printf("\n\n");

   /* ...followed by the reference fragments for comparison. */
   for (n = 0; n < ARRAY_SIZE(fragment); n++) {
      brw_disasm(stdout, (const struct brw_instruction *)&fragment[n][0], 60);
   }
}
/**
 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 *
 * Mirrors brw_dp_WRITE_16: first pokes the scratch offset into the message
 * header (r0.2), then builds the SEND's fields directly.
 */
void brw_dp_READ_16( struct brw_compile *p,
                     struct brw_reg dest,
                     GLuint scratch_offset )
{
   GLuint msg_reg_nr = 1;

   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
              brw_imm_d(scratch_offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest); /* UW? */
      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));

      brw_set_dp_read_message(p->brw,
                              insn,
                              255, /* binding table index (255=stateless) */
                              3, /* msg_control (3 means 4 Owords) */
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              1, /* target cache (render/scratch) */
                              1, /* msg_length */
                              2, /* response_length */
                              0); /* eot */
   }
}
/* Load a GPR from scratch memory (register unspill).  Slot 0 is reserved
 * as the "undef" value: the register is zeroed instead of read back.
 */
static void emit_unspill( struct brw_wm_compile *c,
                          struct brw_reg reg,
                          GLuint slot )
{
   struct brw_compile *p = &c->func;

   /* Slot 0 is the undef value.
    */
   if (slot == 0) {
      brw_MOV(p, reg, brw_imm_f(0));
      return;
   }

   /*
     mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
     send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
   */
   brw_dp_READ_16(p,
                  retype(vec16(reg), BRW_REGISTER_TYPE_UW),
                  1,
                  slot);
}
/**
 * Emit a vertex using the URB_WRITE message.  Use the contents of
 * c->reg.header for the message header, and the registers starting at \c vert
 * for the vertex data.
 *
 * If \c last is true, then this is the last vertex, so no further URB space
 * should be allocated, and this message should end the thread.
 *
 * If \c last is false, then a new URB entry will be allocated, and its handle
 * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE
 * message.
 */
static void brw_gs_emit_vue(struct brw_gs_compile *c,
                            struct brw_reg vert,
                            bool last)
{
   struct brw_compile *p = &c->func;
   bool allocate = !last;

   /* Copy the vertex from vertn into m1..mN+1:
    */
   brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);

   /* Send each vertex as a separate write to the urb.  This is
    * different to the concept in brw_sf_emit.c, where subsequent
    * writes are used to build up a single urb entry.  Each of these
    * writes instantiates a separate urb entry, and a new one must be
    * allocated each time.
    */
   brw_urb_WRITE(p,
                 allocate ? c->reg.temp
                          : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 0,
                 c->reg.header,
                 allocate,
                 1, /* used */
                 c->nr_regs + 1, /* msg length */
                 allocate ? 1 : 0, /* response length */
                 allocate ? 0 : 1, /* eot */
                 1, /* writes_complete */
                 0, /* urb offset */
                 BRW_URB_SWIZZLE_NONE);

   if (allocate) {
      /* Stash the new URB handle for the next write's header. */
      brw_MOV(p, get_element_ud(c->reg.header, 0),
              get_element_ud(c->reg.temp, 0));
   }
}
/* Emit a single-operand SIMD16 math-unit operation ('function'): stage the
 * operand in m2 and fire the two-message math_16 sequence.  Emits nothing
 * when no XYZW channel is written.
 */
static void emit_math1( struct brw_compile *p,
                        GLuint function,
                        const struct brw_reg *dst,
                        GLuint mask,
                        const struct brw_reg *arg0 )
{
   /* Do not emit dead code */
   if ((mask & WRITEMASK_XYZW) == 0)
      return;

   brw_MOV(p, brw_message_reg(2), arg0[0]);

   /* Send two messages to perform all 16 operations:
    */
   brw_math_16(p,
               dst[0],
               function,
               (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE
                                 : BRW_MATH_SATURATE_NONE,
               2,
               brw_null_reg(),
               BRW_MATH_PRECISION_FULL);
}
/**
 * Generate the geometry shader program used on Gen6 to perform stream output
 * (transform feedback).
 *
 * When transform feedback bindings exist: checks buffer space against SVBI,
 * computes per-vertex destination indices (reordering for TRISTRIP_REVERSE
 * to preserve provoking-vertex semantics), streams each bound varying out
 * via SVB writes, and waits for the final write commit.  Afterwards it
 * performs FF_SYNC and emits the vertices (or terminates the thread under
 * RASTERIZER_DISCARD).
 */
void
gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
                 unsigned num_verts, bool check_edge_flags)
{
   struct brw_compile *p = &c->func;
   c->prog_data.svbi_postincrement_value = num_verts;

   brw_gs_alloc_regs(c, num_verts, true);
   brw_gs_initialize_header(c);

   if (key->num_transform_feedback_bindings > 0) {
      unsigned vertex, binding;
      struct brw_reg destination_indices_uw =
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));

      /* Note: since we use the binding table to keep track of buffer offsets
       * and stride, the GS doesn't need to keep track of a separate pointer
       * into each buffer; it uses a single pointer which increments by 1 for
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
       * transform feedback is in interleaved or separate attribs mode.
       *
       * Make sure that the buffers have enough room for all the vertices.
       */
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
              get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
              get_element_ud(c->reg.temp, 0),
              get_element_ud(c->reg.SVBI, 4));
      brw_IF(p, BRW_EXECUTE_1);

      /* Compute the destination indices to write to.  Usually we use SVBI[0]
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
       * vertices come down the pipeline in reversed winding order, so we need
       * to flip the order when writing to the transform feedback buffer.  To
       * ensure that flatshading accuracy is preserved, we need to write them
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
       * the last provoking vertex convention.
       *
       * Note: since brw_imm_v can only be used in instructions in
       * packed-word execution mode, and SVBI is a double-word, we need to
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
       * using a separate instruction.  Also, since the immediate constant is
       * expressed as packed words, and we need to load double-words into
       * destination_indices, we need to intersperse zeros to fill the upper
       * halves of each double-word.
       */
      brw_MOV(p, destination_indices_uw,
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
      if (num_verts == 3) {
         /* Get primitive type into temp register. */
         brw_AND(p, get_element_ud(c->reg.temp, 0),
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));

         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
          * an 8-wide comparison so that the conditional MOV that follows
          * moves all 8 words correctly.
          */
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
                 get_element_ud(c->reg.temp, 0),
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

         /* If so, then overwrite destination_indices_uw with the appropriate
          * reordering.
          */
         brw_MOV(p, destination_indices_uw,
                 brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
                                         : 0x00020001)); /* (1, 0, 2) */
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      }
      brw_ADD(p, c->reg.destination_indices,
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));

      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (vertex = 0; vertex < num_verts; ++vertex) {
         /* Set up the correct destination index for this vertex */
         brw_MOV(p, get_element_ud(c->reg.header, 5),
                 get_element_ud(c->reg.destination_indices, vertex));

         for (binding = 0; binding < key->num_transform_feedback_bindings;
              ++binding) {
            unsigned char varying =
               key->transform_feedback_bindings[binding];
            unsigned char slot = c->vue_map.varying_to_slot[varying];

            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
             *
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
             *   ensure that all writes are complete by sending the final
             *   write as a committed write."
             */
            bool final_write =
               binding == key->num_transform_feedback_bindings - 1 &&
               vertex == num_verts - 1;
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
            vertex_slot.nr += slot / 2;
            vertex_slot.subnr = (slot % 2) * 16;
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
            vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
            brw_set_access_mode(p, BRW_ALIGN_16);
            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
            brw_set_access_mode(p, BRW_ALIGN_1);
            brw_svb_write(p,
                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
                          1, /* msg_reg_nr */
                          c->reg.header, /* src0 */
                          SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
                          final_write); /* send_commit_msg */
         }
      }
      brw_ENDIF(p);

      /* Now, reinitialize the header register from R0 to restore the parts of
       * the register that we overwrote while streaming out transform feedback
       * data.
       */
      brw_gs_initialize_header(c);

      /* Finally, wait for the write commit to occur so that we can proceed to
       * other things safely.
       *
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
       *
       *   The write commit does not modify the destination register, but
       *   merely clears the dependency associated with the destination
       *   register.  Thus, a simple "mov" instruction using the register as a
       *   source is sufficient to wait for the write commit to occur.
       */
      brw_MOV(p, c->reg.temp, c->reg.temp);
   }

   brw_gs_ff_sync(c, 1);

   /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so
    * release the URB that was just allocated, and terminate the thread.
    */
   if (key->rasterizer_discard) {
      brw_gs_terminate(c);
      return;
   }

   brw_gs_overwrite_header_dw2_from_r0(c);
   switch (num_verts) {
   case 1:
      brw_gs_offset_header_dw2(c,
                               URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      brw_gs_emit_vue(c, c->reg.vertex[0], true);
      break;
   case 2:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c,
                               URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], true);
      break;
   case 3:
      if (check_edge_flags) {
         /* Only emit vertices 0 and 1 if this is the first triangle of the
          * polygon.  Otherwise they are redundant.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
         brw_IF(p, BRW_EXECUTE_1);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], false);
      if (check_edge_flags) {
         brw_ENDIF(p);

         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
          * of the polygon.  Otherwise leave the primitive incomplete because
          * there are more polygon vertices coming.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_gs_emit_vue(c, c->reg.vertex[2], true);
      break;
   }
}
/**
 * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value.
 *
 * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart,
 * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we
 * need to be able to update on a per-vertex basis.
 */
static void brw_gs_overwrite_header_dw2(struct brw_gs_compile *c,
                                        unsigned dw2)
{
   struct brw_compile *p = &c->func;
   brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2));
}
/**
 * Set up the initial value of c->reg.header register based on c->reg.R0.
 *
 * The following information is passed to the GS thread in R0, and needs to be
 * included in the first URB_WRITE or FF_SYNC message sent by the GS:
 *
 * - DWORD 0 [31:0] handle info (Gen4 only)
 * - DWORD 5 [7:0] FFTID
 * - DWORD 6 [31:0] Debug info
 * - DWORD 7 [31:0] Debug info
 *
 * This function sets up the above data by copying the contents of R0 to the
 * header register.
 */
static void brw_gs_initialize_header(struct brw_gs_compile *c)
{
   struct brw_compile *p = &c->func;
   brw_MOV(p, c->reg.header, c->reg.R0);
}
/* Emit SF setup for point sprites.  For each pair of attribute registers:
 * perspective-divide the non-replaced texcoords, substitute (x, y, 0, 1)
 * coefficients for coord-replaced texcoords (flipping y for lower-left
 * origin), pass everything else through as flat constants, and write the
 * m0..m3 coefficient block to the URB.  Predicate flags gate which channels
 * each step writes.
 */
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 1;

   if (allocate)
      alloc_regs(c);

   copy_z_inv_w(c);

   for (i = 0; i < c->nr_setup_regs; i++) {
      struct brw_reg a0 = offset(c->vert[0], i);
      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      pc_coord_replace = calculate_point_sprite_mask(c, i);
      /* Replaced coords must not also be perspective-divided. */
      pc_persp &= ~pc_coord_replace;

      if (pc_persp) {
         set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
      }

      /* Point sprite coordinate replacement: A texcoord with this
       * enabled gets replaced with the value (x, y, 0, 1) where x and
       * y vary from 0 to 1 across the horizontal and vertical of the
       * point.
       */
      if (pc_coord_replace) {
         set_predicate_control_flag_value(p, c, pc_coord_replace);
         /* Calculate 1.0/PointWidth */
         gen4_math(&c->func,
                   c->tmp,
                   BRW_MATH_FUNCTION_INV,
                   0,
                   c->dx0,
                   BRW_MATH_DATA_SCALAR,
                   BRW_MATH_PRECISION_FULL);

         brw_set_default_access_mode(p, BRW_ALIGN_16);

         /* dA/dx, dA/dy */
         brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
         brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
         brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
         if (c->key.sprite_origin_lower_left) {
            /* Lower-left origin: y runs top-to-bottom, so negate dA/dy. */
            brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
         } else {
            brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
         }

         /* attribute constant offset */
         brw_MOV(p, c->m3C0, brw_imm_f(0.0));
         if (c->key.sprite_origin_lower_left) {
            brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
         } else {
            brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
         }

         brw_set_default_access_mode(p, BRW_ALIGN_1);
      }

      if (pc & ~pc_coord_replace) {
         /* Remaining channels are flat: zero gradients, constant value. */
         set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
         brw_MOV(p, c->m1Cx, brw_imm_ud(0));
         brw_MOV(p, c->m2Cy, brw_imm_ud(0));
         brw_MOV(p, c->m3C0, a0); /* constant value */
      }

      set_predicate_control_flag_value(p, c, pc);
      /* Copy m0..m3 to URB.
       */
      brw_urb_WRITE(p,
                    brw_null_reg(),
                    0,
                    brw_vec8_grf(0, 0),
                    last ? BRW_URB_WRITE_EOT_COMPLETE
                         : BRW_URB_WRITE_NO_FLAGS,
                    4, /* msg len */
                    0, /* response len */
                    i*4, /* urb destination offset */
                    BRW_URB_SWIZZLE_TRANSPOSE);
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
/* Emit SF setup for lines.  For each pair of attribute registers:
 * perspective-divide where needed, compute the linear interpolation
 * coefficients Cx/Cy from the attribute delta scaled by the inverse
 * determinant, record the start value C0, and write the m0..m3 coefficient
 * block to the URB.  Predicate flags gate which channels each step writes.
 */
void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
{
   struct brw_compile *p = &c->func;
   GLuint i;

   c->flag_value = 0xff;
   c->nr_verts = 2;

   if (allocate)
      alloc_regs(c);

   invert_det(c);
   copy_z_inv_w(c);

   if (c->has_flat_shading)
      do_flatshade_line(c);

   for (i = 0; i < c->nr_setup_regs; i++) {
      /* Pair of incoming attributes:
       */
      struct brw_reg a0 = offset(c->vert[0], i);
      struct brw_reg a1 = offset(c->vert[1], i);
      GLushort pc, pc_persp, pc_linear;
      bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);

      if (pc_persp) {
         set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
         brw_MUL(p, a1, a1, c->inv_w[1]);
      }

      /* Calculate coefficients for position, color:
       */
      if (pc_linear) {
         set_predicate_control_flag_value(p, c, pc_linear);

         brw_ADD(p, c->a1_sub_a0, a1, negate(a0));

         brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
         brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);

         brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
         brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
      }

      {
         set_predicate_control_flag_value(p, c, pc);
         /* start point for interpolation
          */
         brw_MOV(p, c->m3C0, a0);

         /* Copy m0..m3 to URB.
          */
         brw_urb_WRITE(p,
                       brw_null_reg(),
                       0,
                       brw_vec8_grf(0, 0),
                       last ? BRW_URB_WRITE_EOT_COMPLETE
                            : BRW_URB_WRITE_NO_FLAGS,
                       4, /* msg len */
                       0, /* response len */
                       i*4, /* urb destination offset */
                       BRW_URB_SWIZZLE_TRANSPOSE);
      }
   }

   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}