static void brw_clip_test( struct brw_clip_compile *c ) { struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg v0 = get_tmp(c); struct brw_reg v1 = get_tmp(c); struct brw_reg v2 = get_tmp(c); struct brw_indirect vt0 = brw_indirect(0, 0); struct brw_indirect vt1 = brw_indirect(1, 0); struct brw_indirect vt2 = brw_indirect(2, 0); struct brw_compile *p = &c->func; struct brw_instruction *is_outside; struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* test nearz, xmin, ymin plane */ /* clip.xyz < -clip.w */ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); /* All vertices are outside of a plane, rejected */ brw_AND(p, t, t1, t2); brw_AND(p, t, t, t3); brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); brw_OR(p, tmp0, tmp0, get_element(t, 2)); brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); is_outside = brw_IF(p, BRW_EXECUTE_1); { brw_clip_kill_thread(c); } brw_ENDIF(p, is_outside); brw_set_predicate_control(p, BRW_PREDICATE_NONE); /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); brw_AND(p, t, t, brw_imm_ud(0x1)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); /* test farz, xmax, ymax plane */ /* clip.xyz > clip.w */ brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); /* All vertices are outside of a plane, rejected */ brw_AND(p, t, t1, t2); brw_AND(p, t, t, t3); brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); brw_OR(p, tmp0, tmp0, get_element(t, 2)); brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); is_outside = brw_IF(p, BRW_EXECUTE_1); { brw_clip_kill_thread(c); } brw_ENDIF(p, is_outside); brw_set_predicate_control(p, BRW_PREDICATE_NONE); /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); brw_AND(p, t, t, brw_imm_ud(0x1)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); release_tmps(c); }
void gen8_vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, struct brw_reg dst, struct brw_reg *src) { vec4_instruction *ir = (vec4_instruction *) instruction; if (dst.width == BRW_WIDTH_4) { /* This happens in attribute fixups for "dual instanced" geometry * shaders, since they use attributes that are vec4's. Since the exec * width is only 4, it's essential that the caller set * force_writemask_all in order to make sure the instruction is executed * regardless of which channels are enabled. */ assert(ir->force_writemask_all); /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy * the following register region restrictions (from Graphics BSpec: * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions * > Register Region Restrictions) * * 1. ExecSize must be greater than or equal to Width. * * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set * to Width * HorzStride." */ for (int i = 0; i < 3; i++) { if (src[i].file == BRW_GENERAL_REGISTER_FILE) src[i] = stride(src[i], 4, 4, 1); } } switch (ir->opcode) { case BRW_OPCODE_MOV: MOV(dst, src[0]); break; case BRW_OPCODE_ADD: ADD(dst, src[0], src[1]); break; case BRW_OPCODE_MUL: MUL(dst, src[0], src[1]); break; case BRW_OPCODE_MACH: MACH(dst, src[0], src[1]); break; case BRW_OPCODE_MAD: MAD(dst, src[0], src[1], src[2]); break; case BRW_OPCODE_FRC: FRC(dst, src[0]); break; case BRW_OPCODE_RNDD: RNDD(dst, src[0]); break; case BRW_OPCODE_RNDE: RNDE(dst, src[0]); break; case BRW_OPCODE_RNDZ: RNDZ(dst, src[0]); break; case BRW_OPCODE_AND: AND(dst, src[0], src[1]); break; case BRW_OPCODE_OR: OR(dst, src[0], src[1]); break; case BRW_OPCODE_XOR: XOR(dst, src[0], src[1]); break; case BRW_OPCODE_NOT: NOT(dst, src[0]); break; case BRW_OPCODE_ASR: ASR(dst, src[0], src[1]); break; case BRW_OPCODE_SHR: SHR(dst, src[0], src[1]); break; case BRW_OPCODE_SHL: SHL(dst, src[0], src[1]); break; case BRW_OPCODE_CMP: CMP(dst, ir->conditional_mod, src[0], src[1]); break; case BRW_OPCODE_SEL: SEL(dst, src[0], src[1]); break; case BRW_OPCODE_DPH: DPH(dst, src[0], src[1]); break; case BRW_OPCODE_DP4: DP4(dst, src[0], src[1]); break; case BRW_OPCODE_DP3: DP3(dst, src[0], src[1]); break; case BRW_OPCODE_DP2: DP2(dst, src[0], src[1]); break; case BRW_OPCODE_F32TO16: /* Emulate the Gen7 zeroing bug. */ MOV(retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u)); MOV(retype(dst, BRW_REGISTER_TYPE_HF), src[0]); break; case BRW_OPCODE_F16TO32: MOV(dst, retype(src[0], BRW_REGISTER_TYPE_HF)); break; case BRW_OPCODE_LRP: LRP(dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFREV: /* BFREV only supports UD type for src and dst. */ BFREV(retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; case BRW_OPCODE_FBH: /* FBH only supports UD type for dst. */ FBH(retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_FBL: /* FBL only supports UD type for dst. */ FBL(retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_CBIT: /* CBIT only supports UD type for dst. */ CBIT(retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_ADDC: ADDC(dst, src[0], src[1]); break; case BRW_OPCODE_SUBB: SUBB(dst, src[0], src[1]); break; case BRW_OPCODE_BFE: BFE(dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFI1: BFI1(dst, src[0], src[1]); break; case BRW_OPCODE_BFI2: BFI2(dst, src[0], src[1], src[2]); break; case BRW_OPCODE_IF: IF(ir->predicate); break; case BRW_OPCODE_ELSE: ELSE(); break; case BRW_OPCODE_ENDIF: ENDIF(); break; case BRW_OPCODE_DO: DO(); break; case BRW_OPCODE_BREAK: BREAK(); break; case BRW_OPCODE_CONTINUE: CONTINUE(); break; case BRW_OPCODE_WHILE: WHILE(); break; case SHADER_OPCODE_RCP: MATH(BRW_MATH_FUNCTION_INV, dst, src[0]); break; case SHADER_OPCODE_RSQ: MATH(BRW_MATH_FUNCTION_RSQ, dst, src[0]); break; case SHADER_OPCODE_SQRT: MATH(BRW_MATH_FUNCTION_SQRT, dst, src[0]); break; case SHADER_OPCODE_EXP2: MATH(BRW_MATH_FUNCTION_EXP, dst, src[0]); break; case SHADER_OPCODE_LOG2: MATH(BRW_MATH_FUNCTION_LOG, dst, src[0]); break; case SHADER_OPCODE_SIN: MATH(BRW_MATH_FUNCTION_SIN, dst, src[0]); break; case SHADER_OPCODE_COS: MATH(BRW_MATH_FUNCTION_COS, dst, src[0]); break; case SHADER_OPCODE_POW: MATH(BRW_MATH_FUNCTION_POW, dst, src[0], src[1]); break; case SHADER_OPCODE_INT_QUOTIENT: MATH(BRW_MATH_FUNCTION_INT_DIV_QUOTIENT, dst, src[0], src[1]); break; case SHADER_OPCODE_INT_REMAINDER: MATH(BRW_MATH_FUNCTION_INT_DIV_REMAINDER, dst, src[0], src[1]); break; case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: generate_tex(ir, dst); break; case VS_OPCODE_URB_WRITE: generate_urb_write(ir, true); break; case SHADER_OPCODE_GEN4_SCRATCH_READ: generate_scratch_read(ir, dst, src[0]); break; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: generate_scratch_write(ir, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD: case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: generate_pull_constant_load(ir, dst, src[0], src[1]); break; case GS_OPCODE_URB_WRITE: generate_urb_write(ir, false); break; case GS_OPCODE_THREAD_END: generate_gs_thread_end(ir); break; case GS_OPCODE_SET_WRITE_OFFSET: generate_gs_set_write_offset(dst, src[0], src[1]); break; case GS_OPCODE_SET_VERTEX_COUNT: generate_gs_set_vertex_count(dst, src[0]); break; case GS_OPCODE_SET_DWORD_2_IMMED: generate_gs_set_dword_2_immed(dst, src[0]); break; case GS_OPCODE_PREPARE_CHANNEL_MASKS: generate_gs_prepare_channel_masks(dst); break; case GS_OPCODE_SET_CHANNEL_MASKS: generate_gs_set_channel_masks(dst, src[0]); break; case SHADER_OPCODE_SHADER_TIME_ADD: assert(!"XXX: Missing Gen8 vec4 support for INTEL_DEBUG=shader_time"); break; case SHADER_OPCODE_UNTYPED_ATOMIC: assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_ATOMIC"); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: assert(!"XXX: Missing Gen8 vec4 support for UNTYPED_SURFACE_READ"); break; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: assert(!"VS_OPCODE_UNPACK_FLAGS_SIMD4X2 should not be used on Gen8+."); break; default: if (ir->opcode < (int) ARRAY_SIZE(opcode_descs)) { _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n", opcode_descs[ir->opcode].name); } else { _mesa_problem(ctx, "Unsupported opcode %d in VS", ir->opcode); } abort(); } }
/* Post-fragment-program processing. Send the results to the * framebuffer. */ static void emit_fb_write( struct brw_wm_compile *c, struct brw_reg *arg0, struct brw_reg *arg1, struct brw_reg *arg2, GLuint target, GLuint eot) { struct brw_compile *p = &c->func; GLuint nr = 2; GLuint channel; /* Reserve a space for AA - may not be needed: */ if (c->key.aa_dest_stencil_reg) nr += 1; /* I don't really understand how this achieves the color interleave * (ie RGBARGBA) in the result: [Do the saturation here] */ { brw_push_insn_state(p); for (channel = 0; channel < 4; channel++) { /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]); brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_MOV(p, brw_message_reg(nr + channel + 4), sechalf(arg0[channel])); } /* skip over the regs populated above: */ nr += 8; brw_pop_insn_state(p); } if (c->key.source_depth_to_render_target) { if (c->key.computes_depth) brw_MOV(p, brw_message_reg(nr), arg2[2]); else brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ nr += 2; } if (c->key.dest_depth_reg) { GLuint comp = c->key.dest_depth_reg / 2; GLuint off = c->key.dest_depth_reg % 2; if (off != 0) { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); /* 2nd half? */ brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); brw_pop_insn_state(p); } else { brw_MOV(p, brw_message_reg(nr), arg1[comp]); } nr += 2; } if (!c->key.runtime_check_aads_emit) { if (c->key.aa_dest_stencil_reg) emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); } else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); struct brw_reg ip = brw_ip_reg(); struct brw_instruction *jmp; brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(1<<26)); jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); /* note - thread killed in subroutine */ } brw_land_fwd_jump(p, jmp); /* ELSE: Shuffle up one register to fill in the hole left for AA: */ fire_fb_write(c, 1, nr-1, target, eot); } }
/* Use mesa's clipping algorithms, translated to GEN4 assembly. */ void brw_clip_tri( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; struct brw_indirect vtx = brw_indirect(0, 0); struct brw_indirect vtxPrev = brw_indirect(1, 0); struct brw_indirect vtxOut = brw_indirect(2, 0); struct brw_indirect plane_ptr = brw_indirect(3, 0); struct brw_indirect inlist_ptr = brw_indirect(4, 0); struct brw_indirect outlist_ptr = brw_indirect(5, 0); struct brw_indirect freelist_ptr = brw_indirect(6, 0); struct brw_instruction *plane_loop; struct brw_instruction *plane_active; struct brw_instruction *vertex_loop; struct brw_instruction *next_test; struct brw_instruction *prev_test; brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); plane_loop = brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); plane_active = brw_IF(p, BRW_EXECUTE_1); { /* vtxOut = freelist_ptr++ */ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); if (c->key.nr_userclip) brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); else brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); vertex_loop = brw_DO(p, BRW_EXECUTE_1); { /* vtx = *input_ptr; */ brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); /* IS_NEGATIVE(prev) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); prev_test = brw_IF(p, BRW_EXECUTE_1); { /* IS_POSITIVE(next) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); next_test = brw_IF(p, BRW_EXECUTE_1); { /* Coming back in. */ brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); /* If (vtxOut == 0) vtxOut = vtxPrev */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE); /* *outlist_ptr++ = vtxOut; * nr_verts++; * vtxOut = 0; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); } brw_ENDIF(p, next_test); } prev_test = brw_ELSE(p, prev_test); { /* *outlist_ptr++ = vtxPrev; * nr_verts++; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); /* IS_NEGATIVE(next) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); next_test = brw_IF(p, BRW_EXECUTE_1); { /* Going out of bounds. Avoid division by zero as we * know dp != dpPrev from DIFFERENT_SIGNS, above. */ brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); /* If (vtxOut == 0) vtxOut = vtx */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE); /* *outlist_ptr++ = vtxOut; * nr_verts++; * vtxOut = 0; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); } brw_ENDIF(p, next_test); } brw_ENDIF(p, prev_test); /* vtxPrev = vtx; * inlist_ptr++; */ brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); /* while (--loopcount != 0) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); } brw_WHILE(p, vertex_loop); /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] * inlist = outlist * inlist_ptr = &inlist[0] * outlist_ptr = &outlist[0] */ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); } brw_ENDIF(p, plane_active); /* plane_ptr++; */ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); /* nr_verts >= 3 */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_GE, c->reg.nr_verts, brw_imm_ud(3)); /* && (planemask>>=1) != 0 */ brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); } brw_WHILE(p, plane_loop); }
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate) { struct brw_compile *p = &c->func; GLuint i; c->nr_verts = 1; if (allocate) alloc_regs(c); copy_z_inv_w(c); for (i = 0; i < c->nr_setup_regs; i++) { struct brw_reg a0 = offset(c->vert[0], i); GLushort pc, pc_persp, pc_linear, pc_coord_replace; bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); pc_coord_replace = calculate_point_sprite_mask(c, i); pc_persp &= ~pc_coord_replace; if (pc_persp) { brw_set_predicate_control_flag_value(p, pc_persp); brw_MUL(p, a0, a0, c->inv_w[0]); } /* Point sprite coordinate replacement: A texcoord with this * enabled gets replaced with the value (x, y, 0, 1) where x and * y vary from 0 to 1 across the horizontal and vertical of the * point. */ if (pc_coord_replace) { brw_set_predicate_control_flag_value(p, pc_coord_replace); /* Caculate 1.0/PointWidth */ brw_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 0, c->dx0, BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); brw_set_access_mode(p, BRW_ALIGN_16); /* dA/dx, dA/dy */ brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); if (c->key.sprite_origin_lower_left) { brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); } else { brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); } /* attribute constant offset */ brw_MOV(p, c->m3C0, brw_imm_f(0.0)); if (c->key.sprite_origin_lower_left) { brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); } else { brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); } brw_set_access_mode(p, BRW_ALIGN_1); } if (pc & ~pc_coord_replace) { brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace); brw_MOV(p, c->m1Cx, brw_imm_ud(0)); brw_MOV(p, c->m2Cy, brw_imm_ud(0)); brw_MOV(p, c->m3C0, a0); /* constant value */ } brw_set_predicate_control_flag_value(p, pc); /* Copy m0..m3 to URB. */ brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0), 0, /* allocate */ 1, /* used */ 4, /* msg len */ 0, /* response len */ last, /* eot */ last, /* writes complete */ i*4, /* urb destination offset */ BRW_URB_SWIZZLE_TRANSPOSE); } }
void brw_emit_anyprim_setup( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; int jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); GLuint saveflag; c->nr_verts = 3; alloc_regs(c); primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); brw_MOV(p, primmask, brw_imm_ud(1)); brw_SHL(p, primmask, primmask, payload_prim); brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | (1<<_3DPRIM_TRISTRIP) | (1<<_3DPRIM_TRIFAN) | (1<<_3DPRIM_TRISTRIP_REVERSE) | (1<<_3DPRIM_POLYGON) | (1<<_3DPRIM_RECTLIST) | (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store; { saveflag = p->flag_value; brw_push_insn_state(p); brw_emit_tri_setup( c, false ); brw_pop_insn_state(p); p->flag_value = saveflag; /* note - thread killed in subroutine, so must * restore the flag which is changed when building * the subroutine. fix #13240 */ } brw_land_fwd_jump(p, jmp); brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | (1<<_3DPRIM_LINESTRIP) | (1<<_3DPRIM_LINELOOP) | (1<<_3DPRIM_LINESTRIP_CONT) | (1<<_3DPRIM_LINESTRIP_BF) | (1<<_3DPRIM_LINESTRIP_CONT_BF))); jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store; { saveflag = p->flag_value; brw_push_insn_state(p); brw_emit_line_setup( c, false ); brw_pop_insn_state(p); p->flag_value = saveflag; /* note - thread killed in subroutine */ } brw_land_fwd_jump(p, jmp); brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store; { saveflag = p->flag_value; brw_push_insn_state(p); brw_emit_point_sprite_setup( c, false ); brw_pop_insn_state(p); p->flag_value = saveflag; } brw_land_fwd_jump(p, jmp); brw_emit_point_setup( c, false ); }
void brw_emit_unfilled_clip( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; struct brw_instruction *do_clip; c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || (c->key.fill_ccw != c->key.fill_cw) || c->key.fill_ccw == CLIP_CULL || c->key.fill_cw == CLIP_CULL || c->key.copy_bfc_cw || c->key.copy_bfc_ccw); brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); assert(c->offset[VERT_RESULT_EDGE]); if (c->key.fill_ccw == CLIP_CULL && c->key.fill_cw == CLIP_CULL) { brw_clip_kill_thread(c); return; } merge_edgeflags(c); /* Need to use the inlist indirection here: */ if (c->need_direction) compute_tri_direction(c); if (c->key.fill_ccw == CLIP_CULL || c->key.fill_cw == CLIP_CULL) cull_direction(c); if (c->key.offset_ccw || c->key.offset_cw) compute_offset(c); if (c->key.copy_bfc_ccw || c->key.copy_bfc_cw) copy_bfc(c); /* Need to do this whether we clip or not: */ if (c->key.do_flat_shading) brw_clip_tri_flat_shade(c); brw_clip_init_clipmask(c); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); do_clip = brw_IF(p, BRW_EXECUTE_1); { brw_clip_init_planes(c); brw_clip_tri(c); check_nr_verts(c); } brw_ENDIF(p, do_clip); emit_unfilled_primitives(c); brw_clip_kill_thread(c); }
void vec4_gs_visitor::gs_emit_vertex(int stream_id) { this->current_annotation = "emit vertex: safety check"; /* Haswell and later hardware ignores the "Render Stream Select" bits * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled, * and instead sends all primitives down the pipeline for rasterization. * If the SOL stage is enabled, "Render Stream Select" is honored and * primitives bound to non-zero streams are discarded after stream output. * * Since the only purpose of primives sent to non-zero streams is to * be recorded by transform feedback, we can simply discard all geometry * bound to these streams when transform feedback is disabled. */ if (stream_id > 0 && !nir->info.has_transform_feedback_varyings) return; /* If we're outputting 32 control data bits or less, then we can wait * until the shader is over to output them all. Otherwise we need to * output them as we go. Now is the time to do it, since we're about to * output the vertex_count'th vertex, so it's guaranteed that the * control data bits associated with the (vertex_count - 1)th vertex are * correct. */ if (c->control_data_header_size_bits > 32) { this->current_annotation = "emit vertex: emit control data bits"; /* Only emit control data bits if we've finished accumulating a batch * of 32 bits. This is the case when: * * (vertex_count * bits_per_vertex) % 32 == 0 * * (in other words, when the last 5 bits of vertex_count * * bits_per_vertex are 0). Assuming bits_per_vertex == 2^n for some * integer n (which is always the case, since bits_per_vertex is * always 1 or 2), this is equivalent to requiring that the last 5-n * bits of vertex_count are 0: * * vertex_count & (2^(5-n) - 1) == 0 * * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is * equivalent to: * * vertex_count & (32 / bits_per_vertex - 1) == 0 */ vec4_instruction *inst = emit(AND(dst_null_ud(), this->vertex_count, brw_imm_ud(32 / c->control_data_bits_per_vertex - 1))); inst->conditional_mod = BRW_CONDITIONAL_Z; emit(IF(BRW_PREDICATE_NORMAL)); { /* If vertex_count is 0, then no control data bits have been * accumulated yet, so we skip emitting them. */ emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_NEQ)); emit(IF(BRW_PREDICATE_NORMAL)); emit_control_data_bits(); emit(BRW_OPCODE_ENDIF); /* Reset control_data_bits to 0 so we can start accumulating a new * batch. * * Note: in the case where vertex_count == 0, this neutralizes the * effect of any call to EndPrimitive() that the shader may have * made before outputting its first vertex. */ inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u))); inst->force_writemask_all = true; } emit(BRW_OPCODE_ENDIF); } this->current_annotation = "emit vertex: vertex data"; emit_vertex(); /* In stream mode we have to set control data bits for all vertices * unless we have disabled control data bits completely (which we do * do for GL_POINTS outputs that don't use streams). */ if (c->control_data_header_size_bits > 0 && gs_prog_data->control_data_format == GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) { this->current_annotation = "emit vertex: Stream control data bits"; set_stream_control_data_bits(stream_id); } this->current_annotation = NULL; }
/** * Write out a batch of 32 control data bits from the control_data_bits * register to the URB. * * The current value of the vertex_count register determines which DWORD in * the URB receives the control data bits. The control_data_bits register is * assumed to contain the correct data for the vertex that was most recently * output, and all previous vertices that share the same DWORD. * * This function takes care of ensuring that if no vertices have been output * yet, no control bits are emitted. */ void vec4_gs_visitor::emit_control_data_bits() { assert(c->control_data_bits_per_vertex != 0); /* Since the URB_WRITE_OWORD message operates with 128-bit (vec4 sized) * granularity, we need to use two tricks to ensure that the batch of 32 * control data bits is written to the appropriate DWORD in the URB. To * select which vec4 we are writing to, we use the "slot {0,1} offset" * fields of the message header. To select which DWORD in the vec4 we are * writing to, we use the channel mask fields of the message header. To * avoid penalizing geometry shaders that emit a small number of vertices * with extra bookkeeping, we only do each of these tricks when * c->prog_data.control_data_header_size_bits is large enough to make it * necessary. * * Note: this means that if we're outputting just a single DWORD of control * data bits, we'll actually replicate it four times since we won't do any * channel masking. But that's not a problem since in this case the * hardware only pays attention to the first DWORD. */ enum brw_urb_write_flags urb_write_flags = BRW_URB_WRITE_OWORD; if (c->control_data_header_size_bits > 32) urb_write_flags = urb_write_flags | BRW_URB_WRITE_USE_CHANNEL_MASKS; if (c->control_data_header_size_bits > 128) urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET; /* If we are using either channel masks or a per-slot offset, then we * need to figure out which DWORD we are trying to write to, using the * formula: * * dword_index = (vertex_count - 1) * bits_per_vertex / 32 * * Since bits_per_vertex is a power of two, and is known at compile * time, this can be optimized to: * * dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex)) */ src_reg dword_index(this, glsl_type::uint_type); if (urb_write_flags) { src_reg prev_count(this, glsl_type::uint_type); emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu))); unsigned log2_bits_per_vertex = _mesa_fls(c->control_data_bits_per_vertex); emit(SHR(dst_reg(dword_index), prev_count, brw_imm_ud(6 - log2_bits_per_vertex))); } /* Start building the URB write message. The first MRF gets a copy of * R0. */ int base_mrf = 1; dst_reg mrf_reg(MRF, base_mrf); src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); vec4_instruction *inst = emit(MOV(mrf_reg, r0)); inst->force_writemask_all = true; if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) { /* Set the per-slot offset to dword_index / 4, to that we'll write to * the appropriate OWORD within the control data header. */ src_reg per_slot_offset(this, glsl_type::uint_type); emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u))); emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, brw_imm_ud(1u)); } if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) { /* Set the channel masks to 1 << (dword_index % 4), so that we'll * write to the appropriate DWORD within the OWORD. We need to do * this computation with force_writemask_all, otherwise garbage data * from invocation 0 might clobber the mask for invocation 1 when * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks * together. */ src_reg channel(this, glsl_type::uint_type); inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u))); inst->force_writemask_all = true; src_reg one(this, glsl_type::uint_type); inst = emit(MOV(dst_reg(one), brw_imm_ud(1u))); inst->force_writemask_all = true; src_reg channel_mask(this, glsl_type::uint_type); inst = emit(SHL(dst_reg(channel_mask), one, channel)); inst->force_writemask_all = true; emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask), channel_mask); emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask); } /* Store the control data bits in the message payload and send it. */ dst_reg mrf_reg2(MRF, base_mrf + 1); inst = emit(MOV(mrf_reg2, this->control_data_bits)); inst->force_writemask_all = true; inst = emit(GS_OPCODE_URB_WRITE); inst->urb_write_flags = urb_write_flags; /* We need to increment Global Offset by 256-bits to make room for * Broadwell's extra "Vertex Count" payload at the beginning of the * URB entry. Since this is an OWord message, Global Offset is counted * in 128-bit units, so we must set it to 2. */ if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1) inst->offset = 2; inst->base_mrf = base_mrf; inst->mlen = 2; }
void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, GLuint binding_table_index, GLuint sampler, GLuint writemask, GLuint msg_type, GLuint response_length, GLuint msg_length, GLboolean eot) { GLboolean need_stall = 0; if(writemask == 0) { /* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } /* Hardware doesn't do destination dependency checking on send * instructions properly. Add a workaround which generates the * dependency by other means. In practice it seems like this bug * only crops up for texture samples, and only where registers are * written by the send and then written again later without being * read in between. Luckily for us, we already track that * information and use it to modify the writemask for the * instruction, so that is a guide for whether a workaround is * needed. */ if (writemask != WRITEMASK_XYZW) { GLuint dst_offset = 0; GLuint i, newmask = 0, len = 0; for (i = 0; i < 4; i++) { if (writemask & (1<<i)) break; dst_offset += 2; } for (; i < 4; i++) { if (!(writemask & (1<<i))) break; newmask |= 1<<i; len++; } if (newmask != writemask) { need_stall = 1; /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); newmask = ~newmask & WRITEMASK_XYZW; brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, m1, brw_vec8_grf(0,0)); brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); brw_pop_insn_state(p); src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); dest = offset(dest, dst_offset); response_length = len * 2; } } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditonalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_sampler_message(p->brw, insn, binding_table_index, sampler, msg_type, response_length, msg_length, eot); } if (need_stall) { struct brw_reg reg = vec8(offset(dest, response_length-1)); /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } */ brw_push_insn_state(p); brw_set_compression_control(p, GL_FALSE); brw_MOV(p, reg, reg); brw_pop_insn_state(p); } }
void vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) { switch (instr->intrinsic) { case nir_intrinsic_load_invocation_id: emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD), invocation_id)); break; case nir_intrinsic_load_primitive_id: emit(TCS_OPCODE_GET_PRIMITIVE_ID, get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); break; case nir_intrinsic_load_patch_vertices_in: emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), brw_imm_d(key->input_vertices))); break; case nir_intrinsic_load_per_vertex_input: { src_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]); src_reg vertex_index = vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0])) : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); dst.writemask = brw_writemask_for_size(instr->num_components); emit_input_urb_read(dst, vertex_index, imm_offset, indirect_offset); break; } case nir_intrinsic_load_input: unreachable("nir_lower_io should use load_per_vertex_input intrinsics"); break; case nir_intrinsic_load_output: case nir_intrinsic_load_per_vertex_output: { src_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0];; dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); dst.writemask = brw_writemask_for_size(instr->num_components); if (imm_offset == 0 && indirect_offset.file == BAD_FILE) { dst.type = BRW_REGISTER_TYPE_F; /* This is a read of gl_TessLevelInner[], which lives in the * Patch URB header. The layout depends on the domain. */ switch (key->tes_primitive_mode) { case GL_QUADS: { /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */ dst_reg tmp(this, glsl_type::vec4_type); emit_output_urb_read(tmp, 0, src_reg()); emit(MOV(writemask(dst, WRITEMASK_XY), swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX))); break; } case GL_TRIANGLES: /* DWord 4; use offset 1 but normal swizzle/writemask. */ emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg()); break; case GL_ISOLINES: /* All channels are undefined. */ return; default: unreachable("Bogus tessellation domain"); } } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) { dst.type = BRW_REGISTER_TYPE_F; unsigned swiz = BRW_SWIZZLE_WZYX; /* This is a read of gl_TessLevelOuter[], which lives in the * high 4 DWords of the Patch URB header, in reverse order. */ switch (key->tes_primitive_mode) { case GL_QUADS: dst.writemask = WRITEMASK_XYZW; break; case GL_TRIANGLES: dst.writemask = WRITEMASK_XYZ; break; case GL_ISOLINES: /* Isolines are not reversed; swizzle .zw -> .xy */ swiz = BRW_SWIZZLE_ZWZW; dst.writemask = WRITEMASK_XY; return; default: unreachable("Bogus tessellation domain"); } dst_reg tmp(this, glsl_type::vec4_type); emit_output_urb_read(tmp, 1, src_reg()); emit(MOV(dst, swizzle(src_reg(tmp), swiz))); } else { emit_output_urb_read(dst, imm_offset, indirect_offset); } break; } case nir_intrinsic_store_output: case nir_intrinsic_store_per_vertex_output: { src_reg value = get_nir_src(instr->src[0]); unsigned mask = instr->const_index[1]; unsigned swiz = BRW_SWIZZLE_XYZW; src_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; if (imm_offset == 0 && indirect_offset.file == BAD_FILE) { value.type = BRW_REGISTER_TYPE_F; mask &= (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1; /* This is a write to gl_TessLevelInner[], which lives in the * Patch URB header. The layout depends on the domain. */ switch (key->tes_primitive_mode) { case GL_QUADS: /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed). * We use an XXYX swizzle to reverse put .xy in the .wz * channels, and use a .zw writemask. */ swiz = BRW_SWIZZLE4(0, 0, 1, 0); mask = writemask_for_backwards_vector(mask); break; case GL_TRIANGLES: /* gl_TessLevelInner[].x lives at DWord 4, so we set the * writemask to X and bump the URB offset by 1. */ imm_offset = 1; break; case GL_ISOLINES: /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */ return; default: unreachable("Bogus tessellation domain"); } } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) { value.type = BRW_REGISTER_TYPE_F; mask &= (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1; /* This is a write to gl_TessLevelOuter[] which lives in the * Patch URB Header at DWords 4-7. However, it's reversed, so * instead of .xyzw we have .wzyx. */ if (key->tes_primitive_mode == GL_ISOLINES) { /* Isolines .xy should be stored in .zw, in order. */ swiz = BRW_SWIZZLE4(0, 0, 0, 1); mask <<= 2; } else { /* Other domains are reversed; store .wzyx instead of .xyzw. */ swiz = BRW_SWIZZLE_WZYX; mask = writemask_for_backwards_vector(mask); } } emit_urb_write(swizzle(value, swiz), mask, imm_offset, indirect_offset); break; } case nir_intrinsic_barrier: { dst_reg header = dst_reg(this, glsl_type::uvec4_type); emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header); emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header)); break; } default: vec4_visitor::nir_emit_intrinsic(instr); } }