/* 3DSTATE_GS * * Disable the geometry shader. */ void gen6_blorp_emit_gs_disable(struct brw_context *brw, const brw_blorp_params *params) { /* Disable all the constant buffers. */ BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); }
static void gen7_upload_hs_state(struct brw_context *brw) { const struct brw_stage_state *stage_state = &brw->tcs.base; /* BRW_NEW_TESS_PROGRAMS */ bool active = brw->tess_eval_program; /* BRW_NEW_TCS_PROG_DATA */ const struct brw_vue_prog_data *prog_data = &brw->tcs.prog_data->base; if (active) { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4), GEN7_HS_SAMPLER_COUNT) | SET_FIELD(prog_data->base.binding_table.size_bytes / 4, GEN7_HS_BINDING_TABLE_ENTRY_COUNT) | (brw->max_hs_threads - 1)); OUT_BATCH(GEN7_HS_ENABLE | GEN7_HS_STATISTICS_ENABLE | SET_FIELD(brw->tcs.prog_data->instances - 1, GEN7_HS_INSTANCE_COUNT)); OUT_BATCH(stage_state->prog_offset); if (prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); } OUT_BATCH(GEN7_HS_INCLUDE_VERTEX_HANDLES | SET_FIELD(prog_data->base.dispatch_grf_start_reg, GEN7_HS_DISPATCH_START_GRF)); /* Ignore URB semaphores */ OUT_BATCH(0); ADVANCE_BATCH(); } else { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } brw->tcs.enabled = active; }
static void upload_gs_state(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; const struct brw_stage_state *stage_state = &brw->gs.base; const int max_threads_shift = brw->is_haswell ? HSW_GS_MAX_THREADS_SHIFT : GEN6_GS_MAX_THREADS_SHIFT; /* BRW_NEW_GEOMETRY_PROGRAM */ bool active = brw->geometry_program; /* BRW_NEW_GS_PROG_DATA */ const struct brw_stage_prog_data *prog_data = stage_state->prog_data; const struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(stage_state->prog_data); const struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(stage_state->prog_data); /** * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > * Geometry > Geometry Shader > State: * * "Note: Because of corruption in IVB:GT2, software needs to flush the * whole fixed function pipeline when the GS enable changes value in * the 3DSTATE_GS." * * The hardware architects have clarified that in this context "flush the * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS * Stall" bit set. */ if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active) gen7_emit_cs_stall_flush(brw); if (active) { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(stage_state->prog_offset); OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((prog_data->binding_table.size_bytes / 4) << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); if (prog_data->total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); } uint32_t dw4 = ((gs_prog_data->output_vertex_size_hwords * 2 - 1) << GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) | (gs_prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) | (vue_prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | (prog_data->dispatch_grf_start_reg << GEN6_GS_DISPATCH_START_GRF_SHIFT); /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between * Ivy Bridge and Haswell. * * On Ivy Bridge, setting this bit causes the vertices of a triangle * strip to be delivered to the geometry shader in an order that does * not strictly follow the OpenGL spec, but preserves triangle * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then * the geometry shader sees triangles: * * (1, 2, 3), (2, 4, 3), (3, 4, 5) * * (Clearing the bit is even worse, because it fails to preserve * orientation). * * Triangle strips with adjacency always ordered in a way that preserves * triangle orientation but does not strictly follow the OpenGL spec, * regardless of the setting of this bit. * * On Haswell, both triangle strips and triangle strips with adjacency * are always ordered in a way that preserves triangle orientation. * Setting this bit causes the ordering to strictly follow the OpenGL * spec. * * So in either case we want to set the bit. Unfortunately on Ivy * Bridge this will get the order close to correct but not perfect. */ uint32_t dw5 = ((devinfo->max_gs_threads - 1) << max_threads_shift) | (gs_prog_data->control_data_header_size_hwords << GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | ((gs_prog_data->invocations - 1) << GEN7_GS_INSTANCE_CONTROL_SHIFT) | SET_FIELD(vue_prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) | GEN6_GS_STATISTICS_ENABLE | (gs_prog_data->include_primitive_id ? GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | GEN7_GS_REORDER_TRAILING | GEN7_GS_ENABLE; uint32_t dw6 = 0; if (brw->is_haswell) { dw6 |= gs_prog_data->control_data_format << HSW_GS_CONTROL_DATA_FORMAT_SHIFT; } else { dw5 |= gs_prog_data->control_data_format << IVB_GS_CONTROL_DATA_FORMAT_SHIFT; } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(dw6); ADVANCE_BATCH(); } else { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); /* prog_bo */ OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | GEN7_GS_INCLUDE_VERTEX_HANDLES | (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | GEN6_GS_STATISTICS_ENABLE); OUT_BATCH(0); ADVANCE_BATCH(); } brw->gs.enabled = active; }
void i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int h) { ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; intel_screen_private *intel = intel_get_screen_private(scrn); /* 28 + 16 + 10 + 20 + 32 + 16 */ intel_batch_start_atomic(scrn, 150); if (intel->needs_render_state_emit) i915_emit_composite_setup(scrn); if (intel->needs_render_vertex_emit || intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) { i915_vertex_flush(intel); if (intel_vertex_space(intel) < 256) { intel_next_vertex(intel); OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(0) | I1_LOAD_S(1) | 1); OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); intel->vertex_index = 0; } else if (intel->floats_per_vertex != intel->last_floats_per_vertex){ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | 0); OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex; intel->vertex_used = intel->vertex_index * intel->floats_per_vertex; } intel->last_floats_per_vertex = intel->floats_per_vertex; intel->needs_render_vertex_emit = FALSE; } if (intel->prim_offset == 0) { if (intel->needs_render_ca_pass) { OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0); OUT_BATCH(i915_get_blend_cntl(PictOpOutReverse, intel->render_mask_picture, intel->render_dest_picture->format)); i915_composite_emit_shader(intel, PictOpOutReverse); } intel->prim_offset = intel->batch_used; OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL); OUT_BATCH(intel->vertex_index); } intel->vertex_count += 3; intel->prim_emit(intel, srcX, srcY, maskX, maskY, dstX, dstY, w, h); intel_batch_end_atomic(scrn); }
/** * Define the base addresses which some state is referenced from. * * This allows us to avoid having to emit relocations for the objects, * and is actually required for binding table pointers on gen6. * * Surface state base address covers binding table pointers and * surface state objects, but not the surfaces that the surface state * objects point to. */ static void upload_state_base_address( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be * programmed prior to STATE_BASE_ADDRESS. * * However, given that the instruction SBA (general state base * address) on this chipset is always set to 0 across X and GL, * maybe this isn't required for us in particular. */ if (intel->gen >= 6) { if (intel->gen == 6) intel_emit_post_sync_nonzero_flush(intel); BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); /* General state base address: stateless DP read/write requests */ OUT_BATCH(1); /* Surface state base address: * BINDING_TABLE_STATE * SURFACE_STATE */ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); /* Dynamic state base address: * SAMPLER_STATE * SAMPLER_BORDER_COLOR_STATE * CLIP, SF, WM/CC viewport state * COLOR_CALC_STATE * DEPTH_STENCIL_STATE * BLEND_STATE * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset * Disable is clear, which we rely on) */ OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION), 0, 1); OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */ OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); /* Instruction base address: shader kernels (incl. SIP) */ OUT_BATCH(1); /* General state upper bound */ /* Dynamic state upper bound. Although the documentation says that * programming it to zero will cause it to be ignored, that is a lie. * If this isn't programmed to a real bound, the sampler border color * pointer is rejected, causing border color to mysteriously fail. */ OUT_BATCH(0xfffff001); OUT_BATCH(1); /* Indirect object upper bound */ OUT_BATCH(1); /* Instruction access upper bound */ ADVANCE_BATCH(); } else if (intel->gen == 5) { BEGIN_BATCH(8); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); /* Instruction base address */ OUT_BATCH(0xfffff001); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ OUT_BATCH(1); /* Instruction access upper bound */ ADVANCE_BATCH(); } else { BEGIN_BATCH(6); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); OUT_BATCH(1); /* General state base address */ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ ADVANCE_BATCH(); } /* According to section 3.6.1 of VOL1 of the 965 PRM, * STATE_BASE_ADDRESS updates require a reissue of: * * 3DSTATE_PIPELINE_POINTERS * 3DSTATE_BINDING_TABLE_POINTERS * MEDIA_STATE_POINTERS * * and this continues through Ironlake. The Sandy Bridge PRM, vol * 1 part 1 says that the folowing packets must be reissued: * * 3DSTATE_CC_POINTERS * 3DSTATE_BINDING_TABLE_POINTERS * 3DSTATE_SAMPLER_STATE_POINTERS * 3DSTATE_VIEWPORT_STATE_POINTERS * MEDIA_STATE_POINTERS * * Those are always reissued following SBA updates anyway (new * batch time), except in the case of the program cache BO * changing. Having a separate state flag makes the sequence more * obvious. */ brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS; }
/** * Extract the needed fields from vertex_header and emit i915 dwords. * Recall that the vertices are constructed by the 'draw' module and * have a couple of slots at the beginning (1-dword header, 4-dword * clip pos) that we ignore here. */ static INLINE void emit_hw_vertex( struct i915_context *i915, const struct vertex_header *vertex) { const struct vertex_info *vinfo = &i915->current.vertex_info; uint i; uint count = 0; /* for debug/sanity */ assert(!i915->dirty); for (i = 0; i < vinfo->num_attribs; i++) { const uint j = vinfo->attrib[i].src_index; const float *attrib = vertex->data[j]; switch (vinfo->attrib[i].emit) { case EMIT_1F: OUT_BATCH( fui(attrib[0]) ); count++; break; case EMIT_2F: OUT_BATCH( fui(attrib[0]) ); OUT_BATCH( fui(attrib[1]) ); count += 2; break; case EMIT_3F: OUT_BATCH( fui(attrib[0]) ); OUT_BATCH( fui(attrib[1]) ); OUT_BATCH( fui(attrib[2]) ); count += 3; break; case EMIT_4F: OUT_BATCH( fui(attrib[0]) ); OUT_BATCH( fui(attrib[1]) ); OUT_BATCH( fui(attrib[2]) ); OUT_BATCH( fui(attrib[3]) ); count += 4; break; case EMIT_4UB: OUT_BATCH( pack_ub4(float_to_ubyte( attrib[0] ), float_to_ubyte( attrib[1] ), float_to_ubyte( attrib[2] ), float_to_ubyte( attrib[3] )) ); count += 1; break; case EMIT_4UB_BGRA: OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ), float_to_ubyte( attrib[1] ), float_to_ubyte( attrib[0] ), float_to_ubyte( attrib[3] )) ); count += 1; break; default: assert(0); } } assert(count == vinfo->size); }
void I915DisplayVideoTextured(ScrnInfoPtr scrn, intel_adaptor_private *adaptor_priv, int id, RegionPtr dstRegion, short width, short height, int video_pitch, int video_pitch2, short src_w, short src_h, short drw_w, short drw_h, PixmapPtr pixmap) { intel_screen_private *intel = intel_get_screen_private(scrn); uint32_t format, ms3, s5, tiling; BoxPtr pbox = REGION_RECTS(dstRegion); int nbox_total = REGION_NUM_RECTS(dstRegion); int nbox_this_time; int dxo, dyo, pix_xoff, pix_yoff; PixmapPtr target; #if 0 ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height, video_pitch); #endif dxo = dstRegion->extents.x1; dyo = dstRegion->extents.y1; if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048 || !intel_uxa_check_pitch_3d(pixmap)) { ScreenPtr screen = pixmap->drawable.pScreen; target = screen->CreatePixmap(screen, dstRegion->extents.x2 - dxo, dstRegion->extents.y2 - dyo, pixmap->drawable.depth, CREATE_PIXMAP_USAGE_SCRATCH); if (target == NULL) return; if (intel_uxa_get_pixmap_bo(target) == NULL) { screen->DestroyPixmap(target); return; } pix_xoff = -dxo; pix_yoff = -dyo; } else { target = pixmap; /* Set up the offset for translating from the given region * (in screen coordinates) to the backing pixmap. */ #ifdef COMPOSITE pix_xoff = -target->screen_x + target->drawable.x; pix_yoff = -target->screen_y + target->drawable.y; #else pix_xoff = 0; pix_yoff = 0; #endif } #define BYTES_FOR_BOXES(n) ((200 + (n) * 20) * 4) #define BOXES_IN_BYTES(s) ((((s)/4) - 200) / 20) #define BATCH_BYTES(p) ((p)->batch_bo->size - 16) while (nbox_total) { nbox_this_time = nbox_total; if (BYTES_FOR_BOXES(nbox_this_time) > BATCH_BYTES(intel)) nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(intel)); nbox_total -= nbox_this_time; intel_batch_start_atomic(scrn, 200 + 20 * nbox_this_time); IntelEmitInvarientState(scrn); intel->last_3d = LAST_3D_VIDEO; /* draw rect -- just clipping */ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) | DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3)); OUT_BATCH(0x00000000); /* ymin, xmin */ /* ymax, xmax */ OUT_BATCH((target->drawable.width - 1) | (target->drawable.height - 1) << 16); OUT_BATCH(0x00000000); /* yorigin, xorigin */ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(5) | I1_LOAD_S(6) | 2); OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) | S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) | S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT)); s5 = 0x0; if (intel->cpp == 2) s5 |= S5_COLOR_DITHER_ENABLE; OUT_BATCH(s5); /* S5 - enable bits */ OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) | (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) | (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) | S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT)); OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD); OUT_BATCH(0x00000000); OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); if (intel->cpp == 2) format = COLR_BUF_RGB565; else format = COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER; OUT_BATCH(LOD_PRECLAMP_OGL | DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8) | format); /* front buffer, pitch, offset */ if (intel_uxa_pixmap_tiled(target)) { tiling = BUF_3D_TILED_SURFACE; if (intel_uxa_get_pixmap_private(target)->tiling == I915_TILING_Y) tiling |= BUF_3D_TILE_WALK_Y; } else tiling = 0; OUT_BATCH(_3DSTATE_BUF_INFO_CMD); OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling | BUF_3D_PITCH(intel_pixmap_pitch(target))); OUT_RELOC_PIXMAP(target, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); if (!is_planar_fourcc(id)) { FS_LOCALS(); OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4); OUT_BATCH(0x0000001); /* constant 0 */ /* constant 0: brightness/contrast */ OUT_BATCH_F(adaptor_priv->brightness / 128.0); OUT_BATCH_F(adaptor_priv->contrast / 255.0); OUT_BATCH_F(0.0); OUT_BATCH_F(0.0); OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3); OUT_BATCH(0x00000001); OUT_BATCH(SS2_COLORSPACE_CONVERSION | (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | SS3_NORMALIZED_COORDS); OUT_BATCH(0x00000000); OUT_BATCH(_3DSTATE_MAP_STATE | 3); OUT_BATCH(0x00000001); /* texture map #1 */ if (adaptor_priv->buf) OUT_RELOC(adaptor_priv->buf, I915_GEM_DOMAIN_SAMPLER, 0, adaptor_priv->YBufOffset); else OUT_BATCH(adaptor_priv->YBufOffset); ms3 = MAPSURF_422; switch (id) { case FOURCC_YUY2: ms3 |= MT_422_YCRCB_NORMAL; break; case FOURCC_UYVY: ms3 |= MT_422_YCRCB_SWAPY; break; } ms3 |= (height - 1) << MS3_HEIGHT_SHIFT; ms3 |= (width - 1) << MS3_WIDTH_SHIFT; OUT_BATCH(ms3); OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); FS_BEGIN(); i915_fs_dcl(FS_S0); i915_fs_dcl(FS_T0); i915_fs_texld(FS_OC, FS_S0, FS_T0); if (adaptor_priv->brightness != 0) { i915_fs_add(FS_OC, i915_fs_operand_reg(FS_OC), i915_fs_operand(FS_C0, X, X, X, ZERO)); } FS_END(); } else { FS_LOCALS(); /* For the planar formats, we set up three samplers -- * one for each plane, in a Y8 format. Because I * couldn't get the special PLANAR_TO_PACKED * shader setup to work, I did the manual pixel shader: * * y' = y - .0625 * u' = u - .5 * v' = v - .5; * * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' * * register assignment: * r0 = (y',u',v',0) * r1 = (y,y,y,y) * r2 = (u,u,u,u) * r3 = (v,v,v,v) * OC = (r,g,b,1) */ OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2)); OUT_BATCH(0x000001f); /* constants 0-4 */ /* constant 0: normalization offsets */ OUT_BATCH_F(-0.0625); OUT_BATCH_F(-0.5); OUT_BATCH_F(-0.5); OUT_BATCH_F(0.0); /* constant 1: r coefficients */ OUT_BATCH_F(1.1643); OUT_BATCH_F(0.0); OUT_BATCH_F(1.5958); OUT_BATCH_F(0.0); /* constant 2: g coefficients */ OUT_BATCH_F(1.1643); OUT_BATCH_F(-0.39173); OUT_BATCH_F(-0.81290); OUT_BATCH_F(0.0); /* constant 3: b coefficients */ OUT_BATCH_F(1.1643); OUT_BATCH_F(2.017); OUT_BATCH_F(0.0); OUT_BATCH_F(0.0); /* constant 4: brightness/contrast */ OUT_BATCH_F(adaptor_priv->brightness / 128.0); OUT_BATCH_F(adaptor_priv->contrast / 255.0); OUT_BATCH_F(0.0); OUT_BATCH_F(0.0); OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9); OUT_BATCH(0x00000007); /* sampler 0 */ OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | SS3_NORMALIZED_COORDS); OUT_BATCH(0x00000000); /* sampler 1 */ OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | (1 << SS3_TEXTUREMAP_INDEX_SHIFT) | SS3_NORMALIZED_COORDS); OUT_BATCH(0x00000000); /* sampler 2 */ OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | (2 << SS3_TEXTUREMAP_INDEX_SHIFT) | SS3_NORMALIZED_COORDS); OUT_BATCH(0x00000000); OUT_BATCH(_3DSTATE_MAP_STATE | 9); OUT_BATCH(0x00000007); if (adaptor_priv->buf) OUT_RELOC(adaptor_priv->buf, I915_GEM_DOMAIN_SAMPLER, 0, adaptor_priv->YBufOffset); else OUT_BATCH(adaptor_priv->YBufOffset); ms3 = MAPSURF_8BIT | MT_8BIT_I8; ms3 |= (height - 1) << MS3_HEIGHT_SHIFT; ms3 |= (width - 1) << MS3_WIDTH_SHIFT; OUT_BATCH(ms3); /* check to see if Y has special pitch than normal * double u/v pitch, e.g i915 XvMC hw requires at * least 1K alignment, so Y pitch might * be same as U/V's.*/ if (video_pitch2) OUT_BATCH(((video_pitch2 / 4) - 1) << MS4_PITCH_SHIFT); else OUT_BATCH(((video_pitch * 2 / 4) - 1) << MS4_PITCH_SHIFT); if (adaptor_priv->buf) OUT_RELOC(adaptor_priv->buf, I915_GEM_DOMAIN_SAMPLER, 0, adaptor_priv->UBufOffset); else OUT_BATCH(adaptor_priv->UBufOffset); ms3 = MAPSURF_8BIT | MT_8BIT_I8; ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT; ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT; OUT_BATCH(ms3); OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); if (adaptor_priv->buf) OUT_RELOC(adaptor_priv->buf, I915_GEM_DOMAIN_SAMPLER, 0, adaptor_priv->VBufOffset); else OUT_BATCH(adaptor_priv->VBufOffset); ms3 = MAPSURF_8BIT | MT_8BIT_I8; ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT; ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT; OUT_BATCH(ms3); OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); FS_BEGIN(); /* Declare samplers */ i915_fs_dcl(FS_S0); /* Y */ i915_fs_dcl(FS_S1); /* U */ i915_fs_dcl(FS_S2); /* V */ i915_fs_dcl(FS_T0); /* normalized coords */ /* Load samplers to temporaries. */ i915_fs_texld(FS_R1, FS_S0, FS_T0); i915_fs_texld(FS_R2, FS_S1, FS_T0); i915_fs_texld(FS_R3, FS_S2, FS_T0); /* Move the sampled YUV data in R[123] to the first * 3 channels of R0. */ i915_fs_mov_masked(FS_R0, MASK_X, i915_fs_operand_reg(FS_R1)); i915_fs_mov_masked(FS_R0, MASK_Y, i915_fs_operand_reg(FS_R2)); i915_fs_mov_masked(FS_R0, MASK_Z, i915_fs_operand_reg(FS_R3)); /* Normalize the YUV data */ i915_fs_add(FS_R0, i915_fs_operand_reg(FS_R0), i915_fs_operand_reg(FS_C0)); /* dot-product the YUV data in R0 by the vectors of * coefficients for calculating R, G, and B, storing * the results in the R, G, or B channels of the output * color. The OC results are implicitly clamped * at the end of the program. */ i915_fs_dp3(FS_OC, MASK_X, i915_fs_operand_reg(FS_R0), i915_fs_operand_reg(FS_C1)); i915_fs_dp3(FS_OC, MASK_Y, i915_fs_operand_reg(FS_R0), i915_fs_operand_reg(FS_C2)); i915_fs_dp3(FS_OC, MASK_Z, i915_fs_operand_reg(FS_R0), i915_fs_operand_reg(FS_C3)); /* Set alpha of the output to 1.0, by wiring W to 1 * and not actually using the source. */ i915_fs_mov_masked(FS_OC, MASK_W, i915_fs_operand_one()); if (adaptor_priv->brightness != 0) { i915_fs_add(FS_OC, i915_fs_operand_reg(FS_OC), i915_fs_operand(FS_C4, X, X, X, ZERO)); } FS_END(); } OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1)); while (nbox_this_time--) { int box_x1 = pbox->x1; int box_y1 = pbox->y1; int box_x2 = pbox->x2; int box_y2 = pbox->y2; float src_scale_x, src_scale_y; pbox++; src_scale_x = ((float)src_w / width) / drw_w; src_scale_y = ((float)src_h / height) / drw_h; /* vertex data - rect list consists of bottom right, * bottom left, and top left vertices. */ /* bottom right */ OUT_BATCH_F(box_x2 + pix_xoff); OUT_BATCH_F(box_y2 + pix_yoff); OUT_BATCH_F((box_x2 - dxo) * src_scale_x); OUT_BATCH_F((box_y2 - dyo) * src_scale_y); /* bottom left */ OUT_BATCH_F(box_x1 + pix_xoff); OUT_BATCH_F(box_y2 + pix_yoff); OUT_BATCH_F((box_x1 - dxo) * src_scale_x); OUT_BATCH_F((box_y2 - dyo) * src_scale_y); /* top left */ OUT_BATCH_F(box_x1 + pix_xoff); OUT_BATCH_F(box_y1 + pix_yoff); OUT_BATCH_F((box_x1 - dxo) * src_scale_x); OUT_BATCH_F((box_y1 - dyo) * src_scale_y); } intel_batch_end_atomic(scrn); } if (target != pixmap) { GCPtr gc; gc = GetScratchGC(pixmap->drawable.depth, pixmap->drawable.pScreen); if (gc) { gc->subWindowMode = ClipByChildren; if (REGION_NUM_RECTS(dstRegion) > 1) { RegionPtr tmp; tmp = REGION_CREATE(pixmap->drawable.pScreen, NULL, 0); if (tmp) { REGION_COPY(pixmap->drawable.pScreen, tmp, dstRegion); gc->funcs->ChangeClip(gc, CT_REGION, tmp, 0); } } ValidateGC(&pixmap->drawable, gc); gc->ops->CopyArea(&target->drawable, &pixmap->drawable, gc, 0, 0, target->drawable.width, target->drawable.height, -pix_xoff, -pix_yoff); FreeScratchGC(gc); } target->drawable.pScreen->DestroyPixmap(target); } intel_uxa_debug_flush(scrn); }
static void upload_sbe_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead); /* _NEW_LIGHT */ bool shade_model_flat = ctx->Light.ShadeModel == GL_FLAT; uint32_t dw1, dw10, dw11; int i; int attr = 0, input_index = 0; int urb_entry_read_offset = 1; uint16_t attr_overrides[FRAG_ATTRIB_MAX]; /* _NEW_BUFFERS */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); uint32_t point_sprite_origin; /* FINISHME: Attribute Swizzle Control Mode? */ dw1 = GEN7_SBE_SWIZZLE_ENABLE | num_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT; /* _NEW_POINT * * Window coordinates in an FBO are inverted, which means point * sprite origin must be inverted. */ if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) { point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; } else { point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; } dw1 |= point_sprite_origin; dw10 = 0; dw11 = 0; /* Create the mapping from the FS inputs we produce to the VS outputs * they source from. */ uint32_t max_source_attr = 0; for (; attr < FRAG_ATTRIB_MAX; attr++) { enum glsl_interp_qualifier interp_qualifier = brw->fragment_program->InterpQualifier[attr]; bool is_gl_Color = attr == FRAG_ATTRIB_COL0 || attr == FRAG_ATTRIB_COL1; if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr))) continue; if (ctx->Point.PointSprite && attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7 && ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) { dw10 |= (1 << input_index); } if (attr == FRAG_ATTRIB_PNTC) dw10 |= (1 << input_index); /* flat shading */ if (interp_qualifier == INTERP_QUALIFIER_FLAT || (shade_model_flat && is_gl_Color && interp_qualifier == INTERP_QUALIFIER_NONE)) dw11 |= (1 << input_index); /* The hardware can only do the overrides on 16 overrides at a * time, and the other up to 16 have to be lined up so that the * input index = the output index. We'll need to do some * tweaking to make sure that's the case. */ assert(input_index < 16 || attr == input_index); /* CACHE_NEW_VS_PROG | _NEW_LIGHT | _NEW_PROGRAM */ attr_overrides[input_index++] = get_attr_override(&brw->vs.prog_data->vue_map, urb_entry_read_offset, attr, ctx->VertexProgram._TwoSideEnabled, &max_source_attr); } /* From the Ivy Bridge PRM, Volume 2, Part 1, documentation for * 3DSTATE_SBE DWord 1 bits 15:11, "Vertex URB Entry Read Length": * * "This field should be set to the minimum length required to read the * maximum source attribute. The maximum source attribute is indicated * by the maximum value of the enabled Attribute # Source Attribute if * Attribute Swizzle Enable is set, Number of Output Attributes-1 if * enable is not set. * * read_length = ceiling((max_source_attr + 1) / 2)" */ uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2; dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; for (; input_index < FRAG_ATTRIB_MAX; input_index++) attr_overrides[input_index] = 0; BEGIN_BATCH(14); OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); OUT_BATCH(dw1); /* Output dwords 2 through 9 */ for (i = 0; i < 8; i++) { OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16); } OUT_BATCH(dw10); /* point sprite texcoord bitmask */ OUT_BATCH(dw11); /* constant interp bitmask */ OUT_BATCH(0); /* wrapshortest enables 0-7 */ OUT_BATCH(0); /* wrapshortest enables 8-15 */ ADVANCE_BATCH(); }
static void gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, const brw_blorp_params *params) { struct intel_context *intel = &brw->intel; uint32_t draw_x = params->depth.x_offset; uint32_t draw_y = params->depth.y_offset; uint32_t tile_mask_x, tile_mask_y; gen6_blorp_compute_tile_masks(params, &tile_mask_x, &tile_mask_y); /* 3DSTATE_DEPTH_BUFFER */ { uint32_t tile_x = draw_x & tile_mask_x; uint32_t tile_y = draw_y & tile_mask_y; uint32_t offset = intel_region_get_aligned_offset(params->depth.mt->region, draw_x & ~tile_mask_x, draw_y & ~tile_mask_y, false); /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth * Coordinate Offset X/Y": * * "The 3 LSBs of both offsets must be zero to ensure correct * alignment" * * We have no guarantee that tile_x and tile_y are correctly aligned, * since they are determined by the mipmap layout, which is only aligned * to multiples of 4. * * So, to avoid hanging the GPU, just smash the low order 3 bits of * tile_x and tile_y to 0. This is a temporary workaround until we come * up with a better solution. */ tile_x &= ~7; tile_y &= ~7; intel_emit_depth_stall_flushes(intel); BEGIN_BATCH(7); OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); uint32_t pitch_bytes = params->depth.mt->region->pitch * params->depth.mt->region->cpp; OUT_BATCH((pitch_bytes - 1) | params->depth_format << 18 | 1 << 22 | /* hiz enable */ 1 << 28 | /* depth write */ BRW_SURFACE_2D << 29); OUT_RELOC(params->depth.mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, offset); OUT_BATCH((params->depth.width + tile_x - 1) << 4 | (params->depth.height + tile_y - 1) << 18); OUT_BATCH(0); OUT_BATCH(tile_x | tile_y << 16); OUT_BATCH(0); ADVANCE_BATCH(); } /* 3DSTATE_HIER_DEPTH_BUFFER */ { struct intel_region *hiz_region = params->depth.mt->hiz_mt->region; uint32_t hiz_offset = intel_region_get_aligned_offset(hiz_region, draw_x & ~tile_mask_x, (draw_y & ~tile_mask_y) / 2, false); BEGIN_BATCH(3); OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1); OUT_RELOC(hiz_region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, hiz_offset); ADVANCE_BATCH(); } /* 3DSTATE_STENCIL_BUFFER */ { BEGIN_BATCH(3); OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } }
/** * Gathers together all the uniform values into a block of memory to be * uploaded into the CURBE, then emits the state packet telling the hardware * the new location. */ static void brw_upload_constant_buffer(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* BRW_NEW_CURBE_OFFSETS */ const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); gl_constant_value *buf; GLuint i; gl_clip_plane *clip_planes; if (sz == 0) { goto emit; } buf = intel_upload_space(brw, bufsz, 64, &brw->curbe.curbe_bo, &brw->curbe.curbe_offset); STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); /* fragment shader constants */ if (brw->curbe.wm_size) { _mesa_load_state_parameters(ctx, brw->fragment_program->Base.Parameters); /* BRW_NEW_CURBE_OFFSETS */ GLuint offset = brw->curbe.wm_start * 16; /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */ for (i = 0; i < brw->wm.prog_data->base.nr_params; i++) { buf[offset + i] = *brw->wm.prog_data->base.param[i]; } } /* clipper constants */ if (brw->curbe.clip_size) { GLuint offset = brw->curbe.clip_start * 16; GLuint j; /* If any planes are going this way, send them all this way: */ for (i = 0; i < 6; i++) { buf[offset + i * 4 + 0].f = fixed_plane[i][0]; buf[offset + i * 4 + 1].f = fixed_plane[i][1]; buf[offset + i * 4 + 2].f = fixed_plane[i][2]; buf[offset + i * 4 + 3].f = fixed_plane[i][3]; } /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to * clip-space: */ clip_planes = brw_select_clip_planes(ctx); for (j = 0; j < MAX_CLIP_PLANES; j++) { if (ctx->Transform.ClipPlanesEnabled & (1<<j)) { buf[offset + i * 4 + 0].f = clip_planes[j][0]; buf[offset + i * 4 + 1].f = clip_planes[j][1]; buf[offset + i * 4 + 2].f = clip_planes[j][2]; buf[offset + i * 4 + 3].f = clip_planes[j][3]; i++; } } } /* vertex shader constants */ if (brw->curbe.vs_size) { _mesa_load_state_parameters(ctx, brw->vertex_program->Base.Parameters); GLuint offset = brw->curbe.vs_start * 16; /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */ for (i = 0; i < brw->vs.prog_data->base.base.nr_params; i++) { buf[offset + i] = *brw->vs.prog_data->base.base.param[i]; } } if (0) { for (i = 0; i < sz*16; i+=4) fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f); } /* Because this provokes an action (ie copy the constants into the * URB), it shouldn't be shortcircuited if identical to the * previous time - because eg. the urb destination may have * changed, or the urb contents different to last time. * * Note that the data referred to is actually copied internally, * not just used in place according to passed pointer. * * It appears that the CS unit takes care of using each available * URB entry (Const URB Entry == CURBE) in turn, and issuing * flushes as necessary when doublebuffering of CURBEs isn't * possible. */ emit: /* Work around mysterious 965 hangs that appear to happen if you do * two 3DPRIMITIVEs with only a CONSTANT_BUFFER inbetween. If we * haven't already flushed for some other reason, explicitly do so. * * We've found no documented reason why this should be necessary. */ if (brw->gen == 4 && !brw->is_g4x && (brw->ctx.NewDriverState & (BRW_NEW_BATCH | BRW_NEW_PSP)) == 0) { BEGIN_BATCH(1); OUT_BATCH(MI_FLUSH); ADVANCE_BATCH(); } /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8 * (CONSTANT_BUFFER (CURBE Load)): * * "Modifying the CS URB allocation via URB_FENCE invalidates any * previous CURBE entries. Therefore software must subsequently * [re]issue a CONSTANT_BUFFER command before CURBE data can be used * in the pipeline." */ BEGIN_BATCH(2); if (brw->curbe.total_size == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); OUT_BATCH(0); } else { OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); OUT_RELOC(brw->curbe.curbe_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, (brw->curbe.total_size - 1) + brw->curbe.curbe_offset); } ADVANCE_BATCH(); }
static void upload_sf_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; uint32_t dw1, dw2, dw3; float point_size; /* _NEW_BUFFERS */ bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer); bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; dw1 = GEN6_SF_STATISTICS_ENABLE | GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; /* _NEW_BUFFERS */ dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); /* _NEW_POLYGON */ if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) dw1 |= GEN6_SF_WINDING_CCW; if (ctx->Polygon.OffsetFill) dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID; if (ctx->Polygon.OffsetLine) dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME; if (ctx->Polygon.OffsetPoint) dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT; switch (ctx->Polygon.FrontMode) { case GL_FILL: dw1 |= GEN6_SF_FRONT_SOLID; break; case GL_LINE: dw1 |= GEN6_SF_FRONT_WIREFRAME; break; case GL_POINT: dw1 |= GEN6_SF_FRONT_POINT; break; default: assert(0); break; } switch (ctx->Polygon.BackMode) { case GL_FILL: dw1 |= GEN6_SF_BACK_SOLID; break; case GL_LINE: dw1 |= GEN6_SF_BACK_WIREFRAME; break; case GL_POINT: dw1 |= GEN6_SF_BACK_POINT; break; default: assert(0); break; } dw2 = 0; if (ctx->Polygon.CullFlag) { switch (ctx->Polygon.CullFaceMode) { case GL_FRONT: dw2 |= GEN6_SF_CULL_FRONT; break; case GL_BACK: dw2 |= GEN6_SF_CULL_BACK; break; case GL_FRONT_AND_BACK: dw2 |= GEN6_SF_CULL_BOTH; break; default: assert(0); break; } } else { dw2 |= GEN6_SF_CULL_NONE; } /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) dw2 |= GEN6_SF_SCISSOR_ENABLE; /* _NEW_LINE */ { uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7); /* TODO: line width of 0 is not allowed when MSAA enabled */ if (line_width_u3_7 == 0) line_width_u3_7 = 1; dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT; } if (ctx->Line.SmoothFlag) { dw2 |= GEN6_SF_LINE_AA_ENABLE; dw2 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0; } if (ctx->Line.StippleFlag && intel->is_haswell) { dw2 |= HSW_SF_LINE_STIPPLE_ENABLE; } /* _NEW_MULTISAMPLE */ if (multisampled_fbo && ctx->Multisample.Enabled) dw2 |= GEN6_SF_MSRAST_ON_PATTERN; /* FINISHME: Last Pixel Enable? Vertex Sub Pixel Precision Select? */ dw3 = GEN6_SF_LINE_AA_MODE_TRUE; /* _NEW_PROGRAM | _NEW_POINT */ if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated)) dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH; /* Clamp to ARB_point_parameters user limits */ point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); /* Clamp to the hardware limits and convert to fixed point */ dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); /* _NEW_LIGHT */ if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { dw3 |= (2 << GEN6_SF_TRI_PROVOKE_SHIFT) | (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) | (1 << GEN6_SF_LINE_PROVOKE_SHIFT); } else { dw3 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT); } BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); OUT_BATCH(dw1); OUT_BATCH(dw2); OUT_BATCH(dw3); OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */ OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */ OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */ ADVANCE_BATCH(); }
static void brw_emit_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); GLuint i, nr_elements; brw_prepare_vertices(brw); brw_emit_query_begin(brw); /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. * * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ if (brw->vb.nr_enabled == 0) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); if (intel->gen >= 6) { OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); CACHED_BATCH(); return; } /* Now emit VB and VEP state packets. */ if (brw->vb.nr_buffers) { if (intel->gen >= 6) { assert(brw->vb.nr_buffers <= 33); } else { assert(brw->vb.nr_buffers <= 17); } BEGIN_BATCH(1 + 4*brw->vb.nr_buffers); OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; uint32_t dw0; if (intel->gen >= 6) { dw0 = buffer->step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA : GEN6_VB0_ACCESS_VERTEXDATA; dw0 |= i << GEN6_VB0_INDEX_SHIFT; } else { dw0 = buffer->step_rate ? BRW_VB0_ACCESS_INSTANCEDATA : BRW_VB0_ACCESS_VERTEXDATA; dw0 |= i << BRW_VB0_INDEX_SHIFT; } if (intel->gen >= 7) dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); if (intel->gen >= 5) { OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1); } else OUT_BATCH(0); OUT_BATCH(buffer->step_rate); brw->vb.current_buffers[i].handle = buffer->bo->handle; brw->vb.current_buffers[i].offset = buffer->offset; brw->vb.current_buffers[i].stride = buffer->stride; brw->vb.current_buffers[i].step_rate = buffer->step_rate; } brw->vb.nr_current_buffers = i; ADVANCE_BATCH(); } nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid; /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably * for VertexID/InstanceID. */ if (intel->gen >= 6) { assert(nr_elements <= 34); } else { assert(nr_elements <= 18); } BEGIN_BATCH(1 + nr_elements * 2); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, input->glarray->Format, input->glarray->Normalized, input->glarray->Integer); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; switch (input->glarray->Size) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT : BRW_VE1_COMPONENT_STORE_1_FLT; break; } if (intel->gen >= 6) { OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } if (intel->gen >= 5) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); else OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } if (brw->vs.prog_data->uses_vertexid) { uint32_t dw0 = 0, dw1 = 0; dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); if (intel->gen >= 6) { dw0 |= GEN6_VE0_VALID; } else { dw0 |= BRW_VE0_VALID; dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; } /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values, * the format is ignored and the value is always int. */ OUT_BATCH(dw0); OUT_BATCH(dw1); } CACHED_BATCH(); }
static void gen6_blorp_emit_depth_stencil_config(struct brw_context *brw, const brw_blorp_params *params) { struct gl_context *ctx = &brw->ctx; uint32_t draw_x = params->depth.x_offset; uint32_t draw_y = params->depth.y_offset; uint32_t tile_mask_x, tile_mask_y; brw_get_depthstencil_tile_masks(params->depth.mt, params->depth.level, params->depth.layer, NULL, &tile_mask_x, &tile_mask_y); /* 3DSTATE_DEPTH_BUFFER */ { uint32_t tile_x = draw_x & tile_mask_x; uint32_t tile_y = draw_y & tile_mask_y; uint32_t offset = intel_region_get_aligned_offset(params->depth.mt->region, draw_x & ~tile_mask_x, draw_y & ~tile_mask_y, false); /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth * Coordinate Offset X/Y": * * "The 3 LSBs of both offsets must be zero to ensure correct * alignment" * * We have no guarantee that tile_x and tile_y are correctly aligned, * since they are determined by the mipmap layout, which is only aligned * to multiples of 4. * * So, to avoid hanging the GPU, just smash the low order 3 bits of * tile_x and tile_y to 0. This is a temporary workaround until we come * up with a better solution. */ WARN_ONCE((tile_x & 7) || (tile_y & 7), "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n" "Truncating offset, bad rendering may occur.\n"); tile_x &= ~7; tile_y &= ~7; intel_emit_post_sync_nonzero_flush(brw); intel_emit_depth_stall_flushes(brw); BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); OUT_BATCH((params->depth.mt->region->pitch - 1) | params->depth_format << 18 | 1 << 21 | /* separate stencil enable */ 1 << 22 | /* hiz enable */ BRW_TILEWALK_YMAJOR << 26 | 1 << 27 | /* y-tiled */ BRW_SURFACE_2D << 29); OUT_RELOC(params->depth.mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, offset); OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 | (params->depth.width + tile_x - 1) << 6 | (params->depth.height + tile_y - 1) << 19); OUT_BATCH(0); OUT_BATCH(tile_x | tile_y << 16); OUT_BATCH(0); ADVANCE_BATCH(); } /* 3DSTATE_HIER_DEPTH_BUFFER */ { struct intel_region *hiz_region = params->depth.mt->hiz_mt->region; uint32_t hiz_offset = intel_region_get_aligned_offset(hiz_region, draw_x & ~tile_mask_x, (draw_y & ~tile_mask_y) / 2, false); BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); OUT_BATCH(hiz_region->pitch - 1); OUT_RELOC(hiz_region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, hiz_offset); ADVANCE_BATCH(); } /* 3DSTATE_STENCIL_BUFFER */ { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } }
/** * Enable or disable thread dispatch and set the HiZ op appropriately. */ static void gen6_blorp_emit_wm_config(struct brw_context *brw, const brw_blorp_params *params, uint32_t prog_offset, brw_blorp_prog_data *prog_data) { uint32_t dw2, dw4, dw5, dw6; /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be * nonzero to prevent the GPU from hanging. While the documentation doesn't * mention this explicitly, it notes that the valid range for the field is * [1,39] = [2,40] threads, which excludes zero. * * To be safe (and to minimize extraneous code) we go ahead and fully * configure the WM state whether or not there is a WM program. */ dw2 = dw4 = dw5 = dw6 = 0; switch (params->hiz_op) { case GEN6_HIZ_OP_DEPTH_CLEAR: dw4 |= GEN6_WM_DEPTH_CLEAR; break; case GEN6_HIZ_OP_DEPTH_RESOLVE: dw4 |= GEN6_WM_DEPTH_RESOLVE; break; case GEN6_HIZ_OP_HIZ_RESOLVE: dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; break; case GEN6_HIZ_OP_NONE: break; default: assert(0); break; } dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */ dw6 |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; /* No inputs from SF */ if (params->use_wm_prog) { dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ dw4 |= prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0; dw5 |= GEN6_WM_16_DISPATCH_ENABLE; dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */ dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */ } if (params->num_samples > 1) { dw6 |= GEN6_WM_MSRAST_ON_PATTERN; if (prog_data && prog_data->persample_msaa_dispatch) dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; else dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; } else { dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; } BEGIN_BATCH(9); OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); OUT_BATCH(params->use_wm_prog ? prog_offset : 0); OUT_BATCH(dw2); OUT_BATCH(0); /* No scratch needed */ OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(dw6); OUT_BATCH(0); /* No other programs */ OUT_BATCH(0); /* No other programs */ ADVANCE_BATCH(); }
static void gen7_upload_ps_state(struct brw_context *brw, const struct brw_stage_state *stage_state, const struct brw_wm_prog_data *prog_data, bool enable_dual_src_blend, unsigned sample_mask, unsigned fast_clear_op) { const struct gen_device_info *devinfo = &brw->screen->devinfo; uint32_t dw2, dw4, dw5, ksp0, ksp2; const int max_threads_shift = brw->is_haswell ? HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; dw2 = dw4 = dw5 = ksp2 = 0; const unsigned sampler_count = DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); dw2 |= SET_FIELD(sampler_count, GEN7_PS_SAMPLER_COUNT); dw2 |= ((prog_data->base.binding_table.size_bytes / 4) << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); if (prog_data->base.use_alt_mode) dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT; /* Haswell requires the sample mask to be set in this packet as well as * in 3DSTATE_SAMPLE_MASK; the values should match. */ /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ if (brw->is_haswell) dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK); dw4 |= (devinfo->max_wm_threads - 1) << max_threads_shift; if (prog_data->base.nr_params > 0) dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; /* From the IVB PRM, volume 2 part 1, page 287: * "This bit is inserted in the PS payload header and made available to * the DataPort (either via the message header or via header bypass) to * indicate that oMask data (one or two phases) is included in Render * Target Write messages. If present, the oMask data is used to mask off * samples." */ if (prog_data->uses_omask) dw4 |= GEN7_PS_OMASK_TO_RENDER_TARGET; /* From the IVB PRM, volume 2 part 1, page 287: * "If the PS kernel does not need the Position XY Offsets to * compute a Position Value, then this field should be programmed * to POSOFFSET_NONE." * "SW Recommendation: If the PS kernel needs the Position Offsets * to compute a Position XY value, this field should match Position * ZW Interpolation Mode to ensure a consistent position.xyzw * computation." * We only require XY sample offsets. So, this recommendation doesn't * look useful at the moment. We might need this in future. */ if (prog_data->uses_pos_offset) dw4 |= GEN7_PS_POSOFFSET_SAMPLE; else dw4 |= GEN7_PS_POSOFFSET_NONE; /* The hardware wedges if you have this bit set but don't turn on any dual * source blend factors. */ if (enable_dual_src_blend) dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE; /* BRW_NEW_FS_PROG_DATA */ if (prog_data->num_varying_inputs != 0) dw4 |= GEN7_PS_ATTRIBUTE_ENABLE; dw4 |= fast_clear_op; if (prog_data->dispatch_16) dw4 |= GEN7_PS_16_DISPATCH_ENABLE; if (prog_data->dispatch_8) dw4 |= GEN7_PS_8_DISPATCH_ENABLE; dw5 |= prog_data->base.dispatch_grf_start_reg << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; dw5 |= prog_data->dispatch_grf_start_reg_2 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2; ksp0 = stage_state->prog_offset; ksp2 = stage_state->prog_offset + prog_data->prog_offset_2; BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(ksp0); OUT_BATCH(dw2); if (prog_data->base.total_scratch) { OUT_RELOC(brw->wm.base.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(stage_state->per_thread_scratch) - 11); } else { OUT_BATCH(0); } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(0); /* kernel 1 pointer */ OUT_BATCH(ksp2); ADVANCE_BATCH(); }
static void upload_sf_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; uint32_t urb_entry_read_length; /* BRW_NEW_FRAGMENT_PROGRAM */ uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead); /* _NEW_LIGHT */ bool shade_model_flat = ctx->Light.ShadeModel == GL_FLAT; uint32_t dw1, dw2, dw3, dw4, dw16, dw17; int i; /* _NEW_BUFFER */ bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; int attr = 0, input_index = 0; int urb_entry_read_offset = 1; float point_size; uint16_t attr_overrides[FRAG_ATTRIB_MAX]; uint32_t point_sprite_origin; /* CACHE_NEW_VS_PROG */ urb_entry_read_length = ((brw->vs.prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset); if (urb_entry_read_length == 0) { /* Setting the URB entry read length to 0 causes undefined behavior, so * if we have no URB data to read, set it to 1. */ urb_entry_read_length = 1; } dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; dw2 = GEN6_SF_STATISTICS_ENABLE | GEN6_SF_VIEWPORT_TRANSFORM_ENABLE; dw3 = 0; dw4 = 0; dw16 = 0; dw17 = 0; /* _NEW_POLYGON */ if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) dw2 |= GEN6_SF_WINDING_CCW; if (ctx->Polygon.OffsetFill) dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID; if (ctx->Polygon.OffsetLine) dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME; if (ctx->Polygon.OffsetPoint) dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT; switch (ctx->Polygon.FrontMode) { case GL_FILL: dw2 |= GEN6_SF_FRONT_SOLID; break; case GL_LINE: dw2 |= GEN6_SF_FRONT_WIREFRAME; break; case GL_POINT: dw2 |= GEN6_SF_FRONT_POINT; break; default: assert(0); break; } switch (ctx->Polygon.BackMode) { case GL_FILL: dw2 |= GEN6_SF_BACK_SOLID; break; case GL_LINE: dw2 |= GEN6_SF_BACK_WIREFRAME; break; case GL_POINT: dw2 |= GEN6_SF_BACK_POINT; break; default: assert(0); break; } /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) dw3 |= GEN6_SF_SCISSOR_ENABLE; /* _NEW_POLYGON */ if (ctx->Polygon.CullFlag) { switch (ctx->Polygon.CullFaceMode) { case GL_FRONT: dw3 |= GEN6_SF_CULL_FRONT; break; case GL_BACK: dw3 |= GEN6_SF_CULL_BACK; break; case GL_FRONT_AND_BACK: dw3 |= GEN6_SF_CULL_BOTH; break; default: assert(0); break; } } else { dw3 |= GEN6_SF_CULL_NONE; } /* _NEW_LINE */ dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) << GEN6_SF_LINE_WIDTH_SHIFT; if (ctx->Line.SmoothFlag) { dw3 |= GEN6_SF_LINE_AA_ENABLE; dw3 |= GEN6_SF_LINE_AA_MODE_TRUE; dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0; } /* _NEW_PROGRAM | _NEW_POINT */ if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated)) dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH; /* Clamp to ARB_point_parameters user limits */ point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); /* Clamp to the hardware limits and convert to fixed point */ dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); /* * Window coordinates in an FBO are inverted, which means point * sprite origin must be inverted, too. */ if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) { point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT; } else { point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT; } dw1 |= point_sprite_origin; /* _NEW_LIGHT */ if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { dw4 |= (2 << GEN6_SF_TRI_PROVOKE_SHIFT) | (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) | (1 << GEN6_SF_LINE_PROVOKE_SHIFT); } else { dw4 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT); } /* Create the mapping from the FS inputs we produce to the VS outputs * they source from. */ for (; attr < FRAG_ATTRIB_MAX; attr++) { enum glsl_interp_qualifier interp_qualifier = brw->fragment_program->InterpQualifier[attr]; bool is_gl_Color = attr == FRAG_ATTRIB_COL0 || attr == FRAG_ATTRIB_COL1; if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr))) continue; /* _NEW_POINT */ if (ctx->Point.PointSprite && (attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7) && ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) { dw16 |= (1 << input_index); } if (attr == FRAG_ATTRIB_PNTC) dw16 |= (1 << input_index); /* flat shading */ if (interp_qualifier == INTERP_QUALIFIER_FLAT || (shade_model_flat && is_gl_Color && interp_qualifier == INTERP_QUALIFIER_NONE)) dw17 |= (1 << input_index); /* The hardware can only do the overrides on 16 overrides at a * time, and the other up to 16 have to be lined up so that the * input index = the output index. We'll need to do some * tweaking to make sure that's the case. */ assert(input_index < 16 || attr == input_index); /* CACHE_NEW_VS_PROG | _NEW_LIGHT | _NEW_PROGRAM */ attr_overrides[input_index++] = get_attr_override(&brw->vs.prog_data->vue_map, urb_entry_read_offset, attr, ctx->VertexProgram._TwoSideEnabled); } for (; input_index < FRAG_ATTRIB_MAX; input_index++) attr_overrides[input_index] = 0; BEGIN_BATCH(20); OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); OUT_BATCH(dw1); OUT_BATCH(dw2); OUT_BATCH(dw3); OUT_BATCH(dw4); OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */ OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */ OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */ for (i = 0; i < 8; i++) { OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16); } OUT_BATCH(dw16); /* point sprite texcoord bitmask */ OUT_BATCH(dw17); /* constant interp bitmask */ OUT_BATCH(0); /* wrapshortest enables 0-7 */ OUT_BATCH(0); /* wrapshortest enables 8-15 */ ADVANCE_BATCH(); }
static void upload_wm_state(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* BRW_NEW_FS_PROG_DATA */ const struct brw_wm_prog_data *prog_data = brw_wm_prog_data(brw->wm.base.prog_data); bool writes_depth = prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; uint32_t dw1, dw2; /* _NEW_BUFFERS */ const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; dw1 = dw2 = 0; dw1 |= GEN7_WM_STATISTICS_ENABLE; dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; /* _NEW_LINE */ if (ctx->Line.StippleFlag) dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; /* _NEW_POLYGON */ if (ctx->Polygon.StippleFlag) dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE; if (prog_data->uses_src_depth) dw1 |= GEN7_WM_USES_SOURCE_DEPTH; if (prog_data->uses_src_w) dw1 |= GEN7_WM_USES_SOURCE_W; dw1 |= prog_data->computed_depth_mode << GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT; dw1 |= prog_data->barycentric_interp_modes << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* _NEW_COLOR, _NEW_MULTISAMPLE _NEW_BUFFERS */ /* Enable if the pixel shader kernel generates and outputs oMask. */ if (prog_data->uses_kill || _mesa_is_alpha_test_enabled(ctx) || _mesa_is_alpha_to_coverage_enabled(ctx) || prog_data->uses_omask) { dw1 |= GEN7_WM_KILL_ENABLE; } /* _NEW_BUFFERS | _NEW_COLOR */ if (brw_color_buffer_write_enabled(brw) || writes_depth || prog_data->has_side_effects || dw1 & GEN7_WM_KILL_ENABLE) { dw1 |= GEN7_WM_DISPATCH_ENABLE; } if (multisampled_fbo) { /* _NEW_MULTISAMPLE */ if (ctx->Multisample.Enabled) dw1 |= GEN7_WM_MSRAST_ON_PATTERN; else dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; if (prog_data->persample_dispatch) dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; else dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; } else { dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; } if (prog_data->uses_sample_mask) { dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK; } /* BRW_NEW_FS_PROG_DATA */ if (prog_data->early_fragment_tests) dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS; else if (prog_data->has_side_effects) dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC; /* The "UAV access enable" bits are unnecessary on HSW because they only * seem to have an effect on the HW-assisted coherency mechanism which we * don't need, and the rasterization-related UAV_ONLY flag and the * DISPATCH_ENABLE bit can be set independently from it. * C.f. gen8_upload_ps_extra(). * * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | _NEW_COLOR */ if (brw->is_haswell && !(brw_color_buffer_write_enabled(brw) || writes_depth) && prog_data->has_side_effects) dw2 |= HSW_WM_UAV_ONLY; BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); OUT_BATCH(dw1); OUT_BATCH(dw2); ADVANCE_BATCH(); }
static void gen8_upload_gs_state(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; const struct brw_stage_state *stage_state = &brw->gs.base; /* BRW_NEW_GEOMETRY_PROGRAM */ bool active = brw->geometry_program; /* BRW_NEW_GS_PROG_DATA */ const struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base; if (active) { int urb_entry_write_offset = 1; uint32_t urb_entry_output_length = ((prog_data->vue_map.num_slots + 1) / 2 - urb_entry_write_offset); if (urb_entry_output_length == 0) urb_entry_output_length = 1; BEGIN_BATCH(10); OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2)); OUT_BATCH(stage_state->prog_offset); OUT_BATCH(0); OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE | brw->geometry_program->VerticesIn | ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((prog_data->base.binding_table.size_bytes / 4) << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(brw->gs.prog_data->base.base.total_scratch) - 11); WARN_ONCE(true, "May need to implement a temporary workaround: GS Number of " "URB Entries must be less than or equal to the GS Maximum " "Number of Threads.\n"); } else { OUT_BATCH(0); OUT_BATCH(0); } /* DW6 */ OUT_BATCH(((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) << GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) | (brw->gs.prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) | (prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | (prog_data->base.dispatch_grf_start_reg << GEN6_GS_DISPATCH_START_GRF_SHIFT)); uint32_t dw7 = (brw->gs.prog_data->control_data_header_size_hwords << GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | brw->gs.prog_data->dispatch_mode | ((brw->gs.prog_data->invocations - 1) << GEN7_GS_INSTANCE_CONTROL_SHIFT) | GEN6_GS_STATISTICS_ENABLE | (brw->gs.prog_data->include_primitive_id ? GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | GEN7_GS_REORDER_TRAILING | GEN7_GS_ENABLE; uint32_t dw8 = brw->gs.prog_data->control_data_format << HSW_GS_CONTROL_DATA_FORMAT_SHIFT; if (brw->gen < 9) dw7 |= (brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT; else dw8 |= brw->max_gs_threads - 1; /* DW7 */ OUT_BATCH(dw7); /* DW8 */ OUT_BATCH(dw8); /* DW9 / _NEW_TRANSFORM */ OUT_BATCH((ctx->Transform.ClipPlanesEnabled << GEN8_GS_USER_CLIP_DISTANCE_SHIFT) | (urb_entry_output_length << GEN8_GS_URB_OUTPUT_LENGTH_SHIFT) | (urb_entry_write_offset << GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT)); ADVANCE_BATCH(); } else { BEGIN_BATCH(10); OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2)); OUT_BATCH(0); /* prog_bo */ OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(GEN6_GS_STATISTICS_ENABLE); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } }
static void upload_wm_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); uint32_t dw2, dw4, dw5, dw6; /* _NEW_BUFFERS */ bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } else { BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); /* Pointer to the WM constant buffer. Covered by the set of * state flags from gen6_upload_wm_push_constants. */ OUT_BATCH(brw->wm.push_const_offset + ALIGN(brw->wm.prog_data->nr_params, brw->wm.prog_data->dispatch_width) / 8 - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } dw2 = dw4 = dw5 = dw6 = 0; dw4 |= GEN6_WM_STATISTICS_ENABLE; dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; /* Use ALT floating point mode for ARB fragment programs, because they * require 0^0 == 1. Even though _CurrentFragmentProgram is used for * rendering, CurrentFragmentProgram is used for this check to * differentiate between the GLSL and non-GLSL cases. */ if (ctx->Shader.CurrentFragmentProgram == NULL) dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT; /* CACHE_NEW_SAMPLER */ dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT; dw4 |= (brw->wm.prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0); dw4 |= (brw->wm.prog_data->first_curbe_grf_16 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2); dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->dispatch_width == 8) { dw5 |= GEN6_WM_8_DISPATCH_ENABLE; if (brw->wm.prog_data->prog_offset_16) dw5 |= GEN6_WM_16_DISPATCH_ENABLE; } else { dw5 |= GEN6_WM_16_DISPATCH_ENABLE; } /* CACHE_NEW_WM_PROG | _NEW_COLOR */ if (brw->wm.prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) && ctx->Color.Blend[0]._UsesDualSrc) { dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE; } /* _NEW_LINE */ if (ctx->Line.StippleFlag) dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE; /* _NEW_POLYGON */ if (ctx->Polygon.StippleFlag) dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE; /* BRW_NEW_FRAGMENT_PROGRAM */ if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W; if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) dw5 |= GEN6_WM_COMPUTED_DEPTH; /* CACHE_NEW_WM_PROG */ dw6 |= brw->wm.prog_data->barycentric_interp_modes << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* _NEW_COLOR, _NEW_MULTISAMPLE */ if (fp->program.UsesKill || ctx->Color.AlphaEnabled || ctx->Multisample.SampleAlphaToCoverage) dw5 |= GEN6_WM_KILL_ENABLE; if (brw_color_buffer_write_enabled(brw) || dw5 & (GEN6_WM_KILL_ENABLE | GEN6_WM_COMPUTED_DEPTH)) { dw5 |= GEN6_WM_DISPATCH_ENABLE; } dw6 |= _mesa_bitcount_64(brw->fragment_program->Base.InputsRead) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; if (multisampled_fbo) { /* _NEW_MULTISAMPLE */ if (ctx->Multisample.Enabled) dw6 |= GEN6_WM_MSRAST_ON_PATTERN; else dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; } else { dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; } BEGIN_BATCH(9); OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); if (brw->wm.prog_data->total_scratch) { OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(brw->wm.prog_data->total_scratch) - 11); } else { OUT_BATCH(0); } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(dw6); OUT_BATCH(0); /* kernel 1 pointer */ /* kernel 2 pointer */ OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16); ADVANCE_BATCH(); }
static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; struct gl_framebuffer *fb = ctx->DrawBuffer; /* _NEW_BUFFERS */ struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL); struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt; struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt; struct intel_mipmap_tree *hiz_mt = brw->depthstencil.hiz_mt; uint32_t tile_x = brw->depthstencil.tile_x; uint32_t tile_y = brw->depthstencil.tile_y; unsigned int len; bool separate_stencil = false; if (stencil_mt && stencil_mt->format == MESA_FORMAT_S8) separate_stencil = true; /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both * non-pipelined state that will need the PIPE_CONTROL workaround. */ if (intel->gen == 6) { intel_emit_post_sync_nonzero_flush(intel); intel_emit_depth_stall_flushes(intel); } /* If there's a packed depth/stencil bound to stencil only, we need to * emit the packed depth/stencil buffer packet. */ if (!depth_irb && stencil_irb && !separate_stencil) { depth_irb = stencil_irb; depth_mt = stencil_mt; } if (intel->gen >= 6) len = 7; else if (intel->is_g4x || intel->gen == 5) len = 6; else len = 5; if (!depth_irb && !separate_stencil) { BEGIN_BATCH(len); OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); if (intel->is_g4x || intel->gen >= 5) OUT_BATCH(0); if (intel->gen >= 6) OUT_BATCH(0); ADVANCE_BATCH(); } else if (!depth_irb && separate_stencil) { /* * There exists a separate stencil buffer but no depth buffer. * * The stencil buffer inherits most of its fields from * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and * height. * * Enable the hiz bit because it and the separate stencil bit must have * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit * 1.21 "Separate Stencil Enable": * [DevIL]: If this field is enabled, Hierarchical Depth Buffer * Enable must also be enabled. * * [DevGT]: This field must be set to the same value (enabled or * disabled) as Hierarchical Depth Buffer Enable * * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1, * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface: * [DevGT+]: This field must be set to TRUE. */ assert(intel->has_separate_stencil); BEGIN_BATCH(len); OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (1 << 21) | /* separate stencil enable */ (1 << 22) | /* hiz enable */ (BRW_TILEWALK_YMAJOR << 26) | (1 << 27) | /* tiled surface */ (BRW_SURFACE_2D << 29)); OUT_BATCH(0); OUT_BATCH(((stencil_irb->Base.Base.Width + tile_x - 1) << 6) | (stencil_irb->Base.Base.Height + tile_y - 1) << 19); OUT_BATCH(0); if (intel->is_g4x || intel->gen >= 5) OUT_BATCH(tile_x | (tile_y << 16)); else assert(tile_x == 0 && tile_y == 0); if (intel->gen >= 6) OUT_BATCH(0); ADVANCE_BATCH(); } else { struct intel_region *region = depth_mt->region; /* If using separate stencil, hiz must be enabled. */ assert(!separate_stencil || hiz_mt); assert(intel->gen < 6 || region->tiling == I915_TILING_Y); assert(!hiz_mt || region->tiling == I915_TILING_Y); BEGIN_BATCH(len); OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((region->pitch - 1) | (brw_depthbuffer_format(brw) << 18) | ((hiz_mt ? 1 : 0) << 21) | /* separate stencil enable */ ((hiz_mt ? 1 : 0) << 22) | /* hiz enable */ (BRW_TILEWALK_YMAJOR << 26) | ((region->tiling != I915_TILING_NONE) << 27) | (BRW_SURFACE_2D << 29)); OUT_RELOC(region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, brw->depthstencil.depth_offset); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | (((depth_irb->Base.Base.Width + tile_x) - 1) << 6) | (((depth_irb->Base.Base.Height + tile_y) - 1) << 19)); OUT_BATCH(0); if (intel->is_g4x || intel->gen >= 5) OUT_BATCH(tile_x | (tile_y << 16)); else assert(tile_x == 0 && tile_y == 0); if (intel->gen >= 6) OUT_BATCH(0); ADVANCE_BATCH(); } if (hiz_mt || separate_stencil) { /* * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate * stencil enable' and 'hiz enable' bits were set. Therefore we must * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted; * failure to do so causes hangs on gen5 and a stall on gen6. */ /* Emit hiz buffer. */ if (hiz_mt) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); OUT_BATCH(hiz_mt->region->pitch - 1); OUT_RELOC(hiz_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, brw->depthstencil.hiz_offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } /* Emit stencil buffer. */ if (separate_stencil) { struct intel_region *region = stencil_mt->region; BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); /* The stencil buffer has quirky pitch requirements. From Vol 2a, * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": * The pitch must be set to 2x the value computed based on width, as * the stencil buffer is stored with two rows interleaved. */ OUT_BATCH(2 * region->pitch - 1); OUT_RELOC(region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, brw->depthstencil.stencil_offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } } /* * On Gen >= 6, emit clear params for safety. If using hiz, then clear * params must be emitted. * * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS: * 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet * when HiZ is enabled and the DEPTH_BUFFER_STATE changes. */ if (intel->gen >= 6 || hiz_mt) { if (intel->gen == 6) intel_emit_post_sync_nonzero_flush(intel); BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | GEN5_DEPTH_CLEAR_VALID | (2 - 2)); OUT_BATCH(depth_irb ? depth_irb->mt->depth_clear_value : 0); ADVANCE_BATCH(); } }
static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom) { r100ContextPtr r100 = R100_CONTEXT(ctx); BATCH_LOCALS(&r100->radeon); struct radeon_renderbuffer *rrb, *drb; uint32_t cbpitch = 0; uint32_t zbpitch = 0; uint32_t dwords = atom->check(ctx, atom); uint32_t depth_fmt; rrb = radeon_get_colorbuffer(&r100->radeon); if (!rrb || !rrb->bo) { fprintf(stderr, "no rrb\n"); return; } atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); if (rrb->cpp == 4) atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; else switch (rrb->base.Base.Format) { case MESA_FORMAT_RGB565: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; break; case MESA_FORMAT_ARGB4444: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444; break; case MESA_FORMAT_ARGB1555: atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555; break; default: _mesa_problem(ctx, "unexpected format in ctx_emit_cs()"); } cbpitch = (rrb->pitch / rrb->cpp); if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) cbpitch |= R200_COLOR_TILE_ENABLE; if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) cbpitch |= RADEON_COLOR_MICROTILE_ENABLE; drb = radeon_get_depthbuffer(&r100->radeon); if (drb) { zbpitch = (drb->pitch / drb->cpp); if (drb->cpp == 4) depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; else depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; } BEGIN_BATCH_NO_AUTOSTATE(dwords); /* In the CS case we need to split this up */ OUT_BATCH(CP_PACKET0(packet[0].start, 3)); OUT_BATCH_TABLE((atom->cmd + 1), 4); if (drb) { OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0)); OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0)); OUT_BATCH(zbpitch); } OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0)); OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1)); OUT_BATCH(atom->cmd[CTX_PP_CNTL]); OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); if (rrb) { OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0)); OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); } // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) { // OUT_BATCH_TABLE((atom->cmd + 14), 4); // } END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(4); OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0)); OUT_BATCH(0); OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0)); if (rrb) { OUT_BATCH(((rrb->base.Base.Width - 1) << RADEON_RE_WIDTH_SHIFT) | ((rrb->base.Base.Height - 1) << RADEON_RE_HEIGHT_SHIFT)); } else { OUT_BATCH(0); } END_BATCH(); }
static void brw_emit_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; GLuint i, nr_elements; brw_prepare_vertices(brw); brw_emit_query_begin(brw); nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid; /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. * * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ if (nr_elements == 0) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1); if (brw->gen >= 6) { OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); } OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); ADVANCE_BATCH(); return; } /* Now emit VB and VEP state packets. */ if (brw->vb.nr_buffers) { if (brw->gen >= 6) { assert(brw->vb.nr_buffers <= 33); } else { assert(brw->vb.nr_buffers <= 17); } BEGIN_BATCH(1 + 4*brw->vb.nr_buffers); OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; uint32_t dw0; if (brw->gen >= 6) { dw0 = buffer->step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA : GEN6_VB0_ACCESS_VERTEXDATA; dw0 |= i << GEN6_VB0_INDEX_SHIFT; } else { dw0 = buffer->step_rate ? BRW_VB0_ACCESS_INSTANCEDATA : BRW_VB0_ACCESS_VERTEXDATA; dw0 |= i << BRW_VB0_INDEX_SHIFT; } if (brw->gen >= 7) dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; if (brw->gen == 7) dw0 |= GEN7_MOCS_L3 << 16; WARN_ONCE(buffer->stride >= (brw->gen >= 5 ? 2048 : 2047), "VBO stride %d too large, bad rendering may occur\n", buffer->stride); OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); if (brw->gen >= 5) { OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1); } else OUT_BATCH(0); OUT_BATCH(buffer->step_rate); } ADVANCE_BATCH(); } /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably * for VertexID/InstanceID. */ if (brw->gen >= 6) { assert(nr_elements <= 34); } else { assert(nr_elements <= 18); } struct brw_vertex_element *gen6_edgeflag_input = NULL; BEGIN_BATCH(1 + nr_elements * 2); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { /* Gen6+ passes edgeflag as sideband along with the vertex, instead * of in the VUE. We have to upload it sideband as the last vertex * element according to the B-Spec. */ if (brw->gen >= 6) { gen6_edgeflag_input = input; continue; } } switch (input->glarray->Size) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT : BRW_VE1_COMPONENT_STORE_1_FLT; break; } if (brw->gen >= 6) { OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } else { OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } if (brw->gen >= 5) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); else OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } if (brw->gen >= 6 && gen6_edgeflag_input) { uint32_t format = brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | GEN6_VE0_EDGE_FLAG_ENABLE | (format << BRW_VE0_FORMAT_SHIFT) | (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); } if (brw->vs.prog_data->uses_vertexid) { uint32_t dw0 = 0, dw1 = 0; dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) | (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); if (brw->gen >= 6) { dw0 |= GEN6_VE0_VALID; } else { dw0 |= BRW_VE0_VALID; dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT; } /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values, * the format is ignored and the value is always int. */ OUT_BATCH(dw0); OUT_BATCH(dw1); } ADVANCE_BATCH(); }
static void i915_emit_composite_setup(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); int op = intel->i915_render_state.op; PicturePtr mask_picture = intel->render_mask_picture; PicturePtr dest_picture = intel->render_dest_picture; PixmapPtr mask = intel->render_mask; PixmapPtr dest = intel->render_dest; Bool is_solid_src, is_solid_mask; int tex_count, t; intel->needs_render_state_emit = FALSE; IntelEmitInvarientState(scrn); intel->last_3d = LAST_3D_RENDER; is_solid_src = intel->render_source_is_solid; is_solid_mask = intel->render_mask_is_solid; tex_count = 0; tex_count += ! is_solid_src; tex_count += mask && ! is_solid_mask; assert(intel->in_batch_atomic); if (tex_count != 0) { OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count)); OUT_BATCH((1 << tex_count) - 1); for (t = 0; t < tex_count; t++) { OUT_RELOC_PIXMAP(intel->texture[t], I915_GEM_DOMAIN_SAMPLER, 0, 0); OUT_BATCH(intel->mapstate[3*t + 1]); OUT_BATCH(intel->mapstate[3*t + 2]); } OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count)); OUT_BATCH((1 << tex_count) - 1); for (t = 0; t < tex_count; t++) { OUT_BATCH(intel->samplerstate[3*t + 0]); OUT_BATCH(intel->samplerstate[3*t + 1]); OUT_BATCH(intel->samplerstate[3*t + 2]); } } if (is_solid_src) { OUT_BATCH (_3DSTATE_DFLT_DIFFUSE_CMD); OUT_BATCH (intel->render_source_solid); } if (mask && is_solid_mask) { OUT_BATCH (_3DSTATE_DFLT_SPEC_CMD); OUT_BATCH (intel->render_mask_solid); } /* BUF_INFO is an implicit flush, so avoid if the target has not changed. * XXX However for reasons unfathomed, correct rendering in KDE requires * at least a MI_FLUSH | INHIBIT_RENDER_CACHE_FLUSH here. */ if (1 || dest != intel->render_current_dest) { uint32_t tiling_bits; intel_batch_do_flush(scrn); if (intel_pixmap_tiled(dest)) { tiling_bits = BUF_3D_TILED_SURFACE; if (intel_get_pixmap_private(dest)->tiling == I915_TILING_Y) tiling_bits |= BUF_3D_TILE_WALK_Y; } else tiling_bits = 0; OUT_BATCH(_3DSTATE_BUF_INFO_CMD); OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits | BUF_3D_PITCH(intel_pixmap_pitch(dest))); OUT_RELOC_PIXMAP(dest, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); OUT_BATCH(intel->i915_render_state.dst_format); /* draw rect is unconditional */ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); OUT_BATCH(0x00000000); OUT_BATCH(0x00000000); /* ymin, xmin */ OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) | DRAW_XMAX(dest->drawable.width - 1)); /* yorig, xorig (relate to color buffer?) */ OUT_BATCH(0x00000000); intel->render_current_dest = dest; } { uint32_t ss2; ss2 = ~0; t = 0; if (! is_solid_src) { ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT); ss2 |= S2_TEXCOORD_FMT(t, intel_transform_is_affine(intel->transform[t]) ? TEXCOORDFMT_2D : TEXCOORDFMT_4D); t++; } if (mask && ! is_solid_mask) { ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT); ss2 |= S2_TEXCOORD_FMT(t, intel_transform_is_affine(intel->transform[t]) ? TEXCOORDFMT_2D : TEXCOORDFMT_4D); t++; } if (intel->needs_render_ca_pass) { OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | 0); OUT_BATCH(ss2); } else { OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1); OUT_BATCH(ss2); OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format)); } } if (! intel->needs_render_ca_pass) i915_composite_emit_shader(intel, op); }
static void upload_wm_state(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); /* BRW_NEW_FS_PROG_DATA */ const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; bool writes_depth = prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; uint32_t dw1, dw2; /* _NEW_BUFFERS */ const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; dw1 = dw2 = 0; dw1 |= GEN7_WM_STATISTICS_ENABLE; dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; /* _NEW_LINE */ if (ctx->Line.StippleFlag) dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; /* _NEW_POLYGON */ if (ctx->Polygon.StippleFlag) dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE; if (fp->program.Base.InputsRead & VARYING_BIT_POS) dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W; dw1 |= prog_data->computed_depth_mode << GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT; dw1 |= prog_data->barycentric_interp_modes << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* _NEW_COLOR, _NEW_MULTISAMPLE */ /* Enable if the pixel shader kernel generates and outputs oMask. */ if (prog_data->uses_kill || ctx->Color.AlphaEnabled || ctx->Multisample.SampleAlphaToCoverage || prog_data->uses_omask) { dw1 |= GEN7_WM_KILL_ENABLE; } if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) { dw1 |= GEN7_WM_DISPATCH_ENABLE; } /* _NEW_BUFFERS | _NEW_COLOR */ if (brw_color_buffer_write_enabled(brw) || writes_depth || dw1 & GEN7_WM_KILL_ENABLE) { dw1 |= GEN7_WM_DISPATCH_ENABLE; } if (multisampled_fbo) { /* _NEW_MULTISAMPLE */ if (ctx->Multisample.Enabled) dw1 |= GEN7_WM_MSRAST_ON_PATTERN; else dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; if (_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false) > 1) dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; else dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; } else { dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; } if (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) { dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK; } BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); OUT_BATCH(dw1); OUT_BATCH(dw2); ADVANCE_BATCH(); }
void brw_emit_depth_stencil_hiz(struct brw_context *brw, struct intel_mipmap_tree *depth_mt, uint32_t depth_offset, uint32_t depthbuffer_format, uint32_t depth_surface_type, struct intel_mipmap_tree *stencil_mt, struct intel_mipmap_tree *hiz_mt, bool separate_stencil, uint32_t width, uint32_t height, uint32_t tile_x, uint32_t tile_y) { struct intel_context *intel = &brw->intel; /* Enable the hiz bit if we're doing separate stencil, because it and the * separate stencil bit must have the same value. From Section 2.11.5.6.1.1 * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable": * [DevIL]: If this field is enabled, Hierarchical Depth Buffer * Enable must also be enabled. * * [DevGT]: This field must be set to the same value (enabled or * disabled) as Hierarchical Depth Buffer Enable */ bool enable_hiz_ss = hiz_mt || separate_stencil; /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both * non-pipelined state that will need the PIPE_CONTROL workaround. */ if (intel->gen == 6) { intel_emit_post_sync_nonzero_flush(intel); intel_emit_depth_stall_flushes(intel); } unsigned int len; if (intel->gen >= 6) len = 7; else if (intel->is_g4x || intel->gen == 5) len = 6; else len = 5; BEGIN_BATCH(len); OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) | (depthbuffer_format << 18) | ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */ ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */ (BRW_TILEWALK_YMAJOR << 26) | ((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1) << 27) | (depth_surface_type << 29)); if (depth_mt) { OUT_RELOC(depth_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, depth_offset); } else { OUT_BATCH(0); } OUT_BATCH(((width + tile_x - 1) << 6) | ((height + tile_y - 1) << 19)); OUT_BATCH(0); if (intel->is_g4x || intel->gen >= 5) OUT_BATCH(tile_x | (tile_y << 16)); else assert(tile_x == 0 && tile_y == 0); if (intel->gen >= 6) OUT_BATCH(0); ADVANCE_BATCH(); if (hiz_mt || separate_stencil) { /* * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate * stencil enable' and 'hiz enable' bits were set. Therefore we must * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted; * failure to do so causes hangs on gen5 and a stall on gen6. */ /* Emit hiz buffer. */ if (hiz_mt) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); OUT_BATCH(hiz_mt->region->pitch - 1); OUT_RELOC(hiz_mt->region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, brw->depthstencil.hiz_offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } /* Emit stencil buffer. */ if (separate_stencil) { struct intel_region *region = stencil_mt->region; BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); /* The stencil buffer has quirky pitch requirements. From Vol 2a, * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": * The pitch must be set to 2x the value computed based on width, as * the stencil buffer is stored with two rows interleaved. */ OUT_BATCH(2 * region->pitch - 1); OUT_RELOC(region->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, brw->depthstencil.stencil_offset); ADVANCE_BATCH(); } else { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); } } /* * On Gen >= 6, emit clear params for safety. If using hiz, then clear * params must be emitted. * * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS: * 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet * when HiZ is enabled and the DEPTH_BUFFER_STATE changes. */ if (intel->gen >= 6 || hiz_mt) { if (intel->gen == 6) intel_emit_post_sync_nonzero_flush(intel); BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | GEN5_DEPTH_CLEAR_VALID | (2 - 2)); OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0); ADVANCE_BATCH(); } }
/** * When the GS is not in use, we assign the entire URB space to the VS. When * the GS is in use, we split the URB space evenly between the VS and the GS. * This is not ideal, but it's simple. * * URB size / 2 URB size / 2 * _____________-______________ _____________-______________ * / \ / \ * +-------------------------------------------------------------+ * | Vertex Shader Entries | Geometry Shader Entries | * +-------------------------------------------------------------+ * * Sandybridge GT1 has 32kB of URB space, while GT2 has 64kB. * (See the Sandybridge PRM, Volume 2, Part 1, Section 1.4.7: 3DSTATE_URB.) */ static void gen6_upload_urb( struct brw_context *brw ) { int nr_vs_entries, nr_gs_entries; int total_urb_size = brw->urb.size * 1024; /* in bytes */ bool gs_present = brw->ff_gs.prog_active || brw->geometry_program; /* CACHE_NEW_VS_PROG */ unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1); /* Whe using GS to do transform feedback only we use the same VUE layout for * VS outputs and GS outputs (as it's what the SF and Clipper expect), so we * can simply make the GS URB entry size the same as for the VS. This may * technically be too large in cases where we have few vertex attributes and * a lot of varyings, since the VS size is determined by the larger of the * two. For now, it's safe. * * For user-provided GS the assumption above does not hold since the GS * outputs can be different from the VS outputs. */ unsigned gs_size = vs_size; if (brw->geometry_program) { gs_size = brw->gs.prog_data->base.urb_entry_size; assert(gs_size >= 1); } /* Calculate how many entries fit in each stage's section of the URB */ if (gs_present) { nr_vs_entries = (total_urb_size/2) / (vs_size * 128); nr_gs_entries = (total_urb_size/2) / (gs_size * 128); } else { nr_vs_entries = total_urb_size / (vs_size * 128); nr_gs_entries = 0; } /* Then clamp to the maximum allowed by the hardware */ if (nr_vs_entries > brw->urb.max_vs_entries) nr_vs_entries = brw->urb.max_vs_entries; if (nr_gs_entries > brw->urb.max_gs_entries) nr_gs_entries = brw->urb.max_gs_entries; /* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */ brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4); brw->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4); assert(brw->urb.nr_vs_entries >= brw->urb.min_vs_entries); assert(brw->urb.nr_vs_entries % 4 == 0); assert(brw->urb.nr_gs_entries % 4 == 0); assert(vs_size <= 5); assert(gs_size <= 5); BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); OUT_BATCH(((vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT)); OUT_BATCH(((gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT)); ADVANCE_BATCH(); /* From the PRM Volume 2 part 1, section 1.4.7: * * Because of a urb corruption caused by allocating a previous gsunit’s * urb entry to vsunit software is required to send a "GS NULL * Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus * a dummy DRAW call before any case where VS will be taking over GS URB * space. * * It is not clear exactly what this means ("URB fence" is a command that * doesn't exist on Gen6). So for now we just do a full pipeline flush as * a workaround. */ if (brw->urb.gen6_gs_previously_active && !gs_present) intel_batchbuffer_emit_mi_flush(brw); brw->urb.gen6_gs_previously_active = gs_present; }
static void brw_emit_prim(struct brw_context *brw, const struct _mesa_prim *prim, uint32_t hw_prim) { int verts_per_instance; int vertex_access_type; int start_vertex_location; int base_vertex_location; int indirect_flag; DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), prim->start, prim->count); start_vertex_location = prim->start; base_vertex_location = prim->basevertex; if (prim->indexed) { vertex_access_type = brw->gen >= 7 ? GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM : GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM; start_vertex_location += brw->ib.start_vertex_offset; base_vertex_location += brw->vb.start_vertex_bias; } else { vertex_access_type = brw->gen >= 7 ? GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL : GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; start_vertex_location += brw->vb.start_vertex_bias; } /* We only need to trim the primitive count on pre-Gen6. */ if (brw->gen < 6) verts_per_instance = trim(prim->mode, prim->count); else verts_per_instance = prim->count; /* If nothing to emit, just return. */ if (verts_per_instance == 0 && !prim->is_indirect) return; /* If we're set to always flush, do it before and after the primitive emit. * We want to catch both missed flushes that hurt instruction/state cache * and missed flushes of the render cache as it heads to other parts of * the besides the draw code. */ if (brw->always_flush_cache) { intel_batchbuffer_emit_mi_flush(brw); } /* If indirect, emit a bunch of loads from the indirect BO. */ if (prim->is_indirect) { struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer; drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_buffer_object(indirect_buffer), prim->indirect_offset, 5 * sizeof(GLuint)); indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE; BEGIN_BATCH(15); OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); OUT_BATCH(GEN7_3DPRIM_VERTEX_COUNT); OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 0); OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); OUT_BATCH(GEN7_3DPRIM_INSTANCE_COUNT); OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 4); OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); OUT_BATCH(GEN7_3DPRIM_START_VERTEX); OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 8); if (prim->indexed) { OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX); OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 12); OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); OUT_BATCH(GEN7_3DPRIM_START_INSTANCE); OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 16); } else { OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); OUT_BATCH(GEN7_3DPRIM_START_INSTANCE); OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, prim->indirect_offset + 12); OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX); OUT_BATCH(0); } ADVANCE_BATCH(); } else { indirect_flag = 0; } if (brw->gen >= 7) { BEGIN_BATCH(7); OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag); OUT_BATCH(hw_prim | vertex_access_type); } else { BEGIN_BATCH(6); OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | vertex_access_type); } OUT_BATCH(verts_per_instance); OUT_BATCH(start_vertex_location); OUT_BATCH(prim->num_instances); OUT_BATCH(prim->base_instance); OUT_BATCH(base_vertex_location); ADVANCE_BATCH(); /* Only used on Sandybridge; harmless to set elsewhere. */ brw->batch.need_workaround_flush = true; if (brw->always_flush_cache) { intel_batchbuffer_emit_mi_flush(brw); } }
static void upload_clip_state(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; /* BRW_NEW_META_IN_PROGRESS */ uint32_t dw1 = brw->meta_in_progress ? 0 : GEN6_CLIP_STATISTICS_ENABLE; uint32_t dw2 = 0; /* _NEW_BUFFERS */ struct gl_framebuffer *fb = ctx->DrawBuffer; /* BRW_NEW_FS_PROG_DATA */ if (brw->wm.prog_data->barycentric_interp_modes & BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS) { dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE; } dw1 |= brw->vs.prog_data->base.cull_distance_mask; if (brw->gen >= 7) dw1 |= GEN7_CLIP_EARLY_CULL; if (brw->gen == 7) { /* _NEW_POLYGON */ if (ctx->Polygon._FrontBit == _mesa_is_user_fbo(fb)) dw1 |= GEN7_CLIP_WINDING_CCW; if (ctx->Polygon.CullFlag) { switch (ctx->Polygon.CullFaceMode) { case GL_FRONT: dw1 |= GEN7_CLIP_CULLMODE_FRONT; break; case GL_BACK: dw1 |= GEN7_CLIP_CULLMODE_BACK; break; case GL_FRONT_AND_BACK: dw1 |= GEN7_CLIP_CULLMODE_BOTH; break; default: unreachable("Should not get here: invalid CullFlag"); } } else { dw1 |= GEN7_CLIP_CULLMODE_NONE; } } if (brw->gen < 8 && !ctx->Transform.DepthClamp) dw2 |= GEN6_CLIP_Z_TEST; /* _NEW_LIGHT */ if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { dw2 |= (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | (1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) | (0 << GEN6_CLIP_LINE_PROVOKE_SHIFT); } else { dw2 |= (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) | (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT); } /* _NEW_TRANSFORM */ dw2 |= (ctx->Transform.ClipPlanesEnabled << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT); if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) dw2 |= GEN6_CLIP_API_D3D; else dw2 |= GEN6_CLIP_API_OGL; dw2 |= GEN6_CLIP_GB_TEST; /* We need to disable guardband clipping if the guardband (which we always * program to the maximum screen-space bounding box of 8K x 8K) will be * smaller than the viewport. * * Closely examining the clip determination formulas in the documentation * reveals that objects will be discarded entirely if they're outside the * (small) guardband, even if they're within the (large) viewport: * * TR = TR_GB || TR_VPXY || TR_VPZ || TR_UC || TR_NEGW * TA = !TR && TA_GB && TA_VPZ && TA_NEGW * MC = !(TA || TR) * * (TA is "Trivial Accept", TR is "Trivial Reject", MC is "Must Clip".) * * Disabling guardband clipping removes the TR_GB condition, which means * they'll be considered MC ("Must Clip") unless they're rejected for * some other reason. * * Note that there is no TA_VPXY condition. If there were, objects entirely * inside a 16384x16384 viewport would be trivially accepted, breaking the * "objects must have a screenspace bounding box not exceeding 8K in the X * or Y direction" restriction. Instead, they're clipped. */ for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { if (ctx->ViewportArray[i].Width > 8192 || ctx->ViewportArray[i].Height > 8192) { dw2 &= ~GEN6_CLIP_GB_TEST; break; } } /* If the viewport dimensions are smaller than the drawable dimensions, * we have to disable guardband clipping prior to Gen8. We always program * the guardband to a fixed size, which is almost always larger than the * viewport. Any geometry which intersects the viewport but lies within * the guardband would bypass the 3D clipping stage, so it wouldn't be * clipped to the viewport. Rendering would happen beyond the viewport, * but still inside the drawable. * * Gen8+ introduces a viewport extents test which restricts rendering to * the viewport, so we can ignore this restriction. */ if (brw->gen < 8) { const float fb_width = (float)_mesa_geometric_width(fb); const float fb_height = (float)_mesa_geometric_height(fb); for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { if (ctx->ViewportArray[i].X != 0 || ctx->ViewportArray[i].Y != 0 || ctx->ViewportArray[i].Width != fb_width || ctx->ViewportArray[i].Height != fb_height) { dw2 &= ~GEN6_CLIP_GB_TEST; break; } } } /* BRW_NEW_RASTERIZER_DISCARD */ if (ctx->RasterDiscard) { dw2 |= GEN6_CLIP_MODE_REJECT_ALL; if (brw->gen == 6) { perf_debug("Rasterizer discard is currently implemented via the " "clipper; having the GS not write primitives would " "likely be faster.\n"); } } uint32_t enable; if (brw->primitive == _3DPRIM_RECTLIST) enable = 0; else enable = GEN6_CLIP_ENABLE; if (!is_drawing_points(brw) && !is_drawing_lines(brw)) dw2 |= GEN6_CLIP_XY_TEST; /* BRW_NEW_VUE_MAP_GEOM_OUT */ const int max_vp_index = (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) != 0 ? ctx->Const.MaxViewports : 1; BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); OUT_BATCH(dw1); OUT_BATCH(enable | GEN6_CLIP_MODE_NORMAL | dw2); OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | (_mesa_geometric_layers(fb) > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) | ((max_vp_index - 1) & GEN6_CLIP_MAX_VP_INDEX_MASK)); ADVANCE_BATCH(); }
/** * 3DSTATE_PS * * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite * that, thread dispatch info must still be specified. * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the * valid range for this field is [0x3, 0x2f]. * - A dispatch mode must be given; that is, at least one of the * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was * discovered through simulator error messages. */ static void gen8_blorp_emit_ps_config(struct brw_context *brw, const struct brw_blorp_params *params) { const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; uint32_t dw3, dw5, dw6, dw7, ksp0, ksp2; dw3 = dw5 = dw6 = dw7 = ksp0 = ksp2 = 0; dw3 |= GEN7_PS_VECTOR_MASK_ENABLE; if (params->src.mt) { dw3 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ dw3 |= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* Two surfaces */ } else { dw3 |= 1 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* One surface */ } dw7 |= prog_data->first_curbe_grf_0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; dw7 |= prog_data->first_curbe_grf_2 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2; if (params->wm_prog_data->dispatch_8) dw6 |= GEN7_PS_8_DISPATCH_ENABLE; if (params->wm_prog_data->dispatch_16) dw6 |= GEN7_PS_16_DISPATCH_ENABLE; ksp0 = params->wm_prog_kernel; ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2; /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; * it implicitly scales for different GT levels (which have some # of PSDs). * * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. */ if (brw->gen >= 9) dw6 |= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT; else dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT; dw6 |= GEN7_PS_POSOFFSET_NONE; dw6 |= params->fast_clear_op; BEGIN_BATCH(12); OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2)); OUT_BATCH(ksp0); OUT_BATCH(0); OUT_BATCH(dw3); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(dw6); OUT_BATCH(dw7); OUT_BATCH(0); /* kernel 1 pointer */ OUT_BATCH(0); OUT_BATCH(ksp2); OUT_BATCH(0); ADVANCE_BATCH(); }
/* 3DSTATE_VS * * Disable vertex shader. */ void gen6_blorp_emit_vs_disable(struct brw_context *brw, const brw_blorp_params *params) { if (brw->gen == 6) { /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, * 3DSTATE_VS, Dword 5.0 "VS Function Enable": * * [DevSNB] A pipeline flush must be programmed prior to a * 3DSTATE_VS command that causes the VS Function Enable to * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL * command with CS stall bit set and a post sync operation. */ intel_emit_post_sync_nonzero_flush(brw); } /* Disable the push constant buffers. */ BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); }