void
intel_blt_copy(struct intel_batchbuffer *batch,
	      drm_intel_bo *src_bo, int src_x1, int src_y1, int src_pitch,
	      drm_intel_bo *dst_bo, int dst_x1, int dst_y1, int dst_pitch,
	      int width, int height, int bpp)
{
	uint32_t src_tiling, dst_tiling, swizzle;
	uint32_t cmd_bits = 0;
	uint32_t br13_bits;

	drm_intel_bo_get_tiling(src_bo, &src_tiling, &swizzle);
	drm_intel_bo_get_tiling(dst_bo, &dst_tiling, &swizzle);

	if (IS_965(batch->devid) && src_tiling != I915_TILING_NONE) {
		src_pitch /= 4;
		cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
	}

	if (IS_965(batch->devid) && dst_tiling != I915_TILING_NONE) {
		dst_pitch /= 4;
		cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
	}

	br13_bits = 0;
	switch (bpp) {
	case 8:
		break;
	case 16:		/* supporting only RGB565, not ARGB1555 */
		br13_bits |= 1 << 24;
		break;
	case 32:
		br13_bits |= 3 << 24;
		cmd_bits |= XY_SRC_COPY_BLT_WRITE_ALPHA |
			    XY_SRC_COPY_BLT_WRITE_RGB;
		break;
	default:
		abort();
	}

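	/* The XY blit coordinate, extent and pitch fields are signed 16-bit
	 * values, so every parameter emitted below must fit in 15 bits.
	 */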
#define CHECK_RANGE(x)	((x) >= 0 && (x) < (1 << 15))
	assert(CHECK_RANGE(src_x1) && CHECK_RANGE(src_y1) &&
	       CHECK_RANGE(dst_x1) && CHECK_RANGE(dst_y1) &&
	       CHECK_RANGE(width) && CHECK_RANGE(height) &&
	       CHECK_RANGE(src_x1 + width) && CHECK_RANGE(src_y1 + height) &&
	       CHECK_RANGE(dst_x1 + width) && CHECK_RANGE(dst_y1 + height) &&
	       CHECK_RANGE(src_pitch) && CHECK_RANGE(dst_pitch));
#undef CHECK_RANGE

	BEGIN_BATCH(8);
	OUT_BATCH(XY_SRC_COPY_BLT_CMD | cmd_bits);
	OUT_BATCH((br13_bits) |
		  (0xcc << 16) | /* copy ROP */
		  dst_pitch);
	OUT_BATCH((dst_y1 << 16) | dst_x1); /* dst x1,y1 */
	OUT_BATCH(((dst_y1 + height) << 16) | (dst_x1 + width)); /* dst x2,y2 */
	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
	OUT_BATCH((src_y1 << 16) | src_x1); /* src x1,y1 */
	OUT_BATCH(src_pitch);
	OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
	ADVANCE_BATCH();

	intel_batchbuffer_flush(batch);
}
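A minimal caller sketch (hypothetical values: the two buffer objects, the
256-byte pitches and the 64x64 region are made up, assuming both buffers are
32bpp and large enough to hold the copied block):

	/* Copy a 64x64 block from src_bo to dst_bo, both 32bpp. */
	intel_blt_copy(batch,
		       src_bo, 0, 0, 256,	/* src x1, y1, pitch in bytes */
		       dst_bo, 0, 0, 256,	/* dst x1, y1, pitch in bytes */
		       64, 64, 32);		/* width, height, bpp */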
Example #2
static void
upload_ps_state(struct brw_context *brw)
{
    struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &intel->ctx;
    uint32_t dw2, dw4, dw5;
    const int max_threads_shift = brw->intel.is_haswell ?
                                  HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;

    /* BRW_NEW_PS_BINDING_TABLE */
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2));
    OUT_BATCH(brw->wm.bind_bo_offset);
    ADVANCE_BATCH();

    /* CACHE_NEW_SAMPLER */
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
    OUT_BATCH(brw->sampler.offset);
    ADVANCE_BATCH();

    /* CACHE_NEW_WM_PROG */
    if (brw->wm.prog_data->nr_params == 0) {
        /* Disable the push constant buffers. */
        BEGIN_BATCH(7);
        OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        ADVANCE_BATCH();
    } else {
        BEGIN_BATCH(7);
        OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));

        OUT_BATCH(ALIGN(brw->wm.prog_data->nr_params,
                        brw->wm.prog_data->dispatch_width) / 8);
        OUT_BATCH(0);
        /* Pointer to the WM constant buffer.  Covered by the set of
         * state flags from gen6_upload_wm_push_constants.
         */
        OUT_BATCH(brw->wm.push_const_offset);
        OUT_BATCH(0);
        OUT_BATCH(0);
        OUT_BATCH(0);
        ADVANCE_BATCH();
    }

    dw2 = dw4 = dw5 = 0;

    /* CACHE_NEW_SAMPLER */
    dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;

    /* Use ALT floating point mode for ARB fragment programs, because they
     * require 0^0 == 1.  Even though _CurrentFragmentProgram is used for
     * rendering, CurrentFragmentProgram is used for this check to
     * differentiate between the GLSL and non-GLSL cases.
     */
    if (intel->ctx.Shader.CurrentFragmentProgram == NULL)
        dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;

    if (intel->is_haswell)
        dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */

    dw4 |= (brw->max_wm_threads - 1) << max_threads_shift;

    /* CACHE_NEW_WM_PROG */
    if (brw->wm.prog_data->nr_params > 0)
        dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;

    /* CACHE_NEW_WM_PROG | _NEW_COLOR
     *
     * The hardware wedges if you have this bit set but don't turn on any dual
     * source blend factors.
     */
    if (brw->wm.prog_data->dual_src_blend &&
            (ctx->Color.BlendEnabled & 1) &&
            ctx->Color.Blend[0]._UsesDualSrc) {
        dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
    }

    /* BRW_NEW_FRAGMENT_PROGRAM */
    if (brw->fragment_program->Base.InputsRead != 0)
        dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;

    dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
    if (brw->wm.prog_data->prog_offset_16)
        dw4 |= GEN7_PS_16_DISPATCH_ENABLE;

    dw5 |= (brw->wm.prog_data->first_curbe_grf <<
            GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
    dw5 |= (brw->wm.prog_data->first_curbe_grf_16 <<
            GEN7_PS_DISPATCH_START_GRF_SHIFT_2);

    BEGIN_BATCH(8);
    OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
    OUT_BATCH(brw->wm.prog_offset);
    OUT_BATCH(dw2);
    if (brw->wm.prog_data->total_scratch) {
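        /* The scratch-space field is log2 encoded: for a power-of-two
         * total_scratch, ffs(total_scratch) - 11 == log2(total_scratch) - 10,
         * so 1KB -> 0, 2KB -> 1, 4KB -> 2, and so on.
         */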
        OUT_RELOC(brw->wm.scratch_bo,
                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                  ffs(brw->wm.prog_data->total_scratch) - 11);
    } else {
        OUT_BATCH(0);
    }
    OUT_BATCH(dw4);
    OUT_BATCH(dw5);
    OUT_BATCH(0); /* kernel 1 pointer */
    OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16);
    ADVANCE_BATCH();
}
Example #3
File: r200_blit.c  Project: DirectFB/mesa
static inline void emit_tx_setup(struct r200_context *r200,
				 mesa_format src_mesa_format,
				 mesa_format dst_mesa_format,
				 struct radeon_bo *bo,
				 intptr_t offset,
				 unsigned width,
				 unsigned height,
				 unsigned pitch)
{
    uint32_t txformat = R200_TXFORMAT_NON_POWER2;
    BATCH_LOCALS(&r200->radeon);

    assert(width <= 2048);
    assert(height <= 2048);
    assert(offset % 32 == 0);

    /* XXX others?  BE/LE? */
    switch (src_mesa_format) {
    case MESA_FORMAT_B8G8R8A8_UNORM:
	    txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP;
	    break;
    case MESA_FORMAT_A8B8G8R8_UNORM:
	    txformat |= R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP;
	    break;
    case MESA_FORMAT_R8G8B8A8_UNORM:
	    txformat |= R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP;
	    break;
    case MESA_FORMAT_B8G8R8X8_UNORM:
	    txformat |= R200_TXFORMAT_ARGB8888;
	    break;
    case MESA_FORMAT_B5G6R5_UNORM:
	    txformat |= R200_TXFORMAT_RGB565;
	    break;
    case MESA_FORMAT_B4G4R4A4_UNORM:
	    txformat |= R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP;
	    break;
    case MESA_FORMAT_B5G5R5A1_UNORM:
	    txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP;
	    break;
    case MESA_FORMAT_A_UNORM8:
    case MESA_FORMAT_I_UNORM8:
	    txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP;
	    break;
    case MESA_FORMAT_L_UNORM8:
	    txformat |= R200_TXFORMAT_I8;
	    break;
    case MESA_FORMAT_L8A8_UNORM:
	    txformat |= R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP;
	    break;
    default:
	    break;
    }

    if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
	offset |= R200_TXO_MACRO_TILE;
    if (bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
	offset |= R200_TXO_MICRO_TILE;

    switch (dst_mesa_format) {
    case MESA_FORMAT_B8G8R8A8_UNORM:
    case MESA_FORMAT_B8G8R8X8_UNORM:
    case MESA_FORMAT_B5G6R5_UNORM:
    case MESA_FORMAT_B4G4R4A4_UNORM:
    case MESA_FORMAT_B5G5R5A1_UNORM:
    case MESA_FORMAT_A_UNORM8:
    case MESA_FORMAT_L_UNORM8:
    case MESA_FORMAT_I_UNORM8:
    default:
	    /* no swizzle required */
	    BEGIN_BATCH(10);
	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
					      RADEON_TEX_BLEND_0_ENABLE));
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
						  R200_TXC_ARG_B_ZERO |
						  R200_TXC_ARG_C_R0_COLOR |
						  R200_TXC_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
						   R200_TXC_OUTPUT_REG_R0));
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
						  R200_TXA_ARG_B_ZERO |
						  R200_TXA_ARG_C_R0_ALPHA |
						  R200_TXA_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
						   R200_TXA_OUTPUT_REG_R0));
	    END_BATCH();
	    break;
    case MESA_FORMAT_A8B8G8R8_UNORM:
	    BEGIN_BATCH(10);
	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
					      RADEON_TEX_BLEND_0_ENABLE));
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
						  R200_TXC_ARG_B_ZERO |
						  R200_TXC_ARG_C_R0_COLOR |
						  R200_TXC_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
						   R200_TXC_OUTPUT_ROTATE_GBA |
						   R200_TXC_OUTPUT_REG_R0));
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
						  R200_TXA_ARG_B_ZERO |
						  R200_TXA_ARG_C_R0_ALPHA |
						  R200_TXA_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
						   (R200_TXA_REPL_RED << R200_TXA_REPL_ARG_C_SHIFT) |
						   R200_TXA_OUTPUT_REG_R0));
	    END_BATCH();
	    break;
    case MESA_FORMAT_R8G8B8A8_UNORM:
	    BEGIN_BATCH(34);
	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
					      RADEON_TEX_BLEND_0_ENABLE |
					      RADEON_TEX_BLEND_1_ENABLE |
					      RADEON_TEX_BLEND_2_ENABLE |
					      RADEON_TEX_BLEND_3_ENABLE));
	    /* r1.r = r0.b */
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
						  R200_TXC_ARG_B_ZERO |
						  R200_TXC_ARG_C_R0_COLOR |
						  R200_TXC_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
						   R200_TXC_OUTPUT_MASK_R |
						   (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_C_SHIFT) |
						   R200_TXC_OUTPUT_REG_R1));
	    /* r1.a = r0.a */
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
						  R200_TXA_ARG_B_ZERO |
						  R200_TXA_ARG_C_R0_ALPHA |
						  R200_TXA_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
						   R200_TXA_OUTPUT_REG_R1));
	    /* r1.g = r0.g */
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_1, (R200_TXC_ARG_A_ZERO |
						  R200_TXC_ARG_B_ZERO |
						  R200_TXC_ARG_C_R0_COLOR |
						  R200_TXC_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_1, (R200_TXC_CLAMP_0_1 |
						   R200_TXC_OUTPUT_MASK_G |
						   (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_C_SHIFT) |
						   R200_TXC_OUTPUT_REG_R1));
	    /* r1.a = r0.a */
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_1, (R200_TXA_ARG_A_ZERO |
						  R200_TXA_ARG_B_ZERO |
						  R200_TXA_ARG_C_R0_ALPHA |
						  R200_TXA_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_1, (R200_TXA_CLAMP_0_1 |
						   R200_TXA_OUTPUT_REG_R1));
	    /* r1.b = r0.r */
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_2, (R200_TXC_ARG_A_ZERO |
						  R200_TXC_ARG_B_ZERO |
						  R200_TXC_ARG_C_R0_COLOR |
						  R200_TXC_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_2, (R200_TXC_CLAMP_0_1 |
						   R200_TXC_OUTPUT_MASK_B |
						   (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_C_SHIFT) |
						   R200_TXC_OUTPUT_REG_R1));
	    /* r1.a = r0.a */
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_2, (R200_TXA_ARG_A_ZERO |
						  R200_TXA_ARG_B_ZERO |
						  R200_TXA_ARG_C_R0_ALPHA |
						  R200_TXA_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_2, (R200_TXA_CLAMP_0_1 |
						   R200_TXA_OUTPUT_REG_R1));
	    /* r0.rgb = r1.rgb */
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_3, (R200_TXC_ARG_A_ZERO |
						  R200_TXC_ARG_B_ZERO |
						  R200_TXC_ARG_C_R1_COLOR |
						  R200_TXC_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_3, (R200_TXC_CLAMP_0_1 |
						   R200_TXC_OUTPUT_REG_R0));
	    /* r0.a = r1.a */
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_3, (R200_TXA_ARG_A_ZERO |
						  R200_TXA_ARG_B_ZERO |
						  R200_TXA_ARG_C_R1_ALPHA |
						  R200_TXA_OP_MADD));
	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_3, (R200_TXA_CLAMP_0_1 |
						   R200_TXA_OUTPUT_REG_R0));
	    END_BATCH();
	    break;
    }

    BEGIN_BATCH(18);
    OUT_BATCH_REGVAL(R200_PP_CNTL_X, 0);
    OUT_BATCH_REGVAL(R200_PP_TXMULTI_CTL_0, 0);
    OUT_BATCH_REGVAL(R200_PP_TXFILTER_0, (R200_CLAMP_S_CLAMP_LAST |
					  R200_CLAMP_T_CLAMP_LAST |
					  R200_MAG_FILTER_NEAREST |
					  R200_MIN_FILTER_NEAREST));
    OUT_BATCH_REGVAL(R200_PP_TXFORMAT_0, txformat);
    OUT_BATCH_REGVAL(R200_PP_TXFORMAT_X_0, 0);
    OUT_BATCH_REGVAL(R200_PP_TXSIZE_0, ((width - 1) |
					((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
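    /* TXPITCH is programmed as the byte pitch minus 32. */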
    OUT_BATCH_REGVAL(R200_PP_TXPITCH_0, pitch * _mesa_get_format_bytes(src_mesa_format) - 32);

    OUT_BATCH_REGSEQ(R200_PP_TXOFFSET_0, 1);
    OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);

    END_BATCH();
}
void
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
				  GLuint cpp,
				  GLubyte *src_bits, GLuint src_size,
				  GLuint fg_color,
				  GLshort dst_pitch,
				  dri_bo *dst_buffer,
				  GLuint dst_offset,
				  GLboolean dst_tiled,
				  GLshort x, GLshort y,
				  GLshort w, GLshort h,
				  GLenum logic_op)
{
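   /* The immediate bitmap data is padded to a qword (8-byte) boundary
    * before being counted in dwords.
    */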
   int dwords = ALIGN(src_size, 8) / 4;
   uint32_t opcode, br13, blit_cmd;

   assert( logic_op - GL_CLEAR >= 0 );
   assert( logic_op - GL_CLEAR < 0x10 );

   if (w < 0 || h < 0)
      return;

   dst_pitch *= cpp;

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
       __FUNCTION__,
       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);

   intel_batchbuffer_require_space( intel->batch,
				    (8 * 4) +
				    (3 * 4) +
				    dwords,
				    NO_LOOP_CLIPRECTS );

   opcode = XY_SETUP_BLT_CMD;
   if (cpp == 4)
      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
#ifndef I915
   if (dst_tiled) {
      opcode |= XY_DST_TILED;
      dst_pitch /= 4;
   }
#endif

   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
   if (cpp == 2)
      br13 |= BR13_565;
   else
      br13 |= BR13_8888;

   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* byte-packed rows */
   if (dst_tiled)
      blit_cmd |= XY_DST_TILED;

   BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS);
   OUT_BATCH(opcode);
   OUT_BATCH(br13);
   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
   OUT_BATCH(0); /* bg */
   OUT_BATCH(fg_color); /* fg */
   OUT_BATCH(0); /* pattern base addr */

   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   ADVANCE_BATCH();

   intel_batchbuffer_data( intel->batch,
			   src_bits,
			   dwords * 4,
			   NO_LOOP_CLIPRECTS );
}
Example #5
void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
{
   BATCH_LOCALS(&rmesa->radeon);
   uint32_t voffset;
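   /* Payload size: 1 dword for the array count, 3 dwords per pair of
    * arrays (packed descriptor + two offsets) and 2 dwords for an odd
    * array left over (descriptor + one offset).
    */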
   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
   int i;
   
   radeon_print(RADEON_RENDER, RADEON_VERBOSE,
           "%s: nr=%d, ofs=0x%08x\n",
           __FUNCTION__, nr, offset);

   BEGIN_BATCH(sz+2+ (nr*2));
   OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
   OUT_BATCH(nr);

   {
      for (i = 0; i + 1 < nr; i += 2) {
	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
		   (rmesa->radeon.tcl.aos[i].stride << 8) |
		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
	 
	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
	 OUT_BATCH(voffset);
	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
	 OUT_BATCH(voffset);
      }
      
      if (nr & 1) {
	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
	 OUT_BATCH(voffset);
      }
      for (i = 0; i + 1 < nr; i += 2) {
	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
			       rmesa->radeon.tcl.aos[i+0].bo,
			       RADEON_GEM_DOMAIN_GTT,
			       0, 0);
	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
			       rmesa->radeon.tcl.aos[i+1].bo,
			       RADEON_GEM_DOMAIN_GTT,
			       0, 0);
      }
      if (nr & 1) {
	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
			       rmesa->radeon.tcl.aos[nr-1].bo,
			       RADEON_GEM_DOMAIN_GTT,
			       0, 0);
      }
   }
   END_BATCH();
}
Example #6
static void
upload_clip_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   uint32_t depth_clamp = 0;
   uint32_t provoking, userclip;
   uint32_t dw1 = GEN6_CLIP_STATISTICS_ENABLE;
   uint32_t nonperspective_barycentric_enable_flag = 0;

   /* _NEW_BUFFERS */
   bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->barycentric_interp_modes &
       (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC)) {
      nonperspective_barycentric_enable_flag =
         GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
   }

   dw1 |= GEN7_CLIP_EARLY_CULL;

   /* _NEW_POLYGON */
   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
      dw1 |= GEN7_CLIP_WINDING_CCW;

   if (ctx->Polygon.CullFlag) {
      switch (ctx->Polygon.CullFaceMode) {
      case GL_FRONT:
	 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
	 break;
      case GL_BACK:
	 dw1 |= GEN7_CLIP_CULLMODE_BACK;
	 break;
      case GL_FRONT_AND_BACK:
	 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
	 break;
      default:
	 assert(!"Should not get here: invalid CullFlag");
	 break;
      }
   } else {
      dw1 |= GEN7_CLIP_CULLMODE_NONE;
   }

   /* _NEW_TRANSFORM */
   if (!ctx->Transform.DepthClamp)
      depth_clamp = GEN6_CLIP_Z_TEST;

   /* _NEW_LIGHT */
   if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
      provoking =
	 (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
	 (1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
	 (0 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
   } else {
      provoking =
	 (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
	 (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
	 (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
   }

   /* _NEW_TRANSFORM */
   userclip = ctx->Transform.ClipPlanesEnabled;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
   OUT_BATCH(dw1);
   OUT_BATCH(GEN6_CLIP_ENABLE |
	     GEN6_CLIP_API_OGL |
	     GEN6_CLIP_MODE_NORMAL |
             nonperspective_barycentric_enable_flag |
	     GEN6_CLIP_XY_TEST |
	     GEN6_CLIP_GB_TEST |
	     userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
	     depth_clamp |
	     provoking);
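   /* U_FIXED(x, 3) is unsigned fixed point with 3 fractional bits:
    * 0.125 -> 1 and 255.875 -> 2047.
    */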
   OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
             U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
             GEN6_CLIP_FORCE_ZERO_RTAINDEX);
   ADVANCE_BATCH();
}
/**
 * Use blitting to clear the renderbuffers named by 'flags'.
 * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
 * since that might include software renderbuffers or renderbuffers
 * which we're clearing with triangles.
 * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
 */
void
intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
{
   struct intel_context *intel = intel_context(ctx);
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   GLuint clear_depth;
   GLbitfield skipBuffers = 0;
   BATCH_LOCALS;

   /*
    * Compute values for clearing the buffers.
    */
   clear_depth = 0;
   if (mask & BUFFER_BIT_DEPTH) {
      clear_depth = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
   }
   if (mask & BUFFER_BIT_STENCIL) {
      clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
   }

   /* If clearing both depth and stencil, skip BUFFER_BIT_STENCIL in
    * the loop below.
    */
   if ((mask & BUFFER_BIT_DEPTH) && (mask & BUFFER_BIT_STENCIL)) {
      skipBuffers = BUFFER_BIT_STENCIL;
   }

   /* XXX Move this flush/lock into the following conditional? */
   intelFlush(&intel->ctx);
   LOCK_HARDWARE(intel);

   if (intel->numClipRects) {
      GLint cx, cy, cw, ch;
      drm_clip_rect_t clear;
      int i;

      /* Get clear bounds after locking */
      cx = fb->_Xmin;
      cy = fb->_Ymin;
      cw = fb->_Xmax - cx;
      ch = fb->_Ymax - cy;

      if (fb->Name == 0) {
         /* clearing a window */

         /* flip top to bottom */
         clear.x1 = cx + intel->drawX;
         clear.y1 = intel->driDrawable->y + intel->driDrawable->h - cy - ch;
         clear.x2 = clear.x1 + cw;
         clear.y2 = clear.y1 + ch;
      }
      else {
         /* clearing FBO */
         assert(intel->numClipRects == 1);
         assert(intel->pClipRects == &intel->fboRect);
         clear.x1 = cx;
         clear.y1 = cy;
         clear.x2 = clear.x1 + cw;
         clear.y2 = clear.y1 + ch;
         /* no change to mask */
      }

      for (i = 0; i < intel->numClipRects; i++) {
         const drm_clip_rect_t *box = &intel->pClipRects[i];
         drm_clip_rect_t b;
         GLuint buf;
         GLuint clearMask = mask;      /* use copy, since we modify it below */
         GLboolean all = (cw == fb->Width && ch == fb->Height);

         if (!all) {
            intel_intersect_cliprects(&b, &clear, box);
         }
         else {
            b = *box;
         }

         if (b.x1 >= b.x2 || b.y1 >= b.y2)
            continue;

         if (0)
            _mesa_printf("clear %d,%d..%d,%d, mask %x\n",
                         b.x1, b.y1, b.x2, b.y2, mask);

         /* Loop over all renderbuffers */
         for (buf = 0; buf < BUFFER_COUNT && clearMask; buf++) {
            const GLbitfield bufBit = 1 << buf;
            if ((clearMask & bufBit) && !(bufBit & skipBuffers)) {
               /* OK, clear this renderbuffer */
               struct intel_region *irb_region =
		  intel_get_rb_region(fb, buf);
               dri_bo *write_buffer =
                  intel_region_buffer(intel, irb_region,
                                      all ? INTEL_WRITE_FULL :
                                      INTEL_WRITE_PART);

               GLuint clearVal;
               GLint pitch, cpp;
               GLuint BR13, CMD;

               ASSERT(irb_region);

               pitch = irb_region->pitch;
               cpp = irb_region->cpp;

               DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
                   __FUNCTION__,
                   irb_region->buffer, (pitch * cpp),
                   irb_region->draw_offset,
                   b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1);

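	       /* ROP 0xf0 is PATCOPY: the fill comes solely from the
	        * pattern/fill color, ignoring source and destination.
	        */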
	       BR13 = 0xf0 << 16;
	       CMD = XY_COLOR_BLT_CMD;

               /* Setup the blit command */
               if (cpp == 4) {
                  BR13 |= (1 << 24) | (1 << 25);
                  if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
                     if (clearMask & BUFFER_BIT_DEPTH)
                        CMD |= XY_BLT_WRITE_RGB;
                     if (clearMask & BUFFER_BIT_STENCIL)
                        CMD |= XY_BLT_WRITE_ALPHA;
                  }
                  else {
                     /* clearing RGBA */
                     CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
                  }
               }
               else {
                  ASSERT(cpp == 2 || cpp == 0);
                  BR13 |= (1 << 24);
               }

#ifndef I915
	       if (irb_region->tiled) {
		  CMD |= XY_DST_TILED;
		  pitch /= 4;
	       }
#endif
	       BR13 |= (pitch * cpp);

               if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
                  clearVal = clear_depth;
               }
               else {
                  clearVal = (cpp == 4)
                     ? intel->ClearColor8888 : intel->ClearColor565;
               }
               /*
                  _mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n",
                  buf, irb->Base.Name);
                */
	       intel_wait_flips(intel);

               assert(b.x1 < b.x2);
               assert(b.y1 < b.y2);

               BEGIN_BATCH(6, REFERENCES_CLIPRECTS);
               OUT_BATCH(CMD);
               OUT_BATCH(BR13);
               OUT_BATCH((b.y1 << 16) | b.x1);
               OUT_BATCH((b.y2 << 16) | b.x2);
               OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                         irb_region->draw_offset);
               OUT_BATCH(clearVal);
               ADVANCE_BATCH();
               clearMask &= ~bufBit;    /* turn off bit, for faster loop exit */
            }
         }
      }
      intel_batchbuffer_flush(intel->batch);
   }

   UNLOCK_HARDWARE(intel);
}
Example #8
static void
brw_upload_cs_state(struct brw_context *brw)
{
   if (!brw->cs.prog_data)
      return;

   uint32_t offset;
   uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                                8 * 4, 64, &offset);
   struct brw_stage_state *stage_state = &brw->cs.base;
   struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      brw->vtbl.emit_buffer_surface_state(
         brw, &stage_state->surf_offset[
                 prog_data->binding_table.shader_time_start],
         brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
         brw->shader_time.bo->size, 1, true);
   }

   uint32_t *bind = (uint32_t*) brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
                                            prog_data->binding_table.size_bytes,
                                            32, &stage_state->bind_bo_offset);

   uint32_t dwords = brw->gen < 8 ? 8 : 9;
   BEGIN_BATCH(dwords);
   OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));

   if (prog_data->total_scratch) {
      if (brw->gen >= 8)
         OUT_RELOC64(stage_state->scratch_bo,
                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                     ffs(prog_data->total_scratch) - 11);
      else
         OUT_RELOC(stage_state->scratch_bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   ffs(prog_data->total_scratch) - 11);
   } else {
      OUT_BATCH(0);
      if (brw->gen >= 8)
         OUT_BATCH(0);
   }

   const uint32_t vfe_num_urb_entries = brw->gen >= 8 ? 2 : 0;
   const uint32_t vfe_gpgpu_mode =
      brw->gen == 7 ? SET_FIELD(1, GEN7_MEDIA_VFE_STATE_GPGPU_MODE) : 0;
   OUT_BATCH(SET_FIELD(brw->max_cs_threads - 1, MEDIA_VFE_STATE_MAX_THREADS) |
             SET_FIELD(vfe_num_urb_entries, MEDIA_VFE_STATE_URB_ENTRIES) |
             SET_FIELD(1, MEDIA_VFE_STATE_RESET_GTW_TIMER) |
             SET_FIELD(1, MEDIA_VFE_STATE_BYPASS_GTW) |
             vfe_gpgpu_mode);

   OUT_BATCH(0);
   const uint32_t vfe_urb_allocation = brw->gen >= 8 ? 2 : 0;

   /* We are uploading duplicated copies of push constant uniforms for each
    * thread. Although the local id data needs to vary per thread, it won't
    * change for other uniform data. Unfortunately this duplication is
    * required for gen7. As of Haswell, this duplication can be avoided, but
    * this older mechanism with duplicated data continues to work.
    *
    * FINISHME: As of Haswell, we could make use of the
    * INTERFACE_DESCRIPTOR_DATA "Cross-Thread Constant Data Read Length" field
    * to only store one copy of uniform data.
    *
    * FINISHME: Broadwell adds a new alternative "Indirect Payload Storage"
    * which is described in the GPGPU_WALKER command and in the Broadwell PRM
    * Volume 7: 3D Media GPGPU, under Media GPGPU Pipeline => Mode of
    * Operations => GPGPU Mode => Indirect Payload Storage.
    *
    * Note: The constant data is built in brw_upload_cs_push_constants below.
    */
   const uint32_t vfe_curbe_allocation =
      ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
            cs_prog_data->push.cross_thread.regs, 2);
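   /* Hypothetical example: 4 per-thread regs across 8 threads plus 1
    * cross-thread reg gives ALIGN(4 * 8 + 1, 2) = 34 CURBE registers.
    */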
   OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) |
             SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC));
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   ADVANCE_BATCH();

   if (cs_prog_data->push.total.size > 0) {
      BEGIN_BATCH(4);
      OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
      OUT_BATCH(0);
      OUT_BATCH(ALIGN(cs_prog_data->push.total.size, 64));
      OUT_BATCH(stage_state->push_const_offset);
      ADVANCE_BATCH();
   }

   /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
   memcpy(bind, stage_state->surf_offset,
          prog_data->binding_table.size_bytes);

   memset(desc, 0, 8 * 4);
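   /* Fill in the 8-dword interface descriptor allocated above; the
    * MEDIA_INTERFACE_DESCRIPTOR_LOAD packet below points the hardware
    * at it.
    */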

   int dw = 0;
   desc[dw++] = brw->cs.base.prog_offset;
   if (brw->gen >= 8)
      desc[dw++] = 0; /* Kernel Start Pointer High */
   desc[dw++] = 0;
   desc[dw++] = stage_state->sampler_offset |
      ((stage_state->sampler_count + 3) / 4);
   desc[dw++] = stage_state->bind_bo_offset;
   desc[dw++] = SET_FIELD(cs_prog_data->push.per_thread.regs,
                          MEDIA_CURBE_READ_LENGTH);
   const uint32_t media_threads =
      brw->gen >= 8 ?
      SET_FIELD(cs_prog_data->threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
      SET_FIELD(cs_prog_data->threads, MEDIA_GPGPU_THREAD_COUNT);
   assert(cs_prog_data->threads <= brw->max_cs_threads);

   assert(prog_data->total_shared <= 64 * 1024);
   uint32_t slm_size = 0;
   if (prog_data->total_shared > 0) {
      /* slm_size is in 4k increments, but must be a power of 2. */
      slm_size = 4 * 1024;
      while (slm_size < prog_data->total_shared)
         slm_size <<= 1;
      slm_size /= 4 * 1024;
   }
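   /* Example: total_shared = 20KB rounds up to the 32KB power of two,
    * which encodes as 32KB / 4KB = 8.
    */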

   desc[dw++] =
      SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |
      SET_FIELD(slm_size, MEDIA_SHARED_LOCAL_MEMORY_SIZE) |
      media_threads;

   desc[dw++] =
      SET_FIELD(cs_prog_data->push.cross_thread.regs, CROSS_THREAD_READ_LENGTH);

   BEGIN_BATCH(4);
   OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
   OUT_BATCH(0);
   OUT_BATCH(8 * 4);
   OUT_BATCH(offset);
   ADVANCE_BATCH();
}
/* Push the state into the sarea and/or texture memory.
 */
static void
i915_emit_state(struct intel_context *intel)
{
   struct i915_context *i915 = i915_context(&intel->ctx);
   struct i915_hw_state *state = &i915->state;
   int i, count, aper_count;
   GLuint dirty;
   drm_intel_bo *aper_array[3 + I915_TEX_UNITS];
   GET_CURRENT_CONTEXT(ctx);
   BATCH_LOCALS;

   /* We don't hold the lock at this point, so want to make sure that
    * there won't be a buffer wrap between the state emits and the primitive
    * emit header.
    *
    * It might be better to talk about explicit places where
    * scheduling is allowed, rather than assume that it is whenever a
    * batchbuffer fills up.
    */
   intel_batchbuffer_require_space(intel,
				   get_state_size(state) +
                                   INTEL_PRIM_EMIT_SIZE);
   count = 0;
 again:
   if (intel->batch.bo == NULL) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
      assert(0);
   }
   aper_count = 0;
   dirty = get_dirty(state);

   aper_array[aper_count++] = intel->batch.bo;
   if (dirty & I915_UPLOAD_BUFFERS) {
      if (state->draw_region)
	 aper_array[aper_count++] = state->draw_region->bo;
      if (state->depth_region)
	 aper_array[aper_count++] = state->depth_region->bo;
   }

   if (dirty & I915_UPLOAD_TEX_ALL) {
      for (i = 0; i < I915_TEX_UNITS; i++) {
	 if (dirty & I915_UPLOAD_TEX(i)) {
	    if (state->tex_buffer[i]) {
	       aper_array[aper_count++] = state->tex_buffer[i];
	    }
	 }
      }
   }

   if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {
       if (count == 0) {
	   count++;
	   intel_batchbuffer_flush(intel);
	   goto again;
       } else {
	   _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
	   assert(0);
       }
   }

   /* work out list of buffers to emit */
   
   /* Do this here as we may have flushed the batchbuffer above,
    * causing more state to be dirty!
    */
   dirty = get_dirty(state);
   state->emitted |= dirty;
   assert(get_dirty(state) == 0);

   if (INTEL_DEBUG & DEBUG_STATE)
      fprintf(stderr, "%s dirty: %x\n", __func__, dirty);

   if (dirty & I915_UPLOAD_INVARIENT) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
      i915_emit_invarient_state(intel);
   }

   if (dirty & I915_UPLOAD_RASTER_RULES) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_RASTER_RULES:\n");
      emit(intel, state->RasterRules, sizeof(state->RasterRules));
   }

   if (dirty & I915_UPLOAD_CTX) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_CTX:\n");

      emit(intel, state->Ctx, sizeof(state->Ctx));
   }

   if (dirty & I915_UPLOAD_BLEND) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_BLEND:\n");

      emit(intel, state->Blend, sizeof(state->Blend));
   }

   if (dirty & I915_UPLOAD_BUFFERS) {
      GLuint count;

      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");

      count = 17;
      if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP)
         count++;

      BEGIN_BATCH(count);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
      if (state->draw_region) {
	 OUT_RELOC(state->draw_region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
      } else {
	 OUT_BATCH(0);
      }

      OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
      if (state->depth_region) {
         OUT_RELOC(state->depth_region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
      } else {
	 OUT_BATCH(0);
      }

      OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
      OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);

      if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP)
         OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]);
      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]);

      ADVANCE_BATCH();
   }

   if (dirty & I915_UPLOAD_STIPPLE) {
      if (INTEL_DEBUG & DEBUG_STATE)
         fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
      emit(intel, state->Stipple, sizeof(state->Stipple));
   }

   /* Combine all the dirty texture state into a single command to
    * avoid lockups on I915 hardware. 
    */
   if (dirty & I915_UPLOAD_TEX_ALL) {
      int nr = 0;
      GLuint unwind;

      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i))
            nr++;

      BEGIN_BATCH(2 + nr * 3);
      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
	    OUT_RELOC(state->tex_buffer[i],
		      I915_GEM_DOMAIN_SAMPLER, 0,
		      state->tex_offset[i]);

            OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
         }
      ADVANCE_BATCH();

      unwind = intel->batch.used;
      BEGIN_BATCH(2 + nr * 3);
      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0; i < I915_TEX_UNITS; i++)
         if (dirty & I915_UPLOAD_TEX(i)) {
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
            OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
         }
      ADVANCE_BATCH();
      if (i915->last_sampler &&
	  memcmp(intel->batch.map + i915->last_sampler,
		 intel->batch.map + unwind,
		 (2 + nr*3)*sizeof(int)) == 0)
	  intel->batch.used = unwind;
      else
	  i915->last_sampler = unwind;
   }
Example #10
static void
gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
                                     const brw_blorp_params *params)
{
   struct intel_context *intel = &brw->intel;
   uint32_t draw_x = params->depth.x_offset;
   uint32_t draw_y = params->depth.y_offset;
   uint32_t tile_mask_x, tile_mask_y;

   gen6_blorp_compute_tile_masks(params, &tile_mask_x, &tile_mask_y);

   /* 3DSTATE_DEPTH_BUFFER */
   {
      uint32_t tile_x = draw_x & tile_mask_x;
      uint32_t tile_y = draw_y & tile_mask_y;
      uint32_t offset =
         intel_region_get_aligned_offset(params->depth.mt->region,
                                         draw_x & ~tile_mask_x,
                                         draw_y & ~tile_mask_y, false);

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       *
       * We have no guarantee that tile_x and tile_y are correctly aligned,
       * since they are determined by the mipmap layout, which is only aligned
       * to multiples of 4.
       *
       * So, to avoid hanging the GPU, just smash the low order 3 bits of
       * tile_x and tile_y to 0.  This is a temporary workaround until we come
       * up with a better solution.
       */
      tile_x &= ~7;
      tile_y &= ~7;

      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);

      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
      uint32_t pitch_bytes =
         params->depth.mt->region->pitch * params->depth.mt->region->cpp;
      OUT_BATCH((pitch_bytes - 1) |
                params->depth_format << 18 |
                1 << 21 | /* separate stencil enable */
                1 << 22 | /* hiz enable */
                BRW_TILEWALK_YMAJOR << 26 |
                1 << 27 | /* y-tiled */
                BRW_SURFACE_2D << 29);
      OUT_RELOC(params->depth.mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                offset);
      OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
                (params->depth.width + tile_x - 1) << 6 |
                (params->depth.height + tile_y - 1) << 19);
      OUT_BATCH(0);
      OUT_BATCH(tile_x |
                tile_y << 16);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_HIER_DEPTH_BUFFER */
   {
      struct intel_region *hiz_region = params->depth.mt->hiz_mt->region;
      uint32_t hiz_offset =
         intel_region_get_aligned_offset(hiz_region,
                                         draw_x & ~tile_mask_x,
                                         (draw_y & ~tile_mask_y) / 2, false);

      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
      OUT_RELOC(hiz_region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                hiz_offset);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_STENCIL_BUFFER */
   {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
void
gen7_emit_depth_stencil_hiz(struct brw_context *brw,
                            struct intel_mipmap_tree *depth_mt,
                            uint32_t depth_offset, uint32_t depthbuffer_format,
                            uint32_t depth_surface_type,
                            struct intel_mipmap_tree *stencil_mt,
                            struct intel_mipmap_tree *hiz_mt,
                            bool separate_stencil, uint32_t width,
                            uint32_t height, uint32_t tile_x, uint32_t tile_y)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;

   intel_emit_depth_stall_flushes(intel);

   /* _NEW_DEPTH, _NEW_STENCIL */
   BEGIN_BATCH(7);
   OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
             (depthbuffer_format << 18) |
             ((hiz_mt ? 1 : 0) << 22) |
             ((stencil_mt != NULL && ctx->Stencil._WriteEnabled) << 27) |
             ((ctx->Depth.Mask != 0) << 28) |
             (depth_surface_type << 29));

   if (depth_mt) {
      OUT_RELOC(depth_mt->region->bo,
	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		depth_offset);
   } else {
      OUT_BATCH(0);
   }

   OUT_BATCH(((width + tile_x - 1) << 4) |
             ((height + tile_y - 1) << 18));
   OUT_BATCH(0);
   OUT_BATCH(tile_x | (tile_y << 16));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   if (hiz_mt == NULL) {
      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
      OUT_BATCH(hiz_mt->region->pitch - 1);
      OUT_RELOC(hiz_mt->region->bo,
                I915_GEM_DOMAIN_RENDER,
                I915_GEM_DOMAIN_RENDER,
                brw->depthstencil.hiz_offset);
      ADVANCE_BATCH();
   }

   if (stencil_mt == NULL) {
      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0;

      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
      /* The stencil buffer has quirky pitch requirements.  From the Graphics
       * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing
       * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+],
       * field "Surface Pitch":
       *
       *    The pitch must be set to 2x the value computed based on width, as
       *    the stencil buffer is stored with two rows interleaved.
       *
       * (Note that it is not 100% clear whether this was intended to apply to
       * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would
       * imply that it doesn't), however the comment appears on a "DevIVB+"
       * page (which would imply that it does).  Experiments with the hardware
       * indicate that it does.)
       */
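      /* e.g. a 128-byte stencil pitch is programmed as 2 * 128 - 1 = 255. */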
      OUT_BATCH(enabled |
	        (2 * stencil_mt->region->pitch - 1));
      OUT_RELOC(stencil_mt->region->bo,
	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		brw->depthstencil.stencil_offset);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(3);
   OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
   OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
   OUT_BATCH(1);
   ADVANCE_BATCH();
}
Example #12
/**
 * Enable or disable thread dispatch and set the HiZ op appropriately.
 */
static void
gen6_blorp_emit_wm_config(struct brw_context *brw,
                          const brw_blorp_params *params,
                          uint32_t prog_offset,
                          brw_blorp_prog_data *prog_data)
{
   struct intel_context *intel = &brw->intel;
   uint32_t dw2, dw4, dw5, dw6;

   /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
    * nonzero to prevent the GPU from hanging. See the valid ranges in the
    * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31
    * "Maximum Number Of Threads".
    *
    * To be safe (and to minimize extraneous code) we go ahead and fully
    * configure the WM state whether or not there is a WM program.
    */

   dw2 = dw4 = dw5 = dw6 = 0;
   switch (params->hiz_op) {
   case GEN6_HIZ_OP_DEPTH_CLEAR:
      dw4 |= GEN6_WM_DEPTH_CLEAR;
      break;
   case GEN6_HIZ_OP_DEPTH_RESOLVE:
      dw4 |= GEN6_WM_DEPTH_RESOLVE;
      break;
   case GEN6_HIZ_OP_HIZ_RESOLVE:
      dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
      break;
   case GEN6_HIZ_OP_NONE:
      break;
   default:
      assert(0);
      break;
   }
   dw4 |= GEN6_WM_STATISTICS_ENABLE;
   dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
   dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
   dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
   dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
   dw6 |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; /* No inputs from SF */
   if (params->use_wm_prog) {
      dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
      dw4 |= prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
      dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
      dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */
      dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
   }

   if (params->num_samples > 1) {
      dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
      if (prog_data && prog_data->persample_msaa_dispatch)
         dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
      else
         dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
   } else {
      dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
      dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
   }

   BEGIN_BATCH(9);
   OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
   OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
   OUT_BATCH(dw2);
   OUT_BATCH(0); /* No scratch needed */
   OUT_BATCH(dw4);
   OUT_BATCH(dw5);
   OUT_BATCH(dw6);
   OUT_BATCH(0); /* No other programs */
   OUT_BATCH(0); /* No other programs */
   ADVANCE_BATCH();
}
Example #13
void
gen6_blorp_emit_vertices(struct brw_context *brw,
                         const brw_blorp_params *params)
{
   struct intel_context *intel = &brw->intel;
   uint32_t vertex_offset;

   /* Set up the VBO for the rectangle primitive.
    *
    * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
    * vertices. The vertices reside in screen space with DirectX coordinates
    * (that is, (0, 0) is the upper left corner).
    *
    *   v2 ------ implied
    *    |        |
    *    |        |
    *   v0 ----- v1
    *
    * Since the VS is disabled, the clipper loads each VUE directly from
    * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
    * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
    *   dw0: Reserved, MBZ.
    *   dw1: Render Target Array Index. The HiZ op does not use indexed
    *        vertices, so set the dword to 0.
    *   dw2: Viewport Index. The HiZ op disables viewport mapping and
    *        scissoring, so set the dword to 0.
    *   dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
    *        set the dword to 0.
    *   dw4: Vertex Position X.
    *   dw5: Vertex Position Y.
    *   dw6: Vertex Position Z.
    *   dw7: Vertex Position W.
    *
    * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
    * "Vertex URB Entry (VUE) Formats".
    */
   {
      float *vertex_data;

      const float vertices[GEN6_BLORP_VBO_SIZE] = {
         /* v0 */ 0, 0, 0, 0,     (float) params->x0, (float) params->y1, 0, 1,
         /* v1 */ 0, 0, 0, 0,     (float) params->x1, (float) params->y1, 0, 1,
         /* v2 */ 0, 0, 0, 0,     (float) params->x0, (float) params->y0, 0, 1,
      };

      vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
                                              GEN6_BLORP_VBO_SIZE, 32,
                                              &vertex_offset);
      memcpy(vertex_data, vertices, GEN6_BLORP_VBO_SIZE);
   }

   /* 3DSTATE_VERTEX_BUFFERS */
   {
      const int num_buffers = 1;
      const int batch_length = 1 + 4 * num_buffers;

      uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
                     (GEN6_BLORP_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT;

      if (intel->gen >= 7)
         dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

      BEGIN_BATCH(batch_length);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
      OUT_BATCH(dw0);
      /* start address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
		vertex_offset);
      /* end address */
      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
		vertex_offset + GEN6_BLORP_VBO_SIZE - 1);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_VERTEX_ELEMENTS
    *
    * Fetch dwords 0 - 7 from each VUE. See the comments above where
    * the vertex_bo is filled with data.
    */
   {
      const int num_elements = 2;
      const int batch_length = 1 + 2 * num_elements;

      BEGIN_BATCH(batch_length);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
      /* Element 0 */
      OUT_BATCH(GEN6_VE0_VALID |
                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
                0 << BRW_VE0_SRC_OFFSET_SHIFT);
      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
      /* Element 1 */
      OUT_BATCH(GEN6_VE0_VALID |
                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
                16 << BRW_VE0_SRC_OFFSET_SHIFT);
      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
      ADVANCE_BATCH();
   }
}
Example #14
static void
gen8_upload_ds_state(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_stage_state *stage_state = &brw->tes.base;
   /* BRW_NEW_TESS_EVAL_PROGRAM */
   bool active = brw->tess_eval_program;
   assert(!active || brw->tess_ctrl_program);

   /* BRW_NEW_TES_PROG_DATA */
   const struct brw_tes_prog_data *tes_prog_data = brw->tes.prog_data;
   const struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
   const struct brw_stage_prog_data *prog_data = &vue_prog_data->base;

   if (active) {
      BEGIN_BATCH(9);
      OUT_BATCH(_3DSTATE_DS << 16 | (9 - 2));
      OUT_BATCH(stage_state->prog_offset);
      OUT_BATCH(0);
      OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
                          GEN7_DS_SAMPLER_COUNT) |
                SET_FIELD(prog_data->binding_table.size_bytes / 4,
                          GEN7_DS_BINDING_TABLE_ENTRY_COUNT));
      if (prog_data->total_scratch) {
         OUT_RELOC64(stage_state->scratch_bo,
                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                     ffs(prog_data->total_scratch) - 11);
      } else {
         OUT_BATCH(0);
         OUT_BATCH(0);
      }
      OUT_BATCH(SET_FIELD(prog_data->dispatch_grf_start_reg,
                          GEN7_DS_DISPATCH_START_GRF) |
                SET_FIELD(vue_prog_data->urb_read_length,
                          GEN7_DS_URB_READ_LENGTH));

      OUT_BATCH(GEN7_DS_ENABLE |
                GEN7_DS_STATISTICS_ENABLE |
                (brw->max_ds_threads - 1) << HSW_DS_MAX_THREADS_SHIFT |
                (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ?
                 GEN7_DS_SIMD8_DISPATCH_ENABLE : 0) |
                (tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ?
                 GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0));
      OUT_BATCH(SET_FIELD(ctx->Transform.ClipPlanesEnabled,
                          GEN8_DS_USER_CLIP_DISTANCE));
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(9);
      OUT_BATCH(_3DSTATE_DS << 16 | (9 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
   brw->tes.enabled = active;
}
Example #15
File: gen7_hiz.c  Project: curro/mesa
/**
 * \copydoc gen6_hiz_exec()
 */
static void
gen7_hiz_exec(struct intel_context *intel,
              struct intel_mipmap_tree *mt,
              unsigned int level,
              unsigned int layer,
              enum gen6_hiz_op op)
{
   struct gl_context *ctx = &intel->ctx;
   struct brw_context *brw = brw_context(ctx);

   assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
   assert(mt->hiz_mt != NULL);
   intel_miptree_check_level_layer(mt, level, layer);

   uint32_t depth_format;
   switch (mt->format) {
   case MESA_FORMAT_Z16:       depth_format = BRW_DEPTHFORMAT_D16_UNORM; break;
   case MESA_FORMAT_Z32_FLOAT: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break;
   case MESA_FORMAT_X8_Z24:    depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
   default:                    assert(0); break;
   }

   gen6_hiz_emit_batch_head(brw);
   gen6_hiz_emit_vertices(brw, mt, level, layer);

   /* 3DSTATE_URB_VS
    * 3DSTATE_URB_HS
    * 3DSTATE_URB_DS
    * 3DSTATE_URB_GS
    *
    * If the 3DSTATE_URB_VS is emitted, then the others must be also. From the
    * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS:
    *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
    *     programmed in order for the programming of this state to be
    *     valid.
    */
   {
      /* The minimum valid value is 32. See 3DSTATE_URB_VS,
       * Dword 1.15:0 "VS Number of URB Entries".
       */
      int num_vs_entries = 32;

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
      OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT |
                0 << GEN7_URB_STARTING_ADDRESS_SHIFT |
                num_vs_entries);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
    *
    * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
    */
   {
      uint32_t depthstencil_offset;
      gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
      OUT_BATCH(depthstencil_offset | 1);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_VS
    *
    * Disable vertex shader.
    */
   {
      BEGIN_BATCH(6);
      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_HS
    *
    * Disable the hull shader.
    */
   {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_TE
    *
    * Disable the tessellation engine.
    */
   {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_DS
    *
    * Disable the domain shader.
    */
   {
      BEGIN_BATCH(6);
      OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_GS
    *
    * Disable the geometry shader.
    */
   {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_STREAMOUT
    *
    * Disable streamout.
    */
   {
      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_CLIP
    *
    * Disable the clipper.
    *
    * The HiZ op emits a rectangle primitive, which requires clipping to
    * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
    * Section 1.3 "3D Primitives Overview":
    *    RECTLIST:
    *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
    *    Mode should be set to a value other than CLIPMODE_NORMAL.
    *
    * Also disable perspective divide. This doesn't change the clipper's
    * output, but does spare a few electrons.
    */
   {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
      OUT_BATCH(0);
      OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_SF
    *
    * Disable ViewportTransformEnable (dw1.1)
    *
    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
    * Primitives Overview":
    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
    *     use of screen-space coordinates).
    *
    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5)
    * and BackFaceFillMode (dw1.4:3) to SOLID(0).
    *
    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
    *     SOLID: Any triangle or rectangle object found to be front-facing
    *     is rendered as a solid object. This setting is required when
    *     rendering rectangle (RECTLIST) objects.
    */
   {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
      OUT_BATCH(depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_SBE */
   {
      BEGIN_BATCH(14);
      OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
      OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */
                1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
                0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
      for (int i = 0; i < 12; ++i)
         OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_WM
    *
    * Disable PS thread dispatch (dw1.29) and enable the HiZ op.
    */
   {
      uint32_t dw1 = 0;

      switch (op) {
      case GEN6_HIZ_OP_DEPTH_CLEAR:
         assert(!"not implemented");
         dw1 |= GEN7_WM_DEPTH_CLEAR;
         break;
      case GEN6_HIZ_OP_DEPTH_RESOLVE:
         dw1 |= GEN7_WM_DEPTH_RESOLVE;
         break;
      case GEN6_HIZ_OP_HIZ_RESOLVE:
         dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
         break;
      default:
         assert(0);
         break;
      }

      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
      OUT_BATCH(dw1);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_PS
    *
    * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
    * that, thread dispatch info must still be specified.
    *     - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec
    *       states that the valid range for this field is [0x3, 0x2f].
    *     - A dispatch mode must be given; that is, at least one of the
    *       "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
    *       discovered through simulator error messages.
    */
   {
      BEGIN_BATCH(8);
      OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(((brw->max_wm_threads - 1) << IVB_PS_MAX_THREADS_SHIFT) |
		GEN7_PS_32_DISPATCH_ENABLE);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_DEPTH_BUFFER */
   {
      uint32_t width = mt->level[level].width;
      uint32_t height = mt->level[level].height;

      uint32_t tile_x;
      uint32_t tile_y;
      uint32_t offset;
      {
         /* Construct a dummy renderbuffer just to extract tile offsets. */
         struct intel_renderbuffer rb;
         rb.mt = mt;
         rb.mt_level = level;
         rb.mt_layer = layer;
         intel_renderbuffer_set_draw_offset(&rb);
         offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y);
      }

      intel_emit_depth_stall_flushes(intel);

      BEGIN_BATCH(7);
      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
      OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) |
                depth_format << 18 |
                1 << 22 | /* hiz enable */
                1 << 28 | /* depth write */
                BRW_SURFACE_2D << 29);
      OUT_RELOC(mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                offset);
      OUT_BATCH((width + tile_x - 1) << 4 |
                (height + tile_y - 1) << 18);
      OUT_BATCH(0);
      OUT_BATCH(tile_x |
                tile_y << 16);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_HIER_DEPTH_BUFFER */
   {
      struct intel_region *hiz_region = mt->hiz_mt->region;

      BEGIN_BATCH(3);
      OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
      OUT_RELOC(hiz_region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_STENCIL_BUFFER */
   {
      BEGIN_BATCH(3);
      OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_CLEAR_PARAMS
    *
    * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2
    * 3DSTATE_CLEAR_PARAMS:
    *    [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed along with
    *    the other Depth/Stencil state commands (i.e. 3DSTATE_DEPTH_BUFFER,
    *    3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
    */
   {
      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_DRAWING_RECTANGLE */
   {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
      OUT_BATCH(0);
      OUT_BATCH(((mt->level[level].width - 1) & 0xffff) |
                ((mt->level[level].height - 1) << 16));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DPRIMITIVE */
   {
     BEGIN_BATCH(7);
     OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
     OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
               _3DPRIM_RECTLIST);
     OUT_BATCH(3); /* vertex count per instance */
     OUT_BATCH(0);
     OUT_BATCH(1); /* instance count */
     OUT_BATCH(0);
     OUT_BATCH(0);
     ADVANCE_BATCH();
   }

   /* See comments above at first invocation of intel_flush() in
    * gen6_hiz_emit_batch_head().
    */
   intel_flush(ctx);

   /* Be safe. */
   brw->state.dirty.brw = ~0;
   brw->state.dirty.cache = ~0;
}
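
One recurring convention in these examples is worth calling out: every command header is emitted as OUT_BATCH(opcode << 16 | (len - 2)), where len is the command's total DWord count including the header. The hardware's DWord Length field is biased by two, which is why each BEGIN_BATCH(n) is paired with an (n - 2) in the header it emits. A minimal sketch of that encoding (the helper name is hypothetical, not part of the driver):

#include <assert.h>
#include <stdint.h>

/* Hypothetical helper showing the "DWord count minus 2" length bias used
 * by the 3DSTATE_* headers above: a command occupying `len` DWords,
 * header included, encodes `len - 2` in its low bits.
 */
static inline uint32_t
cmd_header(uint32_t opcode, unsigned len)
{
   assert(len >= 2); /* the smallest encodable command is 2 DWords */
   return (opcode << 16) | (len - 2);
}

/* e.g. cmd_header(_3DSTATE_URB_VS, 2) == _3DSTATE_URB_VS << 16 | (2 - 2) */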
Example #16
static void
upload_gs_state(struct brw_context *brw)
{
   /* BRW_NEW_GEOMETRY_PROGRAM */
   bool active = brw->geometry_program;
   /* BRW_NEW_GS_PROG_DATA */
   const struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base;
   const struct brw_stage_state *stage_state = &brw->gs.base;

   if (!active || stage_state->push_const_size == 0) {
      /* Disable the push constant buffers. */
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 |
		GEN6_CONSTANT_BUFFER_0_ENABLE |
		(5 - 2));
      /* Pointer to the GS constant buffer.  Covered by the set of
       * state flags from gen6_upload_vs_constants
       */
      OUT_BATCH(stage_state->push_const_offset +
                stage_state->push_const_size - 1);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   if (active) {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
      OUT_BATCH(stage_state->prog_offset);

      /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
       * was previously done for gen6.
       *
       * TODO: test with both disabled to see if the HW is behaving
       * as expected, like in gen7.
       */
      OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE |
                ((ALIGN(stage_state->sampler_count, 4)/4) <<
                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
                ((prog_data->base.binding_table.size_bytes / 4) <<
                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));

      if (prog_data->base.total_scratch) {
         OUT_RELOC(stage_state->scratch_bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   ffs(prog_data->base.total_scratch) - 11);
      } else {
         OUT_BATCH(0); /* no scratch space */
      }

      OUT_BATCH((prog_data->urb_read_length <<
                 GEN6_GS_URB_READ_LENGTH_SHIFT) |
                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
                (prog_data->base.dispatch_grf_start_reg <<
                 GEN6_GS_DISPATCH_START_GRF_SHIFT));

      OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
                GEN6_GS_STATISTICS_ENABLE |
                GEN6_GS_SO_STATISTICS_ENABLE |
                GEN6_GS_RENDERING_ENABLE);

      if (brw->gs.prog_data->gen6_xfb_enabled) {
         /* GEN6_GS_REORDER is equivalent to GEN7_GS_REORDER_TRAILING
          * in gen7. SNB and IVB specs are the same regarding the reordering of
          * TRISTRIP/TRISTRIP_REV vertices and triangle orientation, so we do
          * the same thing in both generations. For more details, see the
          * comment in gen7_gs_state.c
          */
         OUT_BATCH(GEN6_GS_REORDER |
                   GEN6_GS_SVBI_PAYLOAD_ENABLE |
                   GEN6_GS_ENABLE);
      } else {
         OUT_BATCH(GEN6_GS_REORDER | GEN6_GS_ENABLE);
      }
      ADVANCE_BATCH();
   } else if (brw->ff_gs.prog_active) {
      /* In gen6, transform feedback for the VS stage is done with an ad-hoc GS
       * program. This function provides the needed 3DSTATE_GS for this.
       */
      upload_gs_state_for_tf(brw);
   } else {
      /* No GS function required */
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
      OUT_BATCH(0); /* prog_bo */
      OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
                (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
      OUT_BATCH(0); /* scratch space base offset */
      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
                (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
      OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
                GEN6_GS_STATISTICS_ENABLE |
                GEN6_GS_RENDERING_ENABLE);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
   brw->gs.enabled = active;
}
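
The scratch-space reloc above folds the per-thread scratch size into the low bits of the buffer address: total_scratch is a power of two of at least 1KB, so ffs(total_scratch) - 11 yields log2(size / 1024), which appears to be the power-of-two-kilobytes encoding the packet expects. A small sketch of that arithmetic, assuming a power-of-two input:

#include <assert.h>
#include <strings.h> /* ffs() */

/* Sketch: encode a power-of-two scratch allocation (>= 1KB) as a
 * log2-of-kilobytes field.  For 4KB: ffs(4096) = 13, 13 - 11 = 2,
 * and 2^2 KB = 4KB round-trips.
 */
static unsigned
encode_scratch_size(unsigned total_scratch)
{
   assert(total_scratch >= 1024);
   assert((total_scratch & (total_scratch - 1)) == 0); /* power of two */
   return ffs(total_scratch) - 11;
}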
Example #17
/* Upload a new set of constants.  Too much variability to go into the
 * cache mechanism, but maybe would benefit from a comparison against
 * the current uploaded set of constants.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   GLfloat *buf;
   GLuint i;
   gl_clip_plane *clip_planes;

   if (sz == 0) {
      brw->curbe.last_bufsz  = 0;
      goto emit;
   }

   buf = brw->curbe.next_buf;

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      GLuint offset = brw->curbe.wm_start * 16;

      /* copy float constants */
      for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
	 buf[offset + i] = *brw->wm.prog_data->param[i];
      }
   }

   /* clipper constants */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint j;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
	 buf[offset + i * 4 + 0] = fixed_plane[i][0];
	 buf[offset + i * 4 + 1] = fixed_plane[i][1];
	 buf[offset + i * 4 + 2] = fixed_plane[i][2];
	 buf[offset + i * 4 + 3] = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      clip_planes = brw_select_clip_planes(ctx);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
	    buf[offset + i * 4 + 0] = clip_planes[j][0];
	    buf[offset + i * 4 + 1] = clip_planes[j][1];
	    buf[offset + i * 4 + 2] = clip_planes[j][2];
	    buf[offset + i * 4 + 3] = clip_planes[j][3];
	    i++;
	 }
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      GLuint offset = brw->curbe.vs_start * 16;

      for (i = 0; i < brw->vs.prog_data->base.nr_params; i++) {
         buf[offset + i] = *brw->vs.prog_data->base.param[i];
      }
   }

   if (0) {
      for (i = 0; i < sz*16; i+=4) 
	 printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
		buf[i+0], buf[i+1], buf[i+2], buf[i+3]);

      printf("last_buf %p buf %p sz %d/%d cmp %d\n",
	     brw->curbe.last_buf, buf,
	     bufsz, brw->curbe.last_bufsz,
	     brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
   }

   if (brw->curbe.curbe_bo != NULL &&
       bufsz == brw->curbe.last_bufsz &&
       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
      /* constants have not changed */
   } else {
      /* Update the record of what our last set of constants was.  We
       * don't just flip the pointers because we don't fill in the
       * data in the padding between the entries.
       */
      memcpy(brw->curbe.last_buf, buf, bufsz);
      brw->curbe.last_bufsz = bufsz;

      if (brw->curbe.curbe_bo != NULL &&
	  brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
      {
	 drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
	 drm_intel_bo_unreference(brw->curbe.curbe_bo);
	 brw->curbe.curbe_bo = NULL;
      }

      if (brw->curbe.curbe_bo == NULL) {
	 /* Allocate a single page for CURBE entries for this batchbuffer.
	  * They're generally around 64b.
	  */
	 brw->curbe.curbe_bo = drm_intel_bo_alloc(brw->bufmgr, "CURBE",
						  4096, 1 << 6);
	 brw->curbe.curbe_next_offset = 0;
	 drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
	 assert(bufsz < 4096);
      }

      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
      brw->curbe.curbe_next_offset += bufsz;
      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);

      /* Copy data to the buffer:
       */
      memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
	     buf,
	     bufsz);
   }

   /* Because this provokes an action (i.e. copies the constants into the
    * URB), it shouldn't be short-circuited if identical to the
    * previous time - because e.g. the urb destination may have
    * changed, or the urb contents may differ from last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */

emit:
   BEGIN_BATCH(2);
   if (brw->curbe.total_size == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo,
		I915_GEM_DOMAIN_INSTRUCTION, 0,
		(brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();
}
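
Note how the reloc in the emit path packs two fields into one DWord: curbe_offset is kept 64-byte aligned (see the ALIGN(..., 64) above), so its low six bits are free to carry the length field (total_size - 1, in 64-byte units). A hedged sketch of that packing, assuming the usual gen4 CONSTANT_BUFFER layout (address in bits 31:6, length minus one in bits 5:0):

#include <assert.h>
#include <stdint.h>

/* Sketch (assumed packet layout): combine a 64-byte-aligned CURBE offset
 * with a (total_size - 1) length field.  Because offset % 64 == 0, the
 * two fields cannot collide.
 */
static uint32_t
pack_curbe_dw1(uint32_t offset, uint32_t total_size)
{
   assert((offset & 63) == 0);                  /* 64-byte aligned allocation */
   assert(total_size >= 1 && total_size <= 64); /* fits in bits 5:0 */
   return offset | (total_size - 1);
}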
Example #18
static void
upload_sf_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   uint32_t dw1, dw2, dw3;
   float point_size;
   /* _NEW_BUFFERS */
   bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
   bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;

   dw1 = GEN6_SF_STATISTICS_ENABLE |
         GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;

   /* _NEW_BUFFERS */
   dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);

   /* _NEW_POLYGON */
   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
      dw1 |= GEN6_SF_WINDING_CCW;

   if (ctx->Polygon.OffsetFill)
       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;

   if (ctx->Polygon.OffsetLine)
       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;

   if (ctx->Polygon.OffsetPoint)
       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;

   switch (ctx->Polygon.FrontMode) {
   case GL_FILL:
       dw1 |= GEN6_SF_FRONT_SOLID;
       break;

   case GL_LINE:
       dw1 |= GEN6_SF_FRONT_WIREFRAME;
       break;

   case GL_POINT:
       dw1 |= GEN6_SF_FRONT_POINT;
       break;

   default:
       assert(0);
       break;
   }

   switch (ctx->Polygon.BackMode) {
   case GL_FILL:
       dw1 |= GEN6_SF_BACK_SOLID;
       break;

   case GL_LINE:
       dw1 |= GEN6_SF_BACK_WIREFRAME;
       break;

   case GL_POINT:
       dw1 |= GEN6_SF_BACK_POINT;
       break;

   default:
       assert(0);
       break;
   }

   dw2 = 0;

   if (ctx->Polygon.CullFlag) {
      switch (ctx->Polygon.CullFaceMode) {
      case GL_FRONT:
	 dw2 |= GEN6_SF_CULL_FRONT;
	 break;
      case GL_BACK:
	 dw2 |= GEN6_SF_CULL_BACK;
	 break;
      case GL_FRONT_AND_BACK:
	 dw2 |= GEN6_SF_CULL_BOTH;
	 break;
      default:
	 assert(0);
	 break;
      }
   } else {
      dw2 |= GEN6_SF_CULL_NONE;
   }

   /* _NEW_SCISSOR */
   if (ctx->Scissor.Enabled)
      dw2 |= GEN6_SF_SCISSOR_ENABLE;

   /* _NEW_LINE */
   {
      uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7);
      /* TODO: line width of 0 is not allowed when MSAA enabled */
      if (line_width_u3_7 == 0)
         line_width_u3_7 = 1;
      dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
   }
   if (ctx->Line.SmoothFlag) {
      dw2 |= GEN6_SF_LINE_AA_ENABLE;
      dw2 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
   }
   if (ctx->Line.StippleFlag && intel->is_haswell) {
      dw2 |= HSW_SF_LINE_STIPPLE_ENABLE;
   }
   /* _NEW_MULTISAMPLE */
   if (multisampled_fbo && ctx->Multisample.Enabled)
      dw2 |= GEN6_SF_MSRAST_ON_PATTERN;

   /* FINISHME: Last Pixel Enable?  Vertex Sub Pixel Precision Select?
    */

   dw3 = GEN6_SF_LINE_AA_MODE_TRUE;

   /* _NEW_PROGRAM | _NEW_POINT */
   if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated))
      dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;

   /* Clamp to ARB_point_parameters user limits */
   point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);

   /* Clamp to the hardware limits and convert to fixed point */
   dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);

   /* _NEW_LIGHT */
   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
      dw3 |=
	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
   } else {
      dw3 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
   OUT_BATCH(dw1);
   OUT_BATCH(dw2);
   OUT_BATCH(dw3);
   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
   OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
   ADVANCE_BATCH();
}
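
Both the line width (U3.7) and the point size (U8.3) above go through U_FIXED(value, frac_bits). Assuming the conventional definition (scale by 2^frac_bits and truncate), a sketch:

/* Sketch of the unsigned fixed-point conversion assumed by the line-width
 * (3.7) and point-size (8.3) fields: scale by 2^frac_bits and truncate.
 */
static unsigned
u_fixed(float value, unsigned frac_bits)
{
   return (unsigned)(value * (1u << frac_bits));
}

/* u_fixed(1.0f, 7)     == 0x080: a one-pixel line in U3.7.
 * u_fixed(255.875f, 3) == 0x7ff: the largest encodable point size in U8.3.
 */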
/* Copy BitBlt
 */
void
intelEmitCopyBlit(struct intel_context *intel,
		  GLuint cpp,
		  GLshort src_pitch,
		  dri_bo *src_buffer,
		  GLuint src_offset,
		  GLboolean src_tiled,
		  GLshort dst_pitch,
		  dri_bo *dst_buffer,
		  GLuint dst_offset,
		  GLboolean dst_tiled,
		  GLshort src_x, GLshort src_y,
		  GLshort dst_x, GLshort dst_y,
		  GLshort w, GLshort h,
		  GLenum logic_op)
{
   GLuint CMD, BR13;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
   int ret;
   BATCH_LOCALS;

   /* do space/cliprects check before going any further */
   intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
 again:
   ret = dri_bufmgr_check_aperture_space(dst_buffer);
   ret |= dri_bufmgr_check_aperture_space(src_buffer);
   if (ret) {
     intel_batchbuffer_flush(intel->batch);
     goto again;
   }

   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __FUNCTION__,
       src_buffer, src_pitch, src_offset, src_x, src_y,
       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);

   src_pitch *= cpp;
   dst_pitch *= cpp;

   BR13 = translate_raster_op(logic_op) << 16;

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 |= (1 << 24);
      CMD = XY_SRC_COPY_BLT_CMD;
      break;
   case 4:
      BR13 |= (1 << 24) | (1 << 25);
      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return;
   }

#ifndef I915
   if (dst_tiled) {
      CMD |= XY_DST_TILED;
      dst_pitch /= 4;
   }
   if (src_tiled) {
      CMD |= XY_SRC_TILED;
      src_pitch /= 4;
   }
#endif

   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
      return;
   }

   /* Initial y values don't seem to work with negative pitches.  If
    * we adjust the offsets manually (below), it seems to work fine.
    *
    * On the other hand, if we always adjust, the hardware doesn't
    * know which blit directions to use, so overlapping copypixels get
    * the wrong result.
    */
   if (dst_pitch > 0 && src_pitch > 0) {
      assert(dst_x < dst_x2);
      assert(dst_y < dst_y2);

      BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
      OUT_BATCH(CMD);
      OUT_BATCH(BR13 | dst_pitch);
      OUT_BATCH((dst_y << 16) | dst_x);
      OUT_BATCH((dst_y2 << 16) | dst_x2);
      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
		dst_offset);
      OUT_BATCH((src_y << 16) | src_x);
      OUT_BATCH(src_pitch);
      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
		src_offset);
      ADVANCE_BATCH();
   }
   else {
      assert(dst_x < dst_x2);
      assert(h > 0);

      BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
      OUT_BATCH(CMD);
      OUT_BATCH(BR13 | ((uint16_t)dst_pitch));
      OUT_BATCH((0 << 16) | dst_x);
      OUT_BATCH((h << 16) | dst_x2);
      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
		dst_offset + dst_y * dst_pitch);
      OUT_BATCH((0 << 16) | src_x);
      OUT_BATCH(src_pitch);
      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
		src_offset + src_y * src_pitch);
      ADVANCE_BATCH();
   }
}
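
A hypothetical call site for the function above, copying a 64x64 block between two linear 32bpp surfaces with GL_COPY. The intel, src_bo, and dst_bo names stand in for real driver state, and the pitches are in pixels because the function multiplies by cpp:

/* Hypothetical usage sketch; buffer objects and pitches are placeholders. */
intelEmitCopyBlit(intel, 4 /* cpp */,
                  1024, src_bo, 0, GL_FALSE,  /* src pitch, bo, offset, tiled */
                  1024, dst_bo, 0, GL_FALSE,  /* dst pitch, bo, offset, tiled */
                  0, 0,                       /* src x, y */
                  0, 0,                       /* dst x, y */
                  64, 64,                     /* width, height */
                  GL_COPY);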
Example #20
static void
upload_sbe_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
   /* _NEW_LIGHT */
   bool shade_model_flat = ctx->Light.ShadeModel == GL_FLAT;
   uint32_t dw1, dw10, dw11;
   int i;
   int attr = 0, input_index = 0;
   int urb_entry_read_offset = 1;
   uint16_t attr_overrides[VARYING_SLOT_MAX];
   /* _NEW_BUFFERS */
   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
   uint32_t point_sprite_origin;

   /* FINISHME: Attribute Swizzle Control Mode? */
   dw1 = GEN7_SBE_SWIZZLE_ENABLE | num_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT;

   /* _NEW_POINT
    *
    * Window coordinates in an FBO are inverted, which means point
    * sprite origin must be inverted.
    */
   if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
      point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
   } else {
      point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
   }
   dw1 |= point_sprite_origin;


   dw10 = 0;
   dw11 = 0;

   /* Create the mapping from the FS inputs we produce to the VS outputs
    * they source from.
    */
   uint32_t max_source_attr = 0;
   for (; attr < VARYING_SLOT_MAX; attr++) {
      enum glsl_interp_qualifier interp_qualifier =
         brw->fragment_program->InterpQualifier[attr];
      bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == VARYING_SLOT_COL1;

      if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)))
	 continue;

      if (ctx->Point.PointSprite &&
	  attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7 &&
	  ctx->Point.CoordReplace[attr - VARYING_SLOT_TEX0]) {
	 dw10 |= (1 << input_index);
      }

      if (attr == VARYING_SLOT_PNTC)
	 dw10 |= (1 << input_index);

      /* flat shading */
      if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
          (shade_model_flat && is_gl_Color &&
           interp_qualifier == INTERP_QUALIFIER_NONE))
         dw11 |= (1 << input_index);

      /* The hardware can only apply overrides to the first 16 attributes
       * at a time; the remaining (up to 16) have to be lined up so that the
       * input index = the output index.  We'll need to do some
       * tweaking to make sure that's the case.
       */
      assert(input_index < 16 || attr == input_index);

      /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
      attr_overrides[input_index++] =
         get_attr_override(&brw->vue_map_geom_out,
			   urb_entry_read_offset, attr,
                           ctx->VertexProgram._TwoSideEnabled,
                           &max_source_attr);
   }

   /* From the Ivy Bridge PRM, Volume 2, Part 1, documentation for
    * 3DSTATE_SBE DWord 1 bits 15:11, "Vertex URB Entry Read Length":
    *
    * "This field should be set to the minimum length required to read the
    *  maximum source attribute.  The maximum source attribute is indicated
    *  by the maximum value of the enabled Attribute # Source Attribute if
    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
    *  enable is not set.
    *
    *  read_length = ceiling((max_source_attr + 1) / 2)"
    */
   uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
   dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
          urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;

   for (; input_index < VARYING_SLOT_MAX; input_index++)
      attr_overrides[input_index] = 0;

   BEGIN_BATCH(14);
   OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
   OUT_BATCH(dw1);

   /* Output dwords 2 through 9 */
   for (i = 0; i < 8; i++) {
      OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
   }

   OUT_BATCH(dw10); /* point sprite texcoord bitmask */
   OUT_BATCH(dw11); /* constant interp bitmask */
   OUT_BATCH(0); /* wrapshortest enables 0-7 */
   OUT_BATCH(0); /* wrapshortest enables 8-15 */
   ADVANCE_BATCH();
}
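
The URB entry read length above implements the PRM's read_length = ceiling((max_source_attr + 1) / 2) without any rounding call: for non-negative integers, ALIGN(x, 2) / 2 rounds x up to the next even number and then halves it, which is exactly the ceiling of x / 2. A small self-check of that identity, using a typical power-of-two ALIGN definition:

#include <assert.h>

#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))

/* ALIGN(x, 2) / 2 == ceiling(x / 2) for non-negative integers. */
static void
check_read_length_identity(void)
{
   for (unsigned x = 0; x <= 64; x++)
      assert(ALIGN(x, 2) / 2 == (x + 1) / 2);
}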
/**
 * Copy the back color buffer to the front color buffer. 
 * Used for SwapBuffers().
 */
void
intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
                const drm_clip_rect_t * rect)
{

   struct intel_context *intel;
   const intelScreenPrivate *intelScreen;
   int ret;

   DBG("%s\n", __FUNCTION__);

   assert(dPriv);

   intel = intelScreenContext(dPriv->driScreenPriv->private);
   if (!intel)
      return;

   intelScreen = intel->intelScreen;

   if (intel->last_swap_fence) {
      dri_fence_wait(intel->last_swap_fence);
      dri_fence_unreference(intel->last_swap_fence);
      intel->last_swap_fence = NULL;
   }
   intel->last_swap_fence = intel->first_swap_fence;
   intel->first_swap_fence = NULL;

   /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
    * should work regardless.
    */
   LOCK_HARDWARE(intel);

   if (dPriv && dPriv->numClipRects) {
      struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
      struct intel_region *src, *dst;
      int nbox = dPriv->numClipRects;
      drm_clip_rect_t *pbox = dPriv->pClipRects;
      int cpp;
      int src_pitch, dst_pitch;
      unsigned short src_x, src_y;
      int BR13, CMD;
      int i;

      src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
      dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);

      src_pitch = src->pitch * src->cpp;
      dst_pitch = dst->pitch * dst->cpp;

      cpp = src->cpp;

      ASSERT(intel_fb);
      ASSERT(intel_fb->Base.Name == 0);    /* Not a user-created FBO */
      ASSERT(src);
      ASSERT(dst);
      ASSERT(src->cpp == dst->cpp);

      if (cpp == 2) {
	 BR13 = (0xCC << 16) | (1 << 24);
	 CMD = XY_SRC_COPY_BLT_CMD;
      }
      else {
	 BR13 = (0xCC << 16) | (1 << 24) | (1 << 25);
	 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      }

#ifndef I915
      if (src->tiled) {
	 CMD |= XY_SRC_TILED;
	 src_pitch /= 4;
      }
      if (dst->tiled) {
	 CMD |= XY_DST_TILED;
	 dst_pitch /= 4;
      }
#endif
      /* do space/cliprects check before going any further */
      intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS);
   again:
      ret = dri_bufmgr_check_aperture_space(dst->buffer);
      ret |= dri_bufmgr_check_aperture_space(src->buffer);
      
      if (ret) {
	intel_batchbuffer_flush(intel->batch);
	goto again;
      }
      
      for (i = 0; i < nbox; i++, pbox++) {
	 drm_clip_rect_t box = *pbox;

	 if (rect) {
	    if (!intel_intersect_cliprects(&box, &box, rect))
	       continue;
	 }

	 if (box.x1 >= box.x2 ||
	     box.y1 >= box.y2)
	    continue;

	 assert(box.x1 < box.x2);
	 assert(box.y1 < box.y2);
	 src_x = box.x1 - dPriv->x + dPriv->backX;
	 src_y = box.y1 - dPriv->y + dPriv->backY;

	 BEGIN_BATCH(8, REFERENCES_CLIPRECTS);
	 OUT_BATCH(CMD);
	 OUT_BATCH(BR13 | dst_pitch);
	 OUT_BATCH((box.y1 << 16) | box.x1);
	 OUT_BATCH((box.y2 << 16) | box.x2);

	 OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0);
	 OUT_BATCH((src_y << 16) | src_x);
	 OUT_BATCH(src_pitch);
	 OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
	 ADVANCE_BATCH();
      }

      if (intel->first_swap_fence)
	 dri_fence_unreference(intel->first_swap_fence);
      intel_batchbuffer_flush(intel->batch);
      intel->first_swap_fence = intel->batch->last_fence;
      if (intel->first_swap_fence)
	 dri_fence_reference(intel->first_swap_fence);
   }

   UNLOCK_HARDWARE(intel);
}
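
Both branches above hardwire raster op 0xCC into BR13 bits 23:16. Blitter ROPs are 8-bit truth tables over the source pattern (0xCC) and destination pattern (0xAA), so 0xCC is plain "dest = source". A sketch of how GL logic ops map onto those truth tables, mirroring what a translate_raster_op() helper conventionally returns:

/* Sketch: a few GL logic ops as blitter ROP truth tables, where the
 * source pattern is 0xCC and the destination pattern is 0xAA.
 */
static GLuint
translate_raster_op_sketch(GLenum logicop)
{
   switch (logicop) {
   case GL_COPY:   return 0xcc; /* S     */
   case GL_NOOP:   return 0xaa; /* D     */
   case GL_XOR:    return 0x66; /* S ^ D */
   case GL_AND:    return 0x88; /* S & D */
   case GL_INVERT: return 0x55; /* ~D    */
   case GL_CLEAR:  return 0x00; /* 0     */
   case GL_SET:    return 0xff; /* 1     */
   default:        return 0xcc;
   }
}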
Example #22
static void
gen8_emit_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   bool uses_edge_flag;

   brw_prepare_vertices(brw);
   brw_prepare_shader_draw_parameters(brw);

   uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
                     ctx->Polygon.BackMode != GL_FILL);

   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      unsigned vue = brw->vb.nr_enabled;

      /* The element for the edge flags must always be last, so we have to
       * insert the SGVS before it in that case.
       */
      if (uses_edge_flag) {
         assert(vue > 0);
         vue--;
      }

      WARN_ONCE(vue >= 33,
                "Trying to insert VID/IID past 33rd vertex element, "
                "need to reorder the vertex attrbutes.");

      unsigned dw1 = 0;
      if (brw->vs.prog_data->uses_vertexid) {
         dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID |
                (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) |  /* .z channel */
                (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT);
      }

      if (brw->vs.prog_data->uses_instanceid) {
         dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID |
                (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */
                (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT);
      }

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
      OUT_BATCH(dw1);
      ADVANCE_BATCH();

      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
      OUT_BATCH(vue | GEN8_VF_INSTANCING_ENABLE);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale vertex buffers stay in place, but they don't do anything
    * unless a VE loads from them.
    */
   if (brw->vb.nr_enabled == 0) {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (3 - 2));
      OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
                GEN6_VE0_VALID |
                (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
                (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
      ADVANCE_BATCH();
      return;
   }

   /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
   const bool uses_draw_params =
      brw->vs.prog_data->uses_basevertex ||
      brw->vs.prog_data->uses_baseinstance;
   const unsigned nr_buffers = brw->vb.nr_buffers +
      uses_draw_params + brw->vs.prog_data->uses_drawid;

   if (nr_buffers) {
      assert(nr_buffers <= 33);

      BEGIN_BATCH(1 + 4 * nr_buffers);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
      for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
         const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
         EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo,
                                  buffer->offset,
                                  buffer->offset + buffer->size,
                                  buffer->stride, 0 /* unused */);
      }

      if (uses_draw_params) {
         EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
                                  brw->draw.draw_params_bo,
                                  brw->draw.draw_params_offset,
                                  brw->draw.draw_params_bo->size,
                                  0 /* stride */,
                                  0 /* unused */);
      }

      if (brw->vs.prog_data->uses_drawid) {
         EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1,
                                  brw->draw.draw_id_bo,
                                  brw->draw.draw_id_offset,
                                  brw->draw.draw_id_bo->size,
                                  0 /* stride */,
                                  0 /* unused */);
      }
      ADVANCE_BATCH();
   }

   /* Normally we don't need an element for the SGVS attribute because the
    * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if
    * we're using draw parameters then we need an element for those
    * values.  Additionally if there is an edge flag element then the SGVS
    * can't be inserted past that so we need a dummy element to ensure that
    * the edge flag is the last one.
    */
   const bool needs_sgvs_element = (brw->vs.prog_data->uses_basevertex ||
                                    brw->vs.prog_data->uses_baseinstance ||
                                    ((brw->vs.prog_data->uses_instanceid ||
                                      brw->vs.prog_data->uses_vertexid) &&
                                     uses_edge_flag));
   const unsigned nr_elements =
      brw->vb.nr_enabled + needs_sgvs_element + brw->vs.prog_data->uses_drawid;

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
    * presumably for VertexID/InstanceID.
    */
   assert(nr_elements <= 34);

   struct brw_vertex_element *gen6_edgeflag_input = NULL;

   BEGIN_BATCH(1 + nr_elements * 2);
   OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;

      /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
       * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an
       * element which has edge flag enabled."
       */
      assert(!(is_passthru_format(format) && uses_edge_flag));

      /* The gen4 driver expects edgeflag to come in as a float, and passes
       * that float on to the tests in the clipper.  Mesa's current vertex
       * attribute value for EdgeFlag is stored as a float, which works out.
       * glEdgeFlagPointer, on the other hand, gives us an unnormalized
       * integer ubyte.  Just rewrite that to convert to a float.
       */
      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
         /* Gen6+ passes edgeflag as sideband along with the vertex, instead
          * of in the VUE.  We have to upload it sideband as the last vertex
          * element according to the B-Spec.
          */
         gen6_edgeflag_input = input;
         continue;
      }

      switch (input->glarray->Size) {
      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fall through */
      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fall through */
      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fall through */
      case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
                                              : BRW_VE1_COMPONENT_STORE_1_FLT;
         break;
      }

      /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE):
       *
       *     "When SourceElementFormat is set to one of the *64*_PASSTHRU
       *     formats, 64-bit components are stored in the URB without any
       *     conversion. In this case, vertex elements must be written as 128
       *     or 256 bits, with VFCOMP_STORE_0 being used to pad the output
       *     as required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red
       *     component into the URB, Component 1 must be specified as
       *     VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE)
       *     in order to output a 128-bit vertex element, or Components 1-3 must
       *     be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex
       *     element. Likewise, use of R64G64B64_PASSTHRU requires Component 3
       *     to be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex
       *     element."
       */
      if (input->glarray->Doubles) {
         switch (input->glarray->Size) {
         case 0:
         case 1:
         case 2:
            /*  Use 128-bits instead of 256-bits to write double and dvec2
             *  vertex elements.
             */
            comp2 = BRW_VE1_COMPONENT_NOSTORE;
            comp3 = BRW_VE1_COMPONENT_NOSTORE;
            break;
         case 3:
            /* Pad the output using VFCOMP_STORE_0 as suggested
             * by the BDW PRM.
             */
            comp3 = BRW_VE1_COMPONENT_STORE_0;
            break;
         }
      }

      OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
                GEN6_VE0_VALID |
                (format << BRW_VE0_FORMAT_SHIFT) |
                (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));

      OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
   }

   if (needs_sgvs_element) {
      if (brw->vs.prog_data->uses_basevertex ||
          brw->vs.prog_data->uses_baseinstance) {
         OUT_BATCH(GEN6_VE0_VALID |
                   brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
                   BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT);
         OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
                   (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT) |
                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
      } else {
         OUT_BATCH(GEN6_VE0_VALID);
         OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
      }
   }

   if (brw->vs.prog_data->uses_drawid) {
      OUT_BATCH(GEN6_VE0_VALID |
                ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
                (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT));
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }

   if (gen6_edgeflag_input) {
      uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);

      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
                GEN6_VE0_VALID |
                GEN6_VE0_EDGE_FLAG_ENABLE |
                (format << BRW_VE0_FORMAT_SHIFT) |
                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }
   ADVANCE_BATCH();

   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
      const struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
      unsigned element_index;

      /* The edge flag element is reordered to be the last one in the code
       * above so we need to compensate for that in the element indices used
       * below.
       */
      if (input == gen6_edgeflag_input)
         element_index = nr_elements - 1;
      else
         element_index = j++;

      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
      OUT_BATCH(element_index |
                (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
      OUT_BATCH(buffer->step_rate);
      ADVANCE_BATCH();
   }

   if (brw->vs.prog_data->uses_drawid) {
      const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;
      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
      OUT_BATCH(element);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
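
The fall-through switch over glarray->Size above fills components the vertex array doesn't supply with the OpenGL attribute defaults (0, 0, 0, 1). An equivalent restatement without fall-throughs, as an illustrative sketch (the constants are those used in the listing; the helper itself is hypothetical):

#include <stdbool.h>
#include <stdint.h>

/* Sketch: equivalent of the fall-through switch above.  Unsupplied
 * components read back as the GL defaults (0, 0, 0, 1), with w stored
 * as an integer or float 1 depending on the array type.
 */
static void
default_component_controls(int size, bool is_integer, uint32_t comp[4])
{
   for (int c = 0; c < 4; c++)
      comp[c] = BRW_VE1_COMPONENT_STORE_SRC;
   if (size < 1) comp[0] = BRW_VE1_COMPONENT_STORE_0;
   if (size < 2) comp[1] = BRW_VE1_COMPONENT_STORE_0;
   if (size < 3) comp[2] = BRW_VE1_COMPONENT_STORE_0;
   if (size < 4) comp[3] = is_integer ? BRW_VE1_COMPONENT_STORE_1_INT
                                      : BRW_VE1_COMPONENT_STORE_1_FLT;
}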
Example #23
static void
upload_wm_state(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_fragment_program *fp =
      brw_fragment_program_const(brw->fragment_program);
   uint32_t dw2, dw4, dw5, dw6;

   /* _NEW_BUFFERS */
   bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;

    /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_params == 0) {
      /* Disable the push constant buffers. */
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
		GEN6_CONSTANT_BUFFER_0_ENABLE |
		(5 - 2));
      /* Pointer to the WM constant buffer.  Covered by the set of
       * state flags from gen6_upload_wm_push_constants.
       */
      OUT_BATCH(brw->wm.push_const_offset +
		ALIGN(brw->wm.prog_data->nr_params,
		      brw->wm.prog_data->dispatch_width) / 8 - 1);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   dw2 = dw4 = dw5 = dw6 = 0;
   dw4 |= GEN6_WM_STATISTICS_ENABLE;
   dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
   dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;

   /* Use ALT floating point mode for ARB fragment programs, because they
    * require 0^0 == 1.  Even though _CurrentFragmentProgram is used for
    * rendering, CurrentFragmentProgram is used for this check to
    * differentiate between the GLSL and non-GLSL cases.
    */
   if (ctx->Shader.CurrentFragmentProgram == NULL)
      dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;

   /* CACHE_NEW_SAMPLER */
   dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
   dw4 |= (brw->wm.prog_data->first_curbe_grf <<
	   GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
   dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
	   GEN6_WM_DISPATCH_START_GRF_SHIFT_2);

   dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;

   /* CACHE_NEW_WM_PROG */
   dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
   if (brw->wm.prog_data->prog_offset_16)
      dw5 |= GEN6_WM_16_DISPATCH_ENABLE;

   /* CACHE_NEW_WM_PROG | _NEW_COLOR */
   if (brw->wm.prog_data->dual_src_blend &&
       (ctx->Color.BlendEnabled & 1) &&
       ctx->Color.Blend[0]._UsesDualSrc) {
      dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
   }

   /* _NEW_LINE */
   if (ctx->Line.StippleFlag)
      dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;

   /* _NEW_POLYGON */
   if (ctx->Polygon.StippleFlag)
      dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   if (fp->program.Base.InputsRead & VARYING_BIT_POS)
      dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
   if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
      dw5 |= GEN6_WM_COMPUTED_DEPTH;
   /* CACHE_NEW_WM_PROG */
   dw6 |= brw->wm.prog_data->barycentric_interp_modes <<
      GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;

   /* _NEW_COLOR, _NEW_MULTISAMPLE */
   if (fp->program.UsesKill || ctx->Color.AlphaEnabled ||
       ctx->Multisample.SampleAlphaToCoverage)
      dw5 |= GEN6_WM_KILL_ENABLE;

   if (brw_color_buffer_write_enabled(brw) ||
       dw5 & (GEN6_WM_KILL_ENABLE | GEN6_WM_COMPUTED_DEPTH)) {
      dw5 |= GEN6_WM_DISPATCH_ENABLE;
   }

   dw6 |= _mesa_bitcount_64(brw->fragment_program->Base.InputsRead) <<
      GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
   if (multisampled_fbo) {
      /* _NEW_MULTISAMPLE */
      if (ctx->Multisample.Enabled)
         dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
      else
         dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
      dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
   } else {
      dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
      dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
   }

   BEGIN_BATCH(9);
   OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
   OUT_BATCH(brw->wm.prog_offset);
   OUT_BATCH(dw2);
   if (brw->wm.prog_data->total_scratch) {
      OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		ffs(brw->wm.prog_data->total_scratch) - 11);
   } else {
      OUT_BATCH(0);
   }
   OUT_BATCH(dw4);
   OUT_BATCH(dw5);
   OUT_BATCH(dw6);
   OUT_BATCH(0); /* kernel 1 pointer */
   /* kernel 2 pointer */
   OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16);
   ADVANCE_BATCH();
}
Example #24
static void
gen8_upload_gs_state(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_stage_state *stage_state = &brw->gs.base;
   /* BRW_NEW_GEOMETRY_PROGRAM */
   bool active = brw->geometry_program;
   /* CACHE_NEW_GS_PROG */
   const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;

   if (active) {
      int urb_entry_write_offset = 1;
      uint32_t urb_entry_output_length =
         ((prog_data->vue_map.num_slots + 1) / 2 - urb_entry_write_offset);

      if (urb_entry_output_length == 0)
         urb_entry_output_length = 1;

      BEGIN_BATCH(10);
      OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2));
      OUT_BATCH(stage_state->prog_offset);
      OUT_BATCH(0);
      OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE |
                brw->geometry_program->VerticesIn |
                ((ALIGN(stage_state->sampler_count, 4)/4) <<
                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
                ((prog_data->base.binding_table.size_bytes / 4) <<
                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));

      if (brw->gs.prog_data->base.total_scratch) {
         OUT_RELOC64(stage_state->scratch_bo,
                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                     ffs(brw->gs.prog_data->base.total_scratch) - 11);
         WARN_ONCE(true,
                   "May need to implement a temporary workaround: GS Number of "
                   "URB Entries must be less than or equal to the GS Maximum "
                   "Number of Threads.\n");
      } else {
         OUT_BATCH(0);
         OUT_BATCH(0);
      }

      /* DW6 */
      OUT_BATCH(((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) <<
                 GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
                (brw->gs.prog_data->output_topology <<
                 GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
                (prog_data->urb_read_length <<
                 GEN6_GS_URB_READ_LENGTH_SHIFT) |
                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
                (prog_data->dispatch_grf_start_reg <<
                 GEN6_GS_DISPATCH_START_GRF_SHIFT));

      /* DW7 */
      OUT_BATCH(((brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT) |
                (brw->gs.prog_data->control_data_header_size_hwords <<
                 GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
                (brw->gs.prog_data->dual_instanced_dispatch ?
                 GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE :
                 GEN7_GS_DISPATCH_MODE_DUAL_OBJECT) |
                GEN6_GS_STATISTICS_ENABLE |
                (brw->gs.prog_data->include_primitive_id ?
                 GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) |
                GEN7_GS_REORDER_TRAILING |
                GEN7_GS_ENABLE);

      /* DW8 */
      OUT_BATCH(brw->gs.prog_data->control_data_format <<
                HSW_GS_CONTROL_DATA_FORMAT_SHIFT);

      /* DW9 / _NEW_TRANSFORM */
      OUT_BATCH((ctx->Transform.ClipPlanesEnabled <<
                 GEN8_GS_USER_CLIP_DISTANCE_SHIFT) |
                (urb_entry_output_length << GEN8_GS_URB_OUTPUT_LENGTH_SHIFT) |
                (urb_entry_write_offset <<
                 GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT));
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(10);
      OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2));
      OUT_BATCH(0); /* prog_bo */
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0); /* scratch space base offset */
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(GEN6_GS_STATISTICS_ENABLE);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
static void
gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
                                     const brw_blorp_params *params)
{
   const uint8_t mocs = GEN7_MOCS_L3;
   uint32_t surfwidth, surfheight;
   uint32_t surftype;
   unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
   unsigned int min_array_element;
   GLenum gl_target = params->depth.mt->target;
   unsigned int lod;

   switch (gl_target) {
   case GL_TEXTURE_CUBE_MAP_ARRAY:
   case GL_TEXTURE_CUBE_MAP:
      /* The PRM claims that we should use BRW_SURFACE_CUBE for this
       * situation, but experiments show that gl_Layer doesn't work when we do
       * this.  So we use BRW_SURFACE_2D, since for rendering purposes this is
       * equivalent.
       */
      surftype = BRW_SURFACE_2D;
      depth *= 6;
      break;
   default:
      surftype = translate_tex_target(gl_target);
      break;
   }

   min_array_element = params->depth.layer;
   if (params->depth.mt->num_samples > 1) {
      /* Convert physical layer to logical layer. */
      min_array_element /= params->depth.mt->num_samples;
   }

   lod = params->depth.level - params->depth.mt->first_level;

   if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) {
      /* HIZ ops for lod 0 may set the width & height a little
       * larger to allow the fast depth clear to fit the hardware
       * alignment requirements (8x4).
       */
      surfwidth = params->depth.width;
      surfheight = params->depth.height;
   } else {
      surfwidth = params->depth.mt->logical_width0;
      surfheight = params->depth.mt->logical_height0;
   }

   /* 3DSTATE_DEPTH_BUFFER */
   {
      brw_emit_depth_stall_flushes(brw);

      BEGIN_BATCH(7);
      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
      OUT_BATCH((params->depth.mt->pitch - 1) |
                params->depth_format << 18 |
                1 << 22 | /* hiz enable */
                1 << 28 | /* depth write */
                surftype << 29);
      OUT_RELOC(params->depth.mt->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                0);
      OUT_BATCH((surfwidth - 1) << 4 |
                (surfheight - 1) << 18 |
                lod);
      OUT_BATCH(((depth - 1) << 21) |
                (min_array_element << 10) |
                mocs);
      OUT_BATCH(0);
      OUT_BATCH((depth - 1) << 21);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_HIER_DEPTH_BUFFER */
   {
      struct intel_miptree_aux_buffer *hiz_buf = params->depth.mt->hiz_buf;

      BEGIN_BATCH(3);
      OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
      OUT_BATCH((mocs << 25) |
                (hiz_buf->pitch - 1));
      OUT_RELOC(hiz_buf->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_STENCIL_BUFFER */
   {
      BEGIN_BATCH(3);
      OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 */
static void upload_state_base_address( struct brw_context *brw )
{
   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (brw->gen >= 6) {
      if (brw->gen == 6)
	 intel_emit_post_sync_nonzero_flush(brw);

       BEGIN_BATCH(10);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
       /* General state base address: stateless DP read/write requests */
       OUT_BATCH(1);
       /* Surface state base address:
	* BINDING_TABLE_STATE
	* SURFACE_STATE
	*/
       OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
        /* Dynamic state base address:
	 * SAMPLER_STATE
	 * SAMPLER_BORDER_COLOR_STATE
	 * CLIP, SF, WM/CC viewport state
	 * COLOR_CALC_STATE
	 * DEPTH_STENCIL_STATE
	 * BLEND_STATE
	 * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
	 * Disable is clear, which we rely on)
	 */
       OUT_RELOC(brw->batch.bo, (I915_GEM_DOMAIN_RENDER |
				   I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

       OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
		 1); /* Instruction base address: shader kernels (incl. SIP) */

       OUT_BATCH(1); /* General state upper bound */
       /* Dynamic state upper bound.  Although the documentation says that
	* programming it to zero will cause it to be ignored, that is a lie.
	* If this isn't programmed to a real bound, the sampler border color
	* pointer is rejected, causing border color to mysteriously fail.
	*/
       OUT_BATCH(0xfffff001);
       OUT_BATCH(1); /* Indirect object upper bound */
       OUT_BATCH(1); /* Instruction access upper bound */
       ADVANCE_BATCH();
   } else if (brw->gen == 5) {
       BEGIN_BATCH(8);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
       OUT_BATCH(1); /* General state base address */
       OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
		 1); /* Surface state base address */
       OUT_BATCH(1); /* Indirect object base address */
       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
		 1); /* Instruction base address */
       OUT_BATCH(0xfffff001); /* General state upper bound */
       OUT_BATCH(1); /* Indirect object upper bound */
       OUT_BATCH(1); /* Instruction access upper bound */
       ADVANCE_BATCH();
   } else {
       BEGIN_BATCH(6);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
       OUT_BATCH(1); /* General state base address */
       OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
		 1); /* Surface state base address */
       OUT_BATCH(1); /* Indirect object base address */
       OUT_BATCH(1); /* General state upper bound */
       OUT_BATCH(1); /* Indirect object upper bound */
       ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINE_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing.  Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}
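/* A minimal sketch (illustrative, not the driver's actual table) of how a
 * downstream atom can subscribe to the flag raised above, so that its
 * pointer packets are reissued whenever STATE_BASE_ADDRESS changes.  The
 * atom and emit-callback names here are hypothetical.
 */
static void upload_binding_table_pointers_sketch(struct brw_context *brw);

static const struct brw_tracked_state binding_table_pointers_sketch = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_BATCH | BRW_NEW_STATE_BASE_ADDRESS,
      .cache = 0,
   },
   .emit = upload_binding_table_pointers_sketch,
};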
Example #27
static void
upload_wm_state(struct brw_context *brw)
{
    struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &intel->ctx;
    const struct brw_fragment_program *fp =
        brw_fragment_program_const(brw->fragment_program);
    bool writes_depth = false;
    uint32_t dw1, dw2;

    /* _NEW_BUFFERS */
    bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;

    dw1 = dw2 = 0;
    dw1 |= GEN7_WM_STATISTICS_ENABLE;
    dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
    dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;

    /* _NEW_LINE */
    if (ctx->Line.StippleFlag)
        dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;

    /* _NEW_POLYGON */
    if (ctx->Polygon.StippleFlag)
        dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;

    /* BRW_NEW_FRAGMENT_PROGRAM */
    if (fp->program.Base.InputsRead & VARYING_BIT_POS)
        dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
    if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
        writes_depth = true;
        dw1 |= GEN7_WM_PSCDEPTH_ON;
    }
    /* CACHE_NEW_WM_PROG */
    dw1 |= brw->wm.prog_data->barycentric_interp_modes <<
           GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;

    /* _NEW_COLOR, _NEW_MULTISAMPLE */
    if (fp->program.UsesKill || ctx->Color.AlphaEnabled ||
            ctx->Multisample.SampleAlphaToCoverage)
        dw1 |= GEN7_WM_KILL_ENABLE;

    /* _NEW_BUFFERS */
    if (brw_color_buffer_write_enabled(brw) || writes_depth ||
            dw1 & GEN7_WM_KILL_ENABLE) {
        dw1 |= GEN7_WM_DISPATCH_ENABLE;
    }
    if (multisampled_fbo) {
        /* _NEW_MULTISAMPLE */
        if (ctx->Multisample.Enabled)
            dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
        else
            dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
        dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
    } else {
        dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
        dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
    }

    BEGIN_BATCH(3);
    OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
    OUT_BATCH(dw1);
    OUT_BATCH(dw2);
    ADVANCE_BATCH();
}
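/* Hedged recap: the _NEW_* / BRW_NEW_* / CACHE_NEW_* markers in the function
 * above enumerate the dirty bits its tracked-state atom must declare.  A
 * sketch of that table, with the flag set taken from those comments (the
 * authoritative version lives next to the function in Mesa):
 */
const struct brw_tracked_state gen7_wm_state_sketch = {
   .dirty = {
      .mesa  = _NEW_BUFFERS | _NEW_COLOR | _NEW_LINE |
               _NEW_MULTISAMPLE | _NEW_POLYGON,
      .brw   = BRW_NEW_FRAGMENT_PROGRAM,
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = upload_wm_state,
};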
void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
                           struct intel_mipmap_tree *depth_mt,
                           uint32_t depth_offset, uint32_t depthbuffer_format,
                           uint32_t depth_surface_type,
                           struct intel_mipmap_tree *stencil_mt,
                           bool hiz, bool separate_stencil,
                           uint32_t width, uint32_t height,
                           uint32_t tile_x, uint32_t tile_y)
{
   /* Enable the hiz bit if we're doing separate stencil, because it and the
    * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
    * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
    *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
    *     Enable must also be enabled.
    *
    *     [DevGT]: This field must be set to the same value (enabled or
    *     disabled) as Hierarchical Depth Buffer Enable
    */
   bool enable_hiz_ss = hiz || separate_stencil;

   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (brw->gen == 6) {
      intel_emit_post_sync_nonzero_flush(brw);
      intel_emit_depth_stall_flushes(brw);
   }

   unsigned int len;
   if (brw->gen >= 6)
      len = 7;
   else if (brw->is_g4x || brw->gen == 5)
      len = 6;
   else
      len = 5;

   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
             (depthbuffer_format << 18) |
             ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
             ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
             (BRW_TILEWALK_YMAJOR << 26) |
             ((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1)
              << 27) |
             (depth_surface_type << 29));

   if (depth_mt) {
      OUT_RELOC(depth_mt->region->bo,
		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		depth_offset);
   } else {
      OUT_BATCH(0);
   }

   OUT_BATCH(((width + tile_x - 1) << 6) |
             ((height + tile_y - 1) << 19));
   OUT_BATCH(0);

   if (brw->is_g4x || brw->gen >= 5)
      OUT_BATCH(tile_x | (tile_y << 16));
   else
      assert(tile_x == 0 && tile_y == 0);

   if (brw->gen >= 6)
      OUT_BATCH(0);

   ADVANCE_BATCH();

   if (hiz || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz) {
         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(hiz_mt->region->pitch - 1);
	 OUT_RELOC(hiz_mt->region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		   brw->depthstencil.hiz_offset);
	 ADVANCE_BATCH();
      } else {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(0);
	 OUT_BATCH(0);
	 ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
	 struct intel_region *region = stencil_mt->region;

	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *    The pitch must be set to 2x the value computed based on width, as
          *    the stencil buffer is stored with two rows interleaved.
          * For example, a region->pitch of 128 bytes is programmed here as
          * 2 * 128 - 1 = 255, since the field also holds pitch minus one.
          */
	 OUT_BATCH(2 * region->pitch - 1);
	 OUT_RELOC(region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		   brw->depthstencil.stencil_offset);
	 ADVANCE_BATCH();
      } else {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(0);
	 OUT_BATCH(0);
	 ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (brw->gen >= 6 || hiz) {
      if (brw->gen == 6)
	 intel_emit_post_sync_nonzero_flush(brw);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
		GEN5_DEPTH_CLEAR_VALID |
		(2 - 2));
      OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
}
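/* Hedged usage sketch: even with no depth or stencil attachment, a caller
 * can satisfy the packet requirements by emitting a null surface.  Argument
 * order follows the prototype above; the wrapper name is hypothetical.
 */
static void
emit_null_depth_sketch(struct brw_context *brw)
{
   brw_emit_depth_stencil_hiz(brw,
                              NULL, 0,          /* no depth miptree/offset */
                              BRW_DEPTHFORMAT_D32_FLOAT,
                              BRW_SURFACE_NULL, /* null surface type */
                              NULL,             /* no stencil miptree */
                              false, false,     /* hiz, separate stencil */
                              0, 0,             /* width, height */
                              0, 0);            /* tile_x, tile_y */
}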
Example #29
static void emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
   struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
   struct intel_mipmap_tree *hiz_mt = brw->depthstencil.hiz_mt;
   uint32_t tile_x = brw->depthstencil.tile_x;
   uint32_t tile_y = brw->depthstencil.tile_y;
   unsigned int len;
   bool separate_stencil = false;

   if (stencil_mt && stencil_mt->format == MESA_FORMAT_S8)
      separate_stencil = true;

   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil) {
      depth_irb = stencil_irb;
      depth_mt = stencil_mt;
   }

   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

   if (!depth_irb && !separate_stencil) {
      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
		(BRW_SURFACE_NULL << 29));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(0);

      if (intel->gen >= 6)
	 OUT_BATCH(0);

      ADVANCE_BATCH();

   } else if (!depth_irb && separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * Enable the hiz bit because it and the separate stencil bit must have
       * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
       * 1.21 "Separate Stencil Enable":
       *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
       *     Enable must also be enabled.
       *
       *     [DevGT]: This field must be set to the same value (enabled or
       *     disabled) as Hierarchical Depth Buffer Enable
       *
       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(intel->has_separate_stencil);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
	        (1 << 21) | /* separate stencil enable */
	        (1 << 22) | /* hiz enable */
	        (BRW_TILEWALK_YMAJOR << 26) |
	        (1 << 27) | /* tiled surface */
	        (BRW_SURFACE_2D << 29));
      OUT_BATCH(0);
      OUT_BATCH(((stencil_irb->Base.Base.Width + tile_x - 1) << 6) |
	         (stencil_irb->Base.Base.Height + tile_y - 1) << 19);
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(tile_x | (tile_y << 16));
      else
	 assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
	 OUT_BATCH(0);

      ADVANCE_BATCH();

   } else {
      struct intel_region *region = depth_mt->region;

      /* If using separate stencil, hiz must be enabled. */
      assert(!separate_stencil || hiz_mt);

      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
      assert(!hiz_mt || region->tiling == I915_TILING_Y);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((region->pitch - 1) |
		(brw_depthbuffer_format(brw) << 18) |
		((hiz_mt ? 1 : 0) << 21) | /* separate stencil enable */
		((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
		(BRW_TILEWALK_YMAJOR << 26) |
		((region->tiling != I915_TILING_NONE) << 27) |
		(BRW_SURFACE_2D << 29));
      OUT_RELOC(region->bo,
		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		brw->depthstencil.depth_offset);
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
		(((depth_irb->Base.Base.Width + tile_x) - 1) << 6) |
		(((depth_irb->Base.Base.Height + tile_y) - 1) << 19));
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(tile_x | (tile_y << 16));
      else
	 assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
	 OUT_BATCH(0);

      ADVANCE_BATCH();
   }

   if (hiz_mt || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz_mt) {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(hiz_mt->region->pitch - 1);
	 OUT_RELOC(hiz_mt->region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		   brw->depthstencil.hiz_offset);
	 ADVANCE_BATCH();
      } else {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(0);
	 OUT_BATCH(0);
	 ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
	 struct intel_region *region = stencil_mt->region;

	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *    The pitch must be set to 2x the value computed based on width, as
          *    the stencil buffer is stored with two rows interleaved.
          */
	 OUT_BATCH(2 * region->pitch - 1);
	 OUT_RELOC(region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		   brw->depthstencil.stencil_offset);
	 ADVANCE_BATCH();
      } else {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(0);
	 OUT_BATCH(0);
	 ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz_mt) {
      if (intel->gen == 6)
	 intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
		GEN5_DEPTH_CLEAR_VALID |
		(2 - 2));
      OUT_BATCH(depth_irb ? depth_irb->mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
}
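/* Illustrative recap of the three 3DSTATE_DEPTH_BUFFER cases emitted above:
 *   - no depth, no separate stencil: BRW_SURFACE_NULL with D32_FLOAT and no
 *     relocation;
 *   - separate stencil only: BRW_SURFACE_2D with the hiz, separate-stencil,
 *     and tiled bits forced on, dimensions taken from the stencil rb;
 *   - depth bound: real format and pitch from the region, with the hiz and
 *     separate-stencil bits tied to the presence of hiz_mt.
 */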
Example #30
static void
brw_emit_vertices(struct brw_context *brw)
{
   GLuint i;

   brw_prepare_vertices(brw);
   brw_prepare_shader_draw_parameters(brw);

   brw_emit_query_begin(brw);

   unsigned nr_elements = brw->vb.nr_enabled;
   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid)
      ++nr_elements;

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale VB state stays in place, but the stale buffers don't do
    * anything unless a VE loads from them.
    */
   if (nr_elements == 0) {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
      if (brw->gen >= 6) {
	 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      }
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
      ADVANCE_BATCH();
      return;
   }

   /* Now emit VB and VEP state packets.
    */

   unsigned nr_buffers =
      brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;

   if (nr_buffers) {
      if (brw->gen >= 6) {
	 assert(nr_buffers <= 33);
      } else {
	 assert(nr_buffers <= 17);
      }

      BEGIN_BATCH(1 + 4 * nr_buffers);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
      for (i = 0; i < brw->vb.nr_buffers; i++) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
         EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->bo->size - 1,
                                  buffer->offset, buffer->stride,
                                  buffer->step_rate);

      }

      if (brw->vs.prog_data->uses_vertexid) {
         EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
                                  brw->draw.draw_params_bo,
                                  brw->draw.draw_params_bo->size - 1,
                                  brw->draw.draw_params_offset,
                                  0,  /* stride */
                                  0); /* step rate */
      }
      ADVANCE_BATCH();
   }
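   /* A hedged sketch of what EMIT_VERTEX_BUFFER_STATE presumably expands to
    * on gen6+, matching the 4 * nr_buffers sizing above (see
    * brw_draw_upload.c for the authoritative macro): four dwords per buffer,
    * namely the buffer index/access mode/pitch dword, a relocation to the
    * start of the buffer, a relocation to the last valid byte (the
    * bo->size - 1 argument), and the instance step rate.
    */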

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably
    * for VertexID/InstanceID.
    */
   if (brw->gen >= 6) {
      assert(nr_elements <= 34);
   } else {
      assert(nr_elements <= 18);
   }

   struct brw_vertex_element *gen6_edgeflag_input = NULL;

   BEGIN_BATCH(1 + nr_elements * 2);
   OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
   for (i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;

      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
         /* Gen6+ passes edgeflag as sideband along with the vertex, instead
          * of in the VUE.  We have to upload it sideband as the last vertex
          * element according to the B-Spec.
          */
         if (brw->gen >= 6) {
            gen6_edgeflag_input = input;
            continue;
         }
      }

      /* Components not provided by the array are filled with 0, and the
       * missing w component with 1; the cases below fall through on purpose.
       */
      switch (input->glarray->Size) {
      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
      case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
                                              : BRW_VE1_COMPONENT_STORE_1_FLT;
	 break;
      }

      if (brw->gen >= 6) {
	 OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      }

      if (brw->gen >= 5)
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
      else
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

   if (brw->gen >= 6 && gen6_edgeflag_input) {
      uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);

      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
                GEN6_VE0_VALID |
                GEN6_VE0_EDGE_FLAG_ENABLE |
                (format << BRW_VE0_FORMAT_SHIFT) |
                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }

   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      uint32_t dw0 = 0, dw1 = 0;
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;

      if (brw->vs.prog_data->uses_vertexid) {
         comp0 = BRW_VE1_COMPONENT_STORE_SRC;
         comp2 = BRW_VE1_COMPONENT_STORE_VID;
      }

      if (brw->vs.prog_data->uses_instanceid) {
         comp3 = BRW_VE1_COMPONENT_STORE_IID;
      }

      dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
            (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
            (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
            (comp3 << BRW_VE1_COMPONENT_3_SHIFT);

      if (brw->gen >= 6) {
         dw0 |= GEN6_VE0_VALID |
                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
      } else {
         dw0 |= BRW_VE0_VALID |
                brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
	 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
      }

      /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
       * the format is ignored and the value is always int.
       */

      OUT_BATCH(dw0);
      OUT_BATCH(dw1);
   }

   ADVANCE_BATCH();
}
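/* Worked sizing example for the VERTEX_ELEMENTS packet above: with three
 * enabled vertex attributes plus a gl_VertexID element, nr_elements is 4,
 * so BEGIN_BATCH reserves 1 + 2 * 4 = 9 dwords and the header's length
 * field is 2 * 4 - 1 = 7, i.e. the usual total-dwords-minus-two encoding.
 */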