Example #1
/* 3DSTATE_GS
 *
 * Disable the geometry shader.
 */
void
gen6_blorp_emit_gs_disable(struct brw_context *brw,
                           const brw_blorp_params *params)
{
   /* Disable all the constant buffers. */
   BEGIN_BATCH(5);
   OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   ADVANCE_BATCH();

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   ADVANCE_BATCH();
}
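
The (5 - 2) and (7 - 2) terms in these headers encode each packet's total
DWord length minus two, which is the bias these command headers use. A
minimal, self-contained sketch of that encoding follows; the helper name is
illustrative, and the real opcode values live in the driver's headers.

#include <assert.h>
#include <stdint.h>

/* Hypothetical helper showing the header layout: opcode in the high
 * half, total DWord count minus two in the low bits. */
static uint32_t
cmd_header(uint32_t opcode, unsigned total_dwords)
{
   assert(total_dwords >= 2); /* the field stores total - 2 */
   return (opcode << 16) | (total_dwords - 2);
}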
Example #2
static void
gen7_upload_hs_state(struct brw_context *brw)
{
   const struct brw_stage_state *stage_state = &brw->tcs.base;
   /* BRW_NEW_TESS_PROGRAMS */
   bool active = brw->tess_eval_program;
   /* BRW_NEW_TCS_PROG_DATA */
   const struct brw_vue_prog_data *prog_data = &brw->tcs.prog_data->base;

   if (active) {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
      OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
                          GEN7_HS_SAMPLER_COUNT) |
                SET_FIELD(prog_data->base.binding_table.size_bytes / 4,
                          GEN7_HS_BINDING_TABLE_ENTRY_COUNT) |
                (brw->max_hs_threads - 1));
      OUT_BATCH(GEN7_HS_ENABLE |
                GEN7_HS_STATISTICS_ENABLE |
                SET_FIELD(brw->tcs.prog_data->instances - 1,
                          GEN7_HS_INSTANCE_COUNT));
      OUT_BATCH(stage_state->prog_offset);
      if (prog_data->base.total_scratch) {
         OUT_RELOC(stage_state->scratch_bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   ffs(prog_data->base.total_scratch) - 11);
      } else {
         OUT_BATCH(0);
      }
      OUT_BATCH(GEN7_HS_INCLUDE_VERTEX_HANDLES |
                SET_FIELD(prog_data->base.dispatch_grf_start_reg,
                          GEN7_HS_DISPATCH_START_GRF));
      /* Ignore URB semaphores */
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
   brw->tcs.enabled = active;
}
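
The ffs(...) - 11 term above encodes the per-thread scratch size, which the
hardware expects as a power-of-two exponent relative to 1KB. A
self-contained sketch of that encoding, assuming the size is a power of two
of at least 1KB as the driver arranges:

#include <assert.h>
#include <strings.h> /* ffs() */

/* Sketch: the scratch-space field stores log2(size in KB).  For a
 * power-of-two size, ffs(size) returns log2(size) + 1, so
 * ffs(size) - 11 equals log2(size) - 10 = log2(size / 1024). */
static int
encode_scratch_size(unsigned per_thread_scratch)
{
   assert(per_thread_scratch >= 1024);
   assert((per_thread_scratch & (per_thread_scratch - 1)) == 0);
   return ffs((int)per_thread_scratch) - 11;
}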
Example #3
static void
upload_gs_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_stage_state *stage_state = &brw->gs.base;
   const int max_threads_shift = brw->is_haswell ?
      HSW_GS_MAX_THREADS_SHIFT : GEN6_GS_MAX_THREADS_SHIFT;
   /* BRW_NEW_GEOMETRY_PROGRAM */
   bool active = brw->geometry_program;
   /* BRW_NEW_GS_PROG_DATA */
   const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
   const struct brw_vue_prog_data *vue_prog_data =
      brw_vue_prog_data(stage_state->prog_data);
   const struct brw_gs_prog_data *gs_prog_data =
      brw_gs_prog_data(stage_state->prog_data);

   /**
    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
    * Geometry > Geometry Shader > State:
    *
    *     "Note: Because of corruption in IVB:GT2, software needs to flush the
    *     whole fixed function pipeline when the GS enable changes value in
    *     the 3DSTATE_GS."
    *
    * The hardware architects have clarified that in this context "flush the
    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
    * Stall" bit set.
    */
   if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active)
      gen7_emit_cs_stall_flush(brw);

   if (active) {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
      OUT_BATCH(stage_state->prog_offset);
      OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
                ((prog_data->binding_table.size_bytes / 4) <<
                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));

      if (prog_data->total_scratch) {
         OUT_RELOC(stage_state->scratch_bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   ffs(stage_state->per_thread_scratch) - 11);
      } else {
         OUT_BATCH(0);
      }

      uint32_t dw4 =
         ((gs_prog_data->output_vertex_size_hwords * 2 - 1) <<
          GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
         (gs_prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
         (vue_prog_data->urb_read_length <<
          GEN6_GS_URB_READ_LENGTH_SHIFT) |
         (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
         (prog_data->dispatch_grf_start_reg <<
          GEN6_GS_DISPATCH_START_GRF_SHIFT);

      /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between
       * Ivy Bridge and Haswell.
       *
       * On Ivy Bridge, setting this bit causes the vertices of a triangle
       * strip to be delivered to the geometry shader in an order that does
       * not strictly follow the OpenGL spec, but preserves triangle
       * orientation.  For example, if the vertices are (1, 2, 3, 4, 5), then
       * the geometry shader sees triangles:
       *
       * (1, 2, 3), (2, 4, 3), (3, 4, 5)
       *
       * (Clearing the bit is even worse, because it fails to preserve
       * orientation).
       *
       * Triangle strips with adjacency are always ordered in a way that
       * preserves triangle orientation but does not strictly follow the
       * OpenGL spec, regardless of the setting of this bit.
       *
       * On Haswell, both triangle strips and triangle strips with adjacency
       * are always ordered in a way that preserves triangle orientation.
       * Setting this bit causes the ordering to strictly follow the OpenGL
       * spec.
       *
       * So in either case we want to set the bit.  Unfortunately on Ivy
       * Bridge this will get the order close to correct but not perfect.
       */
      uint32_t dw5 =
         ((devinfo->max_gs_threads - 1) << max_threads_shift) |
         (gs_prog_data->control_data_header_size_hwords <<
          GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
         ((gs_prog_data->invocations - 1) <<
          GEN7_GS_INSTANCE_CONTROL_SHIFT) |
         SET_FIELD(vue_prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) |
         GEN6_GS_STATISTICS_ENABLE |
         (gs_prog_data->include_primitive_id ?
          GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) |
         GEN7_GS_REORDER_TRAILING |
         GEN7_GS_ENABLE;
      uint32_t dw6 = 0;

      if (brw->is_haswell) {
         dw6 |= gs_prog_data->control_data_format <<
            HSW_GS_CONTROL_DATA_FORMAT_SHIFT;
      } else {
         dw5 |= gs_prog_data->control_data_format <<
            IVB_GS_CONTROL_DATA_FORMAT_SHIFT;
      }

      OUT_BATCH(dw4);
      OUT_BATCH(dw5);
      OUT_BATCH(dw6);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
      OUT_BATCH(0); /* prog_bo */
      OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
                (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
      OUT_BATCH(0); /* scratch space base offset */
      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
                (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
                GEN7_GS_INCLUDE_VERTEX_HANDLES |
                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
      OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
                GEN6_GS_STATISTICS_ENABLE);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
   brw->gs.enabled = active;
}
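
The IVB:GT2 workaround comment above hinges on detecting when the GS enable
bit changes value, which is why the function compares brw->gs.enabled
against the new state and records it on the way out. A distilled,
self-contained sketch of that edge-detection pattern (names hypothetical):

#include <stdbool.h>

/* Hypothetical cut-down tracker: report whether the enable bit changed
 * value since the last emit, which is exactly the
 * brw->gs.enabled != active test that gates the flush above. */
struct gs_tracker {
   bool enabled; /* state programmed by the previous 3DSTATE_GS */
};

static bool
gs_enable_toggled(struct gs_tracker *t, bool active)
{
   bool toggled = (t->enabled != active);
   t->enabled = active; /* the function above does this on the way out */
   return toggled;
}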
Example #4
void
i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
	       int dstX, int dstY, int w, int h)
{
	ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
	intel_screen_private *intel = intel_get_screen_private(scrn);

	/* 28 + 16 + 10 + 20 + 32 + 16 */
	intel_batch_start_atomic(scrn, 150);

	if (intel->needs_render_state_emit)
		i915_emit_composite_setup(scrn);

	if (intel->needs_render_vertex_emit ||
	    intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
		i915_vertex_flush(intel);

		if (intel_vertex_space(intel) < 256) {
			intel_next_vertex(intel);

			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
				  I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
			OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
			OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
				  (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
			intel->vertex_index = 0;
		} else if (intel->floats_per_vertex != intel->last_floats_per_vertex){
			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
				  I1_LOAD_S(1) | 0);
			OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
				  (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));

			intel->vertex_index =
				(intel->vertex_used + intel->floats_per_vertex - 1) /  intel->floats_per_vertex;
			intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
		}

		intel->last_floats_per_vertex = intel->floats_per_vertex;
		intel->needs_render_vertex_emit = FALSE;
	}

	if (intel->prim_offset == 0) {
		if (intel->needs_render_ca_pass) {
			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
			OUT_BATCH(i915_get_blend_cntl(PictOpOutReverse,
						      intel->render_mask_picture,
						      intel->render_dest_picture->format));
			i915_composite_emit_shader(intel, PictOpOutReverse);
		}

		intel->prim_offset = intel->batch_used;
		OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
		OUT_BATCH(intel->vertex_index);
	}
	intel->vertex_count += 3;

	intel->prim_emit(intel,
			 srcX, srcY,
			 maskX, maskY,
			 dstX, dstY,
			 w, h);

	intel_batch_end_atomic(scrn);
}
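
When floats_per_vertex changes mid-buffer, the code above realigns the
write position to the next whole vertex of the new stride using a ceiling
divide. A self-contained sketch of just that computation:

/* Sketch: first vertex index (in the new stride) whose start lies at
 * or after the current write position in floats. */
static unsigned
realign_vertex_index(unsigned vertex_used, unsigned floats_per_vertex)
{
   return (vertex_used + floats_per_vertex - 1) / floats_per_vertex;
}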
Example #5
/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 */
static void upload_state_base_address( struct brw_context *brw )
{
   struct intel_context *intel = &brw->intel;

   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (intel->gen >= 6) {
      if (intel->gen == 6)
	 intel_emit_post_sync_nonzero_flush(intel);

       BEGIN_BATCH(10);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
       /* General state base address: stateless DP read/write requests */
       OUT_BATCH(1);
       /* Surface state base address:
	* BINDING_TABLE_STATE
	* SURFACE_STATE
	*/
       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
        /* Dynamic state base address:
	 * SAMPLER_STATE
	 * SAMPLER_BORDER_COLOR_STATE
	 * CLIP, SF, WM/CC viewport state
	 * COLOR_CALC_STATE
	 * DEPTH_STENCIL_STATE
	 * BLEND_STATE
	 * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
	 * Disable is clear, which we rely on)
	 */
       OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
				   I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

       OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
		 1); /* Instruction base address: shader kernels (incl. SIP) */

       OUT_BATCH(1); /* General state upper bound */
       /* Dynamic state upper bound.  Although the documentation says that
	* programming it to zero will cause it to be ignored, that is a lie.
	* If this isn't programmed to a real bound, the sampler border color
	* pointer is rejected, causing border color to mysteriously fail.
	*/
       OUT_BATCH(0xfffff001);
       OUT_BATCH(1); /* Indirect object upper bound */
       OUT_BATCH(1); /* Instruction access upper bound */
       ADVANCE_BATCH();
   } else if (intel->gen == 5) {
       BEGIN_BATCH(8);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
       OUT_BATCH(1); /* General state base address */
       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
		 1); /* Surface state base address */
       OUT_BATCH(1); /* Indirect object base address */
       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
		 1); /* Instruction base address */
       OUT_BATCH(0xfffff001); /* General state upper bound */
       OUT_BATCH(1); /* Indirect object upper bound */
       OUT_BATCH(1); /* Instruction access upper bound */
       ADVANCE_BATCH();
   } else {
       BEGIN_BATCH(6);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
       OUT_BATCH(1); /* General state base address */
       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
		 1); /* Surface state base address */
       OUT_BATCH(1); /* Indirect object base address */
       OUT_BATCH(1); /* General state upper bound */
       OUT_BATCH(1); /* Indirect object upper bound */
       ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINE_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing.  Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}
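
Each address DWord in STATE_BASE_ADDRESS carries a per-field "Modify
Enable" in bit 0, so OUT_BATCH(1) programs a base address of zero while
still marking the field valid, and the relocations OR in the same bit via
their delta of 1. A sketch of that packing, assuming 4KB-aligned base
addresses:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Sketch: combine a 4KB-aligned base address with the per-DWord
 * "Modify Enable" bit (bit 0).  OUT_BATCH(1) above is the base == 0
 * case of this helper. */
static uint32_t
base_address_dword(uint32_t base, bool modify_enable)
{
   assert((base & 0xfff) == 0); /* bases are assumed 4KB aligned */
   return base | (modify_enable ? 1u : 0u);
}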
Example #6
/**
 * Extract the needed fields from vertex_header and emit i915 dwords.
 * Recall that the vertices are constructed by the 'draw' module and
 * have a couple of slots at the beginning (1-dword header, 4-dword
 * clip pos) that we ignore here.
 */
static INLINE void
emit_hw_vertex( struct i915_context *i915,
                const struct vertex_header *vertex)
{
   const struct vertex_info *vinfo = &i915->current.vertex_info;
   uint i;
   uint count = 0;  /* for debug/sanity */

   assert(!i915->dirty);

   for (i = 0; i < vinfo->num_attribs; i++) {
      const uint j = vinfo->attrib[i].src_index;
      const float *attrib = vertex->data[j];
      switch (vinfo->attrib[i].emit) {
      case EMIT_1F:
         OUT_BATCH( fui(attrib[0]) );
         count++;
         break;
      case EMIT_2F:
         OUT_BATCH( fui(attrib[0]) );
         OUT_BATCH( fui(attrib[1]) );
         count += 2;
         break;
      case EMIT_3F:
         OUT_BATCH( fui(attrib[0]) );
         OUT_BATCH( fui(attrib[1]) );
         OUT_BATCH( fui(attrib[2]) );
         count += 3;
         break;
      case EMIT_4F:
         OUT_BATCH( fui(attrib[0]) );
         OUT_BATCH( fui(attrib[1]) );
         OUT_BATCH( fui(attrib[2]) );
         OUT_BATCH( fui(attrib[3]) );
         count += 4;
         break;
      case EMIT_4UB:
         OUT_BATCH( pack_ub4(float_to_ubyte( attrib[0] ),
                             float_to_ubyte( attrib[1] ),
                             float_to_ubyte( attrib[2] ),
                             float_to_ubyte( attrib[3] )) );
         count += 1;
         break;
      case EMIT_4UB_BGRA:
         OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ),
                             float_to_ubyte( attrib[1] ),
                             float_to_ubyte( attrib[0] ),
                             float_to_ubyte( attrib[3] )) );
         count += 1;
         break;
      default:
         assert(0);
      }
   }
   assert(count == vinfo->size);
}
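
fui() and pack_ub4() are small helpers from the driver's utility headers;
hedged sketches of what they are assumed to do, an IEEE-754 bit cast and a
little-endian byte pack respectively, look like this:

#include <stdint.h>
#include <string.h>

/* Assumed behavior of fui(): reinterpret a float's IEEE-754 bit
 * pattern as a uint32_t, done here via memcpy to avoid undefined
 * type-punning. */
static uint32_t
fui_sketch(float f)
{
   uint32_t u;
   memcpy(&u, &f, sizeof(u));
   return u;
}

/* Assumed behavior of pack_ub4(): pack four bytes with the first
 * argument in the least significant byte. */
static uint32_t
pack_ub4_sketch(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3)
{
   return (uint32_t)b0 | ((uint32_t)b1 << 8) |
          ((uint32_t)b2 << 16) | ((uint32_t)b3 << 24);
}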
Example #7
void
I915DisplayVideoTextured(ScrnInfoPtr scrn,
			 intel_adaptor_private *adaptor_priv, int id,
			 RegionPtr dstRegion,
			 short width, short height, int video_pitch,
			 int video_pitch2,
			 short src_w, short src_h, short drw_w, short drw_h,
			 PixmapPtr pixmap)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	uint32_t format, ms3, s5, tiling;
	BoxPtr pbox = REGION_RECTS(dstRegion);
	int nbox_total = REGION_NUM_RECTS(dstRegion);
	int nbox_this_time;
	int dxo, dyo, pix_xoff, pix_yoff;
	PixmapPtr target;

#if 0
	ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height,
	       video_pitch);
#endif

	dxo = dstRegion->extents.x1;
	dyo = dstRegion->extents.y1;

	if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048 ||
	    !intel_uxa_check_pitch_3d(pixmap)) {
		ScreenPtr screen = pixmap->drawable.pScreen;

		target = screen->CreatePixmap(screen,
					      dstRegion->extents.x2 - dxo,
					      dstRegion->extents.y2 - dyo,
					      pixmap->drawable.depth,
					      CREATE_PIXMAP_USAGE_SCRATCH);
		if (target == NULL)
			return;

		if (intel_uxa_get_pixmap_bo(target) == NULL) {
			screen->DestroyPixmap(target);
			return;
		}

		pix_xoff = -dxo;
		pix_yoff = -dyo;
	} else {
		target = pixmap;

		/* Set up the offset for translating from the given region
		 * (in screen coordinates) to the backing pixmap.
		 */
#ifdef COMPOSITE
		pix_xoff = -target->screen_x + target->drawable.x;
		pix_yoff = -target->screen_y + target->drawable.y;
#else
		pix_xoff = 0;
		pix_yoff = 0;
#endif
	}

#define BYTES_FOR_BOXES(n)	((200 + (n) * 20) * 4)
#define BOXES_IN_BYTES(s)	((((s)/4) - 200) / 20)
#define BATCH_BYTES(p)		((p)->batch_bo->size - 16)

	while (nbox_total) {
		nbox_this_time = nbox_total;
		if (BYTES_FOR_BOXES(nbox_this_time) > BATCH_BYTES(intel))
			nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(intel));
		nbox_total -= nbox_this_time;

		intel_batch_start_atomic(scrn, 200 + 20 * nbox_this_time);

		IntelEmitInvarientState(scrn);
		intel->last_3d = LAST_3D_VIDEO;

		/* draw rect -- just clipping */
		OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
		OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) |
			  DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3));
		OUT_BATCH(0x00000000);	/* ymin, xmin */
		/* ymax, xmax */
		OUT_BATCH((target->drawable.width - 1) |
			  (target->drawable.height - 1) << 16);
		OUT_BATCH(0x00000000);	/* yorigin, xorigin */

		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
			  I1_LOAD_S(5) | I1_LOAD_S(6) | 2);
		OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
			  S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
			  S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
		s5 = 0x0;
		if (intel->cpp == 2)
			s5 |= S5_COLOR_DITHER_ENABLE;
		OUT_BATCH(s5);	/* S5 - enable bits */
		OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
			  (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
			  (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
			  S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT));

		OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
		OUT_BATCH(0x00000000);

		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
		if (intel->cpp == 2)
			format = COLR_BUF_RGB565;
		else
			format =
			    COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER;

		OUT_BATCH(LOD_PRECLAMP_OGL |
			  DSTORG_HORT_BIAS(0x8) |
			  DSTORG_VERT_BIAS(0x8) | format);

		/* front buffer, pitch, offset */
		if (intel_uxa_pixmap_tiled(target)) {
			tiling = BUF_3D_TILED_SURFACE;
			if (intel_uxa_get_pixmap_private(target)->tiling == I915_TILING_Y)
				tiling |= BUF_3D_TILE_WALK_Y;
		} else
			tiling = 0;
		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
		OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling |
			  BUF_3D_PITCH(intel_pixmap_pitch(target)));
		OUT_RELOC_PIXMAP(target, I915_GEM_DOMAIN_RENDER,
				 I915_GEM_DOMAIN_RENDER, 0);

		if (!is_planar_fourcc(id)) {
			FS_LOCALS();

			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
			OUT_BATCH(0x0000001);	/* constant 0 */
			/* constant 0: brightness/contrast */
			OUT_BATCH_F(adaptor_priv->brightness / 128.0);
			OUT_BATCH_F(adaptor_priv->contrast / 255.0);
			OUT_BATCH_F(0.0);
			OUT_BATCH_F(0.0);

			OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
			OUT_BATCH(0x00000001);
			OUT_BATCH(SS2_COLORSPACE_CONVERSION |
				  (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
				   SS3_TCX_ADDR_MODE_SHIFT) |
				  (TEXCOORDMODE_CLAMP_EDGE <<
				   SS3_TCY_ADDR_MODE_SHIFT) |
				  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
				  SS3_NORMALIZED_COORDS);
			OUT_BATCH(0x00000000);

			OUT_BATCH(_3DSTATE_MAP_STATE | 3);
			OUT_BATCH(0x00000001);	/* texture map #1 */
			if (adaptor_priv->buf)
				OUT_RELOC(adaptor_priv->buf,
					  I915_GEM_DOMAIN_SAMPLER, 0,
					  adaptor_priv->YBufOffset);
			else
				OUT_BATCH(adaptor_priv->YBufOffset);

			ms3 = MAPSURF_422;
			switch (id) {
			case FOURCC_YUY2:
				ms3 |= MT_422_YCRCB_NORMAL;
				break;
			case FOURCC_UYVY:
				ms3 |= MT_422_YCRCB_SWAPY;
				break;
			}
			ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
			ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
			OUT_BATCH(ms3);
			OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);

			FS_BEGIN();
			i915_fs_dcl(FS_S0);
			i915_fs_dcl(FS_T0);
			i915_fs_texld(FS_OC, FS_S0, FS_T0);
			if (adaptor_priv->brightness != 0) {
				i915_fs_add(FS_OC,
					    i915_fs_operand_reg(FS_OC),
					    i915_fs_operand(FS_C0, X, X, X,
							    ZERO));
			}
			FS_END();
		} else {
			FS_LOCALS();

			/* For the planar formats, we set up three samplers --
			 * one for each plane, in a Y8 format.  Because I
			 * couldn't get the special PLANAR_TO_PACKED
			 * shader setup to work, I did the manual pixel shader:
			 *
			 * y' = y - .0625
			 * u' = u - .5
			 * v' = v - .5;
			 *
			 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
			 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
			 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
			 *
			 * register assignment:
			 * r0 = (y',u',v',0)
			 * r1 = (y,y,y,y)
			 * r2 = (u,u,u,u)
			 * r3 = (v,v,v,v)
			 * OC = (r,g,b,1)
			 */
			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
			OUT_BATCH(0x000001f);	/* constants 0-4 */
			/* constant 0: normalization offsets */
			OUT_BATCH_F(-0.0625);
			OUT_BATCH_F(-0.5);
			OUT_BATCH_F(-0.5);
			OUT_BATCH_F(0.0);
			/* constant 1: r coefficients */
			OUT_BATCH_F(1.1643);
			OUT_BATCH_F(0.0);
			OUT_BATCH_F(1.5958);
			OUT_BATCH_F(0.0);
			/* constant 2: g coefficients */
			OUT_BATCH_F(1.1643);
			OUT_BATCH_F(-0.39173);
			OUT_BATCH_F(-0.81290);
			OUT_BATCH_F(0.0);
			/* constant 3: b coefficients */
			OUT_BATCH_F(1.1643);
			OUT_BATCH_F(2.017);
			OUT_BATCH_F(0.0);
			OUT_BATCH_F(0.0);
			/* constant 4: brightness/contrast */
			OUT_BATCH_F(adaptor_priv->brightness / 128.0);
			OUT_BATCH_F(adaptor_priv->contrast / 255.0);
			OUT_BATCH_F(0.0);
			OUT_BATCH_F(0.0);

			OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
			OUT_BATCH(0x00000007);
			/* sampler 0 */
			OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
				   SS3_TCX_ADDR_MODE_SHIFT) |
				  (TEXCOORDMODE_CLAMP_EDGE <<
				   SS3_TCY_ADDR_MODE_SHIFT) |
				  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
				  SS3_NORMALIZED_COORDS);
			OUT_BATCH(0x00000000);
			/* sampler 1 */
			OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
				   SS3_TCX_ADDR_MODE_SHIFT) |
				  (TEXCOORDMODE_CLAMP_EDGE <<
				   SS3_TCY_ADDR_MODE_SHIFT) |
				  (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
				  SS3_NORMALIZED_COORDS);
			OUT_BATCH(0x00000000);
			/* sampler 2 */
			OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
				  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
			OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
				   SS3_TCX_ADDR_MODE_SHIFT) |
				  (TEXCOORDMODE_CLAMP_EDGE <<
				   SS3_TCY_ADDR_MODE_SHIFT) |
				  (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
				  SS3_NORMALIZED_COORDS);
			OUT_BATCH(0x00000000);

			OUT_BATCH(_3DSTATE_MAP_STATE | 9);
			OUT_BATCH(0x00000007);

			if (adaptor_priv->buf)
				OUT_RELOC(adaptor_priv->buf,
					  I915_GEM_DOMAIN_SAMPLER, 0,
					  adaptor_priv->YBufOffset);
			else
				OUT_BATCH(adaptor_priv->YBufOffset);

			ms3 = MAPSURF_8BIT | MT_8BIT_I8;
			ms3 |= (height - 1) << MS3_HEIGHT_SHIFT;
			ms3 |= (width - 1) << MS3_WIDTH_SHIFT;
			OUT_BATCH(ms3);
			/* Check whether Y has a pitch other than the
			 * normal double u/v pitch; e.g. i915 XvMC hw
			 * requires at least 1K alignment, so the Y pitch
			 * might be the same as U/V's. */
			if (video_pitch2)
				OUT_BATCH(((video_pitch2 / 4) -
					   1) << MS4_PITCH_SHIFT);
			else
				OUT_BATCH(((video_pitch * 2 / 4) -
					   1) << MS4_PITCH_SHIFT);

			if (adaptor_priv->buf)
				OUT_RELOC(adaptor_priv->buf,
					  I915_GEM_DOMAIN_SAMPLER, 0,
					  adaptor_priv->UBufOffset);
			else
				OUT_BATCH(adaptor_priv->UBufOffset);

			ms3 = MAPSURF_8BIT | MT_8BIT_I8;
			ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
			ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
			OUT_BATCH(ms3);
			OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);

			if (adaptor_priv->buf)
				OUT_RELOC(adaptor_priv->buf,
					  I915_GEM_DOMAIN_SAMPLER, 0,
					  adaptor_priv->VBufOffset);
			else
				OUT_BATCH(adaptor_priv->VBufOffset);

			ms3 = MAPSURF_8BIT | MT_8BIT_I8;
			ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT;
			ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
			OUT_BATCH(ms3);
			OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);

			FS_BEGIN();
			/* Declare samplers */
			i915_fs_dcl(FS_S0);	/* Y */
			i915_fs_dcl(FS_S1);	/* U */
			i915_fs_dcl(FS_S2);	/* V */
			i915_fs_dcl(FS_T0);	/* normalized coords */

			/* Load samplers to temporaries. */
			i915_fs_texld(FS_R1, FS_S0, FS_T0);
			i915_fs_texld(FS_R2, FS_S1, FS_T0);
			i915_fs_texld(FS_R3, FS_S2, FS_T0);

			/* Move the sampled YUV data in R[123] to the first
			 * 3 channels of R0.
			 */
			i915_fs_mov_masked(FS_R0, MASK_X,
					   i915_fs_operand_reg(FS_R1));
			i915_fs_mov_masked(FS_R0, MASK_Y,
					   i915_fs_operand_reg(FS_R2));
			i915_fs_mov_masked(FS_R0, MASK_Z,
					   i915_fs_operand_reg(FS_R3));

			/* Normalize the YUV data */
			i915_fs_add(FS_R0, i915_fs_operand_reg(FS_R0),
				    i915_fs_operand_reg(FS_C0));
			/* dot-product the YUV data in R0 by the vectors of
			 * coefficients for calculating R, G, and B, storing
			 * the results in the R, G, or B channels of the output
			 * color.  The OC results are implicitly clamped
			 * at the end of the program.
			 */
			i915_fs_dp3(FS_OC, MASK_X,
				    i915_fs_operand_reg(FS_R0),
				    i915_fs_operand_reg(FS_C1));
			i915_fs_dp3(FS_OC, MASK_Y,
				    i915_fs_operand_reg(FS_R0),
				    i915_fs_operand_reg(FS_C2));
			i915_fs_dp3(FS_OC, MASK_Z,
				    i915_fs_operand_reg(FS_R0),
				    i915_fs_operand_reg(FS_C3));
			/* Set alpha of the output to 1.0, by wiring W to 1
			 * and not actually using the source.
			 */
			i915_fs_mov_masked(FS_OC, MASK_W,
					   i915_fs_operand_one());

			if (adaptor_priv->brightness != 0) {
				i915_fs_add(FS_OC,
					    i915_fs_operand_reg(FS_OC),
					    i915_fs_operand(FS_C4, X, X, X,
							    ZERO));
			}
			FS_END();
		}

		OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
		while (nbox_this_time--) {
			int box_x1 = pbox->x1;
			int box_y1 = pbox->y1;
			int box_x2 = pbox->x2;
			int box_y2 = pbox->y2;
			float src_scale_x, src_scale_y;

			pbox++;

			src_scale_x = ((float)src_w / width) / drw_w;
			src_scale_y = ((float)src_h / height) / drw_h;

			/* vertex data - rect list consists of bottom right,
			 * bottom left, and top left vertices.
			 */

			/* bottom right */
			OUT_BATCH_F(box_x2 + pix_xoff);
			OUT_BATCH_F(box_y2 + pix_yoff);
			OUT_BATCH_F((box_x2 - dxo) * src_scale_x);
			OUT_BATCH_F((box_y2 - dyo) * src_scale_y);

			/* bottom left */
			OUT_BATCH_F(box_x1 + pix_xoff);
			OUT_BATCH_F(box_y2 + pix_yoff);
			OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
			OUT_BATCH_F((box_y2 - dyo) * src_scale_y);

			/* top left */
			OUT_BATCH_F(box_x1 + pix_xoff);
			OUT_BATCH_F(box_y1 + pix_yoff);
			OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
			OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
		}

		intel_batch_end_atomic(scrn);
	}

	if (target != pixmap) {
		GCPtr gc;

		gc = GetScratchGC(pixmap->drawable.depth,
				  pixmap->drawable.pScreen);
		if (gc) {
			gc->subWindowMode = ClipByChildren;

			if (REGION_NUM_RECTS(dstRegion) > 1) {
				RegionPtr tmp;

				tmp = REGION_CREATE(pixmap->drawable.pScreen, NULL, 0);
				if (tmp) {
					REGION_COPY(pixmap->drawable.pScreen, tmp, dstRegion);
					gc->funcs->ChangeClip(gc, CT_REGION, tmp, 0);
				}
			}

			ValidateGC(&pixmap->drawable, gc);
			gc->ops->CopyArea(&target->drawable, &pixmap->drawable, gc,
					  0, 0,
					  target->drawable.width,
					  target->drawable.height,
					  -pix_xoff, -pix_yoff);
			FreeScratchGC(gc);
		}

		target->drawable.pScreen->DestroyPixmap(target);
	}

	intel_uxa_debug_flush(scrn);
}
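
The planar-format shader comment above spells out a BT.601-style
YUV-to-RGB transform; a CPU-side sketch with the same constants makes the
arithmetic easy to check. Outputs are left unclamped, mirroring the shader,
whose writes to the output color clamp implicitly.

/* CPU-side sketch of the conversion the planar shader implements.
 * Inputs are normalized [0,1] Y/U/V samples. */
static void
yuv_to_rgb_sketch(float y, float u, float v,
                  float *r, float *g, float *b)
{
   const float yp = y - 0.0625f; /* constant 0: normalization offsets */
   const float up = u - 0.5f;
   const float vp = v - 0.5f;

   *r = 1.1643f * yp + 0.0f     * up + 1.5958f  * vp; /* constant 1 */
   *g = 1.1643f * yp - 0.39173f * up - 0.81290f * vp; /* constant 2 */
   *b = 1.1643f * yp + 2.017f   * up + 0.0f     * vp; /* constant 3 */
}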
Example #8
static void
upload_sbe_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
   /* _NEW_LIGHT */
   bool shade_model_flat = ctx->Light.ShadeModel == GL_FLAT;
   uint32_t dw1, dw10, dw11;
   int i;
   int attr = 0, input_index = 0;
   int urb_entry_read_offset = 1;
   uint16_t attr_overrides[FRAG_ATTRIB_MAX];
   /* _NEW_BUFFERS */
   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
   uint32_t point_sprite_origin;

   /* FINISHME: Attribute Swizzle Control Mode? */
   dw1 = GEN7_SBE_SWIZZLE_ENABLE | num_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT;

   /* _NEW_POINT
    *
    * Window coordinates in an FBO are inverted, which means point
    * sprite origin must be inverted.
    */
   if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
      point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
   } else {
      point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
   }
   dw1 |= point_sprite_origin;


   dw10 = 0;
   dw11 = 0;

   /* Create the mapping from the FS inputs we produce to the VS outputs
    * they source from.
    */
   uint32_t max_source_attr = 0;
   for (; attr < FRAG_ATTRIB_MAX; attr++) {
      enum glsl_interp_qualifier interp_qualifier =
         brw->fragment_program->InterpQualifier[attr];
      bool is_gl_Color = attr == FRAG_ATTRIB_COL0 || attr == FRAG_ATTRIB_COL1;

      if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)))
	 continue;

      if (ctx->Point.PointSprite &&
	  attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7 &&
	  ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) {
	 dw10 |= (1 << input_index);
      }

      if (attr == FRAG_ATTRIB_PNTC)
	 dw10 |= (1 << input_index);

      /* flat shading */
      if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
          (shade_model_flat && is_gl_Color &&
           interp_qualifier == INTERP_QUALIFIER_NONE))
         dw11 |= (1 << input_index);

      /* The hardware can only do the overrides on 16 overrides at a
       * time, and the other up to 16 have to be lined up so that the
       * input index = the output index.  We'll need to do some
       * tweaking to make sure that's the case.
       */
      assert(input_index < 16 || attr == input_index);

      /* CACHE_NEW_VS_PROG | _NEW_LIGHT | _NEW_PROGRAM */
      attr_overrides[input_index++] =
         get_attr_override(&brw->vs.prog_data->vue_map,
			   urb_entry_read_offset, attr,
                           ctx->VertexProgram._TwoSideEnabled,
                           &max_source_attr);
   }

   /* From the Ivy Bridge PRM, Volume 2, Part 1, documentation for
    * 3DSTATE_SBE DWord 1 bits 15:11, "Vertex URB Entry Read Length":
    *
    * "This field should be set to the minimum length required to read the
    *  maximum source attribute.  The maximum source attribute is indicated
    *  by the maximum value of the enabled Attribute # Source Attribute if
    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
    *  enable is not set.
    *
    *  read_length = ceiling((max_source_attr + 1) / 2)"
    */
   uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
   dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
          urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;

   for (; input_index < FRAG_ATTRIB_MAX; input_index++)
      attr_overrides[input_index] = 0;

   BEGIN_BATCH(14);
   OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
   OUT_BATCH(dw1);

   /* Output dwords 2 through 9 */
   for (i = 0; i < 8; i++) {
      OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
   }

   OUT_BATCH(dw10); /* point sprite texcoord bitmask */
   OUT_BATCH(dw11); /* constant interp bitmask */
   OUT_BATCH(0); /* wrapshortest enables 0-7 */
   OUT_BATCH(0); /* wrapshortest enables 8-15 */
   ADVANCE_BATCH();
}
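
The ALIGN(max_source_attr + 1, 2) / 2 expression above is a ceiling divide
in disguise, matching the PRM's read_length = ceiling((max_source_attr +
1) / 2). A self-contained sketch, assuming ALIGN rounds up to a multiple of
its power-of-two second argument as Mesa's macro does:

#include <assert.h>
#include <stdint.h>

#define ALIGN_SKETCH(x, a) (((x) + (a) - 1) & ~((a) - 1))

/* Sketch: ALIGN(x, 2) / 2 equals ceil(x / 2). */
static uint32_t
urb_read_length_sketch(uint32_t max_source_attr)
{
   uint32_t len = ALIGN_SKETCH(max_source_attr + 1, 2) / 2;
   assert(len == (max_source_attr + 2) / 2); /* ceil((n + 1) / 2) */
   return len;
}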
Example #9
static void
gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
                                     const brw_blorp_params *params)
{
   struct intel_context *intel = &brw->intel;
   uint32_t draw_x = params->depth.x_offset;
   uint32_t draw_y = params->depth.y_offset;
   uint32_t tile_mask_x, tile_mask_y;

   gen6_blorp_compute_tile_masks(params, &tile_mask_x, &tile_mask_y);

   /* 3DSTATE_DEPTH_BUFFER */
   {
      uint32_t tile_x = draw_x & tile_mask_x;
      uint32_t tile_y = draw_y & tile_mask_y;
      uint32_t offset =
         intel_region_get_aligned_offset(params->depth.mt->region,
                                         draw_x & ~tile_mask_x,
                                         draw_y & ~tile_mask_y, false);

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       *
       * We have no guarantee that tile_x and tile_y are correctly aligned,
       * since they are determined by the mipmap layout, which is only aligned
       * to multiples of 4.
       *
       * So, to avoid hanging the GPU, just smash the low order 3 bits of
       * tile_x and tile_y to 0.  This is a temporary workaround until we come
       * up with a better solution.
       */
      tile_x &= ~7;
      tile_y &= ~7;

      intel_emit_depth_stall_flushes(intel);

      BEGIN_BATCH(7);
      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
      uint32_t pitch_bytes =
         params->depth.mt->region->pitch * params->depth.mt->region->cpp;
      OUT_BATCH((pitch_bytes - 1) |
                params->depth_format << 18 |
                1 << 22 | /* hiz enable */
                1 << 28 | /* depth write */
                BRW_SURFACE_2D << 29);
      OUT_RELOC(params->depth.mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                offset);
      OUT_BATCH((params->depth.width + tile_x - 1) << 4 |
                (params->depth.height + tile_y - 1) << 18);
      OUT_BATCH(0);
      OUT_BATCH(tile_x |
                tile_y << 16);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_HIER_DEPTH_BUFFER */
   {
      struct intel_region *hiz_region = params->depth.mt->hiz_mt->region;
      uint32_t hiz_offset =
         intel_region_get_aligned_offset(hiz_region,
                                         draw_x & ~tile_mask_x,
                                         (draw_y & ~tile_mask_y) / 2, false);

      BEGIN_BATCH(3);
      OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
      OUT_RELOC(hiz_region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                hiz_offset);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_STENCIL_BUFFER */
   {
      BEGIN_BATCH(3);
      OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
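
Both this function and its gen6 counterpart later in the listing split the
draw offset into a tile-aligned part, folded into the relocation offset via
intel_region_get_aligned_offset, and an intra-tile remainder programmed as
tile_x/tile_y. A self-contained sketch of that split, assuming tile_mask_*
is the tile dimension minus one:

#include <stdint.h>

/* Sketch: divide a draw offset into a tile-aligned base portion and
 * an intra-tile remainder. */
static void
split_draw_offset(uint32_t draw, uint32_t tile_mask,
                  uint32_t *aligned, uint32_t *intra_tile)
{
   *aligned = draw & ~tile_mask;   /* goes into the surface base offset */
   *intra_tile = draw & tile_mask; /* goes into the tile_x/tile_y field */
}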
Example #10
File: brw_curbe.c  Project: vsyrjala/mesa
/**
 * Gathers together all the uniform values into a block of memory to be
 * uploaded into the CURBE, then emits the state packet telling the hardware
 * the new location.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_CURBE_OFFSETS */
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   gl_constant_value *buf;
   GLuint i;
   gl_clip_plane *clip_planes;

   if (sz == 0) {
      goto emit;
   }

   buf = intel_upload_space(brw, bufsz, 64,
                            &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);

   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      _mesa_load_state_parameters(ctx, brw->fragment_program->Base.Parameters);

      /* BRW_NEW_CURBE_OFFSETS */
      GLuint offset = brw->curbe.wm_start * 16;

      /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      for (i = 0; i < brw->wm.prog_data->base.nr_params; i++) {
	 buf[offset + i] = *brw->wm.prog_data->base.param[i];
      }
   }

   /* clipper constants */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint j;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
	 buf[offset + i * 4 + 0].f = fixed_plane[i][0];
	 buf[offset + i * 4 + 1].f = fixed_plane[i][1];
	 buf[offset + i * 4 + 2].f = fixed_plane[i][2];
	 buf[offset + i * 4 + 3].f = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      clip_planes = brw_select_clip_planes(ctx);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
	    buf[offset + i * 4 + 0].f = clip_planes[j][0];
	    buf[offset + i * 4 + 1].f = clip_planes[j][1];
	    buf[offset + i * 4 + 2].f = clip_planes[j][2];
	    buf[offset + i * 4 + 3].f = clip_planes[j][3];
	    i++;
	 }
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      _mesa_load_state_parameters(ctx, brw->vertex_program->Base.Parameters);

      GLuint offset = brw->curbe.vs_start * 16;

      /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      for (i = 0; i < brw->vs.prog_data->base.base.nr_params; i++) {
         buf[offset + i] = *brw->vs.prog_data->base.base.param[i];
      }
   }

   if (0) {
      for (i = 0; i < sz*16; i+=4)
	 fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                 buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
   }

   /* Because this provokes an action (i.e. copy the constants into the
    * URB), it shouldn't be short-circuited if identical to the
    * previous time - because e.g. the urb destination may have
    * changed, or the urb contents may differ from last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */

emit:
   /* Work around mysterious 965 hangs that appear to happen if you do
    * two 3DPRIMITIVEs with only a CONSTANT_BUFFER in between.  If we
    * haven't already flushed for some other reason, explicitly do so.
    *
    * We've found no documented reason why this should be necessary.
    */
   if (brw->gen == 4 && !brw->is_g4x &&
       (brw->ctx.NewDriverState & (BRW_NEW_BATCH | BRW_NEW_PSP)) == 0) {
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8
    * (CONSTANT_BUFFER (CURBE Load)):
    *
    *     "Modifying the CS URB allocation via URB_FENCE invalidates any
    *      previous CURBE entries. Therefore software must subsequently
    *      [re]issue a CONSTANT_BUFFER command before CURBE data can be used
    *      in the pipeline."
    */
   BEGIN_BATCH(2);
   if (brw->curbe.total_size == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo,
		I915_GEM_DOMAIN_INSTRUCTION, 0,
		(brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();
}
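
The * 16 scaling on wm_start, clip_start, and vs_start above reflects CURBE
offsets being counted in 16-float (one register) units, and bufsz applies
the same factor in bytes. A small sketch of the unit conversions (names
illustrative):

#include <stddef.h>
#include <stdint.h>

/* Sketch: CURBE sizes/offsets are in 16-float register units. */
enum { CURBE_REG_FLOATS = 16 };

static uint32_t
curbe_float_offset(uint32_t start_reg)   /* e.g. brw->curbe.wm_start */
{
   return start_reg * CURBE_REG_FLOATS;  /* index into buf[] above */
}

static size_t
curbe_buffer_bytes(uint32_t total_size_regs) /* brw->curbe.total_size */
{
   return (size_t)total_size_regs * CURBE_REG_FLOATS * sizeof(float);
}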
Example #11
static void
upload_sf_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   uint32_t dw1, dw2, dw3;
   float point_size;
   /* _NEW_BUFFERS */
   bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
   bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;

   dw1 = GEN6_SF_STATISTICS_ENABLE |
         GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;

   /* _NEW_BUFFERS */
   dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);

   /* _NEW_POLYGON */
   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
      dw1 |= GEN6_SF_WINDING_CCW;

   if (ctx->Polygon.OffsetFill)
       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;

   if (ctx->Polygon.OffsetLine)
       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;

   if (ctx->Polygon.OffsetPoint)
       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;

   switch (ctx->Polygon.FrontMode) {
   case GL_FILL:
       dw1 |= GEN6_SF_FRONT_SOLID;
       break;

   case GL_LINE:
       dw1 |= GEN6_SF_FRONT_WIREFRAME;
       break;

   case GL_POINT:
       dw1 |= GEN6_SF_FRONT_POINT;
       break;

   default:
       assert(0);
       break;
   }

   switch (ctx->Polygon.BackMode) {
   case GL_FILL:
       dw1 |= GEN6_SF_BACK_SOLID;
       break;

   case GL_LINE:
       dw1 |= GEN6_SF_BACK_WIREFRAME;
       break;

   case GL_POINT:
       dw1 |= GEN6_SF_BACK_POINT;
       break;

   default:
       assert(0);
       break;
   }

   dw2 = 0;

   if (ctx->Polygon.CullFlag) {
      switch (ctx->Polygon.CullFaceMode) {
      case GL_FRONT:
	 dw2 |= GEN6_SF_CULL_FRONT;
	 break;
      case GL_BACK:
	 dw2 |= GEN6_SF_CULL_BACK;
	 break;
      case GL_FRONT_AND_BACK:
	 dw2 |= GEN6_SF_CULL_BOTH;
	 break;
      default:
	 assert(0);
	 break;
      }
   } else {
      dw2 |= GEN6_SF_CULL_NONE;
   }

   /* _NEW_SCISSOR */
   if (ctx->Scissor.Enabled)
      dw2 |= GEN6_SF_SCISSOR_ENABLE;

   /* _NEW_LINE */
   {
      uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7);
      /* TODO: line width of 0 is not allowed when MSAA enabled */
      if (line_width_u3_7 == 0)
         line_width_u3_7 = 1;
      dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
   }
   if (ctx->Line.SmoothFlag) {
      dw2 |= GEN6_SF_LINE_AA_ENABLE;
      dw2 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
   }
   if (ctx->Line.StippleFlag && intel->is_haswell) {
      dw2 |= HSW_SF_LINE_STIPPLE_ENABLE;
   }
   /* _NEW_MULTISAMPLE */
   if (multisampled_fbo && ctx->Multisample.Enabled)
      dw2 |= GEN6_SF_MSRAST_ON_PATTERN;

   /* FINISHME: Last Pixel Enable?  Vertex Sub Pixel Precision Select?
    */

   dw3 = GEN6_SF_LINE_AA_MODE_TRUE;

   /* _NEW_PROGRAM | _NEW_POINT */
   if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated))
      dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;

   /* Clamp to ARB_point_parameters user limits */
   point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);

   /* Clamp to the hardware limits and convert to fixed point */
   dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);

   /* _NEW_LIGHT */
   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
      dw3 |=
	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
   } else {
      dw3 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
   OUT_BATCH(dw1);
   OUT_BATCH(dw2);
   OUT_BATCH(dw3);
   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
   OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
   ADVANCE_BATCH();
}
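
U_FIXED converts a clamped float into the unsigned fixed-point formats
these fields use: U3.7 for line width and U8.3 for point size. A sketch of
the assumed conversion (multiply by 2^frac_bits and truncate):

#include <stdint.h>

/* Sketch of U_FIXED(value, frac_bits).  The caller clamps to the
 * representable range first, as the code above does. */
static uint32_t
u_fixed_sketch(float value, unsigned frac_bits)
{
   return (uint32_t)(value * (float)(1u << frac_bits));
}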
Example #12
static void brw_emit_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   GLuint i, nr_elements;

   brw_prepare_vertices(brw);

   brw_emit_query_begin(brw);

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale VB state stays in place, but the buffers don't do anything
    * unless a VE loads from them.
    */
   if (brw->vb.nr_enabled == 0) {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
      if (intel->gen >= 6) {
	 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      }
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
      CACHED_BATCH();
      return;
   }

   /* Now emit VB and VEP state packets.
    */

   if (brw->vb.nr_buffers) {
      if (intel->gen >= 6) {
	 assert(brw->vb.nr_buffers <= 33);
      } else {
	 assert(brw->vb.nr_buffers <= 17);
      }

      BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
      for (i = 0; i < brw->vb.nr_buffers; i++) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
	 uint32_t dw0;

	 if (intel->gen >= 6) {
	    dw0 = buffer->step_rate
	             ? GEN6_VB0_ACCESS_INSTANCEDATA
	             : GEN6_VB0_ACCESS_VERTEXDATA;
	    dw0 |= i << GEN6_VB0_INDEX_SHIFT;
	 } else {
	    dw0 = buffer->step_rate
	             ? BRW_VB0_ACCESS_INSTANCEDATA
	             : BRW_VB0_ACCESS_VERTEXDATA;
	    dw0 |= i << BRW_VB0_INDEX_SHIFT;
	 }

	 if (intel->gen >= 7)
	    dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

	 OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
	 OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
	 if (intel->gen >= 5) {
	    OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
	 } else
	    OUT_BATCH(0);
	 OUT_BATCH(buffer->step_rate);

	 brw->vb.current_buffers[i].handle = buffer->bo->handle;
	 brw->vb.current_buffers[i].offset = buffer->offset;
	 brw->vb.current_buffers[i].stride = buffer->stride;
	 brw->vb.current_buffers[i].step_rate = buffer->step_rate;
      }
      brw->vb.nr_current_buffers = i;
      ADVANCE_BATCH();
   }

   nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably
    * for VertexID/InstanceID.
    */
   if (intel->gen >= 6) {
      assert(nr_elements <= 34);
   } else {
      assert(nr_elements <= 18);
   }

   BEGIN_BATCH(1 + nr_elements * 2);
   OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
   for (i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = get_surface_type(input->glarray->Type,
					 input->glarray->Size,
					 input->glarray->Format,
					 input->glarray->Normalized,
                                         input->glarray->Integer);
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;

      switch (input->glarray->Size) {
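      /* Intentional fall-through: each case below also runs the ones
       * after it, so components the array doesn't supply are stored as
       * constant 0, and the w component as constant 1. */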
      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
      case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
                                              : BRW_VE1_COMPONENT_STORE_1_FLT;
	 break;
      }

      if (intel->gen >= 6) {
	 OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      }

      if (intel->gen >= 5)
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
      else
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

   if (brw->vs.prog_data->uses_vertexid) {
      uint32_t dw0 = 0, dw1 = 0;

      dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));

      if (intel->gen >= 6) {
	 dw0 |= GEN6_VE0_VALID;
      } else {
	 dw0 |= BRW_VE0_VALID;
	 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
      }

      /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
       * the format is ignored and the value is always int.
       */

      OUT_BATCH(dw0);
      OUT_BATCH(dw1);
   }

   CACHED_BATCH();
}
Example #13
static void
gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
                                     const brw_blorp_params *params)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t draw_x = params->depth.x_offset;
   uint32_t draw_y = params->depth.y_offset;
   uint32_t tile_mask_x, tile_mask_y;

   brw_get_depthstencil_tile_masks(params->depth.mt,
                                   params->depth.level,
                                   params->depth.layer,
                                   NULL,
                                   &tile_mask_x, &tile_mask_y);

   /* 3DSTATE_DEPTH_BUFFER */
   {
      uint32_t tile_x = draw_x & tile_mask_x;
      uint32_t tile_y = draw_y & tile_mask_y;
      uint32_t offset =
         intel_region_get_aligned_offset(params->depth.mt->region,
                                         draw_x & ~tile_mask_x,
                                         draw_y & ~tile_mask_y, false);

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       *
       * We have no guarantee that tile_x and tile_y are correctly aligned,
       * since they are determined by the mipmap layout, which is only aligned
       * to multiples of 4.
       *
       * So, to avoid hanging the GPU, just smash the low order 3 bits of
       * tile_x and tile_y to 0.  This is a temporary workaround until we come
       * up with a better solution.
       */
      WARN_ONCE((tile_x & 7) || (tile_y & 7),
                "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
                "Truncating offset, bad rendering may occur.\n");
      tile_x &= ~7;
      tile_y &= ~7;

      intel_emit_post_sync_nonzero_flush(brw);
      intel_emit_depth_stall_flushes(brw);

      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
      OUT_BATCH((params->depth.mt->region->pitch - 1) |
                params->depth_format << 18 |
                1 << 21 | /* separate stencil enable */
                1 << 22 | /* hiz enable */
                BRW_TILEWALK_YMAJOR << 26 |
                1 << 27 | /* y-tiled */
                BRW_SURFACE_2D << 29);
      OUT_RELOC(params->depth.mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                offset);
      OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
                (params->depth.width + tile_x - 1) << 6 |
                (params->depth.height + tile_y - 1) << 19);
      OUT_BATCH(0);
      OUT_BATCH(tile_x |
                tile_y << 16);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_HIER_DEPTH_BUFFER */
   {
      struct intel_region *hiz_region = params->depth.mt->hiz_mt->region;
      uint32_t hiz_offset =
         intel_region_get_aligned_offset(hiz_region,
                                         draw_x & ~tile_mask_x,
                                         (draw_y & ~tile_mask_y) / 2, false);

      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
      OUT_BATCH(hiz_region->pitch - 1);
      OUT_RELOC(hiz_region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                hiz_offset);
      ADVANCE_BATCH();
   }

   /* 3DSTATE_STENCIL_BUFFER */
   {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
Example #14
/**
 * Enable or disable thread dispatch and set the HiZ op appropriately.
 */
static void
gen6_blorp_emit_wm_config(struct brw_context *brw,
                          const brw_blorp_params *params,
                          uint32_t prog_offset,
                          brw_blorp_prog_data *prog_data)
{
   uint32_t dw2, dw4, dw5, dw6;

   /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
    * nonzero to prevent the GPU from hanging.  While the documentation doesn't
    * mention this explicitly, it notes that the valid range for the field is
    * [1,39] = [2,40] threads, which excludes zero.
    *
    * To be safe (and to minimize extraneous code) we go ahead and fully
    * configure the WM state whether or not there is a WM program.
    */

   dw2 = dw4 = dw5 = dw6 = 0;
   switch (params->hiz_op) {
   case GEN6_HIZ_OP_DEPTH_CLEAR:
      dw4 |= GEN6_WM_DEPTH_CLEAR;
      break;
   case GEN6_HIZ_OP_DEPTH_RESOLVE:
      dw4 |= GEN6_WM_DEPTH_RESOLVE;
      break;
   case GEN6_HIZ_OP_HIZ_RESOLVE:
      dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
      break;
   case GEN6_HIZ_OP_NONE:
      break;
   default:
      assert(0);
      break;
   }
   dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
   dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
   dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
   dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
   dw6 |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; /* No inputs from SF */
   if (params->use_wm_prog) {
      dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
      dw4 |= prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
      dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
      dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */
      dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
   }

   if (params->num_samples > 1) {
      dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
      if (prog_data && prog_data->persample_msaa_dispatch)
         dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
      else
         dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
   } else {
      dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
      dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
   }

   BEGIN_BATCH(9);
   OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
   OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
   OUT_BATCH(dw2);
   OUT_BATCH(0); /* No scratch needed */
   OUT_BATCH(dw4);
   OUT_BATCH(dw5);
   OUT_BATCH(dw6);
   OUT_BATCH(0); /* No other programs */
   OUT_BATCH(0); /* No other programs */
   ADVANCE_BATCH();
}
Example #15
static void
gen7_upload_ps_state(struct brw_context *brw,
                     const struct brw_stage_state *stage_state,
                     const struct brw_wm_prog_data *prog_data,
                     bool enable_dual_src_blend, unsigned sample_mask,
                     unsigned fast_clear_op)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   uint32_t dw2, dw4, dw5, ksp0, ksp2;
   const int max_threads_shift = brw->is_haswell ?
      HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;

   dw2 = dw4 = dw5 = ksp2 = 0;

   const unsigned sampler_count =
      DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
   dw2 |= SET_FIELD(sampler_count, GEN7_PS_SAMPLER_COUNT);

   dw2 |= ((prog_data->base.binding_table.size_bytes / 4) <<
           GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);

   if (prog_data->base.use_alt_mode)
      dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;

   /* Haswell requires the sample mask to be set in this packet as well as
    * in 3DSTATE_SAMPLE_MASK; the values should match. */
   /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
   if (brw->is_haswell)
      dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK);

   dw4 |= (devinfo->max_wm_threads - 1) << max_threads_shift;

   if (prog_data->base.nr_params > 0)
      dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;

   /* From the IVB PRM, volume 2 part 1, page 287:
    * "This bit is inserted in the PS payload header and made available to
    * the DataPort (either via the message header or via header bypass) to
    * indicate that oMask data (one or two phases) is included in Render
    * Target Write messages. If present, the oMask data is used to mask off
    * samples."
    */
   if (prog_data->uses_omask)
      dw4 |= GEN7_PS_OMASK_TO_RENDER_TARGET;

   /* From the IVB PRM, volume 2 part 1, page 287:
    * "If the PS kernel does not need the Position XY Offsets to
    * compute a Position Value, then this field should be programmed
    * to POSOFFSET_NONE."
    * "SW Recommendation: If the PS kernel needs the Position Offsets
    * to compute a Position XY value, this field should match Position
    * ZW Interpolation Mode to ensure a consistent position.xyzw
    * computation."
    * We only require XY sample offsets, so this recommendation doesn't
    * seem useful at the moment. We may need it in the future.
    */
   if (prog_data->uses_pos_offset)
      dw4 |= GEN7_PS_POSOFFSET_SAMPLE;
   else
      dw4 |= GEN7_PS_POSOFFSET_NONE;

   /* The hardware wedges if you have this bit set but don't turn on any dual
    * source blend factors.
    */
   if (enable_dual_src_blend)
      dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;

   /* BRW_NEW_FS_PROG_DATA */
   if (prog_data->num_varying_inputs != 0)
      dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;

   dw4 |= fast_clear_op;

   if (prog_data->dispatch_16)
      dw4 |= GEN7_PS_16_DISPATCH_ENABLE;

   if (prog_data->dispatch_8)
      dw4 |= GEN7_PS_8_DISPATCH_ENABLE;

   dw5 |= prog_data->base.dispatch_grf_start_reg <<
          GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
   dw5 |= prog_data->dispatch_grf_start_reg_2 <<
          GEN7_PS_DISPATCH_START_GRF_SHIFT_2;

   ksp0 = stage_state->prog_offset;
   ksp2 = stage_state->prog_offset + prog_data->prog_offset_2;

   BEGIN_BATCH(8);
   OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
   OUT_BATCH(ksp0);
   OUT_BATCH(dw2);
   if (prog_data->base.total_scratch) {
      OUT_RELOC(brw->wm.base.scratch_bo,
		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		ffs(stage_state->per_thread_scratch) - 11);
   } else {
      OUT_BATCH(0);
   }
   OUT_BATCH(dw4);
   OUT_BATCH(dw5);
   OUT_BATCH(0); /* kernel 1 pointer */
   OUT_BATCH(ksp2);
   ADVANCE_BATCH();
}
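
This example, like several others here, encodes the per-thread scratch size as ffs(size) - 11. A hedged sketch of what that expression computes, assuming the size is a power of two of at least 1KB (ffs() returns the 1-based index of the lowest set bit):

#include <assert.h>
#include <strings.h> /* ffs() */

static unsigned
encode_per_thread_scratch(unsigned scratch_bytes)
{
   /* Power-of-two sizes >= 1KB are assumed; the field then holds
    * log2(scratch_bytes / 1024).
    */
   assert(scratch_bytes >= 1024);
   assert((scratch_bytes & (scratch_bytes - 1)) == 0);
   return ffs(scratch_bytes) - 11; /* e.g. 2048 -> 1, 4096 -> 2 */
}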
示例#16
0
static void
upload_sf_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   uint32_t urb_entry_read_length;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
   /* _NEW_LIGHT */
   bool shade_model_flat = ctx->Light.ShadeModel == GL_FLAT;
   uint32_t dw1, dw2, dw3, dw4, dw16, dw17;
   int i;
   /* _NEW_BUFFERS */
   bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
   int attr = 0, input_index = 0;
   int urb_entry_read_offset = 1;
   float point_size;
   uint16_t attr_overrides[FRAG_ATTRIB_MAX];
   uint32_t point_sprite_origin;

   /* CACHE_NEW_VS_PROG */
   urb_entry_read_length = ((brw->vs.prog_data->vue_map.num_slots + 1) / 2 -
			    urb_entry_read_offset);
   if (urb_entry_read_length == 0) {
      /* Setting the URB entry read length to 0 causes undefined behavior, so
       * if we have no URB data to read, set it to 1.
       */
      urb_entry_read_length = 1;
   }

   dw1 =
      GEN6_SF_SWIZZLE_ENABLE |
      num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
      urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
      urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;

   dw2 = GEN6_SF_STATISTICS_ENABLE |
         GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;

   dw3 = 0;
   dw4 = 0;
   dw16 = 0;
   dw17 = 0;

   /* _NEW_POLYGON */
   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
      dw2 |= GEN6_SF_WINDING_CCW;

   if (ctx->Polygon.OffsetFill)
       dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;

   if (ctx->Polygon.OffsetLine)
       dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;

   if (ctx->Polygon.OffsetPoint)
       dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;

   switch (ctx->Polygon.FrontMode) {
   case GL_FILL:
       dw2 |= GEN6_SF_FRONT_SOLID;
       break;

   case GL_LINE:
       dw2 |= GEN6_SF_FRONT_WIREFRAME;
       break;

   case GL_POINT:
       dw2 |= GEN6_SF_FRONT_POINT;
       break;

   default:
       assert(0);
       break;
   }

   switch (ctx->Polygon.BackMode) {
   case GL_FILL:
       dw2 |= GEN6_SF_BACK_SOLID;
       break;

   case GL_LINE:
       dw2 |= GEN6_SF_BACK_WIREFRAME;
       break;

   case GL_POINT:
       dw2 |= GEN6_SF_BACK_POINT;
       break;

   default:
       assert(0);
       break;
   }

   /* _NEW_SCISSOR */
   if (ctx->Scissor.Enabled)
      dw3 |= GEN6_SF_SCISSOR_ENABLE;

   /* _NEW_POLYGON */
   if (ctx->Polygon.CullFlag) {
      switch (ctx->Polygon.CullFaceMode) {
      case GL_FRONT:
	 dw3 |= GEN6_SF_CULL_FRONT;
	 break;
      case GL_BACK:
	 dw3 |= GEN6_SF_CULL_BACK;
	 break;
      case GL_FRONT_AND_BACK:
	 dw3 |= GEN6_SF_CULL_BOTH;
	 break;
      default:
	 assert(0);
	 break;
      }
   } else {
      dw3 |= GEN6_SF_CULL_NONE;
   }

   /* _NEW_LINE */
   dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
      GEN6_SF_LINE_WIDTH_SHIFT;
   if (ctx->Line.SmoothFlag) {
      dw3 |= GEN6_SF_LINE_AA_ENABLE;
      dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
      dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
   }

   /* _NEW_PROGRAM | _NEW_POINT */
   if (!(ctx->VertexProgram.PointSizeEnabled ||
	 ctx->Point._Attenuated))
      dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;

   /* Clamp to ARB_point_parameters user limits */
   point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);

   /* Clamp to the hardware limits and convert to fixed point */
   dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);

   /*
    * Window coordinates in an FBO are inverted, which means point
    * sprite origin must be inverted, too.
    */
   if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
      point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
   } else {
      point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
   }
   dw1 |= point_sprite_origin;

   /* _NEW_LIGHT */
   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
      dw4 |=
	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
   } else {
      dw4 |=
	 (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
   }

   /* Create the mapping from the FS inputs we produce to the VS outputs
    * they source from.
    */
   for (; attr < FRAG_ATTRIB_MAX; attr++) {
      enum glsl_interp_qualifier interp_qualifier =
         brw->fragment_program->InterpQualifier[attr];
      bool is_gl_Color = attr == FRAG_ATTRIB_COL0 || attr == FRAG_ATTRIB_COL1;

      if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)))
	 continue;

      /* _NEW_POINT */
      if (ctx->Point.PointSprite &&
	  (attr >= FRAG_ATTRIB_TEX0 && attr <= FRAG_ATTRIB_TEX7) &&
	  ctx->Point.CoordReplace[attr - FRAG_ATTRIB_TEX0]) {
	 dw16 |= (1 << input_index);
      }

      if (attr == FRAG_ATTRIB_PNTC)
	 dw16 |= (1 << input_index);

      /* flat shading */
      if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
          (shade_model_flat && is_gl_Color &&
           interp_qualifier == INTERP_QUALIFIER_NONE))
         dw17 |= (1 << input_index);

      /* The hardware can only apply overrides to the first 16 inputs;
       * the remaining (up to 16) inputs have to be lined up so that the
       * input index equals the output index.  We'll need to do some
       * tweaking to make sure that's the case.
       */
      assert(input_index < 16 || attr == input_index);

      /* CACHE_NEW_VS_PROG | _NEW_LIGHT | _NEW_PROGRAM */
      attr_overrides[input_index++] =
         get_attr_override(&brw->vs.prog_data->vue_map,
			   urb_entry_read_offset, attr,
                           ctx->VertexProgram._TwoSideEnabled);
   }

   for (; input_index < FRAG_ATTRIB_MAX; input_index++)
      attr_overrides[input_index] = 0;

   BEGIN_BATCH(20);
   OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
   OUT_BATCH(dw1);
   OUT_BATCH(dw2);
   OUT_BATCH(dw3);
   OUT_BATCH(dw4);
   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
   OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
   for (i = 0; i < 8; i++) {
      OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
   }
   OUT_BATCH(dw16); /* point sprite texcoord bitmask */
   OUT_BATCH(dw17); /* constant interp bitmask */
   OUT_BATCH(0); /* wrapshortest enables 0-7 */
   OUT_BATCH(0); /* wrapshortest enables 8-15 */
   ADVANCE_BATCH();
}
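
The line-width and point-size fields above are unsigned fixed-point values. A sketch of a U_FIXED-style conversion, assuming the conventional definition (value scaled by 2^frac_bits, then truncated); the macro name here is illustrative, not Mesa's:

#include <stdio.h>

#define U_FIXED_SKETCH(value, frac_bits) \
   ((unsigned int)((value) * (1 << (frac_bits))))

int main(void)
{
   /* 7 fractional bits for line width: 1.5 -> 192 (1.1000000 in binary) */
   printf("%u\n", U_FIXED_SKETCH(1.5f, 7));
   /* 3 fractional bits for point size: 255.875 -> 2047 (the clamp max) */
   printf("%u\n", U_FIXED_SKETCH(255.875f, 3));
   return 0;
}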
示例#17
0
static void
upload_wm_state(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *prog_data =
      brw_wm_prog_data(brw->wm.base.prog_data);
   bool writes_depth = prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
   uint32_t dw1, dw2;

   /* _NEW_BUFFERS */
   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;

   dw1 = dw2 = 0;
   dw1 |= GEN7_WM_STATISTICS_ENABLE;
   dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
   dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;

   /* _NEW_LINE */
   if (ctx->Line.StippleFlag)
      dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;

   /* _NEW_POLYGON */
   if (ctx->Polygon.StippleFlag)
      dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;

   if (prog_data->uses_src_depth)
      dw1 |= GEN7_WM_USES_SOURCE_DEPTH;

   if (prog_data->uses_src_w)
      dw1 |= GEN7_WM_USES_SOURCE_W;

   dw1 |= prog_data->computed_depth_mode << GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT;
   dw1 |= prog_data->barycentric_interp_modes <<
      GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;

   /* _NEW_COLOR, _NEW_MULTISAMPLE, _NEW_BUFFERS */
   /* Enable if the pixel shader kernel generates and outputs oMask. */
   if (prog_data->uses_kill ||
       _mesa_is_alpha_test_enabled(ctx) ||
       _mesa_is_alpha_to_coverage_enabled(ctx) ||
       prog_data->uses_omask) {
      dw1 |= GEN7_WM_KILL_ENABLE;
   }

   /* _NEW_BUFFERS | _NEW_COLOR */
   if (brw_color_buffer_write_enabled(brw) || writes_depth ||
       prog_data->has_side_effects || dw1 & GEN7_WM_KILL_ENABLE) {
      dw1 |= GEN7_WM_DISPATCH_ENABLE;
   }
   if (multisampled_fbo) {
      /* _NEW_MULTISAMPLE */
      if (ctx->Multisample.Enabled)
         dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
      else
         dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;

      if (prog_data->persample_dispatch)
         dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
      else
         dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
   } else {
      dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
      dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
   }

   if (prog_data->uses_sample_mask) {
      dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK;
   }

   /* BRW_NEW_FS_PROG_DATA */
   if (prog_data->early_fragment_tests)
      dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS;
   else if (prog_data->has_side_effects)
      dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;

   /* The "UAV access enable" bits are unnecessary on HSW because they only
    * seem to have an effect on the HW-assisted coherency mechanism which we
    * don't need, and the rasterization-related UAV_ONLY flag and the
    * DISPATCH_ENABLE bit can be set independently from it.
    * C.f. gen8_upload_ps_extra().
    *
    * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | _NEW_COLOR
    */
   if (brw->is_haswell &&
       !(brw_color_buffer_write_enabled(brw) || writes_depth) &&
       prog_data->has_side_effects)
      dw2 |= HSW_WM_UAV_ONLY;

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
   OUT_BATCH(dw1);
   OUT_BATCH(dw2);
   ADVANCE_BATCH();
}
示例#18
0
static void
gen8_upload_gs_state(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_stage_state *stage_state = &brw->gs.base;
   /* BRW_NEW_GEOMETRY_PROGRAM */
   bool active = brw->geometry_program;
   /* BRW_NEW_GS_PROG_DATA */
   const struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base;

   if (active) {
      int urb_entry_write_offset = 1;
      uint32_t urb_entry_output_length =
         ((prog_data->vue_map.num_slots + 1) / 2 - urb_entry_write_offset);

      if (urb_entry_output_length == 0)
         urb_entry_output_length = 1;

      BEGIN_BATCH(10);
      OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2));
      OUT_BATCH(stage_state->prog_offset);
      OUT_BATCH(0);
      OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE |
                brw->geometry_program->VerticesIn |
                ((ALIGN(stage_state->sampler_count, 4)/4) <<
                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
                ((prog_data->base.binding_table.size_bytes / 4) <<
                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));

      if (brw->gs.prog_data->base.base.total_scratch) {
         OUT_RELOC64(stage_state->scratch_bo,
                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                     ffs(brw->gs.prog_data->base.base.total_scratch) - 11);
         WARN_ONCE(true,
                   "May need to implement a temporary workaround: GS Number of "
                   "URB Entries must be less than or equal to the GS Maximum "
                   "Number of Threads.\n");
      } else {
         OUT_BATCH(0);
         OUT_BATCH(0);
      }

      /* DW6 */
      OUT_BATCH(((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) <<
                 GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
                (brw->gs.prog_data->output_topology <<
                 GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
                (prog_data->urb_read_length <<
                 GEN6_GS_URB_READ_LENGTH_SHIFT) |
                (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
                (prog_data->base.dispatch_grf_start_reg <<
                 GEN6_GS_DISPATCH_START_GRF_SHIFT));

      uint32_t dw7 = (brw->gs.prog_data->control_data_header_size_hwords <<
                      GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
                      brw->gs.prog_data->dispatch_mode |
                     ((brw->gs.prog_data->invocations - 1) <<
                      GEN7_GS_INSTANCE_CONTROL_SHIFT) |
                      GEN6_GS_STATISTICS_ENABLE |
                      (brw->gs.prog_data->include_primitive_id ?
                       GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) |
                      GEN7_GS_REORDER_TRAILING |
                      GEN7_GS_ENABLE;
      uint32_t dw8 = brw->gs.prog_data->control_data_format <<
                     HSW_GS_CONTROL_DATA_FORMAT_SHIFT;

      if (brw->gen < 9)
         dw7 |= (brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT;
      else
         dw8 |= brw->max_gs_threads - 1;

      /* DW7 */
      OUT_BATCH(dw7);

      /* DW8 */
      OUT_BATCH(dw8);

      /* DW9 / _NEW_TRANSFORM */
      OUT_BATCH((ctx->Transform.ClipPlanesEnabled <<
                 GEN8_GS_USER_CLIP_DISTANCE_SHIFT) |
                (urb_entry_output_length << GEN8_GS_URB_OUTPUT_LENGTH_SHIFT) |
                (urb_entry_write_offset <<
                 GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT));
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(10);
      OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2));
      OUT_BATCH(0); /* prog_bo */
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0); /* scratch space base offset */
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(GEN6_GS_STATISTICS_ENABLE);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
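
The else branch above emits two OUT_BATCH(0) placeholders where the active branch emits OUT_RELOC64, because on Gen8 the scratch address is a 64-bit, two-DWord field. A minimal sketch of the low/high split (the helper is hypothetical):

#include <stdint.h>

static void
split_addr64(uint64_t gpu_addr, uint32_t *dw_low, uint32_t *dw_high)
{
   *dw_low  = (uint32_t)(gpu_addr & 0xffffffffu);
   *dw_high = (uint32_t)(gpu_addr >> 32);
}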
示例#19
0
static void
upload_wm_state(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   const struct brw_fragment_program *fp =
      brw_fragment_program_const(brw->fragment_program);
   uint32_t dw2, dw4, dw5, dw6;

   /* _NEW_BUFFERS */
   bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_params == 0) {
      /* Disable the push constant buffers. */
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
		GEN6_CONSTANT_BUFFER_0_ENABLE |
		(5 - 2));
      /* Pointer to the WM constant buffer.  Covered by the set of
       * state flags from gen6_upload_wm_push_constants.
       */
      OUT_BATCH(brw->wm.push_const_offset +
		ALIGN(brw->wm.prog_data->nr_params,
		      brw->wm.prog_data->dispatch_width) / 8 - 1);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   dw2 = dw4 = dw5 = dw6 = 0;
   dw4 |= GEN6_WM_STATISTICS_ENABLE;
   dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
   dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;

   /* Use ALT floating point mode for ARB fragment programs, because they
    * require 0^0 == 1.  Even though _CurrentFragmentProgram is used for
    * rendering, CurrentFragmentProgram is used for this check to
    * differentiate between the GLSL and non-GLSL cases.
    */
   if (ctx->Shader.CurrentFragmentProgram == NULL)
      dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;

   /* CACHE_NEW_SAMPLER */
   dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
   dw4 |= (brw->wm.prog_data->first_curbe_grf <<
	   GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
   dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
	   GEN6_WM_DISPATCH_START_GRF_SHIFT_2);

   dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->dispatch_width == 8) {
      dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
      if (brw->wm.prog_data->prog_offset_16)
	 dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
   } else {
      dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
   }

   /* CACHE_NEW_WM_PROG | _NEW_COLOR */
   if (brw->wm.prog_data->dual_src_blend &&
       (ctx->Color.BlendEnabled & 1) &&
       ctx->Color.Blend[0]._UsesDualSrc) {
      dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
   }

   /* _NEW_LINE */
   if (ctx->Line.StippleFlag)
      dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;

   /* _NEW_POLYGON */
   if (ctx->Polygon.StippleFlag)
      dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   if (fp->program.Base.InputsRead & FRAG_BIT_WPOS)
      dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
   if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
      dw5 |= GEN6_WM_COMPUTED_DEPTH;
   /* CACHE_NEW_WM_PROG */
   dw6 |= brw->wm.prog_data->barycentric_interp_modes <<
      GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;

   /* _NEW_COLOR, _NEW_MULTISAMPLE */
   if (fp->program.UsesKill || ctx->Color.AlphaEnabled ||
       ctx->Multisample.SampleAlphaToCoverage)
      dw5 |= GEN6_WM_KILL_ENABLE;

   if (brw_color_buffer_write_enabled(brw) ||
       dw5 & (GEN6_WM_KILL_ENABLE | GEN6_WM_COMPUTED_DEPTH)) {
      dw5 |= GEN6_WM_DISPATCH_ENABLE;
   }

   dw6 |= _mesa_bitcount_64(brw->fragment_program->Base.InputsRead) <<
      GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
   if (multisampled_fbo) {
      /* _NEW_MULTISAMPLE */
      if (ctx->Multisample.Enabled)
         dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
      else
         dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
      dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
   } else {
      dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
      dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
   }

   BEGIN_BATCH(9);
   OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
   OUT_BATCH(brw->wm.prog_offset);
   OUT_BATCH(dw2);
   if (brw->wm.prog_data->total_scratch) {
      OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		ffs(brw->wm.prog_data->total_scratch) - 11);
   } else {
      OUT_BATCH(0);
   }
   OUT_BATCH(dw4);
   OUT_BATCH(dw5);
   OUT_BATCH(dw6);
   OUT_BATCH(0); /* kernel 1 pointer */
   /* kernel 2 pointer */
   OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16);
   ADVANCE_BATCH();
}
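
The enabled 3DSTATE_CONSTANT_PS path above packs a buffer length into the same DWord as the offset. A worked sketch of that math, under the (hedged) reading that nr_params counts 32-bit constants and the field stores the length in 256-bit units, minus one:

#include <assert.h>

#define ALIGN(value, alignment) \
   (((value) + (alignment) - 1) & ~((alignment) - 1))

/* Hypothetical helper mirroring the expression in the packet above. */
static unsigned
const_buffer_length_field(unsigned nr_params, unsigned dispatch_width)
{
   assert(nr_params > 0);
   /* e.g. 18 params in a SIMD8 program: ALIGN(18, 8) / 8 - 1 = 2,
    * i.e. three 256-bit units.
    */
   return ALIGN(nr_params, dispatch_width) / 8 - 1;
}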
示例#20
0
static void emit_depthbuffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
   struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
   struct intel_mipmap_tree *hiz_mt = brw->depthstencil.hiz_mt;
   uint32_t tile_x = brw->depthstencil.tile_x;
   uint32_t tile_y = brw->depthstencil.tile_y;
   unsigned int len;
   bool separate_stencil = false;

   if (stencil_mt && stencil_mt->format == MESA_FORMAT_S8)
      separate_stencil = true;

   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil) {
      depth_irb = stencil_irb;
      depth_mt = stencil_mt;
   }

   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

   if (!depth_irb && !separate_stencil) {
      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
		(BRW_SURFACE_NULL << 29));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(0);

      if (intel->gen >= 6)
	 OUT_BATCH(0);

      ADVANCE_BATCH();

   } else if (!depth_irb && separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * Enable the hiz bit because it and the separate stencil bit must have
       * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
       * 1.21 "Separate Stencil Enable":
       *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
       *     Enable must also be enabled.
       *
       *     [DevGT]: This field must be set to the same value (enabled or
       *     disabled) as Hierarchical Depth Buffer Enable
       *
       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(intel->has_separate_stencil);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
	        (1 << 21) | /* separate stencil enable */
	        (1 << 22) | /* hiz enable */
	        (BRW_TILEWALK_YMAJOR << 26) |
	        (1 << 27) | /* tiled surface */
	        (BRW_SURFACE_2D << 29));
      OUT_BATCH(0);
      OUT_BATCH(((stencil_irb->Base.Base.Width + tile_x - 1) << 6) |
	         (stencil_irb->Base.Base.Height + tile_y - 1) << 19);
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(tile_x | (tile_y << 16));
      else
	 assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
	 OUT_BATCH(0);

      ADVANCE_BATCH();

   } else {
      struct intel_region *region = depth_mt->region;

      /* If using separate stencil, hiz must be enabled. */
      assert(!separate_stencil || hiz_mt);

      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
      assert(!hiz_mt || region->tiling == I915_TILING_Y);

      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((region->pitch - 1) |
		(brw_depthbuffer_format(brw) << 18) |
		((hiz_mt ? 1 : 0) << 21) | /* separate stencil enable */
		((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
		(BRW_TILEWALK_YMAJOR << 26) |
		((region->tiling != I915_TILING_NONE) << 27) |
		(BRW_SURFACE_2D << 29));
      OUT_RELOC(region->bo,
		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		brw->depthstencil.depth_offset);
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
		(((depth_irb->Base.Base.Width + tile_x) - 1) << 6) |
		(((depth_irb->Base.Base.Height + tile_y) - 1) << 19));
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
         OUT_BATCH(tile_x | (tile_y << 16));
      else
	 assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
	 OUT_BATCH(0);

      ADVANCE_BATCH();
   }

   if (hiz_mt || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz_mt) {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(hiz_mt->region->pitch - 1);
	 OUT_RELOC(hiz_mt->region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		   brw->depthstencil.hiz_offset);
	 ADVANCE_BATCH();
      } else {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(0);
	 OUT_BATCH(0);
	 ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
	 struct intel_region *region = stencil_mt->region;

	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *    The pitch must be set to 2x the value computed based on width, as
          *    the stencil buffer is stored with two rows interleaved.
          */
	 OUT_BATCH(2 * region->pitch - 1);
	 OUT_RELOC(region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		   brw->depthstencil.stencil_offset);
	 ADVANCE_BATCH();
      } else {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(0);
	 OUT_BATCH(0);
	 ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz_mt) {
      if (intel->gen == 6)
	 intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
		GEN5_DEPTH_CLEAR_VALID |
		(2 - 2));
      OUT_BATCH(depth_irb ? depth_irb->mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
}
示例#21
0
static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
{
   r100ContextPtr r100 = R100_CONTEXT(ctx);
   BATCH_LOCALS(&r100->radeon);
   struct radeon_renderbuffer *rrb, *drb;
   uint32_t cbpitch = 0;
   uint32_t zbpitch = 0;
   uint32_t dwords = atom->check(ctx, atom);
   uint32_t depth_fmt;

   rrb = radeon_get_colorbuffer(&r100->radeon);
   if (!rrb || !rrb->bo) {
      fprintf(stderr, "no rrb\n");
      return;
   }

   atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
   if (rrb->cpp == 4)
	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
   else switch (rrb->base.Base.Format) {
   case MESA_FORMAT_RGB565:
	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
	break;
   case MESA_FORMAT_ARGB4444:
	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
	break;
   case MESA_FORMAT_ARGB1555:
	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
	break;
   default:
	_mesa_problem(ctx, "unexpected format in ctx_emit_cs()");
   }

   cbpitch = (rrb->pitch / rrb->cpp);
   if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
       cbpitch |= R200_COLOR_TILE_ENABLE;
   if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
       cbpitch |= RADEON_COLOR_MICROTILE_ENABLE;

   drb = radeon_get_depthbuffer(&r100->radeon);
   if (drb) {
     zbpitch = (drb->pitch / drb->cpp);
     if (drb->cpp == 4)
        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
     else
        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
   }

   BEGIN_BATCH_NO_AUTOSTATE(dwords);

   /* In the CS case we need to split this up */
   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
   OUT_BATCH_TABLE((atom->cmd + 1), 4);

   if (drb) {
     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);

     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
     OUT_BATCH(zbpitch);
   }

   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);

   if (rrb) {
     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
     OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);

     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
     OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
   }

   // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
   //   OUT_BATCH_TABLE((atom->cmd + 14), 4);
   // }

   END_BATCH();
   BEGIN_BATCH_NO_AUTOSTATE(4);
   OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
   OUT_BATCH(0);
   OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
   if (rrb) {
       OUT_BATCH(((rrb->base.Base.Width - 1) << RADEON_RE_WIDTH_SHIFT) |
                 ((rrb->base.Base.Height - 1) << RADEON_RE_HEIGHT_SHIFT));
   } else {
       OUT_BATCH(0);
   }
   END_BATCH();
}
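
The CP_PACKET0 headers above are Radeon type-0 packets: the CP writes n+1 consecutive registers starting at the given address, which is why CP_PACKET0(RADEON_PP_CNTL, 1) is followed by exactly two payload DWords. A hedged sketch of the conventional encoding (type in bits 31:30, count in 29:16, register DWord index in 15:0), offered as an illustration rather than the driver's macro:

#include <stdint.h>

static uint32_t
packet0_header(uint32_t reg_byte_addr, unsigned extra_dwords)
{
   return (0u << 30)                      /* packet type 0 */
        | ((uint32_t)extra_dwords << 16)  /* payload DWords - 1 */
        | (reg_byte_addr >> 2);           /* register index in DWords */
}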
示例#22
0
static void brw_emit_vertices(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i, nr_elements;

   brw_prepare_vertices(brw);

   brw_emit_query_begin(brw);

   nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
    * VERTEX_ELEMENT struct and bail.
    *
    * The stale VB state stays in place, but those buffers don't do
    * anything unless a VE loads from them.
    */
   if (nr_elements == 0) {
      BEGIN_BATCH(3);
      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
      if (brw->gen >= 6) {
	 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
      }
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
      ADVANCE_BATCH();
      return;
   }

   /* Now emit VB and VEP state packets.
    */

   if (brw->vb.nr_buffers) {
      if (brw->gen >= 6) {
	 assert(brw->vb.nr_buffers <= 33);
      } else {
	 assert(brw->vb.nr_buffers <= 17);
      }

      BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
      for (i = 0; i < brw->vb.nr_buffers; i++) {
	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
	 uint32_t dw0;

	 if (brw->gen >= 6) {
	    dw0 = buffer->step_rate
	             ? GEN6_VB0_ACCESS_INSTANCEDATA
	             : GEN6_VB0_ACCESS_VERTEXDATA;
	    dw0 |= i << GEN6_VB0_INDEX_SHIFT;
	 } else {
	    dw0 = buffer->step_rate
	             ? BRW_VB0_ACCESS_INSTANCEDATA
	             : BRW_VB0_ACCESS_VERTEXDATA;
	    dw0 |= i << BRW_VB0_INDEX_SHIFT;
	 }

	 if (brw->gen >= 7)
	    dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

         if (brw->gen == 7)
	    dw0 |= GEN7_MOCS_L3 << 16;

         WARN_ONCE(buffer->stride >= (brw->gen >= 5 ? 2048 : 2047),
                   "VBO stride %d too large, bad rendering may occur\n",
                   buffer->stride);
	 OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
	 OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
	 if (brw->gen >= 5) {
	    OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
	 } else
	    OUT_BATCH(0);
	 OUT_BATCH(buffer->step_rate);
      }
      ADVANCE_BATCH();
   }

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably
    * for VertexID/InstanceID.
    */
   if (brw->gen >= 6) {
      assert(nr_elements <= 34);
   } else {
      assert(nr_elements <= 18);
   }

   struct brw_vertex_element *gen6_edgeflag_input = NULL;

   BEGIN_BATCH(1 + nr_elements * 2);
   OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
   for (i = 0; i < brw->vb.nr_enabled; i++) {
      struct brw_vertex_element *input = brw->vb.enabled[i];
      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;

      if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
         /* Gen6+ passes edgeflag as sideband along with the vertex, instead
          * of in the VUE.  We have to upload it sideband as the last vertex
          * element according to the B-Spec.
          */
         if (brw->gen >= 6) {
            gen6_edgeflag_input = input;
            continue;
         }
      }

      switch (input->glarray->Size) {
      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* deliberate fall-through */
      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* deliberate fall-through */
      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* deliberate fall-through */
      case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
                                              : BRW_VE1_COMPONENT_STORE_1_FLT;
         break;
      }

      if (brw->gen >= 6) {
	 OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
		   GEN6_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      } else {
	 OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
		   BRW_VE0_VALID |
		   (format << BRW_VE0_FORMAT_SHIFT) |
		   (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      }

      if (brw->gen >= 5)
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
      else
          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

   if (brw->gen >= 6 && gen6_edgeflag_input) {
      uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);

      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
                GEN6_VE0_VALID |
                GEN6_VE0_EDGE_FLAG_ENABLE |
                (format << BRW_VE0_FORMAT_SHIFT) |
                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }

   if (brw->vs.prog_data->uses_vertexid) {
      uint32_t dw0 = 0, dw1 = 0;

      dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
	     (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));

      if (brw->gen >= 6) {
	 dw0 |= GEN6_VE0_VALID;
      } else {
	 dw0 |= BRW_VE0_VALID;
	 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
      }

      /* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
       * the format is ignored and the value is always int.
       */

      OUT_BATCH(dw0);
      OUT_BATCH(dw1);
   }

   ADVANCE_BATCH();
}
示例#23
0
static void i915_emit_composite_setup(ScrnInfoPtr scrn)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	int op = intel->i915_render_state.op;
	PicturePtr mask_picture = intel->render_mask_picture;
	PicturePtr dest_picture = intel->render_dest_picture;
	PixmapPtr mask = intel->render_mask;
	PixmapPtr dest = intel->render_dest;
	Bool is_solid_src, is_solid_mask;
	int tex_count, t;

	intel->needs_render_state_emit = FALSE;

	IntelEmitInvarientState(scrn);
	intel->last_3d = LAST_3D_RENDER;

	is_solid_src = intel->render_source_is_solid;
	is_solid_mask = intel->render_mask_is_solid;

	tex_count = 0;
	tex_count += ! is_solid_src;
	tex_count += mask && ! is_solid_mask;

	assert(intel->in_batch_atomic);

	if (tex_count != 0) {
	    OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
	    OUT_BATCH((1 << tex_count) - 1);
	    for (t = 0; t < tex_count; t++) {
		OUT_RELOC_PIXMAP(intel->texture[t], I915_GEM_DOMAIN_SAMPLER, 0, 0);
		OUT_BATCH(intel->mapstate[3*t + 1]);
		OUT_BATCH(intel->mapstate[3*t + 2]);
	    }

	    OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
	    OUT_BATCH((1 << tex_count) - 1);
	    for (t = 0; t < tex_count; t++) {
		OUT_BATCH(intel->samplerstate[3*t + 0]);
		OUT_BATCH(intel->samplerstate[3*t + 1]);
		OUT_BATCH(intel->samplerstate[3*t + 2]);
	    }
	}

	if (is_solid_src) {
	    OUT_BATCH (_3DSTATE_DFLT_DIFFUSE_CMD);
	    OUT_BATCH (intel->render_source_solid);
	}
	if (mask && is_solid_mask) {
	    OUT_BATCH (_3DSTATE_DFLT_SPEC_CMD);
	    OUT_BATCH (intel->render_mask_solid);
	}

	/* BUF_INFO is an implicit flush, so avoid if the target has not changed.
	 * XXX However for reasons unfathomed, correct rendering in KDE requires
	 * at least a MI_FLUSH | INHIBIT_RENDER_CACHE_FLUSH here.
	 */
	if (1 || dest != intel->render_current_dest) {
		uint32_t tiling_bits;

		intel_batch_do_flush(scrn);

		if (intel_pixmap_tiled(dest)) {
			tiling_bits = BUF_3D_TILED_SURFACE;
			if (intel_get_pixmap_private(dest)->tiling
			    == I915_TILING_Y)
				tiling_bits |= BUF_3D_TILE_WALK_Y;
		} else
			tiling_bits = 0;

		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
		OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits |
			  BUF_3D_PITCH(intel_pixmap_pitch(dest)));
		OUT_RELOC_PIXMAP(dest, I915_GEM_DOMAIN_RENDER,
				 I915_GEM_DOMAIN_RENDER, 0);

		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
		OUT_BATCH(intel->i915_render_state.dst_format);

		/* draw rect is unconditional */
		OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
		OUT_BATCH(0x00000000);
		OUT_BATCH(0x00000000);	/* ymin, xmin */
		OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) |
			  DRAW_XMAX(dest->drawable.width - 1));
		/* yorig, xorig (relate to color buffer?) */
		OUT_BATCH(0x00000000);

		intel->render_current_dest = dest;
	}

	{
		uint32_t ss2;

		ss2 = ~0;
		t = 0;
		if (! is_solid_src) {
		    ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT);
		    ss2 |= S2_TEXCOORD_FMT(t,
					   intel_transform_is_affine(intel->transform[t]) ?
					   TEXCOORDFMT_2D : TEXCOORDFMT_4D);
		    t++;
		}
		if (mask && ! is_solid_mask) {
		    ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT);
		    ss2 |= S2_TEXCOORD_FMT(t,
					   intel_transform_is_affine(intel->transform[t]) ?
					   TEXCOORDFMT_2D : TEXCOORDFMT_4D);
		    t++;
		}

		if (intel->needs_render_ca_pass) {
			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | 0);
			OUT_BATCH(ss2);
		} else {
			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
			OUT_BATCH(ss2);
			OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format));
		}
	}

	if (! intel->needs_render_ca_pass)
		i915_composite_emit_shader(intel, op);
}
示例#24
0
static void
upload_wm_state(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct brw_fragment_program *fp =
      brw_fragment_program_const(brw->fragment_program);
   /* BRW_NEW_FS_PROG_DATA */
   const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
   bool writes_depth = prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
   uint32_t dw1, dw2;

   /* _NEW_BUFFERS */
   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;

   dw1 = dw2 = 0;
   dw1 |= GEN7_WM_STATISTICS_ENABLE;
   dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
   dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;

   /* _NEW_LINE */
   if (ctx->Line.StippleFlag)
      dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;

   /* _NEW_POLYGON */
   if (ctx->Polygon.StippleFlag)
      dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;

   if (fp->program.Base.InputsRead & VARYING_BIT_POS)
      dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;

   dw1 |= prog_data->computed_depth_mode << GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT;
   dw1 |= prog_data->barycentric_interp_modes <<
      GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;

   /* _NEW_COLOR, _NEW_MULTISAMPLE */
   /* Enable if the pixel shader kernel generates and outputs oMask. */
   if (prog_data->uses_kill || ctx->Color.AlphaEnabled ||
       ctx->Multisample.SampleAlphaToCoverage ||
       prog_data->uses_omask) {
      dw1 |= GEN7_WM_KILL_ENABLE;
   }

   if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) {
      dw1 |= GEN7_WM_DISPATCH_ENABLE;
   }

   /* _NEW_BUFFERS | _NEW_COLOR */
   if (brw_color_buffer_write_enabled(brw) || writes_depth ||
       dw1 & GEN7_WM_KILL_ENABLE) {
      dw1 |= GEN7_WM_DISPATCH_ENABLE;
   }
   if (multisampled_fbo) {
      /* _NEW_MULTISAMPLE */
      if (ctx->Multisample.Enabled)
         dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
      else
         dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;

      if (_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false) > 1)
         dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
      else
         dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
   } else {
      dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
      dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
   }

   if (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) {
      dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK;
   }

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
   OUT_BATCH(dw1);
   OUT_BATCH(dw2);
   ADVANCE_BATCH();
}
示例#25
0
void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
                           struct intel_mipmap_tree *depth_mt,
                           uint32_t depth_offset, uint32_t depthbuffer_format,
                           uint32_t depth_surface_type,
                           struct intel_mipmap_tree *stencil_mt,
                           struct intel_mipmap_tree *hiz_mt,
                           bool separate_stencil, uint32_t width,
                           uint32_t height, uint32_t tile_x, uint32_t tile_y)
{
   struct intel_context *intel = &brw->intel;

   /* Enable the hiz bit if we're doing separate stencil, because it and the
    * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
    * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
    *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
    *     Enable must also be enabled.
    *
    *     [DevGT]: This field must be set to the same value (enabled or
    *     disabled) as Hierarchical Depth Buffer Enable
    */
   bool enable_hiz_ss = hiz_mt || separate_stencil;


   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (intel->gen == 6) {
      intel_emit_post_sync_nonzero_flush(intel);
      intel_emit_depth_stall_flushes(intel);
   }

   unsigned int len;
   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
      len = 6;
   else
      len = 5;

   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
             (depthbuffer_format << 18) |
             ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
             ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
             (BRW_TILEWALK_YMAJOR << 26) |
             ((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1)
              << 27) |
             (depth_surface_type << 29));

   if (depth_mt) {
      OUT_RELOC(depth_mt->region->bo,
		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		depth_offset);
   } else {
      OUT_BATCH(0);
   }

   OUT_BATCH(((width + tile_x - 1) << 6) |
             ((height + tile_y - 1) << 19));
   OUT_BATCH(0);

   if (intel->is_g4x || intel->gen >= 5)
      OUT_BATCH(tile_x | (tile_y << 16));
   else
      assert(tile_x == 0 && tile_y == 0);

   if (intel->gen >= 6)
      OUT_BATCH(0);

   ADVANCE_BATCH();

   if (hiz_mt || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz_mt) {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(hiz_mt->region->pitch - 1);
	 OUT_RELOC(hiz_mt->region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		   brw->depthstencil.hiz_offset);
	 ADVANCE_BATCH();
      } else {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(0);
	 OUT_BATCH(0);
	 ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
	 struct intel_region *region = stencil_mt->region;

	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *    The pitch must be set to 2x the value computed based on width, as
          *    the stencil buffer is stored with two rows interleaved.
          */
	 OUT_BATCH(2 * region->pitch - 1);
	 OUT_RELOC(region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		   brw->depthstencil.stencil_offset);
	 ADVANCE_BATCH();
      } else {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(0);
	 OUT_BATCH(0);
	 ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (intel->gen >= 6 || hiz_mt) {
      if (intel->gen == 6)
	 intel_emit_post_sync_nonzero_flush(intel);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
		GEN5_DEPTH_CLEAR_VALID |
		(2 - 2));
      OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
}
示例#26
0
/**
 * When the GS is not in use, we assign the entire URB space to the VS.  When
 * the GS is in use, we split the URB space evenly between the VS and the GS.
 * This is not ideal, but it's simple.
 *
 *           URB size / 2                   URB size / 2
 *   _____________-______________   _____________-______________
 *  /                            \ /                            \
 * +-------------------------------------------------------------+
 * | Vertex Shader Entries        | Geometry Shader Entries      |
 * +-------------------------------------------------------------+
 *
 * Sandybridge GT1 has 32kB of URB space, while GT2 has 64kB.
 * (See the Sandybridge PRM, Volume 2, Part 1, Section 1.4.7: 3DSTATE_URB.)
 */
static void
gen6_upload_urb( struct brw_context *brw )
{
   int nr_vs_entries, nr_gs_entries;
   int total_urb_size = brw->urb.size * 1024; /* in bytes */

   bool gs_present = brw->ff_gs.prog_active || brw->geometry_program;

   /* CACHE_NEW_VS_PROG */
   unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);

   /* When using the GS to do transform feedback only, we use the same VUE layout for
    * VS outputs and GS outputs (as it's what the SF and Clipper expect), so we
    * can simply make the GS URB entry size the same as for the VS.  This may
    * technically be too large in cases where we have few vertex attributes and
    * a lot of varyings, since the VS size is determined by the larger of the
    * two. For now, it's safe.
    *
    * For user-provided GS the assumption above does not hold since the GS
    * outputs can be different from the VS outputs.
    */
   unsigned gs_size = vs_size;
   if (brw->geometry_program) {
      gs_size = brw->gs.prog_data->base.urb_entry_size;
      assert(gs_size >= 1);
   }

   /* Calculate how many entries fit in each stage's section of the URB */
   if (gs_present) {
      nr_vs_entries = (total_urb_size/2) / (vs_size * 128);
      nr_gs_entries = (total_urb_size/2) / (gs_size * 128);
   } else {
      nr_vs_entries = total_urb_size / (vs_size * 128);
      nr_gs_entries = 0;
   }

   /* Then clamp to the maximum allowed by the hardware */
   if (nr_vs_entries > brw->urb.max_vs_entries)
      nr_vs_entries = brw->urb.max_vs_entries;

   if (nr_gs_entries > brw->urb.max_gs_entries)
      nr_gs_entries = brw->urb.max_gs_entries;

   /* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */
   brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
   brw->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4);

   assert(brw->urb.nr_vs_entries >= brw->urb.min_vs_entries);
   assert(brw->urb.nr_vs_entries % 4 == 0);
   assert(brw->urb.nr_gs_entries % 4 == 0);
   assert(vs_size <= 5);
   assert(gs_size <= 5);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
   OUT_BATCH(((vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
	     ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
   OUT_BATCH(((gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
	     ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
   ADVANCE_BATCH();

   /* From the PRM Volume 2 part 1, section 1.4.7:
    *
    *   Because of a urb corruption caused by allocating a previous gsunit’s
    *   urb entry to vsunit software is required to send a "GS NULL
    *   Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus
    *   a dummy DRAW call before any case where VS will be taking over GS URB
    *   space.
    *
    * It is not clear exactly what this means ("URB fence" is a command that
    * doesn't exist on Gen6).  So for now we just do a full pipeline flush as
    * a workaround.
    */
   if (brw->urb.gen6_gs_previously_active && !gs_present)
      intel_batchbuffer_emit_mi_flush(brw);
   brw->urb.gen6_gs_previously_active = gs_present;
}
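
A worked instance of the arithmetic above, under stated assumptions: Sandybridge GT1 (32kB of URB, per the comment), a GS present, and hypothetical entry sizes of two 128-byte rows each (vs_size = gs_size = 2):

#include <stdio.h>

#define ROUND_DOWN_TO(x, y) ((x) / (y) * (y))

int main(void)
{
   const int total_urb_size = 32 * 1024;    /* bytes; SNB GT1 */
   const unsigned vs_size = 2, gs_size = 2; /* assumed, in 128-byte units */

   int nr_vs_entries = (total_urb_size / 2) / (vs_size * 128); /* = 64 */
   int nr_gs_entries = (total_urb_size / 2) / (gs_size * 128); /* = 64 */

   /* 3DSTATE_URB requires multiples of 4; 64 already qualifies. */
   printf("VS: %d entries, GS: %d entries\n",
          ROUND_DOWN_TO(nr_vs_entries, 4), ROUND_DOWN_TO(nr_gs_entries, 4));
   return 0;
}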
示例#27
0
static void brw_emit_prim(struct brw_context *brw,
			  const struct _mesa_prim *prim,
			  uint32_t hw_prim)
{
   int verts_per_instance;
   int vertex_access_type;
   int start_vertex_location;
   int base_vertex_location;
   int indirect_flag;

   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
       prim->start, prim->count);

   start_vertex_location = prim->start;
   base_vertex_location = prim->basevertex;
   if (prim->indexed) {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      start_vertex_location += brw->ib.start_vertex_offset;
      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
   if (brw->gen < 6)
      verts_per_instance = trim(prim->mode, prim->count);
   else
      verts_per_instance = prim->count;

   /* If nothing to emit, just return. */
   if (verts_per_instance == 0 && !prim->is_indirect)
      return;

   /* If we're set to always flush, do it before and after the primitive emit.
    * We want to catch both missed flushes that hurt instruction/state cache
    * and missed flushes of the render cache as it heads to other parts of
    * the besides the draw code.
    */
   if (brw->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(brw);
   }

   /* If indirect, emit a bunch of loads from the indirect BO. */
   if (prim->is_indirect) {
      struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
      drm_intel_bo *bo = intel_bufferobj_buffer(brw,
            intel_buffer_object(indirect_buffer),
            prim->indirect_offset, 5 * sizeof(GLuint));

      indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;

      BEGIN_BATCH(15);

      OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
      OUT_BATCH(GEN7_3DPRIM_VERTEX_COUNT);
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
            prim->indirect_offset + 0);
      OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
      OUT_BATCH(GEN7_3DPRIM_INSTANCE_COUNT);
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
            prim->indirect_offset + 4);
      OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
      OUT_BATCH(GEN7_3DPRIM_START_VERTEX);
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
            prim->indirect_offset + 8);

      if (prim->indexed) {
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
         OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
               prim->indirect_offset + 12);
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_START_INSTANCE);
         OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
               prim->indirect_offset + 16);
      }
      else {
         OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_START_INSTANCE);
         OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0,
               prim->indirect_offset + 12);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
         OUT_BATCH(0);
      }

      ADVANCE_BATCH();
   }
   else {
      indirect_flag = 0;
   }


   if (brw->gen >= 7) {
      BEGIN_BATCH(7);
      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag);
      OUT_BATCH(hw_prim | vertex_access_type);
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
                hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                vertex_access_type);
   }
   OUT_BATCH(verts_per_instance);
   OUT_BATCH(start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
   OUT_BATCH(base_vertex_location);
   ADVANCE_BATCH();

   /* Only used on Sandybridge; harmless to set elsewhere. */
   brw->batch.need_workaround_flush = true;

   if (brw->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(brw);
   }
}
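
The indirect register loads above follow the GL_ARB_draw_indirect parameter layouts; the byte offsets 0/4/8/12/16 line up with these structs (field names per the GL spec, reproduced here as a reference sketch):

#include <stdint.h>

typedef struct {
   uint32_t count;         /* offset 0:  GEN7_3DPRIM_VERTEX_COUNT   */
   uint32_t instanceCount; /* offset 4:  GEN7_3DPRIM_INSTANCE_COUNT */
   uint32_t first;         /* offset 8:  GEN7_3DPRIM_START_VERTEX   */
   uint32_t baseInstance;  /* offset 12: GEN7_3DPRIM_START_INSTANCE */
} DrawArraysIndirectCommand;

typedef struct {
   uint32_t count;         /* offset 0:  GEN7_3DPRIM_VERTEX_COUNT   */
   uint32_t instanceCount; /* offset 4:  GEN7_3DPRIM_INSTANCE_COUNT */
   uint32_t firstIndex;    /* offset 8:  GEN7_3DPRIM_START_VERTEX   */
   int32_t  baseVertex;    /* offset 12: GEN7_3DPRIM_BASE_VERTEX    */
   uint32_t baseInstance;  /* offset 16: GEN7_3DPRIM_START_INSTANCE */
} DrawElementsIndirectCommand;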
示例#28
0
static void
upload_clip_state(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_META_IN_PROGRESS */
   uint32_t dw1 = brw->meta_in_progress ? 0 : GEN6_CLIP_STATISTICS_ENABLE;
   uint32_t dw2 = 0;

   /* _NEW_BUFFERS */
   struct gl_framebuffer *fb = ctx->DrawBuffer;

   /* BRW_NEW_FS_PROG_DATA */
   if (brw->wm.prog_data->barycentric_interp_modes &
       BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS) {
      dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
   }

   dw1 |= brw->vs.prog_data->base.cull_distance_mask;

   if (brw->gen >= 7)
      dw1 |= GEN7_CLIP_EARLY_CULL;

   if (brw->gen == 7) {
      /* _NEW_POLYGON */
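      /* Window-system framebuffers are rendered y-flipped relative to
       * user FBOs, which inverts the screen-space winding of every
       * triangle; comparing _FrontBit against _mesa_is_user_fbo() folds
       * that flip into the front-face convention chosen here. */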
      if (ctx->Polygon._FrontBit == _mesa_is_user_fbo(fb))
         dw1 |= GEN7_CLIP_WINDING_CCW;

      if (ctx->Polygon.CullFlag) {
         switch (ctx->Polygon.CullFaceMode) {
         case GL_FRONT:
            dw1 |= GEN7_CLIP_CULLMODE_FRONT;
            break;
         case GL_BACK:
            dw1 |= GEN7_CLIP_CULLMODE_BACK;
            break;
         case GL_FRONT_AND_BACK:
            dw1 |= GEN7_CLIP_CULLMODE_BOTH;
            break;
         default:
            unreachable("Should not get here: invalid CullFlag");
         }
      } else {
         dw1 |= GEN7_CLIP_CULLMODE_NONE;
      }
   }

   if (brw->gen < 8 && !ctx->Transform.DepthClamp)
      dw2 |= GEN6_CLIP_Z_TEST;

   /* _NEW_LIGHT */
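   /* The PROVOKE fields are vertex indices within each assembled
    * primitive.  Triangle fans are the odd one out: with
    * GL_FIRST_VERTEX_CONVENTION the provoking vertex of fan triangle i
    * is vertex i+1 (vertex 0 is the shared fan center), which is the
    * second vertex of each assembled triangle and hence the value 1
    * below. */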
   if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
      dw2 |=
	 (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
	 (1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
	 (0 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
   } else {
      dw2 |=
	 (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
	 (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
	 (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
   }

   /* _NEW_TRANSFORM */
   dw2 |= (ctx->Transform.ClipPlanesEnabled <<
           GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT);
   if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
      dw2 |= GEN6_CLIP_API_D3D;
   else
      dw2 |= GEN6_CLIP_API_OGL;

   dw2 |= GEN6_CLIP_GB_TEST;

   /* We need to disable guardband clipping if the guardband (which we always
    * program to the maximum screen-space bounding box of 8K x 8K) will be
    * smaller than the viewport.
    *
    * Closely examining the clip determination formulas in the documentation
    * reveals that objects will be discarded entirely if they're outside the
    * (small) guardband, even if they're within the (large) viewport:
    *
    *     TR = TR_GB || TR_VPXY || TR_VPZ || TR_UC || TR_NEGW
    *     TA = !TR && TA_GB && TA_VPZ && TA_NEGW
    *     MC = !(TA || TR)
    *
    * (TA is "Trivial Accept", TR is "Trivial Reject", MC is "Must Clip".)
    *
    * Disabling guardband clipping removes the TR_GB condition, which means
    * they'll be considered MC ("Must Clip") unless they're rejected for
    * some other reason.
    *
    * Note that there is no TA_VPXY condition.  If there were, objects entirely
    * inside a 16384x16384 viewport would be trivially accepted, breaking the
    * "objects must have a screenspace bounding box not exceeding 8K in the X
    * or Y direction" restriction.  Instead, they're clipped.
    */
   for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
      if (ctx->ViewportArray[i].Width > 8192 ||
          ctx->ViewportArray[i].Height > 8192) {
         dw2 &= ~GEN6_CLIP_GB_TEST;
         break;
      }
   }

   /* If the viewport dimensions are smaller than the drawable dimensions,
    * we have to disable guardband clipping prior to Gen8.  We always program
    * the guardband to a fixed size, which is almost always larger than the
    * viewport.  Any geometry which intersects the viewport but lies within
    * the guardband would bypass the 3D clipping stage, so it wouldn't be
    * clipped to the viewport.  Rendering would happen beyond the viewport,
    * but still inside the drawable.
    *
    * Gen8+ introduces a viewport extents test which restricts rendering to
    * the viewport, so we can ignore this restriction.
    */
   if (brw->gen < 8) {
      const float fb_width = (float)_mesa_geometric_width(fb);
      const float fb_height = (float)_mesa_geometric_height(fb);

      for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
         if (ctx->ViewportArray[i].X != 0 ||
             ctx->ViewportArray[i].Y != 0 ||
             ctx->ViewportArray[i].Width != fb_width ||
             ctx->ViewportArray[i].Height != fb_height) {
            dw2 &= ~GEN6_CLIP_GB_TEST;
            break;
         }
      }
   }

   /* BRW_NEW_RASTERIZER_DISCARD */
   if (ctx->RasterDiscard) {
      dw2 |= GEN6_CLIP_MODE_REJECT_ALL;
      if (brw->gen == 6) {
         perf_debug("Rasterizer discard is currently implemented via the "
                    "clipper; having the GS not write primitives would "
                    "likely be faster.\n");
      }
   }

   uint32_t enable;
   if (brw->primitive == _3DPRIM_RECTLIST)
      enable = 0;
   else
      enable = GEN6_CLIP_ENABLE;

   if (!is_drawing_points(brw) && !is_drawing_lines(brw))
      dw2 |= GEN6_CLIP_XY_TEST;

   /* BRW_NEW_VUE_MAP_GEOM_OUT */
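   /* If the last enabled geometry stage doesn't write gl_ViewportIndex,
    * only viewport 0 can ever be selected, so a maximum index of 0 is
    * programmed below. */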
   const int max_vp_index =
      (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) != 0 ?
      ctx->Const.MaxViewports : 1;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
   OUT_BATCH(dw1);
   OUT_BATCH(enable |
	     GEN6_CLIP_MODE_NORMAL |
	     dw2);
   OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
             U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
             (_mesa_geometric_layers(fb) > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) |
             ((max_vp_index - 1) & GEN6_CLIP_MAX_VP_INDEX_MASK));
   ADVANCE_BATCH();
}
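The minimum and maximum point widths above are programmed in U8.3 unsigned fixed point. A small self-contained sketch of that conversion, assuming U_FIXED follows the usual scale-by-2^frac_bits definition (the macro below is a stand-in, not Mesa's):

#include <stdio.h>

/* Stand-in for Mesa's U_FIXED: convert a non-negative float to unsigned
 * fixed point with `frac_bits` fractional bits by scaling and truncating. */
#define U_FIXED(value, frac_bits) ((unsigned)((value) * (1 << (frac_bits))))

int main(void)
{
   /* 0.125 * 8 = 1 and 255.875 * 8 = 2047: the smallest nonzero and
    * largest values representable in U8.3 (8 integer + 3 fraction bits). */
   printf("min = %u, max = %u\n", U_FIXED(0.125, 3), U_FIXED(255.875, 3));
   return 0;
}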
Example #29
/**
 * 3DSTATE_PS
 *
 * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
 * that, thread dispatch info must still be specified.
 *     - Maximum Number of Threads (dw4.24:31) must be nonzero, as the
 *       valid range for this field is [0x3, 0x2f].
 *     - A dispatch mode must be given; that is, at least one of the
 *       "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
 *       discovered through simulator error messages.
 */
static void
gen8_blorp_emit_ps_config(struct brw_context *brw,
                          const struct brw_blorp_params *params)
{
   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
   uint32_t dw3, dw5, dw6, dw7, ksp0, ksp2;

   dw3 = dw5 = dw6 = dw7 = ksp0 = ksp2 = 0;
   dw3 |= GEN7_PS_VECTOR_MASK_ENABLE;

   if (params->src.mt) {
      dw3 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
      dw3 |= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* Two surfaces */
   } else {
      dw3 |= 1 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* One surface */
   }

   dw7 |= prog_data->first_curbe_grf_0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
   dw7 |= prog_data->first_curbe_grf_2 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;

   if (params->wm_prog_data->dispatch_8)
      dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
   if (params->wm_prog_data->dispatch_16)
      dw6 |= GEN7_PS_16_DISPATCH_ENABLE;

   ksp0 = params->wm_prog_kernel;
   ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2;
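   /* Kernel slot 0 holds the SIMD8 program and slot 2 the SIMD16 program;
    * they pair with the two dispatch-start-GRF fields set in dw7 above. */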

   /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
    * it implicitly scales for different GT levels (which have some # of PSDs).
    *
    * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1; the field is
    * biased by 2 or 1 respectively, hence the 64 - 2 and 64 - 1 below.
    */
   if (brw->gen >= 9)
      dw6 |= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT;
   else
      dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT;

   dw6 |= GEN7_PS_POSOFFSET_NONE;
   dw6 |= params->fast_clear_op;

   BEGIN_BATCH(12);
   OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2));
   OUT_BATCH(ksp0);
   OUT_BATCH(0);
   OUT_BATCH(dw3);
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(dw6);
   OUT_BATCH(dw7);
   OUT_BATCH(0); /* kernel 1 pointer */
   OUT_BATCH(0);
   OUT_BATCH(ksp2);
   OUT_BATCH(0);
   ADVANCE_BATCH();
}
Example #30
/* 3DSTATE_VS
 *
 * Disable the vertex shader.
 */
void
gen6_blorp_emit_vs_disable(struct brw_context *brw,
                           const brw_blorp_params *params)
{
   if (brw->gen == 6) {
      /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
       * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
       *
       *   [DevSNB] A pipeline flush must be programmed prior to a
       *   3DSTATE_VS command that causes the VS Function Enable to
       *   toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
       *   command with CS stall bit set and a post sync operation.
       */
      intel_emit_post_sync_nonzero_flush(brw);
   }

   /* Disable the push constant buffers. */
   BEGIN_BATCH(5);
   OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   ADVANCE_BATCH();

   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   ADVANCE_BATCH();
}
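For reference, the post-sync workaround invoked above boils down to a pair of PIPE_CONTROLs: a stall, followed by a dummy post-sync write. The sketch below is reconstructed from memory of Mesa drivers of this era; the helper name and flag spellings are assumptions, not a quote of the actual intel_emit_post_sync_nonzero_flush:

/* Sketch only: first stall the command streamer, then perform a dummy
 * post-sync write so that a following 3DSTATE_VS toggle is safe (SNB
 * workaround).  Flag and field names here are assumed, not verbatim. */
static void
emit_post_sync_nonzero_flush_sketch(struct brw_context *brw)
{
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(brw->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
             0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}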