static void brw_emit_index_buffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;

   if (index_buffer == NULL)
      return;

   BEGIN_BATCH(3);
   OUT_BATCH(CMD_INDEX_BUFFER << 16 |
             /* cut index enable << 10 */
             get_index_type(index_buffer->type) << 8 |
             1);
   OUT_RELOC(brw->ib.bo,
             I915_GEM_DOMAIN_VERTEX, 0,
             0);
   OUT_RELOC(brw->ib.bo,
             I915_GEM_DOMAIN_VERTEX, 0,
             brw->ib.bo->size - 1);
   ADVANCE_BATCH();
}
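For reference, the get_index_type() helper used above maps the GL index type to the hardware encoding placed in bits 9:8 of the header dword. A minimal sketch consistent with this era of Mesa (the BRW_INDEX_* values are assumed from the matching headers):

static GLuint get_index_type(GLenum type)
{
   switch (type) {
   case GL_UNSIGNED_BYTE:  return BRW_INDEX_BYTE;
   case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
   case GL_UNSIGNED_INT:   return BRW_INDEX_DWORD;
   default: assert(0); return 0;
   }
}

Note how the two relocations bracket the buffer: the first gives the start address, the second points at the last valid byte (bo->size - 1).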
Example #2
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_MEM_WRITE, 4);
	OUT_RELOCW(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);

	OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOCW(ring, query_sample(aq, stop));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, ZPASS_DONE);
	fd_reset_wfi(batch);

	OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
	OUT_RING(ring, 0x00000014);   // XXX
	OUT_RELOC(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0x00000010);   // XXX

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOCW(ring, query_sample(aq, result));     /* dst */
	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */

	fd5_context(batch->ctx)->samples_passed_queries--;
}
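The query_sample() macro above expands to the (bo, offset, or-value, shift) argument tuple that OUT_RELOC/OUT_RELOCW expect, addressing one field of the query's sample buffer. A sketch under the assumption of a fd5_query_sample struct with start/stop/result fields, as in the freedreno a5xx query code:

/* Assumed layout; expands to the argument tuple taken by OUT_RELOC(W). */
#define query_sample(aq, field) \
	fd_resource((aq)->prsc)->bo, \
	offsetof(struct fd5_query_sample, field), \
	0, 0

The CP_MEM_TO_MEM packet with the DOUBLE and NEG_C flags computes a 64-bit dst = srcA + srcB - srcC, which is how result += stop - start accumulates across pause/resume cycles.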
Example #3
/**
 * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS).
 */
static void
emit_vertex_buffer_state(struct brw_context *brw,
                         unsigned buffer_nr,
                         drm_intel_bo *bo,
                         unsigned bo_ending_address,
                         unsigned bo_offset,
                         unsigned stride,
                         unsigned step_rate)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t dw0;

   if (brw->gen >= 6) {
      dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) |
            (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA
                       : GEN6_VB0_ACCESS_VERTEXDATA);
   } else {
      dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) |
            (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA
                       : BRW_VB0_ACCESS_VERTEXDATA);
   }

   if (brw->gen >= 7)
      dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

   if (brw->gen == 7)
      dw0 |= GEN7_MOCS_L3 << 16;

   WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047),
             "VBO stride %d too large, bad rendering may occur\n",
             stride);
   OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));
   OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_offset);
   if (brw->gen >= 5) {
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_ending_address);
   } else {
      OUT_BATCH(0);
   }
   OUT_BATCH(step_rate);
}
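This helper emits only one four-dword entry; the caller is expected to open the surrounding 3DSTATE_VERTEX_BUFFERS packet. A hedged usage sketch (nr_buffers and bufs[] are illustrative; the length formula follows the gen6+ packet layout):

   /* Illustrative wrapper; one entry per enabled vertex buffer. */
   BEGIN_BATCH(1 + 4 * nr_buffers);
   OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers + 1 - 2));
   for (unsigned i = 0; i < nr_buffers; i++) {
      emit_vertex_buffer_state(brw, i, bufs[i].bo, bufs[i].bo->size - 1,
                               bufs[i].offset, bufs[i].stride, 0);
   }
   ADVANCE_BATCH();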
Example #4
/**********************************************************************
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static int upload_pipelined_state_pointers(struct brw_context *brw )
{
   BEGIN_BATCH(7, IGNORE_CLIPRECTS);
   OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
   OUT_RELOC(brw->vs.state_bo, 
	     BRW_USAGE_STATE,
	     0);
   if (brw->gs.prog_active)
      OUT_RELOC(brw->gs.state_bo, 
		BRW_USAGE_STATE,
		1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->clip.state_bo, 
	     BRW_USAGE_STATE,
	     1);
   OUT_RELOC(brw->sf.state_bo,
	     BRW_USAGE_STATE,
	     0);
   OUT_RELOC(brw->wm.state_bo,
	     BRW_USAGE_STATE,
	     0);
   OUT_RELOC(brw->cc.state_bo,
	     BRW_USAGE_STATE,
	     0);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
   return 0;
}
/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw )
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
	     brw->vs.state_offset);
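   /* Bit 0 of the GS and CLIP pointer dwords below is that unit's
    * enable bit, hence the "| 1" ORed into the state offsets. */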
   if (brw->gs.prog_active)
      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
		brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
	     brw->clip.state_offset | 1);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
	     brw->sf.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
	     brw->wm.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
	     brw->cc.state_offset);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
}
static void
gen9_emit_state_base_address(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));

	/* general */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);

	/* stateless data port */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);

	/* surface */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);

	/* dynamic */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
		0, BASE_ADDRESS_MODIFY);

	/* indirect */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* instruction */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

	/* general state buffer size */
	OUT_BATCH(0xfffff000 | 1);
	/* dynamic state buffer size */
	OUT_BATCH(1 << 12 | 1);
	/* indirect object buffer size */
	OUT_BATCH(0xfffff000 | 1);
	/* instruction buffer size; the modify-enable bit must be set, otherwise the GPU may hang */
	OUT_BATCH(1 << 12 | 1);

	/* Bindless surface state base address */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(0xfffff000);
}
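Each trailing buffer-size dword here encodes a length in 4KiB pages in bits 31:12, with bit 0 as the modify-enable for that field: 0xfffff000 | 1 programs the maximum size with the enable set, and 1 << 12 | 1 programs a single page. A hypothetical helper that makes the encoding explicit (not part of the igt batch helpers):

/* Hypothetical; size in 4KiB pages plus the modify-enable bit. */
#define STATE_BUFFER_SIZE_PAGES(pages)	(((pages) << 12) | 1)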
void
intelEmitFillBlit(struct intel_context *intel,
		  GLuint cpp,
		  GLshort dst_pitch,
		  dri_bo *dst_buffer,
		  GLuint dst_offset,
		  GLboolean dst_tiled,
		  GLshort x, GLshort y,
		  GLshort w, GLshort h,
		  GLuint color)
{
   GLuint BR13, CMD;
   BATCH_LOCALS;

   dst_pitch *= cpp;

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 = (0xF0 << 16) | (1 << 24);
      CMD = XY_COLOR_BLT_CMD;
      break;
   case 4:
      BR13 = (0xF0 << 16) | (1 << 24) | (1 << 25);
      CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return;
   }
#ifndef I915
   if (dst_tiled) {
      CMD |= XY_DST_TILED;
      dst_pitch /= 4;
   }
#endif

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);

   assert(w > 0);
   assert(h > 0);

   BEGIN_BATCH(6, NO_LOOP_CLIPRECTS);
   OUT_BATCH(CMD);
   OUT_BATCH(BR13 | dst_pitch);
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
   OUT_BATCH(color);
   ADVANCE_BATCH();
}
/**
 * Upload the binding table pointers, which point each stage's array of surface
 * state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which is 0.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(6, IGNORE_CLIPRECTS);
   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
   OUT_BATCH(0); /* vs */
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_RELOC(brw->wm.bind_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
   ADVANCE_BATCH();
}
static void
gen6_emit_state_base_address(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_RELOC(batch->bo, /* surface */
		  I915_GEM_DOMAIN_INSTRUCTION, 0,
		  BASE_ADDRESS_MODIFY);
	OUT_RELOC(batch->bo, /* instruction */
		  I915_GEM_DOMAIN_INSTRUCTION, 0,
		  BASE_ADDRESS_MODIFY);
	OUT_BATCH(0); /* indirect */
	OUT_RELOC(batch->bo, /* dynamic */
		  I915_GEM_DOMAIN_INSTRUCTION, 0,
		  BASE_ADDRESS_MODIFY);

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}
Example #10
static void
draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
		  enum pc_di_primtype primtype,
		  enum pc_di_vis_cull_mode vismode,
		  const struct pipe_draw_info *info,
		  unsigned index_offset)
{
	if (info->index_size) {
		assert(!info->has_user_indices);

		struct pipe_resource *idx_buffer = info->index.resource;
		uint32_t idx_size = info->index_size * info->count;
		uint32_t idx_offset = index_offset + info->start * info->index_size;

		/* leave vis mode blank for now, it will be patched up when
		 * we know if we are binning or not
		 */
		uint32_t draw = CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
			CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) |
			CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(fd4_size2indextype(info->index_size)) |
			0x2000;

		OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, 7);
		if (vismode == USE_VISIBILITY) {
			OUT_RINGP(ring, draw, &batch->draw_patches);
		} else {
			OUT_RING(ring, draw);
		}
		OUT_RING(ring, info->instance_count);    /* NumInstances */
		OUT_RING(ring, info->count);             /* NumIndices */
		OUT_RING(ring, 0x0);           /* XXX */
		OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
		OUT_RING (ring, idx_size);
	} else {
		/* leave vis mode blank for now, it will be patched up when
		 * we know if we are binning or not
		 */
		uint32_t draw = CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
			CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
			0x2000;

		OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, 3);
		if (vismode == USE_VISIBILITY) {
			OUT_RINGP(ring, draw, &batch->draw_patches);
		} else {
			OUT_RING(ring, draw);
		}
		OUT_RING(ring, info->instance_count);    /* NumInstances */
		OUT_RING(ring, info->count);             /* NumIndices */
	}
}
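fd4_size2indextype() maps the index element size in bytes to the hardware enum; a sketch matching the freedreno headers:

static inline enum a4xx_index_size
fd4_size2indextype(unsigned index_size)
{
	switch (index_size) {
	case 1: return INDEX4_SIZE_8_BIT;
	case 2: return INDEX4_SIZE_16_BIT;
	case 4: return INDEX4_SIZE_32_BIT;
	}
	debug_assert(0);
	return INDEX4_SIZE_32_BIT;
}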
Example #11
void
next_ring(void)
{
	int idx = ring_idx++ % ARRAY_SIZE(rings);

	if (rings[idx]) {
		ring = rings[idx];
		fd_ringbuffer_reset(ring);
		return;
	}

	ring = rings[idx] = fd_ringbuffer_new(pipe, 0x5000);

	memcpy(ring->start, initial_state, STATE_SIZE * sizeof(uint32_t));
	ring->cur = &ring->start[120];
	OUT_RELOC (ring, context_bos[0]);
	ring->cur = &ring->start[122];
	OUT_RELOC (ring, context_bos[1]);
	ring->cur = &ring->start[124];
	OUT_RELOC (ring, context_bos[2]);

	fd_ringbuffer_reset(ring);
}
Example #12
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOCW(ring, query_sample(aq, stop));
	OUT_RING(ring, 0x00000000);

	fd_reset_wfi(batch);
	fd_wfi(batch, ring);

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOCW(ring, query_sample(aq, result));     /* dst */
	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */
}
Example #13
void
fd_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
		struct fd_vertex_buf *vbufs, uint32_t n)
{
	unsigned i;

	OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n));
	OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
	for (i = 0; i < n; i++) {
		struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3);
		OUT_RING (ring, vbufs[i].size);
	}
}
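A hedged usage sketch: the caller fills an fd_vertex_buf array and passes the CP_SET_CONSTANT destination offset in val (the register offset, buffer names, and sizes below are illustrative):

	/* Illustrative only; offsets/sizes depend on the vertex format. */
	struct fd_vertex_buf bufs[2] = {
		{ .prsc = position_prsc, .offset = 0, .size = 4 * 12 },
		{ .prsc = color_prsc,    .offset = 0, .size = 4 * 16 },
	};
	fd_emit_vertex_bufs(ring, 0x78, bufs, 2);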
static void
store_dword_loop(int fd)
{
    int i;
    int num_rings = gem_get_num_rings(fd);

    srandom(0xdeadbeef);

    for (i = 0; i < SLOW_QUICK(0x100000, 10); i++) {
        int ring, mindex;
        ring = random() % num_rings + 1;
        mindex = random() % NUM_FD;
        batch = mbatch[mindex];
        if (ring == I915_EXEC_RENDER) {
            BEGIN_BATCH(4, 1);
            OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
            OUT_BATCH(0xffffffff); /* compare dword */
            OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
                      I915_GEM_DOMAIN_RENDER, 0);
            OUT_BATCH(MI_NOOP);
            ADVANCE_BATCH();
        } else {
            BEGIN_BATCH(4, 1);
            OUT_BATCH(MI_FLUSH_DW | 1);
            OUT_BATCH(0); /* reserved */
            OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
                      I915_GEM_DOMAIN_RENDER, 0);
            OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
            ADVANCE_BATCH();
        }
        intel_batchbuffer_flush_on_ring(batch, ring);
    }

    drm_intel_bo_map(target_buffer, 0);   /* map to force waiting on rendering */
    drm_intel_bo_unmap(target_buffer);
}
static void
do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch,
	  drm_intel_bo *dst_bo, int width, int height)
{
	uint32_t data[width * height];
	drm_intel_bo *src_bo;
	int i;
	static uint32_t seed = 1;

	/* Generate some junk.  Real workloads would be doing a lot more
	 * work to generate the junk.
	 */
	for (i = 0; i < width * height; i++) {
		data[i] = seed++;
	}

	/* Upload the junk. */
	src_bo = drm_intel_bo_alloc(bufmgr, "src", sizeof(data), 4096);
	drm_intel_bo_subdata(src_bo, 0, sizeof(data), data);

	/* Render the junk to the dst. */
	BLIT_COPY_BATCH_START(0);
	OUT_BATCH((3 << 24) | /* 32 bits */
		  (0xcc << 16) | /* copy ROP */
		  (width * 4) /* dst pitch */);
	OUT_BATCH(0); /* dst x1,y1 */
	OUT_BATCH((height << 16) | width); /* dst x2,y2 */
	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
	OUT_BATCH(0); /* src x1,y1 */
	OUT_BATCH(width * 4); /* src pitch */
	OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
	ADVANCE_BATCH();

	intel_batchbuffer_flush(batch);

	drm_intel_bo_unreference(src_bo);
}
Example #16
void
i915_fill_blit(struct i915_context *i915,
               unsigned cpp,
               unsigned short dst_pitch,
               struct intel_buffer *dst_buffer,
               unsigned dst_offset,
               short x, short y, 
               short w, short h, 
               unsigned color)
{
   unsigned BR13, CMD;


   I915_DBG(i915,
      "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
      __FUNCTION__,
      dst_buffer, dst_pitch, dst_offset, x, y, w, h);

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 = (((int) dst_pitch) & 0xffff) |
         (0xF0 << 16) | (1 << 24);
      CMD = XY_COLOR_BLT_CMD;
      break;
   case 4:
      BR13 = (((int) dst_pitch) & 0xffff) |
         (0xF0 << 16) | (1 << 24) | (1 << 25);
      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
             XY_COLOR_BLT_WRITE_RGB);
      break;
   default:
      return;
   }

   if (!BEGIN_BATCH(6, 1)) {
      FLUSH_BATCH(NULL);
      assert(BEGIN_BATCH(6, 1));
   }
   OUT_BATCH(CMD);
   OUT_BATCH(BR13);
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   OUT_RELOC(dst_buffer, INTEL_USAGE_2D_TARGET, dst_offset);
   OUT_BATCH(color);
   FLUSH_BATCH(NULL);
}
Example #17
static void
gen7_upload_hs_state(struct brw_context *brw)
{
   const struct brw_stage_state *stage_state = &brw->tcs.base;
   /* BRW_NEW_TESS_PROGRAMS */
   bool active = brw->tess_eval_program;
   /* BRW_NEW_TCS_PROG_DATA */
   const struct brw_vue_prog_data *prog_data = &brw->tcs.prog_data->base;

   if (active) {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
      OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
                          GEN7_HS_SAMPLER_COUNT) |
                SET_FIELD(prog_data->base.binding_table.size_bytes / 4,
                          GEN7_HS_BINDING_TABLE_ENTRY_COUNT) |
                (brw->max_hs_threads - 1));
      OUT_BATCH(GEN7_HS_ENABLE |
                GEN7_HS_STATISTICS_ENABLE |
                SET_FIELD(brw->tcs.prog_data->instances - 1,
                          GEN7_HS_INSTANCE_COUNT));
      OUT_BATCH(stage_state->prog_offset);
      if (prog_data->base.total_scratch) {
         OUT_RELOC(stage_state->scratch_bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   ffs(prog_data->base.total_scratch) - 11);
      } else {
         OUT_BATCH(0);
      }
      OUT_BATCH(GEN7_HS_INCLUDE_VERTEX_HANDLES |
                SET_FIELD(prog_data->base.dispatch_grf_start_reg,
                          GEN7_HS_DISPATCH_START_GRF));
      /* Ignore URB semaphores */
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
   brw->tcs.enabled = active;
}
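The scratch relocation ORs the per-thread scratch space encoding into the low bits of the address: for a power-of-two allocation, ffs(total_scratch) - 11 yields log2 of the size in KB, for example:

/* ffs() is 1-based, so for a power of two it returns log2(x) + 1:
 *   total_scratch = 1024 -> ffs(1024) - 11 = 0  (1KB per thread)
 *   total_scratch = 2048 -> ffs(2048) - 11 = 1  (2KB per thread)
 */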
Example #18
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
	const struct ir3_info *si = &so->info;
	enum adreno_state_block sb;
	enum adreno_state_src src;
	uint32_t i, sz, *bin;

	if (so->type == SHADER_VERTEX) {
		sb = SB_VERT_SHADER;
	} else {
		sb = SB_FRAG_SHADER;
	}

	if (fd_mesa_debug & FD_DBG_DIRECT) {
		sz = si->sizedwords;
		src = SS_DIRECT;
		bin = fd_bo_map(so->bo);
	} else {
		sz = 0;
		src = 2;  // enums different on a5xx..
		bin = NULL;
	}

	OUT_PKT7(ring, CP_LOAD_STATE, 3 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
			CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
	if (bin) {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
		OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
	} else {
		OUT_RELOC(ring, so->bo, 0,
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
	}

	/* However clever coverity is, it is sometimes rather dull and
	 * doesn't realize that bin == NULL implies sz == 0:
	 */
	assume(bin || (sz == 0));

	for (i = 0; i < sz; i++) {
		OUT_RING(ring, bin[i]);
	}
}
Example #19
/* emit texture state for mem->gmem restore operation.. eventually it would
 * be good to get rid of this and use normal CSO/etc state for more of these
 * special cases..
 */
void
fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
{
	struct fd_resource *rsc = fd_resource(psurf->texture);
	unsigned lvl = psurf->u.tex.level;
	struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
	uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer);
	enum pipe_format format = fd4_gmem_restore_format(psurf->format);

	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

	/* output sampler state: */
	OUT_PKT3(ring, CP_LOAD_STATE, 4);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
			CP_LOAD_STATE_0_NUM_UNIT(1));
	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
			A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
			A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
			A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
			A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
	OUT_RING(ring, 0x00000000);

	/* emit texture state: */
	OUT_PKT3(ring, CP_LOAD_STATE, 10);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
			CP_LOAD_STATE_0_NUM_UNIT(1));
	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
			A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
			fd4_tex_swiz(format,  PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
					PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
	OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(psurf->width) |
			A4XX_TEX_CONST_1_HEIGHT(psurf->height));
	OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
	OUT_RING(ring, 0x00000000);
	OUT_RELOC(ring, rsc->bo, offset, 0, 0);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
}
Example #20
static void emit_constant_buffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   GLuint sz = brw->curbe.total_size;

   BEGIN_BATCH(2);
   if (sz == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
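      /* curbe_bo is 64-byte aligned, so the low six bits of the relocated
       * address are zero and (sz - 1), the buffer length in 512-bit units,
       * can ride in the same dword's length field. */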
      OUT_RELOC(brw->curbe.curbe_bo,
		I915_GEM_DOMAIN_INSTRUCTION, 0,
		(sz - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();
}
Example #21
boolean brw_upload_vertex_buffers( struct brw_context *brw )
{
   struct brw_array_state vbp;
   unsigned nr_enabled = BRW_VEP_MAX; /* trimmed at the first unused slot below */
   unsigned i;

   memset(&vbp, 0, sizeof(vbp));

   /* BRW_VEP_MAX is a hardware limit: */

   for (i = 0; i < BRW_VEP_MAX; i++)
   {
      if (brw->vb.vbo_array[i] == NULL) {
	 nr_enabled = i;
	 break;
      }

      vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->stride;
      vbp.vb[i].vb0.bits.pad = 0;
      vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
      vbp.vb[i].vb0.bits.vb_index = i;
      vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset;
      vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer;
      vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index;
   }


   vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
   vbp.header.bits.opcode = CMD_VERTEX_BUFFER;

   BEGIN_BATCH(vbp.header.bits.length+2, 0);
   OUT_BATCH( vbp.header.dword );

   for (i = 0; i < nr_enabled; i++) {
      OUT_BATCH( vbp.vb[i].vb0.dword );
      OUT_RELOC( vbp.vb[i].buffer,  PIPE_BUFFER_USAGE_GPU_READ,
		 vbp.vb[i].offset);
      OUT_BATCH( vbp.vb[i].max_index );
      OUT_BATCH( vbp.vb[i].instance_data_step_rate );
   }
   ADVANCE_BATCH();
   return TRUE;
}
Example #22
/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
void
fd3_emit_constant(struct fd_ringbuffer *ring,
		enum adreno_state_block sb,
		uint32_t regid, uint32_t offset, uint32_t sizedwords,
		const uint32_t *dwords, struct pipe_resource *prsc)
{
	uint32_t i, sz;
	enum adreno_state_src src;

	if (prsc) {
		sz = 0;
		src = SS_INDIRECT;
	} else {
		sz = sizedwords;
		src = SS_DIRECT;
	}

	/* we have this sometimes, not others.. perhaps we could be clever
	 * and figure out actually when we need to invalidate cache:
	 */
	OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
			A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
			A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
	if (prsc) {
		struct fd_bo *bo = fd_resource(prsc)->bo;
		OUT_RELOC(ring, bo, offset,
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
	} else {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
	}
	for (i = 0; i < sz; i++) {
		OUT_RING(ring, dwords[i]);
	}
}
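Both call forms in one hedged sketch (buffer names and register offsets are illustrative): immediate dwords are copied inline with SS_DIRECT, while a resource-backed buffer is referenced through the relocation with SS_INDIRECT:

	/* Illustrative values; regid/sizedwords must match the shader. */
	static const uint32_t imm[8] = { 0 };
	fd3_emit_constant(ring, SB_VERT_SHADER, 0, 0, 8, imm, NULL);      /* direct */
	fd3_emit_constant(ring, SB_FRAG_SHADER, 8, 0, 8, NULL, ubo_prsc); /* indirect */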
Example #23
void brw_fill_blit(struct brw_context *brw,
                   unsigned cpp,
                   short dst_pitch,
                   struct pipe_buffer *dst_buffer,
                   unsigned dst_offset,
                   boolean dst_tiled,
                   short x, short y,
                   short w, short h,
                   unsigned color)
{
   unsigned BR13, CMD;
   BATCH_LOCALS;

   dst_pitch *= cpp;

   switch(cpp) {
   case 1:
   case 2:
   case 3:
      BR13 = (0xF0 << 16) | (1<<24);
      CMD = XY_COLOR_BLT_CMD;
      break;
   case 4:
      BR13 = (0xF0 << 16) | (1<<24) | (1<<25);
      CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return;
   }

   if (dst_tiled) {
      CMD |= XY_DST_TILED;
      dst_pitch /= 4;
   }

   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
   OUT_BATCH( CMD );
   OUT_BATCH( dst_pitch | BR13 );
   OUT_BATCH( (y << 16) | x );
   OUT_BATCH( ((y+h) << 16) | (x+w) );
   OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset );
   OUT_BATCH( color );
   ADVANCE_BATCH();
}
Example #24
void
i915_fill_blit(struct i915_context *i915,
	       unsigned cpp,
	       short dst_pitch,
	       struct pipe_buffer *dst_buffer,
	       unsigned dst_offset,
	       short x, short y, 
	       short w, short h, 
	       unsigned color)
{
   unsigned BR13, CMD;

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 = dst_pitch | (0xF0 << 16) | (1 << 24);
      CMD = XY_COLOR_BLT_CMD;
      break;
   case 4:
      BR13 = dst_pitch | (0xF0 << 16) | (1 << 24) | (1 << 25);
      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
             XY_COLOR_BLT_WRITE_RGB);
      break;
   default:
      return;
   }

//   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
//       __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);


   if (!BEGIN_BATCH(6, 1)) {
      FLUSH_BATCH(NULL);
      assert(BEGIN_BATCH(6, 1));
   }
   OUT_BATCH(CMD);
   OUT_BATCH(BR13);
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset);
   OUT_BATCH(color);
}
Example #25
static inline void
out_srcpix(struct fd_ringbuffer *ring, PixmapPtr pix)
{
	struct fd_bo *bo = pix->bo;
	uint32_t w, h, p;

	w = pix->width;
	h = pix->height;

	/* pitch specified in units of 32 bytes, it appears.. not quite sure
	 * max size yet, but I think 11 or 12 bits..
	 */
	p = (pix->pitch / 32) & 0xfff;

	OUT_RING (ring, REGM(GRADW_TEXCFG, 3));
	OUT_RING (ring, 0x40000000 | p |   /* GRADW_TEXCFG */
			((pix->depth == 8) ? 0xe000 : 0x7000));
	// TODO check if 13 bit
	OUT_RING (ring, ((h & 0xfff) << 13) | (w & 0xfff)); /* GRADW_TEXSIZE */
	OUT_RELOC(ring, bo);               /* GRADW_TEXBASE */
}
Example #26
static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
		struct fd6_image *img, enum pipe_shader_type shader)
{
	unsigned opcode = CP_LOAD_STATE6_FRAG;

	assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);

	OUT_PKT7(ring, opcode, 3 + 12);
	OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
		CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
		CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
		CP_LOAD_STATE6_0_STATE_BLOCK(texsb[shader]) |
		CP_LOAD_STATE6_0_NUM_UNIT(1));
	OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

	OUT_RING(ring, A6XX_TEX_CONST_0_FMT(img->fmt) |
		fd6_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
			PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
		COND(img->srgb, A6XX_TEX_CONST_0_SRGB));
	OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) |
		A6XX_TEX_CONST_1_HEIGHT(img->height));
	OUT_RING(ring, A6XX_TEX_CONST_2_FETCHSIZE(img->fetchsize) |
		A6XX_TEX_CONST_2_TYPE(img->type) |
		A6XX_TEX_CONST_2_PITCH(img->pitch));
	OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
	if (img->bo) {
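		/* The or-value is shifted into the upper 32 bits so that
		 * A6XX_TEX_CONST_5_DEPTH lands in the high dword of the
		 * 64-bit address pair emitted by this OUT_RELOC. */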
		OUT_RELOC(ring, img->bo, img->offset,
				(uint64_t)A6XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
	} else {
		OUT_RING(ring, 0x00000000);
		OUT_RING(ring, A6XX_TEX_CONST_5_DEPTH(img->depth));
	}
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
}
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
	const struct ir3_info *si = &so->info;
	enum adreno_state_block sb;
	enum adreno_state_src src;
	uint32_t i, sz, *bin;

	if (so->type == SHADER_VERTEX) {
		sb = SB_VERT_SHADER;
	} else {
		sb = SB_FRAG_SHADER;
	}

	if (fd_mesa_debug & FD_DBG_DIRECT) {
		sz = si->sizedwords;
		src = SS_DIRECT;
		bin = fd_bo_map(so->bo);
	} else {
		sz = 0;
		src = 2;  // enums different on a4xx..
		bin = NULL;
	}

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
			CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
	if (bin) {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
	} else {
		OUT_RELOC(ring, so->bo, 0,
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
	}
	for (i = 0; i < sz; i++) {
		OUT_RING(ring, bin[i]);
	}
}
Example #28
void intelEmitFillBlit( struct intel_context *intel,
			GLuint cpp,
			GLshort dst_pitch,
			GLuint dst_buffer,
			GLuint dst_offset,
			GLshort x, GLshort y, 
			GLshort w, GLshort h,
			GLuint color )
{
   GLuint BR13, CMD;
   BATCH_LOCALS;

   dst_pitch *= cpp;

   switch(cpp) {
   case 1: 
   case 2: 
   case 3: 
      BR13 = dst_pitch | (0xF0 << 16) | (1<<24);
      CMD = XY_COLOR_BLT_CMD;
      break;
   case 4:
      BR13 = dst_pitch | (0xF0 << 16) | (1<<24) | (1<<25);
      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
	     XY_COLOR_BLT_WRITE_RGB);
      break;
   default:
      return;
   }

   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
   OUT_BATCH( CMD );
   OUT_BATCH( BR13 );
   OUT_BATCH( (y << 16) | x );
   OUT_BATCH( ((y+h) << 16) | (x+w) );
   OUT_RELOC( dst_buffer, DRM_MM_TT|DRM_MM_WRITE, dst_offset );
   OUT_BATCH( color );
   ADVANCE_BATCH();
}
Example #29
static void
emit_binning_workaround(struct fd_context *ctx)
{
	struct fd3_context *fd3_ctx = fd3_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = ctx->ring;

	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
			A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));

	OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
	OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
			A3XX_RB_COPY_CONTROL_MODE(0) |
			A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
	OUT_RELOC(ring, fd_resource(fd3_ctx->solid_vbuf)->bo, 0x20, 0, -1);  /* RB_COPY_DEST_BASE */
	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
			A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
			A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
			A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
			A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));

	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	fd_wfi(ctx, ring);
	fd3_program_emit(ring, &ctx->solid_prog, key);
	fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
			(struct fd3_vertex_buf[]) {{
				.prsc = fd3_ctx->solid_vbuf,
				.stride = 12,
				.format = PIPE_FORMAT_R32G32B32_FLOAT,
			}}, 1);
Example #30
/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
void
fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
		uint32_t regid, uint32_t offset, uint32_t sizedwords,
		const uint32_t *dwords, struct pipe_resource *prsc)
{
	uint32_t i, sz;
	enum adreno_state_src src;

	debug_assert((regid % 4) == 0);
	debug_assert((sizedwords % 4) == 0);

	if (prsc) {
		sz = 0;
		src = 0x2;  // TODO ??
	} else {
		sz = sizedwords;
		src = SS_DIRECT;
	}

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
	if (prsc) {
		struct fd_bo *bo = fd_resource(prsc)->bo;
		OUT_RELOC(ring, bo, offset,
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
	} else {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
	}
	for (i = 0; i < sz; i++) {
		OUT_RING(ring, dwords[i]);
	}
}