static void
brw_emit_index_buffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;

   if (index_buffer == NULL)
      return;

   BEGIN_BATCH(3);
   OUT_BATCH(CMD_INDEX_BUFFER << 16 |
             /* cut index enable << 10 */
             get_index_type(index_buffer->type) << 8 |
             1);
   OUT_RELOC(brw->ib.bo,
             I915_GEM_DOMAIN_VERTEX, 0,
             0);
   OUT_RELOC(brw->ib.bo,
             I915_GEM_DOMAIN_VERTEX, 0,
             brw->ib.bo->size - 1);
   ADVANCE_BATCH();
}
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_MEM_WRITE, 4);
	OUT_RELOCW(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);

	OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOCW(ring, query_sample(aq, stop));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, ZPASS_DONE);
	fd_reset_wfi(batch);

	OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
	OUT_RING(ring, 0x00000014);   // XXX
	OUT_RELOC(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0x00000010);   // XXX

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOCW(ring, query_sample(aq, result));     /* dst */
	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */

	fd5_context(batch->ctx)->samples_passed_queries--;
}
/**
 * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS).
 */
static void
emit_vertex_buffer_state(struct brw_context *brw,
                         unsigned buffer_nr,
                         drm_intel_bo *bo,
                         unsigned bo_ending_address,
                         unsigned bo_offset,
                         unsigned stride,
                         unsigned step_rate)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t dw0;

   if (brw->gen >= 6) {
      dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) |
            (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA
                       : GEN6_VB0_ACCESS_VERTEXDATA);
   } else {
      dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) |
            (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA
                       : BRW_VB0_ACCESS_VERTEXDATA);
   }

   if (brw->gen >= 7)
      dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;

   if (brw->gen == 7)
      dw0 |= GEN7_MOCS_L3 << 16;

   WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047),
             "VBO stride %d too large, bad rendering may occur\n",
             stride);

   OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));
   OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_offset);
   if (brw->gen >= 5) {
      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_ending_address);
   } else {
      OUT_BATCH(0);
   }
   OUT_BATCH(step_rate);
}
/**********************************************************************
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static int upload_pipelined_state_pointers(struct brw_context *brw)
{
   BEGIN_BATCH(7, IGNORE_CLIPRECTS);
   OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
   OUT_RELOC(brw->vs.state_bo, BRW_USAGE_STATE, 0);
   if (brw->gs.prog_active)
      OUT_RELOC(brw->gs.state_bo, BRW_USAGE_STATE, 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->clip.state_bo, BRW_USAGE_STATE, 1);
   OUT_RELOC(brw->sf.state_bo, BRW_USAGE_STATE, 0);
   OUT_RELOC(brw->wm.state_bo, BRW_USAGE_STATE, 0);
   OUT_RELOC(brw->cc.state_bo, BRW_USAGE_STATE, 0);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
   return 0;
}
/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
   if (brw->gs.prog_active)
      OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
}
static void
gen9_emit_state_base_address(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));

	/* general */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);

	/* stateless data port */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);

	/* surface */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);

	/* dynamic */
	OUT_RELOC(batch->bo,
		  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
		  0, BASE_ADDRESS_MODIFY);

	/* indirect */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* instruction */
	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

	/* general state buffer size */
	OUT_BATCH(0xfffff000 | 1);
	/* dynamic state buffer size */
	OUT_BATCH(1 << 12 | 1);
	/* indirect object buffer size */
	OUT_BATCH(0xfffff000 | 1);
	/* instruction buffer size; the modify-enable bit must be set,
	 * otherwise it may result in a GPU hang */
	OUT_BATCH(1 << 12 | 1);

	/* Bindless surface state base address */
	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(0xfffff000);
}
void
intelEmitFillBlit(struct intel_context *intel,
                  GLuint cpp,
                  GLshort dst_pitch,
                  dri_bo *dst_buffer,
                  GLuint dst_offset,
                  GLboolean dst_tiled,
                  GLshort x, GLshort y,
                  GLshort w, GLshort h,
                  GLuint color)
{
   GLuint BR13, CMD;
   BATCH_LOCALS;

   dst_pitch *= cpp;

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 = (0xF0 << 16) | (1 << 24);
      CMD = XY_COLOR_BLT_CMD;
      break;
   case 4:
      BR13 = (0xF0 << 16) | (1 << 24) | (1 << 25);
      CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return;
   }

#ifndef I915
   if (dst_tiled) {
      CMD |= XY_DST_TILED;
      dst_pitch /= 4;
   }
#endif

   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
       __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);

   assert(w > 0);
   assert(h > 0);

   BEGIN_BATCH(6, NO_LOOP_CLIPRECTS);
   OUT_BATCH(CMD);
   OUT_BATCH(BR13 | dst_pitch);
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
   OUT_BATCH(color);
   ADVANCE_BATCH();
}
/**
 * Upload the binding table pointers, which point to each stage's array of
 * surface state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which is 0.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;

   BEGIN_BATCH(6, IGNORE_CLIPRECTS);
   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
   OUT_BATCH(0); /* vs */
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_RELOC(brw->wm.bind_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
   ADVANCE_BATCH();
}
static void
gen6_emit_state_base_address(struct intel_batchbuffer *batch)
{
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(0); /* general */
	OUT_RELOC(batch->bo, /* surface */
		  I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
	OUT_RELOC(batch->bo, /* instruction */
		  I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
	OUT_BATCH(0); /* indirect */
	OUT_RELOC(batch->bo, /* dynamic */
		  I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}
static void
draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
	  enum pc_di_primtype primtype,
	  enum pc_di_vis_cull_mode vismode,
	  const struct pipe_draw_info *info,
	  unsigned index_offset)
{
	if (info->index_size) {
		assert(!info->has_user_indices);

		struct pipe_resource *idx_buffer = info->index.resource;
		uint32_t idx_size = info->index_size * info->count;
		uint32_t idx_offset = index_offset + info->start * info->index_size;

		/* leave vis mode blank for now, it will be patched up when
		 * we know if we are binning or not
		 */
		uint32_t draw = CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
			CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) |
			CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(fd4_size2indextype(info->index_size)) |
			0x2000;

		OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, 7);
		if (vismode == USE_VISIBILITY) {
			OUT_RINGP(ring, draw, &batch->draw_patches);
		} else {
			OUT_RING(ring, draw);
		}
		OUT_RING(ring, info->instance_count);   /* NumInstances */
		OUT_RING(ring, info->count);            /* NumIndices */
		OUT_RING(ring, 0x0);                    /* XXX */
		OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
		OUT_RING (ring, idx_size);
	} else {
		/* leave vis mode blank for now, it will be patched up when
		 * we know if we are binning or not
		 */
		uint32_t draw = CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
			CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
			0x2000;

		OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, 3);
		if (vismode == USE_VISIBILITY) {
			OUT_RINGP(ring, draw, &batch->draw_patches);
		} else {
			OUT_RING(ring, draw);
		}
		OUT_RING(ring, info->instance_count);   /* NumInstances */
		OUT_RING(ring, info->count);            /* NumIndices */
	}
}
void
next_ring(void)
{
	int idx = ring_idx++ % ARRAY_SIZE(rings);

	if (rings[idx]) {
		ring = rings[idx];
		fd_ringbuffer_reset(ring);
		return;
	}

	ring = rings[idx] = fd_ringbuffer_new(pipe, 0x5000);

	memcpy(ring->start, initial_state, STATE_SIZE * sizeof(uint32_t));

	ring->cur = &ring->start[120];
	OUT_RELOC (ring, context_bos[0]);

	ring->cur = &ring->start[122];
	OUT_RELOC (ring, context_bos[1]);

	ring->cur = &ring->start[124];
	OUT_RELOC (ring, context_bos[2]);

	fd_ringbuffer_reset(ring);
}
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOCW(ring, query_sample(aq, stop));
	OUT_RING(ring, 0x00000000);

	fd_reset_wfi(batch);
	fd_wfi(batch, ring);

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOCW(ring, query_sample(aq, result));     /* dst */
	OUT_RELOC(ring, query_sample(aq, result));      /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));        /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));       /* srcC */
}
void
fd_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
		struct fd_vertex_buf *vbufs, uint32_t n)
{
	unsigned i;

	OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n));
	OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
	for (i = 0; i < n; i++) {
		struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3);
		OUT_RING (ring, vbufs[i].size);
	}
}
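/* A hedged call-site sketch for fd_emit_vertex_bufs() above, using only the
 * fields the helper reads (prsc, offset, size).  The resource pointers, the
 * `val` register offset (0x78), and the buffer sizes are illustrative
 * assumptions for the sketch, not values taken from the driver.
 */
static void
example_emit_vbufs(struct fd_ringbuffer *ring,
		struct pipe_resource *position_prsc,   /* assumed resource */
		struct pipe_resource *color_prsc)      /* assumed resource */
{
	struct fd_vertex_buf vbufs[] = {
		{ .prsc = position_prsc, .offset = 0, .size = 3 * 4 * sizeof(float) },
		{ .prsc = color_prsc,    .offset = 0, .size = 4 * 4 * sizeof(float) },
	};
	fd_emit_vertex_bufs(ring, 0x78 /* illustrative */, vbufs, ARRAY_SIZE(vbufs));
}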
static void
store_dword_loop(int fd)
{
	int i;
	int num_rings = gem_get_num_rings(fd);

	srandom(0xdeadbeef);

	for (i = 0; i < SLOW_QUICK(0x100000, 10); i++) {
		int ring, mindex;

		ring = random() % num_rings + 1;
		mindex = random() % NUM_FD;
		batch = mbatch[mindex];
		if (ring == I915_EXEC_RENDER) {
			BEGIN_BATCH(4, 1);
			OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
			OUT_BATCH(0xffffffff); /* compare dword */
			OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
				  I915_GEM_DOMAIN_RENDER, 0);
			OUT_BATCH(MI_NOOP);
			ADVANCE_BATCH();
		} else {
			BEGIN_BATCH(4, 1);
			OUT_BATCH(MI_FLUSH_DW | 1);
			OUT_BATCH(0); /* reserved */
			OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
				  I915_GEM_DOMAIN_RENDER, 0);
			OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
			ADVANCE_BATCH();
		}
		intel_batchbuffer_flush_on_ring(batch, ring);
	}

	drm_intel_bo_map(target_buffer, 0); /* map to force waiting on rendering */
	drm_intel_bo_unmap(target_buffer);
}
static void
do_render(drm_intel_bufmgr *bufmgr, struct intel_batchbuffer *batch,
	  drm_intel_bo *dst_bo, int width, int height)
{
	uint32_t data[width * height];
	drm_intel_bo *src_bo;
	int i;
	static uint32_t seed = 1;

	/* Generate some junk.  Real workloads would be doing a lot more
	 * work to generate the junk.
	 */
	for (i = 0; i < width * height; i++) {
		data[i] = seed++;
	}

	/* Upload the junk. */
	src_bo = drm_intel_bo_alloc(bufmgr, "src", sizeof(data), 4096);
	drm_intel_bo_subdata(src_bo, 0, sizeof(data), data);

	/* Render the junk to the dst. */
	BLIT_COPY_BATCH_START(0);
	OUT_BATCH((3 << 24) |   /* 32 bits */
		  (0xcc << 16) | /* copy ROP */
		  (width * 4) /* dst pitch */);
	OUT_BATCH(0); /* dst x1,y1 */
	OUT_BATCH((height << 16) | width); /* dst x2,y2 */
	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
	OUT_BATCH(0); /* src x1,y1 */
	OUT_BATCH(width * 4); /* src pitch */
	OUT_RELOC(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
	ADVANCE_BATCH();

	intel_batchbuffer_flush(batch);

	drm_intel_bo_unreference(src_bo);
}
void
i915_fill_blit(struct i915_context *i915,
               unsigned cpp,
               unsigned short dst_pitch,
               struct intel_buffer *dst_buffer,
               unsigned dst_offset,
               short x, short y,
               short w, short h,
               unsigned color)
{
   unsigned BR13, CMD;

   I915_DBG(i915, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
            __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 = (((int) dst_pitch) & 0xffff) |
             (0xF0 << 16) | (1 << 24);
      CMD = XY_COLOR_BLT_CMD;
      break;
   case 4:
      BR13 = (((int) dst_pitch) & 0xffff) |
             (0xF0 << 16) | (1 << 24) | (1 << 25);
      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
             XY_COLOR_BLT_WRITE_RGB);
      break;
   default:
      return;
   }

   if (!BEGIN_BATCH(6, 1)) {
      FLUSH_BATCH(NULL);
      assert(BEGIN_BATCH(6, 1));
   }
   OUT_BATCH(CMD);
   OUT_BATCH(BR13);
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   OUT_RELOC(dst_buffer, INTEL_USAGE_2D_TARGET, dst_offset);
   OUT_BATCH(color);
   FLUSH_BATCH(NULL);
}
static void
gen7_upload_hs_state(struct brw_context *brw)
{
   const struct brw_stage_state *stage_state = &brw->tcs.base;
   /* BRW_NEW_TESS_PROGRAMS */
   bool active = brw->tess_eval_program;
   /* BRW_NEW_TCS_PROG_DATA */
   const struct brw_vue_prog_data *prog_data = &brw->tcs.prog_data->base;

   if (active) {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
      OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4),
                          GEN7_HS_SAMPLER_COUNT) |
                SET_FIELD(prog_data->base.binding_table.size_bytes / 4,
                          GEN7_HS_BINDING_TABLE_ENTRY_COUNT) |
                (brw->max_hs_threads - 1));
      OUT_BATCH(GEN7_HS_ENABLE |
                GEN7_HS_STATISTICS_ENABLE |
                SET_FIELD(brw->tcs.prog_data->instances - 1,
                          GEN7_HS_INSTANCE_COUNT));
      OUT_BATCH(stage_state->prog_offset);
      if (prog_data->base.total_scratch) {
         OUT_RELOC(stage_state->scratch_bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   ffs(prog_data->base.total_scratch) - 11);
      } else {
         OUT_BATCH(0);
      }
      OUT_BATCH(GEN7_HS_INCLUDE_VERTEX_HANDLES |
                SET_FIELD(prog_data->base.dispatch_grf_start_reg,
                          GEN7_HS_DISPATCH_START_GRF));
      /* Ignore URB semaphores */
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(7);
      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
   brw->tcs.enabled = active;
}
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
	const struct ir3_info *si = &so->info;
	enum adreno_state_block sb;
	enum adreno_state_src src;
	uint32_t i, sz, *bin;

	if (so->type == SHADER_VERTEX) {
		sb = SB_VERT_SHADER;
	} else {
		sb = SB_FRAG_SHADER;
	}

	if (fd_mesa_debug & FD_DBG_DIRECT) {
		sz = si->sizedwords;
		src = SS_DIRECT;
		bin = fd_bo_map(so->bo);
	} else {
		sz = 0;
		src = 2;  // enums different on a5xx..
		bin = NULL;
	}

	OUT_PKT7(ring, CP_LOAD_STATE, 3 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
			CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
	if (bin) {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
		OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
	} else {
		OUT_RELOC(ring, so->bo, 0,
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
	}

	/* for how clever coverity is, it is sometimes rather dull, and
	 * doesn't realize that the only case where bin==NULL, sz==0:
	 */
	assume(bin || (sz == 0));

	for (i = 0; i < sz; i++) {
		OUT_RING(ring, bin[i]);
	}
}
/* emit texture state for mem->gmem restore operation.. eventually it would
 * be good to get rid of this and use normal CSO/etc state for more of these
 * special cases..
 */
void
fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
{
	struct fd_resource *rsc = fd_resource(psurf->texture);
	unsigned lvl = psurf->u.tex.level;
	struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
	uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer);
	enum pipe_format format = fd4_gmem_restore_format(psurf->format);

	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

	/* output sampler state: */
	OUT_PKT3(ring, CP_LOAD_STATE, 4);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
			CP_LOAD_STATE_0_NUM_UNIT(1));
	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
			A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
			A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
			A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
			A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
	OUT_RING(ring, 0x00000000);

	/* emit texture state: */
	OUT_PKT3(ring, CP_LOAD_STATE, 10);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
			CP_LOAD_STATE_0_NUM_UNIT(1));
	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
			A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
			fd4_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
					PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
	OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(psurf->width) |
			A4XX_TEX_CONST_1_HEIGHT(psurf->height));
	OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
	OUT_RING(ring, 0x00000000);
	OUT_RELOC(ring, rsc->bo, offset, 0, 0);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
}
static void emit_constant_buffer(struct brw_context *brw)
{
   struct intel_context *intel = &brw->intel;
   GLuint sz = brw->curbe.total_size;

   BEGIN_BATCH(2);
   if (sz == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo,
                I915_GEM_DOMAIN_INSTRUCTION, 0,
                (sz - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();
}
boolean brw_upload_vertex_buffers( struct brw_context *brw )
{
   struct brw_array_state vbp;
   unsigned nr_enabled = 0;
   unsigned i;

   memset(&vbp, 0, sizeof(vbp));

   /* This is a hardware limit:
    */
   for (i = 0; i < BRW_VEP_MAX; i++) {
      if (brw->vb.vbo_array[i] == NULL) {
         nr_enabled = i;
         break;
      }

      vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->stride;
      vbp.vb[i].vb0.bits.pad = 0;
      vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA;
      vbp.vb[i].vb0.bits.vb_index = i;
      vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset;
      vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer;
      vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index;
   }

   vbp.header.bits.length = (1 + nr_enabled * 4) - 2;
   vbp.header.bits.opcode = CMD_VERTEX_BUFFER;

   BEGIN_BATCH(vbp.header.bits.length + 2, 0);
   OUT_BATCH( vbp.header.dword );

   for (i = 0; i < nr_enabled; i++) {
      OUT_BATCH( vbp.vb[i].vb0.dword );
      OUT_RELOC( vbp.vb[i].buffer,
                 PIPE_BUFFER_USAGE_GPU_READ,
                 vbp.vb[i].offset);
      OUT_BATCH( vbp.vb[i].max_index );
      OUT_BATCH( vbp.vb[i].instance_data_step_rate );
   }
   ADVANCE_BATCH();
   return TRUE;
}
/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
void
fd3_emit_constant(struct fd_ringbuffer *ring,
		enum adreno_state_block sb,
		uint32_t regid, uint32_t offset, uint32_t sizedwords,
		const uint32_t *dwords, struct pipe_resource *prsc)
{
	uint32_t i, sz;
	enum adreno_state_src src;

	if (prsc) {
		sz = 0;
		src = SS_INDIRECT;
	} else {
		sz = sizedwords;
		src = SS_DIRECT;
	}

	/* we have this sometimes, not others.. perhaps we could be clever
	 * and figure out actually when we need to invalidate cache:
	 */
	OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
			A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
			A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
	if (prsc) {
		struct fd_bo *bo = fd_resource(prsc)->bo;
		OUT_RELOC(ring, bo, offset,
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
	} else {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
	}
	for (i = 0; i < sz; i++) {
		OUT_RING(ring, dwords[i]);
	}
}
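/* A hedged illustration of the two paths fd3_emit_constant() above supports:
 * direct immediate values (prsc == NULL, dwords copied inline after the
 * CP_LOAD_STATE header) and an indirect buffer (prsc != NULL, only a
 * relocation to its backing bo is emitted).  The register numbers, sizes,
 * `mvp` array, and `ubo_prsc` resource are assumptions made for the sketch,
 * not values taken from the driver.
 */
static void
example_emit_consts(struct fd_ringbuffer *ring,
		const uint32_t mvp[16],              /* assumed 4x4 matrix of dwords */
		struct pipe_resource *ubo_prsc)      /* assumed 64-dword const buffer */
{
	/* direct: 16 dwords starting at const register 0 */
	fd3_emit_constant(ring, SB_VERT_SHADER, 0, 0, 16, mvp, NULL);

	/* indirect: 64 dwords starting at const register 16, read from the bo */
	fd3_emit_constant(ring, SB_VERT_SHADER, 16, 0, 64, NULL, ubo_prsc);
}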
void brw_fill_blit(struct brw_context *brw,
                   unsigned cpp,
                   short dst_pitch,
                   struct pipe_buffer *dst_buffer,
                   unsigned dst_offset,
                   boolean dst_tiled,
                   short x, short y,
                   short w, short h,
                   unsigned color)
{
   unsigned BR13, CMD;
   BATCH_LOCALS;

   dst_pitch *= cpp;

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 = (0xF0 << 16) | (1 << 24);
      CMD = XY_COLOR_BLT_CMD;
      break;
   case 4:
      BR13 = (0xF0 << 16) | (1 << 24) | (1 << 25);
      CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
      break;
   default:
      return;
   }

   if (dst_tiled) {
      CMD |= XY_DST_TILED;
      dst_pitch /= 4;
   }

   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
   OUT_BATCH( CMD );
   OUT_BATCH( dst_pitch | BR13 );
   OUT_BATCH( (y << 16) | x );
   OUT_BATCH( ((y + h) << 16) | (x + w) );
   OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset );
   OUT_BATCH( color );
   ADVANCE_BATCH();
}
void
i915_fill_blit(struct i915_context *i915,
               unsigned cpp,
               short dst_pitch,
               struct pipe_buffer *dst_buffer,
               unsigned dst_offset,
               short x, short y,
               short w, short h,
               unsigned color)
{
   unsigned BR13, CMD;

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 = dst_pitch | (0xF0 << 16) | (1 << 24);
      CMD = XY_COLOR_BLT_CMD;
      break;
   case 4:
      BR13 = dst_pitch | (0xF0 << 16) | (1 << 24) | (1 << 25);
      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
             XY_COLOR_BLT_WRITE_RGB);
      break;
   default:
      return;
   }

//   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
//       __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);

   if (!BEGIN_BATCH(6, 1)) {
      FLUSH_BATCH(NULL);
      assert(BEGIN_BATCH(6, 1));
   }
   OUT_BATCH(CMD);
   OUT_BATCH(BR13);
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
   OUT_RELOC(dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset);
   OUT_BATCH(color);
}
static inline void
out_srcpix(struct fd_ringbuffer *ring, PixmapPtr pix)
{
	struct fd_bo *bo = pix->bo;
	uint32_t w, h, p;

	w = pix->width;
	h = pix->height;

	/* pitch specified in units of 32 bytes, it appears.. not quite sure
	 * max size yet, but I think 11 or 12 bits..
	 */
	p = (pix->pitch / 32) & 0xfff;

	OUT_RING (ring, REGM(GRADW_TEXCFG, 3));
	OUT_RING (ring, 0x40000000 | p |                      /* GRADW_TEXCFG */
			((pix->depth == 8) ? 0xe000 : 0x7000));       // TODO check if 13 bit
	OUT_RING (ring, ((h & 0xfff) << 13) | (w & 0xfff));   /* GRADW_TEXSIZE */
	OUT_RELOC(ring, bo);                                  /* GRADW_TEXBASE */
}
static void emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
		struct fd6_image *img, enum pipe_shader_type shader)
{
	unsigned opcode = CP_LOAD_STATE6_FRAG;

	assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);

	OUT_PKT7(ring, opcode, 3 + 12);
	OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
		CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
		CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
		CP_LOAD_STATE6_0_STATE_BLOCK(texsb[shader]) |
		CP_LOAD_STATE6_0_NUM_UNIT(1));
	OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

	OUT_RING(ring, A6XX_TEX_CONST_0_FMT(img->fmt) |
		fd6_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
			PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
		COND(img->srgb, A6XX_TEX_CONST_0_SRGB));
	OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) |
		A6XX_TEX_CONST_1_HEIGHT(img->height));
	OUT_RING(ring, A6XX_TEX_CONST_2_FETCHSIZE(img->fetchsize) |
		A6XX_TEX_CONST_2_TYPE(img->type) |
		A6XX_TEX_CONST_2_PITCH(img->pitch));
	OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
	if (img->bo) {
		OUT_RELOC(ring, img->bo, img->offset,
				(uint64_t)A6XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
	} else {
		OUT_RING(ring, 0x00000000);
		OUT_RING(ring, A6XX_TEX_CONST_5_DEPTH(img->depth));
	}
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
}
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
	const struct ir3_info *si = &so->info;
	enum adreno_state_block sb;
	enum adreno_state_src src;
	uint32_t i, sz, *bin;

	if (so->type == SHADER_VERTEX) {
		sb = SB_VERT_SHADER;
	} else {
		sb = SB_FRAG_SHADER;
	}

	if (fd_mesa_debug & FD_DBG_DIRECT) {
		sz = si->sizedwords;
		src = SS_DIRECT;
		bin = fd_bo_map(so->bo);
	} else {
		sz = 0;
		src = 2;  // enums different on a4xx..
		bin = NULL;
	}

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
			CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
	if (bin) {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
	} else {
		OUT_RELOC(ring, so->bo, 0,
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
	}
	for (i = 0; i < sz; i++) {
		OUT_RING(ring, bin[i]);
	}
}
void intelEmitFillBlit( struct intel_context *intel,
			GLuint cpp,
			GLshort dst_pitch,
			GLuint dst_buffer,
			GLuint dst_offset,
			GLshort x, GLshort y,
			GLshort w, GLshort h,
			GLuint color )
{
   GLuint BR13, CMD;
   BATCH_LOCALS;

   dst_pitch *= cpp;

   switch (cpp) {
   case 1:
   case 2:
   case 3:
      BR13 = dst_pitch | (0xF0 << 16) | (1 << 24);
      CMD = XY_COLOR_BLT_CMD;
      break;
   case 4:
      BR13 = dst_pitch | (0xF0 << 16) | (1 << 24) | (1 << 25);
      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA |
             XY_COLOR_BLT_WRITE_RGB);
      break;
   default:
      return;
   }

   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
   OUT_BATCH( CMD );
   OUT_BATCH( BR13 );
   OUT_BATCH( (y << 16) | x );
   OUT_BATCH( ((y + h) << 16) | (x + w) );
   OUT_RELOC( dst_buffer, DRM_MM_TT | DRM_MM_WRITE, dst_offset );
   OUT_BATCH( color );
   ADVANCE_BATCH();
}
static void
emit_binning_workaround(struct fd_context *ctx)
{
	struct fd3_context *fd3_ctx = fd3_context(ctx);
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct fd_ringbuffer *ring = ctx->ring;

	OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
	OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
			A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
	OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
			A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
			A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));

	OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
	OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
			A3XX_RB_COPY_CONTROL_MODE(0) |
			A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
	OUT_RELOC(ring, fd_resource(fd3_ctx->solid_vbuf)->bo, 0x20, 0, -1);  /* RB_COPY_DEST_BASE */
	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
			A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
			A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
			A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
			A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));

	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
			A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));

	fd_wfi(ctx, ring);
	fd3_program_emit(ring, &ctx->solid_prog, key);

	fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
			(struct fd3_vertex_buf[]) {{
				.prsc = fd3_ctx->solid_vbuf,
				.stride = 12,
				.format = PIPE_FORMAT_R32G32B32_FLOAT,
			}}, 1);
/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
void
fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
		uint32_t regid, uint32_t offset, uint32_t sizedwords,
		const uint32_t *dwords, struct pipe_resource *prsc)
{
	uint32_t i, sz;
	enum adreno_state_src src;

	debug_assert((regid % 4) == 0);
	debug_assert((sizedwords % 4) == 0);

	if (prsc) {
		sz = 0;
		src = 0x2;  // TODO ??
	} else {
		sz = sizedwords;
		src = SS_DIRECT;
	}

	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
	if (prsc) {
		struct fd_bo *bo = fd_resource(prsc)->bo;
		OUT_RELOC(ring, bo, offset,
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
	} else {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
	}
	for (i = 0; i < sz; i++) {
		OUT_RING(ring, dwords[i]);
	}
}