static void
draw_emit_indirect(struct fd_batch *batch, struct fd_ringbuffer *ring,
		enum pc_di_primtype primtype,
		enum pc_di_vis_cull_mode vismode,
		const struct pipe_draw_info *info,
		unsigned index_offset)
{
	struct fd_resource *ind = fd_resource(info->indirect->buffer);

	if (info->index_size) {
		struct pipe_resource *idx = info->index.resource;
		unsigned max_indices =
			(idx->width0 - info->indirect->offset) / info->index_size;

		OUT_PKT7(ring, CP_DRAW_INDX_INDIRECT, 6);
		OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_DMA,
				fd4_size2indextype(info->index_size), 0),
				&batch->draw_patches);
		OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
		// XXX: Check A5xx vs A6xx
		OUT_RING(ring, A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(max_indices));
		OUT_RELOC(ring, ind->bo, info->indirect->offset, 0, 0);
	} else {
		OUT_PKT7(ring, CP_DRAW_INDIRECT, 3);
		OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_AUTO_INDEX, 0, 0),
				&batch->draw_patches);
		OUT_RELOC(ring, ind->bo, info->indirect->offset, 0, 0);
	}
}
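/* For reference (a sketch, not a struct used by this driver): the buffer
 * that CP_DRAW_INDX_INDIRECT dereferences is assumed to hold the usual
 * GL/Vulkan indexed-indirect command layout:
 *
 *    struct draw_indexed_indirect_cmd {
 *       uint32_t count;            // indices per draw
 *       uint32_t instance_count;
 *       uint32_t first_index;
 *       int32_t  base_vertex;
 *       uint32_t base_instance;
 *    };
 *
 * Since the packet itself carries no notion of how large the bound index
 * buffer is, MAX_INDICES above bounds how far the CP may fetch into it.
 */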
/* before first tile */
static void
fd5_emit_tile_init(struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->gmem;

	fd5_emit_restore(batch, ring);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, UNK_26);

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* PC_POWER_CNTL */

	OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* VFD_POWER_CNTL */

	/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
	fd_wfi(batch, ring);
	OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
	OUT_RING(ring, 0x7c13c080);   /* RB_CCU_CNTL */

	/* opcode: CP_PREEMPT_ENABLE_LOCAL (6a) (2 dwords) */

	fd5_set_render_mode(batch->ctx, ring, GMEM);
}
static void
emit_image_ssbo(struct fd_ringbuffer *ring, unsigned slot,
		struct fd6_image *img, enum pipe_shader_type shader)
{
	unsigned opcode = CP_LOAD_STATE6_FRAG;

	assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);

#if 0
	OUT_PKT7(ring, opcode, 3 + 4);
	OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
		CP_LOAD_STATE6_0_STATE_TYPE(0) |
		CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
		CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
		CP_LOAD_STATE6_0_NUM_UNIT(1));
	OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
	OUT_RING(ring, A6XX_SSBO_0_0_BASE_LO(0));
	OUT_RING(ring, A6XX_SSBO_0_1_PITCH(img->pitch));
	OUT_RING(ring, A6XX_SSBO_0_2_ARRAY_PITCH(img->array_pitch));
	OUT_RING(ring, A6XX_SSBO_0_3_CPP(img->cpp));
#endif

#if 0
	OUT_PKT7(ring, opcode, 3 + 2);
	OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
		CP_LOAD_STATE6_0_STATE_TYPE(1) |
		CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
		CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
		CP_LOAD_STATE6_0_NUM_UNIT(1));
	OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
	OUT_RING(ring, A6XX_SSBO_1_0_FMT(img->fmt) |
		A6XX_SSBO_1_0_WIDTH(img->width));
	OUT_RING(ring, A6XX_SSBO_1_1_HEIGHT(img->height) |
		A6XX_SSBO_1_1_DEPTH(img->depth));
#endif

	OUT_PKT7(ring, opcode, 3 + 2);
	OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
		CP_LOAD_STATE6_0_STATE_TYPE(2) |
		CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
		CP_LOAD_STATE6_0_STATE_BLOCK(imgsb[shader]) |
		CP_LOAD_STATE6_0_NUM_UNIT(1));
	OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
	if (img->bo) {
		OUT_RELOCW(ring, img->bo, img->offset, 0, 0);
	} else {
		OUT_RING(ring, 0x00000000);
		OUT_RING(ring, 0x00000000);
	}
}
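/* Note on the #if 0 blocks above: state types 0 and 1 are the other two
 * thirds of the a6xx SSBO descriptor (type 0 = base/pitch/cpp, type 1 =
 * format and dimensions); only the type 2 (base address) third is emitted
 * for now.  The disabled blocks are kept so the full descriptor layout
 * stays visible in one place.
 */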
static void
draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
		enum pc_di_primtype primtype,
		enum pc_di_vis_cull_mode vismode,
		const struct pipe_draw_info *info,
		unsigned index_offset)
{
	if (info->index_size) {
		assert(!info->has_user_indices);

		struct pipe_resource *idx_buffer = info->index.resource;
		uint32_t idx_size = info->index_size * info->count;
		uint32_t idx_offset = index_offset + info->start * info->index_size;

		/* leave vis mode blank for now, it will be patched up when
		 * we know if we are binning or not
		 */
		uint32_t draw = CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
			CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) |
			CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(fd4_size2indextype(info->index_size)) |
			0x2000;

		OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, 7);
		if (vismode == USE_VISIBILITY) {
			OUT_RINGP(ring, draw, &batch->draw_patches);
		} else {
			OUT_RING(ring, draw);
		}
		OUT_RING(ring, info->instance_count);   /* NumInstances */
		OUT_RING(ring, info->count);            /* NumIndices */
		OUT_RING(ring, 0x0);                    /* XXX */
		OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
		OUT_RING(ring, idx_size);
	} else {
		/* leave vis mode blank for now, it will be patched up when
		 * we know if we are binning or not
		 */
		uint32_t draw = CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) |
			CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
			0x2000;

		OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, 3);
		if (vismode == USE_VISIBILITY) {
			OUT_RINGP(ring, draw, &batch->draw_patches);
		} else {
			OUT_RING(ring, draw);
		}
		OUT_RING(ring, info->instance_count);   /* NumInstances */
		OUT_RING(ring, info->count);            /* NumIndices */
	}
}
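/* Illustrative decode (assuming a TRILIST draw of 36 u16 indices, one
 * instance) of the packet emitted on the indexed path above:
 *
 *    CP_DRAW_INDX_OFFSET (7 dwords)
 *      0: PRIM_TYPE=TRILIST | SOURCE_SELECT=DI_SRC_SEL_DMA |
 *         INDEX_SIZE=16_BIT | 0x2000      (vis mode patched in later)
 *      1: 1                               NumInstances
 *      2: 36                              NumIndices
 *      3: 0x0
 *    4,5: index buffer iova (64b reloc)
 *      6: 72                              index size in bytes
 */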
/**
 * emit marker string as payload of a no-op packet, which can be
 * decoded by cffdump.
 */
static void
fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len)
{
	struct fd_context *ctx = fd_context(pctx);
	struct fd_ringbuffer *ring;
	const uint32_t *buf = (const void *)string;

	if (!ctx->batch)
		return;

	ring = ctx->batch->draw;

	/* max packet size is 0x3fff dwords: */
	len = MIN2(len, 0x3fff * 4);

	if (ctx->screen->gpu_id >= 500)
		OUT_PKT7(ring, CP_NOP, align(len, 4) / 4);
	else
		OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);

	while (len >= 4) {
		OUT_RING(ring, *buf);
		buf++;
		len -= 4;
	}

	/* copy remainder bytes without reading past end of input string: */
	if (len > 0) {
		uint32_t w = 0;
		memcpy(&w, buf, len);
		OUT_RING(ring, w);
	}
}
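/* Usage sketch: this implements the gallium pipe_context::emit_string_marker
 * hook, normally reached via KHR_debug / GREMEDY string markers rather than
 * called directly.  A hypothetical direct call:
 *
 *    pctx->emit_string_marker(pctx, "-- begin shadow pass --", 23);
 *
 * would show up as a CP_NOP payload that cffdump prints inline with the
 * decoded command stream.
 */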
/* before IB to rendering cmds: */
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
{
	struct fd_ringbuffer *ring = batch->gmem;
	struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;

	OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
	OUT_RING(ring, 0x1);

	OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
	OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
			A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

	patch_draws(batch, IGNORE_VISIBILITY);

	emit_zs(ring, pfb->zsbuf, gmem);
	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);

	// TODO MSAA
	OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);

	OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
}
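/* patch_draws() walks batch->draw_patches (the locations recorded by the
 * OUT_RINGP() calls in draw_emit()/draw_emit_indirect()) and ORs the now
 * known vis-cull-mode into each saved draw header, roughly:
 *
 *    *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
 *
 * (sketch of the a5xx version; see patch_draws() for the real thing).
 */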
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_MEM_WRITE, 4);
	OUT_RELOCW(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);

	OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOCW(ring, query_sample(aq, stop));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, ZPASS_DONE);
	fd_reset_wfi(batch);

	OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
	OUT_RING(ring, 0x00000014);   // XXX
	OUT_RELOC(ring, query_sample(aq, stop));
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0xffffffff);
	OUT_RING(ring, 0x00000010);   // XXX

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOCW(ring, query_sample(aq, result));   /* dst */
	OUT_RELOC(ring, query_sample(aq, result));    /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));      /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));     /* srcC */

	fd5_context(batch->ctx)->samples_passed_queries--;
}
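/* CP_MEM_TO_MEM with DOUBLE|NEG_C computes, on 64-bit operands:
 *
 *    dst = srcA + srcB - srcC
 *
 * so with dst == srcA == result, srcB == stop and srcC == start this
 * accumulates result += stop - start.  A query that is paused and resumed
 * once per tile in GMEM mode therefore sums only the intervals where it
 * was actually active.
 */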
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOCW(ring, query_sample(aq, start));
	OUT_RING(ring, 0x00000000);

	fd_reset_wfi(batch);
}
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
			CP_EVENT_WRITE_0_TIMESTAMP);
	OUT_RELOCW(ring, query_sample(aq, stop));
	OUT_RING(ring, 0x00000000);

	fd_reset_wfi(batch);
	fd_wfi(batch, ring);

	/* result += stop - start: */
	OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
	OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
			CP_MEM_TO_MEM_0_NEG_C);
	OUT_RELOCW(ring, query_sample(aq, result));   /* dst */
	OUT_RELOC(ring, query_sample(aq, result));    /* srcA */
	OUT_RELOC(ring, query_sample(aq, stop));      /* srcB */
	OUT_RELOC(ring, query_sample(aq, start));     /* srcC */
}
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
	const struct ir3_info *si = &so->info;
	enum adreno_state_block sb;
	enum adreno_state_src src;
	uint32_t i, sz, *bin;

	if (so->type == SHADER_VERTEX) {
		sb = SB_VERT_SHADER;
	} else {
		sb = SB_FRAG_SHADER;
	}

	if (fd_mesa_debug & FD_DBG_DIRECT) {
		sz = si->sizedwords;
		src = SS_DIRECT;
		bin = fd_bo_map(so->bo);
	} else {
		sz = 0;
		src = 2;   // enums different on a5xx..
		bin = NULL;
	}

	OUT_PKT7(ring, CP_LOAD_STATE, 3 + sz);
	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
			CP_LOAD_STATE_0_STATE_SRC(src) |
			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
			CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
	if (bin) {
		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
		OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
	} else {
		OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
	}

	/* however clever coverity is, it is sometimes rather dull and
	 * doesn't realize that bin==NULL only happens when sz==0:
	 */
	assume(bin || (sz == 0));

	for (i = 0; i < sz; i++) {
		OUT_RING(ring, bin[i]);
	}
}
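/* Two load paths: with FD_DBG_DIRECT the instructions are written inline
 * into the ring (sz dwords following the 3 header dwords); otherwise the
 * CP fetches them from so->bo via the reloc, with NUM_UNIT still telling
 * it how many instruction groups to load.  E.g. (assuming the debug flag
 * is still spelled "direct"):
 *
 *    FD_MESA_DEBUG=direct glxgears
 *
 * forces the inline path, which can make cffdump traces easier to follow.
 */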
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
	struct fd_ringbuffer *ring = batch->draw;

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
	OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

	OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
	OUT_RELOCW(ring, query_sample(aq, start));

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, ZPASS_DONE);
	fd_reset_wfi(batch);

	fd5_context(batch->ctx)->samples_passed_queries++;
}
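/* The ZPASS_DONE event makes the RB dump its current z-pass sample
 * counter to the address programmed in RB_SAMPLE_COUNT_ADDR_LO/HI, i.e.
 * into the "start" slot here; occlusion_pause() repeats the same dance
 * into the "stop" slot and then accumulates the difference.
 */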
static void
emit_image_tex(struct fd_ringbuffer *ring, unsigned slot,
		struct fd6_image *img, enum pipe_shader_type shader)
{
	unsigned opcode = CP_LOAD_STATE6_FRAG;

	assert(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT);

	OUT_PKT7(ring, opcode, 3 + 12);
	OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(slot) |
		CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
		CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
		CP_LOAD_STATE6_0_STATE_BLOCK(texsb[shader]) |
		CP_LOAD_STATE6_0_NUM_UNIT(1));
	OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
	OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

	OUT_RING(ring, A6XX_TEX_CONST_0_FMT(img->fmt) |
		fd6_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
			PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) |
		COND(img->srgb, A6XX_TEX_CONST_0_SRGB));
	OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) |
		A6XX_TEX_CONST_1_HEIGHT(img->height));
	OUT_RING(ring, A6XX_TEX_CONST_2_FETCHSIZE(img->fetchsize) |
		A6XX_TEX_CONST_2_TYPE(img->type) |
		A6XX_TEX_CONST_2_PITCH(img->pitch));
	OUT_RING(ring, A6XX_TEX_CONST_3_ARRAY_PITCH(img->array_pitch));
	if (img->bo) {
		OUT_RELOC(ring, img->bo, img->offset,
				(uint64_t)A6XX_TEX_CONST_5_DEPTH(img->depth) << 32, 0);
	} else {
		OUT_RING(ring, 0x00000000);
		OUT_RING(ring, A6XX_TEX_CONST_5_DEPTH(img->depth));
	}
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
}
static void
fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
{
	struct fd_ringbuffer *ring;

	// TODO mid-frame clears (ie. app doing crazy stuff)??  Maybe worth
	// splitting both clear and lrz clear out into their own rb's.  And
	// just throw away any draws prior to clear.  (Anything that is not a
	// fullscreen clear just falls back to the generic path that treats
	// it as a normal draw.)
	if (!batch->lrz_clear) {
		batch->lrz_clear = fd_ringbuffer_new(batch->ctx->pipe, 0x1000);
		fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem);
	}

	ring = batch->lrz_clear;

	emit_marker6(ring, 7);
	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
	emit_marker6(ring, 7);

	OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
	OUT_RING(ring, 0x10000000);

	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
	OUT_RING(ring, 0x7ffff);

	emit_marker6(ring, 7);
	OUT_PKT7(ring, CP_SET_MARKER, 1);
	OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0xc));
	emit_marker6(ring, 7);

	OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1);
	OUT_RING(ring, 0x0);

	OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_ACC0, 1);
	OUT_RING(ring, 0x0000f410);

	OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
	OUT_RING(ring, A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(RB6_R16_UNORM) |
			0x4f00080);

	OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
	OUT_RING(ring, A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(RB6_R16_UNORM) |
			0x4f00080);

	fd6_event_write(batch, ring, UNK_1D, true);
	fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

	OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
	OUT_RING(ring, fui(depth));
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
	OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R16_UNORM) |
			A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
			A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
	OUT_RELOCW(ring, zsbuf->lrz, 0, 0, 0);
	OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(zsbuf->lrz_pitch * 2));
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
	OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X_X(0));
	OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X_X(0));
	OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
	OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y_Y(0));

	OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
	OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
	OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) |
			A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1));

	fd6_event_write(batch, ring, 0x3f, false);

	OUT_WFI5(ring);

	OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
	OUT_RING(ring, 0x1000000);

	OUT_PKT7(ring, CP_BLIT, 1);
	OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));

	OUT_WFI5(ring);

	OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
	OUT_RING(ring, 0x0);

	fd6_event_write(batch, ring, UNK_1D, true);
	fd6_event_write(batch, ring, FACENESS_FLUSH, true);
	fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);

	fd6_cache_flush(batch, ring);
}
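/* Background (summary, not from this file): LRZ ("low resolution Z") is a
 * reduced-resolution copy of the depth buffer used for coarse early-z
 * rejection, so a depth clear has to reset it as well.  The sequence above
 * does that with a 2D solid-fill blit: fui(depth) goes in via
 * RB_2D_SRC_SOLID_C0 and gets splatted across the R16_UNORM lrz buffer.
 */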
/* TODO maybe some of this we could pre-compute once rather than having
 * so much draw-time logic?
 */
static void
emit_stream_out(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
		struct ir3_shader_linkage *l)
{
	const struct pipe_stream_output_info *strmout = &v->shader->stream_output;
	unsigned ncomp[PIPE_MAX_SO_BUFFERS] = {0};
	unsigned prog[align(l->max_loc, 2) / 2];

	memset(prog, 0, sizeof(prog));

	for (unsigned i = 0; i < strmout->num_outputs; i++) {
		const struct pipe_stream_output *out = &strmout->output[i];
		unsigned k = out->register_index;
		unsigned idx;

		ncomp[out->output_buffer] += out->num_components;

		/* linkage map sorted by order frag shader wants things, so
		 * a bit less ideal here..
		 */
		for (idx = 0; idx < l->cnt; idx++)
			if (l->var[idx].regid == v->outputs[k].regid)
				break;

		debug_assert(idx < l->cnt);

		for (unsigned j = 0; j < out->num_components; j++) {
			unsigned c   = j + out->start_component;
			unsigned loc = l->var[idx].loc + c;
			unsigned off = j + out->dst_offset;   /* in dwords */

			if (loc & 1) {
				prog[loc/2] |= A5XX_VPC_SO_PROG_B_EN |
						A5XX_VPC_SO_PROG_B_BUF(out->output_buffer) |
						A5XX_VPC_SO_PROG_B_OFF(off * 4);
			} else {
				prog[loc/2] |= A5XX_VPC_SO_PROG_A_EN |
						A5XX_VPC_SO_PROG_A_BUF(out->output_buffer) |
						A5XX_VPC_SO_PROG_A_OFF(off * 4);
			}
		}
	}

	OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * ARRAY_SIZE(prog)));
	OUT_RING(ring, REG_A5XX_VPC_SO_BUF_CNTL);
	OUT_RING(ring, A5XX_VPC_SO_BUF_CNTL_ENABLE |
			COND(ncomp[0] > 0, A5XX_VPC_SO_BUF_CNTL_BUF0) |
			COND(ncomp[1] > 0, A5XX_VPC_SO_BUF_CNTL_BUF1) |
			COND(ncomp[2] > 0, A5XX_VPC_SO_BUF_CNTL_BUF2) |
			COND(ncomp[3] > 0, A5XX_VPC_SO_BUF_CNTL_BUF3));
	OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(0));
	OUT_RING(ring, ncomp[0]);
	OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(1));
	OUT_RING(ring, ncomp[1]);
	OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(2));
	OUT_RING(ring, ncomp[2]);
	OUT_RING(ring, REG_A5XX_VPC_SO_NCOMP(3));
	OUT_RING(ring, ncomp[3]);
	OUT_RING(ring, REG_A5XX_VPC_SO_CNTL);
	OUT_RING(ring, A5XX_VPC_SO_CNTL_ENABLE);
	for (unsigned i = 0; i < ARRAY_SIZE(prog); i++) {
		OUT_RING(ring, REG_A5XX_VPC_SO_PROG);
		OUT_RING(ring, prog[i]);
	}
}
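/* Worked example: a vec4 varying at locs 4..7 streamed to buffer 0 with
 * dst_offset 0 packs two locations per VPC_SO_PROG dword:
 *
 *    prog[2] = A_EN | A_BUF(0) | A_OFF(0)  |  B_EN | B_BUF(0) | B_OFF(4)
 *    prog[3] = A_EN | A_BUF(0) | A_OFF(8)  |  B_EN | B_BUF(0) | B_OFF(12)
 *
 * i.e. even locs land in the A half, odd locs in the B half, with the
 * dword offset scaled to bytes (off * 4).
 */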
static void
fd5_emit_sysmem_prep(struct fd_batch *batch)
{
	struct pipe_framebuffer_state *pfb = &batch->framebuffer;
	struct fd_ringbuffer *ring = batch->gmem;

	fd5_emit_restore(batch, ring);

	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, UNK_26);

	OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x0);

	OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* PC_POWER_CNTL */

	OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
	OUT_RING(ring, 0x00000003);   /* VFD_POWER_CNTL */

	/* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
	fd_wfi(batch, ring);
	OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
	OUT_RING(ring, 0x10000000);   /* RB_CCU_CNTL */

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
	OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
			A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

	OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) |
			A5XX_RB_RESOLVE_CNTL_1_Y(0));
	OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
			A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

	OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
	OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) |
			A5XX_RB_WINDOW_OFFSET_Y(0));

	OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
	OUT_RING(ring, 0x1);

	OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
	OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) |
			A5XX_RB_CNTL_HEIGHT(0) |
			A5XX_RB_CNTL_BYPASS);

	patch_draws(batch, IGNORE_VISIBILITY);

	emit_zs(ring, pfb->zsbuf, NULL);
	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);

	// TODO MSAA
	OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);

	OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);

	OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
	OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
	OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
			A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
}