static void emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so) { struct ir3_shader_info *si = &so->info; enum adreno_state_block sb; uint32_t i, *bin; if (so->type == SHADER_VERTEX) { sb = SB_VERT_SHADER; } else { sb = SB_FRAG_SHADER; } // XXX use SS_INDIRECT bin = fd_bo_map(so->bo); OUT_PKT3(ring, CP_LOAD_STATE, 2 + si->sizedwords); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(so->instrlen)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); for (i = 0; i < si->sizedwords; i++) OUT_RING(ring, bin[i]); }
static void fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets) { uint32_t i; debug_assert((regid % 4) == 0); debug_assert((num % 4) == 0); OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | CP_LOAD_STATE_0_NUM_UNIT(num/4)); OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); for (i = 0; i < num; i++) { if (bos[i]) { if (write) { OUT_RELOCW(ring, bos[i], offsets[i], 0, 0); } else { OUT_RELOC(ring, bos[i], offsets[i], 0, 0); } } else { OUT_RING(ring, 0xbad00000 | (i << 16)); } } }
/* regid: base const register * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer */ void fd4_emit_constant(struct fd_ringbuffer *ring, enum adreno_state_block sb, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { uint32_t i, sz; enum adreno_state_src src; if (prsc) { sz = 0; src = 0x2; // TODO ?? } else { sz = sizedwords; src = SS_DIRECT; } OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | CP_LOAD_STATE_0_STATE_SRC(src) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4)); if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; OUT_RELOC(ring, bo, offset, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); } else { OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; } for (i = 0; i < sz; i++) { OUT_RING(ring, dwords[i]); } }
static void fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { uint32_t anum = align(num, 4); uint32_t i; debug_assert((regid % 4) == 0); OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | CP_LOAD_STATE_0_NUM_UNIT(anum/2)); OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); for (i = 0; i < num; i++) { if (prscs[i]) { if (write) { OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); } else { OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0); } } else { OUT_RING(ring, 0xbad00000 | (i << 16)); } } for (; i < anum; i++) OUT_RING(ring, 0xffffffff); }
/* emit texture state for mem->gmem restore operation.. eventually it would * be good to get rid of this and use normal CSO/etc state for more of these * special cases.. */ void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf) { struct fd_resource *rsc = fd_resource(psurf->texture); unsigned lvl = psurf->u.tex.level; struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer); enum pipe_format format = fd4_gmem_restore_format(psurf->format); debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); /* output sampler state: */ OUT_PKT3(ring, CP_LOAD_STATE, 4); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | CP_LOAD_STATE_0_NUM_UNIT(1)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) | A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) | A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) | A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) | A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT)); OUT_RING(ring, 0x00000000); /* emit texture state: */ OUT_PKT3(ring, CP_LOAD_STATE, 10); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | CP_LOAD_STATE_0_NUM_UNIT(1)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) | A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) | fd4_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(psurf->width) | A4XX_TEX_CONST_1_HEIGHT(psurf->height)); OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp)); OUT_RING(ring, 0x00000000); OUT_RELOC(ring, rsc->bo, offset, 0, 0); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); }
static void emit_shader(struct fd_ringbuffer *ring, struct fd_shader *shader, enum adreno_state_block state_block) { uint32_t i; OUT_PKT3(ring, CP_LOAD_STATE, 2 + shader->sizedwords); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(state_block) | CP_LOAD_STATE_0_NUM_UNIT(instrlen(shader))); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); for (i = 0; i < shader->sizedwords; i++) OUT_RING(ring, shader->bin[i]); }
static void emit_uniconst(struct fd_ringbuffer *ring, struct fd_shader *shader, struct fd_parameters *uniforms, enum adreno_state_block state_block) { static uint32_t buf[512]; /* cheesy, but test code isn't multithreaded */ uint32_t i, j, k, sz = 0; memset(buf, 0, sizeof(buf)); for (i = 0; i < shader->ir->consts_count; i++) { struct ir3_const *c = shader->ir->consts[i]; uint32_t off = c->cstart->num; memcpy(&buf[off], c->val, sizeof(c->val)); sz = max(sz, off + (sizeof(c->val) / sizeof(buf[0]))); } for (i = 0; i < shader->ir->uniforms_count; i++) { struct ir3_uniform *u = shader->ir->uniforms[i]; struct fd_param *p = find_param(uniforms, u->name); const uint32_t *dwords = p->data; uint32_t off = u->cstart->num; for (j = 0; j < p->count; j++) { for (k = 0; k < p->size; k++) buf[off++] = *(dwords++); /* zero pad if needed: */ for (; k < ALIGN(p->size, 4); k++) buf[off++] = 0; } sz = max(sz, off); } /* if no constants, don't emit the CP_LOAD_STATE */ if (sz == 0) return; OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(state_block) | CP_LOAD_STATE_0_NUM_UNIT(sz/2)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); for (i = 0; i < sz; i++) OUT_RING(ring, buf[i]); }
static void emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { const struct ir3_info *si = &so->info; enum adreno_state_block sb; enum adreno_state_src src; uint32_t i, sz, *bin; if (so->type == SHADER_VERTEX) { sb = SB_VERT_SHADER; } else { sb = SB_FRAG_SHADER; } if (fd_mesa_debug & FD_DBG_DIRECT) { sz = si->sizedwords; src = SS_DIRECT; bin = fd_bo_map(so->bo); } else { sz = 0; src = 2; // enums different on a5xx.. bin = NULL; } OUT_PKT7(ring, CP_LOAD_STATE, 3 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(src) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(so->instrlen)); if (bin) { OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER)); OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0)); } else { OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0); } /* for how clever coverity is, it is sometimes rather dull, and * doesn't realize that the only case where bin==NULL, sz==0: */ assume(bin || (sz == 0)); for (i = 0; i < sz; i++) { OUT_RING(ring, bin[i]); } }
/* regid: base const register * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer */ void fd3_emit_constant(struct fd_ringbuffer *ring, enum adreno_state_block sb, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { uint32_t i, sz; enum adreno_state_src src; if (prsc) { sz = 0; src = SS_INDIRECT; } else { sz = sizedwords; src = SS_DIRECT; } /* we have this sometimes, not others.. perhaps we could be clever * and figure out actually when we need to invalidate cache: */ OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0)); OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) | A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) | A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE); OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | CP_LOAD_STATE_0_STATE_SRC(src) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2)); if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; OUT_RELOC(ring, bo, offset, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); } else { OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; } for (i = 0; i < sz; i++) { OUT_RING(ring, dwords[i]); } }
static void emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { const struct ir3_info *si = &so->info; enum adreno_state_block sb; enum adreno_state_src src; uint32_t i, sz, *bin; if (so->type == SHADER_VERTEX) { sb = SB_VERT_SHADER; } else { sb = SB_FRAG_SHADER; } if (fd_mesa_debug & FD_DBG_DIRECT) { sz = si->sizedwords; src = SS_DIRECT; bin = fd_bo_map(so->bo); } else { sz = 0; src = 2; // enums different on a4xx.. bin = NULL; } OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(src) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(so->instrlen)); if (bin) { OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER)); } else { OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0); } for (i = 0; i < sz; i++) { OUT_RING(ring, bin[i]); } }
/* regid: base const register * prsc or dwords: buffer containing constant values * sizedwords: size of const value buffer */ void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { uint32_t i, sz; enum adreno_state_src src; debug_assert((regid % 4) == 0); debug_assert((sizedwords % 4) == 0); if (prsc) { sz = 0; src = SS_INDIRECT; } else { sz = sizedwords; src = SS_DIRECT; } OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | CP_LOAD_STATE_0_STATE_SRC(src) | CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2)); if (prsc) { struct fd_bo *bo = fd_resource(prsc)->bo; OUT_RELOC(ring, bo, offset, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); } else { OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; } for (i = 0; i < sz; i++) { OUT_RING(ring, dwords[i]); } }
static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, struct ir3_shader_variant *shader, bool emit_immediates) { uint32_t enabled_mask = constbuf->enabled_mask; uint32_t max_const; int i; // XXX TODO only emit dirty consts.. but we need to keep track if // they are clobbered by a clear, gmem2mem, or mem2gmem.. constbuf->dirty_mask = enabled_mask; /* in particular, with binning shader we may end up with unused * consts, ie. we could end up w/ constlen that is smaller * than first_immediate. In that case truncate the user consts * early to avoid HLSQ lockup caused by writing too many consts */ max_const = MIN2(shader->first_driver_param, shader->constlen); /* emit user constants: */ if (enabled_mask & 1) { const unsigned index = 0; struct pipe_constant_buffer *cb = &constbuf->cb[index]; unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); /* and even if the start of the const buffer is before * first_immediate, the end may not be: */ size = MIN2(size, 4 * max_const); if (size && (constbuf->dirty_mask & (1 << index))) { fd4_emit_constant(ring, sb, 0, cb->buffer_offset, size, cb->user_buffer, cb->buffer); constbuf->dirty_mask &= ~(1 << index); } enabled_mask &= ~(1 << index); } /* emit ubos: */ if (shader->constlen > shader->first_driver_param) { uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(params)); OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); for (i = 1; i <= params * 4; i++) { struct pipe_constant_buffer *cb = &constbuf->cb[i]; assert(!cb->user_buffer); if ((enabled_mask & (1 << i)) && cb->buffer) OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); else OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); } } /* emit shader immediates: */ if (shader && emit_immediates) { int size = shader->immediates_count; uint32_t base = shader->first_immediate; /* truncate size to avoid writing constants that shader * does not use: */ size = MIN2(size + base, shader->constlen) - base; /* convert out of vec4: */ base *= 4; size *= 4; if (size > 0) { fd4_emit_constant(ring, sb, base, 0, size, shader->immediates[0].val, NULL); } } }
static void emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_texture_stateobj *tex) { unsigned i; if (tex->num_samplers > 0) { int num_samplers; /* not sure if this is an a420.0 workaround, but we seem * to need to emit these in pairs.. emit a final dummy * entry if odd # of samplers: */ num_samplers = align(tex->num_samplers, 2); /* output sampler state: */ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * num_samplers)); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(num_samplers)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); for (i = 0; i < tex->num_samplers; i++) { static const struct fd4_sampler_stateobj dummy_sampler = {}; const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ? fd4_sampler_stateobj(tex->samplers[i]) : &dummy_sampler; OUT_RING(ring, sampler->texsamp0); OUT_RING(ring, sampler->texsamp1); } for (; i < num_samplers; i++) { OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); } } if (tex->num_textures > 0) { /* emit texture state: */ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * tex->num_textures)); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); for (i = 0; i < tex->num_textures; i++) { static const struct fd4_pipe_sampler_view dummy_view = {}; const struct fd4_pipe_sampler_view *view = tex->textures[i] ? fd4_pipe_sampler_view(tex->textures[i]) : &dummy_view; unsigned start = view->base.u.tex.first_level; OUT_RING(ring, view->texconst0); OUT_RING(ring, view->texconst1); OUT_RING(ring, view->texconst2); OUT_RING(ring, view->texconst3); if (view->base.texture) { struct fd_resource *rsc = fd_resource(view->base.texture); uint32_t offset = fd_resource_offset(rsc, start, 0); OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0); } else { OUT_RING(ring, 0x00000000); } OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); } } }