static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, struct fd3_shader_stateobj *shader) { uint32_t enabled_mask = constbuf->enabled_mask; uint32_t base = 0; unsigned i; // XXX TODO only emit dirty consts.. but we need to keep track if // they are clobbered by a clear, gmem2mem, or mem2gmem.. constbuf->dirty_mask = enabled_mask; /* emit user constants: */ while (enabled_mask) { unsigned index = ffs(enabled_mask) - 1; struct pipe_constant_buffer *cb = &constbuf->cb[index]; unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); /* gallium could have const-buffer still bound, even though the * shader is not using it. Writing consts above constlen (or * rather, HLSQ_{VS,FS}_CONTROL_REG.CONSTLENGTH) will cause a * hang. */ if ((base / 4) >= shader->constlen) break; if (constbuf->dirty_mask & (1 << index)) { fd3_emit_constant(ring, sb, base, cb->buffer_offset, size, cb->user_buffer, cb->buffer); constbuf->dirty_mask &= ~(1 << index); } base += size; enabled_mask &= ~(1 << index); } /* emit shader immediates: */ if (shader) { for (i = 0; i < shader->immediates_count; i++) { fd3_emit_constant(ring, sb, 4 * (shader->first_immediate + i), 0, 4, shader->immediates[i].val, NULL); } } }
static void fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info) { unsigned dirty = ctx->dirty; struct fd3_shader_key key = { /* do binning pass first: */ .binning_pass = true, .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), }; draw_impl(ctx, info, ctx->binning_ring, dirty & ~(FD_DIRTY_BLEND), key); /* and now regular (non-binning) pass: */ key.binning_pass = false; draw_impl(ctx, info, ctx->ring, dirty, key); } /* binning pass cmds for a clear: * NOTE: newer blob drivers don't use binning for clear, which is probably * preferable since it is low vtx count. However that doesn't seem to * actually work for me. Not sure if it is depending on support for * clear pass (rather than using solid-fill shader), or something else * that newer blob is doing differently. Once that is figured out, we * can remove fd3_clear_binning(). */ static void fd3_clear_binning(struct fd_context *ctx, unsigned dirty) { struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->binning_ring; struct fd3_shader_key key = { .binning_pass = true, .half_precision = true, }; fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key); fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key), (struct fd3_vertex_buf[]) {{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT, }}, 1); OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); OUT_RING(ring, 0); /* VFD_INDEX_MIN */ OUT_RING(ring, 2); /* VFD_INDEX_MAX */ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1); OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */ OUT_PKT3(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, PERFCOUNTER_STOP); fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); } static void fd3_clear(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->ring; unsigned dirty = ctx->dirty; unsigned ce, i; struct fd3_shader_key key = { .half_precision = true, }; dirty &= FD_DIRTY_VIEWPORT | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR; dirty |= FD_DIRTY_PROG; fd3_clear_binning(ctx, dirty); /* emit generic state now: */ fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key); OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1); OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) | A3XX_RB_BLEND_ALPHA_FLOAT(1.0)); OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); OUT_RINGP(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER), &fd3_ctx->rbrc_patches); if (buffers & PIPE_CLEAR_DEPTH) { OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE | A3XX_RB_DEPTH_CONTROL_Z_ENABLE | A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)); OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_ZOFFSET, 2); OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(depth)); ctx->dirty |= FD_DIRTY_VIEWPORT; } else { OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); } if (buffers & PIPE_CLEAR_STENCIL) { OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2); OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(stencil) | A3XX_RB_STENCILREFMASK_STENCILMASK(stencil) | A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) | A3XX_RB_STENCILREFMASK_STENCILMASK(0) | 0xff000000 | // XXX ??? A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) | A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) | A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); } else { OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2); OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) | A3XX_RB_STENCILREFMASK_STENCILMASK(0) | A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0)); OUT_RING(ring, A3XX_RB_STENCILREFMASK_BF_STENCILREF(0) | A3XX_RB_STENCILREFMASK_BF_STENCILMASK(0) | A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0)); OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); } if (buffers & PIPE_CLEAR_COLOR) { ce = 0xf; } else { ce = 0x0; } for (i = 0; i < 4; i++) { OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) | A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce)); OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1); OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) | A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) | A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) | A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO) | A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE); } OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0)); fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key), (struct fd3_vertex_buf[]) {{ .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT, }}, 1); fd_wfi(ctx, ring); fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); OUT_RING(ring, 0); /* VFD_INDEX_MIN */ OUT_RING(ring, 2); /* VFD_INDEX_MAX */ OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1); OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */ OUT_PKT3(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, PERFCOUNTER_STOP); fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); } void fd3_draw_init(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); ctx->draw = fd3_draw; ctx->clear = fd3_clear; }
static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, struct fd3_shader_variant *shader) { uint32_t enabled_mask = constbuf->enabled_mask; uint32_t first_immediate; uint32_t base = 0; unsigned i; // XXX TODO only emit dirty consts.. but we need to keep track if // they are clobbered by a clear, gmem2mem, or mem2gmem.. constbuf->dirty_mask = enabled_mask; /* in particular, with binning shader and a unneeded consts no * longer referenced, we could end up w/ constlen that is smaller * than first_immediate. In that case truncate the user consts * early to avoid HLSQ lockup caused by writing too many consts */ first_immediate = MIN2(shader->first_immediate, shader->constlen); /* emit user constants: */ while (enabled_mask) { unsigned index = ffs(enabled_mask) - 1; struct pipe_constant_buffer *cb = &constbuf->cb[index]; unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); /* gallium could leave const buffers bound above what the * current shader uses.. don't let that confuse us. */ if (base >= (4 * first_immediate)) break; if (constbuf->dirty_mask & (1 << index)) { /* and even if the start of the const buffer is before * first_immediate, the end may not be: */ size = MIN2(size, (4 * first_immediate) - base); fd3_emit_constant(ring, sb, base, cb->buffer_offset, size, cb->user_buffer, cb->buffer); constbuf->dirty_mask &= ~(1 << index); } base += size; enabled_mask &= ~(1 << index); } /* emit shader immediates: */ if (shader) { for (i = 0; i < shader->immediates_count; i++) { base = 4 * (shader->first_immediate + i); if (base >= (4 * shader->constlen)) break; fd3_emit_constant(ring, sb, base, 0, 4, shader->immediates[i].val, NULL); } } }
static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, struct ir3_shader_variant *shader, bool emit_immediates) { uint32_t enabled_mask = constbuf->enabled_mask; uint32_t max_const; int i; // XXX TODO only emit dirty consts.. but we need to keep track if // they are clobbered by a clear, gmem2mem, or mem2gmem.. constbuf->dirty_mask = enabled_mask; /* in particular, with binning shader we may end up with unused * consts, ie. we could end up w/ constlen that is smaller * than first_immediate. In that case truncate the user consts * early to avoid HLSQ lockup caused by writing too many consts */ max_const = MIN2(shader->first_driver_param, shader->constlen); /* emit user constants: */ if (enabled_mask & 1) { const unsigned index = 0; struct pipe_constant_buffer *cb = &constbuf->cb[index]; unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); /* and even if the start of the const buffer is before * first_immediate, the end may not be: */ size = MIN2(size, 4 * max_const); if (size && constbuf->dirty_mask & (1 << index)) { fd3_emit_constant(ring, sb, 0, cb->buffer_offset, size, cb->user_buffer, cb->buffer); constbuf->dirty_mask &= ~(1 << index); } enabled_mask &= ~(1 << index); } if (shader->constlen > shader->first_driver_param) { uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); /* emit ubos: */ OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(params * 2)); OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); for (i = 1; i <= params * 4; i++) { struct pipe_constant_buffer *cb = &constbuf->cb[i]; assert(!cb->user_buffer); if ((enabled_mask & (1 << i)) && cb->buffer) OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); else OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); } } /* emit shader immediates: */ if (shader && emit_immediates) { int size = shader->immediates_count; uint32_t base = shader->first_immediate; /* truncate size to avoid writing constants that shader * does not use: */ size = MIN2(size + base, shader->constlen) - base; /* convert out of vec4: */ base *= 4; size *= 4; if (size > 0) { fd3_emit_constant(ring, sb, base, 0, size, shader->immediates[0].val, NULL); } } }