void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd4_emit *emit) { struct ir3_shader_variant *vp = fd4_emit_get_vp(emit); struct ir3_shader_variant *fp = fd4_emit_get_fp(emit); uint32_t dirty = emit->dirty; emit_marker(ring, 5); if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) { uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control; /* I suppose if we needed to (which I don't *think* we need * to), we could emit this for binning pass too. But we * would need to keep a different patch-list for binning * vs render pass. */ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1); OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches); } if (dirty & FD_DIRTY_ZSA) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1); OUT_RING(ring, zsa->rb_alpha_control); OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2); OUT_RING(ring, zsa->rb_stencil_control); OUT_RING(ring, zsa->rb_stencil_control2); } if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); struct pipe_stencil_ref *sr = &ctx->stencil_ref; OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2); OUT_RING(ring, zsa->rb_stencilrefmask | A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0])); OUT_RING(ring, zsa->rb_stencilrefmask_bf | A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1])); } if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); bool fragz = fp->has_kill | fp->writes_pos; OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, zsa->rb_depth_control | COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE)); /* maybe this register/bitfield needs a better name.. this * appears to be just disabling early-z */ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, zsa->gras_alpha_control | COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE)); } if (dirty & FD_DIRTY_RASTERIZER) { struct fd4_rasterizer_stateobj *rasterizer = fd4_rasterizer_stateobj(ctx->rasterizer); OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1); OUT_RING(ring, rasterizer->gras_su_mode_control | A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS); OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2); OUT_RING(ring, rasterizer->gras_su_point_minmax); OUT_RING(ring, rasterizer->gras_su_point_size); OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2); OUT_RING(ring, rasterizer->gras_su_poly_offset_scale); OUT_RING(ring, rasterizer->gras_su_poly_offset_offset); OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, rasterizer->gras_cl_clip_cntl); } /* NOTE: since primitive_restart is not actually part of any * state object, we need to make sure that we always emit * PRIM_VTX_CNTL.. either that or be more clever and detect * when it changes. */ if (emit->info) { const struct pipe_draw_info *info = emit->info; uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer) ->pc_prim_vtx_cntl; if (info->indexed && info->primitive_restart) val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE); if (fp->total_in > 0) { uint32_t varout = align(fp->total_in, 16) / 16; if (varout > 1) varout = align(varout, 2); val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout); } OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2); OUT_RING(ring, val); OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */ } if (dirty & FD_DIRTY_SCISSOR) { struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2); OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) | A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1)); OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) | A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny)); ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx); ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny); ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx); ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy); } if (dirty & FD_DIRTY_VIEWPORT) { fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6); OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); } if (dirty & FD_DIRTY_PROG) fd4_program_emit(ring, emit); if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) && /* evil hack to deal sanely with clear path: */ (emit->prog == &ctx->prog)) { fd_wfi(ctx, ring); emit_constants(ring, SB_VERT_SHADER, &ctx->constbuf[PIPE_SHADER_VERTEX], vp, emit->prog->dirty & FD_SHADER_DIRTY_VP); if (!emit->key.binning_pass) { emit_constants(ring, SB_FRAG_SHADER, &ctx->constbuf[PIPE_SHADER_FRAGMENT], fp, emit->prog->dirty & FD_SHADER_DIRTY_FP); } } /* emit driver params every time */ if (emit->info && emit->prog == &ctx->prog) { uint32_t vertex_params[4] = { emit->info->indexed ? emit->info->index_bias : emit->info->start, 0, 0, 0 }; if (vp->constlen >= vp->first_driver_param + 4) { fd4_emit_constant(ring, SB_VERT_SHADER, (vp->first_driver_param + 4) * 4, 0, 4, vertex_params, NULL); } } if ((dirty & FD_DIRTY_BLEND) && ctx->blend) { struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend); uint32_t i; for (i = 0; i < 8; i++) { OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); OUT_RING(ring, blend->rb_mrt[i].control); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); OUT_RING(ring, blend->rb_mrt[i].blend_control); } OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1); OUT_RING(ring, blend->rb_fs_output | A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff)); } if (dirty & FD_DIRTY_BLEND_COLOR) { struct pipe_blend_color *bcolor = &ctx->blend_color; OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4); OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) | A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0])); OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) | A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1])); OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) | A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2])); OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) | A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3])); } if (dirty & FD_DIRTY_VERTTEX) { if (vp->has_samp) emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex); else dirty &= ~FD_DIRTY_VERTTEX; } if (dirty & FD_DIRTY_FRAGTEX) { if (fp->has_samp) emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex); else dirty &= ~FD_DIRTY_FRAGTEX; } ctx->dirty &= ~dirty; }
static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, struct ir3_shader_variant *shader, bool emit_immediates) { uint32_t enabled_mask = constbuf->enabled_mask; uint32_t max_const; int i; // XXX TODO only emit dirty consts.. but we need to keep track if // they are clobbered by a clear, gmem2mem, or mem2gmem.. constbuf->dirty_mask = enabled_mask; /* in particular, with binning shader we may end up with unused * consts, ie. we could end up w/ constlen that is smaller * than first_immediate. In that case truncate the user consts * early to avoid HLSQ lockup caused by writing too many consts */ max_const = MIN2(shader->first_driver_param, shader->constlen); /* emit user constants: */ if (enabled_mask & 1) { const unsigned index = 0; struct pipe_constant_buffer *cb = &constbuf->cb[index]; unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); /* and even if the start of the const buffer is before * first_immediate, the end may not be: */ size = MIN2(size, 4 * max_const); if (size && (constbuf->dirty_mask & (1 << index))) { fd4_emit_constant(ring, sb, 0, cb->buffer_offset, size, cb->user_buffer, cb->buffer); constbuf->dirty_mask &= ~(1 << index); } enabled_mask &= ~(1 << index); } /* emit ubos: */ if (shader->constlen > shader->first_driver_param) { uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb) | CP_LOAD_STATE_0_NUM_UNIT(params)); OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); for (i = 1; i <= params * 4; i++) { struct pipe_constant_buffer *cb = &constbuf->cb[i]; assert(!cb->user_buffer); if ((enabled_mask & (1 << i)) && cb->buffer) OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); else OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); } } /* emit shader immediates: */ if (shader && emit_immediates) { int size = shader->immediates_count; uint32_t base = shader->first_immediate; /* truncate size to avoid writing constants that shader * does not use: */ size = MIN2(size + base, shader->constlen) - base; /* convert out of vec4: */ base *= 4; size *= 4; if (size > 0) { fd4_emit_constant(ring, sb, base, 0, size, shader->immediates[0].val, NULL); } } }
static void fd4_clear(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct fd4_context *fd4_ctx = fd4_context(ctx); struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; unsigned dirty = ctx->dirty; unsigned ce, i; struct fd4_emit emit = { .vtx = &fd4_ctx->solid_vbuf_state, .prog = &ctx->solid_prog, .key = { .half_precision = true, }, }; uint32_t colr = 0; if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs) colr = pack_rgba(pfb->cbufs[0]->format, color->f); dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR; dirty |= FD_DIRTY_PROG; emit.dirty = dirty; OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1); OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); /* emit generic state now: */ fd4_emit_state(ctx, ring, &emit); reset_viewport(ring, pfb); if (buffers & PIPE_CLEAR_DEPTH) { OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE | A4XX_RB_DEPTH_CONTROL_Z_ENABLE | A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)); fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0, 2); OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0)); OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(depth)); ctx->dirty |= FD_DIRTY_VIEWPORT; } else { OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); } if (buffers & PIPE_CLEAR_STENCIL) { OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2); OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(stencil) | A4XX_RB_STENCILREFMASK_STENCILMASK(stencil) | A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) | A4XX_RB_STENCILREFMASK_STENCILMASK(0) | 0xff000000 | // XXX ??? A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2); OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) | A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) | A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */ } else { OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2); OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) | A4XX_RB_STENCILREFMASK_STENCILMASK(0) | A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0)); OUT_RING(ring, A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) | A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) | A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0)); OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2); OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */ } if (buffers & PIPE_CLEAR_COLOR) { OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1); OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); ce = 0xf; } else { ce = 0x0; } for (i = 0; i < 8; i++) { OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR | A4XX_RB_MRT_CONTROL_B11 | A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce)); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) | A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) | A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) | A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO)); } fd4_emit_vertex_bufs(ring, &emit); OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, 0x0); /* XXX GRAS_ALPHA_CONTROL */ OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1); OUT_RING(ring, 0x00000000); OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4); OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW0 */ OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW1 */ OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW2 */ OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */ /* until fastclear works: */ fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2); OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */ OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1); OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */ OUT_PKT3(ring, CP_UNKNOWN_1A, 1); OUT_RING(ring, 0x00000001); fd4_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); OUT_PKT3(ring, CP_UNKNOWN_1A, 1); OUT_RING(ring, 0x00000000); OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1); OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR); OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A4XX_GRAS_SC_CONTROL_MSAA_DISABLE | A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | A4XX_GRAS_SC_CONTROL_RASTER_MODE(0)); }