static void setup_stages(struct fd4_emit *emit, struct stage *s) { unsigned i; s[VS].v = fd4_emit_get_vp(emit); s[FS].v = fd4_emit_get_fp(emit); s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */ for (i = 0; i < MAX_STAGES; i++) { if (s[i].v) { s[i].i = &s[i].v->info; /* constlen is in units of 4 * vec4: */ s[i].constlen = align(s[i].v->constlen, 4) / 4; /* instrlen is already in units of 16 instr.. although * probably we should ditch that and not make the compiler * care about instruction group size of a3xx vs a4xx */ s[i].instrlen = s[i].v->instrlen; } else { s[i].i = NULL; s[i].constlen = 0; s[i].instrlen = 0; } } /* NOTE: at least for gles2, blob partitions VS at bottom of const * space and FS taking entire remaining space. We probably don't * need to do that the same way, but for now mimic what the blob * does to make it easier to diff against register values from blob * * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders * is run from external memory. */ if ((s[VS].instrlen + s[FS].instrlen) > 64) { /* prioritize FS for internal memory: */ if (s[FS].instrlen < 64) { /* if FS can fit, kick VS out to external memory: */ s[VS].instrlen = 0; } else if (s[VS].instrlen < 64) { /* otherwise if VS can fit, kick out FS: */ s[FS].instrlen = 0; } else { /* neither can fit, run both from external memory: */ s[VS].instrlen = 0; s[FS].instrlen = 0; } } s[VS].constlen = 66; s[FS].constlen = 128 - s[VS].constlen; s[VS].instroff = 0; s[VS].constoff = 0; s[FS].instroff = 64 - s[FS].instrlen; s[FS].constoff = s[VS].constlen; s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff; s[HS].constoff = s[DS].constoff = s[GS].constoff = s[FS].constoff; }
void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd4_emit *emit) { struct ir3_shader_variant *vp = fd4_emit_get_vp(emit); struct ir3_shader_variant *fp = fd4_emit_get_fp(emit); uint32_t dirty = emit->dirty; emit_marker(ring, 5); if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) { uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control; /* I suppose if we needed to (which I don't *think* we need * to), we could emit this for binning pass too. But we * would need to keep a different patch-list for binning * vs render pass. */ OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1); OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches); } if (dirty & FD_DIRTY_ZSA) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1); OUT_RING(ring, zsa->rb_alpha_control); OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2); OUT_RING(ring, zsa->rb_stencil_control); OUT_RING(ring, zsa->rb_stencil_control2); } if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); struct pipe_stencil_ref *sr = &ctx->stencil_ref; OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2); OUT_RING(ring, zsa->rb_stencilrefmask | A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0])); OUT_RING(ring, zsa->rb_stencilrefmask_bf | A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1])); } if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) { struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa); bool fragz = fp->has_kill | fp->writes_pos; OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, zsa->rb_depth_control | COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE)); /* maybe this register/bitfield needs a better name.. this * appears to be just disabling early-z */ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, zsa->gras_alpha_control | COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE)); } if (dirty & FD_DIRTY_RASTERIZER) { struct fd4_rasterizer_stateobj *rasterizer = fd4_rasterizer_stateobj(ctx->rasterizer); OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1); OUT_RING(ring, rasterizer->gras_su_mode_control | A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS); OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2); OUT_RING(ring, rasterizer->gras_su_point_minmax); OUT_RING(ring, rasterizer->gras_su_point_size); OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2); OUT_RING(ring, rasterizer->gras_su_poly_offset_scale); OUT_RING(ring, rasterizer->gras_su_poly_offset_offset); OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, rasterizer->gras_cl_clip_cntl); } /* NOTE: since primitive_restart is not actually part of any * state object, we need to make sure that we always emit * PRIM_VTX_CNTL.. either that or be more clever and detect * when it changes. */ if (emit->info) { const struct pipe_draw_info *info = emit->info; uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer) ->pc_prim_vtx_cntl; if (info->indexed && info->primitive_restart) val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE); if (fp->total_in > 0) { uint32_t varout = align(fp->total_in, 16) / 16; if (varout > 1) varout = align(varout, 2); val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout); } OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2); OUT_RING(ring, val); OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */ } if (dirty & FD_DIRTY_SCISSOR) { struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2); OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) | A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1)); OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) | A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny)); ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx); ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny); ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx); ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy); } if (dirty & FD_DIRTY_VIEWPORT) { fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6); OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2])); OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); } if (dirty & FD_DIRTY_PROG) fd4_program_emit(ring, emit); if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) && /* evil hack to deal sanely with clear path: */ (emit->prog == &ctx->prog)) { fd_wfi(ctx, ring); emit_constants(ring, SB_VERT_SHADER, &ctx->constbuf[PIPE_SHADER_VERTEX], vp, emit->prog->dirty & FD_SHADER_DIRTY_VP); if (!emit->key.binning_pass) { emit_constants(ring, SB_FRAG_SHADER, &ctx->constbuf[PIPE_SHADER_FRAGMENT], fp, emit->prog->dirty & FD_SHADER_DIRTY_FP); } } /* emit driver params every time */ if (emit->info && emit->prog == &ctx->prog) { uint32_t vertex_params[4] = { emit->info->indexed ? emit->info->index_bias : emit->info->start, 0, 0, 0 }; if (vp->constlen >= vp->first_driver_param + 4) { fd4_emit_constant(ring, SB_VERT_SHADER, (vp->first_driver_param + 4) * 4, 0, 4, vertex_params, NULL); } } if ((dirty & FD_DIRTY_BLEND) && ctx->blend) { struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend); uint32_t i; for (i = 0; i < 8; i++) { OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); OUT_RING(ring, blend->rb_mrt[i].control); OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); OUT_RING(ring, blend->rb_mrt[i].blend_control); } OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1); OUT_RING(ring, blend->rb_fs_output | A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff)); } if (dirty & FD_DIRTY_BLEND_COLOR) { struct pipe_blend_color *bcolor = &ctx->blend_color; OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4); OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) | A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0])); OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) | A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1])); OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) | A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2])); OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) | A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3])); } if (dirty & FD_DIRTY_VERTTEX) { if (vp->has_samp) emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex); else dirty &= ~FD_DIRTY_VERTTEX; } if (dirty & FD_DIRTY_FRAGTEX) { if (fp->has_samp) emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex); else dirty &= ~FD_DIRTY_FRAGTEX; } ctx->dirty &= ~dirty; }
static bool fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) { struct fd4_context *fd4_ctx = fd4_context(ctx); struct fd4_emit emit = { .debug = &ctx->debug, .vtx = &ctx->vtx, .prog = &ctx->prog, .info = info, .key = { .color_two_side = ctx->rasterizer->light_twoside, .vclamp_color = ctx->rasterizer->clamp_vertex_color, .fclamp_color = ctx->rasterizer->clamp_fragment_color, .rasterflat = ctx->rasterizer->flatshade, // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), .ucp_enables = ctx->rasterizer->clip_plane_enable, .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate || fd4_ctx->fastc_srgb || fd4_ctx->vastc_srgb), .vsaturate_s = fd4_ctx->vsaturate_s, .vsaturate_t = fd4_ctx->vsaturate_t, .vsaturate_r = fd4_ctx->vsaturate_r, .fsaturate_s = fd4_ctx->fsaturate_s, .fsaturate_t = fd4_ctx->fsaturate_t, .fsaturate_r = fd4_ctx->fsaturate_r, .vastc_srgb = fd4_ctx->vastc_srgb, .fastc_srgb = fd4_ctx->fastc_srgb, }, .rasterflat = ctx->rasterizer->flatshade, .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, }; fixup_shader_state(ctx, &emit.key); unsigned dirty = ctx->dirty; /* do regular pass first, since that is more likely to fail compiling: */ if (!(fd4_emit_get_vp(&emit) && fd4_emit_get_fp(&emit))) return false; emit.key.binning_pass = false; emit.dirty = dirty; struct fd_ringbuffer *ring = ctx->batch->draw; if (ctx->rasterizer->rasterizer_discard) { fd_wfi(ctx->batch, ring); OUT_PKT3(ring, CP_REG_RMW, 3); OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL); OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE); OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE); } draw_impl(ctx, ctx->batch->draw, &emit); if (ctx->rasterizer->rasterizer_discard) { fd_wfi(ctx->batch, ring); OUT_PKT3(ring, CP_REG_RMW, 3); OUT_RING(ring, REG_A4XX_RB_RENDER_CONTROL); OUT_RING(ring, ~A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE); OUT_RING(ring, 0); } /* and now binning pass: */ emit.key.binning_pass = true; emit.dirty = dirty & ~(FD_DIRTY_BLEND); emit.vp = NULL; /* we changed key so need to refetch vp */ emit.fp = NULL; draw_impl(ctx, ctx->batch->binning, &emit); return true; }