void vc4_flush(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); if (!vc4->needs_flush) return; /* The RCL setup would choke if the draw bounds cause no drawing, so * just drop the drawing if that's the case. */ if (vc4->draw_max_x <= vc4->draw_min_x || vc4->draw_max_y <= vc4->draw_min_y) { vc4_job_reset(vc4); return; } /* Increment the semaphore indicating that binning is done and * unblocking the render thread. Note that this doesn't act until the * FLUSH completes. */ cl_ensure_space(&vc4->bcl, 8); cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE); /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */ cl_u8(&vc4->bcl, VC4_PACKET_FLUSH); vc4_setup_rcl(vc4); vc4_job_submit(vc4); }
void vc4_flush(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0]; struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf; if (!vc4->needs_flush) return; /* The RCL setup would choke if the draw bounds cause no drawing, so * just drop the drawing if that's the case. */ if (vc4->draw_max_x <= vc4->draw_min_x || vc4->draw_max_y <= vc4->draw_min_y) { vc4_job_reset(vc4); return; } /* Increment the semaphore indicating that binning is done and * unblocking the render thread. Note that this doesn't act until the * FLUSH completes. */ cl_ensure_space(&vc4->bcl, 8); struct vc4_cl_out *bcl = cl_start(&vc4->bcl); cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE); /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */ cl_u8(&bcl, VC4_PACKET_FLUSH); cl_end(&vc4->bcl, bcl); if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) { pipe_surface_reference(&vc4->color_write, cbuf); if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) { pipe_surface_reference(&vc4->color_read, cbuf); } else { pipe_surface_reference(&vc4->color_read, NULL); } } else { pipe_surface_reference(&vc4->color_write, NULL); pipe_surface_reference(&vc4->color_read, NULL); } if (vc4->framebuffer.zsbuf && (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { pipe_surface_reference(&vc4->zs_write, zsbuf); if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { pipe_surface_reference(&vc4->zs_read, zsbuf); } else { pipe_surface_reference(&vc4->zs_read, NULL); } } else { pipe_surface_reference(&vc4->zs_write, NULL); pipe_surface_reference(&vc4->zs_read, NULL); } vc4_job_submit(vc4); }
/** * HW-2116 workaround: Flush the batch before triggering the hardware state * counter wraparound behavior. * * State updates are tracked by a global counter which increments at the first * state update after a draw or a START_BINNING. Tiles can then have their * state updated at draw time with a set of cheap checks for whether the * state's copy of the global counter matches the global counter the last time * that state was written to the tile. * * The state counters are relatively small and wrap around quickly, so you * could get false negatives for needing to update a particular state in the * tile. To avoid this, the hardware attempts to write all of the state in * the tile at wraparound time. This apparently is broken, so we just flush * everything before that behavior is triggered. A batch flush is sufficient * to get our current contents drawn and reset the counters to 0. * * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the * tiles with VC4_PACKET_RETURN_FROM_LIST. */ static void vc4_hw_2116_workaround(struct pipe_context *pctx, int vert_count) { struct vc4_context *vc4 = vc4_context(pctx); struct vc4_job *job = vc4_get_job_for_fbo(vc4); if (job->draw_calls_queued + vert_count / 65535 >= VC4_HW_2116_COUNT) { perf_debug("Flushing batch due to HW-2116 workaround " "(too many draw calls per scene\n"); vc4_job_submit(vc4, job); } }
static bool vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) { struct vc4_context *vc4 = vc4_context(pctx); bool msaa = (info->src.resource->nr_samples > 1 || info->dst.resource->nr_samples > 1); int tile_width = msaa ? 32 : 64; int tile_height = msaa ? 32 : 64; if (util_format_is_depth_or_stencil(info->dst.resource->format)) return false; if (info->scissor_enable) return false; if ((info->mask & PIPE_MASK_RGBA) == 0) return false; if (info->dst.box.x != info->src.box.x || info->dst.box.y != info->src.box.y || info->dst.box.width != info->src.box.width || info->dst.box.height != info->src.box.height) { return false; } int dst_surface_width = u_minify(info->dst.resource->width0, info->dst.level); int dst_surface_height = u_minify(info->dst.resource->height0, info->dst.level); if (is_tile_unaligned(info->dst.box.x, tile_width) || is_tile_unaligned(info->dst.box.y, tile_height) || (is_tile_unaligned(info->dst.box.width, tile_width) && info->dst.box.x + info->dst.box.width != dst_surface_width) || (is_tile_unaligned(info->dst.box.height, tile_height) && info->dst.box.y + info->dst.box.height != dst_surface_height)) { return false; } /* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL uses the * VC4_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our * destination surface) to determine the stride. This may be wrong * when reading from texture miplevels > 0, which are stored in * POT-sized areas. For MSAA, the tile addresses are computed * explicitly by the RCL, but still use the destination width to * determine the stride (which could be fixed by explicitly supplying * it in the ABI). */ struct vc4_resource *rsc = vc4_resource(info->src.resource); uint32_t stride; if (info->src.resource->nr_samples > 1) stride = align(dst_surface_width, 32) * 4 * rsc->cpp; else if (rsc->slices[info->src.level].tiling == VC4_TILING_FORMAT_T) stride = align(dst_surface_width * rsc->cpp, 128); else stride = align(dst_surface_width * rsc->cpp, 16); if (stride != rsc->slices[info->src.level].stride) return false; if (info->dst.resource->format != info->src.resource->format) return false; if (false) { fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n", info->src.box.x, info->src.box.y, info->dst.box.x, info->dst.box.y, info->dst.box.width, info->dst.box.height); } struct pipe_surface *dst_surf = vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level); struct pipe_surface *src_surf = vc4_get_blit_surface(pctx, info->src.resource, info->src.level); vc4_flush_jobs_reading_resource(vc4, info->src.resource); struct vc4_job *job = vc4_get_job(vc4, dst_surf, NULL); pipe_surface_reference(&job->color_read, src_surf); /* If we're resolving from MSAA to single sample, we still need to run * the engine in MSAA mode for the load. */ if (!job->msaa && info->src.resource->nr_samples > 1) { job->msaa = true; job->tile_width = 32; job->tile_height = 32; } job->draw_min_x = info->dst.box.x; job->draw_min_y = info->dst.box.y; job->draw_max_x = info->dst.box.x + info->dst.box.width; job->draw_max_y = info->dst.box.y + info->dst.box.height; job->draw_width = dst_surf->width; job->draw_height = dst_surf->height; job->tile_width = tile_width; job->tile_height = tile_height; job->msaa = msaa; job->needs_flush = true; job->resolve |= PIPE_CLEAR_COLOR; vc4_job_submit(vc4, job); pipe_surface_reference(&dst_surf, NULL); pipe_surface_reference(&src_surf, NULL); return true; }
static void vc4_clear(struct pipe_context *pctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct vc4_context *vc4 = vc4_context(pctx); struct vc4_job *job = vc4_get_job_for_fbo(vc4); /* We can't flag new buffers for clearing once we've queued draws. We * could avoid this by using the 3d engine to clear. */ if (job->draw_calls_queued) { perf_debug("Flushing rendering to process new clear.\n"); vc4_job_submit(vc4, job); job = vc4_get_job_for_fbo(vc4); } if (buffers & PIPE_CLEAR_COLOR0) { struct vc4_resource *rsc = vc4_resource(vc4->framebuffer.cbufs[0]->texture); uint32_t clear_color; if (vc4_rt_format_is_565(vc4->framebuffer.cbufs[0]->format)) { /* In 565 mode, the hardware will be packing our color * for us. */ clear_color = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f); } else { /* Otherwise, we need to do this packing because we * support multiple swizzlings of RGBA8888. */ clear_color = pack_rgba(vc4->framebuffer.cbufs[0]->format, color->f); } job->clear_color[0] = job->clear_color[1] = clear_color; rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0); } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { struct vc4_resource *rsc = vc4_resource(vc4->framebuffer.zsbuf->texture); unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL; /* Clearing ZS will clear both Z and stencil, so if we're * trying to clear just one then we need to draw a quad to do * it instead. */ if ((zsclear == PIPE_CLEAR_DEPTH || zsclear == PIPE_CLEAR_STENCIL) && (rsc->initialized_buffers & ~(zsclear | job->cleared)) && util_format_is_depth_and_stencil(vc4->framebuffer.zsbuf->format)) { perf_debug("Partial clear of Z+stencil buffer, " "drawing a quad instead of fast clearing\n"); vc4_blitter_save(vc4); util_blitter_clear(vc4->blitter, vc4->framebuffer.width, vc4->framebuffer.height, 1, zsclear, NULL, depth, stencil); buffers &= ~zsclear; if (!buffers) return; } /* Though the depth buffer is stored with Z in the high 24, * for this field we just need to store it in the low 24. */ if (buffers & PIPE_CLEAR_DEPTH) { job->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth); } if (buffers & PIPE_CLEAR_STENCIL) job->clear_stencil = stencil; rsc->initialized_buffers |= zsclear; } job->draw_min_x = 0; job->draw_min_y = 0; job->draw_max_x = vc4->framebuffer.width; job->draw_max_y = vc4->framebuffer.height; job->cleared |= buffers; job->resolve |= buffers; vc4_start_draw(vc4); }