static void vc4_resource_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans) { struct vc4_context *vc4 = vc4_context(pctx); struct vc4_transfer *trans = vc4_transfer(ptrans); struct pipe_resource *prsc = ptrans->resource; struct vc4_resource *rsc = vc4_resource(prsc); struct vc4_resource_slice *slice = &rsc->slices[ptrans->level]; if (trans->map) { if (ptrans->usage & PIPE_TRANSFER_WRITE) { vc4_store_tiled_image(rsc->bo->map + slice->offset + ptrans->box.z * rsc->cube_map_stride, slice->stride, trans->map, ptrans->stride, slice->tiling, rsc->cpp, &ptrans->box); } free(trans->map); } pipe_resource_reference(&ptrans->resource, NULL); util_slab_free(&vc4->transfer_pool, ptrans); }
static void vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4, struct drm_vc4_submit_rcl_surface *submit_surf, struct pipe_surface *psurf) { struct vc4_surface *surf = vc4_surface(psurf); if (!surf) { submit_surf->hindex = ~0; return; } struct vc4_resource *rsc = vc4_resource(psurf->texture); submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); submit_surf->offset = surf->offset; if (psurf->texture->nr_samples == 0) { submit_surf->bits = VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ? VC4_RENDER_CONFIG_FORMAT_BGR565 : VC4_RENDER_CONFIG_FORMAT_RGBA8888, VC4_RENDER_CONFIG_FORMAT) | VC4_SET_FIELD(surf->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT); } rsc->writes++; }
static struct pipe_surface * vc4_create_surface(struct pipe_context *pctx, struct pipe_resource *ptex, const struct pipe_surface *surf_tmpl) { struct vc4_surface *surface = CALLOC_STRUCT(vc4_surface); struct vc4_resource *rsc = vc4_resource(ptex); if (!surface) return NULL; assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); struct pipe_surface *psurf = &surface->base; unsigned level = surf_tmpl->u.tex.level; pipe_reference_init(&psurf->reference, 1); pipe_resource_reference(&psurf->texture, ptex); psurf->context = pctx; psurf->format = surf_tmpl->format; psurf->width = u_minify(ptex->width0, level); psurf->height = u_minify(ptex->height0, level); psurf->u.tex.level = level; psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer; psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer; surface->offset = rsc->slices[level].offset; surface->tiling = rsc->slices[level].tiling; return &surface->base; }
static void vc4_set_index_buffer(struct pipe_context *pctx, const struct pipe_index_buffer *ib) { struct vc4_context *vc4 = vc4_context(pctx); if (ib) { assert(!ib->user_buffer); if (ib->index_size == 4) { struct pipe_resource tmpl = *ib->buffer; assert(tmpl.format == PIPE_FORMAT_R8_UNORM); assert(tmpl.height0 == 1); tmpl.width0 = (tmpl.width0 - ib->offset) / 2; struct pipe_resource *pshadow = vc4_resource_create(&vc4->screen->base, &tmpl); struct vc4_resource *shadow = vc4_resource(pshadow); pipe_resource_reference(&shadow->shadow_parent, ib->buffer); pipe_resource_reference(&vc4->indexbuf.buffer, NULL); vc4->indexbuf.buffer = pshadow; vc4->indexbuf.index_size = 2; vc4->indexbuf.offset = 0; } else { pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer); vc4->indexbuf.index_size = ib->index_size; vc4->indexbuf.offset = ib->offset; } } else { pipe_resource_reference(&vc4->indexbuf.buffer, NULL); } vc4->dirty |= VC4_DIRTY_INDEXBUF; }
/**
 * Destroys a resource: drops its reference on the shadow parent (if any),
 * unreferences its BO, and frees the vc4_resource itself.
 */
static void
vc4_resource_destroy(struct pipe_screen *pscreen,
                     struct pipe_resource *prsc)
{
        struct vc4_resource *res = vc4_resource(prsc);

        /* Release what the resource holds before freeing the wrapper. */
        pipe_resource_reference(&res->shadow_parent, NULL);
        vc4_bo_unreference(&res->bo);

        free(res);
}
static void vc4_set_framebuffer_state(struct pipe_context *pctx, const struct pipe_framebuffer_state *framebuffer) { struct vc4_context *vc4 = vc4_context(pctx); struct pipe_framebuffer_state *cso = &vc4->framebuffer; unsigned i; vc4_flush(pctx); for (i = 0; i < framebuffer->nr_cbufs; i++) pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]); for (; i < vc4->framebuffer.nr_cbufs; i++) pipe_surface_reference(&cso->cbufs[i], NULL); cso->nr_cbufs = framebuffer->nr_cbufs; pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf); cso->width = framebuffer->width; cso->height = framebuffer->height; /* Nonzero texture mipmap levels are laid out as if they were in * power-of-two-sized spaces. The renderbuffer config infers its * stride from the width parameter, so we need to configure our * framebuffer. Note that if the z/color buffers were mismatched * sizes, we wouldn't be able to do this. */ if (cso->cbufs[0] && cso->cbufs[0]->u.tex.level) { struct vc4_resource *rsc = vc4_resource(cso->cbufs[0]->texture); cso->width = (rsc->slices[cso->cbufs[0]->u.tex.level].stride / rsc->cpp); } else if (cso->zsbuf && cso->zsbuf->u.tex.level){ struct vc4_resource *rsc = vc4_resource(cso->zsbuf->texture); cso->width = (rsc->slices[cso->zsbuf->u.tex.level].stride / rsc->cpp); } vc4->dirty |= VC4_DIRTY_FRAMEBUFFER; }
/**
 * Exports a winsys handle for the resource's BO, reporting the stride of
 * mip level 0.  Returns whatever vc4_screen_bo_get_handle() returns.
 */
static boolean
vc4_resource_get_handle(struct pipe_screen *pscreen,
                        struct pipe_resource *prsc,
                        struct winsys_handle *handle)
{
        struct vc4_resource *res = vc4_resource(prsc);
        struct vc4_bo *bo = res->bo;

        return vc4_screen_bo_get_handle(pscreen, bo, res->slices[0].stride,
                                        handle);
}
/** * Flushes the current command lists if they reference the given BO. * * This helps avoid flushing the command buffers when unnecessary. */ bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo, bool include_reads) { struct vc4_context *vc4 = vc4_context(pctx); if (!vc4->needs_flush) return false; /* Walk all the referenced BOs in the drawing command list to see if * they match. */ if (include_reads) { struct vc4_bo **referenced_bos = vc4->bo_pointers.base; for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) { if (referenced_bos[i] == bo) { return true; } } } /* Also check for the Z/color buffers, since the references to those * are only added immediately before submit. */ struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); if (csurf) { struct vc4_resource *ctex = vc4_resource(csurf->base.texture); if (ctex->bo == bo) { return true; } } struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf); if (zsurf) { struct vc4_resource *ztex = vc4_resource(zsurf->base.texture); if (ztex->bo == bo) { return true; } } return false; }
static enum vc4_texture_data_type get_resource_texture_format(struct pipe_resource *prsc) { struct vc4_resource *rsc = vc4_resource(prsc); uint8_t format = vc4_get_tex_format(prsc->format); if (!rsc->tiled) { assert(format == VC4_TEXTURE_TYPE_RGBA8888); return VC4_TEXTURE_TYPE_RGBA32R; } return format; }
/**
 * Walks the sampler views bound to one shader stage and calls
 * vc4_update_shadow_baselevel_texture() on every view whose texture has a
 * shadow parent.  Empty slots are skipped.
 */
static void
vc4_update_shadow_textures(struct pipe_context *pctx,
                           struct vc4_texture_stateobj *stage_tex)
{
        for (int slot = 0; slot < stage_tex->num_textures; slot++) {
                struct pipe_sampler_view *view = stage_tex->textures[slot];

                if (view && vc4_resource(view->texture)->shadow_parent)
                        vc4_update_shadow_baselevel_texture(pctx, view);
        }
}
static struct pipe_sampler_view * vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) { struct pipe_sampler_view *so = malloc(sizeof(*so)); if (!so) return NULL; *so = *cso; pipe_reference(NULL, &prsc->reference); /* There is no hardware level clamping, and the start address of a * texture may be misaligned, so in that case we have to copy to a * temporary. */ if (so->u.tex.first_level) { struct vc4_resource *shadow_parent = vc4_resource(prsc); struct pipe_resource tmpl = shadow_parent->base.b; struct vc4_resource *clone; tmpl.width0 = u_minify(tmpl.width0, so->u.tex.first_level); tmpl.height0 = u_minify(tmpl.height0, so->u.tex.first_level); tmpl.last_level = so->u.tex.last_level - so->u.tex.first_level; prsc = vc4_resource_create(pctx->screen, &tmpl); clone = vc4_resource(prsc); clone->shadow_parent = &shadow_parent->base.b; /* Flag it as needing update of the contents from the parent. */ clone->writes = shadow_parent->writes - 1; } so->texture = prsc; so->reference.count = 1; so->context = pctx; return so; }
/**
 * Pre-draw pass over the sampler views bound to one shader stage.
 *
 * For each bound view: if its texture has a shadow parent, the shadow is
 * refreshed via vc4_update_shadow_baselevel_texture(); then any jobs writing
 * the view's texture are flushed.  Empty slots are skipped.
 */
static void
vc4_predraw_check_textures(struct pipe_context *pctx,
                           struct vc4_texture_stateobj *stage_tex)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        int slot;

        for (slot = 0; slot < stage_tex->num_textures; slot++) {
                struct pipe_sampler_view *view = stage_tex->textures[slot];

                if (view == NULL)
                        continue;

                if (vc4_resource(view->texture)->shadow_parent)
                        vc4_update_shadow_baselevel_texture(pctx, view);

                vc4_flush_jobs_writing_resource(vc4, view->texture);
        }
}
static void vc4_submit_setup_rcl_msaa_surface(struct vc4_context *vc4, struct drm_vc4_submit_rcl_surface *submit_surf, struct pipe_surface *psurf) { struct vc4_surface *surf = vc4_surface(psurf); if (!surf) { submit_surf->hindex = ~0; return; } struct vc4_resource *rsc = vc4_resource(psurf->texture); submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); submit_surf->offset = surf->offset; submit_surf->bits = 0; rsc->writes++; }
static void vc4_submit_setup_rcl_surface(struct vc4_context *vc4, struct drm_vc4_submit_rcl_surface *submit_surf, struct pipe_surface *psurf, bool is_depth, bool is_write) { struct vc4_surface *surf = vc4_surface(psurf); if (!surf) { submit_surf->hindex = ~0; return; } struct vc4_resource *rsc = vc4_resource(psurf->texture); submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo); submit_surf->offset = surf->offset; if (psurf->texture->nr_samples == 0) { if (is_depth) { submit_surf->bits = VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS, VC4_LOADSTORE_TILE_BUFFER_BUFFER); } else { submit_surf->bits = VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR, VC4_LOADSTORE_TILE_BUFFER_BUFFER) | VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ? VC4_LOADSTORE_TILE_BUFFER_BGR565 : VC4_LOADSTORE_TILE_BUFFER_RGBA8888, VC4_LOADSTORE_TILE_BUFFER_FORMAT); } submit_surf->bits |= VC4_SET_FIELD(surf->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING); } else { assert(!is_write); submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES; } if (is_write) rsc->writes++; }
static void * vc4_resource_transfer_map(struct pipe_context *pctx, struct pipe_resource *prsc, unsigned level, unsigned usage, const struct pipe_box *box, struct pipe_transfer **pptrans) { struct vc4_context *vc4 = vc4_context(pctx); struct vc4_resource *rsc = vc4_resource(prsc); struct vc4_resource_slice *slice = &rsc->slices[level]; struct vc4_transfer *trans; struct pipe_transfer *ptrans; enum pipe_format format = prsc->format; char *buf; if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { vc4_resource_bo_alloc(rsc); } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { if (vc4_cl_references_bo(pctx, rsc->bo)) { if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && prsc->last_level == 0 && prsc->width0 == box->width && prsc->height0 == box->height && prsc->depth0 == box->depth) { vc4_resource_bo_alloc(rsc); } else { vc4_flush(pctx); } } } if (usage & PIPE_TRANSFER_WRITE) rsc->writes++; trans = util_slab_alloc(&vc4->transfer_pool); if (!trans) return NULL; /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */ /* util_slab_alloc() doesn't zero: */ memset(trans, 0, sizeof(*trans)); ptrans = &trans->base; pipe_resource_reference(&ptrans->resource, prsc); ptrans->level = level; ptrans->usage = usage; ptrans->box = *box; /* Note that the current kernel implementation is synchronous, so no * need to do syncing stuff here yet. */ buf = vc4_bo_map(rsc->bo); if (!buf) { fprintf(stderr, "Failed to map bo\n"); goto fail; } *pptrans = ptrans; if (rsc->tiled) { uint32_t utile_w = vc4_utile_width(rsc->cpp); uint32_t utile_h = vc4_utile_height(rsc->cpp); /* No direct mappings of tiled, since we need to manually * tile/untile. */ if (usage & PIPE_TRANSFER_MAP_DIRECTLY) return NULL; /* We need to align the box to utile boundaries, since that's * what load/store operate on. 
*/ uint32_t box_start_x = ptrans->box.x & (utile_w - 1); uint32_t box_start_y = ptrans->box.y & (utile_h - 1); ptrans->box.width += box_start_x; ptrans->box.x -= box_start_x; ptrans->box.height += box_start_y; ptrans->box.y -= box_start_y; ptrans->box.width = align(ptrans->box.width, utile_w); ptrans->box.height = align(ptrans->box.height, utile_h); ptrans->stride = ptrans->box.width * rsc->cpp; ptrans->layer_stride = ptrans->stride; trans->map = malloc(ptrans->stride * ptrans->box.height); if (usage & PIPE_TRANSFER_READ) { vc4_load_tiled_image(trans->map, ptrans->stride, buf + slice->offset + box->z * rsc->cube_map_stride, slice->stride, slice->tiling, rsc->cpp, &ptrans->box); } return (trans->map + box_start_x * rsc->cpp + box_start_y * ptrans->stride); } else { ptrans->stride = slice->stride; ptrans->layer_stride = ptrans->stride; return buf + slice->offset + box->y / util_format_get_blockheight(format) * ptrans->stride + box->x / util_format_get_blockwidth(format) * rsc->cpp + box->z * rsc->cube_map_stride; } fail: vc4_resource_transfer_unmap(pctx, ptrans); return NULL; }
static void vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) { struct vc4_context *vc4 = vc4_context(pctx); if (info->mode >= PIPE_PRIM_QUADS) { util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf); util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base); util_primconvert_draw_vbo(vc4->primconvert, info); perf_debug("Fallback conversion for %d %s vertices\n", info->count, u_prim_name(info->mode)); return; } /* Before setting up the draw, do any fixup blits necessary. */ vc4_predraw_check_textures(pctx, &vc4->verttex); vc4_predraw_check_textures(pctx, &vc4->fragtex); vc4_hw_2116_workaround(pctx, info->count); struct vc4_job *job = vc4_get_job_for_fbo(vc4); vc4_get_draw_cl_space(job, info->count); if (vc4->prim_mode != info->mode) { vc4->prim_mode = info->mode; vc4->dirty |= VC4_DIRTY_PRIM_MODE; } vc4_start_draw(vc4); if (!vc4_update_compiled_shaders(vc4, info->mode)) { debug_warn_once("shader compile failed, skipping draw call.\n"); return; } vc4_emit_state(pctx); if ((vc4->dirty & (VC4_DIRTY_VTXBUF | VC4_DIRTY_VTXSTATE | VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER | VC4_DIRTY_COMPILED_CS | VC4_DIRTY_COMPILED_VS | VC4_DIRTY_COMPILED_FS | vc4->prog.cs->uniform_dirty_bits | vc4->prog.vs->uniform_dirty_bits | vc4->prog.fs->uniform_dirty_bits)) || vc4->last_index_bias != info->index_bias) { vc4_emit_gl_shader_state(vc4, info, 0); } vc4->dirty = 0; /* Note that the primitive type fields match with OpenGL/gallium * definitions, up to but not including QUADS. 
*/ struct vc4_cl_out *bcl = cl_start(&job->bcl); if (info->indexed) { uint32_t offset = vc4->indexbuf.offset; uint32_t index_size = vc4->indexbuf.index_size; struct pipe_resource *prsc; if (vc4->indexbuf.index_size == 4) { prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf, info->count, &offset); index_size = 2; } else { if (vc4->indexbuf.user_buffer) { prsc = NULL; u_upload_data(vc4->uploader, 0, info->count * index_size, 4, vc4->indexbuf.user_buffer, &offset, &prsc); } else { prsc = vc4->indexbuf.buffer; } } struct vc4_resource *rsc = vc4_resource(prsc); cl_start_reloc(&job->bcl, &bcl, 1); cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); cl_u8(&bcl, info->mode | (index_size == 2 ? VC4_INDEX_BUFFER_U16: VC4_INDEX_BUFFER_U8)); cl_u32(&bcl, info->count); cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset); cl_u32(&bcl, vc4->max_index); job->draw_calls_queued++; if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer) pipe_resource_reference(&prsc, NULL); } else { uint32_t count = info->count; uint32_t start = info->start; uint32_t extra_index_bias = 0; while (count) { uint32_t this_count = count; uint32_t step = count; static const uint32_t max_verts = 65535; /* GFXH-515 / SW-5891: The binner emits 16 bit indices * for drawarrays, which means that if start + count > * 64k it would truncate the top bits. Work around * this by emitting a limited number of primitives at * a time and reemitting the shader state pointing * farther down the vertex attribute arrays. * * To do this properly for line loops or trifans, we'd * need to make a new VB containing the first vertex * plus whatever remainder. 
*/ if (extra_index_bias) { cl_end(&job->bcl, bcl); vc4_emit_gl_shader_state(vc4, info, extra_index_bias); bcl = cl_start(&job->bcl); } if (start + count > max_verts) { switch (info->mode) { case PIPE_PRIM_POINTS: this_count = step = max_verts; break; case PIPE_PRIM_LINES: this_count = step = max_verts - (max_verts % 2); break; case PIPE_PRIM_LINE_STRIP: this_count = max_verts; step = max_verts - 1; break; case PIPE_PRIM_LINE_LOOP: this_count = max_verts; step = max_verts - 1; debug_warn_once("unhandled line loop " "looping behavior with " ">65535 verts\n"); break; case PIPE_PRIM_TRIANGLES: this_count = step = max_verts - (max_verts % 3); break; case PIPE_PRIM_TRIANGLE_STRIP: this_count = max_verts; step = max_verts - 2; break; default: debug_warn_once("unhandled primitive " "max vert count, truncating\n"); this_count = step = max_verts; } } cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE); cl_u8(&bcl, info->mode); cl_u32(&bcl, this_count); cl_u32(&bcl, start); job->draw_calls_queued++; count -= step; extra_index_bias += start + step; start = 0; } } cl_end(&job->bcl, bcl); /* We shouldn't have tripped the HW_2116 bug with the GFXH-515 * workaround. */ assert(job->draw_calls_queued <= VC4_HW_2116_COUNT); if (vc4->zsa && vc4->framebuffer.zsbuf) { struct vc4_resource *rsc = vc4_resource(vc4->framebuffer.zsbuf->texture); if (vc4->zsa->base.depth.enabled) { job->resolve |= PIPE_CLEAR_DEPTH; rsc->initialized_buffers = PIPE_CLEAR_DEPTH; } if (vc4->zsa->base.stencil[0].enabled) { job->resolve |= PIPE_CLEAR_STENCIL; rsc->initialized_buffers |= PIPE_CLEAR_STENCIL; } } job->resolve |= PIPE_CLEAR_COLOR0; if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH) vc4_flush(pctx); }
/**
 * Builds the render command list (vc4->rcl) for the pending frame.
 *
 * Emits, in order: the clear-colors packet (if anything was cleared), the
 * tile rendering mode config, an optional None-mode store, and
 * then, for every tile in the draw bounding box, the loads of uncleared
 * buffers, a branch into the tile's binned list, and the stores of resolved
 * buffers.  Afterwards the write counters of the stored resources are
 * bumped.
 *
 * NOTE(review): render_surf is dereferenced unconditionally, so this
 * presumably expects at least one of the color/ZS surfaces to be bound --
 * confirm against callers.
 */
static void
vc4_setup_rcl(struct vc4_context *vc4)
{
        struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
        struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
        struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
        struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;

        /* A buffer that isn't bound can't be resolved (stored). */
        if (!csurf)
                vc4->resolve &= ~PIPE_CLEAR_COLOR0;
        if (!zsurf)
                vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
        /* Buffers that will be stored but weren't cleared need their
         * previous contents loaded first.
         */
        uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
        uint32_t width = vc4->framebuffer.width;
        uint32_t height = vc4->framebuffer.height;
        uint32_t stride_in_tiles = align(width, 64) / 64;

        assert(vc4->draw_min_x != ~0 &&
               vc4->draw_min_y != ~0);
        /* Tile range (64-pixel units) covering the draw bounding box;
         * the max bounds are exclusive, hence the -1.
         */
        uint32_t min_x_tile = vc4->draw_min_x / 64;
        uint32_t min_y_tile = vc4->draw_min_y / 64;
        uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64;
        uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64;
        uint32_t xtiles = max_x_tile - min_x_tile + 1;
        uint32_t ytiles = max_y_tile - min_y_tile + 1;

#if 0
        fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
                vc4->resolve,
                vc4->cleared,
                resolve_uncleared);
#endif

        /* Per-packet sizes used only to reserve space in the command list
         * up front.
         */
        uint32_t reloc_size = 9;
        uint32_t clear_size = 14;
        uint32_t config_size = 11 + reloc_size;
        uint32_t loadstore_size = 7 + reloc_size;
        uint32_t tilecoords_size = 3;
        uint32_t branch_size = 5 + reloc_size;
        uint32_t color_store_size = 1;
        uint32_t semaphore_size = 1;
        cl_ensure_space(&vc4->rcl,
                        clear_size +
                        config_size +
                        loadstore_size +
                        semaphore_size +
                        xtiles * ytiles * (loadstore_size * 4 +
                                           tilecoords_size * 3 +
                                           branch_size +
                                           color_store_size));

        if (vc4->cleared) {
                cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
                cl_u32(&vc4->rcl, vc4->clear_color[0]);
                cl_u32(&vc4->rcl, vc4->clear_color[1]);
                cl_u32(&vc4->rcl, vc4->clear_depth);
                cl_u8(&vc4->rcl, vc4->clear_stencil);
        }

        /* The rendering mode config determines the pointer that's used for
         * VC4_PACKET_STORE_MS_TILE_BUFFER address computations.  The kernel
         * could handle a no-relocation rendering mode config and deny those
         * packets, but instead we just tell the kernel we're doing our color
         * rendering to the Z buffer, and just don't emit any of those
         * packets.
         */
        struct vc4_surface *render_surf = csurf ? csurf : zsurf;
        struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
        cl_start_reloc(&vc4->rcl, 1);
        cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
        cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
        cl_u16(&vc4->rcl, width);
        cl_u16(&vc4->rcl, height);
        cl_u16(&vc4->rcl, ((render_surf->tiling <<
                            VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
                           (vc4_rt_format_is_565(render_surf->base.format) ?
                            VC4_RENDER_CONFIG_FORMAT_BGR565 :
                            VC4_RENDER_CONFIG_FORMAT_RGBA8888)));

        /* The tile buffer normally gets cleared when the previous tile is
         * stored.  If the clear values changed between frames, then the tile
         * buffer has stale clear values in it, so we have to do a store in
         * None mode (no writes) so that we trigger the tile buffer clear.
         *
         * Excess clearing is only a performance cost, since per-tile contents
         * will be loaded/stored in the loop below.
         */
        if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
                            PIPE_CLEAR_DEPTH |
                            PIPE_CLEAR_STENCIL)) {
                cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
                cl_u8(&vc4->rcl, 0);
                cl_u8(&vc4->rcl, 0);

                cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
                cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
                cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
        }

        /* Look up the handle indices once; they are reused by every tile's
         * relocations below.
         */
        uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
        uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
        uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);

        for (int y = min_y_tile; y <= max_y_tile; y++) {
                for (int x = min_x_tile; x <= max_x_tile; x++) {
                        bool end_of_frame = (x == max_x_tile &&
                                             y == max_y_tile);
                        /* Tracks whether a tile-coordinates packet is
                         * outstanding (emitted but not yet consumed by a
                         * store).
                         */
                        bool coords_emitted = false;

                        /* Note that the load doesn't actually occur until the
                         * tile coords packet is processed, and only one load
                         * may be outstanding at a time.
                         */
                        if (resolve_uncleared & PIPE_CLEAR_COLOR) {
                                vc4_store_before_load(vc4, &coords_emitted);

                                cl_start_reloc(&vc4->rcl, 1);
                                cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
                                cl_u8(&vc4->rcl,
                                      VC4_LOADSTORE_TILE_BUFFER_COLOR |
                                      (csurf->tiling <<
                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                cl_u8(&vc4->rcl,
                                      vc4_rt_format_is_565(csurf->base.format) ?
                                      VC4_LOADSTORE_TILE_BUFFER_BGR565 :
                                      VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
                                cl_reloc_hindex(&vc4->rcl, color_hindex,
                                                csurf->offset);

                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
                        }

                        if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                                vc4_store_before_load(vc4, &coords_emitted);

                                cl_start_reloc(&vc4->rcl, 1);
                                cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
                                cl_u8(&vc4->rcl,
                                      VC4_LOADSTORE_TILE_BUFFER_ZS |
                                      (zsurf->tiling <<
                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                cl_u8(&vc4->rcl, 0);
                                cl_reloc_hindex(&vc4->rcl, depth_hindex,
                                                zsurf->offset);

                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
                        }

                        /* Clipping depends on tile coordinates having been
                         * emitted, so make sure it's happened even if
                         * everything was cleared to start.
                         */
                        vc4_tile_coordinates(vc4, x, y, &coords_emitted);

                        /* Wait for the binner before jumping to the first
                         * tile's lists.
                         */
                        if (x == min_x_tile && y == min_y_tile)
                                cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);

                        /* Branch into this tile's list in the tile
                         * allocation BO (32 bytes per tile entry).
                         */
                        cl_start_reloc(&vc4->rcl, 1);
                        cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
                        cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
                                        (y * stride_in_tiles + x) * 32);

                        if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);

                                cl_start_reloc(&vc4->rcl, 1);
                                cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
                                cl_u8(&vc4->rcl,
                                      VC4_LOADSTORE_TILE_BUFFER_ZS |
                                      (zsurf->tiling <<
                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                cl_u8(&vc4->rcl,
                                      VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
                                /* The Z/S store carries the EOF flag only
                                 * when no color store follows it.
                                 */
                                cl_reloc_hindex(&vc4->rcl, depth_hindex,
                                                zsurf->offset |
                                                ((end_of_frame &&
                                                  !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
                                                 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));

                                coords_emitted = false;
                        }

                        if (vc4->resolve & PIPE_CLEAR_COLOR0) {
                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
                                if (end_of_frame) {
                                        cl_u8(&vc4->rcl,
                                              VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
                                } else {
                                        cl_u8(&vc4->rcl,
                                              VC4_PACKET_STORE_MS_TILE_BUFFER);
                                }

                                coords_emitted = false;
                        }

                        /* One of the bits needs to have been set that would
                         * have triggered an EOF.
                         */
                        assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
                                               PIPE_CLEAR_DEPTH |
                                               PIPE_CLEAR_STENCIL));
                        /* Any coords emitted must also have been consumed by
                         * a store.
                         */
                        assert(!coords_emitted);
                }
        }

        /* Record the writes on the resources that were stored to. */
        if (vc4->resolve & PIPE_CLEAR_COLOR0)
                ctex->writes++;

        if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
                ztex->writes++;
}
static void vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) { struct vc4_context *vc4 = vc4_context(pctx); if (info->mode >= PIPE_PRIM_QUADS) { util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf); util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base); util_primconvert_draw_vbo(vc4->primconvert, info); perf_debug("Fallback conversion for %d %s vertices\n", info->count, u_prim_name(info->mode)); return; } /* Before setting up the draw, do any fixup blits necessary. */ vc4_update_shadow_textures(pctx, &vc4->verttex); vc4_update_shadow_textures(pctx, &vc4->fragtex); vc4_hw_2116_workaround(pctx); vc4_get_draw_cl_space(vc4); if (vc4->prim_mode != info->mode) { vc4->prim_mode = info->mode; vc4->dirty |= VC4_DIRTY_PRIM_MODE; } vc4_start_draw(vc4); vc4_update_compiled_shaders(vc4, info->mode); vc4_emit_state(pctx); if ((vc4->dirty & (VC4_DIRTY_VTXBUF | VC4_DIRTY_VTXSTATE | VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER | VC4_DIRTY_COMPILED_CS | VC4_DIRTY_COMPILED_VS | VC4_DIRTY_COMPILED_FS | vc4->prog.cs->uniform_dirty_bits | vc4->prog.vs->uniform_dirty_bits | vc4->prog.fs->uniform_dirty_bits)) || vc4->last_index_bias != info->index_bias) { vc4_emit_gl_shader_state(vc4, info); } vc4->dirty = 0; /* Note that the primitive type fields match with OpenGL/gallium * definitions, up to but not including QUADS. 
*/ struct vc4_cl_out *bcl = cl_start(&vc4->bcl); if (info->indexed) { uint32_t offset = vc4->indexbuf.offset; uint32_t index_size = vc4->indexbuf.index_size; struct pipe_resource *prsc; if (vc4->indexbuf.index_size == 4) { prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf, info->count, &offset); index_size = 2; } else { if (vc4->indexbuf.user_buffer) { prsc = NULL; u_upload_data(vc4->uploader, 0, info->count * index_size, 4, vc4->indexbuf.user_buffer, &offset, &prsc); } else { prsc = vc4->indexbuf.buffer; } } struct vc4_resource *rsc = vc4_resource(prsc); cl_start_reloc(&vc4->bcl, &bcl, 1); cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); cl_u8(&bcl, info->mode | (index_size == 2 ? VC4_INDEX_BUFFER_U16: VC4_INDEX_BUFFER_U8)); cl_u32(&bcl, info->count); cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset); cl_u32(&bcl, vc4->max_index); if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer) pipe_resource_reference(&prsc, NULL); } else { cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE); cl_u8(&bcl, info->mode); cl_u32(&bcl, info->count); cl_u32(&bcl, info->start); } cl_end(&vc4->bcl, bcl); if (vc4->zsa && vc4->zsa->base.depth.enabled) { vc4->resolve |= PIPE_CLEAR_DEPTH; } if (vc4->zsa && vc4->zsa->base.stencil[0].enabled) vc4->resolve |= PIPE_CLEAR_STENCIL; vc4->resolve |= PIPE_CLEAR_COLOR0; vc4->shader_rec_count++; if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH) vc4_flush(pctx); }
/**
 * Emits the GL shader state for the next draw.
 *
 * Writes a shader record (flags, FS/VS/CS code relocations, and one
 * attribute entry per vertex element) into vc4->shader_rec, emits the
 * VC4_PACKET_GL_SHADER_STATE packet into the binner list, and writes the
 * uniforms for the three shaders.  Also records the index bias used and the
 * largest vertex index that stays inside every bound vertex buffer
 * (vc4->max_index).
 */
static void
vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *info)
{
        /* VC4_DIRTY_VTXSTATE */
        struct vc4_vertex_stateobj *vtx = vc4->vtx;
        /* VC4_DIRTY_VTXBUF */
        struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;

        /* The simulator throws a fit if VS or CS don't read an attribute, so
         * we emit a dummy read.
         */
        uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
        /* Emit the shader record.  The 3 relocations are the FS, VS and CS
         * code BOs; each attribute adds one more.
         */
        struct vc4_cl_out *shader_rec =
                cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
        /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
        cl_u16(&shader_rec,
               VC4_SHADER_FLAG_ENABLE_CLIPPING |
               VC4_SHADER_FLAG_FS_SINGLE_THREAD |
               ((info->mode == PIPE_PRIM_POINTS &&
                 vc4->rasterizer->base.point_size_per_vertex) ?
                VC4_SHADER_FLAG_VS_POINT_SIZE : 0));

        /* VC4_DIRTY_COMPILED_FS */
        cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
        cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */

        /* VC4_DIRTY_COMPILED_VS */
        cl_u16(&shader_rec, 0); /* vs num uniforms */
        cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
        cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */

        /* VC4_DIRTY_COMPILED_CS */
        cl_u16(&shader_rec, 0); /* cs num uniforms */
        cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
        cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */

        /* Start from the 16-bit limit and shrink it to what every strided
         * vertex buffer can actually hold.
         */
        uint32_t max_index = 0xffff;
        for (int i = 0; i < vtx->num_elements; i++) {
                struct pipe_vertex_element *elem = &vtx->pipe[i];
                struct pipe_vertex_buffer *vb =
                        &vertexbuf->vb[elem->vertex_buffer_index];
                struct vc4_resource *rsc = vc4_resource(vb->buffer);
                /* not vc4->dirty tracked: vc4->last_index_bias */
                uint32_t offset = (vb->buffer_offset +
                                   elem->src_offset +
                                   vb->stride * info->index_bias);
                /* NOTE(review): unsigned subtraction; presumably offset is
                 * expected to stay below the BO size -- confirm.
                 */
                uint32_t vb_size = rsc->bo->size - offset;
                uint32_t elem_size =
                        util_format_get_blocksize(elem->src_format);

                cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset);
                cl_u8(&shader_rec, elem_size - 1);
                cl_u8(&shader_rec, vb->stride);
                cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
                cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[i]);

                /* A zero stride reads the same element for every vertex, so
                 * it imposes no index limit (and would divide by zero).
                 */
                if (vb->stride > 0) {
                        max_index = MIN2(max_index,
                                         (vb_size - elem_size) / vb->stride);
                }
        }

        if (vtx->num_elements == 0) {
                assert(num_elements_emit == 1);
                /* Dummy attribute backed by a freshly allocated scratch BO;
                 * the local reference is dropped once the relocation has
                 * been emitted.
                 */
                struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
                cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0);
                cl_u8(&shader_rec, 16 - 1); /* element size */
                cl_u8(&shader_rec, 0); /* stride */
                cl_u8(&shader_rec, 0); /* VS VPM offset */
                cl_u8(&shader_rec, 0); /* CS VPM offset */
                vc4_bo_unreference(&bo);
        }
        cl_end(&vc4->shader_rec, shader_rec);

        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
        /* the actual draw call. */
        cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
        assert(vtx->num_elements <= 8);
        /* Note that number of attributes == 0 in the packet means 8
         * attributes.  This field also contains the offset into shader_rec.
         */
        cl_u32(&bcl, num_elements_emit & 0x7);
        cl_end(&vc4->bcl, bcl);

        /* The CS is given the same vertex-stage constants and textures as
         * the VS.
         */
        vc4_write_uniforms(vc4, vc4->prog.fs,
                           &vc4->constbuf[PIPE_SHADER_FRAGMENT],
                           &vc4->fragtex);
        vc4_write_uniforms(vc4, vc4->prog.vs,
                           &vc4->constbuf[PIPE_SHADER_VERTEX],
                           &vc4->verttex);
        vc4_write_uniforms(vc4, vc4->prog.cs,
                           &vc4->constbuf[PIPE_SHADER_VERTEX],
                           &vc4->verttex);

        vc4->last_index_bias = info->index_bias;
        vc4->max_index = max_index;
}
static void vc4_clear(struct pipe_context *pctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct vc4_context *vc4 = vc4_context(pctx); struct vc4_job *job = vc4_get_job_for_fbo(vc4); /* We can't flag new buffers for clearing once we've queued draws. We * could avoid this by using the 3d engine to clear. */ if (job->draw_calls_queued) { perf_debug("Flushing rendering to process new clear.\n"); vc4_job_submit(vc4, job); job = vc4_get_job_for_fbo(vc4); } if (buffers & PIPE_CLEAR_COLOR0) { struct vc4_resource *rsc = vc4_resource(vc4->framebuffer.cbufs[0]->texture); uint32_t clear_color; if (vc4_rt_format_is_565(vc4->framebuffer.cbufs[0]->format)) { /* In 565 mode, the hardware will be packing our color * for us. */ clear_color = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f); } else { /* Otherwise, we need to do this packing because we * support multiple swizzlings of RGBA8888. */ clear_color = pack_rgba(vc4->framebuffer.cbufs[0]->format, color->f); } job->clear_color[0] = job->clear_color[1] = clear_color; rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0); } if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { struct vc4_resource *rsc = vc4_resource(vc4->framebuffer.zsbuf->texture); unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL; /* Clearing ZS will clear both Z and stencil, so if we're * trying to clear just one then we need to draw a quad to do * it instead. 
*/ if ((zsclear == PIPE_CLEAR_DEPTH || zsclear == PIPE_CLEAR_STENCIL) && (rsc->initialized_buffers & ~(zsclear | job->cleared)) && util_format_is_depth_and_stencil(vc4->framebuffer.zsbuf->format)) { perf_debug("Partial clear of Z+stencil buffer, " "drawing a quad instead of fast clearing\n"); vc4_blitter_save(vc4); util_blitter_clear(vc4->blitter, vc4->framebuffer.width, vc4->framebuffer.height, 1, zsclear, NULL, depth, stencil); buffers &= ~zsclear; if (!buffers) return; } /* Though the depth buffer is stored with Z in the high 24, * for this field we just need to store it in the low 24. */ if (buffers & PIPE_CLEAR_DEPTH) { job->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth); } if (buffers & PIPE_CLEAR_STENCIL) job->clear_stencil = stencil; rsc->initialized_buffers |= zsclear; } job->draw_min_x = 0; job->draw_min_y = 0; job->draw_max_x = vc4->framebuffer.width; job->draw_max_y = vc4->framebuffer.height; job->cleared |= buffers; job->resolve |= buffers; vc4_start_draw(vc4); }
int vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) { struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); struct vc4_resource *ctex = vc4_resource(csurf->base.texture); uint32_t winsys_stride = ctex->bo->simulator_winsys_stride; uint32_t sim_stride = ctex->slices[0].stride; uint32_t row_len = MIN2(sim_stride, winsys_stride); struct exec_info exec; struct drm_device local_dev = { .vc4 = vc4, .simulator_mem_next = OVERFLOW_SIZE, }; struct drm_device *dev = &local_dev; int ret; memset(&exec, 0, sizeof(exec)); if (ctex->bo->simulator_winsys_map) { #if 0 fprintf(stderr, "%dx%d %d %d %d\n", ctex->base.b.width0, ctex->base.b.height0, winsys_stride, sim_stride, ctex->bo->size); #endif for (int y = 0; y < ctex->base.b.height0; y++) { memcpy(ctex->bo->map + y * sim_stride, ctex->bo->simulator_winsys_map + y * winsys_stride, row_len); } } exec.args = args; ret = vc4_simulator_pin_bos(dev, &exec); if (ret) return ret; ret = vc4_cl_validate(dev, &exec); if (ret) return ret; simpenrose_do_binning(exec.ct0ca, exec.ct0ea); simpenrose_do_rendering(exec.ct1ca, exec.ct1ea); ret = vc4_simulator_unpin_bos(&exec); if (ret) return ret; free(exec.exec_bo); if (ctex->bo->simulator_winsys_map) { for (int y = 0; y < ctex->base.b.height0; y++) { memcpy(ctex->bo->simulator_winsys_map + y * winsys_stride, ctex->bo->map + y * sim_stride, row_len); } } return 0; }
static void vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) { struct vc4_context *vc4 = vc4_context(pctx); if (info->mode >= PIPE_PRIM_QUADS) { util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf); util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base); util_primconvert_draw_vbo(vc4->primconvert, info); return; } struct vc4_vertex_stateobj *vtx = vc4->vtx; struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf; if (vc4->prim_mode != info->mode) { vc4->prim_mode = info->mode; vc4->dirty |= VC4_DIRTY_PRIM_MODE; } vc4_start_draw(vc4); vc4_update_compiled_shaders(vc4, info->mode); vc4_emit_state(pctx); vc4->dirty = 0; vc4_write_uniforms(vc4, vc4->prog.fs, &vc4->constbuf[PIPE_SHADER_FRAGMENT], &vc4->fragtex); vc4_write_uniforms(vc4, vc4->prog.vs, &vc4->constbuf[PIPE_SHADER_VERTEX], &vc4->verttex); vc4_write_uniforms(vc4, vc4->prog.cs, &vc4->constbuf[PIPE_SHADER_VERTEX], &vc4->verttex); /* The simulator throws a fit if VS or CS don't read an attribute, so * we emit a dummy read. */ uint32_t num_elements_emit = MAX2(vtx->num_elements, 1); /* Emit the shader record. */ cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit); cl_u16(&vc4->shader_rec, VC4_SHADER_FLAG_ENABLE_CLIPPING | ((info->mode == PIPE_PRIM_POINTS && vc4->rasterizer->base.point_size_per_vertex) ? 
VC4_SHADER_FLAG_VS_POINT_SIZE : 0)); cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */ cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs); cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0); cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */ cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */ cl_u8(&vc4->shader_rec, (1 << num_elements_emit) - 1); /* vs attribute array bitfield */ cl_u8(&vc4->shader_rec, 16 * num_elements_emit); /* vs total attribute size */ cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0); cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */ cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */ cl_u8(&vc4->shader_rec, (1 << num_elements_emit) - 1); /* cs attribute array bitfield */ cl_u8(&vc4->shader_rec, 16 * num_elements_emit); /* cs total attribute size */ cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0); cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */ uint32_t max_index = 0xffff; for (int i = 0; i < vtx->num_elements; i++) { struct pipe_vertex_element *elem = &vtx->pipe[i]; struct pipe_vertex_buffer *vb = &vertexbuf->vb[elem->vertex_buffer_index]; struct vc4_resource *rsc = vc4_resource(vb->buffer); uint32_t offset = vb->buffer_offset + elem->src_offset; uint32_t vb_size = rsc->bo->size - offset; uint32_t elem_size = util_format_get_blocksize(elem->src_format); cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset); cl_u8(&vc4->shader_rec, elem_size - 1); cl_u8(&vc4->shader_rec, vb->stride); cl_u8(&vc4->shader_rec, i * 16); /* VS VPM offset */ cl_u8(&vc4->shader_rec, i * 16); /* CS VPM offset */ if (vb->stride > 0) { max_index = MIN2(max_index, (vb_size - elem_size) / vb->stride); } } if (vtx->num_elements == 0) { assert(num_elements_emit == 1); struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO"); cl_reloc(vc4, &vc4->shader_rec, bo, 0); cl_u8(&vc4->shader_rec, 16 - 1); /* element size */ cl_u8(&vc4->shader_rec, 0); /* stride */ cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */ 
cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */ vc4_bo_unreference(&bo); } /* the actual draw call. */ cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE); assert(vtx->num_elements <= 8); /* Note that number of attributes == 0 in the packet means 8 * attributes. This field also contains the offset into shader_rec. */ cl_u32(&vc4->bcl, num_elements_emit & 0x7); /* Note that the primitive type fields match with OpenGL/gallium * definitions, up to but not including QUADS. */ if (info->indexed) { struct vc4_resource *rsc = vc4_resource(vc4->indexbuf.buffer); uint32_t offset = vc4->indexbuf.offset; uint32_t index_size = vc4->indexbuf.index_size; if (rsc->shadow_parent) { vc4_update_shadow_index_buffer(pctx, &vc4->indexbuf); offset = 0; index_size = 2; } cl_start_reloc(&vc4->bcl, 1); cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); cl_u8(&vc4->bcl, info->mode | (index_size == 2 ? VC4_INDEX_BUFFER_U16: VC4_INDEX_BUFFER_U8)); cl_u32(&vc4->bcl, info->count); cl_reloc(vc4, &vc4->bcl, rsc->bo, offset); cl_u32(&vc4->bcl, max_index); } else { cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE); cl_u8(&vc4->bcl, info->mode); cl_u32(&vc4->bcl, info->count); cl_u32(&vc4->bcl, info->start); } if (vc4->zsa && vc4->zsa->base.depth.enabled) { vc4->resolve |= PIPE_CLEAR_DEPTH; } if (vc4->zsa && vc4->zsa->base.stencil[0].enabled) vc4->resolve |= PIPE_CLEAR_STENCIL; vc4->resolve |= PIPE_CLEAR_COLOR0; vc4->shader_rec_count++; if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH) vc4_flush(pctx); }
void vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, struct pipe_sampler_view *view) { struct vc4_resource *shadow = vc4_resource(view->texture); struct vc4_resource *orig = vc4_resource(shadow->shadow_parent); assert(orig); if (shadow->writes == orig->writes) return; for (int i = 0; i <= shadow->base.b.last_level; i++) { struct pipe_box box = { .x = 0, .y = 0, .z = 0, .width = u_minify(shadow->base.b.width0, i), .height = u_minify(shadow->base.b.height0, i), .depth = 1, }; util_resource_copy_region(pctx, &shadow->base.b, i, 0, 0, 0, &orig->base.b, view->u.tex.first_level + i, &box); } shadow->writes = orig->writes; } /** * Converts a 4-byte index buffer to 2 bytes. * * Since GLES2 only has support for 1 and 2-byte indices, the hardware doesn't * include 4-byte index support, and we have to shrink it down. * * There's no fallback support for when indices end up being larger than 2^16, * though it will at least assertion fail. Also, if the original index data * was in user memory, it would be nice to not have uploaded it to a VBO * before translating. 
*/ void vc4_update_shadow_index_buffer(struct pipe_context *pctx, const struct pipe_index_buffer *ib) { struct vc4_resource *shadow = vc4_resource(ib->buffer); struct vc4_resource *orig = vc4_resource(shadow->shadow_parent); uint32_t count = shadow->base.b.width0 / 2; if (shadow->writes == orig->writes) return; struct pipe_transfer *src_transfer; uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b, ib->offset, count * 4, PIPE_TRANSFER_READ, &src_transfer); struct pipe_transfer *dst_transfer; uint16_t *dst = pipe_buffer_map_range(pctx, &shadow->base.b, 0, count * 2, PIPE_TRANSFER_WRITE, &dst_transfer); for (int i = 0; i < count; i++) { uint32_t src_index = src[i]; assert(src_index <= 0xffff); dst[i] = src_index; } pctx->transfer_unmap(pctx, dst_transfer); pctx->transfer_unmap(pctx, src_transfer); shadow->writes = orig->writes; } void vc4_resource_screen_init(struct pipe_screen *pscreen) { pscreen->resource_create = vc4_resource_create; pscreen->resource_from_handle = vc4_resource_from_handle; pscreen->resource_get_handle = u_resource_get_handle_vtbl; pscreen->resource_destroy = u_resource_destroy_vtbl; }
static void vc4_set_framebuffer_state(struct pipe_context *pctx, const struct pipe_framebuffer_state *framebuffer) { struct vc4_context *vc4 = vc4_context(pctx); struct pipe_framebuffer_state *cso = &vc4->framebuffer; unsigned i; vc4_flush(pctx); for (i = 0; i < framebuffer->nr_cbufs; i++) pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]); for (; i < vc4->framebuffer.nr_cbufs; i++) pipe_surface_reference(&cso->cbufs[i], NULL); cso->nr_cbufs = framebuffer->nr_cbufs; pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf); cso->width = framebuffer->width; cso->height = framebuffer->height; /* If we're binding to uninitialized buffers, no need to load their * contents before drawing.. */ if (cso->cbufs[0]) { struct vc4_resource *rsc = vc4_resource(cso->cbufs[0]->texture); if (!rsc->writes) vc4->cleared |= PIPE_CLEAR_COLOR0; } if (cso->zsbuf) { struct vc4_resource *rsc = vc4_resource(cso->zsbuf->texture); if (!rsc->writes) vc4->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL; } /* Nonzero texture mipmap levels are laid out as if they were in * power-of-two-sized spaces. The renderbuffer config infers its * stride from the width parameter, so we need to configure our * framebuffer. Note that if the z/color buffers were mismatched * sizes, we wouldn't be able to do this. 
*/ if (cso->cbufs[0] && cso->cbufs[0]->u.tex.level) { struct vc4_resource *rsc = vc4_resource(cso->cbufs[0]->texture); cso->width = (rsc->slices[cso->cbufs[0]->u.tex.level].stride / rsc->cpp); } else if (cso->zsbuf && cso->zsbuf->u.tex.level){ struct vc4_resource *rsc = vc4_resource(cso->zsbuf->texture); cso->width = (rsc->slices[cso->zsbuf->u.tex.level].stride / rsc->cpp); } vc4->msaa = false; if (cso->cbufs[0]) vc4->msaa = cso->cbufs[0]->texture->nr_samples > 1; else if (cso->zsbuf) vc4->msaa = cso->zsbuf->texture->nr_samples > 1; if (vc4->msaa) { vc4->tile_width = 32; vc4->tile_height = 32; } else { vc4->tile_width = 64; vc4->tile_height = 64; } vc4->draw_tiles_x = DIV_ROUND_UP(cso->width, vc4->tile_width); vc4->draw_tiles_y = DIV_ROUND_UP(cso->height, vc4->tile_height); vc4->dirty |= VC4_DIRTY_FRAMEBUFFER; }
int vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) { struct vc4_screen *screen = vc4->screen; struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL; uint32_t winsys_stride = ctex ? ctex->bo->simulator_winsys_stride : 0; uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0; uint32_t row_len = MIN2(sim_stride, winsys_stride); struct exec_info exec; struct drm_device local_dev = { .vc4 = vc4, .simulator_mem_next = OVERFLOW_SIZE, }; struct drm_device *dev = &local_dev; int ret; memset(&exec, 0, sizeof(exec)); if (ctex && ctex->bo->simulator_winsys_map) { #if 0 fprintf(stderr, "%dx%d %d %d %d\n", ctex->base.b.width0, ctex->base.b.height0, winsys_stride, sim_stride, ctex->bo->size); #endif for (int y = 0; y < ctex->base.b.height0; y++) { memcpy(ctex->bo->map + y * sim_stride, ctex->bo->simulator_winsys_map + y * winsys_stride, row_len); } } exec.args = args; ret = vc4_simulator_pin_bos(dev, &exec); if (ret) return ret; ret = vc4_cl_validate(dev, &exec); if (ret) return ret; int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea); if (bfc != 1) { fprintf(stderr, "Binning returned %d flushes, should be 1.\n", bfc); fprintf(stderr, "Relocated binning command list:\n"); vc4_dump_cl(screen->simulator_mem_base + exec.ct0ca, exec.ct0ea - exec.ct0ca, false); abort(); } int rfc = simpenrose_do_rendering(exec.ct1ca, exec.ct1ea); if (rfc != 1) { fprintf(stderr, "Rendering returned %d frames, should be 1.\n", rfc); fprintf(stderr, "Relocated render command list:\n"); vc4_dump_cl(screen->simulator_mem_base + exec.ct1ca, exec.ct1ea - exec.ct1ca, true); abort(); } ret = vc4_simulator_unpin_bos(&exec); if (ret) return ret; vc4_bo_unreference(&exec.exec_bo->bo); free(exec.exec_bo); if (ctex && ctex->bo->simulator_winsys_map) { for (int y = 0; y < ctex->base.b.height0; y++) { memcpy(ctex->bo->simulator_winsys_map + y * winsys_stride, ctex->bo->map + y * sim_stride, 
row_len); } } return 0; }
static void *vc4_get_yuv_vs(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); struct pipe_screen *pscreen = pctx->screen; if (vc4->yuv_linear_blit_vs) return vc4->yuv_linear_blit_vs; const struct nir_shader_compiler_options *options = pscreen->get_compiler_options(pscreen, PIPE_SHADER_IR_NIR, PIPE_SHADER_VERTEX); nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options); b.shader->info.name = ralloc_strdup(b.shader, "linear_blit_vs"); const struct glsl_type *vec4 = glsl_vec4_type(); nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos"); nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position"); pos_out->data.location = VARYING_SLOT_POS; nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf); struct pipe_shader_state shader_tmpl = { .type = PIPE_SHADER_IR_NIR, .ir.nir = b.shader, }; vc4->yuv_linear_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl); return vc4->yuv_linear_blit_vs; } static void *vc4_get_yuv_fs(struct pipe_context *pctx, int cpp) { struct vc4_context *vc4 = vc4_context(pctx); struct pipe_screen *pscreen = pctx->screen; struct pipe_shader_state **cached_shader; const char *name; if (cpp == 1) { cached_shader = &vc4->yuv_linear_blit_fs_8bit; name = "linear_blit_8bit_fs"; } else { cached_shader = &vc4->yuv_linear_blit_fs_16bit; name = "linear_blit_16bit_fs"; } if (*cached_shader) return *cached_shader; const struct nir_shader_compiler_options *options = pscreen->get_compiler_options(pscreen, PIPE_SHADER_IR_NIR, PIPE_SHADER_FRAGMENT); nir_builder b; nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options); b.shader->info.name = ralloc_strdup(b.shader, name); const struct glsl_type *vec4 = glsl_vec4_type(); const struct glsl_type *glsl_int = glsl_int_type(); nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color"); color_out->data.location = FRAG_RESULT_COLOR; nir_variable *pos_in = 
nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos"); pos_in->data.location = VARYING_SLOT_POS; nir_ssa_def *pos = nir_load_var(&b, pos_in); nir_ssa_def *one = nir_imm_int(&b, 1); nir_ssa_def *two = nir_imm_int(&b, 2); nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0)); nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1)); nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform, glsl_int, "stride"); nir_ssa_def *stride = nir_load_var(&b, stride_in); nir_ssa_def *x_offset; nir_ssa_def *y_offset; if (cpp == 1) { nir_ssa_def *intra_utile_x_offset = nir_ishl(&b, nir_iand(&b, x, one), two); nir_ssa_def *inter_utile_x_offset = nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one); x_offset = nir_iadd(&b, intra_utile_x_offset, inter_utile_x_offset); y_offset = nir_imul(&b, nir_iadd(&b, nir_ishl(&b, y, one), nir_ushr(&b, nir_iand(&b, x, two), one)), stride); } else { x_offset = nir_ishl(&b, x, two); y_offset = nir_imul(&b, y, stride); } nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo); load->num_components = 1; nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL); load->src[0] = nir_src_for_ssa(one); load->src[1] = nir_src_for_ssa(nir_iadd(&b, x_offset, y_offset)); nir_builder_instr_insert(&b, &load->instr); nir_store_var(&b, color_out, nir_unpack_unorm_4x8(&b, &load->dest.ssa), 0xf); struct pipe_shader_state shader_tmpl = { .type = PIPE_SHADER_IR_NIR, .ir.nir = b.shader, }; *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl); return *cached_shader; } static bool vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) { struct vc4_context *vc4 = vc4_context(pctx); struct vc4_resource *src = vc4_resource(info->src.resource); struct vc4_resource *dst = vc4_resource(info->dst.resource); bool ok; if (src->tiled) return false; if (src->base.format != PIPE_FORMAT_R8_UNORM && src->base.format != PIPE_FORMAT_R8G8_UNORM) return false; /* YUV blits always turn 
raster-order to tiled */ assert(dst->base.format == src->base.format); assert(dst->tiled); /* Always 1:1 and at the origin */ assert(info->src.box.x == 0 && info->dst.box.x == 0); assert(info->src.box.y == 0 && info->dst.box.y == 0); assert(info->src.box.width == info->dst.box.width); assert(info->src.box.height == info->dst.box.height); if ((src->slices[info->src.level].offset & 3) || (src->slices[info->src.level].stride & 3)) { perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n", src->slices[info->src.level].offset, src->slices[info->src.level].stride); goto fallback; } vc4_blitter_save(vc4); /* Create a renderable surface mapping the T-tiled shadow buffer. */ struct pipe_surface dst_tmpl; util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource, info->dst.level, info->dst.box.z); dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM; struct pipe_surface *dst_surf = pctx->create_surface(pctx, info->dst.resource, &dst_tmpl); if (!dst_surf) { fprintf(stderr, "Failed to create YUV dst surface\n"); util_blitter_unset_running_flag(vc4->blitter); return false; } dst_surf->width /= 2; if (dst->cpp == 1) dst_surf->height /= 2; /* Set the constant buffer. */ uint32_t stride = src->slices[info->src.level].stride; struct pipe_constant_buffer cb_uniforms = { .user_buffer = &stride, .buffer_size = sizeof(stride), }; pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, &cb_uniforms); struct pipe_constant_buffer cb_src = { .buffer = info->src.resource, .buffer_offset = src->slices[info->src.level].offset, .buffer_size = (src->bo->size - src->slices[info->src.level].offset), }; pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_src); /* Unbind the textures, to make sure we don't try to recurse into the * shadow blit. 
*/ pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL); pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL); util_blitter_custom_shader(vc4->blitter, dst_surf, vc4_get_yuv_vs(pctx), vc4_get_yuv_fs(pctx, src->cpp)); util_blitter_restore_textures(vc4->blitter); util_blitter_restore_constant_buffer_state(vc4->blitter); /* Restore cb1 (util_blitter doesn't handle this one). */ struct pipe_constant_buffer cb_disabled = { 0 }; pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_disabled); pipe_surface_reference(&dst_surf, NULL); return true; fallback: /* Do an immediate SW fallback, since the render blit path * would just recurse. */ ok = util_try_blit_via_copy_region(pctx, info); assert(ok); (void)ok; return true; } static bool vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) { struct vc4_context *vc4 = vc4_context(ctx); if (!util_blitter_is_blit_supported(vc4->blitter, info)) { fprintf(stderr, "blit unsupported %s -> %s\n", util_format_short_name(info->src.resource->format), util_format_short_name(info->dst.resource->format)); return false; } /* Enable the scissor, so we get a minimal set of tiles rendered. */ if (!info->scissor_enable) { info->scissor_enable = true; info->scissor.minx = info->dst.box.x; info->scissor.miny = info->dst.box.y; info->scissor.maxx = info->dst.box.x + info->dst.box.width; info->scissor.maxy = info->dst.box.y + info->dst.box.height; } vc4_blitter_save(vc4); util_blitter_blit(vc4->blitter, info); return true; } /* Optimal hardware path for blitting pixels. * Scaling, format conversion, up- and downsampling (resolve) are allowed. 
*/ void vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) { struct pipe_blit_info info = *blit_info; if (vc4_yuv_blit(pctx, blit_info)) return; if (vc4_tile_blit(pctx, blit_info)) return; if (info.mask & PIPE_MASK_S) { if (util_try_blit_via_copy_region(pctx, &info)) return; info.mask &= ~PIPE_MASK_S; fprintf(stderr, "cannot blit stencil, skipping\n"); } if (vc4_render_blit(pctx, &info)) return; fprintf(stderr, "Unsupported blit\n"); }
static bool vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) { struct vc4_context *vc4 = vc4_context(pctx); bool msaa = (info->src.resource->nr_samples > 1 || info->dst.resource->nr_samples > 1); int tile_width = msaa ? 32 : 64; int tile_height = msaa ? 32 : 64; if (util_format_is_depth_or_stencil(info->dst.resource->format)) return false; if (info->scissor_enable) return false; if ((info->mask & PIPE_MASK_RGBA) == 0) return false; if (info->dst.box.x != info->src.box.x || info->dst.box.y != info->src.box.y || info->dst.box.width != info->src.box.width || info->dst.box.height != info->src.box.height) { return false; } int dst_surface_width = u_minify(info->dst.resource->width0, info->dst.level); int dst_surface_height = u_minify(info->dst.resource->height0, info->dst.level); if (is_tile_unaligned(info->dst.box.x, tile_width) || is_tile_unaligned(info->dst.box.y, tile_height) || (is_tile_unaligned(info->dst.box.width, tile_width) && info->dst.box.x + info->dst.box.width != dst_surface_width) || (is_tile_unaligned(info->dst.box.height, tile_height) && info->dst.box.y + info->dst.box.height != dst_surface_height)) { return false; } /* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL uses the * VC4_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our * destination surface) to determine the stride. This may be wrong * when reading from texture miplevels > 0, which are stored in * POT-sized areas. For MSAA, the tile addresses are computed * explicitly by the RCL, but still use the destination width to * determine the stride (which could be fixed by explicitly supplying * it in the ABI). 
*/ struct vc4_resource *rsc = vc4_resource(info->src.resource); uint32_t stride; if (info->src.resource->nr_samples > 1) stride = align(dst_surface_width, 32) * 4 * rsc->cpp; else if (rsc->slices[info->src.level].tiling == VC4_TILING_FORMAT_T) stride = align(dst_surface_width * rsc->cpp, 128); else stride = align(dst_surface_width * rsc->cpp, 16); if (stride != rsc->slices[info->src.level].stride) return false; if (info->dst.resource->format != info->src.resource->format) return false; if (false) { fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n", info->src.box.x, info->src.box.y, info->dst.box.x, info->dst.box.y, info->dst.box.width, info->dst.box.height); } struct pipe_surface *dst_surf = vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level); struct pipe_surface *src_surf = vc4_get_blit_surface(pctx, info->src.resource, info->src.level); vc4_flush_jobs_reading_resource(vc4, info->src.resource); struct vc4_job *job = vc4_get_job(vc4, dst_surf, NULL); pipe_surface_reference(&job->color_read, src_surf); /* If we're resolving from MSAA to single sample, we still need to run * the engine in MSAA mode for the load. */ if (!job->msaa && info->src.resource->nr_samples > 1) { job->msaa = true; job->tile_width = 32; job->tile_height = 32; } job->draw_min_x = info->dst.box.x; job->draw_min_y = info->dst.box.y; job->draw_max_x = info->dst.box.x + info->dst.box.width; job->draw_max_y = info->dst.box.y + info->dst.box.height; job->draw_width = dst_surf->width; job->draw_height = dst_surf->height; job->tile_width = tile_width; job->tile_height = tile_height; job->msaa = msaa; job->needs_flush = true; job->resolve |= PIPE_CLEAR_COLOR; vc4_job_submit(vc4, job); pipe_surface_reference(&dst_surf, NULL); pipe_surface_reference(&src_surf, NULL); return true; }
static void vc4_setup_rcl(struct vc4_context *vc4) { struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); struct vc4_resource *ctex = vc4_resource(csurf->base.texture); uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared; uint32_t width = vc4->framebuffer.width; uint32_t height = vc4->framebuffer.height; uint32_t xtiles = align(width, 64) / 64; uint32_t ytiles = align(height, 64) / 64; #if 0 fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n", vc4->resolve, vc4->cleared, resolve_uncleared); #endif cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS); cl_u32(&vc4->rcl, vc4->clear_color[0]); cl_u32(&vc4->rcl, vc4->clear_color[1]); cl_u32(&vc4->rcl, vc4->clear_depth); cl_u8(&vc4->rcl, 0); cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); cl_reloc(vc4, &vc4->rcl, ctex->bo, csurf->offset); cl_u16(&vc4->rcl, width); cl_u16(&vc4->rcl, height); cl_u16(&vc4->rcl, ((csurf->tiling << VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) | (vc4_rt_format_is_565(csurf->base.format) ? VC4_RENDER_CONFIG_FORMAT_BGR565 : VC4_RENDER_CONFIG_FORMAT_RGBA8888) | VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE)); /* The tile buffer normally gets cleared when the previous tile is * stored. If the clear values changed between frames, then the tile * buffer has stale clear values in it, so we have to do a store in * None mode (no writes) so that we trigger the tile buffer clear. */ if (vc4->cleared & PIPE_CLEAR_COLOR0) { cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); cl_u8(&vc4->rcl, 0); cl_u8(&vc4->rcl, 0); cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE); cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */ } for (int y = 0; y < ytiles; y++) { for (int x = 0; x < xtiles; x++) { bool end_of_frame = (x == xtiles - 1 && y == ytiles - 1); /* Note that the load doesn't actually occur until the * tile coords packet is processed. 
*/ if (resolve_uncleared & PIPE_CLEAR_COLOR) { cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); cl_u8(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_COLOR | (csurf->tiling << VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); cl_u8(&vc4->rcl, vc4_rt_format_is_565(csurf->base.format) ? VC4_LOADSTORE_TILE_BUFFER_BGR565 : VC4_LOADSTORE_TILE_BUFFER_RGBA8888); cl_reloc(vc4, &vc4->rcl, ctex->bo, csurf->offset); } cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); cl_u8(&vc4->rcl, x); cl_u8(&vc4->rcl, y); cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST); cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc, (y * xtiles + x) * 32); if (vc4->resolve & PIPE_CLEAR_COLOR0) { if (end_of_frame) { cl_u8(&vc4->rcl, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); } else { cl_u8(&vc4->rcl, VC4_PACKET_STORE_MS_TILE_BUFFER); } } else { assert(!"unfinished: Need to end the frame\n"); } } } }