static void
vc4_resource_transfer_unmap(struct pipe_context *pctx,
                            struct pipe_transfer *ptrans)
{
    struct vc4_context *vc4 = vc4_context(pctx);
    struct vc4_transfer *trans = vc4_transfer(ptrans);
    struct pipe_resource *prsc = ptrans->resource;
    struct vc4_resource *rsc = vc4_resource(prsc);
    struct vc4_resource_slice *slice = &rsc->slices[ptrans->level];

    if (trans->map) {
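        /* trans->map is the temporary linear staging buffer used for tiled
         * resources; any written data has to be re-tiled back into the BO
         * before the staging buffer is freed.
         */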
        if (ptrans->usage & PIPE_TRANSFER_WRITE) {
            vc4_store_tiled_image(rsc->bo->map + slice->offset +
                                  ptrans->box.z * rsc->cube_map_stride,
                                  slice->stride,
                                  trans->map, ptrans->stride,
                                  slice->tiling, rsc->cpp,
                                  &ptrans->box);
        }
        free(trans->map);
    }

    pipe_resource_reference(&ptrans->resource, NULL);
    util_slab_free(&vc4->transfer_pool, ptrans);
}
static void
vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4,
                                           struct drm_vc4_submit_rcl_surface *submit_surf,
                                           struct pipe_surface *psurf)
{
        struct vc4_surface *surf = vc4_surface(psurf);

        if (!surf) {
                submit_surf->hindex = ~0;
                return;
        }

        struct vc4_resource *rsc = vc4_resource(psurf->texture);
        submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
        submit_surf->offset = surf->offset;

        if (psurf->texture->nr_samples == 0) {
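                /* Non-MSAA: pack the RT format (565 vs. 8888) and the memory
                 * (tiling) format into the render config bits.
                 */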
                submit_surf->bits =
                        VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
                                      VC4_RENDER_CONFIG_FORMAT_BGR565 :
                                      VC4_RENDER_CONFIG_FORMAT_RGBA8888,
                                      VC4_RENDER_CONFIG_FORMAT) |
                        VC4_SET_FIELD(surf->tiling,
                                      VC4_RENDER_CONFIG_MEMORY_FORMAT);
        }

        rsc->writes++;
}
static struct pipe_surface *
vc4_create_surface(struct pipe_context *pctx,
                   struct pipe_resource *ptex,
                   const struct pipe_surface *surf_tmpl)
{
    struct vc4_surface *surface = CALLOC_STRUCT(vc4_surface);
    struct vc4_resource *rsc = vc4_resource(ptex);

    if (!surface)
        return NULL;

    assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);

    struct pipe_surface *psurf = &surface->base;
    unsigned level = surf_tmpl->u.tex.level;

    pipe_reference_init(&psurf->reference, 1);
    pipe_resource_reference(&psurf->texture, ptex);

    psurf->context = pctx;
    psurf->format = surf_tmpl->format;
    psurf->width = u_minify(ptex->width0, level);
    psurf->height = u_minify(ptex->height0, level);
    psurf->u.tex.level = level;
    psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
    psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
    surface->offset = rsc->slices[level].offset;
    surface->tiling = rsc->slices[level].tiling;

    return &surface->base;
}
static void
vc4_set_index_buffer(struct pipe_context *pctx,
                     const struct pipe_index_buffer *ib)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        if (ib) {
                assert(!ib->user_buffer);

                if (ib->index_size == 4) {
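                        /* The hardware only supports 1- and 2-byte indices,
                         * so stand up a half-size shadow buffer that
                         * vc4_update_shadow_index_buffer() fills with 16-bit
                         * indices at draw time.
                         */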
                        struct pipe_resource tmpl = *ib->buffer;
                        assert(tmpl.format == PIPE_FORMAT_R8_UNORM);
                        assert(tmpl.height0 == 1);
                        tmpl.width0 = (tmpl.width0 - ib->offset) / 2;
                        struct pipe_resource *pshadow =
                                vc4_resource_create(&vc4->screen->base, &tmpl);
                        struct vc4_resource *shadow = vc4_resource(pshadow);
                        pipe_resource_reference(&shadow->shadow_parent, ib->buffer);

                        pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
                        vc4->indexbuf.buffer = pshadow;
                        vc4->indexbuf.index_size = 2;
                        vc4->indexbuf.offset = 0;
                } else {
                        pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
                        vc4->indexbuf.index_size = ib->index_size;
                        vc4->indexbuf.offset = ib->offset;
                }
        } else {
                pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
        }

        vc4->dirty |= VC4_DIRTY_INDEXBUF;
}
static void
vc4_resource_destroy(struct pipe_screen *pscreen,
                     struct pipe_resource *prsc)
{
    struct vc4_resource *rsc = vc4_resource(prsc);
    pipe_resource_reference(&rsc->shadow_parent, NULL);
    vc4_bo_unreference(&rsc->bo);
    free(rsc);
}
static void
vc4_set_framebuffer_state(struct pipe_context *pctx,
                          const struct pipe_framebuffer_state *framebuffer)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct pipe_framebuffer_state *cso = &vc4->framebuffer;
        unsigned i;

        vc4_flush(pctx);
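        /* The flush is needed because any queued rendering still targets the
         * previous framebuffer surfaces.
         */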

        for (i = 0; i < framebuffer->nr_cbufs; i++)
                pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]);
        for (; i < vc4->framebuffer.nr_cbufs; i++)
                pipe_surface_reference(&cso->cbufs[i], NULL);

        cso->nr_cbufs = framebuffer->nr_cbufs;

        pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf);

        cso->width = framebuffer->width;
        cso->height = framebuffer->height;

        /* Nonzero texture mipmap levels are laid out as if they were in
         * power-of-two-sized spaces.  The renderbuffer config infers its
         * stride from the width parameter, so we need to configure our
         * framebuffer.  Note that if the z/color buffers were mismatched
         * sizes, we wouldn't be able to do this.
         */
        if (cso->cbufs[0] && cso->cbufs[0]->u.tex.level) {
                struct vc4_resource *rsc =
                        vc4_resource(cso->cbufs[0]->texture);
                cso->width =
                        (rsc->slices[cso->cbufs[0]->u.tex.level].stride /
                         rsc->cpp);
        } else if (cso->zsbuf && cso->zsbuf->u.tex.level) {
                struct vc4_resource *rsc =
                        vc4_resource(cso->zsbuf->texture);
                cso->width =
                        (rsc->slices[cso->zsbuf->u.tex.level].stride /
                         rsc->cpp);
        }

        vc4->dirty |= VC4_DIRTY_FRAMEBUFFER;
}
static boolean
vc4_resource_get_handle(struct pipe_screen *pscreen,
                        struct pipe_resource *prsc,
                        struct winsys_handle *handle)
{
    struct vc4_resource *rsc = vc4_resource(prsc);

    return vc4_screen_bo_get_handle(pscreen, rsc->bo, rsc->slices[0].stride,
                                    handle);
}
/**
 * Returns whether the current command lists reference the given BO, so that
 * callers can decide if a flush is needed before accessing it.
 *
 * This helps avoid flushing the command buffers when unnecessary.
 */
bool
vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo,
                     bool include_reads)
{
    struct vc4_context *vc4 = vc4_context(pctx);

    if (!vc4->needs_flush)
        return false;

    /* Walk all the referenced BOs in the drawing command list to see if
     * they match.
     */
    if (include_reads) {
        struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
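        /* bo_handles stores one 32-bit handle per referenced BO, parallel to
         * the bo_pointers array, so cl_offset() / 4 is the entry count.
         */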
        for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
            if (referenced_bos[i] == bo) {
                return true;
            }
        }
    }

    /* Also check for the Z/color buffers, since the references to those
     * are only added immediately before submit.
     */
    struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
    if (csurf) {
        struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
        if (ctex->bo == bo) {
            return true;
        }
    }

    struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
    if (zsurf) {
        struct vc4_resource *ztex =
            vc4_resource(zsurf->base.texture);
        if (ztex->bo == bo) {
            return true;
        }
    }

    return false;
}
static enum vc4_texture_data_type
get_resource_texture_format(struct pipe_resource *prsc)
{
    struct vc4_resource *rsc = vc4_resource(prsc);
    uint8_t format = vc4_get_tex_format(prsc->format);

    if (!rsc->tiled) {
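        /* Untiled (raster) resources are sampled through the RGBA32R raster
         * texture type, which is only used with RGBA8888 data here.
         */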
        assert(format == VC4_TEXTURE_TYPE_RGBA8888);
        return VC4_TEXTURE_TYPE_RGBA32R;
    }

    return format;
}
static void
vc4_update_shadow_textures(struct pipe_context *pctx,
                           struct vc4_texture_stateobj *stage_tex)
{
        for (int i = 0; i < stage_tex->num_textures; i++) {
                struct pipe_sampler_view *view = stage_tex->textures[i];
                if (!view)
                        continue;
                struct vc4_resource *rsc = vc4_resource(view->texture);
                if (rsc->shadow_parent)
                        vc4_update_shadow_baselevel_texture(pctx, view);
        }
}
static struct pipe_sampler_view *
vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                        const struct pipe_sampler_view *cso)
{
        struct pipe_sampler_view *so = malloc(sizeof(*so));

        if (!so)
                return NULL;

        *so = *cso;

        pipe_reference(NULL, &prsc->reference);

        /* There is no hardware level clamping, and the start address of a
         * texture may be misaligned, so in that case we have to copy to a
         * temporary.
         */
        if (so->u.tex.first_level) {
                struct vc4_resource *shadow_parent = vc4_resource(prsc);
                struct pipe_resource tmpl = shadow_parent->base.b;
                struct vc4_resource *clone;

                tmpl.width0 = u_minify(tmpl.width0, so->u.tex.first_level);
                tmpl.height0 = u_minify(tmpl.height0, so->u.tex.first_level);
                tmpl.last_level = so->u.tex.last_level - so->u.tex.first_level;

                prsc = vc4_resource_create(pctx->screen, &tmpl);
                clone = vc4_resource(prsc);
                clone->shadow_parent = &shadow_parent->base.b;
                /* Flag it as needing update of the contents from the parent. */
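                /* Keeping the clone one write behind the parent makes
                 * vc4_update_shadow_baselevel_texture() copy the parent's
                 * levels before first use.
                 */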
                clone->writes = shadow_parent->writes - 1;
        }
        so->texture = prsc;
        so->reference.count = 1;
        so->context = pctx;

        return so;
}
static void
vc4_predraw_check_textures(struct pipe_context *pctx,
                           struct vc4_texture_stateobj *stage_tex)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        for (int i = 0; i < stage_tex->num_textures; i++) {
                struct pipe_sampler_view *view = stage_tex->textures[i];
                if (!view)
                        continue;
                struct vc4_resource *rsc = vc4_resource(view->texture);
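                /* Bring any shadow copy up to date, and flush any queued job
                 * that writes this texture so the draw samples its final
                 * contents.
                 */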
                if (rsc->shadow_parent)
                        vc4_update_shadow_baselevel_texture(pctx, view);

                vc4_flush_jobs_writing_resource(vc4, view->texture);
        }
}
static void
vc4_submit_setup_rcl_msaa_surface(struct vc4_context *vc4,
                                  struct drm_vc4_submit_rcl_surface *submit_surf,
                                  struct pipe_surface *psurf)
{
        struct vc4_surface *surf = vc4_surface(psurf);

        if (!surf) {
                submit_surf->hindex = ~0;
                return;
        }

        struct vc4_resource *rsc = vc4_resource(psurf->texture);
        submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
        submit_surf->offset = surf->offset;
        submit_surf->bits = 0;
        rsc->writes++;
}
static void
vc4_submit_setup_rcl_surface(struct vc4_context *vc4,
                             struct drm_vc4_submit_rcl_surface *submit_surf,
                             struct pipe_surface *psurf,
                             bool is_depth, bool is_write)
{
        struct vc4_surface *surf = vc4_surface(psurf);

        if (!surf) {
                submit_surf->hindex = ~0;
                return;
        }

        struct vc4_resource *rsc = vc4_resource(psurf->texture);
        submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
        submit_surf->offset = surf->offset;

        if (psurf->texture->nr_samples == 0) {
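                /* Non-MSAA surfaces use the general load/store path, with the
                 * buffer, format, and tiling packed into the bits field.
                 */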
                if (is_depth) {
                        submit_surf->bits =
                                VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
                                              VC4_LOADSTORE_TILE_BUFFER_BUFFER);

                } else {
                        submit_surf->bits =
                                VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
                                              VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
                                VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
                                              VC4_LOADSTORE_TILE_BUFFER_BGR565 :
                                              VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
                                              VC4_LOADSTORE_TILE_BUFFER_FORMAT);
                }
                submit_surf->bits |=
                        VC4_SET_FIELD(surf->tiling,
                                      VC4_LOADSTORE_TILE_BUFFER_TILING);
        } else {
                assert(!is_write);
                submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
        }

        if (is_write)
                rsc->writes++;
}
static void *
vc4_resource_transfer_map(struct pipe_context *pctx,
                          struct pipe_resource *prsc,
                          unsigned level, unsigned usage,
                          const struct pipe_box *box,
                          struct pipe_transfer **pptrans)
{
    struct vc4_context *vc4 = vc4_context(pctx);
    struct vc4_resource *rsc = vc4_resource(prsc);
    struct vc4_resource_slice *slice = &rsc->slices[level];
    struct vc4_transfer *trans;
    struct pipe_transfer *ptrans;
    enum pipe_format format = prsc->format;
    char *buf;

    if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
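        /* A whole-resource discard gets a fresh BO so we never stall on the
         * old contents.
         */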
        vc4_resource_bo_alloc(rsc);
    } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
        /* Write mappings also have to wait for CL reads of this BO, while
         * read-only mappings only care about CL writes.
         */
        if (vc4_cl_references_bo(pctx, rsc->bo,
                                 usage & PIPE_TRANSFER_WRITE)) {
            if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
                    prsc->last_level == 0 &&
                    prsc->width0 == box->width &&
                    prsc->height0 == box->height &&
                    prsc->depth0 == box->depth) {
                vc4_resource_bo_alloc(rsc);
            } else {
                vc4_flush(pctx);
            }
        }
    }

    if (usage & PIPE_TRANSFER_WRITE)
        rsc->writes++;

    trans = util_slab_alloc(&vc4->transfer_pool);
    if (!trans)
        return NULL;

    /* XXX: Handle DONTBLOCK, DISCARD_RANGE, PERSISTENT, COHERENT. */

    /* util_slab_alloc() doesn't zero: */
    memset(trans, 0, sizeof(*trans));
    ptrans = &trans->base;

    pipe_resource_reference(&ptrans->resource, prsc);
    ptrans->level = level;
    ptrans->usage = usage;
    ptrans->box = *box;

    /* Note that the current kernel implementation is synchronous, so no
     * need to do syncing stuff here yet.
     */

    buf = vc4_bo_map(rsc->bo);
    if (!buf) {
        fprintf(stderr, "Failed to map bo\n");
        goto fail;
    }

    *pptrans = ptrans;

    if (rsc->tiled) {
        uint32_t utile_w = vc4_utile_width(rsc->cpp);
        uint32_t utile_h = vc4_utile_height(rsc->cpp);

        /* No direct mappings of tiled, since we need to manually
         * tile/untile.
         */
        if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
            return NULL;

        /* We need to align the box to utile boundaries, since that's
         * what load/store operate on.
         */
        uint32_t box_start_x = ptrans->box.x & (utile_w - 1);
        uint32_t box_start_y = ptrans->box.y & (utile_h - 1);
        ptrans->box.width += box_start_x;
        ptrans->box.x -= box_start_x;
        ptrans->box.height += box_start_y;
        ptrans->box.y -= box_start_y;
        ptrans->box.width = align(ptrans->box.width, utile_w);
        ptrans->box.height = align(ptrans->box.height, utile_h);

        ptrans->stride = ptrans->box.width * rsc->cpp;
        ptrans->layer_stride = ptrans->stride;

        trans->map = malloc(ptrans->stride * ptrans->box.height);
        if (usage & PIPE_TRANSFER_READ) {
            vc4_load_tiled_image(trans->map, ptrans->stride,
                                 buf + slice->offset +
                                 box->z * rsc->cube_map_stride,
                                 slice->stride,
                                 slice->tiling, rsc->cpp,
                                 &ptrans->box);
        }
        return (trans->map +
                box_start_x * rsc->cpp +
                box_start_y * ptrans->stride);
    } else {
        ptrans->stride = slice->stride;
        ptrans->layer_stride = ptrans->stride;

        return buf + slice->offset +
               box->y / util_format_get_blockheight(format) * ptrans->stride +
               box->x / util_format_get_blockwidth(format) * rsc->cpp +
               box->z * rsc->cube_map_stride;
    }


fail:
    vc4_resource_transfer_unmap(pctx, ptrans);
    return NULL;
}
static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        if (info->mode >= PIPE_PRIM_QUADS) {
                util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
                util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
                util_primconvert_draw_vbo(vc4->primconvert, info);
                perf_debug("Fallback conversion for %d %s vertices\n",
                           info->count, u_prim_name(info->mode));
                return;
        }

        /* Before setting up the draw, do any fixup blits necessary. */
        vc4_predraw_check_textures(pctx, &vc4->verttex);
        vc4_predraw_check_textures(pctx, &vc4->fragtex);

        vc4_hw_2116_workaround(pctx, info->count);

        struct vc4_job *job = vc4_get_job_for_fbo(vc4);

        vc4_get_draw_cl_space(job, info->count);

        if (vc4->prim_mode != info->mode) {
                vc4->prim_mode = info->mode;
                vc4->dirty |= VC4_DIRTY_PRIM_MODE;
        }

        vc4_start_draw(vc4);
        if (!vc4_update_compiled_shaders(vc4, info->mode)) {
                debug_warn_once("shader compile failed, skipping draw call.\n");
                return;
        }

        vc4_emit_state(pctx);

        if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
                           VC4_DIRTY_VTXSTATE |
                           VC4_DIRTY_PRIM_MODE |
                           VC4_DIRTY_RASTERIZER |
                           VC4_DIRTY_COMPILED_CS |
                           VC4_DIRTY_COMPILED_VS |
                           VC4_DIRTY_COMPILED_FS |
                           vc4->prog.cs->uniform_dirty_bits |
                           vc4->prog.vs->uniform_dirty_bits |
                           vc4->prog.fs->uniform_dirty_bits)) ||
            vc4->last_index_bias != info->index_bias) {
                vc4_emit_gl_shader_state(vc4, info, 0);
        }

        vc4->dirty = 0;

        /* Note that the primitive type fields match with OpenGL/gallium
         * definitions, up to but not including QUADS.
         */
        struct vc4_cl_out *bcl = cl_start(&job->bcl);
        if (info->indexed) {
                uint32_t offset = vc4->indexbuf.offset;
                uint32_t index_size = vc4->indexbuf.index_size;
                struct pipe_resource *prsc;
                if (vc4->indexbuf.index_size == 4) {
                        prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf,
                                                           info->count, &offset);
                        index_size = 2;
                } else {
                        if (vc4->indexbuf.user_buffer) {
                                prsc = NULL;
                                u_upload_data(vc4->uploader, 0,
                                              info->count * index_size, 4,
                                              vc4->indexbuf.user_buffer,
                                              &offset, &prsc);
                        } else {
                                prsc = vc4->indexbuf.buffer;
                        }
                }
                struct vc4_resource *rsc = vc4_resource(prsc);

                cl_start_reloc(&job->bcl, &bcl, 1);
                cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
                cl_u8(&bcl,
                      info->mode |
                      (index_size == 2 ?
                       VC4_INDEX_BUFFER_U16:
                       VC4_INDEX_BUFFER_U8));
                cl_u32(&bcl, info->count);
                cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
                cl_u32(&bcl, vc4->max_index);
                job->draw_calls_queued++;

                if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
                        pipe_resource_reference(&prsc, NULL);
        } else {
                uint32_t count = info->count;
                uint32_t start = info->start;
                uint32_t extra_index_bias = 0;

                while (count) {
                        uint32_t this_count = count;
                        uint32_t step = count;
                        static const uint32_t max_verts = 65535;

                        /* GFXH-515 / SW-5891: The binner emits 16 bit indices
                         * for drawarrays, which means that if start + count >
                         * 64k it would truncate the top bits.  Work around
                         * this by emitting a limited number of primitives at
                         * a time and reemitting the shader state pointing
                         * farther down the vertex attribute arrays.
                         *
                         * To do this properly for line loops or trifans, we'd
                         * need to make a new VB containing the first vertex
                         * plus whatever remainder.
                         */
                        if (extra_index_bias) {
                                cl_end(&job->bcl, bcl);
                                vc4_emit_gl_shader_state(vc4, info,
                                                         extra_index_bias);
                                bcl = cl_start(&job->bcl);
                        }

                        if (start + count > max_verts) {
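                                /* For strip/loop primitives, step is smaller
                                 * than this_count so the next chunk re-emits
                                 * the shared vertices.
                                 */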
                                switch (info->mode) {
                                case PIPE_PRIM_POINTS:
                                        this_count = step = max_verts;
                                        break;
                                case PIPE_PRIM_LINES:
                                        this_count = step = max_verts - (max_verts % 2);
                                        break;
                                case PIPE_PRIM_LINE_STRIP:
                                        this_count = max_verts;
                                        step = max_verts - 1;
                                        break;
                                case PIPE_PRIM_LINE_LOOP:
                                        this_count = max_verts;
                                        step = max_verts - 1;
                                        debug_warn_once("unhandled line loop "
                                                        "looping behavior with "
                                                        ">65535 verts\n");
                                        break;
                                case PIPE_PRIM_TRIANGLES:
                                        this_count = step = max_verts - (max_verts % 3);
                                        break;
                                case PIPE_PRIM_TRIANGLE_STRIP:
                                        this_count = max_verts;
                                        step = max_verts - 2;
                                        break;
                                default:
                                        debug_warn_once("unhandled primitive "
                                                        "max vert count, truncating\n");
                                        this_count = step = max_verts;
                                }
                        }

                        cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
                        cl_u8(&bcl, info->mode);
                        cl_u32(&bcl, this_count);
                        cl_u32(&bcl, start);
                        job->draw_calls_queued++;

                        count -= step;
                        extra_index_bias += start + step;
                        start = 0;
                }
        }
        cl_end(&job->bcl, bcl);

        /* We shouldn't have tripped the HW_2116 bug with the GFXH-515
         * workaround.
         */
        assert(job->draw_calls_queued <= VC4_HW_2116_COUNT);

        if (vc4->zsa && vc4->framebuffer.zsbuf) {
                struct vc4_resource *rsc =
                        vc4_resource(vc4->framebuffer.zsbuf->texture);

                if (vc4->zsa->base.depth.enabled) {
                        job->resolve |= PIPE_CLEAR_DEPTH;
                        rsc->initialized_buffers = PIPE_CLEAR_DEPTH;
                }

                if (vc4->zsa->base.stencil[0].enabled) {
                        job->resolve |= PIPE_CLEAR_STENCIL;
                        rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
                }
        }

        job->resolve |= PIPE_CLEAR_COLOR0;
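        /* job->resolve records which tile-buffer contents have to be stored
         * when the job is submitted.
         */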

        if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
                vc4_flush(pctx);
}
static void
vc4_setup_rcl(struct vc4_context *vc4)
{
        struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
        struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
        struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
        struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;

        if (!csurf)
                vc4->resolve &= ~PIPE_CLEAR_COLOR0;
        if (!zsurf)
                vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
        uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
        uint32_t width = vc4->framebuffer.width;
        uint32_t height = vc4->framebuffer.height;
        uint32_t stride_in_tiles = align(width, 64) / 64;

        assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
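        /* Tile coordinates are in units of 64x64-pixel tiles; only the tiles
         * covered by the draw bounding box get walked below.
         */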
        uint32_t min_x_tile = vc4->draw_min_x / 64;
        uint32_t min_y_tile = vc4->draw_min_y / 64;
        uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64;
        uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64;
        uint32_t xtiles = max_x_tile - min_x_tile + 1;
        uint32_t ytiles = max_y_tile - min_y_tile + 1;

#if 0
        fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
                vc4->resolve,
                vc4->cleared,
                resolve_uncleared);
#endif

        uint32_t reloc_size = 9;
        uint32_t clear_size = 14;
        uint32_t config_size = 11 + reloc_size;
        uint32_t loadstore_size = 7 + reloc_size;
        uint32_t tilecoords_size = 3;
        uint32_t branch_size = 5 + reloc_size;
        uint32_t color_store_size = 1;
        uint32_t semaphore_size = 1;
        cl_ensure_space(&vc4->rcl,
                        clear_size +
                        config_size +
                        loadstore_size +
                        semaphore_size +
                        xtiles * ytiles * (loadstore_size * 4 +
                                           tilecoords_size * 3 +
                                           branch_size +
                                           color_store_size));

        if (vc4->cleared) {
                cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
                cl_u32(&vc4->rcl, vc4->clear_color[0]);
                cl_u32(&vc4->rcl, vc4->clear_color[1]);
                cl_u32(&vc4->rcl, vc4->clear_depth);
                cl_u8(&vc4->rcl, vc4->clear_stencil);
        }

        /* The rendering mode config determines the pointer that's used for
         * VC4_PACKET_STORE_MS_TILE_BUFFER address computations.  The kernel
         * could handle a no-relocation rendering mode config and deny those
         * packets, but instead we just tell the kernel we're doing our color
         * rendering to the Z buffer, and just don't emit any of those
         * packets.
         */
        struct vc4_surface *render_surf = csurf ? csurf : zsurf;
        struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
        cl_start_reloc(&vc4->rcl, 1);
        cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
        cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
        cl_u16(&vc4->rcl, width);
        cl_u16(&vc4->rcl, height);
        cl_u16(&vc4->rcl, ((render_surf->tiling <<
                            VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
                           (vc4_rt_format_is_565(render_surf->base.format) ?
                            VC4_RENDER_CONFIG_FORMAT_BGR565 :
                            VC4_RENDER_CONFIG_FORMAT_RGBA8888)));

        /* The tile buffer normally gets cleared when the previous tile is
         * stored.  If the clear values changed between frames, then the tile
         * buffer has stale clear values in it, so we have to do a store in
         * None mode (no writes) so that we trigger the tile buffer clear.
         *
         * Excess clearing is only a performance cost, since per-tile contents
         * will be loaded/stored in the loop below.
         */
        if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
                            PIPE_CLEAR_DEPTH |
                            PIPE_CLEAR_STENCIL)) {
                cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
                cl_u8(&vc4->rcl, 0);
                cl_u8(&vc4->rcl, 0);

                cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
                cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
                cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
        }

        uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
        uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
        uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
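        /* The hindexes are the BOs' positions in the submit's handle list,
         * referenced by cl_reloc_hindex() in the per-tile packets below.
         */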

        for (int y = min_y_tile; y <= max_y_tile; y++) {
                for (int x = min_x_tile; x <= max_x_tile; x++) {
                        bool end_of_frame = (x == max_x_tile &&
                                             y == max_y_tile);
                        bool coords_emitted = false;

                        /* Note that the load doesn't actually occur until the
                         * tile coords packet is processed, and only one load
                         * may be outstanding at a time.
                         */
                        if (resolve_uncleared & PIPE_CLEAR_COLOR) {
                                vc4_store_before_load(vc4, &coords_emitted);

                                cl_start_reloc(&vc4->rcl, 1);
                                cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
                                cl_u8(&vc4->rcl,
                                      VC4_LOADSTORE_TILE_BUFFER_COLOR |
                                      (csurf->tiling <<
                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                cl_u8(&vc4->rcl,
                                      vc4_rt_format_is_565(csurf->base.format) ?
                                      VC4_LOADSTORE_TILE_BUFFER_BGR565 :
                                      VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
                                cl_reloc_hindex(&vc4->rcl, color_hindex,
                                                csurf->offset);

                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
                        }

                        if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                                vc4_store_before_load(vc4, &coords_emitted);

                                cl_start_reloc(&vc4->rcl, 1);
                                cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
                                cl_u8(&vc4->rcl,
                                      VC4_LOADSTORE_TILE_BUFFER_ZS |
                                      (zsurf->tiling <<
                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                cl_u8(&vc4->rcl, 0);
                                cl_reloc_hindex(&vc4->rcl, depth_hindex,
                                                zsurf->offset);

                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
                        }

                        /* Clipping depends on tile coordinates having been
                         * emitted, so make sure it's happened even if
                         * everything was cleared to start.
                         */
                        vc4_tile_coordinates(vc4, x, y, &coords_emitted);

                        /* Wait for the binner before jumping to the first
                         * tile's lists.
                         */
                        if (x == min_x_tile && y == min_y_tile)
                                cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);

                        cl_start_reloc(&vc4->rcl, 1);
                        cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
                        cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
                                        (y * stride_in_tiles + x) * 32);

                        if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);

                                cl_start_reloc(&vc4->rcl, 1);
                                cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
                                cl_u8(&vc4->rcl,
                                      VC4_LOADSTORE_TILE_BUFFER_ZS |
                                      (zsurf->tiling <<
                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                cl_u8(&vc4->rcl,
                                      VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
                                cl_reloc_hindex(&vc4->rcl, depth_hindex,
                                                zsurf->offset |
                                                ((end_of_frame &&
                                                  !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
                                                 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));

                                coords_emitted = false;
                        }

                        if (vc4->resolve & PIPE_CLEAR_COLOR0) {
                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
                                if (end_of_frame) {
                                        cl_u8(&vc4->rcl,
                                              VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
                                } else {
                                        cl_u8(&vc4->rcl,
                                              VC4_PACKET_STORE_MS_TILE_BUFFER);
                                }

                                coords_emitted = false;
                        }

                        /* One of the bits needs to have been set that would
                         * have triggered an EOF.
                         */
                        assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
                                               PIPE_CLEAR_DEPTH |
                                               PIPE_CLEAR_STENCIL));
                        /* Any coords emitted must also have been consumed by
                         * a store.
                         */
                        assert(!coords_emitted);
                }
        }

        if (vc4->resolve & PIPE_CLEAR_COLOR0)
                ctex->writes++;

        if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
                ztex->writes++;
}
static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        if (info->mode >= PIPE_PRIM_QUADS) {
                util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
                util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
                util_primconvert_draw_vbo(vc4->primconvert, info);
                perf_debug("Fallback conversion for %d %s vertices\n",
                           info->count, u_prim_name(info->mode));
                return;
        }

        /* Before setting up the draw, do any fixup blits necessary. */
        vc4_update_shadow_textures(pctx, &vc4->verttex);
        vc4_update_shadow_textures(pctx, &vc4->fragtex);

        vc4_hw_2116_workaround(pctx);

        vc4_get_draw_cl_space(vc4);

        if (vc4->prim_mode != info->mode) {
                vc4->prim_mode = info->mode;
                vc4->dirty |= VC4_DIRTY_PRIM_MODE;
        }

        vc4_start_draw(vc4);
        vc4_update_compiled_shaders(vc4, info->mode);

        vc4_emit_state(pctx);

        if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
                           VC4_DIRTY_VTXSTATE |
                           VC4_DIRTY_PRIM_MODE |
                           VC4_DIRTY_RASTERIZER |
                           VC4_DIRTY_COMPILED_CS |
                           VC4_DIRTY_COMPILED_VS |
                           VC4_DIRTY_COMPILED_FS |
                           vc4->prog.cs->uniform_dirty_bits |
                           vc4->prog.vs->uniform_dirty_bits |
                           vc4->prog.fs->uniform_dirty_bits)) ||
            vc4->last_index_bias != info->index_bias) {
                vc4_emit_gl_shader_state(vc4, info);
        }

        vc4->dirty = 0;

        /* Note that the primitive type fields match with OpenGL/gallium
         * definitions, up to but not including QUADS.
         */
        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
        if (info->indexed) {
                uint32_t offset = vc4->indexbuf.offset;
                uint32_t index_size = vc4->indexbuf.index_size;
                struct pipe_resource *prsc;
                if (vc4->indexbuf.index_size == 4) {
                        prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf,
                                                           info->count, &offset);
                        index_size = 2;
                } else {
                        if (vc4->indexbuf.user_buffer) {
                                prsc = NULL;
                                u_upload_data(vc4->uploader, 0,
                                              info->count * index_size, 4,
                                              vc4->indexbuf.user_buffer,
                                              &offset, &prsc);
                        } else {
                                prsc = vc4->indexbuf.buffer;
                        }
                }
                struct vc4_resource *rsc = vc4_resource(prsc);

                cl_start_reloc(&vc4->bcl, &bcl, 1);
                cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
                cl_u8(&bcl,
                      info->mode |
                      (index_size == 2 ?
                       VC4_INDEX_BUFFER_U16:
                       VC4_INDEX_BUFFER_U8));
                cl_u32(&bcl, info->count);
                cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
                cl_u32(&bcl, vc4->max_index);

                if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
                        pipe_resource_reference(&prsc, NULL);
        } else {
                cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
                cl_u8(&bcl, info->mode);
                cl_u32(&bcl, info->count);
                cl_u32(&bcl, info->start);
        }
        cl_end(&vc4->bcl, bcl);

        if (vc4->zsa && vc4->zsa->base.depth.enabled) {
                vc4->resolve |= PIPE_CLEAR_DEPTH;
        }
        if (vc4->zsa && vc4->zsa->base.stencil[0].enabled)
                vc4->resolve |= PIPE_CLEAR_STENCIL;
        vc4->resolve |= PIPE_CLEAR_COLOR0;

        vc4->shader_rec_count++;

        if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
                vc4_flush(pctx);
}
static void
vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *info)
{
        /* VC4_DIRTY_VTXSTATE */
        struct vc4_vertex_stateobj *vtx = vc4->vtx;
        /* VC4_DIRTY_VTXBUF */
        struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;

        /* The simulator throws a fit if VS or CS don't read an attribute, so
         * we emit a dummy read.
         */
        uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
        /* Emit the shader record. */
        struct vc4_cl_out *shader_rec =
                cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
        /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
        cl_u16(&shader_rec,
               VC4_SHADER_FLAG_ENABLE_CLIPPING |
               VC4_SHADER_FLAG_FS_SINGLE_THREAD |
               ((info->mode == PIPE_PRIM_POINTS &&
                 vc4->rasterizer->base.point_size_per_vertex) ?
                VC4_SHADER_FLAG_VS_POINT_SIZE : 0));

        /* VC4_DIRTY_COMPILED_FS */
        cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
        cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */

        /* VC4_DIRTY_COMPILED_VS */
        cl_u16(&shader_rec, 0); /* vs num uniforms */
        cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
        cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */

        /* VC4_DIRTY_COMPILED_CS */
        cl_u16(&shader_rec, 0); /* cs num uniforms */
        cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
        cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */

        uint32_t max_index = 0xffff;
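        /* Find the largest vertex index that can be fetched without reading
         * past the end of any bound vertex buffer; vc4_draw_vbo() emits it as
         * the max-index field of indexed primitives.
         */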
        for (int i = 0; i < vtx->num_elements; i++) {
                struct pipe_vertex_element *elem = &vtx->pipe[i];
                struct pipe_vertex_buffer *vb =
                        &vertexbuf->vb[elem->vertex_buffer_index];
                struct vc4_resource *rsc = vc4_resource(vb->buffer);
                /* not vc4->dirty tracked: vc4->last_index_bias */
                uint32_t offset = (vb->buffer_offset +
                                   elem->src_offset +
                                   vb->stride * info->index_bias);
                uint32_t vb_size = rsc->bo->size - offset;
                uint32_t elem_size =
                        util_format_get_blocksize(elem->src_format);

                cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset);
                cl_u8(&shader_rec, elem_size - 1);
                cl_u8(&shader_rec, vb->stride);
                cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
                cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[i]);

                if (vb->stride > 0) {
                        max_index = MIN2(max_index,
                                         (vb_size - elem_size) / vb->stride);
                }
        }

        if (vtx->num_elements == 0) {
                assert(num_elements_emit == 1);
                struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
                cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0);
                cl_u8(&shader_rec, 16 - 1); /* element size */
                cl_u8(&shader_rec, 0); /* stride */
                cl_u8(&shader_rec, 0); /* VS VPM offset */
                cl_u8(&shader_rec, 0); /* CS VPM offset */
                vc4_bo_unreference(&bo);
        }
        cl_end(&vc4->shader_rec, shader_rec);

        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
        /* the actual draw call. */
        cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
        assert(vtx->num_elements <= 8);
        /* Note that number of attributes == 0 in the packet means 8
         * attributes.  This field also contains the offset into shader_rec.
         */
        cl_u32(&bcl, num_elements_emit & 0x7);
        cl_end(&vc4->bcl, bcl);

        vc4_write_uniforms(vc4, vc4->prog.fs,
                           &vc4->constbuf[PIPE_SHADER_FRAGMENT],
                           &vc4->fragtex);
        vc4_write_uniforms(vc4, vc4->prog.vs,
                           &vc4->constbuf[PIPE_SHADER_VERTEX],
                           &vc4->verttex);
        vc4_write_uniforms(vc4, vc4->prog.cs,
                           &vc4->constbuf[PIPE_SHADER_VERTEX],
                           &vc4->verttex);

        vc4->last_index_bias = info->index_bias;
        vc4->max_index = max_index;
}
static void
vc4_clear(struct pipe_context *pctx, unsigned buffers,
          const union pipe_color_union *color, double depth, unsigned stencil)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_job *job = vc4_get_job_for_fbo(vc4);

        /* We can't flag new buffers for clearing once we've queued draws.  We
         * could avoid this by using the 3d engine to clear.
         */
        if (job->draw_calls_queued) {
                perf_debug("Flushing rendering to process new clear.\n");
                vc4_job_submit(vc4, job);
                job = vc4_get_job_for_fbo(vc4);
        }

        if (buffers & PIPE_CLEAR_COLOR0) {
                struct vc4_resource *rsc =
                        vc4_resource(vc4->framebuffer.cbufs[0]->texture);
                uint32_t clear_color;

                if (vc4_rt_format_is_565(vc4->framebuffer.cbufs[0]->format)) {
                        /* In 565 mode, the hardware will be packing our color
                         * for us.
                         */
                        clear_color = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM,
                                                color->f);
                } else {
                        /* Otherwise, we need to do this packing because we
                         * support multiple swizzlings of RGBA8888.
                         */
                        clear_color =
                                pack_rgba(vc4->framebuffer.cbufs[0]->format,
                                          color->f);
                }
                job->clear_color[0] = job->clear_color[1] = clear_color;
                rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0);
        }

        if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
                struct vc4_resource *rsc =
                        vc4_resource(vc4->framebuffer.zsbuf->texture);
                unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL;

                /* Clearing ZS will clear both Z and stencil, so if we're
                 * trying to clear just one then we need to draw a quad to do
                 * it instead.
                 */
                if ((zsclear == PIPE_CLEAR_DEPTH ||
                     zsclear == PIPE_CLEAR_STENCIL) &&
                    (rsc->initialized_buffers & ~(zsclear | job->cleared)) &&
                    util_format_is_depth_and_stencil(vc4->framebuffer.zsbuf->format)) {
                        perf_debug("Partial clear of Z+stencil buffer, "
                                   "drawing a quad instead of fast clearing\n");
                        vc4_blitter_save(vc4);
                        util_blitter_clear(vc4->blitter,
                                           vc4->framebuffer.width,
                                           vc4->framebuffer.height,
                                           1,
                                           zsclear,
                                           NULL, depth, stencil);
                        buffers &= ~zsclear;
                        if (!buffers)
                                return;
                }

                /* Though the depth buffer is stored with Z in the high 24,
                 * for this field we just need to store it in the low 24.
                 */
                if (buffers & PIPE_CLEAR_DEPTH) {
                        job->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
                                                       depth);
                }
                if (buffers & PIPE_CLEAR_STENCIL)
                        job->clear_stencil = stencil;

                rsc->initialized_buffers |= zsclear;
        }

        job->draw_min_x = 0;
        job->draw_min_y = 0;
        job->draw_max_x = vc4->framebuffer.width;
        job->draw_max_y = vc4->framebuffer.height;
        job->cleared |= buffers;
        job->resolve |= buffers;

        vc4_start_draw(vc4);
}
int
vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
{
        struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
        struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
        uint32_t winsys_stride = ctex->bo->simulator_winsys_stride;
        uint32_t sim_stride = ctex->slices[0].stride;
        uint32_t row_len = MIN2(sim_stride, winsys_stride);
        struct exec_info exec;
        struct drm_device local_dev = {
                .vc4 = vc4,
                .simulator_mem_next = OVERFLOW_SIZE,
        };
        struct drm_device *dev = &local_dev;
        int ret;

        memset(&exec, 0, sizeof(exec));

        if (ctex->bo->simulator_winsys_map) {
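                /* The winsys (scanout) mapping and the simulator's copy of the
                 * BO can have different strides, so copy the contents in row
                 * by row before rendering (and back out afterwards).
                 */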
#if 0
                fprintf(stderr, "%dx%d %d %d %d\n",
                        ctex->base.b.width0, ctex->base.b.height0,
                        winsys_stride,
                        sim_stride,
                        ctex->bo->size);
#endif

                for (int y = 0; y < ctex->base.b.height0; y++) {
                        memcpy(ctex->bo->map + y * sim_stride,
                               ctex->bo->simulator_winsys_map + y * winsys_stride,
                               row_len);
                }
        }

        exec.args = args;

        ret = vc4_simulator_pin_bos(dev, &exec);
        if (ret)
                return ret;

        ret = vc4_cl_validate(dev, &exec);
        if (ret)
                return ret;

        simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
        simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);

        ret = vc4_simulator_unpin_bos(&exec);
        if (ret)
                return ret;

        free(exec.exec_bo);

        if (ctex->bo->simulator_winsys_map) {
                for (int y = 0; y < ctex->base.b.height0; y++) {
                        memcpy(ctex->bo->simulator_winsys_map + y * winsys_stride,
                               ctex->bo->map + y * sim_stride,
                               row_len);
                }
        }

        return 0;
}
static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        if (info->mode >= PIPE_PRIM_QUADS) {
                util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
                util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
                util_primconvert_draw_vbo(vc4->primconvert, info);
                return;
        }

        struct vc4_vertex_stateobj *vtx = vc4->vtx;
        struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;

        if (vc4->prim_mode != info->mode) {
                vc4->prim_mode = info->mode;
                vc4->dirty |= VC4_DIRTY_PRIM_MODE;
        }

        vc4_start_draw(vc4);
        vc4_update_compiled_shaders(vc4, info->mode);

        vc4_emit_state(pctx);
        vc4->dirty = 0;

        vc4_write_uniforms(vc4, vc4->prog.fs,
                           &vc4->constbuf[PIPE_SHADER_FRAGMENT],
                           &vc4->fragtex);
        vc4_write_uniforms(vc4, vc4->prog.vs,
                           &vc4->constbuf[PIPE_SHADER_VERTEX],
                           &vc4->verttex);
        vc4_write_uniforms(vc4, vc4->prog.cs,
                           &vc4->constbuf[PIPE_SHADER_VERTEX],
                           &vc4->verttex);

        /* The simulator throws a fit if VS or CS don't read an attribute, so
         * we emit a dummy read.
         */
        uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
        /* Emit the shader record. */
        cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
        cl_u16(&vc4->shader_rec,
               VC4_SHADER_FLAG_ENABLE_CLIPPING |
               ((info->mode == PIPE_PRIM_POINTS &&
                 vc4->rasterizer->base.point_size_per_vertex) ?
                VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
        cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
        cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */

        cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
        cl_u8(&vc4->shader_rec, (1 << num_elements_emit) - 1); /* vs attribute array bitfield */
        cl_u8(&vc4->shader_rec, 16 * num_elements_emit); /* vs total attribute size */
        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */

        cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
        cl_u8(&vc4->shader_rec, (1 << num_elements_emit) - 1); /* cs attribute array bitfield */
        cl_u8(&vc4->shader_rec, 16 * num_elements_emit); /* cs total attribute size */
        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0);
        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */

        uint32_t max_index = 0xffff;
        for (int i = 0; i < vtx->num_elements; i++) {
                struct pipe_vertex_element *elem = &vtx->pipe[i];
                struct pipe_vertex_buffer *vb =
                        &vertexbuf->vb[elem->vertex_buffer_index];
                struct vc4_resource *rsc = vc4_resource(vb->buffer);
                uint32_t offset = vb->buffer_offset + elem->src_offset;
                uint32_t vb_size = rsc->bo->size - offset;
                uint32_t elem_size =
                        util_format_get_blocksize(elem->src_format);

                cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
                cl_u8(&vc4->shader_rec, elem_size - 1);
                cl_u8(&vc4->shader_rec, vb->stride);
                cl_u8(&vc4->shader_rec, i * 16); /* VS VPM offset */
                cl_u8(&vc4->shader_rec, i * 16); /* CS VPM offset */

                if (vb->stride > 0) {
                        max_index = MIN2(max_index,
                                         (vb_size - elem_size) / vb->stride);
                }
        }

        if (vtx->num_elements == 0) {
                assert(num_elements_emit == 1);
                struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
                cl_reloc(vc4, &vc4->shader_rec, bo, 0);
                cl_u8(&vc4->shader_rec, 16 - 1); /* element size */
                cl_u8(&vc4->shader_rec, 0); /* stride */
                cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */
                cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */
                vc4_bo_unreference(&bo);
        }

        /* the actual draw call. */
        cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
        assert(vtx->num_elements <= 8);
        /* Note that number of attributes == 0 in the packet means 8
         * attributes.  This field also contains the offset into shader_rec.
         */
        cl_u32(&vc4->bcl, num_elements_emit & 0x7);

        /* Note that the primitive type fields match with OpenGL/gallium
         * definitions, up to but not including QUADS.
         */
        if (info->indexed) {
                struct vc4_resource *rsc = vc4_resource(vc4->indexbuf.buffer);
                uint32_t offset = vc4->indexbuf.offset;
                uint32_t index_size = vc4->indexbuf.index_size;
                if (rsc->shadow_parent) {
                        vc4_update_shadow_index_buffer(pctx, &vc4->indexbuf);
                        offset = 0;
                        index_size = 2;
                }

                cl_start_reloc(&vc4->bcl, 1);
                cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
                cl_u8(&vc4->bcl,
                      info->mode |
                      (index_size == 2 ?
                       VC4_INDEX_BUFFER_U16 :
                       VC4_INDEX_BUFFER_U8));
                cl_u32(&vc4->bcl, info->count);
                cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
                cl_u32(&vc4->bcl, max_index);
        } else {
                cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
                cl_u8(&vc4->bcl, info->mode);
                cl_u32(&vc4->bcl, info->count);
                cl_u32(&vc4->bcl, info->start);
        }

        if (vc4->zsa && vc4->zsa->base.depth.enabled)
                vc4->resolve |= PIPE_CLEAR_DEPTH;
        if (vc4->zsa && vc4->zsa->base.stencil[0].enabled)
                vc4->resolve |= PIPE_CLEAR_STENCIL;
        vc4->resolve |= PIPE_CLEAR_COLOR0;

        vc4->shader_rec_count++;

        if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
                vc4_flush(pctx);
}
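
/* For reference, a standalone restatement (not part of the driver) of the
 * max_index clamp computed in the attribute loop above: given the bytes
 * remaining in the vertex buffer after the element's offset, the largest
 * index the hardware can fetch without reading past the end of the BO is
 * (vb_size - elem_size) / stride.
 */
static inline uint32_t
vc4_max_index_for_attr(uint32_t vb_size, uint32_t elem_size, uint32_t stride)
{
        /* A constant (stride == 0) attribute never advances, so any index
         * is safe.
         */
        if (stride == 0)
                return 0xffff;

        /* e.g. vb_size = 1000, elem_size = 12, stride = 16
         * -> (1000 - 12) / 16 = 61, so indices 0..61 are in bounds.
         */
        return (vb_size - elem_size) / stride;
}
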
void
vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
                                    struct pipe_sampler_view *view)
{
    struct vc4_resource *shadow = vc4_resource(view->texture);
    struct vc4_resource *orig = vc4_resource(shadow->shadow_parent);
    assert(orig);

    /* Skip the copy if the parent hasn't been written since our last sync. */
    if (shadow->writes == orig->writes)
        return;

    for (int i = 0; i <= shadow->base.b.last_level; i++) {
        struct pipe_box box = {
            .x = 0,
            .y = 0,
            .z = 0,
            .width = u_minify(shadow->base.b.width0, i),
            .height = u_minify(shadow->base.b.height0, i),
            .depth = 1,
        };

        util_resource_copy_region(pctx,
                                  &shadow->base.b, i, 0, 0, 0,
                                  &orig->base.b,
                                  view->u.tex.first_level + i,
                                  &box);
    }

    shadow->writes = orig->writes;
}

/**
 * Converts a 4-byte index buffer to 2-byte indices.
 *
 * Since GLES2 only supports 1- and 2-byte indices, the hardware doesn't
 * include 4-byte index support, and we have to shrink the buffer down.
 *
 * There's no fallback for indices that don't fit in 16 bits, though it will
 * at least fail an assertion.  Also, if the original index data was in user
 * memory, it would be nice to avoid uploading it to a VBO before
 * translating.
 */
void
vc4_update_shadow_index_buffer(struct pipe_context *pctx,
                               const struct pipe_index_buffer *ib)
{
    struct vc4_resource *shadow = vc4_resource(ib->buffer);
    struct vc4_resource *orig = vc4_resource(shadow->shadow_parent);
    uint32_t count = shadow->base.b.width0 / 2;

    /* The shadow is up to date if the parent hasn't been written since the
     * last conversion.
     */
    if (shadow->writes == orig->writes)
        return;

    struct pipe_transfer *src_transfer;
    uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b,
                                          ib->offset,
                                          count * 4,
                                          PIPE_TRANSFER_READ, &src_transfer);

    struct pipe_transfer *dst_transfer;
    uint16_t *dst = pipe_buffer_map_range(pctx, &shadow->base.b,
                                          0,
                                          count * 2,
                                          PIPE_TRANSFER_WRITE, &dst_transfer);

    for (int i = 0; i < count; i++) {
        uint32_t src_index = src[i];
        assert(src_index <= 0xffff);
        dst[i] = src_index;
    }

    pctx->transfer_unmap(pctx, dst_transfer);
    pctx->transfer_unmap(pctx, src_transfer);

    shadow->writes = orig->writes;
}
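
/* A minimal CPU-side sketch (illustration only, with hypothetical array
 * arguments) of the narrowing loop above; the driver itself goes through
 * pipe_buffer_map_range() on the original and shadow buffers.
 */
static void
shrink_indices_u32_to_u16(const uint32_t *src, uint16_t *dst, uint32_t count)
{
        for (uint32_t i = 0; i < count; i++) {
                /* No fallback exists for indices that don't fit in 16 bits. */
                assert(src[i] <= 0xffff);
                dst[i] = src[i];
        }
}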

void
vc4_resource_screen_init(struct pipe_screen *pscreen)
{
    pscreen->resource_create = vc4_resource_create;
    pscreen->resource_from_handle = vc4_resource_from_handle;
    pscreen->resource_get_handle = u_resource_get_handle_vtbl;
    pscreen->resource_destroy = u_resource_destroy_vtbl;
}
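
/* Hedged usage sketch (not part of the driver): once the vtable above is
 * installed, a state tracker allocates GPU memory through the screen.  The
 * function name and sizes below are hypothetical.
 */
static struct pipe_resource *
example_create_vertex_buffer(struct pipe_screen *pscreen)
{
        struct pipe_resource tmpl = {
                .target = PIPE_BUFFER,
                .format = PIPE_FORMAT_R8_UNORM,
                .bind = PIPE_BIND_VERTEX_BUFFER,
                .usage = PIPE_USAGE_DEFAULT,
                .width0 = 4096,
                .height0 = 1,
                .depth0 = 1,
                .array_size = 1,
        };

        /* Dispatches to vc4_resource_create() via the function pointer set
         * above.
         */
        return pscreen->resource_create(pscreen, &tmpl);
}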
static void
vc4_set_framebuffer_state(struct pipe_context *pctx,
                          const struct pipe_framebuffer_state *framebuffer)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct pipe_framebuffer_state *cso = &vc4->framebuffer;
        unsigned i;

        vc4_flush(pctx);

        for (i = 0; i < framebuffer->nr_cbufs; i++)
                pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]);
        for (; i < vc4->framebuffer.nr_cbufs; i++)
                pipe_surface_reference(&cso->cbufs[i], NULL);

        cso->nr_cbufs = framebuffer->nr_cbufs;

        pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf);

        cso->width = framebuffer->width;
        cso->height = framebuffer->height;

        /* If we're binding to uninitialized buffers, no need to load their
         * contents before drawing.
         */
        if (cso->cbufs[0]) {
                struct vc4_resource *rsc =
                        vc4_resource(cso->cbufs[0]->texture);
                if (!rsc->writes)
                        vc4->cleared |= PIPE_CLEAR_COLOR0;
        }

        if (cso->zsbuf) {
                struct vc4_resource *rsc =
                        vc4_resource(cso->zsbuf->texture);
                if (!rsc->writes)
                        vc4->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
        }

        /* Nonzero texture mipmap levels are laid out as if they were in
         * power-of-two-sized spaces.  The renderbuffer config infers its
         * stride from the width parameter, so we override the framebuffer
         * width with the slice's stride in pixels.  Note that if the z/color
         * buffers were mismatched sizes, we wouldn't be able to do this.
         */
        if (cso->cbufs[0] && cso->cbufs[0]->u.tex.level) {
                struct vc4_resource *rsc =
                        vc4_resource(cso->cbufs[0]->texture);
                cso->width =
                        (rsc->slices[cso->cbufs[0]->u.tex.level].stride /
                         rsc->cpp);
        } else if (cso->zsbuf && cso->zsbuf->u.tex.level) {
                struct vc4_resource *rsc =
                        vc4_resource(cso->zsbuf->texture);
                cso->width =
                        (rsc->slices[cso->zsbuf->u.tex.level].stride /
                         rsc->cpp);
        }

        vc4->msaa = false;
        if (cso->cbufs[0])
                vc4->msaa = cso->cbufs[0]->texture->nr_samples > 1;
        else if (cso->zsbuf)
                vc4->msaa = cso->zsbuf->texture->nr_samples > 1;

        if (vc4->msaa) {
                vc4->tile_width = 32;
                vc4->tile_height = 32;
        } else {
                vc4->tile_width = 64;
                vc4->tile_height = 64;
        }
        vc4->draw_tiles_x = DIV_ROUND_UP(cso->width, vc4->tile_width);
        vc4->draw_tiles_y = DIV_ROUND_UP(cso->height, vc4->tile_height);

        vc4->dirty |= VC4_DIRTY_FRAMEBUFFER;
}
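
/* Worked example for the tile math above: a 1920x1080 single-sampled
 * framebuffer uses 64x64 tiles, so draw_tiles_x = DIV_ROUND_UP(1920, 64) = 30
 * and draw_tiles_y = DIV_ROUND_UP(1080, 64) = 17.  With MSAA the tiles shrink
 * to 32x32, giving 60x34 tiles for the same dimensions.
 */
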
int
vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
{
        struct vc4_screen *screen = vc4->screen;
        struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
        struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
        uint32_t winsys_stride = ctex ? ctex->bo->simulator_winsys_stride : 0;
        uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0;
        uint32_t row_len = MIN2(sim_stride, winsys_stride);
        struct exec_info exec;
        struct drm_device local_dev = {
                .vc4 = vc4,
                .simulator_mem_next = OVERFLOW_SIZE,
        };
        struct drm_device *dev = &local_dev;
        int ret;

        memset(&exec, 0, sizeof(exec));

        if (ctex && ctex->bo->simulator_winsys_map) {
#if 0
                fprintf(stderr, "%dx%d %d %d %d\n",
                        ctex->base.b.width0, ctex->base.b.height0,
                        winsys_stride,
                        sim_stride,
                        ctex->bo->size);
#endif

                for (int y = 0; y < ctex->base.b.height0; y++) {
                        memcpy(ctex->bo->map + y * sim_stride,
                               ctex->bo->simulator_winsys_map + y * winsys_stride,
                               row_len);
                }
        }

        exec.args = args;

        ret = vc4_simulator_pin_bos(dev, &exec);
        if (ret)
                return ret;

        ret = vc4_cl_validate(dev, &exec);
        if (ret)
                return ret;

        int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
        if (bfc != 1) {
                fprintf(stderr, "Binning returned %d flushes, should be 1.\n",
                        bfc);
                fprintf(stderr, "Relocated binning command list:\n");
                vc4_dump_cl(screen->simulator_mem_base + exec.ct0ca,
                            exec.ct0ea - exec.ct0ca, false);
                abort();
        }
        int rfc = simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
        if (rfc != 1) {
                fprintf(stderr, "Rendering returned %d frames, should be 1.\n",
                        rfc);
                fprintf(stderr, "Relocated render command list:\n");
                vc4_dump_cl(screen->simulator_mem_base + exec.ct1ca,
                            exec.ct1ea - exec.ct1ca, true);
                abort();
        }

        ret = vc4_simulator_unpin_bos(&exec);
        if (ret)
                return ret;

        vc4_bo_unreference(&exec.exec_bo->bo);
        free(exec.exec_bo);

        if (ctex && ctex->bo->simulator_winsys_map) {
                for (int y = 0; y < ctex->base.b.height0; y++) {
                        memcpy(ctex->bo->simulator_winsys_map + y * winsys_stride,
                               ctex->bo->map + y * sim_stride,
                               row_len);
                }
        }

        return 0;
}
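
/* Note on the copies in vc4_simulator_flush(): the winsys buffer and the
 * simulator's copy of the color buffer may have different strides, so each
 * row copy is clamped to row_len = MIN2(sim_stride, winsys_stride) to avoid
 * reading or writing past the narrower of the two allocations.
 */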
static void *vc4_get_yuv_vs(struct pipe_context *pctx)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct pipe_screen *pscreen = pctx->screen;

   if (vc4->yuv_linear_blit_vs)
           return vc4->yuv_linear_blit_vs;

   const struct nir_shader_compiler_options *options =
           pscreen->get_compiler_options(pscreen,
                                         PIPE_SHADER_IR_NIR,
                                         PIPE_SHADER_VERTEX);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options);
   b.shader->info.name = ralloc_strdup(b.shader, "linear_blit_vs");

   const struct glsl_type *vec4 = glsl_vec4_type();
   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "pos");

   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                               vec4, "gl_Position");
   pos_out->data.location = VARYING_SLOT_POS;

   nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);

   struct pipe_shader_state shader_tmpl = {
           .type = PIPE_SHADER_IR_NIR,
           .ir.nir = b.shader,
   };

   vc4->yuv_linear_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl);

   return vc4->yuv_linear_blit_vs;
}

static void *vc4_get_yuv_fs(struct pipe_context *pctx, int cpp)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct pipe_screen *pscreen = pctx->screen;
   struct pipe_shader_state **cached_shader;
   const char *name;

   if (cpp == 1) {
           cached_shader = &vc4->yuv_linear_blit_fs_8bit;
           name = "linear_blit_8bit_fs";
   } else {
           cached_shader = &vc4->yuv_linear_blit_fs_16bit;
           name = "linear_blit_16bit_fs";
   }

   if (*cached_shader)
           return *cached_shader;

   const struct nir_shader_compiler_options *options =
           pscreen->get_compiler_options(pscreen,
                                         PIPE_SHADER_IR_NIR,
                                         PIPE_SHADER_FRAGMENT);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
   b.shader->info.name = ralloc_strdup(b.shader, name);

   const struct glsl_type *vec4 = glsl_vec4_type();
   const struct glsl_type *glsl_int = glsl_int_type();

   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                 vec4, "f_color");
   color_out->data.location = FRAG_RESULT_COLOR;

   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "pos");
   pos_in->data.location = VARYING_SLOT_POS;
   nir_ssa_def *pos = nir_load_var(&b, pos_in);

   nir_ssa_def *one = nir_imm_int(&b, 1);
   nir_ssa_def *two = nir_imm_int(&b, 2);

   nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
   nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));

   nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform,
                                                 glsl_int, "stride");
   nir_ssa_def *stride = nir_load_var(&b, stride_in);

   nir_ssa_def *x_offset;
   nir_ssa_def *y_offset;
   if (cpp == 1) {
           nir_ssa_def *intra_utile_x_offset =
                   nir_ishl(&b, nir_iand(&b, x, one), two);
           nir_ssa_def *inter_utile_x_offset =
                   nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one);

           x_offset = nir_iadd(&b,
                               intra_utile_x_offset,
                               inter_utile_x_offset);
           y_offset = nir_imul(&b,
                               nir_iadd(&b,
                                        nir_ishl(&b, y, one),
                                        nir_ushr(&b, nir_iand(&b, x, two), one)),
                               stride);
   } else {
           x_offset = nir_ishl(&b, x, two);
           y_offset = nir_imul(&b, y, stride);
   }

   nir_intrinsic_instr *load =
           nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
   load->src[0] = nir_src_for_ssa(one);
   load->src[1] = nir_src_for_ssa(nir_iadd(&b, x_offset, y_offset));
   nir_builder_instr_insert(&b, &load->instr);

   nir_store_var(&b, color_out,
                 nir_unpack_unorm_4x8(&b, &load->dest.ssa),
                 0xf);

   struct pipe_shader_state shader_tmpl = {
           .type = PIPE_SHADER_IR_NIR,
           .ir.nir = b.shader,
   };

   *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl);

   return *cached_shader;
}
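
/* A scalar restatement (illustration only) of the cpp == 1 source-offset
 * computation built in NIR above, handy for checking the shifts by hand:
 * e.g. (x, y) = (5, 3) with stride = 256 gives x_offset = 4 + 8 = 12 and
 * y_offset = 6 * 256 = 1536, so byte 1548 of the source buffer is loaded.
 */
static inline uint32_t
yuv_8bpp_src_offset(uint32_t x, uint32_t y, uint32_t stride)
{
        uint32_t intra_utile_x_offset = (x & 1) << 2;   /* ishl(iand(x, 1), 2) */
        uint32_t inter_utile_x_offset = (x & ~3u) << 1; /* ishl(iand(x, ~3), 1) */
        uint32_t y_rows = (y << 1) + ((x & 2) >> 1);

        return intra_utile_x_offset + inter_utile_x_offset + y_rows * stride;
}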

static bool
vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_resource *src = vc4_resource(info->src.resource);
        struct vc4_resource *dst = vc4_resource(info->dst.resource);
        bool ok;

        if (src->tiled)
                return false;
        if (src->base.format != PIPE_FORMAT_R8_UNORM &&
            src->base.format != PIPE_FORMAT_R8G8_UNORM)
                return false;

        /* YUV blits always turn raster-order to tiled */
        assert(dst->base.format == src->base.format);
        assert(dst->tiled);

        /* Always 1:1 and at the origin */
        assert(info->src.box.x == 0 && info->dst.box.x == 0);
        assert(info->src.box.y == 0 && info->dst.box.y == 0);
        assert(info->src.box.width == info->dst.box.width);
        assert(info->src.box.height == info->dst.box.height);

        if ((src->slices[info->src.level].offset & 3) ||
            (src->slices[info->src.level].stride & 3)) {
                perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n",
                           src->slices[info->src.level].offset,
                           src->slices[info->src.level].stride);
                goto fallback;
        }

        vc4_blitter_save(vc4);

        /* Create a renderable surface mapping the T-tiled shadow buffer.
         */
        struct pipe_surface dst_tmpl;
        util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
                                         info->dst.level, info->dst.box.z);
        dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM;
        struct pipe_surface *dst_surf =
                pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
        if (!dst_surf) {
                fprintf(stderr, "Failed to create YUV dst surface\n");
                util_blitter_unset_running_flag(vc4->blitter);
                return false;
        }
        dst_surf->width /= 2;
        if (dst->cpp == 1)
                dst_surf->height /= 2;

        /* Set the constant buffer. */
        uint32_t stride = src->slices[info->src.level].stride;
        struct pipe_constant_buffer cb_uniforms = {
                .user_buffer = &stride,
                .buffer_size = sizeof(stride),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, &cb_uniforms);
        struct pipe_constant_buffer cb_src = {
                .buffer = info->src.resource,
                .buffer_offset = src->slices[info->src.level].offset,
                .buffer_size = (src->bo->size -
                                src->slices[info->src.level].offset),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_src);

        /* Unbind the textures, to make sure we don't try to recurse into the
         * shadow blit.
         */
        pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
        pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);

        util_blitter_custom_shader(vc4->blitter, dst_surf,
                                   vc4_get_yuv_vs(pctx),
                                   vc4_get_yuv_fs(pctx, src->cpp));

        util_blitter_restore_textures(vc4->blitter);
        util_blitter_restore_constant_buffer_state(vc4->blitter);
        /* Restore cb1 (util_blitter doesn't handle this one). */
        struct pipe_constant_buffer cb_disabled = { 0 };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_disabled);

        pipe_surface_reference(&dst_surf, NULL);

        return true;

fallback:
        /* Do an immediate SW fallback, since the render blit path
         * would just recurse.
         */
        ok = util_try_blit_via_copy_region(pctx, info);
        assert(ok); (void)ok;

        return true;
}

static bool
vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(ctx);

        if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
                fprintf(stderr, "blit unsupported %s -> %s\n",
                    util_format_short_name(info->src.resource->format),
                    util_format_short_name(info->dst.resource->format));
                return false;
        }

        /* Enable the scissor, so we get a minimal set of tiles rendered. */
        if (!info->scissor_enable) {
                info->scissor_enable = true;
                info->scissor.minx = info->dst.box.x;
                info->scissor.miny = info->dst.box.y;
                info->scissor.maxx = info->dst.box.x + info->dst.box.width;
                info->scissor.maxy = info->dst.box.y + info->dst.box.height;
        }

        vc4_blitter_save(vc4);
        util_blitter_blit(vc4->blitter, info);

        return true;
}

/* Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
void
vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
        struct pipe_blit_info info = *blit_info;

        if (vc4_yuv_blit(pctx, blit_info))
                return;

        if (vc4_tile_blit(pctx, blit_info))
                return;

        if (info.mask & PIPE_MASK_S) {
                if (util_try_blit_via_copy_region(pctx, &info))
                        return;

                info.mask &= ~PIPE_MASK_S;
                fprintf(stderr, "cannot blit stencil, skipping\n");
        }

        if (vc4_render_blit(pctx, &info))
                return;

        fprintf(stderr, "Unsupported blit\n");
}
static bool
vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        bool msaa = (info->src.resource->nr_samples > 1 ||
                     info->dst.resource->nr_samples > 1);
        int tile_width = msaa ? 32 : 64;
        int tile_height = msaa ? 32 : 64;

        if (util_format_is_depth_or_stencil(info->dst.resource->format))
                return false;

        if (info->scissor_enable)
                return false;

        if ((info->mask & PIPE_MASK_RGBA) == 0)
                return false;

        if (info->dst.box.x != info->src.box.x ||
            info->dst.box.y != info->src.box.y ||
            info->dst.box.width != info->src.box.width ||
            info->dst.box.height != info->src.box.height) {
                return false;
        }

        int dst_surface_width = u_minify(info->dst.resource->width0,
                                         info->dst.level);
        int dst_surface_height = u_minify(info->dst.resource->height0,
                                         info->dst.level);
        if (is_tile_unaligned(info->dst.box.x, tile_width) ||
            is_tile_unaligned(info->dst.box.y, tile_height) ||
            (is_tile_unaligned(info->dst.box.width, tile_width) &&
             info->dst.box.x + info->dst.box.width != dst_surface_width) ||
            (is_tile_unaligned(info->dst.box.height, tile_height) &&
             info->dst.box.y + info->dst.box.height != dst_surface_height)) {
                return false;
        }

        /* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL uses the
         * VC4_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our
         * destination surface) to determine the stride.  This may be wrong
         * when reading from texture miplevels > 0, which are stored in
         * POT-sized areas.  For MSAA, the tile addresses are computed
         * explicitly by the RCL, but still use the destination width to
         * determine the stride (which could be fixed by explicitly supplying
         * it in the ABI).
         */
        struct vc4_resource *rsc = vc4_resource(info->src.resource);

        uint32_t stride;

        if (info->src.resource->nr_samples > 1)
                stride = align(dst_surface_width, 32) * 4 * rsc->cpp;
        else if (rsc->slices[info->src.level].tiling == VC4_TILING_FORMAT_T)
                stride = align(dst_surface_width * rsc->cpp, 128);
        else
                stride = align(dst_surface_width * rsc->cpp, 16);

        if (stride != rsc->slices[info->src.level].stride)
                return false;

        if (info->dst.resource->format != info->src.resource->format)
                return false;

        if (false) {
                fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
                        info->src.box.x,
                        info->src.box.y,
                        info->dst.box.x,
                        info->dst.box.y,
                        info->dst.box.width,
                        info->dst.box.height);
        }

        struct pipe_surface *dst_surf =
                vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level);
        struct pipe_surface *src_surf =
                vc4_get_blit_surface(pctx, info->src.resource, info->src.level);

        vc4_flush_jobs_reading_resource(vc4, info->src.resource);

        struct vc4_job *job = vc4_get_job(vc4, dst_surf, NULL);
        pipe_surface_reference(&job->color_read, src_surf);

        /* If we're resolving from MSAA to single sample, we still need to run
         * the engine in MSAA mode for the load.
         */
        if (!job->msaa && info->src.resource->nr_samples > 1) {
                job->msaa = true;
                job->tile_width = 32;
                job->tile_height = 32;
        }

        job->draw_min_x = info->dst.box.x;
        job->draw_min_y = info->dst.box.y;
        job->draw_max_x = info->dst.box.x + info->dst.box.width;
        job->draw_max_y = info->dst.box.y + info->dst.box.height;
        job->draw_width = dst_surf->width;
        job->draw_height = dst_surf->height;

        job->tile_width = tile_width;
        job->tile_height = tile_height;
        job->msaa = msaa;
        job->needs_flush = true;
        job->resolve |= PIPE_CLEAR_COLOR;

        vc4_job_submit(vc4, job);

        pipe_surface_reference(&dst_surf, NULL);
        pipe_surface_reference(&src_surf, NULL);

        return true;
}
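
/* Worked example of the stride check above: a 257-pixel-wide RGBA8888 level
 * (cpp = 4) needs stride = align(257 * 4, 128) = 1152 when T-tiled, or
 * align(257 * 4, 16) = 1040 when linear, and a 257-wide MSAA source needs
 * align(257, 32) * 4 * 4 = 4608.  If the source slice's real stride doesn't
 * match the stride the RCL would infer from the destination width, the tile
 * blit bails out and vc4_blit() falls back to the rendering path.
 */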
static void
vc4_setup_rcl(struct vc4_context *vc4)
{
        struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
        struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
        uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
        uint32_t width = vc4->framebuffer.width;
        uint32_t height = vc4->framebuffer.height;
        uint32_t xtiles = align(width, 64) / 64;
        uint32_t ytiles = align(height, 64) / 64;

#if 0
        fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
                vc4->resolve,
                vc4->cleared,
                resolve_uncleared);
#endif

        cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
        cl_u32(&vc4->rcl, vc4->clear_color[0]);
        cl_u32(&vc4->rcl, vc4->clear_color[1]);
        cl_u32(&vc4->rcl, vc4->clear_depth);
        cl_u8(&vc4->rcl, 0);

        cl_start_reloc(&vc4->rcl, 1);
        cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
        cl_reloc(vc4, &vc4->rcl, ctex->bo, csurf->offset);
        cl_u16(&vc4->rcl, width);
        cl_u16(&vc4->rcl, height);
        cl_u16(&vc4->rcl, ((csurf->tiling <<
                            VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
                           (vc4_rt_format_is_565(csurf->base.format) ?
                            VC4_RENDER_CONFIG_FORMAT_BGR565 :
                            VC4_RENDER_CONFIG_FORMAT_RGBA8888) |
                           VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE));

        /* The tile buffer normally gets cleared when the previous tile is
         * stored.  If the clear values changed between frames, then the tile
         * buffer has stale clear values in it, so we have to do a store in
         * None mode (no writes) so that we trigger the tile buffer clear.
         */
        if (vc4->cleared & PIPE_CLEAR_COLOR0) {
                cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
                cl_u8(&vc4->rcl, 0);
                cl_u8(&vc4->rcl, 0);

                cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
                cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
                cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
        }

        for (int y = 0; y < ytiles; y++) {
                for (int x = 0; x < xtiles; x++) {
                        bool end_of_frame = (x == xtiles - 1 &&
                                             y == ytiles - 1);

                        /* Note that the load doesn't actually occur until the
                         * tile coords packet is processed.
                         */
                        if (resolve_uncleared & PIPE_CLEAR_COLOR) {
                                cl_start_reloc(&vc4->rcl, 1);
                                cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
                                cl_u8(&vc4->rcl,
                                      VC4_LOADSTORE_TILE_BUFFER_COLOR |
                                      (csurf->tiling <<
                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                cl_u8(&vc4->rcl,
                                      vc4_rt_format_is_565(csurf->base.format) ?
                                      VC4_LOADSTORE_TILE_BUFFER_BGR565 :
                                      VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
                                cl_reloc(vc4, &vc4->rcl, ctex->bo,
                                         csurf->offset);
                        }

                        cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
                        cl_u8(&vc4->rcl, x);
                        cl_u8(&vc4->rcl, y);

                        cl_start_reloc(&vc4->rcl, 1);
                        cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
                        cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
                                 (y * xtiles + x) * 32);

                        if (vc4->resolve & PIPE_CLEAR_COLOR0) {
                                if (end_of_frame) {
                                        cl_u8(&vc4->rcl,
                                              VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
                                } else {
                                        cl_u8(&vc4->rcl,
                                              VC4_PACKET_STORE_MS_TILE_BUFFER);
                                }
                        } else {
                                assert(!"unfinished: Need to end the frame\n");
                        }
                }
        }
}
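
/* For reference: the VC4_PACKET_BRANCH_TO_SUB_LIST above points at tile
 * (x, y)'s entry in the tile allocation buffer at offset
 * (y * xtiles + x) * 32, which assumes the binning configuration (not shown
 * in this snippet) hands out initial per-tile blocks of 32 bytes.  E.g. with
 * xtiles = 30, tile (3, 2) branches via offset (2 * 30 + 3) * 32 = 2016.
 */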