Example #1
bool etna_screen_resource_alloc_ts(struct pipe_screen *screen, struct etna_resource *resource)
{
    struct etna_screen *priv = etna_screen(screen);
    size_t rt_ts_size;
    assert(!resource->ts_bo);
    /* TS only for level 0 -- XXX is this formula correct? */
    rt_ts_size = align(resource->levels[0].size*priv->specs.bits_per_tile/0x80, 0x100);
    if(rt_ts_size == 0)
        return true;

    DBG_F(ETNA_DBG_RESOURCE_MSGS, "%p: Allocating tile status of size %zu", resource, rt_ts_size);
    struct etna_bo *rt_ts = 0;
    if(unlikely((rt_ts = etna_bo_new(priv->dev, rt_ts_size, DRM_ETNA_GEM_TYPE_TS)) == NULL))
    {
        BUG("Problem allocating tile status for resource");
        return false;
    }
    resource->ts_bo = rt_ts;
    resource->levels[0].ts_offset = 0;
    resource->levels[0].ts_size = etna_bo_size(resource->ts_bo);
    /* It is important to initialize the TS, as a random pattern
     * can result in crashes. Do this on the CPU as this only happens once
     * per surface anyway and it's a small area, so it may not be worth
     * queuing this to the GPU.
     */
    void *ts_map = etna_bo_map(rt_ts);
    memset(ts_map, priv->specs.ts_clear_value, rt_ts_size);
    return true;
}
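To make the formula above concrete (numbers are illustrative, not from the source): a 256x256 RGBA8888 level 0 occupies 256 * 256 * 4 = 262144 bytes; with specs.bits_per_tile == 2 that needs 262144 * 2 / 0x80 = 4096 bytes of tile status, which already satisfies the 0x100 alignment, so rt_ts_size == 4096.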
Example #2
static void etna_pipe_transfer_unmap(struct pipe_context *pipe,
                      struct pipe_transfer *transfer_)
{
    struct etna_pipe_context *priv = etna_pipe_context(pipe);
    struct etna_transfer *ptrans = etna_transfer(transfer_);

    /* XXX
     * When writing to a resource that is already in use, replace the resource with a completely new buffer
     * and free the old one using a fenced free.
     * The most tricky case to implement will be: tiled or supertiled surface, partial write, target not aligned to 4/64
     */
    struct etna_resource *resource = etna_resource(ptrans->base.resource);
    assert(ptrans->base.level <= resource->base.last_level);

    if(ptrans->base.usage & PIPE_TRANSFER_WRITE)
    {
        /* write back */
        if(unlikely(!ptrans->in_place))
        {
            /* map buffer object */
            struct etna_resource_level *res_level = &resource->levels[ptrans->base.level];
            void *mapped = etna_bo_map(resource->bo) + res_level->offset;
            if(resource->layout == ETNA_LAYOUT_LINEAR || resource->layout == ETNA_LAYOUT_TILED)
            {
                if(resource->layout == ETNA_LAYOUT_TILED && !util_format_is_compressed(resource->base.format))
                {
                    etna_texture_tile(mapped + ptrans->base.box.z * res_level->layer_stride, ptrans->buffer,
                            ptrans->base.box.x, ptrans->base.box.y, res_level->stride,
                            ptrans->base.box.width, ptrans->base.box.height, ptrans->base.stride,
                            util_format_get_blocksize(resource->base.format));
                } else { /* non-tiled or compressed format */
                    util_copy_box(mapped,
                      resource->base.format,
                      res_level->stride, res_level->layer_stride,
                      ptrans->base.box.x, ptrans->base.box.y, ptrans->base.box.z,
                      ptrans->base.box.width, ptrans->base.box.height, ptrans->base.box.depth,
                      ptrans->buffer,
                      ptrans->base.stride, ptrans->base.layer_stride,
                      0, 0, 0 /* src x,y,z */);
                }
            } else
            {
                BUG("unsupported tiling %i", resource->layout);
            }
            FREE(ptrans->buffer);
        }
        if(resource->base.bind & PIPE_BIND_SAMPLER_VIEW)
        {
            /* XXX do we need to flush the CPU cache too or start a write barrier
             * to make sure the GPU sees it? */
            priv->dirty_bits |= ETNA_STATE_TEXTURE_CACHES;
        }
    }

    util_slab_free(&priv->transfer_pool, ptrans);
}
Example #3
/* A tile is 4x4 pixels, having 'screen->specs.bits_per_tile' bits of tile status.
 * So, in a buffer of N pixels, there are N / (4 * 4) tiles.
 * We need N * screen->specs.bits_per_tile / (4 * 4) bits of tile status, or
 * N * screen->specs.bits_per_tile / (4 * 4 * 8) bytes.
 */
bool
etna_screen_resource_alloc_ts(struct pipe_screen *pscreen,
                              struct etna_resource *rsc)
{
   struct etna_screen *screen = etna_screen(pscreen);
   size_t rt_ts_size, ts_layer_stride, pixels;

   assert(!rsc->ts_bo);

   /* TS only for level 0 -- XXX is this formula correct? */
   pixels = rsc->levels[0].layer_stride / util_format_get_blocksize(rsc->base.format);
   ts_layer_stride = align(pixels * screen->specs.bits_per_tile / 0x80,
                           0x100 * screen->specs.pixel_pipes);
   rt_ts_size = ts_layer_stride * rsc->base.array_size;
   if (rt_ts_size == 0)
      return true;

   DBG_F(ETNA_DBG_RESOURCE_MSGS, "%p: Allocating tile status of size %zu",
         rsc, rt_ts_size);

   struct etna_bo *rt_ts;
   rt_ts = etna_bo_new(screen->dev, rt_ts_size, DRM_ETNA_GEM_CACHE_WC);

   if (unlikely(!rt_ts)) {
      BUG("Problem allocating tile status for resource");
      return false;
   }

   rsc->ts_bo = rt_ts;
   rsc->levels[0].ts_offset = 0;
   rsc->levels[0].ts_layer_stride = ts_layer_stride;
   rsc->levels[0].ts_size = rt_ts_size;

   /* It is important to initialize the TS, as a random pattern
    * can result in crashes. Do this on the CPU as this only happens once
    * per surface anyway and it's a small area, so it may not be worth
    * queuing this to the GPU. */
   void *ts_map = etna_bo_map(rt_ts);
   memset(ts_map, screen->specs.ts_clear_value, rt_ts_size);

   return true;
}
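The arithmetic above is easy to check standalone. Below is a minimal sketch of the same computation with align() reimplemented locally; the surface dimensions, bits_per_tile, pixel_pipes and array_size values are made up for illustration and are not from the driver:

#include <assert.h>
#include <stddef.h>

/* Local stand-in for the driver's align(): round v up to a multiple of a,
 * where a is a power of two (true for all uses above). */
static size_t align_up(size_t v, size_t a)
{
    return (v + a - 1) & ~(a - 1);
}

int main(void)
{
    /* Hypothetical surface: 64x64 R8 (blocksize 1), 2 layers, on a GPU with
     * 2 bits of tile status per 4x4 tile and 2 pixel pipes. */
    size_t layer_stride = 64 * 64;    /* bytes; equals pixels for R8 */
    unsigned bits_per_tile = 2, pixel_pipes = 2, array_size = 2;

    size_t pixels = layer_stride / 1; /* util_format_get_blocksize(R8) == 1 */
    size_t ts_layer_stride =
        align_up(pixels * bits_per_tile / 0x80, 0x100 * pixel_pipes);

    assert(ts_layer_stride == 0x200); /* 4096*2/128 = 64, aligned to 0x200 */
    assert(ts_layer_stride * array_size == 0x400); /* rt_ts_size */
    return 0;
}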
Example #4
/* Clear GPU context, to rebuild it for next flush */
static int gpu_context_clear(struct etna_ctx *ctx)
{
    /* If the context was used, queue-free it and allocate a new buffer to
     * prevent overwriting it while it is still in use by the GPU.  Otherwise
     * we can just re-use it.
     */
    int rv;
#ifdef DEBUG
    fprintf(stderr, "gpu_context_clear (context %i)\n", (int)GCCTX(ctx)->id);
#endif
    if(GCCTX(ctx)->inUse != NULL &&
       *GCCTX(ctx)->inUse)
    {
#ifdef DEBUG
        fprintf(stderr, "gpu_context_clear: context was in use, deferred freeing and reallocating it\n");
#endif
        if((rv = etna_bo_del(ctx->conn, ctx->ctx_bo, ctx->queue)) != ETNA_OK)
        {
            return rv;
        }
        if((ctx->ctx_bo = etna_bo_new(ctx->conn, COMMAND_BUFFER_SIZE, DRM_ETNA_GEM_TYPE_CMD)) == NULL)
        {
            return ETNA_OUT_OF_MEMORY;
        }
    }
    /* Leave space at beginning of buffer for PIPE switch */
    GCCTX(ctx)->bufferSize = BEGIN_COMMIT_CLEARANCE;
    GCCTX(ctx)->logical = etna_bo_map(ctx->ctx_bo);
#ifdef GCABI_CONTEXT_HAS_PHYSICAL
    GCCTX(ctx)->bytes = etna_bo_size(ctx->ctx_bo); /* actual size of buffer */
    GCCTX(ctx)->physical = HANDLE_TO_VIV(etna_bo_gpu_address(ctx->ctx_bo));
#endif
    /* When context is empty, initial pipe should default to entry pipe so that
     * no pipe switch is needed within the context and the kernel does the
     * right thing.
     */
    GCCTX(ctx)->initialPipe = GCCTX(ctx)->entryPipe;
    return ETNA_OK;
}
Example #5
int etna_create(struct viv_conn *conn, struct etna_ctx **ctx_out)
{
    int rv;
    if(ctx_out == NULL) return ETNA_INVALID_ADDR;
    struct etna_ctx *ctx = ETNA_CALLOC_STRUCT(etna_ctx);
    if(ctx == NULL) return ETNA_OUT_OF_MEMORY;
    ctx->conn = conn;

    if(gpu_context_initialize(ctx) != ETNA_OK)
    {
        ETNA_FREE(ctx);
        return ETNA_INTERNAL_ERROR;
    }

    /* Create synchronization signal */
    if(viv_user_signal_create(conn, 0, &ctx->sig_id) != 0) /* automatic resetting signal */
    {
#ifdef DEBUG
        fprintf(stderr, "Cannot create user signal\n");
#endif
        return ETNA_INTERNAL_ERROR;
    }
#ifdef DEBUG
    fprintf(stderr, "Created user signal %i\n", ctx->sig_id);
#endif

    /* Allocate command buffers, and create a synchronization signal for each.
     * Also signal each buffer's signal to mark the buffer as ready for use.
     */
    for(int x=0; x<NUM_COMMAND_BUFFERS; ++x)
    {
        ctx->cmdbuf[x] = ETNA_CALLOC_STRUCT(_gcoCMDBUF);
        if(ctx->cmdbuf[x] == NULL ||
           (ctx->cmdbufi[x].bo = etna_bo_new(conn, COMMAND_BUFFER_SIZE, DRM_ETNA_GEM_TYPE_CMD))==NULL)
        {
#ifdef DEBUG
            fprintf(stderr, "Error allocating host memory for command buffer\n");
#endif
            return ETNA_OUT_OF_MEMORY;
        }
        ctx->cmdbuf[x]->object.type = gcvOBJ_COMMANDBUFFER;
#ifdef GCABI_CMDBUF_HAS_PHYSICAL
        ctx->cmdbuf[x]->physical = PTR_TO_VIV((void*)etna_bo_gpu_address(ctx->cmdbufi[x].bo));
        ctx->cmdbuf[x]->bytes = ctx->cmdbufi[x].bytes;
#endif
        ctx->cmdbuf[x]->logical = PTR_TO_VIV((void*)etna_bo_map(ctx->cmdbufi[x].bo));

        if(viv_user_signal_create(conn, 0, &ctx->cmdbufi[x].sig_id) != 0 ||
           viv_user_signal_signal(conn, ctx->cmdbufi[x].sig_id, 1) != 0)
        {
#ifdef DEBUG
            fprintf(stderr, "Cannot create user signal\n");
#endif
            return ETNA_INTERNAL_ERROR;
        }
#ifdef DEBUG
        fprintf(stderr, "Allocated buffer %i: phys=%08x log=%08x bytes=%08x [signal %i]\n", x,
                (uint32_t)buf0_physical, (uint32_t)buf0_logical, buf0_bytes, ctx->cmdbufi[x].sig);
#endif
    }

    /* Allocate command queue */
    if((rv = etna_queue_create(ctx, &ctx->queue)) != ETNA_OK)
    {
#ifdef DEBUG
        fprintf(stderr, "Error allocating kernel command queue\n");
#endif
        return rv;
    }

    /* Set current buffer to ETNA_NO_BUFFER, to signify that we need to switch to buffer 0 before
     * queueing of commands can be started.
     */
    ctx->cur_buf = ETNA_NO_BUFFER;

    *ctx_out = ctx;
    return ETNA_OK;
}
Example #6
static void
etna_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans)
{
   struct etna_context *ctx = etna_context(pctx);
   struct etna_transfer *trans = etna_transfer(ptrans);
   struct etna_resource *rsc = etna_resource(ptrans->resource);

   /* XXX
    * When writing to a resource that is already in use, replace the resource
    * with a completely new buffer
    * and free the old one using a fenced free.
    * The most tricky case to implement will be: tiled or supertiled surface,
    * partial write, target not aligned to 4/64. */
   assert(ptrans->level <= rsc->base.last_level);

   if (rsc->texture && !etna_resource_newer(rsc, etna_resource(rsc->texture)))
      rsc = etna_resource(rsc->texture); /* switch to using the texture resource */

   /*
    * Temporary resources are always pulled into the CPU domain, must push them
    * back into GPU domain before the RS execs the blit to the base resource.
    */
   if (trans->rsc)
      etna_bo_cpu_fini(etna_resource(trans->rsc)->bo);

   if (ptrans->usage & PIPE_TRANSFER_WRITE) {
      if (trans->rsc) {
         /* We have a temporary resource due to either tile status or
          * tiling format. Write back the updated buffer contents.
          * FIXME: we need to invalidate the tile status. */
         etna_copy_resource_box(pctx, ptrans->resource, trans->rsc, ptrans->level, &ptrans->box);
      } else if (trans->staging) {
         /* map buffer object */
         struct etna_resource_level *res_level = &rsc->levels[ptrans->level];
         void *mapped = etna_bo_map(rsc->bo) + res_level->offset;

         if (rsc->layout == ETNA_LAYOUT_TILED) {
            etna_texture_tile(
               mapped + ptrans->box.z * res_level->layer_stride,
               trans->staging, ptrans->box.x, ptrans->box.y,
               res_level->stride, ptrans->box.width, ptrans->box.height,
               ptrans->stride, util_format_get_blocksize(rsc->base.format));
         } else if (rsc->layout == ETNA_LAYOUT_LINEAR) {
            util_copy_box(mapped, rsc->base.format, res_level->stride,
                          res_level->layer_stride, ptrans->box.x,
                          ptrans->box.y, ptrans->box.z, ptrans->box.width,
                          ptrans->box.height, ptrans->box.depth,
                          trans->staging, ptrans->stride,
                          ptrans->layer_stride, 0, 0, 0 /* src x,y,z */);
         } else {
            BUG("unsupported tiling %i", rsc->layout);
         }

         FREE(trans->staging);
      }

      rsc->seqno++;

      if (rsc->base.bind & PIPE_BIND_SAMPLER_VIEW) {
         ctx->dirty |= ETNA_DIRTY_TEXTURE_CACHES;
      }
   }

   /*
    * Transfers without a temporary are only pulled into the CPU domain if they
    * are not mapped unsynchronized. If they are, must push them back into GPU
    * domain after CPU access is finished.
    */
   if (!trans->rsc && !(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED))
      etna_bo_cpu_fini(rsc->bo);

   pipe_resource_reference(&trans->rsc, NULL);
   pipe_resource_reference(&ptrans->resource, NULL);
   slab_free(&ctx->transfer_pool, trans);
}
Example #7
static void *
etna_transfer_map(struct pipe_context *pctx, struct pipe_resource *prsc,
                  unsigned level,
                  unsigned usage,
                  const struct pipe_box *box,
                  struct pipe_transfer **out_transfer)
{
   struct etna_context *ctx = etna_context(pctx);
   struct etna_resource *rsc = etna_resource(prsc);
   struct etna_transfer *trans;
   struct pipe_transfer *ptrans;
   enum pipe_format format = prsc->format;

   trans = slab_alloc(&ctx->transfer_pool);
   if (!trans)
      return NULL;

   /* slab_alloc() doesn't zero */
   memset(trans, 0, sizeof(*trans));

   ptrans = &trans->base;
   pipe_resource_reference(&ptrans->resource, prsc);
   ptrans->level = level;
   ptrans->usage = usage;
   ptrans->box = *box;

   assert(level <= prsc->last_level);

   /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
    * being mapped. If we add buffer reallocation to avoid CPU/GPU sync this
    * check needs to be extended to coherent mappings and shared resources.
    */
   if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
       !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
       prsc->last_level == 0 &&
       prsc->width0 == box->width &&
       prsc->height0 == box->height &&
       prsc->depth0 == box->depth &&
       prsc->array_size == 1) {
      usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
   }

   if (rsc->texture && !etna_resource_newer(rsc, etna_resource(rsc->texture))) {
      /* We have a texture resource which is the same age or newer than the
       * render resource. Use the texture resource, which avoids bouncing
       * pixels between the two resources, and we can de-tile it in s/w. */
      rsc = etna_resource(rsc->texture);
   } else if (rsc->ts_bo ||
              (rsc->layout != ETNA_LAYOUT_LINEAR &&
               util_format_get_blocksize(format) > 1 &&
               /* HALIGN 4 resources are incompatible with the resolve engine,
                * so fall back to using software to detile this resource. */
               rsc->halign != TEXTURE_HALIGN_FOUR)) {
      /* If the surface has tile status, we need to resolve it first.
       * The strategy we implement here is to use the RS to copy the
       * depth buffer, filling in the "holes" where the tile status
       * indicates that it's clear. We also do this for tiled
       * resources, but only if the RS can blit them. */
      if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
         slab_free(&ctx->transfer_pool, trans);
         BUG("unsupported transfer flags %#x with tile status/tiled layout", usage);
         return NULL;
      }

      if (prsc->depth0 > 1) {
         slab_free(&ctx->transfer_pool, trans);
         BUG("resource has depth >1 with tile status");
         return NULL;
      }

      struct pipe_resource templ = *prsc;
      templ.nr_samples = 0;
      templ.bind = PIPE_BIND_RENDER_TARGET;

      trans->rsc = etna_resource_alloc(pctx->screen, ETNA_LAYOUT_LINEAR,
                                       DRM_FORMAT_MOD_LINEAR, &templ);
      if (!trans->rsc) {
         slab_free(&ctx->transfer_pool, trans);
         return NULL;
      }

      /* Need to align the transfer region to satisfy RS restrictions, as we
       * really want to hit the RS blit path here.
       */
      unsigned w_align, h_align;

      if (rsc->layout & ETNA_LAYOUT_BIT_SUPER) {
         w_align = h_align = 64;
      } else {
         w_align = ETNA_RS_WIDTH_MASK + 1;
         h_align = ETNA_RS_HEIGHT_MASK + 1;
      }
      h_align *= ctx->screen->specs.pixel_pipes;

      ptrans->box.width += ptrans->box.x & (w_align - 1);
      ptrans->box.x = ptrans->box.x & ~(w_align - 1);
      ptrans->box.width = align(ptrans->box.width, (ETNA_RS_WIDTH_MASK + 1));
      ptrans->box.height += ptrans->box.y & (h_align - 1);
      ptrans->box.y = ptrans->box.y & ~(h_align - 1);
      ptrans->box.height = align(ptrans->box.height,
                                 (ETNA_RS_HEIGHT_MASK + 1) *
                                  ctx->screen->specs.pixel_pipes);
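      /* Worked illustration (hypothetical mask values): with w_align == 16,
       * a request of x=5, width=10 becomes width += 5 -> 15, x &= ~15 -> 0,
       * width = align(15, 16) -> 16. The aligned box still covers every
       * requested pixel, but with bounds the RS can actually blit. */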

      if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
         etna_copy_resource_box(pctx, trans->rsc, prsc, level, &ptrans->box);

      /* Switch to using the temporary resource instead */
      rsc = etna_resource(trans->rsc);
   }

   struct etna_resource_level *res_level = &rsc->levels[level];

   /*
    * Always flush if we have the temporary resource and have a copy to this
    * outstanding. Otherwise infer flush requirement from resource access and
    * current GPU usage (reads must wait for GPU writes, writes must have
    * exclusive access to the buffer).
    */
   if ((trans->rsc && (etna_resource(trans->rsc)->status & ETNA_PENDING_WRITE)) ||
       (!trans->rsc &&
        (((usage & PIPE_TRANSFER_READ) && (rsc->status & ETNA_PENDING_WRITE)) ||
        ((usage & PIPE_TRANSFER_WRITE) && rsc->status))))
      pctx->flush(pctx, NULL, 0);

   /* XXX we don't handle PIPE_TRANSFER_FLUSH_EXPLICIT; this flag can be
    * ignored when mapping in-place, but when not in place we need to fire off
    * the copy operation in transfer_flush_region (currently a no-op) instead
    * of unmap. Need to handle this to support the ARB_map_buffer_range
    * extension at least.
    */
   /* XXX we don't take care of current operations on the resource, which can
      be, at some point in the pipeline which is not yet executed:

      - bound as surface
      - bound through vertex buffer
      - bound through index buffer
      - bound in sampler view
      - used in clear_render_target / clear_depth_stencil operation
      - used in blit
      - used in resource_copy_region

      How do other drivers record this information over the course of the
      rendering pipeline? Is it necessary at all? Only in case we want to
      provide a fast path and map the resource directly (and for
      PIPE_TRANSFER_MAP_DIRECTLY), and we don't want to force a sync. We also
      need to know whether the resource is in use to determine if a sync is
      needed (or just do it always, but that comes at the expense of
      performance).

      A conservative approximation without too much overhead would be to mark
      all resources that have been bound at some point as busy. A drawback
      would be that accessing resources that have been bound but are no
      longer in use for a while still carries a performance penalty. On the
      other hand, the program could be using
      PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE or PIPE_TRANSFER_UNSYNCHRONIZED to
      avoid this in the first place...

      A) We use an in-pipe copy engine, and queue the copy operation after
         unmap so that the copy will be performed when all current commands
         have been executed. Using the RS is possible, not sure if always
         efficient. This can also do any kind of tiling for us. Only possible
         when PIPE_TRANSFER_DISCARD_RANGE is set.
      B) We discard the entire resource (or at least, the mipmap level) and
         allocate new memory for it. Only possible when mapping the entire
         resource or PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set.
    */

   /*
    * Pull resources into the CPU domain. Only skipped for unsynchronized
    * transfers without a temporary resource.
    */
   if (trans->rsc || !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      uint32_t prep_flags = 0;

      if (usage & PIPE_TRANSFER_READ)
         prep_flags |= DRM_ETNA_PREP_READ;
      if (usage & PIPE_TRANSFER_WRITE)
         prep_flags |= DRM_ETNA_PREP_WRITE;

      if (etna_bo_cpu_prep(rsc->bo, prep_flags))
         goto fail_prep;
   }

   /* map buffer object */
   void *mapped = etna_bo_map(rsc->bo);
   if (!mapped)
      goto fail;

   *out_transfer = ptrans;

   if (rsc->layout == ETNA_LAYOUT_LINEAR) {
      ptrans->stride = res_level->stride;
      ptrans->layer_stride = res_level->layer_stride;

      return mapped + res_level->offset +
             etna_compute_offset(prsc->format, box, res_level->stride,
                                 res_level->layer_stride);
   } else {
      unsigned divSizeX = util_format_get_blockwidth(format);
      unsigned divSizeY = util_format_get_blockheight(format);

      /* No direct mappings of tiled, since we need to manually
       * tile/untile.
       */
      if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
         goto fail;

      mapped += res_level->offset;
      ptrans->stride = align(box->width, divSizeX) * util_format_get_blocksize(format); /* row stride in bytes */
      ptrans->layer_stride = align(box->height, divSizeY) * ptrans->stride;
      size_t size = ptrans->layer_stride * box->depth;
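      /* Illustrative numbers: a 100x50 box of RGBA8888 (1x1 blocks, 4 bytes)
       * gives stride = 100 * 4 = 400 bytes, layer_stride = 50 * 400 = 20000
       * bytes, and size = 20000 * depth. */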

      trans->staging = MALLOC(size);
      if (!trans->staging)
         goto fail;

      if (usage & PIPE_TRANSFER_READ) {
         if (rsc->layout == ETNA_LAYOUT_TILED) {
            etna_texture_untile(trans->staging,
                                mapped + ptrans->box.z * res_level->layer_stride,
                                ptrans->box.x, ptrans->box.y, res_level->stride,
                                ptrans->box.width, ptrans->box.height, ptrans->stride,
                                util_format_get_blocksize(rsc->base.format));
         } else if (rsc->layout == ETNA_LAYOUT_LINEAR) {
            util_copy_box(trans->staging, rsc->base.format, ptrans->stride,
                          ptrans->layer_stride, 0, 0, 0, /* dst x,y,z */
                          ptrans->box.width, ptrans->box.height,
                          ptrans->box.depth, mapped, res_level->stride,
                          res_level->layer_stride, ptrans->box.x,
                          ptrans->box.y, ptrans->box.z);
         } else {
            /* TODO supertiling */
            BUG("unsupported tiling %i for reading", rsc->layout);
         }
      }

      return trans->staging;
   }

fail:
   etna_bo_cpu_fini(rsc->bo);
fail_prep:
   etna_transfer_unmap(pctx, ptrans);
   return NULL;
}
Example #8
/* Allocate 2D texture or render target resource
 */
static struct pipe_resource * etna_screen_resource_create(struct pipe_screen *screen,
                                         const struct pipe_resource *templat)
{
    struct etna_screen *priv = etna_screen(screen);
    assert(templat);

    /* Check input */
    if(templat->target == PIPE_TEXTURE_CUBE)
    {
        assert(templat->array_size == 6);
    } else if (templat->target == PIPE_BUFFER)
    {
        assert(templat->format == PIPE_FORMAT_R8_UNORM); /* bytes; want TYPELESS or similar */
        assert(templat->array_size == 1);
        assert(templat->height0 == 1);
        assert(templat->depth0 == 1);
        assert(templat->last_level == 0);
    } else
    {
        assert(templat->array_size == 1);
    }
    assert(templat->width0 != 0);
    assert(templat->height0 != 0);
    assert(templat->depth0 != 0);
    assert(templat->array_size != 0);

    /* Figure out what tiling to use -- for now, assume that textures cannot be supertiled, and cannot be linear.
     * There is a feature flag SUPERTILED_TEXTURE (not supported on any known hw) that may allow this, as well
     * as LINEAR_TEXTURE_SUPPORT (supported on gc880 and gc2000 at least), but not sure how it works.
     * Buffers always have LINEAR layout.
     */
    unsigned layout = ETNA_LAYOUT_LINEAR;
    if(templat->target != PIPE_BUFFER)
    {
        if(!(templat->bind & PIPE_BIND_SAMPLER_VIEW) && priv->specs.can_supertile &&
                !DBG_ENABLED(ETNA_DBG_NO_SUPERTILE))
            layout = ETNA_LAYOUT_SUPER_TILED;
        else
            layout = ETNA_LAYOUT_TILED;
    }
    /* XXX multi tiled formats */

    /* Determine scaling for antialiasing, allow override using debug flag */
    int nr_samples = templat->nr_samples;
    if((templat->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) &&
       !(templat->bind & PIPE_BIND_SAMPLER_VIEW))
    {
        if(DBG_ENABLED(ETNA_DBG_MSAA_2X))
            nr_samples = 2;
        if(DBG_ENABLED(ETNA_DBG_MSAA_4X))
            nr_samples = 4;
    }
    int msaa_xscale = 1, msaa_yscale = 1;
    if(!translate_samples_to_xyscale(nr_samples, &msaa_xscale, &msaa_yscale, NULL))
    {
        /* Number of samples not supported */
        return NULL;
    }

    /* Determine needed padding (alignment of height/width) */
    unsigned paddingX = 0, paddingY = 0;
    unsigned halign = TEXTURE_HALIGN_FOUR;
    etna_layout_multiple(layout,
            priv->dev->chip.pixel_pipes,
            (templat->bind & PIPE_BIND_SAMPLER_VIEW) && !VIV_FEATURE(priv->dev, chipMinorFeatures1, TEXTURE_HALIGN),
            &paddingX, &paddingY, &halign);
    assert(paddingX && paddingY);

    /* compute mipmap level sizes and offsets */
    struct etna_resource *resource = CALLOC_STRUCT(etna_resource);
    if(unlikely(resource == NULL))
        return NULL;
    int max_mip_level = templat->last_level;
    if(unlikely(max_mip_level >= ETNA_NUM_LOD)) /* max LOD supported by hw */
        max_mip_level = ETNA_NUM_LOD - 1;

    unsigned ix = 0;
    unsigned x = templat->width0, y = templat->height0;
    unsigned offset = 0;
    while(true)
    {
        struct etna_resource_level *mip = &resource->levels[ix];
        mip->width = x;
        mip->height = y;
        mip->padded_width = align(x * msaa_xscale, paddingX);
        mip->padded_height = align(y * msaa_yscale, paddingY);
        mip->stride = util_format_get_stride(templat->format, mip->padded_width);
        mip->offset = offset;
        mip->layer_stride = mip->stride * util_format_get_nblocksy(templat->format, mip->padded_height);
        mip->size = templat->array_size * mip->layer_stride;
        offset += align(mip->size, ETNA_PE_ALIGNMENT); /* align mipmaps to 64 bytes to be able to render to them */
        if(ix == max_mip_level || (x == 1 && y == 1))
            break; // stop at last level
        x = u_minify(x, 1);
        y = u_minify(y, 1);
        ix += 1;
    }
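    /* Worked example (illustrative values): a 64x64 RGBA8888 texture with
     * paddingX = paddingY = 4, no MSAA, array_size 1: level 0 stays 64x64
     * padded, stride = 64 * 4 = 256 bytes, layer_stride = 256 * 64 = 16384
     * bytes; level 1 (32x32) then starts at offset align(16384, 64) = 16384,
     * and so on down to 1x1 or max_mip_level. */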

    /* determine memory type */
    uint32_t flags = 0; /* XXX DRM_ETNA_GEM_CACHE_xxx */
    enum viv_surf_type memtype = VIV_SURF_UNKNOWN;
    if(templat->bind & PIPE_BIND_SAMPLER_VIEW)
        flags |= DRM_ETNA_GEM_TYPE_TEX;
    else if(templat->bind & PIPE_BIND_RENDER_TARGET)
        flags |= DRM_ETNA_GEM_TYPE_RT;
    else if(templat->bind & PIPE_BIND_DEPTH_STENCIL)
        flags |= DRM_ETNA_GEM_TYPE_ZS;
    else if(templat->bind & PIPE_BIND_INDEX_BUFFER)
        flags |= DRM_ETNA_GEM_TYPE_IDX;
    else if(templat->bind & PIPE_BIND_VERTEX_BUFFER)
        flags |= DRM_ETNA_GEM_TYPE_VTX;
    DBG_F(ETNA_DBG_RESOURCE_MSGS, "%p: Allocate surface of %ix%i (padded to %ix%i), %i layers, of format %s, size %08x flags %08x, memtype %i",
            resource,
            templat->width0, templat->height0, resource->levels[0].padded_width, resource->levels[0].padded_height, templat->array_size, util_format_name(templat->format),
            offset, templat->bind, memtype);

    struct etna_bo *bo = 0;
    if(unlikely((bo = etna_bo_new(priv->dev, offset, flags)) == NULL))
    {
        BUG("Problem allocating video memory for resource");
        return NULL;
    }

    resource->base = *templat;
    resource->base.last_level = ix; /* real last mipmap level */
    resource->base.screen = screen;
    resource->base.nr_samples = nr_samples;
    resource->layout = layout;
    resource->halign = halign;
    resource->bo = bo;
    resource->ts_bo = 0; /* TS is only created when first bound to surface */
    pipe_reference_init(&resource->base.reference, 1);

    if(DBG_ENABLED(ETNA_DBG_ZERO))
    {
        void *map = etna_bo_map(bo);
        memset(map, 0, offset);
    }

    return &resource->base;
}
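For orientation, a minimal sketch of how Gallium code might invoke this through the pipe_screen vtable (values are illustrative; error handling elided):

    struct pipe_resource templ = {0};
    templ.target = PIPE_TEXTURE_2D;
    templ.format = PIPE_FORMAT_B8G8R8A8_UNORM;
    templ.width0 = 512;
    templ.height0 = 512;
    templ.depth0 = 1;
    templ.array_size = 1;
    templ.last_level = 0;                /* no mipmaps */
    templ.bind = PIPE_BIND_SAMPLER_VIEW; /* selects tiled layout per the logic above */
    struct pipe_resource *prsc = screen->resource_create(screen, &templ);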
Example #9
int main(int argc, char **argv)
{
    int width, height;
    int width_s, height_s;
    int padded_width, padded_height;
    int backbuffer = 0;
    int supersample_x = 2; // 1 or 2
    int supersample_y = 2; // 1 or 2
    int extra_ps_inputs = 0;

    struct fbdemos_scaffold *fbs = 0;
    fbdemo_init(&fbs);
    width = fbs->width;
    height = fbs->height;
    struct viv_conn *conn = fbs->conn;

    bool supertiled = VIV_FEATURE(conn, chipMinorFeatures0, SUPER_TILED);
    unsigned bits_per_tile = VIV_FEATURE(conn, chipMinorFeatures0, 2BITPERTILE) ? 2 : 4;
    printf("supertiled: %i bits per tile: %i\n", supertiled, bits_per_tile);

    width_s = width * supersample_x;
    height_s = height * supersample_y;
    padded_width = etna_align_up(width_s, 64);
    padded_height = etna_align_up(height_s, 64);
    extra_ps_inputs = (supersample_x > 1 || supersample_y > 1); // in case of supersample, there is an extra input to PS

    printf("padded_width %i padded_height %i\n", padded_width, padded_height);

    struct etna_bo *rt = 0; /* main render target */
    struct etna_bo *rt_ts = 0; /* tile status for main render target */
    struct etna_bo *z = 0; /* depth for main render target */
    struct etna_bo *z_ts = 0; /* depth ts for main render target */
    struct etna_bo *vtx = 0; /* vertex buffer */
    struct etna_bo *idx = 0; /* index buffer */
    struct etna_bo *aux_rt = 0; /* auxiliary render target */
    struct etna_bo *aux_rt_ts = 0; /* tile status for auxiliary render target */
    struct etna_bo *tex = 0; /* texture */
    struct etna_bo *bmp = 0; /* bitmap */

    /* TODO: anti aliasing (doubles width/height) */
    size_t rt_size = padded_width * padded_height * 4;
    size_t rt_ts_size = etna_align_up((padded_width * padded_height * 4)*bits_per_tile/0x80, 0x100);
    size_t z_size = padded_width * padded_height * 2;
    size_t z_ts_size = etna_align_up((padded_width * padded_height * 2)*bits_per_tile/0x80, 0x100);
    size_t bmp_size = width * height * 4;

    if((rt=etna_bo_new(conn, rt_size, DRM_ETNA_GEM_TYPE_RT))==NULL ||
       (rt_ts=etna_bo_new(conn, rt_ts_size, DRM_ETNA_GEM_TYPE_TS))==NULL ||
       (z=etna_bo_new(conn, z_size, DRM_ETNA_GEM_TYPE_ZS))==NULL ||
       (z_ts=etna_bo_new(conn, z_ts_size, DRM_ETNA_GEM_TYPE_TS))==NULL ||
       (vtx=etna_bo_new(conn, VERTEX_BUFFER_SIZE, DRM_ETNA_GEM_TYPE_VTX))==NULL ||
       (idx=etna_bo_new(conn, INDEX_BUFFER_SIZE, DRM_ETNA_GEM_TYPE_IDX))==NULL ||
       (aux_rt=etna_bo_new(conn, 0x4000, DRM_ETNA_GEM_TYPE_RT))==NULL ||
       (aux_rt_ts=etna_bo_new(conn, 0x100, DRM_ETNA_GEM_TYPE_TS))==NULL ||
       (tex=etna_bo_new(conn, 0x100000, DRM_ETNA_GEM_TYPE_TEX))==NULL ||
       (bmp=etna_bo_new(conn, bmp_size, DRM_ETNA_GEM_TYPE_BMP))==NULL
       )
    {
        fprintf(stderr, "Error allocating video memory\n");
        exit(1);
    }

    /* Phew, now we have all the memory we need.
     * Write interleaved attribute vertex stream.
     * Unlike the GL example we only do this once, not every time glDrawArrays
     * is called; the same would be accomplished in GL by using a vertex
     * buffer object.
     */
    memset(etna_bo_map(vtx), 0, VERTEX_BUFFER_SIZE);
#ifndef INDEXED
    printf("Interleaving vertices...\n");
    float *vertices_array = companion_vertices_array();
    float *texture_coordinates_array =
            companion_texture_coordinates_array();
    float *normals_array = companion_normals_array();
    assert(COMPANION_ARRAY_COUNT*(3+3+2)*sizeof(float) < VERTEX_BUFFER_SIZE);
    float *vtx_map = (float*)etna_bo_map(vtx);
    for(int vert=0; vert<COMPANION_ARRAY_COUNT; ++vert)
    {
        int dest_idx = vert * (3 + 3 + 2);
        for(int comp=0; comp<3; ++comp)
            vtx_map[dest_idx+comp+0] = vertices_array[vert*3 + comp]; /* 0 */
        for(int comp=0; comp<3; ++comp)
            vtx_map[dest_idx+comp+3] = normals_array[vert*3 + comp]; /* 1 */
        for(int comp=0; comp<2; ++comp)
            vtx_map[dest_idx+comp+6] = texture_coordinates_array[vert*2 + comp]; /* 2 */
    }
#else
    printf("Interleaving vertices and copying index buffer...\n");
    assert(COMPANION_VERTEX_COUNT*(3+3+2)*sizeof(float) < VERTEX_BUFFER_SIZE);
    float *vtx_map = (float*)etna_bo_map(vtx);
    for(int vert=0; vert<COMPANION_VERTEX_COUNT; ++vert)
    {
        int dest_idx = vert * (3 + 3 + 2);
        for(int comp=0; comp<3; ++comp)
            vtx_map[dest_idx+comp+0] = companion_vertices[vert][comp]; /* 0 */
        for(int comp=0; comp<3; ++comp)
            vtx_map[dest_idx+comp+3] = companion_normals[vert][comp]; /* 1 */
        for(int comp=0; comp<2; ++comp)
            vtx_map[dest_idx+comp+6] = companion_texture_coordinates[vert][comp]; /* 2 */
    }
    assert(COMPANION_TRIANGLE_COUNT*3*sizeof(unsigned short) < INDEX_BUFFER_SIZE);
    memcpy(etna_bo_map(idx), &companion_triangles[0][0], COMPANION_TRIANGLE_COUNT*3*sizeof(unsigned short));
#endif
    /* Fill in texture (convert from RGB linear to tiled) */
#define TILE_WIDTH (4)
#define TILE_HEIGHT (4)
#define TILE_WORDS (TILE_WIDTH*TILE_HEIGHT)
    unsigned ytiles = COMPANION_TEXTURE_HEIGHT / TILE_HEIGHT;
    unsigned xtiles = COMPANION_TEXTURE_WIDTH / TILE_WIDTH;
    unsigned dst_stride = xtiles * TILE_WORDS;
    uint32_t *tex_map = (uint32_t*)etna_bo_map(tex);

    for(unsigned ty=0; ty<ytiles; ++ty)
    {
        for(unsigned tx=0; tx<xtiles; ++tx)
        {
            unsigned ofs = ty * dst_stride + tx * TILE_WORDS;
            for(unsigned y=0; y<TILE_HEIGHT; ++y)
            {
                for(unsigned x=0; x<TILE_WIDTH; ++x)
                {
                    unsigned srcy = ty*TILE_HEIGHT + y;
                    unsigned srcx = tx*TILE_WIDTH + x;
                    unsigned src_ofs = (srcy*COMPANION_TEXTURE_WIDTH+srcx)*3;
                    unsigned r,g,b,a;
#ifndef TEST_PATTERN /* actual texture */
                    r = ((uint8_t*)companion_texture)[src_ofs+0];
                    g = ((uint8_t*)companion_texture)[src_ofs+1];
                    b = ((uint8_t*)companion_texture)[src_ofs+2];
#else /* test pattern */
                    r = srcx; g = srcy; b = 0;
#endif
                    a = 255;

                    tex_map[ofs] = ((a&0xFF) << 24) | ((b&0xFF) << 16) | ((g&0xFF) << 8) | (r&0xFF);
                    ofs += 1;
                }
            }
        }
    }
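    /* Address check for the tiling loop above (values illustrative): with
     * COMPANION_TEXTURE_WIDTH == 512, xtiles = 128 and dst_stride = 2048
     * words; texel (x=5, y=6) lives in tile (tx=1, ty=1) at word offset
     * 1*2048 + 1*16 + 2*4 + 1 = 2073. */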

    struct etna_ctx *ctx = 0;
    if(etna_create(conn, &ctx) != ETNA_OK)
    {
        printf("Unable to create context\n");
        exit(1);
    }

    for(int frame=0; frame<1000; ++frame)
    {
        printf("*** FRAME %i ****\n", frame);
        /* XXX part of this can be put outside the loop, but until we have usable context management
         * this is safest.
         */
        etna_set_state(ctx, VIVS_GL_VERTEX_ELEMENT_CONFIG, 0x1);
        etna_set_state(ctx, VIVS_RA_CONTROL, 0x1);
        etna_set_state(ctx, VIVS_PA_W_CLIP_LIMIT, 0x34000001);
        etna_set_state(ctx, VIVS_PA_SYSTEM_MODE, 0x11);
        etna_set_state(ctx, VIVS_PA_CONFIG, ETNA_MASKED_BIT(VIVS_PA_CONFIG_UNK22, 0));
        etna_set_state(ctx, VIVS_SE_CONFIG, 0x0);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR);

        etna_set_state(ctx, VIVS_PE_ALPHA_CONFIG,
                ETNA_MASKED_BIT(VIVS_PE_ALPHA_CONFIG_BLEND_ENABLE_COLOR, 0) &
                ETNA_MASKED_BIT(VIVS_PE_ALPHA_CONFIG_BLEND_SEPARATE_ALPHA, 0) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_SRC_FUNC_COLOR, BLEND_FUNC_ONE) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_SRC_FUNC_ALPHA, BLEND_FUNC_ONE) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_DST_FUNC_COLOR, BLEND_FUNC_ZERO) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_DST_FUNC_ALPHA, BLEND_FUNC_ZERO) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_EQ_COLOR, BLEND_EQ_ADD) &
                ETNA_MASKED(VIVS_PE_ALPHA_CONFIG_EQ_ALPHA, BLEND_EQ_ADD));
        etna_set_state(ctx, VIVS_PE_ALPHA_BLEND_COLOR,
                VIVS_PE_ALPHA_BLEND_COLOR_B(0) |
                VIVS_PE_ALPHA_BLEND_COLOR_G(0) |
                VIVS_PE_ALPHA_BLEND_COLOR_R(0) |
                VIVS_PE_ALPHA_BLEND_COLOR_A(0));
        etna_set_state(ctx, VIVS_PE_ALPHA_OP, ETNA_MASKED_BIT(VIVS_PE_ALPHA_OP_ALPHA_TEST, 0));
        etna_set_state(ctx, VIVS_PA_CONFIG, ETNA_MASKED_INL(VIVS_PA_CONFIG_CULL_FACE_MODE, CCW));
        etna_set_state(ctx, VIVS_PE_STENCIL_CONFIG, ETNA_MASKED(VIVS_PE_STENCIL_CONFIG_REF_FRONT, 0) &
                                                    ETNA_MASKED(VIVS_PE_STENCIL_CONFIG_MASK_FRONT, 0xff) &
                                                    ETNA_MASKED(VIVS_PE_STENCIL_CONFIG_WRITE_MASK, 0xff) &
                                                    ETNA_MASKED_INL(VIVS_PE_STENCIL_CONFIG_MODE, DISABLED));
        etna_set_state(ctx, VIVS_PE_STENCIL_OP, ETNA_MASKED(VIVS_PE_STENCIL_OP_FUNC_FRONT, COMPARE_FUNC_ALWAYS) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_FUNC_BACK, COMPARE_FUNC_ALWAYS) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_FAIL_FRONT, STENCIL_OP_KEEP) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_FAIL_BACK, STENCIL_OP_KEEP) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_DEPTH_FAIL_FRONT, STENCIL_OP_KEEP) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_DEPTH_FAIL_BACK, STENCIL_OP_KEEP) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_PASS_FRONT, STENCIL_OP_KEEP) &
                                                ETNA_MASKED(VIVS_PE_STENCIL_OP_PASS_BACK, STENCIL_OP_KEEP));

        etna_set_state(ctx, VIVS_SE_DEPTH_SCALE, 0x0);
        etna_set_state(ctx, VIVS_SE_DEPTH_BIAS, 0x0);

        etna_set_state(ctx, VIVS_PA_CONFIG, ETNA_MASKED_INL(VIVS_PA_CONFIG_FILL_MODE, SOLID) &
                                            ETNA_MASKED_INL(VIVS_PA_CONFIG_SHADE_MODEL, SMOOTH));

        etna_set_state(ctx, VIVS_PE_COLOR_FORMAT,
                ETNA_MASKED_BIT(VIVS_PE_COLOR_FORMAT_OVERWRITE, 0) &
                ETNA_MASKED(VIVS_PE_COLOR_FORMAT_COMPONENTS, 0xf) &
                ETNA_MASKED(VIVS_PE_COLOR_FORMAT_FORMAT, RS_FORMAT_X8R8G8B8) &
                ETNA_MASKED_BIT(VIVS_PE_COLOR_FORMAT_SUPER_TILED, supertiled));

        etna_set_state(ctx, VIVS_PE_COLOR_ADDR, etna_bo_gpu_address(rt));
        etna_set_state(ctx, VIVS_PE_COLOR_STRIDE, padded_width * 4);

        uint32_t ts_msaa_config;
        if(supersample_x == 2 && supersample_y == 2)
        {
            // 4X MSAA
            etna_set_state(ctx, VIVS_GL_MULTI_SAMPLE_CONFIG,
                    ETNA_MASKED_INL(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES, 4X) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES, 0xf) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12, 0x0) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16, 0x0)
                    );
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E04, 0x0);
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E10(2), 0xaaa22a22);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(8), 0x262a2288);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(9), 0x886688a2);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(10), 0x888866aa);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(11), 0x668888a6);
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E10(1), 0xe6ae622a);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(4), 0x46622a88);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(5), 0x888888ae);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(6), 0x888888e6);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(7), 0x888888ca);
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E10(0), 0xeaa26e26);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(0), 0x4a6e2688);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(1), 0x888888a2);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(2), 0x888888ea);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(3), 0x888888c6);
            ts_msaa_config = VIVS_TS_MEM_CONFIG_MSAA | VIVS_TS_MEM_CONFIG_MSAA_FORMAT_A8R8G8B8;
        } else if(supersample_x == 2 && supersample_y == 1)
        {
            // 2X MSAA
            etna_set_state(ctx, VIVS_GL_MULTI_SAMPLE_CONFIG,
                    ETNA_MASKED_INL(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES, 2X) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES, 0xf) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12, 0x0) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16, 0x0)
                    );
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E04, 0x0);
            etna_set_state(ctx, VIVS_RA_MULTISAMPLE_UNK00E10(0), 0xaa22);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(0), 0x66aa2288);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(1), 0x88558800);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(2), 0x88881100);
            etna_set_state(ctx, VIVS_RA_CENTROID_TABLE(3), 0x33888800);
            ts_msaa_config = VIVS_TS_MEM_CONFIG_MSAA | VIVS_TS_MEM_CONFIG_MSAA_FORMAT_A8R8G8B8;
        } else { // No multisampling
            etna_set_state(ctx, VIVS_GL_MULTI_SAMPLE_CONFIG,
                    ETNA_MASKED_INL(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES, NONE) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES, 0xf) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12, 0x0) &
                    ETNA_MASKED(VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16, 0x0)
                    );
            ts_msaa_config = 0;
        }

        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR);

        etna_set_state(ctx, VIVS_TS_COLOR_CLEAR_VALUE, 0);
        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, etna_bo_gpu_address(rt_ts));
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, etna_bo_gpu_address(rt));

        etna_set_state(ctx, VIVS_PE_DEPTH_CONFIG,
                ETNA_MASKED_BIT(VIVS_PE_DEPTH_CONFIG_WRITE_ENABLE, 0) &
                ETNA_MASKED_INL(VIVS_PE_DEPTH_CONFIG_DEPTH_FORMAT, D16) &
                ETNA_MASKED_BIT(VIVS_PE_DEPTH_CONFIG_SUPER_TILED, supertiled) &
                ETNA_MASKED_BIT(VIVS_PE_DEPTH_CONFIG_EARLY_Z, 1));
        etna_set_state(ctx, VIVS_PE_DEPTH_ADDR, etna_bo_gpu_address(z));
        etna_set_state(ctx, VIVS_PE_DEPTH_STRIDE, padded_width * 2);
        etna_set_state(ctx, VIVS_PE_HDEPTH_CONTROL, VIVS_PE_HDEPTH_CONTROL_FORMAT_DISABLED);
        etna_set_state_f32(ctx, VIVS_PE_DEPTH_NORMALIZE, 65535.0);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_DEPTH);

        etna_set_state(ctx, VIVS_TS_DEPTH_CLEAR_VALUE, 0xffffffff);
        etna_set_state(ctx, VIVS_TS_DEPTH_STATUS_BASE, etna_bo_gpu_address(z_ts));
        etna_set_state(ctx, VIVS_TS_DEPTH_SURFACE_BASE, etna_bo_gpu_address(z));
        etna_set_state(ctx, VIVS_TS_MEM_CONFIG,
                VIVS_TS_MEM_CONFIG_DEPTH_FAST_CLEAR |
                VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR |
                VIVS_TS_MEM_CONFIG_DEPTH_16BPP |
                VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION |
                ts_msaa_config);

#ifdef EXTRA_DELAYS
        /* Warm up RS on aux render target (is this needed?) */
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
        etna_warm_up_rs(ctx, etna_bo_gpu_address(aux_rt), etna_bo_gpu_address(aux_rt_ts));

        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, etna_bo_gpu_address(rt_ts));
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, etna_bo_gpu_address(rt));

        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
        etna_warm_up_rs(ctx, etna_bo_gpu_address(aux_rt), etna_bo_gpu_address(aux_rt_ts));

        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, etna_bo_gpu_address(rt_ts));
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, etna_bo_gpu_address(rt));
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);

        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
        etna_warm_up_rs(ctx, etna_bo_gpu_address(aux_rt), etna_bo_gpu_address(aux_rt_ts));
        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, etna_bo_gpu_address(rt_ts));
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, etna_bo_gpu_address(rt));
#endif
        /* sync rasterizer to pixel engine after changes to PE config */
        etna_stall(ctx, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);

        /* Set up the resolve to clear tile status for main render target and depth.
         * Regard the TS as an image of width 16 with 4 bytes per pixel (64 bytes per row).
         */
        etna_set_state(ctx, VIVS_RS_CONFIG,
                VIVS_RS_CONFIG_SOURCE_FORMAT(RS_FORMAT_A8R8G8B8) |
                VIVS_RS_CONFIG_DEST_FORMAT(RS_FORMAT_A8R8G8B8)
                );
        etna_set_state_multi(ctx, VIVS_RS_DITHER(0), 2, (uint32_t[]){0xffffffff, 0xffffffff});
        etna_set_state(ctx, VIVS_RS_FILL_VALUE(0), (bits_per_tile==4)?0x11111111:0x55555555);
        etna_set_state(ctx, VIVS_RS_CLEAR_CONTROL,
                VIVS_RS_CLEAR_CONTROL_MODE_ENABLED1 |
                (0xffff << VIVS_RS_CLEAR_CONTROL_BITS__SHIFT));
        etna_set_state(ctx, VIVS_RS_EXTRA_CONFIG, 0);
        /*    clear color ts */
        etna_set_state(ctx, VIVS_RS_DEST_ADDR, etna_bo_gpu_address(rt_ts));
        etna_set_state(ctx, VIVS_RS_DEST_STRIDE, 0x40);
        etna_set_state(ctx, VIVS_RS_WINDOW_SIZE,
                ((rt_ts_size/0x40) << VIVS_RS_WINDOW_SIZE_HEIGHT__SHIFT) |
                (16 << VIVS_RS_WINDOW_SIZE_WIDTH__SHIFT));
        etna_set_state(ctx, VIVS_RS_KICKER, 0xbeebbeeb);
        /*    clear depth ts */
        etna_set_state(ctx, VIVS_RS_DEST_ADDR, etna_bo_gpu_address(z_ts));
        etna_set_state(ctx, VIVS_RS_DEST_STRIDE, 0x40);
        etna_set_state(ctx, VIVS_RS_WINDOW_SIZE,
                ((z_ts_size/0x40) << VIVS_RS_WINDOW_SIZE_HEIGHT__SHIFT) |
                (16 << VIVS_RS_WINDOW_SIZE_WIDTH__SHIFT));
        etna_set_state(ctx, VIVS_RS_KICKER, 0xbeebbeeb);
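        /* Sanity check on the window sizes above: each TS row is 16 "pixels"
         * of 4 bytes = 0x40 bytes, so a hypothetical rt_ts_size of 0x10000
         * bytes is cleared as a 16 x 1024 image. */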
        /** Done */
       
        etna_set_state(ctx, VIVS_TS_COLOR_CLEAR_VALUE, 0xff7f7f7f);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR);

        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, etna_bo_gpu_address(rt_ts));
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, etna_bo_gpu_address(rt));
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);

        /* depth setup */
        etna_set_state(ctx, VIVS_PE_DEPTH_CONFIG, ETNA_MASKED_BIT(VIVS_PE_DEPTH_CONFIG_WRITE_ENABLE, 1) &
                                                  ETNA_MASKED(VIVS_PE_DEPTH_CONFIG_DEPTH_FUNC, COMPARE_FUNC_LESS) &
                                                  ETNA_MASKED_INL(VIVS_PE_DEPTH_CONFIG_DEPTH_MODE, Z) &
                                                  ETNA_MASKED_BIT(VIVS_PE_DEPTH_CONFIG_ONLY_DEPTH, 0));
        etna_set_state_f32(ctx, VIVS_PE_DEPTH_NEAR, 0.0);
        etna_set_state_f32(ctx, VIVS_PE_DEPTH_FAR, 1.0);
        etna_set_state_f32(ctx, VIVS_PE_DEPTH_NORMALIZE, 65535.0);

        /* set up primitive assembly and setup engine */
        etna_set_state_f32(ctx, VIVS_PA_VIEWPORT_OFFSET_Z, 0.0);
        etna_set_state_f32(ctx, VIVS_PA_VIEWPORT_SCALE_Z, 1.0);
        etna_set_state_fixp(ctx, VIVS_PA_VIEWPORT_OFFSET_X, width << 15);
        etna_set_state_fixp(ctx, VIVS_PA_VIEWPORT_OFFSET_Y, height << 15);
        etna_set_state_fixp(ctx, VIVS_PA_VIEWPORT_SCALE_X, width << 15);
        etna_set_state_fixp(ctx, VIVS_PA_VIEWPORT_SCALE_Y, height << 15);
        etna_set_state_fixp(ctx, VIVS_SE_SCISSOR_LEFT, 0);
        etna_set_state_fixp(ctx, VIVS_SE_SCISSOR_TOP, 0);
        etna_set_state_fixp(ctx, VIVS_SE_SCISSOR_RIGHT, (width << 16) | 5);
        etna_set_state_fixp(ctx, VIVS_SE_SCISSOR_BOTTOM, (height << 16) | 5);

        /* set up texture unit */
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_TEXTURE);
        etna_set_state(ctx, VIVS_TE_SAMPLER_SIZE(0),
                VIVS_TE_SAMPLER_SIZE_WIDTH(512)|VIVS_TE_SAMPLER_SIZE_HEIGHT(512));
        etna_set_state(ctx, VIVS_TE_SAMPLER_LOG_SIZE(0),
                VIVS_TE_SAMPLER_LOG_SIZE_WIDTH(9<<5) |
                VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT(9<<5));
        etna_set_state(ctx, VIVS_TE_SAMPLER_LOD_ADDR(0,0), etna_bo_gpu_address(tex));
        etna_set_state(ctx, VIVS_TE_SAMPLER_CONFIG0(0),
                VIVS_TE_SAMPLER_CONFIG0_TYPE(TEXTURE_TYPE_2D)|
                VIVS_TE_SAMPLER_CONFIG0_UWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE)|
                VIVS_TE_SAMPLER_CONFIG0_VWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE)|
                VIVS_TE_SAMPLER_CONFIG0_MIN(TEXTURE_FILTER_LINEAR)|
                VIVS_TE_SAMPLER_CONFIG0_MIP(TEXTURE_FILTER_NONE)|
                VIVS_TE_SAMPLER_CONFIG0_MAG(TEXTURE_FILTER_LINEAR)|
                VIVS_TE_SAMPLER_CONFIG0_FORMAT(TEXTURE_FORMAT_X8R8G8B8));
        etna_set_state(ctx, VIVS_TE_SAMPLER_LOD_CONFIG(0), 0x00000000); /*   TE.SAMPLER[0].LOD_CONFIG := 0x0 */

        /* shader setup */
        etna_set_state(ctx, VIVS_VS_END_PC, vs_size/16);
        etna_set_state_multi(ctx, VIVS_VS_INPUT_COUNT, 3, (uint32_t[]){
                /* VIVS_VS_INPUT_COUNT */ VIVS_VS_INPUT_COUNT_UNK8(1) | VIVS_VS_INPUT_COUNT_COUNT(3),
                /* VIVS_VS_TEMP_REGISTER_CONTROL */ VIVS_VS_TEMP_REGISTER_CONTROL_NUM_TEMPS(6),
                /* VIVS_VS_OUTPUT(0) */ 0x10004});
        etna_set_state(ctx, VIVS_VS_START_PC, 0x0);
        etna_set_state_f32(ctx, VIVS_VS_UNIFORMS(45), 0.5); /* u11.y */
        etna_set_state_f32(ctx, VIVS_VS_UNIFORMS(44), 1.0); /* u11.x */
        etna_set_state_f32(ctx, VIVS_VS_UNIFORMS(27), 0.0); /* u6.w */
        etna_set_state_f32(ctx, VIVS_VS_UNIFORMS(23), 20.0); /* u5.w */
        etna_set_state_f32(ctx, VIVS_VS_UNIFORMS(19), 2.0); /* u4.w */

        /* Now load the shader itself */
        etna_set_state_multi(ctx, VIVS_VS_INST_MEM(0), vs_size/4, vs);
        etna_set_state(ctx, VIVS_RA_CONTROL, 0x3);
        etna_set_state_f32(ctx, VIVS_PS_UNIFORMS(0), 1.0); /* u0.x */
        etna_set_state_multi(ctx, VIVS_PS_END_PC, 2, (uint32_t[]){
                /* VIVS_PS_END_PC */ ps_size/16,
                /* VIVS_PS_OUTPUT_REG */ 0x1});
        etna_set_state(ctx, VIVS_PS_START_PC, 0x0);
        etna_set_state(ctx, VIVS_PA_ATTRIBUTE_ELEMENT_COUNT, 0x200);
        etna_set_state(ctx, VIVS_PA_SHADER_ATTRIBUTES(0), 0x200);
        etna_set_state(ctx, VIVS_PA_SHADER_ATTRIBUTES(1), 0x200);
        etna_set_state(ctx, VIVS_GL_VARYING_NUM_COMPONENTS, 
                VIVS_GL_VARYING_NUM_COMPONENTS_VAR0(4)| /* position */
                VIVS_GL_VARYING_NUM_COMPONENTS_VAR1(2)  /* texture coordinate */
                );
        etna_set_state_multi(ctx, VIVS_GL_VARYING_COMPONENT_USE(0), 2, (uint32_t[]){
                VIVS_GL_VARYING_COMPONENT_USE_COMP0(VARYING_COMPONENT_USE_USED) |
                VIVS_GL_VARYING_COMPONENT_USE_COMP1(VARYING_COMPONENT_USE_USED) |
                VIVS_GL_VARYING_COMPONENT_USE_COMP2(VARYING_COMPONENT_USE_USED) |
                VIVS_GL_VARYING_COMPONENT_USE_COMP3(VARYING_COMPONENT_USE_USED) |
                VIVS_GL_VARYING_COMPONENT_USE_COMP4(VARYING_COMPONENT_USE_USED) |
                VIVS_GL_VARYING_COMPONENT_USE_COMP5(VARYING_COMPONENT_USE_USED)
                , 0
                });
        etna_set_state_multi(ctx, VIVS_PS_INST_MEM(0), ps_size/4, ps);
        etna_set_state(ctx, VIVS_PS_INPUT_COUNT,
                VIVS_PS_INPUT_COUNT_UNK8(31)| VIVS_PS_INPUT_COUNT_COUNT(3 + extra_ps_inputs));
        etna_set_state(ctx, VIVS_PS_TEMP_REGISTER_CONTROL,
                VIVS_PS_TEMP_REGISTER_CONTROL_NUM_TEMPS(3 + extra_ps_inputs));
        etna_set_state(ctx, VIVS_PS_CONTROL, VIVS_PS_CONTROL_UNK1);
        etna_set_state(ctx, VIVS_GL_VARYING_TOTAL_COMPONENTS,
                VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM(6)); /* 4+2=6 total varying components */
        etna_set_state(ctx, VIVS_VS_LOAD_BALANCING, 0xf3f0542); /* depends on number of inputs/outputs/varyings? XXX how exactly */
        etna_set_state(ctx, VIVS_VS_OUTPUT_COUNT, 3);
        etna_set_state(ctx, VIVS_PA_CONFIG, ETNA_MASKED_BIT(VIVS_PA_CONFIG_POINT_SIZE_ENABLE, 0));

        /*   Compute transform matrices in the same way as cube egl demo */
        ESMatrix modelview;
        esMatrixLoadIdentity(&modelview);
        esTranslate(&modelview, 0.0f, 0.0f, -9.0f);
        esRotate(&modelview, 45.0f, 1.0f, 0.0f, 0.0f);
        esRotate(&modelview, 45.0f, 0.0f, 1.0f, 0.0f);
        esRotate(&modelview, frame*0.5f, 0.0f, 0.0f, 1.0f);
        esScale(&modelview, 0.475f, 0.475f, 0.475f);
        float aspect = (float)(height) / (float)(width);
        ESMatrix projection;
        esMatrixLoadIdentity(&projection);
        esFrustum(&projection, -2.8f, +2.8f, -2.8f * aspect, +2.8f * aspect, 6.0f, 10.0f);
        ESMatrix modelviewprojection;
        esMatrixLoadIdentity(&modelviewprojection);
        esMatrixMultiply(&modelviewprojection, &modelview, &projection);
        ESMatrix inverse, normal; /* compute inverse transpose normal transformation matrix */
        esMatrixInverse3x3(&inverse, &modelview);
        esMatrixTranspose(&normal, &inverse);
       
        etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(0), 16, (uint32_t*)&modelviewprojection.m[0][0]);
        etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(16), 3, (uint32_t*)&normal.m[0][0]); /* u4.xyz */
        etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(20), 3, (uint32_t*)&normal.m[1][0]); /* u5.xyz */
        etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(24), 3, (uint32_t*)&normal.m[2][0]); /* u6.xyz */
        etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(28), 16, (uint32_t*)&modelview.m[0][0]);
#ifdef INDEXED
        etna_set_state(ctx, VIVS_FE_INDEX_STREAM_BASE_ADDR, etna_bo_gpu_address(idx));
        etna_set_state(ctx, VIVS_FE_INDEX_STREAM_CONTROL, VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_SHORT);
#endif
        etna_set_state(ctx, VIVS_FE_VERTEX_STREAM_BASE_ADDR, etna_bo_gpu_address(vtx));
        etna_set_state(ctx, VIVS_FE_VERTEX_STREAM_CONTROL,
                VIVS_FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE((3 + 3 + 2)*4));
        etna_set_state(ctx, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
                VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FLOAT |
                (ENDIAN_MODE_NO_SWAP << VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__SHIFT) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(0) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(3) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_OFF |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_START(0x0) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_END(0xc));
        etna_set_state(ctx, VIVS_FE_VERTEX_ELEMENT_CONFIG(1),
                VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FLOAT |
                (ENDIAN_MODE_NO_SWAP << VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__SHIFT) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(0) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(3) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_OFF |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_START(0xc) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_END(0x18));
        etna_set_state(ctx, VIVS_FE_VERTEX_ELEMENT_CONFIG(2),
                VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FLOAT |
                (ENDIAN_MODE_NO_SWAP << VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__SHIFT) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NONCONSECUTIVE |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(0) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(2) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_OFF |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_START(0x18) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_END(0x20));
        etna_set_state(ctx, VIVS_VS_INPUT(0), 0x20100);
        etna_set_state(ctx, VIVS_PA_CONFIG, ETNA_MASKED_BIT(VIVS_PA_CONFIG_POINT_SPRITE_ENABLE, 0));

#ifdef INDEXED
        etna_draw_indexed_primitives(ctx, PRIMITIVE_TYPE_TRIANGLES, 0, COMPANION_TRIANGLE_COUNT, 0);
#else
        etna_draw_primitives(ctx, PRIMITIVE_TYPE_TRIANGLES, 0, COMPANION_TRIANGLE_COUNT);
#endif
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
#ifdef EXTRA_DELAYS
        etna_flush(ctx, NULL);
#endif
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
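        /* In-place resolve: source and destination are the same tiled render
         * target, forcing the rendered tiles to be fully written to memory
         * before they are copied out below. */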
        etna_set_state(ctx, VIVS_RS_CONFIG,
                VIVS_RS_CONFIG_SOURCE_FORMAT(RS_FORMAT_A8R8G8B8) |
                VIVS_RS_CONFIG_SOURCE_TILED |
                VIVS_RS_CONFIG_DEST_FORMAT(RS_FORMAT_A8R8G8B8) |
                VIVS_RS_CONFIG_DEST_TILED);
        etna_set_state(ctx, VIVS_RS_SOURCE_STRIDE, (padded_width * 4 * 4) | (supertiled?VIVS_RS_SOURCE_STRIDE_TILING:0));
        etna_set_state(ctx, VIVS_RS_DEST_STRIDE, (padded_width * 4 * 4) | (supertiled?VIVS_RS_DEST_STRIDE_TILING:0));
        etna_set_state(ctx, VIVS_RS_DITHER(0), 0xffffffff);
        etna_set_state(ctx, VIVS_RS_DITHER(1), 0xffffffff);
        etna_set_state(ctx, VIVS_RS_CLEAR_CONTROL, VIVS_RS_CLEAR_CONTROL_MODE_DISABLED);
        etna_set_state(ctx, VIVS_RS_EXTRA_CONFIG, 0);
        etna_set_state(ctx, VIVS_RS_SOURCE_ADDR, etna_bo_gpu_address(rt));
        etna_set_state(ctx, VIVS_RS_DEST_ADDR, etna_bo_gpu_address(rt));
        etna_set_state(ctx, VIVS_RS_WINDOW_SIZE,
                VIVS_RS_WINDOW_SIZE_HEIGHT(padded_height) |
                VIVS_RS_WINDOW_SIZE_WIDTH(padded_width));
        etna_set_state(ctx, VIVS_RS_KICKER, 0xbeebbeeb);
#ifdef EXTRA_DELAYS
        etna_flush(ctx, NULL);

        etna_warm_up_rs(ctx, etna_bo_gpu_address(aux_rt), etna_bo_gpu_address(aux_rt_ts));
#endif

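        /* Re-point the TS at the color buffer, keeping only the depth
         * fast-clear bits enabled, so that the copy below reads the actual
         * color data instead of cleared tile status. */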
        etna_set_state(ctx, VIVS_TS_COLOR_STATUS_BASE, etna_bo_gpu_address(rt_ts));
        etna_set_state(ctx, VIVS_TS_COLOR_SURFACE_BASE, etna_bo_gpu_address(rt));
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR);
        etna_set_state(ctx, VIVS_TS_MEM_CONFIG,
                VIVS_TS_MEM_CONFIG_DEPTH_FAST_CLEAR |
                VIVS_TS_MEM_CONFIG_DEPTH_16BPP |
                VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION);
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR);

        /* Wait for pixel engine to finish */
#ifdef EXTRA_DELAYS
        etna_finish(ctx);
#else
        etna_stall(ctx, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
#endif

        /* Copy result to framebuffer */
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
        etna_set_state(ctx, VIVS_RS_CONFIG,
                VIVS_RS_CONFIG_SOURCE_FORMAT(RS_FORMAT_A8R8G8B8) |
                VIVS_RS_CONFIG_SOURCE_TILED |
                ((supersample_x>1)?VIVS_RS_CONFIG_DOWNSAMPLE_X:0) |
                ((supersample_y>1)?VIVS_RS_CONFIG_DOWNSAMPLE_Y:0) |
                VIVS_RS_CONFIG_DEST_FORMAT(RS_FORMAT_A8R8G8B8) |
                VIVS_RS_CONFIG_SWAP_RB);
        etna_set_state(ctx, VIVS_RS_SOURCE_STRIDE, (padded_width * 4 * 4) | (supertiled?VIVS_RS_SOURCE_STRIDE_TILING:0));
        etna_set_state(ctx, VIVS_RS_DEST_STRIDE, fbs->fb.fb_fix.line_length);
        etna_set_state(ctx, VIVS_RS_DITHER(0), 0xffffffff);
        etna_set_state(ctx, VIVS_RS_DITHER(1), 0xffffffff);
        etna_set_state(ctx, VIVS_RS_CLEAR_CONTROL, VIVS_RS_CLEAR_CONTROL_MODE_DISABLED);
        etna_set_state(ctx, VIVS_RS_EXTRA_CONFIG, 0);
        etna_set_state(ctx, VIVS_RS_SOURCE_ADDR, etna_bo_gpu_address(rt));
        etna_set_state(ctx, VIVS_RS_DEST_ADDR, etna_bo_gpu_address(fbs->fb.buffer[backbuffer]));
        etna_set_state(ctx, VIVS_RS_WINDOW_SIZE,
                VIVS_RS_WINDOW_SIZE_HEIGHT(height * supersample_y) |
                VIVS_RS_WINDOW_SIZE_WIDTH(width * supersample_x));
        etna_set_state(ctx, VIVS_RS_KICKER, 0xbeebbeeb);
        etna_finish(ctx);

        /* switch buffers */
        fb_set_buffer(&fbs->fb, backbuffer);
        backbuffer = 1-backbuffer;
    }

    etna_free(ctx);
    viv_close(conn);
    return 0;
}
/* Create a new resource object, using the given template info */
struct pipe_resource *
etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout,
                    uint64_t modifier, const struct pipe_resource *templat)
{
   struct etna_screen *screen = etna_screen(pscreen);
   struct etna_resource *rsc;
   unsigned size;

   DBG_F(ETNA_DBG_RESOURCE_MSGS,
         "target=%d, format=%s, %ux%ux%u, array_size=%u, "
         "last_level=%u, nr_samples=%u, usage=%u, bind=%x, flags=%x",
         templat->target, util_format_name(templat->format), templat->width0,
         templat->height0, templat->depth0, templat->array_size,
         templat->last_level, templat->nr_samples, templat->usage,
         templat->bind, templat->flags);

   /* Determine scaling for antialiasing, allow override using debug flag */
   int nr_samples = templat->nr_samples;
   if ((templat->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) &&
       !(templat->bind & PIPE_BIND_SAMPLER_VIEW)) {
      if (DBG_ENABLED(ETNA_DBG_MSAA_2X))
         nr_samples = 2;
      if (DBG_ENABLED(ETNA_DBG_MSAA_4X))
         nr_samples = 4;
   }

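   /* translate_samples_to_xyscale maps the sample count to X/Y supersampling
    * factors (e.g. 2 samples -> 2x1, 4 samples -> 2x2). */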
   int msaa_xscale = 1, msaa_yscale = 1;
   if (!translate_samples_to_xyscale(nr_samples, &msaa_xscale, &msaa_yscale, NULL)) {
      /* Number of samples not supported */
      return NULL;
   }

   /* Determine needed padding (alignment of height/width) */
   unsigned paddingX = 0, paddingY = 0;
   unsigned halign = TEXTURE_HALIGN_FOUR;
   if (!util_format_is_compressed(templat->format)) {
      /* If we have the TEXTURE_HALIGN feature, we can always align to the
       * resolve engine's width.  If not, we must not align resources used
       * only for textures. If this GPU uses the BLT engine, never do RS align.
       */
      bool rs_align = screen->specs.use_blt ? false : (
                         VIV_FEATURE(screen, chipMinorFeatures1, TEXTURE_HALIGN) ||
                         !etna_resource_sampler_only(templat));
      etna_layout_multiple(layout, screen->specs.pixel_pipes, rs_align, &paddingX,
                           &paddingY, &halign);
      assert(paddingX && paddingY);
   } else {
      /* Compressed textures are padded to their block size, but we don't have
       * to do anything special for that. */
      paddingX = 1;
      paddingY = 1;
   }

   if (!screen->specs.use_blt && templat->target != PIPE_BUFFER)
      etna_adjust_rs_align(screen->specs.pixel_pipes, NULL, &paddingY);

   if (templat->bind & PIPE_BIND_SCANOUT) {
      struct pipe_resource scanout_templat = *templat;
      struct renderonly_scanout *scanout;
      struct winsys_handle handle;

      /* pad scanout buffer size to be compatible with the RS */
      if (!screen->specs.use_blt && modifier == DRM_FORMAT_MOD_LINEAR)
         etna_adjust_rs_align(screen->specs.pixel_pipes, &paddingX, &paddingY);

      scanout_templat.width0 = align(scanout_templat.width0, paddingX);
      scanout_templat.height0 = align(scanout_templat.height0, paddingY);

      scanout = renderonly_scanout_for_resource(&scanout_templat,
                                                screen->ro, &handle);
      if (!scanout)
         return NULL;

      assert(handle.type == WINSYS_HANDLE_TYPE_FD);
      handle.modifier = modifier;
      rsc = etna_resource(pscreen->resource_from_handle(pscreen, templat,
                                                        &handle,
                                                        PIPE_HANDLE_USAGE_WRITE));
      close(handle.handle);
      if (!rsc)
         return NULL;

      rsc->scanout = scanout;

      return &rsc->base;
   }

   rsc = CALLOC_STRUCT(etna_resource);
   if (!rsc)
      return NULL;

   rsc->base = *templat;
   rsc->base.screen = pscreen;
   rsc->base.nr_samples = nr_samples;
   rsc->layout = layout;
   rsc->halign = halign;

   pipe_reference_init(&rsc->base.reference, 1);
   list_inithead(&rsc->list);

   size = setup_miptree(rsc, paddingX, paddingY, msaa_xscale, msaa_yscale);

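   /* Allocate write-combined memory; vertex buffers additionally force a
    * mapping through the GPU MMU (DRM_ETNA_GEM_FORCE_MMU). */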
   uint32_t flags = DRM_ETNA_GEM_CACHE_WC;
   if (templat->bind & PIPE_BIND_VERTEX_BUFFER)
      flags |= DRM_ETNA_GEM_FORCE_MMU;
   struct etna_bo *bo = etna_bo_new(screen->dev, size, flags);
   if (unlikely(bo == NULL)) {
      BUG("Problem allocating video memory for resource");
      goto free_rsc;
   }

   rsc->bo = bo;
   rsc->ts_bo = 0; /* TS is only created when first bound to surface */

   if (DBG_ENABLED(ETNA_DBG_ZERO)) {
      void *map = etna_bo_map(bo);
      memset(map, 0, size);
   }

   return &rsc->base;

free_rsc:
   FREE(rsc);
   return NULL;
}
/* Create a new resource object, using the given template info */
struct pipe_resource *
etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout,
                    const struct pipe_resource *templat)
{
   struct etna_screen *screen = etna_screen(pscreen);
   unsigned size;

   DBG_F(ETNA_DBG_RESOURCE_MSGS,
         "target=%d, format=%s, %ux%ux%u, array_size=%u, "
         "last_level=%u, nr_samples=%u, usage=%u, bind=%x, flags=%x",
         templat->target, util_format_name(templat->format), templat->width0,
         templat->height0, templat->depth0, templat->array_size,
         templat->last_level, templat->nr_samples, templat->usage,
         templat->bind, templat->flags);

   /* Determine scaling for antialiasing, allow override using debug flag */
   int nr_samples = templat->nr_samples;
   if ((templat->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) &&
       !(templat->bind & PIPE_BIND_SAMPLER_VIEW)) {
      if (DBG_ENABLED(ETNA_DBG_MSAA_2X))
         nr_samples = 2;
      if (DBG_ENABLED(ETNA_DBG_MSAA_4X))
         nr_samples = 4;
   }

   int msaa_xscale = 1, msaa_yscale = 1;
   if (!translate_samples_to_xyscale(nr_samples, &msaa_xscale, &msaa_yscale, NULL)) {
      /* Number of samples not supported */
      return NULL;
   }

   /* If we have the TEXTURE_HALIGN feature, we can always align to the
    * resolve engine's width.  If not, we must not align resources used
    * only for textures. */
   bool rs_align = VIV_FEATURE(screen, chipMinorFeatures1, TEXTURE_HALIGN) ||
                   !etna_resource_sampler_only(templat);

   /* Determine needed padding (alignment of height/width) */
   unsigned paddingX = 0, paddingY = 0;
   unsigned halign = TEXTURE_HALIGN_FOUR;
   etna_layout_multiple(layout, screen->specs.pixel_pipes, rs_align, &paddingX,
                        &paddingY, &halign);
   assert(paddingX && paddingY);

   if (templat->target != PIPE_BUFFER) {
      /* The resolve engine needs the height padded to 4 rows per pixel pipe. */
      unsigned min_paddingY = 4 * screen->specs.pixel_pipes;
      if (paddingY < min_paddingY)
         paddingY = min_paddingY;
   }

   struct etna_resource *rsc = CALLOC_STRUCT(etna_resource);

   if (!rsc)
      return NULL;

   rsc->base = *templat;
   rsc->base.screen = pscreen;
   rsc->base.nr_samples = nr_samples;
   rsc->layout = layout;
   rsc->halign = halign;

   pipe_reference_init(&rsc->base.reference, 1);
   list_inithead(&rsc->list);

   size = setup_miptree(rsc, paddingX, paddingY, msaa_xscale, msaa_yscale);

   uint32_t flags = DRM_ETNA_GEM_CACHE_WC;
   if (templat->bind & PIPE_BIND_VERTEX_BUFFER)
      flags |= DRM_ETNA_GEM_FORCE_MMU;
   struct etna_bo *bo = etna_bo_new(screen->dev, size, flags);
   if (unlikely(bo == NULL)) {
      BUG("Problem allocating video memory for resource");
      return NULL;
   }

   rsc->bo = bo;
   rsc->ts_bo = 0; /* TS is only created when first bound to surface */

   if (templat->bind & PIPE_BIND_SCANOUT)
      rsc->scanout = renderonly_scanout_for_resource(&rsc->base, screen->ro);

   if (DBG_ENABLED(ETNA_DBG_ZERO)) {
      void *map = etna_bo_map(bo);
      memset(map, 0, size);
   }

   return &rsc->base;
}
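
As a usage illustration (not part of the original driver source): a minimal sketch of calling the second etna_resource_alloc variant above to create a small 2D render target. The helper name, the 256x256 size and the format choice are hypothetical; ETNA_LAYOUT_TILED is the tiled-layout constant used in the transfer examples.

/* Hedged usage sketch -- helper name, size and format are hypothetical. */
static struct pipe_resource *alloc_example_rt(struct pipe_screen *pscreen)
{
   struct pipe_resource templat;
   memset(&templat, 0, sizeof(templat));
   templat.target = PIPE_TEXTURE_2D;             /* plain 2D texture */
   templat.format = PIPE_FORMAT_B8G8R8A8_UNORM;  /* hypothetical format */
   templat.width0 = 256;                         /* hypothetical size */
   templat.height0 = 256;
   templat.depth0 = 1;
   templat.array_size = 1;
   templat.last_level = 0;                       /* no mip levels */
   templat.nr_samples = 1;
   templat.bind = PIPE_BIND_RENDER_TARGET;
   /* Tiled layout is what the pixel engine renders to in these examples. */
   return etna_resource_alloc(pscreen, ETNA_LAYOUT_TILED, &templat);
}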
Example #12
0
static void *etna_pipe_transfer_map(struct pipe_context *pipe,
                         struct pipe_resource *resource,
                         unsigned level,
                         unsigned usage,  /* a combination of PIPE_TRANSFER_x */
                         const struct pipe_box *box,
                         struct pipe_transfer **out_transfer)
{
    struct etna_pipe_context *priv = etna_pipe_context(pipe);
    struct etna_transfer *ptrans = util_slab_alloc(&priv->transfer_pool);
    struct etna_resource *resource_priv = etna_resource(resource);
    enum pipe_format format = resource->format;
    if (!ptrans)
        return NULL;
    assert(level <= resource->last_level);

    /* PIPE_TRANSFER_READ always requires a sync. */
    if(usage & PIPE_TRANSFER_READ)
    {
        etna_finish(priv->ctx);
    }
    /* XXX we don't handle PIPE_TRANSFER_FLUSH_EXPLICIT; this flag can be ignored when mapping in-place,
     * but when not in place we need to fire off the copy operation in transfer_flush_region (currently
     * a no-op) instead of unmap. Need to handle this to support ARB_map_buffer_range extension at least.
     */
    /* XXX we do not track pending operations on the resource; at some point in the pipeline
       that has not yet been executed, it can be:

       - bound as surface
       - bound through vertex buffer
       - bound through index buffer
       - bound in sampler view
       - used in clear_render_target / clear_depth_stencil operation
       - used in blit
       - used in resource_copy_region

       How do other drivers record this information over the course of the rendering pipeline?
       Is it necessary at all? Only if we want to provide a fast path that maps the resource directly
       (and for PIPE_TRANSFER_MAP_DIRECTLY) without forcing a sync.
       We also need to know whether the resource is in use to determine whether a sync is needed (or we
       could just sync always, at the expense of performance).

       A conservative approximation without too much overhead would be to mark every resource that has
       been bound at some point as busy. A drawback is that accessing a resource that was bound once but
       has not been in use for a while would still carry a performance penalty. On the other hand,
       the program could use PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE or PIPE_TRANSFER_UNSYNCHRONIZED to
       avoid this in the first place...

       A) We use an in-pipe copy engine, and queue the copy operation after unmap so that the copy
          will be performed when all current commands have been executed.
          Using the RS is possible, not sure if always efficient. This can also do any kind of tiling for us.
          Only possible when PIPE_TRANSFER_DISCARD_RANGE is set.
       B) We discard the entire resource (or at least, the mipmap level) and allocate new memory for it.
          Only possible when mapping the entire resource or PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set.
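       A conservative busy-flag variant of the tracking idea above is sketched after this function.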
     */

    /* If the resource is not in use and needs no tiling, there is no need to
     * allocate a staging buffer for copying; we can return a direct pointer.
     */
    ptrans->in_place = resource_priv->layout == ETNA_LAYOUT_LINEAR ||
                       (resource_priv->layout == ETNA_LAYOUT_TILED && util_format_is_compressed(resource->format));
    ptrans->base.resource = resource;
    ptrans->base.level = level;
    ptrans->base.usage = usage;
    ptrans->base.box = *box;

    struct etna_resource_level *res_level = &resource_priv->levels[level];
    /* map buffer object */
    void *mapped = etna_bo_map(resource_priv->bo) + res_level->offset;
    if(likely(ptrans->in_place))
    {
        ptrans->base.stride = res_level->stride;
        ptrans->base.layer_stride = res_level->layer_stride;
        ptrans->buffer = mapped + etna_compute_offset(resource->format, box, res_level->stride, res_level->layer_stride);
    } else {
        unsigned divSizeX = util_format_get_blockwidth(format);
        unsigned divSizeY = util_format_get_blockheight(format);
        if(usage & PIPE_TRANSFER_MAP_DIRECTLY)
        {
            /* No in-place transfer possible */
            util_slab_free(&priv->transfer_pool, ptrans);
            return NULL;
        }

        ptrans->base.stride = align(box->width, divSizeX) * util_format_get_blocksize(format); /* row stride in bytes */
        ptrans->base.layer_stride = align(box->height, divSizeY) * ptrans->base.stride;
        size_t size = ptrans->base.layer_stride * box->depth;
        ptrans->buffer = MALLOC(size);
        if(unlikely(!ptrans->buffer))
        {
            util_slab_free(&priv->transfer_pool, ptrans);
            return NULL;
        }

        if(usage & PIPE_TRANSFER_READ)
        {
            /* untile or copy resource for reading */
            if(resource_priv->layout == ETNA_LAYOUT_LINEAR || resource_priv->layout == ETNA_LAYOUT_TILED)
            {
                if(resource_priv->layout == ETNA_LAYOUT_TILED && !util_format_is_compressed(resource_priv->base.format))
                {
                    etna_texture_untile(ptrans->buffer, mapped + ptrans->base.box.z * res_level->layer_stride,
                            ptrans->base.box.x, ptrans->base.box.y, res_level->stride,
                            ptrans->base.box.width, ptrans->base.box.height, ptrans->base.stride,
                            util_format_get_blocksize(resource_priv->base.format));
                } else { /* non-tiled or compressed format */
                    util_copy_box(ptrans->buffer,
                      resource_priv->base.format,
                      ptrans->base.stride, ptrans->base.layer_stride,
                      0, 0, 0, /* dst x,y,z */
                      ptrans->base.box.width, ptrans->base.box.height, ptrans->base.box.depth,
                      mapped,
                      res_level->stride, res_level->layer_stride,
                      ptrans->base.box.x, ptrans->base.box.y, ptrans->base.box.z);
                }
            } else /* TODO supertiling */
            {
                BUG("unsupported tiling %i for reading", resource_priv->layout);
            }
        }
    }

    *out_transfer = &ptrans->base;
    return ptrans->buffer;
}
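
The XXX block in etna_pipe_transfer_map above asks how to track whether a resource is still referenced by queued GPU work. Below is a minimal sketch of the conservative approximation it proposes (flag a resource as busy at every bind point, and sync only for busy resources); the busy field and both helper names are hypothetical additions, not existing driver API.

/* Hedged sketch of conservative busy tracking -- the 'busy' flag and both
 * helpers are hypothetical, as noted above.
 * Assumed extra field in struct etna_resource:  bool busy;
 */
static inline void etna_resource_mark_busy(struct etna_resource *rsc)
{
    /* Call from every bind point: surface, vertex/index buffer, sampler
     * view, clear, blit and resource_copy_region. */
    rsc->busy = true;
}

static inline void etna_resource_sync_for_map(struct etna_pipe_context *priv,
                                              struct etna_resource *rsc,
                                              unsigned usage)
{
    if(usage & PIPE_TRANSFER_UNSYNCHRONIZED)
        return; /* caller promises not to touch data still in flight */
    /* With PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE, option B above (replace the
     * storage with a fresh buffer) would avoid the stall; not shown here. */
    if(rsc->busy)
    {
        etna_finish(priv->ctx);  /* wait until all queued commands executed */
        rsc->busy = false;       /* nothing pending references it anymore */
    }
}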