Пример #1
0
void
nvc0_launch_grid(struct pipe_context *pipe,
                 const uint *block_layout, const uint *grid_layout,
                 uint32_t label,
                 const void *input)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;
   unsigned s, i;
   int ret;

   ret = !nvc0_compute_state_validate(nvc0);
   if (ret)
      goto out;

   nvc0_compute_upload_input(nvc0, input);

   BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
   PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));

   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
   PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */

   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
   PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
   PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
   PUSH_DATA (push, cp->num_barriers);
   BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
   PUSH_DATA (push, cp->num_gprs);

   /* grid/block setup */
   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
   PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
   PUSH_DATA (push, grid_layout[2]);
   BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
   PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
   PUSH_DATA (push, block_layout[2]);

   /* launch preliminary setup */
   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
   PUSH_DATA (push, 0x1);
   BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);

   /* kernel launching */
   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
   PUSH_DATA (push, 0x1000);
   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
   PUSH_DATA (push, 0x1);

   /* rebind all the 3D constant buffers
    * (looks like binding a CB on COMPUTE clobbers 3D state) */
   nvc0->dirty |= NVC0_NEW_CONSTBUF;
   for (s = 0; s < 6; s++) {
      for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
         if (nvc0->constbuf[s][i].u.buf)
            nvc0->constbuf_dirty[s] |= 1 << i;
   }
   memset(nvc0->state.uniform_buffer_bound, 0,
          sizeof(nvc0->state.uniform_buffer_bound));

out:
   if (ret)
      NOUVEAU_ERR("Failed to launch grid !\n");
}
Пример #2
0
/**
 * Launch a compute grid described by a pipe_grid_info.
 *
 * Validates compute state, uploads the kernel input and emits the program
 * entry point, local/shared memory and GPR allocation and the block
 * dimensions.  The grid dimensions are then either pushed directly from
 * info->grid, followed by the explicit launch sequence, or, when
 * info->indirect is set, supplied from that buffer through the
 * NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT macro.
 *
 * \param pipe  context, converted with nvc0_context()
 * \param info  launch description; this function reads pc, block, grid,
 *              indirect and indirect_offset, and also forwards the whole
 *              structure to nvc0_compute_upload_input()
 *
 * When state validation fails an error is logged and the function returns
 * without emitting any of the commands below.
 */
void
nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;
   int ret;

   /* ret is non-zero when the validator returns zero.
    * NOTE(review): ~0 presumably selects every state bit - confirm against
    * nvc0_state_validate_cp(). */
   ret = !nvc0_state_validate_cp(nvc0, ~0);
   if (ret) {
      NOUVEAU_ERR("Failed to launch grid !\n");
      return;
   }

   nvc0_compute_upload_input(nvc0, info);

   /* program entry point, looked up from info->pc */
   BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1);
   PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc));

   /* Local memory: bits 4..23 of program header word 1 plus the aligned
    * lmem_size.
    * NOTE(review): the header field is presumably the local memory size
    * declared by the program itself - confirm. */
   BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3);
   PUSH_DATA (push, (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10));
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */

   /* shared memory size, product of the three block dimensions, barriers */
   BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3);
   PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
   PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]);
   PUSH_DATA (push, cp->num_barriers);
   BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1);
   PUSH_DATA (push, cp->num_gprs);

   /* launch preliminary setup */
   BEGIN_NVC0(push, NVC0_CP(GRIDID), 1);
   PUSH_DATA (push, 0x1);
   BEGIN_NVC0(push, SUBC_CP(0x036c), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);

   /* block setup: Y in bits 16 and up, X below, Z in its own word */
   BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2);
   PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
   PUSH_DATA (push, info->block[2]);

   if (unlikely(info->indirect)) {
      /* Indirect launch: the macro's three data words come from the
       * indirect buffer instead of being pushed inline.
       * NOTE(review): no COMPUTE_BEGIN/LAUNCH/COMPUTE_END sequence is
       * emitted on this path; presumably the macro performs the launch -
       * confirm against the macro source. */
      struct nv04_resource *res = nv04_resource(info->indirect);
      uint32_t offset = res->offset + info->indirect_offset;
      unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT;

      /* Reserve push buffer room up front.
       * NOTE(review): the return value is not checked, and the meaning of
       * the (16, 0, 1) arguments should be confirmed against libdrm. */
      nouveau_pushbuf_space(push, 16, 0, 1);
      PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
      /* packet header for the macro with a count of 3 ... */
      PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
      /* ... followed by 3 * 4 bytes referenced directly from res->bo */
      nouveau_pushbuf_data(push, res->bo, offset,
                           NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
   } else {
      /* grid setup: same Y/X packing as the block dimensions */
      BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2);
      PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
      PUSH_DATA (push, info->grid[2]);

      /* kernel launching */
      BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1);
      PUSH_DATA (push, 0);
      BEGIN_NVC0(push, SUBC_CP(0x0a08), 1);
      PUSH_DATA (push, 0);
      BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1);
      PUSH_DATA (push, 0x1000);
      BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1);
      PUSH_DATA (push, 0);
      BEGIN_NVC0(push, SUBC_CP(0x0360), 1);
      PUSH_DATA (push, 0x1);
   }

   /* TODO: Not sure if this is really necessary. */
   /* Invalidate the surfaces of index 5, drop the CP surface bindings and
    * mark every valid image of index 5 dirty so they are set up again.
    * NOTE(review): index 5 is presumably the compute stage - confirm. */
   nvc0_compute_invalidate_surfaces(nvc0, 5);
   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
   nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
   nvc0->images_dirty[5] |= nvc0->images_valid[5];
}