示例#1
0
int
nvc0_screen_compute_setup(struct nvc0_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_object *chan = screen->base.channel;
   struct nouveau_device *dev = screen->base.device;
   uint32_t obj_class;
   int ret;
   int i;

   switch (dev->chipset & ~0xf) {
   case 0xc0:
      if (dev->chipset == 0xc8)
         obj_class = NVC8_COMPUTE_CLASS;
      else
         obj_class = NVC0_COMPUTE_CLASS;
      break;
   case 0xd0:
      obj_class = NVC0_COMPUTE_CLASS;
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
      return ret;
   }

   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL,
                        &screen->parm);
   if (ret)
      return ret;

   BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->oclass);

   /* hardware limit */
   BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
   PUSH_DATA (push, screen->mp_count);
   BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
   PUSH_DATA (push, 0xf);

   BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
   PUSH_DATA (push, 0x8000);

   /* global memory setup */
   BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
   PUSH_DATA (push, 0);
   BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
   for (i = 0; i <= 0xff; i++)
      PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
   BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
   PUSH_DATA (push, 1);

   /* local memory and cstack setup */
   BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->offset);
   BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
   PUSH_DATAh(push, screen->tls->size);
   PUSH_DATA (push, screen->tls->size);
   BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
   PUSH_DATA (push, 1 << 24);

   /* shared memory setup */
   BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
   PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
   PUSH_DATA (push, 2 << 24);
   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
   PUSH_DATA (push, 0);

   /* code segment setup */
   BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->text->offset);
   PUSH_DATA (push, screen->text->offset);

   /* bind parameters buffer */
   BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
   PUSH_DATA (push, screen->parm->size);
   PUSH_DATAh(push, screen->parm->offset);
   PUSH_DATA (push, screen->parm->offset);
   BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
   PUSH_DATA (push, (0 << 8) | 1);

   /* TODO: textures & samplers */

   return 0;
}
示例#2
0
/**
 * Validate compute state, upload the kernel input and emit the methods that
 * launch one compute grid, then mark the 3D constant buffers for rebinding.
 *
 * @param pipe          pipe context; converted with nvc0_context().
 * @param block_layout  block dimensions; indices 0..2 are read, [0] and [1]
 *                      are packed into the BLOCKDIM_YX word and [2] follows.
 * @param grid_layout   grid dimensions; indices 0..2 are read, [0] and [1]
 *                      are packed into the GRIDDIM_YX word and [2] follows.
 * @param label         passed to nvc0_program_symbol_offset() to obtain the
 *                      value written to CP_START_ID.
 * @param input         passed through to nvc0_compute_upload_input().
 *
 * Nothing is returned.  If nvc0_compute_state_validate() returns zero, no
 * methods are emitted and an error is logged.
 */
void
nvc0_launch_grid(struct pipe_context *pipe,
                 const uint *block_layout, const uint *grid_layout,
                 uint32_t label,
                 const void *input)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *cp = nvc0->compprog;
   unsigned s, i;
   int ret;

   /* ret is non-zero exactly when validation returned zero (failure) */
   ret = !nvc0_compute_state_validate(nvc0);
   if (ret)
      goto out;

   nvc0_compute_upload_input(nvc0, input);

   BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
   PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));

   /* per-program local memory size, rounded up with align(..., 0x10) */
   BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
   PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */

   /* shared memory size (aligned to 0x100), product of the three block
    * dimensions, and the program's barrier count */
   BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
   PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
   PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
   PUSH_DATA (push, cp->num_barriers);
   BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
   PUSH_DATA (push, cp->num_gprs);

   /* grid/block setup */
   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
   PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
   PUSH_DATA (push, grid_layout[2]);
   BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
   PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
   PUSH_DATA (push, block_layout[2]);

   /* launch preliminary setup */
   BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
   PUSH_DATA (push, 0x1);
   BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);

   /* kernel launching */
   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
   PUSH_DATA (push, 0x1000);
   BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
   PUSH_DATA (push, 0x1);

   /* rebind all the 3D constant buffers
    * (looks like binding a CB on COMPUTE clobbers 3D state) */
   nvc0->dirty |= NVC0_NEW_CONSTBUF;
   /* flag every slot that currently has a buffer, for each of the 6 entries
    * of constbuf[] iterated here */
   for (s = 0; s < 6; s++) {
      for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
         if (nvc0->constbuf[s][i].u.buf)
            nvc0->constbuf_dirty[s] |= 1 << i;
   }
   /* forget which uniform buffers were recorded as bound */
   memset(nvc0->state.uniform_buffer_bound, 0,
          sizeof(nvc0->state.uniform_buffer_bound));

out:
   /* reached on both paths; only the validation failure path has ret != 0 */
   if (ret)
      NOUVEAU_ERR("Failed to launch grid !\n");
}
/**
 * End a hardware SM performance-counter query.
 *
 * Steps, in order:
 *  1. write 0 to MP_PM_CONTROL for every counter slot that is in use;
 *  2. release the slots owned by this query;
 *  3. launch the readback program (nv50_read_hw_sm_counters_code) with the
 *     query buffer address and the query sequence number as input;
 *  4. re-program MP_PM_CONTROL for the counters still owned by other
 *     queries.
 *
 * The readback program object is created on first use and cached in
 * screen->pm.prog.  If that allocation fails, step 3 is skipped so that no
 * NULL pointer is dereferenced; steps 1, 2 and 4 still run so counter slots
 * are not leaked.
 *
 * @param nv50  context the query belongs to.
 * @param hq    hardware query being ended.
 */
static void
nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
{
   struct nv50_screen *screen = nv50->screen;
   struct pipe_context *pipe = &nv50->base.pipe;
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
   struct pipe_grid_info info = {};
   uint32_t mask;
   /* Only input[0] and input[1] are filled in below; zero the array so no
    * indeterminate word can ever be handed to launch_grid(). */
   uint32_t input[3] = { 0 };
   const uint block[3] = { 32, 1, 1 };
   const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 };
   int c, i;

   if (unlikely(!screen->pm.prog)) {
      struct nv50_program *prog = CALLOC_STRUCT(nv50_program);

      /* CALLOC_STRUCT can fail; leave pm.prog NULL in that case and let
       * the readback below be skipped. */
      if (prog) {
         prog->type = PIPE_SHADER_COMPUTE;
         prog->translated = true;
         prog->max_gpr = 7;
         prog->parm_size = 8;
         prog->code = (uint32_t *)nv50_read_hw_sm_counters_code;
         prog->code_size = sizeof(nv50_read_hw_sm_counters_code);
         screen->pm.prog = prog;
      }
   }

   /* disable all counting */
   PUSH_SPACE(push, 8);
   for (c = 0; c < 4; c++) {
      if (screen->pm.mp_counter[c]) {
         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
         PUSH_DATA (push, 0);
      }
   }

   /* release counters for this query */
   for (c = 0; c < 4; c++) {
      if (screen->pm.mp_counter[c] == hsq) {
         screen->pm.num_hw_sm_active--;
         screen->pm.mp_counter[c] = NULL;
      }
   }

   /* read the counters back, unless the readback program is unavailable */
   if (screen->pm.prog) {
      BCTX_REFN_bo(nv50->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
                   hq->bo);

      PUSH_SPACE(push, 2);
      BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (push, 0);

      pipe->bind_compute_state(pipe, screen->pm.prog);
      input[0] = hq->bo->offset + hq->base_offset;
      input[1] = hq->sequence;

      for (i = 0; i < 3; i++) {
         info.block[i] = block[i];
         info.grid[i] = grid[i];
      }
      info.pc = 0;
      info.input = input;
      pipe->launch_grid(pipe, &info);

      nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);
   }

   /* re-activate other counters */
   PUSH_SPACE(push, 8);
   mask = 0;
   for (c = 0; c < 4; c++) {
      const struct nv50_hw_sm_query_cfg *cfg;
      unsigned n;

      hsq = screen->pm.mp_counter[c];
      if (!hsq)
         continue;

      cfg = nv50_hw_sm_query_get_cfg(nv50, &hsq->base);
      for (n = 0; n < cfg->num_counters; n++) {
         uint16_t func;

         /* mask records counters already programmed in this pass; stop at
          * the first one of this query that has been seen */
         if (mask & (1 << hsq->ctr[n]))
            break;

         mask |= 1 << hsq->ctr[n];
         func  = nv50_hw_sm_get_func(hsq->ctr[n]);

         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[n])), 1);
         PUSH_DATA (push, (cfg->ctr[n].sig << 24) | (func << 8)
                    | cfg->ctr[n].unit | cfg->ctr[n].mode);
      }
   }
}
示例#4
0
/**
 * Create the compute engine object for an NV50-family screen and emit its
 * initial state into the given push buffer.
 *
 * The compute class is chosen from dev->chipset: 0xa3, 0xa5 and 0xa8 get
 * NVA3_COMPUTE_CLASS; all other 0x5X/0x8X/0x9X/0xaX chipsets get
 * NV50_COMPUTE_CLASS; anything else is rejected.
 *
 * @param screen  screen to set up; screen->compute is written, and
 *                screen->stack_bo, screen->txc, screen->tls_bo and
 *                screen->max_tls_space are read.
 * @param push    push buffer that receives the setup methods.
 * @return 0 on success, -1 for an unsupported chipset, otherwise the
 *         non-zero value returned by nouveau_object_new().
 */
int
nv50_screen_compute_setup(struct nv50_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_device *dev = screen->base.device;
   struct nouveau_object *chan = screen->base.channel;
   /* every DMA_* method below is given fifo->vram */
   struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
   unsigned obj_class;
   int i, ret;

   /* select the compute class from the high nibble of the chipset id */
   switch (dev->chipset & 0xf0) {
   case 0x50:
   case 0x80:
   case 0x90:
      obj_class = NV50_COMPUTE_CLASS;
      break;
   case 0xa0:
      switch (dev->chipset) {
      case 0xa3:
      case 0xa5:
      case 0xa8:
         obj_class = NVA3_COMPUTE_CLASS;
         break;
      default:
         obj_class = NV50_COMPUTE_CLASS;
         break;
      }
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret)
      return ret;

   /* bind the new object to the compute subchannel */
   BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->handle);

   /* stack: DMA object, address of screen->stack_bo, log size */
   BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->stack_bo->offset);
   PUSH_DATA (push, screen->stack_bo->offset);
   BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
   PUSH_DATA (push, 4);

   BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
   PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
   BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
   PUSH_DATA (push, 0x100);
   BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
   PUSH_DATA (push, fifo->vram);

   /* global slots 0..14: address 0, limit 0, linear mode */
   for (i = 0; i < 15; i++) {
      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
      PUSH_DATA (push, 0);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
      PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
   }

   /* global slot 15: address 0 with limit ~0, linear mode */
   BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
   PUSH_DATA (push, ~0);
   BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
   PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);

   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
   PUSH_DATA (push, 0x54);
   BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
   PUSH_DATA (push, 0);

   /* TIC table starts at screen->txc->offset */
   BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);

   /* TSC table uses the same txc buffer, at offset + 65536 */
   BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
   PUSH_DATA (push, fifo->vram);

   /* local memory: screen->tls_bo at offset + 65536; the size is written as
    * log2 of twice the number of ONE_TEMP_SIZE units in max_tls_space */
   BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls_bo->offset + 65536);
   PUSH_DATA (push, screen->tls_bo->offset + 65536);
   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
   PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));

   return 0;
}
示例#5
0
static void
nve4_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct pipe_context *pipe = &nvc0->base.pipe;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   uint32_t mask;
   uint32_t input[3];
   const uint block[3] = { 32, 4, 1 };
   const uint grid[3] = { screen->mp_count, 1, 1 };
   unsigned c;
   const struct nve4_mp_pm_query_cfg *cfg;

   cfg = &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];

   if (unlikely(!screen->pm.prog)) {
      struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
      prog->type = PIPE_SHADER_COMPUTE;
      prog->translated = TRUE;
      prog->num_gprs = 14;
      prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
      prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
      prog->parm_size = 12;
      screen->pm.prog = prog;
   }

   /* disable all counting */
   PUSH_SPACE(push, 8);
   for (c = 0; c < 8; ++c)
      if (screen->pm.mp_counter[c])
         IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
   /* release counters for this query */
   for (c = 0; c < 8; ++c) {
      if (nvc0_query(screen->pm.mp_counter[c]) == q) {
         screen->pm.num_mp_pm_active[c / 4]--;
         screen->pm.mp_counter[c] = NULL;
      }
   }

   BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
                q->bo);

   PUSH_SPACE(push, 1);
   IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);

   pipe->bind_compute_state(pipe, screen->pm.prog);
   input[0] = (q->bo->offset + q->base);
   input[1] = (q->bo->offset + q->base) >> 32;
   input[2] = q->sequence;
   pipe->launch_grid(pipe, block, grid, 0, input);

   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);

   /* re-activate other counters */
   PUSH_SPACE(push, 16);
   mask = 0;
   for (c = 0; c < 8; ++c) {
      unsigned i;
      q = nvc0_query(screen->pm.mp_counter[c]);
      if (!q)
         continue;
      cfg = &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
      for (i = 0; i < cfg->num_counters; ++i) {
         if (mask & (1 << q->ctr[i]))
            break;
         mask |= 1 << q->ctr[i];
         BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
         PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
      }
   }
}