Exemplo n.º 1
0
static void
nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
{
   struct nv50_screen *screen = nv50->screen;
   struct nouveau_pushbuf *push = screen->base.pushbuf;
   unsigned size = align(nv50->compprog->parm_size, 0x4);

   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, (size / 4) << 8);

   if (size) {
      struct nouveau_mm_allocation *mm;
      struct nouveau_bo *bo = NULL;
      unsigned offset;

      mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset);
      assert(mm);

      nouveau_bo_map(bo, 0, screen->base.client);
      memcpy(bo->map + offset, input, size);

      nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
      nouveau_pushbuf_bufctx(push, nv50->bufctx);
      nouveau_pushbuf_validate(push);

      BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4);
      nouveau_pushbuf_data(push, bo, offset, size);

      nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
      nouveau_bo_ref(NULL, &bo);
      nouveau_bufctx_reset(nv50->bufctx, 0);
   }
}
Exemplo n.º 2
0
static boolean
nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
{
   struct nv50_screen *screen = nv50->screen;
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
   const struct nv50_hw_sm_query_cfg *cfg;
   uint16_t func;
   int i, c;

   cfg = nv50_hw_sm_query_get_cfg(nv50, hq);

   /* check if we have enough free counter slots */
   if (screen->pm.num_hw_sm_active + cfg->num_counters > 4) {
      NOUVEAU_ERR("Not enough free MP counter slots !\n");
      return false;
   }

   assert(cfg->num_counters <= 4);
   PUSH_SPACE(push, 4 * 4);

   /* set sequence field to 0 (used to check if result is available) */
   for (i = 0; i < screen->MPsInTP; ++i) {
      const unsigned b = (0x14 / 4) * i;
      hq->data[b + 16] = 0;
   }
   hq->sequence++;

   for (i = 0; i < cfg->num_counters; i++) {
      screen->pm.num_hw_sm_active++;

      /* find free counter slots */
      for (c = 0; c < 4; ++c) {
         if (!screen->pm.mp_counter[c]) {
            hsq->ctr[i] = c;
            screen->pm.mp_counter[c] = hsq;
            break;
         }
      }

      /* select func to aggregate counters */
      func = nv50_hw_sm_get_func(c);

      /* configure and reset the counter(s) */
      BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
      PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
                        | cfg->ctr[i].unit | cfg->ctr[i].mode);
      BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1);
      PUSH_DATA (push, 0);
   }
   return true;
}
Exemplo n.º 3
0
static bool
nv50_compute_validate_program(struct nv50_context *nv50)
{
   struct nv50_program *prog = nv50->compprog;

   if (prog->mem)
      return true;

   if (!prog->translated) {
      prog->translated = nv50_program_translate(
         prog, nv50->screen->base.device->chipset, &nv50->base.debug);
      if (!prog->translated)
         return false;
   }
   if (unlikely(!prog->code_size))
      return false;

   if (likely(prog->code_size)) {
      if (nv50_program_upload_code(nv50, prog)) {
         struct nouveau_pushbuf *push = nv50->base.pushbuf;
         BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
         PUSH_DATA (push, 0);
         return true;
      }
   }
   return false;
}
Exemplo n.º 4
0
void
nv50_launch_grid(struct pipe_context *pipe,
                 const uint *block_layout, const uint *grid_layout,
                 uint32_t label, const void *input)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2];
   struct nv50_program *cp = nv50->compprog;
   bool ret;

   ret = !nv50_compute_state_validate(nv50);
   if (ret) {
      NOUVEAU_ERR("Failed to launch grid !\n");
      return;
   }

   nv50_compute_upload_input(nv50, input);

   BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
   PUSH_DATA (push, nv50_compute_find_symbol(nv50, label));

   BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
   PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
   BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, cp->max_gpr);

   /* grid/block setup */
   BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
   PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]);
   PUSH_DATA (push, block_layout[2]);
   BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
   PUSH_DATA (push, 1 << 16 | block_size);
   BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
   PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]);
   BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
   PUSH_DATA (push, 1);

   /* kernel launching */
   BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);

   /* bind a compute shader clobbers fragment shader state */
   nv50->dirty |= NV50_NEW_FRAGPROG;
}
Exemplo n.º 5
0
int
nv50_screen_compute_setup(struct nv50_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_device *dev = screen->base.device;
   struct nouveau_object *chan = screen->base.channel;
   struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data;
   unsigned obj_class;
   int i, ret;

   switch (dev->chipset & 0xf0) {
   case 0x50:
   case 0x80:
   case 0x90:
      obj_class = NV50_COMPUTE_CLASS;
      break;
   case 0xa0:
      switch (dev->chipset) {
      case 0xa3:
      case 0xa5:
      case 0xa8:
         obj_class = NVA3_COMPUTE_CLASS;
         break;
      default:
         obj_class = NV50_COMPUTE_CLASS;
         break;
      }
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret)
      return ret;

   BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->handle);

   BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->stack_bo->offset);
   PUSH_DATA (push, screen->stack_bo->offset);
   BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
   PUSH_DATA (push, 4);

   BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
   PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
   BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
   PUSH_DATA (push, 0x100);
   BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
   PUSH_DATA (push, fifo->vram);

   for (i = 0; i < 15; i++) {
      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
      PUSH_DATA (push, 0);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
      PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
   }

   BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
   PUSH_DATA (push, ~0);
   BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
   PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);

   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
   PUSH_DATA (push, 7);
   BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
   PUSH_DATA (push, 0x54);
   BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
   PUSH_DATA (push, fifo->vram);

   BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls_bo->offset + 65536);
   PUSH_DATA (push, screen->tls_bo->offset + 65536);
   BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
   PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));

   return 0;
}
Exemplo n.º 6
0
static void
nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
{
   struct nv50_screen *screen = nv50->screen;
   struct pipe_context *pipe = &nv50->base.pipe;
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq);
   struct pipe_grid_info info = {};
   uint32_t mask;
   uint32_t input[3];
   const uint block[3] = { 32, 1, 1 };
   const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 };
   int c, i;

   if (unlikely(!screen->pm.prog)) {
      struct nv50_program *prog = CALLOC_STRUCT(nv50_program);
      prog->type = PIPE_SHADER_COMPUTE;
      prog->translated = true;
      prog->max_gpr = 7;
      prog->parm_size = 8;
      prog->code = (uint32_t *)nv50_read_hw_sm_counters_code;
      prog->code_size = sizeof(nv50_read_hw_sm_counters_code);
      screen->pm.prog = prog;
   }

   /* disable all counting */
   PUSH_SPACE(push, 8);
   for (c = 0; c < 4; c++) {
      if (screen->pm.mp_counter[c]) {
         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
         PUSH_DATA (push, 0);
      }
   }

   /* release counters for this query */
   for (c = 0; c < 4; c++) {
      if (screen->pm.mp_counter[c] == hsq) {
         screen->pm.num_hw_sm_active--;
         screen->pm.mp_counter[c] = NULL;
      }
   }

   BCTX_REFN_bo(nv50->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
                hq->bo);

   PUSH_SPACE(push, 2);
   BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
   PUSH_DATA (push, 0);

   pipe->bind_compute_state(pipe, screen->pm.prog);
   input[0] = hq->bo->offset + hq->base_offset;
   input[1] = hq->sequence;

   for (i = 0; i < 3; i++) {
      info.block[i] = block[i];
      info.grid[i] = grid[i];
   }
   info.pc = 0;
   info.input = input;
   pipe->launch_grid(pipe, &info);

   nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);

   /* re-active other counters */
   PUSH_SPACE(push, 8);
   mask = 0;
   for (c = 0; c < 4; c++) {
      const struct nv50_hw_sm_query_cfg *cfg;
      unsigned i;

      hsq = screen->pm.mp_counter[c];
      if (!hsq)
         continue;

      cfg = nv50_hw_sm_query_get_cfg(nv50, &hsq->base);
      for (i = 0; i < cfg->num_counters; i++) {
         uint16_t func;

         if (mask & (1 << hsq->ctr[i]))
            break;

         mask |= 1 << hsq->ctr[i];
         func  = nv50_hw_sm_get_func(hsq->ctr[i]);

         BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1);
         PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
                    | cfg->ctr[i].unit | cfg->ctr[i].mode);
      }
   }
}