Example #1
0
/* This happens rather often with DTD9/st. */
void
nvc0_cb_push(struct nouveau_context *nv,
             struct nouveau_bo *bo, unsigned domain,
             unsigned base, unsigned size,
             unsigned offset, unsigned words, const uint32_t *data)
{
   struct nouveau_pushbuf *push = nv->pushbuf;

   NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1);
   NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_bytes, words * 4);

   assert(!(offset & 3));
   size = align(size, 0x100);

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, size);
   PUSH_DATAh(push, bo->offset + base);
   PUSH_DATA (push, bo->offset + base);

   while (words) {
      unsigned nr = PUSH_AVAIL(push);
      nr = MIN2(nr, words);
      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);

      PUSH_SPACE(push, nr + 2);
      PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);
      BEGIN_1IC0(push, NVC0_3D(CB_POS), nr + 1);
      PUSH_DATA (push, offset);
      PUSH_DATAp(push, data, nr);

      words -= nr;
      data += nr;
      offset += nr * 4;
   }
}
Example #2
0
static boolean
nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
{
   struct nouveau_bo *txc = nvc0->screen->txc;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   boolean need_flush = FALSE;

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);

      if (tic->id < 0) {
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         PUSH_SPACE(push, 16);
         BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
         PUSH_DATAh(push, txc->offset + (tic->id * 32));
         PUSH_DATA (push, txc->offset + (tic->id * 32));
         BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
         PUSH_DATA (push, 32);
         PUSH_DATA (push, 1);
         BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
         PUSH_DATA (push, 0x1001);
         PUSH_DATAp(push, &tic->tic[0], 8);

         need_flush = TRUE;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, (tic->id << 4) | 1);
      }
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;

      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tic->id;
      if (dirty)
         BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
   }
   for (; i < nvc0->state.num_textures[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
      nvc0->textures_dirty[s] |= 1 << i;
   }

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   return need_flush;
}
Example #3
0
static void
nvc0_compute_upload_input(struct nvc0_context *nvc0,
                          const struct pipe_grid_info *info)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct nvc0_program *cp = nvc0->compprog;

   if (cp->parm_size) {
      struct nouveau_bo *bo = screen->uniform_bo;
      const unsigned base = NVC0_CB_USR_INFO(5);

      BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
      PUSH_DATA (push, align(cp->parm_size, 0x100));
      PUSH_DATAh(push, bo->offset + base);
      PUSH_DATA (push, bo->offset + base);
      BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
      PUSH_DATA (push, (0 << 8) | 1);
      /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
      BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4);
      PUSH_DATA (push, 0);
      PUSH_DATAp(push, info->input, cp->parm_size / 4);

      nvc0_compute_invalidate_constbufs(nvc0);
   }

   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));

   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1);
   /* (7) as we only upload work_dim on nvc0, the rest uses special regs */
   PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7));
   PUSH_DATA (push, info->work_dim);

   BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
   PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
}
Example #4
0
boolean
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
{
   struct nouveau_bo *txc = nvc0->screen->txc;
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   boolean need_flush = FALSE;

   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);

      if (!tsc) {
         nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
         continue;
      }
      if (tsc->id < 0) {
         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);

         PUSH_SPACE(push, 16);
         BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
         PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32));
         PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32));
         BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
         PUSH_DATA (push, 32);
         PUSH_DATA (push, 1);
         BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
         PUSH_DATA (push, 0x1001);
         PUSH_DATAp(push, &tsc->tsc[0], 8);

         need_flush = TRUE;
      }
      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tsc->id << 20;
   }
   for (; i < nvc0->state.num_samplers[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
      nvc0->samplers_dirty[s] |= 1 << i;
   }

   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];

   return need_flush;
}
Example #5
0
void
nve4_p2mf_push_linear(struct nouveau_context *nv,
                      struct nouveau_bo *dst, unsigned offset, unsigned domain,
                      unsigned size, const void *data)
{
   struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
   struct nouveau_pushbuf *push = nv->pushbuf;
   uint32_t *src = (uint32_t *)data;
   unsigned count = (size + 3) / 4;

   nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR);
   nouveau_pushbuf_bufctx(push, nvc0->bufctx);
   nouveau_pushbuf_validate(push);

   while (count) {
      unsigned nr;

      if (!PUSH_SPACE(push, 16))
         break;
      nr = PUSH_AVAIL(push);
      assert(nr >= 16);
      nr = MIN2(count, nr - 8);
      nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));

      BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
      PUSH_DATAh(push, dst->offset + offset);
      PUSH_DATA (push, dst->offset + offset);
      BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
      PUSH_DATA (push, MIN2(size, nr * 4));
      PUSH_DATA (push, 1);
      /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
      BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1);
      PUSH_DATA (push, 0x1001);
      PUSH_DATAp(push, src, nr);

      count -= nr;
      src += nr;
      offset += nr * 4;
      size -= nr * 4;
   }

   nouveau_bufctx_reset(nvc0->bufctx, 0);
}
Example #6
0
static void
nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct nvc0_program *cp = nvc0->compprog;

   if (cp->parm_size) {
      BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
      PUSH_DATA (push, align(cp->parm_size, 0x100));
      PUSH_DATAh(push, screen->parm->offset);
      PUSH_DATA (push, screen->parm->offset);
      BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
      PUSH_DATA (push, (0 << 8) | 1);
      /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
      BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
      PUSH_DATA (push, 0);
      PUSH_DATAp(push, input, cp->parm_size / 4);

      BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
      PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
   }
}
Example #7
0
void
nvc0_cb_push(struct nouveau_context *nv,
             struct nouveau_bo *bo, unsigned domain,
             unsigned base, unsigned size,
             unsigned offset, unsigned words, const uint32_t *data)
{
   struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx;
   struct nouveau_pushbuf *push = nv->pushbuf;

   assert(!(offset & 3));
   size = align(size, 0x100);

   nouveau_bufctx_refn(bctx, 0, bo, NOUVEAU_BO_WR | domain);
   nouveau_pushbuf_bufctx(push, bctx);
   nouveau_pushbuf_validate(push);

   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, size);
   PUSH_DATAh(push, bo->offset + base);
   PUSH_DATA (push, bo->offset + base);

   while (words) {
      unsigned nr = PUSH_AVAIL(push);
      nr = MIN2(nr, words);
      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);

      BEGIN_1IC0(push, NVC0_3D(CB_POS), nr + 1);
      PUSH_DATA (push, offset);
      PUSH_DATAp(push, data, nr);

      words -= nr;
      data += nr;
      offset += nr * 4;
   }

   nouveau_bufctx_reset(bctx, 0);
}
Example #8
0
static void
nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   const int s = 5;
   int i;

   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
   PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));

   for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
      if (nvc0->buffers[s][i].buffer) {
         struct nv04_resource *res =
            nv04_resource(nvc0->buffers[s][i].buffer);
         PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset);
         PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset);
         PUSH_DATA (push, nvc0->buffers[s][i].buffer_size);
         PUSH_DATA (push, 0);
         BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR);
         util_range_add(&res->valid_buffer_range,
                        nvc0->buffers[s][i].buffer_offset,
                        nvc0->buffers[s][i].buffer_offset +
                        nvc0->buffers[s][i].buffer_size);
      } else {
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
      }
   }
}
Example #9
0
int
nvc0_screen_compute_setup(struct nvc0_screen *screen,
                          struct nouveau_pushbuf *push)
{
   struct nouveau_object *chan = screen->base.channel;
   struct nouveau_device *dev = screen->base.device;
   uint32_t obj_class;
   int ret;
   int i;

   switch (dev->chipset & ~0xf) {
   case 0xc0:
   case 0xd0:
      /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but,
       * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */
      obj_class = NVC0_COMPUTE_CLASS;
      break;
   default:
      NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
      return -1;
   }

   ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
                            &screen->compute);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
      return ret;
   }

   BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->compute->oclass);

   /* hardware limit */
   BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1);
   PUSH_DATA (push, screen->mp_count);
   BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1);
   PUSH_DATA (push, 0xf);

   BEGIN_NVC0(push, SUBC_CP(0x02a0), 1);
   PUSH_DATA (push, 0x8000);

   /* global memory setup */
   BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
   PUSH_DATA (push, 0);
   BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100);
   for (i = 0; i <= 0xff; i++)
      PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
   BEGIN_NVC0(push, SUBC_CP(0x02c4), 1);
   PUSH_DATA (push, 1);

   /* local memory and cstack setup */
   BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->tls->offset);
   PUSH_DATA (push, screen->tls->offset);
   BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2);
   PUSH_DATAh(push, screen->tls->size);
   PUSH_DATA (push, screen->tls->size);
   BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1);
   PUSH_DATA (push, 0);
   BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1);
   PUSH_DATA (push, 0xff << 24);

   /* shared memory setup */
   BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1);
   PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
   BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1);
   PUSH_DATA (push, 0xfe << 24);
   BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1);
   PUSH_DATA (push, 0);

   /* code segment setup */
   BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->text->offset);
   PUSH_DATA (push, screen->text->offset);

   /* textures */
   BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);

   /* samplers */
   BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);

   /* MS sample coordinate offsets */
   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8);
   PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
   PUSH_DATA (push, 0); /* 0 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 1); /* 1 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0); /* 2 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 1); /* 3 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 2); /* 4 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 3); /* 5 */
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 2); /* 6 */
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 3); /* 7 */
   PUSH_DATA (push, 1);

   return 0;
}