/* This happens rather often with DTD9/st. */ void nvc0_cb_push(struct nouveau_context *nv, struct nouveau_bo *bo, unsigned domain, unsigned base, unsigned size, unsigned offset, unsigned words, const uint32_t *data) { struct nouveau_pushbuf *push = nv->pushbuf; NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1); NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_bytes, words * 4); assert(!(offset & 3)); size = align(size, 0x100); BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, size); PUSH_DATAh(push, bo->offset + base); PUSH_DATA (push, bo->offset + base); while (words) { unsigned nr = PUSH_AVAIL(push); nr = MIN2(nr, words); nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); PUSH_SPACE(push, nr + 2); PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain); BEGIN_1IC0(push, NVC0_3D(CB_POS), nr + 1); PUSH_DATA (push, offset); PUSH_DATAp(push, data, nr); words -= nr; data += nr; offset += nr * 4; } }
static boolean nve4_validate_tic(struct nvc0_context *nvc0, unsigned s) { struct nouveau_bo *txc = nvc0->screen->txc; struct nouveau_pushbuf *push = nvc0->base.pushbuf; unsigned i; boolean need_flush = FALSE; for (i = 0; i < nvc0->num_textures[s]; ++i) { struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); struct nv04_resource *res; const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i)); if (!tic) { nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; continue; } res = nv04_resource(tic->pipe.texture); if (tic->id < 0) { tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); PUSH_SPACE(push, 16); BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, txc->offset + (tic->id * 32)); PUSH_DATA (push, txc->offset + (tic->id * 32)); BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); PUSH_DATA (push, 32); PUSH_DATA (push, 1); BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9); PUSH_DATA (push, 0x1001); PUSH_DATAp(push, &tic->tic[0], 8); need_flush = TRUE; } else if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); PUSH_DATA (push, (tic->id << 4) | 1); } nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; nvc0->tex_handles[s][i] |= tic->id; if (dirty) BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); } for (; i < nvc0->state.num_textures[s]; ++i) { nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; nvc0->textures_dirty[s] |= 1 << i; } nvc0->state.num_textures[s] = nvc0->num_textures[s]; return need_flush; }
static void nvc0_compute_upload_input(struct nvc0_context *nvc0, const struct pipe_grid_info *info) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; struct nvc0_program *cp = nvc0->compprog; if (cp->parm_size) { struct nouveau_bo *bo = screen->uniform_bo; const unsigned base = NVC0_CB_USR_INFO(5); BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, align(cp->parm_size, 0x100)); PUSH_DATAh(push, bo->offset + base); PUSH_DATA (push, bo->offset + base); BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (0 << 8) | 1); /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4); PUSH_DATA (push, 0); PUSH_DATAp(push, info->input, cp->parm_size / 4); nvc0_compute_invalidate_constbufs(nvc0); } BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, NVC0_CB_AUX_SIZE); PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1); /* (7) as we only upload work_dim on nvc0, the rest uses special regs */ PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7)); PUSH_DATA (push, info->work_dim); BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); }
boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s) { struct nouveau_bo *txc = nvc0->screen->txc; struct nouveau_pushbuf *push = nvc0->base.pushbuf; unsigned i; boolean need_flush = FALSE; for (i = 0; i < nvc0->num_samplers[s]; ++i) { struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]); if (!tsc) { nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID; continue; } if (tsc->id < 0) { tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); PUSH_SPACE(push, 16); BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32)); PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32)); BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); PUSH_DATA (push, 32); PUSH_DATA (push, 1); BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9); PUSH_DATA (push, 0x1001); PUSH_DATAp(push, &tsc->tsc[0], 8); need_flush = TRUE; } nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID; nvc0->tex_handles[s][i] |= tsc->id << 20; } for (; i < nvc0->state.num_samplers[s]; ++i) { nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID; nvc0->samplers_dirty[s] |= 1 << i; } nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; return need_flush; }
void nve4_p2mf_push_linear(struct nouveau_context *nv, struct nouveau_bo *dst, unsigned offset, unsigned domain, unsigned size, const void *data) { struct nvc0_context *nvc0 = nvc0_context(&nv->pipe); struct nouveau_pushbuf *push = nv->pushbuf; uint32_t *src = (uint32_t *)data; unsigned count = (size + 3) / 4; nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR); nouveau_pushbuf_bufctx(push, nvc0->bufctx); nouveau_pushbuf_validate(push); while (count) { unsigned nr; if (!PUSH_SPACE(push, 16)) break; nr = PUSH_AVAIL(push); assert(nr >= 16); nr = MIN2(count, nr - 8); nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1)); BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, dst->offset + offset); PUSH_DATA (push, dst->offset + offset); BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2); PUSH_DATA (push, MIN2(size, nr * 4)); PUSH_DATA (push, 1); /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1); PUSH_DATA (push, 0x1001); PUSH_DATAp(push, src, nr); count -= nr; src += nr; offset += nr * 4; size -= nr * 4; } nouveau_bufctx_reset(nvc0->bufctx, 0); }
static void nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; struct nvc0_program *cp = nvc0->compprog; if (cp->parm_size) { BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); PUSH_DATA (push, align(cp->parm_size, 0x100)); PUSH_DATAh(push, screen->parm->offset); PUSH_DATA (push, screen->parm->offset); BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); PUSH_DATA (push, (0 << 8) | 1); /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4); PUSH_DATA (push, 0); PUSH_DATAp(push, input, cp->parm_size / 4); BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); } }
void nvc0_cb_push(struct nouveau_context *nv, struct nouveau_bo *bo, unsigned domain, unsigned base, unsigned size, unsigned offset, unsigned words, const uint32_t *data) { struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx; struct nouveau_pushbuf *push = nv->pushbuf; assert(!(offset & 3)); size = align(size, 0x100); nouveau_bufctx_refn(bctx, 0, bo, NOUVEAU_BO_WR | domain); nouveau_pushbuf_bufctx(push, bctx); nouveau_pushbuf_validate(push); BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, size); PUSH_DATAh(push, bo->offset + base); PUSH_DATA (push, bo->offset + base); while (words) { unsigned nr = PUSH_AVAIL(push); nr = MIN2(nr, words); nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); BEGIN_1IC0(push, NVC0_3D(CB_POS), nr + 1); PUSH_DATA (push, offset); PUSH_DATAp(push, data, nr); words -= nr; data += nr; offset += nr * 4; } nouveau_bufctx_reset(bctx, 0); }
static void nvc0_compute_validate_buffers(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; const int s = 5; int i; BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, NVC0_CB_AUX_SIZE); PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0)); for (i = 0; i < NVC0_MAX_BUFFERS; i++) { if (nvc0->buffers[s][i].buffer) { struct nv04_resource *res = nv04_resource(nvc0->buffers[s][i].buffer); PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); PUSH_DATA (push, 0); BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR); util_range_add(&res->valid_buffer_range, nvc0->buffers[s][i].buffer_offset, nvc0->buffers[s][i].buffer_offset + nvc0->buffers[s][i].buffer_size); } else { PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); } } }
int nvc0_screen_compute_setup(struct nvc0_screen *screen, struct nouveau_pushbuf *push) { struct nouveau_object *chan = screen->base.channel; struct nouveau_device *dev = screen->base.device; uint32_t obj_class; int ret; int i; switch (dev->chipset & ~0xf) { case 0xc0: case 0xd0: /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but, * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */ obj_class = NVC0_COMPUTE_CLASS; break; default: NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); return -1; } ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, &screen->compute); if (ret) { NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); return ret; } BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->oclass); /* hardware limit */ BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1); PUSH_DATA (push, screen->mp_count); BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1); PUSH_DATA (push, 0xf); BEGIN_NVC0(push, SUBC_CP(0x02a0), 1); PUSH_DATA (push, 0x8000); /* global memory setup */ BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); PUSH_DATA (push, 0); BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100); for (i = 0; i <= 0xff; i++) PUSH_DATA (push, (0xc << 28) | (i << 16) | i); BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); PUSH_DATA (push, 1); /* local memory and cstack setup */ BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls->offset); PUSH_DATA (push, screen->tls->offset); BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2); PUSH_DATAh(push, screen->tls->size); PUSH_DATA (push, screen->tls->size); BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1); PUSH_DATA (push, 0xff << 24); /* shared memory setup */ BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1); PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1); PUSH_DATA (push, 0xfe << 24); BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1); PUSH_DATA (push, 0); /* code segment setup */ BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); /* textures */ BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); /* samplers */ BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); PUSH_DATA (push, screen->txc->offset + 65536); PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); /* MS sample coordinate offsets */ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, NVC0_CB_AUX_SIZE); PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8); PUSH_DATA (push, NVC0_CB_AUX_MS_INFO); PUSH_DATA (push, 0); /* 0 */ PUSH_DATA (push, 0); PUSH_DATA (push, 1); /* 1 */ PUSH_DATA (push, 0); PUSH_DATA (push, 0); /* 2 */ PUSH_DATA (push, 1); PUSH_DATA (push, 1); /* 3 */ PUSH_DATA (push, 1); PUSH_DATA (push, 2); /* 4 */ PUSH_DATA (push, 0); PUSH_DATA (push, 3); /* 5 */ PUSH_DATA (push, 0); PUSH_DATA (push, 2); /* 6 */ PUSH_DATA (push, 1); PUSH_DATA (push, 3); /* 7 */ PUSH_DATA (push, 1); return 0; }