static void nvc0_compute_validate_driverconst(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, NVC0_CB_AUX_SIZE); PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (15 << 8) | 1); nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST; }
void nvc0_compprog_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; if (cp && !nvc0_program_validate(nvc0, cp)) return; BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE); }
static void nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; const int s = 5; while (nvc0->constbuf_dirty[s]) { int i = ffs(nvc0->constbuf_dirty[s]) - 1; nvc0->constbuf_dirty[s] &= ~(1 << i); if (nvc0->constbuf[s][i].user) { struct nouveau_bo *bo = nvc0->screen->uniform_bo; const unsigned base = NVC0_CB_USR_INFO(s); const unsigned size = nvc0->constbuf[s][0].size; assert(i == 0); /* we really only want OpenGL uniforms here */ assert(nvc0->constbuf[s][0].u.data); if (nvc0->state.uniform_buffer_bound[s] < size) { nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); PUSH_DATAh(push, bo->offset + base); PUSH_DATA (push, bo->offset + base); BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (0 << 8) | 1); } nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), base, nvc0->state.uniform_buffer_bound[s], 0, (size + 3) / 4, nvc0->constbuf[s][0].u.data); } else { struct nv04_resource *res = nv04_resource(nvc0->constbuf[s][i].u.buf); if (res) { BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, nvc0->constbuf[s][i].size); PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (i << 8) | 1); BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); res->cb_bindings[s] |= 1 << i; } else { BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (i << 8) | 0); } if (i == 0) nvc0->state.uniform_buffer_bound[s] = 0; } } nvc0_compute_invalidate_constbufs(nvc0); BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); }
static void nvc0_compute_validate_samplers(struct nvc0_context *nvc0) { bool need_flush = nvc0_validate_tsc(nvc0, 5); if (need_flush) { BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1); PUSH_DATA (nvc0->base.pushbuf, 0); } /* Invalidate all 3D samplers because they are aliased. */ for (int s = 0; s < 5; s++) nvc0->samplers_dirty[s] = ~0; nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS; }
static void nvc0_compute_validate_buffers(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; const int s = 5; int i; BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, NVC0_CB_AUX_SIZE); PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0)); for (i = 0; i < NVC0_MAX_BUFFERS; i++) { if (nvc0->buffers[s][i].buffer) { struct nv04_resource *res = nv04_resource(nvc0->buffers[s][i].buffer); PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); PUSH_DATA (push, 0); BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR); util_range_add(&res->valid_buffer_range, nvc0->buffers[s][i].buffer_offset, nvc0->buffers[s][i].buffer_offset + nvc0->buffers[s][i].buffer_size); } else { PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, 0); } } }
static void nvc0_compute_validate_textures(struct nvc0_context *nvc0) { bool need_flush = nvc0_validate_tic(nvc0, 5); if (need_flush) { BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1); PUSH_DATA (nvc0->base.pushbuf, 0); } /* Invalidate all 3D textures because they are aliased. */ for (int s = 0; s < 5; s++) { for (int i = 0; i < nvc0->num_textures[s]; i++) nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); nvc0->textures_dirty[s] = ~0; } nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; }
static inline void nvc0_compute_invalidate_surfaces(struct nvc0_context *nvc0, const int s) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; int i; for (i = 0; i < NVC0_MAX_IMAGES; ++i) { if (s == 5) BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6); else BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6); PUSH_DATA(push, 0); PUSH_DATA(push, 0); PUSH_DATA(push, 0); PUSH_DATA(push, 0); PUSH_DATA(push, 0x14000); PUSH_DATA(push, 0); } }
static void nvc0_compute_upload_input(struct nvc0_context *nvc0, const struct pipe_grid_info *info) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; struct nvc0_program *cp = nvc0->compprog; if (cp->parm_size) { struct nouveau_bo *bo = screen->uniform_bo; const unsigned base = NVC0_CB_USR_INFO(5); BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, align(cp->parm_size, 0x100)); PUSH_DATAh(push, bo->offset + base); PUSH_DATA (push, bo->offset + base); BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (0 << 8) | 1); /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4); PUSH_DATA (push, 0); PUSH_DATAp(push, info->input, cp->parm_size / 4); nvc0_compute_invalidate_constbufs(nvc0); } BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, NVC0_CB_AUX_SIZE); PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 1); /* (7) as we only upload work_dim on nvc0, the rest uses special regs */ PUSH_DATA (push, NVC0_CB_AUX_GRID_INFO(7)); PUSH_DATA (push, info->work_dim); BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); }
void nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; int ret; ret = !nvc0_state_validate_cp(nvc0, ~0); if (ret) { NOUVEAU_ERR("Failed to launch grid !\n"); return; } nvc0_compute_upload_input(nvc0, info); BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1); PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc)); BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3); PUSH_DATA (push, (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10)); PUSH_DATA (push, 0); PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3); PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]); PUSH_DATA (push, cp->num_barriers); BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1); PUSH_DATA (push, cp->num_gprs); /* launch preliminary setup */ BEGIN_NVC0(push, NVC0_CP(GRIDID), 1); PUSH_DATA (push, 0x1); BEGIN_NVC0(push, SUBC_CP(0x036c), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); /* block setup */ BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2); PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); PUSH_DATA (push, info->block[2]); if (unlikely(info->indirect)) { struct nv04_resource *res = nv04_resource(info->indirect); uint32_t offset = res->offset + info->indirect_offset; unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT; nouveau_pushbuf_space(push, 16, 0, 1); PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3)); nouveau_pushbuf_data(push, res->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); } else { /* grid setup */ BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2); PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]); PUSH_DATA (push, info->grid[2]); /* kernel launching */ BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, SUBC_CP(0x0a08), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1); PUSH_DATA (push, 0x1000); BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, SUBC_CP(0x0360), 1); PUSH_DATA (push, 0x1); } /* TODO: Not sure if this is really necessary. */ nvc0_compute_invalidate_surfaces(nvc0, 5); nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF); nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES; nvc0->images_dirty[5] |= nvc0->images_valid[5]; }
int nvc0_screen_compute_setup(struct nvc0_screen *screen, struct nouveau_pushbuf *push) { struct nouveau_object *chan = screen->base.channel; struct nouveau_device *dev = screen->base.device; uint32_t obj_class; int ret; int i; switch (dev->chipset & ~0xf) { case 0xc0: case 0xd0: /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but, * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */ obj_class = NVC0_COMPUTE_CLASS; break; default: NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); return -1; } ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, &screen->compute); if (ret) { NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); return ret; } BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->oclass); /* hardware limit */ BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1); PUSH_DATA (push, screen->mp_count); BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1); PUSH_DATA (push, 0xf); BEGIN_NVC0(push, SUBC_CP(0x02a0), 1); PUSH_DATA (push, 0x8000); /* global memory setup */ BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); PUSH_DATA (push, 0); BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100); for (i = 0; i <= 0xff; i++) PUSH_DATA (push, (0xc << 28) | (i << 16) | i); BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); PUSH_DATA (push, 1); /* local memory and cstack setup */ BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls->offset); PUSH_DATA (push, screen->tls->offset); BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2); PUSH_DATAh(push, screen->tls->size); PUSH_DATA (push, screen->tls->size); BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1); PUSH_DATA (push, 0xff << 24); /* shared memory setup */ BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1); PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1); PUSH_DATA (push, 0xfe << 24); BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1); PUSH_DATA (push, 0); /* code segment setup */ BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); /* textures */ BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); /* samplers */ BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); PUSH_DATA (push, screen->txc->offset + 65536); PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); /* MS sample coordinate offsets */ BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, NVC0_CB_AUX_SIZE); PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8); PUSH_DATA (push, NVC0_CB_AUX_MS_INFO); PUSH_DATA (push, 0); /* 0 */ PUSH_DATA (push, 0); PUSH_DATA (push, 1); /* 1 */ PUSH_DATA (push, 0); PUSH_DATA (push, 0); /* 2 */ PUSH_DATA (push, 1); PUSH_DATA (push, 1); /* 3 */ PUSH_DATA (push, 1); PUSH_DATA (push, 2); /* 4 */ PUSH_DATA (push, 0); PUSH_DATA (push, 3); /* 5 */ PUSH_DATA (push, 0); PUSH_DATA (push, 2); /* 6 */ PUSH_DATA (push, 1); PUSH_DATA (push, 3); /* 7 */ PUSH_DATA (push, 1); return 0; }
static void nvc0_render_condition(struct pipe_context *pipe, struct pipe_query *pq, boolean condition, enum pipe_render_cond_flag mode) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_query *q = nvc0_query(pq); struct nvc0_hw_query *hq = nvc0_hw_query(q); uint32_t cond; bool wait = mode != PIPE_RENDER_COND_NO_WAIT && mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; if (!pq) { cond = NVC0_3D_COND_MODE_ALWAYS; } else { /* NOTE: comparison of 2 queries only works if both have completed */ switch (q->type) { case PIPE_QUERY_SO_OVERFLOW_PREDICATE: case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: cond = condition ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_NOT_EQUAL; wait = true; break; case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: if (hq->state == NVC0_HW_QUERY_STATE_READY) wait = true; if (likely(!condition)) { cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : NVC0_3D_COND_MODE_ALWAYS; } else { cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS; } break; default: assert(!"render condition query not a predicate"); cond = NVC0_3D_COND_MODE_ALWAYS; break; } } nvc0->cond_query = pq; nvc0->cond_cond = condition; nvc0->cond_condmode = cond; nvc0->cond_mode = mode; if (!pq) { PUSH_SPACE(push, 2); IMMED_NVC0(push, NVC0_3D(COND_MODE), cond); if (nvc0->screen->compute) IMMED_NVC0(push, NVC0_CP(COND_MODE), cond); return; } if (wait && hq->state != NVC0_HW_QUERY_STATE_READY) nvc0_hw_query_fifo_wait(nvc0, q); PUSH_SPACE(push, 10); PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3); PUSH_DATAh(push, hq->bo->offset + hq->offset); PUSH_DATA (push, hq->bo->offset + hq->offset); PUSH_DATA (push, cond); BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2); PUSH_DATAh(push, hq->bo->offset + hq->offset); PUSH_DATA (push, hq->bo->offset + hq->offset); if (nvc0->screen->compute) { BEGIN_NVC0(push, NVC0_CP(COND_ADDRESS_HIGH), 3); PUSH_DATAh(push, hq->bo->offset + hq->offset); PUSH_DATA (push, hq->bo->offset + hq->offset); PUSH_DATA (push, cond); } }