static inline void nvc0_program_update_context_state(struct nvc0_context *nvc0, struct nvc0_program *prog, int stage) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; if (prog && prog->need_tls) { const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR; if (!nvc0->state.tls_required) BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls); nvc0->state.tls_required |= 1 << stage; } else { if (nvc0->state.tls_required == (1 << stage)) nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS); nvc0->state.tls_required &= ~(1 << stage); } if (prog && prog->immd_size) { BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); /* NOTE: may overlap code of a different shader */ PUSH_DATA (push, align(prog->immd_size, 0x100)); PUSH_DATAh(push, nvc0->screen->text->offset + prog->immd_base); PUSH_DATA (push, nvc0->screen->text->offset + prog->immd_base); BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1); PUSH_DATA (push, (14 << 4) | 1); nvc0->state.c14_bound |= 1 << stage; } else if (nvc0->state.c14_bound & (1 << stage)) { BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1); PUSH_DATA (push, (14 << 4) | 0); nvc0->state.c14_bound &= ~(1 << stage); } }
static void nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; const int s = 5; while (nvc0->constbuf_dirty[s]) { int i = ffs(nvc0->constbuf_dirty[s]) - 1; nvc0->constbuf_dirty[s] &= ~(1 << i); if (nvc0->constbuf[s][i].user) { struct nouveau_bo *bo = nvc0->screen->uniform_bo; const unsigned base = NVC0_CB_USR_INFO(s); const unsigned size = nvc0->constbuf[s][0].size; assert(i == 0); /* we really only want OpenGL uniforms here */ assert(nvc0->constbuf[s][0].u.data); if (nvc0->state.uniform_buffer_bound[s] < size) { nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); PUSH_DATAh(push, bo->offset + base); PUSH_DATA (push, bo->offset + base); BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (0 << 8) | 1); } nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), base, nvc0->state.uniform_buffer_bound[s], 0, (size + 3) / 4, nvc0->constbuf[s][0].u.data); } else { struct nv04_resource *res = nv04_resource(nvc0->constbuf[s][i].u.buf); if (res) { BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, nvc0->constbuf[s][i].size); PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (i << 8) | 1); BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); res->cb_bindings[s] |= 1 << i; } else { BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (i << 8) | 0); } if (i == 0) nvc0->state.uniform_buffer_bound[s] = 0; } } nvc0_compute_invalidate_constbufs(nvc0); BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); }
struct pipe_context * nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) { struct nvc0_screen *screen = nvc0_screen(pscreen); struct nvc0_context *nvc0; struct pipe_context *pipe; int ret; uint32_t flags; nvc0 = CALLOC_STRUCT(nvc0_context); if (!nvc0) return NULL; pipe = &nvc0->base.pipe; if (!nvc0_blitctx_create(nvc0)) goto out_err; nvc0->base.pushbuf = screen->base.pushbuf; nvc0->base.client = screen->base.client; ret = nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx); if (!ret) ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_3D_COUNT, &nvc0->bufctx_3d); if (!ret) ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_CP_COUNT, &nvc0->bufctx_cp); if (ret) goto out_err; nvc0->screen = screen; nvc0->base.screen = &screen->base; pipe->screen = pscreen; pipe->priv = priv; pipe->destroy = nvc0_destroy; pipe->draw_vbo = nvc0_draw_vbo; pipe->clear = nvc0_clear; pipe->launch_grid = (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) ? nve4_launch_grid : nvc0_launch_grid; pipe->flush = nvc0_flush; pipe->texture_barrier = nvc0_texture_barrier; pipe->memory_barrier = nvc0_memory_barrier; pipe->get_sample_position = nvc0_context_get_sample_position; nouveau_context_init(&nvc0->base); nvc0_init_query_functions(nvc0); nvc0_init_surface_functions(nvc0); nvc0_init_state_functions(nvc0); nvc0_init_transfer_functions(nvc0); nvc0_init_resource_functions(pipe); nvc0->base.invalidate_resource_storage = nvc0_invalidate_resource_storage; pipe->create_video_codec = nvc0_create_decoder; pipe->create_video_buffer = nvc0_video_buffer_create; /* shader builtin library is per-screen, but we need a context for m2mf */ nvc0_program_library_upload(nvc0); nvc0_program_init_tcp_empty(nvc0); if (!nvc0->tcp_empty) goto out_err; /* set the empty tctl prog on next draw in case one is never set */ nvc0->dirty |= NVC0_NEW_TCTLPROG; /* now that there are no more opportunities for errors, set the current * context if there isn't already one. */ if (!screen->cur_ctx) { nvc0->state = screen->save_state; screen->cur_ctx = nvc0; nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx); } screen->base.pushbuf->kick_notify = nvc0_default_kick_notify; /* add permanently resident buffers to bufctxts */ flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD; BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text); BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo); BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc); if (screen->compute) { BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text); BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc); BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm); } flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RDWR; if (screen->poly_cache) BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache); if (screen->compute) BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls); flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo); BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo); if (screen->compute) BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); nvc0->base.scratch.bo_size = 2 << 20; memset(nvc0->tex_handles, ~0, sizeof(nvc0->tex_handles)); util_dynarray_init(&nvc0->global_residents); return pipe; out_err: if (nvc0) { if (nvc0->bufctx_3d) nouveau_bufctx_del(&nvc0->bufctx_3d); if (nvc0->bufctx_cp) nouveau_bufctx_del(&nvc0->bufctx_cp); if (nvc0->bufctx) nouveau_bufctx_del(&nvc0->bufctx); FREE(nvc0->blit); FREE(nvc0); } return NULL; }
struct pipe_resource * nouveau_buffer_create(struct pipe_screen *pscreen, const struct pipe_resource *templ) { struct nouveau_screen *screen = nouveau_screen(pscreen); struct nv04_resource *buffer; bool ret; buffer = CALLOC_STRUCT(nv04_resource); if (!buffer) return NULL; buffer->base = *templ; buffer->vtbl = &nouveau_buffer_vtbl; pipe_reference_init(&buffer->base.reference, 1); buffer->base.screen = pscreen; if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | PIPE_RESOURCE_FLAG_MAP_COHERENT)) { buffer->domain = NOUVEAU_BO_GART; } else if (buffer->base.bind == 0 || (buffer->base.bind & (screen->vidmem_bindings & screen->sysmem_bindings))) { switch (buffer->base.usage) { case PIPE_USAGE_DEFAULT: case PIPE_USAGE_IMMUTABLE: buffer->domain = NV_VRAM_DOMAIN(screen); break; case PIPE_USAGE_DYNAMIC: /* For most apps, we'd have to do staging transfers to avoid sync * with this usage, and GART -> GART copies would be suboptimal. */ buffer->domain = NV_VRAM_DOMAIN(screen); break; case PIPE_USAGE_STAGING: case PIPE_USAGE_STREAM: buffer->domain = NOUVEAU_BO_GART; break; default: assert(0); break; } } else { if (buffer->base.bind & screen->vidmem_bindings) buffer->domain = NV_VRAM_DOMAIN(screen); else if (buffer->base.bind & screen->sysmem_bindings) buffer->domain = NOUVEAU_BO_GART; } /* There can be very special situations where we want non-gpu-mapped * buffers, but never through this interface. */ assert(buffer->domain); ret = nouveau_buffer_allocate(screen, buffer, buffer->domain); if (ret == false) goto fail; if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy) nouveau_buffer_cache(NULL, buffer); NOUVEAU_DRV_STAT(screen, buf_obj_current_count, 1); util_range_init(&buffer->valid_buffer_range); return &buffer->base; fail: FREE(buffer); return NULL; }
int nvc0_screen_compute_setup(struct nvc0_screen *screen, struct nouveau_pushbuf *push) { struct nouveau_object *chan = screen->base.channel; struct nouveau_device *dev = screen->base.device; uint32_t obj_class; int ret; int i; switch (dev->chipset & ~0xf) { case 0xc0: case 0xd0: /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but, * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */ obj_class = NVC0_COMPUTE_CLASS; break; default: NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); return -1; } ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, &screen->compute); if (ret) { NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); return ret; } ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL, &screen->parm); if (ret) return ret; BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->oclass); /* hardware limit */ BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1); PUSH_DATA (push, screen->mp_count); BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1); PUSH_DATA (push, 0xf); BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1); PUSH_DATA (push, 0x8000); /* global memory setup */ BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); PUSH_DATA (push, 0); BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100); for (i = 0; i <= 0xff; i++) PUSH_DATA (push, (0xc << 28) | (i << 16) | i); BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); PUSH_DATA (push, 1); /* local memory and cstack setup */ BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls->offset); PUSH_DATA (push, screen->tls->offset); BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2); PUSH_DATAh(push, screen->tls->size); PUSH_DATA (push, screen->tls->size); BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1); PUSH_DATA (push, 1 << 24); /* shared memory setup */ BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1); PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1); PUSH_DATA (push, 2 << 24); BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1); PUSH_DATA (push, 0); /* code segment setup */ BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); /* TODO: textures & samplers */ return 0; }