bool
nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
{
   struct nouveau_heap *heap;
   int ret;
   uint32_t size = align(prog->code_size, 0x40);

   /* Each program type has its own code heap; the original text had the
    * geometry and fragment heaps swapped, which is corrected here. */
   switch (prog->type) {
   case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
   case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
   case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
   default:
      assert(!"invalid program type");
      return false;
   }

   ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
   if (ret) {
      /* Out of space: evict everything to compactify the code segment, hoping
       * the working set is much smaller and drifts slowly. Improve me !
       */
      while (heap->next) {
         struct nv50_program *evict = heap->next->priv;
         if (evict)
            nouveau_heap_free(&evict->mem);
      }
      debug_printf("WARNING: out of code space, evicting all shaders.\n");
      ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
      if (ret) {
         NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
         return false;
      }
   }
   prog->code_base = prog->mem->start;

   ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
   if (ret < 0) {
      nouveau_heap_free(&prog->mem);
      return false;
   }
   if (ret > 0)
      nv50->state.new_tls_space = true;

   if (prog->fixups)
      nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);

   nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
                       (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
                       NOUVEAU_BO_VRAM, prog->code_size, prog->code);

   BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
   PUSH_DATA (nv50->base.pushbuf, 0);

   return true;
}
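/* The alloc / evict / retry shape above recurs in the nv30 paths below.
 * A minimal sketch of the common pattern, assuming only the nouveau_heap
 * API already used in this file; the helper itself is hypothetical, not
 * part of libdrm_nouveau.  It follows the nv30 convention where priv is a
 * pointer back at the owner's allocation handle, so freeing through it
 * also clears the owner's pointer.
 */
static inline int
heap_alloc_with_eviction(struct nouveau_heap *heap, unsigned size,
                         void *priv, struct nouveau_heap **mem)
{
   if (!nouveau_heap_alloc(heap, size, priv, mem))
      return 0;
   /* Out of space: free in-use entries through their priv back-pointers
    * until a large enough hole opens up, then retry once. */
   while (heap->next && heap->size < size) {
      struct nouveau_heap **evict = heap->next->priv;
      nouveau_heap_free(evict);
   }
   return nouveau_heap_alloc(heap, size, priv, mem);
}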
static void
nv30_vertprog_destroy(struct nv30_vertprog *vp)
{
   util_dynarray_fini(&vp->branch_relocs);
   nouveau_heap_free(&vp->exec);
   FREE(vp->insns);
   vp->insns = NULL;
   vp->nr_insns = 0;

   util_dynarray_fini(&vp->const_relocs);
   nouveau_heap_free(&vp->data);
   FREE(vp->consts);
   vp->consts = NULL;
   vp->nr_consts = 0;

   vp->translated = FALSE;
}
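/* Note that destroying the translated program is also how the driver
 * invalidates it: nv30_vertprog_validate() below calls this when the
 * fragprog texcoord routing or the enabled clip planes change, forcing a
 * retranslation on the next validate.
 */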
static void
nv30_render_destroy(struct vbuf_render *render)
{
   struct nv30_render *r = nv30_render(render);

   if (r->transfer)
      pipe_buffer_unmap(&r->nv30->base.pipe, r->transfer);
   pipe_resource_reference(&r->buffer, NULL);
   nouveau_heap_free(&r->vertprog);
   FREE(render);
}
static void
nv30_query_object_del(struct nv30_screen *screen, struct nv30_query_object **po)
{
   struct nv30_query_object *qo = *po; *po = NULL;
   if (qo) {
      volatile uint32_t *ntfy = nv30_ntfy(screen, qo);
      /* Spin until the notifier's status byte clears, i.e. the pending
       * query result has landed and the slot is safe to recycle. */
      while (ntfy[3] & 0xff000000) {
      }
      nouveau_heap_free(&qo->hw);
      LIST_DEL(&qo->list);
      FREE(qo);
   }
}
void
nv50_fragprog_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_program *fp = nv50->fragprog;
   struct pipe_rasterizer_state *rast = &nv50->rast->pipe;

   if (fp->fp.force_persample_interp != rast->force_persample_interp) {
      /* Force the program to be reuploaded, which will trigger interp fixups
       * to get applied
       */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.force_persample_interp = rast->force_persample_interp;
   }

   if (fp->mem && !(nv50->dirty_3d & (NV50_NEW_3D_FRAGPROG |
                                      NV50_NEW_3D_MIN_SAMPLES)))
      return;

   if (!nv50_program_validate(nv50, fp))
      return;
   nv50_program_update_context_state(nv50, fp, 1);

   BEGIN_NV04(push, NV50_3D(FP_REG_ALLOC_TEMP), 1);
   PUSH_DATA (push, fp->max_gpr);
   BEGIN_NV04(push, NV50_3D(FP_RESULT_COUNT), 1);
   PUSH_DATA (push, fp->max_out);
   BEGIN_NV04(push, NV50_3D(FP_CONTROL), 1);
   PUSH_DATA (push, fp->fp.flags[0]);
   BEGIN_NV04(push, NV50_3D(FP_CTRL_UNK196C), 1);
   PUSH_DATA (push, fp->fp.flags[1]);
   BEGIN_NV04(push, NV50_3D(FP_START_ID), 1);
   PUSH_DATA (push, fp->code_base);

   if (nv50->screen->tesla->oclass >= NVA3_3D_CLASS) {
      BEGIN_NV04(push, SUBC_3D(NVA3_3D_FP_MULTISAMPLE), 1);
      if (nv50->min_samples > 1 || fp->fp.has_samplemask)
         PUSH_DATA(push,
                   NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE |
                   (NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK *
                    fp->fp.has_samplemask));
      else
         PUSH_DATA(push, 0);
   }
}
void
nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
{
   const struct pipe_shader_state pipe = p->pipe;
   const ubyte type = p->type;

   if (p->mem)
      nouveau_heap_free(&p->mem);
   FREE(p->code);
   FREE(p->fixups);
   FREE(p->so);

   /* Wipe everything, but preserve the fields that identify the shader. */
   memset(p, 0, sizeof(*p));

   p->pipe = pipe;
   p->type = type;
}
static inline struct nouveau_heap *
nv30_transfer_rect_vertprog(struct nv30_context *nv30)
{
   struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
   struct nouveau_heap *vp;

   vp = nv30->blit_vp;
   if (!vp) {
      if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) {
         while (heap->next && heap->size < 2) {
            struct nouveau_heap **evict = heap->next->priv;
            nouveau_heap_free(evict);
         }

         if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp))
            return NULL;
      }

      vp = nv30->blit_vp;
      if (vp) {
         struct nouveau_pushbuf *push = nv30->base.pushbuf;

         /* The upload pointer advances after each instruction is written,
          * so both 4-dword bursts go through VP_UPLOAD_INST(0). */
         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
         PUSH_DATA (push, vp->start);
         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
         PUSH_DATA (push, 0x401f9c6c); /* mov o[hpos], a[0]; */
         PUSH_DATA (push, 0x0040000d);
         PUSH_DATA (push, 0x8106c083);
         PUSH_DATA (push, 0x6041ff80);
         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
         PUSH_DATA (push, 0x401f9c6c); /* mov o[tex0], a[8]; end; */
         PUSH_DATA (push, 0x0040080d);
         PUSH_DATA (push, 0x8106c083);
         PUSH_DATA (push, 0x6041ff9d);
      }
   }
   return vp;
}
static boolean
nv30_render_validate(struct nv30_context *nv30)
{
   struct nv30_render *r = nv30_render(nv30->draw->render);
   struct nv30_rasterizer_stateobj *rast = nv30->rast;
   struct pipe_screen *pscreen = &nv30->screen->base.base;
   struct nouveau_pushbuf *push = nv30->screen->base.pushbuf;
   struct nouveau_object *eng3d = nv30->screen->eng3d;
   struct nv30_vertprog *vp = nv30->vertprog.program;
   struct vertex_info *vinfo = &r->vertex_info;
   unsigned vp_attribs = 0;
   unsigned vp_results = 0;
   unsigned attrib = 0;
   unsigned pntc;
   int i;

   if (!r->vertprog) {
      struct nouveau_heap *heap = nv30_screen(pscreen)->vp_exec_heap;
      if (nouveau_heap_alloc(heap, 16, &r->vertprog, &r->vertprog)) {
         while (heap->next && heap->size < 16) {
            struct nouveau_heap **evict = heap->next->priv;
            nouveau_heap_free(evict);
         }

         if (nouveau_heap_alloc(heap, 16, &r->vertprog, &r->vertprog))
            return FALSE;
      }
   }

   vinfo->num_attribs = 0;
   vinfo->size = 0;

   /* setup routing for all necessary vp outputs */
   for (i = 0; i < vp->info.num_outputs && attrib < 16; i++) {
      uint semantic = vp->info.output_semantic_name[i];
      uint index = vp->info.output_semantic_index[i];
      if (vroute_add(r, attrib, semantic, &index)) {
         vp_attribs |= (1 << attrib++);
         vp_results |= index;
      }
   }

   /* setup routing for replaced point coords not written by vp */
   if (rast && rast->pipe.point_quad_rasterization)
      pntc = rast->pipe.sprite_coord_enable & 0x000002ff;
   else
      pntc = 0;

   while (pntc && attrib < 16) {
      uint index = ffs(pntc) - 1;
      pntc &= ~(1 << index);

      if (vroute_add(r, attrib, TGSI_SEMANTIC_TEXCOORD, &index)) {
         vp_attribs |= (1 << attrib++);
         vp_results |= index;
      }
   }

   /* modify vertex format for correct stride, and stub out unused ones */
   BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
   PUSH_DATA (push, r->vertprog->start);
   r->vtxprog[attrib - 1][3] |= 1;
   for (i = 0; i < attrib; i++) {
      BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
      PUSH_DATAp(push, r->vtxprog[i], 4);
      r->vtxfmt[i] |= vinfo->size << 8;
   }
   for (; i < 16; i++)
      r->vtxfmt[i] = NV30_3D_VTXFMT_TYPE_V32_FLOAT;

   BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8);
   PUSH_DATAf(push, 0.0);
   PUSH_DATAf(push, 0.0);
   PUSH_DATAf(push, 0.0);
   PUSH_DATAf(push, 0.0);
   PUSH_DATAf(push, 1.0);
   PUSH_DATAf(push, 1.0);
   PUSH_DATAf(push, 1.0);
   PUSH_DATAf(push, 1.0);
   BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2);
   PUSH_DATAf(push, 0.0);
   PUSH_DATAf(push, 1.0);

   BEGIN_NV04(push, NV30_3D(VTXFMT(0)), 16);
   PUSH_DATAp(push, r->vtxfmt, 16);

   BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
   PUSH_DATA (push, r->vertprog->start);
   BEGIN_NV04(push, NV30_3D(ENGINE), 1);
   PUSH_DATA (push, 0x00000103);

   if (eng3d->oclass >= NV40_3D_CLASS) {
      BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
      PUSH_DATA (push, vp_attribs);
      PUSH_DATA (push, vp_results);
   }

   vinfo->size /= 4;
   return TRUE;
}
void
nvc0_fragprog_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_program *fp = nvc0->fragprog;
   struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;

   if (fp->fp.force_persample_interp != rast->force_persample_interp) {
      /* Force the program to be reuploaded, which will trigger interp fixups
       * to get applied
       */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.force_persample_interp = rast->force_persample_interp;
   }

   /* Shade model works well enough when both colors follow it. However if one
    * (or both) is explicitly set, then we have to go the patching route.
    */
   bool has_explicit_color = fp->fp.colors &&
      (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
       ((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
   bool hwflatshade = false;
   if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
      /* Force re-upload */
      if (fp->mem)
         nouveau_heap_free(&fp->mem);

      fp->fp.flatshade = rast->flatshade;

      /* Always smooth-shade in this mode, the shader will decide on its own
       * when to flat-shade.
       */
   } else if (!has_explicit_color) {
      hwflatshade = rast->flatshade;

      /* No need to binary-patch the shader each time, make sure that it's set
       * up for the default behaviour.
       */
      fp->fp.flatshade = 0;
   }

   if (hwflatshade != nvc0->state.flatshade) {
      nvc0->state.flatshade = hwflatshade;
      BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
      PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
                                     NVC0_3D_SHADE_MODEL_SMOOTH);
   }

   if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG))
      return;

   if (!nvc0_program_validate(nvc0, fp))
      return;
   nvc0_program_update_context_state(nvc0, fp, 4);

   if (fp->fp.early_z != nvc0->state.early_z_forced) {
      nvc0->state.early_z_forced = fp->fp.early_z;
      IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
   }

   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
   PUSH_DATA (push, 0x51);
   PUSH_DATA (push, fp->code_base);
   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
   PUSH_DATA (push, fp->num_gprs);

   BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
   PUSH_DATA (push, 0x20164010);
   PUSH_DATA (push, 0x20);
   BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
   PUSH_DATA (push, fp->flags[0]);
}
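/* Note: the force_persample_interp handling above mirrors
 * nv50_fragprog_validate() earlier in this section; in both drivers,
 * freeing fp->mem invalidates the uploaded code so that the next
 * *_program_validate() re-uploads it with the interp fixups applied.
 */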
void
nv30_vertprog_validate(struct nv30_context *nv30)
{
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
   struct nouveau_object *eng3d = nv30->screen->eng3d;
   struct nv30_vertprog *vp = nv30->vertprog.program;
   struct nv30_fragprog *fp = nv30->fragprog.program;
   boolean upload_code = FALSE;
   boolean upload_data = FALSE;
   unsigned i;

   if (nv30->dirty & NV30_NEW_FRAGPROG) {
      if (memcmp(vp->texcoord, fp->texcoord, sizeof(vp->texcoord))) {
         if (vp->translated)
            nv30_vertprog_destroy(vp);
         memcpy(vp->texcoord, fp->texcoord, sizeof(vp->texcoord));
      }
   }

   if (nv30->rast && nv30->rast->pipe.clip_plane_enable != vp->enabled_ucps) {
      vp->enabled_ucps = nv30->rast->pipe.clip_plane_enable;
      if (vp->translated)
         nv30_vertprog_destroy(vp);
   }

   if (!vp->translated) {
      vp->translated = _nvfx_vertprog_translate(eng3d->oclass, vp);
      if (!vp->translated) {
         nv30->draw_flags |= NV30_NEW_VERTPROG;
         return;
      }
      nv30->dirty |= NV30_NEW_VERTPROG;
   }

   if (!vp->exec) {
      struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
      struct nv30_shader_reloc *reloc = vp->branch_relocs.data;
      unsigned nr_reloc = vp->branch_relocs.size / sizeof(*reloc);
      uint32_t *inst, target;

      if (nouveau_heap_alloc(heap, vp->nr_insns, &vp->exec, &vp->exec)) {
         while (heap->next && heap->size < vp->nr_insns) {
            struct nouveau_heap **evict = heap->next->priv;
            nouveau_heap_free(evict);
         }

         if (nouveau_heap_alloc(heap, vp->nr_insns, &vp->exec, &vp->exec)) {
            nv30->draw_flags |= NV30_NEW_VERTPROG;
            return;
         }
      }

      if (eng3d->oclass < NV40_3D_CLASS) {
         while (nr_reloc--) {
            inst   = vp->insns[reloc->location].data;
            target = vp->exec->start + reloc->target;

            inst[2] &= ~0x000007fc;
            inst[2] |= target << 2;
            reloc++;
         }
      } else {
         while (nr_reloc--) {
            inst   = vp->insns[reloc->location].data;
            target = vp->exec->start + reloc->target;

            inst[2] &= ~0x0000003f;
            inst[2] |= target >> 3;
            inst[3] &= ~0xe0000000;
            inst[3] |= target << 29;
            reloc++;
         }
      }

      upload_code = TRUE;
   }

   if (vp->nr_consts && !vp->data) {
      struct nouveau_heap *heap = nv30->screen->vp_data_heap;
      struct nv30_shader_reloc *reloc = vp->const_relocs.data;
      unsigned nr_reloc = vp->const_relocs.size / sizeof(*reloc);
      uint32_t *inst, target;

      if (nouveau_heap_alloc(heap, vp->nr_consts, vp, &vp->data)) {
         while (heap->next && heap->size < vp->nr_consts) {
            struct nv30_vertprog *evp = heap->next->priv;
            nouveau_heap_free(&evp->data);
         }

         if (nouveau_heap_alloc(heap, vp->nr_consts, vp, &vp->data)) {
            nv30->draw_flags |= NV30_NEW_VERTPROG;
            return;
         }
      }

      if (eng3d->oclass < NV40_3D_CLASS) {
         while (nr_reloc--) {
            inst   = vp->insns[reloc->location].data;
            target = vp->data->start + reloc->target;

            inst[1] &= ~0x0007fc000;
            inst[1] |= (target & 0x1ff) << 14;
            reloc++;
         }
      } else {
         while (nr_reloc--) {
            inst   = vp->insns[reloc->location].data;
            target = vp->data->start + reloc->target;

            inst[1] &= ~0x0001ff000;
            inst[1] |= (target & 0x1ff) << 12;
            reloc++;
         }
      }

      upload_code = TRUE;
      upload_data = TRUE;
   }

   if (vp->nr_consts) {
      struct nv04_resource *res = nv04_resource(nv30->vertprog.constbuf);

      for (i = 0; i < vp->nr_consts; i++) {
         struct nv30_vertprog_data *data = &vp->consts[i];

         if (data->index < 0) {
            if (!upload_data)
               continue;
         } else {
            float *constbuf = (float *)res->data;
            if (!upload_data &&
                !memcmp(data->value, &constbuf[data->index * 4], 16))
               continue;
            memcpy(data->value, &constbuf[data->index * 4], 16);
         }

         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);
         PUSH_DATA (push, vp->data->start + i);
         PUSH_DATAp(push, data->value, 4);
      }
   }

   if (upload_code) {
      BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
      PUSH_DATA (push, vp->exec->start);
      for (i = 0; i < vp->nr_insns; i++) {
         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
         PUSH_DATAp(push, vp->insns[i].data, 4);
      }
   }

   if (nv30->dirty & (NV30_NEW_VERTPROG | NV30_NEW_FRAGPROG)) {
      BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
      PUSH_DATA (push, vp->exec->start);
      if (eng3d->oclass < NV40_3D_CLASS) {
         BEGIN_NV04(push, NV30_3D(ENGINE), 1);
         PUSH_DATA (push, 0x00000013); /* vp instead of ff, somehow */
      } else {
         BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
         PUSH_DATA (push, vp->ir);
         PUSH_DATA (push, vp->or | fp->vp_or);
         BEGIN_NV04(push, NV30_3D(ENGINE), 1);
         PUSH_DATA (push, 0x00000011);
      }
   }
}
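/* For reference, the two branch-target encodings patched in
 * nv30_vertprog_validate() above, written out as helpers.  This sketch is
 * derived only from the masks and shifts in that function; the helper names
 * are hypothetical, the bitfield reading is an assumption, and like the
 * driver code it assumes the target fits the field.
 */
static inline void
nv30_vp_patch_branch(uint32_t *inst, uint32_t target)
{
   /* nv30: target in inst[2] bits 2..10 */
   inst[2] = (inst[2] & ~0x000007fc) | (target << 2);
}

static inline void
nv40_vp_patch_branch(uint32_t *inst, uint32_t target)
{
   /* nv40: target split, high bits in inst[2] bits 0..5 and the low three
    * bits in inst[3] bits 29..31 */
   inst[2] = (inst[2] & ~0x0000003f) | (target >> 3);
   inst[3] = (inst[3] & ~0xe0000000) | (target << 29);
}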