int nvc0_screen_compute_setup(struct nvc0_screen *screen, struct nouveau_pushbuf *push) { struct nouveau_object *chan = screen->base.channel; struct nouveau_device *dev = screen->base.device; uint32_t obj_class; int ret; int i; switch (dev->chipset & ~0xf) { case 0xc0: case 0xd0: /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but, * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */ obj_class = NVC0_COMPUTE_CLASS; break; default: NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); return -1; } ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, &screen->compute); if (ret) { NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); return ret; } ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL, &screen->parm); if (ret) return ret; BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->oclass); /* hardware limit */ BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1); PUSH_DATA (push, screen->mp_count); BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1); PUSH_DATA (push, 0xf); BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1); PUSH_DATA (push, 0x8000); /* global memory setup */ BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); PUSH_DATA (push, 0); BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100); for (i = 0; i <= 0xff; i++) PUSH_DATA (push, (0xc << 28) | (i << 16) | i); BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); PUSH_DATA (push, 1); /* local memory and cstack setup */ BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls->offset); PUSH_DATA (push, screen->tls->offset); BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2); PUSH_DATAh(push, screen->tls->size); PUSH_DATA (push, screen->tls->size); BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1); PUSH_DATA (push, 1 << 24); /* shared memory setup */ BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1); PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1); PUSH_DATA (push, 2 << 24); BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1); PUSH_DATA (push, 0); /* code segment setup */ BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); /* TODO: textures & samplers */ return 0; }
struct pipe_screen * nv30_screen_create(struct nouveau_device *dev) { struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); struct pipe_screen *pscreen; struct nouveau_pushbuf *push; struct nv04_fifo *fifo; unsigned oclass = 0; int ret, i; if (!screen) return NULL; switch (dev->chipset & 0xf0) { case 0x30: if (RANKINE_0397_CHIPSET & (1 << (dev->chipset & 0x0f))) oclass = NV30_3D_CLASS; else if (RANKINE_0697_CHIPSET & (1 << (dev->chipset & 0x0f))) oclass = NV34_3D_CLASS; else if (RANKINE_0497_CHIPSET & (1 << (dev->chipset & 0x0f))) oclass = NV35_3D_CLASS; break; case 0x40: if (CURIE_4097_CHIPSET & (1 << (dev->chipset & 0x0f))) oclass = NV40_3D_CLASS; else if (CURIE_4497_CHIPSET & (1 << (dev->chipset & 0x0f))) oclass = NV44_3D_CLASS; break; case 0x60: if (CURIE_4497_CHIPSET6X & (1 << (dev->chipset & 0x0f))) oclass = NV44_3D_CLASS; break; default: break; } if (!oclass) { NOUVEAU_ERR("unknown 3d class for 0x%02x\n", dev->chipset); FREE(screen); return NULL; } /* * Some modern apps try to use msaa without keeping in mind the * restrictions on videomem of older cards. Resulting in dmesg saying: * [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate * [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list * [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12 * * Because we are running out of video memory, after which the program * using the msaa visual freezes, and eventually the entire system freezes. * * To work around this we do not allow msaa visauls by default and allow * the user to override this via NV30_MAX_MSAA. */ screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0); if (screen->max_sample_count > 4) screen->max_sample_count = 4; pscreen = &screen->base.base; pscreen->destroy = nv30_screen_destroy; pscreen->get_param = nv30_screen_get_param; pscreen->get_paramf = nv30_screen_get_paramf; pscreen->get_shader_param = nv30_screen_get_shader_param; pscreen->context_create = nv30_context_create; pscreen->is_format_supported = nv30_screen_is_format_supported; nv30_resource_screen_init(pscreen); nouveau_screen_init_vdec(&screen->base); screen->base.fence.emit = nv30_screen_fence_emit; screen->base.fence.update = nv30_screen_fence_update; ret = nouveau_screen_init(&screen->base, dev); if (ret) FAIL_SCREEN_INIT("nv30_screen_init failed: %d\n", ret); screen->base.vidmem_bindings |= PIPE_BIND_VERTEX_BUFFER; screen->base.sysmem_bindings |= PIPE_BIND_VERTEX_BUFFER; if (oclass == NV40_3D_CLASS) { screen->base.vidmem_bindings |= PIPE_BIND_INDEX_BUFFER; screen->base.sysmem_bindings |= PIPE_BIND_INDEX_BUFFER; } fifo = screen->base.channel->data; push = screen->base.pushbuf; push->rsvd_kick = 16; ret = nouveau_object_new(screen->base.channel, 0x00000000, NV01_NULL_CLASS, NULL, 0, &screen->null); if (ret) FAIL_SCREEN_INIT("error allocating null object: %d\n", ret); /* DMA_FENCE refuses to accept DMA objects with "adjust" filled in, * this means that the address pointed at by the DMA object must * be 4KiB aligned, which means this object needs to be the first * one allocated on the channel. */ ret = nouveau_object_new(screen->base.channel, 0xbeef1e00, NOUVEAU_NOTIFIER_CLASS, &(struct nv04_notify) { .length = 32 }, sizeof(struct nv04_notify),
void nve4_set_surface_info(struct nouveau_pushbuf *push, struct pipe_surface *psf, struct nvc0_screen *screen) { struct nv50_surface *sf = nv50_surface(psf); struct nv04_resource *res; uint64_t address; uint32_t *const info = push->cur; uint8_t log2cpp; if (psf && !nve4_su_format_map[psf->format]) NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n"); push->cur += 16; if (!psf || !nve4_su_format_map[psf->format]) { memset(info, 0, 16 * sizeof(*info)); info[0] = 0xbadf0000; info[1] = 0x80004000; info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] + screen->lib_code->start; return; } res = nv04_resource(sf->base.texture); address = res->address + sf->offset; info[8] = sf->width; info[9] = sf->height; info[10] = sf->depth; switch (res->base.target) { case PIPE_TEXTURE_1D_ARRAY: info[11] = 1; break; case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: info[11] = 2; break; case PIPE_TEXTURE_3D: info[11] = 3; break; case PIPE_TEXTURE_2D_ARRAY: case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: info[11] = 4; break; default: info[11] = 0; break; } log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12; info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start; /* limit in bytes for raw access */ info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1); info[1] = nve4_su_format_map[sf->base.format]; #if 0 switch (util_format_get_blocksizebits(sf->base.format)) { case 16: info[1] |= 1 << 16; break; case 32: info[1] |= 2 << 16; break; case 64: info[1] |= 3 << 16; break; case 128: info[1] |= 4 << 16; break; default: break; } #else info[1] |= log2cpp << 16; info[1] |= 0x4000; info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]); #endif if (res->base.target == PIPE_BUFFER) { info[0] = address >> 8; info[2] = sf->width - 1; info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22; info[3] = 0; info[4] = 0; info[5] = 0; info[6] = 0; info[7] = 0; info[14] = 0; info[15] = 0; } else {
struct pipe_screen * nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); struct nouveau_channel *chan; struct pipe_screen *pscreen; struct nouveau_stateobj *so; unsigned rankine_class = 0; int ret, i; if (!screen) return NULL; pscreen = &screen->base.base; ret = nouveau_screen_init(&screen->base, dev); if (ret) { nv30_screen_destroy(pscreen); return NULL; } chan = screen->base.channel; pscreen->winsys = ws; pscreen->destroy = nv30_screen_destroy; pscreen->get_param = nv30_screen_get_param; pscreen->get_paramf = nv30_screen_get_paramf; pscreen->is_format_supported = nv30_screen_surface_format_supported; pscreen->context_create = nv30_create; nv30_screen_init_miptree_functions(pscreen); nv30_screen_init_transfer_functions(pscreen); /* 3D object */ switch (dev->chipset & 0xf0) { case 0x30: if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) rankine_class = 0x0397; else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) rankine_class = 0x0697; else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) rankine_class = 0x0497; break; default: break; } if (!rankine_class) { NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", dev->chipset); return NULL; } ret = nouveau_grobj_alloc(chan, 0xbeef3097, rankine_class, &screen->rankine); if (ret) { NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } /* 2D engine setup */ screen->eng2d = nv04_surface_2d_init(&screen->base); screen->eng2d->buf = nv30_surface_buffer; /* Notifier for sync purposes */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); if (ret) { NOUVEAU_ERR("Error creating notifier object: %d\n", ret); nv30_screen_destroy(pscreen); return NULL; } /* Query objects */ ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query); if (ret) { NOUVEAU_ERR("Error initialising query objects: %d\n", ret); nv30_screen_destroy(pscreen); return NULL; } ret = nouveau_resource_init(&screen->query_heap, 0, 32); if (ret) { NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); nv30_screen_destroy(pscreen); return NULL; } /* Vtxprog resources */ if (nouveau_resource_init(&screen->vp_exec_heap, 0, 256) || nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { nv30_screen_destroy(pscreen); return NULL; } /* Static rankine initialisation */ so = so_new(36, 60, 0); so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); so_data (so, chan->vram->handle); so_data (so, chan->gart->handle); so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1); so_data (so, chan->vram->handle); so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2); so_data (so, chan->vram->handle); so_data (so, chan->vram->handle); so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2); so_data (so, chan->vram->handle); so_data (so, chan->gart->handle); /* so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2); so_data (so, 0); so_data (so, screen->query->handle);*/ so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1); so_data (so, chan->vram->handle); so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1); so_data (so, chan->vram->handle); for (i=1; i<8; i++) { so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); so_data (so, 0); so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); so_data (so, 0); } so_method(so, screen->rankine, 0x220, 1); so_data (so, 1); so_method(so, screen->rankine, 0x03b0, 1); so_data (so, 0x00100000); so_method(so, screen->rankine, 0x1454, 1); so_data (so, 0); so_method(so, screen->rankine, 0x1d80, 1); so_data (so, 3); so_method(so, screen->rankine, 0x1450, 1); so_data (so, 0x00030004); /* NEW */ so_method(so, screen->rankine, 0x1e98, 1); so_data (so, 0); so_method(so, screen->rankine, 0x17e0, 3); so_data (so, fui(0.0)); so_data (so, fui(0.0)); so_data (so, fui(1.0)); so_method(so, screen->rankine, 0x1f80, 16); for (i=0; i<16; i++) { so_data (so, (i==8) ? 0x0000ffff : 0); } so_method(so, screen->rankine, 0x120, 3); so_data (so, 0); so_data (so, 1); so_data (so, 2); so_method(so, screen->rankine, 0x1d88, 1); so_data (so, 0x00001200); so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1); so_data (so, 0); so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2); so_data (so, fui(0.0)); so_data (so, fui(1.0)); so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); so_data (so, 0xffff0000); /* enables use of vp rather than fixed-function somehow */ so_method(so, screen->rankine, 0x1e94, 1); so_data (so, 0x13); so_emit(chan, so); so_ref(NULL, &so); nouveau_pushbuf_flush(chan, 0); return pscreen; }
void nve4_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q) { struct nvc0_screen *screen = nvc0->screen; struct nouveau_pushbuf *push = nvc0->base.pushbuf; const struct nve4_mp_pm_query_cfg *cfg; unsigned i, c; unsigned num_ab[2] = { 0, 0 }; cfg = &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC]; /* check if we have enough free counter slots */ for (i = 0; i < cfg->num_counters; ++i) num_ab[cfg->ctr[i].sig_dom]++; if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 || screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) { NOUVEAU_ERR("Not enough free MP counter slots !\n"); return; } assert(cfg->num_counters <= 4); PUSH_SPACE(push, 4 * 8 + 6); if (!screen->pm.mp_counters_enabled) { screen->pm.mp_counters_enabled = TRUE; BEGIN_NVC0(push, SUBC_SW(0x06ac), 1); PUSH_DATA (push, 0x1fcb); } /* set sequence field to 0 (used to check if result is available) */ for (i = 0; i < screen->mp_count; ++i) q->data[i * 10 + 10] = 0; for (i = 0; i < cfg->num_counters; ++i) { const unsigned d = cfg->ctr[i].sig_dom; if (!screen->pm.num_mp_pm_active[d]) { uint32_t m = (1 << 22) | (1 << (7 + (8 * !d))); if (screen->pm.num_mp_pm_active[!d]) m |= 1 << (7 + (8 * d)); BEGIN_NVC0(push, SUBC_SW(0x0600), 1); PUSH_DATA (push, m); } screen->pm.num_mp_pm_active[d]++; for (c = d * 4; c < (d * 4 + 4); ++c) { if (!screen->pm.mp_counter[c]) { q->ctr[i] = c; screen->pm.mp_counter[c] = (struct pipe_query *)q; break; } } assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */ /* configure and reset the counter(s) */ if (d == 0) BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1); else BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1); PUSH_DATA (push, cfg->ctr[i].sig_sel); BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1); PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3)); BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1); PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1); PUSH_DATA (push, 0); } }
static int nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param) { switch (shader) { case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_FRAGMENT: break; case PIPE_SHADER_COMPUTE: default: return 0; } switch (param) { case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: return 16384; case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: return 4; case PIPE_SHADER_CAP_MAX_INPUTS: if (shader == PIPE_SHADER_VERTEX) return 32; return 15; case PIPE_SHADER_CAP_MAX_OUTPUTS: return 16; case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: return 65536; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return NV50_MAX_PIPE_CONSTBUFS; case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: return shader != PIPE_SHADER_FRAGMENT; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: return 1; case PIPE_SHADER_CAP_MAX_PREDS: return 0; case PIPE_SHADER_CAP_MAX_TEMPS: return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; case PIPE_SHADER_CAP_SUBROUTINES: return 0; /* please inline, or provide function declarations */ case PIPE_SHADER_CAP_INTEGERS: return 1; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: /* The chip could handle more sampler views than samplers */ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return MIN2(16, PIPE_MAX_SAMPLERS); case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: case PIPE_SHADER_CAP_SUPPORTED_IRS: case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; } }
bool nv50_program_translate(struct nv50_program *prog, uint16_t chipset, struct pipe_debug_callback *debug) { struct nv50_ir_prog_info *info; int ret; const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80; info = CALLOC_STRUCT(nv50_ir_prog_info); if (!info) return false; info->type = prog->type; info->target = chipset; info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; info->bin.source = (void *)prog->pipe.tokens; info->io.auxCBSlot = 15; info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET; info->io.genUserClip = prog->vp.clpd_nr; info->io.resInfoCBSlot = 15; info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET; info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET; info->io.msInfoCBSlot = 15; info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET; info->assignSlots = nv50_program_assign_varying_slots; prog->vp.bfc[0] = 0xff; prog->vp.bfc[1] = 0xff; prog->vp.edgeflag = 0xff; prog->vp.clpd[0] = map_undef; prog->vp.clpd[1] = map_undef; prog->vp.psiz = map_undef; prog->gp.has_layer = 0; prog->gp.has_viewport = 0; if (prog->type == PIPE_SHADER_COMPUTE) info->prop.cp.inputOffset = 0x10; info->driverPriv = prog; #ifdef DEBUG info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); #else info->optLevel = 3; #endif ret = nv50_ir_generate_code(info); if (ret) { NOUVEAU_ERR("shader translation failed: %i\n", ret); goto out; } FREE(info->bin.syms); prog->code = info->bin.code; prog->code_size = info->bin.codeSize; prog->fixups = info->bin.relocData; prog->interps = info->bin.interpData; prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); prog->tls_space = info->bin.tlsSpace; prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; if (prog->type == PIPE_SHADER_FRAGMENT) { if (info->prop.fp.writesDepth) { prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; prog->fp.flags[1] = 0x11; } if (info->prop.fp.usesDiscard) prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; } else if (prog->type == PIPE_SHADER_GEOMETRY) { switch (info->prop.gp.outputPrim) { case PIPE_PRIM_LINE_STRIP: prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP; break; case PIPE_PRIM_TRIANGLE_STRIP: prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP; break; case PIPE_PRIM_POINTS: default: assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS); prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS; break; } prog->gp.vert_count = info->prop.gp.maxVertices; } else if (prog->type == PIPE_SHADER_COMPUTE) { prog->cp.syms = info->bin.syms; prog->cp.num_syms = info->bin.numSyms; } if (prog->pipe.stream_output.num_outputs) prog->so = nv50_program_create_strmout_state(info, &prog->pipe.stream_output); pipe_debug_message(debug, SHADER_INFO, "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d", prog->type, info->bin.tlsSpace, prog->max_gpr, info->bin.instructions, info->bin.codeSize); out: FREE(info); return !ret; }
static int nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; switch (param) { case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 64; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 14; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return 12; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 14; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: return 512; case PIPE_CAP_MIN_TEXEL_OFFSET: return -8; case PIPE_CAP_MAX_TEXEL_OFFSET: return 7; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_SCALED_RESOLVE: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: return 1; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: return 65536; case PIPE_CAP_SEAMLESS_CUBE_MAP: return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return 0; case PIPE_CAP_CUBE_MAP_ARRAY: return 0; /* return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS; */ case PIPE_CAP_TWO_SIDED_STENCIL: case PIPE_CAP_DEPTH_CLIP_DISABLE: case PIPE_CAP_POINT_SPRITE: return 1; case PIPE_CAP_SM3: return 1; case PIPE_CAP_GLSL_FEATURE_LEVEL: return 140; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: return 1; case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: return 1; case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_QUERY_TIME_ELAPSED: case PIPE_CAP_OCCLUSION_QUERY: return 1; case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: return 4; case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: return 64; case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: return (class_3d >= NVA0_3D_CLASS) ? 1 : 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_INDEP_BLEND_ENABLE: return 1; case PIPE_CAP_INDEP_BLEND_FUNC: return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; case PIPE_CAP_SHADER_STENCIL_EXPORT: return 0; case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: case PIPE_CAP_CONDITIONAL_RENDER: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: case PIPE_CAP_START_INSTANCE: return 1; case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: return 0; /* state trackers will know better */ case PIPE_CAP_USER_CONSTANT_BUFFERS: case PIPE_CAP_USER_INDEX_BUFFERS: case PIPE_CAP_USER_VERTEX_BUFFERS: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: return 1; /* 256 for binding as RT, but that's not possible in GL */ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return NOUVEAU_MIN_BUFFER_MAP_ALIGN; case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_TEXTURE_MULTISAMPLE: return 0; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return 1; case PIPE_CAP_QUERY_PIPELINE_STATISTICS: return 0; case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50; case PIPE_CAP_ENDIANNESS: return PIPE_ENDIAN_LITTLE; default: NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); return 0; } }
static void nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim) { NOUVEAU_ERR("\n"); }
static void nvc0_render_reset_stipple_counter(struct draw_stage *stage) { NOUVEAU_ERR("\n"); }
static boolean nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, const struct tgsi_full_instruction *finst) { struct nv30_sreg src[3], dst, tmp; struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); int mask; int ai = -1, ci = -1; int i; if (finst->Instruction.Opcode == TGSI_OPCODE_END) return TRUE; vpc->temp_temp_count = 0; for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; fsrc = &finst->FullSrcRegisters[i]; if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(vpc, fsrc); } } for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; fsrc = &finst->FullSrcRegisters[i]; switch (fsrc->SrcRegister.File) { case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->SrcRegister.Index) { ai = fsrc->SrcRegister.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); arith(vpc, 0, OP_MOV, src[i], MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; /*XXX: index comparison is broken now that consts come from * two different register files. */ case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: if (ci == -1 || ci == fsrc->SrcRegister.Index) { ci = fsrc->SrcRegister.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); arith(vpc, 0, OP_MOV, src[i], MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; case TGSI_FILE_TEMPORARY: /* handled above */ break; default: NOUVEAU_ERR("bad src file\n"); return FALSE; } } dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); break; case TGSI_OPCODE_ADD: arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); break; case TGSI_OPCODE_ARL: arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); break; case TGSI_OPCODE_DP3: arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DP4: arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DPH: arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DST: arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_EX2: arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_EXP: arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_FLR: arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); break; case TGSI_OPCODE_FRC: arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); break; case TGSI_OPCODE_LG2: arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LIT: arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LOG: arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_MAD: arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); break; case TGSI_OPCODE_MAX: arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MIN: arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MOV: arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); break; case TGSI_OPCODE_MUL: arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: tmp = temp(vpc); arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, swz(src[0], X, X, X, X)); arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none); arith(vpc, 1, OP_EX2, dst, mask, none, none, swz(tmp, X, X, X, X)); break; case TGSI_OPCODE_RCP: arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_RET: break; case TGSI_OPCODE_RSQ: arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_SGE: arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SGT: arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SLT: arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SUB: arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); break; case TGSI_OPCODE_XPD: tmp = temp(vpc); arith(vpc, 0, OP_MUL, tmp, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)); break; default: NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); return FALSE; } return TRUE; }
int nv50_screen_compute_setup(struct nv50_screen *screen, struct nouveau_pushbuf *push) { struct nouveau_device *dev = screen->base.device; struct nouveau_object *chan = screen->base.channel; struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data; unsigned obj_class; int i, ret; switch (dev->chipset & 0xf0) { case 0x50: case 0x80: case 0x90: obj_class = NV50_COMPUTE_CLASS; break; case 0xa0: switch (dev->chipset) { case 0xa3: case 0xa5: case 0xa8: obj_class = NVA3_COMPUTE_CLASS; break; default: obj_class = NV50_COMPUTE_CLASS; break; } break; default: NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); return -1; } ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0, &screen->compute); if (ret) return ret; BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->handle); BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->stack_bo->offset); PUSH_DATA (push, screen->stack_bo->offset); BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1); PUSH_DATA (push, 4); BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1); PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED); BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1); PUSH_DATA (push, 0x100); BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1); PUSH_DATA (push, fifo->vram); for (i = 0; i < 15; i++) { BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2); PUSH_DATA (push, 0); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1); PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); } BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2); PUSH_DATA (push, 0); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1); PUSH_DATA (push, ~0); BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1); PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1); PUSH_DATA (push, 7); BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1); PUSH_DATA (push, 7); BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1); PUSH_DATA (push, 0x54); BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); PUSH_DATA (push, screen->txc->offset + 65536); PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls_bo->offset + 65536); PUSH_DATA (push, screen->tls_bo->offset + 65536); BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1); PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); return 0; }
struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); struct nouveau_channel *chan; struct pipe_screen *pscreen; struct nouveau_stateobj *so; unsigned chipset = dev->chipset; unsigned tesla_class = 0; int ret, i; if (!screen) return NULL; pscreen = &screen->base.base; ret = nouveau_screen_init(&screen->base, dev); if (ret) { nv50_screen_destroy(pscreen); return NULL; } chan = screen->base.channel; pscreen->winsys = ws; pscreen->destroy = nv50_screen_destroy; pscreen->get_param = nv50_screen_get_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->is_format_supported = nv50_screen_is_format_supported; pscreen->context_create = nv50_create; screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map; nv50_screen_init_miptree_functions(pscreen); nv50_transfer_init_screen_functions(pscreen); /* DMA engine object */ ret = nouveau_grobj_alloc(chan, 0xbeef5039, NV50_MEMORY_TO_MEMORY_FORMAT, &screen->m2mf); if (ret) { NOUVEAU_ERR("Error creating M2MF object: %d\n", ret); nv50_screen_destroy(pscreen); return NULL; } /* 2D object */ ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d); if (ret) { NOUVEAU_ERR("Error creating 2D object: %d\n", ret); nv50_screen_destroy(pscreen); return NULL; } /* 3D object */ switch (chipset & 0xf0) { case 0x50: tesla_class = NV50TCL; break; case 0x80: case 0x90: tesla_class = NV84TCL; break; case 0xa0: switch (chipset) { case 0xa0: case 0xaa: case 0xac: tesla_class = NVA0TCL; break; default: tesla_class = NVA8TCL; break; } break; default: NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset); nv50_screen_destroy(pscreen); return NULL; } ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class, &screen->tesla); if (ret) { NOUVEAU_ERR("Error creating 3D object: %d\n", ret); nv50_screen_destroy(pscreen); return NULL; } /* Sync notifier */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); if (ret) { NOUVEAU_ERR("Error creating notifier object: %d\n", ret); nv50_screen_destroy(pscreen); return NULL; } /* Static M2MF init */ so = so_new(1, 3, 0); so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); so_data (so, chan->vram->handle); so_emit(chan, so); so_ref (NULL, &so); /* Static 2D init */ so = so_new(4, 7, 0); so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4); so_data (so, screen->sync->handle); so_data (so, chan->vram->handle); so_data (so, chan->vram->handle); so_data (so, chan->vram->handle); so_method(so, screen->eng2d, NV50_2D_OPERATION, 1); so_data (so, NV50_2D_OPERATION_SRCCOPY); so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1); so_data (so, 0); so_method(so, screen->eng2d, 0x0888, 1); so_data (so, 1); so_emit(chan, so); so_ref(NULL, &so); /* Static tesla init */ so = so_new(47, 95, 24); so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); so_data (so, NV50TCL_COND_MODE_ALWAYS); so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11); for (i = 0; i < 11; i++) so_data(so, chan->vram->handle); so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0), NV50TCL_DMA_COLOR__SIZE); for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++) so_data(so, chan->vram->handle); so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1); so_data (so, 1); /* activate all 32 lanes (threads) in a warp */ so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1); so_data (so, 0x2); so_method(so, screen->tesla, 0x1400, 1); so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ for (i = 0; i < 3; ++i) { so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(i), 1); so_data (so, 0x54); } /* origin is top left (set to 1 for bottom left) */ so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, 8); /* constant buffers for immediates and VP/FP parameters */ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4, &screen->constbuf_misc[0]); if (ret) { nv50_screen_destroy(pscreen); return NULL; } for (i = 0; i < 3; i++) { ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (256 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { nv50_screen_destroy(pscreen); return NULL; } } if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) || nouveau_resource_init(&screen->parm_heap[0], 0, 512) || nouveau_resource_init(&screen->parm_heap[1], 0, 512)) { NOUVEAU_ERR("Error initialising constant buffers.\n"); nv50_screen_destroy(pscreen); return NULL; } /* // map constant buffers: // B = buffer ID (maybe more than 1 byte) // N = CB index used in shader instruction // P = program type (0 = VP, 2 = GP, 3 = FP) so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x000BBNP1); */ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PMISC << 16) | 0x00000200); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000001 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000021 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000031 | (NV50_CB_PMISC << 12)); /* bind auxiliary constbuf to immediate data bo */ so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_AUX << 16) | 0x00000200); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000201 | (NV50_CB_AUX << 12)); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000221 | (NV50_CB_AUX << 12)); so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PVP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000101 | (NV50_CB_PVP << 12)); so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PGP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000121 | (NV50_CB_PGP << 12)); so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PFP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32, &screen->tic); if (ret) { nv50_screen_destroy(pscreen); return NULL; } so_method(so, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, PIPE_SHADER_TYPES * 32 - 1); ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32, &screen->tsc); if (ret) { nv50_screen_destroy(pscreen); return NULL; } so_method(so, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, 0x00000000); /* ignored if TSC_LINKED (0x1234) = 1 */ /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_data (so, 0x000000ff); so_data (so, 0xffffffff); } so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2); so_data (so, fui(0.0)); so_data (so, fui(1.0)); /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */ so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1); so_data (so, 1); /* activate first scissor rectangle */ so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1); so_data (so, 1); so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1); so_data (so, 1); /* default edgeflag to TRUE */ so_emit(chan, so); so_ref (so, &screen->static_init); so_ref (NULL, &so); nouveau_pushbuf_flush(chan, 0); return pscreen; }
static int nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { struct nv30_screen *screen = nv30_screen(pscreen); struct nouveau_object *eng3d = screen->eng3d; struct nouveau_device *dev = nouveau_screen(pscreen)->device; switch (param) { /* non-boolean capabilities */ case PIPE_CAP_MAX_RENDER_TARGETS: return (eng3d->oclass >= NV40_3D_CLASS) ? 4 : 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 13; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; case PIPE_CAP_GLSL_FEATURE_LEVEL: return 120; case PIPE_CAP_ENDIANNESS: return PIPE_ENDIAN_LITTLE; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 16; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return NOUVEAU_MIN_BUFFER_MAP_ALIGN; case PIPE_CAP_MAX_VIEWPORTS: return 1; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; /* supported capabilities */ case PIPE_CAP_TWO_SIDED_STENCIL: case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_POINT_SPRITE: case PIPE_CAP_OCCLUSION_QUERY: case PIPE_CAP_QUERY_TIME_ELAPSED: case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_DEPTH_CLIP_DISABLE: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_USER_CONSTANT_BUFFERS: case PIPE_CAP_USER_INDEX_BUFFERS: case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return 1; /* nv35 capabilities */ case PIPE_CAP_DEPTH_BOUNDS_TEST: return eng3d->oclass == NV35_3D_CLASS || eng3d->oclass >= NV40_3D_CLASS; /* nv4x capabilities */ case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_CONDITIONAL_RENDER: case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_PRIMITIVE_RESTART: return (eng3d->oclass >= NV40_3D_CLASS) ? 1 : 0; /* unsupported */ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: case PIPE_CAP_SM3: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: /* XXX: yes? */ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: case PIPE_CAP_MIN_TEXEL_OFFSET: case PIPE_CAP_MAX_TEXEL_OFFSET: case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: case PIPE_CAP_MAX_VERTEX_STREAMS: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_CUBE_MAP_ARRAY: case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: case PIPE_CAP_START_INSTANCE: case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_QUERY_PIPELINE_STATISTICS: case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_COMPUTE: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_CLIP_HALFZ: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: return 0; case PIPE_CAP_VENDOR_ID: return 0x10de; case PIPE_CAP_DEVICE_ID: { uint64_t device_id; if (nouveau_getparam(dev, NOUVEAU_GETPARAM_PCI_DEVICE, &device_id)) { NOUVEAU_ERR("NOUVEAU_GETPARAM_PCI_DEVICE failed.\n"); return -1; } return device_id; } case PIPE_CAP_ACCELERATED: return 1; case PIPE_CAP_VIDEO_MEMORY: return dev->vram_size >> 20; case PIPE_CAP_UMA: return 0; } debug_printf("unknown param %d\n", param); return 0; }
bool nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) { struct nouveau_heap *heap; int ret; uint32_t size = align(prog->code_size, 0x40); uint8_t prog_type; switch (prog->type) { case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break; case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break; case PIPE_SHADER_COMPUTE: heap = nv50->screen->fp_code_heap; break; default: assert(!"invalid program type"); return false; } ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); if (ret) { /* Out of space: evict everything to compactify the code segment, hoping * the working set is much smaller and drifts slowly. Improve me ! */ while (heap->next) { struct nv50_program *evict = heap->next->priv; if (evict) nouveau_heap_free(&evict->mem); } debug_printf("WARNING: out of code space, evicting all shaders.\n"); ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); if (ret) { NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); return false; } } if (prog->type == PIPE_SHADER_COMPUTE) { /* CP code must be uploaded in FP code segment. */ prog_type = 1; } else { prog->code_base = prog->mem->start; prog_type = prog->type; } ret = nv50_tls_realloc(nv50->screen, prog->tls_space); if (ret < 0) { nouveau_heap_free(&prog->mem); return false; } if (ret > 0) nv50->state.new_tls_space = true; if (prog->fixups) nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0); if (prog->interps) nv50_ir_change_interp(prog->interps, prog->code, prog->fp.force_persample_interp, false /* flatshade */); nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, NOUVEAU_BO_VRAM, prog->code_size, prog->code); BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); PUSH_DATA (nv50->base.pushbuf, 0); return true; }
void nv50_constbufs_validate(struct nv50_context *nv50) { struct nouveau_pushbuf *push = nv50->base.pushbuf; unsigned s; for (s = 0; s < 3; ++s) { unsigned p; if (s == PIPE_SHADER_FRAGMENT) p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT; else if (s == PIPE_SHADER_GEOMETRY) p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY; else p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX; while (nv50->constbuf_dirty[s]) { const unsigned i = (unsigned)ffs(nv50->constbuf_dirty[s]) - 1; assert(i < NV50_MAX_PIPE_CONSTBUFS); nv50->constbuf_dirty[s] &= ~(1 << i); if (nv50->constbuf[s][i].user) { const unsigned b = NV50_CB_PVP + s; unsigned start = 0; unsigned words = nv50->constbuf[s][0].size / 4; if (i) { NOUVEAU_ERR("user constbufs only supported in slot 0\n"); continue; } if (!nv50->state.uniform_buffer_bound[s]) { nv50->state.uniform_buffer_bound[s] = true; BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); } while (words) { unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); PUSH_SPACE(push, nr + 3); BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); PUSH_DATA (push, (start << 8) | b); BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr); PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr); start += nr; words -= nr; } } else { struct nv04_resource *res = nv04_resource(nv50->constbuf[s][i].u.buf); if (res) { /* TODO: allocate persistent bindings */ const unsigned b = s * 16 + i; assert(nouveau_resource_mapped_by_gpu(&res->base)); BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset); PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset); PUSH_DATA (push, (b << 16) | (nv50->constbuf[s][i].size & 0xffff)); BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD); nv50->cb_dirty = 1; /* Force cache flush for UBO. */ } else { BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); PUSH_DATA (push, (i << 8) | p | 0); } if (i == 0) nv50->state.uniform_buffer_bound[s] = false; } } } }
struct pipe_sampler_view * nv50_create_texture_view(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_sampler_view *templ, uint32_t flags, enum pipe_texture_target target) { const struct util_format_description *desc; uint64_t addr; uint32_t *tic; uint32_t swz[4]; uint32_t depth; struct nv50_tic_entry *view; struct nv50_miptree *mt = nv50_miptree(texture); boolean tex_int; view = MALLOC_STRUCT(nv50_tic_entry); if (!view) return NULL; view->pipe = *templ; view->pipe.reference.count = 1; view->pipe.texture = NULL; view->pipe.context = pipe; view->id = -1; pipe_resource_reference(&view->pipe.texture, texture); tic = &view->tic[0]; desc = util_format_description(view->pipe.format); /* TIC[0] */ tic[0] = nv50_format_table[view->pipe.format].tic; tex_int = util_format_is_pure_integer(view->pipe.format); swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | (swz[0] << NV50_TIC_0_MAPR__SHIFT) | (swz[1] << NV50_TIC_0_MAPG__SHIFT) | (swz[2] << NV50_TIC_0_MAPB__SHIFT) | (swz[3] << NV50_TIC_0_MAPA__SHIFT); addr = mt->base.address; if (mt->base.base.target == PIPE_TEXTURE_1D_ARRAY || mt->base.base.target == PIPE_TEXTURE_2D_ARRAY) { addr += view->pipe.u.tex.first_layer * mt->layer_stride; depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1; } else { depth = mt->base.base.depth0; } tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { if (target == PIPE_BUFFER) { addr += view->pipe.u.buf.first_element * desc->block.bits / 8; tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER; tic[3] = 0; tic[4] = /* width */ view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1; tic[5] = 0; } else { tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; tic[3] = mt->level[0].pitch; tic[4] = mt->base.base.width0; tic[5] = (1 << 16) | (mt->base.base.height0); } tic[6] = tic[7] = 0; tic[1] = addr; tic[2] |= addr >> 32; return &view->pipe; } tic[1] = addr; tic[2] |= (addr >> 32) & 0xff; tic[2] |= ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) | ((mt->level[0].tile_mode & 0xf00) << (25 - 8)); switch (target) { case PIPE_TEXTURE_1D: tic[2] |= NV50_TIC_2_TARGET_1D; break; case PIPE_TEXTURE_2D: tic[2] |= NV50_TIC_2_TARGET_2D; break; case PIPE_TEXTURE_RECT: tic[2] |= NV50_TIC_2_TARGET_RECT; break; case PIPE_TEXTURE_3D: tic[2] |= NV50_TIC_2_TARGET_3D; break; case PIPE_TEXTURE_CUBE: depth /= 6; tic[2] |= NV50_TIC_2_TARGET_CUBE; break; case PIPE_TEXTURE_1D_ARRAY: tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; break; case PIPE_TEXTURE_2D_ARRAY: tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; break; case PIPE_TEXTURE_CUBE_ARRAY: depth /= 6; tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; break; case PIPE_BUFFER: assert(0); /* should be linear and handled above ! */ tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR; break; default: NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target); return FALSE; } tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000; tic[4] = (1 << 31) | (mt->base.base.width0 << mt->ms_x); tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff; tic[5] |= depth << 16; tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT; tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS))) if (mt->base.base.last_level) tic[5] &= ~NV50_TIC_5_LAST_LEVEL__MASK; return &view->pipe; }
static int nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param) { const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; switch (shader) { case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_FRAGMENT: break; case PIPE_SHADER_TESS_CTRL: case PIPE_SHADER_TESS_EVAL: if (class_3d >= GM107_3D_CLASS) return 0; break; case PIPE_SHADER_COMPUTE: if (class_3d > NVE4_3D_CLASS) return 0; break; default: return 0; } switch (param) { case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: return 16384; case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: return 16; case PIPE_SHADER_CAP_MAX_INPUTS: if (shader == PIPE_SHADER_VERTEX) return 32; /* NOTE: These only count our slots for GENERIC varyings. * The address space may be larger, but the actual hard limit seems to be * less than what the address space layout permits, so don't add TEXCOORD, * COLOR, etc. here. */ if (shader == PIPE_SHADER_FRAGMENT) return 0x1f0 / 16; /* Actually this counts CLIPVERTEX, which occupies the last generic slot, * and excludes 0x60 per-patch inputs. */ return 0x200 / 16; case PIPE_SHADER_CAP_MAX_OUTPUTS: return 32; case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: return 65536; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS) return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE; return NVC0_MAX_PIPE_CONSTBUFS; case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: return shader != PIPE_SHADER_FRAGMENT; case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: return 1; case PIPE_SHADER_CAP_MAX_PREDS: return 0; case PIPE_SHADER_CAP_MAX_TEMPS: return NVC0_CAP_MAX_PROGRAM_TEMPS; case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 0; case PIPE_SHADER_CAP_SUBROUTINES: return 1; case PIPE_SHADER_CAP_INTEGERS: return 1; case PIPE_SHADER_CAP_DOUBLES: return 1; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: return 1; case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: return 16; /* would be 32 in linked (OpenGL-style) mode */ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return 16; /* XXX not sure if more are really safe */ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; } }
struct pipe_screen * nv30_screen_create(struct nouveau_device *dev) { struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); struct pipe_screen *pscreen; struct nouveau_pushbuf *push; struct nv04_fifo *fifo; unsigned oclass = 0; int ret, i; if (!screen) return NULL; switch (dev->chipset & 0xf0) { case 0x30: if (RANKINE_0397_CHIPSET & (1 << (dev->chipset & 0x0f))) oclass = NV30_3D_CLASS; else if (RANKINE_0697_CHIPSET & (1 << (dev->chipset & 0x0f))) oclass = NV34_3D_CLASS; else if (RANKINE_0497_CHIPSET & (1 << (dev->chipset & 0x0f))) oclass = NV35_3D_CLASS; break; case 0x40: if (CURIE_4097_CHIPSET & (1 << (dev->chipset & 0x0f))) oclass = NV40_3D_CLASS; else if (CURIE_4497_CHIPSET & (1 << (dev->chipset & 0x0f))) oclass = NV44_3D_CLASS; break; case 0x60: if (CURIE_4497_CHIPSET6X & (1 << (dev->chipset & 0x0f))) oclass = NV44_3D_CLASS; break; default: break; } if (!oclass) { NOUVEAU_ERR("unknown 3d class for 0x%02x\n", dev->chipset); FREE(screen); return NULL; } pscreen = &screen->base.base; pscreen->destroy = nv30_screen_destroy; pscreen->get_param = nv30_screen_get_param; pscreen->get_paramf = nv30_screen_get_paramf; pscreen->get_shader_param = nv30_screen_get_shader_param; pscreen->context_create = nv30_context_create; pscreen->is_format_supported = nv30_screen_is_format_supported; nv30_resource_screen_init(pscreen); nouveau_screen_init_vdec(&screen->base); screen->base.fence.emit = nv30_screen_fence_emit; screen->base.fence.update = nv30_screen_fence_update; ret = nouveau_screen_init(&screen->base, dev); if (ret) FAIL_SCREEN_INIT("nv30_screen_init failed: %d\n", ret); screen->base.vidmem_bindings |= PIPE_BIND_VERTEX_BUFFER; screen->base.sysmem_bindings |= PIPE_BIND_VERTEX_BUFFER; if (oclass == NV40_3D_CLASS) { screen->base.vidmem_bindings |= PIPE_BIND_INDEX_BUFFER; screen->base.sysmem_bindings |= PIPE_BIND_INDEX_BUFFER; } fifo = screen->base.channel->data; push = screen->base.pushbuf; push->rsvd_kick = 16; ret = nouveau_object_new(screen->base.channel, 0x00000000, NV01_NULL_CLASS, NULL, 0, &screen->null); if (ret) FAIL_SCREEN_INIT("error allocating null object: %d\n", ret); /* DMA_FENCE refuses to accept DMA objects with "adjust" filled in, * this means that the address pointed at by the DMA object must * be 4KiB aligned, which means this object needs to be the first * one allocated on the channel. */ ret = nouveau_object_new(screen->base.channel, 0xbeef1e00, NOUVEAU_NOTIFIER_CLASS, &(struct nv04_notify) { .length = 32 }, sizeof(struct nv04_notify),
boolean nv50_program_translate(struct nv50_program *prog, uint16_t chipset) { struct nv50_ir_prog_info *info; int ret; const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80; info = CALLOC_STRUCT(nv50_ir_prog_info); if (!info) return FALSE; info->type = prog->type; info->target = chipset; info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; info->bin.source = (void *)prog->pipe.tokens; info->io.ucpBinding = 15; info->io.ucpBase = 0; info->io.genUserClip = prog->vp.clpd_nr; info->assignSlots = nv50_program_assign_varying_slots; prog->vp.bfc[0] = 0xff; prog->vp.bfc[1] = 0xff; prog->vp.edgeflag = 0xff; prog->vp.clpd[0] = map_undef; prog->vp.clpd[1] = map_undef; prog->vp.psiz = map_undef; prog->gp.primid = 0x80; info->driverPriv = prog; #ifdef DEBUG info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); #else info->optLevel = 3; #endif ret = nv50_ir_generate_code(info); if (ret) { NOUVEAU_ERR("shader translation failed: %i\n", ret); goto out; } FREE(info->bin.syms); prog->code = info->bin.code; prog->code_size = info->bin.codeSize; prog->fixups = info->bin.relocData; prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); prog->tls_space = info->bin.tlsSpace; if (prog->type == PIPE_SHADER_FRAGMENT) { if (info->prop.fp.writesDepth) { prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; prog->fp.flags[1] = 0x11; } if (info->prop.fp.usesDiscard) prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; } if (prog->pipe.stream_output.num_outputs) prog->so = nv50_program_create_strmout_state(info, &prog->pipe.stream_output); out: FREE(info); return !ret; }
static int nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst, struct nv50_miptree *mt, unsigned level, unsigned layer) { struct nouveau_bo *bo = mt->base.bo; uint32_t width, height, depth; uint32_t format; uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT; uint32_t offset = mt->level[level].offset; format = nvc0_2d_format(mt->base.base.format); if (!format) { NOUVEAU_ERR("invalid/unsupported surface format: %s\n", util_format_name(mt->base.base.format)); return 1; } width = u_minify(mt->base.base.width0, level) << mt->ms_x; height = u_minify(mt->base.base.height0, level) << mt->ms_y; depth = u_minify(mt->base.base.depth0, level); /* layer has to be < depth, and depth > tile depth / 2 */ if (!mt->layout_3d) { offset += mt->layer_stride * layer; layer = 0; depth = 1; } else if (!dst) { offset += nvc0_mt_zslice_offset(mt, level, layer); layer = 0; } if (nouveau_bo_memtype(bo)) { BEGIN_NVC0(push, SUBC_2D(mthd), 2); PUSH_DATA (push, format); PUSH_DATA (push, 1); BEGIN_NVC0(push, SUBC_2D(mthd + 0x14), 5); PUSH_DATA (push, mt->level[level].pitch); PUSH_DATA (push, width); PUSH_DATA (push, height); PUSH_DATAh(push, bo->offset + offset); PUSH_DATA (push, bo->offset + offset); } else { BEGIN_NVC0(push, SUBC_2D(mthd), 5); PUSH_DATA (push, format); PUSH_DATA (push, 0); PUSH_DATA (push, mt->level[level].tile_mode); PUSH_DATA (push, depth); PUSH_DATA (push, layer); BEGIN_NVC0(push, SUBC_2D(mthd + 0x18), 4); PUSH_DATA (push, width); PUSH_DATA (push, height); PUSH_DATAh(push, bo->offset + offset); PUSH_DATA (push, bo->offset + offset); } #if 0 if (dst) { BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, width); PUSH_DATA (push, height); } #endif return 0; }
static int nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; struct nouveau_device *dev = nouveau_screen(pscreen)->device; switch (param) { /* non-boolean caps */ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 14; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return 12; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 14; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: return 512; case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MIN_TEXEL_OFFSET: return -8; case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MAX_TEXEL_OFFSET: return 7; case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: return 128 * 1024 * 1024; case PIPE_CAP_GLSL_FEATURE_LEVEL: return 330; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: return 1; case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: return 4; case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: return 64; case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: return 1024; case PIPE_CAP_MAX_VERTEX_STREAMS: return 1; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: return 16; /* 256 for binding as RT, but that's not possible in GL */ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return NOUVEAU_MIN_BUFFER_MAP_ALIGN; case PIPE_CAP_MAX_VIEWPORTS: return NV50_MAX_VIEWPORTS; case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50; case PIPE_CAP_ENDIANNESS: return PIPE_ENDIAN_LITTLE; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return (class_3d >= NVA3_3D_CLASS) ? 4 : 0; case PIPE_CAP_MAX_WINDOW_RECTANGLES: return NV50_MAX_WINDOW_RECTANGLES; /* supported caps */ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_TEXTURE_SHADOW_MAP: case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: case PIPE_CAP_TWO_SIDED_STENCIL: case PIPE_CAP_DEPTH_CLIP_DISABLE: case PIPE_CAP_POINT_SPRITE: case PIPE_CAP_SM3: case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_QUERY_TIME_ELAPSED: case PIPE_CAP_OCCLUSION_QUERY: case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: case PIPE_CAP_CONDITIONAL_RENDER: case PIPE_CAP_TEXTURE_BARRIER: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: case PIPE_CAP_START_INSTANCE: case PIPE_CAP_USER_CONSTANT_BUFFERS: case PIPE_CAP_USER_INDEX_BUFFERS: case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_TEXTURE_MULTISAMPLE: case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_CLIP_HALFZ: case PIPE_CAP_POLYGON_OFFSET_CLAMP: case PIPE_CAP_QUERY_PIPELINE_STATISTICS: case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_TGSI_TXQS: case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_CLEAR_TEXTURE: case PIPE_CAP_COMPUTE: case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_STRING_MARKER: case PIPE_CAP_CULL_DISTANCE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ /* supported on nva0+ */ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: return class_3d >= NVA0_3D_CLASS; /* supported on nva3+ */ case PIPE_CAP_CUBE_MAP_ARRAY: case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_TEXTURE_QUERY_LOD: case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: return class_3d >= NVA3_3D_CLASS; /* unsupported caps */ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: case PIPE_CAP_SHADER_STENCIL_EXPORT: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: case PIPE_CAP_TGSI_TEXCOORD: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_FAKE_SW_MSAA: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: case PIPE_CAP_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_DRAW_PARAMETERS: case PIPE_CAP_TGSI_PACK_HALF_FLOAT: case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: case PIPE_CAP_QUERY_BUFFER_OBJECT: case PIPE_CAP_QUERY_MEMORY_INFO: case PIPE_CAP_PCI_GROUP: case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: case PIPE_CAP_TGSI_VOTE: case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: return 0; case PIPE_CAP_VENDOR_ID: return 0x10de; case PIPE_CAP_DEVICE_ID: { uint64_t device_id; if (nouveau_getparam(dev, NOUVEAU_GETPARAM_PCI_DEVICE, &device_id)) { NOUVEAU_ERR("NOUVEAU_GETPARAM_PCI_DEVICE failed.\n"); return -1; } return device_id; } case PIPE_CAP_ACCELERATED: return 1; case PIPE_CAP_VIDEO_MEMORY: return dev->vram_size >> 20; case PIPE_CAP_UMA: return 0; } NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); return 0; }
static int nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { struct nvfx_screen *screen = nvfx_screen(pscreen); switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return 16; case PIPE_CAP_NPOT_TEXTURES: return screen->advertise_npot; case PIPE_CAP_TWO_SIDED_STENCIL: return 1; case PIPE_CAP_GLSL: return 1; case PIPE_CAP_ANISOTROPIC_FILTER: return 1; case PIPE_CAP_POINT_SPRITE: return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return screen->use_nv4x ? 4 : 1; case PIPE_CAP_OCCLUSION_QUERY: return 1; case PIPE_CAP_TIMER_QUERY: return 0; case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_TEXTURE_SWIZZLE: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: return 13; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return 10; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 13; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: return !!screen->use_nv4x; case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: return 0; /* We have 4 on nv40 - but unsupported currently */ case PIPE_CAP_BLEND_EQUATION_SEPARATE: return screen->advertise_blend_equation_separate; case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 16; case PIPE_CAP_INDEP_BLEND_ENABLE: /* TODO: on nv40 we have separate color masks */ /* TODO: nv40 mrt blending is probably broken */ return 0; case PIPE_CAP_INDEP_BLEND_FUNC: return 0; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 1; case PIPE_CAP_DEPTH_CLAMP: return 0; // TODO: implement depth clamp case PIPE_CAP_PRIMITIVE_RESTART: return 0; // TODO: implement primitive restart case PIPE_CAP_SHADER_STENCIL_EXPORT: return 0; default: NOUVEAU_ERR("Warning: unknown PIPE_CAP %d\n", param); return 0; } }
struct pipe_sampler_view * nvc0_create_texture_view(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_sampler_view *templ, uint32_t flags, enum pipe_texture_target target) { const struct util_format_description *desc; uint64_t address; uint32_t *tic; uint32_t swz[4]; uint32_t width, height; uint32_t depth; struct nv50_tic_entry *view; struct nv50_miptree *mt; boolean tex_int; view = MALLOC_STRUCT(nv50_tic_entry); if (!view) return NULL; mt = nv50_miptree(texture); view->pipe = *templ; view->pipe.reference.count = 1; view->pipe.texture = NULL; view->pipe.context = pipe; view->id = -1; pipe_resource_reference(&view->pipe.texture, texture); tic = &view->tic[0]; desc = util_format_description(view->pipe.format); tic[0] = nvc0_format_table[view->pipe.format].tic; tex_int = util_format_is_pure_integer(view->pipe.format); swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | (swz[0] << NV50_TIC_0_MAPR__SHIFT) | (swz[1] << NV50_TIC_0_MAPG__SHIFT) | (swz[2] << NV50_TIC_0_MAPB__SHIFT) | (swz[3] << NV50_TIC_0_MAPA__SHIFT); address = mt->base.address; tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; /* check for linear storage type */ if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { if (texture->target == PIPE_BUFFER) { assert(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)); address += view->pipe.u.buf.first_element * desc->block.bits / 8; tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER; tic[3] = 0; tic[4] = /* width */ view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1; tic[5] = 0; } else { /* must be 2D texture without mip maps */ tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; tic[3] = mt->level[0].pitch; tic[4] = mt->base.base.width0; tic[5] = (1 << 16) | mt->base.base.height0; } tic[6] = tic[7] = 0; tic[1] = address; tic[2] |= address >> 32; return &view->pipe; } tic[2] |= ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) | ((mt->level[0].tile_mode & 0xf00) << (25 - 8)); depth = MAX2(mt->base.base.array_size, mt->base.base.depth0); if (mt->base.base.array_size > 1) { /* there doesn't seem to be a base layer field in TIC */ address += view->pipe.u.tex.first_layer * mt->layer_stride; depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1; } tic[1] = address; tic[2] |= address >> 32; switch (target) { case PIPE_TEXTURE_1D: tic[2] |= NV50_TIC_2_TARGET_1D; break; case PIPE_TEXTURE_2D: tic[2] |= NV50_TIC_2_TARGET_2D; break; case PIPE_TEXTURE_RECT: tic[2] |= NV50_TIC_2_TARGET_2D; break; case PIPE_TEXTURE_3D: tic[2] |= NV50_TIC_2_TARGET_3D; break; case PIPE_TEXTURE_CUBE: depth /= 6; tic[2] |= NV50_TIC_2_TARGET_CUBE; break; case PIPE_TEXTURE_1D_ARRAY: tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; break; case PIPE_TEXTURE_2D_ARRAY: tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; break; case PIPE_TEXTURE_CUBE_ARRAY: depth /= 6; tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; break; default: NOUVEAU_ERR("unexpected/invalid texture target: %d\n", mt->base.base.target); return FALSE; } tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000; if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) { width = mt->base.base.width0 << mt->ms_x; height = mt->base.base.height0 << mt->ms_y; } else { width = mt->base.base.width0; height = mt->base.base.height0; } tic[4] = (1 << 31) | width; tic[5] = height & 0xffff; tic[5] |= depth << 16; tic[5] |= mt->base.base.last_level << 28; /* sampling points: (?) */ if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; else tic[6] = 0x03000000; tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; tic[7] |= mt->ms_mode << 12; return &view->pipe; }
struct pipe_screen * nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { static const unsigned query_sizes[] = {(4096 - 4 * 32) / 32, 3 * 1024 / 32, 2 * 1024 / 32, 1024 / 32}; struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen); struct nouveau_channel *chan; struct pipe_screen *pscreen; unsigned eng3d_class = 0; int ret, i; if (!screen) return NULL; pscreen = &screen->base.base; ret = nouveau_screen_init(&screen->base, dev); if (ret) { nvfx_screen_destroy(pscreen); return NULL; } chan = screen->base.channel; screen->cur_ctx = NULL; chan->user_private = screen; chan->flush_notify = nvfx_channel_flush_notify; pscreen->winsys = ws; pscreen->destroy = nvfx_screen_destroy; pscreen->get_param = nvfx_screen_get_param; pscreen->get_shader_param = nvfx_screen_get_shader_param; pscreen->get_paramf = nvfx_screen_get_paramf; pscreen->is_format_supported = nvfx_screen_is_format_supported; pscreen->context_create = nvfx_create; switch (dev->chipset & 0xf0) { case 0x30: if (NV30_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) eng3d_class = NV30_3D; else if (NV34_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) eng3d_class = NV34_3D; else if (NV35_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) eng3d_class = NV35_3D; break; case 0x40: if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f))) eng3d_class = NV40_3D; else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) eng3d_class = NV44_3D; screen->is_nv4x = ~0; break; case 0x60: if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) eng3d_class = NV44_3D; screen->is_nv4x = ~0; break; } if (!eng3d_class) { NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset); return NULL; } screen->advertise_npot = !!screen->is_nv4x; screen->advertise_blend_equation_separate = !!screen->is_nv4x; screen->use_nv4x = screen->is_nv4x; if(screen->is_nv4x) { if(debug_get_bool_option("NVFX_SIMULATE_NV30", FALSE)) screen->use_nv4x = 0; if(!debug_get_bool_option("NVFX_NPOT", TRUE)) screen->advertise_npot = 0; if(!debug_get_bool_option("NVFX_BLEND_EQ_SEP", TRUE)) screen->advertise_blend_equation_separate = 0; } screen->force_swtnl = debug_get_bool_option("NVFX_SWTNL", FALSE); screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE); screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384); screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0")); screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0")); /* We don't advertise these by default because filtering and blending doesn't work as * it should, due to several restrictions. * The only exception is fp16 on nv40. */ screen->advertise_fp16 = debug_get_bool_option("NVFX_FP16", !!screen->use_nv4x); screen->advertise_fp32 = debug_get_bool_option("NVFX_FP32", 0); screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen); /* surely both nv3x and nv44 support index buffers too: find out how and test that */ if(eng3d_class == NV40_3D) screen->index_buffer_reloc_flags = screen->vertex_buffer_reloc_flags; if(!screen->force_swtnl && screen->vertex_buffer_reloc_flags == screen->index_buffer_reloc_flags) screen->base.vertex_buffer_flags = screen->base.index_buffer_flags = screen->vertex_buffer_reloc_flags; nvfx_screen_init_resource_functions(pscreen); ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d); if (ret) { NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } /* 2D engine setup */ nvfx_screen_surface_init(pscreen); /* Notifier for sync purposes */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); if (ret) { NOUVEAU_ERR("Error creating notifier object: %d\n", ret); nvfx_screen_destroy(pscreen); return NULL; } /* Query objects */ for(i = 0; i < sizeof(query_sizes) / sizeof(query_sizes[0]); ++i) { ret = nouveau_notifier_alloc(chan, 0xbeef0302, query_sizes[i], &screen->query); if(!ret) break; } if (ret) { NOUVEAU_ERR("Error initialising query objects: %d\n", ret); nvfx_screen_destroy(pscreen); return NULL; } ret = nouveau_resource_init(&screen->query_heap, 0, query_sizes[i]); if (ret) { NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); nvfx_screen_destroy(pscreen); return NULL; } LIST_INITHEAD(&screen->query_list); /* Vtxprog resources */ if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->use_nv4x ? 512 : 256) || nouveau_resource_init(&screen->vp_data_heap, 0, screen->use_nv4x ? 468 : 256)) { nvfx_screen_destroy(pscreen); return NULL; } BIND_RING(chan, screen->eng3d, 7); /* Static eng3d initialisation */ /* note that we just started using the channel, so we must have space in the pushbuffer */ OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1)); OUT_RING(chan, screen->sync->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->gart->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->gart->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2)); OUT_RING(chan, 0); OUT_RING(chan, screen->query->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->vram->handle); if(!screen->is_nv4x) nv30_screen_init(screen); else nv40_screen_init(screen); return pscreen; }
struct pipe_screen * nv50_screen_create(struct nouveau_device *dev) { struct nv50_screen *screen; struct pipe_screen *pscreen; struct nouveau_object *chan; uint64_t value; uint32_t tesla_class; unsigned stack_size; int ret; screen = CALLOC_STRUCT(nv50_screen); if (!screen) return NULL; pscreen = &screen->base.base; ret = nouveau_screen_init(&screen->base, dev); if (ret) { NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret); goto fail; } /* TODO: Prevent FIFO prefetch before transfer of index buffers and * admit them to VRAM. */ screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER; screen->base.sysmem_bindings |= PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; screen->base.pushbuf->user_priv = screen; screen->base.pushbuf->rsvd_kick = 5; chan = screen->base.channel; pscreen->destroy = nv50_screen_destroy; pscreen->context_create = nv50_create; pscreen->is_format_supported = nv50_screen_is_format_supported; pscreen->get_param = nv50_screen_get_param; pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; nv50_screen_init_resource_functions(pscreen); if (screen->base.device->chipset < 0x84 || debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) { /* PMPEG */ nouveau_screen_init_vdec(&screen->base); } else if (screen->base.device->chipset < 0x98 || screen->base.device->chipset == 0xa0) { /* VP2 */ screen->base.base.get_video_param = nv84_screen_get_video_param; screen->base.base.is_video_format_supported = nv84_screen_video_supported; } else { /* VP3/4 */ screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param; screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported; } ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, NULL, &screen->fence.bo); if (ret) { NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret); goto fail; } nouveau_bo_map(screen->fence.bo, 0, NULL); screen->fence.map = screen->fence.bo->map; screen->base.fence.emit = nv50_screen_fence_emit; screen->base.fence.update = nv50_screen_fence_update; ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS, &(struct nv04_notify){ .length = 32 }, sizeof(struct nv04_notify), &screen->sync);
void nvc0_launch_grid(struct pipe_context *pipe, const uint *block_layout, const uint *grid_layout, uint32_t label, const void *input) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; unsigned s, i; int ret; ret = !nvc0_compute_state_validate(nvc0); if (ret) goto out; nvc0_compute_upload_input(nvc0, input); BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1); PUSH_DATA (push, nvc0_program_symbol_offset(cp, label)); BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3); PUSH_DATA (push, align(cp->cp.lmem_size, 0x10)); PUSH_DATA (push, 0); PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3); PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]); PUSH_DATA (push, cp->num_barriers); BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); PUSH_DATA (push, cp->num_gprs); /* grid/block setup */ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]); PUSH_DATA (push, grid_layout[2]); BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]); PUSH_DATA (push, block_layout[2]); /* launch preliminary setup */ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); PUSH_DATA (push, 0x1); BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); /* kernel launching */ BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); PUSH_DATA (push, 0x1000); BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); PUSH_DATA (push, 0x1); /* rebind all the 3D constant buffers * (looks like binding a CB on COMPUTE clobbers 3D state) */ nvc0->dirty |= NVC0_NEW_CONSTBUF; for (s = 0; s < 6; s++) { for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++) if (nvc0->constbuf[s][i].u.buf) nvc0->constbuf_dirty[s] |= 1 << i; } memset(nvc0->state.uniform_buffer_bound, 0, sizeof(nvc0->state.uniform_buffer_bound)); out: if (ret) NOUVEAU_ERR("Failed to launch grid !\n"); }