static void nv50_screen_init_hwctx(struct nv50_screen *screen) { struct nouveau_pushbuf *push = screen->base.pushbuf; struct nv04_fifo *fifo; unsigned i; fifo = (struct nv04_fifo *)screen->base.channel->data; BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->m2mf->handle); BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3); PUSH_DATA (push, screen->sync->handle); PUSH_DATA (push, fifo->vram); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->eng2d->handle); BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4); PUSH_DATA (push, screen->sync->handle); PUSH_DATA (push, fifo->vram); PUSH_DATA (push, fifo->vram); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_2D(OPERATION), 1); PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY); BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, SUBC_2D(0x0888), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_2D(COND_MODE), 1); PUSH_DATA (push, NV50_2D_COND_MODE_ALWAYS); BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->tesla->handle); BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1); PUSH_DATA (push, screen->sync->handle); BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11); for (i = 0; i < 11; ++i) PUSH_DATA(push, fifo->vram); BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN); for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i) PUSH_DATA(push, fifo->vram); BEGIN_NV04(push, NV50_3D(REG_MODE), 1); PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED); BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1); PUSH_DATA (push, 0xf); if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) { BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1); PUSH_DATA (push, 0x18); } BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1); PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1); BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1); PUSH_DATA (push, 1); if (screen->tesla->oclass >= NVA0_3D_CLASS) { BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1); PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); } BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2); PUSH_DATA (push, 0); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1); PUSH_DATA (push, 0x3f); BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2)); PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2)); BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2)); PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2)); BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2)); PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2)); BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->tls_bo->offset); PUSH_DATA (push, screen->tls_bo->offset); PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8)); BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 
3); PUSH_DATAh(push, screen->stack_bo->offset); PUSH_DATA (push, screen->stack_bo->offset); PUSH_DATA (push, 4); BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->uniforms->offset + (0 << 16)); PUSH_DATA (push, screen->uniforms->offset + (0 << 16)); PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000); BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->uniforms->offset + (1 << 16)); PUSH_DATA (push, screen->uniforms->offset + (1 << 16)); PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000); BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->uniforms->offset + (2 << 16)); PUSH_DATA (push, screen->uniforms->offset + (2 << 16)); PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000); BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->uniforms->offset + (3 << 16)); PUSH_DATA (push, screen->uniforms->offset + (3 << 16)); PUSH_DATA (push, (NV50_CB_AUX << 16) | (NV50_CB_AUX_SIZE & 0xffff)); BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3); PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01); PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21); PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31); /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); PUSH_DATA (push, (NV50_CB_AUX_RUNOUT_OFFSET << (8 - 2)) | NV50_CB_AUX); BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4); PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 0.0f); BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET); PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET); nv50_upload_ms_info(push); /* max TIC (bits 4:8) & TSC bindings, per program type */ for (i = 0; i < 3; ++i) { BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1); PUSH_DATA (push, 0x54); } BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); PUSH_DATA (push, screen->txc->offset + 65536); PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1); PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY); BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2); for (i = 0; i < 8 * 2; ++i) PUSH_DATA(push, 0); BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1); PUSH_DATA (push, 1); for (i = 0; i < NV50_MAX_VIEWPORTS; i++) { BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2); PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 1.0f); BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(i)), 2); PUSH_DATA (push, 8192 << 16); PUSH_DATA (push, 8192 << 16); } BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1); #ifdef NV50_SCISSORS_CLIPPING PUSH_DATA (push, 0x0000); #else PUSH_DATA (push, 0x1080); #endif BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1); PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT); /* We use scissors instead of exact view volume clipping, * so they're always enabled. 
*/ for (i = 0; i < NV50_MAX_VIEWPORTS; i++) { BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(i)), 3); PUSH_DATA (push, 1); PUSH_DATA (push, 8192 << 16); PUSH_DATA (push, 8192 << 16); } BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1); PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL); BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1); PUSH_DATA (push, 0x11111111); BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1); PUSH_DATA (push, 1); PUSH_KICK (push); }
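/* Illustrative sketch, not part of the driver: the PUSH_DATAh()/PUSH_DATA()
 * pairs above emit the upper and lower 32 bits of a 64-bit GPU virtual
 * address, and the VP/FP/GP code segments are carved out of one code buffer
 * at multiples of (1 << NV50_CODE_BO_SIZE_LOG2).  The helper below shows that
 * address math in isolation; the segment-size log2 of 16 and the base address
 * are made-up values for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_CODE_BO_SIZE_LOG2 16   /* assumed: 64 KiB per shader stage */

static void example_split_address(uint64_t va, uint32_t *hi, uint32_t *lo)
{
   *hi = (uint32_t)(va >> 32);          /* what PUSH_DATAh() pushes */
   *lo = (uint32_t)(va & 0xffffffffu);  /* what PUSH_DATA() pushes */
}

int main(void)
{
   const uint64_t code_base = 0x200010000ull;  /* made-up BO offset */
   for (int stage = 0; stage < 3; ++stage) {   /* 0 = VP, 1 = FP, 2 = GP above */
      uint32_t hi, lo;
      example_split_address(code_base +
                            ((uint64_t)stage << EXAMPLE_CODE_BO_SIZE_LOG2),
                            &hi, &lo);
      printf("stage %d: ADDRESS_HIGH 0x%08x, ADDRESS_LOW 0x%08x\n",
             stage, (unsigned)hi, (unsigned)lo);
   }
   return 0;
}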
static int nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst, struct nv50_miptree *mt, unsigned level, unsigned layer, enum pipe_format pformat, boolean dst_src_pformat_equal) { struct nouveau_bo *bo = mt->base.bo; uint32_t width, height, depth; uint32_t format; uint32_t mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; uint32_t offset = mt->level[level].offset; format = nvc0_2d_format(pformat, dst, dst_src_pformat_equal); if (!format) { NOUVEAU_ERR("invalid/unsupported surface format: %s\n", util_format_name(pformat)); return 1; } width = u_minify(mt->base.base.width0, level) << mt->ms_x; height = u_minify(mt->base.base.height0, level) << mt->ms_y; depth = u_minify(mt->base.base.depth0, level); /* layer has to be < depth, and depth > tile depth / 2 */ if (!mt->layout_3d) { offset += mt->layer_stride * layer; layer = 0; depth = 1; } else if (!dst) { offset += nvc0_mt_zslice_offset(mt, level, layer); layer = 0; } if (!nouveau_bo_memtype(bo)) { BEGIN_NVC0(push, SUBC_2D(mthd), 2); PUSH_DATA (push, format); PUSH_DATA (push, 1); BEGIN_NVC0(push, SUBC_2D(mthd + 0x14), 5); PUSH_DATA (push, mt->level[level].pitch); PUSH_DATA (push, width); PUSH_DATA (push, height); PUSH_DATAh(push, bo->offset + offset); PUSH_DATA (push, bo->offset + offset); } else { BEGIN_NVC0(push, SUBC_2D(mthd), 5); PUSH_DATA (push, format); PUSH_DATA (push, 0); PUSH_DATA (push, mt->level[level].tile_mode); PUSH_DATA (push, depth); PUSH_DATA (push, layer); BEGIN_NVC0(push, SUBC_2D(mthd + 0x18), 4); PUSH_DATA (push, width); PUSH_DATA (push, height); PUSH_DATAh(push, bo->offset + offset); PUSH_DATA (push, bo->offset + offset); } #if 0 if (dst) { BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4); PUSH_DATA (push, 0); PUSH_DATA (push, 0); PUSH_DATA (push, width); PUSH_DATA (push, height); } #endif return 0; }
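/* Illustrative sketch, not part of the driver: the width/height/depth
 * computation above uses Gallium's u_minify() (roughly MAX2(1, size >> level))
 * and then widens the result by the per-axis multisample shifts ms_x/ms_y.
 * The same arithmetic standalone, with made-up surface sizes:
 */
#include <stdio.h>

static unsigned example_minify(unsigned size, unsigned level)
{
   unsigned v = size >> level;
   return v ? v : 1;   /* a mip level is never smaller than one block */
}

int main(void)
{
   const unsigned width0 = 300, height0 = 200;  /* assumed level-0 size */
   const unsigned ms_x = 1, ms_y = 1;           /* assumed 2x2 sample layout */
   for (unsigned level = 0; level < 4; ++level)
      printf("level %u: %u x %u\n", level,
             example_minify(width0, level) << ms_x,
             example_minify(height0, level) << ms_y);
   return 0;
}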
static void
nv50_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, uint mode)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_query *q;
   uint32_t cond;
   boolean wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      cond = NV50_3D_COND_MODE_ALWAYS;
   } else {
      q = nv50_query(pq);
      /* NOTE: comparison of 2 queries only works if both have completed */
      switch (q->type) {
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         cond = condition ? NV50_3D_COND_MODE_EQUAL :
                            NV50_3D_COND_MODE_NOT_EQUAL;
         wait = TRUE;
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
      case PIPE_QUERY_OCCLUSION_PREDICATE:
         if (likely(!condition)) {
            /* nested occlusion queries cannot rely on RES_NON_ZERO;
             * mirror the nvc0 path and fall back to an (optionally waited)
             * NOT_EQUAL comparison in that case
             */
            if (unlikely(q->nesting))
               cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
                             NV50_3D_COND_MODE_ALWAYS;
            else
               cond = NV50_3D_COND_MODE_RES_NON_ZERO;
         } else {
            cond = wait ? NV50_3D_COND_MODE_EQUAL : NV50_3D_COND_MODE_ALWAYS;
         }
         break;
      default:
         assert(!"render condition query not a predicate");
         cond = NV50_3D_COND_MODE_ALWAYS;
         break;
      }
   }

   nv50->cond_query = pq;
   nv50->cond_cond = condition;
   nv50->cond_condmode = cond;
   nv50->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 2);
      BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
      PUSH_DATA (push, cond);
      return;
   }

   PUSH_SPACE(push, 9);

   if (wait) {
      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (push, 0);
   }

   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, cond);

   BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
}
static void nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0, const struct nv50_m2mf_rect *dst, const struct nv50_m2mf_rect *src, uint32_t nblocksx, uint32_t nblocksy) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nouveau_bufctx *bctx = nvc0->bufctx; const int cpp = dst->cpp; uint32_t src_ofst = src->base; uint32_t dst_ofst = dst->base; uint32_t height = nblocksy; uint32_t sy = src->y; uint32_t dy = dst->y; uint32_t exec = (1 << 20); assert(dst->cpp == src->cpp); nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); nouveau_pushbuf_bufctx(push, bctx); nouveau_pushbuf_validate(push); if (nouveau_bo_memtype(src->bo)) { BEGIN_NVC0(push, NVC0_M2MF(TILING_MODE_IN), 5); PUSH_DATA (push, src->tile_mode); PUSH_DATA (push, src->width * cpp); PUSH_DATA (push, src->height); PUSH_DATA (push, src->depth); PUSH_DATA (push, src->z); } else { src_ofst += src->y * src->pitch + src->x * cpp; BEGIN_NVC0(push, NVC0_M2MF(PITCH_IN), 1); PUSH_DATA (push, src->width * cpp); exec |= NVC0_M2MF_EXEC_LINEAR_IN; } if (nouveau_bo_memtype(dst->bo)) { BEGIN_NVC0(push, NVC0_M2MF(TILING_MODE_OUT), 5); PUSH_DATA (push, dst->tile_mode); PUSH_DATA (push, dst->width * cpp); PUSH_DATA (push, dst->height); PUSH_DATA (push, dst->depth); PUSH_DATA (push, dst->z); } else { dst_ofst += dst->y * dst->pitch + dst->x * cpp; BEGIN_NVC0(push, NVC0_M2MF(PITCH_OUT), 1); PUSH_DATA (push, dst->width * cpp); exec |= NVC0_M2MF_EXEC_LINEAR_OUT; } while (height) { int line_count = height > 2047 ? 2047 : height; BEGIN_NVC0(push, NVC0_M2MF(OFFSET_IN_HIGH), 2); PUSH_DATAh(push, src->bo->offset + src_ofst); PUSH_DATA (push, src->bo->offset + src_ofst); BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); PUSH_DATAh(push, dst->bo->offset + dst_ofst); PUSH_DATA (push, dst->bo->offset + dst_ofst); if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) { BEGIN_NVC0(push, NVC0_M2MF(TILING_POSITION_IN_X), 2); PUSH_DATA (push, src->x * cpp); PUSH_DATA (push, sy); } else { src_ofst += line_count * src->pitch; } if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) { BEGIN_NVC0(push, NVC0_M2MF(TILING_POSITION_OUT_X), 2); PUSH_DATA (push, dst->x * cpp); PUSH_DATA (push, dy); } else { dst_ofst += line_count * dst->pitch; } BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); PUSH_DATA (push, nblocksx * cpp); PUSH_DATA (push, line_count); BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); PUSH_DATA (push, exec); height -= line_count; sy += line_count; dy += line_count; } nouveau_bufctx_reset(bctx, 0); }
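/* Illustrative sketch, not part of the driver: the loop above submits at most
 * 2047 lines per M2MF EXEC and then either advances the tiled Y coordinate or
 * bumps the linear byte offset by line_count * pitch.  The same chunking in
 * isolation, with made-up copy dimensions:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t height = 5000;   /* assumed number of block rows to copy */
   uint32_t pitch = 4096;    /* assumed linear pitch in bytes */
   uint32_t offset = 0, y = 0;

   while (height) {
      uint32_t line_count = height > 2047 ? 2047 : height;
      printf("EXEC: %4u lines, linear offset %8u, tiled y %4u\n",
             (unsigned)line_count, (unsigned)offset, (unsigned)y);
      offset += line_count * pitch;   /* linear surfaces advance the offset */
      y += line_count;                /* tiled surfaces advance the position */
      height -= line_count;
   }
   return 0;
}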
int nv50_screen_compute_setup(struct nv50_screen *screen, struct nouveau_pushbuf *push) { struct nouveau_device *dev = screen->base.device; struct nouveau_object *chan = screen->base.channel; struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data; unsigned obj_class; int i, ret; switch (dev->chipset & 0xf0) { case 0x50: case 0x80: case 0x90: obj_class = NV50_COMPUTE_CLASS; break; case 0xa0: switch (dev->chipset) { case 0xa3: case 0xa5: case 0xa8: obj_class = NVA3_COMPUTE_CLASS; break; default: obj_class = NV50_COMPUTE_CLASS; break; } break; default: NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); return -1; } ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0, &screen->compute); if (ret) return ret; BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->handle); BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->stack_bo->offset); PUSH_DATA (push, screen->stack_bo->offset); BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1); PUSH_DATA (push, 4); BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1); PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED); BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1); PUSH_DATA (push, 0x100); BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1); PUSH_DATA (push, fifo->vram); for (i = 0; i < 15; i++) { BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2); PUSH_DATA (push, 0); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1); PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); } BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2); PUSH_DATA (push, 0); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1); PUSH_DATA (push, ~0); BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1); PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1); PUSH_DATA (push, 7); BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1); PUSH_DATA (push, 7); BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1); PUSH_DATA (push, 0x54); BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); PUSH_DATA (push, screen->txc->offset + 65536); PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1); PUSH_DATA (push, fifo->vram); BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls_bo->offset + 65536); PUSH_DATA (push, 
screen->tls_bo->offset + 65536); BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1); PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); return 0; }
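/* Illustrative sketch, not part of the driver: several methods above take
 * log2-encoded sizes (STACK_SIZE_LOG and LOCAL_SIZE_LOG here, the TLS size in
 * the 3D hwctx setup), computed with util_logbase2().  A minimal equivalent
 * with a made-up per-thread temporary size:
 */
#include <stdint.h>
#include <stdio.h>

static unsigned example_logbase2(uint32_t v)   /* floor(log2(v)), v > 0 */
{
   unsigned r = 0;
   while (v >>= 1)
      ++r;
   return r;
}

int main(void)
{
   const uint32_t cur_tls_space = 4096;   /* assumed TLS bytes per thread */
   printf("3D TLS log value:  %u\n", example_logbase2(cur_tls_space / 8));
   printf("stack log value:   %u (the literal 4 pushed above)\n",
          example_logbase2(16));
   return 0;
}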
struct pipe_video_decoder * nvc0_create_decoder(struct pipe_context *context, enum pipe_video_profile profile, enum pipe_video_entrypoint entrypoint, enum pipe_video_chroma_format chroma_format, unsigned width, unsigned height, unsigned max_references, bool chunked_decode) { struct nouveau_screen *screen = &((struct nvc0_context *)context)->screen->base; struct nvc0_decoder *dec; struct nouveau_pushbuf **push; union nouveau_bo_config cfg; bool kepler = screen->device->chipset >= 0xe0; cfg.nvc0.tile_mode = 0x10; cfg.nvc0.memtype = 0xfe; int ret, i; uint32_t codec = 1, ppp_codec = 3; uint32_t timeout; u32 tmp_size = 0; if (getenv("XVMC_VL")) return vl_create_decoder(context, profile, entrypoint, chroma_format, width, height, max_references, chunked_decode); if (entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { debug_printf("%x\n", entrypoint); return NULL; } dec = CALLOC_STRUCT(nvc0_decoder); if (!dec) return NULL; dec->client = screen->client; if (!kepler) { dec->bsp_idx = 5; dec->vp_idx = 6; dec->ppp_idx = 7; } else { dec->bsp_idx = 2; dec->vp_idx = 2; dec->ppp_idx = 2; } for (i = 0; i < 3; ++i) if (i && !kepler) { dec->channel[i] = dec->channel[0]; dec->pushbuf[i] = dec->pushbuf[0]; } else { void *data; u32 size; struct nvc0_fifo nvc0_args = {}; struct nve0_fifo nve0_args = {}; if (!kepler) { size = sizeof(nvc0_args); data = &nvc0_args; } else { unsigned engine[] = { NVE0_FIFO_ENGINE_BSP, NVE0_FIFO_ENGINE_VP, NVE0_FIFO_ENGINE_PPP }; nve0_args.engine = engine[i]; size = sizeof(nve0_args); data = &nve0_args; } ret = nouveau_object_new(&screen->device->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS, data, size, &dec->channel[i]); if (!ret) ret = nouveau_pushbuf_new(screen->client, dec->channel[i], 4, 32 * 1024, true, &dec->pushbuf[i]); if (ret) break; } push = dec->pushbuf; if (!kepler) { if (!ret) ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x90b1, NULL, 0, &dec->bsp); if (!ret) ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x90b2, NULL, 0, &dec->vp); if (!ret) ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x90b3, NULL, 0, &dec->ppp); } else { if (!ret) ret = nouveau_object_new(dec->channel[0], 0x95b1, 0x95b1, NULL, 0, &dec->bsp); if (!ret) ret = nouveau_object_new(dec->channel[1], 0x95b2, 0x95b2, NULL, 0, &dec->vp); if (!ret) ret = nouveau_object_new(dec->channel[2], 0x90b3, 0x90b3, NULL, 0, &dec->ppp); } if (ret) goto fail; BEGIN_NVC0(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push[0], dec->bsp->handle); BEGIN_NVC0(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push[1], dec->vp->handle); BEGIN_NVC0(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push[2], dec->ppp->handle); dec->base.context = context; dec->base.profile = profile; dec->base.entrypoint = entrypoint; dec->base.chroma_format = chroma_format; dec->base.width = width; dec->base.height = height; dec->base.max_references = max_references; dec->base.destroy = nvc0_decoder_destroy; dec->base.flush = nvc0_decoder_flush; dec->base.decode_bitstream = nvc0_decoder_decode_bitstream; dec->base.begin_frame = nvc0_decoder_begin_frame; dec->base.end_frame = nvc0_decoder_end_frame; for (i = 0; i < NVC0_VIDEO_QDEPTH && !ret; ++i) ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, 1 << 20, &cfg, &dec->bsp_bo[i]); if (!ret) ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0x100, 4 << 20, &cfg, &dec->inter_bo[0]); if (!ret) { if (!kepler) nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]); else ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0x100, dec->inter_bo[0]->size, &cfg, 
&dec->inter_bo[1]); } if (ret) goto fail; switch (u_reduce_video_profile(profile)) { case PIPE_VIDEO_CODEC_MPEG12: { codec = 1; assert(max_references <= 2); break; } case PIPE_VIDEO_CODEC_MPEG4: { codec = 4; tmp_size = mb(height)*16 * mb(width)*16; assert(max_references <= 2); break; } case PIPE_VIDEO_CODEC_VC1: { ppp_codec = codec = 2; tmp_size = mb(height)*16 * mb(width)*16; assert(max_references <= 2); break; } case PIPE_VIDEO_CODEC_MPEG4_AVC: { codec = 3; dec->tmp_stride = 16 * mb_half(width) * nvc0_video_align(height) * 3 / 2; tmp_size = dec->tmp_stride * (max_references + 1); assert(max_references <= 16); break; } default: fprintf(stderr, "invalid codec\n"); goto fail; } if (screen->device->chipset < 0xd0) { int fd; char path[PATH_MAX]; ssize_t r; uint32_t *end, endval; ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, 0x4000, &cfg, &dec->fw_bo); if (!ret) ret = nouveau_bo_map(dec->fw_bo, NOUVEAU_BO_WR, dec->client); if (ret) goto fail; nvc0_video_getpath(profile, path); fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) { fprintf(stderr, "opening firmware file %s failed: %m\n", path); goto fw_fail; } r = read(fd, dec->fw_bo->map, 0x4000); close(fd); if (r < 0) { fprintf(stderr, "reading firmware file %s failed: %m\n", path); goto fw_fail; } if (r == 0x4000) { fprintf(stderr, "firmware file %s too large!\n", path); goto fw_fail; } if (r & 0xff) { fprintf(stderr, "firmware file %s wrong size!\n", path); goto fw_fail; } end = dec->fw_bo->map + r - 4; endval = *end; while (endval == *end) end--; r = (intptr_t)end - (intptr_t)dec->fw_bo->map + 4; switch (u_reduce_video_profile(profile)) { case PIPE_VIDEO_CODEC_MPEG12: { assert((r & 0xff) == 0xe0); dec->fw_sizes = (0x2e0<<16) | (r - 0x2e0); break; } case PIPE_VIDEO_CODEC_MPEG4: { assert((r & 0xff) == 0xe0); dec->fw_sizes = (0x2e0<<16) | (r - 0x2e0); break; } case PIPE_VIDEO_CODEC_VC1: { assert((r & 0xff) == 0xac); dec->fw_sizes = (0x3ac<<16) | (r - 0x3ac); break; } case PIPE_VIDEO_CODEC_MPEG4_AVC: { assert((r & 0xff) == 0x70); dec->fw_sizes = (0x370<<16) | (r - 0x370); break; } default: goto fw_fail; } munmap(dec->fw_bo->map, dec->fw_bo->size); dec->fw_bo->map = NULL; } if (codec != 3) { ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, 0x400, &cfg, &dec->bitplane_bo); if (ret) goto fail; } dec->ref_stride = mb(width)*16 * (mb_half(height)*32 + nvc0_video_align(height)/2); ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, dec->ref_stride * (max_references+2) + tmp_size, &cfg, &dec->ref_bo); if (ret) goto fail; timeout = 0; BEGIN_NVC0(push[0], SUBC_BSP(0x200), 2); PUSH_DATA (push[0], codec); PUSH_DATA (push[0], timeout); BEGIN_NVC0(push[1], SUBC_VP(0x200), 2); PUSH_DATA (push[1], codec); PUSH_DATA (push[1], timeout); BEGIN_NVC0(push[2], SUBC_PPP(0x200), 2); PUSH_DATA (push[2], ppp_codec); PUSH_DATA (push[2], timeout); ++dec->fence_seq; #if NVC0_DEBUG_FENCE ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP, 0, 0x1000, NULL, &dec->fence_bo); if (ret) goto fail; nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client); dec->fence_map = dec->fence_bo->map; dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0; dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map))); /* So lets test if the fence is working? 
*/ nouveau_pushbuf_space(push[0], 6, 1, 0); PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3); PUSH_DATAh(push[0], dec->fence_bo->offset); PUSH_DATA (push[0], dec->fence_bo->offset); PUSH_DATA (push[0], dec->fence_seq); BEGIN_NVC0(push[0], SUBC_BSP(0x304), 1); PUSH_DATA (push[0], 0); PUSH_KICK (push[0]); nouveau_pushbuf_space(push[1], 6, 1, 0); PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NVC0(push[1], SUBC_VP(0x240), 3); PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10)); PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10)); PUSH_DATA (push[1], dec->fence_seq); BEGIN_NVC0(push[1], SUBC_VP(0x304), 1); PUSH_DATA (push[1], 0); PUSH_KICK (push[1]); nouveau_pushbuf_space(push[2], 6, 1, 0); PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3); PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20)); PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20)); PUSH_DATA (push[2], dec->fence_seq); BEGIN_NVC0(push[2], SUBC_PPP(0x304), 1); PUSH_DATA (push[2], 0); PUSH_KICK (push[2]); usleep(100); while (dec->fence_seq > dec->fence_map[0] || dec->fence_seq > dec->fence_map[4] || dec->fence_seq > dec->fence_map[8]) { debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); usleep(100); } debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); #endif return &dec->base; fw_fail: debug_printf("Cannot create decoder without firmware..\n"); nvc0_decoder_destroy(&dec->base); return NULL; fail: debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret); nvc0_decoder_destroy(&dec->base); return NULL; }
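/* Illustrative sketch, not part of the driver: fw_sizes above packs the size
 * of the firmware's boot section into the upper 16 bits and the size of the
 * remaining code into the lower 16 bits (0x2e0, 0x3ac or 0x370 depending on
 * the codec).  Packing and unpacking in isolation, with a made-up image size:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   const uint32_t boot_size = 0x2e0;   /* MPEG1/2 boot section, per the code above */
   const uint32_t total = 0x3800;      /* assumed firmware image size */
   uint32_t fw_sizes = (boot_size << 16) | (total - boot_size);

   printf("boot: 0x%x bytes, rest: 0x%x bytes\n",
          fw_sizes >> 16, fw_sizes & 0xffff);
   return 0;
}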
static void nve4_m2mf_transfer_rect(struct nvc0_context *nvc0, const struct nv50_m2mf_rect *dst, const struct nv50_m2mf_rect *src, uint32_t nblocksx, uint32_t nblocksy) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nouveau_bufctx *bctx = nvc0->bufctx; uint32_t exec; uint32_t src_base = src->base; uint32_t dst_base = dst->base; const int cpp = dst->cpp; assert(dst->cpp == src->cpp); nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR); nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD); nouveau_pushbuf_bufctx(push, bctx); nouveau_pushbuf_validate(push); exec = 0x200 /* 2D_ENABLE */ | 0x6 /* UNK */; if (!nouveau_bo_memtype(dst->bo)) { assert(!dst->z); dst_base += dst->y * dst->pitch + dst->x * cpp; exec |= 0x100; /* DST_MODE_2D_LINEAR */ } if (!nouveau_bo_memtype(src->bo)) { assert(!src->z); src_base += src->y * src->pitch + src->x * cpp; exec |= 0x080; /* SRC_MODE_2D_LINEAR */ } BEGIN_NVC0(push, SUBC_COPY(0x070c), 6); PUSH_DATA (push, 0x1000 | dst->tile_mode); PUSH_DATA (push, dst->pitch); PUSH_DATA (push, dst->height); PUSH_DATA (push, dst->depth); PUSH_DATA (push, dst->z); PUSH_DATA (push, (dst->y << 16) | (dst->x * cpp)); BEGIN_NVC0(push, SUBC_COPY(0x0728), 6); PUSH_DATA (push, 0x1000 | src->tile_mode); PUSH_DATA (push, src->pitch); PUSH_DATA (push, src->height); PUSH_DATA (push, src->depth); PUSH_DATA (push, src->z); PUSH_DATA (push, (src->y << 16) | (src->x * cpp)); BEGIN_NVC0(push, SUBC_COPY(0x0400), 8); PUSH_DATAh(push, src->bo->offset + src_base); PUSH_DATA (push, src->bo->offset + src_base); PUSH_DATAh(push, dst->bo->offset + dst_base); PUSH_DATA (push, dst->bo->offset + dst_base); PUSH_DATA (push, src->pitch); PUSH_DATA (push, dst->pitch); PUSH_DATA (push, nblocksx * cpp); PUSH_DATA (push, nblocksy); BEGIN_NVC0(push, SUBC_COPY(0x0300), 1); PUSH_DATA (push, exec); nouveau_bufctx_reset(bctx, 0); }
static void nv50_render_condition(struct pipe_context *pipe, struct pipe_query *pq, boolean condition, enum pipe_render_cond_flag mode) { struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); struct nv50_hw_query *hq = nv50_hw_query(q); uint32_t cond; bool wait = mode != PIPE_RENDER_COND_NO_WAIT && mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; if (!pq) { cond = NV50_3D_COND_MODE_ALWAYS; } else { /* NOTE: comparison of 2 queries only works if both have completed */ switch (q->type) { case PIPE_QUERY_SO_OVERFLOW_PREDICATE: cond = condition ? NV50_3D_COND_MODE_EQUAL : NV50_3D_COND_MODE_NOT_EQUAL; wait = true; break; case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: if (hq->state == NV50_HW_QUERY_STATE_READY) wait = true; if (likely(!condition)) { cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL : NV50_3D_COND_MODE_ALWAYS; } else { cond = wait ? NV50_3D_COND_MODE_EQUAL : NV50_3D_COND_MODE_ALWAYS; } break; default: assert(!"render condition query not a predicate"); cond = NV50_3D_COND_MODE_ALWAYS; break; } } nv50->cond_query = pq; nv50->cond_cond = condition; nv50->cond_condmode = cond; nv50->cond_mode = mode; if (!pq) { PUSH_SPACE(push, 2); BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, cond); return; } PUSH_SPACE(push, 9); if (wait && hq->state != NV50_HW_QUERY_STATE_READY) { BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); } PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3); PUSH_DATAh(push, hq->bo->offset + hq->offset); PUSH_DATA (push, hq->bo->offset + hq->offset); PUSH_DATA (push, cond); BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2); PUSH_DATAh(push, hq->bo->offset + hq->offset); PUSH_DATA (push, hq->bo->offset + hq->offset); }
static boolean nvc0_validate_tic(struct nvc0_context *nvc0, int s) { uint32_t commands[32]; struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nouveau_bo *txc = nvc0->screen->txc; unsigned i; unsigned n = 0; boolean need_flush = FALSE; for (i = 0; i < nvc0->num_textures[s]; ++i) { struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]); struct nv04_resource *res; const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i)); if (!tic) { if (dirty) commands[n++] = (i << 1) | 0; continue; } res = nv04_resource(tic->pipe.texture); if (tic->id < 0) { tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); PUSH_SPACE(push, 17); BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); PUSH_DATAh(push, txc->offset + (tic->id * 32)); PUSH_DATA (push, txc->offset + (tic->id * 32)); BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); PUSH_DATA (push, 32); PUSH_DATA (push, 1); BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); PUSH_DATA (push, 0x100111); BEGIN_NIC0(push, NVC0_M2MF(DATA), 8); PUSH_DATAp(push, &tic->tic[0], 8); need_flush = TRUE; } else if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); PUSH_DATA (push, (tic->id << 4) | 1); NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1); } nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; if (!dirty) continue; commands[n++] = (tic->id << 9) | (i << 1) | 1; BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); } for (; i < nvc0->state.num_textures[s]; ++i) commands[n++] = (i << 1) | 0; nvc0->state.num_textures[s] = nvc0->num_textures[s]; if (n) { BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n); PUSH_DATAp(push, commands, n); } nvc0->textures_dirty[s] = 0; return need_flush; }
void nvc0_tfb_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_transform_feedback_state *tfb; unsigned b; if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb; else if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb; else tfb = nvc0->vertprog->tfb; IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0); if (tfb && tfb != nvc0->state.tfb) { for (b = 0; b < 4; ++b) { if (tfb->varying_count[b]) { unsigned n = (tfb->varying_count[b] + 3) / 4; BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3); PUSH_DATA (push, tfb->stream[b]); PUSH_DATA (push, tfb->varying_count[b]); PUSH_DATA (push, tfb->stride[b]); BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n); PUSH_DATAp(push, tfb->varying_index[b], n); if (nvc0->tfbbuf[b]) nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b]; } else { IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0); } } } nvc0->state.tfb = tfb; if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS)) return; for (b = 0; b < nvc0->num_tfbbufs; ++b) { struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]); struct nv04_resource *buf; if (!targ) { IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0); continue; } if (tfb) targ->stride = tfb->stride[b]; buf = nv04_resource(targ->pipe.buffer); BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR); if (!(nvc0->tfbbuf_dirty & (1 << b))) continue; if (!targ->clean) nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq)); nouveau_pushbuf_space(push, 0, 0, 1); BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5); PUSH_DATA (push, 1); PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); PUSH_DATA (push, targ->pipe.buffer_size); if (!targ->clean) { nvc0_hw_query_pushbuf_submit(push, nvc0_query(targ->pq), 0x4); } else { PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */ targ->clean = false; } } for (; b < 4; ++b) IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0); }
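/* Illustrative sketch, not part of the driver: the word count above,
 * (varying_count + 3) / 4, assumes TFB_VARYING_LOCS consumes four 8-bit
 * varying slots per 32-bit word, which is what pushing the byte-sized
 * varying_index array with PUSH_DATAp amounts to on a little-endian build.
 * Packing such byte indices into words by hand, with made-up slot values:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   const uint8_t locs[] = { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14 }; /* made up */
   const unsigned count = sizeof(locs) / sizeof(locs[0]);
   const unsigned words = (count + 3) / 4;   /* round up to whole words */

   for (unsigned w = 0; w < words; ++w) {
      uint32_t v = 0;
      for (unsigned b = 0; b < 4 && w * 4 + b < count; ++b)
         v |= (uint32_t)locs[w * 4 + b] << (8 * b);
      printf("word %u: 0x%08x\n", w, (unsigned)v);
   }
   return 0;
}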
void nv50_constbufs_validate(struct nv50_context *nv50) { struct nouveau_pushbuf *push = nv50->base.pushbuf; unsigned s; for (s = 0; s < 3; ++s) { unsigned p; if (s == PIPE_SHADER_FRAGMENT) p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT; else if (s == PIPE_SHADER_GEOMETRY) p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY; else p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX; while (nv50->constbuf_dirty[s]) { const unsigned i = (unsigned)ffs(nv50->constbuf_dirty[s]) - 1; assert(i < NV50_MAX_PIPE_CONSTBUFS); nv50->constbuf_dirty[s] &= ~(1 << i); if (nv50->constbuf[s][i].user) { const unsigned b = NV50_CB_PVP + s; unsigned start = 0; unsigned words = nv50->constbuf[s][0].size / 4; if (i) { NOUVEAU_ERR("user constbufs only supported in slot 0\n"); continue; } if (!nv50->state.uniform_buffer_bound[s]) { nv50->state.uniform_buffer_bound[s] = true; BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); } while (words) { unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); PUSH_SPACE(push, nr + 3); BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); PUSH_DATA (push, (start << 8) | b); BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr); PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr); start += nr; words -= nr; } } else { struct nv04_resource *res = nv04_resource(nv50->constbuf[s][i].u.buf); if (res) { /* TODO: allocate persistent bindings */ const unsigned b = s * 16 + i; assert(nouveau_resource_mapped_by_gpu(&res->base)); BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset); PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset); PUSH_DATA (push, (b << 16) | (nv50->constbuf[s][i].size & 0xffff)); BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD); nv50->cb_dirty = 1; /* Force cache flush for UBO. */ } else { BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); PUSH_DATA (push, (i << 8) | p | 0); } if (i == 0) nv50->state.uniform_buffer_bound[s] = false; } } } }
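/* Illustrative sketch, not part of the driver: the user-buffer path above
 * streams data through CB_ADDR/CB_DATA, where the CB_ADDR word is
 * (word_offset << 8) | buffer_id (the same encoding the hwctx setup uses for
 * the AUX runout slot) and each CB_DATA packet carries at most
 * NV04_PFIFO_MAX_PACKET_LEN words.  The packet limit of 2047 and the buffer
 * size below are assumptions for the example.
 */
#include <stdio.h>

#define EXAMPLE_MAX_PACKET_LEN 2047u

int main(void)
{
   const unsigned buffer_id = 0;   /* stand-in for NV50_CB_PVP + s */
   unsigned words = 5000;          /* assumed user constbuf size in words */
   unsigned start = 0;

   while (words) {
      unsigned nr = words < EXAMPLE_MAX_PACKET_LEN ? words
                                                   : EXAMPLE_MAX_PACKET_LEN;
      printf("CB_ADDR 0x%08x, then %u CB_DATA words\n",
             (start << 8) | buffer_id, nr);
      start += nr;
      words -= nr;
   }
   return 0;
}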
static void nvc0_push_upload_vertex_ids(struct push_context *ctx, struct nvc0_context *nvc0, const struct pipe_draw_info *info) { struct nouveau_pushbuf *push = ctx->push; struct nouveau_bo *bo; uint64_t va; uint32_t *data; uint32_t format; unsigned index_size = nvc0->idxbuf.index_size; unsigned i; unsigned a = nvc0->vertex->num_elements; if (!index_size || info->index_bias) index_size = 4; data = (uint32_t *)nouveau_scratch_get(&nvc0->base, info->count * index_size, &va, &bo); BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, bo); nouveau_pushbuf_validate(push); if (info->indexed) { if (!info->index_bias) { memcpy(data, ctx->idxbuf, info->count * index_size); } else { switch (nvc0->idxbuf.index_size) { case 1: copy_indices_u8(data, ctx->idxbuf, info->index_bias, info->count); break; case 2: copy_indices_u16(data, ctx->idxbuf, info->index_bias, info->count); break; default: copy_indices_u32(data, ctx->idxbuf, info->index_bias, info->count); break; } } } else { for (i = 0; i < info->count; ++i) data[i] = i + (info->start + info->index_bias); } format = (1 << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) | NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT; switch (index_size) { case 1: format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8; break; case 2: format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16; break; default: format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32; break; } PUSH_SPACE(push, 12); if (unlikely(nvc0->state.instance_elts & 2)) { nvc0->state.instance_elts &= ~2; IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(1)), 0); } BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1); PUSH_DATA (push, format); BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 3); PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size); PUSH_DATAh(push, va); PUSH_DATA (push, va); BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2); PUSH_DATAh(push, va + info->count * index_size - 1); PUSH_DATA (push, va + info->count * index_size - 1); #define NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) \ (((0x80 + (a) * 0x10) / 4) << NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT) BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_REPLACE), 1); PUSH_DATA (push, NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) | 1); }
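/* Illustrative sketch, not part of the driver: copy_indices_u8/u16/u32 above
 * are not shown in this listing; judging by their use they widen the source
 * indices to 32 bits while applying index_bias, which is why index_size is
 * forced to 4 whenever a bias is present.  A plausible u16 variant, named
 * with an example_ prefix because it is hypothetical:
 */
#include <stdint.h>
#include <stdio.h>

static void
example_copy_indices_u16(uint32_t *dst, const uint16_t *elts,
                         uint32_t bias, unsigned count)
{
   for (unsigned i = 0; i < count; ++i)
      dst[i] = (uint32_t)elts[i] + bias;   /* widen and rebase in one pass */
}

int main(void)
{
   const uint16_t src[] = { 0, 1, 2, 5 };
   uint32_t dst[4];

   example_copy_indices_u16(dst, src, 100, 4);
   printf("%u %u %u %u\n", dst[0], dst[1], dst[2], dst[3]);  /* 100 101 102 105 */
   return 0;
}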
static void nvc0_render_condition(struct pipe_context *pipe, struct pipe_query *pq, boolean condition, uint mode) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_query *q = nvc0_query(pq); struct nvc0_hw_query *hq = nvc0_hw_query(q); uint32_t cond; bool wait = mode != PIPE_RENDER_COND_NO_WAIT && mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT; if (!pq) { cond = NVC0_3D_COND_MODE_ALWAYS; } else { /* NOTE: comparison of 2 queries only works if both have completed */ switch (q->type) { case PIPE_QUERY_SO_OVERFLOW_PREDICATE: cond = condition ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_NOT_EQUAL; wait = true; break; case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: if (likely(!condition)) { if (unlikely(hq->nesting)) cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : NVC0_3D_COND_MODE_ALWAYS; else cond = NVC0_3D_COND_MODE_RES_NON_ZERO; } else { cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS; } break; default: assert(!"render condition query not a predicate"); cond = NVC0_3D_COND_MODE_ALWAYS; break; } } nvc0->cond_query = pq; nvc0->cond_cond = condition; nvc0->cond_condmode = cond; nvc0->cond_mode = mode; if (!pq) { PUSH_SPACE(push, 1); IMMED_NVC0(push, NVC0_3D(COND_MODE), cond); return; } if (wait) nvc0_hw_query_fifo_wait(push, q); PUSH_SPACE(push, 7); PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3); PUSH_DATAh(push, hq->bo->offset + hq->offset); PUSH_DATA (push, hq->bo->offset + hq->offset); PUSH_DATA (push, cond); BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2); PUSH_DATAh(push, hq->bo->offset + hq->offset); PUSH_DATA (push, hq->bo->offset + hq->offset); }
static boolean nv50_validate_tic(struct nv50_context *nv50, int s) { struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nouveau_bo *txc = nv50->screen->txc; unsigned i; boolean need_flush = FALSE; assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS); for (i = 0; i < nv50->num_textures[s]; ++i) { struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]); struct nv04_resource *res; if (!tic) { BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); PUSH_DATA (push, (i << 1) | 0); continue; } res = &nv50_miptree(tic->pipe.texture)->base; if (tic->id < 0) { tic->id = nv50_screen_tic_alloc(nv50->screen, tic); BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); PUSH_DATA (push, 262144); PUSH_DATA (push, 65536); PUSH_DATA (push, 1); PUSH_DATAh(push, txc->offset); PUSH_DATA (push, txc->offset); BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); PUSH_DATA (push, 0); PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); PUSH_DATA (push, 32); PUSH_DATA (push, 1); PUSH_DATA (push, 0); PUSH_DATA (push, 1); PUSH_DATA (push, 0); PUSH_DATA (push, 1); PUSH_DATA (push, 0); PUSH_DATA (push, tic->id * 32); PUSH_DATA (push, 0); PUSH_DATA (push, 0); BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8); PUSH_DATAp(push, &tic->tic[0], 8); need_flush = TRUE; } else if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1); PUSH_DATA (push, 0x20); } nv50->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; BCTX_REFN(nv50->bufctx_3d, TEXTURES, res, RD); BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1); } for (; i < nv50->state.num_textures[s]; ++i) { BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); PUSH_DATA (push, (i << 1) | 0); } if (nv50->num_textures[s]) { BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); PUSH_DATA (push, (NV50_CB_AUX_TEX_MS_OFFSET << (8 - 2)) | NV50_CB_AUX); BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2); for (i = 0; i < nv50->num_textures[s]; i++) { struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]); struct nv50_miptree *res; if (!tic) { PUSH_DATA (push, 0); PUSH_DATA (push, 0); continue; } res = nv50_miptree(tic->pipe.texture); PUSH_DATA (push, res->ms_x); PUSH_DATA (push, res->ms_y); } } nv50->state.num_textures[s] = nv50->num_textures[s]; return need_flush; }
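/* Illustrative sketch, not part of the driver: the tic.lock update above
 * marks a TIC slot as in use in a bitmap of 32-bit words, one bit per entry.
 * The same bookkeeping in isolation (the table size is a made-up value):
 */
#include <stdint.h>
#include <stdio.h>

static void example_bitmap_set(uint32_t *words, unsigned id)
{
   words[id / 32] |= 1u << (id % 32);
}

static int example_bitmap_test(const uint32_t *words, unsigned id)
{
   return (words[id / 32] >> (id % 32)) & 1;
}

int main(void)
{
   uint32_t lock[64] = { 0 };   /* enough bits for 2048 assumed TIC entries */

   example_bitmap_set(lock, 37);
   printf("entry 37 used: %d, entry 38 used: %d\n",
          example_bitmap_test(lock, 37), example_bitmap_test(lock, 38));
   return 0;
}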
struct pipe_video_codec * nvc0_create_decoder(struct pipe_context *context, const struct pipe_video_codec *templ) { struct nouveau_screen *screen = &((struct nvc0_context *)context)->screen->base; struct nouveau_vp3_decoder *dec; struct nouveau_pushbuf **push; union nouveau_bo_config cfg; bool kepler = screen->device->chipset >= 0xe0; cfg.nvc0.tile_mode = 0x10; cfg.nvc0.memtype = 0xfe; int ret, i; uint32_t codec = 1, ppp_codec = 3; uint32_t timeout; u32 tmp_size = 0; if (getenv("XVMC_VL")) return vl_create_decoder(context, templ); if (templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) { debug_printf("%x\n", templ->entrypoint); return NULL; } dec = CALLOC_STRUCT(nouveau_vp3_decoder); if (!dec) return NULL; dec->client = screen->client; dec->base = *templ; nouveau_vp3_decoder_init_common(&dec->base); if (!kepler) { dec->bsp_idx = 5; dec->vp_idx = 6; dec->ppp_idx = 7; } else { dec->bsp_idx = 2; dec->vp_idx = 2; dec->ppp_idx = 2; } for (i = 0; i < 3; ++i) if (i && !kepler) { dec->channel[i] = dec->channel[0]; dec->pushbuf[i] = dec->pushbuf[0]; } else { void *data; u32 size; struct nvc0_fifo nvc0_args = {}; struct nve0_fifo nve0_args = {}; if (!kepler) { size = sizeof(nvc0_args); data = &nvc0_args; } else { unsigned engine[] = { NVE0_FIFO_ENGINE_BSP, NVE0_FIFO_ENGINE_VP, NVE0_FIFO_ENGINE_PPP }; nve0_args.engine = engine[i]; size = sizeof(nve0_args); data = &nve0_args; } ret = nouveau_object_new(&screen->device->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS, data, size, &dec->channel[i]); if (!ret) ret = nouveau_pushbuf_new(screen->client, dec->channel[i], 4, 32 * 1024, true, &dec->pushbuf[i]); if (ret) break; } push = dec->pushbuf; if (!kepler) { if (!ret) ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x90b1, NULL, 0, &dec->bsp); if (!ret) ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x90b2, NULL, 0, &dec->vp); if (!ret) ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x90b3, NULL, 0, &dec->ppp); } else { if (!ret) ret = nouveau_object_new(dec->channel[0], 0x95b1, 0x95b1, NULL, 0, &dec->bsp); if (!ret) ret = nouveau_object_new(dec->channel[1], 0x95b2, 0x95b2, NULL, 0, &dec->vp); if (!ret) ret = nouveau_object_new(dec->channel[2], 0x90b3, 0x90b3, NULL, 0, &dec->ppp); } if (ret) goto fail; BEGIN_NVC0(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push[0], dec->bsp->handle); BEGIN_NVC0(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push[1], dec->vp->handle); BEGIN_NVC0(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push[2], dec->ppp->handle); dec->base.context = context; dec->base.decode_bitstream = nvc0_decoder_decode_bitstream; for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i) ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, 1 << 20, &cfg, &dec->bsp_bo[i]); if (!ret) ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0x100, 4 << 20, &cfg, &dec->inter_bo[0]); if (!ret) { ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0x100, dec->inter_bo[0]->size, &cfg, &dec->inter_bo[1]); } if (ret) goto fail; switch (u_reduce_video_profile(templ->profile)) { case PIPE_VIDEO_FORMAT_MPEG12: { codec = 1; assert(templ->max_references <= 2); break; } case PIPE_VIDEO_FORMAT_MPEG4: { codec = 4; tmp_size = mb(templ->height)*16 * mb(templ->width)*16; assert(templ->max_references <= 2); break; } case PIPE_VIDEO_FORMAT_VC1: { ppp_codec = codec = 2; tmp_size = mb(templ->height)*16 * mb(templ->width)*16; assert(templ->max_references <= 2); break; } case PIPE_VIDEO_FORMAT_MPEG4_AVC: { codec = 3; dec->tmp_stride = 16 * mb_half(templ->width) * 
nouveau_vp3_video_align(templ->height) * 3 / 2; tmp_size = dec->tmp_stride * (templ->max_references + 1); assert(templ->max_references <= 16); break; } default: fprintf(stderr, "invalid codec\n"); goto fail; } if (screen->device->chipset < 0xd0) { ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, 0x4000, &cfg, &dec->fw_bo); if (ret) goto fail; ret = nouveau_vp3_load_firmware(dec, templ->profile, screen->device->chipset); if (ret) goto fw_fail; } if (codec != 3) { ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, 0x400, &cfg, &dec->bitplane_bo); if (ret) goto fail; } dec->ref_stride = mb(templ->width)*16 * (mb_half(templ->height)*32 + nouveau_vp3_video_align(templ->height)/2); ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0, dec->ref_stride * (templ->max_references+2) + tmp_size, &cfg, &dec->ref_bo); if (ret) goto fail; timeout = 0; BEGIN_NVC0(push[0], SUBC_BSP(0x200), 2); PUSH_DATA (push[0], codec); PUSH_DATA (push[0], timeout); BEGIN_NVC0(push[1], SUBC_VP(0x200), 2); PUSH_DATA (push[1], codec); PUSH_DATA (push[1], timeout); BEGIN_NVC0(push[2], SUBC_PPP(0x200), 2); PUSH_DATA (push[2], ppp_codec); PUSH_DATA (push[2], timeout); ++dec->fence_seq; #if NOUVEAU_VP3_DEBUG_FENCE ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP, 0, 0x1000, NULL, &dec->fence_bo); if (ret) goto fail; nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client); dec->fence_map = dec->fence_bo->map; dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0; dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map))); /* So lets test if the fence is working? */ nouveau_pushbuf_space(push[0], 6, 1, 0); PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3); PUSH_DATAh(push[0], dec->fence_bo->offset); PUSH_DATA (push[0], dec->fence_bo->offset); PUSH_DATA (push[0], dec->fence_seq); BEGIN_NVC0(push[0], SUBC_BSP(0x304), 1); PUSH_DATA (push[0], 0); PUSH_KICK (push[0]); nouveau_pushbuf_space(push[1], 6, 1, 0); PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NVC0(push[1], SUBC_VP(0x240), 3); PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10)); PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10)); PUSH_DATA (push[1], dec->fence_seq); BEGIN_NVC0(push[1], SUBC_VP(0x304), 1); PUSH_DATA (push[1], 0); PUSH_KICK (push[1]); nouveau_pushbuf_space(push[2], 6, 1, 0); PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR); BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3); PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20)); PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20)); PUSH_DATA (push[2], dec->fence_seq); BEGIN_NVC0(push[2], SUBC_PPP(0x304), 1); PUSH_DATA (push[2], 0); PUSH_KICK (push[2]); usleep(100); while (dec->fence_seq > dec->fence_map[0] || dec->fence_seq > dec->fence_map[4] || dec->fence_seq > dec->fence_map[8]) { debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); usleep(100); } debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]); #endif return &dec->base; fw_fail: debug_printf("Cannot create decoder without firmware..\n"); dec->base.destroy(&dec->base); return NULL; fail: debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret); dec->base.destroy(&dec->base); return NULL; }
int nvc0_screen_compute_setup(struct nvc0_screen *screen, struct nouveau_pushbuf *push) { struct nouveau_object *chan = screen->base.channel; struct nouveau_device *dev = screen->base.device; uint32_t obj_class; int ret; int i; switch (dev->chipset & ~0xf) { case 0xc0: case 0xd0: /* In theory, GF110+ should also support NVC8_COMPUTE_CLASS but, * in practice, a ILLEGAL_CLASS dmesg fail appears when using it. */ obj_class = NVC0_COMPUTE_CLASS; break; default: NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); return -1; } ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0, &screen->compute); if (ret) { NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret); return ret; } ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL, &screen->parm); if (ret) return ret; BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->oclass); /* hardware limit */ BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1); PUSH_DATA (push, screen->mp_count); BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1); PUSH_DATA (push, 0xf); BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1); PUSH_DATA (push, 0x8000); /* global memory setup */ BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); PUSH_DATA (push, 0); BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100); for (i = 0; i <= 0xff; i++) PUSH_DATA (push, (0xc << 28) | (i << 16) | i); BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); PUSH_DATA (push, 1); /* local memory and cstack setup */ BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls->offset); PUSH_DATA (push, screen->tls->offset); BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2); PUSH_DATAh(push, screen->tls->size); PUSH_DATA (push, screen->tls->size); BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1); PUSH_DATA (push, 0); BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1); PUSH_DATA (push, 1 << 24); /* shared memory setup */ BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1); PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1); PUSH_DATA (push, 2 << 24); BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1); PUSH_DATA (push, 0); /* code segment setup */ BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); /* TODO: textures & samplers */ return 0; }
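/* Illustrative sketch, not part of the driver: the switch above masks out the
 * low nibble of the chipset id, so all GF10x/GF11x variants (0xc1, 0xc3,
 * 0xce, 0xd9, ...) collapse onto the 0xc0/0xd0 cases; the nv50 variant
 * earlier does the same with "& 0xf0".  The same selection in isolation; the
 * class value below is a stand-in for NVC0_COMPUTE_CLASS, not a verified
 * constant.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_COMPUTE_CLASS 0x90c0   /* stand-in for NVC0_COMPUTE_CLASS */

static int example_compute_class_for(uint32_t chipset, uint32_t *oclass)
{
   switch (chipset & ~0xf) {
   case 0xc0:
   case 0xd0:
      *oclass = EXAMPLE_COMPUTE_CLASS;
      return 0;
   default:
      return -1;   /* unsupported, mirroring the error path above */
   }
}

int main(void)
{
   uint32_t cls;
   printf("0xc4: %s\n",
          example_compute_class_for(0xc4, &cls) ? "unsupported" : "supported");
   printf("0xe4: %s\n",
          example_compute_class_for(0xe4, &cls) ? "unsupported" : "supported");
   return 0;
}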