void
nv50_constbufs_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   unsigned s;

   for (s = 0; s < 3; ++s) {
      unsigned p;

      if (s == PIPE_SHADER_FRAGMENT)
         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;
      else if (s == PIPE_SHADER_GEOMETRY)
         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;
      else
         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;

      while (nv50->constbuf_dirty[s]) {
         const unsigned i = (unsigned)ffs(nv50->constbuf_dirty[s]) - 1;

         assert(i < NV50_MAX_PIPE_CONSTBUFS);
         nv50->constbuf_dirty[s] &= ~(1 << i);

         if (nv50->constbuf[s][i].user) {
            const unsigned b = NV50_CB_PVP + s;
            unsigned start = 0;
            unsigned words = nv50->constbuf[s][0].size / 4;

            if (i) {
               NOUVEAU_ERR("user constbufs only supported in slot 0\n");
               continue;
            }
            if (!nv50->state.uniform_buffer_bound[s]) {
               nv50->state.uniform_buffer_bound[s] = true;
               BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
               PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
            }
            while (words) {
               unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);

               PUSH_SPACE(push, nr + 3);
               BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
               PUSH_DATA (push, (start << 8) | b);
               BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
               PUSH_DATAp(push, &nv50->constbuf[s][0].u.data[start * 4], nr);

               start += nr;
               words -= nr;
            }
         } else {
            struct nv04_resource *res =
               nv04_resource(nv50->constbuf[s][i].u.buf);

            if (res) {
               /* TODO: allocate persistent bindings */
               const unsigned b = s * 16 + i;

               assert(nouveau_resource_mapped_by_gpu(&res->base));

               BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
               PUSH_DATAh(push, res->address + nv50->constbuf[s][i].offset);
               PUSH_DATA (push, res->address + nv50->constbuf[s][i].offset);
               PUSH_DATA (push, (b << 16) |
                          (nv50->constbuf[s][i].size & 0xffff));
               BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
               PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);

               BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);

               nv50->cb_dirty = 1; /* Force cache flush for UBO. */
            } else {
               BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
               PUSH_DATA (push, (i << 8) | p | 0);
            }
            if (i == 0)
               nv50->state.uniform_buffer_bound[s] = false;
         }
      }
   }
}
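/* Illustrative sketch only (the helper name is hypothetical): the
 * SET_PROGRAM_CB argument built twice above packs the buffer binding index,
 * the constbuf slot, the program selector and a valid bit into one word;
 * the field layout is read straight off the pushes in
 * nv50_constbufs_validate().
 */
static inline uint32_t
nv50_set_program_cb_word(unsigned buffer, unsigned slot, unsigned prog_sel,
                         bool valid)
{
   return (buffer << 12) | (slot << 8) | prog_sel | (valid ? 1 : 0);
}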
static void
nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
{
   struct nouveau_pushbuf *push = blit->screen->base.pushbuf;

   /* blend state */
   BEGIN_NV04(push, NV50_3D(COLOR_MASK(0)), 1);
   PUSH_DATA (push, blit->color_mask);
   BEGIN_NV04(push, NV50_3D(BLEND_ENABLE(0)), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(LOGIC_OP_ENABLE), 1);
   PUSH_DATA (push, 0);

   /* rasterizer state */
#ifndef NV50_SCISSORS_CLIPPING
   BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 1);
   PUSH_DATA (push, 1);
#endif
   BEGIN_NV04(push, NV50_3D(VERTEX_TWO_SIDE_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(MSAA_MASK(0)), 4);
   PUSH_DATA (push, 0xffff);
   PUSH_DATA (push, 0xffff);
   PUSH_DATA (push, 0xffff);
   PUSH_DATA (push, 0xffff);
   BEGIN_NV04(push, NV50_3D(POLYGON_MODE_FRONT), 3);
   PUSH_DATA (push, NV50_3D_POLYGON_MODE_FRONT_FILL);
   PUSH_DATA (push, NV50_3D_POLYGON_MODE_BACK_FILL);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(CULL_FACE_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(POLYGON_STIPPLE_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(POLYGON_OFFSET_FILL_ENABLE), 1);
   PUSH_DATA (push, 0);

   /* zsa state */
   BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1);
   PUSH_DATA (push, 0);
}
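/* Note: the blit path clips with scissors rather than with any of the state
 * disabled above. nv50_resource_resolve() below draws an oversized triangle
 * and bounds it with SCISSOR_HORIZ/VERT, which is presumably why the scissor
 * is the one piece of state forced on here when NV50_SCISSORS_CLIPPING is
 * not defined (it is always enabled otherwise, per nv50_screen_init_hwctx()).
 */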
static void
nv50_resource_resolve(struct pipe_context *pipe,
                      const struct pipe_resolve_info *info)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nv50_screen *screen = nv50->screen;
   struct nv50_blitctx *blit = screen->blitctx;
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct pipe_resource *src = info->src.res;
   struct pipe_resource *dst = info->dst.res;
   float x0, x1, y0, y1, z;
   float x_range, y_range;

   nv50_blitctx_get_color_mask_and_fp(blit, dst->format, info->mask);

   blit->filter = util_format_is_depth_or_stencil(dst->format) ? 0 : 1;

   nv50_blitctx_pre_blit(blit, nv50);

   nv50_blit_set_dst(nv50, dst, info->dst.level, info->dst.layer);
   nv50_blit_set_src(nv50, src, 0, info->src.layer);

   nv50_blitctx_prepare_state(blit);

   nv50_state_validate(nv50, ~0, 36);

   x_range = (float)(info->src.x1 - info->src.x0) /
             (float)(info->dst.x1 - info->dst.x0);
   y_range = (float)(info->src.y1 - info->src.y0) /
             (float)(info->dst.y1 - info->dst.y0);

   x0 = (float)info->src.x0 - x_range * (float)info->dst.x0;
   y0 = (float)info->src.y0 - y_range * (float)info->dst.y0;

   x1 = x0 + 16384.0f * x_range;
   y1 = y0 + 16384.0f * y_range;

   x0 *= (float)(1 << nv50_miptree(src)->ms_x);
   x1 *= (float)(1 << nv50_miptree(src)->ms_x);
   y0 *= (float)(1 << nv50_miptree(src)->ms_y);
   y1 *= (float)(1 << nv50_miptree(src)->ms_y);

   z = (float)info->src.layer;

   BEGIN_NV04(push, NV50_3D(FP_START_ID), 1);
   PUSH_DATA (push, blit->fp.code_base + blit->fp_offset);

   BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
   PUSH_DATA (push, 0);

   /* Draw a large triangle in screen coordinates covering the whole
    * render target, with scissors defining the destination region.
    * The vertex is supplied with non-normalized texture coordinates
    * arranged in a way to yield the desired offset and scale.
    */
   BEGIN_NV04(push, NV50_3D(SCISSOR_HORIZ(0)), 2);
   PUSH_DATA (push, (info->dst.x1 << 16) | info->dst.x0);
   PUSH_DATA (push, (info->dst.y1 << 16) | info->dst.y0);

   BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
   PUSH_DATA (push, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);

   BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3);
   PUSH_DATAf(push, x0);
   PUSH_DATAf(push, y0);
   PUSH_DATAf(push, z);
   BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3);
   PUSH_DATAf(push, x1);
   PUSH_DATAf(push, y0);
   PUSH_DATAf(push, z);
   BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
   PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_x);
   PUSH_DATAf(push, 0.0f);
   BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3);
   PUSH_DATAf(push, x0);
   PUSH_DATAf(push, y1);
   PUSH_DATAf(push, z);
   BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_y);

   BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
   PUSH_DATA (push, 0);

   /* re-enable normally constant state */
   BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
   PUSH_DATA (push, 1);

   nv50_blitctx_post_blit(nv50, blit);
}
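/* To see why the texcoord setup above yields the desired offset and scale
 * (ignoring the multisample scaling, which is applied symmetrically to
 * positions and coordinates): position attribute 0 spans 0..16384, so at
 * screen position x the interpolated coordinate of attribute 1 is
 *    x0 + (x / 16384) * (x1 - x0) = src.x0 + (x - dst.x0) * x_range,
 * i.e. dst.x0 maps to src.x0 and dst.x1 maps to src.x1, while the scissor
 * cuts the oversized triangle down to the destination rectangle.
 */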
void
nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
{
   struct push_context ctx;
   unsigned i, index_size;
   unsigned inst_count = info->instance_count;
   unsigned vert_count = info->count;
   boolean apply_bias = info->indexed && info->index_bias;

   ctx.push = nv50->base.pushbuf;
   ctx.translate = nv50->vertex->translate;
   ctx.packet_vertex_limit = nv50->vertex->packet_vertex_limit;
   ctx.vertex_words = nv50->vertex->vertex_size;

   for (i = 0; i < nv50->num_vtxbufs; ++i) {
      const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
      const uint8_t *data;

      if (unlikely(vb->buffer))
         data = nouveau_resource_map_offset(&nv50->base,
            nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD);
      else
         data = vb->user_buffer;

      if (apply_bias && likely(!(nv50->vertex->instance_bufs & (1 << i))))
         data += (ptrdiff_t)info->index_bias * vb->stride;

      ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
   }

   if (info->indexed) {
      if (nv50->idxbuf.buffer) {
         ctx.idxbuf = nouveau_resource_map_offset(&nv50->base,
            nv04_resource(nv50->idxbuf.buffer), nv50->idxbuf.offset,
            NOUVEAU_BO_RD);
      } else {
         ctx.idxbuf = nv50->idxbuf.user_buffer;
      }
      if (!ctx.idxbuf)
         return;
      index_size = nv50->idxbuf.index_size;
      ctx.primitive_restart = info->primitive_restart;
      ctx.restart_index = info->restart_index;
   } else {
      if (unlikely(info->count_from_stream_output)) {
         struct pipe_context *pipe = &nv50->base.pipe;
         struct nv50_so_target *targ;

         targ = nv50_so_target(info->count_from_stream_output);
         if (!targ->pq) {
            NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
            return;
         }
         pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
         vert_count /= targ->stride;
      }
      ctx.idxbuf = NULL;
      index_size = 0;
      ctx.primitive_restart = FALSE;
      ctx.restart_index = 0;
   }

   ctx.instance_id = info->start_instance;
   ctx.prim = nv50_prim_gl(info->mode);

   if (info->primitive_restart) {
      BEGIN_NV04(ctx.push, NV50_3D(PRIM_RESTART_ENABLE), 2);
      PUSH_DATA (ctx.push, 1);
      PUSH_DATA (ctx.push, info->restart_index);
   } else if (nv50->state.prim_restart) {
      BEGIN_NV04(ctx.push, NV50_3D(PRIM_RESTART_ENABLE), 1);
      PUSH_DATA (ctx.push, 0);
   }
   nv50->state.prim_restart = info->primitive_restart;

   while (inst_count--) {
      BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1);
      PUSH_DATA (ctx.push, ctx.prim);
      switch (index_size) {
      case 0:
         emit_vertices_seq(&ctx, info->start, vert_count);
         break;
      case 1:
         emit_vertices_i08(&ctx, info->start, vert_count);
         break;
      case 2:
         emit_vertices_i16(&ctx, info->start, vert_count);
         break;
      case 4:
         emit_vertices_i32(&ctx, info->start, vert_count);
         break;
      default:
         assert(0);
         break;
      }
      BEGIN_NV04(ctx.push, NV50_3D(VERTEX_END_GL), 1);
      PUSH_DATA (ctx.push, 0);

      ctx.instance_id++;
      ctx.prim |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
   }
}
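/* Instancing in this push path simply replays the vertex stream once per
 * instance: ctx.instance_id is advanced on the CPU (the translate run is
 * expected to consume it for per-instance attributes), and
 * NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT is OR'ed into the primitive word so
 * the hardware advances its instance counter at the next VERTEX_BEGIN_GL.
 */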
static void
nv50_screen_init_hwctx(struct nv50_screen *screen)
{
   struct nouveau_pushbuf *push = screen->base.pushbuf;
   struct nv04_fifo *fifo;
   unsigned i;

   fifo = (struct nv04_fifo *)screen->base.channel->data;

   BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->m2mf->handle);
   BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3);
   PUSH_DATA (push, screen->sync->handle);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);

   BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->eng2d->handle);
   BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4);
   PUSH_DATA (push, screen->sync->handle);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_2D(OPERATION), 1);
   PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
   BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, SUBC_2D(0x0888), 1);
   PUSH_DATA (push, 1);

   BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->tesla->handle);

   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
   PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);

   BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1);
   PUSH_DATA (push, screen->sync->handle);
   BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11);
   for (i = 0; i < 11; ++i)
      PUSH_DATA(push, fifo->vram);
   BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);
   for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)
      PUSH_DATA(push, fifo->vram);

   BEGIN_NV04(push, NV50_3D(REG_MODE), 1);
   PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED);
   BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
   PUSH_DATA (push, 0xf);

   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
      BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
      PUSH_DATA (push, 0x18);
   }

   BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
   PUSH_DATA (push, 1);

   BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
   PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
   PUSH_DATA (push, 1);

   if (screen->tesla->oclass >= NVA0_3D_CLASS) {
      BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
      PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
   }

   BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1);
   PUSH_DATA (push, 0x3f);

   BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->tls_bo->offset);
   PUSH_DATA (push, screen->tls_bo->offset);
   PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));

   BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->stack_bo->offset);
   PUSH_DATA (push, screen->stack_bo->offset);
   PUSH_DATA (push, 4);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (0 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (0 << 16));
   PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (1 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (1 << 16));
   PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (2 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (2 << 16));
   PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
   PUSH_DATA (push, (NV50_CB_AUX << 16) | 0x0200);

   BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);

   /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
   BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
   PUSH_DATA (push, ((1 << 9) << 6) | NV50_CB_AUX);
   BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + (1 << 9));
   PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + (1 << 9));

   /* max TIC (bits 4:8) & TSC bindings, per program type */
   for (i = 0; i < 3; ++i) {
      BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);
      PUSH_DATA (push, 0x54);
   }

   BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1);
   PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);
   BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
   for (i = 0; i < 8 * 2; ++i)
      PUSH_DATA(push, 0);
   BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 1.0f);

   BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
#ifdef NV50_SCISSORS_CLIPPING
   PUSH_DATA (push, 0x0000);
#else
   PUSH_DATA (push, 0x1080);
#endif

   BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1);
   PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);

   /* We use scissors instead of exact view volume clipping,
    * so they're always enabled.
    */
   BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 3);
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 8192 << 16);
   PUSH_DATA (push, 8192 << 16);

   BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1);
   PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL);
   BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
   PUSH_DATA (push, 0x11111111);
   BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
   PUSH_DATA (push, 1);

   PUSH_KICK (push);
}
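/* Illustrative sketch only (the helper name is hypothetical): the CB_DEF
 * uploads above carve the uniforms BO into 64 KiB areas, with areas 0..3
 * backing the NV50_CB_PVP, PGP, PFP and AUX bindings in that order, so an
 * area's GPU address is just the BO offset plus area << 16.
 */
static inline uint64_t
nv50_uniform_area_address(const struct nv50_screen *screen, unsigned area)
{
   return screen->uniforms->offset + ((uint64_t)area << 16);
}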
bool
nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
{
   struct nouveau_heap *heap;
   int ret;
   uint32_t size = align(prog->code_size, 0x40);
   uint8_t prog_type;

   switch (prog->type) {
   case PIPE_SHADER_VERTEX:   heap = nv50->screen->vp_code_heap; break;
   case PIPE_SHADER_GEOMETRY: heap = nv50->screen->gp_code_heap; break;
   case PIPE_SHADER_FRAGMENT: heap = nv50->screen->fp_code_heap; break;
   case PIPE_SHADER_COMPUTE:  heap = nv50->screen->fp_code_heap; break;
   default:
      assert(!"invalid program type");
      return false;
   }

   ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
   if (ret) {
      /* Out of space: evict everything to compactify the code segment, hoping
       * the working set is much smaller and drifts slowly. Improve me !
       */
      while (heap->next) {
         struct nv50_program *evict = heap->next->priv;
         if (evict)
            nouveau_heap_free(&evict->mem);
      }
      debug_printf("WARNING: out of code space, evicting all shaders.\n");
      ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
      if (ret) {
         NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
         return false;
      }
   }

   if (prog->type == PIPE_SHADER_COMPUTE) {
      /* CP code must be uploaded in FP code segment. */
      prog_type = 1;
   } else {
      prog->code_base = prog->mem->start;
      prog_type = prog->type;
   }

   ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
   if (ret < 0) {
      nouveau_heap_free(&prog->mem);
      return false;
   }
   if (ret > 0)
      nv50->state.new_tls_space = true;

   if (prog->fixups)
      nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
   if (prog->interps)
      nv50_ir_apply_fixups(prog->interps, prog->code,
                           prog->fp.force_persample_interp,
                           false /* flatshade */,
                           prog->fp.alphatest - 1);

   nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
                       (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
                       NOUVEAU_BO_VRAM, prog->code_size, prog->code);

   BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
   PUSH_DATA (nv50->base.pushbuf, 0);

   return true;
}
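/* Illustrative sketch only (the helper name is hypothetical): the upload
 * offset mirrors the VP/FP/GP base addresses programmed in
 * nv50_screen_init_hwctx(), i.e. the code BO is split into equal segments of
 * 1 << NV50_CODE_BO_SIZE_LOG2 bytes indexed by program type, with compute
 * (prog_type 1) sharing the fragment segment.
 */
static inline uint32_t
nv50_code_upload_offset(unsigned prog_type, uint32_t code_base)
{
   return (prog_type << NV50_CODE_BO_SIZE_LOG2) + code_base;
}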
static void
nv50_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq, boolean condition, uint mode)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_query *q;
   uint32_t cond;
   boolean wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      cond = NV50_3D_COND_MODE_ALWAYS;
   } else {
      q = nv50_query(pq);
      /* NOTE: comparison of 2 queries only works if both have completed */
      switch (q->type) {
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         cond = condition ? NV50_3D_COND_MODE_EQUAL :
                            NV50_3D_COND_MODE_NOT_EQUAL;
         wait = TRUE;
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
      case PIPE_QUERY_OCCLUSION_PREDICATE:
         if (likely(!condition)) {
            /* XXX: nested occlusion queries are not handled in this variant;
             * without nesting the raw result can be tested for non-zero.
             */
            cond = NV50_3D_COND_MODE_RES_NON_ZERO;
         } else {
            cond = wait ? NV50_3D_COND_MODE_EQUAL : NV50_3D_COND_MODE_ALWAYS;
         }
         break;
      default:
         assert(!"render condition query not a predicate");
         cond = NV50_3D_COND_MODE_ALWAYS;
         break;
      }
   }

   nv50->cond_query = pq;
   nv50->cond_cond = condition;
   nv50->cond_condmode = cond;
   nv50->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 2);
      BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
      PUSH_DATA (push, cond);
      return;
   }

   PUSH_SPACE(push, 9);

   if (wait) {
      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (push, 0);
   }

   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, cond);

   BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
}
static void
nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_query *hq = nv50_hw_query(q);

   hq->state = NV50_HW_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      nv50_hw_query_get(push, q, 0, 0x0100f002);
      if (--nv50->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 2);
         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
         PUSH_DATA (push, 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_hw_query_get(push, q, 0, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_hw_query_get(push, q, 0, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_hw_query_get(push, q, 0x00, 0x05805002);
      nv50_hw_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIMESTAMP:
      hq->sequence++;
      /* fall through */
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_hw_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      hq->sequence++;
      nv50_hw_query_get(push, q, 0, 0x1000f010);
      break;
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      hq->sequence++;
      nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is not issued on GPU because disjoint is forced to false */
      hq->state = NV50_HW_QUERY_STATE_READY;
      break;
   default:
      assert(0);
      break;
   }

   if (hq->is64bit)
      nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
}
static bool
nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_query *hq = nv50_hw_query(q);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to false even *after* we
    * re-initialized it to true.
    */
   if (hq->rotate) {
      hq->offset += hq->rotate;
      hq->data += hq->rotate / sizeof(*hq->data);
      if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
         nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      hq->data[0] = hq->sequence; /* initialize sequence */
      hq->data[1] = 1; /* initial render condition = true */
      hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
      hq->data[5] = 0;
   }
   if (!hq->is64bit)
      hq->data[0] = hq->sequence++; /* the previously used one */

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      hq->nesting = nv50->screen->num_occlusion_queries_active++;
      if (hq->nesting) {
         nv50_hw_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 4);
         BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
         PUSH_DATA (push, 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_hw_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_hw_query_get(push, q, 0x10, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_hw_query_get(push, q, 0x20, 0x05805002);
      nv50_hw_query_get(push, q, 0x30, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
      nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
      nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
      nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
      nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
      nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_hw_query_get(push, q, 0x10, 0x00005002);
      break;
   default:
      assert(0);
      return false;
   }
   hq->state = NV50_HW_QUERY_STATE_ACTIVE;
   return true;
}
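/* The rotation above steps through the query buffer in fixed-size slices
 * (hq->rotate bytes; 32 for occlusion queries, judging by the older variant
 * below): words [0]/[1] hold the sequence number and the initial "true"
 * render condition, while words [4]/[5] form the pair that the COND_MODE
 * EQUAL/NOT_EQUAL comparison reads until the real result overwrites it.
 */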
static void
nv50_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, enum pipe_render_cond_flag mode)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_query *q = nv50_query(pq);
   struct nv50_hw_query *hq = nv50_hw_query(q);
   uint32_t cond;
   bool wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      cond = NV50_3D_COND_MODE_ALWAYS;
   } else {
      /* NOTE: comparison of 2 queries only works if both have completed */
      switch (q->type) {
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         cond = condition ? NV50_3D_COND_MODE_EQUAL :
                            NV50_3D_COND_MODE_NOT_EQUAL;
         wait = true;
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
         if (hq->state == NV50_HW_QUERY_STATE_READY)
            wait = true;
         if (likely(!condition)) {
            cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
                          NV50_3D_COND_MODE_ALWAYS;
         } else {
            cond = wait ? NV50_3D_COND_MODE_EQUAL :
                          NV50_3D_COND_MODE_ALWAYS;
         }
         break;
      default:
         assert(!"render condition query not a predicate");
         cond = NV50_3D_COND_MODE_ALWAYS;
         break;
      }
   }

   nv50->cond_query = pq;
   nv50->cond_cond = condition;
   nv50->cond_condmode = cond;
   nv50->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 2);
      BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
      PUSH_DATA (push, cond);
      return;
   }

   PUSH_SPACE(push, 9);

   if (wait && hq->state != NV50_HW_QUERY_STATE_READY) {
      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
      PUSH_DATA (push, 0);
   }

   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, hq->bo->offset + hq->offset);
   PUSH_DATA (push, hq->bo->offset + hq->offset);
   PUSH_DATA (push, cond);

   BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, hq->bo->offset + hq->offset);
   PUSH_DATA (push, hq->bo->offset + hq->offset);
}
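/* For the EQUAL/NOT_EQUAL condition modes the hardware compares the two
 * words at COND_ADDRESS; nv50_hw_begin_query() seeds them with
 * { sequence + 1, 0 }, so the comparison presumably flips exactly when the
 * query's own sequence value is written back and the result becomes valid.
 */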
static boolean
nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_query *q = nv50_query(pq);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to FALSE even *after* we
    * re-initialized it to TRUE.
    */
   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
      q->offset += 32;
      q->data += 32 / sizeof(*q->data);
      if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE)
         nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      q->data[0] = q->sequence; /* initialize sequence */
      q->data[1] = 1; /* initial render condition = TRUE */
      q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
      q->data[5] = 0;
   }
   if (!q->is64bit)
      q->data[0] = q->sequence++; /* the previously used one */

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      PUSH_SPACE(push, 4);
      BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
      PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
      BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
      PUSH_DATA (push, 1);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_query_get(push, q, 0x10, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_query_get(push, q, 0x20, 0x05805002);
      nv50_query_get(push, q, 0x30, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nv50_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
      nv50_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
      nv50_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
      nv50_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
      nv50_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
      nv50_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
      nv50_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_query_get(push, q, 0x10, 0x00005002);
      break;
   default:
      break;
   }
   q->ready = FALSE;
   return TRUE;
}
static boolean
nv50_validate_tic(struct nv50_context *nv50, int s)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nouveau_bo *txc = nv50->screen->txc;
   unsigned i;
   boolean need_flush = FALSE;

   assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
   for (i = 0; i < nv50->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
      struct nv04_resource *res;

      if (!tic) {
         BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
         PUSH_DATA (push, (i << 1) | 0);
         continue;
      }
      res = &nv50_miptree(tic->pipe.texture)->base;

      if (tic->id < 0) {
         tic->id = nv50_screen_tic_alloc(nv50->screen, tic);

         BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
         PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
         PUSH_DATA (push, 1);
         BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
         PUSH_DATA (push, 262144);
         PUSH_DATA (push, 65536);
         PUSH_DATA (push, 1);
         PUSH_DATAh(push, txc->offset);
         PUSH_DATA (push, txc->offset);
         BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
         BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
         PUSH_DATA (push, 32);
         PUSH_DATA (push, 1);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 1);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 1);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, tic->id * 32);
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
         BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8);
         PUSH_DATAp(push, &tic->tic[0], 8);

         need_flush = TRUE;
      } else if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, 0x20);
      }

      nv50->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

      BCTX_REFN(nv50->bufctx_3d, TEXTURES, res, RD);

      BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
      PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1);
   }
   for (; i < nv50->state.num_textures[s]; ++i) {
      BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
      PUSH_DATA (push, (i << 1) | 0);
   }

   if (nv50->num_textures[s]) {
      BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
      PUSH_DATA (push, (NV50_CB_AUX_TEX_MS_OFFSET << (8 - 2)) | NV50_CB_AUX);
      BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2);
      for (i = 0; i < nv50->num_textures[s]; i++) {
         struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
         struct nv50_miptree *res;

         if (!tic) {
            PUSH_DATA (push, 0);
            PUSH_DATA (push, 0);
            continue;
         }
         res = nv50_miptree(tic->pipe.texture);
         PUSH_DATA (push, res->ms_x);
         PUSH_DATA (push, res->ms_y);
      }
   }
   nv50->state.num_textures[s] = nv50->num_textures[s];

   return need_flush;
}
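/* A note on the SIFC upload above: a TIC entry is 8 words (32 bytes), and
 * the texture control BO is written as an R8_UNORM surface, so entry n lands
 * at x offset n * 32 with SIFC_WIDTH = 32 one-byte "pixels". The returned
 * need_flush presumably tells the caller to flush the TIC cache so the
 * samplers see the freshly written descriptor.
 */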