static void nvc0_texture_barrier(struct pipe_context *pipe) { struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf; IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); IMMED_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 0); }
static void nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; int i, s; if (flags & PIPE_BARRIER_MAPPED_BUFFER) { for (i = 0; i < nvc0->num_vtxbufs; ++i) { if (!nvc0->vtxbuf[i].buffer) continue; if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) nvc0->base.vbo_dirty = true; } if (nvc0->idxbuf.buffer && nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) nvc0->base.vbo_dirty = true; for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) { uint32_t valid = nvc0->constbuf_valid[s]; while (valid && !nvc0->cb_dirty) { const unsigned i = ffs(valid) - 1; struct pipe_resource *res; valid &= ~(1 << i); if (nvc0->constbuf[s][i].user) continue; res = nvc0->constbuf[s][i].u.buf; if (!res) continue; if (res->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) nvc0->cb_dirty = true; } } } else { /* Pretty much any writing by shaders needs a serialize after * it. Especially when moving between 3d and compute pipelines, but even * without that. */ IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); } /* If we're going to texture from a buffer/image written by a shader, we * must flush the texture cache. */ if (flags & PIPE_BARRIER_TEXTURE) IMMED_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 0); if (flags & PIPE_BARRIER_CONSTANT_BUFFER) nvc0->cb_dirty = true; if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_INDEX_BUFFER)) nvc0->base.vbo_dirty = true; }
static void disp_vertices_i32(struct push_context *ctx, unsigned start, unsigned count) { struct nouveau_pushbuf *push = ctx->push; struct translate *translate = ctx->translate; const uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start; unsigned pos = 0; do { unsigned nR = count; if (unlikely(ctx->prim_restart)) nR = prim_restart_search_i32(elts, nR, ctx->restart_index); translate->run_elts(translate, elts, nR, 0, ctx->instance_id, ctx->dest); count -= nR; ctx->dest += nR * ctx->vertex_size; while (nR) { unsigned nE = nR; if (unlikely(ctx->edgeflag.enabled)) nE = ef_toggle_search_i32(ctx, elts, nR); PUSH_SPACE(push, 4); if (likely(nE >= 2)) { BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); PUSH_DATA (push, pos); PUSH_DATA (push, nE); } else if (nE) { if (pos <= 0xff) { IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos); } else { BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); PUSH_DATA (push, pos); } } if (unlikely(nE != nR)) IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx)); pos += nE; elts += nE; nR -= nE; } if (count) { BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1); PUSH_DATA (push, ctx->restart_index); ++elts; ctx->dest += ctx->vertex_size; ++pos; --count; } } while (count); }
void nvc0_tctlprog_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *tp = nvc0->tctlprog; if (tp && nvc0_program_validate(nvc0, tp)) { if (tp->tp.tess_mode != ~0) { BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1); PUSH_DATA (push, tp->tp.tess_mode); } BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2); PUSH_DATA (push, 0x21); PUSH_DATA (push, tp->code_base); BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1); PUSH_DATA (push, tp->max_gpr); if (tp->tp.input_patch_size <= 32) IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size); } else { BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); PUSH_DATA (push, 0x20); } nvc0_program_update_context_state(nvc0, tp, 1); }
void nvc0_fragprog_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *fp = nvc0->fragprog; if (!nvc0_program_validate(nvc0, fp)) return; nvc0_program_update_context_state(nvc0, fp, 4); if (fp->fp.early_z != nvc0->state.early_z_forced) { nvc0->state.early_z_forced = fp->fp.early_z; IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z); } BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2); PUSH_DATA (push, 0x51); PUSH_DATA (push, fp->code_base); BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1); PUSH_DATA (push, fp->max_gpr); BEGIN_NVC0(push, SUBC_3D(0x0360), 2); PUSH_DATA (push, 0x20164010); PUSH_DATA (push, 0x20); BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1); PUSH_DATA (push, fp->flags[0]); }
static void disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count) { struct nouveau_pushbuf *push = ctx->push; struct translate *translate = ctx->translate; unsigned pos = 0; translate->run(translate, start, count, 0, ctx->instance_id, ctx->dest); do { unsigned nr = count; if (unlikely(ctx->edgeflag.enabled)) nr = ef_toggle_search_seq(ctx, start + pos, nr); PUSH_SPACE(push, 4); if (likely(nr)) { BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2); PUSH_DATA (push, pos); PUSH_DATA (push, nr); } if (unlikely(nr != count)) IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx)); pos += nr; count -= nr; } while (count); }
void nvc0_gmtyprog_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *gp = nvc0->gmtyprog; if (gp) nvc0_program_validate(nvc0, gp); /* we allow GPs with no code for specifying stream output state only */ if (gp && gp->code_size) { const boolean gp_selects_layer = gp->hdr[13] & (1 << 9); BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); PUSH_DATA (push, 0x41); BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1); PUSH_DATA (push, gp->code_base); BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1); PUSH_DATA (push, gp->max_gpr); BEGIN_NVC0(push, NVC0_3D(LAYER), 1); PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0); } else { IMMED_NVC0(push, NVC0_3D(LAYER), 0); BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); PUSH_DATA (push, 0x40); } nvc0_program_update_context_state(nvc0, gp, 3); }
/**
 * Set or clear the render condition.
 *
 * The query, condition and mode are always recorded in the context.  With
 * no query, COND_MODE is set to ALWAYS and the function returns.  With a
 * query, a comparison mode is chosen from the query type, the push buffer
 * optionally waits for the query, and the query's address plus the chosen
 * mode are written starting at COND_ADDRESS_HIGH.
 *
 * @param pipe       gallium context.
 * @param pq         predicate query, or NULL to disable the condition.
 * @param condition  stored in the context; NOTE(review): it does not
 *                   influence the emitted mode here because `negated` is
 *                   fixed to FALSE — confirm whether that is intended.
 * @param mode       PIPE_RENDER_COND_* wait mode.
 */
static void
nvc0_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, uint mode)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q;
   uint32_t cond;
   boolean negated = FALSE;
   boolean wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   nvc0->cond_query = pq;
   nvc0->cond_cond = condition;
   nvc0->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 1);
      IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
      return;
   }
   q = nvc0_query(pq);

   /* NOTE: comparison of 2 queries only works if both have completed */
   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      cond = negated ? NVC0_3D_COND_MODE_EQUAL :
                       NVC0_3D_COND_MODE_NOT_EQUAL;
      wait = TRUE;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      if (likely(!negated)) {
         if (unlikely(q->nesting))
            cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
                          NVC0_3D_COND_MODE_ALWAYS;
         else
            cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
      } else {
         cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
      }
      break;
   default:
      assert(!"render condition query not a predicate");
      /* Fall back to unconditional rendering.  This must set `cond` (the
       * value pushed below), not `mode`; otherwise `cond` would be
       * uninitialised whenever the assert is compiled out.
       */
      cond = NVC0_3D_COND_MODE_ALWAYS;
      break;
   }

   if (wait)
      nvc0_query_fifo_wait(push, pq);

   PUSH_SPACE(push, 4);
   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, q->bo->offset + q->offset);
   PUSH_DATA (push, q->bo->offset + q->offset);
   PUSH_DATA (push, cond);
}
static void nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; int i, s; if (flags & PIPE_BARRIER_MAPPED_BUFFER) { for (i = 0; i < nvc0->num_vtxbufs; ++i) { if (!nvc0->vtxbuf[i].buffer) continue; if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) nvc0->base.vbo_dirty = true; } if (nvc0->idxbuf.buffer && nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) nvc0->base.vbo_dirty = true; for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) { uint32_t valid = nvc0->constbuf_valid[s]; while (valid && !nvc0->cb_dirty) { const unsigned i = ffs(valid) - 1; struct pipe_resource *res; valid &= ~(1 << i); if (nvc0->constbuf[s][i].user) continue; res = nvc0->constbuf[s][i].u.buf; if (!res) continue; if (res->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) nvc0->cb_dirty = true; } } } if (flags & (PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_CONSTANT_BUFFER | PIPE_BARRIER_INDEX_BUFFER | PIPE_BARRIER_IMAGE | PIPE_BARRIER_TEXTURE | PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_STREAMOUT_BUFFER)) { IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011); } }
/**
 * End the query attached to a stream output target, tagging it with the
 * given index.
 *
 * If *serialize is set on entry it is cleared and a SERIALIZE method is
 * emitted first, so a caller looping over several targets emits it at
 * most once.
 *
 * @param pipe       gallium context.
 * @param ptarg      stream output target whose query (pq) is ended.
 * @param index      value stored in the query's index field.
 * @param serialize  in/out flag requesting a SERIALIZE before the query.
 */
void
nvc0_so_target_save_offset(struct pipe_context *pipe,
                           struct pipe_stream_output_target *ptarg,
                           unsigned index, boolean *serialize)
{
   struct nvc0_so_target *so = nvc0_so_target(ptarg);

   if (*serialize) {
      struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf;

      *serialize = FALSE;
      PUSH_SPACE(push, 1);
      IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
   }

   nvc0_query(so->pq)->index = index;
   nvc0_query_end(pipe, so->pq);
}
static void nvc0_so_target_save_offset(struct pipe_context *pipe, struct pipe_stream_output_target *ptarg, unsigned index, bool *serialize) { struct nvc0_so_target *targ = nvc0_so_target(ptarg); if (*serialize) { *serialize = false; PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1); IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0); NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1); } nvc0_query(targ->pq)->index = index; pipe->end_query(pipe, targ->pq); }
static int nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push, struct nv50_miptree *dst, unsigned dst_level, unsigned dx, unsigned dy, unsigned dz, struct nv50_miptree *src, unsigned src_level, unsigned sx, unsigned sy, unsigned sz, unsigned w, unsigned h) { const enum pipe_format dfmt = dst->base.base.format; const enum pipe_format sfmt = src->base.base.format; int ret; boolean eqfmt = dfmt == sfmt; if (!PUSH_SPACE(push, 2 * 16 + 32)) return PIPE_ERROR; ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt); if (ret) return ret; ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt); if (ret) return ret; IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), 0x00); BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4); PUSH_DATA (push, dx << dst->ms_x); PUSH_DATA (push, dy << dst->ms_y); PUSH_DATA (push, w << dst->ms_x); PUSH_DATA (push, h << dst->ms_y); BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4); PUSH_DATA (push, 0); PUSH_DATA (push, 1); PUSH_DATA (push, 0); PUSH_DATA (push, 1); BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4); PUSH_DATA (push, 0); PUSH_DATA (push, sx << src->ms_x); PUSH_DATA (push, 0); PUSH_DATA (push, sy << src->ms_x); return 0; }
void nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) { struct push_context ctx; unsigned i, index_size; unsigned inst_count = info->instance_count; unsigned vert_count = info->count; unsigned prim; nvc0_push_context_init(nvc0, &ctx); nvc0_vertex_configure_translate(nvc0, info->index_bias); if (nvc0->state.index_bias) { /* this is already taken care of by translate */ IMMED_NVC0(ctx.push, NVC0_3D(VB_ELEMENT_BASE), 0); nvc0->state.index_bias = 0; } if (unlikely(ctx.edgeflag.enabled)) nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias); ctx.prim_restart = info->primitive_restart; ctx.restart_index = info->restart_index; if (info->primitive_restart) { /* NOTE: I hope we won't ever need that last index (~0). * If we do, we have to disable primitive restart here always and * use END,BEGIN to restart. (XXX: would that affect PrimitiveID ?) * We could also deactive PRIM_RESTART_WITH_DRAW_ARRAYS temporarily, * and add manual restart to disp_vertices_seq. */ BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2); PUSH_DATA (ctx.push, 1); PUSH_DATA (ctx.push, info->indexed ? 
0xffffffff : info->restart_index); } else if (nvc0->state.prim_restart) { IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0); } nvc0->state.prim_restart = info->primitive_restart; if (info->indexed) { nvc0_push_map_idxbuf(&ctx, nvc0); index_size = nvc0->idxbuf.index_size; } else { if (unlikely(info->count_from_stream_output)) { struct pipe_context *pipe = &nvc0->base.pipe; struct nvc0_so_target *targ; targ = nvc0_so_target(info->count_from_stream_output); pipe->get_query_result(pipe, targ->pq, true, (void *)&vert_count); vert_count /= targ->stride; } ctx.idxbuf = NULL; /* shut up warnings */ index_size = 0; } ctx.instance_id = info->start_instance; prim = nvc0_prim_gl(info->mode); do { PUSH_SPACE(ctx.push, 9); ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count); if (unlikely(!ctx.dest)) break; if (unlikely(ctx.need_vertex_id)) nvc0_push_upload_vertex_ids(&ctx, nvc0, info); if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS) IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (ctx.push, prim); switch (index_size) { case 1: disp_vertices_i08(&ctx, info->start, vert_count); break; case 2: disp_vertices_i16(&ctx, info->start, vert_count); break; case 4: disp_vertices_i32(&ctx, info->start, vert_count); break; default: assert(index_size == 0); disp_vertices_seq(&ctx, info->start, vert_count); break; } PUSH_SPACE(ctx.push, 1); IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0); if (--inst_count) { prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; ++ctx.instance_id; } nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); nouveau_scratch_done(&nvc0->base); } while (inst_count); /* reset state and unmap buffers (no-op) */ if (unlikely(!ctx.edgeflag.value)) { PUSH_SPACE(ctx.push, 1); IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1); } if (unlikely(ctx.need_vertex_id)) { PUSH_SPACE(ctx.push, 4); IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0); BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1); PUSH_DATA 
(ctx.push, NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST | NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32); IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0); } if (info->indexed) nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer)); for (i = 0; i < nvc0->num_vtxbufs; ++i) nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer)); NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1); }
static void nvc0_push_upload_vertex_ids(struct push_context *ctx, struct nvc0_context *nvc0, const struct pipe_draw_info *info) { struct nouveau_pushbuf *push = ctx->push; struct nouveau_bo *bo; uint64_t va; uint32_t *data; uint32_t format; unsigned index_size = nvc0->idxbuf.index_size; unsigned i; unsigned a = nvc0->vertex->num_elements; if (!index_size || info->index_bias) index_size = 4; data = (uint32_t *)nouveau_scratch_get(&nvc0->base, info->count * index_size, &va, &bo); BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, bo); nouveau_pushbuf_validate(push); if (info->indexed) { if (!info->index_bias) { memcpy(data, ctx->idxbuf, info->count * index_size); } else { switch (nvc0->idxbuf.index_size) { case 1: copy_indices_u8(data, ctx->idxbuf, info->index_bias, info->count); break; case 2: copy_indices_u16(data, ctx->idxbuf, info->index_bias, info->count); break; default: copy_indices_u32(data, ctx->idxbuf, info->index_bias, info->count); break; } } } else { for (i = 0; i < info->count; ++i) data[i] = i + (info->start + info->index_bias); } format = (1 << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) | NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT; switch (index_size) { case 1: format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8; break; case 2: format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16; break; default: format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32; break; } PUSH_SPACE(push, 12); if (unlikely(nvc0->state.instance_elts & 2)) { nvc0->state.instance_elts &= ~2; IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(1)), 0); } BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1); PUSH_DATA (push, format); BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 3); PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size); PUSH_DATAh(push, va); PUSH_DATA (push, va); BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2); PUSH_DATAh(push, va + info->count * index_size - 1); PUSH_DATA (push, va + info->count * index_size - 1); #define 
NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) \ (((0x80 + (a) * 0x10) / 4) << NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT) BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_REPLACE), 1); PUSH_DATA (push, NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) | 1); }
void nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) { struct push_context ctx; unsigned i, index_size; unsigned inst_count = info->instance_count; unsigned vert_count = info->count; unsigned prim; nvc0_push_context_init(nvc0, &ctx); nvc0_vertex_configure_translate(nvc0, info->index_bias); if (unlikely(ctx.edgeflag.enabled)) nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias); ctx.prim_restart = info->primitive_restart; ctx.restart_index = info->restart_index; if (info->indexed) { nvc0_push_map_idxbuf(&ctx, nvc0); index_size = nvc0->idxbuf.index_size; if (info->primitive_restart) { BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2); PUSH_DATA (ctx.push, 1); PUSH_DATA (ctx.push, info->restart_index); } else if (nvc0->state.prim_restart) { IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0); } nvc0->state.prim_restart = info->primitive_restart; } else { if (unlikely(info->count_from_stream_output)) { struct pipe_context *pipe = &nvc0->base.pipe; struct nvc0_so_target *targ; targ = nvc0_so_target(info->count_from_stream_output); pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count); vert_count /= targ->stride; } ctx.idxbuf = NULL; /* shut up warnings */ index_size = 0; } ctx.instance_id = info->start_instance; prim = nvc0_prim_gl(info->mode); do { PUSH_SPACE(ctx.push, 9); ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count); if (unlikely(!ctx.dest)) break; if (unlikely(ctx.need_vertex_id)) nvc0_push_upload_vertex_ids(&ctx, nvc0, info); IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0); BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (ctx.push, prim); switch (index_size) { case 1: disp_vertices_i08(&ctx, info->start, vert_count); break; case 2: disp_vertices_i16(&ctx, info->start, vert_count); break; case 4: disp_vertices_i32(&ctx, info->start, vert_count); break; default: assert(index_size == 0); disp_vertices_seq(&ctx, info->start, vert_count); break; } PUSH_SPACE(ctx.push, 1); IMMED_NVC0(ctx.push, 
NVC0_3D(VERTEX_END_GL), 0); if (--inst_count) { prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; ++ctx.instance_id; } nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); nouveau_scratch_done(&nvc0->base); } while (inst_count); /* reset state and unmap buffers (no-op) */ if (unlikely(!ctx.edgeflag.value)) { PUSH_SPACE(ctx.push, 1); IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1); } if (unlikely(ctx.need_vertex_id)) { PUSH_SPACE(ctx.push, 4); IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0); BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1); PUSH_DATA (ctx.push, NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST | NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32); IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0); } if (info->indexed) nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer)); for (i = 0; i < nvc0->num_vtxbufs; ++i) nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer)); NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1); }
static void nve4_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q) { struct nvc0_screen *screen = nvc0->screen; struct pipe_context *pipe = &nvc0->base.pipe; struct nouveau_pushbuf *push = nvc0->base.pushbuf; uint32_t mask; uint32_t input[3]; const uint block[3] = { 32, 4, 1 }; const uint grid[3] = { screen->mp_count, 1, 1 }; unsigned c; const struct nve4_mp_pm_query_cfg *cfg; cfg = &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC]; if (unlikely(!screen->pm.prog)) { struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program); prog->type = PIPE_SHADER_COMPUTE; prog->translated = TRUE; prog->num_gprs = 14; prog->code = (uint32_t *)nve4_read_mp_pm_counters_code; prog->code_size = sizeof(nve4_read_mp_pm_counters_code); prog->parm_size = 12; screen->pm.prog = prog; } /* disable all counting */ PUSH_SPACE(push, 8); for (c = 0; c < 8; ++c) if (screen->pm.mp_counter[c]) IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0); /* release counters for this query */ for (c = 0; c < 8; ++c) { if (nvc0_query(screen->pm.mp_counter[c]) == q) { screen->pm.num_mp_pm_active[c / 4]--; screen->pm.mp_counter[c] = NULL; } } BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR, q->bo); PUSH_SPACE(push, 1); IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0); pipe->bind_compute_state(pipe, screen->pm.prog); input[0] = (q->bo->offset + q->base); input[1] = (q->bo->offset + q->base) >> 32; input[2] = q->sequence; pipe->launch_grid(pipe, block, grid, 0, input); nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY); /* re-activate other counters */ PUSH_SPACE(push, 16); mask = 0; for (c = 0; c < 8; ++c) { unsigned i; q = nvc0_query(screen->pm.mp_counter[c]); if (!q) continue; cfg = &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC]; for (i = 0; i < cfg->num_counters; ++i) { if (mask & (1 << q->ctr[i])) break; mask |= 1 << q->ctr[i]; BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1); PUSH_DATA (push, (cfg->ctr[i].func << 4) | 
cfg->ctr[i].mode); } } }
static void nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_query *q = nvc0_query(pq); if (q->state != NVC0_QUERY_STATE_ACTIVE) { /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */ if (q->rotate) nvc0_query_rotate(nvc0, q); q->sequence++; } q->state = NVC0_QUERY_STATE_ENDED; switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: nvc0_query_get(push, q, 0, 0x0100f002); if (--nvc0->screen->num_occlusion_queries_active == 0) { PUSH_SPACE(push, 1); IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0); } break; case PIPE_QUERY_PRIMITIVES_GENERATED: nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5)); break; case PIPE_QUERY_PRIMITIVES_EMITTED: nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5)); break; case PIPE_QUERY_SO_STATISTICS: nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5)); nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5)); break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: /* TODO: How do we sum over all streams for render condition ? 
*/ /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */ nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5)); nvc0_query_get(push, q, 0x20, 0x00005002); break; case PIPE_QUERY_TIMESTAMP: case PIPE_QUERY_TIME_ELAPSED: nvc0_query_get(push, q, 0, 0x00005002); break; case PIPE_QUERY_GPU_FINISHED: nvc0_query_get(push, q, 0, 0x1000f010); break; case PIPE_QUERY_PIPELINE_STATISTICS: nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */ nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */ break; case NVC0_QUERY_TFB_BUFFER_OFFSET: /* indexed by TFB buffer instead of by vertex stream */ nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5)); break; default: #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS if (q->type >= NVC0_QUERY_DRV_STAT(0) && q->type <= NVC0_QUERY_DRV_STAT_LAST) { q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value; return; } else #endif if (q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) nve4_mp_pm_query_end(nvc0, q); break; } if (q->is64bit) nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence); }
static void nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_query *q = nvc0_query(pq); /* For occlusion queries we have to change the storage, because a previous * query might set the initial render conition to FALSE even *after* we re- * initialized it to TRUE. */ if (q->rotate) { nvc0_query_rotate(nvc0, q); /* XXX: can we do this with the GPU, and sync with respect to a previous * query ? */ q->data[0] = q->sequence; /* initialize sequence */ q->data[1] = 1; /* initial render condition = TRUE */ q->data[4] = q->sequence + 1; /* for comparison COND_MODE */ q->data[5] = 0; } q->sequence++; switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: q->nesting = nvc0->screen->num_occlusion_queries_active++; if (q->nesting) { nvc0_query_get(push, q, 0x10, 0x0100f002); } else { PUSH_SPACE(push, 3); BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1); PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT); IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1); } break; case PIPE_QUERY_PRIMITIVES_GENERATED: nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5)); break; case PIPE_QUERY_PRIMITIVES_EMITTED: nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5)); break; case PIPE_QUERY_SO_STATISTICS: nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5)); nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5)); break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5)); break; case PIPE_QUERY_TIME_ELAPSED: nvc0_query_get(push, q, 0x10, 0x00005002); break; case PIPE_QUERY_PIPELINE_STATISTICS: nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */ nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */ nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */ nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */ 
nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */ nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */ nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */ nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */ nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */ nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */ break; default: #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS if (q->type >= NVC0_QUERY_DRV_STAT(0) && q->type <= NVC0_QUERY_DRV_STAT_LAST) { if (q->index >= 5) q->u.value = nvc0->screen->base.stats.v[q->index]; else q->u.value = 0; } else #endif if (q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) { nve4_mp_pm_query_begin(nvc0, q); } break; } q->state = NVC0_QUERY_STATE_ACTIVE; }
void nvc0_fragprog_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *fp = nvc0->fragprog; struct pipe_rasterizer_state *rast = &nvc0->rast->pipe; if (fp->fp.force_persample_interp != rast->force_persample_interp) { /* Force the program to be reuploaded, which will trigger interp fixups * to get applied */ if (fp->mem) nouveau_heap_free(&fp->mem); fp->fp.force_persample_interp = rast->force_persample_interp; } /* Shade model works well enough when both colors follow it. However if one * (or both) is explicitly set, then we have to go the patching route. */ bool has_explicit_color = fp->fp.colors && (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) || ((fp->fp.colors & 2) && !fp->fp.color_interp[1])); bool hwflatshade = false; if (has_explicit_color && fp->fp.flatshade != rast->flatshade) { /* Force re-upload */ if (fp->mem) nouveau_heap_free(&fp->mem); fp->fp.flatshade = rast->flatshade; /* Always smooth-shade in this mode, the shader will decide on its own * when to flat-shade. */ } else if (!has_explicit_color) { hwflatshade = rast->flatshade; /* No need to binary-patch the shader each time, make sure that it's set * up for the default behaviour. */ fp->fp.flatshade = 0; } if (hwflatshade != nvc0->state.flatshade) { nvc0->state.flatshade = hwflatshade; BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1); PUSH_DATA (push, hwflatshade ? 
NVC0_3D_SHADE_MODEL_FLAT : NVC0_3D_SHADE_MODEL_SMOOTH); } if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) { return; } if (!nvc0_program_validate(nvc0, fp)) return; nvc0_program_update_context_state(nvc0, fp, 4); if (fp->fp.early_z != nvc0->state.early_z_forced) { nvc0->state.early_z_forced = fp->fp.early_z; IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z); } BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2); PUSH_DATA (push, 0x51); PUSH_DATA (push, fp->code_base); BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1); PUSH_DATA (push, fp->num_gprs); BEGIN_NVC0(push, SUBC_3D(0x0360), 2); PUSH_DATA (push, 0x20164010); PUSH_DATA (push, 0x20); BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1); PUSH_DATA (push, fp->flags[0]); }
void nvc0_tfb_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_transform_feedback_state *tfb; unsigned b; if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb; else if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb; else tfb = nvc0->vertprog->tfb; IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0); if (tfb && tfb != nvc0->state.tfb) { for (b = 0; b < 4; ++b) { if (tfb->varying_count[b]) { unsigned n = (tfb->varying_count[b] + 3) / 4; BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3); PUSH_DATA (push, 0); PUSH_DATA (push, tfb->varying_count[b]); PUSH_DATA (push, tfb->stride[b]); BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n); PUSH_DATAp(push, tfb->varying_index[b], n); if (nvc0->tfbbuf[b]) nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b]; } else { IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0); } } } nvc0->state.tfb = tfb; if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS)) return; nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB); for (b = 0; b < nvc0->num_tfbbufs; ++b) { struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]); struct nv04_resource *buf = nv04_resource(targ->pipe.buffer); if (tfb) targ->stride = tfb->stride[b]; if (!(nvc0->tfbbuf_dirty & (1 << b))) continue; if (!targ->clean) nvc0_query_fifo_wait(push, targ->pq); BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5); PUSH_DATA (push, 1); PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); PUSH_DATA (push, targ->pipe.buffer_size); if (!targ->clean) { nvc0_query_pushbuf_submit(push, targ->pq, 0x4); } else { PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */ targ->clean = FALSE; } BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR); } for (; b < 4; ++b) IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0); }
/**
 * Set or clear the render condition.
 *
 * A hardware comparison mode is derived from the query type, @condition
 * and whether waiting is requested; it is ALWAYS when there is no query.
 * Query, condition, derived mode and wait mode are recorded in the
 * context.
 *
 * Without a query only COND_MODE is written (3D, and compute when the
 * screen has a compute object).  With a query, the push buffer optionally
 * waits for it, and the query address is written to the 3D, 2D and (when
 * present) compute COND_ADDRESS methods; 3D and compute also receive the
 * mode.
 *
 * @param pipe       gallium context.
 * @param pq         predicate query, or NULL to disable the condition.
 * @param condition  selects between the EQUAL and NOT_EQUAL comparison.
 * @param mode       PIPE_RENDER_COND_* wait mode.
 */
static void
nvc0_render_condition(struct pipe_context *pipe,
                      struct pipe_query *pq,
                      boolean condition, enum pipe_render_cond_flag mode)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_query *q = nvc0_query(pq);
   struct nvc0_hw_query *hq = nvc0_hw_query(q);
   uint32_t cond;
   bool wait =
      mode != PIPE_RENDER_COND_NO_WAIT &&
      mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   if (!pq) {
      cond = NVC0_3D_COND_MODE_ALWAYS;
   }
   else {
      /* NOTE: comparison of 2 queries only works if both have completed */
      switch (q->type) {
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
         cond = condition ? NVC0_3D_COND_MODE_EQUAL :
                            NVC0_3D_COND_MODE_NOT_EQUAL;
         wait = true;
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
         /* a result that is already available can always be compared */
         if (hq->state == NVC0_HW_QUERY_STATE_READY)
            wait = true;
         if (likely(!condition)) {
            cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
         } else {
            cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
         }
         break;
      default:
         assert(!"render condition query not a predicate");
         cond = NVC0_3D_COND_MODE_ALWAYS;
         break;
      }
   }

   nvc0->cond_query = pq;
   nvc0->cond_cond = condition;
   nvc0->cond_condmode = cond;
   nvc0->cond_mode = mode;

   if (!pq) {
      PUSH_SPACE(push, 2);
      IMMED_NVC0(push, NVC0_3D(COND_MODE), cond);
      if (nvc0->screen->compute)
         IMMED_NVC0(push, NVC0_CP(COND_MODE), cond);
      return;
   }

   if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
      nvc0_hw_query_fifo_wait(nvc0, q);

   /* 3D: header + 3 words, 2D: header + 2 words, compute: header + 3
    * words, i.e. up to 11 words in total.
    */
   PUSH_SPACE(push, 11);
   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, hq->bo->offset + hq->offset);
   PUSH_DATA (push, hq->bo->offset + hq->offset);
   PUSH_DATA (push, cond);
   BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, hq->bo->offset + hq->offset);
   PUSH_DATA (push, hq->bo->offset + hq->offset);
   if (nvc0->screen->compute) {
      BEGIN_NVC0(push, NVC0_CP(COND_ADDRESS_HIGH), 3);
      PUSH_DATAh(push, hq->bo->offset + hq->offset);
      PUSH_DATA (push, hq->bo->offset + hq->offset);
      PUSH_DATA (push, cond);
   }
}