void _tnl_run_pipeline( struct gl_context *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   unsigned short __tmp;
   GLuint i;

   if (!tnl->vb.Count)
      return;

   /* Check for changed input sizes or change in stride to/from zero
    * (ie const or non-const).
    */
   if (check_input_changes( ctx ) || tnl->pipeline.new_state) {
      if (ctx->VertexProgram._MaintainTnlProgram)
         _tnl_UpdateFixedFunctionProgram( ctx );

      for (i = 0; i < tnl->pipeline.nr_stages; i++) {
         struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
         if (s->validate)
            s->validate( ctx, s );
      }

      tnl->pipeline.new_state = 0;
      tnl->pipeline.input_changes = 0;

      /* Pipeline can only change its output in response to either a
       * statechange or an input size/stride change.  No other changes
       * are allowed.
       */
      if (check_output_changes( ctx ))
         _tnl_notify_pipeline_output_change( ctx );
   }

#ifndef _OPENMP
   /* Don't adjust the FPU precision mode if multiple threads may be used.
    * That would require the additional threads to change the FPU mode too,
    * which is quite a mess as it would have to be done in every parallelized
    * section; otherwise the master thread and the other threads would run in
    * different modes, producing inconsistent results.
    * Note that x64 implementations don't define/use START_FAST_MATH, so this
    * "hack" is only used in i386 mode.
    */
   START_FAST_MATH(__tmp);
#endif

   for (i = 0; i < tnl->pipeline.nr_stages; i++) {
      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
      if (!s->run( ctx, s ))
         break;
   }

#ifndef _OPENMP
   END_FAST_MATH(__tmp);
#endif
}
void _tnl_run_pipeline( GLcontext *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   unsigned short __tmp;
   GLuint i;

   if (!tnl->vb.Count)
      return;

   /* Check for changed input sizes or change in stride to/from zero
    * (ie const or non-const).
    */
   if (check_input_changes( ctx ) || tnl->pipeline.new_state) {
      if (ctx->VertexProgram._MaintainTnlProgram)
         _tnl_UpdateFixedFunctionProgram( ctx );

      for (i = 0; i < tnl->pipeline.nr_stages; i++) {
         struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
         if (s->validate)
            s->validate( ctx, s );
      }

      tnl->pipeline.new_state = 0;
      tnl->pipeline.input_changes = 0;

      /* Pipeline can only change its output in response to either a
       * statechange or an input size/stride change.  No other changes
       * are allowed.
       */
      if (check_output_changes( ctx ))
         _tnl_notify_pipeline_output_change( ctx );
   }

   START_FAST_MATH(__tmp);

   for (i = 0; i < tnl->pipeline.nr_stages; i++) {
      struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
      if (!s->run( ctx, s ))
         break;
   }

   END_FAST_MATH(__tmp);
}
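For context on the #ifndef _OPENMP guard in the newer version above: the x87 control word that START_FAST_MATH touches is per-thread state, so changing it only on the master thread would leave OpenMP worker threads running at a different precision. The sketch below shows the usual save/switch/restore shape of such a macro pair on i386; the SKETCH_* names and the exact control-word value are illustrative assumptions, not necessarily Mesa's definitions.

/* Minimal sketch of an i386 fast-math macro pair: save the current x87
 * control word, drop to single precision with all exceptions masked,
 * and restore the saved word afterwards.  Illustrative only.
 */
#if defined(__GNUC__) && defined(__i386__)
#define SKETCH_START_FAST_MATH(cw)                                          \
do {                                                                        \
   static const unsigned short fast_cw = 0x003f; /* PC=single, all exceptions masked */ \
   __asm__ ("fnstcw %0" : "=m" (cw));            /* save caller's control word */       \
   __asm__ ("fldcw %0" : : "m" (fast_cw));       /* enter fast mode */                  \
} while (0)

#define SKETCH_END_FAST_MATH(cw) \
   __asm__ ("fldcw %0" : : "m" (cw))             /* restore saved control word */
#endif

This also explains the otherwise unused-looking unsigned short __tmp in both versions: it holds the saved 16-bit control word between the START and END macros.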
static GLboolean evergreenTryDrawPrims(GLcontext *ctx,
                                       const struct gl_client_array *arrays[],
                                       const struct _mesa_prim *prim,
                                       GLuint nr_prims,
                                       const struct _mesa_index_buffer *ib,
                                       GLuint min_index,
                                       GLuint max_index )
{
    context_t *context = EVERGREEN_CONTEXT(ctx);
    radeonContextPtr radeon = &context->radeon;
    GLuint i, id = 0;
    struct radeon_renderbuffer *rrb;

    if (ctx->NewState)
        _mesa_update_state( ctx );

    if (evergreen_check_fallbacks(ctx))
        return GL_FALSE;

    _tnl_UpdateFixedFunctionProgram(ctx);
    evergreenSetVertexFormat(ctx, arrays, max_index + 1);

    /* shaders need to be updated before buffers are validated */
    evergreenUpdateShaders(ctx);
    if (!evergreenValidateBuffers(ctx))
        return GL_FALSE;

    /* always emit CB base to prevent
     * lock ups on some chips.
     */
    EVERGREEN_STATECHANGE(context, cb);
    /* mark vtx as dirty since it changes per-draw */
    EVERGREEN_STATECHANGE(context, vtx);

    evergreenSetScissor(context);

    evergreenSetupVertexProgram(ctx);
    evergreenSetupFragmentProgram(ctx);
    evergreenUpdateShaderStates(ctx);

    GLuint emit_end = evergreenPredictRenderSize(ctx, prim, ib, nr_prims)
        + context->radeon.cmdbuf.cs->cdw;

    /* evergreenPredictRenderSize will call radeonReleaseDmaRegions,
     * so update the VP/FP const buf after it.
     */
    evergreenSetupVPconstants(ctx);
    evergreenSetupFPconstants(ctx);

    evergreenSetupIndexBuffer(ctx, ib);

    evergreenSetupStreams(ctx, arrays, max_index + 1);

    radeonEmitState(radeon);

    radeon_debug_add_indent();
    for (i = 0; i < nr_prims; ++i) {
        if (context->ind_buf.bo)
            evergreenRunRenderPrimitive(ctx,
                                        prim[i].start,
                                        prim[i].start + prim[i].count,
                                        prim[i].mode,
                                        prim[i].basevertex);
        else
            evergreenRunRenderPrimitiveImmediate(ctx,
                                                 prim[i].start,
                                                 prim[i].start + prim[i].count,
                                                 prim[i].mode);
    }
    radeon_debug_remove_indent();

    /* Flush render op cached for last several quads. */
    /* XXX drm should handle this in fence submit */
    //evergreeWaitForIdleClean(context);

    rrb = radeon_get_colorbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     CB_ACTION_ENA_bit | (1 << (id + 6)));

    rrb = radeon_get_depthbuffer(&context->radeon);
    if (rrb && rrb->bo)
        r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
                     DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);

    evergreenFreeData(ctx);

    if (emit_end < context->radeon.cmdbuf.cs->cdw) {
        WARN_ONCE("Rendering was %d commands larger than predicted size."
                  " We might overflow command buffer.\n",
                  context->radeon.cmdbuf.cs->cdw - emit_end);
    }

    return GL_TRUE;
}
static GLboolean r300TryDrawPrims(GLcontext *ctx,
                                  const struct gl_client_array *arrays[],
                                  const struct _mesa_prim *prim,
                                  GLuint nr_prims,
                                  const struct _mesa_index_buffer *ib,
                                  GLuint min_index,
                                  GLuint max_index )
{
    struct r300_context *r300 = R300_CONTEXT(ctx);
    GLuint i;

    radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: %u (%d-%d) cs begin at %d\n",
                 __FUNCTION__, nr_prims, min_index, max_index,
                 r300->radeon.cmdbuf.cs->cdw );

    if (ctx->NewState)
        _mesa_update_state( ctx );

    if (r300->options.hw_tcl_enabled)
        _tnl_UpdateFixedFunctionProgram(ctx);

    r300UpdateShaders(r300);

    r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS,
                       !r300ValidateBuffers(ctx));

    r300SetVertexFormat(ctx, arrays, max_index + 1);

    if (r300->fallback)
        return GL_FALSE;

    r300SetupVAP(ctx, r300->selected_vp->code.InputsRead,
                 r300->selected_vp->code.OutputsWritten);

    r300UpdateShaderStates(r300);

    /* ensure we have the cmd buf space in advance to cover
     * the state + DMA AOS pointers */
    GLuint emit_end = r300PredictTryDrawPrimsSize(ctx, nr_prims, prim)
        + r300->radeon.cmdbuf.cs->cdw;

    r300SetupIndexBuffer(ctx, ib);

    r300AllocDmaRegions(ctx, arrays, max_index + 1);

    if (r300->fallback)
        return GL_FALSE;

    r300EmitCacheFlush(r300);
    radeonEmitState(&r300->radeon);

    for (i = 0; i < nr_prims; ++i) {
        r300RunRenderPrimitive(ctx, prim[i].start,
                               prim[i].start + prim[i].count, prim[i].mode);
    }

    r300EmitCacheFlush(r300);

    r300FreeData(ctx);

    radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: %u (%d-%d) cs ending at %d\n",
                 __FUNCTION__, nr_prims, min_index, max_index,
                 r300->radeon.cmdbuf.cs->cdw );

    if (emit_end < r300->radeon.cmdbuf.cs->cdw)
        WARN_ONCE("Rendering was %d commands larger than predicted size."
                  " We might overflow command buffer.\n",
                  r300->radeon.cmdbuf.cs->cdw - emit_end);

    return GL_TRUE;
}
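Both TryDrawPrims functions above use the same predict-then-verify pattern around the command stream: record where the emit should end (emit_end = predicted size + current cdw) before writing any packets, then compare against the actual cdw afterwards and warn once if the prediction was short. A condensed, self-contained sketch of that pattern follows; cmd_stream, predict_render_size, and emit_packets are hypothetical stand-ins, not driver APIs.

#include <stdio.h>

struct cmd_stream {
    unsigned cdw;                /* dwords written so far */
};

/* Hypothetical stand-ins for the driver's size predictor and emitter. */
static unsigned predict_render_size(void) { return 16; }           /* estimated cost */
static void emit_packets(struct cmd_stream *cs) { cs->cdw += 20; } /* actual cost */

static void draw_checked(struct cmd_stream *cs)
{
    /* Snapshot the predicted end mark before emitting anything. */
    unsigned emit_end = predict_render_size() + cs->cdw;

    emit_packets(cs);

    /* Writing past the prediction means the space reserved up front may
     * not have been enough; report it so the estimate can be fixed. */
    if (emit_end < cs->cdw)
        fprintf(stderr, "Rendering was %u commands larger than predicted.\n",
                cs->cdw - emit_end);
}

int main(void)
{
    struct cmd_stream cs = { 0 };
    draw_checked(&cs);           /* the stub costs trigger the warning: 20 > 16 */
    return 0;
}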