/* TCL render. */ static GLboolean r200_run_tcl_render( GLcontext *ctx, struct tnl_pipeline_stage *stage ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; GLuint i; GLubyte *vimap_rev; /* use hw fixed order for simplicity, pos 0, weight 1, normal 2, fog 3, color0 - color3 4-7, texcoord0 - texcoord5 8-13, pos 1 14. Must not use more than 12 of those at the same time. */ GLubyte map_rev_fixed[15] = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; /* TODO: separate this from the swtnl pipeline */ if (rmesa->TclFallback) return GL_TRUE; /* fallback to software t&l */ if (R200_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); if (VB->Count == 0) return GL_FALSE; /* Validate state: */ if (rmesa->NewGLState) r200ValidateState( ctx ); if (!ctx->VertexProgram._Enabled) { /* NOTE: inputs != tnl->render_inputs - these are the untransformed * inputs. */ map_rev_fixed[0] = VERT_ATTRIB_POS; /* technically there is no reason we always need VA_COLOR0. In theory could disable it depending on lighting, color materials, texturing... */ map_rev_fixed[4] = VERT_ATTRIB_COLOR0; if (ctx->Light.Enabled) { map_rev_fixed[2] = VERT_ATTRIB_NORMAL; } /* this also enables VA_COLOR1 when using separate specular lighting model, which is unnecessary. FIXME: OTOH, we're missing the case where a ATI_fragment_shader accesses the secondary color (if lighting is disabled). The chip seems misconfigured for that though elsewhere (tcl output, might lock up) */ if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { map_rev_fixed[5] = VERT_ATTRIB_COLOR1; } if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) { map_rev_fixed[3] = VERT_ATTRIB_FOG; } for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { if (rmesa->TexGenNeedNormals[i]) { map_rev_fixed[2] = VERT_ATTRIB_NORMAL; } map_rev_fixed[8 + i] = VERT_ATTRIB_TEX0 + i; } } vimap_rev = &map_rev_fixed[0]; } else { /* vtx_tcl_output_vtxfmt_0/1 need to match configuration of "fragment part", since using some vertex interpolator later which is not in out_vtxfmt0/1 will lock up. It seems to be ok to write in vertex prog to a not enabled output however, so just don't mess with it. We only need to change compsel. */ GLuint out_compsel = 0; GLuint vp_out = rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten; vimap_rev = &rmesa->curr_vp_hw->inputmap_rev[0]; assert(vp_out & (1 << VERT_RESULT_HPOS)); out_compsel = R200_OUTPUT_XYZW; if (vp_out & (1 << VERT_RESULT_COL0)) { out_compsel |= R200_OUTPUT_COLOR_0; } if (vp_out & (1 << VERT_RESULT_COL1)) { out_compsel |= R200_OUTPUT_COLOR_1; } if (vp_out & (1 << VERT_RESULT_FOGC)) { out_compsel |= R200_OUTPUT_DISCRETE_FOG; } if (vp_out & (1 << VERT_RESULT_PSIZ)) { out_compsel |= R200_OUTPUT_PT_SIZE; } for (i = VERT_RESULT_TEX0; i < VERT_RESULT_TEX6; i++) { if (vp_out & (1 << i)) { out_compsel |= R200_OUTPUT_TEX_0 << (i - VERT_RESULT_TEX0); } } if (rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] != out_compsel) { R200_STATECHANGE( rmesa, vtx ); rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = out_compsel; } } /* Do the actual work: */ r200ReleaseArrays( ctx, ~0 /* stage->changed_inputs */ ); r200EmitArrays( ctx, vimap_rev ); rmesa->tcl.Elts = VB->Elts; for (i = 0 ; i < VB->PrimitiveCount ; i++) { GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); GLuint start = VB->Primitive[i].start; GLuint length = VB->Primitive[i].count; if (!length) continue; if (rmesa->tcl.Elts) r200EmitEltPrimitive( ctx, start, start+length, prim ); else r200EmitPrimitive( ctx, start, start+length, prim ); } return GL_FALSE; /* finished the pipe */ }
/* TCL render. */ static GLboolean r200_run_tcl_render( struct gl_context *ctx, struct tnl_pipeline_stage *stage ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; GLuint i; GLubyte *vimap_rev; /* use hw fixed order for simplicity, pos 0, weight 1, normal 2, fog 3, color0 - color3 4-7, texcoord0 - texcoord5 8-13, pos 1 14. Must not use more than 12 of those at the same time. */ GLubyte map_rev_fixed[15] = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; /* TODO: separate this from the swtnl pipeline */ if (rmesa->radeon.TclFallback) return GL_TRUE; /* fallback to software t&l */ radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s\n", __func__); if (VB->Count == 0) return GL_FALSE; /* Validate state: */ if (rmesa->radeon.NewGLState) if (!r200ValidateState( ctx )) return GL_TRUE; /* fallback to sw t&l */ if (!_mesa_arb_vertex_program_enabled(ctx)) { /* NOTE: inputs != tnl->render_inputs - these are the untransformed * inputs. */ map_rev_fixed[0] = VERT_ATTRIB_POS; /* technically there is no reason we always need VA_COLOR0. In theory could disable it depending on lighting, color materials, texturing... */ map_rev_fixed[4] = VERT_ATTRIB_COLOR0; if (ctx->Light.Enabled) { map_rev_fixed[2] = VERT_ATTRIB_NORMAL; } /* this also enables VA_COLOR1 when using separate specular lighting model, which is unnecessary. FIXME: OTOH, we're missing the case where a ATI_fragment_shader accesses the secondary color (if lighting is disabled). The chip seems misconfigured for that though elsewhere (tcl output, might lock up) */ if (_mesa_need_secondary_color(ctx)) { map_rev_fixed[5] = VERT_ATTRIB_COLOR1; } if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) { map_rev_fixed[3] = VERT_ATTRIB_FOG; } for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) { if (ctx->Texture.Unit[i]._Current) { if (rmesa->TexGenNeedNormals[i]) { map_rev_fixed[2] = VERT_ATTRIB_NORMAL; } map_rev_fixed[8 + i] = VERT_ATTRIB_TEX0 + i; } } vimap_rev = &map_rev_fixed[0]; } else { /* vtx_tcl_output_vtxfmt_0/1 need to match configuration of "fragment part", since using some vertex interpolator later which is not in out_vtxfmt0/1 will lock up. It seems to be ok to write in vertex prog to a not enabled output however, so just don't mess with it. We only need to change compsel. */ GLuint out_compsel = 0; const GLbitfield64 vp_out = rmesa->curr_vp_hw->mesa_program.info.outputs_written; vimap_rev = &rmesa->curr_vp_hw->inputmap_rev[0]; assert(vp_out & BITFIELD64_BIT(VARYING_SLOT_POS)); out_compsel = R200_OUTPUT_XYZW; if (vp_out & BITFIELD64_BIT(VARYING_SLOT_COL0)) { out_compsel |= R200_OUTPUT_COLOR_0; } if (vp_out & BITFIELD64_BIT(VARYING_SLOT_COL1)) { out_compsel |= R200_OUTPUT_COLOR_1; } if (vp_out & BITFIELD64_BIT(VARYING_SLOT_FOGC)) { out_compsel |= R200_OUTPUT_DISCRETE_FOG; } if (vp_out & BITFIELD64_BIT(VARYING_SLOT_PSIZ)) { out_compsel |= R200_OUTPUT_PT_SIZE; } for (i = VARYING_SLOT_TEX0; i < VARYING_SLOT_TEX6; i++) { if (vp_out & BITFIELD64_BIT(i)) { out_compsel |= R200_OUTPUT_TEX_0 << (i - VARYING_SLOT_TEX0); } } if (rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] != out_compsel) { R200_STATECHANGE( rmesa, vtx ); rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = out_compsel; } } /* Do the actual work: */ radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ ); GLuint emit_end = r200EnsureEmitSize( ctx, vimap_rev ) + rmesa->radeon.cmdbuf.cs->cdw; r200EmitArrays( ctx, vimap_rev ); for (i = 0 ; i < VB->PrimitiveCount ; i++) { GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); GLuint start = VB->Primitive[i].start; GLuint length = VB->Primitive[i].count; if (!length) continue; if (VB->Elts) r200EmitEltPrimitive( ctx, start, start+length, prim ); else r200EmitPrimitive( ctx, start, start+length, prim ); } if ( emit_end < rmesa->radeon.cmdbuf.cs->cdw ) WARN_ONCE("Rendering was %d commands larger than predicted size." " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end); return GL_FALSE; /* finished the pipe */ }
static void flush_prims( r200ContextPtr rmesa ) { int i,j; struct r200_dma_region tmp = rmesa->dma.current; tmp.buf->refcount++; tmp.aos_size = rmesa->vb.vertex_size; tmp.aos_stride = rmesa->vb.vertex_size; tmp.aos_start = GET_START(&tmp); rmesa->dma.current.ptr = rmesa->dma.current.start += (rmesa->vb.initial_counter - rmesa->vb.counter) * rmesa->vb.vertex_size * 4; rmesa->tcl.vertex_format = rmesa->vb.vtxfmt_0; rmesa->tcl.aos_components[0] = &tmp; rmesa->tcl.nr_aos_components = 1; rmesa->dma.flush = NULL; /* Optimize the primitive list: */ if (rmesa->vb.nrprims > 1) { for (j = 0, i = 1 ; i < rmesa->vb.nrprims; i++) { int pj = rmesa->vb.primlist[j].prim & 0xf; int pi = rmesa->vb.primlist[i].prim & 0xf; if (pj == pi && discreet_gl_prim[pj] && rmesa->vb.primlist[i].start == rmesa->vb.primlist[j].end) { rmesa->vb.primlist[j].end = rmesa->vb.primlist[i].end; } else { j++; if (j != i) rmesa->vb.primlist[j] = rmesa->vb.primlist[i]; } } rmesa->vb.nrprims = j+1; } if (rmesa->vb.vtxfmt_0 != rmesa->hw.vtx.cmd[VTX_VTXFMT_0] || rmesa->vb.vtxfmt_1 != rmesa->hw.vtx.cmd[VTX_VTXFMT_1]) { R200_STATECHANGE( rmesa, vtx ); rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = rmesa->vb.vtxfmt_0; rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = rmesa->vb.vtxfmt_1; } for (i = 0 ; i < rmesa->vb.nrprims; i++) { if (R200_DEBUG & DEBUG_PRIMS) fprintf(stderr, "vtxfmt prim %d: %s %d..%d\n", i, _mesa_lookup_enum_by_nr( rmesa->vb.primlist[i].prim & PRIM_MODE_MASK ), rmesa->vb.primlist[i].start, rmesa->vb.primlist[i].end); if (rmesa->vb.primlist[i].start < rmesa->vb.primlist[i].end) r200EmitPrimitive( rmesa->glCtx, rmesa->vb.primlist[i].start, rmesa->vb.primlist[i].end, rmesa->vb.primlist[i].prim ); } rmesa->vb.nrprims = 0; r200ReleaseDmaRegion( rmesa, &tmp, __FUNCTION__ ); }