/**
 * Latch the most recently specified per-vertex values (normal, colors,
 * fog coordinate, texcoords, color index, edge flag, material) from an
 * immediate struct into the ctx->Current attribute group.
 *
 * \param ctx    GL context whose current values are updated.
 * \param IM     immediate struct supplying the values.
 * \param flag   bitmask of VERT_BIT_* flags selecting what gets copied.
 * \param count  index of the slot in \p IM that is read.
 */
void _tnl_copy_to_current( GLcontext *ctx, struct immediate *IM,
                           GLuint flag, GLuint count )
{
   GLuint unit;

   if (MESA_VERBOSE & VERBOSE_IMMEDIATE) {
      _tnl_print_vert_flags("copy to current", flag);
   }

   /* XXX should be able to replace these conditions with a loop over
    * the 16 vertex attributes.
    */
   if (flag & VERT_BIT_NORMAL) {
      COPY_4FV( ctx->Current.Attrib[VERT_ATTRIB_NORMAL],
                IM->Attrib[VERT_ATTRIB_NORMAL][count] );
   }

   if (flag & VERT_BIT_COLOR0) {
      COPY_4FV( ctx->Current.Attrib[VERT_ATTRIB_COLOR0],
                IM->Attrib[VERT_ATTRIB_COLOR0][count] );

      /* With color material enabled, the new current color also has to
       * be pushed into the material state and the driver told about it.
       */
      if (ctx->Light.ColorMaterialEnabled) {
         _mesa_update_color_material( ctx,
                                      ctx->Current.Attrib[VERT_ATTRIB_COLOR0] );
         TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
      }
   }

   if (flag & VERT_BIT_COLOR1) {
      COPY_4FV( ctx->Current.Attrib[VERT_ATTRIB_COLOR1],
                IM->Attrib[VERT_ATTRIB_COLOR1][count] );
   }

   /* Only the first component of the fog attribute is copied. */
   if (flag & VERT_BIT_FOG) {
      ctx->Current.Attrib[VERT_ATTRIB_FOG][0] =
         IM->Attrib[VERT_ATTRIB_FOG][count][0];
   }

   if (flag & VERT_BIT_SIX) {
      COPY_4FV( ctx->Current.Attrib[VERT_ATTRIB_SIX],
                IM->Attrib[VERT_ATTRIB_SIX][count] );
   }

   if (flag & VERT_BIT_SEVEN) {
      COPY_4FV( ctx->Current.Attrib[VERT_ATTRIB_SEVEN],
                IM->Attrib[VERT_ATTRIB_SEVEN][count] );
   }

   /* Texture coordinates: one attribute slot per texture unit. */
   if (flag & VERT_BITS_TEX_ANY) {
      for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++) {
         if (flag & VERT_BIT_TEX(unit)) {
            COPY_4FV( ctx->Current.Attrib[VERT_ATTRIB_TEX0 + unit],
                      IM->Attrib[VERT_ATTRIB_TEX0 + unit][count] );
         }
      }
   }

   if (flag & VERT_BIT_INDEX) {
      ctx->Current.Index = IM->Index[count];
   }

   if (flag & VERT_BIT_EDGEFLAG) {
      ctx->Current.EdgeFlag = IM->EdgeFlag[count];
   }

   /* Materials are taken from the LastMaterial slot, not from 'count'. */
   if (flag & VERT_BIT_MATERIAL) {
      _mesa_update_material( ctx,
                             IM->Material[IM->LastMaterial],
                             IM->MaterialOrMask );
      TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
   }
}
/**
 * Release the TCL DMA regions that correspond to the vertex inputs
 * selected in \p newinputs.
 *
 * \param ctx        GL context.
 * \param newinputs  bitmask of VERT_BIT_* flags; the region belonging to
 *                   each set bit is handed to r200ReleaseDmaRegion().
 */
void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
   r200ContextPtr rmesa = R200_CONTEXT( ctx );
   GLuint unit;

   if (newinputs & VERT_BIT_POS) {
      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
   }

   if (newinputs & VERT_BIT_NORMAL) {
      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
   }

   if (newinputs & VERT_BIT_FOG) {
      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
   }

   if (newinputs & VERT_BIT_COLOR0) {
      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
   }

   if (newinputs & VERT_BIT_COLOR1) {
      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
   }

   /* One texcoord region per texture unit. */
   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++) {
      if (newinputs & VERT_BIT_TEX(unit)) {
         r200ReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
      }
   }
}
/**
 * Work out which vertex inputs the render stage needs for the current
 * GL state and store the resulting VERT_BIT_* mask in stage->inputs.
 * Clip coordinates are always requested.
 */
static void _gld_mesa_render_stage_check( GLcontext *ctx,
                                          struct gl_pipeline_stage *stage )
{
   GLuint needed = VERT_BIT_CLIP;
   GLuint unit;

   if (!ctx->Visual.rgbMode) {
      /* Color-index visual. */
      needed |= VERT_BIT_INDEX;
   }
   else {
      needed |= VERT_BIT_COLOR0;

      if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
         needed |= VERT_BIT_COLOR1;   /* VERT_BIT_SPEC_RGB */
      }

      /* Texcoords are requested per unit; there is no global
       * "any texture enabled" test in front of this loop.
       */
      for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++) {
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            needed |= VERT_BIT_TEX(unit);
         }
      }
   }

   if (ctx->Point._Attenuated) {
      needed |= VERT_BIT_POINT_SIZE;
   }

   /* How do drivers turn this off? */
   if (ctx->Fog.Enabled) {
      needed |= VERT_BIT_FOG;         /* VERT_FOG_COORD */
   }

   if (ctx->_TriangleCaps & DD_TRI_UNFILLED) {
      needed |= VERT_BIT_EDGEFLAG;
   }

   /* Feedback mode asks for every texcoord set. */
   if (ctx->RenderMode == GL_FEEDBACK) {
      needed |= VERT_BITS_TEX_ANY;
   }

   stage->inputs = needed;
}
/**
 * Bind the client vertex arrays for the range [start, count) to the TNL
 * vertex buffer.
 *
 * The general VB fields are reset first, then every array named in the
 * pipeline's input mask is imported and the matching VB pointer is aimed
 * at the imported copy in tnl->array_inputs.
 *
 * \param ctx    GL context.
 * \param start  first array element of the range.
 * \param count  end of the range; VB->Count is set to count - start.
 */
void _tnl_vb_bind_arrays( GLcontext *ctx, GLint start, GLsizei count )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;
   struct vertex_arrays *tmp = &tnl->array_inputs;
   GLuint inputs = tnl->pipeline.inputs;

   /* Reset the per-draw vertex buffer state. */
   VB->Count = count - start;
   VB->FirstClipped = VB->Count;
   VB->Elts = NULL;
   VB->MaterialMask = NULL;
   VB->Material = NULL;
   VB->Flag = NULL;
   VB->Primitive = tnl->tmp_primitive;
   VB->PrimitiveLength = tnl->tmp_primitive_length;
   VB->import_data = _tnl_upgrade_client_data;
   VB->importable_data = inputs & VERT_BITS_FIXUP;

   /* With locked arrays the requested range must be the locked range. */
   if (ctx->Array.LockCount) {
      ASSERT(start == (GLint) ctx->Array.LockFirst);
      ASSERT(count == (GLint) ctx->Array.LockCount);
   }

   _ac_import_range( ctx, start, count );

   if (inputs & VERT_BIT_POS) {
      _tnl_import_vertex( ctx, 0, 0 );
      tmp->Obj.count = VB->Count;
      VB->ObjPtr = &tmp->Obj;
   }

   if (inputs & VERT_BIT_NORMAL) {
      _tnl_import_normal( ctx, 0, 0 );
      tmp->Normal.count = VB->Count;
      VB->NormalPtr = &tmp->Normal;
   }

   if (inputs & VERT_BIT_COLOR0) {
      _tnl_import_color( ctx, 0, 0, 0 );
      VB->ColorPtr[0] = &tmp->Color;
      VB->ColorPtr[1] = NULL;
   }

   if (inputs & VERT_BITS_TEX_ANY) {
      GLuint unit;

      for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++) {
         if (inputs & VERT_BIT_TEX(unit)) {
            _tnl_import_texcoord( ctx, unit, GL_FALSE, GL_FALSE );
            tmp->TexCoord[unit].count = VB->Count;
            VB->TexCoordPtr[unit] = &tmp->TexCoord[unit];
         }
      }
   }

   if (inputs & VERT_BIT_INDEX) {
      _tnl_import_index( ctx, 0, 0 );
      tmp->Index.count = VB->Count;
      VB->IndexPtr[0] = &tmp->Index;
      VB->IndexPtr[1] = NULL;
   }

   if (inputs & VERT_BIT_FOG) {
      _tnl_import_fogcoord( ctx, 0, 0 );
      tmp->FogCoord.count = VB->Count;
      VB->FogCoordPtr = &tmp->FogCoord;
   }

   if (inputs & VERT_BIT_EDGEFLAG) {
      _tnl_import_edgeflag( ctx, GL_TRUE, sizeof(GLboolean) );
      VB->EdgeFlag = (GLboolean *) tmp->EdgeFlag.data;
   }

   if (inputs & VERT_BIT_COLOR1) {
      _tnl_import_secondarycolor( ctx, 0, 0, 0 );
      VB->SecondaryColorPtr[0] = &tmp->SecondaryColor;
      VB->SecondaryColorPtr[1] = NULL;
   }

   /* XXX not 100% sure this is finished.  Keith should probably inspect. */
   if (ctx->VertexProgram.Enabled) {
      GLuint attr;

      for (attr = 0 ; attr < VERT_ATTRIB_MAX ; attr++) {
         /* XXX check program->InputsRead to reduce work here */
         _tnl_import_attrib( ctx, attr, GL_FALSE, GL_TRUE );
         VB->AttribPtr[attr] = &tmp->Attribs[attr];
      }
   }
}
/**
 * Callback for VB stages that need to improve the quality of arrays
 * bound to the VB.  This is only necessary for client arrays which
 * have not been transformed at any point in the pipeline.
 *
 * For each required array whose flags show it falls short of what is
 * asked for, the array is re-imported and its bit is cleared from
 * VB->importable_data.
 *
 * \param ctx       GL context.
 * \param required  bitmask of VERT_BIT_* flags naming the arrays needed.
 * \param flags     bitmask of VEC_* flags (ex: VEC_NOT_WRITEABLE,
 *                  VEC_BAD_STRIDE) describing the properties to fix.
 */
static void _tnl_upgrade_client_data( GLcontext *ctx,
                                      GLuint required,
                                      GLuint flags )
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   struct vertex_arrays *inputs = &TNL_CONTEXT(ctx)->array_inputs;
   const GLboolean need_writeable = (flags & VEC_NOT_WRITEABLE) != 0;
   const GLboolean need_stride = (flags & VEC_BAD_STRIDE) != 0;
   GLuint ca_flags = 0;
   GLuint unit;

   /* 'inputs' is only referenced from ASSERTs. */
   (void) inputs;

   /* The color arrays are tested against CA_* flags, not VEC_* flags. */
   if (need_writeable || need_stride) {
      ca_flags |= CA_CLIENT_DATA;
   }

   /* While clip coords still alias the object coords, upgrading the
    * clip array means upgrading the position array.
    */
   if ((required & VERT_BIT_CLIP) && VB->ClipPtr == VB->ObjPtr) {
      required |= VERT_BIT_POS;
   }

   if ((required & VERT_BIT_POS) && (VB->ObjPtr->flags & flags)) {
      ASSERT(VB->ObjPtr == &inputs->Obj);
      _tnl_import_vertex( ctx, need_writeable, need_stride );
      VB->importable_data &= ~(VERT_BIT_POS|VERT_BIT_CLIP);
   }

   if ((required & VERT_BIT_NORMAL) && (VB->NormalPtr->flags & flags)) {
      ASSERT(VB->NormalPtr == &inputs->Normal);
      _tnl_import_normal( ctx, need_writeable, need_stride );
      VB->importable_data &= ~VERT_BIT_NORMAL;
   }

   if ((required & VERT_BIT_COLOR0) && (VB->ColorPtr[0]->Flags & ca_flags)) {
      ASSERT(VB->ColorPtr[0] == &inputs->Color);
      _tnl_import_color( ctx, GL_FLOAT, need_writeable, need_stride );
      VB->importable_data &= ~VERT_BIT_COLOR0;
   }

   if ((required & VERT_BIT_COLOR1) &&
       (VB->SecondaryColorPtr[0]->Flags & ca_flags)) {
      ASSERT(VB->SecondaryColorPtr[0] == &inputs->SecondaryColor);
      _tnl_import_secondarycolor( ctx, GL_FLOAT, need_writeable, need_stride );
      VB->importable_data &= ~VERT_BIT_COLOR1;
   }

   if ((required & VERT_BIT_FOG) && (VB->FogCoordPtr->flags & flags)) {
      ASSERT(VB->FogCoordPtr == &inputs->FogCoord);
      _tnl_import_fogcoord( ctx, need_writeable, need_stride );
      VB->importable_data &= ~VERT_BIT_FOG;
   }

   if ((required & VERT_BIT_INDEX) && (VB->IndexPtr[0]->flags & flags)) {
      ASSERT(VB->IndexPtr[0] == &inputs->Index);
      _tnl_import_index( ctx, need_writeable, need_stride );
      VB->importable_data &= ~VERT_BIT_INDEX;
   }

   if (required & VERT_BITS_TEX_ANY) {
      for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++) {
         if ((required & VERT_BIT_TEX(unit)) &&
             (VB->TexCoordPtr[unit]->flags & flags)) {
            ASSERT(VB->TexCoordPtr[unit] == &inputs->TexCoord[unit]);
            _tnl_import_texcoord( ctx, unit, need_writeable, need_stride );
            VB->importable_data &= ~VERT_BIT_TEX(unit);
         }
      }
   }

   /* XXX not sure what to do here for vertex program arrays */
}
/** * Generate an R200 vertex program from Mesa's internal representation. * * \return GL_TRUE for success, GL_FALSE for failure. */ static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp) { struct gl_vertex_program *mesa_vp = &vp->mesa_program; struct prog_instruction *vpi; int i; VERTEX_SHADER_INSTRUCTION *o_inst; unsigned long operands; int are_srcs_scalar; unsigned long hw_op; int dofogfix = 0; int fog_temp_i = 0; int free_inputs; int array_count = 0; int u_temp_used; vp->native = GL_FALSE; vp->translated = GL_TRUE; vp->fogmode = ctx->Fog.Mode; if (mesa_vp->Base.NumInstructions == 0) return GL_FALSE; #if 0 if ((mesa_vp->Base.InputsRead & ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 | VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 | VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) { if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "can't handle vert prog inputs 0x%x\n", mesa_vp->Base.InputsRead); } return GL_FALSE; } #endif if ((mesa_vp->Base.OutputsWritten & ~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) | (1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) | (1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) | (1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) { if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "can't handle vert prog outputs 0x%llx\n", (unsigned long long) mesa_vp->Base.OutputsWritten); } return GL_FALSE; } /* Initial value should be last tmp reg that hw supports. Strangely enough r300 doesnt mind even though these would be out of range. Smart enough to realize that it doesnt need it? */ int u_temp_i = R200_VSF_MAX_TEMPS - 1; struct prog_src_register src[3]; struct prog_dst_register dst; /* FIXME: is changing the prog safe to do here? 
*/ if (mesa_vp->IsPositionInvariant && /* make sure we only do this once */ !(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) { _mesa_insert_mvp_code(ctx, mesa_vp); } /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with base e isn't directly available neither. */ if ((mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_FOGC)) && !vp->fogpidx) { struct gl_program_parameter_list *paramList; gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 }; paramList = mesa_vp->Base.Parameters; vp->fogpidx = _mesa_add_state_reference(paramList, tokens); } vp->pos_end = 0; mesa_vp->Base.NumNativeInstructions = 0; if (mesa_vp->Base.Parameters) mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters; else mesa_vp->Base.NumNativeParameters = 0; for(i = 0; i < VERT_ATTRIB_MAX; i++) vp->inputs[i] = -1; for(i = 0; i < 15; i++) vp->inputmap_rev[i] = 255; free_inputs = 0x2ffd; /* fglrx uses fixed inputs as follows for conventional attribs. generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available. We'll just do the same. There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog. attr 1 and 12 aren't used for generic attribs as those cannot be made vec4 (correspond to vertex normal/weight - maybe weight actually could be made vec4). Additionally, not more than 12 arrays in total are possible I think. attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0) attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1) attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0) */ /* attr 4,5 and 13 are only used with generic attribs. 
Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is not possibe to use with vertex progs as it is lacking in vert prog specification) */ /* may look different when using idx buf / input_route instead of se_vtx_fmt? */ if (mesa_vp->Base.InputsRead & VERT_BIT_POS) { vp->inputs[VERT_ATTRIB_POS] = 0; vp->inputmap_rev[0] = VERT_ATTRIB_POS; free_inputs &= ~(1 << 0); array_count++; } if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) { vp->inputs[VERT_ATTRIB_WEIGHT] = 12; vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT; array_count++; } if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) { vp->inputs[VERT_ATTRIB_NORMAL] = 1; vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL; array_count++; } if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) { vp->inputs[VERT_ATTRIB_COLOR0] = 2; vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0; free_inputs &= ~(1 << 2); array_count++; } if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) { vp->inputs[VERT_ATTRIB_COLOR1] = 3; vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1; free_inputs &= ~(1 << 3); array_count++; } if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) { vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++; vp->inputmap_rev[3] = VERT_ATTRIB_FOG; array_count++; } /* VERT_ATTRIB_TEX0-5 */ for (i = 0; i <= 5; i++) { if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) { vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6; vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i); free_inputs &= ~(1 << (i + 6)); array_count++; } } /* using VERT_ATTRIB_TEX6/7 would be illegal */ for (; i < VERT_ATTRIB_TEX_MAX; i++) { if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) { if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "texture attribute %d in vert prog\n", i); } return GL_FALSE; } } /* completely ignore aliasing? */ for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) { int j; /* completely ignore aliasing? 
*/ if (mesa_vp->Base.InputsRead & VERT_BIT_GENERIC(i)) { array_count++; if (array_count > 12) { if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "more than 12 attribs used in vert prog\n"); } return GL_FALSE; } for (j = 0; j < 14; j++) { /* will always find one due to limited array_count */ if (free_inputs & (1 << j)) { free_inputs &= ~(1 << j); vp->inputs[VERT_ATTRIB_GENERIC(i)] = j; if (j == 0) { /* mapped to pos */ vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i); } else if (j < 12) { /* mapped to col/tex */ vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i); } else { /* mapped to pos1 */ vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i); } break; } } } } if (!(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) { if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "can't handle vert prog without position output\n"); } return GL_FALSE; } if (free_inputs & 1) { if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "can't handle vert prog without position input\n"); } return GL_FALSE; } o_inst = vp->instr; for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){ operands = op_operands(vpi->Opcode); are_srcs_scalar = operands & SCALAR_FLAG; operands &= OP_MASK; for(i = 0; i < operands; i++) { src[i] = vpi->SrcReg[i]; /* hack up default attrib values as per spec as swizzling. normal, fog, secondary color. Crazy? May need more if we don't submit vec4 elements? 
*/ if (src[i].File == PROGRAM_INPUT) { if (src[i].Index == VERT_ATTRIB_NORMAL) { int j; for (j = 0; j < 4; j++) { if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) { src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); src[i].Swizzle |= SWIZZLE_ONE << (j*3); } } } else if (src[i].Index == VERT_ATTRIB_COLOR1) { int j; for (j = 0; j < 4; j++) { if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) { src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); src[i].Swizzle |= SWIZZLE_ZERO << (j*3); } } } else if (src[i].Index == VERT_ATTRIB_FOG) { int j; for (j = 0; j < 4; j++) { if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) { src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); src[i].Swizzle |= SWIZZLE_ONE << (j*3); } else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) || GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) { src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); src[i].Swizzle |= SWIZZLE_ZERO << (j*3); } } } } } if(operands == 3){ if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){ o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, VSF_FLAG_ALL); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4); o_inst->src1 = ZERO_SRC_0; o_inst->src2 = UNUSED_SRC_1; o_inst++; src[2].File = PROGRAM_TEMPORARY; src[2].Index = u_temp_i; src[2].RelAddr = 0; u_temp_i--; } } if(operands >= 2){ if( CMP_SRCS(src[1], src[0]) ){ o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, VSF_FLAG_ALL); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src1 = ZERO_SRC_0; o_inst->src2 = UNUSED_SRC_1; o_inst++; src[0].File = PROGRAM_TEMPORARY; src[0].Index = u_temp_i; src[0].RelAddr = 0; u_temp_i--; } } dst = vpi->DstReg; if (dst.File == PROGRAM_OUTPUT && dst.Index == VARYING_SLOT_FOGC && 
dst.WriteMask & WRITEMASK_X) { fog_temp_i = u_temp_i; dst.File = PROGRAM_TEMPORARY; dst.Index = fog_temp_i; dofogfix = 1; u_temp_i--; } /* These ops need special handling. */ switch(vpi->Opcode){ case OPCODE_POW: /* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter). So may need to insert additional instruction */ if ((src[0].File == src[1].File) && (src[0].Index == src[1].Index)) { o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), SWIZZLE_ZERO, t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, t_src_class(src[0].File), src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_0; } else { o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, VSF_FLAG_ALL); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, t_src_class(src[0].File), src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_ZERO, SWIZZLE_ZERO, t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, t_src_class(src[1].File), src[1].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; o_inst++; o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i, VSF_IN_COMPONENT_X, VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z, VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, VSF_FLAG_NONE); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_0; u_temp_i--; } goto next; case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} case OPCODE_SWZ: o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = t_src(vp, &src[0]); o_inst->src1 = ZERO_SRC_0; o_inst->src2 = UNUSED_SRC_1; goto next; case OPCODE_MAD: /* only 2 read ports into temp memory thus may need the macro op MAD_2 instead (requiring 2 clocks) if all inputs are in temp memory (and, only if they actually reference 3 distinct temps) */ hw_op=(src[0].File == PROGRAM_TEMPORARY && src[1].File == PROGRAM_TEMPORARY && src[2].File == PROGRAM_TEMPORARY && (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) && (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) && (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = t_src(vp, &src[0]); #if 0 if ((o_inst - vp->instr) == 31) { /* fix up the broken vertex program of quake4 demo... 
*/ o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, t_src_class(src[1].File), src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, t_src_class(src[1].File), src[1].Negate) | (src[1].RelAddr << 4); } else { o_inst->src1 = t_src(vp, &src[1]); o_inst->src2 = t_src(vp, &src[2]); } #else o_inst->src1 = t_src(vp, &src[1]); o_inst->src2 = t_src(vp, &src[2]); #endif goto next; case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[0].File), src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 0)), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[1].File), src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), VSF_IN_COMPONENT_ONE, t_src_class(src[0].File), src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = t_src(vp, &src[1]); o_inst->src2 = UNUSED_SRC_1; goto next; case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = t_src(vp, &src[0]); o_inst->src1 = 
MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 0)), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0=t_src(vp, &src[0]); o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), t_swizzle(GET_SWZ(src[0].Swizzle, 3)), t_src_class(src[0].File), (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; case OPCODE_FLR: /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC, (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, t_dst_mask(dst.WriteMask)); o_inst->src0 = t_src(vp, &src[0]); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_1; o_inst++; o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = t_src(vp, &src[0]); o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i, VSF_IN_COMPONENT_X, VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z, VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, /* Not 100% sure about this */ (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); o_inst->src2 = UNUSED_SRC_0; u_temp_i--; goto next; case OPCODE_XPD: /* mul r0, r1.yzxw, r2.zxyw mad r0, -r2.yzxw, r1.zxyw, r0 */ hw_op=(src[0].File == PROGRAM_TEMPORARY && src[1].File == PROGRAM_TEMPORARY && (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ? 
R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, t_dst_mask(dst.WriteMask)); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; o_inst++; u_temp_i--; o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst), t_dst_mask(dst.WriteMask)); o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), (!src[1].Negate) ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), src[0].Negate) | (src[0].RelAddr << 4); o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1, VSF_IN_COMPONENT_X, VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z, VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, VSF_FLAG_NONE); goto next; case OPCODE_END: assert(0); default: break; } o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst), t_dst_mask(dst.WriteMask)); if(are_srcs_scalar){ switch(operands){ case 1: o_inst->src0 = t_src_scalar(vp, &src[0]); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_1; break; case 2: o_inst->src0 = t_src_scalar(vp, &src[0]); o_inst->src1 = t_src_scalar(vp, &src[1]); o_inst->src2 = UNUSED_SRC_1; break; case 3: o_inst->src0 = t_src_scalar(vp, &src[0]); o_inst->src1 = t_src_scalar(vp, &src[1]); o_inst->src2 = t_src_scalar(vp, &src[2]); break; default: fprintf(stderr, "illegal number of operands %lu\n", operands); exit(-1); break; } } else { switch(operands){ case 1: o_inst->src0 = t_src(vp, &src[0]); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_1; break; case 2: o_inst->src0 = t_src(vp, &src[0]); o_inst->src1 = t_src(vp, &src[1]); o_inst->src2 = UNUSED_SRC_1; break; case 3: o_inst->src0 = t_src(vp, &src[0]); o_inst->src1 = t_src(vp, &src[1]); o_inst->src2 = t_src(vp, &src[2]); break; default: fprintf(stderr, "illegal number of operands %lu\n", operands); exit(-1); break; } } next: if (dofogfix) { o_inst++; if (vp->fogmode == GL_EXP) { o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, VSF_FLAG_X); o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE); o_inst->src2 = UNUSED_SRC_1; o_inst++; 
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E, R200_VSF_OUT_CLASS_RESULT_FOGC, VSF_FLAG_X); o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_1; } else if (vp->fogmode == GL_EXP2) { o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, VSF_FLAG_X); o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE); o_inst->src2 = UNUSED_SRC_1; o_inst++; o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, VSF_FLAG_X); o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); o_inst->src2 = UNUSED_SRC_1; o_inst++; o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E, R200_VSF_OUT_CLASS_RESULT_FOGC, VSF_FLAG_X); o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_1; } else { /* fogmode == GL_LINEAR */ /* could do that with single op (dot) if using params like with fixed function pipeline fog */ o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, VSF_FLAG_X); o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL); o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE); o_inst->src2 = UNUSED_SRC_1; o_inst++; o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, R200_VSF_OUT_CLASS_RESULT_FOGC, VSF_FLAG_X); o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE); o_inst->src2 = UNUSED_SRC_1; } dofogfix = 0; } u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i; if (mesa_vp->Base.NumNativeTemporaries < (mesa_vp->Base.NumTemporaries + u_temp_used)) { mesa_vp->Base.NumNativeTemporaries = mesa_vp->Base.NumTemporaries + 
u_temp_used; } if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) { if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used); } return GL_FALSE; } u_temp_i = R200_VSF_MAX_TEMPS - 1; if(o_inst - vp->instr >= R200_VSF_MAX_INST) { mesa_vp->Base.NumNativeInstructions = 129; if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "more than 128 native instructions\n"); } return GL_FALSE; } if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) { vp->pos_end = (o_inst - vp->instr); } } vp->native = GL_TRUE; mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr); #if 0 fprintf(stderr, "hw program:\n"); for(i=0; i < vp->program.length; i++) fprintf(stderr, "%08x\n", vp->instr[i]); #endif return GL_TRUE; }
/* TCL render. */ static GLboolean radeon_run_tcl_render( struct gl_context *ctx, struct tnl_pipeline_stage *stage ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0; GLuint i; /* TODO: separate this from the swtnl pipeline */ if (rmesa->radeon.TclFallback) return GL_TRUE; /* fallback to software t&l */ if (VB->Count == 0) return GL_FALSE; /* NOTE: inputs != tnl->render_inputs - these are the untransformed * inputs. */ if (ctx->Light.Enabled) { inputs |= VERT_BIT_NORMAL; } if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { inputs |= VERT_BIT_COLOR1; } if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) { inputs |= VERT_BIT_FOG; } for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { /* TODO: probably should not emit texture coords when texgen is enabled */ if (rmesa->TexGenNeedNormals[i]) { inputs |= VERT_BIT_NORMAL; } inputs |= VERT_BIT_TEX(i); } } radeonReleaseArrays( ctx, ~0 ); GLuint emit_end = radeonEnsureEmitSize( ctx, inputs ) + rmesa->radeon.cmdbuf.cs->cdw; radeonEmitArrays( ctx, inputs ); rmesa->tcl.Elts = VB->Elts; for (i = 0 ; i < VB->PrimitiveCount ; i++) { GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); GLuint start = VB->Primitive[i].start; GLuint length = VB->Primitive[i].count; if (!length) continue; if (rmesa->tcl.Elts) radeonEmitEltPrimitive( ctx, start, start+length, prim ); else radeonEmitPrimitive( ctx, start, start+length, prim ); } if (emit_end < rmesa->radeon.cmdbuf.cs->cdw) WARN_ONCE("Rendering was %d commands larger than predicted size." " We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end); return GL_FALSE; /* finished the pipe */ }
/**
 * Predict the total emit size of the next rendering operation, so that no
 * flush has to happen in the middle of rendering.
 *
 * The prediction should be as tight as possible while never falling below
 * the real worst case.
 *
 * \param ctx     GL context.
 * \param inputs  VERT_BIT_* mask of the vertex attributes to be emitted.
 * \return predicted size: space for primitives plus the state emit size.
 */
static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs )
{
   /* attribute flags that each allocate an aos object */
   const GLuint aos_flags[] = {
      VERT_BIT_NORMAL,
      VERT_BIT_COLOR0,
      VERT_BIT_COLOR1,
      VERT_BIT_FOG
   };
   const GLuint num_aos_flags = sizeof(aos_flags) / sizeof(aos_flags[0]);

   r100ContextPtr rmesa = R100_CONTEXT(ctx);
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;
   GLuint nr_aos = 1;   /* radeonEmitArrays does always emit one */
   GLuint space_required = 0;
   GLuint state_size;
   GLuint n;

   /* predict number of aos to emit */
   for (n = 0; n < num_aos_flags; ++n) {
      if (inputs & aos_flags[n])
         ++nr_aos;
   }
   for (n = 0; n < ctx->Const.MaxTextureUnits; ++n) {
      if (inputs & VERT_BIT_TEX(n))
         ++nr_aos;
   }

   /* count the prediction for state size */
   state_size = radeonCountStateEmitSize( &rmesa->radeon );

   /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
   if (!rmesa->hw.tcl.dirty)
      state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl );

   /* predict size for elements */
   for (n = 0; n < VB->PrimitiveCount; ++n) {
      if (VB->Primitive[n].count) {
         /* If primitive.count is less than MAX_CONVERSION_SIZE the
          * rendering code may decide to convert to elts.  In that case
          * the prediction is pessimistic and takes the larger of the
          * two paths.
          */
         const GLuint elts = ELTS_BUFSZ(nr_aos);
         const GLuint index = INDEX_BUFSZ;
         const GLuint vbuf = VBUF_BUFSZ;
         const GLuint elt_path = index + elts;
         const GLboolean use_vbuf =
            (!VB->Elts && VB->Primitive[n].count >= MAX_CONVERSION_SIZE)
            || vbuf > elt_path;

         space_required += use_vbuf ? vbuf : elt_path;
         space_required += VB->Primitive[n].count * 3;
         space_required += AOS_BUFSZ(nr_aos);
      }
   }
   space_required += SCISSOR_BUFSZ;

   /* flush the buffer in case we need more than is left; after a flush
    * the state size is counted again.
    */
   if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__))
      state_size = radeonCountStateEmitSize( &rmesa->radeon );

   return space_required + state_size;
}
/**
 * Bind the vertex data held in a 'struct immediate' to the TNL
 * 'struct vertex_buffer'.
 *
 * Every vector is set up to begin at IM->CopyStart, so the first copied
 * vertex (if any) appears at position zero.  Because copied vertices are
 * always re-transformed, the 'start' member of the GLvector structs is
 * redundant.
 *
 * \param ctx  GL context owning the TNL vertex buffer.
 * \param IM   immediate struct supplying the data.
 */
static void _tnl_vb_bind_immediate( GLcontext *ctx, struct immediate *IM )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;
   struct vertex_arrays *arrays = &tnl->imm_inputs;
   const GLuint inputs = tnl->pipeline.inputs;   /* for copy-to-current */
   const GLuint first = IM->CopyStart;
   const GLuint nverts = IM->Count - first;

   /* TODO: optimize the case where nothing has changed.  (Just bind
    * the temporary arrays to the vb).
    */

   /* Constant data in the VB. */
   VB->Count = nverts;
   VB->FirstClipped = IMM_MAXDATA - first;
   VB->import_data = NULL;
   VB->importable_data = 0;

   /* Need an IM->FirstPrimitive? */
   VB->Primitive = IM->Primitive + first;
   VB->PrimitiveLength = IM->PrimitiveLength + first;
   VB->FirstPrimitive = 0;

   VB->Flag = IM->Flag + first;

   /* Clear the optional pointers; the texcoord pointers are cleared in
    * the texture loop further down.
    */
   VB->NormalPtr = NULL;
   VB->NormalLengthPtr = NULL;
   VB->EdgeFlag = NULL;
   VB->IndexPtr[0] = NULL;
   VB->IndexPtr[1] = NULL;
   VB->ColorPtr[0] = NULL;
   VB->ColorPtr[1] = NULL;
   VB->SecondaryColorPtr[0] = NULL;
   VB->SecondaryColorPtr[1] = NULL;
   VB->Elts = NULL;
   VB->MaterialMask = NULL;
   VB->Material = NULL;

   /* Initial values of the array pointers in the vb. */

   if (inputs & VERT_BIT_POS) {
      const GLuint obj_bits = IM->CopyOrFlag & VERT_BITS_OBJ_234;

      /* Position size follows from which components were specified. */
      if (obj_bits == VERT_BITS_OBJ_234)
         arrays->Obj.size = 4;
      else if (obj_bits == VERT_BITS_OBJ_23)
         arrays->Obj.size = 3;
      else
         arrays->Obj.size = 2;

      arrays->Obj.data = &IM->Attrib[VERT_ATTRIB_POS][first];
      arrays->Obj.start = (GLfloat *) &IM->Attrib[VERT_ATTRIB_POS][first];
      arrays->Obj.count = nverts;
      VB->ObjPtr = &arrays->Obj;
   }

   if (inputs & VERT_BIT_NORMAL) {
      arrays->Normal.data = &IM->Attrib[VERT_ATTRIB_NORMAL][first];
      arrays->Normal.start = (GLfloat *) &IM->Attrib[VERT_ATTRIB_NORMAL][first];
      arrays->Normal.count = nverts;
      arrays->Normal.size = 3;   /* just to be safe */
      VB->NormalPtr = &arrays->Normal;
      if (IM->NormalLengthPtr)
         VB->NormalLengthPtr = IM->NormalLengthPtr + first;
   }

   if (inputs & VERT_BIT_INDEX) {
      arrays->Index.count = nverts;
      arrays->Index.data = IM->Index + first;
      arrays->Index.start = IM->Index + first;
      VB->IndexPtr[0] = &arrays->Index;
   }

   if (inputs & VERT_BIT_FOG) {
      arrays->FogCoord.data = &IM->Attrib[VERT_ATTRIB_FOG][first];
      arrays->FogCoord.start = (GLfloat *) &IM->Attrib[VERT_ATTRIB_FOG][first];
      arrays->FogCoord.count = nverts;
      VB->FogCoordPtr = &arrays->FogCoord;
   }

   if (inputs & VERT_BIT_COLOR1) {
      arrays->SecondaryColor.Ptr = IM->Attrib[VERT_ATTRIB_COLOR1] + first;
      VB->SecondaryColorPtr[0] = &arrays->SecondaryColor;
   }

   if (inputs & VERT_BIT_EDGEFLAG) {
      VB->EdgeFlag = IM->EdgeFlag + first;
   }

   if (inputs & VERT_BIT_COLOR0) {
      if (!(IM->CopyOrFlag & VERT_BIT_COLOR0)) {
         /* No color0 flagged in CopyOrFlag: point at the current color
          * with a zero stride and register the import callback.
          */
         arrays->Color.Ptr = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
         arrays->Color.StrideB = 0;
         arrays->Color.Flags = CA_CLIENT_DATA;   /* hack */
         VB->import_source = IM;
         VB->importable_data |= VERT_BIT_COLOR0;
         VB->import_data = _tnl_upgrade_current_data;
      }
      else {
         arrays->Color.Ptr = IM->Attrib[VERT_ATTRIB_COLOR0] + first;
         arrays->Color.StrideB = 4 * sizeof(GLfloat);
         arrays->Color.Flags = 0;
      }
      VB->ColorPtr[0] = &arrays->Color;
   }

   if (inputs & VERT_BITS_TEX_ANY) {
      GLuint unit;
      for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
         GLuint size;

         if (!(inputs & VERT_BIT_TEX(unit))) {
            VB->TexCoordPtr[unit] = NULL;
            continue;
         }

         /* Size is 2, or 3 when the size-3 bit is set, or 4 when the
          * size-4 bit is set as well.
          */
         size = 2;
         if (IM->TexSize & TEX_SIZE_3(unit))
            size = (IM->TexSize & TEX_SIZE_4(unit)) ? 4 : 3;

         arrays->TexCoord[unit].count = nverts;
         arrays->TexCoord[unit].data =
            &IM->Attrib[VERT_ATTRIB_TEX0 + unit][first];
         arrays->TexCoord[unit].start =
            (GLfloat *) &IM->Attrib[VERT_ATTRIB_TEX0 + unit][first];
         arrays->TexCoord[unit].size = size;
         VB->TexCoordPtr[unit] = &arrays->TexCoord[unit];
      }
   }

   if ((inputs & IM->OrFlag & VERT_BIT_MATERIAL) && IM->Material) {
      VB->MaterialMask = IM->MaterialMask + first;
      VB->Material = IM->Material + first;
   }

   /* GL_NV_vertex_program */
   if (!ctx->VertexProgram.Enabled)
      return;

   {
      GLuint attr;
      for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
         arrays->Attribs[attr].count = nverts;
         arrays->Attribs[attr].data = &IM->Attrib[attr][first];
         arrays->Attribs[attr].start = (GLfloat *) &IM->Attrib[attr][first];
         arrays->Attribs[attr].size = 4;
         VB->AttribPtr[attr] = &arrays->Attribs[attr];
      }
   }
}
/**
 * Draw a textured rectangle.
 *
 * Builds a temporary vertex buffer holding four vertices (lower left,
 * lower right, upper right, upper left) and draws them as a triangle fan.
 * Each vertex carries a position in clip coordinates, optionally the
 * current color (when the current fragment program reads COL0), and one
 * texcoord per texture unit with 2D texturing enabled, taken from that
 * texture object's crop rectangle.  The cso viewport, vertex shader,
 * vertex elements and vertex buffers are saved before and restored after
 * drawing.
 *
 * If the vertex buffer cannot be created or mapped, nothing is drawn.
 *
 * \param ctx     GL context.
 * \param x, y    lower-left corner, in framebuffer pixels.
 * \param z       depth; clamped to [0, 1].
 * \param width   rectangle width, in framebuffer pixels.
 * \param height  rectangle height, in framebuffer pixels.
 */
static void
st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
           GLfloat width, GLfloat height)
{
   struct st_context *st = ctx->st;
   struct pipe_context *pipe = st->pipe;
   struct cso_context *cso = ctx->st->cso_context;
   struct pipe_resource *vbuffer;
   struct pipe_transfer *vbuffer_transfer;
   GLuint i, numTexCoords, numAttribs;
   GLboolean emitColor;
   uint semantic_names[2 + MAX_TEXTURE_UNITS];
   uint semantic_indexes[2 + MAX_TEXTURE_UNITS];
   struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS];

   st_validate_state(st);

   /* determine if we need vertex color */
   emitColor = (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_COL0)
      ? GL_TRUE : GL_FALSE;

   /* determine how many enabled sets of texcoords */
   numTexCoords = 0;
   for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
      if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) {
         numTexCoords++;
      }
   }

   /* total number of attributes per vertex */
   numAttribs = 1 + emitColor + numTexCoords;

   /* create the vertex buffer: 4 vertices * numAttribs * 4 floats */
   vbuffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
                                PIPE_USAGE_STREAM,
                                numAttribs * 4 * 4 * sizeof(GLfloat));
   if (!vbuffer) {
      /* allocation failed: nothing to draw with, no state touched yet */
      return;
   }

   /* load vertex buffer */
   {
#define SET_ATTRIB(VERT, ATTR, X, Y, Z, W)                       \
      do {                                                       \
         GLuint k = (((VERT) * numAttribs + (ATTR)) * 4);        \
         assert(k < 4 * 4 * numAttribs);                         \
         vbuf[k + 0] = X;                                        \
         vbuf[k + 1] = Y;                                        \
         vbuf[k + 2] = Z;                                        \
         vbuf[k + 3] = W;                                        \
      } while (0)

      const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height;
      GLfloat *vbuf = (GLfloat *) pipe_buffer_map(pipe, vbuffer,
                                                  PIPE_TRANSFER_WRITE,
                                                  &vbuffer_transfer);
      GLuint attr;

      if (!vbuf) {
         /* mapping failed: drop our buffer reference and bail out
          * before any cso state has been saved or changed
          */
         pipe_resource_reference(&vbuffer, NULL);
         return;
      }

      z = CLAMP(z, 0.0f, 1.0f);

      /* positions (in clip coords) */
      {
         const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
         const GLfloat fb_width = (GLfloat)fb->Width;
         const GLfloat fb_height = (GLfloat)fb->Height;

         const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
         const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
         const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
         const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);

         SET_ATTRIB(0, 0, clip_x0, clip_y0, z, 1.0f);   /* lower left */
         SET_ATTRIB(1, 0, clip_x1, clip_y0, z, 1.0f);   /* lower right */
         SET_ATTRIB(2, 0, clip_x1, clip_y1, z, 1.0f);   /* upper right */
         SET_ATTRIB(3, 0, clip_x0, clip_y1, z, 1.0f);   /* upper left */

         semantic_names[0] = TGSI_SEMANTIC_POSITION;
         semantic_indexes[0] = 0;
      }

      /* colors: the same current color on all four vertices */
      if (emitColor) {
         const GLfloat *c = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
         SET_ATTRIB(0, 1, c[0], c[1], c[2], c[3]);
         SET_ATTRIB(1, 1, c[0], c[1], c[2], c[3]);
         SET_ATTRIB(2, 1, c[0], c[1], c[2], c[3]);
         SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]);
         semantic_names[1] = TGSI_SEMANTIC_COLOR;
         semantic_indexes[1] = 0;
         attr = 2;
      }
      else {
         attr = 1;
      }

      /* texcoords: crop rectangle normalized by the base level size */
      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
         if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) {
            struct gl_texture_object *obj = ctx->Texture.Unit[i]._Current;
            struct gl_texture_image *img = obj->Image[0][obj->BaseLevel];
            const GLfloat wt = (GLfloat) img->Width;
            const GLfloat ht = (GLfloat) img->Height;
            const GLfloat s0 = obj->CropRect[0] / wt;
            const GLfloat t0 = obj->CropRect[1] / ht;
            const GLfloat s1 = (obj->CropRect[0] + obj->CropRect[2]) / wt;
            const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht;

            SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f);   /* lower left */
            SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f);   /* lower right */
            SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f);   /* upper right */
            SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f);   /* upper left */

            semantic_names[attr] = TGSI_SEMANTIC_GENERIC;
            semantic_indexes[attr] = 0;

            attr++;
         }
      }

      pipe_buffer_unmap(pipe, vbuffer_transfer);

#undef SET_ATTRIB
   }

   cso_save_viewport(cso);
   cso_save_vertex_shader(cso);
   cso_save_vertex_elements(cso);
   cso_save_vertex_buffers(cso);

   {
      void *vs = lookup_shader(pipe, numAttribs,
                               semantic_names, semantic_indexes);
      cso_set_vertex_shader_handle(cso, vs);
   }

   /* attributes are interleaved, each one four 32-bit floats */
   for (i = 0; i < numAttribs; i++) {
      velements[i].src_offset = i * 4 * sizeof(float);
      velements[i].instance_divisor = 0;
      velements[i].vertex_buffer_index = 0;
      velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
   }
   cso_set_vertex_elements(cso, numAttribs, velements);

   /* viewport state: viewport matching window dims */
   {
      const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
      const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
      /* named vp_* so they do not shadow the width/height parameters */
      const GLfloat vp_width = (GLfloat)fb->Width;
      const GLfloat vp_height = (GLfloat)fb->Height;
      struct pipe_viewport_state vp;
      vp.scale[0] = 0.5f * vp_width;
      vp.scale[1] = vp_height * (invert ? -0.5f : 0.5f);
      vp.scale[2] = 1.0f;
      vp.scale[3] = 1.0f;
      vp.translate[0] = 0.5f * vp_width;
      vp.translate[1] = 0.5f * vp_height;
      vp.translate[2] = 0.0f;
      vp.translate[3] = 0.0f;
      cso_set_viewport(cso, &vp);
   }

   util_draw_vertex_buffer(pipe, cso, vbuffer,
                           0,   /* offset */
                           PIPE_PRIM_TRIANGLE_FAN,
                           4,   /* verts */
                           numAttribs);   /* attribs/vert */

   pipe_resource_reference(&vbuffer, NULL);

   /* restore state */
   cso_restore_viewport(cso);
   cso_restore_vertex_shader(cso);
   cso_restore_vertex_elements(cso);
   cso_restore_vertex_buffers(cso);
}
/**
 * TCL render pipeline stage.
 *
 * Collects the untransformed vertex attributes needed for the current GL
 * state, emits the vertex arrays and then emits every non-empty primitive
 * of the TNL vertex buffer.
 *
 * \return GL_TRUE when rmesa->TclFallback is set (fallback to software
 *         t&l), GL_FALSE otherwise (finished the pipe).
 */
static GLboolean radeon_run_tcl_render( GLcontext *ctx,
                                        struct tnl_pipeline_stage *stage )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;
   GLuint inputs;
   GLuint n;

   /* TODO: separate this from the swtnl pipeline */
   if (rmesa->TclFallback)
      return GL_TRUE;   /* fallback to software t&l */

   if (VB->Count == 0)
      return GL_FALSE;

   /* NOTE: inputs != tnl->render_inputs - these are the untransformed
    * inputs.  Position and primary color are always requested.
    */
   inputs = VERT_BIT_POS | VERT_BIT_COLOR0;

   if (ctx->Light.Enabled)
      inputs |= VERT_BIT_NORMAL;

   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
      inputs |= VERT_BIT_COLOR1;

   if (ctx->Fog.Enabled && ctx->Fog.FogCoordinateSource == GL_FOG_COORD)
      inputs |= VERT_BIT_FOG;

   for (n = 0; n < ctx->Const.MaxTextureUnits; n++) {
      if (!ctx->Texture.Unit[n]._ReallyEnabled)
         continue;

      /* TODO: probably should not emit texture coords when texgen is enabled */
      if (rmesa->TexGenNeedNormals[n])
         inputs |= VERT_BIT_NORMAL;

      inputs |= VERT_BIT_TEX(n);
   }

   radeonReleaseArrays( ctx, ~0 );
   radeonEmitArrays( ctx, inputs );

   rmesa->tcl.Elts = VB->Elts;

   for (n = 0; n < VB->PrimitiveCount; n++) {
      const GLuint mode = VB->Primitive[n].mode;
      const GLuint first = VB->Primitive[n].start;
      const GLuint nverts = VB->Primitive[n].count;

      /* empty primitives are skipped */
      if (nverts != 0) {
         if (rmesa->tcl.Elts)
            radeonEmitEltPrimitive( ctx, first, first + nverts, mode );
         else
            radeonEmitPrimitive( ctx, first, first + nverts, mode );
      }
   }

   return GL_FALSE;   /* finished the pipe */
}