static void build_m2( GLfloat f[][3], GLfloat m[], const GLvector4f *normal, const GLvector4f *eye ) { GLuint stride = eye->stride; GLfloat *coord = eye->start; GLuint count = eye->count; GLfloat *norm = normal->start; GLuint i; for (i=0;i<count;i++,STRIDE_F(coord,stride),STRIDE_F(norm,normal->stride)) { GLfloat u[3], two_nu, fx, fy, fz; COPY_2V( u, coord ); u[2] = 0; NORMALIZE_3FV( u ); two_nu = 2.0F * DOT3(norm,u); fx = f[i][0] = u[0] - norm[0] * two_nu; fy = f[i][1] = u[1] - norm[1] * two_nu; fz = f[i][2] = u[2] - norm[2] * two_nu; m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F); if (m[i] != 0.0F) { m[i] = 0.5F * INV_SQRTF(m[i]); } } }
/** * Compute point size for each vertex from the vertex eye-space Z * coordinate and the point size attenuation factors. * Only done when point size attenuation is enabled and vertex program is * disabled. */ static GLboolean run_point_stage(struct gl_context *ctx, struct tnl_pipeline_stage *stage) { if (ctx->Point._Attenuated && !ctx->VertexProgram._Current) { struct point_stage_data *store = POINT_STAGE_DATA(stage); struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; const GLfloat *eyeCoord = (GLfloat *) VB->EyePtr->data + 2; const GLint eyeCoordStride = VB->EyePtr->stride / sizeof(GLfloat); const GLfloat p0 = ctx->Point.Params[0]; const GLfloat p1 = ctx->Point.Params[1]; const GLfloat p2 = ctx->Point.Params[2]; const GLfloat pointSize = ctx->Point.Size; GLfloat (*size)[4] = store->PointSize.data; GLuint i; for (i = 0; i < VB->Count; i++) { const GLfloat dist = FABSF(*eyeCoord); const GLfloat q = p0 + dist * (p1 + dist * p2); const GLfloat atten = (q != 0.0F) ? INV_SQRTF(q) : 1.0F; size[i][0] = pointSize * atten; /* clamping done in rasterization */ eyeCoord += eyeCoordStride; } VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &store->PointSize; } return GL_TRUE; }
/** * Execute the given vertex program */ void _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program) { struct vertex_program_state *state = &ctx->VertexProgram; const struct vp_instruction *inst; ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */ /* If the program is position invariant, multiply the input * position and the MVP matrix and stick it into the output pos slot */ if (ctx->VertexProgram.Current->IsPositionInvariant) { TRANSFORM_POINT( ctx->VertexProgram.Outputs[0], ctx->_ModelProjectMatrix.m, ctx->VertexProgram.Inputs[0]); /* XXX: This could go elsewhere */ ctx->VertexProgram.Current->OutputsWritten |= 0x1; } for (inst = program->Instructions; /*inst->Opcode != VP_OPCODE_END*/; inst++) { if (ctx->VertexProgram.CallbackEnabled && ctx->VertexProgram.Callback) { ctx->VertexProgram.CurrentPosition = inst->StringPos; ctx->VertexProgram.Callback(program->Base.Target, ctx->VertexProgram.CallbackData); } switch (inst->Opcode) { case VP_OPCODE_MOV: { GLfloat t[4]; fetch_vector4( &inst->SrcReg[0], state, t ); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_LIT: { const GLfloat epsilon = 1.0e-5F; /* XXX fix? */ GLfloat t[4], lit[4]; fetch_vector4( &inst->SrcReg[0], state, t ); if (t[3] < -(128.0F - epsilon)) t[3] = - (128.0F - epsilon); else if (t[3] > 128.0F - epsilon) t[3] = 128.0F - epsilon; if (t[0] < 0.0) t[0] = 0.0; if (t[1] < 0.0) t[1] = 0.0; lit[0] = 1.0; lit[1] = t[0]; lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F; lit[3] = 1.0; store_vector4( &inst->DstReg, state, lit ); } break; case VP_OPCODE_RCP: { GLfloat t[4]; fetch_vector1( &inst->SrcReg[0], state, t ); if (t[0] != 1.0F) t[0] = 1.0F / t[0]; /* div by zero is infinity! */ t[1] = t[2] = t[3] = t[0]; store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_RSQ: { GLfloat t[4]; fetch_vector1( &inst->SrcReg[0], state, t ); t[0] = INV_SQRTF(FABSF(t[0])); t[1] = t[2] = t[3] = t[0]; store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_EXP: { GLfloat t[4], q[4], floor_t0; fetch_vector1( &inst->SrcReg[0], state, t ); floor_t0 = (float) floor(t[0]); if (floor_t0 > FLT_MAX_EXP) { SET_POS_INFINITY(q[0]); SET_POS_INFINITY(q[2]); } else if (floor_t0 < FLT_MIN_EXP) { q[0] = 0.0F; q[2] = 0.0F; } else { #ifdef USE_IEEE GLint ii = (GLint) floor_t0; ii = (ii < 23) + 0x3f800000; SET_FLOAT_BITS(q[0], ii); q[0] = *((GLfloat *) &ii); #else q[0] = (GLfloat) pow(2.0, floor_t0); #endif q[2] = (GLfloat) (q[0] * LOG2(q[1])); } q[1] = t[0] - floor_t0; q[3] = 1.0F; store_vector4( &inst->DstReg, state, q ); } break; case VP_OPCODE_LOG: { GLfloat t[4], q[4], abs_t0; fetch_vector1( &inst->SrcReg[0], state, t ); abs_t0 = (GLfloat) fabs(t[0]); if (abs_t0 != 0.0F) { /* Since we really can't handle infinite values on VMS * like other OSes we'll use __MAXFLOAT to represent * infinity. This may need some tweaking. */ #ifdef VMS if (abs_t0 == __MAXFLOAT) #else if (IS_INF_OR_NAN(abs_t0)) #endif { SET_POS_INFINITY(q[0]); q[1] = 1.0F; SET_POS_INFINITY(q[2]); } else { int exponent; double mantissa = frexp(t[0], &exponent); q[0] = (GLfloat) (exponent - 1); q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */ q[2] = (GLfloat) (q[0] + LOG2(q[1])); } } else { SET_NEG_INFINITY(q[0]); q[1] = 1.0F; SET_NEG_INFINITY(q[2]); } q[3] = 1.0; store_vector4( &inst->DstReg, state, q ); } break; case VP_OPCODE_MUL: { GLfloat t[4], u[4], prod[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); prod[0] = t[0] * u[0]; prod[1] = t[1] * u[1]; prod[2] = t[2] * u[2]; prod[3] = t[3] * u[3]; store_vector4( &inst->DstReg, state, prod ); } break; case VP_OPCODE_ADD: { GLfloat t[4], u[4], sum[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); sum[0] = t[0] + u[0]; sum[1] = t[1] + u[1]; sum[2] = t[2] + u[2]; sum[3] = t[3] + u[3]; store_vector4( &inst->DstReg, state, sum ); } break; case VP_OPCODE_DP3: { GLfloat t[4], u[4], dot[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2]; dot[1] = dot[2] = dot[3] = dot[0]; store_vector4( &inst->DstReg, state, dot ); } break; case VP_OPCODE_DP4: { GLfloat t[4], u[4], dot[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3]; dot[1] = dot[2] = dot[3] = dot[0]; store_vector4( &inst->DstReg, state, dot ); } break; case VP_OPCODE_DST: { GLfloat t[4], u[4], dst[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); dst[0] = 1.0F; dst[1] = t[1] * u[1]; dst[2] = t[2]; dst[3] = u[3]; store_vector4( &inst->DstReg, state, dst ); } break; case VP_OPCODE_MIN: { GLfloat t[4], u[4], min[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); min[0] = (t[0] < u[0]) ? t[0] : u[0]; min[1] = (t[1] < u[1]) ? t[1] : u[1]; min[2] = (t[2] < u[2]) ? t[2] : u[2]; min[3] = (t[3] < u[3]) ? t[3] : u[3]; store_vector4( &inst->DstReg, state, min ); } break; case VP_OPCODE_MAX: { GLfloat t[4], u[4], max[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); max[0] = (t[0] > u[0]) ? t[0] : u[0]; max[1] = (t[1] > u[1]) ? t[1] : u[1]; max[2] = (t[2] > u[2]) ? t[2] : u[2]; max[3] = (t[3] > u[3]) ? t[3] : u[3]; store_vector4( &inst->DstReg, state, max ); } break; case VP_OPCODE_SLT: { GLfloat t[4], u[4], slt[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F; slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F; slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F; slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F; store_vector4( &inst->DstReg, state, slt ); } break; case VP_OPCODE_SGE: { GLfloat t[4], u[4], sge[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F; sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F; sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F; sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F; store_vector4( &inst->DstReg, state, sge ); } break; case VP_OPCODE_MAD: { GLfloat t[4], u[4], v[4], sum[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); fetch_vector4( &inst->SrcReg[2], state, v ); sum[0] = t[0] * u[0] + v[0]; sum[1] = t[1] * u[1] + v[1]; sum[2] = t[2] * u[2] + v[2]; sum[3] = t[3] * u[3] + v[3]; store_vector4( &inst->DstReg, state, sum ); } break; case VP_OPCODE_ARL: { GLfloat t[4]; fetch_vector4( &inst->SrcReg[0], state, t ); state->AddressReg[0] = (GLint) floor(t[0]); } break; case VP_OPCODE_DPH: { GLfloat t[4], u[4], dot[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3]; dot[1] = dot[2] = dot[3] = dot[0]; store_vector4( &inst->DstReg, state, dot ); } break; case VP_OPCODE_RCC: { GLfloat t[4], u; fetch_vector1( &inst->SrcReg[0], state, t ); if (t[0] == 1.0F) u = 1.0F; else u = 1.0F / t[0]; if (u > 0.0F) { if (u > 1.884467e+019F) { u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */ } else if (u < 5.42101e-020F) { u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */ } } else { if (u < -1.884467e+019F) { u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */ } else if (u > -5.42101e-020F) { u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */ } } t[0] = t[1] = t[2] = t[3] = u; store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */ { GLfloat t[4], u[4], sum[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); sum[0] = t[0] - u[0]; sum[1] = t[1] - u[1]; sum[2] = t[2] - u[2]; sum[3] = t[3] - u[3]; store_vector4( &inst->DstReg, state, sum ); } break; case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */ { GLfloat t[4]; fetch_vector4( &inst->SrcReg[0], state, t ); if (t[0] < 0.0) t[0] = -t[0]; if (t[1] < 0.0) t[1] = -t[1]; if (t[2] < 0.0) t[2] = -t[2]; if (t[3] < 0.0) t[3] = -t[3]; store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_FLR: /* GL_ARB_vertex_program */ { GLfloat t[4]; fetch_vector4( &inst->SrcReg[0], state, t ); t[0] = FLOORF(t[0]); t[1] = FLOORF(t[1]); t[2] = FLOORF(t[2]); t[3] = FLOORF(t[3]); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_FRC: /* GL_ARB_vertex_program */ { GLfloat t[4]; fetch_vector4( &inst->SrcReg[0], state, t ); t[0] = t[0] - FLOORF(t[0]); t[1] = t[1] - FLOORF(t[1]); t[2] = t[2] - FLOORF(t[2]); t[3] = t[3] - FLOORF(t[3]); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_EX2: /* GL_ARB_vertex_program */ { GLfloat t[4]; fetch_vector1( &inst->SrcReg[0], state, t ); t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_LG2: /* GL_ARB_vertex_program */ { GLfloat t[4]; fetch_vector1( &inst->SrcReg[0], state, t ); t[0] = t[1] = t[2] = t[3] = LOG2(t[0]); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_POW: /* GL_ARB_vertex_program */ { GLfloat t[4], u[4]; fetch_vector1( &inst->SrcReg[0], state, t ); fetch_vector1( &inst->SrcReg[1], state, u ); t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_XPD: /* GL_ARB_vertex_program */ { GLfloat t[4], u[4], cross[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); cross[0] = t[1] * u[2] - t[2] * u[1]; cross[1] = t[2] * u[0] - t[0] * u[2]; cross[2] = t[0] * u[1] - t[1] * u[0]; store_vector4( &inst->DstReg, state, cross ); } break; case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */ { const struct vp_src_register *source = &inst->SrcReg[0]; const GLfloat *src = get_register_pointer(source, state); GLfloat result[4]; GLuint i; /* do extended swizzling here */ for (i = 0; i < 3; i++) { if (source->Swizzle[i] == SWIZZLE_ZERO) result[i] = 0.0; else if (source->Swizzle[i] == SWIZZLE_ONE) result[i] = -1.0; else result[i] = -src[source->Swizzle[i]]; if (source->Negate) result[i] = -result[i]; } store_vector4( &inst->DstReg, state, result ); } break; case VP_OPCODE_END: ctx->_CurrentProgram = 0; return; default: /* bad instruction opcode */ _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program"); ctx->_CurrentProgram = 0; return; } /* switch */ } /* for */ ctx->_CurrentProgram = 0; }