/** * Check that element 'j' of the array has reasonable data. * Map VBO if needed. * For debugging purposes; not normally used. */ static void check_array_data(struct gl_context *ctx, struct gl_client_array *array, GLuint attrib, GLuint j) { if (array->Enabled) { const void *data = array->Ptr; if (_mesa_is_bufferobj(array->BufferObj)) { if (!array->BufferObj->Pointer) { /* need to map now */ array->BufferObj->Pointer = ctx->Driver.MapBufferRange(ctx, 0, array->BufferObj->Size, GL_MAP_READ_BIT, array->BufferObj); } data = ADD_POINTERS(data, array->BufferObj->Pointer); } switch (array->Type) { case GL_FLOAT: { GLfloat *f = (GLfloat *) ((GLubyte *) data + array->StrideB * j); GLint k; for (k = 0; k < array->Size; k++) { if (IS_INF_OR_NAN(f[k]) || f[k] >= 1.0e20 || f[k] <= -1.0e10) { printf("Bad array data:\n"); printf(" Element[%u].%u = %f\n", j, k, f[k]); printf(" Array %u at %p\n", attrib, (void* ) array); printf(" Type 0x%x, Size %d, Stride %d\n", array->Type, array->Size, array->Stride); printf(" Address/offset %p in Buffer Object %u\n", array->Ptr, array->BufferObj->Name); f[k] = 1.0; /* XXX replace the bad value! */ } /*assert(!IS_INF_OR_NAN(f[k]));*/ } } break; default: ; } } }
/** * Use a hashtable to attempt to identify recently-emitted vertices * and avoid re-emitting them. */ static GLuint elt(struct copy_context *copy, GLuint elt_idx) { GLuint elt = copy->srcelt[elt_idx]; GLuint slot = elt & (ELT_TABLE_SIZE-1); /* printf("elt %d\n", elt); */ /* Look up the incoming element in the vertex cache. Re-emit if * necessary. */ if (copy->vert_cache[slot].in != elt) { GLubyte *csr = copy->dstptr; GLuint i; /* printf(" --> emit to dstelt %d\n", copy->dstbuf_nr); */ for (i = 0; i < copy->nr_varying; i++) { const struct gl_client_array *srcarray = copy->varying[i].array; const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB; memcpy(csr, srcptr, copy->varying[i].size); csr += copy->varying[i].size; #ifdef NAN_CHECK if (srcarray->Type == GL_FLOAT) { GLuint k; GLfloat *f = (GLfloat *) srcptr; for (k = 0; k < srcarray->Size; k++) { assert(!IS_INF_OR_NAN(f[k])); assert(f[k] <= 1.0e20 && f[k] >= -1.0e20); } } #endif if (0) { const GLuint *f = (const GLuint *)srcptr; GLuint j; printf(" varying %d: ", i); for(j = 0; j < copy->varying[i].size / 4; j++) printf("%x ", f[j]); printf("\n"); } } copy->vert_cache[slot].in = elt; copy->vert_cache[slot].out = copy->dstbuf_nr++; copy->dstptr += copy->vertex_size; assert(csr == copy->dstptr); assert(copy->dstptr == (copy->dstbuf + copy->dstbuf_nr * copy->vertex_size)); } /* else */ /* printf(" --> reuse vertex\n"); */ /* printf(" --> emit %d\n", copy->vert_cache[slot].out); */ copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out; return check_flush(copy); }
/** Check for NaNs and very large values */ static inline void check_float(float x) { assert(!IS_INF_OR_NAN(x)); assert(1.0e-15 <= x && x <= 1.0e15); }
/** * Execute the given vertex program */ void _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program) { struct vertex_program_state *state = &ctx->VertexProgram; const struct vp_instruction *inst; ctx->_CurrentProgram = GL_VERTEX_PROGRAM_ARB; /* or NV, doesn't matter */ /* If the program is position invariant, multiply the input * position and the MVP matrix and stick it into the output pos slot */ if (ctx->VertexProgram.Current->IsPositionInvariant) { TRANSFORM_POINT( ctx->VertexProgram.Outputs[0], ctx->_ModelProjectMatrix.m, ctx->VertexProgram.Inputs[0]); /* XXX: This could go elsewhere */ ctx->VertexProgram.Current->OutputsWritten |= 0x1; } for (inst = program->Instructions; /*inst->Opcode != VP_OPCODE_END*/; inst++) { if (ctx->VertexProgram.CallbackEnabled && ctx->VertexProgram.Callback) { ctx->VertexProgram.CurrentPosition = inst->StringPos; ctx->VertexProgram.Callback(program->Base.Target, ctx->VertexProgram.CallbackData); } switch (inst->Opcode) { case VP_OPCODE_MOV: { GLfloat t[4]; fetch_vector4( &inst->SrcReg[0], state, t ); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_LIT: { const GLfloat epsilon = 1.0e-5F; /* XXX fix? */ GLfloat t[4], lit[4]; fetch_vector4( &inst->SrcReg[0], state, t ); if (t[3] < -(128.0F - epsilon)) t[3] = - (128.0F - epsilon); else if (t[3] > 128.0F - epsilon) t[3] = 128.0F - epsilon; if (t[0] < 0.0) t[0] = 0.0; if (t[1] < 0.0) t[1] = 0.0; lit[0] = 1.0; lit[1] = t[0]; lit[2] = (t[0] > 0.0) ? (GLfloat) exp(t[3] * log(t[1])) : 0.0F; lit[3] = 1.0; store_vector4( &inst->DstReg, state, lit ); } break; case VP_OPCODE_RCP: { GLfloat t[4]; fetch_vector1( &inst->SrcReg[0], state, t ); if (t[0] != 1.0F) t[0] = 1.0F / t[0]; /* div by zero is infinity! */ t[1] = t[2] = t[3] = t[0]; store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_RSQ: { GLfloat t[4]; fetch_vector1( &inst->SrcReg[0], state, t ); t[0] = INV_SQRTF(FABSF(t[0])); t[1] = t[2] = t[3] = t[0]; store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_EXP: { GLfloat t[4], q[4], floor_t0; fetch_vector1( &inst->SrcReg[0], state, t ); floor_t0 = (float) floor(t[0]); if (floor_t0 > FLT_MAX_EXP) { SET_POS_INFINITY(q[0]); SET_POS_INFINITY(q[2]); } else if (floor_t0 < FLT_MIN_EXP) { q[0] = 0.0F; q[2] = 0.0F; } else { #ifdef USE_IEEE GLint ii = (GLint) floor_t0; ii = (ii < 23) + 0x3f800000; SET_FLOAT_BITS(q[0], ii); q[0] = *((GLfloat *) &ii); #else q[0] = (GLfloat) pow(2.0, floor_t0); #endif q[2] = (GLfloat) (q[0] * LOG2(q[1])); } q[1] = t[0] - floor_t0; q[3] = 1.0F; store_vector4( &inst->DstReg, state, q ); } break; case VP_OPCODE_LOG: { GLfloat t[4], q[4], abs_t0; fetch_vector1( &inst->SrcReg[0], state, t ); abs_t0 = (GLfloat) fabs(t[0]); if (abs_t0 != 0.0F) { /* Since we really can't handle infinite values on VMS * like other OSes we'll use __MAXFLOAT to represent * infinity. This may need some tweaking. */ #ifdef VMS if (abs_t0 == __MAXFLOAT) #else if (IS_INF_OR_NAN(abs_t0)) #endif { SET_POS_INFINITY(q[0]); q[1] = 1.0F; SET_POS_INFINITY(q[2]); } else { int exponent; double mantissa = frexp(t[0], &exponent); q[0] = (GLfloat) (exponent - 1); q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */ q[2] = (GLfloat) (q[0] + LOG2(q[1])); } } else { SET_NEG_INFINITY(q[0]); q[1] = 1.0F; SET_NEG_INFINITY(q[2]); } q[3] = 1.0; store_vector4( &inst->DstReg, state, q ); } break; case VP_OPCODE_MUL: { GLfloat t[4], u[4], prod[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); prod[0] = t[0] * u[0]; prod[1] = t[1] * u[1]; prod[2] = t[2] * u[2]; prod[3] = t[3] * u[3]; store_vector4( &inst->DstReg, state, prod ); } break; case VP_OPCODE_ADD: { GLfloat t[4], u[4], sum[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); sum[0] = t[0] + u[0]; sum[1] = t[1] + u[1]; sum[2] = t[2] + u[2]; sum[3] = t[3] + u[3]; store_vector4( &inst->DstReg, state, sum ); } break; case VP_OPCODE_DP3: { GLfloat t[4], u[4], dot[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2]; dot[1] = dot[2] = dot[3] = dot[0]; store_vector4( &inst->DstReg, state, dot ); } break; case VP_OPCODE_DP4: { GLfloat t[4], u[4], dot[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + t[3] * u[3]; dot[1] = dot[2] = dot[3] = dot[0]; store_vector4( &inst->DstReg, state, dot ); } break; case VP_OPCODE_DST: { GLfloat t[4], u[4], dst[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); dst[0] = 1.0F; dst[1] = t[1] * u[1]; dst[2] = t[2]; dst[3] = u[3]; store_vector4( &inst->DstReg, state, dst ); } break; case VP_OPCODE_MIN: { GLfloat t[4], u[4], min[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); min[0] = (t[0] < u[0]) ? t[0] : u[0]; min[1] = (t[1] < u[1]) ? t[1] : u[1]; min[2] = (t[2] < u[2]) ? t[2] : u[2]; min[3] = (t[3] < u[3]) ? t[3] : u[3]; store_vector4( &inst->DstReg, state, min ); } break; case VP_OPCODE_MAX: { GLfloat t[4], u[4], max[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); max[0] = (t[0] > u[0]) ? t[0] : u[0]; max[1] = (t[1] > u[1]) ? t[1] : u[1]; max[2] = (t[2] > u[2]) ? t[2] : u[2]; max[3] = (t[3] > u[3]) ? t[3] : u[3]; store_vector4( &inst->DstReg, state, max ); } break; case VP_OPCODE_SLT: { GLfloat t[4], u[4], slt[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F; slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F; slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F; slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F; store_vector4( &inst->DstReg, state, slt ); } break; case VP_OPCODE_SGE: { GLfloat t[4], u[4], sge[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F; sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F; sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F; sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F; store_vector4( &inst->DstReg, state, sge ); } break; case VP_OPCODE_MAD: { GLfloat t[4], u[4], v[4], sum[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); fetch_vector4( &inst->SrcReg[2], state, v ); sum[0] = t[0] * u[0] + v[0]; sum[1] = t[1] * u[1] + v[1]; sum[2] = t[2] * u[2] + v[2]; sum[3] = t[3] * u[3] + v[3]; store_vector4( &inst->DstReg, state, sum ); } break; case VP_OPCODE_ARL: { GLfloat t[4]; fetch_vector4( &inst->SrcReg[0], state, t ); state->AddressReg[0] = (GLint) floor(t[0]); } break; case VP_OPCODE_DPH: { GLfloat t[4], u[4], dot[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3]; dot[1] = dot[2] = dot[3] = dot[0]; store_vector4( &inst->DstReg, state, dot ); } break; case VP_OPCODE_RCC: { GLfloat t[4], u; fetch_vector1( &inst->SrcReg[0], state, t ); if (t[0] == 1.0F) u = 1.0F; else u = 1.0F / t[0]; if (u > 0.0F) { if (u > 1.884467e+019F) { u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */ } else if (u < 5.42101e-020F) { u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */ } } else { if (u < -1.884467e+019F) { u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */ } else if (u > -5.42101e-020F) { u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */ } } t[0] = t[1] = t[2] = t[3] = u; store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */ { GLfloat t[4], u[4], sum[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); sum[0] = t[0] - u[0]; sum[1] = t[1] - u[1]; sum[2] = t[2] - u[2]; sum[3] = t[3] - u[3]; store_vector4( &inst->DstReg, state, sum ); } break; case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */ { GLfloat t[4]; fetch_vector4( &inst->SrcReg[0], state, t ); if (t[0] < 0.0) t[0] = -t[0]; if (t[1] < 0.0) t[1] = -t[1]; if (t[2] < 0.0) t[2] = -t[2]; if (t[3] < 0.0) t[3] = -t[3]; store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_FLR: /* GL_ARB_vertex_program */ { GLfloat t[4]; fetch_vector4( &inst->SrcReg[0], state, t ); t[0] = FLOORF(t[0]); t[1] = FLOORF(t[1]); t[2] = FLOORF(t[2]); t[3] = FLOORF(t[3]); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_FRC: /* GL_ARB_vertex_program */ { GLfloat t[4]; fetch_vector4( &inst->SrcReg[0], state, t ); t[0] = t[0] - FLOORF(t[0]); t[1] = t[1] - FLOORF(t[1]); t[2] = t[2] - FLOORF(t[2]); t[3] = t[3] - FLOORF(t[3]); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_EX2: /* GL_ARB_vertex_program */ { GLfloat t[4]; fetch_vector1( &inst->SrcReg[0], state, t ); t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_LG2: /* GL_ARB_vertex_program */ { GLfloat t[4]; fetch_vector1( &inst->SrcReg[0], state, t ); t[0] = t[1] = t[2] = t[3] = LOG2(t[0]); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_POW: /* GL_ARB_vertex_program */ { GLfloat t[4], u[4]; fetch_vector1( &inst->SrcReg[0], state, t ); fetch_vector1( &inst->SrcReg[1], state, u ); t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]); store_vector4( &inst->DstReg, state, t ); } break; case VP_OPCODE_XPD: /* GL_ARB_vertex_program */ { GLfloat t[4], u[4], cross[4]; fetch_vector4( &inst->SrcReg[0], state, t ); fetch_vector4( &inst->SrcReg[1], state, u ); cross[0] = t[1] * u[2] - t[2] * u[1]; cross[1] = t[2] * u[0] - t[0] * u[2]; cross[2] = t[0] * u[1] - t[1] * u[0]; store_vector4( &inst->DstReg, state, cross ); } break; case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */ { const struct vp_src_register *source = &inst->SrcReg[0]; const GLfloat *src = get_register_pointer(source, state); GLfloat result[4]; GLuint i; /* do extended swizzling here */ for (i = 0; i < 3; i++) { if (source->Swizzle[i] == SWIZZLE_ZERO) result[i] = 0.0; else if (source->Swizzle[i] == SWIZZLE_ONE) result[i] = -1.0; else result[i] = -src[source->Swizzle[i]]; if (source->Negate) result[i] = -result[i]; } store_vector4( &inst->DstReg, state, result ); } break; case VP_OPCODE_END: ctx->_CurrentProgram = 0; return; default: /* bad instruction opcode */ _mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program"); ctx->_CurrentProgram = 0; return; } /* switch */ } /* for */ ctx->_CurrentProgram = 0; }