/*
 * Build a run-time generated x86 routine for the "four unsigned bytes,
 * passed by pointer" colour attribute and link it into the Color4ubv
 * cache of `tnl`.
 *
 * Two templates exist, selected by TNL_CP_VC_FRMT_PKCOLOR in `key`:
 *   - packed:  copy the 32-bit word the argument points at to
 *              tnl->ubytecolorptr;
 *   - float:   look each of the four bytes up in
 *              _mesa_ubyte_to_float_color_tab and store the four results
 *              at tnl->floatcolorptr + 0/4/8/12.
 *
 * The chosen template is copied into a 16-byte aligned allocation and its
 * placeholder operands are handed to FIXUP together with the byte offset
 * at which they sit (FIXUP is defined elsewhere; presumably it overwrites
 * the placeholder with the final value).
 *
 * Returns the new dynfn record.
 */
struct dynfn *tnl_makeX86Attr4ubv( TNLcontext *tnl, int key )
{
   struct dynfn *dfn = MALLOC_STRUCT( dynfn );

   insert_at_head( &tnl->dfn_cache.Color4ubv, dfn );
   dfn->key = key;

   if (TNL_DEBUG & DEBUG_CODEGEN) {
      _mesa_debug(NULL, "%s 0x%08x\n", __FUNCTION__, key );
   }

   if (key & TNL_CP_VC_FRMT_PKCOLOR) {
      /* Packed colour: a single dword copy. */
      static char packed_stub[] = {
         0x8b, 0x44, 0x24, 0x04,          /* mov 0x4(%esp,1),%eax */
         0xba, 0x78, 0x56, 0x34, 0x12,    /* mov $DEST,%edx */
         0x8b, 0x00,                      /* mov (%eax),%eax */
         0x89, 0x02,                      /* mov %eax,(%edx) */
         0xc3,                            /* ret */
      };

      dfn->code = ALIGN_MALLOC( sizeof(packed_stub), 16 );
      memcpy (dfn->code, packed_stub, sizeof(packed_stub));
      FIXUP(dfn->code, 5, 0x12345678, (int)tnl->ubytecolorptr);
      return dfn;
   }

   {
      /* Float colour: two table lookups per 16-bit half of the word. */
      static char float_stub[] = {
         0x53,                                 /* push %ebx */
         0xba, 0x00, 0x00, 0x00, 0x00,         /* mov $0x0,%edx */
         0x31, 0xc0,                           /* xor %eax,%eax */
         0x31, 0xc9,                           /* xor %ecx,%ecx */
         0x8b, 0x5c, 0x24, 0x08,               /* mov 0x8(%esp,1), %ebx */
         0x8b, 0x1b,                           /* mov (%ebx), %ebx */
         0x88, 0xd8,                           /* mov %bl, %al */
         0x88, 0xf9,                           /* mov %bh, %cl */
         0x8b, 0x04, 0x82,                     /* mov (%edx,%eax,4),%eax */
         0x8b, 0x0c, 0x8a,                     /* mov (%edx,%ecx,4),%ecx */
         0xa3, 0xaf, 0xbe, 0xad, 0xde,         /* mov %eax,0xdeadbeaf */
         0x89, 0x0d, 0xaf, 0xbe, 0xad, 0xde,   /* mov %ecx,0xdeadbeaf */
         0x31, 0xc0,                           /* xor %eax,%eax */
         0x31, 0xc9,                           /* xor %ecx,%ecx */
         0xc1, 0xeb, 0x10,                     /* shr $0x10, %ebx */
         0x88, 0xd8,                           /* mov %bl, %al */
         0x88, 0xf9,                           /* mov %bh, %cl */
         0x8b, 0x04, 0x82,                     /* mov (%edx,%eax,4),%eax */
         0x8b, 0x0c, 0x8a,                     /* mov (%edx,%ecx,4),%ecx */
         0xa3, 0xaf, 0xbe, 0xad, 0xde,         /* mov %eax,0xdeadbeaf */
         0x89, 0x0d, 0xaf, 0xbe, 0xad, 0xde,   /* mov %ecx,0xdeadbeaf */
         0x5b,                                 /* pop %ebx */
         0xc3,                                 /* ret */
      };

      dfn->code = ALIGN_MALLOC( sizeof(float_stub), 16 );
      memcpy (dfn->code, float_stub, sizeof(float_stub));

      /* Lookup table base, then the four destination floats. */
      FIXUP(dfn->code, 2, 0x00000000, (int)_mesa_ubyte_to_float_color_tab);
      FIXUP(dfn->code, 27, 0xdeadbeaf, (int)tnl->floatcolorptr);
      FIXUP(dfn->code, 33, 0xdeadbeaf, (int)tnl->floatcolorptr+4);
      FIXUP(dfn->code, 55, 0xdeadbeaf, (int)tnl->floatcolorptr+8);
      FIXUP(dfn->code, 61, 0xdeadbeaf, (int)tnl->floatcolorptr+12);
      return dfn;
   }
}
/*
 * Build a run-time generated x86 routine that stores its three 32-bit
 * stack arguments at DEST, DEST+4 and DEST+8, where DEST is patched to
 * tnl->normalptr, and link it into the Normal3f cache of `tnl`.
 *
 * The template is copied into a 16-byte aligned allocation; the DEST
 * placeholder (0x12345678, at byte offset 1) is handed to FIXUP, which is
 * defined elsewhere.
 *
 * Returns the new dynfn record.
 */
struct dynfn *tnl_makeX86Attr3f( TNLcontext *tnl, int key )
{
   static char store3_stub[] = {
      0xba, 0x78, 0x56, 0x34, 0x12,   /* mov $DEST,%edx */
      0x8b, 0x44, 0x24, 0x04,         /* mov 0x4(%esp,1),%eax */
      0x89, 0x02,                     /* mov %eax,(%edx) */
      0x8b, 0x44, 0x24, 0x08,         /* mov 0x8(%esp,1),%eax */
      0x89, 0x42, 0x04,               /* mov %eax,0x4(%edx) */
      0x8b, 0x44, 0x24, 0x0c,         /* mov 0xc(%esp,1),%eax */
      0x89, 0x42, 0x08,               /* mov %eax,0x8(%edx) */
      0xc3,                           /* ret */
   };
   struct dynfn *entry = MALLOC_STRUCT( dynfn );

   if (TNL_DEBUG & DEBUG_CODEGEN) {
      _mesa_debug(NULL, "%s 0x%08x\n", __FUNCTION__, key );
   }

   insert_at_head( &tnl->dfn_cache.Normal3f, entry );
   entry->key = key;

   entry->code = ALIGN_MALLOC( sizeof(store3_stub), 16 );
   memcpy (entry->code, store3_stub, sizeof(store3_stub));
   FIXUP(entry->code, 1, 0x12345678, (int)tnl->normalptr);

   return entry;
}
static void do_import( struct vertex_buffer *VB, struct gl_client_array *to, struct gl_client_array *from ) { GLuint count = VB->Count; if (!to->Ptr) { to->Ptr = ALIGN_MALLOC( VB->Size * 4 * sizeof(GLubyte), 32 ); to->Type = GL_UNSIGNED_BYTE; } /* No need to transform the same value 3000 times. */ if (!from->StrideB) { to->StrideB = 0; count = 1; } else to->StrideB = 4 * sizeof(GLubyte); _math_trans_4ub( (GLubyte (*)[4]) to->Ptr, from->Ptr, from->StrideB, from->Type, from->Size, 0, count); }
/*
 * Build a run-time generated SSE routine for the three-float, passed by
 * pointer, normal attribute and link it into the Normal3fv cache of the
 * r200 context behind `ctx`.
 *
 * key[0] and key[1] are recorded in the new dynfn record.  The template
 * is copied into a 16-byte aligned allocation and the immediate operand
 * of the "mov $imm32,%edx" instruction (byte offset 5) is patched through
 * FIXUP, which is defined elsewhere.
 *
 * Changes relative to the previous revision:
 *   - the FIXUP check value is now 0x12345678, the placeholder that is
 *     actually present in the template at offset 5 (it was 0x0);
 *   - the patched address is taken from rmesa->vb, the same per-context
 *     structure that holds the cache list used above (it was a bare `vb`,
 *     which is not declared in this function).
 *
 * NOTE(review): the stores use displacements 0xc and 0x14 from the patched
 * base address; whether that matches the layout normalptr points into
 * cannot be confirmed from this function alone.
 */
static struct dynfn *makeSSENormal3fv( GLcontext *ctx, const int *key )
{
   /* Requires P4 (sse2?) */
   static unsigned char temp[] = {
      0x8b, 0x44, 0x24, 0x04,         /* mov 0x4(%esp,1),%eax */
      0xba, 0x78, 0x56, 0x34, 0x12,   /* mov $0x12345678,%edx */
      0xf3, 0x0f, 0x7e, 0x00,         /* movq (%eax),%xmm0 */
      0x66, 0x0f, 0x6e, 0x48, 0x08,   /* movd 0x8(%eax),%xmm1 */
      0x66, 0x0f, 0xd6, 0x42, 0x0c,   /* movq %xmm0,0xc(%edx) */
      0x66, 0x0f, 0x7e, 0x4a, 0x14,   /* movd %xmm1,0x14(%edx) */
      0xc3,                           /* ret */
   };

   struct dynfn *dfn = MALLOC_STRUCT( dynfn );
   r200ContextPtr rmesa = R200_CONTEXT(ctx);

   insert_at_head( &rmesa->vb.dfn_cache.Normal3fv, dfn );
   dfn->key[0] = key[0];
   dfn->key[1] = key[1];

   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
   memcpy (dfn->code, temp, sizeof(temp));
   FIXUP(dfn->code, 5, 0x12345678, (int)rmesa->vb.normalptr);

   return dfn;
}
/**
 * First-run initialiser for the stage: storage is not allocated until the
 * stage is actually run for the first time.
 *
 * Allocates the stage's private vp_stage_data and stores it in
 * stage->privatePtr, then allocates 15 four-component output vectors, the
 * NDC coordinate vector and the clip mask, each sized for VB->Size
 * vertices with 32-byte alignment.
 *
 * Returns GL_FALSE when the private structure could not be allocated,
 * GL_TRUE otherwise.
 */
static GLboolean init_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage )
{
   struct vertex_buffer *VB = &(TNL_CONTEXT(ctx)->vb);
   const GLuint vb_size = VB->Size;
   struct vp_stage_data *store;
   GLuint attr = 0;

   stage->privatePtr = MALLOC(sizeof(*store));
   store = VP_STAGE_DATA(stage);
   if (!store) {
      return GL_FALSE;
   }

   /* Vertex output value arrays. */
   while (attr < 15) {
      _mesa_vector4f_alloc( &store->attribs[attr], 0, vb_size, 32 );
      store->attribs[attr].size = 4;
      attr++;
   }

   /* Remaining per-vertex storage. */
   _mesa_vector4f_alloc( &store->ndcCoords, 0, vb_size, 32 );
   store->clipmask = (GLubyte *) ALIGN_MALLOC(sizeof(GLubyte)*vb_size, 32 );

   return GL_TRUE;
}
/*
 * Build a run-time generated x86 routine for the two-float vertex entry
 * point and link it into the dynfn cache of `tnl`.
 *
 * The generated code writes its two stack arguments to the location read
 * from DMAPTR, appends VERTSIZE-2 dwords copied from &tnl->vertex[2] with
 * "repz movsl", stores the advanced pointer back to DMAPTR, decrements
 * COUNTER and, when the counter reaches zero, jumps through NOTIFY.
 *
 * Fix relative to the previous revision: the FIXUP offsets after the
 * first two were those of a longer, three-coordinate template (37, 44,
 * 50, 56, 67).  This template has one argument load (4 bytes) and one
 * store (3 bytes) fewer, so each of those operands sits 7 bytes earlier;
 * the old offsets patched the wrong operands and the last one wrote past
 * the end of the 64-byte allocation.  Operand offsets in this template:
 *
 *     3  $VERTEX+2      (mov imm32,%esi)
 *     9  DMAPTR         (load)
 *    30  $VERTSIZE-2    (mov imm32,%ecx)
 *    37  COUNTER        (load)
 *    43  DMAPTR         (store)
 *    49  COUNTER        (store)
 *    60  NOTIFY         (indirect jmp)
 *
 * NOTE(review): the record is linked into dfn_cache.Vertex3f and the debug
 * test uses RADEON_DEBUG rather than TNL_DEBUG; both are left unchanged
 * here but look like copy/paste leftovers - confirm against the cache
 * structure and the debug flags header.
 *
 * Returns the new dynfn record.
 */
struct dynfn *tnl_makeX86Vertex2f( TNLcontext *tnl, int key )
{
   /* Repz is convenient as it makes it possible to emit code for any
    * vertex size with little tweaking.
    */
   static char temp[] = {
      0x57,                        /* push %edi */
      0x56,                        /* push %esi */
      0xbe, 0, 0, 0, 0,            /* mov $VERTEX+2,%esi */
      0x8b, 0x3d, 0, 0, 0, 0,      /* mov DMAPTR,%edi */
      0x8b, 0x44, 0x24, 0x0c,      /* mov 0x0c(%esp,1),%eax */
      0x8b, 0x54, 0x24, 0x10,      /* mov 0x10(%esp,1),%edx */
      0x89, 0x07,                  /* mov %eax,(%edi) */
      0x89, 0x57, 0x04,            /* mov %edx,0x4(%edi) */
      0x83, 0xc7, 0x08,            /* add $0x8,%edi */
      0xb9, 0, 0, 0, 0,            /* mov $VERTSIZE-2,%ecx */
      0xf3, 0xa5,                  /* repz movsl %ds:(%esi),%es:(%edi)*/
      0xa1, 0, 0, 0, 0,            /* mov COUNTER,%eax */
      0x89, 0x3d, 0, 0, 0, 0,      /* mov %edi,DMAPTR */
      0x48,                        /* dec %eax */
      0xa3, 0, 0, 0, 0,            /* mov %eax,COUNTER */
      0x5e,                        /* pop %esi */
      0x5f,                        /* pop %edi */
      0x74, 0x01,                  /* je +1 */
      0xc3,                        /* ret */
      0xff, 0x25, 0, 0, 0, 0       /* jmp NOTIFY */
   };
   struct dynfn *dfn = MALLOC_STRUCT( dynfn );

   if (RADEON_DEBUG & DEBUG_CODEGEN)
      _mesa_debug(NULL, "%s 0x%08x\n", __FUNCTION__, key );

   dfn->code = ALIGN_MALLOC( sizeof(temp), 16 );
   memcpy (dfn->code, temp, sizeof(temp));

   FIXUP(dfn->code, 3, 0x0, (int)&tnl->vertex[2]);
   FIXUP(dfn->code, 9, 0x0, (int)&tnl->dmaptr);
   FIXUP(dfn->code, 30, 0x0, tnl->vertex_size-2);
   FIXUP(dfn->code, 37, 0x0, (int)&tnl->counter);
   FIXUP(dfn->code, 43, 0x0, (int)&tnl->dmaptr);
   FIXUP(dfn->code, 49, 0x0, (int)&tnl->counter);
   FIXUP(dfn->code, 60, 0x0, (int)&tnl->notify);

   insert_at_head( &tnl->dfn_cache.Vertex3f, dfn );
   dfn->key = key;
   return dfn;
}
/*
 * Allocate the driver vertex array for `ctx` (one GrVertex per TNL vertex
 * buffer slot, 32-byte aligned) and select the XYZW|RGBA setup index.
 * init_setup_tab() is run on the first call only.
 */
void fxAllocVB( GLcontext *ctx )
{
   static int firsttime = 1;
   fxMesaContext fxMesa = FX_CONTEXT(ctx);
   GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;

   if (firsttime) {
      init_setup_tab();
      firsttime = 0;
   }

   fxMesa->verts = (GrVertex *)ALIGN_MALLOC(nverts * sizeof(GrVertex), 32);
   fxMesa->SetupIndex = SETUP_XYZW|SETUP_RGBA;
}
/*
 * Allocate the driver vertex array for `ctx` (one tdfxVertex per TNL
 * vertex buffer slot, 32-byte aligned) and select the TINY vertex layout
 * with the XYZ|RGBA setup index.  init_setup_tab() is run on the first
 * call only.
 */
void tdfxInitVB( GLcontext *ctx )
{
   static int firsttime = 1;
   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
   GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;

   if (firsttime) {
      init_setup_tab();
      firsttime = 0;
   }

   fxMesa->verts = ALIGN_MALLOC(nverts * sizeof(tdfxVertex), 32);
   fxMesa->vertexFormat = TDFX_LAYOUT_TINY;
   fxMesa->SetupIndex = TDFX_XYZ_BIT|TDFX_RGBA_BIT;
}
/*
 * Allocate the driver vertex array for `ctx` (one ffb_vertex per TNL
 * vertex buffer slot, 32-byte aligned).  init_setup_tab() is run on the
 * first call only.
 */
void ffbInitVB( GLcontext *ctx )
{
   static int firsttime = 1;
   ffbContextPtr fmesa = FFB_CONTEXT(ctx);
   GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;

   fmesa->verts = (ffb_vertex *)ALIGN_MALLOC(nverts * sizeof(ffb_vertex), 32);

   if (firsttime) {
      init_setup_tab();
      firsttime = 0;
   }
}
/*
 * Allocate the driver vertex array for `ctx`: 4 * 16 bytes per TNL vertex
 * buffer slot, 32-byte aligned.  init_setup_tab() is run on the first
 * call only.
 */
void mach64InitVB( GLcontext *ctx )
{
   static int firsttime = 1;
   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
   GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;

   mmesa->verts = (GLubyte *)ALIGN_MALLOC(nverts * 4 * 16, 32);

   if (firsttime) {
      init_setup_tab();
      firsttime = 0;
   }
}
/*
 * Allocate the driver vertex array for `ctx`: 4 * 16 bytes per TNL vertex
 * buffer slot, 32-byte aligned.  init_setup_tab() is run on the first
 * call only.
 */
void i830InitVB( GLcontext *ctx )
{
   static int firsttime = 1;
   i830ContextPtr imesa = I830_CONTEXT(ctx);
   GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;

   imesa->verts = (char *)ALIGN_MALLOC(nverts * 4 * 16, 32);

   if (firsttime) {
      init_setup_tab();
      firsttime = 0;
   }
}
/*
 * Allocate the driver vertex array for `ctx` (4 * 16 bytes per TNL vertex
 * buffer slot, 32-byte aligned) and set the context's vertex size.
 * init_setup_tab() is run on the first call only.
 *
 * Fix relative to the previous revision: gmesa->vertex_size is per-context
 * state but was assigned inside the process-wide "first time" guard, so
 * only the first context to reach this function had it set here.  It is
 * now assigned on every call.
 */
void gammaInitVB( GLcontext *ctx )
{
   gammaContextPtr gmesa = GAMMA_CONTEXT(ctx);
   GLuint size = TNL_CONTEXT(ctx)->vb.Size;

   gmesa->verts = (GLubyte *)ALIGN_MALLOC(size * 4 * 16, 32);

   {
      static int firsttime = 1;
      if (firsttime) {
         init_setup_tab();
         firsttime = 0;
      }
   }

   gmesa->vertex_size = 16; /* FIXME - only one vertex setup */
}
/*
 * Allocate the driver vertex array for `ctx` (4 * 16 bytes per TNL vertex
 * buffer slot, 32-byte aligned) and set the context's vertex stride shift.
 * init_setup_tab() is run on the first call only.
 *
 * Fix relative to the previous revision: vmesa->vertex_stride_shift is
 * per-context state but was assigned inside the process-wide "first time"
 * guard, so only the first context to reach this function had it set
 * here.  It is now assigned on every call.
 */
void s3vInitVB( GLcontext *ctx )
{
   s3vContextPtr vmesa = S3V_CONTEXT(ctx);
   GLuint size = TNL_CONTEXT(ctx)->vb.Size;

   vmesa->verts = (char *)ALIGN_MALLOC(size * 4 * 16, 32);

   {
      static int firsttime = 1;
      if (firsttime) {
         init_setup_tab();
         firsttime = 0;
      }
   }

   vmesa->vertex_stride_shift = 6 /* 4 */; /* FIXME - only one vertex setup */
}
/*
 * Allocate and initialise a new immediate structure for `ctx`.
 *
 * The structure itself is 32-byte aligned.  TexCoord[0] aliases the
 * embedded TexCoord0 array; units 1 .. MaxTextureUnits-1 each get their
 * own 32-byte aligned array of IMM_SIZE four-float elements.  Flag and
 * Normal are cleared; the remaining fields are set to their start-up
 * values.  The structure receives the next value of the file-level `id`
 * counter.
 *
 * Returns the new structure, or 0 when the structure could not be
 * allocated.
 */
static struct immediate *real_alloc_immediate( GLcontext *ctx )
{
   struct immediate *IM = ALIGN_MALLOC_STRUCT( immediate, 32 );
   GLuint unit;

   if (!IM) {
      return 0;
   }

   /* Identity and ownership. */
   IM->id = id++;
   IM->ref_count = 0;
   IM->backref = ctx;

   /* Vertex range: everything starts just past the copied vertices. */
   IM->FlushElt = 0;
   IM->LastPrimitive = IMM_MAX_COPIED_VERTS;
   IM->Count = IMM_MAX_COPIED_VERTS;
   IM->Start = IMM_MAX_COPIED_VERTS;

   IM->Material = 0;
   IM->MaterialMask = 0;
   IM->MaxTextureUnits = ctx->Const.MaxTextureUnits;
   IM->TexSize = 0;
   IM->NormalLengthPtr = 0;

   IM->CopyTexSize = 0;
   IM->CopyStart = IM->Start;

   /* Unit 0 uses the embedded array; the others are heap allocated. */
   IM->TexCoord[0] = IM->TexCoord0;
   unit = 1;
   while (unit < ctx->Const.MaxTextureUnits) {
      IM->TexCoord[unit] = (GLfloat (*)[4])
         ALIGN_MALLOC( IMM_SIZE * sizeof(GLfloat) * 4, 32 );
      unit++;
   }

   /* Normals are handled like every other data type, hence no special
    * initial value beyond clearing.
    */
   MEMSET(IM->Flag, 0, sizeof(IM->Flag));
   MEMSET(IM->Normal, 0.0 , sizeof(IM->Normal));

   return IM;
}
void vbo_exec_vtx_init( struct vbo_exec_context *exec ) { GLcontext *ctx = exec->ctx; struct vbo_context *vbo = vbo_context(ctx); GLuint i; /* Allocate a buffer object. Will just reuse this object * continuously, unless vbo_use_buffer_objects() is called to enable * use of real VBOs. */ _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, ctx->Shared->NullBufferObj); ASSERT(!exec->vtx.buffer_map); exec->vtx.buffer_map = (GLfloat *)ALIGN_MALLOC(VBO_VERT_BUFFER_SIZE, 64); exec->vtx.buffer_ptr = exec->vtx.buffer_map; vbo_exec_vtxfmt_init( exec ); /* Hook our functions into the dispatch table. */ _mesa_install_exec_vtxfmt( exec->ctx, &exec->vtxfmt ); for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) { ASSERT(i < Elements(exec->vtx.attrsz)); exec->vtx.attrsz[i] = 0; ASSERT(i < Elements(exec->vtx.active_sz)); exec->vtx.active_sz[i] = 0; } for (i = 0 ; i < VERT_ATTRIB_MAX; i++) { ASSERT(i < Elements(exec->vtx.inputs)); ASSERT(i < Elements(exec->vtx.arrays)); exec->vtx.inputs[i] = &exec->vtx.arrays[i]; } { struct gl_client_array *arrays = exec->vtx.arrays; memcpy(arrays, vbo->legacy_currval, 16 * sizeof(arrays[0])); memcpy(arrays + 16, vbo->generic_currval, 16 * sizeof(arrays[0])); } exec->vtx.vertex_size = 0; }
/* Install the codegen'ed choosers.
 *
 * For every attribute below _TNL_MAX_ATTR_CODEGEN and every size 1..4, a
 * private copy of the code between _tnl_x86_choose_fv and
 * _tnl_x86_choose_fv_end is made in a 16-byte aligned allocation, passed
 * to FIXUP/FIXUPREL together with the attribute index, the size and
 * `do_choose`, and stored in choose[attr][size - 1].
 *
 * choose     - table receiving the generated entry points, four per
 *              attribute.
 * do_choose  - function handed to FIXUPREL for each copy.
 *
 * The copies are never released here: we should keep a list and free
 * them in the end...
 *
 * NOTE(review): `offset` is not referenced explicitly below; presumably
 * the FIXUP/FIXUPREL macros (defined elsewhere) use and advance it, which
 * would also make the order of the three patch statements significant -
 * confirm before touching either.
 */
void _tnl_x86choosers( tnl_attrfv_func (*choose)[4], tnl_attrfv_func (*do_choose)( GLuint attr, GLuint sz )) { int attr, size; for (attr = 0; attr < _TNL_MAX_ATTR_CODEGEN; attr++) { for (size = 0; size < 4; size++) { char *code; char *start = (char *)&_tnl_x86_choose_fv; char *end = (char *)&_tnl_x86_choose_fv_end; int offset = 0; code = ALIGN_MALLOC( end - start, 16 ); memcpy (code, start, end - start); FIXUP(code, 0, 0, attr); FIXUP(code, 0, 1, size + 1); FIXUPREL(code, 0, 2, do_choose); choose[attr][size] = (tnl_attrfv_func)code; } } }
/*
 * Allocate the driver vertex array for `ctx` (one mgaVertex per TNL
 * vertex buffer slot, 32-byte aligned), flag the warp and pipe state as
 * needing an update, and take the initial vertex format, size and stride
 * shift from the first setup_tab entry.  init_setup_tab() is run on the
 * first call only.
 */
void mgaInitVB( GLcontext *ctx )
{
   static int firsttime = 1;
   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
   GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;

   mmesa->verts = (char *)ALIGN_MALLOC(nverts * sizeof(mgaVertex), 32);

   if (firsttime) {
      init_setup_tab();
      firsttime = 0;
   }

   mmesa->new_state |= MGA_NEW_WARP;
   mmesa->dirty |= MGA_UPLOAD_PIPE;

   mmesa->vertex_format = setup_tab[0].vertex_format;
   mmesa->vertex_size = setup_tab[0].vertex_size;
   mmesa->vertex_stride_shift = setup_tab[0].vertex_stride_shift;
}
/*
 * Build a run-time generated x86 routine for the "four unsigned bytes,
 * passed by value" colour attribute and link it into the Color4ub cache
 * of `tnl`.
 *
 * Only the packed-colour format (TNL_CP_VC_FRMT_PKCOLOR set in `key`) is
 * handled: the generated code stores the low byte of each of its four
 * stack arguments at tnl->ubytecolorptr + 0..3.  For any other key no
 * record is created and 0 is returned.
 *
 * The template is copied into a 16-byte aligned allocation and the four
 * destination operands are handed to FIXUP (defined elsewhere) together
 * with their byte offsets.
 */
struct dynfn *tnl_makeX86Attr4ub( TNLcontext *tnl, int key )
{
   if (TNL_DEBUG & DEBUG_CODEGEN) {
      _mesa_debug(NULL, "%s 0x%08x\n", __FUNCTION__, key );
   }

   if (!(key & TNL_CP_VC_FRMT_PKCOLOR)) {
      return 0;
   }

   {
      /* XXX push/pop */
      static char packed_stub[] = {
         0x53,                        /* push %ebx */
         0x8b, 0x44, 0x24, 0x08,      /* mov 0x8(%esp,1),%eax */
         0x8b, 0x54, 0x24, 0x0c,      /* mov 0xc(%esp,1),%edx */
         0x8b, 0x4c, 0x24, 0x10,      /* mov 0x10(%esp,1),%ecx */
         0x8b, 0x5c, 0x24, 0x14,      /* mov 0x14(%esp,1),%ebx */
         0xa2, 0, 0, 0, 0,            /* mov %al,DEST */
         0x88, 0x15, 0, 0, 0, 0,      /* mov %dl,DEST+1 */
         0x88, 0x0d, 0, 0, 0, 0,      /* mov %cl,DEST+2 */
         0x88, 0x1d, 0, 0, 0, 0,      /* mov %bl,DEST+3 */
         0x5b,                        /* pop %ebx */
         0xc3,                        /* ret */
      };
      struct dynfn *entry = MALLOC_STRUCT( dynfn );

      insert_at_head( &tnl->dfn_cache.Color4ub, entry );
      entry->key = key;

      entry->code = ALIGN_MALLOC( sizeof(packed_stub), 16 );
      memcpy (entry->code, packed_stub, sizeof(packed_stub));

      FIXUP(entry->code, 18, 0x0, (int)tnl->ubytecolorptr);
      FIXUP(entry->code, 24, 0x0, (int)tnl->ubytecolorptr+1);
      FIXUP(entry->code, 30, 0x0, (int)tnl->ubytecolorptr+2);
      FIXUP(entry->code, 36, 0x0, (int)tnl->ubytecolorptr+3);

      return entry;
   }
}
/*
 * Build a run-time generated x86 routine for the three-float, passed by
 * pointer, vertex entry point and link it into the Vertex3fv cache of
 * `tnl`.
 *
 * One of three templates is chosen by tnl->vertex_size:
 *   6        - unrolled: three position dwords from the argument followed
 *              by vertex[3..5];
 *   8        - unrolled: three position dwords from the argument followed
 *              by vertex[3..7];
 *   default  - three position dwords from the argument followed by a
 *              "repz movsl" of vertex_size-3 dwords starting at
 *              &tnl->vertex[3].
 *
 * Every template writes at the location read from tnl->dmaptr, stores the
 * advanced pointer back, decrements tnl->counter and, when the counter
 * reaches zero, jumps through tnl->notify.
 *
 * The chosen template is copied into a 16-byte aligned allocation.  Each
 * FIXUP call names the byte offset of a 32-bit operand in the copy, the
 * placeholder value the template holds at that offset, and the value to
 * patch in (FIXUP is defined elsewhere).  The offsets below correspond to
 * the operand positions in the byte tables.
 *
 * Returns the new dynfn record.
 */
struct dynfn *tnl_makeX86Vertex3fv( TNLcontext *tnl, int key ) { struct dynfn *dfn = MALLOC_STRUCT( dynfn ); if (TNL_DEBUG & DEBUG_CODEGEN) _mesa_debug(NULL, "%s 0x%08x\n", __FUNCTION__, key ); switch (tnl->vertex_size) { case 6: { static char temp[] = { 0xa1, 0x00, 0x00, 0, 0, /* mov 0x0,%eax */ 0x8b, 0x4c, 0x24, 0x04, /* mov 0x4(%esp,1),%ecx */ 0x8b, 0x11, /* mov (%ecx),%edx */ 0x89, 0x10, /* mov %edx,(%eax) */ 0x8b, 0x51, 0x04, /* mov 0x4(%ecx),%edx */ 0x8b, 0x49, 0x08, /* mov 0x8(%ecx),%ecx */ 0x89, 0x50, 0x04, /* mov %edx,0x4(%eax) */ 0x89, 0x48, 0x08, /* mov %ecx,0x8(%eax) */ 0x8b, 0x15, 0x1c, 0, 0, 0, /* mov 0x1c,%edx */ 0x8b, 0x0d, 0x20, 0, 0, 0, /* mov 0x20,%ecx */ 0x89, 0x50, 0x0c, /* mov %edx,0xc(%eax) */ 0x89, 0x48, 0x10, /* mov %ecx,0x10(%eax) */ 0x8b, 0x15, 0x24, 0, 0, 0, /* mov 0x24,%edx */ 0x89, 0x50, 0x14, /* mov %edx,0x14(%eax) */ 0x83, 0xc0, 0x18, /* add $0x18,%eax */ 0xa3, 0x00, 0x00, 0, 0, /* mov %eax,0x0 */ 0xa1, 0x04, 0x00, 0, 0, /* mov 0x4,%eax */ 0x48, /* dec %eax */ 0xa3, 0x04, 0x00, 0, 0, /* mov %eax,0x4 */ 0x74, 0x01, /* je 2a4 <.f11> */ 0xc3, /* ret */ 0xff, 0x25, 0x08, 0, 0, 0, /* jmp *0x8 */ }; dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); memcpy (dfn->code, temp, sizeof(temp)); FIXUP(dfn->code, 1, 0x00000000, (int)&tnl->dmaptr); FIXUP(dfn->code, 27, 0x0000001c, (int)&tnl->vertex[3]); FIXUP(dfn->code, 33, 0x00000020, (int)&tnl->vertex[4]); FIXUP(dfn->code, 45, 0x00000024, (int)&tnl->vertex[5]); FIXUP(dfn->code, 56, 0x00000000, (int)&tnl->dmaptr); FIXUP(dfn->code, 61, 0x00000004, (int)&tnl->counter); FIXUP(dfn->code, 67, 0x00000004, (int)&tnl->counter); FIXUP(dfn->code, 76, 0x00000008, (int)&tnl->notify); break; } case 8: { static char temp[] = { 0xa1, 0x00, 0x00, 0, 0, /* mov 0x0,%eax */ 0x8b, 0x4c, 0x24, 0x04, /* mov 0x4(%esp,1),%ecx */ 0x8b, 0x11, /* mov (%ecx),%edx */ 0x89, 0x10, /* mov %edx,(%eax) */ 0x8b, 0x51, 0x04, /* mov 0x4(%ecx),%edx */ 0x8b, 0x49, 0x08, /* mov 0x8(%ecx),%ecx */ 0x89, 0x50, 0x04, /* mov %edx,0x4(%eax) */ 
0x89, 0x48, 0x08, /* mov %ecx,0x8(%eax) */ 0x8b, 0x15, 0x1c, 0, 0, 0, /* mov 0x1c,%edx */ 0x8b, 0x0d, 0x20, 0, 0, 0, /* mov 0x20,%ecx */ 0x89, 0x50, 0x0c, /* mov %edx,0xc(%eax) */ 0x89, 0x48, 0x10, /* mov %ecx,0x10(%eax) */ 0x8b, 0x15, 0x1c, 0, 0, 0, /* mov 0x1c,%edx */ 0x8b, 0x0d, 0x20, 0, 0, 0, /* mov 0x20,%ecx */ 0x89, 0x50, 0x14, /* mov %edx,0x14(%eax) */ 0x89, 0x48, 0x18, /* mov %ecx,0x18(%eax) */ 0x8b, 0x15, 0x24, 0, 0, 0, /* mov 0x24,%edx */ 0x89, 0x50, 0x1c, /* mov %edx,0x1c(%eax) */ 0x83, 0xc0, 0x20, /* add $0x20,%eax */ 0xa3, 0x00, 0x00, 0, 0, /* mov %eax,0x0 */ 0xa1, 0x04, 0x00, 0, 0, /* mov 0x4,%eax */ 0x48, /* dec %eax */ 0xa3, 0x04, 0x00, 0, 0, /* mov %eax,0x4 */ 0x74, 0x01, /* je 2a4 <.f11> */ 0xc3, /* ret */ 0xff, 0x25, 0x08, 0, 0, 0, /* jmp *0x8 */ }; dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); memcpy (dfn->code, temp, sizeof(temp)); FIXUP(dfn->code, 1, 0x00000000, (int)&tnl->dmaptr); FIXUP(dfn->code, 27, 0x0000001c, (int)&tnl->vertex[3]); FIXUP(dfn->code, 33, 0x00000020, (int)&tnl->vertex[4]); FIXUP(dfn->code, 45, 0x0000001c, (int)&tnl->vertex[5]); FIXUP(dfn->code, 51, 0x00000020, (int)&tnl->vertex[6]); FIXUP(dfn->code, 63, 0x00000024, (int)&tnl->vertex[7]); FIXUP(dfn->code, 74, 0x00000000, (int)&tnl->dmaptr); FIXUP(dfn->code, 79, 0x00000004, (int)&tnl->counter); FIXUP(dfn->code, 85, 0x00000004, (int)&tnl->counter); FIXUP(dfn->code, 94, 0x00000008, (int)&tnl->notify); break; } default: { /* Repz convenient as it's possible to emit code for any size * vertex with little tweaking. Might as well read vertsize * though, and have only one of these. 
*/ static char temp[] = { 0x8b, 0x54, 0x24, 0x04, /* mov 0x4(%esp,1),%edx */ 0x57, /* push %edi */ 0x56, /* push %esi */ 0x8b, 0x3d, 1,1,1,1, /* mov DMAPTR,%edi */ 0x8b, 0x02, /* mov (%edx),%eax */ 0x8b, 0x4a, 0x04, /* mov 0x4(%edx),%ecx */ 0x8b, 0x72, 0x08, /* mov 0x8(%edx),%esi */ 0x89, 0x07, /* mov %eax,(%edi) */ 0x89, 0x4f, 0x04, /* mov %ecx,0x4(%edi) */ 0x89, 0x77, 0x08, /* mov %esi,0x8(%edi) */ 0x83, 0xc7, 0x0c, /* add $0xc,%edi */ 0xb9, 0x06, 0x00, 0x00, 0x00, /* mov $VERTSIZE-3,%ecx */ 0xbe, 0x58, 0x00, 0x00, 0x00, /* mov $VERTEX[3],%esi */ 0xf3, 0xa5, /* repz movsl %ds:(%esi),%es:(%edi)*/ 0x89, 0x3d, 1, 1, 1, 1, /* mov %edi,DMAPTR */ 0xa1, 2, 2, 2, 2, /* mov COUNTER,%eax */ 0x5e, /* pop %esi */ 0x5f, /* pop %edi */ 0x48, /* dec %eax */ 0xa3, 2, 2, 2, 2, /* mov %eax,COUNTER */ 0x74, 0x01, /* je +1 */ 0xc3, /* ret */ 0xff, 0x25, 0, 0, 0, 0 /* jmp NOTIFY */ }; dfn->code = ALIGN_MALLOC( sizeof(temp), 16 ); memcpy (dfn->code, temp, sizeof(temp)); FIXUP(dfn->code, 8, 0x01010101, (int)&tnl->dmaptr); FIXUP(dfn->code, 32, 0x00000006, tnl->vertex_size-3); FIXUP(dfn->code, 37, 0x00000058, (int)&tnl->vertex[3]); FIXUP(dfn->code, 45, 0x01010101, (int)&tnl->dmaptr); FIXUP(dfn->code, 50, 0x02020202, (int)&tnl->counter); FIXUP(dfn->code, 58, 0x02020202, (int)&tnl->counter); FIXUP(dfn->code, 67, 0x0, (int)&tnl->notify); break; } } insert_at_head( &tnl->dfn_cache.Vertex3fv, dfn ); dfn->key = key; return dfn; }
/*
 * Check one normal-transformation function against the reference
 * implementation.
 *
 * func    - function under test.
 * mtype   - index into norm_templates / norm_scale_types /
 *           norm_normalize_types selecting the matrix shape and whether
 *           the rescale or the normalize reference is used.
 * masked  - when non-zero the function is called with a mask that marks
 *           every second element, and only elements whose mask bit is set
 *           are compared.
 * cycles  - handed to BEGIN_RACE/END_RACE when mesa_profile is set.
 *
 * TEST_COUNT random normals are transformed by both implementations.  For
 * the normalize case the variant with precalculated lengths is checked as
 * well.  On the first component whose significand match is below
 * REQUIRED_PRECISION the offending normal is printed and the test fails.
 *
 * Returns 1 when every compared component matched, 0 otherwise.
 *
 * The test matrix is now released on the failure paths too; previously
 * it was only freed when the test passed.
 */
static int test_norm_function( normal_func func, int mtype,
                               int masked, long *cycles )
{
   GLvector3f source[1], dest[1], dest2[1], ref[1], ref2[1];
   GLmatrix mat[1];
   GLfloat s[TEST_COUNT][5], d[TEST_COUNT][3], r[TEST_COUNT][3];
   GLfloat d2[TEST_COUNT][3], r2[TEST_COUNT][3], length[TEST_COUNT];
   GLfloat scale;
   GLfloat *m;
   GLubyte mask[TEST_COUNT];
   int i, j, k;
   int passed = 1;
#ifdef RUN_XFORM_BENCHMARK
   int cycle_i;                /* the counter for the benchmarks we run */
#endif

   (void) cycles;

   mat->m = (GLfloat *) ALIGN_MALLOC( 16 * sizeof(GLfloat), 16 );
   mat->inv = m = mat->m;

   m[0] = 63.0; m[4] = 43.0; m[ 8] = 29.0; m[12] = 43.0;
   m[1] = 55.0; m[5] = 17.0; m[ 9] = 31.0; m[13] = 7.0;
   m[2] = 44.0; m[6] = 9.0;  m[10] = 7.0;  m[14] = 3.0;
   m[3] = 11.0; m[7] = 23.0; m[11] = 91.0; m[15] = 9.0;

   scale = 1.0F + rnd () * norm_scale_types[mtype];

   /* Force the fixed entries demanded by the matrix template; VAR entries
    * keep the values assigned above.
    */
   for ( i = 0 ; i < 4 ; i++ ) {
      for ( j = 0 ; j < 4 ; j++ ) {
         switch ( norm_templates[mtype][i * 4 + j] ) {
         case NIL:
            m[j * 4 + i] = 0.0;
            break;
         case ONE:
            m[j * 4 + i] = 1.0;
            break;
         case NEG:
            m[j * 4 + i] = -1.0;
            break;
         case VAR:
            break;
         default:
            abort();
         }
      }
   }

   for ( i = 0 ; i < TEST_COUNT ; i++ ) {
      mask[i] = i % 2;                /* mask every 2nd element */
      d[i][0] = s[i][0] = d2[i][0] = 0.0;
      d[i][1] = s[i][1] = d2[i][1] = 0.0;
      d[i][2] = s[i][2] = d2[i][2] = 0.0;
      for ( j = 0 ; j < 3 ; j++ )
         s[i][j] = rnd();
      length[i] = 1 / sqrt( s[i][0]*s[i][0] +
                            s[i][1]*s[i][1] +
                            s[i][2]*s[i][2] );
   }

   source->data = (GLfloat(*)[3])s;
   source->start = (GLfloat *)s;
   source->count = TEST_COUNT;
   source->stride = sizeof(s[0]);
   source->flags = 0;

   dest->data = (GLfloat(*)[3])d;
   dest->start = (GLfloat *)d;
   dest->count = TEST_COUNT;
   dest->stride = sizeof(float[3]);
   dest->flags = 0;

   dest2->data = (GLfloat(*)[3])d2;
   dest2->start = (GLfloat *)d2;
   dest2->count = TEST_COUNT;
   dest2->stride = sizeof(float[3]);
   dest2->flags = 0;

   ref->data = (GLfloat(*)[3])r;
   ref->start = (GLfloat *)r;
   ref->count = TEST_COUNT;
   ref->stride = sizeof(float[3]);
   ref->flags = 0;

   ref2->data = (GLfloat(*)[3])r2;
   ref2->start = (GLfloat *)r2;
   ref2->count = TEST_COUNT;
   ref2->stride = sizeof(float[3]);
   ref2->flags = 0;

   if ( norm_normalize_types[mtype] == 0 ) {
      ref_norm_transform_rescale( mat, scale, source, NULL, NULL, ref );
   } else {
      ref_norm_transform_normalize( mat, scale, source, NULL, NULL, ref );
      ref_norm_transform_normalize( mat, scale, source, length, NULL, ref2 );
   }

   if ( mesa_profile ) {
      if ( masked ) {
         BEGIN_RACE( *cycles );
         func( mat, scale, source, NULL, mask, dest );
         END_RACE( *cycles );
         func( mat, scale, source, length, mask, dest2 );
      } else {
         BEGIN_RACE( *cycles );
         func( mat, scale, source, NULL, NULL, dest );
         END_RACE( *cycles );
         func( mat, scale, source, length, NULL, dest2 );
      }
   } else {
      if ( masked ) {
         func( mat, scale, source, NULL, mask, dest );
         func( mat, scale, source, length, mask, dest2 );
      } else {
         func( mat, scale, source, NULL, NULL, dest );
         func( mat, scale, source, length, NULL, dest2 );
      }
   }

   for ( i = 0 ; i < TEST_COUNT ; i++ ) {
      if ( masked && !(mask[i] & 1) )
         continue;

      for ( j = 0 ; j < 3 ; j++ ) {
         if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) {
            printf( "-----------------------------\n" );
            printf( "(i = %i, j = %i)\n", i, j );
            for ( k = 0 ; k < 3 ; k++ ) {
               printf( "%f \t %f \t [ratio = %e - %i bit missed]\n",
                       d[i][k], r[i][k], r[i][k]/d[i][k],
                       MAX_PRECISION - significand_match( d[i][k], r[i][k] ) );
            }
            passed = 0;
            goto done;
         }

         if ( norm_normalize_types[mtype] != 0 ) {
            if ( significand_match( d2[i][j], r2[i][j] ) < REQUIRED_PRECISION ) {
               printf( "------------------- precalculated length case ------\n" );
               printf( "(i = %i, j = %i)\n", i, j );
               for ( k = 0 ; k < 3 ; k++ ) {
                  printf( "%f \t %f \t [ratio = %e - %i bit missed]\n",
                          d2[i][k], r2[i][k], r2[i][k]/d2[i][k],
                          MAX_PRECISION - significand_match( d2[i][k], r2[i][k] ) );
               }
               passed = 0;
               goto done;
            }
         }
      }
   }

done:
   /* Single exit so the matrix is released whether or not the test passed. */
   ALIGN_FREE( mat->m );
   return passed;
}
/*
 * Check one point-transformation function against the reference
 * implementation.
 *
 * func    - function under test.
 * psize   - number of leading components (at most 4) given random values;
 *           the others keep the default (0, 0, 0, 1).
 * mtype   - index into mtypes / templates selecting the matrix type and
 *           shape.
 * masked  - when non-zero the function is called with a mask that marks
 *           every second element and flag 1, and elements whose mask bit
 *           is set are not compared.
 * cycles  - handed to BEGIN_RACE/END_RACE when mesa_profile is set.
 *
 * Uses the s, d and r arrays declared outside this function as source,
 * destination and reference storage.  On the first component whose
 * significand match is below REQUIRED_PRECISION the offending point is
 * printed and the test fails.
 *
 * Returns 1 when every compared component matched, 0 otherwise (including
 * psize > 4).
 *
 * The test matrix is now released on the failure path too; previously it
 * was only freed when the test passed.
 */
static int test_transform_function( transform_func func, int psize,
                                    int mtype, int masked, long *cycles )
{
   GLvector4f source[1], dest[1], ref[1];
   GLmatrix mat[1];
   GLfloat *m;
   GLubyte mask[TEST_COUNT];
   int i, j, k;
   int passed = 1;
#ifdef RUN_XFORM_BENCHMARK
   int cycle_i;                /* the counter for the benchmarks we run */
#endif

   (void) cycles;

   if ( psize > 4 ) {
      gl_problem( NULL, "test_transform_function called with psize > 4\n" );
      return 0;
   }

   mat->m = (GLfloat *) ALIGN_MALLOC( 16 * sizeof(GLfloat), 16 );
   mat->type = mtypes[mtype];

   m = mat->m;

   m[0] = 63.0; m[4] = 43.0; m[ 8] = 29.0; m[12] = 43.0;
   m[1] = 55.0; m[5] = 17.0; m[ 9] = 31.0; m[13] = 7.0;
   m[2] = 44.0; m[6] = 9.0;  m[10] = 7.0;  m[14] = 3.0;
   m[3] = 11.0; m[7] = 23.0; m[11] = 91.0; m[15] = 9.0;

   /* Force the fixed entries demanded by the matrix template; VAR entries
    * keep the values assigned above.
    */
   for ( i = 0 ; i < 4 ; i++ ) {
      for ( j = 0 ; j < 4 ; j++ ) {
         switch ( templates[mtype][i * 4 + j] ) {
         case NIL:
            m[j * 4 + i] = 0.0;
            break;
         case ONE:
            m[j * 4 + i] = 1.0;
            break;
         case NEG:
            m[j * 4 + i] = -1.0;
            break;
         case VAR:
            break;
         default:
            abort();
         }
      }
   }

   for ( i = 0 ; i < TEST_COUNT ; i++) {
      mask[i] = i % 2;                /* mask every 2nd element */
      d[i][0] = s[i][0] = 0.0;
      d[i][1] = s[i][1] = 0.0;
      d[i][2] = s[i][2] = 0.0;
      d[i][3] = s[i][3] = 1.0;
      for ( j = 0 ; j < psize ; j++ )
         s[i][j] = rnd();
   }

   source->data = (GLfloat(*)[4])s;
   source->start = (GLfloat *)s;
   source->count = TEST_COUNT;
   source->stride = sizeof(s[0]);
   source->size = 4;
   source->flags = 0;

   dest->data = (GLfloat(*)[4])d;
   dest->start = (GLfloat *)d;
   dest->count = TEST_COUNT;
   dest->stride = sizeof(float[4]);
   dest->size = 0;
   dest->flags = 0;

   ref->data = (GLfloat(*)[4])r;
   ref->start = (GLfloat *)r;
   ref->count = TEST_COUNT;
   ref->stride = sizeof(float[4]);
   ref->size = 0;
   ref->flags = 0;

   ref_transform( ref, mat, source, NULL, 0 );

   if ( mesa_profile ) {
      if ( masked ) {
         BEGIN_RACE( *cycles );
         func( dest, mat->m, source, mask, 1 );
         END_RACE( *cycles );
      } else {
         BEGIN_RACE( *cycles );
         func( dest, mat->m, source, NULL, 0 );
         END_RACE( *cycles );
      }
   }
   else {
      if ( masked ) {
         func( dest, mat->m, source, mask, 1 );
      } else {
         func( dest, mat->m, source, NULL, 0 );
      }
   }

   for ( i = 0 ; i < TEST_COUNT ; i++ ) {
      if ( masked && (mask[i] & 1) )
         continue;

      for ( j = 0 ; j < 4 ; j++ ) {
         if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) {
            printf( "-----------------------------\n" );
            printf( "(i = %i, j = %i)\n", i, j );
            for ( k = 0 ; k < 4 ; k++ ) {
               printf( "%f \t %f \t [diff = %e - %i bit missed]\n",
                       d[i][k], r[i][k], r[i][k]-d[i][k],
                       MAX_PRECISION - significand_match( d[i][k], r[i][k] ) );
            }
            passed = 0;
            goto done;
         }
      }
   }

done:
   /* Single exit so the matrix is released whether or not the test passed. */
   ALIGN_FREE( mat->m );
   return passed;
}
/*
 * Check one point-transformation function against the reference
 * implementation.
 *
 * func    - function under test.
 * psize   - number of leading components (at most 4) given random values;
 *           the others keep the default (0, 0, 0, 1).
 * mtype   - index into mtypes / templates selecting the matrix type and
 *           shape.
 * cycles  - handed to BEGIN_RACE/END_RACE when mesa_profile is set.
 *
 * Uses the s, d and r arrays declared outside this function as source,
 * destination and reference storage.  On the first component whose
 * significand match is below REQUIRED_PRECISION the offending point is
 * printed and the test fails.
 *
 * Returns 1 when every component matched, 0 otherwise (including
 * psize > 4 and an unknown template entry).
 *
 * The test matrix is now released on both failure paths (mismatch and
 * unknown template entry); previously it was only freed when the test
 * passed.
 */
static int test_transform_function( transform_func func, int psize,
                                    int mtype, unsigned long *cycles )
{
   GLvector4f source[1], dest[1], ref[1];
   GLmatrix mat[1];
   GLfloat *m;
   int i, j, k;
   int passed = 1;
#ifdef RUN_DEBUG_BENCHMARK
   int cycle_i;                /* the counter for the benchmarks we run */
#endif

   (void) cycles;

   if ( psize > 4 ) {
      _mesa_problem( NULL, "test_transform_function called with psize > 4\n" );
      return 0;
   }

   mat->m = (GLfloat *) ALIGN_MALLOC( 16 * sizeof(GLfloat), 16 );
   mat->type = mtypes[mtype];

   m = mat->m;
   ASSERT( ((long)m & 15) == 0 );

   init_matrix( m );

   /* Force the fixed entries demanded by the matrix template; VAR entries
    * keep the values set by init_matrix().
    */
   for ( i = 0 ; i < 4 ; i++ ) {
      for ( j = 0 ; j < 4 ; j++ ) {
         switch ( templates[mtype][i * 4 + j] ) {
         case NIL:
            m[j * 4 + i] = 0.0;
            break;
         case ONE:
            m[j * 4 + i] = 1.0;
            break;
         case NEG:
            m[j * 4 + i] = -1.0;
            break;
         case VAR:
            break;
         default:
            ASSERT(0);
            passed = 0;
            goto done;
         }
      }
   }

   for ( i = 0 ; i < TEST_COUNT ; i++) {
      ASSIGN_4V( d[i], 0.0, 0.0, 0.0, 1.0 );
      ASSIGN_4V( s[i], 0.0, 0.0, 0.0, 1.0 );
      for ( j = 0 ; j < psize ; j++ )
         s[i][j] = rnd();
   }

   source->data = (GLfloat(*)[4])s;
   source->start = (GLfloat *)s;
   source->count = TEST_COUNT;
   source->stride = sizeof(s[0]);
   source->size = 4;
   source->flags = 0;

   dest->data = (GLfloat(*)[4])d;
   dest->start = (GLfloat *)d;
   dest->count = TEST_COUNT;
   dest->stride = sizeof(float[4]);
   dest->size = 0;
   dest->flags = 0;

   ref->data = (GLfloat(*)[4])r;
   ref->start = (GLfloat *)r;
   ref->count = TEST_COUNT;
   ref->stride = sizeof(float[4]);
   ref->size = 0;
   ref->flags = 0;

   ref_transform( ref, mat, source );

   if ( mesa_profile ) {
      BEGIN_RACE( *cycles );
      func( dest, mat->m, source );
      END_RACE( *cycles );
   }
   else {
      func( dest, mat->m, source );
   }

   for ( i = 0 ; i < TEST_COUNT ; i++ ) {
      for ( j = 0 ; j < 4 ; j++ ) {
         if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) {
            _mesa_printf("-----------------------------\n" );
            _mesa_printf("(i = %i, j = %i)\n", i, j );
            for ( k = 0 ; k < 4 ; k++ ) {
               _mesa_printf("%f \t %f \t [diff = %e - %i bit missed]\n",
                            d[i][k], r[i][k], r[i][k]-d[i][k],
                            MAX_PRECISION - significand_match( d[i][k], r[i][k] ) );
            }
            passed = 0;
            goto done;
         }
      }
   }

done:
   /* Single exit so the matrix is released whether or not the test passed. */
   ALIGN_FREE( mat->m );
   return passed;
}
/* Create the device specific context.
 *
 * glVisual              - visual passed on to _mesa_create_context().
 * driContextPriv        - DRI context record; its driverPrivate field is
 *                         set to the new mach64 context.
 * sharedContextPrivate  - optional mach64 context whose Mesa context is
 *                         passed to _mesa_create_context() as the share
 *                         context.
 *
 * Steps, in order: allocate the zeroed mach64 context; fill a driver
 * function table and create the Mesa context with it; record the DRI
 * screen/lock/fd handles; parse the configuration files; create one
 * texture heap per screen heap; set the texture unit limits and compute
 * the maximum texture levels; allocate the MACH64_BUFFER_SIZE vertex
 * buffer; create the swrast, vbo, tnl and swsetup module contexts;
 * initialise extensions, vertex buffer, triangle/state/span functions and
 * hardware state; pick the vblank flags; honour the "no_rast" option.
 *
 * Returns GL_TRUE on success.  Returns GL_FALSE when the mach64 context,
 * the Mesa context or the vertex buffer cannot be allocated.
 *
 * NOTE(review): when the vertex buffer allocation fails the function
 * returns GL_FALSE without releasing anything it created earlier (mmesa,
 * the Mesa context, the texture heaps), and driContextPriv->driverPrivate
 * has already been set at that point - confirm whether the caller cleans
 * up.
 * NOTE(review): driContextPriv->driverPrivate is assigned twice with the
 * same value.
 */
GLboolean mach64CreateContext( const __GLcontextModes *glVisual, __DRIcontextPrivate *driContextPriv, void *sharedContextPrivate ) { GLcontext *ctx, *shareCtx; __DRIscreenPrivate *driScreen = driContextPriv->driScreenPriv; struct dd_function_table functions; mach64ContextPtr mmesa; mach64ScreenPtr mach64Screen; int i, heap; GLuint *c_textureSwapsPtr = NULL; #if DO_DEBUG MACH64_DEBUG = driParseDebugString(getenv("MACH64_DEBUG"), debug_control); #endif /* Allocate the mach64 context */ mmesa = (mach64ContextPtr) CALLOC( sizeof(*mmesa) ); if ( !mmesa ) return GL_FALSE; /* Init default driver functions then plug in our Mach64-specific functions * (the texture functions are especially important) */ _mesa_init_driver_functions( &functions ); mach64InitDriverFuncs( &functions ); mach64InitIoctlFuncs( &functions ); mach64InitTextureFuncs( &functions ); /* Allocate the Mesa context */ if (sharedContextPrivate) shareCtx = ((mach64ContextPtr) sharedContextPrivate)->glCtx; else shareCtx = NULL; mmesa->glCtx = _mesa_create_context(glVisual, shareCtx, &functions, (void *)mmesa); if (!mmesa->glCtx) { FREE(mmesa); return GL_FALSE; } driContextPriv->driverPrivate = mmesa; ctx = mmesa->glCtx; mmesa->driContext = driContextPriv; mmesa->driScreen = driScreen; mmesa->driDrawable = NULL; mmesa->hHWContext = driContextPriv->hHWContext; mmesa->driHwLock = &driScreen->pSAREA->lock; mmesa->driFd = driScreen->fd; mach64Screen = mmesa->mach64Screen = (mach64ScreenPtr)driScreen->private; /* Parse configuration files */ driParseConfigFiles (&mmesa->optionCache, &mach64Screen->optionCache, mach64Screen->driScreen->myNum, "mach64"); mmesa->sarea = (drm_mach64_sarea_t *)((char *)driScreen->pSAREA + sizeof(drm_sarea_t)); mmesa->CurrentTexObj[0] = NULL; mmesa->CurrentTexObj[1] = NULL; (void) memset( mmesa->texture_heaps, 0, sizeof( mmesa->texture_heaps ) ); make_empty_list( &mmesa->swapped ); mmesa->firstTexHeap = mach64Screen->firstTexHeap; 
mmesa->lastTexHeap = mach64Screen->firstTexHeap + mach64Screen->numTexHeaps; for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ ) { mmesa->texture_heaps[i] = driCreateTextureHeap( i, mmesa, mach64Screen->texSize[i], 6, /* align to 64-byte boundary, use 12 for page-size boundary */ MACH64_NR_TEX_REGIONS, (drmTextureRegionPtr)mmesa->sarea->tex_list[i], &mmesa->sarea->tex_age[i], &mmesa->swapped, sizeof( mach64TexObj ), (destroy_texture_object_t *) mach64DestroyTexObj ); #if ENABLE_PERF_BOXES c_textureSwapsPtr = & mmesa->c_textureSwaps; #endif driSetTextureSwapCounterLocation( mmesa->texture_heaps[i], c_textureSwapsPtr ); } mmesa->RenderIndex = -1; /* Impossible value */ mmesa->vert_buf = NULL; mmesa->num_verts = 0; mmesa->new_state = MACH64_NEW_ALL; mmesa->dirty = MACH64_UPLOAD_ALL; /* Set the maximum texture size small enough that we can * guarentee that both texture units can bind a maximal texture * and have them both in memory (on-card or AGP) at once. * Test for 2 textures * bytes/texel * size * size. There's no * need to account for mipmaps since we only upload one level. */ ctx->Const.MaxTextureUnits = 2; ctx->Const.MaxTextureImageUnits = 2; ctx->Const.MaxTextureCoordUnits = 2; heap = mach64Screen->IsPCI ? MACH64_CARD_HEAP : MACH64_AGP_HEAP; driCalculateMaxTextureLevels( & mmesa->texture_heaps[heap], 1, & ctx->Const, mach64Screen->cpp, 10, /* max 2D texture size is 1024x1024 */ 0, /* 3D textures unsupported. */ 0, /* cube textures unsupported. */ 0, /* texture rectangles unsupported. */ 1, /* mipmapping unsupported. */ GL_TRUE, /* need to have both textures in either local or AGP memory */ 0 ); #if ENABLE_PERF_BOXES mmesa->boxes = ( getenv( "LIBGL_PERFORMANCE_BOXES" ) != NULL ); #endif /* Allocate the vertex buffer */ mmesa->vert_buf = ALIGN_MALLOC(MACH64_BUFFER_SIZE, 32); if ( !mmesa->vert_buf ) return GL_FALSE; mmesa->vert_used = 0; mmesa->vert_total = MACH64_BUFFER_SIZE; /* Initialize the software rasterizer and helper modules. 
*/ _swrast_CreateContext( ctx ); _vbo_CreateContext( ctx ); _tnl_CreateContext( ctx ); _swsetup_CreateContext( ctx ); /* Install the customized pipeline: */ /* _tnl_destroy_pipeline( ctx ); */ /* _tnl_install_pipeline( ctx, mach64_pipeline ); */ /* Configure swrast and T&L to match hardware characteristics: */ _swrast_allow_pixel_fog( ctx, GL_FALSE ); _swrast_allow_vertex_fog( ctx, GL_TRUE ); _tnl_allow_pixel_fog( ctx, GL_FALSE ); _tnl_allow_vertex_fog( ctx, GL_TRUE ); driInitExtensions( ctx, card_extensions, GL_TRUE ); mach64InitVB( ctx ); mach64InitTriFuncs( ctx ); mach64DDInitStateFuncs( ctx ); mach64DDInitSpanFuncs( ctx ); mach64DDInitState( mmesa ); mmesa->do_irqs = (mmesa->mach64Screen->irq && !getenv("MACH64_NO_IRQS")); mmesa->vblank_flags = (mmesa->do_irqs) ? driGetDefaultVBlankFlags(&mmesa->optionCache) : VBLANK_FLAG_NO_IRQ; driContextPriv->driverPrivate = (void *)mmesa; if (driQueryOptionb(&mmesa->optionCache, "no_rast")) { fprintf(stderr, "disabling 3D acceleration\n"); FALLBACK(mmesa, MACH64_FALLBACK_DISABLE, 1); } return GL_TRUE; }
static int test_norm_function( normal_func func, int mtype, long *cycles ) { GLvector4f source[1], dest[1], dest2[1], ref[1], ref2[1]; GLmatrix mat[1]; GLfloat s[TEST_COUNT][5], d[TEST_COUNT][4], r[TEST_COUNT][4]; GLfloat d2[TEST_COUNT][4], r2[TEST_COUNT][4], length[TEST_COUNT]; GLfloat scale; GLfloat *m; int i, j; #ifdef RUN_DEBUG_BENCHMARK int cycle_i; /* the counter for the benchmarks we run */ #endif (void) cycles; mat->m = (GLfloat *) ALIGN_MALLOC( 16 * sizeof(GLfloat), 16 ); mat->inv = m = mat->m; init_matrix( m ); scale = 1.0F + rnd () * norm_scale_types[mtype]; for ( i = 0 ; i < 4 ; i++ ) { for ( j = 0 ; j < 4 ; j++ ) { switch ( norm_templates[mtype][i * 4 + j] ) { case NIL: m[j * 4 + i] = 0.0; break; case ONE: m[j * 4 + i] = 1.0; break; case NEG: m[j * 4 + i] = -1.0; break; case VAR: break; default: _mesa_exit(1); } } } for ( i = 0 ; i < TEST_COUNT ; i++ ) { ASSIGN_3V( d[i], 0.0, 0.0, 0.0 ); ASSIGN_3V( s[i], 0.0, 0.0, 0.0 ); ASSIGN_3V( d2[i], 0.0, 0.0, 0.0 ); for ( j = 0 ; j < 3 ; j++ ) s[i][j] = rnd(); length[i] = 1 / SQRTF( LEN_SQUARED_3FV( s[i] ) ); } source->data = (GLfloat(*)[4]) s; source->start = (GLfloat *) s; source->count = TEST_COUNT; source->stride = sizeof(s[0]); source->flags = 0; dest->data = d; dest->start = (GLfloat *) d; dest->count = TEST_COUNT; dest->stride = sizeof(float[4]); dest->flags = 0; dest2->data = d2; dest2->start = (GLfloat *) d2; dest2->count = TEST_COUNT; dest2->stride = sizeof(float[4]); dest2->flags = 0; ref->data = r; ref->start = (GLfloat *) r; ref->count = TEST_COUNT; ref->stride = sizeof(float[4]); ref->flags = 0; ref2->data = r2; ref2->start = (GLfloat *) r2; ref2->count = TEST_COUNT; ref2->stride = sizeof(float[4]); ref2->flags = 0; if ( norm_normalize_types[mtype] == 0 ) { ref_norm_transform_rescale( mat, scale, source, NULL, ref ); } else { ref_norm_transform_normalize( mat, scale, source, NULL, ref ); ref_norm_transform_normalize( mat, scale, source, length, ref2 ); } if ( mesa_profile ) { BEGIN_RACE( *cycles ); 
func( mat, scale, source, NULL, dest ); END_RACE( *cycles ); func( mat, scale, source, length, dest2 ); } else { func( mat, scale, source, NULL, dest ); func( mat, scale, source, length, dest2 ); } for ( i = 0 ; i < TEST_COUNT ; i++ ) { for ( j = 0 ; j < 3 ; j++ ) { if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) { _mesa_printf( "-----------------------------\n" ); _mesa_printf( "(i = %i, j = %i)\n", i, j ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d[i][0], r[i][0], r[i][0]/d[i][0], MAX_PRECISION - significand_match( d[i][0], r[i][0] ) ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d[i][1], r[i][1], r[i][1]/d[i][1], MAX_PRECISION - significand_match( d[i][1], r[i][1] ) ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d[i][2], r[i][2], r[i][2]/d[i][2], MAX_PRECISION - significand_match( d[i][2], r[i][2] ) ); return 0; } if ( norm_normalize_types[mtype] != 0 ) { if ( significand_match( d2[i][j], r2[i][j] ) < REQUIRED_PRECISION ) { _mesa_printf( "------------------- precalculated length case ------\n" ); _mesa_printf( "(i = %i, j = %i)\n", i, j ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d2[i][0], r2[i][0], r2[i][0]/d2[i][0], MAX_PRECISION - significand_match( d2[i][0], r2[i][0] ) ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d2[i][1], r2[i][1], r2[i][1]/d2[i][1], MAX_PRECISION - significand_match( d2[i][1], r2[i][1] ) ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d2[i][2], r2[i][2], r2[i][2]/d2[i][2], MAX_PRECISION - significand_match( d2[i][2], r2[i][2] ) ); return 0; } } } } ALIGN_FREE( mat->m ); return 1; }