void _tnl_generate_sse_emit( struct gl_context *ctx ) { struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); struct x86_program p; if (!cpu_has_xmm) { vtx->codegen_emit = NULL; return; } memset(&p, 0, sizeof(p)); p.ctx = ctx; p.inputs_safe = 0; /* for now */ p.outputs_safe = 0; /* for now */ p.have_sse2 = cpu_has_xmm2; p.identity = x86_make_reg(file_XMM, 6); p.chan0 = x86_make_reg(file_XMM, 7); if (!x86_init_func_size(&p.func, MAX_SSE_CODE_SIZE)) { vtx->emit = NULL; return; } if (build_vertex_emit(&p)) { _tnl_register_fastpath( vtx, GL_TRUE ); } else { /* Note the failure so that we don't keep trying to codegen an * impossible state: */ _tnl_register_fastpath( vtx, GL_FALSE ); x86_release_func(&p.func); } }
/**
 * Install a new hardware vertex layout described by map[0..nr-1].
 *
 * EMIT_PAD entries only advance the running packed offset by map[i].offset;
 * every other entry becomes one element of vtx->attr[].
 *
 * \param vp             pointer stored in each installed attribute's vp field
 * \param unpacked_size  when non-zero, attribute offsets are taken verbatim
 *                       from map[i].offset and this value becomes the vertex
 *                       size; when zero, offsets and size come from the
 *                       running packed offset.
 * \return the resulting vertex size.
 *
 * Also resets the emit/interp/copy_pv hooks and marks all inputs as new.
 */
GLuint _tnl_install_attrs( GLcontext *ctx, const struct tnl_attr_map *map,
                           GLuint nr, const GLfloat *vp,
                           GLuint unpacked_size )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLuint packed_offset = 0;
   GLuint installed = 0;
   GLuint i;

   assert(nr < _TNL_ATTRIB_MAX);
   assert(nr == 0 || map[0].attrib == VERT_ATTRIB_POS);

   vtx->emit = 0;
   vtx->interp = choose_interp_func;
   vtx->copy_pv = choose_copy_pv_func;
   vtx->new_inputs = ~0;

   for (i = 0; i < nr; i++) {
      const GLuint format = map[i].format;

      if (format == EMIT_PAD) {
         /* Padding occupies space but produces no attribute slot. */
         packed_offset += map[i].offset;
         continue;
      }

      vtx->attr[installed].attrib = map[i].attrib;
      vtx->attr[installed].format = format;
      vtx->attr[installed].vp = vp;
      vtx->attr[installed].insert = format_info[format].insert;
      vtx->attr[installed].extract = format_info[format].extract;
      vtx->attr[installed].vertattrsize = format_info[format].attrsize;

      if (!unpacked_size)
         vtx->attr[installed].vertoffset = packed_offset;
      else
         vtx->attr[installed].vertoffset = map[i].offset;

      packed_offset += format_info[format].attrsize;
      installed++;
   }

   vtx->attr_count = installed;
   vtx->vertex_size = unpacked_size ? unpacked_size : packed_offset;

   assert(vtx->vertex_size <= vtx->max_vertex_size);

   return vtx->vertex_size;
}
/**
 * Emit VB vertices start..end-1 into dest using the per-attribute insert
 * functions.
 *
 * First each attribute's input pointer, input stride and emit function
 * (selected by the source vector's size) are set up; then every vertex is
 * written attribute by attribute, advancing dest by vtx->vertex_size.
 */
static void generic_emit( GLcontext *ctx,
                          GLuint start, GLuint end,
                          void *dest )
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   struct tnl_clipspace_attr *attrs = vtx->attr;
   const GLuint nattr = vtx->attr_count;
   const GLuint nverts = end - start;
   const GLuint vertex_stride = vtx->vertex_size;
   GLubyte *out = (GLubyte *)dest;
   GLuint vert, k;

   for (k = 0; k < nattr; k++) {
      GLvector4f *src = VB->AttribPtr[attrs[k].attrib];

      attrs[k].inputstride = src->stride;
      attrs[k].inputptr = ((GLubyte *)src->data) + start * src->stride;
      attrs[k].emit = attrs[k].insert[src->size - 1];
   }

   for (vert = 0; vert < nverts; vert++) {
      for (k = 0; k < nattr; k++) {
         GLfloat *in = (GLfloat *)attrs[k].inputptr;

         /* Step the source pointer before emitting, as the emit call
          * receives the pre-increment value.
          */
         attrs[k].inputptr += attrs[k].inputstride;
         attrs[k].emit( &attrs[k], out + attrs[k].vertoffset, in );
      }
      out += vertex_stride;
   }
}
/**
 * Read attribute `attr` out of the hardware vertex `vin` into dest.
 *
 * The attribute's extract function is responsible for undoing whatever
 * conversion (viewport transform, swizzle, packing) was applied on insert.
 *
 * If the vertex layout does not contain the attribute, the value falls
 * back to context state: ctx->Point.Size for _TNL_ATTRIB_POINTSIZE (only
 * dest[0] is written), otherwise four floats from ctx->Current.Attrib.
 */
void _tnl_get_attr( struct gl_context *ctx, const void *vin,
                    GLenum attr, GLfloat *dest )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   const struct tnl_clipspace_attr *entry = vtx->attr;
   const struct tnl_clipspace_attr *const stop = entry + vtx->attr_count;

   for (; entry != stop; entry++) {
      if (entry->attrib == attr) {
         entry->extract( entry, dest, (GLubyte *)vin + entry->vertoffset );
         return;
      }
   }

   /* Not present in the vertex: fall back to current context state. */
   if (attr != _TNL_ATTRIB_POINTSIZE) {
      memcpy( dest, ctx->Current.Attrib[attr], 4*sizeof(GLfloat));
      return;
   }

   /* XXX this will be wrong if drawing attenuated points! */
   dest[0] = ctx->Point.Size;
}
/**
 * Point every attribute's inputptr at element `start` of its source
 * vector in the VB, then refresh vtx->vp_scale / vtx->vp_xlate from the
 * first attribute's viewport matrix pointer when one is set.
 *
 * Unless the emit hook is still choose_emit_func, the cached input stride
 * and size are asserted to match the VB.
 */
static void update_input_ptrs( struct gl_context *ctx, GLuint start )
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   struct tnl_clipspace_attr *attrs = vtx->attr;
   const GLuint nattr = vtx->attr_count;
   GLuint k;

   for (k = 0; k < nattr; k++) {
      GLvector4f *src = VB->AttribPtr[attrs[k].attrib];

      if (vtx->emit != choose_emit_func) {
         assert(attrs[k].inputstride == src->stride);
         assert(attrs[k].inputsize == src->size);
      }

      attrs[k].inputptr = ((GLubyte *)src->data) + start * src->stride;
   }

   if (!attrs[0].vp)
      return;

   vtx->vp_scale[0] = attrs[0].vp[MAT_SX];
   vtx->vp_scale[1] = attrs[0].vp[MAT_SY];
   vtx->vp_scale[2] = attrs[0].vp[MAT_SZ];
   vtx->vp_scale[3] = 1.0;

   vtx->vp_xlate[0] = attrs[0].vp[MAT_TX];
   vtx->vp_xlate[1] = attrs[0].vp[MAT_TY];
   vtx->vp_xlate[2] = attrs[0].vp[MAT_TZ];
   vtx->vp_xlate[3] = 0.0;
}
/**
 * Release the clipspace vertex buffer and every registered fastpath.
 * Does nothing when the context has no TNL context.
 */
void _tnl_free_vertices( struct gl_context *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct tnl_clipspace *vtx;
   struct tnl_clipspace_fastpath *cur;

   if (!tnl)
      return;

   vtx = GET_VERTEX_STATE(ctx);

   _mesa_align_free(vtx->vertex_buf);
   vtx->vertex_buf = NULL;

   cur = vtx->fastpath;
   while (cur) {
      /* Grab the successor before the node itself is freed. */
      struct tnl_clipspace_fastpath *following = cur->next;

      free(cur->attr);

      /* KW: For now fp->func must have come from _mesa_exec_alloc(); the
       * hardwired fastpaths in t_vertex_generic.c are handled specially.
       * Unifying the two would be nice but is unlikely before this
       * module gets another overhaul.
       */
      _mesa_exec_free((void *) cur->func);
      free(cur);

      cur = following;
   }

   vtx->fastpath = NULL;
}
/**
 * Prepare the per-attribute input state for VB vertices starting at
 * `start`, select the emit function and run it over start..end into dest.
 *
 * The code-generated path is compiled out with `if (0)`, so the emit
 * function always ends up being generic_emit.
 */
static void do_emit( GLcontext *ctx, GLuint start, GLuint end,
                     void *dest)
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   struct tnl_clipspace_attr *attrs = vtx->attr;
   const GLuint nattr = vtx->attr_count;
   GLuint k;

   for (k = 0; k < nattr; k++) {
      GLvector4f *src = VB->AttribPtr[attrs[k].attrib];

      attrs[k].inputstride = src->stride;
      attrs[k].inputptr = ((GLubyte *)src->data) + start * src->stride;
      attrs[k].emit = attrs[k].insert[src->size - 1];
   }

   vtx->emit = 0;

   /* Codegen is currently disabled. */
   if (0)
      vtx->emit = _tnl_codegen_emit(ctx);

   if (!vtx->emit)
      vtx->emit = generic_emit;

   vtx->emit( ctx, start, end, dest );
}
/**
 * Free the clipspace vertex buffer, if one is allocated, and clear the
 * pointer.
 */
void _tnl_free_vertices( GLcontext *ctx )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);

   if (!vtx->vertex_buf)
      return;

   ALIGN_FREE(vtx->vertex_buf);
   vtx->vertex_buf = 0;
}
/**
 * Produce vertex `edst` by interpolating between vertices `eout` and `ein`
 * with parameter `t`.  Simply forwards to the interp hook currently
 * installed in the clipspace state.
 */
void _tnl_interp( struct gl_context *ctx,
                  GLfloat t,
                  GLuint edst, GLuint eout, GLuint ein,
                  GLboolean force_boundary )
{
   struct tnl_clipspace *clip = GET_VERTEX_STATE(ctx);

   clip->interp( ctx, t, edst, eout, ein, force_boundary );
}
/**
 * Emit VB vertices start..end-1 into dest via do_emit().
 *
 * \return pointer just past the last byte written, i.e. dest advanced by
 *         (end - start) vertices of vtx->vertex_size bytes each.
 */
void *_tnl_emit_vertices_to_buffer( GLcontext *ctx,
                                    GLuint start,
                                    GLuint end,
                                    void *dest )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLubyte *past_end;

   do_emit( ctx, start, end, dest );

   past_end = (GLubyte *)dest + vtx->vertex_size * (end - start);
   return (void *)past_end;
}
/**
 * React to a state change: when the two-sided-lighting or unfilled-triangle
 * derived-state bits are set in new_state, mark all inputs as new and
 * reinstall the interp / copy_pv chooser hooks.
 */
void _tnl_invalidate_vertex_state( GLcontext *ctx, GLuint new_state )
{
   struct tnl_clipspace *vtx;

   if (!(new_state & (_DD_NEW_TRI_LIGHT_TWOSIDE|_DD_NEW_TRI_UNFILLED)))
      return;

   vtx = GET_VERTEX_STATE(ctx);
   vtx->new_inputs = ~0;
   vtx->interp = choose_interp_func;
   vtx->copy_pv = choose_copy_pv_func;
}
/**
 * React to a state change: when lighting or polygon state is flagged in
 * new_state (two-sided lighting or filled/unfilled mode may have changed),
 * mark all inputs as new and reinstall the interp / copy_pv chooser hooks.
 */
void _tnl_invalidate_vertex_state( struct gl_context *ctx, GLuint new_state )
{
   struct tnl_clipspace *vtx;

   if (!(new_state & (_NEW_LIGHT | _NEW_POLYGON)))
      return;

   vtx = GET_VERTEX_STATE(ctx);
   vtx->new_inputs = ~0;
   vtx->interp = choose_interp_func;
   vtx->copy_pv = choose_copy_pv_func;
}
/**
 * Build hardware vertices start..end-1 in the clipspace vertex buffer:
 * refresh the attribute input pointers for `start`, then run the installed
 * emit hook, writing at vertex slot `start` of vtx->vertex_buf.
 *
 * `newinputs` is not consulted by this implementation.
 */
void _tnl_build_vertices( struct gl_context *ctx,
                          GLuint start,
                          GLuint end,
                          GLuint newinputs )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   const GLuint nverts = end - start;
   GLubyte *first;

   (void) newinputs;

   update_input_ptrs( ctx, start );

   first = (GLubyte *)(vtx->vertex_buf + start * vtx->vertex_size);
   vtx->emit( ctx, nverts, first );
}
/* Not much happens here. Eventually use this function to try and * avoid saving/reloading the source pointers each vertex (if some of * them can fit in registers). */ static void get_src_ptr( struct x86_program *p, struct x86_reg srcREG, struct x86_reg vtxREG, struct tnl_clipspace_attr *a ) { struct tnl_clipspace *vtx = GET_VERTEX_STATE(p->ctx); struct x86_reg ptr_to_src = x86_make_disp(vtxREG, get_offset(vtx, &a->inputptr)); /* Load current a[j].inputptr */ x86_mov(&p->func, srcREG, ptr_to_src); }
/* Use the codegen paths to select one of a number of hardwired * fastpaths. */ void _tnl_generate_hardwired_emit( struct gl_context *ctx ) { struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx); tnl_emit_func func = NULL; /* Does it fit a hardwired fastpath? Help! this is growing out of * control! */ switch (vtx->attr_count) { case 2: if (vtx->attr[0].emit == insert_3f_viewport_3) { if (vtx->attr[1].emit == insert_4ub_4f_bgra_4) func = emit_viewport3_bgra4; else if (vtx->attr[1].emit == insert_4ub_4f_rgba_4) func = emit_viewport3_rgba4; } else if (vtx->attr[0].emit == insert_3f_3 && vtx->attr[1].emit == insert_4ub_4f_rgba_4) { func = emit_xyz3_rgba4; } break; case 3: if (vtx->attr[2].emit == insert_2f_2) { if (vtx->attr[1].emit == insert_4ub_4f_rgba_4) { if (vtx->attr[0].emit == insert_4f_viewport_4) func = emit_viewport4_rgba4_st2; else if (vtx->attr[0].emit == insert_4f_4) func = emit_xyzw4_rgba4_st2; } else if (vtx->attr[1].emit == insert_4ub_4f_bgra_4 && vtx->attr[0].emit == insert_4f_viewport_4) func = emit_viewport4_bgra4_st2; } break; case 4: if (vtx->attr[2].emit == insert_2f_2 && vtx->attr[3].emit == insert_2f_2) { if (vtx->attr[1].emit == insert_4ub_4f_rgba_4) { if (vtx->attr[0].emit == insert_4f_viewport_4) func = emit_viewport4_rgba4_st2_st2; else if (vtx->attr[0].emit == insert_4f_4) func = emit_xyzw4_rgba4_st2_st2; } else if (vtx->attr[1].emit == insert_4ub_4f_bgra_4 && vtx->attr[0].emit == insert_4f_viewport_4) func = emit_viewport4_bgra4_st2_st2; } break; } vtx->emit = func; }
/**
 * First-use chooser for the copy_pv hook: installs the "extras" variant
 * when extras are needed and two-sided lighting or unfilled triangles are
 * active, the plain variant otherwise, then performs the requested copy
 * through the newly installed hook.
 */
static void choose_copy_pv_func( GLcontext *ctx, GLuint edst, GLuint esrc )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);

   if (!vtx->need_extras ||
       !(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
      vtx->copy_pv = generic_copy_pv;
   }
   else {
      vtx->copy_pv = generic_copy_pv_extras;
   }

   vtx->copy_pv( ctx, edst, esrc );
}
/**
 * Emit VB vertices start..end to dest.  The VB vertex at position `start`
 * lands at position zero of dest, so dest is passed to the emit hook
 * unadjusted even for non-zero `start`.
 *
 * \return dest advanced by (end - start) vertices of vtx->vertex_size
 *         bytes each.
 */
void *_tnl_emit_vertices_to_buffer( struct gl_context *ctx,
                                    GLuint start,
                                    GLuint end,
                                    void *dest )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLubyte *out = (GLubyte *) dest;
   const GLuint nverts = end - start;

   update_input_ptrs(ctx, start);
   vtx->emit( ctx, nverts, out );

   return (void *)((GLubyte *)dest + vtx->vertex_size * (end - start));
}
/**
 * Move every attribute's inputptr by (diff - 1) elements of its source
 * vector, where one element is vptr->stride bytes.
 *
 * \param diff  signed element distance; after the "- 1" adjustment the
 *              resulting step may be zero or negative.
 *
 * NOTE(review): the "- 1" presumably compensates for the one-vertex
 * advance already done by the previous emit -- confirm against the caller.
 */
static void adjust_input_ptrs( struct gl_context *ctx, GLint diff)
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   struct tnl_clipspace_attr *a = vtx->attr;
   const GLuint count = vtx->attr_count;
   GLuint j;

   diff -= 1;
   for (j = 0; j < count; ++j) {
      const GLvector4f *vptr = VB->AttribPtr[a[j].attrib];

      /* Multiply in signed arithmetic.  If stride is an unsigned type,
       * "diff * stride" would be evaluated as unsigned, and a negative
       * diff would become a huge positive byte offset; on targets with
       * 64-bit pointers that moves the pointer forward by almost 4 GiB
       * instead of backward.
       */
      a[j].inputptr += diff * (GLint) vptr->stride;
   }
}
/**
 * Generic interpolation of hardware vertex `edst` from vertices `eout`
 * and `ein` in the clipspace vertex buffer.
 *
 * Attribute 0 is not interpolated: it is rebuilt from the destination's
 * clip coordinates in VB->ClipPtr.  With tnl->NeedNdcCoords set, the clip
 * coordinate is divided by w first (and nothing is written when w is 0).
 * All remaining attributes are extracted from both source vertices,
 * linearly interpolated per component with parameter `t`, and inserted
 * into the destination.
 *
 * `force_boundary` is unused.
 */
void _tnl_generic_interp( struct gl_context *ctx,
                          GLfloat t,
                          GLuint edst, GLuint eout, GLuint ein,
                          GLboolean force_boundary )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   const GLubyte *vin  = vtx->vertex_buf + ein  * vtx->vertex_size;
   const GLubyte *vout = vtx->vertex_buf + eout * vtx->vertex_size;
   GLubyte *vdst = vtx->vertex_buf + edst * vtx->vertex_size;
   const struct tnl_clipspace_attr *a = vtx->attr;
   const GLuint attr_count = vtx->attr_count;
   const GLfloat *dstclip = VB->ClipPtr->data[edst];
   GLuint j;

   (void) force_boundary;

   if (!tnl->NeedNdcCoords) {
      /* Clip coordinates go in as they are. */
      a[0].insert[4-1]( &a[0], vdst, dstclip );
   }
   else if (dstclip[3] != 0.0) {
      /* Perspective divide; the w slot carries 1/w. */
      const GLfloat w = 1.0f / dstclip[3];
      GLfloat pos[4];

      pos[0] = dstclip[0] * w;
      pos[1] = dstclip[1] * w;
      pos[2] = dstclip[2] * w;
      pos[3] = w;

      a[0].insert[4-1]( &a[0], vdst, pos );
   }

   for (j = 1; j < attr_count; j++) {
      GLfloat fin[4], fout[4], fdst[4];
      GLuint c;

      a[j].extract( &a[j], fin, vin + a[j].vertoffset );
      a[j].extract( &a[j], fout, vout + a[j].vertoffset );

      for (c = 0; c < 4; c++) {
         INTERP_F( t, fdst[c], fout[c], fin[c] );
      }

      a[j].insert[4-1]( &a[j], vdst + a[j].vertoffset, fdst );
   }
}
/**
 * Initialise the clipspace vertex state.
 *
 * Installs an empty attribute layout, (re)allocates the vertex buffer when
 * max_vertex_size exceeds the current maximum, sets the channel scale
 * according to CHAN_TYPE and the identity vector to (0, 0, 0, 1), and
 * enables the SSE code generator when USE_SSE_ASM is defined and the
 * MESA_NO_CODEGEN environment variable is not set.
 *
 * \param vb_size          number of vertices the buffer must hold
 * \param max_vertex_size  largest vertex size in bytes
 */
void _tnl_init_vertices( struct gl_context *ctx,
                         GLuint vb_size,
                         GLuint max_vertex_size )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLfloat scale;
   GLuint c;

   _tnl_install_attrs( ctx, NULL, 0, NULL, 0 );

   vtx->need_extras = GL_TRUE;

   if (max_vertex_size > vtx->max_vertex_size) {
      /* Buffer is too small for the requested vertex size: replace it. */
      _tnl_free_vertices( ctx );
      vtx->max_vertex_size = max_vertex_size;
      vtx->vertex_buf = _mesa_align_calloc(vb_size * max_vertex_size, 32 );
      invalidate_funcs(vtx);
   }

   switch(CHAN_TYPE) {
   case GL_UNSIGNED_BYTE:
      scale = 255.0;
      break;
   case GL_UNSIGNED_SHORT:
      scale = 65535.0;
      break;
   default:
      scale = 1.0;
      break;
   }

   for (c = 0; c < 4; c++)
      vtx->chan_scale[c] = scale;

   vtx->identity[0] = 0.0;
   vtx->identity[1] = 0.0;
   vtx->identity[2] = 0.0;
   vtx->identity[3] = 1.0;

   vtx->codegen_emit = NULL;

#ifdef USE_SSE_ASM
   if (!getenv("MESA_NO_CODEGEN"))
      vtx->codegen_emit = _tnl_generate_sse_emit;
#endif
}
/**
 * Write attribute `attr` into the hardware vertex `vout` from the four
 * floats at `src`, using the attribute's 4-component insert function.
 * Does nothing when the vertex layout does not contain the attribute.
 */
void _tnl_set_attr( struct gl_context *ctx, void *vout,
                    GLenum attr, const GLfloat *src )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   const struct tnl_clipspace_attr *entry = vtx->attr;
   const struct tnl_clipspace_attr *const stop = entry + vtx->attr_count;

   for (; entry != stop; entry++) {
      if (entry->attrib != attr)
         continue;

      entry->insert[4-1]( entry, (GLubyte *)vout + entry->vertoffset, src );
      return;
   }
}
/**
 * Build hardware vertices start..end-1 in the clipspace vertex buffer.
 *
 * The caller's `newinputs` mask is merged with the pending
 * vtx->new_inputs (which is then cleared); vertices are emitted only when
 * the merged mask is non-zero.
 */
void _tnl_build_vertices( GLcontext *ctx,
                          GLuint start,
                          GLuint end,
                          GLuint newinputs )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLubyte *first = (GLubyte *)vtx->vertex_buf + start * vtx->vertex_size;
   const GLuint pending = newinputs | vtx->new_inputs;

   vtx->new_inputs = 0;

   if (!pending)
      return;

   do_emit( ctx, start, end, first );
}
/**
 * First-use chooser for the interp hook: installs the "extras" variant
 * when extras are needed and two-sided lighting or unfilled triangles are
 * active, the plain variant otherwise, then performs the requested
 * interpolation through the newly installed hook.
 */
static void choose_interp_func( GLcontext *ctx,
                                GLfloat t,
                                GLuint edst, GLuint eout, GLuint ein,
                                GLboolean force_boundary )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);

   if (!vtx->need_extras ||
       !(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
      vtx->interp = generic_interp;
   }
   else {
      vtx->interp = generic_interp_extras;
   }

   vtx->interp( ctx, t, edst, eout, ein, force_boundary );
}
/**
 * First-use chooser for the copy_pv hook.
 *
 * The "extras" variant is installed when extras are needed and either
 * two-sided lighting is in effect (lighting enabled with the two-side
 * model) or at least one polygon face is not drawn in GL_FILL mode;
 * otherwise the plain variant is used.  The requested copy is then
 * performed through the newly installed hook.
 */
static void choose_copy_pv_func( struct gl_context *ctx, GLuint edst, GLuint esrc )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   const GLboolean both_filled = (ctx->Polygon.FrontMode == GL_FILL &&
                                  ctx->Polygon.BackMode == GL_FILL);
   const GLboolean twosided = ctx->Light.Enabled && ctx->Light.Model.TwoSide;

   if (!vtx->need_extras || (!twosided && both_filled)) {
      vtx->copy_pv = _tnl_generic_copy_pv;
   }
   else {
      vtx->copy_pv = _tnl_generic_copy_pv_extras;
   }

   vtx->copy_pv( ctx, edst, esrc );
}
/**
 * Initialise the clipspace vertex state: install an empty attribute
 * layout, (re)allocate the vertex buffer when max_vertex_size exceeds the
 * current maximum, and initialise the C code generator.
 *
 * \param vb_size          number of vertices the buffer must hold
 * \param max_vertex_size  largest vertex size in bytes
 */
void _tnl_init_vertices( GLcontext *ctx,
                         GLuint vb_size,
                         GLuint max_vertex_size )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   const GLboolean must_grow = (max_vertex_size > vtx->max_vertex_size);

   _tnl_install_attrs( ctx, 0, 0, 0, 0 );

   vtx->need_extras = GL_TRUE;

   if (must_grow) {
      /* Buffer is too small for the requested vertex size: replace it. */
      _tnl_free_vertices( ctx );
      vtx->max_vertex_size = max_vertex_size;
      vtx->vertex_buf = (GLubyte *)ALIGN_CALLOC(vb_size * max_vertex_size, 32 );
   }

   _tnl_init_c_codegen( &vtx->codegen );
}
/**
 * Generic emit: write `count` hardware vertices starting at `v`.
 *
 * For each vertex, every attribute's emit function converts the data at
 * its current inputptr into the vertex at the attribute's vertoffset; the
 * inputptr is advanced by inputstride per vertex and `v` by
 * vtx->vertex_size.
 */
void _tnl_generic_emit( struct gl_context *ctx,
                        GLuint count,
                        GLubyte *v )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   struct tnl_clipspace_attr *attrs = vtx->attr;
   const GLuint nattr = vtx->attr_count;
   const GLuint vertex_stride = vtx->vertex_size;
   GLuint remaining = count;

   while (remaining > 0) {
      GLuint k;

      for (k = 0; k < nattr; k++) {
         GLfloat *in = (GLfloat *)attrs[k].inputptr;

         /* Step the source pointer before emitting, as the emit call
          * receives the pre-increment value.
          */
         attrs[k].inputptr += attrs[k].inputstride;
         attrs[k].emit( &attrs[k], v + attrs[k].vertoffset, in );
      }

      v += vertex_stride;
      remaining--;
   }
}
/**
 * Copy the colour attribute from hardware vertex `esrc` to hardware vertex
 * `edst` in the clipspace vertex buffer.
 *
 * Rather than going through extract/insert, the packed bytes of each
 * matching attribute are copied directly with memcpy.
 */
void _tnl_generic_copy_pv( struct gl_context *ctx, GLuint edst, GLuint esrc )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLubyte *from = vtx->vertex_buf + esrc * vtx->vertex_size;
   GLubyte *to = vtx->vertex_buf + edst * vtx->vertex_size;
   const struct tnl_clipspace_attr *entry = vtx->attr;
   const struct tnl_clipspace_attr *const stop = entry + vtx->attr_count;

   for (; entry != stop; entry++) {
      if (entry->attrib != VERT_ATTRIB_COLOR)
         continue;

      memcpy( to + entry->vertoffset,
              from + entry->vertoffset,
              entry->vertattrsize );
   }
}
/**
 * First-use chooser for the interp hook.
 *
 * The "extras" variant is installed when extras are needed and either
 * two-sided lighting is in effect (lighting enabled with the two-side
 * model) or at least one polygon face is not drawn in GL_FILL mode;
 * otherwise the plain variant is used.  The requested interpolation is
 * then performed through the newly installed hook.
 */
static void choose_interp_func( struct gl_context *ctx,
                                GLfloat t,
                                GLuint edst, GLuint eout, GLuint ein,
                                GLboolean force_boundary )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   const GLboolean both_filled = (ctx->Polygon.FrontMode == GL_FILL &&
                                  ctx->Polygon.BackMode == GL_FILL);
   const GLboolean twosided = ctx->Light.Enabled && ctx->Light.Model.TwoSide;

   if (!vtx->need_extras || (!twosided && both_filled)) {
      vtx->interp = _tnl_generic_interp;
   }
   else {
      vtx->interp = _tnl_generic_interp_extras;
   }

   vtx->interp( ctx, t, edst, eout, ein, force_boundary );
}
/**
 * Read attribute `attr` out of the hardware vertex `vin` into dest.
 *
 * The attribute's extract function is responsible for undoing whatever
 * conversion (viewport transform, swizzle, packing) was applied on insert.
 *
 * If the vertex layout does not contain the attribute, four floats are
 * copied from ctx->Current.Attrib[attr] instead -- dangerous???
 */
void _tnl_get_attr( GLcontext *ctx, const void *vin,
                    GLenum attr, GLfloat *dest )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   const struct tnl_clipspace_attr *entry = vtx->attr;
   const struct tnl_clipspace_attr *const stop = entry + vtx->attr_count;

   for (; entry != stop; entry++) {
      if (entry->attrib != attr)
         continue;

      entry->extract( entry, dest, (GLubyte *)vin + entry->vertoffset );
      return;
   }

   /* Not present in the vertex: fall back to the current value. */
   _mesa_memcpy( dest, ctx->Current.Attrib[attr], 4*sizeof(GLfloat));
}
static void update_src_ptr( struct x86_program *p, struct x86_reg srcREG, struct x86_reg vtxREG, struct tnl_clipspace_attr *a ) { if (a->inputstride) { struct tnl_clipspace *vtx = GET_VERTEX_STATE(p->ctx); struct x86_reg ptr_to_src = x86_make_disp(vtxREG, get_offset(vtx, &a->inputptr)); /* add a[j].inputstride (hardcoded value - could just as easily * pull the stride value from memory each time). */ x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride)); /* save new value of a[j].inputptr */ x86_mov(&p->func, ptr_to_src, srcREG); } }