static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code) { int i; assert((code->length > 0) && (code->length % 4 == 0)); R300_STATECHANGE( r300, vap_flush ); switch ((dest >> 8) & 0xf) { case 0: R300_STATECHANGE(r300, vpi); for (i = 0; i < code->length; i++) r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff)); break; case 2: R300_STATECHANGE(r300, vpp); for (i = 0; i < code->length; i++) r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff)); break; case 4: R300_STATECHANGE(r300, vps); for (i = 0; i < code->length; i++) r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]); bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff)); break; default: fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); _mesa_exit(-1); } }
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts, int elt_size) { r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_dma_region *rvb = &rmesa->state.elt_dma; void *out; assert(elt_size == 2 || elt_size == 4); if (r300IsGartMemory(rmesa, elts, n_elts * elt_size)) { rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; rvb->start = ((char *)elts) - rvb->address; rvb->aos_offset = rmesa->radeon.radeonScreen->gart_texture_offset + rvb->start; return; } else if (r300IsGartMemory(rmesa, elts, 1)) { WARN_ONCE("Pointer not within GART memory!\n"); _mesa_exit(-1); } r300AllocDmaRegion(rmesa, rvb, n_elts * elt_size, elt_size); rvb->aos_offset = GET_START(rvb); out = rvb->address + rvb->start; memcpy(out, elts, n_elts * elt_size); }
static struct ureg get_temp( struct tnl_program *p ) { int bit = _mesa_ffs( ~p->temp_in_use ); if (!bit) { _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); _mesa_exit(1); } if ((GLuint) bit > p->program->Base.NumTemporaries) p->program->Base.NumTemporaries = bit; p->temp_in_use |= 1<<(bit-1); return make_ureg(PROGRAM_TEMPORARY, bit-1); }
GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes) { GLuint i, ret = 0, first_free_texcoord = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (vp_writes & (1 << (VERT_RESULT_TEX0 + i))) { ret |= (4 << (3 * first_free_texcoord)); ++first_free_texcoord; } } if (first_free_texcoord > 8) { fprintf(stderr, "\tout of free texcoords\n"); _mesa_exit(-1); } return ret; }
void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_OutputsWritten) { r300ContextPtr rmesa = R300_CONTEXT( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; int first_free_tex = 0; GLuint InputsRead = 0; GLuint OutputsWritten = 0; int num_attrs = 0; GLuint fp_reads = rmesa->selected_fp->InputsRead; struct vertex_attribute *attrs = rmesa->vbuf.attribs; radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; rmesa->radeon.swtcl.vertex_attr_count = 0; if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s\n", __func__); /* We always want non Ndc coords format */ VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; /* Always write position vector */ InputsRead |= 1 << VERT_ATTRIB_POS; OutputsWritten |= 1 << VERT_RESULT_HPOS; EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW, 0); rmesa->swtcl.coloroffset = 4; if (fp_reads & FRAG_BIT_COL0) { InputsRead |= 1 << VERT_ATTRIB_COLOR0; OutputsWritten |= 1 << VERT_RESULT_COL0; #if MESA_LITTLE_ENDIAN EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA ); ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1); #else EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR ); ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1); #endif } if (fp_reads & FRAG_BIT_COL1) { GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); InputsRead |= 1 << VERT_ATTRIB_COLOR1; OutputsWritten |= 1 << VERT_RESULT_COL1; #if MESA_LITTLE_ENDIAN EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA ); ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1); #else EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR ); ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1); #endif rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1; } if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->ColorPtr[1]; OutputsWritten |= 1 << VERT_RESULT_BFC0; #if MESA_LITTLE_ENDIAN EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA ); ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); #else EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR ); ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); #endif if (fp_reads & FRAG_BIT_COL1) { VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->SecondaryColorPtr[1]; GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); OutputsWritten |= 1 << VERT_RESULT_BFC1; #if MESA_LITTLE_ENDIAN EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA ); ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1); #else EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR ); ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1); #endif } } if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) { GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE; OutputsWritten |= 1 << VERT_RESULT_PSIZ; EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0); } if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) { int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0; VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_POS]; RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); } if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) { int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0; VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; VB->TexCoordPtr[tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG]; RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id); } /** * Sending only one texcoord component may lead to lock up, * so for all textures always output 4 texcoord components to RS. */ { int i; GLuint swiz, format, hw_format; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (fp_reads & FRAG_BIT_TEX(i)) { switch (VB->TexCoordPtr[i]->size) { case 1: format = EMIT_1F; hw_format = R300_DATA_TYPE_FLOAT_1; swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); break; case 2: format = EMIT_2F; hw_format = R300_DATA_TYPE_FLOAT_2; swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE); break; case 3: format = EMIT_3F; hw_format = R300_DATA_TYPE_FLOAT_3; swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); break; case 4: format = EMIT_4F; hw_format = R300_DATA_TYPE_FLOAT_4; swiz = SWIZZLE_XYZW; break; default: continue; } InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); EMIT_ATTR(_TNL_ATTRIB_TEX(i), format); ADD_ATTR(VERT_ATTRIB_TEX0 + i, hw_format, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0); ++first_free_tex; } } } if (first_free_tex >= ctx->Const.MaxTextureUnits) { fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); _mesa_exit(-1); } R300_NEWPRIM(rmesa); rmesa->vbuf.num_attribs = num_attrs; *_InputsRead = InputsRead; *_OutputsWritten = OutputsWritten; RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset); }
static int test_norm_function( normal_func func, int mtype, long *cycles ) { GLvector4f source[1], dest[1], dest2[1], ref[1], ref2[1]; GLmatrix mat[1]; GLfloat s[TEST_COUNT][5], d[TEST_COUNT][4], r[TEST_COUNT][4]; GLfloat d2[TEST_COUNT][4], r2[TEST_COUNT][4], length[TEST_COUNT]; GLfloat scale; GLfloat *m; int i, j; #ifdef RUN_DEBUG_BENCHMARK int cycle_i; /* the counter for the benchmarks we run */ #endif (void) cycles; mat->m = (GLfloat *) ALIGN_MALLOC( 16 * sizeof(GLfloat), 16 ); mat->inv = m = mat->m; init_matrix( m ); scale = 1.0F + rnd () * norm_scale_types[mtype]; for ( i = 0 ; i < 4 ; i++ ) { for ( j = 0 ; j < 4 ; j++ ) { switch ( norm_templates[mtype][i * 4 + j] ) { case NIL: m[j * 4 + i] = 0.0; break; case ONE: m[j * 4 + i] = 1.0; break; case NEG: m[j * 4 + i] = -1.0; break; case VAR: break; default: _mesa_exit(1); } } } for ( i = 0 ; i < TEST_COUNT ; i++ ) { ASSIGN_3V( d[i], 0.0, 0.0, 0.0 ); ASSIGN_3V( s[i], 0.0, 0.0, 0.0 ); ASSIGN_3V( d2[i], 0.0, 0.0, 0.0 ); for ( j = 0 ; j < 3 ; j++ ) s[i][j] = rnd(); length[i] = 1 / SQRTF( LEN_SQUARED_3FV( s[i] ) ); } source->data = (GLfloat(*)[4]) s; source->start = (GLfloat *) s; source->count = TEST_COUNT; source->stride = sizeof(s[0]); source->flags = 0; dest->data = d; dest->start = (GLfloat *) d; dest->count = TEST_COUNT; dest->stride = sizeof(float[4]); dest->flags = 0; dest2->data = d2; dest2->start = (GLfloat *) d2; dest2->count = TEST_COUNT; dest2->stride = sizeof(float[4]); dest2->flags = 0; ref->data = r; ref->start = (GLfloat *) r; ref->count = TEST_COUNT; ref->stride = sizeof(float[4]); ref->flags = 0; ref2->data = r2; ref2->start = (GLfloat *) r2; ref2->count = TEST_COUNT; ref2->stride = sizeof(float[4]); ref2->flags = 0; if ( norm_normalize_types[mtype] == 0 ) { ref_norm_transform_rescale( mat, scale, source, NULL, ref ); } else { ref_norm_transform_normalize( mat, scale, source, NULL, ref ); ref_norm_transform_normalize( mat, scale, source, length, ref2 ); } if ( mesa_profile ) { BEGIN_RACE( *cycles ); func( mat, scale, source, NULL, dest ); END_RACE( *cycles ); func( mat, scale, source, length, dest2 ); } else { func( mat, scale, source, NULL, dest ); func( mat, scale, source, length, dest2 ); } for ( i = 0 ; i < TEST_COUNT ; i++ ) { for ( j = 0 ; j < 3 ; j++ ) { if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) { _mesa_printf( "-----------------------------\n" ); _mesa_printf( "(i = %i, j = %i)\n", i, j ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d[i][0], r[i][0], r[i][0]/d[i][0], MAX_PRECISION - significand_match( d[i][0], r[i][0] ) ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d[i][1], r[i][1], r[i][1]/d[i][1], MAX_PRECISION - significand_match( d[i][1], r[i][1] ) ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d[i][2], r[i][2], r[i][2]/d[i][2], MAX_PRECISION - significand_match( d[i][2], r[i][2] ) ); return 0; } if ( norm_normalize_types[mtype] != 0 ) { if ( significand_match( d2[i][j], r2[i][j] ) < REQUIRED_PRECISION ) { _mesa_printf( "------------------- precalculated length case ------\n" ); _mesa_printf( "(i = %i, j = %i)\n", i, j ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d2[i][0], r2[i][0], r2[i][0]/d2[i][0], MAX_PRECISION - significand_match( d2[i][0], r2[i][0] ) ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d2[i][1], r2[i][1], r2[i][1]/d2[i][1], MAX_PRECISION - significand_match( d2[i][1], r2[i][1] ) ); _mesa_printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", d2[i][2], r2[i][2], r2[i][2]/d2[i][2], MAX_PRECISION - significand_match( d2[i][2], r2[i][2] ) ); return 0; } } } } ALIGN_FREE( mat->m ); return 1; }