GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
                                    GLuint vertex_format,
                                    GLuint primitive,
                                    GLuint min_nr )
{
   GLushort *retval;
   int align_min_nr;
   BATCH_LOCALS(&rmesa->radeon);

   if (RADEON_DEBUG & RADEON_IOCTL)
      fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);

   assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));

   radeonEmitState(&rmesa->radeon);
   radeonEmitScissor(rmesa);

   rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;

   /* round up min_nr to align the state */
   align_min_nr = (min_nr + 1) & ~1;

#if RADEON_OLD_PACKETS
   BEGIN_BATCH_NO_AUTOSTATE(2 + ELTS_BUFSZ(align_min_nr)/4);
   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
   OUT_BATCH(rmesa->ioctl.vertex_offset);
   OUT_BATCH(rmesa->ioctl.vertex_max);
   OUT_BATCH(vertex_format);
   OUT_BATCH(primitive |
             RADEON_CP_VC_CNTL_PRIM_WALK_IND |
             RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
             RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
#else
   BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4);
   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
   OUT_BATCH(vertex_format);
   OUT_BATCH(primitive |
             RADEON_CP_VC_CNTL_PRIM_WALK_IND |
             RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
             RADEON_CP_VC_CNTL_MAOS_ENABLE |
             RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
#endif

   rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
   rmesa->tcl.elt_used = min_nr;

   retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);

   if (RADEON_DEBUG & RADEON_RENDER)
      fprintf(stderr, "%s: header prim %x \n", __FUNCTION__, primitive);

   assert(!rmesa->radeon.dma.flush);
   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
   rmesa->radeon.dma.flush = radeonFlushElts;

   return retval;
}
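/*
 * A standalone sketch (not driver code; the helper name below is invented,
 * and it is kept under #if 0 so it does not affect the build) of the rounding
 * idiom used above: element indices are 16-bit GLushorts while the command
 * stream is built in 32-bit dwords, so rounding the element count up to an
 * even number keeps whatever follows dword-aligned, which appears to be what
 * the "align the state" comment refers to.
 */
#if 0
#include <assert.h>

static unsigned round_elts_to_even(unsigned n)
{
   return (n + 1) & ~1u;                 /* same arithmetic as align_min_nr */
}

static void round_elts_selftest(void)
{
   assert(round_elts_to_even(5) == 6);   /* 10 bytes of elts padded to 12 */
   assert(round_elts_to_even(6) == 6);   /* already even, unchanged */
   assert(round_elts_to_even(7) == 8);
}
#endif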
static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr )
{
   if (rmesa->dma.flush == r200FlushElts &&
       rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) {

      GLushort *dest = (GLushort *)(rmesa->store.cmd_buf +
                                    rmesa->store.cmd_used);
      rmesa->store.cmd_used += nr*2;
      return dest;
   }
   else {
      if (rmesa->dma.flush)
         rmesa->dma.flush( rmesa );

      r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
                             rmesa->hw.max_state_size + ELTS_BUFSZ(nr) );

      r200EmitAOS( rmesa,
                   rmesa->tcl.aos_components,
                   rmesa->tcl.nr_aos_components, 0 );

      return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr );
   }
}
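/*
 * A minimal standalone model (not driver code; all types, names and sizes are
 * invented, and it is kept under #if 0 so it does not affect the build) of the
 * pattern r200AllocElts implements: if an element run is already open, which
 * is detected by the pending flush hook, and the command buffer still has
 * room, hand out space at its tail; otherwise flush whatever is pending and
 * start a fresh run.
 */
#if 0
#include <stddef.h>
#include <stdint.h>

#define TOY_CMD_BUF_SZ 16384

struct toy_cmdbuf {
   uint8_t  buf[TOY_CMD_BUF_SZ];
   size_t   used;
   void   (*flush)(struct toy_cmdbuf *);      /* pending flush hook, or NULL */
};

static void toy_flush_elts(struct toy_cmdbuf *cb)
{
   /* stand-in for r200FlushElts: submit the open element run */
   cb->used = 0;
   cb->flush = NULL;
}

static uint16_t *toy_alloc_elts(struct toy_cmdbuf *cb, size_t nr)
{
   /* Fast path: an element run is already open and there is room left,
    * so just extend it in place. */
   if (cb->flush == toy_flush_elts && cb->used + nr * 2 < TOY_CMD_BUF_SZ) {
      uint16_t *dest = (uint16_t *)(cb->buf + cb->used);
      cb->used += nr * 2;
      return dest;
   }

   /* Slow path: close whatever is pending, then open a new run.
    * (The real driver also re-emits arrays, state and packet headers here.) */
   if (cb->flush)
      cb->flush(cb);
   cb->flush = toy_flush_elts;
   cb->used  = nr * 2;
   return (uint16_t *)cb->buf;
}
#endif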
/**
 * Predict the total emit size for the next rendering operation so that there
 * is no flush in the middle of rendering.
 * The prediction has to aim for the best (smallest) value that is still at
 * least as large as the worst case.
 */
static GLuint r200EnsureEmitSize( struct gl_context *ctx, GLubyte *vimap_rev )
{
  r200ContextPtr rmesa = R200_CONTEXT(ctx);
  TNLcontext *tnl = TNL_CONTEXT(ctx);
  struct vertex_buffer *VB = &tnl->vb;
  GLuint space_required;
  GLuint state_size;
  GLuint nr_aos = 0;
  int i;

  /* predict the number of aos to emit */
  for (i = 0; i < 15; ++i) {
    if (vimap_rev[i] != 255) {
      ++nr_aos;
    }
  }

  {
    /* count the prediction for the state size */
    space_required = 0;
    state_size = radeonCountStateEmitSize( &rmesa->radeon );
    /* vtx may be changed in r200EmitArrays so account for it if not dirty */
    if (!rmesa->hw.vtx.dirty)
      state_size += rmesa->hw.vtx.check(&rmesa->radeon.glCtx, &rmesa->hw.vtx);
    /* predict the size for the elements */
    for (i = 0; i < VB->PrimitiveCount; ++i) {
      if (!VB->Primitive[i].count)
	continue;
      /* If primitive.count is less than MAX_CONVERSION_SIZE the rendering
	 code may decide to convert to elts.  In that case we have to make a
	 pessimistic prediction and use the larger of the two paths. */
      const GLuint elt_count = (VB->Primitive[i].count / GET_MAX_HW_ELTS() + 1);
      const GLuint elts = ELTS_BUFSZ(nr_aos) * elt_count;
      const GLuint index = INDEX_BUFSZ * elt_count;
      const GLuint vbuf = VBUF_BUFSZ;
      if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
	   || vbuf > index + elts)
	space_required += vbuf;
      else
	space_required += index + elts;
      space_required += AOS_BUFSZ(nr_aos);
    }
  }

  radeon_print(RADEON_RENDER, RADEON_VERBOSE,
      "%s space %u, aos %d\n",
      __func__, space_required, AOS_BUFSZ(nr_aos) );
  /* flush the buffer in case we need more than is left. */
  if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __func__))
    return space_required + radeonCountStateEmitSize( &rmesa->radeon );
  else
    return space_required + state_size;
}
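/*
 * To make the per-primitive estimate above concrete, here is a standalone
 * sketch with the driver's macros replaced by invented TOY_* constants (the
 * real ELTS_BUFSZ / INDEX_BUFSZ / VBUF_BUFSZ / AOS_BUFSZ values depend on the
 * packet layout and are not reproduced here); kept under #if 0 so it does not
 * affect the build.
 */
#if 0
#define TOY_MAX_HW_ELTS   300                 /* invented stand-in values */
#define TOY_ELTS_BUFSZ(n) (12 + (n) * 8)
#define TOY_INDEX_BUFSZ   16
#define TOY_VBUF_BUFSZ    16
#define TOY_AOS_BUFSZ(n)  (8 + (n) * 4)

/* vbuf_path_certain corresponds to (!VB->Elts && count >= MAX_CONVERSION_SIZE)
 * in the loop above: the run definitely takes the vertex-buffer path. */
static unsigned toy_prim_space(unsigned count, unsigned nr_aos,
                               int vbuf_path_certain)
{
   const unsigned elt_count = count / TOY_MAX_HW_ELTS + 1;
   const unsigned elts  = TOY_ELTS_BUFSZ(nr_aos) * elt_count;
   const unsigned index = TOY_INDEX_BUFSZ * elt_count;
   const unsigned vbuf  = TOY_VBUF_BUFSZ;
   unsigned space;

   /* Charge for the vertex-buffer path when it is certain or simply larger;
    * otherwise charge for the indexed path.  Either way the estimate covers
    * whichever path the rendering code ends up taking, so no flush is forced
    * in the middle of rendering. */
   if (vbuf_path_certain || vbuf > index + elts)
      space = vbuf;
   else
      space = index + elts;

   return space + TOY_AOS_BUFSZ(nr_aos);
}
#endif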
static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr )
{
   if (rmesa->dma.flush)
      rmesa->dma.flush( rmesa );

   radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
                           rmesa->hw.max_state_size + ELTS_BUFSZ(nr));

   radeonEmitAOS( rmesa,
                  rmesa->tcl.aos_components,
                  rmesa->tcl.nr_aos_components, 0 );

   return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
                                    rmesa->tcl.hw_primitive, nr );
}
/**
 * Predict the total emit size for the next rendering operation so that there
 * is no flush in the middle of rendering.
 * The prediction has to aim for the best (smallest) value that is still at
 * least as large as the worst case.
 */
static GLuint radeonEnsureEmitSize( struct gl_context *ctx, GLuint inputs )
{
  r100ContextPtr rmesa = R100_CONTEXT(ctx);
  TNLcontext *tnl = TNL_CONTEXT(ctx);
  struct vertex_buffer *VB = &tnl->vb;
  GLuint space_required;
  GLuint state_size;
  GLuint nr_aos = 1; /* radeonEmitArrays always emits at least one */
  int i;
  /* list of the input flags that allocate an aos object */
  const GLuint flags_to_check[] = {
    VERT_BIT_NORMAL,
    VERT_BIT_COLOR0,
    VERT_BIT_COLOR1,
    VERT_BIT_FOG
  };

  /* predict the number of aos to emit */
  for (i = 0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i) {
    if (inputs & flags_to_check[i])
      ++nr_aos;
  }
  for (i = 0; i < ctx->Const.MaxTextureUnits; ++i) {
    if (inputs & VERT_BIT_TEX(i))
      ++nr_aos;
  }

  {
    /* count the prediction for the state size */
    space_required = 0;
    state_size = radeonCountStateEmitSize( &rmesa->radeon );
    /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
    if (!rmesa->hw.tcl.dirty)
      state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl );
    /* predict the size for the elements */
    for (i = 0; i < VB->PrimitiveCount; ++i) {
      if (!VB->Primitive[i].count)
	continue;
      /* If primitive.count is less than MAX_CONVERSION_SIZE the rendering
	 code may decide to convert to elts.  In that case we have to make a
	 pessimistic prediction and use the larger of the two paths. */
      const GLuint elts = ELTS_BUFSZ(nr_aos);
      const GLuint index = INDEX_BUFSZ;
      const GLuint vbuf = VBUF_BUFSZ;
      if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
	   || vbuf > index + elts)
	space_required += vbuf;
      else
	space_required += index + elts;
      space_required += VB->Primitive[i].count * 3;
      space_required += AOS_BUFSZ(nr_aos);
    }
    space_required += SCISSOR_BUFSZ;
  }

  /* flush the buffer in case we need more than is left. */
  if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__))
    return space_required + radeonCountStateEmitSize( &rmesa->radeon );
  else
    return space_required + state_size;
}
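/*
 * A worked example of the estimate above, under invented numbers (the real
 * macro values differ): take nr_aos = 3, ELTS_BUFSZ(3) = 36, INDEX_BUFSZ = 16,
 * VBUF_BUFSZ = 16, AOS_BUFSZ(3) = 20, SCISSOR_BUFSZ = 8, and a single indexed
 * primitive of 100 vertices.  VB->Elts is set and 16 is not greater than
 * 16 + 36, so the loop charges the indexed path: 52, plus the
 * VB->Primitive[i].count * 3 = 300 term that only appears in this r100
 * variant, plus AOS_BUFSZ(3) = 20, giving 372; the single SCISSOR_BUFSZ added
 * after the loop brings the prediction to 380, with the state size accounted
 * for separately in the return value.
 */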