static void r300_predict_emit_size( r300ContextPtr rmesa )
{
    if (!rmesa->radeon.swtcl.emit_prediction) {
        const int vertex_size = 7;
        const int prim_size = 3;
        const int cache_flush_size = 4;
        const int pre_emit_state = 4;
        const int scissor_size = 3;
        const int state_size = radeonCountStateEmitSize(&rmesa->radeon);

        if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
                                     state_size + pre_emit_state + scissor_size +
                                     vertex_size + prim_size + cache_flush_size * 2,
                                     __FUNCTION__))
            rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon);
        else
            rmesa->radeon.swtcl.emit_prediction = state_size;

        rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw +
            vertex_size + scissor_size + prim_size +
            cache_flush_size * 2 + pre_emit_state;

        radeon_print(RADEON_SWRENDER, RADEON_VERBOSE,
                     "%s, size %d\n", __func__,
                     rmesa->radeon.cmdbuf.cs->cdw + vertex_size + scissor_size +
                     prim_size + cache_flush_size * 2 + pre_emit_state);
    }
}
/**
 * Predict the total emit size for the next rendering operation so that there is
 * no flush in the middle of rendering.
 * The prediction has to aim for the best (smallest) value that still covers the
 * worst-case scenario.
 */
static GLuint r200EnsureEmitSize( struct gl_context * ctx, GLubyte* vimap_rev )
{
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
    GLuint space_required;
    GLuint state_size;
    GLuint nr_aos = 0;
    int i;

    /* predict the number of aos to emit */
    for (i = 0; i < 15; ++i) {
        if (vimap_rev[i] != 255) {
            ++nr_aos;
        }
    }

    {
        /* count the prediction for the state size */
        space_required = 0;
        state_size = radeonCountStateEmitSize( &rmesa->radeon );
        /* vtx may be changed in r200EmitArrays, so account for it if it is not dirty */
        if (!rmesa->hw.vtx.dirty)
            state_size += rmesa->hw.vtx.check(&rmesa->radeon.glCtx, &rmesa->hw.vtx);
        /* predict the size for the elements */
        for (i = 0; i < VB->PrimitiveCount; ++i) {
            if (!VB->Primitive[i].count)
                continue;
            /* If primitive.count is less than MAX_CONVERSION_SIZE, the rendering
               code may decide to convert to elts.
               In that case we have to make a pessimistic prediction
               and use the larger of the two paths. */
            const GLuint elt_count = (VB->Primitive[i].count / GET_MAX_HW_ELTS() + 1);
            const GLuint elts = ELTS_BUFSZ(nr_aos) * elt_count;
            const GLuint index = INDEX_BUFSZ * elt_count;
            const GLuint vbuf = VBUF_BUFSZ;
            if ((!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) ||
                vbuf > index + elts)
                space_required += vbuf;
            else
                space_required += index + elts;
            space_required += AOS_BUFSZ(nr_aos);
        }
    }

    radeon_print(RADEON_RENDER, RADEON_VERBOSE,
                 "%s space %u, aos %d\n", __func__, space_required, AOS_BUFSZ(nr_aos));
    /* flush the buffer in case we need more than is left. */
    if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __func__))
        return space_required + radeonCountStateEmitSize( &rmesa->radeon );
    else
        return space_required + state_size;
}
static void radeon_predict_emit_size( r100ContextPtr rmesa )
{
    if (!rmesa->radeon.swtcl.emit_prediction) {
        const int state_size = radeonCountStateEmitSize( &rmesa->radeon );
        const int scissor_size = 8;
        const int prims_size = 8;
        const int vertex_size = 7;

        if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
                                     state_size + (scissor_size + prims_size + vertex_size),
                                     __func__))
            rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize( &rmesa->radeon );
        else
            rmesa->radeon.swtcl.emit_prediction = state_size;

        rmesa->radeon.swtcl.emit_prediction += scissor_size + prims_size +
            vertex_size + rmesa->radeon.cmdbuf.cs->cdw;
    }
}
static GLuint evergreenPredictRenderSize(GLcontext* ctx,
                                         const struct _mesa_prim *prim,
                                         const struct _mesa_index_buffer *ib,
                                         GLuint nr_prims)
{
    context_t *context = EVERGREEN_CONTEXT(ctx);
    GLboolean flushed;
    GLuint dwords, i;
    GLuint state_size;

    dwords = PRE_EMIT_STATE_BUFSZ;
    if (ib)
        dwords += nr_prims * 18;
    else {
        for (i = 0; i < nr_prims; ++i) {
            if (prim[i].start == 0)
                dwords += 14;
            else if (prim[i].count > 0xffff)
                dwords += prim[i].count + 14;
            else
                dwords += ((prim[i].count + 1) / 2) + 14;
        }
    }

    state_size = radeonCountStateEmitSize(&context->radeon);
    flushed = rcommonEnsureCmdBufSpace(&context->radeon,
                                       dwords + state_size,
                                       __FUNCTION__);
    if (flushed)
        dwords += radeonCountStateEmitSize(&context->radeon);
    else
        dwords += state_size;

    radeon_print(RADEON_RENDER, RADEON_VERBOSE,
                 "%s: total prediction size is %d.\n", __FUNCTION__, dwords);

    return dwords;
}
static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx,
                                          GLuint nr_prims,
                                          const struct _mesa_prim *prim)
{
    struct r300_context *r300 = R300_CONTEXT(ctx);
    struct r300_vertex_buffer *vbuf = &r300->vbuf;
    GLboolean flushed;
    GLuint dwords;
    GLuint state_size;
    int i;
    GLuint extra_prims = 0;

    /* Check for primitive splitting. */
    for (i = 0; i < nr_prims; ++i) {
        const GLuint num_verts = r300NumVerts(r300, prim[i].count, prim[i].mode);
        extra_prims += num_verts / (65535 - 32);
    }
    nr_prims += extra_prims;

    dwords = 2 * CACHE_FLUSH_BUFSZ;
    dwords += PRE_EMIT_STATE_BUFSZ;
    dwords += (AOS_BUFSZ(vbuf->num_attribs) +
               SCISSORS_BUFSZ * 2 +
               FIREAOS_BUFSZ) * nr_prims;

    state_size = radeonCountStateEmitSize(&r300->radeon);
    flushed = rcommonEnsureCmdBufSpace(&r300->radeon,
                                       dwords + state_size,
                                       __FUNCTION__);
    if (flushed)
        dwords += radeonCountStateEmitSize(&r300->radeon);
    else
        dwords += state_size;

    radeon_print(RADEON_RENDER, RADEON_VERBOSE,
                 "%s: total prediction size is %d.\n", __FUNCTION__, dwords);

    return dwords;
}
/**
 * Copy a region of [@a reg_width x @a reg_height] pixels from the source buffer
 * to the destination buffer.
 * @param[in] ctx GL context
 * @param[in] src_bo source radeon buffer object
 * @param[in] src_offset offset of the source image in the @a src_bo
 * @param[in] src_mesaformat source image format
 * @param[in] src_pitch aligned source image width
 * @param[in] src_width source image width
 * @param[in] src_height source image height
 * @param[in] src_x_offset x offset in the source image
 * @param[in] src_y_offset y offset in the source image
 * @param[in] dst_bo destination radeon buffer object
 * @param[in] dst_offset offset of the destination image in the @a dst_bo
 * @param[in] dst_mesaformat destination image format
 * @param[in] dst_pitch aligned destination image width
 * @param[in] dst_width destination image width
 * @param[in] dst_height destination image height
 * @param[in] dst_x_offset x offset in the destination image
 * @param[in] dst_y_offset y offset in the destination image
 * @param[in] reg_width region width
 * @param[in] reg_height region height
 * @param[in] flip_y set if the y coordinates of the source image need to be flipped
 */
unsigned r200_blit(GLcontext *ctx,
                   struct radeon_bo *src_bo,
                   intptr_t src_offset,
                   gl_format src_mesaformat,
                   unsigned src_pitch,
                   unsigned src_width,
                   unsigned src_height,
                   unsigned src_x_offset,
                   unsigned src_y_offset,
                   struct radeon_bo *dst_bo,
                   intptr_t dst_offset,
                   gl_format dst_mesaformat,
                   unsigned dst_pitch,
                   unsigned dst_width,
                   unsigned dst_height,
                   unsigned dst_x_offset,
                   unsigned dst_y_offset,
                   unsigned reg_width,
                   unsigned reg_height,
                   unsigned flip_y)
{
    struct r200_context *r200 = R200_CONTEXT(ctx);

    if (!r200_check_blit(dst_mesaformat))
        return GL_FALSE;

    /* Make sure that the colorbuffer has an even width - hw limitation */
    if (dst_pitch % 2 > 0)
        ++dst_pitch;

    /* Rendering to a small buffer doesn't work.
     * Looks like a hw limitation.
     */
    if (dst_pitch < 32)
        return GL_FALSE;

    /* Need to clamp the region size to make sure
     * we don't read outside of the source buffer
     * or write outside of the destination buffer.
     */
    if (reg_width + src_x_offset > src_width)
        reg_width = src_width - src_x_offset;
    if (reg_height + src_y_offset > src_height)
        reg_height = src_height - src_y_offset;
    if (reg_width + dst_x_offset > dst_width)
        reg_width = dst_width - dst_x_offset;
    if (reg_height + dst_y_offset > dst_height)
        reg_height = dst_height - dst_y_offset;

    if (src_bo == dst_bo) {
        return GL_FALSE;
    }

    if (src_offset % 32 || dst_offset % 32) {
        return GL_FALSE;
    }

    if (0) {
        fprintf(stderr, "src: size [%d x %d], pitch %d, "
                "offset [%d x %d], format %s, bo %p\n",
                src_width, src_height, src_pitch,
                src_x_offset, src_y_offset,
                _mesa_get_format_name(src_mesaformat),
                src_bo);
        fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
                dst_pitch, dst_x_offset, dst_y_offset,
                _mesa_get_format_name(dst_mesaformat),
                dst_bo);
        fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
    }

    /* A flush is needed to make sure that the source buffer has correct data */
    radeonFlush(r200->radeon.glCtx);

    /* Reserve 78 dwords: the sum of the per-packet budgets noted below
     * (14 + 28 + 22 + 14). */
    rcommonEnsureCmdBufSpace(&r200->radeon, 78, __FUNCTION__);

    if (!validate_buffers(r200, src_bo, dst_bo))
        return GL_FALSE;

    /* 14 */
    emit_vtx_state(r200);
    /* 28 */
    emit_tx_setup(r200, src_mesaformat, src_bo, src_offset,
                  src_width, src_height, src_pitch);
    /* 22 */
    emit_cb_setup(r200, dst_bo, dst_offset, dst_mesaformat,
                  dst_pitch, dst_width, dst_height);
    /* 14 */
    emit_draw_packet(r200, src_width, src_height,
                     src_x_offset, src_y_offset,
                     dst_x_offset, dst_y_offset,
                     reg_width, reg_height,
                     flip_y);

    radeonFlush(ctx);

    return GL_TRUE;
}
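/* Hedged usage sketch (not taken from the driver): a hypothetical call that
 * copies a 64x64 region from the origin of a 256x256 source image into a
 * 128x128 destination image at (16, 16) without flipping Y. Every identifier
 * except r200_blit() and MESA_FORMAT_ARGB8888 is a placeholder supplied by
 * the caller. As the checks above show, both offsets must stay 32-byte
 * aligned and src_bo must differ from dst_bo, or the blit returns GL_FALSE. */
static void example_blit_usage(GLcontext *ctx,
                               struct radeon_bo *src_bo, unsigned src_pitch,
                               struct radeon_bo *dst_bo, unsigned dst_pitch)
{
    unsigned ok = r200_blit(ctx,
                            src_bo, 0, MESA_FORMAT_ARGB8888, src_pitch,
                            256, 256, 0, 0,   /* source: 256x256, region at (0, 0) */
                            dst_bo, 0, MESA_FORMAT_ARGB8888, dst_pitch,
                            128, 128, 16, 16, /* destination: 128x128, region at (16, 16) */
                            64, 64,           /* region size */
                            GL_FALSE);        /* flip_y */
    (void) ok;
}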
/**
 * Predict the total emit size for the next rendering operation so that there is
 * no flush in the middle of rendering.
 * The prediction has to aim for the best (smallest) value that still covers the
 * worst-case scenario.
 */
static GLuint radeonEnsureEmitSize( struct gl_context * ctx, GLuint inputs )
{
    r100ContextPtr rmesa = R100_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
    GLuint space_required;
    GLuint state_size;
    GLuint nr_aos = 1; /* radeonEmitArrays always emits one */
    int i;
    /* list of flags that allocate an aos object */
    const GLuint flags_to_check[] = {
        VERT_BIT_NORMAL,
        VERT_BIT_COLOR0,
        VERT_BIT_COLOR1,
        VERT_BIT_FOG
    };

    /* predict the number of aos to emit */
    for (i = 0; i < sizeof(flags_to_check) / sizeof(flags_to_check[0]); ++i) {
        if (inputs & flags_to_check[i])
            ++nr_aos;
    }
    for (i = 0; i < ctx->Const.MaxTextureUnits; ++i) {
        if (inputs & VERT_BIT_TEX(i))
            ++nr_aos;
    }

    {
        /* count the prediction for the state size */
        space_required = 0;
        state_size = radeonCountStateEmitSize( &rmesa->radeon );
        /* tcl may be changed in radeonEmitArrays, so account for it if it is not dirty */
        if (!rmesa->hw.tcl.dirty)
            state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl );
        /* predict the size for the elements */
        for (i = 0; i < VB->PrimitiveCount; ++i) {
            if (!VB->Primitive[i].count)
                continue;
            /* If primitive.count is less than MAX_CONVERSION_SIZE, the rendering
               code may decide to convert to elts.
               In that case we have to make a pessimistic prediction
               and use the larger of the two paths. */
            const GLuint elts = ELTS_BUFSZ(nr_aos);
            const GLuint index = INDEX_BUFSZ;
            const GLuint vbuf = VBUF_BUFSZ;
            if ((!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE) ||
                vbuf > index + elts)
                space_required += vbuf;
            else
                space_required += index + elts;
            space_required += VB->Primitive[i].count * 3;
            space_required += AOS_BUFSZ(nr_aos);
        }
        space_required += SCISSOR_BUFSZ;
    }

    /* flush the buffer in case we need more than is left. */
    if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__))
        return space_required + radeonCountStateEmitSize( &rmesa->radeon );
    else
        return space_required + state_size;
}
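/* All of the prediction helpers above share the same core pattern: add up a
 * pessimistic per-draw payload, reserve that much space plus the current state
 * size, and, if the reservation had to flush the command buffer, recount the
 * state size, since a flush marks the state dirty again. A minimal sketch of
 * that pattern, assuming only the radeonCountStateEmitSize() and
 * rcommonEnsureCmdBufSpace() helpers used above and the radeonContextPtr they
 * take; payload_dwords and this function itself are hypothetical. */
static GLuint predict_total_emit_size(radeonContextPtr radeon, GLuint payload_dwords)
{
    GLuint state_size = radeonCountStateEmitSize(radeon);

    /* rcommonEnsureCmdBufSpace() reports whether it flushed the buffer; after a
     * flush the state has to be recounted to keep the prediction pessimistic. */
    if (rcommonEnsureCmdBufSpace(radeon, payload_dwords + state_size, __func__))
        state_size = radeonCountStateEmitSize(radeon);

    return payload_dwords + state_size;
}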