/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool brw_try_draw_prims( struct gl_context *ctx,
                                const struct gl_client_array *arrays[],
                                const struct _mesa_prim *prim,
                                GLuint nr_prims,
                                const struct _mesa_index_buffer *ib,
                                GLuint min_index,
                                GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   /* Resolves must occur after updating state and finalizing textures but
    * before setting up any hardware state for this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.
    *
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   intel_prepare_render(intel);

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
                                  (sizeof(struct brw_sampler_state) +
                                   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
      intel_batchbuffer_save_state(intel);

      if (intel->gen < 6)
         brw_set_prim(brw, &prim[i]);
      else
         gen6_set_prim(brw, &prim[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
         intel->no_batch_wrap = true;
         brw_upload_state(brw);

         if (unlikely(brw->intel.Fallback)) {
            intel->no_batch_wrap = false;
            retval = false;
            goto out;
         }
      }

      if (intel->gen >= 7)
         gen7_emit_prim(brw, &prim[i], brw->primitive);
      else
         brw_emit_prim(brw, &prim[i], brw->primitive);

      intel->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(intel);
            intel_batchbuffer_flush(intel);
            fail_next = true;
            goto retry;
         } else {
            if (intel_batchbuffer_flush(intel) == -ENOSPC) {
               static bool warned = false;

               if (!warned) {
                  fprintf(stderr, "i965: Single primitive emit exceeded "
                          "available aperture space\n");
                  warned = true;
               }

               retval = false;
            }
         }
      }

      if (!_mesa_meta_in_progress(ctx))
         brw_update_primitive_count(brw, &prim[i]);
   }

   if (intel->always_flush_batch)
      intel_batchbuffer_flush(intel);
 out:

   brw_state_cache_check_size(brw);

   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index,
                   struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;
      const int sampler_state_size = 16;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size,
                                      RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances ||
          brw->basevertex != prims[i].basevertex) {
         brw->num_instances = prims[i].num_instances;
         brw->basevertex = prims[i].basevertex;
         if (i > 0) { /* For i == 0 we just did this before the loop */
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
            brw_merge_inputs(brw, arrays);
         }
      }

      brw->draw.gl_basevertex =
         prims[i].indexed ? prims[i].basevertex : prims[i].start;

      drm_intel_bo_unreference(brw->draw.draw_params_bo);

      if (prims[i].is_indirect) {
         /* Point draw_params_bo at the indirect buffer. */
         brw->draw.draw_params_bo =
            intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
         drm_intel_bo_reference(brw->draw.draw_params_bo);
         brw->draw.draw_params_offset =
            prims[i].indirect_offset + (prims[i].indexed ? 12 : 8);
      } else {
         /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
          * has to upload gl_BaseVertex and such if they're needed.
          */
         brw->draw.draw_params_bo = NULL;
         brw->draw.draw_params_offset = 0;
      }

      if (brw->gen < 6)
         brw_set_prim(brw, &prims[i]);
      else
         gen6_set_prim(brw, &prims[i]);

retry:
      /* Note that before the loop, brw->ctx.NewDriverState was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->ctx.NewDriverState.
       */
      if (brw->ctx.NewDriverState) {
         brw->no_batch_wrap = true;
         brw_upload_render_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(brw);
            intel_batchbuffer_flush(brw);
            fail_next = true;
            goto retry;
         } else {
            int ret = intel_batchbuffer_flush(brw);
            WARN_ONCE(ret == -ENOSPC,
                      "i965: Single primitive emit exceeded "
                      "available aperture space\n");
         }
      }

      /* Now that we know we haven't run out of aperture space, we can safely
       * reset the dirty bits.
       */
      if (brw->ctx.NewDriverState)
         brw_render_state_finished(brw);
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);

   brw_postdraw_set_buffers_need_resolve(brw);

   return;
}
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool brw_try_draw_prims( struct gl_context *ctx,
                                const struct gl_client_array *arrays[],
                                const struct _mesa_prim *prims,
                                GLuint nr_prims,
                                const struct _mesa_index_buffer *ib,
                                GLuint min_index,
                                GLuint max_index,
                                struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_state() because it
    * may flush the batchbuffer for a blit, affecting the state flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Resolves must occur after updating renderbuffers, updating context state,
    * and finalizing textures but before setting up any hardware state for
    * this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
                                  (sizeof(struct brw_sampler_state) +
                                   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size,
                                      RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances) {
         brw->num_instances = prims[i].num_instances;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }

      if (brw->basevertex != prims[i].basevertex) {
         brw->basevertex = prims[i].basevertex;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }

      if (brw->gen < 6)
         brw_set_prim(brw, &prims[i]);
      else
         gen6_set_prim(brw, &prims[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
         brw->no_batch_wrap = true;
         brw_upload_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(brw);
            intel_batchbuffer_flush(brw);
            fail_next = true;
            goto retry;
         } else {
            if (intel_batchbuffer_flush(brw) == -ENOSPC) {
               static bool warned = false;

               if (!warned) {
                  fprintf(stderr, "i965: Single primitive emit exceeded "
                          "available aperture space\n");
                  warned = true;
               }

               retval = false;
            }
         }
      }
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);

   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static GLboolean brw_try_draw_prims( GLcontext *ctx,
                                     const struct gl_client_array *arrays[],
                                     const struct _mesa_prim *prim,
                                     GLuint nr_prims,
                                     const struct _mesa_index_buffer *ib,
                                     GLuint min_index,
                                     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   GLboolean retval = GL_FALSE;
   GLuint i, j;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.
    *
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   LOCK_HARDWARE(intel);

   if (brw->intel.numClipRects == 0) {
      assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
      UNLOCK_HARDWARE(intel);
      return GL_TRUE;
   }

   {
      /* Set the first primitive early, ahead of validate_state:
       */
      brw_set_prim(brw, prim[0].mode);

      /* XXX:  Need to separate validate and upload of state.
       */
      brw_validate_state( brw );

      /* Various fallback checks:
       */
      if (brw->intel.Fallback)
         goto out;

      if (check_fallbacks( brw, prim, nr_prims ))
         goto out;

      /* Upload index, vertex data:
       */
      if (ib)
         brw_upload_indices( brw, ib );

      if (!brw_upload_vertices( brw, min_index, max_index)) {
         goto out;
      }

      /* For single cliprect, state is already emitted:
       */
      if (brw->intel.numClipRects == 1) {
         for (i = 0; i < nr_prims; i++) {
            brw_emit_prim(brw, &prim[i]);
         }
      }
      else {
         /* Otherwise, explicitly do the cliprects at this point:
          */
         for (j = 0; j < brw->intel.numClipRects; j++) {
            brw_emit_cliprect(brw, &brw->intel.pClipRects[j]);

            /* Emit prims to batchbuffer:
             */
            for (i = 0; i < nr_prims; i++) {
               brw_emit_prim(brw, &prim[i]);
            }
         }
      }

      intel->need_flush = GL_TRUE;
      retval = GL_TRUE;
   }

 out:

   /* Currently have to do this to synchronize with the map/unmap of
    * the vertex buffer in brw_exec_api.c.  Not sure if there is any
    * way around this, as not every flush is due to a buffer filling
    * up.
    */
   if (!intel_batchbuffer_flush( brw->intel.batch )) {
      DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__);
      retval = GL_FALSE;
   }

   if (retval && intel->thrashing) {
      bmSetFence(intel);
   }

   /* Free any old data so it doesn't clog up texture memory - we
    * won't be referencing it again.
    */
   while (brw->vb.upload.wrap != brw->vb.upload.buf) {
      ctx->Driver.BufferData(ctx,
                             GL_ARRAY_BUFFER_ARB,
                             BRW_UPLOAD_INIT_SIZE,
                             NULL,
                             GL_DYNAMIC_DRAW_ARB,
                             brw->vb.upload.vbo[brw->vb.upload.wrap]);
      brw->vb.upload.wrap++;
      brw->vb.upload.wrap %= BRW_NR_UPLOAD_BUFS;
   }

   UNLOCK_HARDWARE(intel);

   if (!retval)
      DBG("%s failed\n", __FUNCTION__);

   return retval;
}
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static GLboolean brw_try_draw_prims( GLcontext *ctx,
                                     const struct gl_client_array *arrays[],
                                     const struct _mesa_prim *prim,
                                     GLuint nr_prims,
                                     const struct _mesa_index_buffer *ib,
                                     GLuint min_index,
                                     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   GLboolean retval = GL_FALSE;
   GLuint i;
   GLuint ib_offset;
   dri_bo *ib_bo;
   GLboolean force_flush = GL_FALSE;
   int ret;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   brw_validate_textures( brw );

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.
    *
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   LOCK_HARDWARE(intel);

   if (brw->intel.numClipRects == 0) {
      UNLOCK_HARDWARE(intel);
      return GL_TRUE;
   }

   {
      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.  This fraction is just a guess (minimal full state plus
       * a primitive is around 512 bytes), and would be better if we had
       * an upper bound of how much we might emit in a single
       * brw_try_draw_prims().
       */
   flush:
      if (force_flush)
         brw->no_batch_wrap = GL_FALSE;

      if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4
          /* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */
          || intel->batch->cliprect_mode != LOOP_CLIPRECTS
          || (force_flush == GL_TRUE))
         intel_batchbuffer_flush(intel->batch);

      force_flush = GL_FALSE;
      brw->no_batch_wrap = GL_TRUE;

      /* Set the first primitive early, ahead of validate_state:
       */
      brw_set_prim(brw, prim[0].mode, &force_flush);

      /* XXX:  Need to separate validate and upload of state.
       */
      ret = brw_validate_state( brw );
      if (ret) {
         force_flush = GL_TRUE;
         goto flush;
      }

      /* Various fallback checks:
       */
      if (brw->intel.Fallback)
         goto out;

      if (check_fallbacks( brw, prim, nr_prims ))
         goto out;

      /* need to account for index buffer and vertex buffer */
      if (ib) {
         ret = brw_prepare_indices( brw, ib , &ib_bo, &ib_offset);
         if (ret) {
            force_flush = GL_TRUE;
            goto flush;
         }
      }

      ret = brw_prepare_vertices( brw, min_index, max_index);
      if (ret < 0)
         goto out;

      if (ret > 0) {
         force_flush = GL_TRUE;
         goto flush;
      }

      /* Upload index, vertex data:
       */
      if (ib)
         brw_emit_indices( brw, ib, ib_bo, ib_offset);

      brw_emit_vertices( brw, min_index, max_index);

      for (i = 0; i < nr_prims; i++) {
         brw_emit_prim(brw, &prim[i]);
      }

      retval = GL_TRUE;
   }

 out:
   brw->no_batch_wrap = GL_FALSE;

   UNLOCK_HARDWARE(intel);

   if (!retval)
      DBG("%s failed\n", __FUNCTION__);

   return retval;
}
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static GLboolean brw_try_draw_prims( struct gl_context *ctx,
                                     const struct gl_client_array *arrays[],
                                     const struct _mesa_prim *prim,
                                     GLuint nr_prims,
                                     const struct _mesa_index_buffer *ib,
                                     GLuint min_index,
                                     GLuint max_index )
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   GLboolean retval = GL_FALSE;
   GLboolean warn = GL_FALSE;
   GLuint i;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.
    *
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */

   intel_prepare_render(intel);

   for (i = 0; i < nr_prims; i++) {
      uint32_t hw_prim;
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
                                  (sizeof(struct brw_sampler_state) +
                                   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);

      hw_prim = brw_set_prim(brw, &prim[i]);

      if (brw->state.dirty.brw) {
         brw_validate_state(brw);

         /* Various fallback checks:
          */
         if (brw->intel.Fallback)
            goto out;

         /* Check that we can fit our state in with our existing batchbuffer, or
          * flush otherwise.
          */
         if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
                                             brw->state.validated_bo_count)) {
            static GLboolean warned;
            intel_batchbuffer_flush(intel);

            /* Validate the state after we flushed the batch (which would have
             * changed the set of dirty state).  If we still fail to
             * check_aperture, warn of what's happening, but attempt to continue
             * on since it may succeed anyway, and the user would probably rather
             * see a failure and a warning than a fallback.
             */
            brw_validate_state(brw);
            if (!warned &&
                dri_bufmgr_check_aperture_space(brw->state.validated_bos,
                                                brw->state.validated_bo_count)) {
               warn = GL_TRUE;
               warned = GL_TRUE;
            }
         }

         intel->no_batch_wrap = GL_TRUE;
         brw_upload_state(brw);
      }

      if (intel->gen >= 7)
         gen7_emit_prim(brw, &prim[i], hw_prim);
      else
         brw_emit_prim(brw, &prim[i], hw_prim);

      intel->no_batch_wrap = GL_FALSE;

      retval = GL_TRUE;
   }

   if (intel->always_flush_batch)
      intel_batchbuffer_flush(intel);
 out:

   brw_state_cache_check_size(brw);

   if (warn)
      fprintf(stderr, "i965: Single primitive emit potentially exceeded "
              "available aperture space\n");

   if (!retval)
      DBG("%s failed\n", __FUNCTION__);

   return retval;
}