void
brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t estimated_max_batch_usage = 1500;
   bool check_aperture_failed_once = false;

   /* Flush the sampler and render caches.  We definitely need to flush the
    * sampler cache so that we get updated contents from the render cache for
    * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
    * docs to flush the cache between reinterpretations of the same surface
    * data with different formats, which blorp does for stencil and depth
    * data.
    */
   intel_batchbuffer_emit_mi_flush(brw);

retry:
   intel_batchbuffer_require_space(brw, estimated_max_batch_usage, false);
   intel_batchbuffer_save_state(brw);
   drm_intel_bo *saved_bo = brw->batch.bo;
   uint32_t saved_used = brw->batch.used;
   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;

   switch (brw->gen) {
   case 6:
      gen6_blorp_exec(brw, params);
      break;
   case 7:
      gen7_blorp_exec(brw, params);
      break;
   default:
      /* BLORP is not supported before Gen6. */
      assert(false);
      break;
   }

   /* Make sure we didn't wrap the batch unintentionally, and make sure we
    * reserved enough space that a wrap will never happen.
    */
   assert(brw->batch.bo == saved_bo);
   assert((brw->batch.used - saved_used) * 4 +
          (saved_state_batch_offset - brw->batch.state_batch_offset) <
          estimated_max_batch_usage);
   /* Shut up compiler warnings on release build */
   (void)saved_bo;
   (void)saved_used;
   (void)saved_state_batch_offset;

   /* Check if the blorp op we just did would make our batch likely to fail to
    * map all the BOs into the GPU at batch exec time later.  If so, flush the
    * batch and try again with nothing else in the batch.
    */
   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
      if (!check_aperture_failed_once) {
         check_aperture_failed_once = true;
         intel_batchbuffer_reset_to_saved(brw);
         intel_batchbuffer_flush(brw);
         goto retry;
      } else {
         int ret = intel_batchbuffer_flush(brw);
         WARN_ONCE(ret == -ENOSPC,
                   "i965: blorp emit exceeded available aperture space\n");
      }
   }

   if (unlikely(brw->always_flush_batch))
      intel_batchbuffer_flush(brw);

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */
   brw->state.dirty.brw = ~0;
   brw->state.dirty.cache = ~0;
   brw->state_batch_count = 0;
   brw->batch.need_workaround_flush = true;
   brw->ib.type = -1;
   intel_batchbuffer_clear_cache(brw);

   /* Flush the sampler cache so any texturing from the destination is
    * coherent.
    */
   intel_batchbuffer_emit_mi_flush(brw);
}
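/* Both failure paths above report aperture exhaustion through WARN_ONCE.
 * For illustration only, a minimal sketch of such a warn-once macro is shown
 * below under #if 0; this is an assumption for readability here, not Mesa's
 * actual definition (the real one lives in brw_context.h and may also route
 * the message to the GL debug-output machinery).
 */
#if 0
#define WARN_ONCE(cond, fmt...) do {                  \
   if (unlikely(cond)) {                              \
      static bool _warned = false;                    \
      if (!_warned) {                                 \
         fprintf(stderr, "WARNING: ");                \
         fprintf(stderr, fmt);                        \
         _warned = true;                              \
      }                                               \
   }                                                  \
} while (0)
#endif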
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static void
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   bool index_bounds_valid,
                   GLuint min_index,
                   GLuint max_index,
                   struct brw_transform_feedback_object *xfb_obj,
                   unsigned stream,
                   struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state(ctx);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures(brw);

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->tes.base.sampler_count = ctx->TessEvalProgram._Current ?
      _mesa_fls(ctx->TessEvalProgram._Current->Base.SamplersUsed) : 0;
   brw->tcs.base.sampler_count = ctx->TessCtrlProgram._Current ?
      _mesa_fls(ctx->TessCtrlProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   intel_prepare_render(brw);
   brw_predraw_set_aux_buffers(brw);

   /* This workaround has to happen outside of brw_upload_render_state()
    * because it may flush the batchbuffer for a blit, affecting the state
    * flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs(brw, arrays);

   brw->ib.ib = ib;
   brw->ctx.NewDriverState |= BRW_NEW_INDICES;

   brw->vb.index_bounds_valid = index_bounds_valid;
   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;
      const int sampler_state_size = 16;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size,
                                      RENDER_RING);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances ||
          brw->basevertex != prims[i].basevertex ||
          brw->baseinstance != prims[i].base_instance) {
         brw->num_instances = prims[i].num_instances;
         brw->basevertex = prims[i].basevertex;
         brw->baseinstance = prims[i].base_instance;
         if (i > 0) { /* For i == 0 we just did this before the loop */
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
            brw_merge_inputs(brw, arrays);
         }
      }

      /* Determine if we need to flag BRW_NEW_VERTICES for updating the
       * gl_BaseVertexARB or gl_BaseInstanceARB values.  For indirect draw, we
       * always flag if the shader uses one of the values.  For direct draws,
       * we only flag if the values change.
       */
      const int new_basevertex =
         prims[i].indexed ? prims[i].basevertex : prims[i].start;
      const int new_baseinstance = prims[i].base_instance;
      if (i > 0) {
         const bool uses_draw_parameters =
            brw->vs.prog_data->uses_basevertex ||
            brw->vs.prog_data->uses_baseinstance;

         if ((uses_draw_parameters && prims[i].is_indirect) ||
             (brw->vs.prog_data->uses_basevertex &&
              brw->draw.params.gl_basevertex != new_basevertex) ||
             (brw->vs.prog_data->uses_baseinstance &&
              brw->draw.params.gl_baseinstance != new_baseinstance))
            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
      }

      brw->draw.params.gl_basevertex = new_basevertex;
      brw->draw.params.gl_baseinstance = new_baseinstance;
      drm_intel_bo_unreference(brw->draw.draw_params_bo);

      if (prims[i].is_indirect) {
         /* Point draw_params_bo at the indirect buffer. */
         brw->draw.draw_params_bo =
            intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
         drm_intel_bo_reference(brw->draw.draw_params_bo);
         brw->draw.draw_params_offset =
            prims[i].indirect_offset + (prims[i].indexed ? 12 : 8);
      } else {
         /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
          * has to upload gl_BaseVertex and such if they're needed.
          */
         brw->draw.draw_params_bo = NULL;
         brw->draw.draw_params_offset = 0;
      }

      /* gl_DrawID always needs its own vertex buffer since it's not part of
       * the indirect parameter buffer.  If the program uses gl_DrawID we need
       * to flag BRW_NEW_VERTICES.  For the first iteration, we don't have
       * valid brw->vs.prog_data, but we always flag BRW_NEW_VERTICES before
       * the loop.
       */
      brw->draw.gl_drawid = prims[i].draw_id;
      drm_intel_bo_unreference(brw->draw.draw_id_bo);
      brw->draw.draw_id_bo = NULL;
      if (i > 0 && brw->vs.prog_data->uses_drawid)
         brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

      if (brw->gen < 6)
         brw_set_prim(brw, &prims[i]);
      else
         gen6_set_prim(brw, &prims[i]);

retry:
      /* Note that before the loop, brw->ctx.NewDriverState was set to != 0,
       * and that the state updated in the loop outside of this block is that
       * in *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->ctx.NewDriverState.
       */
      if (brw->ctx.NewDriverState) {
         brw->no_batch_wrap = true;
         brw_upload_render_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive, xfb_obj, stream);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(brw);
            intel_batchbuffer_flush(brw);
            fail_next = true;
            goto retry;
         } else {
            int ret = intel_batchbuffer_flush(brw);
            WARN_ONCE(ret == -ENOSPC,
                      "i965: Single primitive emit exceeded "
                      "available aperture space\n");
         }
      }

      /* Now that we know we haven't run out of aperture space, we can safely
       * reset the dirty bits.
       */
      if (brw->ctx.NewDriverState)
         brw_render_state_finished(brw);
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return;
}
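/* For reference on the draw_params_offset computation above: the
 * (prims[i].indexed ? 12 : 8) term selects the byte offset of the field that
 * feeds gl_BaseVertexARB within the standard GL indirect draw records, so the
 * base-vertex/base-instance pair can be sourced straight from the indirect
 * buffer.  The layouts below follow the OpenGL specification and are shown
 * here only as a reminder:
 */
#if 0
typedef struct {
   GLuint count;
   GLuint instanceCount;
   GLuint first;          /* byte offset 8: serves as the base vertex for
                           * non-indexed draws */
   GLuint baseInstance;   /* byte offset 12 */
} DrawArraysIndirectCommand;

typedef struct {
   GLuint count;
   GLuint instanceCount;
   GLuint firstIndex;
   GLint  baseVertex;     /* byte offset 12 */
   GLuint baseInstance;   /* byte offset 16 */
} DrawElementsIndirectCommand;
#endif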
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prims,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index)
{
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* Find the highest sampler unit used by each shader program.  A bit-count
    * won't work since ARB programs use the texture unit number as the sampler
    * index.
    */
   brw->wm.base.sampler_count =
      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
   brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
   brw->vs.base.sampler_count =
      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   intel_prepare_render(brw);

   /* This workaround has to happen outside of brw_upload_state() because it
    * may flush the batchbuffer for a blit, affecting the state flags.
    */
   brw_workaround_depthstencil_alignment(brw, 0);

   /* Resolves must occur after updating renderbuffers, updating context state,
    * and finalizing textures but before setting up any hardware state for
    * this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
                                  (sizeof(struct brw_sampler_state) +
                                   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(brw, estimated_max_prim_size, false);
      intel_batchbuffer_save_state(brw);

      if (brw->num_instances != prims[i].num_instances) {
         brw->num_instances = prims[i].num_instances;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }
      if (brw->basevertex != prims[i].basevertex) {
         brw->basevertex = prims[i].basevertex;
         brw->state.dirty.brw |= BRW_NEW_VERTICES;
         brw_merge_inputs(brw, arrays);
      }

      if (brw->gen < 6)
         brw_set_prim(brw, &prims[i]);
      else
         gen6_set_prim(brw, &prims[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
         brw->no_batch_wrap = true;
         brw_upload_state(brw);
      }

      brw_emit_prim(brw, &prims[i], brw->primitive);

      brw->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(brw);
            intel_batchbuffer_flush(brw);
            fail_next = true;
            goto retry;
         } else {
            if (intel_batchbuffer_flush(brw) == -ENOSPC) {
               static bool warned = false;

               if (!warned) {
                  fprintf(stderr, "i965: Single primitive emit exceeded "
                          "available aperture space\n");
                  warned = true;
               }

               retval = false;
            }
         }
      }
   }

   if (brw->always_flush_batch)
      intel_batchbuffer_flush(brw);

   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
/* May fail if out of video memory for texture or vbo upload, or on
 * fallback conditions.
 */
static bool
brw_try_draw_prims(struct gl_context *ctx,
                   const struct gl_client_array *arrays[],
                   const struct _mesa_prim *prim,
                   GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLuint min_index,
                   GLuint max_index)
{
   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   bool retval = true;
   GLuint i;
   bool fail_next = false;

   if (ctx->NewState)
      _mesa_update_state( ctx );

   /* We have to validate the textures *before* checking for fallbacks;
    * otherwise, the software fallback won't be able to rely on the
    * texture state, the firstLevel and lastLevel fields won't be
    * set in the intel texture object (they'll both be 0), and the
    * software fallback will segfault if it attempts to access any
    * texture level other than level 0.
    */
   brw_validate_textures( brw );

   /* Resolves must occur after updating state and finalizing textures but
    * before setting up any hardware state for this draw call.
    */
   brw_predraw_resolve_buffers(brw);

   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );

   brw->ib.ib = ib;
   brw->state.dirty.brw |= BRW_NEW_INDICES;

   brw->vb.min_index = min_index;
   brw->vb.max_index = max_index;
   brw->state.dirty.brw |= BRW_NEW_VERTICES;

   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.
    *
    * Note this is where brw->vs->prog_data.inputs_read is calculated,
    * so can't access it earlier.
    */
   intel_prepare_render(intel);

   for (i = 0; i < nr_prims; i++) {
      int estimated_max_prim_size;

      estimated_max_prim_size = 512; /* batchbuffer commands */
      estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
                                  (sizeof(struct brw_sampler_state) +
                                   sizeof(struct gen5_sampler_default_color)));
      estimated_max_prim_size += 1024; /* gen6 VS push constants */
      estimated_max_prim_size += 1024; /* gen6 WM push constants */
      estimated_max_prim_size += 512; /* misc. pad */

      /* Flush the batch if it's approaching full, so that we don't wrap while
       * we've got validated state that needs to be in the same batch as the
       * primitives.
       */
      intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
      intel_batchbuffer_save_state(intel);

      if (intel->gen < 6)
         brw_set_prim(brw, &prim[i]);
      else
         gen6_set_prim(brw, &prim[i]);

retry:
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
       * brw->state.dirty.brw.
       */
      if (brw->state.dirty.brw) {
         intel->no_batch_wrap = true;
         brw_upload_state(brw);

         if (unlikely(brw->intel.Fallback)) {
            intel->no_batch_wrap = false;
            retval = false;
            goto out;
         }
      }

      if (intel->gen >= 7)
         gen7_emit_prim(brw, &prim[i], brw->primitive);
      else
         brw_emit_prim(brw, &prim[i], brw->primitive);

      intel->no_batch_wrap = false;

      if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) {
         if (!fail_next) {
            intel_batchbuffer_reset_to_saved(intel);
            intel_batchbuffer_flush(intel);
            fail_next = true;
            goto retry;
         } else {
            if (intel_batchbuffer_flush(intel) == -ENOSPC) {
               static bool warned = false;

               if (!warned) {
                  fprintf(stderr, "i965: Single primitive emit exceeded "
                          "available aperture space\n");
                  warned = true;
               }

               retval = false;
            }
         }
      }

      if (!_mesa_meta_in_progress(ctx))
         brw_update_primitive_count(brw, &prim[i]);
   }

   if (intel->always_flush_batch)
      intel_batchbuffer_flush(intel);

out:
   brw_state_cache_check_size(brw);
   brw_postdraw_set_buffers_need_resolve(brw);

   return retval;
}
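/* All of the emit paths above share the same save/emit/check/retry shape.  A
 * minimal sketch of that pattern is shown below under #if 0, with a
 * hypothetical emit_payload() standing in for the state upload plus primitive
 * (or blorp) emission:
 */
#if 0
static void
emit_with_aperture_retry(struct brw_context *brw)
{
   bool retried = false;

retry:
   intel_batchbuffer_save_state(brw);
   emit_payload(brw);   /* hypothetical: upload state and emit the payload */

   if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
      if (!retried) {
         /* Roll the batch back to the save point, submit everything that was
          * already batched, and retry so the payload gets an empty batch.
          */
         intel_batchbuffer_reset_to_saved(brw);
         intel_batchbuffer_flush(brw);
         retried = true;
         goto retry;
      }
      /* Still over budget even with an empty batch: submit anyway and let
       * the caller warn on -ENOSPC.
       */
      intel_batchbuffer_flush(brw);
   }
}
#endif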