void
brw_vs_debug_recompile(struct brw_context *brw,
                       struct gl_shader_program *prog,
                       const struct brw_vs_prog_key *key)
{
   struct brw_cache_item *c = NULL;
   const struct brw_vs_prog_key *old_key = NULL;
   bool found = false;

   perf_debug("Recompiling vertex shader for program %d\n", prog->Name);

   for (unsigned int i = 0; i < brw->cache.size; i++) {
      for (c = brw->cache.items[i]; c; c = c->next) {
         if (c->cache_id == BRW_VS_PROG) {
            old_key = c->key;

            if (old_key->base.program_string_id == key->base.program_string_id)
               break;
         }
      }
      if (c)
         break;
   }

   if (!c) {
      perf_debug("  Didn't find previous compile in the shader cache for "
                 "debug\n");
      return;
   }

   for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
      found |= key_debug(brw, "Vertex attrib w/a flags",
                         old_key->gl_attrib_wa_flags[i],
                         key->gl_attrib_wa_flags[i]);
   }

   found |= key_debug(brw, "user clip flags",
                      old_key->base.userclip_active,
                      key->base.userclip_active);
   found |= key_debug(brw, "user clipping planes as push constants",
                      old_key->base.nr_userclip_plane_consts,
                      key->base.nr_userclip_plane_consts);
   found |= key_debug(brw, "clip distance enable",
                      old_key->base.uses_clip_distance,
                      key->base.uses_clip_distance);
   found |= key_debug(brw, "copy edgeflag",
                      old_key->copy_edgeflag, key->copy_edgeflag);
   found |= key_debug(brw, "PointCoord replace",
                      old_key->point_coord_replace, key->point_coord_replace);
   found |= key_debug(brw, "vertex color clamping",
                      old_key->base.clamp_vertex_color,
                      key->base.clamp_vertex_color);

   found |= brw_debug_recompile_sampler_key(brw, &old_key->base.tex,
                                            &key->base.tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}
static bool
intel_set_texture_storage_for_buffer_object(struct gl_context *ctx,
                                            struct gl_texture_object *tex_obj,
                                            struct gl_buffer_object *buffer_obj,
                                            uint32_t buffer_offset,
                                            uint32_t row_stride,
                                            bool read_only)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *intel_texobj = intel_texture_object(tex_obj);
   struct gl_texture_image *image = tex_obj->Image[0][0];
   struct intel_texture_image *intel_image = intel_texture_image(image);
   struct intel_buffer_object *intel_buffer_obj =
      intel_buffer_object(buffer_obj);

   if (!read_only) {
      /* Renderbuffers have the restriction that the buffer offset and
       * surface pitch must be a multiple of the element size.  If it's
       * not, we have to fail and fall back to software.
       */
      int cpp = _mesa_get_format_bytes(image->TexFormat);
      if (buffer_offset % cpp || row_stride % cpp) {
         perf_debug("Bad PBO alignment; fallback to CPU mapping\n");
         return false;
      }

      if (!brw->format_supported_as_render_target[image->TexFormat]) {
         perf_debug("Non-renderable PBO format; fallback to CPU mapping\n");
         return false;
      }
   }

   assert(intel_texobj->mt == NULL);

   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_buffer_obj,
                                             buffer_offset,
                                             row_stride * image->Height);
   intel_texobj->mt = intel_miptree_create_for_bo(brw, bo,
                                                  image->TexFormat,
                                                  buffer_offset,
                                                  image->Width, image->Height,
                                                  image->Depth,
                                                  row_stride,
                                                  0);
   if (!intel_texobj->mt)
      return false;

   if (!_swrast_init_texture_image(image))
      return false;

   intel_miptree_reference(&intel_image->mt, intel_texobj->mt);

   /* The miptree is in a validated state, so no need to check later. */
   intel_texobj->needs_validate = false;
   intel_texobj->validated_first_level = 0;
   intel_texobj->validated_last_level = 0;
   intel_texobj->_Format = intel_texobj->mt->format;

   return true;
}
void
brw_draw_prims(struct gl_context *ctx,
               const struct _mesa_prim *prims,
               GLuint nr_prims,
               const struct _mesa_index_buffer *ib,
               GLboolean index_bounds_valid,
               GLuint min_index,
               GLuint max_index,
               struct gl_transform_feedback_object *unused_tfb_object,
               struct gl_buffer_object *indirect)
{
   struct brw_context *brw = brw_context(ctx);
   const struct gl_client_array **arrays = ctx->Array._DrawArrays;

   assert(unused_tfb_object == NULL);

   if (ctx->Query.CondRenderQuery) {
      perf_debug("Conditional rendering is implemented in software and may "
                 "stall. This should be fixed in the driver.\n");
   }

   if (!_mesa_check_conditional_render(ctx))
      return;

   /* Handle primitive restart if needed */
   if (brw_handle_primitive_restart(ctx, prims, nr_prims, ib, indirect)) {
      /* The draw was handled, so we can exit now */
      return;
   }

   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
    * won't support all the extensions we support.
    */
   if (ctx->RenderMode != GL_RENDER) {
      perf_debug("%s render mode not supported in hardware\n",
                 _mesa_lookup_enum_by_nr(ctx->RenderMode));
      _swsetup_Wakeup(ctx);
      _tnl_wakeup(ctx);
      _tnl_draw_prims(ctx, prims, nr_prims, ib,
                      index_bounds_valid, min_index, max_index, NULL, NULL);
      return;
   }

   /* If we're going to have to upload any of the user's vertex arrays, then
    * get the minimum and maximum of their index buffer so we know what range
    * to upload.
    */
   if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) {
      perf_debug("Scanning index buffer to compute index buffer bounds. "
                 "Use glDrawRangeElements() to avoid this.\n");
      vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
   }

   /* Try drawing with the hardware, but don't do anything else if we can't
    * manage it.  swrast doesn't support our featureset, so we can't fall back
    * to it.
    */
   brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index,
                      indirect);
}
void
brw_wm_debug_recompile(struct brw_context *brw,
                       struct gl_shader_program *prog,
                       const struct brw_wm_prog_key *key)
{
   struct brw_cache_item *c = NULL;
   const struct brw_wm_prog_key *old_key = NULL;
   bool found = false;

   perf_debug("Recompiling fragment shader for program %d\n", prog->Name);

   for (unsigned int i = 0; i < brw->cache.size; i++) {
      for (c = brw->cache.items[i]; c; c = c->next) {
         if (c->cache_id == BRW_WM_PROG) {
            old_key = c->key;

            if (old_key->program_string_id == key->program_string_id)
               break;
         }
      }
      if (c)
         break;
   }

   if (!c) {
      perf_debug("  Didn't find previous compile in the shader cache for "
                 "debug\n");
      return;
   }

   found |= key_debug(brw, "alphatest, computed depth, depth test, or "
                      "depth write",
                      old_key->iz_lookup, key->iz_lookup);
   found |= key_debug(brw, "depth statistics",
                      old_key->stats_wm, key->stats_wm);
   found |= key_debug(brw, "flat shading",
                      old_key->flat_shade, key->flat_shade);
   found |= key_debug(brw, "number of color buffers",
                      old_key->nr_color_regions, key->nr_color_regions);
   found |= key_debug(brw, "MRT alpha test or alpha-to-coverage",
                      old_key->replicate_alpha, key->replicate_alpha);
   found |= key_debug(brw, "rendering to FBO",
                      old_key->render_to_fbo, key->render_to_fbo);
   found |= key_debug(brw, "fragment color clamping",
                      old_key->clamp_fragment_color,
                      key->clamp_fragment_color);
   found |= key_debug(brw, "line smoothing",
                      old_key->line_aa, key->line_aa);
   found |= key_debug(brw, "renderbuffer height",
                      old_key->drawable_height, key->drawable_height);
   found |= key_debug(brw, "input slots valid",
                      old_key->input_slots_valid, key->input_slots_valid);

   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}
/**
 * Determine if fast color clear supports the given clear color.
 *
 * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
 * moment we only support floating point, unorm, and snorm buffers.
 */
static bool
is_color_fast_clear_compatible(struct brw_context *brw,
                               mesa_format format,
                               const union gl_color_union *color)
{
   if (_mesa_is_format_integer_color(format)) {
      if (brw->gen >= 8) {
         perf_debug("Integer fast clear not enabled for (%s)",
                    _mesa_get_format_name(format));
      }
      return false;
   }

   for (int i = 0; i < 4; i++) {
      if (!_mesa_format_has_color_component(format, i)) {
         continue;
      }

      if (brw->gen < 9 &&
          color->f[i] != 0.0f && color->f[i] != 1.0f) {
         return false;
      }
   }
   return true;
}
/**
 * Replace data in a subrange of a buffer object.  If the data range
 * specified by size + offset extends beyond the end of the buffer or
 * if data is NULL, no copy is performed.
 * Called via glBufferSubDataARB().
 */
static void
intel_bufferobj_subdata(struct gl_context *ctx,
                        GLintptrARB offset,
                        GLsizeiptrARB size,
                        const GLvoid *data,
                        struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   bool busy;

   if (size == 0)
      return;

   assert(intel_obj);

   /* If we have a single copy in system memory, update that */
   if (intel_obj->sys_buffer) {
      if (intel_obj->source)
         release_buffer(intel_obj);

      if (intel_obj->buffer == NULL) {
         memcpy((char *)intel_obj->sys_buffer + offset, data, size);
         return;
      }

      free(intel_obj->sys_buffer);
      intel_obj->sys_buffer = NULL;
   }

   /* Otherwise we need to update the copy in video memory. */
   busy = drm_intel_bo_busy(intel_obj->buffer) ||
          drm_intel_bo_references(intel->batch.bo, intel_obj->buffer);

   if (busy) {
      if (size == intel_obj->Base.Size) {
         /* Replace the current busy bo with fresh data. */
         drm_intel_bo_unreference(intel_obj->buffer);
         intel_bufferobj_alloc_buffer(intel, intel_obj);
         drm_intel_bo_subdata(intel_obj->buffer, 0, size, data);
      } else {
         perf_debug("Using a blit copy to avoid stalling on %ldb "
                    "glBufferSubData() to a busy buffer object.\n",
                    (long)size);
         drm_intel_bo *temp_bo =
            drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64);

         drm_intel_bo_subdata(temp_bo, 0, size, data);

         intel_emit_linear_blit(intel,
                                intel_obj->buffer, offset,
                                temp_bo, 0,
                                size);

         drm_intel_bo_unreference(temp_bo);
      }
   } else {
      drm_intel_bo_subdata(intel_obj->buffer, offset, size, data);
   }
}
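/* Illustrative note on the blit path above (the scenario, not the code, is
 * an assumption): a partial glBufferSubData() against a buffer the GPU is
 * still reading makes drm_intel_bo_busy() return true, so the update is
 * staged through a freshly allocated temporary BO and copied with the blit
 * engine instead of stalling the CPU on a direct subdata into the busy BO.
 */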
static void
intelCopyTexSubImage(struct gl_context *ctx, GLuint dims,
                     struct gl_texture_image *texImage,
                     GLint xoffset, GLint yoffset, GLint slice,
                     struct gl_renderbuffer *rb,
                     GLint x, GLint y,
                     GLsizei width, GLsizei height)
{
   struct brw_context *brw = brw_context(ctx);

   /* Try BLORP first.  It can handle almost everything. */
   if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y,
                                 xoffset, yoffset, width, height))
      return;

   /* Next, try the BLT engine. */
   if (intel_copy_texsubimage(brw,
                              intel_texture_image(texImage),
                              xoffset, yoffset, slice,
                              intel_renderbuffer(rb), x, y, width, height)) {
      return;
   }

   /* Finally, fall back to meta.  This will likely be slow. */
   perf_debug("%s - fallback to swrast\n", __func__);
   _mesa_meta_CopyTexSubImage(ctx, dims, texImage,
                              xoffset, yoffset, slice,
                              rb, x, y, width, height);
}
/**
 * \brief Helper function for intel_miptree_create().
 */
static uint32_t
intel_miptree_choose_tiling(struct intel_context *intel,
                            mesa_format format,
                            uint32_t width0,
                            enum intel_miptree_tiling_mode requested,
                            struct intel_mipmap_tree *mt)
{
   /* Some usages may want only one type of tiling, like depth miptrees (Y
    * tiled), or temporary BOs for uploading data once (linear).
    */
   switch (requested) {
   case INTEL_MIPTREE_TILING_ANY:
      break;
   case INTEL_MIPTREE_TILING_Y:
      return I915_TILING_Y;
   case INTEL_MIPTREE_TILING_NONE:
      return I915_TILING_NONE;
   }

   int minimum_pitch = mt->total_width * mt->cpp;

   /* If the width is much smaller than a tile, don't bother tiling. */
   if (minimum_pitch < 64)
      return I915_TILING_NONE;

   if (ALIGN(minimum_pitch, 512) >= 32768) {
      perf_debug("%dx%d miptree too large to blit, falling back to untiled",
                 mt->total_width, mt->total_height);
      return I915_TILING_NONE;
   }

   /* We don't have BLORP to handle Y-tiled blits, so use X-tiling. */
   return I915_TILING_X;
}
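/* Worked example for the pitch checks above (the numbers are illustrative,
 * not from the original source): a 4096-texel-wide RGBA8888 miptree has
 * minimum_pitch = 4096 * 4 = 16384 bytes, and ALIGN(16384, 512) = 16384,
 * which is below the 32768-byte limit, so it gets X-tiled.  At 8192 texels
 * the aligned pitch reaches 32768 and the function falls back to
 * I915_TILING_NONE, because the blitter cannot address such large pitches.
 */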
void
intelDrawPixels(struct gl_context *ctx,
                GLint x, GLint y,
                GLsizei width, GLsizei height,
                GLenum format,
                GLenum type,
                const struct gl_pixelstore_attrib *unpack,
                const GLvoid *pixels)
{
   struct brw_context *brw = brw_context(ctx);

   if (!_mesa_check_conditional_render(ctx))
      return;

   if (format == GL_STENCIL_INDEX) {
      _swrast_DrawPixels(ctx, x, y, width, height, format, type,
                         unpack, pixels);
      return;
   }

   if (_mesa_is_bufferobj(unpack->BufferObj)) {
      if (do_blit_drawpixels(ctx, x, y, width, height, format, type, unpack,
                             pixels)) {
         return;
      }

      perf_debug("%s: fallback to generic code in PBO case\n", __FUNCTION__);
   }

   _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type,
                         unpack, pixels);
}
static void
brw_tcs_debug_recompile(struct brw_context *brw,
                        struct gl_shader_program *shader_prog,
                        const struct brw_tcs_prog_key *key)
{
   struct brw_cache_item *c = NULL;
   const struct brw_tcs_prog_key *old_key = NULL;
   bool found = false;

   perf_debug("Recompiling tessellation control shader for program %d\n",
              shader_prog->Name);

   for (unsigned int i = 0; i < brw->cache.size; i++) {
      for (c = brw->cache.items[i]; c; c = c->next) {
         if (c->cache_id == BRW_CACHE_TCS_PROG) {
            old_key = c->key;

            if (old_key->program_string_id == key->program_string_id)
               break;
         }
      }
      if (c)
         break;
   }

   if (!c) {
      perf_debug("  Didn't find previous compile in the shader cache for "
                 "debug\n");
      return;
   }

   found |= key_debug(brw, "input vertices",
                      old_key->input_vertices, key->input_vertices);
   found |= key_debug(brw, "outputs written",
                      old_key->outputs_written, key->outputs_written);
   found |= key_debug(brw, "patch outputs written",
                      old_key->patch_outputs_written,
                      key->patch_outputs_written);
   found |= key_debug(brw, "TES primitive mode",
                      old_key->tes_primitive_mode, key->tes_primitive_mode);
   found |= key_debug(brw, "quads and equal_spacing workaround",
                      old_key->quads_workaround, key->quads_workaround);
   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}
static void
gen8_upload_3dstate_so_buffers(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   struct gl_transform_feedback_object *xfb_obj =
      ctx->TransformFeedback.CurrentObject;
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) xfb_obj;

   /* Set up up to four output buffers.  These are the ranges defined in
    * the gl_transform_feedback_object.
    */
   for (int i = 0; i < 4; i++) {
      struct intel_buffer_object *bufferobj =
         intel_buffer_object(xfb_obj->Buffers[i]);

      if (!bufferobj) {
         BEGIN_BATCH(8);
         OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (8 - 2));
         OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
         continue;
      }

      uint32_t start = xfb_obj->Offset[i];
      assert(start % 4 == 0);
      uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, bufferobj, start, end - start);
      assert(end <= bo->size);

      perf_debug("Missing MOCS setup for 3DSTATE_SO_BUFFER.");

      BEGIN_BATCH(8);
      OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (8 - 2));
      OUT_BATCH(GEN8_SO_BUFFER_ENABLE | (i << SO_BUFFER_INDEX_SHIFT) |
                GEN8_SO_BUFFER_OFFSET_WRITE_ENABLE |
                GEN8_SO_BUFFER_OFFSET_ADDRESS_ENABLE |
                (BDW_MOCS_WB << 22));
      OUT_RELOC64(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);
      OUT_BATCH(xfb_obj->Size[i] / 4 - 1);
      OUT_RELOC64(brw_obj->offset_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  i * sizeof(uint32_t));
      if (brw_obj->zero_offsets)
         OUT_BATCH(0); /* Zero out the offset and write that to offset_bo */
      else
         OUT_BATCH(0xFFFFFFFF); /* Use offset_bo as the "Stream Offset." */
      ADVANCE_BATCH();
   }
   brw_obj->zero_offsets = false;
}
static bool
key_debug(struct brw_context *brw, const char *name, int a, int b)
{
   if (a != b) {
      perf_debug("  %s %d->%d\n", name, a, b);
      return true;
   }
   return false;
}
static bool
key_debug(const char *name, int a, int b)
{
   if (a != b) {
      perf_debug("  %s %d->%d\n", name, a, b);
      return true;
   } else {
      return false;
   }
}
/**
 * HW-2116 workaround: Flush the batch before triggering the hardware state
 * counter wraparound behavior.
 *
 * State updates are tracked by a global counter which increments at the first
 * state update after a draw or a START_BINNING.  Tiles can then have their
 * state updated at draw time with a set of cheap checks for whether the
 * state's copy of the global counter matches the global counter the last time
 * that state was written to the tile.
 *
 * The state counters are relatively small and wrap around quickly, so you
 * could get false negatives for needing to update a particular state in the
 * tile.  To avoid this, the hardware attempts to write all of the state in
 * the tile at wraparound time.  This apparently is broken, so we just flush
 * everything before that behavior is triggered.  A batch flush is sufficient
 * to get our current contents drawn and reset the counters to 0.
 *
 * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the
 * tiles with VC4_PACKET_RETURN_FROM_LIST.
 */
static void
vc4_hw_2116_workaround(struct pipe_context *pctx)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        if (vc4->draw_calls_queued == 0x1ef0) {
                perf_debug("Flushing batch due to HW-2116 workaround "
                           "(too many draw calls per scene)\n");
                vc4_flush(pctx);
        }
}
bool
brw_check_conditional_render(struct brw_context *brw)
{
   if (brw->predicate.state == BRW_PREDICATE_STATE_STALL_FOR_QUERY) {
      perf_debug("Conditional rendering is implemented in software and may "
                 "stall.\n");
      return _mesa_check_conditional_render(&brw->ctx);
   }

   return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER;
}
/**
 * HW-2116 workaround: Flush the batch before triggering the hardware state
 * counter wraparound behavior.
 *
 * State updates are tracked by a global counter which increments at the first
 * state update after a draw or a START_BINNING.  Tiles can then have their
 * state updated at draw time with a set of cheap checks for whether the
 * state's copy of the global counter matches the global counter the last time
 * that state was written to the tile.
 *
 * The state counters are relatively small and wrap around quickly, so you
 * could get false negatives for needing to update a particular state in the
 * tile.  To avoid this, the hardware attempts to write all of the state in
 * the tile at wraparound time.  This apparently is broken, so we just flush
 * everything before that behavior is triggered.  A batch flush is sufficient
 * to get our current contents drawn and reset the counters to 0.
 *
 * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the
 * tiles with VC4_PACKET_RETURN_FROM_LIST.
 */
static void
vc4_hw_2116_workaround(struct pipe_context *pctx, int vert_count)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_job *job = vc4_get_job_for_fbo(vc4);

        if (job->draw_calls_queued + vert_count / 65535 >= VC4_HW_2116_COUNT) {
                perf_debug("Flushing batch due to HW-2116 workaround "
                           "(too many draw calls per scene)\n");
                vc4_job_submit(vc4, job);
        }
}
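/* A minimal sketch of where the workaround hooks in (a hypothetical caller;
 * the real draw path does much more validation and setup): run the check
 * before queuing each draw so the batch is flushed before the hardware
 * state counter can wrap.
 */
static void
example_draw_vbo(struct pipe_context *pctx,
                 const struct pipe_draw_info *info)
{
        vc4_hw_2116_workaround(pctx, info->count);

        /* ... validate state and queue the draw into the current job ... */
}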
bool
brw_wm_do_compile(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader *fs = NULL;

   if (c->shader_prog)
      fs = c->shader_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    */
   int param_count;
   if (fs) {
      param_count = fs->num_uniform_components;
   } else {
      param_count = c->fp->program.Base.Parameters->NumParameters * 4;
   }
   /* The backend also sometimes adds params for texture size. */
   param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;

   c->prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
   c->prog_data.base.pull_param =
      rzalloc_array(NULL, const float *, param_count);
   c->prog_data.base.nr_params = param_count;

   c->prog_data.barycentric_interp_modes =
      brw_compute_barycentric_interp_modes(brw, c->key.flat_shade,
                                           c->key.persample_shading,
                                           &c->fp->program);

   c->program = brw_wm_fs_emit(brw, c, &c->fp->program, c->shader_prog,
                               &c->program_size);
   if (c->program == NULL)
      return false;

   /* Scratch space is used for register spilling */
   if (c->last_scratch) {
      perf_debug("Fragment shader triggered register spilling. "
                 "Try reducing the number of live scalar values to "
                 "improve performance.\n");

      c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_WM))
      fprintf(stderr, "\n");

   return true;
}
static void
brw_gs_debug_recompile(struct brw_context *brw, struct gl_program *prog,
                       const struct brw_gs_prog_key *key)
{
   perf_debug("Recompiling geometry shader for program %d\n", prog->Id);

   bool found = false;
   const struct brw_gs_prog_key *old_key =
      brw_find_previous_compile(&brw->cache, BRW_CACHE_GS_PROG,
                                key->program_string_id);

   if (!old_key) {
      perf_debug("  Didn't find previous compile in the shader cache for "
                 "debug\n");
      return;
   }

   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}
static void
brw_gs_debug_recompile(struct brw_context *brw,
                       struct gl_shader_program *shader_prog,
                       const struct brw_gs_prog_key *key)
{
   struct brw_cache_item *c = NULL;
   const struct brw_gs_prog_key *old_key = NULL;
   bool found = false;

   perf_debug("Recompiling geometry shader for program %d\n",
              shader_prog->Name);

   for (unsigned int i = 0; i < brw->cache.size; i++) {
      for (c = brw->cache.items[i]; c; c = c->next) {
         if (c->cache_id == BRW_CACHE_GS_PROG) {
            old_key = c->key;

            if (old_key->program_string_id == key->program_string_id)
               break;
         }
      }
      if (c)
         break;
   }

   if (!c) {
      perf_debug("  Didn't find previous compile in the shader cache for "
                 "debug\n");
      return;
   }

   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}
static void
intel_miptree_copy_slice(struct intel_context *intel,
                         struct intel_mipmap_tree *dst_mt,
                         struct intel_mipmap_tree *src_mt,
                         int level,
                         int face,
                         int depth)
{
   mesa_format format = src_mt->format;
   uint32_t width = src_mt->level[level].width;
   uint32_t height = src_mt->level[level].height;
   int slice;

   if (face > 0)
      slice = face;
   else
      slice = depth;

   assert(depth < src_mt->level[level].depth);
   assert(src_mt->format == dst_mt->format);

   if (dst_mt->compressed) {
      height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
      width = ALIGN(width, dst_mt->align_w);
   }

   uint32_t dst_x, dst_y, src_x, src_y;
   intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
   intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);

   DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
       _mesa_get_format_name(src_mt->format),
       src_mt, src_x, src_y, src_mt->region->pitch,
       _mesa_get_format_name(dst_mt->format),
       dst_mt, dst_x, dst_y, dst_mt->region->pitch,
       width, height);

   if (!intel_miptree_blit(intel,
                           src_mt, level, slice, 0, 0, false,
                           dst_mt, level, slice, 0, 0, false,
                           width, height, GL_COPY)) {
      perf_debug("miptree validate blit for %s failed\n",
                 _mesa_get_format_name(format));

      intel_miptree_copy_slice_sw(intel, dst_mt, src_mt, level, slice,
                                  width, height);
   }
}
int
brw_bo_map_gtt(struct brw_context *brw, drm_intel_bo *bo, const char *bo_name)
{
   if (likely(!brw->perf_debug) || !drm_intel_bo_busy(bo))
      return drm_intel_gem_bo_map_gtt(bo);

   /* get_time() returns a large seconds-based value; a float's ~7
    * significant digits would destroy the millisecond-scale delta computed
    * below, so keep the timestamp in a double.
    */
   double start_time = get_time();

   int ret = drm_intel_gem_bo_map_gtt(bo);

   perf_debug("GTT mapping a busy %s BO stalled and took %.03f ms.\n",
              bo_name, (get_time() - start_time) * 1000);

   return ret;
}
/**
 * Determine if fast color clear supports the given clear color.
 *
 * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
 * moment we only support floating point, unorm, and snorm buffers.
 */
static bool
is_color_fast_clear_compatible(struct brw_context *brw,
                               gl_format format,
                               const union gl_color_union *color)
{
   if (_mesa_is_format_integer_color(format))
      return false;

   for (int i = 0; i < 4; i++) {
      if (color->f[i] != 0.0 && color->f[i] != 1.0) {
         perf_debug("Clear color unsupported by fast color clear. "
                    "Falling back to slow clear.\n");
         return false;
      }
   }
   return true;
}
void
brw_draw_prims(struct gl_context *ctx,
               const struct _mesa_prim *prim,
               GLuint nr_prims,
               const struct _mesa_index_buffer *ib,
               GLboolean index_bounds_valid,
               GLuint min_index,
               GLuint max_index,
               struct gl_transform_feedback_object *tfb_vertcount)
{
   struct intel_context *intel = intel_context(ctx);
   const struct gl_client_array **arrays = ctx->Array._DrawArrays;

   if (!_mesa_check_conditional_render(ctx))
      return;

   /* Handle primitive restart if needed */
   if (brw_handle_primitive_restart(ctx, prim, nr_prims, ib)) {
      /* The draw was handled, so we can exit now */
      return;
   }

   /* If we're going to have to upload any of the user's vertex arrays, then
    * get the minimum and maximum of their index buffer so we know what range
    * to upload.
    */
   if (!vbo_all_varyings_in_vbos(arrays) && !index_bounds_valid)
      vbo_get_minmax_indices(ctx, prim, ib, &min_index, &max_index, nr_prims);

   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
    * won't support all the extensions we support.
    */
   if (ctx->RenderMode != GL_RENDER) {
      perf_debug("%s render mode not supported in hardware\n",
                 _mesa_lookup_enum_by_nr(ctx->RenderMode));
      _swsetup_Wakeup(ctx);
      _tnl_wakeup(ctx);
      _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
      return;
   }

   /* Try drawing with the hardware, but don't do anything else if we can't
    * manage it.  swrast doesn't support our featureset, so we can't fall back
    * to it.
    */
   brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}
void
intelReadPixels(struct gl_context *ctx,
                GLint x, GLint y, GLsizei width, GLsizei height,
                GLenum format, GLenum type,
                const struct gl_pixelstore_attrib *pack, GLvoid *pixels)
{
   bool ok;

   struct brw_context *brw = brw_context(ctx);
   bool dirty;

   DBG("%s\n", __func__);

   /* Reading pixels won't dirty the front buffer, so reset the dirty
    * flag after calling intel_prepare_render().
    */
   dirty = brw->front_buffer_dirty;
   intel_prepare_render(brw);
   brw->front_buffer_dirty = dirty;

   if (_mesa_is_bufferobj(pack->BufferObj)) {
      if (intel_readpixels_blorp(ctx, x, y, width, height,
                                 format, type, pixels, pack))
         return;

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
   }

   ok = intel_readpixels_tiled_memcpy(ctx, x, y, width, height,
                                      format, type, pixels, pack);
   if (ok)
      return;

   /* Update Mesa state before calling _mesa_readpixels().
    * XXX this may not be needed since ReadPixels no longer uses the
    * span code.
    */
   if (ctx->NewState)
      _mesa_update_state(ctx);

   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);

   /* There's an intel_prepare_render() call in intelSpanRenderStart(). */
   brw->front_buffer_dirty = dirty;
}
/**
 * Map a buffer object; issue performance warnings if mapping causes stalls.
 *
 * This matches the drm_intel_bo_map API, but takes an additional
 * human-readable name for the buffer object to use in the performance
 * debug message.
 */
int
brw_bo_map(struct brw_context *brw, drm_intel_bo *bo, int write_enable,
           const char *bo_name)
{
   if (likely(!brw->perf_debug) || !drm_intel_bo_busy(bo))
      return drm_intel_bo_map(bo, write_enable);

   double start_time = get_time();

   int ret = drm_intel_bo_map(bo, write_enable);

   perf_debug("CPU mapping a busy %s BO stalled and took %.03f ms.\n",
              bo_name, (get_time() - start_time) * 1000);

   return ret;
}
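/* A minimal usage sketch (a hypothetical caller, not from the original
 * source): upload data through the mapping, tagging the BO so any stall
 * shows up in the perf_debug output under a readable name.
 */
static void
example_upload(struct brw_context *brw, drm_intel_bo *bo,
               const void *data, size_t size)
{
   if (brw_bo_map(brw, bo, true /* write_enable */, "example vbo") != 0)
      return;

   memcpy(bo->virtual, data, size);
   drm_intel_bo_unmap(bo);
}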
/**
 * Determine if fast color clear supports the given clear color.
 *
 * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
 * moment we only support floating point, unorm, and snorm buffers.
 */
bool
brw_is_color_fast_clear_compatible(struct brw_context *brw,
                                   const struct intel_mipmap_tree *mt,
                                   const union gl_color_union *color)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* If we're mapping the render format to a different format than the
    * format we use for texturing then it is a bit questionable whether it
    * should be possible to use a fast clear.  Although we only actually
    * render using a renderable format, without the override workaround it
    * wouldn't be possible to have a non-renderable surface in a fast clear
    * state so the hardware probably legitimately doesn't need to support
    * this case.  At least on Gen9 this really does seem to cause problems.
    */
   if (devinfo->gen >= 9 &&
       brw_isl_format_for_mesa_format(mt->format) !=
       brw->mesa_to_isl_render_format[mt->format])
      return false;

   const mesa_format format = _mesa_get_render_format(ctx, mt->format);
   if (_mesa_is_format_integer_color(format)) {
      if (devinfo->gen >= 8) {
         perf_debug("Integer fast clear not enabled for (%s)",
                    _mesa_get_format_name(format));
      }
      return false;
   }

   for (int i = 0; i < 4; i++) {
      if (!_mesa_format_has_color_component(format, i)) {
         continue;
      }

      if (devinfo->gen < 9 &&
          color->f[i] != 0.0f && color->f[i] != 1.0f) {
         return false;
      }
   }
   return true;
}
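/* A minimal sketch of how the predicate gates the clear path (a
 * hypothetical caller; the real clear code does considerably more setup
 * around this check).
 */
static void
example_clear_path(struct brw_context *brw, struct intel_mipmap_tree *mt)
{
   const struct gl_context *ctx = &brw->ctx;

   if (brw_is_color_fast_clear_compatible(brw, mt, &ctx->Color.ClearColor)) {
      /* ... record the clear color and take the MCS fast-clear path ... */
   } else {
      /* ... fall back to a regular (slow) clear ... */
   }
}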
static void
intel_get_tex_sub_image(struct gl_context *ctx,
                        GLint xoffset, GLint yoffset, GLint zoffset,
                        GLsizei width, GLsizei height, GLint depth,
                        GLenum format, GLenum type, GLvoid *pixels,
                        struct gl_texture_image *texImage)
{
   struct brw_context *brw = brw_context(ctx);
   bool ok;

   DBG("%s\n", __func__);

   if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
      if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage,
                                        xoffset, yoffset, zoffset,
                                        width, height, depth, format, type,
                                        pixels, &ctx->Pack)) {
         /* Flush to guarantee coherency between the render cache and other
          * caches the PBO could potentially be bound to after this point.
          * See the related comment in intelReadPixels() for a more detailed
          * explanation.
          */
         brw_emit_mi_flush(brw);
         return;
      }

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
   }

   ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset,
                                          width, height,
                                          format, type, pixels, &ctx->Pack);

   if (ok)
      return;

   _mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset,
                             width, height, depth,
                             format, type, pixels, texImage);

   DBG("%s - DONE\n", __func__);
}
void *
intel_miptree_map_raw(struct intel_context *intel,
                      struct intel_mipmap_tree *mt)
{
   drm_intel_bo *bo = mt->region->bo;

   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
      if (drm_intel_bo_busy(bo)) {
         perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
      }
   }

   intel_flush(&intel->ctx);

   if (mt->region->tiling != I915_TILING_NONE)
      drm_intel_gem_bo_map_gtt(bo);
   else
      drm_intel_bo_map(bo, true);

   return bo->virtual;
}
void
intelReadPixels(struct gl_context *ctx,
                GLint x, GLint y, GLsizei width, GLsizei height,
                GLenum format, GLenum type,
                const struct gl_pixelstore_attrib *pack, GLvoid *pixels)
{
   struct brw_context *brw = brw_context(ctx);
   bool dirty;

   DBG("%s\n", __FUNCTION__);

   if (_mesa_is_bufferobj(pack->BufferObj)) {
      /* Using PBOs, so try the BLT based path. */
      if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack,
                             pixels)) {
         return;
      }

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);
   }

   /* glReadPixels() won't dirty the front buffer, so reset the dirty
    * flag after calling intel_prepare_render().
    */
   dirty = brw->front_buffer_dirty;
   intel_prepare_render(brw);
   brw->front_buffer_dirty = dirty;

   /* Update Mesa state before calling _mesa_readpixels().
    * XXX this may not be needed since ReadPixels no longer uses the
    * span code.
    */
   if (ctx->NewState)
      _mesa_update_state(ctx);

   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);

   /* There's an intel_prepare_render() call in intelSpanRenderStart(). */
   brw->front_buffer_dirty = dirty;
}
static void
vc4_clear(struct pipe_context *pctx, unsigned buffers,
          const union pipe_color_union *color, double depth, unsigned stencil)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        /* We can't flag new buffers for clearing once we've queued draws.  We
         * could avoid this by using the 3d engine to clear.
         */
        if (vc4->draw_calls_queued) {
                perf_debug("Flushing rendering to process new clear.\n");
                vc4_flush(pctx);
        }

        if (buffers & PIPE_CLEAR_COLOR0) {
                vc4->clear_color[0] = vc4->clear_color[1] =
                        pack_rgba(vc4->framebuffer.cbufs[0]->format,
                                  color->f);
        }

        if (buffers & PIPE_CLEAR_DEPTH) {
                /* Though the depth buffer is stored with Z in the high 24,
                 * for this field we just need to store it in the low 24.
                 */
                vc4->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth);
        }

        if (buffers & PIPE_CLEAR_STENCIL)
                vc4->clear_stencil = stencil;

        vc4->draw_min_x = 0;
        vc4->draw_min_y = 0;
        vc4->draw_max_x = vc4->framebuffer.width;
        vc4->draw_max_y = vc4->framebuffer.height;

        vc4->cleared |= buffers;
        vc4->resolve |= buffers;

        vc4_start_draw(vc4);
}