Example No. 1
void
brw_vs_debug_recompile(struct brw_context *brw,
                       struct gl_shader_program *prog,
                       const struct brw_vs_prog_key *key)
{
   struct brw_cache_item *c = NULL;
   const struct brw_vs_prog_key *old_key = NULL;
   bool found = false;

   perf_debug("Recompiling vertex shader for program %d\n", prog->Name);

   for (unsigned int i = 0; i < brw->cache.size; i++) {
      for (c = brw->cache.items[i]; c; c = c->next) {
         if (c->cache_id == BRW_VS_PROG) {
            old_key = c->key;

            if (old_key->base.program_string_id == key->base.program_string_id)
               break;
         }
      }
      if (c)
         break;
   }

   if (!c) {
      perf_debug("  Didn't find previous compile in the shader cache for "
                 "debug\n");
      return;
   }

   for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
      found |= key_debug(brw, "Vertex attrib w/a flags",
                         old_key->gl_attrib_wa_flags[i],
                         key->gl_attrib_wa_flags[i]);
   }

   found |= key_debug(brw, "user clip flags",
                      old_key->base.userclip_active, key->base.userclip_active);

   found |= key_debug(brw, "user clipping planes as push constants",
                      old_key->base.nr_userclip_plane_consts,
                      key->base.nr_userclip_plane_consts);

   found |= key_debug(brw, "clip distance enable",
                      old_key->base.uses_clip_distance, key->base.uses_clip_distance);
   found |= key_debug(brw, "copy edgeflag",
                      old_key->copy_edgeflag, key->copy_edgeflag);
   found |= key_debug(brw, "PointCoord replace",
                      old_key->point_coord_replace, key->point_coord_replace);
   found |= key_debug(brw, "vertex color clamping",
                      old_key->base.clamp_vertex_color, key->base.clamp_vertex_color);

   found |= brw_debug_recompile_sampler_key(brw, &old_key->base.tex,
                                            &key->base.tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}
Example No. 2
static bool
intel_set_texture_storage_for_buffer_object(struct gl_context *ctx,
                                            struct gl_texture_object *tex_obj,
                                            struct gl_buffer_object *buffer_obj,
                                            uint32_t buffer_offset,
                                            uint32_t row_stride,
                                            bool read_only)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *intel_texobj = intel_texture_object(tex_obj);
   struct gl_texture_image *image = tex_obj->Image[0][0];
   struct intel_texture_image *intel_image = intel_texture_image(image);
   struct intel_buffer_object *intel_buffer_obj = intel_buffer_object(buffer_obj);

   if (!read_only) {
      /* Renderbuffers have the restriction that the buffer offset and
       * surface pitch must be a multiple of the element size.  If it's
       * not, we have to fail and fall back to software.
       */
      int cpp = _mesa_get_format_bytes(image->TexFormat);
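      /* Illustrative note (not in the original source): for a 16-byte format
       * such as MESA_FORMAT_RGBA_FLOAT32, cpp is 16, so buffer_offset and
       * row_stride must both be 16-byte aligned to stay on this path.
       */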
      if (buffer_offset % cpp || row_stride % cpp) {
         perf_debug("Bad PBO alignment; fallback to CPU mapping\n");
         return false;
      }

      if (!brw->format_supported_as_render_target[image->TexFormat]) {
         perf_debug("Non-renderable PBO format; fallback to CPU mapping\n");
         return false;
      }
   }

   assert(intel_texobj->mt == NULL);

   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_buffer_obj,
                                             buffer_offset,
                                             row_stride * image->Height);
   intel_texobj->mt =
      intel_miptree_create_for_bo(brw, bo,
                                  image->TexFormat,
                                  buffer_offset,
                                  image->Width, image->Height, image->Depth,
                                  row_stride,
                                  0);
   if (!intel_texobj->mt)
      return false;

   if (!_swrast_init_texture_image(image))
      return false;

   intel_miptree_reference(&intel_image->mt, intel_texobj->mt);

   /* The miptree is in a validated state, so no need to check later. */
   intel_texobj->needs_validate = false;
   intel_texobj->validated_first_level = 0;
   intel_texobj->validated_last_level = 0;
   intel_texobj->_Format = intel_texobj->mt->format;

   return true;
}
Example No. 3
void brw_draw_prims( struct gl_context *ctx,
		     const struct _mesa_prim *prims,
		     GLuint nr_prims,
		     const struct _mesa_index_buffer *ib,
		     GLboolean index_bounds_valid,
		     GLuint min_index,
		     GLuint max_index,
		     struct gl_transform_feedback_object *unused_tfb_object,
		     struct gl_buffer_object *indirect )
{
   struct brw_context *brw = brw_context(ctx);
   const struct gl_client_array **arrays = ctx->Array._DrawArrays;

   assert(unused_tfb_object == NULL);

   if (ctx->Query.CondRenderQuery) {
      perf_debug("Conditional rendering is implemented in software and may "
                 "stall.  This should be fixed in the driver.\n");
   }

   if (!_mesa_check_conditional_render(ctx))
      return;

   /* Handle primitive restart if needed */
   if (brw_handle_primitive_restart(ctx, prims, nr_prims, ib, indirect)) {
      /* The draw was handled, so we can exit now */
      return;
   }

   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
    * won't support all the extensions we support.
    */
   if (ctx->RenderMode != GL_RENDER) {
      perf_debug("%s render mode not supported in hardware\n",
                 _mesa_lookup_enum_by_nr(ctx->RenderMode));
      _swsetup_Wakeup(ctx);
      _tnl_wakeup(ctx);
      _tnl_draw_prims(ctx, prims, nr_prims, ib,
                      index_bounds_valid, min_index, max_index, NULL, NULL);
      return;
   }

   /* If we're going to have to upload any of the user's vertex arrays, then
    * get the minimum and maximum of their index buffer so we know what range
    * to upload.
    */
   if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) {
      perf_debug("Scanning index buffer to compute index buffer bounds.  "
                 "Use glDrawRangeElements() to avoid this.\n");
      vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
   }

   /* Try drawing with the hardware, but don't do anything else if we can't
    * manage it.  swrast doesn't support our featureset, so we can't fall back
    * to it.
    */
   brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index, indirect);
}
Example No. 4
void
brw_wm_debug_recompile(struct brw_context *brw,
                       struct gl_shader_program *prog,
                       const struct brw_wm_prog_key *key)
{
   struct brw_cache_item *c = NULL;
   const struct brw_wm_prog_key *old_key = NULL;
   bool found = false;

   perf_debug("Recompiling fragment shader for program %d\n", prog->Name);

   for (unsigned int i = 0; i < brw->cache.size; i++) {
      for (c = brw->cache.items[i]; c; c = c->next) {
         if (c->cache_id == BRW_WM_PROG) {
            old_key = c->key;

            if (old_key->program_string_id == key->program_string_id)
               break;
         }
      }
      if (c)
         break;
   }

   if (!c) {
      perf_debug("  Didn't find previous compile in the shader cache for debug\n");
      return;
   }

   found |= key_debug(brw, "alphatest, computed depth, depth test, or "
                      "depth write",
                      old_key->iz_lookup, key->iz_lookup);
   found |= key_debug(brw, "depth statistics",
                      old_key->stats_wm, key->stats_wm);
   found |= key_debug(brw, "flat shading",
                      old_key->flat_shade, key->flat_shade);
   found |= key_debug(brw, "number of color buffers",
                      old_key->nr_color_regions, key->nr_color_regions);
   found |= key_debug(brw, "MRT alpha test or alpha-to-coverage",
                      old_key->replicate_alpha, key->replicate_alpha);
   found |= key_debug(brw, "rendering to FBO",
                      old_key->render_to_fbo, key->render_to_fbo);
   found |= key_debug(brw, "fragment color clamping",
                      old_key->clamp_fragment_color, key->clamp_fragment_color);
   found |= key_debug(brw, "line smoothing",
                      old_key->line_aa, key->line_aa);
   found |= key_debug(brw, "renderbuffer height",
                      old_key->drawable_height, key->drawable_height);
   found |= key_debug(brw, "input slots valid",
                      old_key->input_slots_valid, key->input_slots_valid);

   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}
Example No. 5
/**
 * Determine if fast color clear supports the given clear color.
 *
 * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
 * moment we only support floating point, unorm, and snorm buffers.
 */
static bool
is_color_fast_clear_compatible(struct brw_context *brw,
                               mesa_format format,
                               const union gl_color_union *color)
{
   if (_mesa_is_format_integer_color(format)) {
      if (brw->gen >= 8) {
         perf_debug("Integer fast clear not enabled for (%s)",
                    _mesa_get_format_name(format));
      }
      return false;
   }

   for (int i = 0; i < 4; i++) {
      if (!_mesa_format_has_color_component(format, i)) {
         continue;
      }

      if (brw->gen < 9 &&
          color->f[i] != 0.0f && color->f[i] != 1.0f) {
         return false;
      }
   }
   return true;
}
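The test above boils down to a per-channel rule: on pre-Gen9 hardware, every channel the format actually stores must be exactly 0.0 or 1.0. A minimal standalone sketch of that rule (not Mesa code; has_component is a stand-in for _mesa_format_has_color_component, here hard-coded to an RGB format):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for _mesa_format_has_color_component(); pretend the format is
 * RGB, i.e. it stores components 0..2 and has no alpha channel. */
static bool has_component(int i)
{
   return i < 3;
}

static bool clear_color_is_fast_clearable(const float color[4], int gen)
{
   for (int i = 0; i < 4; i++) {
      if (!has_component(i))
         continue;                     /* Missing channels never disqualify. */

      if (gen < 9 && color[i] != 0.0f && color[i] != 1.0f)
         return false;                 /* Pre-Gen9: only 0.0 or 1.0 allowed. */
   }
   return true;
}

int main(void)
{
   const float black[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
   const float grey[4]  = { 0.5f, 0.5f, 0.5f, 1.0f };

   printf("black, gen8: %d\n", clear_color_is_fast_clearable(black, 8)); /* 1 */
   printf("grey,  gen8: %d\n", clear_color_is_fast_clearable(grey, 8));  /* 0 */
   printf("grey,  gen9: %d\n", clear_color_is_fast_clearable(grey, 9));  /* 1 */
   return 0;
}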
Example No. 6
/**
 * Replace data in a subrange of a buffer object.  If the data range
 * specified by size + offset extends beyond the end of the buffer or
 * if data is NULL, no copy is performed.
 * Called via glBufferSubDataARB().
 */
static void
intel_bufferobj_subdata(struct gl_context * ctx,
                        GLintptrARB offset,
                        GLsizeiptrARB size,
                        const GLvoid * data, struct gl_buffer_object *obj)
{
   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
   bool busy;

   if (size == 0)
      return;

   assert(intel_obj);

   /* If we have a single copy in system memory, update that */
   if (intel_obj->sys_buffer) {
      if (intel_obj->source)
	 release_buffer(intel_obj);

      if (intel_obj->buffer == NULL) {
	 memcpy((char *)intel_obj->sys_buffer + offset, data, size);
	 return;
      }

      free(intel_obj->sys_buffer);
      intel_obj->sys_buffer = NULL;
   }

   /* Otherwise we need to update the copy in video memory. */
   busy =
      drm_intel_bo_busy(intel_obj->buffer) ||
      drm_intel_bo_references(intel->batch.bo, intel_obj->buffer);

   if (busy) {
      if (size == intel_obj->Base.Size) {
	 /* Replace the current busy bo with fresh data. */
	 drm_intel_bo_unreference(intel_obj->buffer);
	 intel_bufferobj_alloc_buffer(intel, intel_obj);
	 drm_intel_bo_subdata(intel_obj->buffer, 0, size, data);
      } else {
         perf_debug("Using a blit copy to avoid stalling on %ldb "
                    "glBufferSubData() to a busy buffer object.\n",
                    (long)size);
	 drm_intel_bo *temp_bo =
	    drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64);

	 drm_intel_bo_subdata(temp_bo, 0, size, data);

	 intel_emit_linear_blit(intel,
				intel_obj->buffer, offset,
				temp_bo, 0,
				size);

	 drm_intel_bo_unreference(temp_bo);
      }
   } else {
      drm_intel_bo_subdata(intel_obj->buffer, offset, size, data);
   }
}
Example No. 7
static void
intelCopyTexSubImage(struct gl_context *ctx, GLuint dims,
                     struct gl_texture_image *texImage,
                     GLint xoffset, GLint yoffset, GLint slice,
                     struct gl_renderbuffer *rb,
                     GLint x, GLint y,
                     GLsizei width, GLsizei height)
{
   struct brw_context *brw = brw_context(ctx);

   /* Try BLORP first.  It can handle almost everything. */
   if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y,
                                 xoffset, yoffset, width, height))
      return;

   /* Next, try the BLT engine. */
   if (intel_copy_texsubimage(brw,
                              intel_texture_image(texImage),
                              xoffset, yoffset, slice,
                              intel_renderbuffer(rb), x, y, width, height)) {
      return;
   }

   /* Finally, fall back to meta.  This will likely be slow. */
   perf_debug("%s - fallback to swrast\n", __func__);
   _mesa_meta_CopyTexSubImage(ctx, dims, texImage,
                              xoffset, yoffset, slice,
                              rb, x, y, width, height);
}
Example No. 8
/**
 * \brief Helper function for intel_miptree_create().
 */
static uint32_t
intel_miptree_choose_tiling(struct intel_context *intel,
                            mesa_format format,
                            uint32_t width0,
                            enum intel_miptree_tiling_mode requested,
                            struct intel_mipmap_tree *mt)
{
    /* Some usages may want only one type of tiling, like depth miptrees (Y
     * tiled), or temporary BOs for uploading data once (linear).
     */
    switch (requested) {
    case INTEL_MIPTREE_TILING_ANY:
        break;
    case INTEL_MIPTREE_TILING_Y:
        return I915_TILING_Y;
    case INTEL_MIPTREE_TILING_NONE:
        return I915_TILING_NONE;
    }

    int minimum_pitch = mt->total_width * mt->cpp;

    /* If the width is much smaller than a tile, don't bother tiling. */
    if (minimum_pitch < 64)
        return I915_TILING_NONE;

    if (ALIGN(minimum_pitch, 512) >= 32768) {
        perf_debug("%dx%d miptree too large to blit, falling back to untiled",
                   mt->total_width, mt->total_height);
        return I915_TILING_NONE;
    }

    /* We don't have BLORP to handle Y-tiled blits, so use X-tiling. */
    return I915_TILING_X;
}
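To make the thresholds above concrete (a worked example, not taken from the source): a hypothetical 8192-texel-wide RGBA8888 miptree needs minimum_pitch = 8192 * 4 = 32768 bytes, and ALIGN(32768, 512) = 32768 hits the >= 32768 blitter limit, so it falls back to I915_TILING_NONE. Conversely, an 8-texel-wide RGBA8888 miptree has minimum_pitch = 32 < 64, so it also stays linear, simply because tiling would buy nothing at that width.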
Example No. 9
void
intelDrawPixels(struct gl_context * ctx,
                GLint x, GLint y,
                GLsizei width, GLsizei height,
                GLenum format,
                GLenum type,
                const struct gl_pixelstore_attrib *unpack,
                const GLvoid * pixels)
{
   struct brw_context *brw = brw_context(ctx);

   if (!_mesa_check_conditional_render(ctx))
      return;

   if (format == GL_STENCIL_INDEX) {
      _swrast_DrawPixels(ctx, x, y, width, height, format, type,
                         unpack, pixels);
      return;
   }

   if (_mesa_is_bufferobj(unpack->BufferObj)) {
      if (do_blit_drawpixels(ctx, x, y, width, height, format, type, unpack,
			     pixels)) {
	 return;
      }

      perf_debug("%s: fallback to generic code in PBO case\n", __FUNCTION__);
   }

   _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type,
                         unpack, pixels);
}
Example No. 10
static void
brw_tcs_debug_recompile(struct brw_context *brw,
                       struct gl_shader_program *shader_prog,
                       const struct brw_tcs_prog_key *key)
{
   struct brw_cache_item *c = NULL;
   const struct brw_tcs_prog_key *old_key = NULL;
   bool found = false;

   perf_debug("Recompiling tessellation control shader for program %d\n",
              shader_prog->Name);

   for (unsigned int i = 0; i < brw->cache.size; i++) {
      for (c = brw->cache.items[i]; c; c = c->next) {
         if (c->cache_id == BRW_CACHE_TCS_PROG) {
            old_key = c->key;

            if (old_key->program_string_id == key->program_string_id)
               break;
         }
      }
      if (c)
         break;
   }

   if (!c) {
      perf_debug("  Didn't find previous compile in the shader cache for "
                 "debug\n");
      return;
   }

   found |= key_debug(brw, "input vertices", old_key->input_vertices,
                      key->input_vertices);
   found |= key_debug(brw, "outputs written", old_key->outputs_written,
                      key->outputs_written);
   found |= key_debug(brw, "patch outputs written", old_key->patch_outputs_written,
                      key->patch_outputs_written);
   found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode,
                      key->tes_primitive_mode);
   found |= key_debug(brw, "quads and equal_spacing workaround",
                      old_key->quads_workaround, key->quads_workaround);
   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}
Example No. 11
static void
gen8_upload_3dstate_so_buffers(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_TRANSFORM_FEEDBACK */
   struct gl_transform_feedback_object *xfb_obj =
      ctx->TransformFeedback.CurrentObject;
   struct brw_transform_feedback_object *brw_obj =
      (struct brw_transform_feedback_object *) xfb_obj;

   /* Set up the (up to four) output buffers.  These are the ranges defined in the
    * gl_transform_feedback_object.
    */
   for (int i = 0; i < 4; i++) {
      struct intel_buffer_object *bufferobj =
         intel_buffer_object(xfb_obj->Buffers[i]);

      if (!bufferobj) {
         BEGIN_BATCH(8);
         OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (8 - 2));
         OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
         continue;
      }

      uint32_t start = xfb_obj->Offset[i];
      assert(start % 4 == 0);
      uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, bufferobj, start, end - start);
      assert(end <= bo->size);

      perf_debug("Missing MOCS setup for 3DSTATE_SO_BUFFER.");

      BEGIN_BATCH(8);
      OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (8 - 2));
      OUT_BATCH(GEN8_SO_BUFFER_ENABLE | (i << SO_BUFFER_INDEX_SHIFT) |
                GEN8_SO_BUFFER_OFFSET_WRITE_ENABLE |
                GEN8_SO_BUFFER_OFFSET_ADDRESS_ENABLE |
                (BDW_MOCS_WB << 22));
      OUT_RELOC64(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);
      OUT_BATCH(xfb_obj->Size[i] / 4 - 1);
      OUT_RELOC64(brw_obj->offset_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  i * sizeof(uint32_t));
      if (brw_obj->zero_offsets)
         OUT_BATCH(0); /* Zero out the offset and write that to offset_bo */
      else
         OUT_BATCH(0xFFFFFFFF); /* Use offset_bo as the "Stream Offset." */
      ADVANCE_BATCH();
   }
   brw_obj->zero_offsets = false;
}
Example No. 12
static bool
key_debug(struct brw_context *brw, const char *name, int a, int b)
{
   if (a != b) {
      perf_debug("  %s %d->%d\n", name, a, b);
      return true;
   }
   return false;
}
Example No. 13
static bool
key_debug(const char *name, int a, int b)
{
   if (a != b) {
      perf_debug("  %s %d->%d\n", name, a, b);
      return true;
   } else {
      return false;
   }
}
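Examples 12 and 13 are two variants of the same helper, and every *_debug_recompile function above follows the same pattern: diff each field of the old and new program keys, report the fields that changed, and OR the results into found so the caller can print a catch-all message when nothing matched. A self-contained sketch of the pattern (perf_debug is reduced to an fprintf stub and fake_prog_key is invented for illustration):

#include <stdbool.h>
#include <stdio.h>

/* Stub standing in for Mesa's perf_debug() macro. */
#define perf_debug(...) fprintf(stderr, __VA_ARGS__)

/* Invented two-field key, purely for illustration. */
struct fake_prog_key {
   int flat_shade;
   int nr_color_regions;
};

static bool
key_debug(const char *name, int a, int b)
{
   if (a != b) {
      perf_debug("  %s %d->%d\n", name, a, b);
      return true;
   }
   return false;
}

static void
debug_recompile(const struct fake_prog_key *old_key,
                const struct fake_prog_key *key)
{
   bool found = false;

   found |= key_debug("flat shading", old_key->flat_shade, key->flat_shade);
   found |= key_debug("number of color buffers",
                      old_key->nr_color_regions, key->nr_color_regions);

   if (!found)
      perf_debug("  Something else\n");
}

int main(void)
{
   struct fake_prog_key old_key = { 0, 1 };
   struct fake_prog_key key = { 1, 1 };

   debug_recompile(&old_key, &key);   /* prints "  flat shading 0->1" */
   return 0;
}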
Example No. 14
/**
 * HW-2116 workaround: Flush the batch before triggering the hardware state
 * counter wraparound behavior.
 *
 * State updates are tracked by a global counter which increments at the first
 * state update after a draw or a START_BINNING.  Tiles can then have their
 * state updated at draw time with a set of cheap checks for whether the
 * state's copy of the global counter matches the global counter the last time
 * that state was written to the tile.
 *
 * The state counters are relatively small and wrap around quickly, so you
 * could get false negatives for needing to update a particular state in the
 * tile.  To avoid this, the hardware attempts to write all of the state in
 * the tile at wraparound time.  This apparently is broken, so we just flush
 * everything before that behavior is triggered.  A batch flush is sufficient
 * to get our current contents drawn and reset the counters to 0.
 *
 * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the
 * tiles with VC4_PACKET_RETURN_FROM_LIST.
 */
static void
vc4_hw_2116_workaround(struct pipe_context *pctx)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        if (vc4->draw_calls_queued == 0x1ef0) {
                perf_debug("Flushing batch due to HW-2116 workaround "
                           "(too many draw calls per scene\n");
                vc4_flush(pctx);
        }
}
Example No. 15
bool
brw_check_conditional_render(struct brw_context *brw)
{
   if (brw->predicate.state == BRW_PREDICATE_STATE_STALL_FOR_QUERY) {
      perf_debug("Conditional rendering is implemented in software and may "
                 "stall.\n");
      return _mesa_check_conditional_render(&brw->ctx);
   }

   return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER;
}
Example No. 16
/**
 * HW-2116 workaround: Flush the batch before triggering the hardware state
 * counter wraparound behavior.
 *
 * State updates are tracked by a global counter which increments at the first
 * state update after a draw or a START_BINNING.  Tiles can then have their
 * state updated at draw time with a set of cheap checks for whether the
 * state's copy of the global counter matches the global counter the last time
 * that state was written to the tile.
 *
 * The state counters are relatively small and wrap around quickly, so you
 * could get false negatives for needing to update a particular state in the
 * tile.  To avoid this, the hardware attempts to write all of the state in
 * the tile at wraparound time.  This apparently is broken, so we just flush
 * everything before that behavior is triggered.  A batch flush is sufficient
 * to get our current contents drawn and reset the counters to 0.
 *
 * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the
 * tiles with VC4_PACKET_RETURN_FROM_LIST.
 */
static void
vc4_hw_2116_workaround(struct pipe_context *pctx, int vert_count)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_job *job = vc4_get_job_for_fbo(vc4);

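        /* Editorial note (assumption, not from the original source): draws
         * larger than 65535 vertices are presumably split into multiple
         * primitives, each bumping the hardware draw-call counter, hence
         * the vert_count / 65535 term below.
         */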
        if (job->draw_calls_queued + vert_count / 65535 >= VC4_HW_2116_COUNT) {
                perf_debug("Flushing batch due to HW-2116 workaround "
                           "(too many draw calls per scene\n");
                vc4_job_submit(vc4, job);
        }
}
Example No. 17
bool
brw_wm_do_compile(struct brw_context *brw,
                  struct brw_wm_compile *c)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_shader *fs = NULL;

   if (c->shader_prog)
      fs = c->shader_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    */
   int param_count;
   if (fs) {
      param_count = fs->num_uniform_components;
   } else {
      param_count = c->fp->program.Base.Parameters->NumParameters * 4;
   }
   /* The backend also sometimes adds params for texture size. */
   param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
   c->prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
   c->prog_data.base.pull_param =
      rzalloc_array(NULL, const float *, param_count);
   c->prog_data.base.nr_params = param_count;

   c->prog_data.barycentric_interp_modes =
      brw_compute_barycentric_interp_modes(brw, c->key.flat_shade,
                                           c->key.persample_shading,
                                           &c->fp->program);

   c->program = brw_wm_fs_emit(brw, c,
         &c->fp->program, c->shader_prog, &c->program_size);
   if (c->program == NULL)
      return false;

   /* Scratch space is used for register spilling */
   if (c->last_scratch) {
      perf_debug("Fragment shader triggered register spilling.  "
                 "Try reducing the number of live scalar values to "
                 "improve performance.\n");

      c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_WM))
      fprintf(stderr, "\n");

   return true;
}
Example No. 18
static void
brw_gs_debug_recompile(struct brw_context *brw, struct gl_program *prog,
                       const struct brw_gs_prog_key *key)
{
   perf_debug("Recompiling geometry shader for program %d\n", prog->Id);

   bool found = false;
   const struct brw_gs_prog_key *old_key =
      brw_find_previous_compile(&brw->cache, BRW_CACHE_GS_PROG,
                                key->program_string_id);

   if (!old_key) {
      perf_debug("  Didn't find previous compile in the shader cache for "
                 "debug\n");
      return;
   }

   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}
Example No. 19
static void
brw_gs_debug_recompile(struct brw_context *brw,
                       struct gl_shader_program *shader_prog,
                       const struct brw_gs_prog_key *key)
{
   struct brw_cache_item *c = NULL;
   const struct brw_gs_prog_key *old_key = NULL;
   bool found = false;

   perf_debug("Recompiling geometry shader for program %d\n",
              shader_prog->Name);

   for (unsigned int i = 0; i < brw->cache.size; i++) {
      for (c = brw->cache.items[i]; c; c = c->next) {
         if (c->cache_id == BRW_CACHE_GS_PROG) {
            old_key = c->key;

            if (old_key->program_string_id == key->program_string_id)
               break;
         }
      }
      if (c)
         break;
   }

   if (!c) {
      perf_debug("  Didn't find previous compile in the shader cache for "
                 "debug\n");
      return;
   }

   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}
Example No. 20
static void
intel_miptree_copy_slice(struct intel_context *intel,
                         struct intel_mipmap_tree *dst_mt,
                         struct intel_mipmap_tree *src_mt,
                         int level,
                         int face,
                         int depth)

{
    mesa_format format = src_mt->format;
    uint32_t width = src_mt->level[level].width;
    uint32_t height = src_mt->level[level].height;
    int slice;

    if (face > 0)
        slice = face;
    else
        slice = depth;

    assert(depth < src_mt->level[level].depth);
    assert(src_mt->format == dst_mt->format);

    if (dst_mt->compressed) {
        height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
        width = ALIGN(width, dst_mt->align_w);
    }

    uint32_t dst_x, dst_y, src_x, src_y;
    intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
    intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);

    DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
        _mesa_get_format_name(src_mt->format),
        src_mt, src_x, src_y, src_mt->region->pitch,
        _mesa_get_format_name(dst_mt->format),
        dst_mt, dst_x, dst_y, dst_mt->region->pitch,
        width, height);

    if (!intel_miptree_blit(intel,
                            src_mt, level, slice, 0, 0, false,
                            dst_mt, level, slice, 0, 0, false,
                            width, height, GL_COPY)) {
        perf_debug("miptree validate blit for %s failed\n",
                   _mesa_get_format_name(format));

        intel_miptree_copy_slice_sw(intel, dst_mt, src_mt, level, slice,
                                    width, height);
    }
}
Example No. 21
int
brw_bo_map_gtt(struct brw_context *brw, drm_intel_bo *bo, const char *bo_name)
{
   if (likely(!brw->perf_debug) || !drm_intel_bo_busy(bo))
      return drm_intel_gem_bo_map_gtt(bo);

   double start_time = get_time();

   int ret = drm_intel_gem_bo_map_gtt(bo);

   perf_debug("GTT mapping a busy %s BO stalled and took %.03f ms.\n",
              bo_name, (get_time() - start_time) * 1000);

   return ret;
}
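This helper and brw_bo_map in Example 25 share one idiom: map immediately when performance debugging is off or the BO is idle, and only bracket the map with timestamps when a stall is actually possible. A self-contained sketch of the idiom (struct bo, bo_is_busy, and map_bo are stand-ins, not libdrm API):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Stand-ins, not libdrm API: a fake BO type, busy query, and map call. */
struct bo { bool busy; };
static bool bo_is_busy(const struct bo *bo) { return bo->busy; }
static int  map_bo(struct bo *bo) { (void) bo; return 0; }

static double
get_time(void)
{
   struct timespec ts;
   clock_gettime(CLOCK_MONOTONIC, &ts);
   return ts.tv_sec + ts.tv_nsec / 1e9;
}

static int
map_with_stall_warning(struct bo *bo, bool perf_debug_enabled,
                       const char *bo_name)
{
   /* Fast path: no diagnostics requested, or no stall possible anyway. */
   if (!perf_debug_enabled || !bo_is_busy(bo))
      return map_bo(bo);

   double start_time = get_time();
   int ret = map_bo(bo);

   fprintf(stderr, "Mapping a busy %s BO stalled and took %.03f ms.\n",
           bo_name, (get_time() - start_time) * 1000);
   return ret;
}

int main(void)
{
   struct bo bo = { .busy = true };
   return map_with_stall_warning(&bo, true, "example");
}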
Example No. 22
/**
 * Determine if fast color clear supports the given clear color.
 *
 * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
 * moment we only support floating point, unorm, and snorm buffers.
 */
static bool
is_color_fast_clear_compatible(struct brw_context *brw,
                               gl_format format,
                               const union gl_color_union *color)
{
   if (_mesa_is_format_integer_color(format))
      return false;

   for (int i = 0; i < 4; i++) {
      if (color->f[i] != 0.0 && color->f[i] != 1.0) {
         perf_debug("Clear color unsupported by fast color clear.  "
                    "Falling back to slow clear.\n");
         return false;
      }
   }
   return true;
}
Example No. 23
void brw_draw_prims( struct gl_context *ctx,
		     const struct _mesa_prim *prim,
		     GLuint nr_prims,
		     const struct _mesa_index_buffer *ib,
		     GLboolean index_bounds_valid,
		     GLuint min_index,
		     GLuint max_index,
		     struct gl_transform_feedback_object *tfb_vertcount )
{
   struct intel_context *intel = intel_context(ctx);
   const struct gl_client_array **arrays = ctx->Array._DrawArrays;

   if (!_mesa_check_conditional_render(ctx))
      return;

   /* Handle primitive restart if needed */
   if (brw_handle_primitive_restart(ctx, prim, nr_prims, ib)) {
      /* The draw was handled, so we can exit now */
      return;
   }

   /* If we're going to have to upload any of the user's vertex arrays, then
    * get the minimum and maximum of their index buffer so we know what range
    * to upload.
    */
   if (!vbo_all_varyings_in_vbos(arrays) && !index_bounds_valid)
      vbo_get_minmax_indices(ctx, prim, ib, &min_index, &max_index, nr_prims);

   /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
    * won't support all the extensions we support.
    */
   if (ctx->RenderMode != GL_RENDER) {
      perf_debug("%s render mode not supported in hardware\n",
                 _mesa_lookup_enum_by_nr(ctx->RenderMode));
      _swsetup_Wakeup(ctx);
      _tnl_wakeup(ctx);
      _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
      return;
   }

   /* Try drawing with the hardware, but don't do anything else if we can't
    * manage it.  swrast doesn't support our featureset, so we can't fall back
    * to it.
    */
   brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}
Example No. 24
void
intelReadPixels(struct gl_context * ctx,
                GLint x, GLint y, GLsizei width, GLsizei height,
                GLenum format, GLenum type,
                const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
   bool ok;

   struct brw_context *brw = brw_context(ctx);
   bool dirty;

   DBG("%s\n", __func__);

   /* Reading pixels won't dirty the front buffer, so reset the dirty
    * flag after calling intel_prepare_render().
    */
   dirty = brw->front_buffer_dirty;
   intel_prepare_render(brw);
   brw->front_buffer_dirty = dirty;

   if (_mesa_is_bufferobj(pack->BufferObj)) {
      if (intel_readpixels_blorp(ctx, x, y, width, height,
                                 format, type, pixels, pack))
         return;

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
   }

   ok = intel_readpixels_tiled_memcpy(ctx, x, y, width, height,
                                      format, type, pixels, pack);
   if (ok)
      return;

   /* Update Mesa state before calling _mesa_readpixels().
    * XXX this may not be needed since ReadPixels no longer uses the
    * span code.
    */

   if (ctx->NewState)
      _mesa_update_state(ctx);

   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);

   /* There's an intel_prepare_render() call in intelSpanRenderStart(). */
   brw->front_buffer_dirty = dirty;
}
Example No. 25
/**
 * Map a buffer object; issue performance warnings if mapping causes stalls.
 *
 * This matches the drm_intel_bo_map API, but takes an additional human-readable
 * name for the buffer object to use in the performance debug message.
 */
int
brw_bo_map(struct brw_context *brw,
           drm_intel_bo *bo, int write_enable,
           const char *bo_name)
{
   if (likely(!brw->perf_debug) || !drm_intel_bo_busy(bo))
      return drm_intel_bo_map(bo, write_enable);

   double start_time = get_time();

   int ret = drm_intel_bo_map(bo, write_enable);

   perf_debug("CPU mapping a busy %s BO stalled and took %.03f ms.\n",
              bo_name, (get_time() - start_time) * 1000);

   return ret;
}
Example No. 26
/**
 * Determine if fast color clear supports the given clear color.
 *
 * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
 * moment we only support floating point, unorm, and snorm buffers.
 */
bool
brw_is_color_fast_clear_compatible(struct brw_context *brw,
                                   const struct intel_mipmap_tree *mt,
                                   const union gl_color_union *color)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct gl_context *ctx = &brw->ctx;

   /* If we're mapping the render format to a different format than the
    * format we use for texturing then it is a bit questionable whether it
    * should be possible to use a fast clear. Although we only actually
    * render using a renderable format, without the override workaround it
    * wouldn't be possible to have a non-renderable surface in a fast clear
    * state so the hardware probably legitimately doesn't need to support
    * this case. At least on Gen9 this really does seem to cause problems.
    */
   if (devinfo->gen >= 9 &&
       brw_isl_format_for_mesa_format(mt->format) !=
       brw->mesa_to_isl_render_format[mt->format])
      return false;

   const mesa_format format = _mesa_get_render_format(ctx, mt->format);
   if (_mesa_is_format_integer_color(format)) {
      if (devinfo->gen >= 8) {
         perf_debug("Integer fast clear not enabled for (%s)",
                    _mesa_get_format_name(format));
      }
      return false;
   }

   for (int i = 0; i < 4; i++) {
      if (!_mesa_format_has_color_component(format, i)) {
         continue;
      }

      if (devinfo->gen < 9 &&
          color->f[i] != 0.0f && color->f[i] != 1.0f) {
         return false;
      }
   }
   return true;
}
Example No. 27
static void
intel_get_tex_sub_image(struct gl_context *ctx,
                        GLint xoffset, GLint yoffset, GLint zoffset,
                        GLsizei width, GLsizei height, GLint depth,
                        GLenum format, GLenum type, GLvoid *pixels,
                        struct gl_texture_image *texImage)
{
   struct brw_context *brw = brw_context(ctx);
   bool ok;

   DBG("%s\n", __func__);

   if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
      if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage,
                                        xoffset, yoffset, zoffset,
                                        width, height, depth, format, type,
                                        pixels, &ctx->Pack)) {
         /* Flush to guarantee coherency between the render cache and other
          * caches the PBO could potentially be bound to after this point.
          * See the related comment in intelReadPixels() for a more detailed
          * explanation.
          */
         brw_emit_mi_flush(brw);
         return;
      }

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
   }

   ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset,
                                          width, height,
                                          format, type, pixels, &ctx->Pack);

   if (ok)
      return;

   _mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset,
                             width, height, depth,
                             format, type, pixels, texImage);

   DBG("%s - DONE\n", __func__);
}
Example No. 28
void *
intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt)
{
    drm_intel_bo *bo = mt->region->bo;

    if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
        if (drm_intel_bo_busy(bo)) {
            perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
        }
    }

    intel_flush(&intel->ctx);

    if (mt->region->tiling != I915_TILING_NONE)
        drm_intel_gem_bo_map_gtt(bo);
    else
        drm_intel_bo_map(bo, true);

    return bo->virtual;
}
Example No. 29
void
intelReadPixels(struct gl_context * ctx,
                GLint x, GLint y, GLsizei width, GLsizei height,
                GLenum format, GLenum type,
                const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
   struct brw_context *brw = brw_context(ctx);
   bool dirty;

   DBG("%s\n", __FUNCTION__);

   if (_mesa_is_bufferobj(pack->BufferObj)) {
      /* Using PBOs, so try the BLT based path. */
      if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack,
                             pixels)) {
         return;
      }

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);
   }

   /* glReadPixels() won't dirty the front buffer, so reset the dirty
    * flag after calling intel_prepare_render(). */
   dirty = brw->front_buffer_dirty;
   intel_prepare_render(brw);
   brw->front_buffer_dirty = dirty;

   /* Update Mesa state before calling _mesa_readpixels().
    * XXX this may not be needed since ReadPixels no longer uses the
    * span code.
    */

   if (ctx->NewState)
      _mesa_update_state(ctx);

   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);

   /* There's an intel_prepare_render() call in intelSpanRenderStart(). */
   brw->front_buffer_dirty = dirty;
}
Example No. 30
static void
vc4_clear(struct pipe_context *pctx, unsigned buffers,
          const union pipe_color_union *color, double depth, unsigned stencil)
{
        struct vc4_context *vc4 = vc4_context(pctx);

        /* We can't flag new buffers for clearing once we've queued draws.  We
         * could avoid this by using the 3d engine to clear.
         */
        if (vc4->draw_calls_queued) {
                perf_debug("Flushing rendering to process new clear.\n");
                vc4_flush(pctx);
        }

        if (buffers & PIPE_CLEAR_COLOR0) {
                vc4->clear_color[0] = vc4->clear_color[1] =
                        pack_rgba(vc4->framebuffer.cbufs[0]->format,
                                  color->f);
        }

        if (buffers & PIPE_CLEAR_DEPTH) {
                /* Though the depth buffer stores Z in the high 24 bits,
                 * for this field we just need to store it in the low 24 bits.
                 */
                vc4->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, depth);
        }

        if (buffers & PIPE_CLEAR_STENCIL)
                vc4->clear_stencil = stencil;

        vc4->draw_min_x = 0;
        vc4->draw_min_y = 0;
        vc4->draw_max_x = vc4->framebuffer.width;
        vc4->draw_max_y = vc4->framebuffer.height;
        vc4->cleared |= buffers;
        vc4->resolve |= buffers;

        vc4_start_draw(vc4);
}