Example #1
/* Helper function for si_blit_decompress_zs_in_place.
 */
static void
si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
				      struct r600_texture *texture,
				      unsigned planes, unsigned level_mask,
				      unsigned first_layer, unsigned last_layer)
{
	struct pipe_surface *zsurf, surf_tmpl = {{0}};
	unsigned layer, max_layer, checked_last_layer;
	unsigned fully_decompressed_mask = 0;

	if (!level_mask)
		return;

	if (planes & PIPE_MASK_S)
		sctx->db_flush_stencil_inplace = true;
	if (planes & PIPE_MASK_Z)
		sctx->db_flush_depth_inplace = true;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	surf_tmpl.format = texture->resource.b.b.format;

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		surf_tmpl.u.tex.level = level;

		/* The smaller the mipmap level, the fewer layers there are
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&texture->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;

			zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b, &surf_tmpl);

			si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
			util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0,
							  sctx->custom_dsa_flush,
							  1.0f);
			si_blitter_end(&sctx->b.b);

			pipe_surface_reference(&zsurf, NULL);
		}

		/* The texture will always be dirty if some layers aren't flushed.
		 * I don't think this case occurs often though. */
		if (first_layer == 0 && last_layer == max_layer) {
			fully_decompressed_mask |= 1u << level;
		}
	}

	if (planes & PIPE_MASK_Z)
		texture->dirty_level_mask &= ~fully_decompressed_mask;
	if (planes & PIPE_MASK_S)
		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;

	sctx->db_flush_depth_inplace = false;
	sctx->db_flush_stencil_inplace = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
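
Every example on this page is built around the same bit-scan idiom: pull the index of the lowest set bit out of a mask, handle it, clear it, and loop until the mask is empty. The sketch below shows that pattern in isolation; it is a standalone stand-in using a GCC/Clang builtin, not Mesa's u_bit_scan() from util/u_math.h, and visit_set_bits() is a hypothetical driver loop rather than real driver code.

/* Standalone sketch of the bit-scan idiom (assumes a nonzero mask and
 * GCC/Clang builtins; not the Mesa implementation). */
static inline unsigned
bit_scan_sketch(unsigned *mask)
{
   const unsigned i = __builtin_ctz(*mask); /* index of the lowest set bit */
   *mask &= *mask - 1;                      /* clear that bit */
   return i;
}

static void
visit_set_bits(unsigned level_mask)
{
   while (level_mask) {
      const unsigned level = bit_scan_sketch(&level_mask);
      /* ... process `level`, as the decompress loop above does ... */
      (void)level;
   }
}
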
Example #2
static void
update_textures(struct st_context *st,
                enum pipe_shader_type shader_stage,
                const struct gl_program *prog,
                struct pipe_sampler_view **sampler_views,
                unsigned *out_num_textures)
{
   const GLuint old_max = *out_num_textures;
   GLbitfield samplers_used = prog->SamplersUsed;
   GLbitfield texel_fetch_samplers = prog->info.textures_used_by_txf;
   GLbitfield free_slots = ~prog->SamplersUsed;
   GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
   GLuint unit;

   if (samplers_used == 0x0 && old_max == 0)
      return;

   unsigned num_textures = 0;

   /* prog->sh.data is NULL if it's ARB_fragment_program */
   bool glsl130 = (prog->sh.data ? prog->sh.data->Version : 0) >= 130;

   /* loop over sampler units (aka tex image units) */
   for (unit = 0; samplers_used || unit < old_max;
        unit++, samplers_used >>= 1, texel_fetch_samplers >>= 1) {
      struct pipe_sampler_view *sampler_view = NULL;

      if (samplers_used & 1) {
         const GLuint texUnit = prog->SamplerUnits[unit];

         /* The EXT_texture_sRGB_decode extension says:
          *
          *    "The conversion of sRGB color space components to linear color
          *     space is always performed if the texel lookup function is one
          *     of the texelFetch builtin functions.
          *
          *     Otherwise, if the texel lookup function is one of the texture
          *     builtin functions or one of the texture gather functions, the
          *     conversion of sRGB color space components to linear color space
          *     is controlled by the TEXTURE_SRGB_DECODE_EXT parameter.
          *
          *     If the TEXTURE_SRGB_DECODE_EXT parameter is DECODE_EXT, the
          *     conversion of sRGB color space components to linear color space
          *     is performed.
          *
          *     If the TEXTURE_SRGB_DECODE_EXT parameter is SKIP_DECODE_EXT,
          *     the value is returned without decoding. However, if the texture
          *     is also [statically] accessed with a texelFetch function, then
          *     the result of texture builtin functions and/or texture gather
          *     functions may be returned with decoding or without decoding."
          *
          * Note: the "statically" will be added to the language per
          *       https://cvs.khronos.org/bugzilla/show_bug.cgi?id=14934
          *
          * So we simply ignore the setting entirely for samplers that are
          * (statically) accessed with a texelFetch function.
          */
         st_update_single_texture(st, &sampler_view, texUnit, glsl130,
                                  texel_fetch_samplers & 1);
         num_textures = unit + 1;
      }

      pipe_sampler_view_reference(&(sampler_views[unit]), sampler_view);
   }

   /* For any external samplers with multiplanar YUV, stuff the additional
    * sampler views we need at the end.
    *
    * Trying to cache the sampler view in the stObj looks painful, so just
    * re-create the sampler view for the extra planes each time.  The main
    * use case is video playback (i.e. FPS games wouldn't be using this),
    * so there is little point in trying to optimize this feature.
    */
   while (unlikely(external_samplers_used)) {
      GLuint unit = u_bit_scan(&external_samplers_used);
      GLuint extra = 0;
      struct st_texture_object *stObj =
            st_get_texture_object(st->ctx, prog, unit);
      struct pipe_sampler_view tmpl;

      if (!stObj)
         continue;

      /* use original view as template: */
      tmpl = *sampler_views[unit];

      switch (st_get_view_format(stObj)) {
      case PIPE_FORMAT_NV12:
         /* we need one additional R8G8 view: */
         tmpl.format = PIPE_FORMAT_RG88_UNORM;
         tmpl.swizzle_g = PIPE_SWIZZLE_Y;   /* tmpl from Y plane is R8 */
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
               st->pipe->create_sampler_view(st->pipe, stObj->pt->next, &tmpl);
         break;
      case PIPE_FORMAT_IYUV:
         /* we need two additional R8 views: */
         tmpl.format = PIPE_FORMAT_R8_UNORM;
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
               st->pipe->create_sampler_view(st->pipe, stObj->pt->next, &tmpl);
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
               st->pipe->create_sampler_view(st->pipe, stObj->pt->next->next, &tmpl);
         break;
      default:
         break;
      }

      num_textures = MAX2(num_textures, extra + 1);
   }

   cso_set_sampler_views(st->cso_context,
                         shader_stage,
                         num_textures,
                         sampler_views);
   *out_num_textures = num_textures;
}
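
The NV12/IYUV branches above pull the slots for the extra plane views out of free_slots, the complement of the program's sampler mask, so the additional views can never collide with a slot the shader already uses. Below is a minimal sketch of that slot allocation; it is a hypothetical helper using a compiler builtin, not the state tracker's code.

/* Hand out the lowest free slot from a bitmask in which a set bit means
 * "slot available" (free_slots = ~SamplersUsed above). Assumes at least one
 * free slot; hypothetical stand-in for the u_bit_scan(&free_slots) calls. */
static unsigned
alloc_free_slot(unsigned *free_slots)
{
   const unsigned slot = __builtin_ctz(*free_slots);
   *free_slots &= ~(1u << slot);
   return slot;
}

NV12 needs one extra view for its interleaved UV plane, so it takes one free slot; IYUV carries separate U and V planes and takes two.
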
Example #3
static void
update_textures(struct st_context *st,
                gl_shader_stage mesa_shader,
                const struct gl_program *prog,
                unsigned max_units,
                struct pipe_sampler_view **sampler_views,
                unsigned *num_textures)
{
   const GLuint old_max = *num_textures;
   GLbitfield samplers_used = prog->SamplersUsed;
   GLbitfield free_slots = ~prog->SamplersUsed;
   GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
   GLuint unit;
   enum pipe_shader_type shader_stage = st_shader_stage_to_ptarget(mesa_shader);

   if (samplers_used == 0x0 && old_max == 0)
      return;

   *num_textures = 0;

   /* loop over sampler units (aka tex image units) */
   for (unit = 0; unit < max_units; unit++, samplers_used >>= 1) {
      struct pipe_sampler_view *sampler_view = NULL;

      if (samplers_used & 1) {
         /* prog->sh.data is NULL if it's ARB_fragment_program */
         unsigned glsl_version = prog->sh.data ? prog->sh.data->Version : 0;
         const GLuint texUnit = prog->SamplerUnits[unit];
         GLboolean retval;

         retval = update_single_texture(st, &sampler_view, texUnit,
                                        glsl_version);
         if (retval == GL_FALSE)
            continue;

         *num_textures = unit + 1;
      }
      else if (samplers_used == 0 && unit >= old_max) {
         /* if we've reset all the old views and we have no more new ones */
         break;
      }

      pipe_sampler_view_reference(&(sampler_views[unit]), sampler_view);
   }

   /* For any external samplers with multiplanar YUV, stuff the additional
    * sampler views we need at the end.
    *
    * Trying to cache the sampler view in the stObj looks painful, so just
    * re-create the sampler view for the extra planes each time.  The main
    * use case is video playback (i.e. FPS games wouldn't be using this),
    * so there is little point in trying to optimize this feature.
    */
   while (unlikely(external_samplers_used)) {
      GLuint unit = u_bit_scan(&external_samplers_used);
      GLuint extra = 0;
      struct st_texture_object *stObj =
            st_get_texture_object(st->ctx, prog, unit);
      struct pipe_sampler_view tmpl;

      if (!stObj)
         continue;

      /* use original view as template: */
      tmpl = *sampler_views[unit];

      switch (st_get_view_format(stObj)) {
      case PIPE_FORMAT_NV12:
         /* we need one additional R8G8 view: */
         tmpl.format = PIPE_FORMAT_RG88_UNORM;
         tmpl.swizzle_g = PIPE_SWIZZLE_Y;   /* tmpl from Y plane is R8 */
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
               st->pipe->create_sampler_view(st->pipe, stObj->pt->next, &tmpl);
         break;
      case PIPE_FORMAT_IYUV:
         /* we need two additional R8 views: */
         tmpl.format = PIPE_FORMAT_R8_UNORM;
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
               st->pipe->create_sampler_view(st->pipe, stObj->pt->next, &tmpl);
         extra = u_bit_scan(&free_slots);
         sampler_views[extra] =
               st->pipe->create_sampler_view(st->pipe, stObj->pt->next->next, &tmpl);
         break;
      default:
         break;
      }

      *num_textures = MAX2(*num_textures, extra + 1);
   }

   cso_set_sampler_views(st->cso_context,
                         shader_stage,
                         *num_textures,
                         sampler_views);
}
Example #4
/**
 * Gathers together all the uniform values into a block of memory to be
 * uploaded into the CURBE, then emits the state packet telling the hardware
 * the new location.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_CURBE_OFFSETS */
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   gl_constant_value *buf;
   GLuint i;
   gl_clip_plane *clip_planes;

   if (sz == 0) {
      goto emit;
   }

   buf = intel_upload_space(brw, bufsz, 64,
                            &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);

   STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      _mesa_load_state_parameters(ctx, brw->fragment_program->Parameters);

      /* BRW_NEW_CURBE_OFFSETS */
      GLuint offset = brw->curbe.wm_start * 16;

      /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      for (i = 0; i < brw->wm.base.prog_data->nr_params; i++) {
	 buf[offset + i] = *brw->wm.base.prog_data->param[i];
      }
   }

   /* clipper constants */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLbitfield mask;

      /* If any planes are going this way, send them all this way:
       */
      for (i = 0; i < 6; i++) {
	 buf[offset + i * 4 + 0].f = fixed_plane[i][0];
	 buf[offset + i * 4 + 1].f = fixed_plane[i][1];
	 buf[offset + i * 4 + 2].f = fixed_plane[i][2];
	 buf[offset + i * 4 + 3].f = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space:
       */
      clip_planes = brw_select_clip_planes(ctx);
      mask = ctx->Transform.ClipPlanesEnabled;
      while (mask) {
         const int j = u_bit_scan(&mask);
         buf[offset + i * 4 + 0].f = clip_planes[j][0];
         buf[offset + i * 4 + 1].f = clip_planes[j][1];
         buf[offset + i * 4 + 2].f = clip_planes[j][2];
         buf[offset + i * 4 + 3].f = clip_planes[j][3];
         i++;
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      _mesa_load_state_parameters(ctx, brw->vertex_program->Parameters);

      GLuint offset = brw->curbe.vs_start * 16;

      /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
      for (i = 0; i < brw->vs.base.prog_data->nr_params; i++) {
         buf[offset + i] = *brw->vs.base.prog_data->param[i];
      }
   }

   if (0) {
      for (i = 0; i < sz*16; i+=4)
	 fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                 buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
   }

   /* Because this provokes an action (i.e. copying the constants into the
    * URB), it shouldn't be short-circuited if identical to the previous
    * time - because e.g. the URB destination may have changed, or the URB
    * contents may differ from last time.
    *
    * Note that the data referred to is actually copied internally, not
    * just used in place via the passed pointer.
    *
    * It appears that the CS unit takes care of using each available URB
    * entry (Const URB Entry == CURBE) in turn, and issuing flushes as
    * necessary when double-buffering of CURBEs isn't possible.
    */

emit:
   /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8
    * (CONSTANT_BUFFER (CURBE Load)):
    *
    *     "Modifying the CS URB allocation via URB_FENCE invalidates any
    *      previous CURBE entries. Therefore software must subsequently
    *      [re]issue a CONSTANT_BUFFER command before CURBE data can be used
    *      in the pipeline."
    */
   BEGIN_BATCH(2);
   if (brw->curbe.total_size == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo,
		I915_GEM_DOMAIN_INSTRUCTION, 0,
		(brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();

   /* Work around a Broadwater/Crestline depth interpolator bug.  The
    * following sequence will cause GPU hangs:
    *
    * 1. Change state so that all depth related fields in CC_STATE are
    *    disabled, and in WM_STATE, only "PS Use Source Depth" is enabled.
    * 2. Emit a CONSTANT_BUFFER packet.
    * 3. Draw via 3DPRIMITIVE.
    *
    * The recommended workaround is to emit a non-pipelined state change after
    * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline.
    *
    * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small),
    * and always emit it when "PS Use Source Depth" is set.  We could be more
    * precise, but the additional complexity is probably not worth it.
    *
    * BRW_NEW_FRAGMENT_PROGRAM
    */
   if (brw->gen == 4 && !brw->is_g4x &&
       (brw->fragment_program->info.inputs_read & (1 << VARYING_SLOT_POS))) {
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
}
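
All of the offsets above are float indices into the upload buffer: curbe.total_size and the *_start fields count 16-float (512-bit) CURBE units, clip planes occupy one vec4 each inside their region, and shader constants are packed as consecutive floats. The helper below is a hypothetical illustration of that addressing, not part of the i965 driver.

/* Float index of component `c` of vec4 slot `slot` in a CURBE region that
 * starts at `start_units` 16-float units. Hypothetical sketch. */
static unsigned
curbe_float_index(unsigned start_units, unsigned slot, unsigned c)
{
   return start_units * 16 + slot * 4 + c;
}

For example, the w component of clip plane i above is written at curbe_float_index(brw->curbe.clip_start, i, 3), which is exactly buf[offset + i * 4 + 3].
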
Example #5
static enum pipe_error
emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
{
   enum pipe_error ret;
   unsigned dirty_constbufs;
   unsigned enabled_constbufs;

   /* Emit 0th constant buffer (with extra constants) */
   ret = emit_constbuf_vgpu10(svga, shader);
   if (ret != PIPE_OK) {
      return ret;
   }

   enabled_constbufs = svga->state.hw_draw.enabled_constbufs[shader] | 1u;

   /* Emit other constant buffers (UBOs) */
   dirty_constbufs = svga->state.dirty_constbufs[shader] & ~1u;

   while (dirty_constbufs) {
      unsigned index = u_bit_scan(&dirty_constbufs);
      unsigned offset = svga->curr.constbufs[shader][index].buffer_offset;
      unsigned size = svga->curr.constbufs[shader][index].buffer_size;
      struct svga_buffer *buffer =
         svga_buffer(svga->curr.constbufs[shader][index].buffer);
      struct svga_winsys_surface *handle;

      if (buffer) {
         handle = svga_buffer_handle(svga, &buffer->b.b);
         enabled_constbufs |= 1 << index;
      }
      else {
         handle = NULL;
         enabled_constbufs &= ~(1 << index);
         assert(offset == 0);
         assert(size == 0);
      }

      if (size % 16 != 0) {
         /* GL's buffer range sizes can be any number of bytes but the
          * SVGA3D device requires a multiple of 16 bytes.
          */
         const unsigned total_size = buffer->b.b.width0;

         if (offset + align(size, 16) <= total_size) {
            /* round up size to multiple of 16 */
            size = align(size, 16);
         }
         else {
            /* round down to multiple of 16 (this may cause rendering problems
             * but should avoid a device error).
             */
            size &= ~15;
         }
      }

      assert(size % 16 == 0);
      ret = SVGA3D_vgpu10_SetSingleConstantBuffer(svga->swc,
                                                  index,
                                                  svga_shader_type(shader),
                                                  handle,
                                                  offset,
                                                  size);
      if (ret != PIPE_OK)
         return ret;

      svga->hud.num_const_buf_updates++;
   }

   svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;
   svga->state.dirty_constbufs[shader] = 0;

   return ret;
}
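
The size fix-up in the loop above follows one rule: the SVGA3D device wants constant buffer sizes that are multiples of 16 bytes, so a GL buffer range is rounded up when the rounded size still fits inside the buffer and rounded down otherwise. A compact sketch of just that decision is below; align16() and fixup_constbuf_size() are local stand-ins, not svga driver functions.

/* Round a constant-buffer range size to a multiple of 16 bytes, preferring
 * to round up whenever the result still fits in the underlying buffer. */
static unsigned
align16(unsigned v)
{
   return (v + 15u) & ~15u;
}

static unsigned
fixup_constbuf_size(unsigned offset, unsigned size, unsigned total_size)
{
   if (size % 16 == 0)
      return size;
   if (offset + align16(size) <= total_size)
      return align16(size);   /* round up: still inside the buffer */
   return size & ~15u;        /* round down: may hurt rendering, avoids a device error */
}
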
Example #6
/**
 * Update the gallium driver's sampler state for fragment, vertex or
 * geometry shader stage.
 */
static void
update_shader_samplers(struct st_context *st,
                       enum pipe_shader_type shader_stage,
                       const struct gl_program *prog,
                       unsigned max_units,
                       struct pipe_sampler_state *samplers,
                       unsigned *num_samplers)
{
   GLbitfield samplers_used = prog->SamplersUsed;
   GLbitfield free_slots = ~prog->SamplersUsed;
   GLbitfield external_samplers_used = prog->ExternalSamplersUsed;
   GLuint unit;
   const GLuint old_max = *num_samplers;
   const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS];

   if (*num_samplers == 0 && samplers_used == 0x0)
      return;

   *num_samplers = 0;

   /* loop over sampler units (aka tex image units) */
   for (unit = 0; unit < max_units; unit++, samplers_used >>= 1) {
      struct pipe_sampler_state *sampler = samplers + unit;

      if (samplers_used & 1) {
         const GLuint texUnit = prog->SamplerUnits[unit];

         convert_sampler(st, sampler, texUnit);
         states[unit] = sampler;
         *num_samplers = unit + 1;
      }
      else if (samplers_used != 0 || unit < old_max) {
         states[unit] = NULL;
      }
      else {
         /* if we've reset all the old samplers and we have no more new ones */
         break;
      }
   }

   /* For any external samplers with multiplanar YUV, stuff the additional
    * sampler states we need at the end.
    *
    * Just re-use the existing sampler-state from the primary slot.
    */
   while (unlikely(external_samplers_used)) {
      GLuint unit = u_bit_scan(&external_samplers_used);
      GLuint extra = 0;
      struct st_texture_object *stObj =
            st_get_texture_object(st->ctx, prog, unit);
      struct pipe_sampler_state *sampler = samplers + unit;

      if (!stObj)
         continue;

      switch (st_get_view_format(stObj)) {
      case PIPE_FORMAT_NV12:
         /* we need one additional sampler: */
         extra = u_bit_scan(&free_slots);
         states[extra] = sampler;
         break;
      case PIPE_FORMAT_IYUV:
         /* we need two additional samplers: */
         extra = u_bit_scan(&free_slots);
         states[extra] = sampler;
         extra = u_bit_scan(&free_slots);
         states[extra] = sampler;
         break;
      default:
         break;
      }

      *num_samplers = MAX2(*num_samplers, extra + 1);
   }

   cso_set_samplers(st->cso_context, shader_stage, *num_samplers, states);
}
Example #7
static void make_state_key( struct gl_context *ctx, struct state_key *key )
{
   const struct gl_program *fp = ctx->FragmentProgram._Current;
   GLbitfield mask;

   memset(key, 0, sizeof(struct state_key));

   /* This now relies on texenvprogram.c being active:
    */
   assert(fp);

   key->need_eye_coords = ctx->_NeedEyeCoords;

   key->fragprog_inputs_read = fp->info.inputs_read;
   key->varying_vp_inputs = ctx->varying_vp_inputs;

   if (ctx->RenderMode == GL_FEEDBACK) {
      /* make sure the vertprog emits color and tex0 */
      key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0);
   }

   key->separate_specular = (ctx->Light.Model.ColorControl ==
			     GL_SEPARATE_SPECULAR_COLOR);

   if (ctx->Light.Enabled) {
      key->light_global_enabled = 1;

      if (ctx->Light.Model.LocalViewer)
	 key->light_local_viewer = 1;

      if (ctx->Light.Model.TwoSide)
	 key->light_twoside = 1;

      if (ctx->Light.ColorMaterialEnabled) {
	 key->light_color_material_mask = ctx->Light._ColorMaterialBitmask;
      }

      mask = ctx->Light._EnabledLights;
      while (mask) {
         const int i = u_bit_scan(&mask);
         struct gl_light *light = &ctx->Light.Light[i];

         key->unit[i].light_enabled = 1;

         if (light->EyePosition[3] == 0.0F)
            key->unit[i].light_eyepos3_is_zero = 1;

         if (light->SpotCutoff == 180.0F)
            key->unit[i].light_spotcutoff_is_180 = 1;

         if (light->ConstantAttenuation != 1.0F ||
             light->LinearAttenuation != 0.0F ||
             light->QuadraticAttenuation != 0.0F)
            key->unit[i].light_attenuated = 1;
      }

      if (check_active_shininess(ctx, key, 0)) {
         key->material_shininess_is_zero = 0;
      }
      else if (key->light_twoside &&
               check_active_shininess(ctx, key, 1)) {
         key->material_shininess_is_zero = 0;
      }
      else {
         key->material_shininess_is_zero = 1;
      }
   }

   if (ctx->Transform.Normalize)
      key->normalize = 1;

   if (ctx->Transform.RescaleNormals)
      key->rescale_normals = 1;

   if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) {
      key->fog_source_is_depth = 1;
      key->fog_distance_mode = translate_fog_distance_mode(ctx->Fog.FogDistanceMode);
   }

   if (ctx->Point._Attenuated)
      key->point_attenuated = 1;

   if (ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled)
      key->point_array = 1;

   if (ctx->Texture._TexGenEnabled ||
       ctx->Texture._TexMatEnabled ||
       ctx->Texture._MaxEnabledTexImageUnit != -1)
      key->texture_enabled_global = 1;

   mask = ctx->Texture._EnabledCoordUnits | ctx->Texture._TexGenEnabled
      | ctx->Texture._TexMatEnabled | ctx->Point.CoordReplace;
   while (mask) {
      const int i = u_bit_scan(&mask);
      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];

      if (texUnit->_Current)
	 key->unit[i].texunit_really_enabled = 1;

      if (ctx->Point.PointSprite)
	 if (ctx->Point.CoordReplace & (1u << i))
	    key->unit[i].coord_replace = 1;

      if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
	 key->unit[i].texmat_enabled = 1;

      if (texUnit->TexGenEnabled) {
	 key->unit[i].texgen_enabled = 1;

	 key->unit[i].texgen_mode0 =
	    translate_texgen( texUnit->TexGenEnabled & (1<<0),
			      texUnit->GenS.Mode );
	 key->unit[i].texgen_mode1 =
	    translate_texgen( texUnit->TexGenEnabled & (1<<1),
			      texUnit->GenT.Mode );
	 key->unit[i].texgen_mode2 =
	    translate_texgen( texUnit->TexGenEnabled & (1<<2),
			      texUnit->GenR.Mode );
	 key->unit[i].texgen_mode3 =
	    translate_texgen( texUnit->TexGenEnabled & (1<<3),
			      texUnit->GenQ.Mode );
      }
   }
}
Example #8
static void
gather_intrinsic_info(const nir_intrinsic_instr *instr,
		      struct ac_shader_info *info)
{
	switch (instr->intrinsic) {
	case nir_intrinsic_interp_var_at_sample:
		info->ps.needs_sample_positions = true;
		break;
	case nir_intrinsic_load_draw_id:
		info->vs.needs_draw_id = true;
		break;
	case nir_intrinsic_load_instance_id:
		info->vs.needs_instance_id = true;
		break;
	case nir_intrinsic_load_num_work_groups:
		info->cs.uses_grid_size = true;
		break;
	case nir_intrinsic_load_local_invocation_id:
	case nir_intrinsic_load_work_group_id: {
		unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
		while (mask) {
			unsigned i = u_bit_scan(&mask);

			if (instr->intrinsic == nir_intrinsic_load_work_group_id)
				info->cs.uses_block_id[i] = true;
			else
				info->cs.uses_thread_id[i] = true;
		}
		break;
	}
	case nir_intrinsic_load_local_invocation_index:
		info->cs.uses_local_invocation_idx = true;
		break;
	case nir_intrinsic_load_sample_id:
		info->ps.force_persample = true;
		break;
	case nir_intrinsic_load_sample_pos:
		info->ps.force_persample = true;
		break;
	case nir_intrinsic_load_view_index:
		info->needs_multiview_view_index = true;
		break;
	case nir_intrinsic_load_invocation_id:
		info->uses_invocation_id = true;
		break;
	case nir_intrinsic_load_primitive_id:
		info->uses_prim_id = true;
		break;
	case nir_intrinsic_load_push_constant:
		info->loads_push_constants = true;
		break;
	case nir_intrinsic_vulkan_resource_index:
		info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr));
		break;
	case nir_intrinsic_image_load:
	case nir_intrinsic_image_store:
	case nir_intrinsic_image_atomic_add:
	case nir_intrinsic_image_atomic_min:
	case nir_intrinsic_image_atomic_max:
	case nir_intrinsic_image_atomic_and:
	case nir_intrinsic_image_atomic_or:
	case nir_intrinsic_image_atomic_xor:
	case nir_intrinsic_image_atomic_exchange:
	case nir_intrinsic_image_atomic_comp_swap:
	case nir_intrinsic_image_size: {
		const struct glsl_type *type = instr->variables[0]->var->type;
		if(instr->variables[0]->deref.child)
			type = instr->variables[0]->deref.child->type;

		enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
		if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
		    dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
			info->ps.uses_input_attachments = true;
		mark_sampler_desc(instr->variables[0]->var, info);
		break;
	}
	default:
		break;
	}
}
Example #9
static bool ac_eliminate_duplicated_output(uint8_t *vs_output_param_offset,
					   uint32_t num_outputs,
					   struct ac_vs_exports *processed,
				           struct ac_vs_exp_inst *exp)
{
	unsigned p, copy_back_channels = 0;

	/* See if the output is already in the list of processed outputs.
	 * The LLVMValueRef comparison relies on SSA.
	 */
	for (p = 0; p < processed->num; p++) {
		bool different = false;

		for (unsigned j = 0; j < 4; j++) {
			struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
			struct ac_vs_exp_chan *c2 = &exp->chan[j];

			/* Treat undef as a match. */
			if (c2->type == AC_IR_UNDEF)
				continue;

			/* If c1 is undef but c2 isn't, we can copy c2 to c1
			 * and consider the instruction duplicated.
			 */
			if (c1->type == AC_IR_UNDEF) {
				copy_back_channels |= 1 << j;
				continue;
			}

			/* Test whether the channels are not equal. */
			if (c1->type != c2->type ||
			    (c1->type == AC_IR_CONST &&
			     c1->const_float != c2->const_float) ||
			    (c1->type == AC_IR_VALUE &&
			     c1->value != c2->value)) {
				different = true;
				break;
			}
		}
		if (!different)
			break;

		copy_back_channels = 0;
	}
	if (p == processed->num)
		return false;

	/* If a match was found, but the matching export has undef where the new
	 * one has a normal value, copy the normal value to the undef channel.
	 */
	struct ac_vs_exp_inst *match = &processed->exp[p];

	while (copy_back_channels) {
		unsigned chan = u_bit_scan(&copy_back_channels);

		assert(match->chan[chan].type == AC_IR_UNDEF);
		LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan,
			       exp->chan[chan].value);
		match->chan[chan] = exp->chan[chan];
	}

	/* The PARAM export is duplicated. Kill it. */
	LLVMInstructionEraseFromParent(exp->inst);

	/* Change OFFSET to the matching export. */
	for (unsigned i = 0; i < num_outputs; i++) {
		if (vs_output_param_offset[i] == exp->offset) {
			vs_output_param_offset[i] = match->offset;
			break;
		}
	}
	return true;
}
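
The function above deduplicates in two phases: scan the already-processed exports for a channel-wise match, treating an undef channel in the new export as a wildcard and recording (in copy_back_channels) which channels of the matching export are undef where the new export has a value; then patch those channels and delete the duplicate. The sketch below models only that mask bookkeeping with a tiny stand-in struct; it is not the LLVM-based implementation.

#include <stdbool.h>

/* Simplified model of one dedup candidate: four channels, each either
 * undefined or holding an integer standing in for the real constant/value. */
struct exp_sketch {
   bool defined[4];
   int value[4];
};

static bool
dedup_against(struct exp_sketch *match, const struct exp_sketch *new_exp)
{
   unsigned copy_back = 0;

   for (unsigned j = 0; j < 4; j++) {
      if (!new_exp->defined[j])
         continue;                 /* undef in the new export: treat as match */
      if (!match->defined[j]) {
         copy_back |= 1u << j;     /* remember to patch the old export */
         continue;
      }
      if (match->value[j] != new_exp->value[j])
         return false;             /* genuinely different export */
   }

   while (copy_back) {             /* copy defined values back into the match */
      const unsigned chan = __builtin_ctz(copy_back);
      copy_back &= copy_back - 1;
      match->defined[chan] = true;
      match->value[chan] = new_exp->value[chan];
   }
   return true;                    /* new_exp can be erased as a duplicate */
}
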
Example #10
/**
 * Mark all states that have the resource dirty.
 */
void
ilo_mark_states_with_resource_dirty(struct ilo_context *ilo,
                                    const struct pipe_resource *res)
{
   uint32_t states = 0;
   unsigned sh, i;

   if (res->target == PIPE_BUFFER) {
      uint32_t vb_mask = ilo->vb.enabled_mask;

      while (vb_mask) {
         const unsigned idx = u_bit_scan(&vb_mask);

         if (ilo->vb.states[idx].buffer == res) {
            states |= ILO_DIRTY_VB;
            break;
         }
      }

      if (ilo->ib.buffer == res) {
         states |= ILO_DIRTY_IB;

         /*
          * finalize_index_buffer() has an optimization that clears
          * ILO_DIRTY_IB when the HW states do not change.  However, it fails
          * to flush the VF cache when the HW states do not change, but the
          * contents of the IB have changed.  Here, we set the index size to an
          * invalid value to avoid the optimization.
          */
         ilo->ib.hw_index_size = 0;
      }

      for (i = 0; i < ilo->so.count; i++) {
         if (ilo->so.states[i]->buffer == res) {
            states |= ILO_DIRTY_SO;
            break;
         }
      }
   }

   for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
      for (i = 0; i < ilo->view[sh].count; i++) {
         struct pipe_sampler_view *view = ilo->view[sh].states[i];

         if (view->texture == res) {
            static const unsigned view_dirty_bits[PIPE_SHADER_TYPES] = {
               [PIPE_SHADER_VERTEX]    = ILO_DIRTY_VIEW_VS,
               [PIPE_SHADER_FRAGMENT]  = ILO_DIRTY_VIEW_FS,
               [PIPE_SHADER_GEOMETRY]  = ILO_DIRTY_VIEW_GS,
               [PIPE_SHADER_COMPUTE]   = ILO_DIRTY_VIEW_CS,
            };

            states |= view_dirty_bits[sh];
            break;
         }
      }

      if (res->target == PIPE_BUFFER) {
         for (i = 0; i < Elements(ilo->cbuf[sh].cso); i++) {
            struct ilo_cbuf_cso *cbuf = &ilo->cbuf[sh].cso[i];

            if (cbuf->resource == res) {
               states |= ILO_DIRTY_CBUF;
               break;
            }
         }
      }
   }

   /* flag all states found to reference the resource */
   ilo->dirty |= states;
}
Example #11
static void *r600_buffer_transfer_map(struct pipe_context *pipe,
				      struct pipe_transfer *transfer)
{
	struct r600_resource *rbuffer = r600_resource(transfer->resource);
	struct r600_context *rctx = (struct r600_context*)pipe;
	uint8_t *data;

	if (transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
	    !(transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
		assert(transfer->usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			unsigned i, mask;

			/* Discard the buffer. */
			pb_reference(&rbuffer->buf, NULL);

			/* Create a new one in the same pipe_resource. */
			/* XXX We probably want a different alignment for buffers and textures. */
			r600_init_resource(rctx->screen, rbuffer, rbuffer->b.b.width0, 4096,
					   rbuffer->b.b.bind, rbuffer->b.b.usage);

			/* We changed the buffer, now we need to bind it where the old one was bound. */
			/* Vertex buffers. */
			mask = rctx->vertex_buffer_state.enabled_mask;
			while (mask) {
				i = u_bit_scan(&mask);
				if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
					rctx->vertex_buffer_state.dirty_mask |= 1 << i;
					r600_vertex_buffers_dirty(rctx);
				}
			}
			/* Streamout buffers. */
			for (i = 0; i < rctx->num_so_targets; i++) {
				if (rctx->so_targets[i]->b.buffer == &rbuffer->b.b) {
					r600_context_streamout_end(rctx);
					rctx->streamout_start = TRUE;
					rctx->streamout_append_bitmask = ~0;
				}
			}
			/* Constant buffers. */
			r600_set_constants_dirty_if_bound(rctx, rbuffer);
		}
	}
#if 0 /* this is broken (see Bug 53130) */
	else if ((transfer->usage & PIPE_TRANSFER_DISCARD_RANGE) &&
		 !(transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
		 rctx->screen->has_streamout &&
		 /* The buffer range must be aligned to 4. */
		 transfer->box.x % 4 == 0 && transfer->box.width % 4 == 0) {
		assert(transfer->usage & PIPE_TRANSFER_WRITE);

		/* Check if mapping this buffer would cause waiting for the GPU. */
		if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
			/* Do a wait-free write-only transfer using a temporary buffer. */
			struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;

			rtransfer->staging = (struct r600_resource*)
				pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
						   PIPE_USAGE_STAGING, transfer->box.width);
			return rctx->ws->buffer_map(rtransfer->staging->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
		}
	}
#endif

	data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, transfer->usage);
	if (!data)
		return NULL;

	return (uint8_t*)data + transfer->box.x;
}
Example #12
static void
nouveau_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
{
	GLbitfield mask;

	switch (cap) {
	case GL_ALPHA_TEST:
		context_dirty(ctx, ALPHA_FUNC);
		break;
	case GL_BLEND:
		context_dirty(ctx, BLEND_EQUATION);
		break;
	case GL_COLOR_LOGIC_OP:
		context_dirty(ctx, LOGIC_OPCODE);
		break;
	case GL_COLOR_MATERIAL:
		context_dirty(ctx, COLOR_MATERIAL);
		context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
		context_dirty(ctx, MATERIAL_BACK_AMBIENT);
		context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
		context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
		context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
		context_dirty(ctx, MATERIAL_BACK_SPECULAR);
		break;
	case GL_COLOR_SUM_EXT:
		context_dirty(ctx, FRAG);
		context_dirty(ctx, LIGHT_MODEL);
		break;
	case GL_CULL_FACE:
		context_dirty(ctx, CULL_FACE);
		break;
	case GL_DEPTH_TEST:
		context_dirty(ctx, DEPTH);
		break;
	case GL_DITHER:
		context_dirty(ctx, DITHER);
		break;
	case GL_FOG:
		context_dirty(ctx, FOG);
		context_dirty(ctx, FRAG);
		context_dirty(ctx, MODELVIEW);
		break;
	case GL_LIGHT0:
	case GL_LIGHT1:
	case GL_LIGHT2:
	case GL_LIGHT3:
	case GL_LIGHT4:
	case GL_LIGHT5:
	case GL_LIGHT6:
	case GL_LIGHT7:
		context_dirty(ctx, MODELVIEW);
		context_dirty(ctx, LIGHT_ENABLE);
		context_dirty_i(ctx, LIGHT_SOURCE, cap - GL_LIGHT0);
		context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
		context_dirty(ctx, MATERIAL_BACK_AMBIENT);
		context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
		context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
		context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
		context_dirty(ctx, MATERIAL_BACK_SPECULAR);
		context_dirty(ctx, MATERIAL_FRONT_SHININESS);
		context_dirty(ctx, MATERIAL_BACK_SHININESS);
		break;
	case GL_LIGHTING:
		context_dirty(ctx, FRAG);
		context_dirty(ctx, MODELVIEW);
		context_dirty(ctx, LIGHT_MODEL);
		context_dirty(ctx, LIGHT_ENABLE);

		mask = ctx->Light._EnabledLights;
		while (mask) {
			const int i = u_bit_scan(&mask);
			context_dirty_i(ctx, LIGHT_SOURCE, i);
		}

		context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
		context_dirty(ctx, MATERIAL_BACK_AMBIENT);
		context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
		context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
		context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
		context_dirty(ctx, MATERIAL_BACK_SPECULAR);
		context_dirty(ctx, MATERIAL_FRONT_SHININESS);
		context_dirty(ctx, MATERIAL_BACK_SHININESS);
		break;
	case GL_LINE_SMOOTH:
		context_dirty(ctx, LINE_MODE);
		break;
	case GL_NORMALIZE:
		context_dirty(ctx, LIGHT_ENABLE);
		break;
	case GL_POINT_SMOOTH:
		context_dirty(ctx, POINT_MODE);
		break;
	case GL_POLYGON_OFFSET_POINT:
	case GL_POLYGON_OFFSET_LINE:
	case GL_POLYGON_OFFSET_FILL:
		context_dirty(ctx, POLYGON_OFFSET);
		break;
	case GL_POLYGON_SMOOTH:
		context_dirty(ctx, POLYGON_MODE);
		break;
	case GL_SCISSOR_TEST:
		context_dirty(ctx, SCISSOR);
		break;
	case GL_STENCIL_TEST:
		context_dirty(ctx, STENCIL_FUNC);
		break;
	case GL_TEXTURE_1D:
	case GL_TEXTURE_2D:
	case GL_TEXTURE_3D:
	case GL_TEXTURE_RECTANGLE:
		context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
		context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
		break;
	case GL_TEXTURE_GEN_S:
	case GL_TEXTURE_GEN_T:
	case GL_TEXTURE_GEN_R:
	case GL_TEXTURE_GEN_Q:
		context_dirty_i(ctx, TEX_GEN, ctx->Texture.CurrentUnit);
		context_dirty(ctx, MODELVIEW);
		break;
	}
}
Example #13
static void si_blit_decompress_color(struct pipe_context *ctx,
		struct r600_texture *rtex,
		unsigned first_level, unsigned last_level,
		unsigned first_layer, unsigned last_layer,
		bool need_dcc_decompress)
{
	struct si_context *sctx = (struct si_context *)ctx;
	void* custom_blend;
	unsigned layer, checked_last_layer, max_layer;
	unsigned level_mask =
		u_bit_consecutive(first_level, last_level - first_level + 1);

	if (!need_dcc_decompress)
		level_mask &= rtex->dirty_level_mask;
	if (!level_mask)
		return;

	if (rtex->dcc_offset && need_dcc_decompress) {
		custom_blend = sctx->custom_blend_dcc_decompress;

		/* disable levels without DCC */
		for (int i = first_level; i <= last_level; i++) {
			if (!rtex->dcc_offset ||
			    !rtex->surface.level[i].dcc_enabled)
				level_mask &= ~(1 << i);
		}
	} else if (rtex->fmask.size) {
		custom_blend = sctx->custom_blend_decompress;
	} else {
		custom_blend = sctx->custom_blend_fastclear;
	}

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		/* The smaller the mipmap level, the fewer layers there are
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&rtex->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			struct pipe_surface *cbsurf, surf_tmpl;

			surf_tmpl.format = rtex->resource.b.b.format;
			surf_tmpl.u.tex.level = level;
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;
			cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);

			si_blitter_begin(ctx, SI_DECOMPRESS);
			util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
			si_blitter_end(ctx);

			pipe_surface_reference(&cbsurf, NULL);
		}

		/* The texture will always be dirty if some layers aren't flushed.
		 * I don't think this case occurs often though. */
		if (first_layer == 0 && last_layer == max_layer) {
			rtex->dirty_level_mask &= ~(1 << level);
		}
	}
}
Example #14
static unsigned
si_blit_dbcb_copy(struct si_context *sctx,
		  struct r600_texture *src,
		  struct r600_texture *dst,
		  unsigned planes, unsigned level_mask,
		  unsigned first_layer, unsigned last_layer,
		  unsigned first_sample, unsigned last_sample)
{
	struct pipe_surface surf_tmpl = {{0}};
	unsigned layer, sample, checked_last_layer, max_layer;
	unsigned fully_copied_levels = 0;

	if (planes & PIPE_MASK_Z)
		sctx->dbcb_depth_copy_enabled = true;
	if (planes & PIPE_MASK_S)
		sctx->dbcb_stencil_copy_enabled = true;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		/* The smaller the mipmap level, the fewer layers there are
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&src->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		surf_tmpl.u.tex.level = level;

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			struct pipe_surface *zsurf, *cbsurf;

			surf_tmpl.format = src->resource.b.b.format;
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;

			zsurf = sctx->b.b.create_surface(&sctx->b.b, &src->resource.b.b, &surf_tmpl);

			surf_tmpl.format = dst->resource.b.b.format;
			cbsurf = sctx->b.b.create_surface(&sctx->b.b, &dst->resource.b.b, &surf_tmpl);

			for (sample = first_sample; sample <= last_sample; sample++) {
				if (sample != sctx->dbcb_copy_sample) {
					sctx->dbcb_copy_sample = sample;
					si_mark_atom_dirty(sctx, &sctx->db_render_state);
				}

				si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
				util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
								  sctx->custom_dsa_flush, 1.0f);
				si_blitter_end(&sctx->b.b);
			}

			pipe_surface_reference(&zsurf, NULL);
			pipe_surface_reference(&cbsurf, NULL);
		}

		if (first_layer == 0 && last_layer >= max_layer &&
		    first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
			fully_copied_levels |= 1u << level;
	}

	sctx->dbcb_depth_copy_enabled = false;
	sctx->dbcb_stencil_copy_enabled = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	return fully_copied_levels;
}
Example #15
void st_validate_state( struct st_context *st, enum st_pipeline pipeline )
{
   struct gl_context *ctx = st->ctx;
   uint64_t dirty, pipeline_mask;
   uint32_t dirty_lo, dirty_hi;

   /* Get Mesa driver state.
    *
    * Inactive states are shader states not used by shaders at the moment.
    */
   st->dirty |= ctx->NewDriverState & st->active_states & ST_ALL_STATES_MASK;
   ctx->NewDriverState = 0;

   /* Get pipeline state. */
   switch (pipeline) {
   case ST_PIPELINE_RENDER:
      if (st->ctx->API == API_OPENGL_COMPAT)
         check_attrib_edgeflag(st);

      if (st->gfx_shaders_may_be_dirty) {
         check_program_state(st);
         st->gfx_shaders_may_be_dirty = false;
      }

      st_manager_validate_framebuffers(st);

      pipeline_mask = ST_PIPELINE_RENDER_STATE_MASK;
      break;

   case ST_PIPELINE_CLEAR:
      st_manager_validate_framebuffers(st);
      pipeline_mask = ST_PIPELINE_CLEAR_STATE_MASK;
      break;

   case ST_PIPELINE_META:
      if (st->gfx_shaders_may_be_dirty) {
         check_program_state(st);
         st->gfx_shaders_may_be_dirty = false;
      }

      st_manager_validate_framebuffers(st);
      pipeline_mask = ST_PIPELINE_META_STATE_MASK;
      break;

   case ST_PIPELINE_UPDATE_FRAMEBUFFER:
      st_manager_validate_framebuffers(st);
      pipeline_mask = ST_PIPELINE_UPDATE_FB_STATE_MASK;
      break;

   case ST_PIPELINE_COMPUTE: {
      struct st_compute_program *old_cp = st->cp;
      struct gl_program *new_cp = ctx->ComputeProgram._Current;

      if (new_cp != &old_cp->Base) {
         if (old_cp)
            st->dirty |= old_cp->affected_states;
         assert(new_cp);
         st->dirty |= st_compute_program(new_cp)->affected_states;
      }

      st->compute_shader_may_be_dirty = false;

      /*
       * We add the ST_NEW_FB_STATE bit here as well, because glBindFramebuffer
       * acts as a barrier that breaks feedback loops between the framebuffer
       * and textures bound to the framebuffer, even when those textures are
       * accessed by compute shaders; so we must inform the driver of new
       * framebuffer state.
       */
      pipeline_mask = ST_PIPELINE_COMPUTE_STATE_MASK | ST_NEW_FB_STATE;
      break;
   }

   default:
      unreachable("Invalid pipeline specified");
   }

   dirty = st->dirty & pipeline_mask;
   if (!dirty)
      return;

   dirty_lo = dirty;
   dirty_hi = dirty >> 32;

   /* Update states.
    *
    * Don't use u_bit_scan64, it may be slower on 32-bit.
    */
   while (dirty_lo)
      update_functions[u_bit_scan(&dirty_lo)](st);
   while (dirty_hi)
      update_functions[32 + u_bit_scan(&dirty_hi)](st);

   /* Clear the render or compute state bits. */
   st->dirty &= ~pipeline_mask;
}
Example #16
static void
gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
		      struct ac_shader_info *info)
{
	switch (instr->intrinsic) {
	case nir_intrinsic_interp_var_at_sample:
		info->ps.needs_sample_positions = true;
		break;
	case nir_intrinsic_load_draw_id:
		info->vs.needs_draw_id = true;
		break;
	case nir_intrinsic_load_instance_id:
		info->vs.needs_instance_id = true;
		break;
	case nir_intrinsic_load_num_work_groups:
		info->cs.uses_grid_size = true;
		break;
	case nir_intrinsic_load_local_invocation_id:
	case nir_intrinsic_load_work_group_id: {
		unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
		while (mask) {
			unsigned i = u_bit_scan(&mask);

			if (instr->intrinsic == nir_intrinsic_load_work_group_id)
				info->cs.uses_block_id[i] = true;
			else
				info->cs.uses_thread_id[i] = true;
		}
		break;
	}
	case nir_intrinsic_load_local_invocation_index:
		info->cs.uses_local_invocation_idx = true;
		break;
	case nir_intrinsic_load_sample_id:
		info->ps.force_persample = true;
		break;
	case nir_intrinsic_load_sample_pos:
		info->ps.force_persample = true;
		break;
	case nir_intrinsic_load_view_index:
		info->needs_multiview_view_index = true;
		break;
	case nir_intrinsic_load_invocation_id:
		info->uses_invocation_id = true;
		break;
	case nir_intrinsic_load_primitive_id:
		info->uses_prim_id = true;
		break;
	case nir_intrinsic_load_push_constant:
		info->loads_push_constants = true;
		break;
	case nir_intrinsic_vulkan_resource_index:
		info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr));
		break;
	case nir_intrinsic_image_load:
	case nir_intrinsic_image_store:
	case nir_intrinsic_image_atomic_add:
	case nir_intrinsic_image_atomic_min:
	case nir_intrinsic_image_atomic_max:
	case nir_intrinsic_image_atomic_and:
	case nir_intrinsic_image_atomic_or:
	case nir_intrinsic_image_atomic_xor:
	case nir_intrinsic_image_atomic_exchange:
	case nir_intrinsic_image_atomic_comp_swap:
	case nir_intrinsic_image_size: {
		const struct glsl_type *type = instr->variables[0]->var->type;
		if(instr->variables[0]->deref.child)
			type = instr->variables[0]->deref.child->type;

		enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
		if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
		    dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
			info->ps.uses_input_attachments = true;
		mark_sampler_desc(instr->variables[0]->var, info);

		if (instr->intrinsic == nir_intrinsic_image_store ||
		    instr->intrinsic == nir_intrinsic_image_atomic_add ||
		    instr->intrinsic == nir_intrinsic_image_atomic_min ||
		    instr->intrinsic == nir_intrinsic_image_atomic_max ||
		    instr->intrinsic == nir_intrinsic_image_atomic_and ||
		    instr->intrinsic == nir_intrinsic_image_atomic_or ||
		    instr->intrinsic == nir_intrinsic_image_atomic_xor ||
		    instr->intrinsic == nir_intrinsic_image_atomic_exchange ||
		    instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
			if (nir->info.stage == MESA_SHADER_FRAGMENT)
				info->ps.writes_memory = true;
		}
		break;
	}
	case nir_intrinsic_store_ssbo:
	case nir_intrinsic_ssbo_atomic_add:
	case nir_intrinsic_ssbo_atomic_imin:
	case nir_intrinsic_ssbo_atomic_umin:
	case nir_intrinsic_ssbo_atomic_imax:
	case nir_intrinsic_ssbo_atomic_umax:
	case nir_intrinsic_ssbo_atomic_and:
	case nir_intrinsic_ssbo_atomic_or:
	case nir_intrinsic_ssbo_atomic_xor:
	case nir_intrinsic_ssbo_atomic_exchange:
	case nir_intrinsic_ssbo_atomic_comp_swap:
		if (nir->info.stage == MESA_SHADER_FRAGMENT)
			info->ps.writes_memory = true;
		break;
	case nir_intrinsic_load_var:
		if (nir->info.stage == MESA_SHADER_VERTEX) {
			nir_deref_var *dvar = instr->variables[0];
			nir_variable *var = dvar->var;

			if (var->data.mode == nir_var_shader_in) {
				unsigned idx = var->data.location;
				uint8_t mask =
					nir_ssa_def_components_read(&instr->dest.ssa);
				info->vs.input_usage_mask[idx] |= mask;
			}
		}
		break;
	default:
		break;
	}
}