Example #1
File: fd5_texture.c Project: FireBurn/mesa
static struct pipe_sampler_view *
fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
		const struct pipe_sampler_view *cso)
{
	struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view);
	struct fd_resource *rsc = fd_resource(prsc);
	enum pipe_format format = cso->format;
	unsigned lvl, layers = 0;

	if (!so)
		return NULL;

	if (format == PIPE_FORMAT_X32_S8X24_UINT) {
		rsc = rsc->stencil;
		format = rsc->base.format;
	}

	so->base = *cso;
	pipe_reference(NULL, &prsc->reference);
	so->base.texture = prsc;
	so->base.reference.count = 1;
	so->base.context = pctx;

	so->texconst0 =
		A5XX_TEX_CONST_0_FMT(fd5_pipe2tex(format)) |
		A5XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
		fd5_tex_swiz(format, cso->swizzle_r, cso->swizzle_g,
				cso->swizzle_b, cso->swizzle_a);

	/* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle
	 * we get isn't quite right.  Use SWAP(XYZW) as a cheap and cheerful
	 * way to re-arrange things so stencil component is where the swiz
	 * expects.
	 *
	 * Note that gallium expects stencil sampler to return (s,s,s,s)
	 * which isn't quite true.  To make that happen we'd have to massage
	 * the swizzle.  But in practice only the .x component is used.
	 */
	if (format == PIPE_FORMAT_X24S8_UINT) {
		so->texconst0 |= A5XX_TEX_CONST_0_SWAP(XYZW);
	}

	if (util_format_is_srgb(format)) {
		if (use_astc_srgb_workaround(pctx, format))
			so->astc_srgb = true;
		so->texconst0 |= A5XX_TEX_CONST_0_SRGB;
	}

	if (cso->target == PIPE_BUFFER) {
		unsigned elements = cso->u.buf.size / util_format_get_blocksize(format);

		lvl = 0;
		so->texconst1 =
			A5XX_TEX_CONST_1_WIDTH(elements) |
			A5XX_TEX_CONST_1_HEIGHT(1);
		so->texconst2 =
			A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(format)) |
			A5XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
		so->offset = cso->u.buf.offset;
	} else {
		unsigned miplevels;

		lvl = fd_sampler_first_level(cso);
		miplevels = fd_sampler_last_level(cso) - lvl;
		layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1;

		so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels);
		so->texconst1 =
			A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
			A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
		so->texconst2 =
			A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(format)) |
			A5XX_TEX_CONST_2_PITCH(
					util_format_get_nblocksx(
							format, rsc->slices[lvl].pitch) * rsc->cpp);
		so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer);
	}

	so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target));

	switch (cso->target) {
	case PIPE_TEXTURE_RECT:
	case PIPE_TEXTURE_1D:
	case PIPE_TEXTURE_2D:
		so->texconst3 =
			A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size);
		so->texconst5 =
			A5XX_TEX_CONST_5_DEPTH(1);
		break;
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		so->texconst3 =
			A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size);
		so->texconst5 =
			A5XX_TEX_CONST_5_DEPTH(layers);
		break;
	case PIPE_TEXTURE_CUBE:
	case PIPE_TEXTURE_CUBE_ARRAY:
		so->texconst3 =
			A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size);
		so->texconst5 =
			A5XX_TEX_CONST_5_DEPTH(layers / 6);
		break;
	case PIPE_TEXTURE_3D:
		so->texconst3 =
			A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0);
		so->texconst5 =
			A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl));
		break;
	default:
		so->texconst3 = 0x00000000;
		break;
	}

	return &so->base;
}
Example #2
/* Compare old and new render states and emit differences between them
 * to hardware.  Simplest implementation would be to emit the whole of
 * the "to" state.
 */
static enum pipe_error
emit_rss(struct svga_context *svga, unsigned dirty)
{
   struct svga_screen *screen = svga_screen(svga->pipe.screen);
   struct rs_queue queue;
   float point_size_min;

   queue.rs_count = 0;

   if (dirty & SVGA_NEW_BLEND) {
      const struct svga_blend_state *curr = svga->curr.blend;

      EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail );
      EMIT_RS( svga, curr->rt[0].blend_enable, BLENDENABLE, fail );

      if (curr->rt[0].blend_enable) {
         EMIT_RS( svga, curr->rt[0].srcblend, SRCBLEND, fail );
         EMIT_RS( svga, curr->rt[0].dstblend, DSTBLEND, fail );
         EMIT_RS( svga, curr->rt[0].blendeq, BLENDEQUATION, fail );

         EMIT_RS( svga, curr->rt[0].separate_alpha_blend_enable, 
                  SEPARATEALPHABLENDENABLE, fail );

         if (curr->rt[0].separate_alpha_blend_enable) {
            EMIT_RS( svga, curr->rt[0].srcblend_alpha, SRCBLENDALPHA, fail );
            EMIT_RS( svga, curr->rt[0].dstblend_alpha, DSTBLENDALPHA, fail );
            EMIT_RS( svga, curr->rt[0].blendeq_alpha, BLENDEQUATIONALPHA, fail );
         }
      }
   }

   if (dirty & SVGA_NEW_BLEND_COLOR) {
      uint32 color;
      uint32 r = float_to_ubyte(svga->curr.blend_color.color[0]);
      uint32 g = float_to_ubyte(svga->curr.blend_color.color[1]);
      uint32 b = float_to_ubyte(svga->curr.blend_color.color[2]);
      uint32 a = float_to_ubyte(svga->curr.blend_color.color[3]);

      color = (a << 24) | (r << 16) | (g << 8) | b;

      EMIT_RS( svga, color, BLENDCOLOR, fail );
   }

   if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) {
      const struct svga_depth_stencil_state *curr = svga->curr.depth; 
      const struct svga_rasterizer_state *rast = svga->curr.rast; 

      if (!curr->stencil[0].enabled) 
      {
         /* Stencil disabled
          */
         EMIT_RS( svga, FALSE, STENCILENABLE, fail );
         EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail );
      }
      else if (curr->stencil[0].enabled && !curr->stencil[1].enabled)
      {
         /* Regular stencil
          */
         EMIT_RS( svga, TRUE, STENCILENABLE, fail );
         EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail );

         EMIT_RS( svga, curr->stencil[0].func,  STENCILFUNC, fail );
         EMIT_RS( svga, curr->stencil[0].fail,  STENCILFAIL, fail );
         EMIT_RS( svga, curr->stencil[0].zfail, STENCILZFAIL, fail );
         EMIT_RS( svga, curr->stencil[0].pass,  STENCILPASS, fail );

         EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail );
         EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail );
      }
      else 
      {
         int cw, ccw;

         /* Hardware frontwinding is always CW, so if ours is also CW,
          * then our definition of front face agrees with hardware.
          * Otherwise need to flip.
          */
         if (rast->templ.front_ccw) {
            ccw = 0;
            cw = 1;
         }
         else {
            ccw = 1;
            cw = 0;
         }

         /* Twoside stencil
          */
         EMIT_RS( svga, TRUE, STENCILENABLE, fail );
         EMIT_RS( svga, TRUE, STENCILENABLE2SIDED, fail );

         EMIT_RS( svga, curr->stencil[cw].func,  STENCILFUNC, fail );
         EMIT_RS( svga, curr->stencil[cw].fail,  STENCILFAIL, fail );
         EMIT_RS( svga, curr->stencil[cw].zfail, STENCILZFAIL, fail );
         EMIT_RS( svga, curr->stencil[cw].pass,  STENCILPASS, fail );

         EMIT_RS( svga, curr->stencil[ccw].func,  CCWSTENCILFUNC, fail );
         EMIT_RS( svga, curr->stencil[ccw].fail,  CCWSTENCILFAIL, fail );
         EMIT_RS( svga, curr->stencil[ccw].zfail, CCWSTENCILZFAIL, fail );
         EMIT_RS( svga, curr->stencil[ccw].pass,  CCWSTENCILPASS, fail );

         EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail );
         EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail );
      }

      EMIT_RS( svga, curr->zenable, ZENABLE, fail );
      if (curr->zenable) {
         EMIT_RS( svga, curr->zfunc, ZFUNC, fail );
         EMIT_RS( svga, curr->zwriteenable, ZWRITEENABLE, fail );
      }

      EMIT_RS( svga, curr->alphatestenable, ALPHATESTENABLE, fail );
      if (curr->alphatestenable) {
         EMIT_RS( svga, curr->alphafunc, ALPHAFUNC, fail );
         EMIT_RS_FLOAT( svga, curr->alpharef, ALPHAREF, fail );
      }
   }

   if (dirty & SVGA_NEW_STENCIL_REF) {
      EMIT_RS( svga, svga->curr.stencil_ref.ref_value[0], STENCILREF, fail );
   }

   if (dirty & (SVGA_NEW_RAST | SVGA_NEW_NEED_PIPELINE))
   {
      const struct svga_rasterizer_state *curr = svga->curr.rast; 
      unsigned cullmode = curr->cullmode;

      /* Shademode: still need to rearrange index list to move
       * flat-shading PV first vertex.
       */
      EMIT_RS( svga, curr->shademode, SHADEMODE, fail );

      /* Don't do culling while the software pipeline is active.  It
       * does it for us, and additionally introduces potentially
       * back-facing triangles.
       */
      if (svga->state.sw.need_pipeline)
         cullmode = SVGA3D_FACE_NONE;

      point_size_min = util_get_min_point_size(&curr->templ);

      EMIT_RS( svga, cullmode, CULLMODE, fail );
      EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail );
      EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail );
      EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail );
      EMIT_RS( svga, curr->linepattern, LINEPATTERN, fail );
      EMIT_RS_FLOAT( svga, curr->pointsize, POINTSIZE, fail );
      EMIT_RS_FLOAT( svga, point_size_min, POINTSIZEMIN, fail );
      EMIT_RS_FLOAT( svga, screen->maxPointSize, POINTSIZEMAX, fail );
      EMIT_RS( svga, curr->pointsprite, POINTSPRITEENABLE, fail);
   }

   if (dirty & (SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | SVGA_NEW_NEED_PIPELINE))
   {
      const struct svga_rasterizer_state *curr = svga->curr.rast; 
      float slope = 0.0;
      float bias  = 0.0;

      /* Need to modify depth bias according to bound depthbuffer
       * format.  Don't do hardware depthbias while the software
       * pipeline is active.
       */
      if (!svga->state.sw.need_pipeline &&
          svga->curr.framebuffer.zsbuf)
      {
         slope = curr->slopescaledepthbias;
         bias  = svga->curr.depthscale * curr->depthbias;
      }

      EMIT_RS_FLOAT( svga, slope, SLOPESCALEDEPTHBIAS, fail );
      EMIT_RS_FLOAT( svga, bias, DEPTHBIAS, fail );
   }

   if (dirty & SVGA_NEW_FRAME_BUFFER) {
      /* XXX: we only look at the first color buffer's sRGB state */
      float gamma = 1.0f;
      if (svga->curr.framebuffer.cbufs[0] &&
          util_format_is_srgb(svga->curr.framebuffer.cbufs[0]->format)) {
         gamma = 2.2f;
      }
      EMIT_RS_FLOAT(svga, gamma, OUTPUTGAMMA, fail);
   }

   if (dirty & SVGA_NEW_RAST) {
      /* bitmask of the enabled clip planes */
      unsigned enabled = svga->curr.rast->templ.clip_plane_enable;
      EMIT_RS( svga, enabled, CLIPPLANEENABLE, fail );
   }

   if (queue.rs_count) {
      SVGA3dRenderState *rs;

      if (SVGA3D_BeginSetRenderState( svga->swc,
                                      &rs,
                                      queue.rs_count ) != PIPE_OK)
         goto fail;

      memcpy( rs,
              queue.rs,
              queue.rs_count * sizeof queue.rs[0]);

      SVGA_FIFOCommitAll( svga->swc );
   }

   return PIPE_OK;

fail:
   /* XXX: need to poison cached hardware state on failure to ensure
    * dirty state gets re-emitted.  Fix this by re-instating partial
    * FIFOCommit command and only updating cached hw state once the
    * initial allocation has succeeded.
    */
   memset(svga->state.hw_draw.rs, 0xcd, sizeof(svga->state.hw_draw.rs));

   return PIPE_ERROR_OUT_OF_MEMORY;
}
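
The EMIT_RS()/EMIT_RS_FLOAT() macros used above are not defined in this snippet; they implement the compare-and-queue idea described in the function's leading comment, emitting only render states whose values differ from what the hardware already has. As a rough, hypothetical sketch of that pattern (rs_cache_sketch, rs_queue_sketch and emit_rs_sketch are illustrative stand-ins, not the actual svga definitions):

/* Illustrative sketch only: queue a render-state write when it differs
 * from the value last sent to hardware.  All identifiers here are
 * hypothetical stand-ins for the real svga macros and structs. */
#include <stdint.h>

#define RS_SKETCH_MAX 256

struct rs_entry_sketch {
   uint32_t state;   /* render-state token */
   uint32_t value;   /* new value to program */
};

struct rs_queue_sketch {
   struct rs_entry_sketch rs[RS_SKETCH_MAX];
   unsigned rs_count;
};

/* Cached copy of what the hardware currently has, indexed by token. */
static uint32_t rs_cache_sketch[RS_SKETCH_MAX];

static void
emit_rs_sketch(struct rs_queue_sketch *q, uint32_t state, uint32_t value)
{
   if (rs_cache_sketch[state] != value) {
      q->rs[q->rs_count].state = state;
      q->rs[q->rs_count].value = value;
      q->rs_count++;
      rs_cache_sketch[state] = value;
   }
}

On failure the real driver instead poisons its cached state (the memset(..., 0xcd, ...) in the fail path above) so that everything gets re-emitted on the next attempt.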
Example #3
static struct pipe_sampler_view *
fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
		const struct pipe_sampler_view *cso)
{
	struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
	struct fd_resource *rsc = fd_resource(prsc);
	unsigned lvl;
	uint32_t sz2 = 0;

	if (!so)
		return NULL;

	so->base = *cso;
	pipe_reference(NULL, &prsc->reference);
	so->base.texture = prsc;
	so->base.reference.count = 1;
	so->base.context = pctx;

	so->texconst0 =
			A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
			A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
			fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
						cso->swizzle_b, cso->swizzle_a);

	if (prsc->target == PIPE_BUFFER || util_format_is_pure_integer(cso->format))
		so->texconst0 |= A3XX_TEX_CONST_0_NOCONVERT;
	if (util_format_is_srgb(cso->format))
		so->texconst0 |= A3XX_TEX_CONST_0_SRGB;

	if (prsc->target == PIPE_BUFFER) {
		lvl = 0;
		so->texconst1 =
			A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
			A3XX_TEX_CONST_1_WIDTH(cso->u.buf.size / util_format_get_blocksize(cso->format)) |
			A3XX_TEX_CONST_1_HEIGHT(1);
	} else {
		unsigned miplevels;

		lvl = fd_sampler_first_level(cso);
		miplevels = fd_sampler_last_level(cso) - lvl;

		so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels);
		so->texconst1 =
			A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
			A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
			A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
	}
	/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
	so->texconst2 =
			A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
	switch (prsc->target) {
	case PIPE_TEXTURE_1D_ARRAY:
	case PIPE_TEXTURE_2D_ARRAY:
		so->texconst3 =
				A3XX_TEX_CONST_3_DEPTH(prsc->array_size - 1) |
				A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[0].size0);
		break;
	case PIPE_TEXTURE_3D:
		so->texconst3 =
				A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
				A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[lvl].size0);
		while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0)
			sz2 = rsc->slices[++lvl].size0;
		so->texconst3 |= A3XX_TEX_CONST_3_LAYERSZ2(sz2);
		break;
	default:
		so->texconst3 = 0x00000000;
		break;
	}

	return &so->base;
}
Example #4
/**
 * Update framebuffer state (color, depth, stencil, etc. buffers)
 */
static void
update_framebuffer_state( struct st_context *st )
{
    struct pipe_framebuffer_state *framebuffer = &st->state.framebuffer;
    struct gl_framebuffer *fb = st->ctx->DrawBuffer;
    struct st_renderbuffer *strb;
    GLuint i;

    st_flush_bitmap_cache(st);

    st->state.fb_orientation = st_fb_orientation(fb);
    framebuffer->width = fb->Width;
    framebuffer->height = fb->Height;

    /*printf("------ fb size %d x %d\n", fb->Width, fb->Height);*/

    /* Examine Mesa's ctx->DrawBuffer->_ColorDrawBuffers state
     * to determine which surfaces to draw to
     */
    framebuffer->nr_cbufs = fb->_NumColorDrawBuffers;

    for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
        pipe_surface_reference(&framebuffer->cbufs[i], NULL);

        strb = st_renderbuffer(fb->_ColorDrawBuffers[i]);

        if (strb) {
            if (strb->is_rtt ||
                    (strb->texture && util_format_is_srgb(strb->texture->format))) {
                /* rendering to a GL texture, may have to update surface */
                st_update_renderbuffer_surface(st, strb);
            }

            if (strb->surface) {
                pipe_surface_reference(&framebuffer->cbufs[i], strb->surface);
            }
            strb->defined = GL_TRUE; /* we'll be drawing something */
        }
    }

    for (i = framebuffer->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; i++) {
        pipe_surface_reference(&framebuffer->cbufs[i], NULL);
    }

    /*
     * Depth/Stencil renderbuffer/surface.
     */
    strb = st_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer);
    if (strb) {
        if (strb->is_rtt) {
            /* rendering to a GL texture, may have to update surface */
            st_update_renderbuffer_surface(st, strb);
        }
        pipe_surface_reference(&framebuffer->zsbuf, strb->surface);
    }
    else {
        strb = st_renderbuffer(fb->Attachment[BUFFER_STENCIL].Renderbuffer);
        if (strb) {
            assert(strb->surface);
            pipe_surface_reference(&framebuffer->zsbuf, strb->surface);
        }
        else
            pipe_surface_reference(&framebuffer->zsbuf, NULL);
    }

#ifdef DEBUG
    /* Make sure the resource binding flags were set properly */
    for (i = 0; i < framebuffer->nr_cbufs; i++) {
        assert(!framebuffer->cbufs[i] ||
               framebuffer->cbufs[i]->texture->bind & PIPE_BIND_RENDER_TARGET);
    }
    if (framebuffer->zsbuf) {
        assert(framebuffer->zsbuf->texture->bind & PIPE_BIND_DEPTH_STENCIL);
    }
#endif

    cso_set_framebuffer(st->cso_context, framebuffer);
}
Example #5
File: svga_screen.c Project: airlied/mesa
/**
 * Implement pipe_screen::is_format_supported().
 * \param bindings  bitmask of PIPE_BIND_x flags
 */
static boolean
svga_is_format_supported( struct pipe_screen *screen,
                          enum pipe_format format,
                          enum pipe_texture_target target,
                          unsigned sample_count,
                          unsigned bindings)
{
   struct svga_screen *ss = svga_screen(screen);
   SVGA3dSurfaceFormat svga_format;
   SVGA3dSurfaceFormatCaps caps;
   SVGA3dSurfaceFormatCaps mask;

   assert(bindings);

   if (sample_count > 1) {
      /* In ms_samples, if bit N is set it means that we support
       * multisample with N+1 samples per pixel.
       */
      if ((ss->ms_samples & (1 << (sample_count - 1))) == 0) {
         return FALSE;
      }
   }

   svga_format = svga_translate_format(ss, format, bindings);
   if (svga_format == SVGA3D_FORMAT_INVALID) {
      return FALSE;
   }

   /* we don't support sRGB rendering into display targets */
   if (util_format_is_srgb(format) && (bindings & PIPE_BIND_DISPLAY_TARGET)) {
      return FALSE;
   }

   /*
    * For VGPU10 vertex formats, skip querying host capabilities
    */

   if (ss->sws->have_vgpu10 && (bindings & PIPE_BIND_VERTEX_BUFFER)) {
      SVGA3dSurfaceFormat svga_format;
      unsigned flags;
      svga_translate_vertex_format_vgpu10(format, &svga_format, &flags);
      return svga_format != SVGA3D_FORMAT_INVALID;
   }

   /*
    * Override host capabilities, so that we end up with the same
    * visuals for all virtual hardware implementations.
    */

   if (bindings & PIPE_BIND_DISPLAY_TARGET) {
      switch (svga_format) {
      case SVGA3D_A8R8G8B8:
      case SVGA3D_X8R8G8B8:
      case SVGA3D_R5G6B5:
         break;

      /* VGPU10 formats */
      case SVGA3D_B8G8R8A8_UNORM:
      case SVGA3D_B8G8R8X8_UNORM:
      case SVGA3D_B5G6R5_UNORM:
         break;

      /* Often unsupported/problematic. This means we end up with the same
       * visuals for all virtual hardware implementations.
       */
      case SVGA3D_A4R4G4B4:
      case SVGA3D_A1R5G5B5:
         return FALSE;
         
      default:
         return FALSE;
      }
   }
   
   /*
    * Query the host capabilities.
    */

   svga_get_format_cap(ss, svga_format, &caps);

   if (bindings & PIPE_BIND_RENDER_TARGET) {
      /* Check that the color surface is blendable, unless it's an
       * integer format.
       */
      if (!svga_format_is_integer(svga_format) &&
          (caps.value & SVGA3DFORMAT_OP_NOALPHABLEND)) {
         return FALSE;
      }
   }

   mask.value = 0;
   if (bindings & PIPE_BIND_RENDER_TARGET) {
      mask.value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET;
   }
   if (bindings & PIPE_BIND_DEPTH_STENCIL) {
      mask.value |= SVGA3DFORMAT_OP_ZSTENCIL;
   }
   if (bindings & PIPE_BIND_SAMPLER_VIEW) {
      mask.value |= SVGA3DFORMAT_OP_TEXTURE;
   }

   if (target == PIPE_TEXTURE_CUBE) {
      mask.value |= SVGA3DFORMAT_OP_CUBETEXTURE;
   }
   else if (target == PIPE_TEXTURE_3D) {
      mask.value |= SVGA3DFORMAT_OP_VOLUMETEXTURE;
   }

   return (caps.value & mask.value) == mask.value;
}
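
For context, gallium state trackers reach this function through the pipe_screen::is_format_supported hook. A minimal, hypothetical caller mirroring the (screen, format, target, sample_count, bindings) signature used above might look like:

/* Hypothetical caller sketch: ask the screen whether an sRGB format can be
 * bound as a single-sampled 2D render target.  The hook and enum names
 * match the signature shown in the example above. */
static boolean
screen_supports_srgb_rt(struct pipe_screen *screen)
{
   return screen->is_format_supported(screen,
                                      PIPE_FORMAT_B8G8R8A8_SRGB,
                                      PIPE_TEXTURE_2D,
                                      0,   /* sample_count */
                                      PIPE_BIND_RENDER_TARGET);
}

Given the explicit check above, the same query with PIPE_BIND_DISPLAY_TARGET instead of PIPE_BIND_RENDER_TARGET would return FALSE for any sRGB format on this driver.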
Example #6
static void
write_texture_border_color(struct vc5_job *job,
                           struct vc5_cl_out **uniforms,
                           struct vc5_texture_stateobj *texstate,
                           uint32_t unit)
{
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc5_resource *rsc = vc5_resource(texture->texture);
        union util_color uc;

        const struct util_format_description *tex_format_desc =
                util_format_description(texture->format);

        float border_color[4];
        for (int i = 0; i < 4; i++)
                border_color[i] = sampler->border_color.f[i];
        if (util_format_is_srgb(texture->format)) {
                for (int i = 0; i < 3; i++)
                        border_color[i] =
                                util_format_linear_to_srgb_float(border_color[i]);
        }

        /* Turn the border color into the layout of channels that it would
         * have when stored as texture contents.
         */
        float storage_color[4];
        util_format_unswizzle_4f(storage_color,
                                 border_color,
                                 tex_format_desc->swizzle);

        /* Now, pack so that when the vc5_format-sampled texture contents are
         * replaced with our border color, the vc5_get_format_swizzle()
         * swizzling will get the right channels.
         */
        if (util_format_is_depth_or_stencil(texture->format)) {
                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
                                       sampler->border_color.f[0]) << 8;
        } else {
                switch (rsc->vc5_format) {
                default:
                case VC5_TEXTURE_TYPE_RGBA8888:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
                        break;
                case VC5_TEXTURE_TYPE_RGBA4444:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
                        break;
                case VC5_TEXTURE_TYPE_RGB565:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
                        break;
                case VC5_TEXTURE_TYPE_ALPHA:
                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
                        break;
                case VC5_TEXTURE_TYPE_LUMALPHA:
                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
                                    (float_to_ubyte(storage_color[0]) << 0));
                        break;
                }
        }

        cl_aligned_u32(uniforms, uc.ui[0]);
}
Example #7
File: fd5_gmem.c Project: Echelon9/mesa
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
		struct pipe_surface **bufs, struct fd_gmem_stateobj *gmem)
{
	enum a5xx_tile_mode tile_mode;
	unsigned i;

	if (gmem) {
		tile_mode = TILE5_2;
	} else {
		tile_mode = TILE5_LINEAR;
	}

	for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
		enum a5xx_color_fmt format = 0;
		enum a3xx_color_swap swap = WZYX;
		bool srgb = false;
		struct fd_resource *rsc = NULL;
		struct fd_resource_slice *slice = NULL;
		uint32_t stride = 0;
		uint32_t size = 0;
		uint32_t base = 0;
		uint32_t offset = 0;

		if ((i < nr_bufs) && bufs[i]) {
			struct pipe_surface *psurf = bufs[i];
			enum pipe_format pformat = psurf->format;

			rsc = fd_resource(psurf->texture);

			slice = fd_resource_slice(rsc, psurf->u.tex.level);
			format = fd5_pipe2color(pformat);
			swap = fd5_pipe2swap(pformat);
			srgb = util_format_is_srgb(pformat);

			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

			offset = fd_resource_offset(rsc, psurf->u.tex.level,
					psurf->u.tex.first_layer);

			if (gmem) {
				stride = gmem->bin_w * rsc->cpp;
				size = stride * gmem->bin_h;
				base = gmem->cbuf_base[i];
			} else {
				stride = slice->pitch * rsc->cpp;
				size = slice->size0;
			}
		}

		OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
		OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
				A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
				A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
				0x800 | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
				COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
		OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
		OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
		if (gmem || (i >= nr_bufs) || !bufs[i]) {
			OUT_RING(ring, base);           /* RB_MRT[i].BASE_LO */
			OUT_RING(ring, 0x00000000);     /* RB_MRT[i].BASE_HI */
		} else {
			debug_assert((offset + size) <= fd_bo_size(rsc->bo));
			OUT_RELOCW(ring, rsc->bo, offset, 0, 0);  /* BASE_LO/HI */
		}

		OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
		OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format));

		/* when we support UBWC, these would be the system memory
		 * addr/pitch/etc:
		 */
		OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
		OUT_RING(ring, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
		OUT_RING(ring, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
		OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
		OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
	}
}
Example #8
File: r300_blit.c Project: chemecse/mesa
static void r300_blit(struct pipe_context *pipe,
                      const struct pipe_blit_info *blit)
{
    struct r300_context *r300 = r300_context(pipe);
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct pipe_blit_info info = *blit;

    /* The driver supports sRGB textures but not framebuffers. Blitting
     * from sRGB to sRGB should be the same as blitting from linear
     * to linear, so use that.  This avoids incorrect linearization.
     */
    if (util_format_is_srgb(info.src.format)) {
      info.src.format = util_format_linear(info.src.format);
      info.dst.format = util_format_linear(info.dst.format);
    }

    /* MSAA resolve. */
    if (info.src.resource->nr_samples > 1 &&
        !util_format_is_depth_or_stencil(info.src.resource->format)) {
        r300_msaa_resolve(pipe, &info);
        return;
    }

    /* Can't read MSAA textures. */
    if (info.src.resource->nr_samples > 1) {
        return;
    }

    /* Blit a combined depth-stencil resource as color.
     * S8Z24 is the only supported stencil format. */
    if ((info.mask & PIPE_MASK_S) &&
        info.src.format == PIPE_FORMAT_S8_UINT_Z24_UNORM &&
        info.dst.format == PIPE_FORMAT_S8_UINT_Z24_UNORM) {
        if (info.dst.resource->nr_samples > 1) {
            /* Cannot do that with MSAA buffers. */
            info.mask &= ~PIPE_MASK_S;
            if (!(info.mask & PIPE_MASK_Z)) {
                return;
            }
        } else {
            /* Single-sample buffer. */
            info.src.format = PIPE_FORMAT_B8G8R8A8_UNORM;
            info.dst.format = PIPE_FORMAT_B8G8R8A8_UNORM;
            if (info.mask & PIPE_MASK_Z) {
                info.mask = PIPE_MASK_RGBA; /* depth+stencil */
            } else {
                info.mask = PIPE_MASK_B; /* stencil only */
            }
        }
    }

    /* Decompress ZMASK. */
    if (r300->zmask_in_use && !r300->locked_zbuffer) {
        if (fb->zsbuf->texture == info.src.resource ||
            fb->zsbuf->texture == info.dst.resource) {
            r300_decompress_zmask(r300);
        }
    }

    r300_blitter_begin(r300, R300_BLIT |
		       (info.render_condition_enable ? 0 : R300_IGNORE_RENDER_COND));
    util_blitter_blit(r300->blitter, &info);
    r300_blitter_end(r300);
}
Example #9
File: fd4_gmem.c Project: dumbbell/mesa
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
		struct pipe_surface **bufs, uint32_t *bases,
		uint32_t bin_w, bool decode_srgb)
{
	enum a4xx_tile_mode tile_mode;
	unsigned i;

	if (bin_w) {
		tile_mode = 2;
	} else {
		tile_mode = TILE4_LINEAR;
	}

	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
		enum a4xx_color_fmt format = 0;
		enum a3xx_color_swap swap = WZYX;
		bool srgb = false;
		struct fd_resource *rsc = NULL;
		struct fd_resource_slice *slice = NULL;
		uint32_t stride = 0;
		uint32_t base = 0;
		uint32_t offset = 0;

		if ((i < nr_bufs) && bufs[i]) {
			struct pipe_surface *psurf = bufs[i];
			enum pipe_format pformat = psurf->format;

			rsc = fd_resource(psurf->texture);

			/* In case we're drawing to Z32F_S8, the "color" actually goes to
			 * the stencil
			 */
			if (rsc->stencil) {
				rsc = rsc->stencil;
				pformat = rsc->base.b.format;
				bases++;
			}

			slice = fd_resource_slice(rsc, psurf->u.tex.level);
			format = fd4_pipe2color(pformat);
			swap = fd4_pipe2swap(pformat);

			if (decode_srgb)
				srgb = util_format_is_srgb(pformat);
			else
				pformat = util_format_linear(pformat);

			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

			offset = fd_resource_offset(rsc, psurf->u.tex.level,
					psurf->u.tex.first_layer);

			if (bin_w) {
				stride = bin_w * rsc->cpp;

				if (bases) {
					base = bases[i];
				}
			} else {
				stride = slice->pitch * rsc->cpp;
			}
		} else if ((i < nr_bufs) && bases) {
			base = bases[i];
		}

		OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
		OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
				A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
				A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
				A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
				COND(srgb, A4XX_RB_MRT_BUF_INFO_COLOR_SRGB));
		if (bin_w || (i >= nr_bufs) || !bufs[i]) {
			OUT_RING(ring, base);
			OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
		} else {
			OUT_RELOCW(ring, rsc->bo, offset, 0, 0);
			/* RB_MRT[i].CONTROL3.STRIDE not emitted by c2d..
			 * not sure if we need to skip it for bypass or
			 * not.
			 */
			OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(0));
		}
	}
}
Example #10
/**
 * Define a vgpu10 sampler state.
 */
static void
define_sampler_state_object(struct svga_context *svga,
                            struct svga_sampler_state *ss,
                            const struct pipe_sampler_state *ps)
{
   uint8_t max_aniso = (uint8_t) 255; /* XXX fix me */
   boolean anisotropic;
   uint8 compare_func;
   SVGA3dFilter filter;
   SVGA3dRGBAFloat bcolor;
   unsigned try;
   float min_lod, max_lod;

   assert(svga_have_vgpu10(svga));

   anisotropic = ss->aniso_level > 1.0f;

   filter = translate_filter_mode(ps->min_mip_filter,
                                  ps->min_img_filter,
                                  ps->mag_img_filter,
                                  anisotropic,
                                  ss->compare_mode);

   compare_func = translate_comparison_func(ss->compare_func);

   COPY_4V(bcolor.value, ps->border_color.f);

   assert(ps->min_lod <= ps->max_lod);

   if (ps->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
      /* just use the base level image */
      min_lod = max_lod = 0.0f;
   }
   else {
      min_lod = ps->min_lod;
      max_lod = ps->max_lod;
   }

   /* If shadow comparisons are enabled, create two sampler states: one
    * with the given shadow compare mode, another with shadow comparison off.
    * We need the latter because in some cases, we have to do the shadow
    * compare in the shader.  So, we don't want to do it twice.
    */
   STATIC_ASSERT(PIPE_TEX_COMPARE_NONE == 0);
   STATIC_ASSERT(PIPE_TEX_COMPARE_R_TO_TEXTURE == 1);
   ss->id[1] = SVGA3D_INVALID_ID;

   unsigned i;
   for (i = 0; i <= ss->compare_mode; i++) {
      ss->id[i] = util_bitmask_add(svga->sampler_object_id_bm);

      /* Loop in case command buffer is full and we need to flush and retry */
      for (try = 0; try < 2; try++) {
         enum pipe_error ret =
            SVGA3D_vgpu10_DefineSamplerState(svga->swc,
                                             ss->id[i],
                                             filter,
                                             ss->addressu,
                                             ss->addressv,
                                             ss->addressw,
                                             ss->lod_bias, /* float */
                                             max_aniso,
                                             compare_func,
                                             bcolor,
                                             min_lod,       /* float */
                                             max_lod);      /* float */
         if (ret == PIPE_OK)
            break;
         svga_context_flush(svga, NULL);
      }

      /* turn off the shadow compare option for second iteration */
      filter &= ~SVGA3D_FILTER_COMPARE;
   }
}


static void *
svga_create_sampler_state(struct pipe_context *pipe,
                          const struct pipe_sampler_state *sampler)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state );

   if (!cso)
      return NULL;

   cso->mipfilter = translate_mip_filter(sampler->min_mip_filter);
   cso->magfilter = translate_img_filter( sampler->mag_img_filter );
   cso->minfilter = translate_img_filter( sampler->min_img_filter );
   cso->aniso_level = MAX2( sampler->max_anisotropy, 1 );
   if (sampler->max_anisotropy)
      cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC;
   cso->lod_bias = sampler->lod_bias;
   cso->addressu = translate_wrap_mode(sampler->wrap_s);
   cso->addressv = translate_wrap_mode(sampler->wrap_t);
   cso->addressw = translate_wrap_mode(sampler->wrap_r);
   cso->normalized_coords = sampler->normalized_coords;
   cso->compare_mode = sampler->compare_mode;
   cso->compare_func = sampler->compare_func;

   {
      uint32 r = float_to_ubyte(sampler->border_color.f[0]);
      uint32 g = float_to_ubyte(sampler->border_color.f[1]);
      uint32 b = float_to_ubyte(sampler->border_color.f[2]);
      uint32 a = float_to_ubyte(sampler->border_color.f[3]);

      cso->bordercolor = (a << 24) | (r << 16) | (g << 8) | b;
   }

   /* No SVGA3D support for:
    *    - min/max LOD clamping
    */
   cso->min_lod = 0;
   cso->view_min_lod = MAX2((int) (sampler->min_lod + 0.5), 0);
   cso->view_max_lod = MAX2((int) (sampler->max_lod + 0.5), 0);

   /* Use min_mipmap */
   if (svga->debug.use_min_mipmap) {
      if (cso->view_min_lod == cso->view_max_lod) {
         cso->min_lod = cso->view_min_lod;
         cso->view_min_lod = 0;
         cso->view_max_lod = 1000; /* Just a high number */
         cso->mipfilter = SVGA3D_TEX_FILTER_NONE;
      }
   }

   if (svga_have_vgpu10(svga)) {
      define_sampler_state_object(svga, cso, sampler);
   }

   SVGA_DBG(DEBUG_SAMPLERS,
            "New sampler: min %u, view(min %u, max %u) lod, mipfilter %s\n",
            cso->min_lod, cso->view_min_lod, cso->view_max_lod,
            cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");

   svga->hud.num_sampler_objects++;
   SVGA_STATS_COUNT_INC(svga_screen(svga->pipe.screen)->sws,
                        SVGA_STATS_COUNT_SAMPLER);

   return cso;
}


static void
svga_bind_sampler_states(struct pipe_context *pipe,
                         enum pipe_shader_type shader,
                         unsigned start,
                         unsigned num,
                         void **samplers)
{
   struct svga_context *svga = svga_context(pipe);
   unsigned i;
   boolean any_change = FALSE;

   assert(shader < PIPE_SHADER_TYPES);
   assert(start + num <= PIPE_MAX_SAMPLERS);

   /* Pre-VGPU10 only supports FS textures */
   if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
      return;

   for (i = 0; i < num; i++) {
      if (svga->curr.sampler[shader][start + i] != samplers[i])
         any_change = TRUE;
      svga->curr.sampler[shader][start + i] = samplers[i];
   }

   if (!any_change) {
      return;
   }

   /* find highest non-null sampler[] entry */
   {
      unsigned j = MAX2(svga->curr.num_samplers[shader], start + num);
      while (j > 0 && svga->curr.sampler[shader][j - 1] == NULL)
         j--;
      svga->curr.num_samplers[shader] = j;
   }

   svga->dirty |= SVGA_NEW_SAMPLER;
}


static void
svga_delete_sampler_state(struct pipe_context *pipe, void *sampler)
{
   struct svga_sampler_state *ss = (struct svga_sampler_state *) sampler;
   struct svga_context *svga = svga_context(pipe);

   if (svga_have_vgpu10(svga)) {
      unsigned i;
      for (i = 0; i < 2; i++) {
         enum pipe_error ret;

         if (ss->id[i] != SVGA3D_INVALID_ID) {
            svga_hwtnl_flush_retry(svga);

            ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id[i]);
            if (ret != PIPE_OK) {
               svga_context_flush(svga, NULL);
               ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id[i]);
            }
            util_bitmask_clear(svga->sampler_object_id_bm, ss->id[i]);
         }
      }
   }

   FREE(sampler);
   svga->hud.num_sampler_objects--;
}


static struct pipe_sampler_view *
svga_create_sampler_view(struct pipe_context *pipe,
                         struct pipe_resource *texture,
                         const struct pipe_sampler_view *templ)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_pipe_sampler_view *sv = CALLOC_STRUCT(svga_pipe_sampler_view);

   if (!sv) {
      return NULL;
   }

   sv->base = *templ;
   sv->base.reference.count = 1;
   sv->base.texture = NULL;
   pipe_resource_reference(&sv->base.texture, texture);

   sv->base.context = pipe;
   sv->id = SVGA3D_INVALID_ID;

   svga->hud.num_samplerview_objects++;
   SVGA_STATS_COUNT_INC(svga_screen(svga->pipe.screen)->sws,
                        SVGA_STATS_COUNT_SAMPLERVIEW);

   return &sv->base;
}


static void
svga_sampler_view_destroy(struct pipe_context *pipe,
                          struct pipe_sampler_view *view)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view);

   if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) {
      if (view->context != pipe) {
         /* The SVGA3D device will generate an error (and on Linux, cause
          * us to abort) if we try to destroy a shader resource view from
          * a context other than the one it was created with.  Skip the
          * SVGA3D_vgpu10_DestroyShaderResourceView() and leak the sampler
          * view for now.  This should only sometimes happen when a shared
          * texture is deleted.
          */
         _debug_printf("context mismatch in %s\n", __func__);
      }
      else {
         enum pipe_error ret;

         svga_hwtnl_flush_retry(svga); /* XXX is this needed? */

         ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
         if (ret != PIPE_OK) {
            svga_context_flush(svga, NULL);
            ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id);
         }
         util_bitmask_clear(svga->sampler_view_id_bm, sv->id);
      }
   }

   pipe_resource_reference(&sv->base.texture, NULL);

   FREE(sv);
   svga->hud.num_samplerview_objects--;
}


static void
svga_set_sampler_views(struct pipe_context *pipe,
                       enum pipe_shader_type shader,
                       unsigned start,
                       unsigned num,
                       struct pipe_sampler_view **views)
{
   struct svga_context *svga = svga_context(pipe);
   unsigned flag_1d = 0;
   unsigned flag_srgb = 0;
   uint i;
   boolean any_change = FALSE;

   assert(shader < PIPE_SHADER_TYPES);
   assert(start + num <= ARRAY_SIZE(svga->curr.sampler_views[shader]));

   /* Pre-VGPU10 only supports FS textures */
   if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT)
      return;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_SETSAMPLERVIEWS);

   /* This bit of code works around a quirk in the CSO module.
    * If start=num=0 it means all sampler views should be released.
    * Note that the CSO module treats sampler views for fragment shaders
    * differently than other shader types.
    */
   if (start == 0 && num == 0 && svga->curr.num_sampler_views[shader] > 0) {
      for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) {
         pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][i]);
      }
      any_change = TRUE;
   }

   for (i = 0; i < num; i++) {
      enum pipe_texture_target target;

      if (svga->curr.sampler_views[shader][start + i] != views[i]) {
         /* Note: we're using pipe_sampler_view_release() here to work around
          * a possible crash when the old view belongs to another context that
          * was already destroyed.
          */
         pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][start + i]);
         pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i],
                                     views[i]);
         any_change = TRUE;
      }

      if (!views[i])
         continue;

      if (util_format_is_srgb(views[i]->format))
         flag_srgb |= 1 << (start + i);

      target = views[i]->target;
      if (target == PIPE_TEXTURE_1D) {
         flag_1d |= 1 << (start + i);
      } else if (target == PIPE_TEXTURE_RECT) {
         /* If the size of the bound texture changes, we need to emit new
          * const buffer values.
          */
         svga->dirty |= SVGA_NEW_TEXTURE_CONSTS;
      } else if (target == PIPE_BUFFER) {
         /* If the size of the bound buffer changes, we need to emit new
          * const buffer values.
          */
         svga->dirty |= SVGA_NEW_TEXTURE_CONSTS;
      }
   }

   if (!any_change) {
      goto done;
   }

   /* find highest non-null sampler_views[] entry */
   {
      unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num);
      while (j > 0 && svga->curr.sampler_views[shader][j - 1] == NULL)
         j--;
      svga->curr.num_sampler_views[shader] = j;
   }

   svga->dirty |= SVGA_NEW_TEXTURE_BINDING;

   if (flag_srgb != svga->curr.tex_flags.flag_srgb ||
       flag_1d != svga->curr.tex_flags.flag_1d) {
      svga->dirty |= SVGA_NEW_TEXTURE_FLAGS;
      svga->curr.tex_flags.flag_1d = flag_1d;
      svga->curr.tex_flags.flag_srgb = flag_srgb;
   }

   /* Check if any of the sampler view resources collide with the framebuffer
    * color buffers or depth stencil resource. If so, set the NEW_FRAME_BUFFER
    * dirty bit so that emit_framebuffer can be invoked to create backed view
    * for the conflicted surface view.
    */
   if (svga_check_sampler_framebuffer_resource_collision(svga, shader)) {
      svga->dirty |= SVGA_NEW_FRAME_BUFFER;
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
}

/**
 * Clean up sampler, sampler view state at context destruction time
 */
void
svga_cleanup_sampler_state(struct svga_context *svga)
{
   enum pipe_shader_type shader;

   for (shader = 0; shader <= PIPE_SHADER_GEOMETRY; shader++) {
      unsigned i;

      for (i = 0; i < svga->state.hw_draw.num_sampler_views[shader]; i++) {
         pipe_sampler_view_release(&svga->pipe,
                                   &svga->state.hw_draw.sampler_views[shader][i]);
      }
   }
   
   /* free polygon stipple state */
   if (svga->polygon_stipple.sampler) {
      svga->pipe.delete_sampler_state(&svga->pipe, svga->polygon_stipple.sampler);
   }

   if (svga->polygon_stipple.sampler_view) {
      svga->pipe.sampler_view_destroy(&svga->pipe,
                                      &svga->polygon_stipple.sampler_view->base);
   }
   pipe_resource_reference(&svga->polygon_stipple.texture, NULL);
}

void
svga_init_sampler_functions( struct svga_context *svga )
{
   svga->pipe.create_sampler_state = svga_create_sampler_state;
   svga->pipe.bind_sampler_states = svga_bind_sampler_states;
   svga->pipe.delete_sampler_state = svga_delete_sampler_state;
   svga->pipe.set_sampler_views = svga_set_sampler_views;
   svga->pipe.create_sampler_view = svga_create_sampler_view;
   svga->pipe.sampler_view_destroy = svga_sampler_view_destroy;
}
Example #11
File: fd4_program.c Project: etnaviv/mesa
void
fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
		int nr, struct pipe_surface **bufs)
{
	struct stage s[MAX_STAGES];
	uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
	uint32_t face_regid, coord_regid, zwcoord_regid;
	enum a3xx_threadsize fssz;
	int constmode;
	int i, j, k;

	debug_assert(nr <= ARRAY_SIZE(color_regid));

	if (emit->key.binning_pass)
		nr = 0;

	setup_stages(emit, s);

	fssz = (s[FS].i->max_reg >= 24) ? TWO_QUADS : FOUR_QUADS;

	/* blob seems to always use constmode currently: */
	constmode = 1;

	pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
	if (pos_regid == regid(63, 0)) {
		/* hw dislikes when there is no position output, which can
		 * happen for transform-feedback vertex shaders.  Just tell
		 * the hw to use r0.x, with whatever random value is there:
		 */
		pos_regid = regid(0, 0);
	}
	posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
	psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
	if (s[FS].v->color0_mrt) {
		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
		color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
			ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
	} else {
		color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
		color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
		color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
		color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
		color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
		color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
		color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
		color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
	}

	/* TODO get these dynamically: */
	face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
	coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
	zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);

	/* we could probably divide this up into things that need to be
	 * emitted if frag-prog is dirty vs if vert-prog is dirty..
	 */

	OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1);
	OUT_RING(ring, 0x00000003);

	OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 5);
	OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
			A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
			A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
			/* NOTE:  I guess SHADERRESTART and CONSTFULLUPDATE maybe
			 * flush some caches? I think we only need to set those
			 * bits if we have updated const or shader..
			 */
			A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
			A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
	OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
			A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
			A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid) |
			A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(zwcoord_regid));
	OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) |
			0x3f3f000 |           /* XXX */
			A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid));
	OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) |
			0xfcfcfc00);
	OUT_RING(ring, 0x00fcfcfc);   /* XXX HLSQ_CONTROL_4 */

	OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
	OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) |
			A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
			A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) |
			A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff));
	OUT_RING(ring, A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) |
			A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
			A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) |
			A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff));
	OUT_RING(ring, A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) |
			A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
			A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) |
			A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff));
	OUT_RING(ring, A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) |
			A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
			A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) |
			A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff));
	OUT_RING(ring, A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) |
			A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
			A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) |
			A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff));

	OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1);
	OUT_RING(ring, 0x140010 | /* XXX */
			COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS));

	OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1);
	OUT_RING(ring, 0x7f | /* XXX */
			COND(s[VS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER) |
			COND(s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER) |
			COND(s[VS].instrlen && s[FS].instrlen,
					A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER));

	OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1);
	OUT_RING(ring, s[VS].v->instrlen);      /* SP_VS_LENGTH_REG */

	OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3);
	OUT_RING(ring, A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
			A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
			A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
			A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
			A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
			A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
			COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE));
	OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) |
			A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in));
	OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
			A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
			A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in));

	for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) {
		uint32_t reg = 0;

		OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1);

		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count) {
			k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot);
			reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid);
			reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask);
		}

		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count) {
			k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot);
			reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid);
			reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask);
		}

		OUT_RING(ring, reg);
	}

	for (i = 0, j = -1; (i < 8) && (j < (int)s[FS].v->inputs_count); i++) {
		uint32_t reg = 0;

		OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1);

		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count)
			reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(s[FS].v->inputs[j].inloc);
		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count)
			reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(s[FS].v->inputs[j].inloc);
		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count)
			reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(s[FS].v->inputs[j].inloc);
		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count)
			reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(s[FS].v->inputs[j].inloc);

		OUT_RING(ring, reg);
	}

	OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2);
	OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
			A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff));
	OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0);  /* SP_VS_OBJ_START_REG */

	if (emit->key.binning_pass) {
		OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
		OUT_RING(ring, 0x00000000);         /* SP_FS_LENGTH_REG */

		OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
		OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
				COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
				A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(0) |
				A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(0) |
				A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
				A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
				A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE);
		OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
				0x80000000);

		OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
		OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
				A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
		OUT_RING(ring, 0x00000000);
	} else {
		OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
		OUT_RING(ring, s[FS].v->instrlen);  /* SP_FS_LENGTH_REG */

		OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
		OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
				COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
				A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
				A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
				A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
				A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
				A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
				COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
		OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
				0x80000000 |      /* XXX */
				COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) |
				COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) |
				COND(s[FS].v->frag_coord, A4XX_SP_FS_CTRL_REG1_FRAGCOORD));

		OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
		OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
				A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
		OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0);  /* SP_FS_OBJ_START_REG */
	}

	OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1);
	OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
			A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff));

	OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1);
	OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
			A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff));

	OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1);
	OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
			A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff));

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1);
	OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) |
			COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING) |
			COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) |
			COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD |
					A4XX_RB_RENDER_CONTROL2_YCOORD |
					A4XX_RB_RENDER_CONTROL2_ZCOORD |
					A4XX_RB_RENDER_CONTROL2_WCOORD));

	OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
	OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(nr) |
			COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z));

	OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
	OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_MRT(nr) |
			COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
			A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));

	OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
	for (i = 0; i < 8; i++) {
		enum a4xx_color_fmt format = 0;
		bool srgb = false;
		if (i < nr) {
			format = fd4_emit_format(bufs[i]);
			if (bufs[i] && !emit->no_decode_srgb)
				srgb = util_format_is_srgb(bufs[i]->format);
		}
		OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
				A4XX_SP_FS_MRT_REG_MRTFORMAT(format) |
				COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) |
				COND(emit->key.half_precision,
					A4XX_SP_FS_MRT_REG_HALF_PRECISION));
	}

	if (emit->key.binning_pass) {
		OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
		OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) |
				0x40000000 |      /* XXX */
				COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
		OUT_RING(ring, 0x00000000);
	} else {
		uint32_t vinterp[8], vpsrepl[8];

		memset(vinterp, 0, sizeof(vinterp));
		memset(vpsrepl, 0, sizeof(vpsrepl));

		/* looks like we need to do int varyings in the frag
		 * shader on a4xx (no flatshad reg?  or a420.0 bug?):
		 *
		 *    (sy)(ss)nop
		 *    (sy)ldlv.u32 r0.x,l[r0.x], 1
		 *    ldlv.u32 r0.y,l[r0.x+1], 1
		 *    (ss)bary.f (ei)r63.x, 0, r0.x
		 *    (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
		 *    (rpt5)nop
		 *    sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
		 *
		 * Possibly on later a4xx variants we'll be able to use
		 * something like the code below instead of workaround
		 * in the shader:
		 */
		/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
		for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
			/* NOTE: varyings are packed, so if compmask is 0xb
			 * then first, third, and fourth component occupy
			 * three consecutive varying slots:
			 */
			unsigned compmask = s[FS].v->inputs[j].compmask;

			/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
			 * instead.. rather than -8 everywhere else..
			 */
			uint32_t inloc = s[FS].v->inputs[j].inloc - 8;

			if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) ||
					(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
				uint32_t loc = inloc;

				for (i = 0; i < 4; i++) {
					if (compmask & (1 << i)) {
						vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
						//flatshade[loc / 32] |= 1 << (loc % 32);
						loc++;
					}
				}
			}

			gl_varying_slot slot = s[FS].v->inputs[j].slot;

			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
			if (slot >= VARYING_SLOT_VAR0) {
				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
				/* Replace the .xy coordinates with S/T from the point sprite. Set
				 * interpolation bits for .zw such that they become .01
				 */
				if (emit->sprite_coord_enable & texmask) {
					/* mask is two 2-bit fields, where:
					 *   '01' -> S
					 *   '10' -> T
					 *   '11' -> 1 - T  (flip mode)
					 */
					unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
					uint32_t loc = inloc;
					if (compmask & 0x1) {
						vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x2) {
						vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x4) {
						/* .z <- 0.0f */
						vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x8) {
						/* .w <- 1.0f */
						vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
						loc++;
					}
				}