static void
i915_texture_layout_2d(struct i915_texture *tex)
{
   struct pipe_resource *pt = &tex->b.b;
   unsigned level;
   unsigned width = util_next_power_of_two(pt->width0);
   unsigned height = util_next_power_of_two(pt->height0);
   unsigned nblocksy = util_format_get_nblocksy(pt->format, height);
   unsigned align_y = 2;

   if (util_format_is_s3tc(pt->format))
      align_y = 1;

   tex->stride = align(util_format_get_stride(pt->format, width), 4);
   tex->total_nblocksy = 0;

   for (level = 0; level <= pt->last_level; level++) {
      i915_texture_set_level_info(tex, level, 1);
      i915_texture_set_image_offset(tex, level, 0, 0, tex->total_nblocksy);

      tex->total_nblocksy += nblocksy;

      width = u_minify(width, 1);
      height = u_minify(height, 1);
      nblocksy = align_nblocksy(pt->format, height, align_y);
   }
}
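Every example on this page leans on util_next_power_of_two() from Mesa's
u_math.h. As a reference, here is a minimal sketch of the portable
bit-smearing fallback (the real header also has a __builtin_clz fast
path); the function name is ours, not Mesa's:

static unsigned
next_power_of_two_sketch(unsigned x)
{
   if (x <= 1)
      return 1;      /* u_math.h maps both 0 and 1 to 1 */
   x--;              /* so exact powers of two map to themselves */
   x |= x >> 1;      /* smear the top set bit into every lower bit */
   x |= x >> 2;
   x |= x >> 4;
   x |= x >> 8;
   x |= x >> 16;
   return x + 1;     /* the carry lands on the next power of two */
}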
Example #2
struct pipe_video_decoder *
vl_create_decoder(struct pipe_context *pipe,
                  enum pipe_video_profile profile,
                  enum pipe_video_entrypoint entrypoint,
                  enum pipe_video_chroma_format chroma_format,
                  unsigned width, unsigned height, unsigned max_references,
                  bool expect_chunked_decode)
{
   unsigned buffer_width, buffer_height;
   bool pot_buffers;

   assert(pipe);
   assert(width > 0 && height > 0);
   
   pot_buffers = !pipe->screen->get_video_param
   (
      pipe->screen,
      profile,
      PIPE_VIDEO_CAP_NPOT_TEXTURES
   );

   buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, VL_MACROBLOCK_WIDTH);
   buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, VL_MACROBLOCK_HEIGHT);

   switch (u_reduce_video_profile(profile)) {
      case PIPE_VIDEO_CODEC_MPEG12:
         return vl_create_mpeg12_decoder(pipe, profile, entrypoint, chroma_format,
                                         buffer_width, buffer_height, max_references,
                                         expect_chunked_decode);
      default:
         return NULL;
   }
   return NULL;
}
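The two rounding strategies diverge noticeably for common video sizes. A
worked example, assuming the usual MPEG-2 values VL_MACROBLOCK_WIDTH ==
VL_MACROBLOCK_HEIGHT == 16:

/* For a 1920x1080 stream:
 *   NPOT-capable screen: align(1920, 16) x align(1080, 16) = 1920 x 1088
 *   POT-only screen:     util_next_power_of_two(1920)
 *                      x util_next_power_of_two(1080)      = 2048 x 2048
 * The power-of-two fallback roughly doubles the buffer footprint here. */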
Example #3
/**
 * Round up the requested multisample count to the next supported sample size.
 */
static unsigned
framebuffer_quantize_num_samples(struct st_context *st, unsigned num_samples)
{
   struct pipe_screen *screen = st->pipe->screen;
   int quantized_samples = 0;
   unsigned msaa_mode;

   if (!num_samples)
      return 0;

   /* Assumes the highest supported MSAA is a power of 2 */
   msaa_mode = util_next_power_of_two(st->ctx->Const.MaxFramebufferSamples);
   assert(!(num_samples > msaa_mode)); /* be safe from infinite loops */

   /**
    * Walk down from the highest MSAA mode to the requested num_samples,
    * remembering the smallest supported mode that still satisfies the
    * request.
    */
   for (; msaa_mode >= num_samples; msaa_mode = msaa_mode / 2) {
      /**
       * For ARB_framebuffer_no_attachment, a format of
       * PIPE_FORMAT_NONE indicates which sample counts are
       * supported for a framebuffer with no attachment, so the
       * driver's callback must account for this.
       */
      if (screen->is_format_supported(screen, PIPE_FORMAT_NONE,
                                      PIPE_TEXTURE_2D, msaa_mode,
                                      PIPE_BIND_RENDER_TARGET))
         quantized_samples = msaa_mode;
   }
   return quantized_samples;
}
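A walk-through with hypothetical caps makes the overwrite-in-a-loop idiom
clearer:

/* Suppose MaxFramebufferSamples == 16 and num_samples == 3.  msaa_mode
 * starts at util_next_power_of_two(16) == 16 and halves while it stays
 * >= 3, so the loop tests 16, 8 and 4.  Each supported mode overwrites
 * quantized_samples, so the final value is the smallest supported count
 * that still satisfies the request: 4 if supported, otherwise 8,
 * otherwise 16, otherwise 0. */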
Example #4
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
				 struct r600_texture *rtex,
				 struct r600_cmask_info *out)
{
	unsigned cmask_tile_width = 8;
	unsigned cmask_tile_height = 8;
	unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
	unsigned element_bits = 4;
	unsigned cmask_cache_bits = 1024;
	unsigned num_pipes = rscreen->tiling_info.num_channels;
	unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;

	unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
	unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
	unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
	unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
	unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;

	unsigned pitch_elements = align(rtex->surface.npix_x, macro_tile_width);
	unsigned height = align(rtex->surface.npix_y, macro_tile_height);

	unsigned base_align = num_pipes * pipe_interleave_bytes;
	unsigned slice_bytes =
		((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;

	assert(macro_tile_width % 128 == 0);
	assert(macro_tile_height % 128 == 0);

	out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
	out->alignment = MAX2(256, base_align);
	out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
		    align(slice_bytes, base_align);
}
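Plugging in a hypothetical 4-pipe configuration (num_channels == 4) shows
why the power-of-two rounding and the asserts work out:

/*   elements_per_macro_tile = (1024 / 4) * 4              = 1024
 *   pixels_per_macro_tile   = 1024 * (8 * 8)              = 65536
 *   sqrt_pixels_per_macro_tile                            = 256
 *   macro_tile_width        = util_next_power_of_two(256) = 256
 *   macro_tile_height       = 65536 / 256                 = 256
 * Both are multiples of 128, so the asserts hold; the surface is padded
 * to 256x256-pixel macrotiles before the CMASK slice size is computed. */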
Example #5
/**
 * Cube layout used on i915 and for non-compressed textures on i945.
 */
static void
i9x5_texture_layout_cube(struct i915_texture *tex)
{
   struct pipe_resource *pt = &tex->b.b;
   unsigned width = util_next_power_of_two(pt->width0);
   const unsigned nblocks = util_format_get_nblocksx(pt->format, width);
   unsigned level;
   unsigned face;

   assert(pt->width0 == pt->height0); /* cubemap images are square */

   /* double pitch for cube layouts */
   tex->stride = align(nblocks * util_format_get_blocksize(pt->format) * 2, 4);
   tex->total_nblocksy = nblocks * 4;

   for (level = 0; level <= pt->last_level; level++)
      i915_texture_set_level_info(tex, level, 6);

   for (face = 0; face < 6; face++) {
      unsigned x = initial_offsets[face][0] * nblocks;
      unsigned y = initial_offsets[face][1] * nblocks;
      unsigned d = nblocks;

      for (level = 0; level <= pt->last_level; level++) {
         i915_texture_set_image_offset(tex, level, face, x, y);
         d >>= 1;
         x += step_offsets[face][0] * d;
         y += step_offsets[face][1] * d;
      }
   }
}
Example #6
static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource_texture *rtex)
{
	struct pipe_resource *ptex = &rtex->resource.base.b;
	unsigned long w, h, pitch, size, layer_size, i, offset;

	rtex->bpt = util_format_get_blocksize(ptex->format);
	for (i = 0, offset = 0; i <= ptex->last_level; i++) {
		w = u_minify(ptex->width0, i);
		h = u_minify(ptex->height0, i);
		h = util_next_power_of_two(h);
		pitch = util_format_get_stride(ptex->format, align(w, 64));
		pitch = align(pitch, 256);
		layer_size = pitch * h;
		if (ptex->target == PIPE_TEXTURE_CUBE)
			size = layer_size * 6;
		else
			size = layer_size * u_minify(ptex->depth0, i);
		rtex->offset[i] = offset;
		rtex->layer_size[i] = layer_size;
		rtex->pitch[i] = pitch;
		rtex->width[i] = w;
		rtex->height[i] = h;
		offset += size;
	}
	rtex->size = offset;
}
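A worked example for one level, assuming a 100x100 RGBA8 texture
(bpt == 4):

/*   pitch      = util_format_get_stride(format, align(100, 64))
 *              = 128 * 4 = 512 bytes, already 256-byte aligned
 *   h          = util_next_power_of_two(100) = 128
 *   layer_size = 512 * 128 = 64 KiB
 * Only the height is rounded up to a power of two; the width is covered
 * by the 64-pixel / 256-byte pitch alignment instead. */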
Example #7
static void
i945_texture_layout_3d(struct i915_texture *tex)
{
   struct pipe_resource *pt = &tex->b.b;
   unsigned width = util_next_power_of_two(pt->width0);
   unsigned height = util_next_power_of_two(pt->height0);
   unsigned depth = util_next_power_of_two(pt->depth0);
   unsigned nblocksy = util_format_get_nblocksy(pt->format, height);
   unsigned pack_x_pitch, pack_x_nr;
   unsigned pack_y_pitch;
   unsigned level;

   tex->stride = align(util_format_get_stride(pt->format, width), 4);
   tex->total_nblocksy = 0;

   pack_y_pitch = MAX2(nblocksy, 2);
   pack_x_pitch = tex->stride / util_format_get_blocksize(pt->format);
   pack_x_nr = 1;

   for (level = 0; level <= pt->last_level; level++) {
      int x = 0;
      int y = 0;
      unsigned q, j;

      i915_texture_set_level_info(tex, level, depth);

      for (q = 0; q < depth;) {
         for (j = 0; j < pack_x_nr && q < depth; j++, q++) {
            i915_texture_set_image_offset(tex, level, q, x, y + tex->total_nblocksy);
            x += pack_x_pitch;
         }

         x = 0;
         y += pack_y_pitch;
      }

      tex->total_nblocksy += y;

      if (pack_x_pitch > 4) {
         pack_x_pitch >>= 1;
         pack_x_nr <<= 1;
         assert(pack_x_pitch * pack_x_nr * util_format_get_blocksize(pt->format) <= tex->stride);
      }

      if (pack_y_pitch > 2) {
         pack_y_pitch >>= 1;
      }
   }
}
Example #8
static unsigned mip_minify(unsigned size, unsigned level)
{
	unsigned val;
	val = u_minify(size, level);
	if (level > 0)
		val = util_next_power_of_two(val);
	return val;
}
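Usage example for an NPOT size:

/*   mip_minify(100, 0) -> 100  (the base level keeps its NPOT size)
 *   mip_minify(100, 1) -> 64   (u_minify gives 50, rounded up to POT)
 *   mip_minify(100, 2) -> 32   (25 -> 32)
 * Every level past the base ends up power-of-two sized. */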
Example #9
static void
i915_texture_layout_3d(struct i915_texture *tex)
{
   struct pipe_resource *pt = &tex->b.b;
   unsigned level;

   unsigned width = util_next_power_of_two(pt->width0);
   unsigned height = util_next_power_of_two(pt->height0);
   unsigned depth = util_next_power_of_two(pt->depth0);
   unsigned nblocksy = util_format_get_nblocksy(pt->format, height);
   unsigned stack_nblocksy = 0;

   /* Calculate the size of a single slice. 
    */
   tex->stride = align(util_format_get_stride(pt->format, width), 4);

   /* XXX: hardware expects/requires 9 levels at minimum.
    */
   for (level = 0; level <= MAX2(8, pt->last_level); level++) {
      i915_texture_set_level_info(tex, level, depth);

      stack_nblocksy += MAX2(2, nblocksy);

      width = u_minify(width, 1);
      height = u_minify(height, 1);
      nblocksy = util_format_get_nblocksy(pt->format, height);
   }

   /* Fixup depth image_offsets: 
    */
   for (level = 0; level <= pt->last_level; level++) {
      unsigned i;
      for (i = 0; i < depth; i++) 
         i915_texture_set_image_offset(tex, level, i, 0, i * stack_nblocksy);

      depth = u_minify(depth, 1);
   }

   /* Multiply slice size by texture depth for total size.  It's
    * remarkable how wasteful of memory the i915 texture layouts
    * are.  They are largely fixed in the i945.
    */
   tex->total_nblocksy = stack_nblocksy * util_next_power_of_two(pt->depth0);
}
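To put numbers on the waste the comment mentions, take a hypothetical
128x128x32 RGBA8 texture:

/* stack_nblocksy sums the per-level heights 128 + 64 + 32 + 16 + 8 + 4
 * + 2 + 2 + 2 = 258 rows (the tail is clamped by MAX2 and the 9-level
 * minimum), and total_nblocksy = 258 * 32 because every level reserves
 * room for all 32 slices -- even level 5, which only has one slice of
 * meaningful data. */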
Example #10
struct pipe_video_buffer *
vl_video_buffer_create(struct pipe_context *pipe,
                       const struct pipe_video_buffer *tmpl)
{
   const enum pipe_format *resource_formats;
   struct pipe_video_buffer templat, *result;
   bool pot_buffers;

   assert(pipe);
   assert(tmpl->width > 0 && tmpl->height > 0);

   pot_buffers = !pipe->screen->get_video_param
   (
      pipe->screen,
      PIPE_VIDEO_PROFILE_UNKNOWN,
      PIPE_VIDEO_CAP_NPOT_TEXTURES
   );

   resource_formats = vl_video_buffer_formats(pipe->screen, tmpl->buffer_format);
   if (!resource_formats)
      return NULL;

   templat = *tmpl;
   templat.width = pot_buffers ? util_next_power_of_two(tmpl->width)
                 : align(tmpl->width, VL_MACROBLOCK_WIDTH);
   templat.height = pot_buffers ? util_next_power_of_two(tmpl->height)
                  : align(tmpl->height, VL_MACROBLOCK_HEIGHT);

   if (tmpl->interlaced)
      templat.height /= 2;

   result = vl_video_buffer_create_ex
   (
      pipe, &templat, resource_formats,
      1, tmpl->interlaced ? 2 : 1, PIPE_USAGE_STATIC
   );


   if (result && tmpl->interlaced)
      result->height *= 2;

   return result;
}
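For interlaced content the height is halved before allocation and restored
afterwards. A sketch of what happens for a hypothetical 720x480 template:

/* The interlaced template is allocated as two field buffers (the 2
 * passed to vl_video_buffer_create_ex) of 240 lines each, rounded up
 * as above, and result->height is then doubled back so the caller
 * still sees a 480-line buffer. */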
Example #11
File: cayman_msaa.c Project: dumbbell/mesa
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
			     int ps_iter_samples, int overrast_samples)
{
	int setup_samples = nr_samples > 1 ? nr_samples :
			    overrast_samples > 1 ? overrast_samples : 0;

	if (setup_samples > 1) {
		/* indexed by log2(nr_samples) */
		unsigned max_dist[] = {
			0,
			eg_max_dist_2x,
			eg_max_dist_4x,
			cm_max_dist_8x,
			cm_max_dist_16x
		};
		unsigned log_samples = util_logbase2(setup_samples);
		unsigned log_ps_iter_samples =
			util_logbase2(util_next_power_of_two(ps_iter_samples));

		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
		radeon_emit(cs, S_028BDC_LAST_PIXEL(1) |
			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */

		if (nr_samples > 1) {
			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
					     EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
		} else if (overrast_samples > 1) {
			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
		}
	} else {
		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
		radeon_emit(cs, S_028BDC_LAST_PIXEL(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */

		radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
	}
}
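The next_power_of_two/logbase2 pairing turns an arbitrary sample count
into a valid log2 register field; for a hypothetical request:

/* ps_iter_samples == 3 is not a power of two, so
 * util_next_power_of_two(3) == 4 and util_logbase2(4) == 2;
 * S_028804_PS_ITER_SAMPLES() is programmed with the rounded-up log2
 * instead of an invalid in-between value. */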
Example #12
static unsigned r300_texture_get_nblocksy(struct r300_resource *tex,
                                          unsigned level,
                                          boolean *out_aligned_for_cbzb)
{
    unsigned height, tile_height;

    height = u_minify(tex->tex.height0, level);

    /* Mipmapped and 3D textures must have their height aligned to POT. */
    if ((tex->b.b.b.target != PIPE_TEXTURE_1D &&
         tex->b.b.b.target != PIPE_TEXTURE_2D &&
         tex->b.b.b.target != PIPE_TEXTURE_RECT) ||
        tex->b.b.b.last_level != 0) {
        height = util_next_power_of_two(height);
    }

    if (util_format_is_plain(tex->b.b.b.format)) {
        tile_height = r300_get_pixel_alignment(tex->b.b.b.format,
                                               tex->b.b.b.nr_samples,
                                               tex->tex.microtile,
                                               tex->tex.macrotile[level],
                                               DIM_HEIGHT, 0);
        height = align(height, tile_height);

        /* See if the CBZB clear can be used on the buffer,
         * taking the texture size into account. */
        if (out_aligned_for_cbzb) {
            if (tex->tex.macrotile[level]) {
                /* When clearing, the layer (width*height) is horizontally split
                 * into two, and the upper and lower halves are cleared by the CB
                 * and ZB units, respectively. Therefore, the number of macrotiles
                 * in the Y direction must be even. */

                /* Align the height so that there is an even number of macrotiles.
                 * Do so for 3 or more macrotiles in the Y direction. */
                if (level == 0 && tex->b.b.b.last_level == 0 &&
                    (tex->b.b.b.target == PIPE_TEXTURE_1D ||
                     tex->b.b.b.target == PIPE_TEXTURE_2D ||
                     tex->b.b.b.target == PIPE_TEXTURE_RECT) &&
                    height >= tile_height * 3) {
                    height = align(height, tile_height * 2);
                }

                *out_aligned_for_cbzb = height % (tile_height * 2) == 0;
            } else {
                *out_aligned_for_cbzb = FALSE;
            }
        }
    }

    return util_format_get_nblocksy(tex->b.b.b.format, height);
}
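An example of the POT rule at the top of the function:

/* A 2D texture 100 pixels tall with mipmaps gets
 * height = util_next_power_of_two(100) == 128 at level 0; the same
 * texture without mipmaps keeps height == 100 and is only padded to
 * the tile_height granularity. */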
Example #13
struct pipe_video_buffer *
vl_video_buffer_create(struct pipe_context *pipe,
                       enum pipe_format buffer_format,
                       enum pipe_video_chroma_format chroma_format,
                       unsigned width, unsigned height)
{
   const enum pipe_format *resource_formats;
   struct pipe_video_buffer *result;
   unsigned buffer_width, buffer_height;
   bool pot_buffers;

   assert(pipe);
   assert(width > 0 && height > 0);

   pot_buffers = !pipe->screen->get_video_param
   (
      pipe->screen,
      PIPE_VIDEO_PROFILE_UNKNOWN,
      PIPE_VIDEO_CAP_NPOT_TEXTURES
   );

   resource_formats = vl_video_buffer_formats(pipe->screen, buffer_format);
   if (!resource_formats)
      return NULL;

   buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
   buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);

   result = vl_video_buffer_create_ex
   (
      pipe, buffer_width, buffer_height, 1,
      chroma_format, resource_formats, PIPE_USAGE_STATIC
   );
   if (result)
      result->buffer_format = buffer_format;

   return result;
}
Example #14
File: nv50_screen.c Project: UIKit0/mesa-1
static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,
      uint64_t *tls_size)
{
   struct nouveau_device *dev = screen->base.device;
   int ret;

   screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) *
         ONE_TEMP_SIZE;
   if (nouveau_mesa_debug)
      debug_printf("allocating space for %u temps\n",
            util_next_power_of_two(tls_space / ONE_TEMP_SIZE));
   *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) *
         screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;

   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
                        *tls_size, NULL, &screen->tls_bo);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
      return ret;
   }

   return 0;
}
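With hypothetical numbers, both power-of-two roundings are visible:

/* If tls_space / ONE_TEMP_SIZE == 6, cur_tls_space is sized for
 * util_next_power_of_two(6) == 8 temps, and a chip with 14 TPs is
 * budgeted as util_next_power_of_two(14) == 16 -- some overallocation
 * in exchange for power-of-two strides in the TLS addressing. */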
Example #15
void r600_test_dma(struct r600_common_screen *rscreen)
{
	struct pipe_screen *screen = &rscreen->b;
	struct pipe_context *ctx = screen->context_create(screen, NULL, 0);
	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
	uint64_t max_alloc_size;
	unsigned i, iterations, num_partial_copies, max_levels, max_tex_side;
	unsigned num_pass = 0, num_fail = 0;

	max_levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
	max_tex_side = 1 << (max_levels - 1);

	/* Max 128 MB allowed for both textures. */
	max_alloc_size = 128 * 1024 * 1024;

	/* the seed for random test parameters */
	srand(0x9b47d95b);
	/* the seed for random pixel data */
	s_rand_xorshift128plus(seed_xorshift128plus, false);

	iterations = 1000000000; /* just kill it when you are bored */
	num_partial_copies = 30;

	/* These parameters are randomly generated per test:
	 * - whether to do one whole-surface copy or N partial copies per test
	 * - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)
	 * - which texture dimensions to use
	 * - whether to use VRAM (all tiling modes) and GTT (staging, linear
	 *   only) allocations
	 * - random initial pixels in src
	 * - generate random subrectangle copies for partial blits
	 */
	for (i = 0; i < iterations; i++) {
		struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;
		struct r600_texture *rdst;
		struct r600_texture *rsrc;
		struct cpu_texture src_cpu, dst_cpu;
		unsigned bpp, max_width, max_height, max_depth, j, num;
		unsigned gfx_blits = 0, dma_blits = 0, max_tex_side_gen;
		unsigned max_tex_layers;
		bool pass;
		bool do_partial_copies = rand() & 1;

		/* generate a random test case */
		tsrc.target = tdst.target = PIPE_TEXTURE_2D_ARRAY;
		tsrc.depth0 = tdst.depth0 = 1;

		bpp = 1 << (rand() % 5);
		tsrc.format = tdst.format = get_format_from_bpp(bpp);

		max_tex_side_gen = generate_max_tex_side(max_tex_side);
		max_tex_layers = rand() % 4 ? 1 : 5;

		tsrc.width0 = (rand() % max_tex_side_gen) + 1;
		tsrc.height0 = (rand() % max_tex_side_gen) + 1;
		tsrc.array_size = (rand() % max_tex_layers) + 1;

		/* Have a 1/4 chance of getting power-of-two dimensions. */
		if (rand() % 4 == 0) {
			tsrc.width0 = util_next_power_of_two(tsrc.width0);
			tsrc.height0 = util_next_power_of_two(tsrc.height0);
		}

		if (!do_partial_copies) {
			/* whole-surface copies only, same dimensions */
			tdst = tsrc;
		} else {
			max_tex_side_gen = generate_max_tex_side(max_tex_side);
			max_tex_layers = rand() % 4 ? 1 : 5;

			/* many partial copies, dimensions can be different */
			tdst.width0 = (rand() % max_tex_side_gen) + 1;
			tdst.height0 = (rand() % max_tex_side_gen) + 1;
			tdst.array_size = (rand() % max_tex_layers) + 1;

			/* Have a 1/4 chance of getting power-of-two dimensions. */
			if (rand() % 4 == 0) {
				tdst.width0 = util_next_power_of_two(tdst.width0);
				tdst.height0 = util_next_power_of_two(tdst.height0);
			}
		}

		/* check texture sizes */
		if ((uint64_t)tsrc.width0 * tsrc.height0 * tsrc.array_size * bpp +
		    (uint64_t)tdst.width0 * tdst.height0 * tdst.array_size * bpp >
		    max_alloc_size) {
			/* too large, try again */
			i--;
			continue;
		}

		/* VRAM + the tiling mode depends on dimensions (3/4 of cases),
		 * or GTT + linear only (1/4 of cases)
		 */
		tsrc.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;
		tdst.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;

		/* Allocate textures (both the GPU and CPU copies).
		 * The CPU will emulate what the GPU should be doing.
		 */
		src = screen->resource_create(screen, &tsrc);
		dst = screen->resource_create(screen, &tdst);
		assert(src);
		assert(dst);
		rdst = (struct r600_texture*)dst;
		rsrc = (struct r600_texture*)src;
		alloc_cpu_texture(&src_cpu, &tsrc, bpp);
		alloc_cpu_texture(&dst_cpu, &tdst, bpp);

		printf("%4u: dst = (%5u x %5u x %u, %s), "
		       " src = (%5u x %5u x %u, %s), bpp = %2u, ",
		       i, tdst.width0, tdst.height0, tdst.array_size,
		       array_mode_to_string(rscreen, &rdst->surface),
		       tsrc.width0, tsrc.height0, tsrc.array_size,
		       array_mode_to_string(rscreen, &rsrc->surface), bpp);
		fflush(stdout);

		/* set src pixels */
		set_random_pixels(ctx, src, &src_cpu);

		/* clear dst pixels */
		rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true);
		memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);

		/* preparation */
		max_width = MIN2(tsrc.width0, tdst.width0);
		max_height = MIN2(tsrc.height0, tdst.height0);
		max_depth = MIN2(tsrc.array_size, tdst.array_size);

		num = do_partial_copies ? num_partial_copies : 1;
		for (j = 0; j < num; j++) {
			int width, height, depth;
			int srcx, srcy, srcz, dstx, dsty, dstz;
			struct pipe_box box;
			unsigned old_num_draw_calls = rctx->num_draw_calls;
			unsigned old_num_dma_calls = rctx->num_dma_calls;

			if (!do_partial_copies) {
				/* copy whole src to dst */
				width = max_width;
				height = max_height;
				depth = max_depth;

				srcx = srcy = srcz = dstx = dsty = dstz = 0;
			} else {
				/* random sub-rectangle copies from src to dst */
				depth = (rand() % max_depth) + 1;
				srcz = rand() % (tsrc.array_size - depth + 1);
				dstz = rand() % (tdst.array_size - depth + 1);

				/* special code path to hit the tiled partial copies */
				if (!rsrc->surface.is_linear &&
				    !rdst->surface.is_linear &&
				    rand() & 1) {
					if (max_width < 8 || max_height < 8)
						continue;
					width = ((rand() % (max_width / 8)) + 1) * 8;
					height = ((rand() % (max_height / 8)) + 1) * 8;

					srcx = rand() % (tsrc.width0 - width + 1) & ~0x7;
					srcy = rand() % (tsrc.height0 - height + 1) & ~0x7;

					dstx = rand() % (tdst.width0 - width + 1) & ~0x7;
					dsty = rand() % (tdst.height0 - height + 1) & ~0x7;
				} else {
					/* just make sure that it doesn't divide by zero */
					assert(max_width > 0 && max_height > 0);

					width = (rand() % max_width) + 1;
					height = (rand() % max_height) + 1;

					srcx = rand() % (tsrc.width0 - width + 1);
					srcy = rand() % (tsrc.height0 - height + 1);

					dstx = rand() % (tdst.width0 - width + 1);
					dsty = rand() % (tdst.height0 - height + 1);
				}

				/* special code path to hit out-of-bounds reads in L2T */
				if (rsrc->surface.is_linear &&
				    !rdst->surface.is_linear &&
				    rand() % 4 == 0) {
					srcx = 0;
					srcy = 0;
					srcz = 0;
				}
			}

			/* GPU copy */
			u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
			rctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box);

			/* See which engine was used. */
			gfx_blits += rctx->num_draw_calls > old_num_draw_calls;
			dma_blits += rctx->num_dma_calls > old_num_dma_calls;

			/* CPU copy */
			util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride,
				      dst_cpu.layer_stride,
				      dstx, dsty, dstz, width, height, depth,
				      src_cpu.ptr, src_cpu.stride,
				      src_cpu.layer_stride,
				      srcx, srcy, srcz);
		}

		pass = compare_textures(ctx, dst, &dst_cpu, bpp);
		if (pass)
			num_pass++;
		else
			num_fail++;

		printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",
		       gfx_blits, dma_blits, pass ? "pass" : "fail",
		       num_pass, num_pass+num_fail);

		/* cleanup */
		pipe_resource_reference(&src, NULL);
		pipe_resource_reference(&dst, NULL);
		free(src_cpu.ptr);
		free(dst_cpu.ptr);
	}

	ctx->destroy(ctx);
	exit(0);
}
Example #16
static void
i945_texture_layout_2d(struct i915_texture *tex)
{
   struct pipe_resource *pt = &tex->b.b;
   int align_x = 4, align_y = 2;
   unsigned level;
   unsigned x = 0;
   unsigned y = 0;
   unsigned width = util_next_power_of_two(pt->width0);
   unsigned height = util_next_power_of_two(pt->height0);
   unsigned nblocksx = util_format_get_nblocksx(pt->format, width);
   unsigned nblocksy = util_format_get_nblocksy(pt->format, height);

   if (util_format_is_s3tc(pt->format)) {
      align_x = 1;
      align_y = 1;
   }

   tex->stride = align(util_format_get_stride(pt->format, width), 4);

   /* May need to adjust pitch to accommodate the placement of
    * the 2nd mipmap level.  This occurs when the alignment
    * constraints of mipmap placement push the right edge of the
    * 2nd mipmap level out past the width of its parent.
    */
   if (pt->last_level > 0) {
      unsigned mip1_nblocksx =
         align_nblocksx(pt->format, u_minify(width, 1), align_x) +
         util_format_get_nblocksx(pt->format, u_minify(width, 2));

      if (mip1_nblocksx > nblocksx)
         tex->stride = mip1_nblocksx * util_format_get_blocksize(pt->format);
   }

   /* Pitch must be a whole number of dwords
    */
   tex->stride = align(tex->stride, 64);
   tex->total_nblocksy = 0;

   for (level = 0; level <= pt->last_level; level++) {
      i915_texture_set_level_info(tex, level, 1);
      i915_texture_set_image_offset(tex, level, 0, x, y);

      /* Because the images are packed better, the final offset
       * might not be the maximal one:
       */
      tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy);

      /* Layout_below: step right after second mipmap level.
       */
      if (level == 1) {
         x += nblocksx;
      } else {
         y += nblocksy;
      }

      width  = u_minify(width, 1);
      height = u_minify(height, 1);
      nblocksx = align_nblocksx(pt->format, width, align_x);
      nblocksy = align_nblocksy(pt->format, height, align_y);
   }
}
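The pitch fixup only triggers for very small base widths; a worked
example:

/* A 4-pixel-wide mipmapped RGBA8 texture has nblocksx == 4, but
 * level 1 (2 blocks) is padded to align_x == 4 and level 2 adds 1
 * more, so mip1_nblocksx == 5 > 4: the stride grows to 5 * 4 = 20
 * bytes before the final align(stride, 64) rounds it up to 64. */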
Example #17
void r300_texture_desc_init(struct r300_screen *rscreen,
                            struct r300_resource *tex,
                            const struct pipe_resource *base)
{
    tex->b.b.target = base->target;
    tex->b.b.format = base->format;
    tex->b.b.width0 = base->width0;
    tex->b.b.height0 = base->height0;
    tex->b.b.depth0 = base->depth0;
    tex->b.b.array_size = base->array_size;
    tex->b.b.last_level = base->last_level;
    tex->b.b.nr_samples = base->nr_samples;
    tex->tex.width0 = base->width0;
    tex->tex.height0 = base->height0;
    tex->tex.depth0 = base->depth0;

    /* There is a CB memory addressing hardware bug that limits the width
     * of the MSAA buffer in some cases in R520. In order to get around it,
     * the following code lowers the sample count depending on the format and
     * the width.
     *
     * The only catch is that all MSAA colorbuffers and a zbuffer which are
     * supposed to be used together should always be bound together. Only
     * then the correct minimum sample count of all bound buffers is used
     * for rendering. */
    if (rscreen->caps.is_r500) {
        /* FP16 6x MSAA buffers are limited to a width of 1360 pixels. */
        if ((tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
             tex->b.b.format == PIPE_FORMAT_R16G16B16X16_FLOAT) &&
            tex->b.b.nr_samples == 6 && tex->b.b.width0 > 1360) {
            tex->b.b.nr_samples = 4;
        }

        /* FP16 4x MSAA buffers are limited to a width of 2048 pixels. */
        if ((tex->b.b.format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
             tex->b.b.format == PIPE_FORMAT_R16G16B16X16_FLOAT) &&
            tex->b.b.nr_samples == 4 && tex->b.b.width0 > 2048) {
            tex->b.b.nr_samples = 2;
        }
    }

    /* 32-bit 6x MSAA buffers are limited to a width of 2720 pixels.
     * This applies to all R300-R500 cards. */
    if (util_format_get_blocksizebits(tex->b.b.format) == 32 &&
        !util_format_is_depth_or_stencil(tex->b.b.format) &&
        tex->b.b.nr_samples == 6 && tex->b.b.width0 > 2720) {
        tex->b.b.nr_samples = 4;
    }

    r300_setup_flags(tex);

    /* Align a 3D NPOT texture to POT. */
    if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) {
        tex->tex.width0 = util_next_power_of_two(tex->tex.width0);
        tex->tex.height0 = util_next_power_of_two(tex->tex.height0);
        tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0);
    }

    /* Setup tiling. */
    if (tex->tex.microtile == RADEON_LAYOUT_UNKNOWN) {
        r300_setup_tiling(rscreen, tex);
    }

    r300_setup_cbzb_flags(rscreen, tex);

    /* Setup the miptree description. */
    r300_setup_miptree(rscreen, tex, TRUE);
    /* If the required buffer size is larger than the given max size,
     * try again without the alignment for the CBZB clear. */
    if (tex->buf && tex->tex.size_in_bytes > tex->buf->size) {
        r300_setup_miptree(rscreen, tex, FALSE);

        /* Make sure the buffer we got is large enough. */
        if (tex->tex.size_in_bytes > tex->buf->size) {
            fprintf(stderr,
                "r300: I got a pre-allocated buffer to use it as a texture "
                "storage, but the buffer is too small. I'll use the buffer "
                "anyway, because I can't crash here, but it's dangerous. "
                "This can be a DDX bug. Got: %iB, Need: %iB, Info:\n",
                tex->buf->size, tex->tex.size_in_bytes);
            r300_tex_print_info(tex, "texture_desc_init");
            /* Ooops, what now. Apps will break if we fail this,
             * so just pretend everything's okay. */
        }
    }

    r300_setup_hyperz_properties(rscreen, tex);
    r300_setup_cmask_properties(rscreen, tex);

    if (SCREEN_DBG_ON(rscreen, DBG_TEX))
        r300_tex_print_info(tex, "texture_desc_init");
}
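Only 3D NPOT textures get their dimensions rounded in this path; for
example:

/* A 3D 100x100x10 texture with tex.is_npot set is padded to
 * 128x128x16 before the miptree is laid out; 1D/2D targets keep
 * their NPOT sizes. */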
Example #18
struct pipe_video_decoder *
vl_create_mpeg12_decoder(struct pipe_context *context,
                         enum pipe_video_profile profile,
                         enum pipe_video_entrypoint entrypoint,
                         enum pipe_video_chroma_format chroma_format,
                         unsigned width, unsigned height, unsigned max_references,
                         bool expect_chunked_decode)
{
   const unsigned block_size_pixels = VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
   const struct format_config *format_config;
   struct vl_mpeg12_decoder *dec;

   assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);

   dec = CALLOC_STRUCT(vl_mpeg12_decoder);

   if (!dec)
      return NULL;

   dec->base.context = context;
   dec->base.profile = profile;
   dec->base.entrypoint = entrypoint;
   dec->base.chroma_format = chroma_format;
   dec->base.width = width;
   dec->base.height = height;
   dec->base.max_references = max_references;

   dec->base.destroy = vl_mpeg12_destroy;
   dec->base.begin_frame = vl_mpeg12_begin_frame;
   dec->base.decode_macroblock = vl_mpeg12_decode_macroblock;
   dec->base.decode_bitstream = vl_mpeg12_decode_bitstream;
   dec->base.end_frame = vl_mpeg12_end_frame;
   dec->base.flush = vl_mpeg12_flush;

   dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
   dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
   dec->width_in_macroblocks = align(dec->base.width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH;
   dec->expect_chunked_decode = expect_chunked_decode;

   /* TODO: Implement 422, 444 */
   assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);

   if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
      dec->chroma_width = dec->base.width / 2;
      dec->chroma_height = dec->base.height / 2;
      dec->num_blocks = dec->num_blocks * 2;
   } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
      dec->chroma_width = dec->base.width;
      dec->chroma_height = dec->base.height / 2;
      dec->num_blocks = dec->num_blocks * 2 + dec->num_blocks;
   } else {
      dec->chroma_width = dec->base.width;
      dec->chroma_height = dec->base.height;
      dec->num_blocks = dec->num_blocks * 3;
   }

   dec->quads = vl_vb_upload_quads(dec->base.context);
   dec->pos = vl_vb_upload_pos(
      dec->base.context,
      dec->base.width / VL_MACROBLOCK_WIDTH,
      dec->base.height / VL_MACROBLOCK_HEIGHT
   );

   dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context);
   dec->ves_mv = vl_vb_get_ves_mv(dec->base.context);

   switch (entrypoint) {
   case PIPE_VIDEO_ENTRYPOINT_BITSTREAM:
      format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs);
      break;

   case PIPE_VIDEO_ENTRYPOINT_IDCT:
      format_config = find_format_config(dec, idct_format_config, num_idct_format_configs);
      break;

   case PIPE_VIDEO_ENTRYPOINT_MC:
      format_config = find_format_config(dec, mc_format_config, num_mc_format_configs);
      break;

   default:
      assert(0);
      FREE(dec);
      return NULL;
   }

   if (!format_config) {
      FREE(dec);
      return NULL;
   }

   if (!init_zscan(dec, format_config))
      goto error_zscan;

   if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
      if (!init_idct(dec, format_config))
         goto error_sources;
   } else {
      if (!init_mc_source_widthout_idct(dec, format_config))
         goto error_sources;
   }

   if (!vl_mc_init(&dec->mc_y, dec->base.context, dec->base.width, dec->base.height,
                   VL_MACROBLOCK_HEIGHT, format_config->mc_scale,
                   mc_vert_shader_callback, mc_frag_shader_callback, dec))
      goto error_mc_y;

   // TODO
   if (!vl_mc_init(&dec->mc_c, dec->base.context, dec->base.width, dec->base.height,
                   VL_BLOCK_HEIGHT, format_config->mc_scale,
                   mc_vert_shader_callback, mc_frag_shader_callback, dec))
      goto error_mc_c;

   if (!init_pipe_state(dec))
      goto error_pipe_state;

   return &dec->base;

error_pipe_state:
   vl_mc_cleanup(&dec->mc_c);

error_mc_c:
   vl_mc_cleanup(&dec->mc_y);

error_mc_y:
   if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
      vl_idct_cleanup(&dec->idct_y);
      vl_idct_cleanup(&dec->idct_c);
      dec->idct_source->destroy(dec->idct_source);
   }
   dec->mc_source->destroy(dec->mc_source);

error_sources:
   vl_zscan_cleanup(&dec->zscan_y);
   vl_zscan_cleanup(&dec->zscan_c);

error_zscan:
   FREE(dec);
   return NULL;
}
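Worked numbers for a 1920x1080 4:2:0 stream (VL_BLOCK_WIDTH ==
VL_BLOCK_HEIGHT == 8, so block_size_pixels == 64):

/*   blocks_per_line      = MAX2(util_next_power_of_two(1920) / 64, 4)
 *                        = MAX2(2048 / 64, 4)   = 32
 *   num_blocks (luma)    = 1920 * 1080 / 64     = 32400
 *   num_blocks (+chroma) = 32400 * 2            = 64800
 *   width_in_macroblocks = align(1920, 16) / 16 = 120 */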
Example #19
static void
vc4_setup_slices(struct vc4_resource *rsc)
{
    struct pipe_resource *prsc = &rsc->base.b;
    uint32_t width = prsc->width0;
    uint32_t height = prsc->height0;
    uint32_t pot_width = util_next_power_of_two(width);
    uint32_t pot_height = util_next_power_of_two(height);
    uint32_t offset = 0;
    uint32_t utile_w = vc4_utile_width(rsc->cpp);
    uint32_t utile_h = vc4_utile_height(rsc->cpp);

    for (int i = prsc->last_level; i >= 0; i--) {
        struct vc4_resource_slice *slice = &rsc->slices[i];

        uint32_t level_width, level_height;
        if (i == 0) {
            level_width = width;
            level_height = height;
        } else {
            level_width = u_minify(pot_width, i);
            level_height = u_minify(pot_height, i);
        }

        if (rsc->tiled == VC4_TILING_FORMAT_LINEAR) {
            slice->tiling = VC4_TILING_FORMAT_LINEAR;
            level_width = align(level_width, 16);
        } else {
            if (vc4_size_is_lt(level_width, level_height,
                               rsc->cpp)) {
                slice->tiling = VC4_TILING_FORMAT_LT;
                level_width = align(level_width, utile_w);
                level_height = align(level_height, utile_h);
            } else {
                slice->tiling = VC4_TILING_FORMAT_T;
                level_width = align(level_width,
                                    4 * 2 * utile_w);
                level_height = align(level_height,
                                     4 * 2 * utile_h);
            }
        }

        slice->offset = offset;
        slice->stride = level_width * rsc->cpp;
        slice->size = level_height * slice->stride;

        offset += slice->size;
    }

    /* The texture base pointer that has to point to level 0 doesn't have
     * intra-page bits, so we have to align it, and thus shift up all the
     * smaller slices.
     */
    uint32_t page_align_offset = (align(rsc->slices[0].offset, 4096) -
                                  rsc->slices[0].offset);
    if (page_align_offset) {
        for (int i = 0; i <= prsc->last_level; i++)
            rsc->slices[i].offset += page_align_offset;
    }

    /* Cube map faces appear as whole miptrees at a page-aligned offset
     * from the first face's miptree.
     */
    if (prsc->target == PIPE_TEXTURE_CUBE) {
        rsc->cube_map_stride = align(rsc->slices[0].offset +
                                     rsc->slices[0].size, 4096);
    }
}
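Note that only the sub-levels use the power-of-two dimensions; level 0
keeps its NPOT size. For example:

/* A 100x100 texture with a full mip chain: level 0 stays 100x100,
 * while the smaller levels minify from pot_width x pot_height
 * (128x128), so level 1 is 64x64, level 2 is 32x32, and so on, before
 * each level is padded to its LT/T tiling granularity. */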
Example #20
static INLINE unsigned
get_pot_stride(enum pipe_format format, unsigned width)
{
   return util_next_power_of_two(util_format_get_stride(format, width));
}
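Usage example:

/* For a 4-byte-per-pixel format at width 100 the linear stride is
 * 400 bytes, and get_pot_stride() returns 512.  The rounding applies
 * to the byte stride itself, not to the pixel width. */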
Example #21
void *
r300_texture_transfer_map(struct pipe_context *ctx,
                          struct pipe_resource *texture,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          struct pipe_transfer **transfer)
{
    struct r300_context *r300 = r300_context(ctx);
    struct r300_resource *tex = r300_resource(texture);
    struct r300_transfer *trans;
    boolean referenced_cs, referenced_hw;
    enum pipe_format format = tex->b.b.format;
    char *map;

    referenced_cs =
        r300->rws->cs_is_buffer_referenced(r300->cs, tex->cs_buf, RADEON_USAGE_READWRITE);
    if (referenced_cs) {
        referenced_hw = TRUE;
    } else {
        referenced_hw =
            r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE);
    }

    trans = CALLOC_STRUCT(r300_transfer);
    if (trans) {
        /* Initialize the transfer object. */
        trans->transfer.resource = texture;
        trans->transfer.level = level;
        trans->transfer.usage = usage;
        trans->transfer.box = *box;

        /* If the texture is tiled, we must create a temporary detiled texture
         * for this transfer.
         * Also make write transfers pipelined. */
        if (tex->tex.microtile || tex->tex.macrotile[level] ||
            (referenced_hw && !(usage & PIPE_TRANSFER_READ) &&
             r300_is_blit_supported(texture->format))) {
            struct pipe_resource base;

            if (r300->blitter->running) {
                fprintf(stderr, "r300: ERROR: Blitter recursion in texture_get_transfer.\n");
                os_break();
            }

            memset(&base, 0, sizeof(base));
            base.target = PIPE_TEXTURE_2D;
            base.format = texture->format;
            base.width0 = box->width;
            base.height0 = box->height;
            base.depth0 = 1;
            base.array_size = 1;
            base.usage = PIPE_USAGE_STAGING;
            base.flags = R300_RESOURCE_FLAG_TRANSFER;

            /* We must set the correct texture target and dimensions if needed for a 3D transfer. */
            if (box->depth > 1 && util_max_layer(texture, level) > 0) {
                base.target = texture->target;

                if (base.target == PIPE_TEXTURE_3D) {
                    base.depth0 = util_next_power_of_two(box->depth);
                }
            }

            /* Create the temporary texture. */
            trans->linear_texture = r300_resource(
               ctx->screen->resource_create(ctx->screen,
                                            &base));

            if (!trans->linear_texture) {
                /* Oh crap, the thing can't create the texture.
                 * Let's flush and try again. */
                r300_flush(ctx, 0, NULL);

                trans->linear_texture = r300_resource(
                   ctx->screen->resource_create(ctx->screen,
                                                &base));

                if (!trans->linear_texture) {
                    fprintf(stderr,
                            "r300: Failed to create a transfer object.\n");
                    FREE(trans);
                    return NULL;
                }
            }

            assert(!trans->linear_texture->tex.microtile &&
                   !trans->linear_texture->tex.macrotile[0]);

            /* Set the stride. */
            trans->transfer.stride =
                    trans->linear_texture->tex.stride_in_bytes[0];
            trans->transfer.layer_stride =
                    trans->linear_texture->tex.layer_size_in_bytes[0];

            if (usage & PIPE_TRANSFER_READ) {
                /* We cannot map a tiled texture directly because the data is
                 * in a different order, therefore we do detiling using a blit. */
                r300_copy_from_tiled_texture(ctx, trans);

                /* Always referenced in the blit. */
                r300_flush(ctx, 0, NULL);
            }
        } else {
            /* Unpipelined transfer. */
            trans->transfer.stride = tex->tex.stride_in_bytes[level];
            trans->transfer.layer_stride = tex->tex.layer_size_in_bytes[level];
            trans->offset = r300_texture_get_offset(tex, level, box->z);

            if (referenced_cs &&
                !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                r300_flush(ctx, 0, NULL);
            }
        }
    }

    if (trans->linear_texture) {
        /* The detiled texture is of the same size as the region being mapped
         * (no offset needed). */
        map = r300->rws->buffer_map(trans->linear_texture->cs_buf,
                                    r300->cs, usage);
        if (!map) {
            pipe_resource_reference(
                (struct pipe_resource**)&trans->linear_texture, NULL);
            FREE(trans);
            return NULL;
        }
	*transfer = &trans->transfer;
        return map;
    } else {
        /* Tiling is disabled. */
        map = r300->rws->buffer_map(tex->cs_buf, r300->cs, usage);
        if (!map) {
            FREE(trans);
            return NULL;
        }

	*transfer = &trans->transfer;
        return map + trans->offset +
            box->y / util_format_get_blockheight(format) * trans->transfer.stride +
            box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
    }
}
Example #22
void
st_setup_current(struct st_context *st,
                 const struct st_vertex_program *vp,
                 const struct st_vp_variant *vp_variant,
                 struct pipe_vertex_element *velements,
                 struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers)
{
   struct gl_context *ctx = st->ctx;
   const GLbitfield inputs_read = vp_variant->vert_attrib_mask;

   /* Process values that should have better been uniforms in the application */
   GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx);
   if (curmask) {
      /* vertex program validation must be done before this */
      const struct st_vertex_program *vp = st->vp;
      const ubyte *input_to_index = vp->input_to_index;
      /* For each attribute, upload the maximum possible size. */
      GLubyte data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4];
      GLubyte *cursor = data;
      const unsigned bufidx = (*num_vbuffers)++;
      unsigned max_alignment = 1;

      while (curmask) {
         const gl_vert_attrib attr = u_bit_scan(&curmask);
         const struct gl_array_attributes *const attrib
            = _mesa_draw_current_attrib(ctx, attr);
         const unsigned size = attrib->Format._ElementSize;
         const unsigned alignment = util_next_power_of_two(size);
         max_alignment = MAX2(max_alignment, alignment);
         memcpy(cursor, attrib->Ptr, size);
         if (alignment != size)
            memset(cursor + size, 0, alignment - size);

         init_velement_lowered(vp, velements, &attrib->Format, cursor - data, 0,
                               bufidx, input_to_index[attr]);

         cursor += alignment;
      }

      vbuffer[bufidx].is_user_buffer = false;
      vbuffer[bufidx].buffer.resource = NULL;
      /* vbuffer[bufidx].buffer_offset is set below */
      vbuffer[bufidx].stride = 0;

      /* Use const_uploader for zero-stride vertex attributes, because
       * it may use a better memory placement than stream_uploader.
       * The reason is that zero-stride attributes can be fetched many
       * times (thousands of times), so a better placement is going to
       * perform better.
       */
      struct u_upload_mgr *uploader = st->can_bind_const_buffer_as_vertex ?
                                      st->pipe->const_uploader :
                                      st->pipe->stream_uploader;
      u_upload_data(uploader,
                    0, cursor - data, max_alignment, data,
                    &vbuffer[bufidx].buffer_offset,
                    &vbuffer[bufidx].buffer.resource);
      /* Always unmap. The uploader might use explicit flushes. */
      u_upload_unmap(uploader);
   }
}
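The per-attribute padding is easiest to see with a worked example:

/* A current value of 3 floats has _ElementSize == 12, so its
 * alignment is util_next_power_of_two(12) == 16: 12 bytes are copied,
 * 4 bytes are zero-filled, and the cursor advances by 16.
 * max_alignment then makes the whole upload aligned for the widest
 * attribute seen. */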
Example #23
boolean r300_texture_desc_init(struct r300_screen *rscreen,
                               struct r300_resource *tex,
                               const struct pipe_resource *base)
{
    tex->b.b.b.target = base->target;
    tex->b.b.b.format = base->format;
    tex->b.b.b.width0 = base->width0;
    tex->b.b.b.height0 = base->height0;
    tex->b.b.b.depth0 = base->depth0;
    tex->b.b.b.array_size = base->array_size;
    tex->b.b.b.last_level = base->last_level;
    tex->b.b.b.nr_samples = base->nr_samples;
    tex->tex.width0 = base->width0;
    tex->tex.height0 = base->height0;
    tex->tex.depth0 = base->depth0;

    r300_setup_flags(tex);

    /* Align a 3D NPOT texture to POT. */
    if (base->target == PIPE_TEXTURE_3D && tex->tex.is_npot) {
        tex->tex.width0 = util_next_power_of_two(tex->tex.width0);
        tex->tex.height0 = util_next_power_of_two(tex->tex.height0);
        tex->tex.depth0 = util_next_power_of_two(tex->tex.depth0);
    }

    /* Setup tiling. */
    if (tex->tex.microtile == RADEON_LAYOUT_UNKNOWN) {
        r300_setup_tiling(rscreen, tex);
    }

    r300_setup_cbzb_flags(rscreen, tex);

    /* Setup the miptree description. */
    r300_setup_miptree(rscreen, tex, TRUE);
    /* If the required buffer size is larger the given max size,
     * try again without the alignment for the CBZB clear. */
    if (tex->buf_size && tex->tex.size_in_bytes > tex->buf_size) {
        r300_setup_miptree(rscreen, tex, FALSE);
    }

    r300_setup_hyperz_properties(rscreen, tex);

    if (tex->buf_size) {
        /* Make sure the buffer we got is large enough. */
        if (tex->tex.size_in_bytes > tex->buf_size) {
            fprintf(stderr, "r300: texture_desc_init: The buffer is not "
                            "large enough. Got: %i, Need: %i, Info:\n",
                            tex->buf_size, tex->tex.size_in_bytes);
            r300_tex_print_info(tex, "texture_desc_init");
            return FALSE;
        }

        tex->tex.buffer_size_in_bytes = tex->buf_size;
    } else {
        tex->tex.buffer_size_in_bytes = tex->tex.size_in_bytes;
    }

    if (SCREEN_DBG_ON(rscreen, DBG_TEX))
        r300_tex_print_info(tex, "texture_desc_init");

    return TRUE;
}
Example #24
/**
 * Fetch texels from a texture, returning them in SoA layout.
 *
 * \param type  the desired return type for 'rgba'.  The vector length
 *              is the number of texels to fetch
 * \param aligned if the offset is guaranteed to be aligned to element width
 *
 * \param base_ptr  points to the base of the texture mip tree.
 * \param offset    offset to start of the texture image block.  For non-
 *                  compressed formats, this simply is an offset to the texel.
 *                  For compressed formats, it is an offset to the start of the
 *                  compressed data block.
 *
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 *              these will always be (0,0).  For compressed formats, i will
 *              be in [0, block_width-1] and j will be in [0, block_height-1].
 * \param cache  optional value pointing to a lp_build_format_cache structure
 */
void
lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        boolean aligned,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j,
                        LLVMValueRef cache,
                        LLVMValueRef rgba_out[4])
{
   LLVMBuilderRef builder = gallivm->builder;
   enum pipe_format format = format_desc->format;
   struct lp_type fetch_type;

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       format_desc->block.bits <= type.width &&
       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
        format_desc->channel[0].size == 32 ||
        format_desc->channel[0].size == 16))
   {
      /*
       * The packed pixel fits into an element of the destination format. Put
       * the packed pixels into a vector and extract each component for all
       * vector elements in parallel.
       */

      LLVMValueRef packed;

      /*
       * gather the texels from the texture
       * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
       */
      assert(format_desc->block.bits <= type.width);
      fetch_type = lp_type_uint(type.width);
      packed = lp_build_gather(gallivm,
                               type.length,
                               format_desc->block.bits,
                               fetch_type,
                               aligned,
                               base_ptr, offset, FALSE);

      /*
       * convert texels to float rgba
       */
      lp_build_unpack_rgba_soa(gallivm,
                               format_desc,
                               type,
                               packed, rgba_out);
      return;
   }


   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       format_desc->block.bits > type.width &&
       ((format_desc->block.bits <= type.width * type.length &&
         format_desc->channel[0].size <= type.width) ||
        (format_desc->channel[0].size == 64 &&
         format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
         type.floating)))
   {
      /*
       * Similar to above, but the packed pixel is larger than what fits
       * into an element of the destination format. The packed pixels will be
       * shuffled into SoA vectors appropriately, and then the extraction will
       * be done in parallel as much as possible.
       * Good for 16xn (n > 2) and 32xn (n > 1) formats, care is taken so
       * the gathered vectors can be shuffled easily (even with avx).
       * 64xn float -> 32xn float is handled too but it's a bit special as
       * it does the conversion pre-shuffle.
       */

      LLVMValueRef packed[4], dst[4], output[4], shuffles[LP_MAX_VECTOR_WIDTH/32];
      struct lp_type fetch_type, gather_type = type;
      unsigned num_gather, fetch_width, i, j;
      struct lp_build_context bld;
      boolean fp64 = format_desc->channel[0].size == 64;

      lp_build_context_init(&bld, gallivm, type);

      assert(type.width == 32);
      assert(format_desc->block.bits > type.width);

      /*
       * First, figure out fetch order.
       */
      fetch_width = util_next_power_of_two(format_desc->block.bits);
      num_gather = fetch_width / type.width;
      /*
       * fp64 are treated like fp32 except we fetch twice wide values
       * (as we shuffle after trunc). The shuffles for that work out
       * mostly fine (slightly suboptimal for 4-wide, perfect for AVX)
       * albeit we miss the potential opportunity for hw gather (as it
       * only handles native size).
       */
      gather_type.width *= num_gather;
      if (fp64) {
         num_gather /= 2;
      }
      gather_type.length /= num_gather;

      for (i = 0; i < num_gather; i++) {
         LLVMValueRef offsetr, shuf_vec;
         if (num_gather == 4) {
            for (j = 0; j < gather_type.length; j++) {
               unsigned idx = i + 4*j;
               shuffles[j] = lp_build_const_int32(gallivm, idx);
            }
            shuf_vec = LLVMConstVector(shuffles, gather_type.length);
            offsetr = LLVMBuildShuffleVector(builder, offset, offset, shuf_vec, "");

         }
         else if (num_gather == 2) {
            assert(num_gather == 2);
            for (j = 0; j < gather_type.length; j++) {
               unsigned idx = i*2 + (j%2) + (j/2)*4;
               shuffles[j] = lp_build_const_int32(gallivm, idx);
            }
            shuf_vec = LLVMConstVector(shuffles, gather_type.length);
            offsetr = LLVMBuildShuffleVector(builder, offset, offset, shuf_vec, "");
         }
         else {
            assert(num_gather == 1);
            offsetr = offset;
         }
         if (gather_type.length == 1) {
            LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
            offsetr = LLVMBuildExtractElement(builder, offsetr, zero, "");
         }

         /*
          * Determine whether to use float or int loads. This is mostly
          * to outsmart the (stupid) llvm int/float shuffle logic, we
          * don't really care much if the data is floats or ints...
          * But llvm will refuse to use single float shuffle with int data
          * and instead use 3 int shuffles instead, the code looks atrocious.
          * (Note bitcasts often won't help, as llvm is too smart to be
          * fooled by that.)
          * Nobody cares about simd float<->int domain transition penalties,
          * which usually don't even exist for shuffles anyway.
          * With 4x32bit (and 3x32bit) fetch, we use float vec (the data is
          * going into transpose, which is unpacks, so doesn't really matter
          * much).
          * With 2x32bit or 4x16bit fetch, we use float vec, since those
          * go into the weird channel separation shuffle. With floats,
          * this is (with 128bit vectors):
          * - 2 movq, 2 movhpd, 2 shufps
          * With ints it would be:
          * - 4 movq, 2 punpcklqdq, 4 pshufd, 2 blendw
          * I've seen texture functions increase in code size by 15% just due
          * to that (there's lots of such fetches in them...)
          * (We could chose a different gather order to improve this somewhat
          * for the int path, but it would basically just drop the blends,
          * so the float path with this order really is optimal.)
          * Albeit it is tricky sometimes llvm doesn't ignore the float->int
          * casts so must avoid them until we're done with the float shuffle...
          * 3x16bit formats (the same is also true for 3x8) are pretty bad but
          * there's nothing we can do about them (we could overallocate by
          * those couple bytes and use unaligned but pot sized load).
          * Note that this is very much x86 specific. I don't know if this
          * affect other archs at all.
          */
         if (num_gather > 1) {
            /*
             * We always want some float type here (with x86)
             * due to shuffles being float ones afterwards (albeit for
             * the num_gather == 4 case int should work fine too,
             * unless there are some problems with avx but not avx2).
             */
            if (format_desc->channel[0].size == 64) {
               fetch_type = lp_type_float_vec(64, gather_type.width);
            } else {
               fetch_type = lp_type_int_vec(32, gather_type.width);
            }
         }
         else {
            /* type doesn't matter much */
            if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
                (format_desc->channel[0].size == 32 ||
                 format_desc->channel[0].size == 64)) {
               fetch_type = lp_type_float(gather_type.width);
            } else {
               fetch_type = lp_type_uint(gather_type.width);
            }
         }

         /* Now finally gather the values */
         packed[i] = lp_build_gather(gallivm, gather_type.length,
                                     format_desc->block.bits,
                                     fetch_type, aligned,
                                     base_ptr, offsetr, FALSE);
         if (fp64) {
            struct lp_type conv_type = type;
            conv_type.width *= 2;
            packed[i] = LLVMBuildBitCast(builder, packed[i],
                                         lp_build_vec_type(gallivm, conv_type), "");
            packed[i] = LLVMBuildFPTrunc(builder, packed[i], bld.vec_type, "");
         }
      }

      /* shuffle the gathered values to SoA */
      if (num_gather == 2) {
         for (i = 0; i < num_gather; i++) {
            for (j = 0; j < type.length; j++) {
               unsigned idx = (j%2)*2 + (j/4)*4 + i;
               if ((j/2)%2)
                  idx += type.length;
               shuffles[j] = lp_build_const_int32(gallivm, idx);
            }
            dst[i] = LLVMBuildShuffleVector(builder, packed[0], packed[1],
                                            LLVMConstVector(shuffles, type.length), "");
         }
      }
      else if (num_gather == 4) {
         lp_build_transpose_aos(gallivm, lp_int_type(type), packed, dst);
      }
      else {
         assert(num_gather == 1);
         dst[0] = packed[0];
      }
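      /*
       * For type.length == 8 the num_gather == 2 case expands to
       *    dst[i] = { p0[i], p0[i+2], p1[i], p1[i+2],
       *               p0[i+4], p0[i+6], p1[i+4], p1[i+6] }   (pN = packed[N])
       * which, given the gather order above, leaves dst[0] holding the low
       * 32-bit half of every pixel's 64-bit block and dst[1] the high half.
       */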

      /*
       * And finally unpack exactly as above, except that
       * chan shift is adjusted and the right vector selected.
       */
      if (!fp64) {
         for (i = 0; i < num_gather; i++) {
            dst[i] = LLVMBuildBitCast(builder, dst[i], bld.int_vec_type, "");
         }
         for (i = 0; i < format_desc->nr_channels; i++) {
            struct util_format_channel_description chan_desc = format_desc->channel[i];
            unsigned blockbits = type.width;
            unsigned vec_nr = chan_desc.shift / type.width;
            chan_desc.shift %= type.width;
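            /*
             * E.g. for PIPE_FORMAT_R32G32B32A32_FLOAT with type.width == 32,
             * channel 2 has shift 64, so vec_nr == 2 picks the third gathered
             * vector and the in-vector shift becomes 0.
             */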

            output[i] = lp_build_extract_soa_chan(&bld,
                                                  blockbits,
                                                  FALSE,
                                                  chan_desc,
                                                  dst[vec_nr]);
         }
      }
      else {
         for (i = 0; i < format_desc->nr_channels; i++)  {
            output[i] = dst[i];
         }
      }

      lp_build_format_swizzle_soa(format_desc, &bld, output, rgba_out);
      return;
   }

   if (format == PIPE_FORMAT_R11G11B10_FLOAT ||
       format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
      /*
       * similar conceptually to above but requiring special
       * AoS packed -> SoA float conversion code.
       */
      LLVMValueRef packed;
      struct lp_type fetch_type = lp_type_uint(type.width);

      assert(type.floating);
      assert(type.width == 32);

      packed = lp_build_gather(gallivm, type.length,
                               format_desc->block.bits,
                               fetch_type, aligned,
                               base_ptr, offset, FALSE);
      if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
         lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
      }
      else {
         lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
      }
      return;
   }

   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
       format_desc->block.bits == 64) {
      /*
       * special case: the format is 64 bits, but we only require
       * 32 bits (or 8 bits) from each block.
       */
      LLVMValueRef packed;
      struct lp_type fetch_type = lp_type_uint(type.width);

      if (format == PIPE_FORMAT_X32_S8X24_UINT) {
         /*
          * for stencil simply fix up offsets - could in fact change
          * base_ptr instead even outside the shader.
          */
         unsigned mask = (1 << 8) - 1;
         LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
         offset = LLVMBuildAdd(builder, offset, s_offset, "");
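         /*
          * Adding 4 bytes makes each gather read the second dword of the
          * 64-bit block, which (on little endian) is the one holding the
          * stencil value.
          */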
         packed = lp_build_gather(gallivm, type.length, 32, fetch_type,
                                  aligned, base_ptr, offset, FALSE);
         packed = LLVMBuildAnd(builder, packed,
                               lp_build_const_int_vec(gallivm, type, mask), "");
      }
      else {
         assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         packed = lp_build_gather(gallivm, type.length, 32, fetch_type,
                                  aligned, base_ptr, offset, TRUE);
         packed = LLVMBuildBitCast(builder, packed,
                                   lp_build_vec_type(gallivm, type), "");
      }
      /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
      rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
      rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
      return;
   }

   /*
    * Try calling lp_build_fetch_rgba_aos for all pixels at once.
    * By now this should only really be hit for subsampled and compressed
    * formats (for s3tc that includes srgb; for rgtc only the unorm ones).
    * (This is invalid for plain 8unorm formats because we're lazy with
    * the swizzle, since some results would arrive swizzled and some not.)
    */

   if ((format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) &&
       (util_format_fits_8unorm(format_desc) ||
        format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) &&
       type.floating && type.width == 32 &&
       (type.length == 1 || (type.length % 4 == 0))) {
      struct lp_type tmp_type;
      struct lp_build_context bld;
      LLVMValueRef packed, rgba[4];
      const struct util_format_description *flinear_desc;
      const struct util_format_description *frgba8_desc;
      unsigned chan;

      lp_build_context_init(&bld, gallivm, type);

      /*
       * Make sure the conversion in aos really only does convert to rgba8
       * and not anything more (so use linear format, adjust type).
       */
      flinear_desc = util_format_description(util_format_linear(format));
      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = type.length * 4;
      tmp_type.norm = TRUE;
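      /*
       * E.g. for 8 fp32 pixels tmp_type becomes 32 x unorm8, i.e. the same
       * 256 bits reinterpreted as 8 packed rgba8 pixels.
       */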

      packed = lp_build_fetch_rgba_aos(gallivm, flinear_desc, tmp_type,
                                       aligned, base_ptr, offset, i, j, cache);
      packed = LLVMBuildBitCast(builder, packed, bld.int_vec_type, "");

      /*
       * The values are now packed so they match ordinary (srgb) RGBA8 format,
       * hence need to use matching format for unpack.
       */
      frgba8_desc = util_format_description(PIPE_FORMAT_R8G8B8A8_UNORM);
      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
         assert(format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC);
         frgba8_desc = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
      }
      lp_build_unpack_rgba_soa(gallivm,
                               frgba8_desc,
                               type,
                               packed, rgba);

      /*
       * We converted 4 channels. Make sure llvm can drop the unneeded ones
       * (luckily the rgba order is fixed, only LA needs a special case).
       */
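      /*
       * (E.g. for L8A8 the format's alpha swizzle is Y, but the rgba8
       * unpack above already placed alpha in W, hence the override below.)
       */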
      for (chan = 0; chan < 4; chan++) {
         enum pipe_swizzle swizzle = format_desc->swizzle[chan];
         if (chan == 3 && util_format_is_luminance_alpha(format)) {
            swizzle = PIPE_SWIZZLE_W;
         }
         rgba_out[chan] = lp_build_swizzle_soa_channel(&bld, rgba, swizzle);
      }
      return;
   }


   /*
    * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
    *
    * This is not the most efficient way of fetching pixels, as we
    * miss some opportunities to do vectorization, but this is
    * convenient for formats or scenarios for which there was no
    * opportunity or incentive to optimize.
    *
    * We do NOT want to end up here, this typically is quite terrible,
    * in particular if the formats have less than 4 channels.
    *
    * Right now, this should only be hit for:
    * - RGTC snorm formats
    *   (those miss fast fetch functions hence they are terrible anyway)
    */

   {
      unsigned k;
      struct lp_type tmp_type;
      LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32];

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: AoS fetch fallback for %s\n",
                      __FUNCTION__, format_desc->short_name);
      }

      tmp_type = type;
      tmp_type.length = 4;

      /*
       * Note that a vector transpose can be worse than insert/extract
       * for aos->soa conversion (for formats with 1 or 2 channels). However,
       * we should try to avoid getting here for just about all formats, so
       * don't bother.
       */

      /* loop over number of pixels */
      for (k = 0; k < type.length; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef offset_elem;
         LLVMValueRef i_elem, j_elem;

         offset_elem = LLVMBuildExtractElement(builder, offset,
                                               index, "");

         i_elem = LLVMBuildExtractElement(builder, i, index, "");
         j_elem = LLVMBuildExtractElement(builder, j, index, "");

         /* Get a single float[4]={R,G,B,A} pixel */
         aos_fetch[k] = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
                                                aligned, base_ptr, offset_elem,
                                                i_elem, j_elem, cache);

      }
      convert_to_soa(gallivm, aos_fetch, rgba_out, type);
   }
}
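convert_to_soa() itself is not shown here; as a mental model only (the real code emits LLVM IR rather than running this), the rearrangement performed by the fallback path is equivalent to the following scalar sketch:

/*
 * Scalar model of the final convert_to_soa() step: aos_fetch[k] holds one
 * pixel's {R,G,B,A}, the result holds one vector per channel. The maximum
 * vector length of 16 is assumed purely for illustration.
 */
static void
aos_to_soa_ref(const float (*aos)[4], float soa[4][16], unsigned length)
{
   unsigned chan, k;

   for (chan = 0; chan < 4; chan++) {
      for (k = 0; k < length; k++) {
         soa[chan][k] = aos[k][chan];
      }
   }
}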
Example #25
0
File: subpicture.c Project: Haifen/Mesa-3D
PUBLIC
Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *subpicture,
                            unsigned short width, unsigned short height, int xvimage_id)
{
   XvMCContextPrivate *context_priv;
   XvMCSubpicturePrivate *subpicture_priv;
   struct pipe_context *pipe;
   struct pipe_resource tex_templ, *tex;
   struct pipe_sampler_view sampler_templ;
   Status ret;

   XVMC_MSG(XVMC_TRACE, "[XvMC] Creating subpicture %p.\n", subpicture);

   assert(dpy);

   if (!context)
      return XvMCBadContext;

   context_priv = context->privData;
   pipe = context_priv->pipe;

   if (!subpicture)
      return XvMCBadSubpicture;

   if (width > context_priv->subpicture_max_width ||
       height > context_priv->subpicture_max_height)
      return BadValue;

   ret = Validate(dpy, context->port, context->surface_type_id, xvimage_id);
   if (ret != Success)
      return ret;

   subpicture_priv = CALLOC(1, sizeof(XvMCSubpicturePrivate));
   if (!subpicture_priv)
      return BadAlloc;

   memset(&tex_templ, 0, sizeof(tex_templ));
   tex_templ.target = PIPE_TEXTURE_2D;
   tex_templ.format = XvIDToPipe(xvimage_id);
   tex_templ.last_level = 0;
   if (pipe->screen->get_video_param(pipe->screen,
                                     PIPE_VIDEO_PROFILE_UNKNOWN,
                                     PIPE_VIDEO_ENTRYPOINT_UNKNOWN,
                                     PIPE_VIDEO_CAP_NPOT_TEXTURES)) {
      tex_templ.width0 = width;
      tex_templ.height0 = height;
   }
   else {
      tex_templ.width0 = util_next_power_of_two(width);
      tex_templ.height0 = util_next_power_of_two(height);
   }
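   /*
    * E.g. a 720x480 subpicture on hardware without NPOT support gets a
    * 1024x512 backing texture.
    */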
   tex_templ.depth0 = 1;
   tex_templ.array_size = 1;
   tex_templ.usage = PIPE_USAGE_DYNAMIC;
   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
   tex_templ.flags = 0;

   tex = pipe->screen->resource_create(pipe->screen, &tex_templ);
   if (!tex) {
      FREE(subpicture_priv);
      return BadAlloc;
   }

   memset(&sampler_templ, 0, sizeof(sampler_templ));
   u_sampler_view_default_template(&sampler_templ, tex, tex->format);

   subpicture_priv->sampler = pipe->create_sampler_view(pipe, tex, &sampler_templ);
   pipe_resource_reference(&tex, NULL);
   if (!subpicture_priv->sampler) {
      FREE(subpicture_priv);
      return BadAlloc;
   }

   subpicture_priv->context = context;
   subpicture->subpicture_id = XAllocID(dpy);
   subpicture->context_id = context->context_id;
   subpicture->xvimage_id = xvimage_id;
   subpicture->width = width;
   subpicture->height = height;
   subpicture->num_palette_entries = NumPaletteEntries4XvID(xvimage_id);
   subpicture->entry_bytes = PipeToComponentOrder(tex_templ.format, subpicture->component_order);
   subpicture->privData = subpicture_priv;

   if (subpicture->num_palette_entries > 0) {
      tex_templ.target = PIPE_TEXTURE_1D;
      tex_templ.format = PIPE_FORMAT_R8G8B8X8_UNORM;
      tex_templ.width0 = subpicture->num_palette_entries;
      tex_templ.height0 = 1;
      tex_templ.usage = PIPE_USAGE_DEFAULT;

      tex = pipe->screen->resource_create(pipe->screen, &tex_templ);
      if (!tex) {
         pipe_sampler_view_reference(&subpicture_priv->sampler, NULL);
         FREE(subpicture_priv);
         return BadAlloc;
      }

      memset(&sampler_templ, 0, sizeof(sampler_templ));
      u_sampler_view_default_template(&sampler_templ, tex, tex->format);
      sampler_templ.swizzle_a = PIPE_SWIZZLE_ONE;
      subpicture_priv->palette = pipe->create_sampler_view(pipe, tex, &sampler_templ);
      pipe_resource_reference(&tex, NULL);
      if (!subpicture_priv->palette) {
         pipe_sampler_view_reference(&subpicture_priv->sampler, NULL);
         FREE(subpicture_priv);
         return BadAlloc;
      }
   }

   SyncHandle();

   XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p created.\n", subpicture);

   return Success;
}
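For reference, a minimal client-side call sequence might look like the sketch below; the identifiers dpy, context and xvimage_id are assumed to come from the usual earlier XvMC/Xv setup (XvMCCreateContext, XvMCListSubpictureTypes), and error handling is trimmed. Illustrative only:

/* Hypothetical usage sketch -- dpy, context and xvimage_id are assumed
 * to exist from earlier XvMC setup. */
XvMCSubpicture subpicture;
Status status = XvMCCreateSubpicture(dpy, &context, &subpicture,
                                     720, 480, xvimage_id);
if (status == Success) {
   /* ... composite/blend the subpicture onto surfaces ... */
   XvMCDestroySubpicture(dpy, &subpicture);
}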