Example #1
/**
 * Create new pipe_resource given the template information.
 */
static struct pipe_resource *
softpipe_resource_create(struct pipe_screen *screen,
                         const struct pipe_resource *templat)
{
   struct softpipe_resource *spr = CALLOC_STRUCT(softpipe_resource);
   if (!spr)
      return NULL;

   assert(templat->format != PIPE_FORMAT_NONE);

   spr->base = *templat;
   pipe_reference_init(&spr->base.reference, 1);
   spr->base.screen = screen;

   spr->pot = (util_is_power_of_two(templat->width0) &&
               util_is_power_of_two(templat->height0) &&
               util_is_power_of_two(templat->depth0));

   if (spr->base.bind & (PIPE_BIND_DISPLAY_TARGET |
			 PIPE_BIND_SCANOUT |
			 PIPE_BIND_SHARED)) {
      if (!softpipe_displaytarget_layout(screen, spr))
         goto fail;
   }
   else {
      if (!softpipe_resource_layout(screen, spr))
         goto fail;
   }
    
   return &spr->base;

 fail:
   FREE(spr);
   return NULL;
}
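All of these examples revolve around util_is_power_of_two(); softpipe uses it here to record whether the texture dimensions are powers of two (spr->pot), which later allows cheaper texel addressing. For reference, a minimal sketch of such a predicate is shown below. It is an illustrative standalone helper, not Mesa's actual u_math.h code, and is_power_of_two() is a name chosen only for the sketch.

/* Minimal sketch of a power-of-two predicate using the classic bit trick:
 * a power of two has exactly one bit set, so v & (v - 1) clears it to zero.
 * Note that 0 also tests true with this form; a real helper may want an
 * extra check depending on what its callers expect. */
#include <stdbool.h>
#include <stdio.h>

static bool is_power_of_two(unsigned v)
{
   return (v & (v - 1)) == 0;
}

int main(void)
{
   const unsigned tests[] = { 0, 1, 2, 3, 64, 96, 128, 257 };
   for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
      printf("%u -> %s\n", tests[i], is_power_of_two(tests[i]) ? "pot" : "npot");
   return 0;
}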
Example #2
static unsigned
nvfx_miptree_layout(struct nvfx_miptree *mt)
{
	struct pipe_resource* pt = &mt->base.base;
        uint offset = 0;

	if(!nvfx_screen(pt->screen)->is_nv4x)
	{
		assert(pt->target == PIPE_TEXTURE_RECT
			|| (util_is_power_of_two(pt->width0) && util_is_power_of_two(pt->height0)));
	}

	for (unsigned l = 0; l <= pt->last_level; l++)
	{
		unsigned size;
		mt->level_offset[l] = offset;

		if(mt->linear_pitch)
			size = mt->linear_pitch;
		else
			size = util_format_get_stride(pt->format, u_minify(pt->width0, l));
		size = util_format_get_2d_size(pt->format, size, u_minify(pt->height0, l));

		if(pt->target == PIPE_TEXTURE_3D)
			size *= u_minify(pt->depth0, l);

		offset += size;
	}

	offset = align(offset, 128);
	mt->face_size = offset;
	if(mt->base.base.target == PIPE_TEXTURE_CUBE)
		offset += 5 * mt->face_size;
	return offset;
}
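Each mip level in nvfx_miptree_layout() simply starts where the previous one ended, with the level size derived from the (possibly fixed) pitch and the minified height. The sketch below walks the same accumulation for a plain linear 4-byte-per-pixel format; it is a simplification for illustration only (minify() stands in for Mesa's u_minify(), and block compression and pitch alignment are omitted).

/* Simplified sketch of the mip-offset accumulation above for a linear
 * RGBA8-style format (4 bytes per pixel). Mesa's u_minify() is essentially
 * MAX2(size >> level, 1); compressed blocks and stride alignment rules
 * are left out to keep the example short. */
#include <stdio.h>

static unsigned minify(unsigned size, unsigned level)
{
   unsigned s = size >> level;
   return s ? s : 1;
}

int main(void)
{
   const unsigned width0 = 256, height0 = 256, last_level = 8;
   unsigned offset = 0;

   for (unsigned l = 0; l <= last_level; l++) {
      unsigned pitch = minify(width0, l) * 4;        /* bytes per row   */
      unsigned size  = pitch * minify(height0, l);   /* bytes per level */
      printf("level %u: offset %u, size %u\n", l, offset, size);
      offset += size;
   }
   printf("total: %u bytes per face/layer\n", offset);
   return 0;
}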
Example #3
static struct pipe_resource *
softpipe_resource_from_handle(struct pipe_screen *screen,
                              const struct pipe_resource *templat,
                              struct winsys_handle *whandle)
{
   struct sw_winsys *winsys = softpipe_screen(screen)->winsys;
   struct softpipe_resource *spr = CALLOC_STRUCT(softpipe_resource);
   if (!spr)
      return NULL;

   spr->base = *templat;
   pipe_reference_init(&spr->base.reference, 1);
   spr->base.screen = screen;

   spr->pot = (util_is_power_of_two(templat->width0) &&
               util_is_power_of_two(templat->height0) &&
               util_is_power_of_two(templat->depth0));

   spr->dt = winsys->displaytarget_from_handle(winsys,
                                               templat,
                                               whandle,
                                               &spr->stride[0]);
   if (!spr->dt)
      goto fail;

   return &spr->base;

 fail:
   FREE(spr);
   return NULL;
}
Example #4
static struct pipe_sampler_view *etna_pipe_create_sampler_view(struct pipe_context *pipe,
                                                 struct pipe_resource *texture,
                                                 const struct pipe_sampler_view *templat)
{
    struct etna_pipe_context *priv = etna_pipe_context(pipe);
    struct etna_sampler_view *sv = CALLOC_STRUCT(etna_sampler_view);
    if (!sv)
        return NULL;
    sv->base = *templat;
    sv->base.context = pipe;
    sv->base.texture = NULL;
    pipe_resource_reference(&sv->base.texture, texture);
    assert(sv->base.texture);

    struct compiled_sampler_view *cs = CALLOC_STRUCT(compiled_sampler_view);
    struct etna_resource *res = etna_resource(sv->base.texture);
    assert(res != NULL);

    cs->TE_SAMPLER_CONFIG0 =
                VIVS_TE_SAMPLER_CONFIG0_TYPE(translate_texture_target(res->base.target, false)) |
                VIVS_TE_SAMPLER_CONFIG0_FORMAT(translate_texture_format(sv->base.format, false));
                /* merged with sampler state */
    cs->TE_SAMPLER_CONFIG0_MASK = 0xffffffff;

    cs->TE_SAMPLER_CONFIG1 =
                VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_R(templat->swizzle_r) |
                VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_G(templat->swizzle_g) |
                VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_B(templat->swizzle_b) |
                VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_A(templat->swizzle_a) |
                VIVS_TE_SAMPLER_CONFIG1_HALIGN(res->halign);
    cs->TE_SAMPLER_SIZE =
            VIVS_TE_SAMPLER_SIZE_WIDTH(res->base.width0)|
            VIVS_TE_SAMPLER_SIZE_HEIGHT(res->base.height0);
    cs->TE_SAMPLER_LOG_SIZE =
            VIVS_TE_SAMPLER_LOG_SIZE_WIDTH(etna_log2_fixp55(res->base.width0)) |
            VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT(etna_log2_fixp55(res->base.height0));

    /* Set up levels-of-detail */
    for(int lod=0; lod<=res->base.last_level; ++lod)
    {
        cs->TE_SAMPLER_LOD_ADDR[lod] = etna_bo_gpu_address(res->bo) + res->levels[lod].offset;
    }
    cs->min_lod = sv->base.u.tex.first_level << 5;
    cs->max_lod = MIN2(sv->base.u.tex.last_level, res->base.last_level) << 5;

    /* Workaround for npot textures -- it appears that only CLAMP_TO_EDGE is supported when the
     * appropriate capability is not set. */
    if(!priv->specs.npot_tex_any_wrap &&
        (!util_is_power_of_two(res->base.width0) || !util_is_power_of_two(res->base.height0)))
    {
        cs->TE_SAMPLER_CONFIG0_MASK = ~(VIVS_TE_SAMPLER_CONFIG0_UWRAP__MASK |
                                        VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK);
        cs->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_UWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE) |
                                  VIVS_TE_SAMPLER_CONFIG0_VWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE);
    }

    sv->internal = cs;
    pipe_reference_init(&sv->base.reference, 1);
    return &sv->base;
}
Example #5
static void r300_setup_flags(struct r300_resource *tex)
{
    tex->tex.uses_stride_addressing =
        !util_is_power_of_two(tex->b.b.b.width0) ||
        (tex->tex.stride_in_bytes_override &&
         stride_to_width(tex->b.b.b.format,
                         tex->tex.stride_in_bytes_override) != tex->b.b.b.width0);

    tex->tex.is_npot =
        tex->tex.uses_stride_addressing ||
        !util_is_power_of_two(tex->b.b.b.height0) ||
        !util_is_power_of_two(tex->b.b.b.depth0);
}

static void r300_setup_flags(struct r300_texture_desc *desc)
{
    desc->uses_stride_addressing =
        !util_is_power_of_two(desc->b.b.width0) ||
        !util_is_power_of_two(desc->b.b.height0) ||
        (desc->stride_in_bytes_override &&
         stride_to_width(desc->b.b.format,
                         desc->stride_in_bytes_override) != desc->b.b.width0);

    desc->is_npot =
        desc->uses_stride_addressing ||
        !util_is_power_of_two(desc->b.b.height0);
}
Example #7
static void
nvfx_miptree_choose_format(struct nvfx_miptree *mt)
{
	struct pipe_resource *pt = &mt->base.base;
	unsigned uniform_pitch = 0;
	static int no_swizzle = -1;
	if(no_swizzle < 0)
		no_swizzle = debug_get_bool_option("NV40_NO_SWIZZLE", FALSE); /* this will break things on nv30 */

	if (!util_is_power_of_two(pt->width0) ||
	    !util_is_power_of_two(pt->height0) ||
	    !util_is_power_of_two(pt->depth0) ||
	    (!nvfx_screen(pt->screen)->is_nv4x && pt->target == PIPE_TEXTURE_RECT)
	    )
		uniform_pitch = 1;

	if (
		(pt->bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET))
		|| (pt->usage & PIPE_USAGE_DYNAMIC) || (pt->usage & PIPE_USAGE_STAGING)
		|| util_format_is_compressed(pt->format)
		|| no_swizzle
	)
		mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;

	/* non compressed formats with uniform pitch must be linear, and vice versa */
	if(!util_format_is_s3tc(pt->format)
		&& (uniform_pitch || mt->base.base.flags & NVFX_RESOURCE_FLAG_LINEAR))
	{
		mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
		uniform_pitch = 1;
	}

	if(uniform_pitch)
	{
		mt->linear_pitch = util_format_get_stride(pt->format, pt->width0);

		// TODO: this is only a constraint for rendering and not sampling, apparently
		// we may also want this unconditionally
		if(pt->bind & (PIPE_BIND_SAMPLER_VIEW |
			PIPE_BIND_DEPTH_STENCIL |
			PIPE_BIND_RENDER_TARGET |
			PIPE_BIND_DISPLAY_TARGET |
			PIPE_BIND_SCANOUT))
			mt->linear_pitch = align(mt->linear_pitch, 64);
	}
	else
		mt->linear_pitch = 0;
}
Example #8
/**
 * Concatenates several (must be a power of 2) vectors (of the same type)
 * into a larger one.
 * Most useful for building up a 256bit sized vector out of two 128bit ones.
 */
LLVMValueRef
lp_build_concat(struct gallivm_state *gallivm,
                LLVMValueRef src[],
                struct lp_type src_type,
                unsigned num_vectors)
{
   unsigned new_length, i;
   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH/2];
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

   assert(src_type.length * num_vectors <= Elements(shuffles));
   assert(util_is_power_of_two(num_vectors));

   new_length = src_type.length;

   for (i = 0; i < num_vectors; i++)
      tmp[i] = src[i];

   while (num_vectors > 1) {
      num_vectors >>= 1;
      new_length <<= 1;
      for (i = 0; i < new_length; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, i);
      }
      for (i = 0; i < num_vectors; i++) {
         tmp[i] = LLVMBuildShuffleVector(gallivm->builder, tmp[i*2], tmp[i*2 + 1],
                                         LLVMConstVector(shuffles, new_length), "");
      }
   }

   return tmp[0];
}
Example #9
static struct pipe_resource *
galahad_screen_resource_create(struct pipe_screen *_screen,
                                const struct pipe_resource *templat)
{
   struct galahad_screen *glhd_screen = galahad_screen(_screen);
   struct pipe_screen *screen = glhd_screen->screen;
   struct pipe_resource *result;

   if (templat->target >= PIPE_MAX_TEXTURE_TYPES)
      glhd_warn("Received bogus resource target %d", templat->target);

   if(templat->target != PIPE_TEXTURE_RECT && templat->target != PIPE_BUFFER && !screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES))
   {
      if(!util_is_power_of_two(templat->width0) || !util_is_power_of_two(templat->height0))
         glhd_warn("Requested NPOT (%ux%u) non-rectangle texture without NPOT support", templat->width0, templat->height0);
   }

   if(templat->target == PIPE_TEXTURE_RECT && templat->last_level)
      glhd_warn("Rectangle textures cannot have mipmaps, but last_level = %u", templat->last_level);

   if(templat->target == PIPE_BUFFER && templat->last_level)
      glhd_warn("Buffers cannot have mipmaps, but last_level = %u", templat->last_level);

   if(templat->target != PIPE_TEXTURE_3D && templat->depth0 != 1)
      glhd_warn("Only 3D textures can have depth != 1, but received target %u and depth %u", templat->target, templat->depth0);

   if(templat->target == PIPE_TEXTURE_1D && templat->height0 != 1)
     glhd_warn("1D textures must have height 1 but got asked for height %u", templat->height0);

   if(templat->target == PIPE_BUFFER && templat->height0 != 1)
     glhd_warn("Buffers must have height 1 but got asked for height %u", templat->height0);

   if(templat->target == PIPE_TEXTURE_CUBE && templat->width0 != templat->height0)
      glhd_warn("Cube maps must be square, but got asked for %ux%u", templat->width0, templat->height0);

   result = screen->resource_create(screen,
                                    templat);

   if (result)
      return galahad_resource_create(glhd_screen, result);
   return NULL;
}
Example #10
/**
 * The texture is for transfer only.  We can define our own layout to save
 * space.
 */
static void
layout_init_for_transfer(struct ilo_layout *layout,
                         const struct ilo_dev_info *dev,
                         const struct pipe_resource *templ)
{
   const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
      templ->depth0 : templ->array_size;
   unsigned layer_width, layer_height;

   assert(templ->last_level == 0);
   assert(templ->nr_samples <= 1);

   layout->aux = ILO_LAYOUT_AUX_NONE;
   layout->width0 = templ->width0;
   layout->height0 = templ->height0;
   layout->format = templ->format;
   layout->block_width = util_format_get_blockwidth(templ->format);
   layout->block_height = util_format_get_blockheight(templ->format);
   layout->block_size = util_format_get_blocksize(templ->format);
   layout->walk = ILO_LAYOUT_WALK_LOD;

   layout->valid_tilings = LAYOUT_TILING_NONE;
   layout->tiling = INTEL_TILING_NONE;

   layout->align_i = layout->block_width;
   layout->align_j = layout->block_height;

   assert(util_is_power_of_two(layout->block_width) &&
          util_is_power_of_two(layout->block_height));

   /* use packed layout */
   layer_width = align(templ->width0, layout->align_i);
   layer_height = align(templ->height0, layout->align_j);

   layout->lods[0].slice_width = layer_width;
   layout->lods[0].slice_height = layer_height;

   layout->bo_stride = (layer_width / layout->block_width) * layout->block_size;
   layout->bo_stride = align(layout->bo_stride, 64);

   layout->bo_height = (layer_height / layout->block_height) * num_layers;
}
Example #11
bool
ImmediateValue::isPow2() const
{
   switch (reg.type) {
   case TYPE_U8:
   case TYPE_U16:
   case TYPE_U32: return util_is_power_of_two(reg.data.u32);
   default:
      return false;
   }
}
Example #12
static bool
image_get_gen6_layout(const struct ilo_dev *dev,
                      const struct ilo_image_info *info,
                      struct ilo_image_layout *layout)
{
   ILO_DEV_ASSERT(dev, 6, 8);

   if (!image_validate_gen6(dev, info))
      return false;

   if (image_bind_gpu(info) || info->level_count > 1) {
      if (!image_init_gen6_hardware_layout(dev, info, layout))
         return false;
   } else {
      if (!image_init_gen6_transfer_layout(dev, info, layout))
         return false;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(layout->align_i % info->block_width == 0);
   assert(layout->align_j % info->block_height == 0);

   /* make sure align() works */
   assert(util_is_power_of_two(layout->align_i) &&
          util_is_power_of_two(layout->align_j));
   assert(util_is_power_of_two(info->block_width) &&
          util_is_power_of_two(info->block_height));

   image_get_gen6_lods(dev, info, layout);

   assert(layout->walk_layer_height % info->block_height == 0);
   assert(layout->monolithic_width % info->block_width == 0);
   assert(layout->monolithic_height % info->block_height == 0);

   return true;
}
Example #13
static void
anv_physical_device_get_format_properties(struct anv_physical_device *physical_device,
                                          VkFormat format,
                                          VkFormatProperties *out_properties)
{
   int gen = physical_device->info->gen * 10;
   if (physical_device->info->is_haswell)
      gen += 5;

   VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
   if (anv_format_is_depth_or_stencil(&anv_formats[format])) {
      tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
      if (physical_device->info->gen >= 8)
         tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;

      tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
               VK_FORMAT_FEATURE_BLIT_DST_BIT;
   } else {
      enum isl_format linear_fmt, tiled_fmt;
      struct anv_format_swizzle linear_swizzle, tiled_swizzle;
      linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT,
                                      VK_IMAGE_TILING_LINEAR, &linear_swizzle);
      tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT,
                                     VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle);

      linear = get_image_format_properties(gen, linear_fmt, linear_fmt,
                                           linear_swizzle);
      tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt,
                                          tiled_swizzle);
      buffer = get_buffer_format_properties(gen, linear_fmt);

      /* XXX: We handle 3-channel formats by switching them out for RGBX or
       * RGBA formats behind-the-scenes.  This works fine for textures
       * because the upload process will fill in the extra channel.
       * We could also support it for render targets, but it will take
       * substantially more work and we have enough RGBX formats to handle
       * what most clients will want.
       */
      if (linear_fmt != ISL_FORMAT_UNSUPPORTED &&
          !util_is_power_of_two(isl_format_layouts[linear_fmt].bs) &&
          isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) {
         tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT &
                  ~VK_FORMAT_FEATURE_BLIT_DST_BIT;
      }
   }

   out_properties->linearTilingFeatures = linear;
   out_properties->optimalTilingFeatures = tiled;
   out_properties->bufferFeatures = buffer;

   return;
}
Example #14
/**
 * Exactly one bit must be set in \a aspect.
 */
enum isl_format
anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect,
                   VkImageTiling tiling, struct anv_format_swizzle *swizzle)
{
   const struct anv_format *anv_fmt = &anv_formats[format];

   if (swizzle)
      *swizzle = anv_fmt->swizzle;

   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      if (anv_fmt->isl_format == ISL_FORMAT_UNSUPPORTED) {
         return ISL_FORMAT_UNSUPPORTED;
      } else if (tiling == VK_IMAGE_TILING_OPTIMAL &&
                 !util_is_power_of_two(anv_fmt->isl_layout->bs)) {
         /* Tiled formats *must* be power-of-two because we need to upload
          * them with the render pipeline.  For 3-channel formats, we fix
          * this by switching them over to RGBX or RGBA formats under the
          * hood.
          */
         enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->isl_format);
         if (rgbx != ISL_FORMAT_UNSUPPORTED)
            return rgbx;
         else
            return isl_format_rgb_to_rgba(anv_fmt->isl_format);
      } else {
         return anv_fmt->isl_format;
      }

   case VK_IMAGE_ASPECT_DEPTH_BIT:
   case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT):
      assert(anv_fmt->has_depth);
      return anv_fmt->isl_format;

   case VK_IMAGE_ASPECT_STENCIL_BIT:
      assert(anv_fmt->has_stencil);
      return ISL_FORMAT_R8_UINT;

   default:
      unreachable("bad VkImageAspect");
      return ISL_FORMAT_UNSUPPORTED;
   }
}
Example #15
struct util_ringbuffer *util_ringbuffer_create( unsigned dwords )
{
   struct util_ringbuffer *ring = CALLOC_STRUCT(util_ringbuffer);
   if (ring == NULL)
      return NULL;

   assert(util_is_power_of_two(dwords));
   
   ring->buf = MALLOC( dwords * sizeof(unsigned) );
   if (ring->buf == NULL)
      goto fail;

   ring->mask = dwords - 1;

   pipe_condvar_init(ring->change);
   pipe_mutex_init(ring->mutex);
   return ring;

fail:
   FREE(ring->buf);
   FREE(ring);
   return NULL;
}
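The assert on util_is_power_of_two(dwords) is what makes ring->mask = dwords - 1 valid: with a power-of-two capacity, wrapping an index is a single bitwise AND instead of a modulo. The sketch below shows just that wrapping idiom; it is independent of the util_ringbuffer API and uses names invented for the example.

/* Sketch of mask-based index wrapping, assuming a power-of-two capacity.
 * With dwords = 8, mask = 7 (binary 0111): any monotonically increasing
 * counter ANDed with the mask lands back inside [0, 7]. */
#include <stdio.h>

int main(void)
{
   const unsigned dwords = 8;          /* must be a power of two */
   const unsigned mask = dwords - 1;   /* 0b0111 */
   unsigned buf[8] = { 0 };

   for (unsigned head = 0; head < 20; head++)
      buf[head & mask] = head;         /* equivalent to head % dwords */

   for (unsigned i = 0; i < dwords; i++)
      printf("buf[%u] = %u\n", i, buf[i]);
   return 0;
}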
Example #16
static void
layout_init_alignments(struct ilo_layout *layout,
                       struct ilo_layout_params *params)
{
   const struct pipe_resource *templ = params->templ;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 113:
    *
    *     "surface format           align_i     align_j
    *      YUV 4:2:2 formats        4           *see below
    *      BC1-5                    4           4
    *      FXT1                     8           4
    *      all other formats        4           *see below"
    *
    *     "- align_j = 4 for any depth buffer
    *      - align_j = 2 for separate stencil buffer
    *      - align_j = 4 for any render target surface is multisampled (4x)
    *      - align_j = 4 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_4
    *      - align_j = 2 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 2 for all other render target surface
    *      - align_j = 2 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 4 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_4"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
    *      the Surface Format is 96 bits per element (BPE)."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *   compressed formats             block width    block height
    *   PIPE_FORMAT_S8_UINT            4              2
    *   other depth/stencil formats    4              4
    *   4x multisampled                4              4
    *   bpp 96                         4              2
    *   others                         4              2 or 4
    */

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 110:
    *
    *     "surface defined by      surface format     align_i     align_j
    *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
    *                              not D16_UNORM      4           4
    *      3DSTATE_STENCIL_BUFFER  N/A                8           8
    *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
    *                              FXT1               8           4
    *                              all others         (set by SURFACE_STATE)"
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "- This field (Surface Vertical Aligment) is intended to be set to
    *        VALIGN_4 if the surface was rendered as a depth buffer, for a
    *        multisampled (4x) render target, or for a multisampled (8x)
    *        render target, since these surfaces support only alignment of 4.
    *      - Use of VALIGN_4 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to VALIGN_4 for all tiled Y Render Target
    *        surfaces.
    *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
    *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
    *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    *        must be set to VALIGN_4."
    *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    *
    *     "- This field (Surface Horizontal Aligment) is intended to be set to
    *        HALIGN_8 only if the surface was rendered as a depth buffer with
    *        Z16 format or a stencil buffer, since these surfaces support only
    *        alignment of 8.
    *      - Use of HALIGN_8 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
    *      - This field must be set to HALIGN_8 if the Surface Format is
    *        FXT1."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *  compressed formats              block width    block height
    *  PIPE_FORMAT_Z16_UNORM           8              4
    *  PIPE_FORMAT_S8_UINT             8              8
    *  other depth/stencil formats     4              4
    *  2x or 4x multisampled           4 or 8         4
    *  tiled Y                         4 or 8         4 (if rt)
    *  PIPE_FORMAT_R32G32B32_FLOAT     4 or 8         2
    *  others                          4 or 8         2 or 4
    */

   if (params->compressed) {
      /* this happens to be the case */
      layout->align_i = layout->block_width;
      layout->align_j = layout->block_height;
   } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
      if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
         switch (layout->format) {
         case PIPE_FORMAT_Z16_UNORM:
            layout->align_i = 8;
            layout->align_j = 4;
            break;
         case PIPE_FORMAT_S8_UINT:
            layout->align_i = 8;
            layout->align_j = 8;
            break;
         default:
            layout->align_i = 4;
            layout->align_j = 4;
            break;
         }
      } else {
         switch (layout->format) {
         case PIPE_FORMAT_S8_UINT:
            layout->align_i = 4;
            layout->align_j = 2;
            break;
         default:
            layout->align_i = 4;
            layout->align_j = 4;
            break;
         }
      }
   } else {
      const bool valign_4 = (templ->nr_samples > 1) ||
         (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
          layout->tiling == INTEL_TILING_Y &&
          (templ->bind & PIPE_BIND_RENDER_TARGET));

      if (valign_4)
         assert(layout->block_size != 12);

      layout->align_i = 4;
      layout->align_j = (valign_4) ? 4 : 2;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(layout->align_i % layout->block_width == 0);
   assert(layout->align_j % layout->block_height == 0);

   /* make sure align() works */
   assert(util_is_power_of_two(layout->align_i) &&
          util_is_power_of_two(layout->align_j));
   assert(util_is_power_of_two(layout->block_width) &&
          util_is_power_of_two(layout->block_height));
}
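The closing assertions ("make sure align() works") point at the real constraint: align(x, a) is implemented with bit masking and only rounds correctly when a is a power of two. Below is a hedged sketch of that rounding, with align_pot() as a stand-in name rather than the actual Mesa macro.

/* Sketch of the align() idiom the assertions above guard: round x up to a
 * multiple of a, where a must be a power of two for the mask to be contiguous.
 * align_pot() is an illustrative helper, not the real u_math.h macro. */
#include <stdio.h>

static unsigned align_pot(unsigned x, unsigned a)
{
   return (x + a - 1) & ~(a - 1);
}

int main(void)
{
   printf("%u\n", align_pot(100, 64));   /* 128                  */
   printf("%u\n", align_pot(128, 64));   /* 128, already aligned */
   /* With a non-power-of-two a (e.g. 6), ~(a - 1) is not a contiguous bit
    * mask and the rounding breaks -- hence the util_is_power_of_two asserts. */
   return 0;
}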
Example #17
struct pipe_resource *
nv30_miptree_create(struct pipe_screen *pscreen,
                    const struct pipe_resource *tmpl)
{
   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
   struct nv30_miptree *mt = CALLOC_STRUCT(nv30_miptree);
   struct pipe_resource *pt = &mt->base.base;
   unsigned blocksz, size;
   unsigned w, h, d, l;
   int ret;

   switch (tmpl->nr_samples) {
   case 4:
      mt->ms_mode = 0x00004000;
      mt->ms_x = 1;
      mt->ms_y = 1;
      break;
   case 2:
      mt->ms_mode = 0x00003000;
      mt->ms_x = 1;
      mt->ms_y = 0;
      break;
   default:
      mt->ms_mode = 0x00000000;
      mt->ms_x = 0;
      mt->ms_y = 0;
      break;
   }

   mt->base.vtbl = &nv30_miptree_vtbl;
   *pt = *tmpl;
   pipe_reference_init(&pt->reference, 1);
   pt->screen = pscreen;

   w = pt->width0 << mt->ms_x;
   h = pt->height0 << mt->ms_y;
   d = (pt->target == PIPE_TEXTURE_3D) ? pt->depth0 : 1;
   blocksz = util_format_get_blocksize(pt->format);

   if ((pt->target == PIPE_TEXTURE_RECT) ||
       !util_is_power_of_two(pt->width0) ||
       !util_is_power_of_two(pt->height0) ||
       !util_is_power_of_two(pt->depth0) ||
       util_format_is_compressed(pt->format) ||
       util_format_is_float(pt->format) || mt->ms_mode) {
      mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
      mt->uniform_pitch = align(mt->uniform_pitch, 64);
   }

   if (!mt->uniform_pitch)
      mt->swizzled = TRUE;

   size = 0;
   for (l = 0; l <= pt->last_level; l++) {
      struct nv30_miptree_level *lvl = &mt->level[l];
      unsigned nbx = util_format_get_nblocksx(pt->format, w);
      unsigned nby = util_format_get_nblocksy(pt->format, h);

      lvl->offset = size;
      lvl->pitch  = mt->uniform_pitch;
      if (!lvl->pitch)
         lvl->pitch = nbx * blocksz;

      lvl->zslice_size = lvl->pitch * nby;
      size += lvl->zslice_size * d;

      w = u_minify(w, 1);
      h = u_minify(h, 1);
      d = u_minify(d, 1);
   }

   mt->layer_size = size;
   if (pt->target == PIPE_TEXTURE_CUBE) {
      if (!mt->uniform_pitch)
         mt->layer_size = align(mt->layer_size, 128);
      size = mt->layer_size * 6;
   }

   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 256, size, NULL, &mt->base.bo);
   if (ret) {
      FREE(mt);
      return NULL;
   }

   mt->base.domain = NOUVEAU_BO_VRAM;
   return &mt->base.base;
}
Example #18
static struct pipe_sampler_view *
etna_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                         const struct pipe_sampler_view *so)
{
   struct etna_sampler_view *sv = CALLOC_STRUCT(etna_sampler_view);
   struct etna_resource *res = etna_resource(prsc);
   struct etna_context *ctx = etna_context(pctx);
   const uint32_t format = translate_texture_format(so->format);
   const bool ext = !!(format & EXT_FORMAT);
   const uint32_t swiz = get_texture_swiz(so->format, so->swizzle_r,
                                          so->swizzle_g, so->swizzle_b,
                                          so->swizzle_a);

   if (!sv)
      return NULL;

   if (!etna_resource_sampler_compatible(res)) {
      /* The original resource is not compatible with the sampler.
       * Allocate an appropriately tiled texture. */
      if (!res->texture) {
         struct pipe_resource templat = *prsc;

         templat.bind &= ~(PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET |
                           PIPE_BIND_BLENDABLE);
         res->texture =
            etna_resource_alloc(pctx->screen, ETNA_LAYOUT_TILED,
                                DRM_FORMAT_MOD_LINEAR, &templat);
      }

      if (!res->texture) {
         free(sv);
         return NULL;
      }
      res = etna_resource(res->texture);
   }

   sv->base = *so;
   pipe_reference_init(&sv->base.reference, 1);
   sv->base.texture = NULL;
   pipe_resource_reference(&sv->base.texture, prsc);
   sv->base.context = pctx;

   /* merged with sampler state */
   sv->TE_SAMPLER_CONFIG0 = COND(!ext, VIVS_TE_SAMPLER_CONFIG0_FORMAT(format));
   sv->TE_SAMPLER_CONFIG0_MASK = 0xffffffff;

   switch (sv->base.target) {
   case PIPE_TEXTURE_1D:
      /* For 1D textures, we will have a height of 1, so we can use 2D
       * but set T wrap to repeat */
      sv->TE_SAMPLER_CONFIG0_MASK = ~VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK;
      sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_VWRAP(TEXTURE_WRAPMODE_REPEAT);
      /* fallthrough */
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
      sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_TYPE(TEXTURE_TYPE_2D);
      break;
   case PIPE_TEXTURE_CUBE:
      sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_TYPE(TEXTURE_TYPE_CUBE_MAP);
      break;
   default:
      BUG("Unhandled texture target");
      free(sv);
      return NULL;
   }

   sv->TE_SAMPLER_CONFIG1 = COND(ext, VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(format)) |
                            VIVS_TE_SAMPLER_CONFIG1_HALIGN(res->halign) | swiz;
   sv->TE_SAMPLER_SIZE = VIVS_TE_SAMPLER_SIZE_WIDTH(res->base.width0) |
                         VIVS_TE_SAMPLER_SIZE_HEIGHT(res->base.height0);
   sv->TE_SAMPLER_LOG_SIZE =
      VIVS_TE_SAMPLER_LOG_SIZE_WIDTH(etna_log2_fixp55(res->base.width0)) |
      VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT(etna_log2_fixp55(res->base.height0));

   /* Set up levels-of-detail */
   for (int lod = 0; lod <= res->base.last_level; ++lod) {
      sv->TE_SAMPLER_LOD_ADDR[lod].bo = res->bo;
      sv->TE_SAMPLER_LOD_ADDR[lod].offset = res->levels[lod].offset;
      sv->TE_SAMPLER_LOD_ADDR[lod].flags = ETNA_RELOC_READ;
   }
   sv->min_lod = sv->base.u.tex.first_level << 5;
   sv->max_lod = MIN2(sv->base.u.tex.last_level, res->base.last_level) << 5;

   /* Workaround for npot textures -- it appears that only CLAMP_TO_EDGE is
    * supported when the appropriate capability is not set. */
   if (!ctx->specs.npot_tex_any_wrap &&
       (!util_is_power_of_two(res->base.width0) || !util_is_power_of_two(res->base.height0))) {
      sv->TE_SAMPLER_CONFIG0_MASK = ~(VIVS_TE_SAMPLER_CONFIG0_UWRAP__MASK |
                                      VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK);
      sv->TE_SAMPLER_CONFIG0 |=
         VIVS_TE_SAMPLER_CONFIG0_UWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE) |
         VIVS_TE_SAMPLER_CONFIG0_VWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE);
   }

   return &sv->base;
}
Example #19
/**
 * This uses a blit to copy the read buffer to a texture format which matches
 * the format and type combo and then a fast read-back is done using memcpy.
 * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is
 * a format which matches the swizzling.
 *
 * If such a format isn't available, we fall back to _mesa_readpixels.
 *
 * NOTE: Some drivers use a blit to convert between tiled and linear
 *       texture layouts during texture uploads/downloads, so the blit
 *       we do here should be free in such cases.
 */
static void
st_readpixels(struct gl_context *ctx, GLint x, GLint y,
              GLsizei width, GLsizei height,
              GLenum format, GLenum type,
              const struct gl_pixelstore_attrib *pack,
              GLvoid *pixels)
{
   struct st_context *st = st_context(ctx);
   struct gl_renderbuffer *rb =
         _mesa_get_read_renderbuffer_for_format(ctx, format);
   struct st_renderbuffer *strb = st_renderbuffer(rb);
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;
   struct pipe_resource *src;
   struct pipe_resource *dst = NULL;
   struct pipe_resource dst_templ;
   enum pipe_format dst_format, src_format;
   struct pipe_blit_info blit;
   unsigned bind = PIPE_BIND_TRANSFER_READ;
   struct pipe_transfer *tex_xfer;
   ubyte *map = NULL;

   /* Validate state (to be sure we have up-to-date framebuffer surfaces)
    * and flush the bitmap cache prior to reading. */
   st_validate_state(st);
   st_flush_bitmap_cache(st);

   if (!st->prefer_blit_based_texture_transfer) {
      goto fallback;
   }

   /* This must be done after state validation. */
   src = strb->texture;

   /* XXX Fallback for depth-stencil formats due to an incomplete
    * stencil blit implementation in some drivers. */
   if (format == GL_DEPTH_STENCIL) {
      goto fallback;
   }

   /* We are creating a texture of the size of the region being read back.
    * Need to check for NPOT texture support. */
   if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
       (!util_is_power_of_two(width) ||
        !util_is_power_of_two(height))) {
      goto fallback;
   }

   /* If the base internal format and the texture format don't match, we have
    * to use the slow path. */
   if (rb->_BaseFormat !=
       _mesa_get_format_base_format(rb->Format)) {
      goto fallback;
   }

   /* See if the texture format already matches the format and type,
    * in which case the memcpy-based fast path will likely be used and
    * we don't have to blit. */
   if (_mesa_format_matches_format_and_type(rb->Format, format,
                                            type, pack->SwapBytes)) {
      goto fallback;
   }

   if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) {
      goto fallback;
   }

   /* Convert the source format to what is expected by ReadPixels
    * and see if it's supported. */
   src_format = util_format_linear(src->format);
   src_format = util_format_luminance_to_red(src_format);
   src_format = util_format_intensity_to_red(src_format);

   if (!src_format ||
       !screen->is_format_supported(screen, src_format, src->target,
                                    src->nr_samples,
                                    PIPE_BIND_SAMPLER_VIEW)) {
      goto fallback;
   }

   if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
      bind |= PIPE_BIND_DEPTH_STENCIL;
   else
      bind |= PIPE_BIND_RENDER_TARGET;

   /* Choose the destination format by finding the best match
    * for the format+type combo. */
   dst_format = st_choose_matching_format(screen, bind, format, type,
                                          pack->SwapBytes);
   if (dst_format == PIPE_FORMAT_NONE) {
      goto fallback;
   }

   /* create the destination texture */
   memset(&dst_templ, 0, sizeof(dst_templ));
   dst_templ.target = PIPE_TEXTURE_2D;
   dst_templ.format = dst_format;
   dst_templ.bind = bind;
   dst_templ.usage = PIPE_USAGE_STAGING;

   st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1,
                                   &dst_templ.width0, &dst_templ.height0,
                                   &dst_templ.depth0, &dst_templ.array_size);

   dst = screen->resource_create(screen, &dst_templ);
   if (!dst) {
      goto fallback;
   }

   memset(&blit, 0, sizeof(blit));
   blit.src.resource = src;
   blit.src.level = strb->surface->u.tex.level;
   blit.src.format = src_format;
   blit.dst.resource = dst;
   blit.dst.level = 0;
   blit.dst.format = dst->format;
   blit.src.box.x = x;
   blit.dst.box.x = 0;
   blit.src.box.y = y;
   blit.dst.box.y = 0;
   blit.src.box.z = strb->surface->u.tex.first_layer;
   blit.dst.box.z = 0;
   blit.src.box.width = blit.dst.box.width = width;
   blit.src.box.height = blit.dst.box.height = height;
   blit.src.box.depth = blit.dst.box.depth = 1;
   blit.mask = st_get_blit_mask(rb->_BaseFormat, format);
   blit.filter = PIPE_TEX_FILTER_NEAREST;
   blit.scissor_enable = FALSE;

   if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
      blit.src.box.y = rb->Height - blit.src.box.y;
      blit.src.box.height = -blit.src.box.height;
   }

   /* blit */
   st->pipe->blit(st->pipe, &blit);

   /* map resources */
   pixels = _mesa_map_pbo_dest(ctx, pack, pixels);

   map = pipe_transfer_map_3d(pipe, dst, 0, PIPE_TRANSFER_READ,
                              0, 0, 0, width, height, 1, &tex_xfer);
   if (!map) {
      _mesa_unmap_pbo_dest(ctx, pack);
      pipe_resource_reference(&dst, NULL);
      goto fallback;
   }

   /* memcpy data into a user buffer */
   {
      const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
      GLuint row;

      for (row = 0; row < (unsigned) height; row++) {
         GLvoid *dest = _mesa_image_address3d(pack, pixels,
                                              width, height, format,
                                              type, 0, row, 0);
         memcpy(dest, map, bytesPerRow);
         map += tex_xfer->stride;
      }
   }

   pipe_transfer_unmap(pipe, tex_xfer);
   _mesa_unmap_pbo_dest(ctx, pack);
   pipe_resource_reference(&dst, NULL);
   return;

fallback:
   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
}
Example #20
/**
 * Initialize lp_sampler_static_state object with the gallium sampler
 * and texture state.
 * The former is considered to be static and the latter dynamic.
 */
void
lp_sampler_static_state(struct lp_sampler_static_state *state,
                        const struct pipe_sampler_view *view,
                        const struct pipe_sampler_state *sampler)
{
   const struct pipe_resource *texture = view->texture;

   memset(state, 0, sizeof *state);

   if(!texture)
      return;

   if(!sampler)
      return;

   /*
    * We don't copy sampler state over unless it is actually enabled, to avoid
    * spurious recompiles, as the sampler static state is part of the shader
    * key.
    *
    * Ideally the state tracker or cso_cache module would make all state
    * canonical, but until that happens it's better to be safe than sorry here.
    *
    * XXX: Actually there's much more that can be done here, especially
    * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
    */

   state->format            = view->format;
   state->swizzle_r         = view->swizzle_r;
   state->swizzle_g         = view->swizzle_g;
   state->swizzle_b         = view->swizzle_b;
   state->swizzle_a         = view->swizzle_a;

   state->target            = texture->target;
   state->pot_width         = util_is_power_of_two(texture->width0);
   state->pot_height        = util_is_power_of_two(texture->height0);
   state->pot_depth         = util_is_power_of_two(texture->depth0);

   state->wrap_s            = sampler->wrap_s;
   state->wrap_t            = sampler->wrap_t;
   state->wrap_r            = sampler->wrap_r;
   state->min_img_filter    = sampler->min_img_filter;
   state->mag_img_filter    = sampler->mag_img_filter;

   if (view->u.tex.last_level && sampler->max_lod > 0.0f) {
      state->min_mip_filter = sampler->min_mip_filter;
   } else {
      state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
   }

   if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      if (sampler->lod_bias != 0.0f) {
         state->lod_bias_non_zero = 1;
      }

      /* If min_lod == max_lod we can greatly simplify mipmap selection.
       * This is a case that occurs during automatic mipmap generation.
       */
      if (sampler->min_lod == sampler->max_lod) {
         state->min_max_lod_equal = 1;
      } else {
         if (sampler->min_lod > 0.0f) {
            state->apply_min_lod = 1;
         }

         if (sampler->max_lod < (float)view->u.tex.last_level) {
            state->apply_max_lod = 1;
         }
      }
   }

   state->compare_mode      = sampler->compare_mode;
   if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
      state->compare_func   = sampler->compare_func;
   }

   state->normalized_coords = sampler->normalized_coords;

   /*
    * FIXME: Handle the remainder of pipe_sampler_view.
    */
}
Example #21
/**
 * Check alignment.
 *
 * It is important that this check is not implemented as a macro or inlined
 * function, as the compiler assumptions in respect to alignment of global
 * and stack variables would often make the check a no op, defeating the
 * whole purpose of the exercise.
 */
extern "C" boolean
lp_check_alignment(const void *ptr, unsigned alignment)
{
   assert(util_is_power_of_two(alignment));
   return ((uintptr_t)ptr & (alignment - 1)) == 0;
}
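lp_check_alignment() relies on the same masking trick: for a power-of-two alignment, an aligned address has all of its low bits below that boundary clear. A standalone usage sketch follows (plain C instead of the gallium boolean typedef, with names invented for the example).

/* Standalone sketch of the alignment check above: a pointer is aligned to a
 * power-of-two boundary exactly when its low bits below that boundary are 0. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int check_alignment(const void *ptr, unsigned alignment)
{
   return ((uintptr_t)ptr & (alignment - 1)) == 0;
}

int main(void)
{
   void *p = malloc(64);
   printf("p     : %d\n", check_alignment(p, 16));
   printf("p + 1 : %d\n", check_alignment((char *)p + 1, 16));
   free(p);
   return 0;
}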
Example #22
static bool
ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
                       const struct ilo_state_ps_info *info,
                       struct pixel_ff *ff)
{
   const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
   const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
   const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
   uint32_t scratch_size;

   ILO_DEV_ASSERT(dev, 6, 8);

   ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info);

   /* initialize kernel offsets and GRF starts */
   if (util_is_power_of_two(ff->dispatch_modes)) {
      if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) {
         ff->kernel_offsets[0] = kernel_8->offset;
         ff->grf_starts[0] = kernel_8->grf_start;
      } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) {
         ff->kernel_offsets[0] = kernel_16->offset;
         ff->grf_starts[0] = kernel_16->grf_start;
      } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) {
         ff->kernel_offsets[0] = kernel_32->offset;
         ff->grf_starts[0] = kernel_32->grf_start;
      }
   } else {
      ff->kernel_offsets[0] = kernel_8->offset;
      ff->kernel_offsets[1] = kernel_32->offset;
      ff->kernel_offsets[2] = kernel_16->offset;

      ff->grf_starts[0] = kernel_8->grf_start;
      ff->grf_starts[1] = kernel_32->grf_start;
      ff->grf_starts[2] = kernel_16->grf_start;
   }

   /* we do not want to save it */
   assert(ff->kernel_offsets[0] == 0);

   ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
                      kernel_8->pcb_attr_count) ||
                     ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
                      kernel_16->pcb_attr_count) ||
                     ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
                      kernel_32->pcb_attr_count));

   scratch_size = 0;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
       scratch_size < kernel_8->scratch_size)
      scratch_size = kernel_8->scratch_size;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
       scratch_size < kernel_16->scratch_size)
      scratch_size = kernel_16->scratch_size;
   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
       scratch_size < kernel_32->scratch_size)
      scratch_size = kernel_32->scratch_size;

   /* next power of two, starting from 1KB */
   ff->scratch_space = (scratch_size > 1024) ?
      (util_last_bit(scratch_size - 1) - 10): 0;

   /* GPU hangs on Haswell if none of the dispatch mode bits is set */
   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
      ff->dispatch_modes |= GEN6_PS_DISPATCH_8;

   return true;
}
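Two power-of-two tricks appear here: util_is_power_of_two(ff->dispatch_modes) asks whether no more than one dispatch mode bit is set, and the scratch_space line encodes the scratch size as a power-of-two bucket starting at 1KB. The sketch below reproduces only the latter encoding; last_bit() is a hypothetical stand-in for Mesa's util_last_bit() (1-based index of the highest set bit, 0 for zero).

/* Worked sketch of the "next power of two, starting from 1KB" encoding:
 * the result is the number of doublings above 1KB needed to hold the
 * requested scratch size. */
#include <stdio.h>

static unsigned last_bit(unsigned x)
{
   unsigned n = 0;
   while (x) {
      n++;
      x >>= 1;
   }
   return n;
}

static unsigned encode_scratch_space(unsigned scratch_size)
{
   return (scratch_size > 1024) ? last_bit(scratch_size - 1) - 10 : 0;
}

int main(void)
{
   printf("%u\n", encode_scratch_space(1024));   /* 0 -> fits in 1KB */
   printf("%u\n", encode_scratch_space(1025));   /* 1 -> 2KB bucket  */
   printf("%u\n", encode_scratch_space(5000));   /* 3 -> 8KB bucket  */
   return 0;
}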
Example #23
/**
 * Gather one element from scatter positions in memory.
 * Nearly the same as above, however the individual elements
 * may be vectors themselves, and fetches may be float type.
 * Can also do pad vector instead of ZExt.
 *
 * @sa lp_build_gather()
 */
static LLVMValueRef
lp_build_gather_elem_vec(struct gallivm_state *gallivm,
                         unsigned length,
                         unsigned src_width,
                         LLVMTypeRef src_type,
                         struct lp_type dst_type,
                         boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i,
                         boolean vector_justify)
{
   LLVMValueRef ptr, res;
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      if (((src_width / 24) * 24 == src_width) &&
           util_is_power_of_two(src_width / 24)) {
          LLVMSetAlignment(res, src_width / 24);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_type.width * dst_type.length);
   if (src_width < dst_type.width * dst_type.length) {
      if (dst_type.length > 1) {
         res = lp_build_pad_vector(gallivm, res, dst_type.length);
         /*
          * vector_justify hopefully a non-issue since we only deal
          * with src_width >= 32 here?
          */
      } else {
         LLVMTypeRef dst_elem_type = lp_build_vec_type(gallivm, dst_type);

         /*
          * Only valid if src_ptr_type is int type...
          */
         res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

#ifdef PIPE_ARCH_BIG_ENDIAN
         if (vector_justify) {
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type,
                                         dst_type.width - src_width, 0), "");
         }
         if (src_width == 48) {
            /* Load 3x16 bit vector.
             * The sequence of loads on big-endian hardware proceeds as follows.
             * 16-bit fields are denoted by X, Y, Z, and 0.  In memory, the sequence
             * of three fields appears in the order X, Y, Z.
             *
             * Load 32-bit word: 0.0.X.Y
             * Load 16-bit halfword: 0.0.0.Z
             * Rotate left: 0.X.Y.0
             * Bitwise OR: 0.X.Y.Z
             *
             * The order in which we need the fields in the result is 0.Z.Y.X,
             * the same as on little-endian; permute 16-bit fields accordingly
             * within 64-bit register:
             */
            LLVMValueRef shuffles[4] = {
               lp_build_const_int32(gallivm, 2),
               lp_build_const_int32(gallivm, 1),
               lp_build_const_int32(gallivm, 0),
               lp_build_const_int32(gallivm, 3),
            };
            res = LLVMBuildBitCast(gallivm->builder, res,
                                   lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), "");
            res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), "");
            res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
         }
#endif
      }
   }
   return res;
}
Example #24
/**
 * Gather one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     boolean aligned,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i,
                     boolean vector_justify)
{
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef ptr;
   LLVMValueRef res;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      if (((src_width / 24) * 24 == src_width) &&
           util_is_power_of_two(src_width / 24)) {
          LLVMSetAlignment(res, src_width / 24);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_width);
   if (src_width < dst_width) {
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
      if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type, dst_width - src_width, 0), "");
#endif
      }
   }

   return res;
}
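The fallback in the !util_is_power_of_two(src_width) branch accepts fetch widths of the form 3 x 2^n bytes (96-bit R32G32B32, 48-bit R16G16B16, ...) and drops down to the per-element alignment of src_width / 24 bytes. A worked sketch of just that decision, with elem_alignment() as an invented name for the example:

/* Worked sketch of the 3-channel alignment fallback above: when the fetch
 * width is 3 x 2^n bytes, use the per-channel alignment; otherwise assume
 * plain byte alignment. */
#include <stdio.h>

static unsigned elem_alignment(unsigned src_width_bits)
{
   unsigned bytes = src_width_bits / 24;   /* one channel, in bytes */
   if (bytes * 24 == src_width_bits && (bytes & (bytes - 1)) == 0)
      return bytes;                        /* 96 -> 4, 48 -> 2, 24 -> 1 */
   return 1;
}

int main(void)
{
   printf("96-bit fetch -> %u-byte alignment\n", elem_alignment(96));
   printf("48-bit fetch -> %u-byte alignment\n", elem_alignment(48));
   printf("56-bit fetch -> %u-byte alignment\n", elem_alignment(56));
   return 0;
}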
Example #25
/**
 * Fetch a pixel into a 4 float AoS.
 *
 * \param format_desc  describes format of the image we're fetching from
 * \param ptr  address of the pixel block (or the texel if uncompressed)
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 *              these will always be (0, 0).
 * \return  a 4 element vector with the pixel's RGBA values.
 */
LLVMValueRef
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j)
{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned num_pixels = type.length / 4;
   struct lp_build_context bld;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   assert(type.length % 4 == 0);

   lp_build_context_init(&bld, gallivm, type);

   /*
    * Trivial case
    *
    * The format matches the type (apart from a swizzle) so no need for
    * scaling or converting.
    */

   if (format_matches_type(format_desc, type) &&
       format_desc->block.bits <= type.width * 4 &&
       util_is_power_of_two(format_desc->block.bits)) {
      LLVMValueRef packed;

      /*
       * The format matches the type (apart from a swizzle) so no need for
       * scaling or converting.
       */

      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, type.width*4,
                               base_ptr, offset);

      assert(format_desc->block.bits <= type.width * type.length);

      packed = LLVMBuildBitCast(gallivm->builder, packed,
                                lp_build_vec_type(gallivm, type), "");

      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
   }

   /*
    * Bit arithmetic
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       util_is_power_of_two(format_desc->block.bits) &&
       format_desc->block.bits <= 32 &&
       format_desc->is_bitmask &&
       !format_desc->is_mixed &&
       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {

      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      unsigned k;

      /*
       * Unpack a pixel at a time into a <4 x float> RGBA vector
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef packed;

         packed = lp_build_gather_elem(gallivm, num_pixels,
                                       format_desc->block.bits, 32,
                                       base_ptr, offset, k);

         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                                                  format_desc,
                                                  packed);
      }

      /*
       * Type conversion.
       *
       * TODO: We could avoid floating conversion for integer to
       * integer conversions.
       */

      if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
         debug_printf("%s: unpacking %s with floating point\n",
                      __FUNCTION__, format_desc->short_name);
      }

      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return lp_build_format_swizzle_aos(format_desc, &bld, res);
   }

   /*
    * YUV / subsampled formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
                                               format_desc,
                                               num_pixels,
                                               base_ptr,
                                               offset,
                                               i, j);

      lp_build_conv(gallivm,
                    tmp_type, type,
                    &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_8unorm().
    */

   if (format_desc->fetch_rgba_8unorm &&
       !type.floating && type.width == 8 && !type.sign && type.norm) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_8unorm.
       *
       * This is definitely not the most efficient way of fetching pixels,
       * as we miss the opportunity to vectorize, but it is convenient for
       * formats or scenarios for which there was no opportunity or
       * incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      char name[256];
      LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
      LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmp;
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_8unorm().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pi8t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* make const pointer for the C fetch_rgba_8unorm function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");

      tmp_ptr = lp_build_alloca(gallivm, i32t, "");
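      /* Stack slot that receives one packed RGBA8 texel (as an i32) per call. */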

      res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
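      /* Placeholder vector; each lane is filled in the loop below. */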

      /*
       * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert
       * the packed result into the corresponding lane of the result vector.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmp = LLVMBuildLoad(builder, tmp_ptr, "");

         if (num_pixels == 1) {
            res = tmp;
         }
         else {
            res = LLVMBuildInsertElement(builder, res, tmp, index, "");
         }
      }

      /* Bitcast from <n x i32> to <4n x i8> */
      res = LLVMBuildBitCast(builder, res, bld.vec_type, "");

      return res;
   }


   /*
    * Fallback to util_format_description::fetch_rgba_float().
    */

   if (format_desc->fetch_rgba_float) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_float.
       *
       * This is definitely not the most efficient way of fetching pixels,
       * as we miss the opportunity to vectorize, but it is convenient for
       * formats or scenarios for which there was no opportunity or
       * incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
      char name[256];
      LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
      LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
      LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
      LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_float().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pf32t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* Note: we're using this casting here instead of LLVMAddGlobalMapping()
       * to work around a bug in LLVM 2.6.
       */

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_float));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");


      tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
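      /* Stack slot that receives one <4 x float> RGBA result per call. */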

      /*
       * Invoke format_desc->fetch_rgba_float() for each pixel and collect
       * the per-pixel <4 x float> results for conversion below.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         }
         else {
            LLVMValueRef index = lp_build_const_int32(gallivm, k);
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
      }

      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return res;
   }
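
   /* None of the paths above could handle this format: assert in debug
    * builds and return an undef value otherwise.
    */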

   assert(0);
   return lp_build_undef(gallivm, type);
}
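
A minimal caller sketch for the fetch builder above (in Mesa this is
gallivm's lp_build_fetch_rgba_aos(); the name and exact prototype are
assumptions here, since the listing starts mid-function). The caller
supplies the format description, the desired AoS vector type, a base
pointer plus per-pixel byte offsets, and the i/j sub-block coordinates:

/* Hypothetical helper -- the prototype of lp_build_fetch_rgba_aos() and the
 * surrounding gallivm setup are assumed, not taken from this listing.
 */
static LLVMValueRef
fetch_one_rgba8_texel(struct gallivm_state *gallivm,
                      enum pipe_format format,
                      LLVMValueRef base_ptr,  /* i8* pointing at the texel data */
                      LLVMValueRef offset,    /* byte offset of the texel */
                      LLVMValueRef i, LLVMValueRef j)
{
   const struct util_format_description *desc = util_format_description(format);
   struct lp_type type;

   memset(&type, 0, sizeof type);
   type.width = 8;    /* 8 bits per channel */
   type.length = 4;   /* one RGBA texel in AoS layout */
   type.norm = TRUE;  /* unsigned normalized */

   return lp_build_fetch_rgba_aos(gallivm, desc, type,
                                  base_ptr, offset, i, j);
}
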
Exemplo n.º 26
0
static void r300_setup_flags(struct r300_texture* tex)
{
    tex->is_npot = !util_is_power_of_two(tex->tex.width0) ||
                   !util_is_power_of_two(tex->tex.height0);
}
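
The flag above hinges on util_is_power_of_two(). A standalone sketch of the
underlying bit trick (not the Gallium helper itself, which may treat zero
differently):

/* A value is a power of two when exactly one bit is set, so clearing the
 * lowest set bit must leave zero.
 */
static inline int
is_power_of_two_sketch(unsigned x)
{
   return x != 0 && (x & (x - 1)) == 0;
}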