Example #1
VkResult intel_query_create(struct intel_dev *dev,
                            const VkQueryPoolCreateInfo *info,
                            struct intel_query **query_ret)
{
    struct intel_query *query;

    query = (struct intel_query *) intel_base_create(&dev->base.handle,
            sizeof(*query), dev->base.dbg, VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT,
            info, 0);
    if (!query)
        return VK_ERROR_OUT_OF_HOST_MEMORY;

    query->type = info->queryType;
    query->slot_count = info->queryCount;

    /*
     * For each query type, the GPU will be asked to write the values of some
     * registers to a buffer before and after a sequence of commands.  We will
     * compare the differences to get the query results.
     */
    switch (info->queryType) {
    case VK_QUERY_TYPE_OCCLUSION:
        query->slot_stride = u_align(sizeof(uint64_t) * 2, 64);
        break;
    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
        query_init_pipeline_statistics(dev, info, query);
        break;
    case VK_QUERY_TYPE_TIMESTAMP:
        query->slot_stride = u_align(sizeof(uint64_t), 64);
        break;
    default:
        assert(!"unknown query type");
        break;
    }

    VkMemoryAllocateInfo alloc_info;
    alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    alloc_info.pNext = NULL;
    alloc_info.allocationSize = query->slot_stride * query->slot_count;
    alloc_info.memoryTypeIndex = 0;
    intel_mem_alloc(dev, &alloc_info, &query->obj.mem);

    query->obj.destroy = query_destroy;

    *query_ret = query;

    return VK_SUCCESS;
}
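
Every example on this page leans on the u_align helper, which the page itself never shows. Here is a minimal sketch of the behavior these call sites assume (round a value up to the next multiple of a power-of-two alignment); the signature is a guess, not the driver's actual declaration:

/* assumed helper: round v up to the next multiple of a, where a is a
 * power of two */
static inline size_t u_align(size_t v, size_t a)
{
    return (v + a - 1) & ~(a - 1);
}

For instance, u_align(48, 64) == 64 and u_align(4097, 4096) == 8192, which is exactly what the slot-stride and page-size computations in these examples depend on.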
Example #2
/**
 * Grow a mapped writer to at least \p new_size.  Failures are not
 * returned to the caller; the command buffer is marked as failed via
 * cmd_fail() instead.
 */
void cmd_writer_grow(struct intel_cmd *cmd,
                     enum intel_cmd_writer_type which,
                     size_t new_size)
{
    struct intel_cmd_writer *writer = &cmd->writers[which];
    struct intel_bo *new_bo;
    void *new_ptr;

    if (new_size < writer->size << 1)
        new_size = writer->size << 1;
    /* STATE_BASE_ADDRESS requires page-aligned buffers */
    new_size = u_align(new_size, 4096);

    new_bo = alloc_writer_bo(cmd->dev->winsys, which, new_size);
    if (!new_bo) {
        cmd_writer_discard(cmd, which);
        cmd_fail(cmd, VK_ERROR_OUT_OF_DEVICE_MEMORY);
        return;
    }

    /* map and copy the data over */
    new_ptr = intel_bo_map(new_bo, true);
    if (!new_ptr) {
        intel_bo_unref(new_bo);
        cmd_writer_discard(cmd, which);
        cmd_fail(cmd, VK_ERROR_VALIDATION_FAILED_EXT);
        return;
    }

    memcpy(new_ptr, writer->ptr, writer->used);

    intel_bo_unmap(writer->bo);
    intel_bo_unref(writer->bo);

    writer->size = new_size;
    writer->bo = new_bo;
    writer->ptr = new_ptr;
}
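
The sizing rule above (grow at least 2x, then round up to a whole 4 KiB page) can be isolated into a stand-alone sketch; grown_size is a hypothetical name, not a function in the driver:

/* sketch of cmd_writer_grow()'s sizing policy */
static size_t grown_size(size_t cur_size, size_t requested)
{
    size_t new_size = requested;

    /* at least double, so repeated appends stay amortized O(1) */
    if (new_size < cur_size << 1)
        new_size = cur_size << 1;

    /* STATE_BASE_ADDRESS requires page-aligned buffers */
    return u_align(new_size, 4096);
}

With a 4096-byte writer, grown_size(4096, 5000) returns 8192 (the doubling wins), while grown_size(4096, 10000) returns 12288 (the request wins, page-aligned).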
Example #3
static void query_init_pipeline_statistics(
        struct intel_dev *dev,
        const VkQueryPoolCreateInfo *info,
        struct intel_query *query)
{
    /*
     * Note: order defined by Vulkan spec.
     */
    const uint32_t regs[][2] = {
        {VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT, GEN6_REG_IA_PRIMITIVES_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT, GEN6_REG_VS_INVOCATION_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT, GEN6_REG_GS_INVOCATION_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT, GEN6_REG_GS_PRIMITIVES_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT, GEN6_REG_CL_INVOCATION_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT, GEN6_REG_CL_PRIMITIVES_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT, GEN6_REG_PS_INVOCATION_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT, (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7)) ? GEN7_REG_HS_INVOCATION_COUNT : 0},
        {VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT, (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7)) ? GEN7_REG_DS_INVOCATION_COUNT : 0},
        {VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT, 0}
    };
    STATIC_ASSERT(ARRAY_SIZE(regs) < 32);
    uint32_t i;
    uint32_t reg_count = 0;

    /*
     * Only query registers indicated via pipeline statistics flags.
     * If HW does not support a flag, fill value with 0.
     */
    for (i = 0; i < ARRAY_SIZE(regs); i++) {
        if (regs[i][0] & info->pipelineStatistics) {
            query->regs[reg_count] = regs[i][1];
            reg_count++;
        }
    }

    query->reg_count = reg_count;
    query->slot_stride = u_align(reg_count * sizeof(uint64_t) * 2, 64);
}
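
Each slot stores a begin and an end snapshot of every selected 64-bit counter, hence the stride of reg_count * sizeof(uint64_t) * 2 rounded up to 64 bytes. A worked instance with hypothetical numbers, assuming three pipelineStatistics bits are set:

/* hypothetical: three statistics enabled -> three registers snapshotted
 * twice, i.e. 3 * 8 * 2 = 48 bytes of raw data per slot */
uint32_t reg_count = 3;
size_t slot_stride = u_align(reg_count * sizeof(uint64_t) * 2, 64); /* 64 */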
Example #4
static VkResult buf_get_memory_requirements(struct intel_base *base,
                                            VkMemoryRequirements *pRequirements)
{
    struct intel_buf *buf = intel_buf_from_base(base);

    /*
     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
     *
     *     "For buffers, which have no inherent "height," padding
     *      requirements are different. A buffer must be padded to the
     *      next multiple of 256 array elements, with an additional 16
     *      bytes added beyond that to account for the L1 cache line."
     */
    pRequirements->size = buf->size;
    if (buf->usage & (VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
                      VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)) {
        pRequirements->size = u_align(pRequirements->size, 256) + 16;
    }

    pRequirements->alignment      = 4096;
    pRequirements->memoryTypeBits = (1 << INTEL_MEMORY_TYPE_COUNT) - 1;

    return VK_SUCCESS;
}
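
To make the quoted padding rule concrete, take a hypothetical 100-byte buffer created with VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT:

/* hypothetical 100-byte uniform buffer */
VkDeviceSize size = u_align((VkDeviceSize) 100, 256) + 16; /* == 272 */

The size is first rounded up to the 256-byte boundary and then extended by the 16-byte L1 cache-line allowance the PRM calls for.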
Example #5
VkResult intel_img_create(struct intel_dev *dev,
                          const VkImageCreateInfo *info,
                          const VkAllocationCallbacks *allocator,
                          bool scanout,
                          struct intel_img **img_ret)
{
    struct intel_img *img;
    struct intel_layout *layout;

    img = (struct intel_img *) intel_base_create(&dev->base.handle,
            sizeof(*img), dev->base.dbg, VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, info, 0);
    if (!img)
        return VK_ERROR_OUT_OF_HOST_MEMORY;

    layout = &img->layout;

    img->type = info->imageType;
    img->depth = info->extent.depth;
    img->mip_levels = info->mipLevels;
    img->array_size = info->arrayLayers;
    img->usage = info->usage;
    img->sample_count = (uint32_t) info->samples;
    intel_layout_init(layout, dev, info, scanout);

    img->total_size = img->layout.bo_stride * img->layout.bo_height;

    if (layout->aux != INTEL_LAYOUT_AUX_NONE) {
        img->aux_offset = u_align(img->total_size, 4096);
        img->total_size = img->aux_offset +
            layout->aux_stride * layout->aux_height;
    }

    if (layout->separate_stencil) {
        VkImageCreateInfo s8_info;

        img->s8_layout = intel_alloc(img, sizeof(*img->s8_layout), sizeof(int),
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (!img->s8_layout) {
            intel_img_destroy(img);
            return VK_ERROR_OUT_OF_HOST_MEMORY;
        }

        s8_info = *info;
        s8_info.format = VK_FORMAT_S8_UINT;
        /* no stencil texturing */
        s8_info.usage &= ~VK_IMAGE_USAGE_SAMPLED_BIT;
        assert(icd_format_is_ds(info->format));

        intel_layout_init(img->s8_layout, dev, &s8_info, scanout);

        img->s8_offset = u_align(img->total_size, 4096);
        img->total_size = img->s8_offset +
            img->s8_layout->bo_stride * img->s8_layout->bo_height;
    }

    if (scanout) {
        VkResult ret = intel_wsi_img_init(img);
        if (ret != VK_SUCCESS) {
            intel_img_destroy(img);
            return ret;
        }
    }

    img->obj.destroy = img_destroy;
    img->obj.base.get_memory_requirements = img_get_memory_requirements;

    *img_ret = img;

    return VK_SUCCESS;
}
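
The packing scheme above appends each optional plane at the next 4 KiB boundary: the main surface first, then the HiZ/MCS aux plane, then the separate stencil plane. A sketch with placeholder byte sizes (the real extents come from intel_layout_init()):

/* hypothetical helper; main_sz, aux_sz and s8_sz are the byte sizes of
 * the three planes */
static size_t img_total_size(size_t main_sz, size_t aux_sz, size_t s8_sz)
{
    size_t total = main_sz;

    size_t aux_offset = u_align(total, 4096); /* aux plane starts here */
    total = aux_offset + aux_sz;

    size_t s8_offset = u_align(total, 4096);  /* stencil plane starts here */
    total = s8_offset + s8_sz;

    return total;
}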
Example #6
static void
layout_calculate_hiz_size(struct intel_layout *layout,
                          struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;
   const unsigned hz_align_j = 8;
   enum intel_layout_walk_type hz_walk;
   unsigned hz_width, hz_height, lv;
   unsigned hz_clear_w, hz_clear_h;

   assert(layout->aux == INTEL_LAYOUT_AUX_HIZ);

   assert(layout->walk == INTEL_LAYOUT_WALK_LAYER ||
          layout->walk == INTEL_LAYOUT_WALK_3D);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 312:
    *
    *     "The hierarchical depth buffer does not support the LOD field, it is
    *      assumed by hardware to be zero. A separate hierarchical depth
    *      buffer is required for each LOD used, and the corresponding
    *      buffer's state delivered to hardware each time a new depth buffer
    *      state with modified LOD is delivered."
    *
    * We will put all LODs in a single bo with INTEL_LAYOUT_WALK_LOD.
    */
   if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
      hz_walk = layout->walk;
   else
      hz_walk = INTEL_LAYOUT_WALK_LOD;

   /*
    * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
    * PRM, volume 2 part 1, pages 312-313.
    *
    * It seems the HiZ buffer is aligned to 8x8, with every two rows packed into a
    * memory row.
    */
   switch (hz_walk) {
   case INTEL_LAYOUT_WALK_LOD:
      {
         unsigned lod_tx[INTEL_LAYOUT_MAX_LEVELS];
         unsigned lod_ty[INTEL_LAYOUT_MAX_LEVELS];
         unsigned cur_tx, cur_ty;

         /* figure out the tile offsets of LODs */
         hz_width = 0;
         hz_height = 0;
         cur_tx = 0;
         cur_ty = 0;
         for (lv = 0; lv < info->mipLevels; lv++) {
            unsigned tw, th;

            lod_tx[lv] = cur_tx;
            lod_ty[lv] = cur_ty;

            tw = u_align(layout->lods[lv].slice_width, 16);
            th = u_align(layout->lods[lv].slice_height, hz_align_j) *
               info->arrayLayers / 2;
            /* convert to Y-tiles */
            tw = u_align(tw, 128) / 128;
            th = u_align(th, 32) / 32;

            if (hz_width < cur_tx + tw)
               hz_width = cur_tx + tw;
            if (hz_height < cur_ty + th)
               hz_height = cur_ty + th;

            if (lv == 1)
               cur_tx += tw;
            else
               cur_ty += th;
         }

         /* convert tile offsets to memory offsets */
         for (lv = 0; lv < info->mipLevels; lv++) {
            layout->aux_offsets[lv] =
               (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
         }
         hz_width *= 128;
         hz_height *= 32;
      }
      break;
   case INTEL_LAYOUT_WALK_LAYER:
      {
         const unsigned h0 = u_align(params->h0, hz_align_j);
         const unsigned h1 = u_align(params->h1, hz_align_j);
         const unsigned htail =
            ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
         const unsigned hz_qpitch = h0 + h1 + htail;

         hz_width = u_align(layout->lods[0].slice_width, 16);

         hz_height = hz_qpitch * info->arrayLayers / 2;
         if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
            hz_height = u_align(hz_height, 8);

         layout->aux_layer_height = hz_qpitch;
      }
      break;
   case INTEL_LAYOUT_WALK_3D:
      hz_width = u_align(layout->lods[0].slice_width, 16);

      hz_height = 0;
      for (lv = 0; lv < info->mipLevels; lv++) {
         const unsigned h = u_align(layout->lods[lv].slice_height, hz_align_j);
         /* according to the formula, slices are packed together vertically */
         hz_height += h * u_minify(info->extent.depth, lv);
      }
      hz_height /= 2;
      break;
   default:
      assert(!"unknown layout walk");
      hz_width = 0;
      hz_height = 0;
      break;
   }

   /*
    * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
    * Experiments on Haswell show that aligning the RECTLIST primitive and
    * 3DSTATE_DRAWING_RECTANGLE alone are not enough.  The LOD sizes must be
    * aligned.
    */
   hz_clear_w = 8;
   hz_clear_h = 4;
   switch (info->samples) {
   case VK_SAMPLE_COUNT_1_BIT:
   default:
      break;
   case VK_SAMPLE_COUNT_2_BIT:
      hz_clear_w /= 2;
      break;
   case VK_SAMPLE_COUNT_4_BIT:
      hz_clear_w /= 2;
      hz_clear_h /= 2;
      break;
   case VK_SAMPLE_COUNT_8_BIT:
      hz_clear_w /= 4;
      hz_clear_h /= 2;
      break;
   case VK_SAMPLE_COUNT_16_BIT:
      hz_clear_w /= 4;
      hz_clear_h /= 4;
      break;
   }

   for (lv = 0; lv < info->mipLevels; lv++) {
      if (u_minify(layout->width0, lv) % hz_clear_w ||
          u_minify(layout->height0, lv) % hz_clear_h)
         break;
      layout->aux_enables |= 1 << lv;
   }

   /* we padded to allow this in layout_align() */
   if (info->mipLevels == 1 && info->arrayLayers == 1 && info->extent.depth == 1)
      layout->aux_enables |= 0x1;

   /* align to Y-tile */
   layout->aux_stride = u_align(hz_width, 128);
   layout->aux_height = u_align(hz_height, 32);
}
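
The LOD walk above counts in whole Y-tiles, which are 128 bytes wide by 32 rows tall (4 KiB each); that is why the final tile coordinates convert back to byte offsets with a multiply by 4096. A restatement of that one step, with a hypothetical function name:

/* with hz_width measured in Y-tiles, the tile at (tx, ty) begins at this
 * byte offset; 128 bytes x 32 rows = 4096 bytes per Y-tile */
static unsigned hiz_tile_to_offset(unsigned tx, unsigned ty,
                                   unsigned hz_width_in_tiles)
{
    return (ty * hz_width_in_tiles + tx) * 4096;
}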
Example #7
/* note that this may force the texture to be linear */
static void
layout_calculate_bo_size(struct intel_layout *layout,
                         struct intel_layout_params *params)
{
   assert(params->max_x % layout->block_width == 0);
   assert(params->max_y % layout->block_height == 0);
   assert(layout->layer_height % layout->block_height == 0);

   layout->bo_stride =
      (params->max_x / layout->block_width) * layout->block_size;
   layout->bo_height = params->max_y / layout->block_height;

   while (true) {
      unsigned w = layout->bo_stride, h = layout->bo_height;
      unsigned align_w, align_h;

      /*
       * From the Haswell PRM, volume 5, page 163:
       *
       *     "For linear surfaces, additional padding of 64 bytes is required
       *      at the bottom of the surface. This is in addition to the padding
       *      required above."
       */
      if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
          (params->info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) &&
          layout->tiling == GEN6_TILING_NONE)
         h += (64 + layout->bo_stride - 1) / layout->bo_stride;

      /*
       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
       *
       *     "- For linear render target surfaces, the pitch must be a
       *        multiple of the element size for non-YUV surface formats.
       *        Pitch must be a multiple of 2 * element size for YUV surface
       *        formats.
       *      - For other linear surfaces, the pitch can be any multiple of
       *        bytes.
       *      - For tiled surfaces, the pitch must be a multiple of the tile
       *        width."
       *
       * Different requirements may exist when the bo is used in different
       * places, but our alignments here should be good enough that we do not
       * need to check layout->info->usage.
       */
      switch (layout->tiling) {
      case GEN6_TILING_X:
         align_w = 512;
         align_h = 8;
         break;
      case GEN6_TILING_Y:
         align_w = 128;
         align_h = 32;
         break;
      case GEN8_TILING_W:
         /*
          * From the Sandy Bridge PRM, volume 1 part 2, page 22:
          *
          *     "A 4KB tile is subdivided into 8-high by 8-wide array of
          *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
          *      bytes."
          */
         align_w = 64;
         align_h = 64;
         break;
      default:
         assert(layout->tiling == GEN6_TILING_NONE);
         /* some good enough values */
         align_w = 64;
         align_h = 2;
         break;
      }

      w = u_align(w, align_w);
      h = u_align(h, align_h);

      /* make sure the bo is mappable */
      if (layout->tiling != GEN6_TILING_NONE) {
         /*
          * Usually only the first 256MB of the GTT is mappable.
          *
          * See also how intel_context::max_gtt_map_object_size is calculated.
          */
         const size_t mappable_gtt_size = 256 * 1024 * 1024;

         /*
          * Be conservative.  We may be able to switch from VALIGN_4 to
          * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
          */
         if (mappable_gtt_size / w / 4 < h) {
            if (layout->valid_tilings & LAYOUT_TILING_NONE) {
               layout->tiling = GEN6_TILING_NONE;
               /* MCS support for non-MSRTs is limited to tiled RTs */
               if (layout->aux == INTEL_LAYOUT_AUX_MCS &&
                   params->info->samples == VK_SAMPLE_COUNT_1_BIT)
                  layout->aux = INTEL_LAYOUT_AUX_NONE;

               continue;
            } else {
               /* mapping will fail */
            }
         }
      }

      layout->bo_stride = w;
      layout->bo_height = h;
      break;
   }
}
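
The mappability test divides the budget down rather than multiplying the extents up, which sidesteps integer overflow on huge surfaces: mappable_gtt_size / w / 4 < h holds roughly when the bo (w bytes per row, h rows) would exceed a quarter of the 256 MB mappable aperture. A hedged restatement of that reading:

/* restatement of the check above, under the assumption that a bo is kept
 * to at most 1/4 of the mappable GTT; overflow-safe because w * h is
 * never formed directly */
static bool bo_exceeds_mappable(size_t mappable, unsigned w, unsigned h)
{
    return mappable / w / 4 < h;
}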
Example #8
static void
layout_align(struct intel_layout *layout, struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;
   int align_w = 1, align_h = 1, pad_h = 0;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *     "To determine the necessary padding on the bottom and right side of
    *      the surface, refer to the table in Section 7.18.3.4 for the i and j
    *      parameters for the surface format in use. The surface must then be
    *      extended to the next multiple of the alignment unit size in each
    *      dimension, and all texels contained in this extended surface must
    *      have valid GTT entries."
    *
    *     "For cube surfaces, an additional two rows of padding are required
    *      at the bottom of the surface. This must be ensured regardless of
    *      whether the surface is stored tiled or linear.  This is due to the
    *      potential rotation of cache line orientation from memory to cache."
    *
    *     "For compressed textures (BC* and FXT1 surface formats), padding at
    *      the bottom of the surface is to an even compressed row, which is
    *      equal to a multiple of 8 uncompressed texel rows. Thus, for padding
    *      purposes, these surfaces behave as if j = 8 only for surface
    *      padding purposes. The value of 4 for j still applies for mip level
    *      alignment and QPitch calculation."
    */
   if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
      if (align_w < layout->align_i)
          align_w = layout->align_i;
      if (align_h < layout->align_j)
          align_h = layout->align_j;

      /* in case it is used as a cube */
      if (info->imageType == VK_IMAGE_TYPE_2D)
         pad_h += 2;

      if (params->compressed && align_h < layout->align_j * 2)
         align_h = layout->align_j * 2;
   }

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *     "If the surface contains an odd number of rows of data, a final row
    *      below the surface must be allocated."
    */
   if ((info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && align_h < 2)
      align_h = 2;

   /*
    * Depth Buffer Clear/Resolve works in 8x4 sample blocks.  In
    * intel_texture_can_enable_hiz(), we always return true for the first slice.
    * To avoid out-of-bound access, we have to pad.
    */
   if (layout->aux == INTEL_LAYOUT_AUX_HIZ &&
       info->mipLevels == 1 &&
       info->arrayLayers == 1 &&
       info->extent.depth == 1) {
      if (align_w < 8)
          align_w = 8;
      if (align_h < 4)
          align_h = 4;
   }

   params->max_x = u_align(params->max_x, align_w);
   params->max_y = u_align(params->max_y + pad_h, align_h);
}
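
With hypothetical numbers: a 13x13 2D sampled texture with align_i = 4 and align_j = 4 may later be bound as a cube, so two rows of padding are added before both dimensions are rounded up:

/* hypothetical 13x13 sampled 2D image, align_i = align_j = 4 */
unsigned max_x = u_align(13, 4);     /* -> 16 */
unsigned max_y = u_align(13 + 2, 4); /* -> 16, including the cube pad */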
Example #9
static void
layout_get_slice_size(const struct intel_layout *layout,
                      const struct intel_layout_params *params,
                      unsigned level, unsigned *width, unsigned *height)
{
   const VkImageCreateInfo *info = params->info;
   unsigned w, h;

   w = u_minify(layout->width0, level);
   h = u_minify(layout->height0, level);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 114:
    *
    *     "The dimensions of the mip maps are first determined by applying the
    *      sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
    *      if necessary, they are padded out to compression block boundaries."
    */
   w = u_align(w, layout->block_width);
   h = u_align(h, layout->block_height);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 111:
    *
    *     "If the surface is multisampled (4x), these values must be adjusted
    *      as follows before proceeding:
    *
    *        W_L = ceiling(W_L / 2) * 4
    *        H_L = ceiling(H_L / 2) * 4"
    *
    * From the Ivy Bridge PRM, volume 1 part 1, page 108:
    *
    *     "If the surface is multisampled and it is a depth or stencil surface
    *      or Multisampled Surface StorageFormat in SURFACE_STATE is
    *      MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
    *      proceeding:
    *
    *        #samples  W_L =                    H_L =
    *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
    *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
    *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
    *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
    *
    * For interleaved samples (4x), where pixels
    *
    *   (x, y  ) (x+1, y  )
    *   (x, y+1) (x+1, y+1)
    *
    * would be occupied by
    *
    *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
    *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
    *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
    *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
    *
    * Thus the need to
    *
    *   w = align(w, 2) * 2;
    *   h = align(h, 2) * 2;
    */
   if (layout->interleaved_samples) {
      switch (info->samples) {
      case VK_SAMPLE_COUNT_1_BIT:
         break;
      case VK_SAMPLE_COUNT_2_BIT:
         w = u_align(w, 2) * 2;
         break;
      case VK_SAMPLE_COUNT_4_BIT:
         w = u_align(w, 2) * 2;
         h = u_align(h, 2) * 2;
         break;
      case VK_SAMPLE_COUNT_8_BIT:
         w = u_align(w, 2) * 4;
         h = u_align(h, 2) * 2;
         break;
      case VK_SAMPLE_COUNT_16_BIT:
         w = u_align(w, 2) * 4;
         h = u_align(h, 2) * 4;
         break;
      default:
         assert(!"unsupported sample count");
         break;
      }
   }

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 108:
    *
    *     "For separate stencil buffer, the width must be mutiplied by 2 and
    *      height divided by 2..."
    *
    * To make things easier (for transfer), we will just double the stencil
    * stride in 3DSTATE_STENCIL_BUFFER.
    */
   w = u_align(w, layout->align_i);
   h = u_align(h, layout->align_j);

   *width = w;
   *height = h;
}
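
A worked instance of the 4x interleaved adjustment, using a hypothetical 5x3 mip level: each dimension is aligned to 2 and doubled, leaving room for the 2x2 grid of sample indices sketched in the comment above:

/* hypothetical 5x3 level with 4x interleaved samples */
unsigned w = u_align(5, 2) * 2; /* -> 12 */
unsigned h = u_align(3, 2) * 2; /* -> 8  */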
Example #10
static void
layout_init_lods(struct intel_layout *layout,
                 struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;
   unsigned cur_x, cur_y;
   unsigned lv;

   cur_x = 0;
   cur_y = 0;
   for (lv = 0; lv < info->mipLevels; lv++) {
      unsigned lod_w, lod_h;

      layout_get_slice_size(layout, params, lv, &lod_w, &lod_h);

      layout->lods[lv].x = cur_x;
      layout->lods[lv].y = cur_y;
      layout->lods[lv].slice_width = lod_w;
      layout->lods[lv].slice_height = lod_h;

      switch (layout->walk) {
      case INTEL_LAYOUT_WALK_LOD:
         lod_h *= layout_get_num_layers(layout, params);
         if (lv == 1)
            cur_x += lod_w;
         else
            cur_y += lod_h;

         /* every LOD begins at tile boundaries */
         if (info->mipLevels > 1) {
            assert(layout->format == VK_FORMAT_S8_UINT);
            cur_x = u_align(cur_x, 64);
            cur_y = u_align(cur_y, 64);
         }
         break;
      case INTEL_LAYOUT_WALK_LAYER:
         /* MIPLAYOUT_BELOW */
         if (lv == 1)
            cur_x += lod_w;
         else
            cur_y += lod_h;
         break;
      case INTEL_LAYOUT_WALK_3D:
         {
            const unsigned num_slices = u_minify(info->extent.depth, lv);
            const unsigned num_slices_per_row = 1 << lv;
            const unsigned num_rows =
               (num_slices + num_slices_per_row - 1) / num_slices_per_row;

            lod_w *= num_slices_per_row;
            lod_h *= num_rows;

            cur_y += lod_h;
         }
         break;
      }

      if (params->max_x < layout->lods[lv].x + lod_w)
         params->max_x = layout->lods[lv].x + lod_w;
      if (params->max_y < layout->lods[lv].y + lod_h)
         params->max_y = layout->lods[lv].y + lod_h;
   }

   if (layout->walk == INTEL_LAYOUT_WALK_LAYER) {
      params->h0 = layout->lods[0].slice_height;

      if (info->mipLevels > 1)
         params->h1 = layout->lods[1].slice_height;
      else
         layout_get_slice_size(layout, params, 1, &cur_x, &params->h1);
   }
}
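
In the 3D walk, each successive level packs twice as many slices per row. Assuming u_minify() halves a dimension per level (clamping at 1), a hypothetical depth of 8 lays out as 8 rows of 1 slice at level 0, 2 rows of 2 at level 1, and a single row at every deeper level; num_rows_at_level below is an illustrative name, not driver code:

/* rows occupied by one level of the 3D walk; depth0 is extent.depth */
static unsigned num_rows_at_level(unsigned depth0, unsigned lv)
{
    const unsigned num_slices = u_minify(depth0, lv);
    const unsigned per_row = 1u << lv;

    return (num_slices + per_row - 1) / per_row; /* ceiling division */
}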
Example #11
static void
layout_calculate_mcs_size(struct intel_layout *layout,
                          struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;
   int mcs_width, mcs_height, mcs_cpp;
   int downscale_x, downscale_y;

   assert(layout->aux == INTEL_LAYOUT_AUX_MCS);

   if (info->samples != VK_SAMPLE_COUNT_1_BIT) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
       * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA.  The
       * reason for the scale-down may be that the clear rectangle is used to clear
       * the MCS instead of the RT.
       *
       * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT.  The
       * 2x2 factor could come from that the hardware writes 128 bits (an
       * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
       * the RT.  For 4X MSAA, we need 8 bits in MCS for every pixel in the
       * RT.  Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
       * pixel block in the RT.
       */
      switch (info->samples) {
      case VK_SAMPLE_COUNT_2_BIT:
      case VK_SAMPLE_COUNT_4_BIT:
         downscale_x = 8;
         downscale_y = 2;
         mcs_cpp = 1;
         break;
      case VK_SAMPLE_COUNT_8_BIT:
         downscale_x = 2;
         downscale_y = 2;
         mcs_cpp = 4;
         break;
      case VK_SAMPLE_COUNT_16_BIT:
         downscale_x = 2;
         downscale_y = 1;
         mcs_cpp = 8;
         break;
      default:
         assert(!"unsupported sample count");
         return;
      }

      /*
       * It also appears that the 2x2 subspans generated by the scaled-down
       * clear rectangle cannot be masked.  The scale-down clear rectangle
       * thus must be aligned to 2x2, and we need to pad.
       */
      mcs_width = u_align(layout->width0, downscale_x * 2);
      mcs_height = u_align(layout->height0, downscale_y * 2);
   } else {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 327:
       *
       *     "              Pixels  Lines
       *      TiledY RT CL
       *          bpp
       *          32          8        4
       *          64          4        4
       *          128         2        4
       *
       *      TiledX RT CL
       *          bpp
       *          32          16       2
       *          64          8        2
       *          128         4        2"
       *
       * This table and the two following tables define the RT alignments, the
       * clear rectangle alignments, and the clear rectangle scale factors.
       * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
       * that the clear rectangle alignments are 16x32 blocks, and the clear
       * rectangle scale factors are 8x16 blocks.
       *
       * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
       * RT.  Similar to the MSAA cases, we can argue that an OWord maps to
       * 8x16 blocks.
       *
       * One problem with this reasoning is that a Y-tile in MCS has 8x32
       * OWords and maps to 64x512 128-byte blocks.  This differs from i965,
       * which says that a Y-tile maps to 128x256 blocks (\see
       * intel_get_non_msrt_mcs_alignment).  It does not really change
       * anything except for the size of the allocated MCS.  Let's see if we
       * hit out-of-bound access.
       */
      switch (layout->tiling) {
      case GEN6_TILING_X:
         downscale_x = 64 / layout->block_size;
         downscale_y = 2;
         break;
      case GEN6_TILING_Y:
         downscale_x = 32 / layout->block_size;
         downscale_y = 4;
         break;
      default:
         assert(!"unsupported tiling mode");
         return;
      }

      downscale_x *= 8;
      downscale_y *= 16;

      /*
       * From the Haswell PRM, volume 7, page 652:
       *
       *     "Clear rectangle must be aligned to two times the number of
       *      pixels in the table shown below due to 16X16 hashing across the
       *      slice."
       *
       * The scaled-down clear rectangle must be aligned to 4x4 instead of
       * 2x2, and we need to pad.
       */
      mcs_width = u_align(layout->width0, downscale_x * 4) / downscale_x;
      mcs_height = u_align(layout->height0, downscale_y * 4) / downscale_y;
      mcs_cpp = 16; /* an OWord */
   }

   layout->aux_enables = (1 << info->mipLevels) - 1;
   /* align to Y-tile */
   layout->aux_stride = u_align(mcs_width * mcs_cpp, 128);
   layout->aux_height = u_align(mcs_height, 32);
}
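
To put numbers on the MSAA path: a hypothetical 1920x1080 4X render target takes the 8x2 downscale with one MCS byte per pixel, so the aux surface works out to:

/* hypothetical 1920x1080, VK_SAMPLE_COUNT_4_BIT: downscale 8x2, cpp 1 */
unsigned mcs_width  = u_align(1920, 8 * 2);   /* -> 1920 */
unsigned mcs_height = u_align(1080, 2 * 2);   /* -> 1080 */
unsigned aux_stride = u_align(1920 * 1, 128); /* -> 1920 */
unsigned aux_height = u_align(1080, 32);      /* -> 1088 */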