Ejemplo n.º 1
0
/*
 * Initialize an image view: clamp the view's subresource range to what the
 * image actually provides, resolve component swizzles, and emit SURFACE_STATE
 * words into view->cmd for non-depth/stencil images.
 *
 * Gen7.5+ hardware applies the requested channel swizzles in SURFACE_STATE
 * itself; on earlier generations the swizzles would have to be applied by the
 * shader compiler, which ignores them, so a warning is logged whenever a
 * non-identity mapping is requested there.
 */
void intel_img_view_init(struct intel_dev *dev,
                         const VkImageViewCreateInfo *info,
                         struct intel_img_view *view)
{
    VkComponentMapping state_swizzles;
    uint32_t mip_levels, array_size;
    struct intel_img *img = intel_img(info->image);

    /*
     * Clamp the level/layer counts to the image's extents.
     * NOTE(review): assumes baseMipLevel <= img->mip_levels and
     * baseArrayLayer <= img->array_size; the unsigned subtraction would wrap
     * otherwise — presumably guaranteed by Vulkan valid-usage rules, confirm.
     */
    mip_levels = info->subresourceRange.levelCount;
    if (mip_levels > img->mip_levels - info->subresourceRange.baseMipLevel)
        mip_levels = img->mip_levels - info->subresourceRange.baseMipLevel;

    array_size = info->subresourceRange.layerCount;
    if (array_size > img->array_size - info->subresourceRange.baseArrayLayer)
        array_size = img->array_size - info->subresourceRange.baseArrayLayer;

    view->obj.destroy = img_view_destroy;

    view->img = img;

    /* depth/stencil images get no SURFACE_STATE here; view->cmd stays unset */
    if (!(img->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
        if (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7.5)) {
            /* hardware can swizzle: program it in the surface state */
            state_swizzles = info->components;
            view->shader_swizzles.r = VK_COMPONENT_SWIZZLE_R;
            view->shader_swizzles.g = VK_COMPONENT_SWIZZLE_G;
            view->shader_swizzles.b = VK_COMPONENT_SWIZZLE_B;
            view->shader_swizzles.a = VK_COMPONENT_SWIZZLE_A;
        } else {
            /* no hardware swizzle: surface state uses identity, and the
             * requested mapping is recorded as a shader-side swizzle */
            state_swizzles.r = VK_COMPONENT_SWIZZLE_R;
            state_swizzles.g = VK_COMPONENT_SWIZZLE_G;
            state_swizzles.b = VK_COMPONENT_SWIZZLE_B;
            state_swizzles.a = VK_COMPONENT_SWIZZLE_A;
            view->shader_swizzles = info->components;
        }

        /* shader_swizzles is ignored by the compiler */
        if (view->shader_swizzles.r != VK_COMPONENT_SWIZZLE_R ||
            view->shader_swizzles.g != VK_COMPONENT_SWIZZLE_G ||
            view->shader_swizzles.b != VK_COMPONENT_SWIZZLE_B ||
            view->shader_swizzles.a != VK_COMPONENT_SWIZZLE_A) {
            intel_dev_log(dev, VK_DEBUG_REPORT_WARNING_BIT_EXT,
                          (struct intel_base*)view, 0, 0,
                          "image data swizzling is ignored");
        }

        /* Gen7+ SURFACE_STATE is 8 dwords; Gen6 is 6 dwords */
        if (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7)) {
            surface_state_tex_gen7(dev->gpu, img, info->viewType, info->format,
                    info->subresourceRange.baseMipLevel, mip_levels,
                    info->subresourceRange.baseArrayLayer, array_size,
                    state_swizzles, false, view->cmd);
            view->cmd_len = 8;
        } else {
            surface_state_tex_gen6(dev->gpu, img, info->viewType, info->format,
                    info->subresourceRange.baseMipLevel, mip_levels,
                    info->subresourceRange.baseArrayLayer, array_size,
                    false, view->cmd);
            view->cmd_len = 6;
        }
    }
}
Ejemplo n.º 2
0
/*
 * Compute layout->layer_height (the QPitch, in texel rows) for layouts that
 * walk INTEL_LAYOUT_WALK_LAYER, and grow params->max_y to cover all layers.
 * No-op for other walk modes or single-layer images.
 */
static void
layout_init_layer_height(struct intel_layout *layout,
                         struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;
   unsigned num_layers;

   if (layout->walk != INTEL_LAYOUT_WALK_LAYER)
      return;

   num_layers = layout_get_num_layers(layout, params);
   if (num_layers <= 1)
      return;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
    *
    *     "The following equation is used for surface formats other than
    *      compressed textures:
    *
    *        QPitch = (h0 + h1 + 11j)"
    *
    *     "The equation for compressed textures (BC* and FXT1 surface formats)
    *      follows:
    *
    *        QPitch = (h0 + h1 + 11j) / 4"
    *
    *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
    *      value calculated in the equation above, for every other odd Surface
    *      Height starting from 1 i.e. 1,5,9,13"
    *
    * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
    *
    *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
    *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
    *
    *        QPitch = (h0 + h1 + 12j)
    *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
    *
    *      (There are many typos or missing words here...)"
    *
    * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
    * the base address.  The PRM divides QPitch by 4 for compressed formats
    * because the block height for those formats are 4, and it wants QPitch to
    * mean the number of memory rows, as opposed to texel rows, between
    * slices.  Since we use texel rows everywhere, we do not need to divide
    * QPitch by 4.
    */
   /* the j multiplier is 12 on Gen7+, 11 on Gen6, per the quotes above */
   layout->layer_height = params->h0 + params->h1 +
      ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * layout->align_j;

   /* [DevSNB] sampler MSAA QPitch errata: +4 for every other odd height */
   if (intel_gpu_gen(params->gpu) == INTEL_GEN(6) &&
       info->samples != VK_SAMPLE_COUNT_1_BIT &&
       layout->height0 % 4 == 1)
      layout->layer_height += 4;

   params->max_y += layout->layer_height * (num_layers - 1);
}
Ejemplo n.º 3
0
/*
 * Initialize the SURFACE_STATE words of an attachment view for input
 * attachment use.
 *
 * The original code guarded the texture surface-state paths with
 * `if (false)`, so only null surface states were ever emitted; those
 * unreachable branches are removed here.  The image parameters are kept so
 * the signature matches the other att_view initializers and the texture path
 * can be re-enabled later.
 */
static void att_view_init_for_input(struct intel_att_view *view,
                                    const struct intel_gpu *gpu,
                                    const struct intel_img *img,
                                    VkImageViewType view_type,
                                    VkFormat format, unsigned level,
                                    unsigned first_layer, unsigned num_layers)
{
    /* unused until the real input-attachment surface state is enabled */
    (void) img;
    (void) view_type;
    (void) format;
    (void) level;
    (void) first_layer;
    (void) num_layers;

    if (intel_gpu_gen(gpu) >= INTEL_GEN(7)) {
        surface_state_null_gen7(gpu, view->cmd);
        view->cmd_len = 8;
    } else {
        surface_state_null_gen6(gpu, view->cmd);
        view->cmd_len = 6;
    }
}
Ejemplo n.º 4
0
/*
 * Return true when a HiZ (hierarchical depth) buffer should be allocated for
 * the image described by params.
 */
static bool
layout_want_hiz(const struct intel_layout *layout,
                const struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;

   /* HiZ can be disabled globally for debugging */
   if (intel_debug & INTEL_DEBUG_NOHIZ)
      return false;

   /* only depth/stencil attachments whose format has a depth aspect */
   if (!(info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ||
       !intel_format_has_depth(params->gpu, info->format))
      return false;

   /*
    * HiZ implies separate stencil on Gen6.  We do not want to copy stencils
    * values between combined and separate stencil buffers when HiZ is enabled
    * or disabled, so HiZ is not used on Gen6 at all.
    */
   return (intel_gpu_gen(params->gpu) != INTEL_GEN(6));
}
Ejemplo n.º 5
0
/*
 * Return true when an MCS (multisample / fast-clear control surface) should
 * be allocated: Gen7+ 2D color attachments that are either non-integer MSRTs
 * or single-sampled RTs meeting the non-MSRT fast-clear restrictions quoted
 * below.
 */
static bool
layout_want_mcs(struct intel_layout *layout,
                struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;
   bool want_mcs = false;

   /* MCS is for RT on GEN7+ */
   if (intel_gpu_gen(params->gpu) < INTEL_GEN(7))
      return false;

   if (info->imageType != VK_IMAGE_TYPE_2D ||
       !(info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
      return false;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 77:
    *
    *     "For Render Target and Sampling Engine Surfaces:If the surface is
    *      multisampled (Number of Multisamples any value other than
    *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
    *
    *     "This field must be set to 0 for all SINT MSRTs when all RT channels
    *      are not written"
    */
   /* NOTE(review): the PRM restriction above is for SINT only, but this
    * excludes all integer formats — presumably a conservative choice */
   if (info->samples != VK_SAMPLE_COUNT_1_BIT &&
       !icd_format_is_int(info->format)) {
      want_mcs = true;
   } else if (info->samples == VK_SAMPLE_COUNT_1_BIT) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 326:
       *
       *     "When MCS is buffer is used for color clear of non-multisampler
       *      render target, the following restrictions apply.
       *      - Support is limited to tiled render targets.
       *      - Support is for non-mip-mapped and non-array surface types
       *        only.
       *      - Clear is supported only on the full RT; i.e., no partial clear
       *        or overlapping clears.
       *      - MCS buffer for non-MSRT is supported only for RT formats
       *        32bpp, 64bpp and 128bpp.
       *      ..."
       */
      if (layout->tiling != GEN6_TILING_NONE &&
          info->mipLevels == 1 && info->arrayLayers == 1) {
         /* block_size is in bytes: 4/8/16 bytes = 32/64/128 bpp */
         switch (layout->block_size) {
         case 4:
         case 8:
         case 16:
            want_mcs = true;
            break;
         default:
            break;
         }
      }
   }

   return want_mcs;
}
Ejemplo n.º 6
0
static void
layout_init_walk(struct intel_layout *layout,
                 struct intel_layout_params *params)
{
   if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
      layout_init_walk_gen7(layout, params);
   else
      layout_init_walk_gen6(layout, params);
}
Ejemplo n.º 7
0
void intel_buf_view_init(const struct intel_dev *dev,
                         const VkBufferViewCreateInfo *info,
                         struct intel_buf_view *view,
                         bool raw)
{
    struct intel_buf *buf = intel_buf(info->buffer);
    /* TODO: Is transfer destination the only shader write operation? */
    const bool will_write = (buf->usage & (VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
                             VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
    VkFormat format;
    VkDeviceSize stride;
    uint32_t *cmd;
    int i;

    view->obj.destroy = buf_view_destroy;

    view->buf = buf;

    /*
     * The compiler expects uniform buffers to have pitch of
     * 4 for fragment shaders, but 16 for other stages.  The format
     * must be VK_FORMAT_R32G32B32A32_SFLOAT.
     */
    if (raw) {
        format = VK_FORMAT_R32G32B32A32_SFLOAT;
        stride = 16;
    } else {
        format = info->format;
        stride = icd_format_get_size(format);
    }
    cmd = view->cmd;

    for (i = 0; i < 2; i++) {
        if (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7)) {
            surface_state_buf_gen7(dev->gpu, info->offset,
                    info->range, stride, format,
                    will_write, will_write, cmd);
            view->cmd_len = 8;
        } else {
            surface_state_buf_gen6(dev->gpu, info->offset,
                    info->range, stride, format,
                    will_write, will_write, cmd);
            view->cmd_len = 6;
        }

        /* switch to view->fs_cmd */
        if (raw) {
            cmd = view->fs_cmd;
            stride = 4;
        } else {
            memcpy(view->fs_cmd, view->cmd, sizeof(uint32_t) * view->cmd_len);
            break;
        }
    }
}
Ejemplo n.º 8
0
/*
 * Initialize a null view: a null SURFACE_STATE is 6 dwords on Gen6 and
 * 8 dwords on Gen7+.
 */
void intel_null_view_init(struct intel_null_view *view,
                          struct intel_dev *dev)
{
    if (intel_gpu_gen(dev->gpu) < INTEL_GEN(7)) {
        surface_state_null_gen6(dev->gpu, view->cmd);
        view->cmd_len = 6;
        return;
    }

    surface_state_null_gen7(dev->gpu, view->cmd);
    view->cmd_len = 8;
}
Ejemplo n.º 9
0
/*
 * Record the image dimensions and resolve the layout format: when a separate
 * stencil buffer is required, substitute the depth-only equivalent of a
 * combined depth/stencil format and set layout->separate_stencil.  Also
 * derives the format's block dimensions/size and params->compressed.
 */
static void
layout_init_size_and_format(struct intel_layout *layout,
                            struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;
   VkFormat format = info->format;
   bool require_separate_stencil = false;

   layout->width0 = info->extent.width;
   layout->height0 = info->extent.height;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 317:
    *
    *     "This field (Separate Stencil Buffer Enable) must be set to the same
    *      value (enabled or disabled) as Hierarchical Depth Buffer Enable."
    *
    * GEN7+ requires separate stencil buffers.
    */
   if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
      if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
         require_separate_stencil = true;
      else
         require_separate_stencil = (layout->aux == INTEL_LAYOUT_AUX_HIZ);
   }

   switch (format) {
   case VK_FORMAT_D24_UNORM_S8_UINT:
      if (require_separate_stencil) {
         format = VK_FORMAT_X8_D24_UNORM_PACK32;
         layout->separate_stencil = true;
      }
      break;
   case VK_FORMAT_D32_SFLOAT_S8_UINT:
      if (require_separate_stencil) {
         format = VK_FORMAT_D32_SFLOAT;
         layout->separate_stencil = true;
      }
      break;
   default:
      break;
   }

   layout->format = format;
   layout->block_width = icd_format_get_block_width(format);
   /* NOTE(review): assumes square compression blocks (height == width);
    * confirm if formats with non-square blocks are ever added */
   layout->block_height = layout->block_width;
   layout->block_size = icd_format_get_size(format);

   params->compressed = icd_format_is_compressed(format);
}
Ejemplo n.º 10
0
/*
 * Set up a pipeline-statistics query pool: select the MMIO counter register
 * for each statistic flag requested in info->pipelineStatistics, in Vulkan
 * spec order, and compute the per-slot stride.
 */
static void query_init_pipeline_statistics(
        struct intel_dev *dev,
        const VkQueryPoolCreateInfo *info,
        struct intel_query *query)
{
    /*
     * Note: order defined by Vulkan spec.
     */
    const uint32_t regs[][2] = {
        {VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT, GEN6_REG_IA_PRIMITIVES_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT, GEN6_REG_VS_INVOCATION_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT, GEN6_REG_GS_INVOCATION_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT, GEN6_REG_GS_PRIMITIVES_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT, GEN6_REG_CL_INVOCATION_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT, GEN6_REG_CL_PRIMITIVES_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT, GEN6_REG_PS_INVOCATION_COUNT},
        {VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT, (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7)) ? GEN7_REG_HS_INVOCATION_COUNT : 0},
        {VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT, (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7)) ? GEN7_REG_DS_INVOCATION_COUNT : 0},
        {VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT, 0}
    };
    /* NOTE(review): presumably bounds query->regs — confirm its declared size */
    STATIC_ASSERT(ARRAY_SIZE(regs) < 32);
    uint32_t i;
    uint32_t reg_count = 0;

    /*
     * Only query registers indicated via pipeline statistics flags.
     * If HW does not support a flag, fill value with 0.
     */
    for (i=0; i < ARRAY_SIZE(regs); i++) {
        if ((regs[i][0] & info->pipelineStatistics)) {
            query->regs[reg_count] = regs[i][1];
            reg_count++;
        }
    }

    query->reg_count = reg_count;
    /* two 64-bit values per register (presumably begin/end snapshots),
     * with the slot aligned to 64 bytes */
    query->slot_stride = u_align(reg_count * sizeof(uint64_t) * 2, 64);
}
Ejemplo n.º 11
0
/*
 * Return the bitmask of LAYOUT_TILING_* modes permitted by the image's
 * tiling request, usage flags, format, and the hardware restrictions quoted
 * below.  Asserts if the combination leaves no valid tiling.
 */
static unsigned
layout_get_valid_tilings(const struct intel_layout *layout,
                         const struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;
   const VkFormat format = layout->format;
   unsigned valid_tilings = LAYOUT_TILING_ALL;

   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 32:
    *
    *     "Display/Overlay   Y-Major not supported.
    *                        X-Major required for Async Flips"
    */
   if (params->scanout)
       valid_tilings &= LAYOUT_TILING_X;

   if (info->tiling == VK_IMAGE_TILING_LINEAR)
       valid_tilings &= LAYOUT_TILING_NONE;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 318:
    *
    *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
    *      Depth Buffer is not supported."
    *
    *     "The Depth Buffer, if tiled, must use Y-Major tiling."
    *
    * From the Sandy Bridge PRM, volume 1 part 2, page 22:
    *
    *     "W-Major Tile Format is used for separate stencil."
    */
   if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
      switch (format) {
      case VK_FORMAT_S8_UINT:
         valid_tilings &= LAYOUT_TILING_W;
         break;
      default:
         valid_tilings &= LAYOUT_TILING_Y;
         break;
      }
   }

   if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
      /*
       * From the Sandy Bridge PRM, volume 1 part 2, page 32:
       *
       *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
       *      either TileX or Linear."
       *
       * From the Haswell PRM, volume 5, page 32:
       *
       *     "NOTE: 128 BPP format color buffer (render target) supports
       *      Linear, TiledX and TiledY."
       */
      if (intel_gpu_gen(params->gpu) < INTEL_GEN(7.5) && layout->block_size == 16)
         valid_tilings &= ~LAYOUT_TILING_Y;

      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 63:
       *
       *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
       *      for all tiled Y Render Target surfaces."
       *
       *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
       */
      if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
          intel_gpu_gen(params->gpu) <= INTEL_GEN(7.5) &&
          layout->format == VK_FORMAT_R32G32B32_SFLOAT)
         valid_tilings &= ~LAYOUT_TILING_Y;

      /* W tiling is for separate stencil only, never color RTs */
      valid_tilings &= ~LAYOUT_TILING_W;
   }

   if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
      /* the sampler cannot read W-tiled surfaces before Gen8 */
      if (intel_gpu_gen(params->gpu) < INTEL_GEN(8))
         valid_tilings &= ~LAYOUT_TILING_W;
   }

   /* no conflicting binding flags */
   assert(valid_tilings);

   return valid_tilings;
}
Ejemplo n.º 12
0
/*
 * Choose layout->align_i / layout->align_j — the horizontal and vertical
 * subimage alignments, in texels — according to the PRM rules quoted below.
 * Must run after the format (block dimensions) and tiling are decided.
 */
static void
layout_init_alignments(struct intel_layout *layout,
                       struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 113:
    *
    *     "surface format           align_i     align_j
    *      YUV 4:2:2 formats        4           *see below
    *      BC1-5                    4           4
    *      FXT1                     8           4
    *      all other formats        4           *see below"
    *
    *     "- align_j = 4 for any depth buffer
    *      - align_j = 2 for separate stencil buffer
    *      - align_j = 4 for any render target surface is multisampled (4x)
    *      - align_j = 4 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_4
    *      - align_j = 2 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 2 for all other render target surface
    *      - align_j = 2 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 4 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_4"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
    *      the Surface Format is 96 bits per element (BPE)."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *   compressed formats             block width    block height
    *   PIPE_FORMAT_S8_UINT            4              2
    *   other depth/stencil formats    4              4
    *   4x multisampled                4              4
    *   bpp 96                         4              2
    *   others                         4              2 or 4
    */

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 110:
    *
    *     "surface defined by      surface format     align_i     align_j
    *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
    *                              not D16_UNORM      4           4
    *      3DSTATE_STENCIL_BUFFER  N/A                8           8
    *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
    *                              FXT1               8           4
    *                              all others         (set by SURFACE_STATE)"
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "- This field (Surface Vertical Aligment) is intended to be set to
    *        VALIGN_4 if the surface was rendered as a depth buffer, for a
    *        multisampled (4x) render target, or for a multisampled (8x)
    *        render target, since these surfaces support only alignment of 4.
    *      - Use of VALIGN_4 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to VALIGN_4 for all tiled Y Render Target
    *        surfaces.
    *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
    *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
    *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    *        must be set to VALIGN_4."
    *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    *
    *     "- This field (Surface Horizontal Aligment) is intended to be set to
    *        HALIGN_8 only if the surface was rendered as a depth buffer with
    *        Z16 format or a stencil buffer, since these surfaces support only
    *        alignment of 8.
    *      - Use of HALIGN_8 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
    *      - This field must be set to HALIGN_8 if the Surface Format is
    *        FXT1."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *  compressed formats              block width    block height
    *  PIPE_FORMAT_Z16_UNORM           8              4
    *  PIPE_FORMAT_S8_UINT             8              8
    *  other depth/stencil formats     4              4
    *  2x or 4x multisampled           4 or 8         4
    *  tiled Y                         4 or 8         4 (if rt)
    *  PIPE_FORMAT_R32G32B32_FLOAT     4 or 8         2
    *  others                          4 or 8         2 or 4
    */

   if (params->compressed) {
      /* this happens to be the case */
      layout->align_i = layout->block_width;
      layout->align_j = layout->block_height;
   } else if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
      /* depth/stencil alignments differ by generation, per the tables above */
      if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) {
         switch (layout->format) {
         case VK_FORMAT_D16_UNORM:
            layout->align_i = 8;
            layout->align_j = 4;
            break;
         case VK_FORMAT_S8_UINT:
            layout->align_i = 8;
            layout->align_j = 8;
            break;
         default:
            layout->align_i = 4;
            layout->align_j = 4;
            break;
         }
      } else {
         switch (layout->format) {
         case VK_FORMAT_S8_UINT:
            layout->align_i = 4;
            layout->align_j = 2;
            break;
         default:
            layout->align_i = 4;
            layout->align_j = 4;
            break;
         }
      }
   } else {
      /* VALIGN_4 is mandatory for MSAA, Gen8+, and Gen7+ tiled-Y RTs */
      const bool valign_4 =
         (info->samples != VK_SAMPLE_COUNT_1_BIT) ||
         (intel_gpu_gen(params->gpu) >= INTEL_GEN(8)) ||
         (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
          layout->tiling == GEN6_TILING_Y &&
          (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT));

      /* "VALIGN_4 is not supported for surface format R32G32B32_FLOAT" */
      if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7) &&
          intel_gpu_gen(params->gpu) <= INTEL_GEN(7.5) && valign_4)
         assert(layout->format != VK_FORMAT_R32G32B32_SFLOAT);

      layout->align_i = 4;
      layout->align_j = (valign_4) ? 4 : 2;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(layout->align_i % layout->block_width == 0);
   assert(layout->align_j % layout->block_height == 0);

   /* make sure u_align() works */
   assert(u_is_pow2(layout->align_i) &&
          u_is_pow2(layout->align_j));
   assert(u_is_pow2(layout->block_width) &&
          u_is_pow2(layout->block_height));
}
Ejemplo n.º 13
0
/*
 * Return the maximum hardware thread count for the given shader stage on
 * this GPU's generation and GT level.  For unknown generation/stage
 * combinations, log an error and return a small safe fallback.
 */
int intel_gpu_get_max_threads(const struct intel_gpu *gpu,
                              VkShaderStageFlagBits stage)
{
    switch (intel_gpu_gen(gpu)) {
    case INTEL_GEN(7.5):
        switch (stage) {
        case VK_SHADER_STAGE_VERTEX_BIT:
            return (gpu->gt >= 2) ? 280 : 70;
        case VK_SHADER_STAGE_GEOMETRY_BIT:
            /* values from ilo_gpe_init_gs_cso_gen7 */
            return (gpu->gt >= 2) ? 256 : 70;
        case VK_SHADER_STAGE_FRAGMENT_BIT:
            return (gpu->gt == 3) ? 408 :
                   (gpu->gt == 2) ? 204 : 102;
        default:
            break;
        }
        break;
    case INTEL_GEN(7):
        switch (stage) {
        case VK_SHADER_STAGE_VERTEX_BIT:
            return (gpu->gt == 2) ? 128 : 36;
        case VK_SHADER_STAGE_GEOMETRY_BIT:
            /* values from ilo_gpe_init_gs_cso_gen7 */
            return (gpu->gt == 2) ? 128 : 36;
        case VK_SHADER_STAGE_FRAGMENT_BIT:
            return (gpu->gt == 2) ? 172 : 48;
        default:
            break;
        }
        break;
    case INTEL_GEN(6):
        switch (stage) {
        case VK_SHADER_STAGE_VERTEX_BIT:
            return (gpu->gt == 2) ? 60 : 24;
        case VK_SHADER_STAGE_GEOMETRY_BIT:
            /* values from ilo_gpe_init_gs_cso_gen6 */
            return (gpu->gt == 2) ? 28 : 21;
        case VK_SHADER_STAGE_FRAGMENT_BIT:
            return (gpu->gt == 2) ? 80 : 40;
        default:
            break;
        }
        break;
    default:
        break;
    }

    /* reached only when no case above returned */
    intel_log(gpu, VK_DEBUG_REPORT_ERROR_BIT_EXT, 0, VK_NULL_HANDLE,
            0, 0, "unknown Gen or shader stage");

    /* conservative fallbacks so callers still get a usable count */
    switch (stage) {
    case VK_SHADER_STAGE_VERTEX_BIT:
        return 1;
    case VK_SHADER_STAGE_GEOMETRY_BIT:
        return 1;
    case VK_SHADER_STAGE_FRAGMENT_BIT:
        return 4;
    default:
        return 1;
    }
}
Ejemplo n.º 14
0
/*
 * Compute the final bo stride and height (in bytes and rows of memory) from
 * the texel extents accumulated in params, applying per-tiling alignment and
 * linear-surface padding.  Note that this may force the texture to be
 * linear: if a tiled bo would exceed the mappable GTT, the loop retries with
 * GEN6_TILING_NONE (dropping non-MSRT MCS, which requires tiling).
 */
static void
layout_calculate_bo_size(struct intel_layout *layout,
                         struct intel_layout_params *params)
{
   assert(params->max_x % layout->block_width == 0);
   assert(params->max_y % layout->block_height == 0);
   assert(layout->layer_height % layout->block_height == 0);

   /* convert texel extents to bytes (stride) and memory rows (height) */
   layout->bo_stride =
      (params->max_x / layout->block_width) * layout->block_size;
   layout->bo_height = params->max_y / layout->block_height;

   while (true) {
      unsigned w = layout->bo_stride, h = layout->bo_height;
      unsigned align_w, align_h;

      /*
       * From the Haswell PRM, volume 5, page 163:
       *
       *     "For linear surfaces, additional padding of 64 bytes is required
       *      at the bottom of the surface. This is in addition to the padding
       *      required above."
       */
      if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7.5) &&
          (params->info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) &&
          layout->tiling == GEN6_TILING_NONE)
         h += (64 + layout->bo_stride - 1) / layout->bo_stride;

      /*
       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
       *
       *     "- For linear render target surfaces, the pitch must be a
       *        multiple of the element size for non-YUV surface formats.
       *        Pitch must be a multiple of 2 * element size for YUV surface
       *        formats.
       *      - For other linear surfaces, the pitch can be any multiple of
       *        bytes.
       *      - For tiled surfaces, the pitch must be a multiple of the tile
       *        width."
       *
       * Different requirements may exist when the bo is used in different
       * places, but our alignments here should be good enough that we do not
       * need to check layout->info->usage.
       */
      switch (layout->tiling) {
      case GEN6_TILING_X:
         align_w = 512;
         align_h = 8;
         break;
      case GEN6_TILING_Y:
         align_w = 128;
         align_h = 32;
         break;
      case GEN8_TILING_W:
         /*
          * From the Sandy Bridge PRM, volume 1 part 2, page 22:
          *
          *     "A 4KB tile is subdivided into 8-high by 8-wide array of
          *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
          *      bytes."
          */
         align_w = 64;
         align_h = 64;
         break;
      default:
         assert(layout->tiling == GEN6_TILING_NONE);
         /* some good enough values */
         align_w = 64;
         align_h = 2;
         break;
      }

      w = u_align(w, align_w);
      h = u_align(h, align_h);

      /* make sure the bo is mappable */
      if (layout->tiling != GEN6_TILING_NONE) {
         /*
          * Usually only the first 256MB of the GTT is mappable.
          *
          * See also how intel_context::max_gtt_map_object_size is calculated.
          */
         const size_t mappable_gtt_size = 256 * 1024 * 1024;

         /*
          * Be conservative.  We may be able to switch from VALIGN_4 to
          * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
          */
         if (mappable_gtt_size / w / 4 < h) {
            if (layout->valid_tilings & LAYOUT_TILING_NONE) {
               layout->tiling = GEN6_TILING_NONE;
               /* MCS support for non-MSRTs is limited to tiled RTs */
               if (layout->aux == INTEL_LAYOUT_AUX_MCS &&
                   params->info->samples == VK_SAMPLE_COUNT_1_BIT)
                  layout->aux = INTEL_LAYOUT_AUX_NONE;

               /* recompute alignment/padding for the linear layout */
               continue;
            } else {
               /* mapping will fail */
            }
         }
      }

      layout->bo_stride = w;
      layout->bo_height = h;
      break;
   }
}
Ejemplo n.º 15
0
/*
 * Fill *info with the depth/stencil surface parameters used to program
 * 3DSTATE_DEPTH_BUFFER and friends: surface type, hardware Z format, and the
 * strides/offsets of the depth, stencil, and HiZ buffers for the requested
 * mip level and layer range.  Falls back to a null surface for unsupported
 * formats.
 */
static void
ds_init_info(const struct intel_gpu *gpu,
             const struct intel_img *img,
             VkImageViewType view_type,
             VkFormat format, unsigned level,
             unsigned first_layer, unsigned num_layers,
             struct ds_surface_info *info)
{
   bool separate_stencil;

   INTEL_GPU_ASSERT(gpu, 6, 7.5);

   memset(info, 0, sizeof(*info));

   info->surface_type = view_type_to_surface_type(view_type);

   if (info->surface_type == GEN6_SURFTYPE_CUBE) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
       *
       *     "For Other Surfaces (Cube Surfaces):
       *      This field (Minimum Array Element) is ignored."
       *
       *     "For Other Surfaces (Cube Surfaces):
       *      This field (Render Target View Extent) is ignored."
       *
       * As such, we cannot set first_layer and num_layers on cube surfaces.
       * To work around that, treat it as a 2D surface.
       */
      info->surface_type = GEN6_SURFTYPE_2D;
   }

   if (intel_gpu_gen(gpu) >= INTEL_GEN(7)) {
      /* GEN7+ always uses a separate stencil buffer */
      separate_stencil = true;
   }
   else {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
       *
       *     "This field (Separate Stencil Buffer Enable) must be set to the
       *      same value (enabled or disabled) as Hierarchical Depth Buffer
       *      Enable."
       */
      separate_stencil = intel_img_can_enable_hiz(img, level);
   }

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 317:
    *
    *     "If this field (Hierarchical Depth Buffer Enable) is enabled, the
    *      Surface Format of the depth buffer cannot be
    *      D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
    *      requires the separate stencil buffer."
    *
    * From the Ironlake PRM, volume 2 part 1, page 330:
    *
    *     "If this field (Separate Stencil Buffer Enable) is disabled, the
    *      Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
    *
    * There is no similar restriction for GEN6.  But when D24_UNORM_X8_UINT
    * is indeed used, the depth values output by the fragment shaders will
    * be different when read back.
    *
    * As for GEN7+, separate_stencil is always true.
    */
   switch (format) {
   case VK_FORMAT_D16_UNORM:
      info->format = GEN6_ZFORMAT_D16_UNORM;
      break;
   case VK_FORMAT_D32_SFLOAT:
      info->format = GEN6_ZFORMAT_D32_FLOAT;
      break;
   case VK_FORMAT_D32_SFLOAT_S8_UINT:
      info->format = (separate_stencil) ?
         GEN6_ZFORMAT_D32_FLOAT :
         GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
      break;
   case VK_FORMAT_S8_UINT:
      if (separate_stencil) {
         info->format = GEN6_ZFORMAT_D32_FLOAT;
         break;
      }
      /* fall through */
   default:
      assert(!"unsupported depth/stencil format");
      ds_init_info_null(gpu, info);
      return;
      /* NOTE(review): this break is unreachable after the return above */
      break;
   }

   if (format != VK_FORMAT_S8_UINT)
      info->zs.stride = img->layout.bo_stride;

   if (img->s8_layout) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 329:
       *
       *     "The pitch must be set to 2x the value computed based on width,
       *       as the stencil buffer is stored with two rows interleaved."
       *
       * According to the classic driver, we need to do the same for GEN7+
       * even though the Ivy Bridge PRM does not say anything about it.
       */
      info->stencil.stride = img->s8_layout->bo_stride * 2;

      if (intel_gpu_gen(gpu) == INTEL_GEN(6)) {
         unsigned x, y;

         assert(img->s8_layout->walk == INTEL_LAYOUT_WALK_LOD);

         /* offset to the level */
         intel_layout_get_slice_pos(img->s8_layout, level, 0, &x, &y);
         intel_layout_pos_to_mem(img->s8_layout, x, y, &x, &y);
         info->stencil.offset = intel_layout_mem_to_raw(img->s8_layout, x, y);
      }
   } else if (format == VK_FORMAT_S8_UINT) {
      /* stencil-only image: same two-rows-interleaved pitch doubling */
      info->stencil.stride = img->layout.bo_stride * 2;
   }

   if (intel_img_can_enable_hiz(img, level)) {
      info->hiz.stride = img->layout.aux_stride;

      /* offset to the level */
      if (intel_gpu_gen(gpu) == INTEL_GEN(6))
          info->hiz.offset = img->layout.aux_offsets[level];
   }

   info->width = img->layout.width0;
   info->height = img->layout.height0;
   /* 3D images use their depth; array images use the requested layer count */
   info->depth = (img->type == VK_IMAGE_TYPE_3D) ?
      img->depth : num_layers;

   info->lod = level;
   info->first_layer = first_layer;
   info->num_layers = num_layers;
}
Ejemplo n.º 16
0
/*
 * Fill dw[0..7] with a GEN7/GEN7.5 SURFACE_STATE for a non-buffer image view.
 *
 * @param gpu          target GPU; must be Gen7 or Gen7.5 (INTEL_GPU_ASSERT)
 * @param img          image backing the view
 * @param type         Vulkan image view type; translated to a SURFTYPE
 * @param format       view format; translated to a hardware surface format
 * @param first_level  first mip level visible through the view
 * @param num_levels   number of mip levels visible through the view
 * @param first_layer  first array layer visible through the view
 * @param num_layers   number of array layers visible through the view
 * @param swizzles     channel swizzles; programmed into the surface state only
 *                     on Gen7.5+, asserted to be identity on plain Gen7
 * @param is_rt        true when the view is bound as a render target
 * @param dw           receives the 8 surface state dwords
 */
static void surface_state_tex_gen7(const struct intel_gpu *gpu,
                                   const struct intel_img *img,
                                   VkImageViewType type,
                                   VkFormat format,
                                   unsigned first_level,
                                   unsigned num_levels,
                                   unsigned first_layer,
                                   unsigned num_layers,
                                   VkComponentMapping swizzles,
                                   bool is_rt,
                                   uint32_t dw[8])
{
   int surface_type, surface_format;
   int width, height, depth, pitch, lod;

   INTEL_GPU_ASSERT(gpu, 7, 7.5);

   surface_type = view_type_to_surface_type(type);
   assert(surface_type != GEN6_SURFTYPE_BUFFER);

   surface_format = intel_format_translate_color(gpu, format);
   assert(surface_format >= 0);

   /* for non-3D views, "depth" carries the array layer count */
   width = img->layout.width0;
   height = img->layout.height0;
   depth = (type == VK_IMAGE_VIEW_TYPE_3D) ?
      img->depth : num_layers;
   pitch = img->layout.bo_stride;

   if (surface_type == GEN6_SURFTYPE_CUBE) {
      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 70:
       *
       *     "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
       *      this field is [0,340], indicating the number of cube array
       *      elements (equal to the number of underlying 2D array elements
       *      divided by 6). For other surfaces, this field must be zero."
       *
       * When is_rt is true, we treat the texture as a 2D one to avoid the
       * restriction.
       */
      if (is_rt) {
         surface_type = GEN6_SURFTYPE_2D;
      }
      else {
         assert(num_layers % 6 == 0);
         depth = num_layers / 6;
      }
   }

   /* sanity check the size */
   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
   assert(first_layer < 2048 && num_layers <= 2048);
   switch (surface_type) {
   case GEN6_SURFTYPE_1D:
      assert(width <= 16384 && height == 1 && depth <= 2048);
      break;
   case GEN6_SURFTYPE_2D:
      assert(width <= 16384 && height <= 16384 && depth <= 2048);
      break;
   case GEN6_SURFTYPE_3D:
      assert(width <= 2048 && height <= 2048 && depth <= 2048);
      if (!is_rt)
         assert(first_layer == 0);
      break;
   case GEN6_SURFTYPE_CUBE:
      assert(width <= 16384 && height <= 16384 && depth <= 86);
      assert(width == height);
      if (is_rt)
         assert(first_layer == 0);
      break;
   default:
      assert(!"unexpected surface type");
      break;
   }

   /*
    * Render targets address exactly one level (first_level); sampler views
    * instead program the number of additional levels past MIN_LOD.
    */
   if (is_rt) {
      assert(num_levels == 1);
      lod = first_level;
   }
   else {
      lod = num_levels - 1;
   }

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
    *
    *     "The Base Address for linear render target surfaces and surfaces
    *      accessed with the typed surface read/write data port messages must
    *      be element-size aligned, for non-YUV surface formats, or a multiple
    *      of 2 element-sizes for YUV surface formats.  Other linear surfaces
    *      have no alignment requirements (byte alignment is sufficient)."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
    *
    *     "For linear render target surfaces and surfaces accessed with the
    *      typed data port messages, the pitch must be a multiple of the
    *      element size for non-YUV surface formats. Pitch must be a multiple
    *      of 2 * element size for YUV surface formats. For linear surfaces
    *      with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
    *      of 4 bytes.For other linear surfaces, the pitch can be any multiple
    *      of bytes."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 74:
    *
    *     "For linear surfaces, this field (X Offset) must be zero."
    */
   if (img->layout.tiling == GEN6_TILING_NONE) {
      if (is_rt) {
         const int elem_size U_ASSERT_ONLY = icd_format_get_size(format);
         assert(pitch % elem_size == 0);
      }
   }

   /* W-tiling is not representable in SURFACE_STATE */
   assert(img->layout.tiling != GEN8_TILING_W);
   /* raw shift of 13 for the tiling field -- NOTE(review): confirm it matches
    * the GEN7 SURFACE_STATE DW0 tiling-mode bit position */
   dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
           surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT |
           img->layout.tiling << 13;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "If this field (Surface Array) is enabled, the Surface Type must be
    *      SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
    *      disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
    *      SURFTYPE_CUBE, the Depth field must be set to zero."
    *
    * For non-3D sampler surfaces, resinfo (the sampler message) always
    * returns zero for the number of layers when this field is not set.
    */
   if (surface_type != GEN6_SURFTYPE_3D) {
      if (num_layers > 1)
         dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
      else
         assert(depth == 1);
   }

   assert(img->layout.align_i == 4 || img->layout.align_i == 8);
   assert(img->layout.align_j == 2 || img->layout.align_j == 4);

   if (img->layout.align_j == 4)
      dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;

   if (img->layout.align_i == 8)
      dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;

   /* array spacing must agree with how the layout packed the mip levels */
   if (img->layout.walk == INTEL_LAYOUT_WALK_LOD)
      dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
   else
      dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;

   if (is_rt)
      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;

   if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
      dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;

   dw[1] = 0;

   dw[2] = (height - 1) << GEN7_SURFACE_DW2_HEIGHT__SHIFT |
           (width - 1) << GEN7_SURFACE_DW2_WIDTH__SHIFT;

   dw[3] = (depth - 1) << GEN7_SURFACE_DW3_DEPTH__SHIFT |
           (pitch - 1);

   /* raw shifts 18/7 -- presumably Minimum Array Element and
    * Render Target View Extent; TODO confirm against the PRM */
   dw[4] = first_layer << 18 |
           (num_layers - 1) << 7;

   /*
    * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
    * means the samples are interleaved.  The layouts are the same when the
    * number of samples is 1.
    */
   if (img->layout.interleaved_samples && img->sample_count > 1) {
      assert(!is_rt);
      dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
   }
   else {
      dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
   }

   if (img->sample_count > 4)
      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
   else if (img->sample_count > 2)
      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
   else
      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;

   dw[5] = GEN7_MOCS_L3_WB << GEN7_SURFACE_DW5_MOCS__SHIFT |
           (first_level) << GEN7_SURFACE_DW5_MIN_LOD__SHIFT |
           lod;

   dw[6] = 0;
   dw[7] = 0;

   /* Gen7.5 added the shader channel select fields; on plain Gen7 the
    * swizzles must already be identity */
   if (intel_gpu_gen(gpu) >= INTEL_GEN(7.5)) {
      dw[7] |=
         channel_swizzle_to_scs((swizzles.r == VK_COMPONENT_SWIZZLE_IDENTITY) ? VK_COMPONENT_SWIZZLE_R : swizzles.r) << GEN75_SURFACE_DW7_SCS_R__SHIFT |
         channel_swizzle_to_scs((swizzles.g == VK_COMPONENT_SWIZZLE_IDENTITY) ? VK_COMPONENT_SWIZZLE_G : swizzles.g) << GEN75_SURFACE_DW7_SCS_G__SHIFT |
         channel_swizzle_to_scs((swizzles.b == VK_COMPONENT_SWIZZLE_IDENTITY) ? VK_COMPONENT_SWIZZLE_B : swizzles.b) << GEN75_SURFACE_DW7_SCS_B__SHIFT |
         channel_swizzle_to_scs((swizzles.a == VK_COMPONENT_SWIZZLE_IDENTITY) ? VK_COMPONENT_SWIZZLE_A : swizzles.a) << GEN75_SURFACE_DW7_SCS_A__SHIFT;
   } else {
         assert(((swizzles.r == VK_COMPONENT_SWIZZLE_R) || (swizzles.r == VK_COMPONENT_SWIZZLE_IDENTITY)) &&
                ((swizzles.g == VK_COMPONENT_SWIZZLE_G) || (swizzles.g == VK_COMPONENT_SWIZZLE_IDENTITY)) &&
                ((swizzles.b == VK_COMPONENT_SWIZZLE_B) || (swizzles.b == VK_COMPONENT_SWIZZLE_IDENTITY)) &&
                ((swizzles.a == VK_COMPONENT_SWIZZLE_A) || (swizzles.a == VK_COMPONENT_SWIZZLE_IDENTITY)));
   }
}
Ejemplo n.º 17
0
/*
 * Fill dw[0..7] with a GEN7/GEN7.5 SURFACE_STATE describing a buffer view.
 *
 * A view with a defined element format becomes a typed SURFTYPE_BUFFER; an
 * undefined format with struct_size > 1 becomes SURFTYPE_STRBUF; otherwise a
 * raw SURFTYPE_BUFFER is built.
 *
 * @param gpu              target GPU; must be Gen7 or Gen7.5
 * @param offset           byte offset of the first element
 * @param size             size of the view in bytes
 * @param struct_size      structure (entry) size in bytes
 * @param elem_format      element format, or undefined for raw/structured
 * @param is_rt            true when bound as a render target
 * @param render_cache_rw  whether to set the render-cache read/write bit
 * @param dw               receives the 8 surface state dwords
 */
static void surface_state_buf_gen7(const struct intel_gpu *gpu,
                                   unsigned offset, unsigned size,
                                   unsigned struct_size,
                                   VkFormat elem_format,
                                   bool is_rt, bool render_cache_rw,
                                   uint32_t dw[8])
{
   const bool typed = !icd_format_is_undef(elem_format);
   const bool structured = (!typed && struct_size > 1);
   const int elem_size = typed ? icd_format_get_size(elem_format) : 1;
   int surf_type, surf_fmt;
   int entry_count, surf_pitch;
   int w_bits, h_bits, d_bits;

   INTEL_GPU_ASSERT(gpu, 7, 7.5);

   surf_type = structured ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
   surf_fmt = typed ?
      intel_format_translate_color(gpu, elem_format) : GEN6_FORMAT_RAW;

   /*
    * The buffer view may be smaller than the format element size (which is
    * required to be 16 for non-fragment shaders).  Round the size up to
    * struct_size so the hardware always sees at least one entry.
    */
   if (size < struct_size)
      size = struct_size;

   entry_count = size / struct_size;
   /* count a trailing partial entry if a whole element still fits */
   if (!structured && size % struct_size >= elem_size)
      entry_count++;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 67: for SURFTYPE_BUFFER
    * render targets, the base address must be naturally aligned to the
    * element size (e.g. 16 bytes for R32G32B32A32_FLOAT).  Non-render-target
    * buffer surfaces have no such requirement.
    */
   if (is_rt)
      assert(offset % elem_size == 0);

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 68: typed and structured
    * buffers hold 1 to 2^27 entries; raw buffers hold 1 to 2^30 bytes.
    */
   assert(entry_count >= 1 &&
          entry_count <= 1 << ((typed || structured) ? 27 : 30));

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, pages 69-70: a RAW buffer's
    * size must be a multiple of 4 bytes (low two Width bits are 11), and a
    * STRBUF pitch must be a multiple of 4 bytes.
    */
   if (structured)
      assert(struct_size % 4 == 0);
   else if (!typed)
      assert(entry_count % 4 == 0);

   surf_pitch = struct_size - 1;

   /* the entry count is encoded minus one, split across Width/Height/Depth */
   entry_count--;
   w_bits = entry_count & 0x0000007f;            /* bits [6:0] */
   h_bits = (entry_count & 0x001fff80) >> 7;     /* bits [20:7] */
   d_bits = (entry_count & 0x7fe00000) >> 21;    /* bits [30:21] */
   /* typed/structured buffers only get bits [26:21] of depth */
   if (typed || structured)
      d_bits &= 0x3f;

   dw[0] = surf_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
           surf_fmt << GEN7_SURFACE_DW0_FORMAT__SHIFT;
   if (render_cache_rw)
      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;

   dw[1] = offset;

   dw[2] = h_bits << GEN7_SURFACE_DW2_HEIGHT__SHIFT |
           w_bits << GEN7_SURFACE_DW2_WIDTH__SHIFT;

   dw[3] = d_bits << GEN7_SURFACE_DW3_DEPTH__SHIFT |
           surf_pitch;

   dw[4] = 0;
   dw[5] = GEN7_MOCS_L3_WB << GEN7_SURFACE_DW5_MOCS__SHIFT;

   dw[6] = 0;
   dw[7] = 0;

   /* Gen7.5 added shader channel selects; program the identity mapping */
   if (intel_gpu_gen(gpu) >= INTEL_GEN(7.5)) {
      dw[7] |= GEN75_SCS_RED   << GEN75_SURFACE_DW7_SCS_R__SHIFT |
               GEN75_SCS_GREEN << GEN75_SURFACE_DW7_SCS_G__SHIFT |
               GEN75_SCS_BLUE  << GEN75_SURFACE_DW7_SCS_B__SHIFT |
               GEN75_SCS_ALPHA << GEN75_SURFACE_DW7_SCS_A__SHIFT;
   }
}
Ejemplo n.º 18
0
/*
 * Initialize an attachment view for depth/stencil use on Gen6 to Gen7.5.
 *
 * Builds the hardware depth-buffer state dwords into view->att_cmd:
 * dw[0..5] hold the depth-buffer state, dw[6..7] the separate-stencil state
 * (stride/offset), and dw[8..9] the HiZ state (stride/offset).
 *
 * @param view        attachment view to initialize
 * @param gpu         target GPU; must be Gen6 to Gen7.5
 * @param img         source image, or NULL for a null depth surface
 * @param view_type   Vulkan image view type
 * @param format      depth/stencil format of the view
 * @param level       mip level the view addresses
 * @param first_layer first array layer the view addresses
 * @param num_layers  number of array layers the view addresses
 */
static void att_view_init_for_ds(struct intel_att_view *view,
                                 const struct intel_gpu *gpu,
                                 const struct intel_img *img,
                                 VkImageViewType view_type,
                                 VkFormat format, unsigned level,
                                 unsigned first_layer, unsigned num_layers)
{
   /* Gen7 raised the 2D surface and array limits over Gen6 */
   const int max_2d_size U_ASSERT_ONLY =
       (intel_gpu_gen(gpu) >= INTEL_GEN(7)) ? 16384 : 8192;
   const int max_array_size U_ASSERT_ONLY =
       (intel_gpu_gen(gpu) >= INTEL_GEN(7)) ? 2048 : 512;
   struct ds_surface_info info;
   uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
   uint32_t *dw;

   INTEL_GPU_ASSERT(gpu, 6, 7.5);

   if (img) {
      ds_init_info(gpu, img, view_type, format, level,
              first_layer, num_layers, &info);
   }
   else {
      /* no image: emit a null depth surface */
      ds_init_info_null(gpu, &info);
   }

   /* sanity-check the computed surface against per-type hardware limits */
   switch (info.surface_type) {
   case GEN6_SURFTYPE_NULL:
      break;
   case GEN6_SURFTYPE_1D:
      /* NOTE(review): "< max_array_size - 1" excludes the last valid layer;
       * confirm whether "< max_array_size" was intended */
      assert(info.width <= max_2d_size && info.height == 1 &&
             info.depth <= max_array_size);
      assert(info.first_layer < max_array_size - 1 &&
             info.num_layers <= max_array_size);
      break;
   case GEN6_SURFTYPE_2D:
      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
             info.depth <= max_array_size);
      assert(info.first_layer < max_array_size - 1 &&
             info.num_layers <= max_array_size);
      break;
   case GEN6_SURFTYPE_3D:
      assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
      assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
      break;
   case GEN6_SURFTYPE_CUBE:
      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
             info.depth == 1);
      assert(info.first_layer == 0 && info.num_layers == 1);
      assert(info.width == info.height);
      break;
   default:
      assert(!"unexpected depth surface type");
      break;
   }

   /* raw shifts 29/18 -- presumably the Surface Type and Surface Format
    * fields of the depth-buffer state; TODO confirm against the PRM */
   dw1 = info.surface_type << 29 |
         info.format << 18;

   if (info.zs.stride) {
      /* required for GEN6+ */
      assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
            info.zs.stride % 128 == 0);
      assert(info.width <= info.zs.stride);

      /* pitch is encoded minus one */
      dw1 |= (info.zs.stride - 1);
   }

   dw2 = 0;

   if (intel_gpu_gen(gpu) >= INTEL_GEN(7)) {
      /* Gen7 enable bits for depth write, separate stencil, and HiZ --
       * NOTE(review): bit positions taken on faith, verify against PRM */
      if (info.zs.stride)
         dw1 |= 1 << 28;

      if (info.stencil.stride)
         dw1 |= 1 << 27;

      if (info.hiz.stride)
         dw1 |= 1 << 22;

      dw3 = (info.height - 1) << 18 |
            (info.width - 1) << 4 |
            info.lod;

      dw4 = (info.depth - 1) << 21 |
            info.first_layer << 10 |
            GEN7_MOCS_L3_WB;

      dw5 = 0;

      dw6 = (info.num_layers - 1) << 21;
   }
   else {
      /* always Y-tiled */
      dw1 |= 1 << 27 |
             1 << 26;

      if (info.hiz.stride) {
         dw1 |= 1 << 22 |
                1 << 21;
      }

      /* Gen6 packs height/width/lod at different bit positions than Gen7 */
      dw3 = (info.height - 1) << 19 |
            (info.width - 1) << 6 |
            info.lod << 2 |
            GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;

      dw4 = (info.depth - 1) << 21 |
            info.first_layer << 10 |
            (info.num_layers - 1) << 1;

      dw5 = 0;

      dw6 = 0;
   }

   STATIC_ASSERT(ARRAY_SIZE(view->att_cmd) >= 10);
   dw = view->att_cmd;

   dw[0] = dw1;
   dw[1] = dw2;
   dw[2] = dw3;
   dw[3] = dw4;
   dw[4] = dw5;
   dw[5] = dw6;

   /* separate stencil */
   if (info.stencil.stride) {
      assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
             info.stencil.stride % 128 == 0);

      dw[6] = info.stencil.stride - 1;
      dw[7] = img->s8_offset;

      if (intel_gpu_gen(gpu) >= INTEL_GEN(7))
         dw[6] |= GEN7_MOCS_L3_WB << GEN6_STENCIL_DW1_MOCS__SHIFT;
      if (intel_gpu_gen(gpu) >= INTEL_GEN(7.5))
         dw[6] |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
   }
   else {
      dw[6] = 0;
      dw[7] = 0;
   }

   /* hiz */
   if (info.hiz.stride) {
      dw[8] = info.hiz.stride - 1;
      dw[9] = img->aux_offset;

      if (intel_gpu_gen(gpu) >= INTEL_GEN(7))
         dw[8] |= GEN7_MOCS_L3_WB << GEN6_HIZ_DW1_MOCS__SHIFT;
   }
   else {
      dw[8] = 0;
      dw[9] = 0;
   }

   /* remember whether the stencil/HiZ dwords are meaningful */
   view->has_stencil = info.stencil.stride;
   view->has_hiz = info.hiz.stride;
}
Ejemplo n.º 19
0
/*
 * Compute the size and per-level offsets of the HiZ (hierarchical depth)
 * buffer for a layout, and decide which mip levels may enable HiZ.
 *
 * On output, layout->aux_stride/aux_height give the Y-tile-aligned HiZ bo
 * dimensions, layout->aux_offsets[] the per-level offsets (Gen6 LOD walk
 * only), layout->aux_layer_height the per-layer pitch (layer walk only),
 * and layout->aux_enables a bitmask of levels whose sizes permit fast
 * clears.
 */
static void
layout_calculate_hiz_size(struct intel_layout *layout,
                          struct intel_layout_params *params)
{
   const VkImageCreateInfo *info = params->info;
   /* HiZ rows are aligned to 8; see the PRM citations below */
   const unsigned hz_align_j = 8;
   enum intel_layout_walk_type hz_walk;
   unsigned hz_width, hz_height, lv;
   unsigned hz_clear_w, hz_clear_h;

   assert(layout->aux == INTEL_LAYOUT_AUX_HIZ);

   assert(layout->walk == INTEL_LAYOUT_WALK_LAYER ||
          layout->walk == INTEL_LAYOUT_WALK_3D);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 312:
    *
    *     "The hierarchical depth buffer does not support the LOD field, it is
    *      assumed by hardware to be zero. A separate hierarachical depth
    *      buffer is required for each LOD used, and the corresponding
    *      buffer's state delivered to hardware each time a new depth buffer
    *      state with modified LOD is delivered."
    *
    * We will put all LODs in a single bo with INTEL_LAYOUT_WALK_LOD.
    */
   if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
      hz_walk = layout->walk;
   else
      hz_walk = INTEL_LAYOUT_WALK_LOD;

   /*
    * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
    * PRM, volume 2 part 1, page 312-313.
    *
    * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
    * memory row.
    */
   switch (hz_walk) {
   case INTEL_LAYOUT_WALK_LOD:
      {
         unsigned lod_tx[INTEL_LAYOUT_MAX_LEVELS];
         unsigned lod_ty[INTEL_LAYOUT_MAX_LEVELS];
         unsigned cur_tx, cur_ty;

         /* figure out the tile offsets of LODs */
         hz_width = 0;
         hz_height = 0;
         cur_tx = 0;
         cur_ty = 0;
         for (lv = 0; lv < info->mipLevels; lv++) {
            unsigned tw, th;

            lod_tx[lv] = cur_tx;
            lod_ty[lv] = cur_ty;

            /* all layers of the level, halved because two HiZ rows share
             * one memory row */
            tw = u_align(layout->lods[lv].slice_width, 16);
            th = u_align(layout->lods[lv].slice_height, hz_align_j) *
               info->arrayLayers / 2;
            /* convert to Y-tiles (128 bytes x 32 rows) */
            tw = u_align(tw, 128) / 128;
            th = u_align(th, 32) / 32;

            if (hz_width < cur_tx + tw)
               hz_width = cur_tx + tw;
            if (hz_height < cur_ty + th)
               hz_height = cur_ty + th;

            /* LOD0 at top-left, LOD1 below it; LOD2+ stack in a column to
             * the right of LOD1 */
            if (lv == 1)
               cur_tx += tw;
            else
               cur_ty += th;
         }

         /* convert tile offsets to memory offsets (4096 bytes per Y-tile) */
         for (lv = 0; lv < info->mipLevels; lv++) {
            layout->aux_offsets[lv] =
               (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
         }
         hz_width *= 128;
         hz_height *= 32;
      }
      break;
   case INTEL_LAYOUT_WALK_LAYER:
      {
         /* per-layer pitch: aligned LOD0 and LOD1 heights plus a fixed
          * gen-dependent tail */
         const unsigned h0 = u_align(params->h0, hz_align_j);
         const unsigned h1 = u_align(params->h1, hz_align_j);
         const unsigned htail =
            ((intel_gpu_gen(params->gpu) >= INTEL_GEN(7)) ? 12 : 11) * hz_align_j;
         const unsigned hz_qpitch = h0 + h1 + htail;

         hz_width = u_align(layout->lods[0].slice_width, 16);

         /* halved: two HiZ rows share one memory row */
         hz_height = hz_qpitch * info->arrayLayers / 2;
         if (intel_gpu_gen(params->gpu) >= INTEL_GEN(7))
            hz_height = u_align(hz_height, 8);

         layout->aux_layer_height = hz_qpitch;
      }
      break;
   case INTEL_LAYOUT_WALK_3D:
      hz_width = u_align(layout->lods[0].slice_width, 16);

      hz_height = 0;
      for (lv = 0; lv < info->mipLevels; lv++) {
         const unsigned h = u_align(layout->lods[lv].slice_height, hz_align_j);
         /* according to the formula, slices are packed together vertically */
         hz_height += h * u_minify(info->extent.depth, lv);
      }
      hz_height /= 2;
      break;
   default:
      assert(!"unknown layout walk");
      hz_width = 0;
      hz_height = 0;
      break;
   }

   /*
    * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
    * Experiments on Haswell show that aligning the RECTLIST primitive and
    * 3DSTATE_DRAWING_RECTANGLE alone are not enough.  The LOD sizes must be
    * aligned.
    */
   hz_clear_w = 8;
   hz_clear_h = 4;
   /* the 8x4 block covers fewer pixels as the sample count rises */
   switch (info->samples) {
   case VK_SAMPLE_COUNT_1_BIT:
   default:
      break;
   case VK_SAMPLE_COUNT_2_BIT:
      hz_clear_w /= 2;
      break;
   case VK_SAMPLE_COUNT_4_BIT:
      hz_clear_w /= 2;
      hz_clear_h /= 2;
      break;
   case VK_SAMPLE_COUNT_8_BIT:
      hz_clear_w /= 4;
      hz_clear_h /= 2;
      break;
   case VK_SAMPLE_COUNT_16_BIT:
      hz_clear_w /= 4;
      hz_clear_h /= 4;
      break;
   }

   /* enable HiZ only for the leading run of levels whose dimensions are
    * multiples of the clear block */
   for (lv = 0; lv < info->mipLevels; lv++) {
      if (u_minify(layout->width0, lv) % hz_clear_w ||
          u_minify(layout->height0, lv) % hz_clear_h)
         break;
      layout->aux_enables |= 1 << lv;
   }

   /* we padded to allow this in layout_align() */
   if (info->mipLevels == 1 && info->arrayLayers == 1 && info->extent.depth == 1)
      layout->aux_enables |= 0x1;

   /* align to Y-tile */
   layout->aux_stride = u_align(hz_width, 128);
   layout->aux_height = u_align(hz_height, 32);
}
Ejemplo n.º 20
0
/*
 * Create and initialize an intel_gpu for the given device id.
 *
 * @param gpu_ret receives the new GPU on VK_SUCCESS; the caller owns it.
 * Returns VK_ERROR_INITIALIZATION_FAILED for unsupported device ids and
 * VK_ERROR_OUT_OF_HOST_MEMORY on allocation failure.
 */
VkResult intel_gpu_create(const struct intel_instance *instance, int devid,
                            const char *primary_node, const char *render_node,
                            struct intel_gpu **gpu_ret)
{
    const int gen = devid_to_gen(devid);
    struct intel_gpu *gpu;
    size_t primary_len;
    size_t render_len;

    /* bail out early on device ids we cannot drive */
    if (gen < 0) {
        intel_log(instance, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                VK_NULL_HANDLE, 0, 0, "unsupported device id 0x%04x", devid);
        return VK_ERROR_INITIALIZATION_FAILED;
    }

    gpu = intel_alloc(instance, sizeof(*gpu), sizeof(int), VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
    if (!gpu)
        return VK_ERROR_OUT_OF_HOST_MEMORY;
    memset(gpu, 0, sizeof(*gpu));

    /* there is no VK_DBG_OBJECT_GPU */
    intel_handle_init(&gpu->handle, VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT, instance);

    gpu->devid = devid;

    /*
     * Both node paths share one allocation: the primary path first, then
     * (when present) the render path right after its terminator.
     */
    primary_len = strlen(primary_node);
    render_len = (render_node) ? strlen(render_node) : 0;

    gpu->primary_node = intel_alloc(gpu,
            primary_len + 1 + ((render_len) ? (render_len + 1) : 0),
            sizeof(int), VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
    if (!gpu->primary_node) {
        intel_free(instance, gpu);
        return VK_ERROR_OUT_OF_HOST_MEMORY;
    }

    memcpy(gpu->primary_node, primary_node, primary_len + 1);

    if (render_node) {
        gpu->render_node = gpu->primary_node + primary_len + 1;
        memcpy(gpu->render_node, render_node, render_len + 1);
    } else {
        /* no dedicated render node: fall back to the primary node */
        gpu->render_node = gpu->primary_node;
    }

    gpu->gen_opaque = gen;

    /* gen was validated above, so one of these cases applies */
    switch (intel_gpu_gen(gpu)) {
    case INTEL_GEN(7.5):
        gpu->gt = gen_get_hsw_gt(devid);
        break;
    case INTEL_GEN(7):
        gpu->gt = gen_get_ivb_gt(devid);
        break;
    case INTEL_GEN(6):
        gpu->gt = gen_get_snb_gt(devid);
        break;
    }

    /* 150K dwords */
    gpu->max_batch_buffer_size = sizeof(uint32_t) * 150*1024;

    /* the winsys is prepared for one reloc every two dwords, then minus 2 */
    gpu->batch_buffer_reloc_count =
        gpu->max_batch_buffer_size / sizeof(uint32_t) / 2 - 2;

    /* file descriptors are opened lazily */
    gpu->primary_fd_internal = -1;
    gpu->render_fd_internal = -1;

    *gpu_ret = gpu;

    return VK_SUCCESS;
}