Пример #1
0
/**
 * Fill in \p dev from the information reported by \p winsys, then derive the
 * per-generation limits (GT level, EU count, thread count, URB size) from the
 * device id.
 *
 * \p dev is expected to be zeroed on entry (checked with an assert).
 *
 * \return false, after logging an error, when hardware logical contexts are
 *         not available or when the device id matches none of the handled
 *         generations; true otherwise.
 */
bool
ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys)
{
   const struct intel_winsys_info *winsys_info;

   assert(ilo_is_zeroed(dev, sizeof(*dev)));

   winsys_info = intel_winsys_get_info(winsys);

   /* copy over everything the winsys reports */
   dev->winsys = winsys;
   dev->devid = winsys_info->devid;
   dev->aperture_total = winsys_info->aperture_total;
   dev->aperture_mappable = winsys_info->aperture_mappable;
   dev->has_llc = winsys_info->has_llc;
   dev->has_address_swizzling = winsys_info->has_address_swizzling;
   dev->has_logical_context = winsys_info->has_logical_context;
   dev->has_ppgtt = winsys_info->has_ppgtt;
   dev->has_timestamp = winsys_info->has_timestamp;
   dev->has_gen7_sol_reset = winsys_info->has_gen7_sol_reset;

   /* logical contexts are a hard requirement */
   if (!dev->has_logical_context) {
      ilo_err("missing hardware logical context support\n");
      return false;
   }

   /*
    * On GEN7+, PIPE_CONTROL and MI_* write through the PPGTT; on GEN6 they
    * perform privileged GGTT writes.
    *
    * The Sandy Bridge PRM, volume 1 part 3, page 101, says:
    *
    *     "[DevSNB] When Per-Process GTT Enable is set, it is assumed that all
    *      code is in a secure environment, independent of address space.
    *      Under this condition, this bit only specifies the address space
    *      (GGTT or PPGTT). All commands are executed "as-is""
    *
    * so PPGTT should be enabled on GEN6 as well.  In practice a missing
    * PPGTT has not been observed to matter, hence only a warning.
    */
   if (!dev->has_ppgtt) {
      ilo_warn("PPGTT disabled\n");
   }

   if (gen_is_bdw(winsys_info->devid) || gen_is_chv(winsys_info->devid)) {
      /* GEN8: only Broadwell has a GT lookup; anything else is GT1 */
      dev->gen_opaque = ILO_GEN(8);
      if (gen_is_bdw(winsys_info->devid))
         dev->gt = gen_get_bdw_gt(winsys_info->devid);
      else
         dev->gt = 1;

      /* XXX these limits are placeholders, not taken from documentation */
      switch (dev->gt) {
      case 3:
         dev->eu_count = 48;
         dev->thread_count = 336;
         dev->urb_size = 384 * 1024;
         break;
      case 2:
         dev->eu_count = 24;
         dev->thread_count = 168;
         dev->urb_size = 384 * 1024;
         break;
      default:
         dev->eu_count = 12;
         dev->thread_count = 84;
         dev->urb_size = 192 * 1024;
         break;
      }
   } else if (gen_is_hsw(winsys_info->devid)) {
      /*
       * Haswell PRM, volume 4, page 8:
       *
       *     Description                    GT3      GT2      GT1.5    GT1
       *     EUs (Total)                    40       20       12       10
       *     Threads (Total)                280      140      84       70
       *     URB Size (max, within L3$)     512KB    256KB    256KB    128KB
       *
       * Any GT level other than 3 or 2 gets the GT1 numbers.
       */
      dev->gen_opaque = ILO_GEN(7.5);
      dev->gt = gen_get_hsw_gt(winsys_info->devid);

      switch (dev->gt) {
      case 3:
         dev->eu_count = 40;
         dev->thread_count = 280;
         dev->urb_size = 512 * 1024;
         break;
      case 2:
         dev->eu_count = 20;
         dev->thread_count = 140;
         dev->urb_size = 256 * 1024;
         break;
      default:
         dev->eu_count = 10;
         dev->thread_count = 70;
         dev->urb_size = 128 * 1024;
         break;
      }
   } else if (gen_is_ivb(winsys_info->devid) ||
              gen_is_vlv(winsys_info->devid)) {
      /*
       * Ivy Bridge PRM, volume 1 part 1, page 18:
       *
       *     Device             # of EUs        #Threads/EU
       *     Ivy Bridge (GT2)   16              8
       *     Ivy Bridge (GT1)   6               6
       *
       * Ivy Bridge PRM, volume 4 part 2, page 17:
       *
       *     URB Size    URB Rows    URB Rows when SLM Enabled
       *     128k        4096        2048
       *     256k        8096        4096
       *
       * Only Ivy Bridge has a GT lookup here; anything else is GT1.
       */
      dev->gen_opaque = ILO_GEN(7);
      if (gen_is_ivb(winsys_info->devid))
         dev->gt = gen_get_ivb_gt(winsys_info->devid);
      else
         dev->gt = 1;

      switch (dev->gt) {
      case 2:
         dev->eu_count = 16;
         dev->thread_count = 128;
         dev->urb_size = 256 * 1024;
         break;
      default:
         dev->eu_count = 6;
         dev->thread_count = 36;
         dev->urb_size = 128 * 1024;
         break;
      }
   } else if (gen_is_snb(winsys_info->devid)) {
      /*
       * Sandy Bridge PRM, volume 1 part 1, page 22:
       *
       *     Device             # of EUs        #Threads/EU
       *     SNB GT2            12              5
       *     SNB GT1            6               4
       *
       * Sandy Bridge PRM, volume 4 part 2, page 18:
       *
       *     "[DevSNB]: The GT1 product's URB provides 32KB of storage,
       *      arranged as 1024 256-bit rows. The GT2 product's URB provides
       *      64KB of storage, arranged as 2048 256-bit rows. A row
       *      corresponds in size to an EU GRF register. Read/write access to
       *      the URB is generally supported on a row-granular basis."
       */
      dev->gen_opaque = ILO_GEN(6);
      dev->gt = gen_get_snb_gt(winsys_info->devid);

      switch (dev->gt) {
      case 2:
         dev->eu_count = 12;
         dev->thread_count = 60;
         dev->urb_size = 64 * 1024;
         break;
      default:
         dev->eu_count = 6;
         dev->thread_count = 24;
         dev->urb_size = 32 * 1024;
         break;
      }
   } else {
      ilo_err("unknown GPU generation\n");
      return false;
   }

   return true;
}
Пример #2
0
/**
 * Allocate and initialize an intel_gpu for the device id \p devid.
 *
 * The primary and render node paths are copied into a single allocation
 * owned by the gpu: the primary path first, followed by the render path when
 * one is given.  When \p render_node is NULL or an empty string, the gpu's
 * render_node simply aliases its primary_node.
 *
 * \param instance      instance used for logging and for allocating the gpu
 * \param devid         device id; mapped to a generation by devid_to_gen()
 * \param primary_node  NUL-terminated path; must not be NULL
 * \param render_node   NUL-terminated path, or NULL when there is none
 * \param gpu_ret       receives the new gpu on success; untouched on failure
 *
 * \return VK_SUCCESS on success, VK_ERROR_INITIALIZATION_FAILED when
 *         devid_to_gen() reports a negative generation, or
 *         VK_ERROR_OUT_OF_HOST_MEMORY when an allocation fails.
 */
VkResult intel_gpu_create(const struct intel_instance *instance, int devid,
                            const char *primary_node, const char *render_node,
                            struct intel_gpu **gpu_ret)
{
    const int gen = devid_to_gen(devid);
    size_t primary_len, render_len, nodes_size;
    struct intel_gpu *gpu;

    if (gen < 0) {
        intel_log(instance, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0,
                VK_NULL_HANDLE, 0, 0, "unsupported device id 0x%04x", devid);
        return VK_ERROR_INITIALIZATION_FAILED;
    }

    gpu = intel_alloc(instance, sizeof(*gpu), sizeof(int), VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
    if (!gpu)
        return VK_ERROR_OUT_OF_HOST_MEMORY;

    memset(gpu, 0, sizeof(*gpu));
    /* there is no VK_DBG_OBJECT_GPU */
    intel_handle_init(&gpu->handle, VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT, instance);

    gpu->devid = devid;

    primary_len = strlen(primary_node);
    render_len = (render_node) ? strlen(render_node) : 0;

    /*
     * Both the buffer size and the copy below are keyed on render_len, so
     * they always agree.  Keying the copy on render_node alone would write
     * one byte past the buffer for a non-NULL but empty render_node.
     */
    nodes_size = primary_len + 1;
    if (render_len)
        nodes_size += render_len + 1;

    gpu->primary_node = intel_alloc(gpu, nodes_size, sizeof(int),
            VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
    if (!gpu->primary_node) {
        intel_free(instance, gpu);
        return VK_ERROR_OUT_OF_HOST_MEMORY;
    }

    memcpy(gpu->primary_node, primary_node, primary_len + 1);

    if (render_len) {
        /* the render path lives right after the primary path's NUL */
        gpu->render_node = gpu->primary_node + primary_len + 1;
        memcpy(gpu->render_node, render_node, render_len + 1);
    } else {
        /* no (or empty) render node: fall back to the primary node */
        gpu->render_node = gpu->primary_node;
    }

    gpu->gen_opaque = gen;

    switch (intel_gpu_gen(gpu)) {
    case INTEL_GEN(7.5):
        gpu->gt = gen_get_hsw_gt(devid);
        break;
    case INTEL_GEN(7):
        gpu->gt = gen_get_ivb_gt(devid);
        break;
    case INTEL_GEN(6):
        gpu->gt = gen_get_snb_gt(devid);
        break;
    default:
        /* other generations: gt keeps the 0 left by the memset above */
        break;
    }

    /* 150K dwords */
    gpu->max_batch_buffer_size = sizeof(uint32_t) * 150*1024;

    /* the winsys is prepared for one reloc every two dwords, then minus 2 */
    gpu->batch_buffer_reloc_count =
        gpu->max_batch_buffer_size / sizeof(uint32_t) / 2 - 2;

    /* -1 for both fds, i.e. not a valid descriptor yet */
    gpu->primary_fd_internal = -1;
    gpu->render_fd_internal = -1;

    *gpu_ret = gpu;

    return VK_SUCCESS;
}