/** * Initialize the \p dev from \p winsys. */ bool ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys) { const struct intel_winsys_info *info; assert(ilo_is_zeroed(dev, sizeof(*dev))); info = intel_winsys_get_info(winsys); dev->winsys = winsys; dev->devid = info->devid; dev->aperture_total = info->aperture_total; dev->aperture_mappable = info->aperture_mappable; dev->has_llc = info->has_llc; dev->has_address_swizzling = info->has_address_swizzling; dev->has_logical_context = info->has_logical_context; dev->has_ppgtt = info->has_ppgtt; dev->has_timestamp = info->has_timestamp; dev->has_gen7_sol_reset = info->has_gen7_sol_reset; if (!dev->has_logical_context) { ilo_err("missing hardware logical context support\n"); return false; } /* * PIPE_CONTROL and MI_* use PPGTT writes on GEN7+ and privileged GGTT * writes on GEN6. * * From the Sandy Bridge PRM, volume 1 part 3, page 101: * * "[DevSNB] When Per-Process GTT Enable is set, it is assumed that all * code is in a secure environment, independent of address space. * Under this condition, this bit only specifies the address space * (GGTT or PPGTT). All commands are executed "as-is"" * * We need PPGTT to be enabled on GEN6 too. */ if (!dev->has_ppgtt) { /* experiments show that it does not really matter... */ ilo_warn("PPGTT disabled\n"); } if (gen_is_bdw(info->devid) || gen_is_chv(info->devid)) { dev->gen_opaque = ILO_GEN(8); dev->gt = (gen_is_bdw(info->devid)) ? gen_get_bdw_gt(info->devid) : 1; /* XXX random values */ if (dev->gt == 3) { dev->eu_count = 48; dev->thread_count = 336; dev->urb_size = 384 * 1024; } else if (dev->gt == 2) { dev->eu_count = 24; dev->thread_count = 168; dev->urb_size = 384 * 1024; } else { dev->eu_count = 12; dev->thread_count = 84; dev->urb_size = 192 * 1024; } } else if (gen_is_hsw(info->devid)) { /* * From the Haswell PRM, volume 4, page 8: * * "Description GT3 GT2 GT1.5 GT1 * (...) * EUs (Total) 40 20 12 10 * Threads (Total) 280 140 84 70 * (...) * URB Size (max, within L3$) 512KB 256KB 256KB 128KB */ dev->gen_opaque = ILO_GEN(7.5); dev->gt = gen_get_hsw_gt(info->devid); if (dev->gt == 3) { dev->eu_count = 40; dev->thread_count = 280; dev->urb_size = 512 * 1024; } else if (dev->gt == 2) { dev->eu_count = 20; dev->thread_count = 140; dev->urb_size = 256 * 1024; } else { dev->eu_count = 10; dev->thread_count = 70; dev->urb_size = 128 * 1024; } } else if (gen_is_ivb(info->devid) || gen_is_vlv(info->devid)) { /* * From the Ivy Bridge PRM, volume 1 part 1, page 18: * * "Device # of EUs #Threads/EU * Ivy Bridge (GT2) 16 8 * Ivy Bridge (GT1) 6 6" * * From the Ivy Bridge PRM, volume 4 part 2, page 17: * * "URB Size URB Rows URB Rows when SLM Enabled * 128k 4096 2048 * 256k 8096 4096" */ dev->gen_opaque = ILO_GEN(7); dev->gt = (gen_is_ivb(info->devid)) ? gen_get_ivb_gt(info->devid) : 1; if (dev->gt == 2) { dev->eu_count = 16; dev->thread_count = 128; dev->urb_size = 256 * 1024; } else { dev->eu_count = 6; dev->thread_count = 36; dev->urb_size = 128 * 1024; } } else if (gen_is_snb(info->devid)) { /* * From the Sandy Bridge PRM, volume 1 part 1, page 22: * * "Device # of EUs #Threads/EU * SNB GT2 12 5 * SNB GT1 6 4" * * From the Sandy Bridge PRM, volume 4 part 2, page 18: * * "[DevSNB]: The GT1 product's URB provides 32KB of storage, * arranged as 1024 256-bit rows. The GT2 product's URB provides * 64KB of storage, arranged as 2048 256-bit rows. A row * corresponds in size to an EU GRF register. Read/write access to * the URB is generally supported on a row-granular basis." */ dev->gen_opaque = ILO_GEN(6); dev->gt = gen_get_snb_gt(info->devid); if (dev->gt == 2) { dev->eu_count = 12; dev->thread_count = 60; dev->urb_size = 64 * 1024; } else { dev->eu_count = 6; dev->thread_count = 24; dev->urb_size = 32 * 1024; } } else { ilo_err("unknown GPU generation\n"); return false; } return true; }
VkResult intel_gpu_create(const struct intel_instance *instance, int devid, const char *primary_node, const char *render_node, struct intel_gpu **gpu_ret) { const int gen = devid_to_gen(devid); size_t primary_len, render_len; struct intel_gpu *gpu; if (gen < 0) { intel_log(instance, VK_DEBUG_REPORT_WARNING_BIT_EXT, 0, VK_NULL_HANDLE, 0, 0, "unsupported device id 0x%04x", devid); return VK_ERROR_INITIALIZATION_FAILED; } gpu = intel_alloc(instance, sizeof(*gpu), sizeof(int), VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!gpu) return VK_ERROR_OUT_OF_HOST_MEMORY; memset(gpu, 0, sizeof(*gpu)); /* there is no VK_DBG_OBJECT_GPU */ intel_handle_init(&gpu->handle, VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT, instance); gpu->devid = devid; primary_len = strlen(primary_node); render_len = (render_node) ? strlen(render_node) : 0; gpu->primary_node = intel_alloc(gpu, primary_len + 1 + ((render_len) ? (render_len + 1) : 0), sizeof(int), VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!gpu->primary_node) { intel_free(instance, gpu); return VK_ERROR_OUT_OF_HOST_MEMORY; } memcpy(gpu->primary_node, primary_node, primary_len + 1); if (render_node) { gpu->render_node = gpu->primary_node + primary_len + 1; memcpy(gpu->render_node, render_node, render_len + 1); } else { gpu->render_node = gpu->primary_node; } gpu->gen_opaque = gen; switch (intel_gpu_gen(gpu)) { case INTEL_GEN(7.5): gpu->gt = gen_get_hsw_gt(devid); break; case INTEL_GEN(7): gpu->gt = gen_get_ivb_gt(devid); break; case INTEL_GEN(6): gpu->gt = gen_get_snb_gt(devid); break; } /* 150K dwords */ gpu->max_batch_buffer_size = sizeof(uint32_t) * 150*1024; /* the winsys is prepared for one reloc every two dwords, then minus 2 */ gpu->batch_buffer_reloc_count = gpu->max_batch_buffer_size / sizeof(uint32_t) / 2 - 2; gpu->primary_fd_internal = -1; gpu->render_fd_internal = -1; *gpu_ret = gpu; return VK_SUCCESS; }