static void init_dev_info(int drm_fd, uint32_t devid) { i915_getparam_t test; int test_n_eus; int status; gputop_devinfo.devid = devid; test.param = I915_PARAM_EU_TOTAL; test.value = &test_n_eus; status = perf_ioctl(drm_fd, I915_IOCTL_GETPARAM, &test); if (status == -1) fprintf(stderr, "error calling I915_IOCTL_GETPARAM %m\n"); if (IS_HASWELL(devid)) { if (IS_HSW_GT1(devid)) { gputop_devinfo.n_eus = 10; gputop_devinfo.n_eu_slices = 1; gputop_devinfo.n_eu_sub_slices = 1; gputop_devinfo.subslice_mask = 0x1; } else if (IS_HSW_GT2(devid)) { gputop_devinfo.n_eus = 20; gputop_devinfo.n_eu_slices = 1; gputop_devinfo.n_eu_sub_slices = 2; gputop_devinfo.subslice_mask = 0x3; } else if (IS_HSW_GT3(devid)) { gputop_devinfo.n_eus = 40; gputop_devinfo.n_eu_slices = 2; gputop_devinfo.n_eu_sub_slices = 4; gputop_devinfo.subslice_mask = 0xf; } } else { #ifdef I915_PARAM_EU_TOTAL i915_getparam_t gp; int ret; int n_eus = 0; int slice_mask = 0; int ss_mask = 0; int s_max; int ss_max; uint64_t subslice_mask = 0; int s; if (IS_BROADWELL(devid)) { s_max = 2; ss_max = 3; } else if (IS_CHERRYVIEW(devid)) { s_max = 1; ss_max = 2; } else if (IS_SKYLAKE(devid)) { s_max = 3; ss_max = 4; } gp.param = I915_PARAM_EU_TOTAL; gp.value = &n_eus; ret = perf_ioctl(drm_fd, I915_IOCTL_GETPARAM, &gp); assert(ret == 0 && n_eus > 0); gp.param = I915_PARAM_SLICE_MASK; gp.value = &slice_mask; ret = perf_ioctl(drm_fd, I915_IOCTL_GETPARAM, &gp); assert(ret == 0 && slice_mask); gp.param = I915_PARAM_SUBSLICE_MASK; gp.value = &ss_mask; ret = perf_ioctl(drm_fd, I915_IOCTL_GETPARAM, &gp); assert(ret == 0 && ss_mask); gputop_devinfo.n_eus = n_eus; gputop_devinfo.n_eu_slices = __builtin_popcount(slice_mask); /* Note: some of the metrics we have (as described in XML) * are conditional on a $SubsliceMask variable which is * expected to also reflect the slice mask by packing * together subslice masks for each slice in one value... 
*/ for (s = 0; s < s_max; s++) { if (slice_mask & (1<<s)) { slice_mask |= ss_mask << (ss_max * s); } } gputop_devinfo.subslice_mask = subslice_mask; #else assert(0); #endif } }
/*
 * Create and initialize a brw (i965) GL context.
 *
 * On success, wires up brw <-> driContextPriv, initializes the Mesa core
 * context, software rasterizer helpers, batchbuffer and FBO state, and
 * returns true. On failure, sets *dri_ctx_error and returns false.
 *
 * Parameters mirror the DRI createContext entry point: the requested API
 * and version, the fbconfig (mesaVis, may be NULL for configless
 * contexts), the share context, the driver function table, and an out
 * parameter for the DRI error code.
 */
bool
intelInitContext(struct brw_context *brw,
                 int api,
                 unsigned major_version,
                 unsigned minor_version,
                 const struct gl_config * mesaVis,
                 __DRIcontext * driContextPriv,
                 void *sharedContextPrivate,
                 struct dd_function_table *functions,
                 unsigned *dri_ctx_error)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct intel_screen *intelScreen = sPriv->driverPrivate;
   int bo_reuse_mode;
   struct gl_config visual;

   /* we can't do anything without a connection to the device */
   if (intelScreen->bufmgr == NULL) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   if (!validate_context_version(intelScreen,
                                 api, major_version, minor_version,
                                 dri_ctx_error))
      return false;

   /* Can't rely on invalidate events, fall back to glViewport hack */
   if (!driContextPriv->driScreenPriv->dri2.useInvalidate) {
      brw->saved_viewport = functions->Viewport;
      functions->Viewport = intel_viewport;
   }

   /* A NULL config means a configless context; substitute a zeroed
    * visual so core Mesa initialization still has something to read. */
   if (mesaVis == NULL) {
      memset(&visual, 0, sizeof visual);
      mesaVis = &visual;
   }

   brw->intelScreen = intelScreen;

   if (!_mesa_initialize_context(&brw->ctx, api, mesaVis, shareCtx,
                                 functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      printf("%s: failed to init mesa context\n", __FUNCTION__);
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;

   brw->gen = intelScreen->gen;

   /* Derive the GT level (1/2/3) from the PCI device ID; 0 means
    * "no GT distinction" for this generation. */
   const int devID = intelScreen->deviceID;
   if (IS_SNB_GT1(devID) || IS_IVB_GT1(devID) || IS_HSW_GT1(devID))
      brw->gt = 1;
   else if (IS_SNB_GT2(devID) || IS_IVB_GT2(devID) || IS_HSW_GT2(devID))
      brw->gt = 2;
   else if (IS_HSW_GT3(devID))
      brw->gt = 3;
   else
      brw->gt = 0;

   if (IS_HASWELL(devID)) {
      brw->is_haswell = true;
   } else if (IS_BAYTRAIL(devID)) {
      brw->is_baytrail = true;
      brw->gt = 1;
   } else if (IS_G4X(devID)) {
      brw->is_g4x = true;
   }

   /* Capability flags derived from the screen's hardware probing. */
   brw->has_separate_stencil = brw->intelScreen->hw_has_separate_stencil;
   brw->must_use_separate_stencil = brw->intelScreen->hw_must_use_separate_stencil;
   brw->has_hiz = brw->gen >= 6;
   brw->has_llc = brw->intelScreen->hw_has_llc;
   brw->has_swizzling = brw->intelScreen->hw_has_swizzling;

   memset(&ctx->TextureFormatSupported, 0,
          sizeof(ctx->TextureFormatSupported));

   driParseConfigFiles(&brw->optionCache, &intelScreen->optionCache,
                       sPriv->myNum, "i965");

   /* Estimate the size of the mappable aperture into the GTT. There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever. So
    * we would need to divide the GTT size by 2. Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   brw->bufmgr = intelScreen->bufmgr;

   bo_reuse_mode = driQueryOptioni(&brw->optionCache, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   intelInitExtensions(ctx);

   /* Debug flags come from the INTEL_DEBUG environment variable. */
   INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
   if (INTEL_DEBUG & DEBUG_BUFMGR)
      dri_bufmgr_set_debug(brw->bufmgr, true);
   if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && brw->gen < 7) {
      fprintf(stderr,
              "shader_time debugging requires gen7 (Ivybridge) or better.\n");
      INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
   }
   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   if (INTEL_DEBUG & DEBUG_AUB)
      drm_intel_bufmgr_gem_set_aub_dump(brw->bufmgr, true);

   intel_batchbuffer_init(brw);

   intel_fbo_init(brw);

   /* driconf overrides: HiZ can be disabled by configuration. */
   if (!driQueryOptionb(&brw->optionCache, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HIZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(&brw->optionCache, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = 1;
   }

   if (driQueryOptionb(&brw->optionCache, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = 1;
   }

   if (driQueryOptionb(&brw->optionCache, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = 1;
   }

   return true;
}
static bool init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info) { dev->devid = info->devid; dev->has_llc = info->has_llc; dev->has_gen7_sol_reset = info->has_gen7_sol_reset; dev->has_address_swizzling = info->has_address_swizzling; /* * From the Sandy Bridge PRM, volume 4 part 2, page 18: * * "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged * as 1024 256-bit rows. The GT2 product's URB provides 64KB of * storage, arranged as 2048 256-bit rows. A row corresponds in size * to an EU GRF register. Read/write access to the URB is generally * supported on a row-granular basis." * * From the Ivy Bridge PRM, volume 4 part 2, page 17: * * "URB Size URB Rows URB Rows when SLM Enabled * 128k 4096 2048 * 256k 8096 4096" */ if (IS_HASWELL(info->devid)) { dev->gen = ILO_GEN(7.5); if (IS_HSW_GT3(info->devid)) { dev->gt = 3; dev->urb_size = 512 * 1024; } else if (IS_HSW_GT2(info->devid)) { dev->gt = 2; dev->urb_size = 256 * 1024; } else { dev->gt = 1; dev->urb_size = 128 * 1024; } } else if (IS_GEN7(info->devid)) { dev->gen = ILO_GEN(7); if (IS_IVB_GT2(info->devid)) { dev->gt = 2; dev->urb_size = 256 * 1024; } else { dev->gt = 1; dev->urb_size = 128 * 1024; } } else if (IS_GEN6(info->devid)) { dev->gen = ILO_GEN(6); if (IS_SNB_GT2(info->devid)) { dev->gt = 2; dev->urb_size = 64 * 1024; } else { dev->gt = 1; dev->urb_size = 32 * 1024; } } else { ilo_err("unknown GPU generation\n"); return false; } return true; }