Example #1
static void
init_dev_info(int drm_fd, uint32_t devid)
{
    i915_getparam_t test;
    int test_n_eus;
    int status;

    gputop_devinfo.devid = devid;

    /* Probe I915_PARAM_EU_TOTAL up front; the returned value is unused
     * here, only the ioctl status is checked so that old or missing
     * kernel support gets reported. */
    test.param = I915_PARAM_EU_TOTAL;
    test.value = &test_n_eus;
    status = perf_ioctl(drm_fd, I915_IOCTL_GETPARAM, &test);
    if (status == -1)
	fprintf(stderr, "error calling I915_IOCTL_GETPARAM: %m\n");

    if (IS_HASWELL(devid)) {
	if (IS_HSW_GT1(devid)) {
	    gputop_devinfo.n_eus = 10;
	    gputop_devinfo.n_eu_slices = 1;
	    gputop_devinfo.n_eu_sub_slices = 1;
	    gputop_devinfo.subslice_mask = 0x1;
	} else if (IS_HSW_GT2(devid)) {
	    gputop_devinfo.n_eus = 20;
	    gputop_devinfo.n_eu_slices = 1;
	    gputop_devinfo.n_eu_sub_slices = 2;
	    gputop_devinfo.subslice_mask = 0x3;
	} else if (IS_HSW_GT3(devid)) {
	    gputop_devinfo.n_eus = 40;
	    gputop_devinfo.n_eu_slices = 2;
	    gputop_devinfo.n_eu_sub_slices = 4;
	    gputop_devinfo.subslice_mask = 0xf;
	}
    } else {
#ifdef I915_PARAM_EU_TOTAL
	i915_getparam_t gp;
	int ret;
	int n_eus = 0;
	int slice_mask = 0;
	int ss_mask = 0;
	int s_max = 0;  /* zero-initialized in case no platform check below matches */
	int ss_max = 0;
	uint64_t subslice_mask = 0;
	int s;

	if (IS_BROADWELL(devid)) {
	    s_max = 2;
	    ss_max = 3;
	} else if (IS_CHERRYVIEW(devid)) {
	    s_max = 1;
	    ss_max = 2;
	} else if (IS_SKYLAKE(devid)) {
	    s_max = 3;
	    ss_max = 4;
	}

	gp.param = I915_PARAM_EU_TOTAL;
	gp.value = &n_eus;
	ret = perf_ioctl(drm_fd, I915_IOCTL_GETPARAM, &gp);
	assert(ret == 0 && n_eus > 0);

	gp.param = I915_PARAM_SLICE_MASK;
	gp.value = &slice_mask;
	ret = perf_ioctl(drm_fd, I915_IOCTL_GETPARAM, &gp);
	assert(ret == 0 && slice_mask);

	gp.param = I915_PARAM_SUBSLICE_MASK;
	gp.value = &ss_mask;
	ret = perf_ioctl(drm_fd, I915_IOCTL_GETPARAM, &gp);
	assert(ret == 0 && ss_mask);

	gputop_devinfo.n_eus = n_eus;
	gputop_devinfo.n_eu_slices = __builtin_popcount(slice_mask);

	/* Note: some of the metrics we have (as described in XML)
	 * are conditional on a $SubsliceMask variable which is
	 * expected to also reflect the slice mask by packing
	 * together subslice masks for each slice in one value...
	 */
	for (s = 0; s < s_max; s++) {
	    if (slice_mask & (1<<s)) {
		subslice_mask |= (uint64_t)ss_mask << (ss_max * s);
	    }
	}
	gputop_devinfo.subslice_mask = subslice_mask;
#else
	assert(0);
#endif
    }
}
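
The packing loop above is the subtle part: the $SubsliceMask variable used by the XML metrics expects the per-slice subslice masks concatenated into one value, ss_max bits per slice. A minimal standalone sketch of that arithmetic, using hypothetical Broadwell-like values (two slices, three subslices each; not read from real hardware):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Hypothetical values for illustration only. */
    int slice_mask = 0x3;       /* slices 0 and 1 present */
    int ss_mask = 0x7;          /* three subslices within each slice */
    int s_max = 2, ss_max = 3;  /* Broadwell-like limits */
    uint64_t subslice_mask = 0;

    for (int s = 0; s < s_max; s++) {
        if (slice_mask & (1 << s))
            subslice_mask |= (uint64_t)ss_mask << (ss_max * s);
    }

    /* Slice 0 fills bits 0-2, slice 1 fills bits 3-5: 0x7 | (0x7 << 3). */
    assert(subslice_mask == 0x3f);
    return 0;
}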
Example #2
bool
intelInitContext(struct brw_context *brw,
                 int api,
                 unsigned major_version,
                 unsigned minor_version,
                 const struct gl_config * mesaVis,
                 __DRIcontext * driContextPriv,
                 void *sharedContextPrivate,
                 struct dd_function_table *functions,
                 unsigned *dri_ctx_error)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct intel_screen *intelScreen = sPriv->driverPrivate;
   int bo_reuse_mode;
   struct gl_config visual;

   /* we can't do anything without a connection to the device */
   if (intelScreen->bufmgr == NULL) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   if (!validate_context_version(intelScreen,
                                 api, major_version, minor_version,
                                 dri_ctx_error))
      return false;

   /* Can't rely on invalidate events, fall back to glViewport hack */
   if (!driContextPriv->driScreenPriv->dri2.useInvalidate) {
      brw->saved_viewport = functions->Viewport;
      functions->Viewport = intel_viewport;
   }

   if (mesaVis == NULL) {
      memset(&visual, 0, sizeof visual);
      mesaVis = &visual;
   }

   brw->intelScreen = intelScreen;

   if (!_mesa_initialize_context(&brw->ctx, api, mesaVis, shareCtx,
                                 functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      printf("%s: failed to init mesa context\n", __FUNCTION__);
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;

   brw->gen = intelScreen->gen;

   const int devID = intelScreen->deviceID;
   if (IS_SNB_GT1(devID) || IS_IVB_GT1(devID) || IS_HSW_GT1(devID))
      brw->gt = 1;
   else if (IS_SNB_GT2(devID) || IS_IVB_GT2(devID) || IS_HSW_GT2(devID))
      brw->gt = 2;
   else if (IS_HSW_GT3(devID))
      brw->gt = 3;
   else
      brw->gt = 0;

   if (IS_HASWELL(devID)) {
      brw->is_haswell = true;
   } else if (IS_BAYTRAIL(devID)) {
      brw->is_baytrail = true;
      brw->gt = 1;
   } else if (IS_G4X(devID)) {
      brw->is_g4x = true;
   }

   brw->has_separate_stencil = brw->intelScreen->hw_has_separate_stencil;
   brw->must_use_separate_stencil = brw->intelScreen->hw_must_use_separate_stencil;
   brw->has_hiz = brw->gen >= 6;
   brw->has_llc = brw->intelScreen->hw_has_llc;
   brw->has_swizzling = brw->intelScreen->hw_has_swizzling;

   memset(&ctx->TextureFormatSupported,
	  0, sizeof(ctx->TextureFormatSupported));

   driParseConfigFiles(&brw->optionCache, &intelScreen->optionCache,
                       sPriv->myNum, "i965");

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   brw->bufmgr = intelScreen->bufmgr;

   bo_reuse_mode = driQueryOptioni(&brw->optionCache, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   intelInitExtensions(ctx);

   INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
   if (INTEL_DEBUG & DEBUG_BUFMGR)
      dri_bufmgr_set_debug(brw->bufmgr, true);
   if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && brw->gen < 7) {
      fprintf(stderr,
              "shader_time debugging requires gen7 (Ivybridge) or better.\n");
      INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
   }
   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   if (INTEL_DEBUG & DEBUG_AUB)
      drm_intel_bufmgr_gem_set_aub_dump(brw->bufmgr, true);

   intel_batchbuffer_init(brw);

   intel_fbo_init(brw);

   if (!driQueryOptionb(&brw->optionCache, "hiz")) {
       brw->has_hiz = false;
       /* On gen6, you can only do separate stencil with HIZ. */
       if (brw->gen == 6)
	  brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(&brw->optionCache, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = 1;
   }

   if (driQueryOptionb(&brw->optionCache, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = 1;
   }

   if (driQueryOptionb(&brw->optionCache, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = 1;
   }

   return true;
}
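
The aperture arithmetic above deserves a worked example: the code assumes a 256MB mappable GTT, halves it so that a memcpy between two mapped objects can keep both resident, and halves it again as headroom for the framebuffer, ringbuffer, and similar allocations. A quick sketch of the resulting limit:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Assumed mappable aperture, per the comment in intelInitContext. */
    uint32_t gtt_size = 256u * 1024 * 1024;

    /* /2 so two objects can be mapped at once, /2 again as headroom. */
    uint32_t max_map = gtt_size / 4;

    printf("max_gtt_map_object_size = %u MiB\n", max_map >> 20);  /* 64 MiB */
    return 0;
}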
Example #3
static bool
init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
{
   dev->devid = info->devid;
   dev->has_llc = info->has_llc;
   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
   dev->has_address_swizzling = info->has_address_swizzling;

   /*
    * From the Sandy Bridge PRM, volume 4 part 2, page 18:
    *
    *     "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged
    *      as 1024 256-bit rows. The GT2 product's URB provides 64KB of
    *      storage, arranged as 2048 256-bit rows. A row corresponds in size
    *      to an EU GRF register. Read/write access to the URB is generally
    *      supported on a row-granular basis."
    *
    * From the Ivy Bridge PRM, volume 4 part 2, page 17:
    *
    *     "URB Size    URB Rows    URB Rows when SLM Enabled
    *      128k        4096        2048
    *      256k        8096        4096"
    */

   if (IS_HASWELL(info->devid)) {
      dev->gen = ILO_GEN(7.5);

      if (IS_HSW_GT3(info->devid)) {
         dev->gt = 3;
         dev->urb_size = 512 * 1024;
      }
      else if (IS_HSW_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 256 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 128 * 1024;
      }
   }
   else if (IS_GEN7(info->devid)) {
      dev->gen = ILO_GEN(7);

      if (IS_IVB_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 256 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 128 * 1024;
      }
   }
   else if (IS_GEN6(info->devid)) {
      dev->gen = ILO_GEN(6);

      if (IS_SNB_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 64 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 32 * 1024;
      }
   }
   else {
      ilo_err("unknown GPU generation\n");
      return false;
   }

   return true;
}
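
The URB sizes chosen above follow directly from the row arithmetic in the quoted PRM text: a row is one 256-bit EU GRF register, i.e. 32 bytes. (By that arithmetic, the "8096" in the quoted Ivy Bridge table appears to be a typo for 8192 in the PRM itself, since 256KB / 32 bytes = 8192.) A small sanity check of those figures:

#include <assert.h>

int main(void)
{
    const int row_bytes = 256 / 8;            /* one 256-bit URB row */

    assert(1024 * row_bytes == 32 * 1024);    /* SNB GT1: 32KB, 1024 rows */
    assert(2048 * row_bytes == 64 * 1024);    /* SNB GT2: 64KB, 2048 rows */
    assert(4096 * row_bytes == 128 * 1024);   /* IVB: 128k, 4096 rows */
    assert(8192 * row_bytes == 256 * 1024);   /* IVB: 256k, 8192 rows */
    return 0;
}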