static void *work(void *arg) { struct intel_batchbuffer *batch; render_copyfunc_t rendercopy = get_render_copyfunc(devid); drm_intel_context *context; drm_intel_bufmgr *bufmgr; int thread_id = *(int *)arg; int td_fd; int i; if (multiple_fds) td_fd = fd = drm_open_any(); else td_fd = fd; assert(td_fd >= 0); bufmgr = drm_intel_bufmgr_gem_init(td_fd, 4096); batch = intel_batchbuffer_alloc(bufmgr, devid); context = drm_intel_gem_context_create(bufmgr); if (!context) { returns[thread_id] = 77; goto out; } for (i = 0; i < iter; i++) { struct scratch_buf src, dst; init_buffer(bufmgr, &src, 4096); init_buffer(bufmgr, &dst, 4096); if (uncontexted) { assert(rendercopy); rendercopy(batch, &src, 0, 0, 0, 0, &dst, 0, 0); } else { int ret; ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer); assert(ret == 0); intel_batchbuffer_flush_with_context(batch, context); } } out: drm_intel_gem_context_destroy(context); intel_batchbuffer_free(batch); drm_intel_bufmgr_destroy(bufmgr); if (multiple_fds) close(td_fd); pthread_exit(&returns[thread_id]); }
static bool probe_winsys(struct intel_winsys *winsys) { struct intel_winsys_info *info = &winsys->info; int val; /* * When we need the Nth vertex from a user vertex buffer, and the vertex is * uploaded to, say, the beginning of a bo, we want the first vertex in the * bo to be fetched. One way to do this is to set the base address of the * vertex buffer to * * bo->offset64 + (vb->buffer_offset - vb->stride * N). * * The second term may be negative, and we need kernel support to do that. * * This check is taken from the classic driver. u_vbuf_upload_buffers() * guarantees the term is never negative, but it is good to require a * recent kernel. */ get_param(winsys, I915_PARAM_HAS_RELAXED_DELTA, &val); if (!val) { debug_error("kernel 2.6.39 required"); return false; } info->devid = drm_intel_bufmgr_gem_get_devid(winsys->bufmgr); if (drm_intel_get_aperture_sizes(winsys->fd, &info->aperture_mappable, &info->aperture_total)) { debug_error("failed to query aperture sizes"); return false; } get_param(winsys, I915_PARAM_HAS_LLC, &val); info->has_llc = val; info->has_address_swizzling = test_address_swizzling(winsys); winsys->first_gem_ctx = drm_intel_gem_context_create(winsys->bufmgr); info->has_logical_context = (winsys->first_gem_ctx != NULL); get_param(winsys, I915_PARAM_HAS_ALIASING_PPGTT, &val); info->has_ppgtt = val; /* test TIMESTAMP read */ info->has_timestamp = test_reg_read(winsys, 0x2358); get_param(winsys, I915_PARAM_HAS_GEN7_SOL_RESET, &val); info->has_gen7_sol_reset = val; return true; }
/*
 * Hand out a GEM context for this winsys.
 *
 * The context held in winsys->first_gem_ctx, if any, is claimed under the
 * winsys mutex and returned; the slot is cleared so it is handed out only
 * once.  When the slot is empty a new context is created from the buffer
 * manager instead.  The result is the drm_intel_context pointer cast to
 * struct intel_context *, and is whatever drm_intel_gem_context_create()
 * returned in the fallback case.
 */
struct intel_context *
intel_winsys_create_context(struct intel_winsys *winsys)
{
   drm_intel_context *ctx;

   /* try the preallocated context first */
   pipe_mutex_lock(winsys->mutex);
   ctx = winsys->first_gem_ctx;
   winsys->first_gem_ctx = NULL;
   pipe_mutex_unlock(winsys->mutex);

   if (ctx != NULL)
      return (struct intel_context *) ctx;

   /* Nothing preallocated (or already claimed): create a new one. */
   ctx = drm_intel_gem_context_create(winsys->bufmgr);

   return (struct intel_context *) ctx;
}
/**
 * Create and initialise a brw_context for a new DRI context.
 *
 * The context is allocated with rzalloc(), the generation-specific vtable
 * and driver function table are set up, intelInitContext() is run, and then
 * the per-generation limits (thread counts, URB sizes, command opcodes and
 * workaround flags) are filled in.  On gen6+ a hardware context is created.
 *
 * \param api                   API selector forwarded to intelInitContext().
 * \param mesaVis               GL config forwarded to intelInitContext().
 * \param driContextPriv        DRI context; its screen supplies the
 *                              intel_screen used here.
 * \param major_version         Forwarded to intelInitContext().
 * \param minor_version         Forwarded to intelInitContext().
 * \param flags                 __DRI_CTX_FLAG_* bits; FORWARD_COMPATIBLE and
 *                              DEBUG are translated into GL context flags.
 * \param error                 Out-parameter that receives a
 *                              __DRI_CTX_ERROR_* code on the failure paths
 *                              handled in this function; intelInitContext()
 *                              is handed the same pointer.
 * \param sharedContextPrivate  Forwarded to intelInitContext().
 *
 * \return true on success, false on failure.
 */
bool
brwCreateContext(int api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 unsigned *error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct intel_screen *screen = sPriv->driverPrivate;
   struct dd_function_table functions;

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      /* Diagnostics go to stderr, like the hardware-context message below. */
      fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
      *error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   /* brwInitVtbl needs to know the chipset generation so that it can set the
    * right pointers.
    */
   brw->gen = screen->gen;

   brwInitVtbl( brw );

   brwInitDriverFunctions(screen, &functions);

   struct gl_context *ctx = &brw->ctx;

   if (!intelInitContext( brw, api, major_version, minor_version,
                          mesaVis, driContextPriv,
                          sharedContextPrivate, &functions,
                          error)) {
      ralloc_free(brw);
      return false;
   }

   brw_initialize_context_constants(brw);

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         /* Report a failure code so the caller never reads an unset *error;
          * this is the same code used for the allocation failure above.
          */
         *error = __DRI_CTX_ERROR_NO_MEMORY;
         ralloc_free(brw);
         return false;
      }
   }

   brw_init_surface_formats(brw);

   /* Initialize swrast, tnl driver tables: */
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   if (tnl)
      tnl->Driver.RunPipeline = _tnl_run_pipeline;

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;

   if (brw->is_g4x || brw->gen >= 5) {
      brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
      brw->has_surface_tile_offset = true;
      if (brw->gen < 6)
         brw->has_compr4 = true;
      brw->has_aa_line_parameters = true;
      brw->has_pln = true;
   } else {
      brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
   }

   /* WM maximum threads is number of EUs times number of threads per EU. */
   assert(brw->gen <= 7);

   /* Per-generation / per-GT thread and URB limits.  Note that a Haswell
    * part with a gt value other than 1, 2 or 3 matches no branch and keeps
    * the zero values left by rzalloc().
    */
   if (brw->is_haswell) {
      if (brw->gt == 1) {
         brw->max_wm_threads = 102;
         brw->max_vs_threads = 70;
         brw->max_gs_threads = 70;
         brw->urb.size = 128;
         brw->urb.min_vs_entries = 32;
         brw->urb.max_vs_entries = 640;
         brw->urb.max_gs_entries = 256;
      } else if (brw->gt == 2) {
         brw->max_wm_threads = 204;
         brw->max_vs_threads = 280;
         brw->max_gs_threads = 256;
         brw->urb.size = 256;
         brw->urb.min_vs_entries = 64;
         brw->urb.max_vs_entries = 1664;
         brw->urb.max_gs_entries = 640;
      } else if (brw->gt == 3) {
         brw->max_wm_threads = 408;
         brw->max_vs_threads = 280;
         brw->max_gs_threads = 256;
         brw->urb.size = 512;
         brw->urb.min_vs_entries = 64;
         brw->urb.max_vs_entries = 1664;
         brw->urb.max_gs_entries = 640;
      }
   } else if (brw->gen == 7) {
      if (brw->gt == 1) {
         brw->max_wm_threads = 48;
         brw->max_vs_threads = 36;
         brw->max_gs_threads = 36;
         brw->urb.size = 128;
         brw->urb.min_vs_entries = 32;
         brw->urb.max_vs_entries = 512;
         brw->urb.max_gs_entries = 192;
      } else if (brw->gt == 2) {
         brw->max_wm_threads = 172;
         brw->max_vs_threads = 128;
         brw->max_gs_threads = 128;
         brw->urb.size = 256;
         brw->urb.min_vs_entries = 32;
         brw->urb.max_vs_entries = 704;
         brw->urb.max_gs_entries = 320;
      } else {
         assert(!"Unknown gen7 device.");
      }
   } else if (brw->gen == 6) {
      if (brw->gt == 2) {
         brw->max_wm_threads = 80;
         brw->max_vs_threads = 60;
         brw->max_gs_threads = 60;
         brw->urb.size = 64;            /* volume 5c.5 section 5.1 */
         brw->urb.min_vs_entries = 24;
         brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
         brw->urb.max_gs_entries = 256;
      } else {
         brw->max_wm_threads = 40;
         brw->max_vs_threads = 24;
         brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
         brw->urb.size = 32;            /* volume 5c.5 section 5.1 */
         brw->urb.min_vs_entries = 24;
         brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
         brw->urb.max_gs_entries = 256;
      }
      brw->urb.gen6_gs_previously_active = false;
   } else if (brw->gen == 5) {
      brw->urb.size = 1024;
      brw->max_vs_threads = 72;
      brw->max_gs_threads = 32;
      brw->max_wm_threads = 12 * 6;
   } else if (brw->is_g4x) {
      brw->urb.size = 384;
      brw->max_vs_threads = 32;
      brw->max_gs_threads = 2;
      brw->max_wm_threads = 10 * 5;
   } else if (brw->gen < 6) {
      brw->urb.size = 256;
      brw->max_vs_threads = 16;
      brw->max_gs_threads = 2;
      brw->max_wm_threads = 8 * 4;
      brw->has_negative_rhw_bug = true;
   }

   if (brw->gen <= 7) {
      brw->needs_unlit_centroid_workaround = true;
   }

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;

   brw_init_state( brw );

   if (brw->gen < 6) {
      /* NOTE(review): these calloc() results are not checked here. */
      brw->curbe.last_buf = calloc(1, 4096);
      brw->curbe.next_buf = calloc(1, 4096);
   }

   /* Start with every state bit dirty. */
   brw->state.dirty.mesa = ~0;
   brw->state.dirty.brw = ~0;

   /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
    * dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw));

   brw->emit_state_always = 0;

   brw->batch.need_workaround_flush = true;

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   /* Translate the DRI context flags into GL context flags. */
   ctx->Const.ContextFlags = 0;
   if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;

   ctx->Debug.DebugOutput = GL_FALSE;
   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
      ctx->Debug.DebugOutput = GL_TRUE;

      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   brw_fs_alloc_reg_sets(brw);
   brw_vec4_alloc_reg_set(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   return true;
}
/**
 * Create and initialise a brw_context for a new DRI context.
 *
 * The requested flags are validated first.  The context is then allocated
 * with rzalloc(), populated from the screen's brw_device_info, given its
 * generation-specific vtable entries, and initialised through
 * _mesa_initialize_context() and the helper modules.  On gen6+ a hardware
 * context is created.
 *
 * \param api                   GL API; the swrast context is only created
 *                              when this is neither API_OPENGL_CORE nor
 *                              API_OPENGLES2.
 * \param mesaVis               GL config passed to
 *                              _mesa_initialize_context().
 * \param driContextPriv        DRI context; its driverPrivate is pointed at
 *                              the new brw_context.
 * \param major_version         Not referenced in this function body.
 * \param minor_version         Not referenced in this function body.
 * \param flags                 __DRI_CTX_FLAG_* bits.  Bits outside the
 *                              allowed set are rejected.
 * \param notify_reset          When true, installs the graphics reset status
 *                              hook and selects GL_LOSE_CONTEXT_ON_RESET_ARB.
 * \param dri_ctx_error         Out-parameter that receives a
 *                              __DRI_CTX_ERROR_* code on every failure path.
 * \param sharedContextPrivate  Share context, passed on as a gl_context.
 *
 * \return true on success, false on failure.
 */
GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   /* Link the DRI context and the driver context to each other before any
    * failure path below calls intelDestroyContext(driContextPriv).
    */
   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   /* Copy the device description into the context. */
   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;

   /* Select the generation-specific vtable entries.  Gen8 uses the gen7
    * sampler functions.
    */
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      gen7_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      gen7_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      gen4_init_vtable_sampler_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);
   brw_process_intel_debug_variable(brw);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         /* Every other failure path reports a code; do the same here so the
          * caller never reads an unset *dri_ctx_error.
          */
         *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   /* Thread and URB limits come straight from the device description. */
   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gen6_gs_previously_active = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;

   if (brw->gen < 6) {
      /* NOTE(review): these calloc() results are not checked here. */
      brw->curbe.last_buf = calloc(1, 4096);
      brw->curbe.next_buf = calloc(1, 4096);
   }

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}