static int nouveau_codegen(int chipset, int type, struct tgsi_token tokens[], unsigned *size, unsigned **code) { struct nv50_ir_prog_info info = {0}; int ret; info.type = type; info.target = chipset; info.bin.sourceRep = NV50_PROGRAM_IR_TGSI; info.bin.source = tokens; info.io.ucpCBSlot = 15; info.io.ucpBase = NV50_CB_AUX_UCP_OFFSET; info.io.resInfoCBSlot = 15; info.io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET; info.io.msInfoCBSlot = 15; info.io.msInfoBase = NV50_CB_AUX_MS_OFFSET; info.assignSlots = dummy_assign_slots; info.optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); info.dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); ret = nv50_ir_generate_code(&info); if (ret) { _debug_printf("Error compiling program: %d\n", ret); return ret; } *size = info.bin.codeSize; *code = info.bin.code; return 0; }
/**
 * Create a new pipe_screen object
 * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen).
 *
 * Returns NULL if allocation or JIT initialization fails.
 */
struct pipe_screen *
llvmpipe_create_screen(struct sw_winsys *winsys)
{
   struct llvmpipe_screen *screen;

   /* Populate util_cpu_caps before the thread-count decision below. */
   util_cpu_detect();

#ifdef DEBUG
   LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 );
#endif

   LP_PERF = debug_get_flags_option("LP_PERF", lp_perf_flags, 0 );

   screen = CALLOC_STRUCT(llvmpipe_screen);
   if (!screen)
      return NULL;

   /* JIT state must come up before the screen is usable; bail out cleanly
    * if it can't. */
   if (!lp_jit_screen_init(screen)) {
      FREE(screen);
      return NULL;
   }

   screen->winsys = winsys;

   screen->base.destroy = llvmpipe_destroy_screen;
   screen->base.get_name = llvmpipe_get_name;
   screen->base.get_vendor = llvmpipe_get_vendor;
   screen->base.get_device_vendor = llvmpipe_get_vendor; // TODO should be the CPU vendor
   screen->base.get_param = llvmpipe_get_param;
   screen->base.get_shader_param = llvmpipe_get_shader_param;
   screen->base.get_paramf = llvmpipe_get_paramf;
   screen->base.is_format_supported = llvmpipe_is_format_supported;
   screen->base.context_create = llvmpipe_create_context;
   screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
   screen->base.fence_reference = llvmpipe_fence_reference;
   screen->base.fence_finish = llvmpipe_fence_finish;
   screen->base.get_timestamp = llvmpipe_get_timestamp;

   llvmpipe_init_screen_resource_funcs(&screen->base);

   /* num_threads == 0 means "rasterize on the calling thread"; only spawn
    * worker threads when more than one CPU is available. */
   screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0;
#ifdef PIPE_SUBSYSTEM_EMBEDDED
   screen->num_threads = 0;
#endif
   /* Environment override, clamped to the compile-time maximum. */
   screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
   screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS);

   screen->rast = lp_rast_create(screen->num_threads);
   if (!screen->rast) {
      lp_jit_screen_cleanup(screen);
      FREE(screen);
      return NULL;
   }
   (void) mtx_init(&screen->rast_mutex, mtx_plain);

   return &screen->base;
}
VdpStatus vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, VdpOutputSurface surface, uint32_t clip_width, uint32_t clip_height, VdpTime earliest_presentation_time) { static int dump_window = -1; vlVdpPresentationQueue *pq; vlVdpOutputSurface *surf; struct pipe_surface *drawable_surface; pq = vlGetDataHTAB(presentation_queue); if (!pq) return VDP_STATUS_INVALID_HANDLE; drawable_surface = vl_drawable_surface_get(pq->device->context, pq->drawable); if (!drawable_surface) return VDP_STATUS_INVALID_HANDLE; surf = vlGetDataHTAB(surface); if (!surf) return VDP_STATUS_INVALID_HANDLE; vl_compositor_clear_layers(&pq->compositor); vl_compositor_set_rgba_layer(&pq->compositor, 0, surf->sampler_view, NULL, NULL); vl_compositor_render(&pq->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME, drawable_surface, NULL, NULL); pq->device->context->pipe->screen->flush_frontbuffer ( pq->device->context->pipe->screen, drawable_surface->texture, 0, 0, vl_contextprivate_get(pq->device->context, drawable_surface) ); if(dump_window == -1) { dump_window = debug_get_num_option("VDPAU_DUMP", 0); } if(dump_window) { static unsigned int framenum = 0; char cmd[256]; sprintf(cmd, "xwd -id %d -out vdpau_frame_%08d.xwd", (int)pq->drawable, ++framenum); if (system(cmd) != 0) VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Dumping surface %d failed.\n", surface); } pipe_surface_reference(&drawable_surface, NULL); return VDP_STATUS_OK; }
/* One-shot global gallivm/LLVM initialization; later calls are no-ops. */
void
lp_build_init(void)
{
   if (gallivm_initialized)
      return;

#ifdef DEBUG
   gallivm_debug = debug_get_option_gallivm_debug();
#endif

   lp_set_target_options();

#if USE_MCJIT
   LLVMLinkInMCJIT();
#else
   LLVMLinkInJIT();
#endif

   /* Must run before the AVX capability check below. */
   util_cpu_detect();

   /* AMD Bulldozer AVX's throughput is the same as SSE2; and because using
    * 8-wide vector needs more floating ops than 4-wide (due to padding), it is
    * actually more efficient to use 4-wide vectors on this processor.
    *
    * So 256-bit vectors are only enabled on Intel CPUs with AVX.
    *
    * See also:
    * - http://www.anandtech.com/show/4955/the-bulldozer-review-amd-fx8150-tested/2
    */
   if (HAVE_AVX &&
       util_cpu_caps.has_avx && util_cpu_caps.has_intel) {
      lp_native_vector_width = 256;
   }
   else {
      /* Leave it at 128, even when no SIMD extensions are available.
       * Really needs to be a multiple of 128 so can fit 4 floats.
       */
      lp_native_vector_width = 128;
   }

   /* Environment override for testing/benchmarking. */
   lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
                                                 lp_native_vector_width);

   gallivm_initialized = TRUE;

#if 0
   /* For simulating less capable machines */
   util_cpu_caps.has_sse3 = 0;
   util_cpu_caps.has_ssse3 = 0;
   util_cpu_caps.has_sse4_1 = 0;
#endif
}
/* One-shot global gallivm/LLVM initialization; later calls are no-ops.
 * (Older variant: enables 256-bit vectors on any CPU with AVX.) */
void
lp_build_init(void)
{
   if (gallivm_initialized)
      return;

#ifdef DEBUG
   gallivm_debug = debug_get_option_gallivm_debug();
#endif

   lp_set_target_options();

#if USE_MCJIT
   LLVMLinkInMCJIT();
#else
   LLVMLinkInJIT();
#endif

   /* Must run before the AVX capability check below. */
   util_cpu_detect();

   if (HAVE_AVX &&
       util_cpu_caps.has_avx) {
      lp_native_vector_width = 256;
   }
   else {
      /* Leave it at 128, even when no SIMD extensions are available.
       * Really needs to be a multiple of 128 so can fit 4 floats.
       */
      lp_native_vector_width = 128;
   }

   /* Environment override for testing/benchmarking. */
   lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
                                                 lp_native_vector_width);

   gallivm_initialized = TRUE;

#if 0
   /* For simulating less capable machines */
   util_cpu_caps.has_sse3 = 0;
   util_cpu_caps.has_ssse3 = 0;
   util_cpu_caps.has_sse4_1 = 0;
#endif
}
void rc_pair_schedule(struct radeon_compiler *cc, void *user) { struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; struct schedule_state s; struct rc_instruction * inst = c->Base.Program.Instructions.Next; unsigned int * opt = user; memset(&s, 0, sizeof(s)); s.Opt = *opt; s.C = &c->Base; if (s.C->is_r500) { s.CalcScore = calc_score_readers; } else { s.CalcScore = calc_score_r300; } s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8); while(inst != &c->Base.Program.Instructions) { struct rc_instruction * first; if (is_controlflow(inst)) { inst = inst->Next; continue; } first = inst; while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) inst = inst->Next; DBG("Schedule one block\n"); memset(s.Temporary, 0, sizeof(s.Temporary)); s.TEXCount = 0; schedule_block(&s, first, inst); if (s.PendingTEX) { s.PrevBlockHasTex = 1; } } }
/**
 * Display an XvMC surface (plus an optional subpicture) on a drawable.
 *
 * Composites the decoded video buffer (and any pending subpicture layer)
 * into a surface created over the drawable's texture, flushes, and pushes
 * the result to the front buffer. Returns XvMCBadSurface for a bad surface
 * handle, BadDrawable if a gallium surface can't be created, Success
 * otherwise.
 */
PUBLIC
Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
                      short srcx, short srcy, unsigned short srcw, unsigned short srch,
                      short destx, short desty, unsigned short destw, unsigned short desth,
                      int flags)
{
   static int dump_window = -1;

   struct pipe_context *pipe;
   struct vl_compositor *compositor;
   struct vl_compositor_state *cstate;
   struct vl_screen *vscreen;

   XvMCSurfacePrivate *surface_priv;
   XvMCContextPrivate *context_priv;
   XvMCSubpicturePrivate *subpicture_priv;
   XvMCContext *context;
   /* u_rect spans are {x0, x1, y0, y1}. */
   struct u_rect src_rect = {srcx, srcx + srcw, srcy, srcy + srch};
   struct u_rect dst_rect = {destx, destx + destw, desty, desty + desth};

   struct pipe_resource *tex;
   struct pipe_surface surf_templ, *surf;
   struct u_rect *dirty_area;

   XVMC_MSG(XVMC_TRACE, "[XvMC] Displaying surface %p.\n", surface);

   assert(dpy);

   if (!surface || !surface->privData)
      return XvMCBadSurface;

   surface_priv = surface->privData;
   context = surface_priv->context;
   context_priv = context->privData;

   assert(flags == XVMC_TOP_FIELD || flags == XVMC_BOTTOM_FIELD || flags == XVMC_FRAME_PICTURE);
   assert(srcx + srcw - 1 < surface->width);
   assert(srcy + srch - 1 < surface->height);

   subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;

   pipe = context_priv->pipe;
   compositor = &context_priv->compositor;
   cstate = &context_priv->cstate;
   vscreen = context_priv->vscreen;

   tex = vscreen->texture_from_drawable(vscreen, (void *)drawable);
   dirty_area = vscreen->get_dirty_area(vscreen);

   memset(&surf_templ, 0, sizeof(surf_templ));
   surf_templ.format = tex->format;
   surf = pipe->create_surface(pipe, tex, &surf_templ);

   /* NOTE(review): on this early-return path (and at the end of the
    * function) neither tex nor surf is released in this chunk — verify
    * the ownership semantics of texture_from_drawable/create_surface
    * against the rest of the file before assuming a leak. */
   if (!surf)
      return BadDrawable;

   /*
    * Some apps (mplayer) hit these asserts because they call
    * this function after the window has been resized by the WM
    * but before they've handled the corresponding XEvent and
    * know about the new dimensions. The output should be clipped
    * until the app updates destw and desth.
    */
   /*
   assert(destx + destw - 1 < drawable_surface->width);
   assert(desty + desth - 1 < drawable_surface->height);
    */

   /* Make sure the decoder has finished with this and any reference
    * surfaces before we read them. */
   RecursiveEndFrame(surface_priv);

   context_priv->decoder->flush(context_priv->decoder);

   vl_compositor_clear_layers(cstate);
   vl_compositor_set_buffer_layer(cstate, compositor, 0, surface_priv->video_buffer,
                                  &src_rect, NULL, VL_COMPOSITOR_WEAVE);

   if (subpicture_priv) {
      XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p has subpicture %p.\n", surface, surface_priv->subpicture);

      assert(subpicture_priv->surface == surface);

      /* Paletted subpictures go through the palette layer; RGBA ones
       * through the plain layer. */
      if (subpicture_priv->palette)
         vl_compositor_set_palette_layer(cstate, compositor, 1, subpicture_priv->sampler, subpicture_priv->palette,
                                         &subpicture_priv->src_rect, &subpicture_priv->dst_rect, true);
      else
         vl_compositor_set_rgba_layer(cstate, compositor, 1, subpicture_priv->sampler,
                                      &subpicture_priv->src_rect, &subpicture_priv->dst_rect, NULL);

      /* The subpicture is consumed by this display operation. */
      surface_priv->subpicture = NULL;
      subpicture_priv->surface = NULL;
   }

   // Workaround for r600g, there seems to be a bug in the fence refcounting code
   pipe->screen->fence_reference(pipe->screen, &surface_priv->fence, NULL);

   vl_compositor_set_layer_dst_area(cstate, 0, &dst_rect);
   vl_compositor_set_layer_dst_area(cstate, 1, &dst_rect);
   vl_compositor_render(cstate, compositor, surf, dirty_area, true);

   pipe->flush(pipe, &surface_priv->fence, 0);

   XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);

   pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0,
                                   vscreen->get_private(vscreen), NULL);

   /* Optional frame dumping, controlled by the XVMC_DUMP env var. */
   if(dump_window == -1) {
      dump_window = debug_get_num_option("XVMC_DUMP", 0);
   }

   if(dump_window) {
      static unsigned int framenum = 0;
      char cmd[256];

      sprintf(cmd, "xwd -id %d -out xvmc_frame_%08d.xwd", (int)drawable, ++framenum);
      if (system(cmd) != 0)
         XVMC_MSG(XVMC_ERR, "[XvMC] Dumping surface %p failed.\n", surface);
   }

   XVMC_MSG(XVMC_TRACE, "[XvMC] Pushed surface %p to front buffer.\n", surface);

   return Success;
}
struct pipe_screen * ddebug_screen_create(struct pipe_screen *screen) { struct dd_screen *dscreen; const char *option = debug_get_option("GALLIUM_DDEBUG", NULL); bool dump_always = option && !strcmp(option, "always"); bool no_flush = option && strstr(option, "noflush"); bool help = option && !strcmp(option, "help"); unsigned timeout = 0; if (help) { puts("Gallium driver debugger"); puts(""); puts("Usage:"); puts(""); puts(" GALLIUM_DDEBUG=always"); puts(" Dump context and driver information after every draw call into"); puts(" $HOME/"DD_DIR"/."); puts(""); puts(" GALLIUM_DDEBUG=[timeout in ms] noflush"); puts(" Flush and detect a device hang after every draw call based on the given"); puts(" fence timeout and dump context and driver information into"); puts(" $HOME/"DD_DIR"/ when a hang is detected."); puts(" If 'noflush' is specified, only detect hangs in pipe->flush."); puts(""); puts(" GALLIUM_DDEBUG_SKIP=[count]"); puts(" Skip flush and hang detection for the given initial number of draw calls."); puts(""); exit(0); } if (!option) return screen; if (!dump_always && sscanf(option, "%u", &timeout) != 1) return screen; dscreen = CALLOC_STRUCT(dd_screen); if (!dscreen) return NULL; #define SCR_INIT(_member) \ dscreen->base._member = screen->_member ? 
dd_screen_##_member : NULL dscreen->base.destroy = dd_screen_destroy; dscreen->base.get_name = dd_screen_get_name; dscreen->base.get_vendor = dd_screen_get_vendor; dscreen->base.get_device_vendor = dd_screen_get_device_vendor; dscreen->base.get_param = dd_screen_get_param; dscreen->base.get_paramf = dd_screen_get_paramf; dscreen->base.get_shader_param = dd_screen_get_shader_param; /* get_video_param */ /* get_compute_param */ SCR_INIT(get_timestamp); dscreen->base.context_create = dd_screen_context_create; dscreen->base.is_format_supported = dd_screen_is_format_supported; /* is_video_format_supported */ SCR_INIT(can_create_resource); dscreen->base.resource_create = dd_screen_resource_create; dscreen->base.resource_from_handle = dd_screen_resource_from_handle; SCR_INIT(resource_from_user_memory); dscreen->base.resource_get_handle = dd_screen_resource_get_handle; dscreen->base.resource_destroy = dd_screen_resource_destroy; SCR_INIT(flush_frontbuffer); SCR_INIT(fence_reference); SCR_INIT(fence_finish); SCR_INIT(get_driver_query_info); SCR_INIT(get_driver_query_group_info); #undef SCR_INIT dscreen->screen = screen; dscreen->timeout_ms = timeout; dscreen->mode = dump_always ? DD_DUMP_ALL_CALLS : DD_DETECT_HANGS; dscreen->no_flush = no_flush; switch (dscreen->mode) { case DD_DUMP_ALL_CALLS: fprintf(stderr, "Gallium debugger active. Logging all calls.\n"); break; case DD_DETECT_HANGS: fprintf(stderr, "Gallium debugger active. " "The hang detection timout is %i ms.\n", timeout); break; default: assert(0); } dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0); if (dscreen->skip_count > 0) { fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n", dscreen->skip_count); } return &dscreen->base; }
struct pipe_screen * ddebug_screen_create(struct pipe_screen *screen) { struct dd_screen *dscreen; const char *option; bool flush = false; bool verbose = false; bool transfers = false; unsigned timeout = 1000; unsigned apitrace_dump_call = 0; enum dd_dump_mode mode = DD_DUMP_ONLY_HANGS; option = debug_get_option("GALLIUM_DDEBUG", NULL); if (!option) return screen; if (!strcmp(option, "help")) { puts("Gallium driver debugger"); puts(""); puts("Usage:"); puts(""); puts(" GALLIUM_DDEBUG=\"[<timeout in ms>] [(always|apitrace <call#)] [flush] [transfers] [verbose]\""); puts(" GALLIUM_DDEBUG_SKIP=[count]"); puts(""); puts("Dump context and driver information of draw calls into"); puts("$HOME/"DD_DIR"/. By default, watch for GPU hangs and only dump information"); puts("about draw calls related to the hang."); puts(""); puts("<timeout in ms>"); puts(" Change the default timeout for GPU hang detection (default=1000ms)."); puts(" Setting this to 0 will disable GPU hang detection entirely."); puts(""); puts("always"); puts(" Dump information about all draw calls."); puts(""); puts("transfers"); puts(" Also dump and do hang detection on transfers."); puts(""); puts("apitrace <call#>"); puts(" Dump information about the draw call corresponding to the given"); puts(" apitrace call number and exit."); puts(""); puts("flush"); puts(" Flush after every draw call."); puts(""); puts("verbose"); puts(" Write additional information to stderr."); puts(""); puts("GALLIUM_DDEBUG_SKIP=count"); puts(" Skip dumping on the first count draw calls (only relevant with 'always')."); puts(""); exit(0); } for (;;) { skip_space(&option); if (!*option) break; if (match_word(&option, "always")) { if (mode == DD_DUMP_APITRACE_CALL) { printf("ddebug: both 'always' and 'apitrace' specified\n"); exit(1); } mode = DD_DUMP_ALL_CALLS; } else if (match_word(&option, "flush")) { flush = true; } else if (match_word(&option, "transfers")) { transfers = true; } else if (match_word(&option, "verbose")) { verbose = 
true; } else if (match_word(&option, "apitrace")) { if (mode != DD_DUMP_ONLY_HANGS) { printf("ddebug: 'apitrace' can only appear once and not mixed with 'always'\n"); exit(1); } if (!match_uint(&option, &apitrace_dump_call)) { printf("ddebug: expected call number after 'apitrace'\n"); exit(1); } mode = DD_DUMP_APITRACE_CALL; } else if (match_uint(&option, &timeout)) { /* no-op */ } else { printf("ddebug: bad options: %s\n", option); exit(1); } } dscreen = CALLOC_STRUCT(dd_screen); if (!dscreen) return NULL; #define SCR_INIT(_member) \ dscreen->base._member = screen->_member ? dd_screen_##_member : NULL dscreen->base.destroy = dd_screen_destroy; dscreen->base.get_name = dd_screen_get_name; dscreen->base.get_vendor = dd_screen_get_vendor; dscreen->base.get_device_vendor = dd_screen_get_device_vendor; SCR_INIT(get_disk_shader_cache); dscreen->base.get_param = dd_screen_get_param; dscreen->base.get_paramf = dd_screen_get_paramf; dscreen->base.get_compute_param = dd_screen_get_compute_param; dscreen->base.get_shader_param = dd_screen_get_shader_param; dscreen->base.query_memory_info = dd_screen_query_memory_info; /* get_video_param */ /* get_compute_param */ SCR_INIT(get_timestamp); dscreen->base.context_create = dd_screen_context_create; dscreen->base.is_format_supported = dd_screen_is_format_supported; /* is_video_format_supported */ SCR_INIT(can_create_resource); dscreen->base.resource_create = dd_screen_resource_create; dscreen->base.resource_from_handle = dd_screen_resource_from_handle; SCR_INIT(resource_from_memobj); SCR_INIT(resource_from_user_memory); SCR_INIT(check_resource_capability); dscreen->base.resource_get_handle = dd_screen_resource_get_handle; SCR_INIT(resource_changed); dscreen->base.resource_destroy = dd_screen_resource_destroy; SCR_INIT(flush_frontbuffer); SCR_INIT(fence_reference); SCR_INIT(fence_finish); SCR_INIT(memobj_create_from_handle); SCR_INIT(memobj_destroy); SCR_INIT(get_driver_query_info); SCR_INIT(get_driver_query_group_info); 
SCR_INIT(get_compiler_options); SCR_INIT(get_driver_uuid); SCR_INIT(get_device_uuid); #undef SCR_INIT dscreen->screen = screen; dscreen->timeout_ms = timeout; dscreen->dump_mode = mode; dscreen->flush_always = flush; dscreen->transfers = transfers; dscreen->verbose = verbose; dscreen->apitrace_dump_call = apitrace_dump_call; switch (dscreen->dump_mode) { case DD_DUMP_ALL_CALLS: fprintf(stderr, "Gallium debugger active. Logging all calls.\n"); break; case DD_DUMP_APITRACE_CALL: fprintf(stderr, "Gallium debugger active. Going to dump an apitrace call.\n"); break; default: fprintf(stderr, "Gallium debugger active.\n"); break; } if (dscreen->timeout_ms > 0) fprintf(stderr, "Hang detection timeout is %ums.\n", dscreen->timeout_ms); else fprintf(stderr, "Hang detection is disabled.\n"); dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0); if (dscreen->skip_count > 0) { fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n", dscreen->skip_count); } return &dscreen->base; }
/**
 * Create a new pipe_screen object
 * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen).
 *
 * Returns NULL on allocation failure or (on x86 with old LLVM) when SSE2
 * is unavailable.
 */
struct pipe_screen *
llvmpipe_create_screen(struct sw_winsys *winsys)
{
   struct llvmpipe_screen *screen;

   /* Populate util_cpu_caps before the SSE2 and thread-count checks. */
   util_cpu_detect();

#if defined(PIPE_ARCH_X86) && HAVE_LLVM < 0x0302
   /* require SSE2 due to LLVM PR6960. */
   if (!util_cpu_caps.has_sse2)
       return NULL;
#endif

#ifdef DEBUG
   LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 );
#endif

   LP_PERF = debug_get_flags_option("LP_PERF", lp_perf_flags, 0 );

   screen = CALLOC_STRUCT(llvmpipe_screen);
   if (!screen)
      return NULL;

   screen->winsys = winsys;

   screen->base.destroy = llvmpipe_destroy_screen;
   screen->base.get_name = llvmpipe_get_name;
   screen->base.get_vendor = llvmpipe_get_vendor;
   screen->base.get_param = llvmpipe_get_param;
   screen->base.get_shader_param = llvmpipe_get_shader_param;
   screen->base.get_paramf = llvmpipe_get_paramf;
   screen->base.is_format_supported = llvmpipe_is_format_supported;
   screen->base.context_create = llvmpipe_create_context;
   screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
   screen->base.fence_reference = llvmpipe_fence_reference;
   screen->base.fence_signalled = llvmpipe_fence_signalled;
   screen->base.fence_finish = llvmpipe_fence_finish;
   screen->base.get_timestamp = llvmpipe_get_timestamp;

   llvmpipe_init_screen_resource_funcs(&screen->base);

   /* NOTE(review): the result of lp_jit_screen_init is not checked here,
    * unlike in newer variants of this function — confirm whether it can
    * fail in this version. */
   lp_jit_screen_init(screen);

   /* num_threads == 0 means "rasterize on the calling thread". */
   screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0;
#ifdef PIPE_SUBSYSTEM_EMBEDDED
   screen->num_threads = 0;
#endif
   /* Environment override, clamped to the compile-time maximum. */
   screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
   screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS);

   screen->rast = lp_rast_create(screen->num_threads);
   if (!screen->rast) {
      lp_jit_screen_cleanup(screen);
      FREE(screen);
      return NULL;
   }
   pipe_mutex_init(screen->rast_mutex);

   util_format_s3tc_init();

   return &screen->base;
}
/**
 * Create the radeonsi pipe_screen: query GPU info, install screen
 * callbacks, set up shader caches and compiler thread queues, derive
 * per-chip feature/workaround flags, and finally create the auxiliary
 * context.
 *
 * Returns NULL on allocation or shader-cache/queue init failure.
 */
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
					   const struct pipe_screen_config *config)
{
	struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
	unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i;

	if (!sscreen) {
		return NULL;
	}

	sscreen->ws = ws;
	ws->query_info(ws, &sscreen->info);

	/* Raster config is fixed-function state on pre-GFX9 only. */
	if (sscreen->info.chip_class >= GFX9) {
		sscreen->se_tile_repeat = 32 * sscreen->info.max_se;
	} else {
		ac_get_raster_config(&sscreen->info,
				     &sscreen->pa_sc_raster_config,
				     &sscreen->pa_sc_raster_config_1,
				     &sscreen->se_tile_repeat);
	}

	/* Both env vars are honored; AMD_DEBUG flags are OR'ed in. */
	sscreen->debug_flags = debug_get_flags_option("R600_DEBUG",
						      debug_options, 0);
	sscreen->debug_flags |= debug_get_flags_option("AMD_DEBUG",
						       debug_options, 0);

	/* Set functions first. */
	sscreen->b.context_create = si_pipe_create_context;
	sscreen->b.destroy = si_destroy_screen;
	sscreen->b.set_max_shader_compiler_threads =
		si_set_max_shader_compiler_threads;
	sscreen->b.is_parallel_shader_compilation_finished =
		si_is_parallel_shader_compilation_finished;

	si_init_screen_get_functions(sscreen);
	si_init_screen_buffer_functions(sscreen);
	si_init_screen_fence_functions(sscreen);
	si_init_screen_state_functions(sscreen);
	si_init_screen_texture_functions(sscreen);
	si_init_screen_query_functions(sscreen);

	/* Set these flags in debug_flags early, so that the shader cache takes
	 * them into account.
	 */
	if (driQueryOptionb(config->options,
			    "glsl_correct_derivatives_after_discard"))
		sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL);
	if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
		sscreen->debug_flags |= DBG(SI_SCHED);

	if (sscreen->debug_flags & DBG(INFO))
		ac_print_gpu_info(&sscreen->info);

	slab_create_parent(&sscreen->pool_transfers,
			   sizeof(struct si_transfer), 64);

	/* Default of -1 (R600_TEX_ANISO unset) disables the override. */
	sscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
	if (sscreen->force_aniso >= 0) {
		printf("radeonsi: Forcing anisotropy filter to %ix\n",
		       /* round down to a power of two */
		       1 << util_logbase2(sscreen->force_aniso));
	}

	(void) mtx_init(&sscreen->aux_context_lock, mtx_plain);
	(void) mtx_init(&sscreen->gpu_load_mutex, mtx_plain);

	si_init_gs_info(sscreen);
	if (!si_init_shader_cache(sscreen)) {
		FREE(sscreen);
		return NULL;
	}

	si_disk_cache_create(sscreen);

	/* Determine the number of shader compiler threads.
	 * NOTE(review): sysconf() can return -1, which an unsigned
	 * hw_threads would turn into a huge count before MIN2 clamps it to
	 * the compiler array sizes — confirm whether that's acceptable here.
	 */
	hw_threads = sysconf(_SC_NPROCESSORS_ONLN);

	if (hw_threads >= 12) {
		num_comp_hi_threads = hw_threads * 3 / 4;
		num_comp_lo_threads = hw_threads / 3;
	} else if (hw_threads >= 6) {
		num_comp_hi_threads = hw_threads - 2;
		num_comp_lo_threads = hw_threads / 2;
	} else if (hw_threads >= 2) {
		num_comp_hi_threads = hw_threads - 1;
		num_comp_lo_threads = hw_threads / 2;
	} else {
		num_comp_hi_threads = 1;
		num_comp_lo_threads = 1;
	}

	num_comp_hi_threads = MIN2(num_comp_hi_threads,
				   ARRAY_SIZE(sscreen->compiler));
	num_comp_lo_threads = MIN2(num_comp_lo_threads,
				   ARRAY_SIZE(sscreen->compiler_lowp));

	if (!util_queue_init(&sscreen->shader_compiler_queue, "sh",
			     64, num_comp_hi_threads,
			     UTIL_QUEUE_INIT_RESIZE_IF_FULL |
			     UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) {
		si_destroy_shader_cache(sscreen);
		FREE(sscreen);
		return NULL;
	}

	/* NOTE(review): this failure path does not destroy the first queue
	 * initialized above — verify against util_queue ownership rules. */
	if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
			     "shlo",
			     64, num_comp_lo_threads,
			     UTIL_QUEUE_INIT_RESIZE_IF_FULL |
			     UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY |
			     UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
		si_destroy_shader_cache(sscreen);
		FREE(sscreen);
		return NULL;
	}

	if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
		si_init_perfcounters(sscreen);

	/* Determine tessellation ring info. */
	bool double_offchip_buffers = sscreen->info.chip_class >= CIK &&
				      sscreen->info.family != CHIP_CARRIZO &&
				      sscreen->info.family != CHIP_STONEY;
	/* This must be one less than the maximum number due to a hw limitation.
	 * Various hardware bugs in SI, CIK, and GFX9 need this.
	 */
	unsigned max_offchip_buffers_per_se;

	/* Only certain chips can use the maximum value. */
	if (sscreen->info.family == CHIP_VEGA12 ||
	    sscreen->info.family == CHIP_VEGA20)
		max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
	else
		max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;

	unsigned max_offchip_buffers = max_offchip_buffers_per_se *
				       sscreen->info.max_se;
	unsigned offchip_granularity;

	/* Hawaii has a bug with offchip buffers > 256 that can be worked
	 * around by setting 4K granularity.
	 */
	if (sscreen->info.family == CHIP_HAWAII) {
		sscreen->tess_offchip_block_dw_size = 4096;
		offchip_granularity = V_03093C_X_4K_DWORDS;
	} else {
		sscreen->tess_offchip_block_dw_size = 8192;
		offchip_granularity = V_03093C_X_8K_DWORDS;
	}

	sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
	assert(((sscreen->tess_factor_ring_size / 4) & C_030938_SIZE) == 0);
	sscreen->tess_offchip_ring_size = max_offchip_buffers *
					  sscreen->tess_offchip_block_dw_size * 4;

	if (sscreen->info.chip_class >= CIK) {
		if (sscreen->info.chip_class >= VI)
			--max_offchip_buffers;
		sscreen->vgt_hs_offchip_param =
			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
	} else {
		assert(offchip_granularity == V_03093C_X_8K_DWORDS);
		sscreen->vgt_hs_offchip_param =
			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
	}

	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
	 * on SI. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
	 * SPI_VS_OUT_CONFIG. So only enable CI CLEAR_STATE on amdgpu kernel.*/
	sscreen->has_clear_state = sscreen->info.chip_class >= CIK &&
				   sscreen->info.drm_major == 3;

	sscreen->has_distributed_tess =
		sscreen->info.chip_class >= VI &&
		sscreen->info.max_se >= 2;

	/* Per-family firmware version thresholds for multi draw-indirect. */
	sscreen->has_draw_indirect_multi =
		(sscreen->info.family >= CHIP_POLARIS10) ||
		(sscreen->info.chip_class == VI &&
		 sscreen->info.pfp_fw_version >= 121 &&
		 sscreen->info.me_fw_version >= 87) ||
		(sscreen->info.chip_class == CIK &&
		 sscreen->info.pfp_fw_version >= 211 &&
		 sscreen->info.me_fw_version >= 173) ||
		(sscreen->info.chip_class == SI &&
		 sscreen->info.pfp_fw_version >= 79 &&
		 sscreen->info.me_fw_version >= 142);

	sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI &&
					 sscreen->info.max_se >= 2 &&
					 !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
	sscreen->assume_no_z_fights =
		driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
	sscreen->commutative_blend_add =
		driQueryOptionb(config->options, "radeonsi_commutative_blend_add");

	{
#define OPT_BOOL(name, dflt, description) \
		sscreen->options.name = \
			driQueryOptionb(config->options, "radeonsi_"#name);
#include "si_debug_options.h"
	}

	/* Per-chip hardware bug flags. */
	sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 ||
					sscreen->info.family == CHIP_RAVEN;
	sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
					    sscreen->info.family <= CHIP_POLARIS12) ||
					   sscreen->info.family == CHIP_VEGA10 ||
					   sscreen->info.family == CHIP_RAVEN;
	sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
					sscreen->info.family == CHIP_RAVEN;
	sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2;

	/* Only enable primitive binning on APUs by default. */
	sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN ||
				sscreen->info.family == CHIP_RAVEN2;

	sscreen->dfsm_allowed = sscreen->info.family == CHIP_RAVEN ||
				sscreen->info.family == CHIP_RAVEN2;

	/* Process DPBB enable flags. */
	if (sscreen->debug_flags & DBG(DPBB)) {
		sscreen->dpbb_allowed = true;
		if (sscreen->debug_flags & DBG(DFSM))
			sscreen->dfsm_allowed = true;
	}

	/* Process DPBB disable flags. */
	if (sscreen->debug_flags & DBG(NO_DPBB)) {
		sscreen->dpbb_allowed = false;
		sscreen->dfsm_allowed = false;
	} else if (sscreen->debug_flags & DBG(NO_DFSM)) {
		sscreen->dfsm_allowed = false;
	}

	/* While it would be nice not to have this flag, we are constrained
	 * by the reality that LLVM 5.0 doesn't have working VGPR indexing
	 * on GFX9.
	 */
	sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI;

	/* Some chips have RB+ registers, but don't support RB+. Those must
	 * always disable it.
	 */
	if (sscreen->info.family == CHIP_STONEY ||
	    sscreen->info.chip_class >= GFX9) {
		sscreen->has_rbplus = true;

		sscreen->rbplus_allowed =
			!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
			(sscreen->info.family == CHIP_STONEY ||
			 sscreen->info.family == CHIP_VEGA12 ||
			 sscreen->info.family == CHIP_RAVEN ||
			 sscreen->info.family == CHIP_RAVEN2);
	}

	sscreen->dcc_msaa_allowed =
		!(sscreen->debug_flags & DBG(NO_DCC_MSAA));

	sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;

	(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
	sscreen->use_monolithic_shaders =
		(sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;

	sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
					  SI_CONTEXT_INV_VMEM_L1;
	if (sscreen->info.chip_class <= VI) {
		sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
		sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
	}

	if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
		sscreen->debug_flags |= DBG_ALL_SHADERS;

	/* Syntax:
	 *     EQAA=s,z,c
	 * Example:
	 *     EQAA=8,4,2
	 *
	 * That means 8 coverage samples, 4 Z/S samples, and 2 color samples.
	 * Constraints:
	 *     s >= z >= c (ignoring this only wastes memory)
	 *     s = [2..16]
	 *     z = [2..8]
	 *     c = [2..8]
	 *
	 * Only MSAA color and depth buffers are overridden.
	 */
	if (sscreen->info.has_eqaa_surface_allocator) {
		const char *eqaa = debug_get_option("EQAA", NULL);
		unsigned s,z,f;

		if (eqaa && sscanf(eqaa, "%u,%u,%u", &s, &z, &f) == 3 && s && z && f) {
			sscreen->eqaa_force_coverage_samples = s;
			sscreen->eqaa_force_z_samples = z;
			sscreen->eqaa_force_color_samples = f;
		}
	}

	/* One LLVM compiler instance per queue thread. */
	for (i = 0; i < num_comp_hi_threads; i++)
		si_init_compiler(sscreen, &sscreen->compiler[i]);
	for (i = 0; i < num_comp_lo_threads; i++)
		si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);

	/* Create the auxiliary context. This must be done last. */
	sscreen->aux_context = si_create_context(
		&sscreen->b, sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0);
	if (sscreen->options.aux_debug) {
		struct u_log_context *log = CALLOC_STRUCT(u_log_context);
		u_log_context_init(log);
		sscreen->aux_context->set_log_context(sscreen->aux_context, log);
	}

	/* Optional self-tests selected via debug flags. */
	if (sscreen->debug_flags & DBG(TEST_DMA))
		si_test_dma(sscreen);

	if (sscreen->debug_flags & DBG(TEST_DMA_PERF)) {
		si_test_dma_perf(sscreen);
	}

	if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
				    DBG(TEST_VMFAULT_SDMA) |
				    DBG(TEST_VMFAULT_SHADER)))
		si_test_vmfault(sscreen);

	if (sscreen->debug_flags & DBG(TEST_GDS))
		si_test_gds((struct si_context*)sscreen->aux_context);

	if (sscreen->debug_flags & DBG(TEST_GDS_MM)) {
		si_test_gds_memory_management((struct si_context*)sscreen->aux_context,
					      32 * 1024, 4, RADEON_DOMAIN_GDS);
	}
	if (sscreen->debug_flags & DBG(TEST_GDS_OA_MM)) {
		si_test_gds_memory_management((struct si_context*)sscreen->aux_context,
					      4, 1, RADEON_DOMAIN_OA);
	}

	return &sscreen->b;
}
struct pipe_screen * nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { static const unsigned query_sizes[] = {(4096 - 4 * 32) / 32, 3 * 1024 / 32, 2 * 1024 / 32, 1024 / 32}; struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen); struct nouveau_channel *chan; struct pipe_screen *pscreen; unsigned eng3d_class = 0; int ret, i; if (!screen) return NULL; pscreen = &screen->base.base; ret = nouveau_screen_init(&screen->base, dev); if (ret) { nvfx_screen_destroy(pscreen); return NULL; } chan = screen->base.channel; screen->cur_ctx = NULL; chan->user_private = screen; chan->flush_notify = nvfx_channel_flush_notify; pscreen->winsys = ws; pscreen->destroy = nvfx_screen_destroy; pscreen->get_param = nvfx_screen_get_param; pscreen->get_shader_param = nvfx_screen_get_shader_param; pscreen->get_paramf = nvfx_screen_get_paramf; pscreen->is_format_supported = nvfx_screen_is_format_supported; pscreen->context_create = nvfx_create; switch (dev->chipset & 0xf0) { case 0x30: if (NV30_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) eng3d_class = NV30_3D; else if (NV34_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) eng3d_class = NV34_3D; else if (NV35_3D_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) eng3d_class = NV35_3D; break; case 0x40: if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f))) eng3d_class = NV40_3D; else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) eng3d_class = NV44_3D; screen->is_nv4x = ~0; break; case 0x60: if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) eng3d_class = NV44_3D; screen->is_nv4x = ~0; break; } if (!eng3d_class) { NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset); return NULL; } screen->advertise_npot = !!screen->is_nv4x; screen->advertise_blend_equation_separate = !!screen->is_nv4x; screen->use_nv4x = screen->is_nv4x; if(screen->is_nv4x) { if(debug_get_bool_option("NVFX_SIMULATE_NV30", FALSE)) screen->use_nv4x = 0; if(!debug_get_bool_option("NVFX_NPOT", TRUE)) 
screen->advertise_npot = 0; if(!debug_get_bool_option("NVFX_BLEND_EQ_SEP", TRUE)) screen->advertise_blend_equation_separate = 0; } screen->force_swtnl = debug_get_bool_option("NVFX_SWTNL", FALSE); screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE); screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384); screen->inline_cost_per_hardware_cost = atof(debug_get_option("NVFX_INLINE_COST_PER_HARDWARE_COST", "1.0")); screen->static_reuse_threshold = atof(debug_get_option("NVFX_STATIC_REUSE_THRESHOLD", "2.0")); /* We don't advertise these by default because filtering and blending doesn't work as * it should, due to several restrictions. * The only exception is fp16 on nv40. */ screen->advertise_fp16 = debug_get_bool_option("NVFX_FP16", !!screen->use_nv4x); screen->advertise_fp32 = debug_get_bool_option("NVFX_FP32", 0); screen->vertex_buffer_reloc_flags = nvfx_screen_get_vertex_buffer_flags(screen); /* surely both nv3x and nv44 support index buffers too: find out how and test that */ if(eng3d_class == NV40_3D) screen->index_buffer_reloc_flags = screen->vertex_buffer_reloc_flags; if(!screen->force_swtnl && screen->vertex_buffer_reloc_flags == screen->index_buffer_reloc_flags) screen->base.vertex_buffer_flags = screen->base.index_buffer_flags = screen->vertex_buffer_reloc_flags; nvfx_screen_init_resource_functions(pscreen); ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d); if (ret) { NOUVEAU_ERR("Error creating 3D object: %d\n", ret); return FALSE; } /* 2D engine setup */ nvfx_screen_surface_init(pscreen); /* Notifier for sync purposes */ ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); if (ret) { NOUVEAU_ERR("Error creating notifier object: %d\n", ret); nvfx_screen_destroy(pscreen); return NULL; } /* Query objects */ for(i = 0; i < sizeof(query_sizes) / sizeof(query_sizes[0]); ++i) { ret = nouveau_notifier_alloc(chan, 0xbeef0302, query_sizes[i], &screen->query); if(!ret) break; } 
if (ret) { NOUVEAU_ERR("Error initialising query objects: %d\n", ret); nvfx_screen_destroy(pscreen); return NULL; } ret = nouveau_resource_init(&screen->query_heap, 0, query_sizes[i]); if (ret) { NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); nvfx_screen_destroy(pscreen); return NULL; } LIST_INITHEAD(&screen->query_list); /* Vtxprog resources */ if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->use_nv4x ? 512 : 256) || nouveau_resource_init(&screen->vp_data_heap, 0, screen->use_nv4x ? 468 : 256)) { nvfx_screen_destroy(pscreen); return NULL; } BIND_RING(chan, screen->eng3d, 7); /* Static eng3d initialisation */ /* note that we just started using the channel, so we must have space in the pushbuffer */ OUT_RING(chan, RING_3D(NV30_3D_DMA_NOTIFY, 1)); OUT_RING(chan, screen->sync->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_TEXTURE0, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->gart->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR1, 1)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_COLOR0, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_VTXBUF0, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->gart->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_FENCE, 2)); OUT_RING(chan, 0); OUT_RING(chan, screen->query->handle); OUT_RING(chan, RING_3D(NV30_3D_DMA_UNK1AC, 2)); OUT_RING(chan, chan->vram->handle); OUT_RING(chan, chan->vram->handle); if(!screen->is_nv4x) nv30_screen_init(screen); else nv40_screen_init(screen); return pscreen; }
/**
 * Create a pipe_screen for NV30 (Rankine) / NV40 (Curie) class hardware.
 *
 * Determines the hardware 3D object class from the chipset id, fills in
 * the pipe_screen vtable and fence hooks, then allocates the channel-side
 * objects (null object, fence notifier, ...).
 * Returns NULL on allocation failure or unknown chipset.
 */
struct pipe_screen *
nv30_screen_create(struct nouveau_device *dev)
{
   struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen);
   struct pipe_screen *pscreen;
   struct nouveau_pushbuf *push;
   struct nv04_fifo *fifo;
   unsigned oclass = 0;
   int ret, i;

   if (!screen)
      return NULL;

   /* Pick the 3D object class: family in the high nibble of the chipset
    * id, the per-stepping masks select between the class variants. */
   switch (dev->chipset & 0xf0) {
   case 0x30:
      if (RANKINE_0397_CHIPSET & (1 << (dev->chipset & 0x0f)))
         oclass = NV30_3D_CLASS;
      else
      if (RANKINE_0697_CHIPSET & (1 << (dev->chipset & 0x0f)))
         oclass = NV34_3D_CLASS;
      else
      if (RANKINE_0497_CHIPSET & (1 << (dev->chipset & 0x0f)))
         oclass = NV35_3D_CLASS;
      break;
   case 0x40:
      if (CURIE_4097_CHIPSET & (1 << (dev->chipset & 0x0f)))
         oclass = NV40_3D_CLASS;
      else
      if (CURIE_4497_CHIPSET & (1 << (dev->chipset & 0x0f)))
         oclass = NV44_3D_CLASS;
      break;
   case 0x60:
      if (CURIE_4497_CHIPSET6X & (1 << (dev->chipset & 0x0f)))
         oclass = NV44_3D_CLASS;
      break;
   default:
      break;
   }

   if (!oclass) {
      NOUVEAU_ERR("unknown 3d class for 0x%02x\n", dev->chipset);
      FREE(screen);
      return NULL;
   }

   /*
    * Some modern apps try to use msaa without keeping in mind the
    * restrictions on videomem of older cards. Resulting in dmesg saying:
    * [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
    * [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
    * [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
    *
    * Because we are running out of video memory, after which the program
    * using the msaa visual freezes, and eventually the entire system freezes.
    *
    * To work around this we do not allow msaa visuals by default and allow
    * the user to override this via NV30_MAX_MSAA.
    */
   screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
   if (screen->max_sample_count > 4)
      screen->max_sample_count = 4;

   pscreen = &screen->base.base;
   pscreen->destroy = nv30_screen_destroy;
   pscreen->get_param = nv30_screen_get_param;
   pscreen->get_paramf = nv30_screen_get_paramf;
   pscreen->get_shader_param = nv30_screen_get_shader_param;
   pscreen->context_create = nv30_context_create;
   pscreen->is_format_supported = nv30_screen_is_format_supported;
   nv30_resource_screen_init(pscreen);
   nouveau_screen_init_vdec(&screen->base);

   screen->base.fence.emit = nv30_screen_fence_emit;
   screen->base.fence.update = nv30_screen_fence_update;

   ret = nouveau_screen_init(&screen->base, dev);
   if (ret)
      FAIL_SCREEN_INIT("nv30_screen_init failed: %d\n", ret);

   screen->base.vidmem_bindings |= PIPE_BIND_VERTEX_BUFFER;
   screen->base.sysmem_bindings |= PIPE_BIND_VERTEX_BUFFER;
   /* nv40 can also fetch index buffers from vidmem/sysmem directly. */
   if (oclass == NV40_3D_CLASS) {
      screen->base.vidmem_bindings |= PIPE_BIND_INDEX_BUFFER;
      screen->base.sysmem_bindings |= PIPE_BIND_INDEX_BUFFER;
   }

   fifo = screen->base.channel->data;
   push = screen->base.pushbuf;
   push->rsvd_kick = 16;

   ret = nouveau_object_new(screen->base.channel, 0x00000000,
                            NV01_NULL_CLASS, NULL, 0, &screen->null);
   if (ret)
      FAIL_SCREEN_INIT("error allocating null object: %d\n", ret);

   /* DMA_FENCE refuses to accept DMA objects with "adjust" filled in,
    * this means that the address pointed at by the DMA object must
    * be 4KiB aligned, which means this object needs to be the first
    * one allocated on the channel.
    */
   ret = nouveau_object_new(screen->base.channel, 0xbeef1e00,
                            NOUVEAU_NOTIFIER_CLASS, &(struct nv04_notify) {
                            .length = 32 }, sizeof(struct nv04_notify),
/** * Enter a surface into the presentation queue. */ VdpStatus vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue, VdpOutputSurface surface, uint32_t clip_width, uint32_t clip_height, VdpTime earliest_presentation_time) { static int dump_window = -1; vlVdpPresentationQueue *pq; vlVdpOutputSurface *surf; struct pipe_context *pipe; struct pipe_resource *tex; struct pipe_surface surf_templ, *surf_draw; struct u_rect src_rect, dst_clip, *dirty_area; struct vl_compositor *compositor; struct vl_compositor_state *cstate; pq = vlGetDataHTAB(presentation_queue); if (!pq) return VDP_STATUS_INVALID_HANDLE; surf = vlGetDataHTAB(surface); if (!surf) return VDP_STATUS_INVALID_HANDLE; pipe = pq->device->context; compositor = &pq->device->compositor; cstate = &pq->cstate; pipe_mutex_lock(pq->device->mutex); tex = vl_screen_texture_from_drawable(pq->device->vscreen, pq->drawable); if (!tex) { pipe_mutex_unlock(pq->device->mutex); return VDP_STATUS_INVALID_HANDLE; } dirty_area = vl_screen_get_dirty_area(pq->device->vscreen); memset(&surf_templ, 0, sizeof(surf_templ)); surf_templ.format = tex->format; surf_templ.usage = PIPE_BIND_RENDER_TARGET; surf_draw = pipe->create_surface(pipe, tex, &surf_templ); surf->timestamp = (vlVdpTime)earliest_presentation_time; dst_clip.x0 = 0; dst_clip.y0 = 0; dst_clip.x1 = clip_width ? clip_width : surf_draw->width; dst_clip.y1 = clip_height ? clip_height : surf_draw->height; if (pq->device->delayed_rendering.surface == surface && dst_clip.x1 == surf_draw->width && dst_clip.y1 == surf_draw->height) { // TODO: we correctly support the clipping here, but not the pq background color in the clipped area.... 
cstate = pq->device->delayed_rendering.cstate; vl_compositor_set_dst_clip(cstate, &dst_clip); vlVdpResolveDelayedRendering(pq->device, surf_draw, dirty_area); } else { vlVdpResolveDelayedRendering(pq->device, NULL, NULL); src_rect.x0 = 0; src_rect.y0 = 0; src_rect.x1 = surf_draw->width; src_rect.y1 = surf_draw->height; vl_compositor_clear_layers(cstate); vl_compositor_set_rgba_layer(cstate, compositor, 0, surf->sampler_view, &src_rect, NULL, NULL); vl_compositor_set_dst_clip(cstate, &dst_clip); vl_compositor_render(cstate, compositor, surf_draw, dirty_area); } vl_screen_set_next_timestamp(pq->device->vscreen, earliest_presentation_time); pipe->screen->flush_frontbuffer ( pipe->screen, tex, 0, 0, vl_screen_get_private(pq->device->vscreen) ); pipe->screen->fence_reference(pipe->screen, &surf->fence, NULL); pipe->flush(pipe, &surf->fence); if (dump_window == -1) { dump_window = debug_get_num_option("VDPAU_DUMP", 0); } if (dump_window) { static unsigned int framenum = 0; char cmd[256]; if (framenum) { sprintf(cmd, "xwd -id %d -silent -out vdpau_frame_%08d.xwd", (int)pq->drawable, framenum); if (system(cmd) != 0) VDPAU_MSG(VDPAU_ERR, "[VDPAU] Dumping surface %d failed.\n", surface); } framenum++; } pipe_resource_reference(&tex, NULL); pipe_surface_reference(&surf_draw, NULL); pipe_mutex_unlock(pq->device->mutex); return VDP_STATUS_OK; }
/**
 * Translate the program's TGSI tokens into nv50 machine code.
 *
 * Fills an nv50_ir_prog_info descriptor, runs nv50_ir_generate_code() and
 * copies the resulting binary, relocations and resource usage back into
 * @prog.  Fragment/geometry specific hardware flags are derived from the
 * compiler's shader properties.
 * Returns true on success, false on allocation or translation failure.
 */
bool
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
{
   struct nv50_ir_prog_info *info;
   int ret;
   /* Slot value meaning "not written/assigned": 0x40 for vertex shader
    * outputs, 0x80 for other stages. */
   const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;

   info = CALLOC_STRUCT(nv50_ir_prog_info);
   if (!info)
      return false;

   info->type = prog->type;
   info->target = chipset;
   info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
   info->bin.source = (void *)prog->pipe.tokens;

   /* Driver-constant slots/offsets in the auxiliary constant buffer. */
   info->io.ucpCBSlot = 15;
   info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
   info->io.genUserClip = prog->vp.clpd_nr;
   info->io.sampleInterp = prog->fp.sample_interp;
   info->io.resInfoCBSlot = 15;
   info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
   info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
   info->io.msInfoCBSlot = 15;
   info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;

   info->assignSlots = nv50_program_assign_varying_slots;

   /* Pre-mark all optional varyings as unused; the slot-assignment
    * callback overwrites the ones the shader actually uses. */
   prog->vp.bfc[0] = 0xff;
   prog->vp.bfc[1] = 0xff;
   prog->vp.edgeflag = 0xff;
   prog->vp.clpd[0] = map_undef;
   prog->vp.clpd[1] = map_undef;
   prog->vp.psiz = map_undef;
   prog->gp.has_layer = 0;
   prog->gp.has_viewport = 0;

   info->driverPriv = prog;

#ifdef DEBUG
   /* Optimization level and debug flags overridable via environment. */
   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
#else
   info->optLevel = 3;
#endif

   ret = nv50_ir_generate_code(info);
   if (ret) {
      NOUVEAU_ERR("shader translation failed: %i\n", ret);
      goto out;
   }
   /* Symbol table is only needed by compute shaders; not kept here. */
   FREE(info->bin.syms);

   prog->code = info->bin.code;
   prog->code_size = info->bin.codeSize;
   prog->fixups = info->bin.relocData;
   /* maxGPR is in 32-bit register units; hardware wants register pairs,
    * with a minimum allocation of 4. */
   prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
   prog->tls_space = info->bin.tlsSpace;

   if (prog->type == PIPE_SHADER_FRAGMENT) {
      if (info->prop.fp.writesDepth) {
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
         prog->fp.flags[1] = 0x11;
      }
      if (info->prop.fp.usesDiscard)
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
   } else
   if (prog->type == PIPE_SHADER_GEOMETRY) {
      /* Map the compiler's output primitive to the hardware enum. */
      switch (info->prop.gp.outputPrim) {
      case PIPE_PRIM_LINE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
         break;
      case PIPE_PRIM_POINTS:
      default:
         assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS);
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
         break;
      }
      prog->gp.vert_count = info->prop.gp.maxVertices;
   }

   if (prog->pipe.stream_output.num_outputs)
      prog->so = nv50_program_create_strmout_state(info,
                                                   &prog->pipe.stream_output);

out:
   FREE(info);
   return !ret;
}
/**
 * Translate the program's IR (TGSI or NIR) into nv50 machine code.
 *
 * Fills an nv50_ir_prog_info descriptor, runs nv50_ir_generate_code() and
 * copies the resulting binary, relocations, interpolation fixups and
 * resource usage back into @prog.  Stage-specific hardware state
 * (fragment control flags, geometry primitive type, clip/cull masks,
 * compute symbols) is derived from the compiler's shader properties.
 *
 * @param prog     program to translate; receives the generated code
 * @param chipset  target chipset id
 * @param debug    callback for shader-stats debug messages
 * @return true on success, false on allocation or translation failure
 */
bool
nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
                       struct pipe_debug_callback *debug)
{
   struct nv50_ir_prog_info *info;
   int i, ret;
   /* Slot value meaning "not written/assigned": 0x40 for vertex shader
    * outputs, 0x80 for other stages. */
   const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;

   info = CALLOC_STRUCT(nv50_ir_prog_info);
   if (!info)
      return false;

   info->type = prog->type;
   info->target = chipset;

   info->bin.sourceRep = prog->pipe.type;
   switch (prog->pipe.type) {
   case PIPE_SHADER_IR_TGSI:
      info->bin.source = (void *)prog->pipe.tokens;
      break;
   case PIPE_SHADER_IR_NIR:
      /* Clone: code generation mutates the NIR shader. */
      info->bin.source = (void *)nir_shader_clone(NULL, prog->pipe.ir.nir);
      break;
   default:
      assert(!"unsupported IR!");
      /* Fix: 'info' was leaked on this path. */
      FREE(info);
      return false;
   }

   info->bin.smemSize = prog->cp.smem_size;

   /* Driver-constant slots/offsets in the auxiliary constant buffer. */
   info->io.auxCBSlot = 15;
   info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
   info->io.genUserClip = prog->vp.clpd_nr;
   if (prog->fp.alphatest)
      info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET;

   info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
   info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
   info->io.msInfoCBSlot = 15;
   info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;

   info->assignSlots = nv50_program_assign_varying_slots;

   /* Pre-mark all optional varyings as unused; the slot-assignment
    * callback overwrites the ones the shader actually uses. */
   prog->vp.bfc[0] = 0xff;
   prog->vp.bfc[1] = 0xff;
   prog->vp.edgeflag = 0xff;
   prog->vp.clpd[0] = map_undef;
   prog->vp.clpd[1] = map_undef;
   prog->vp.psiz = map_undef;
   prog->gp.has_layer = 0;
   prog->gp.has_viewport = 0;

   if (prog->type == PIPE_SHADER_COMPUTE)
      info->prop.cp.inputOffset = 0x10;

   info->driverPriv = prog;

#ifdef DEBUG
   /* Optimization level and debug flags overridable via environment. */
   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
   info->omitLineNum = debug_get_num_option("NV50_PROG_DEBUG_OMIT_LINENUM", 0);
#else
   info->optLevel = 3;
#endif

   ret = nv50_ir_generate_code(info);
   if (ret) {
      NOUVEAU_ERR("shader translation failed: %i\n", ret);
      goto out;
   }

   prog->code = info->bin.code;
   prog->code_size = info->bin.codeSize;
   prog->fixups = info->bin.relocData;
   prog->interps = info->bin.fixupData;
   /* maxGPR is in 32-bit register units; hardware wants register pairs,
    * with a minimum allocation of 4. */
   prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
   prog->tls_space = info->bin.tlsSpace;
   prog->cp.smem_size = info->bin.smemSize;
   prog->mul_zero_wins = info->io.mul_zero_wins;
   prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;

   /* Build clip/cull enable masks: clip distances occupy the low bits,
    * cull distances follow.  clip_mode holds a per-distance nibble. */
   prog->vp.clip_enable = (1 << info->io.clipDistances) - 1;
   prog->vp.cull_enable =
      ((1 << info->io.cullDistances) - 1) << info->io.clipDistances;
   prog->vp.clip_mode = 0;
   for (i = 0; i < info->io.cullDistances; ++i)
      prog->vp.clip_mode |= 1 << ((info->io.clipDistances + i) * 4);

   if (prog->type == PIPE_SHADER_FRAGMENT) {
      if (info->prop.fp.writesDepth) {
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
         prog->fp.flags[1] = 0x11;
      }
      if (info->prop.fp.usesDiscard)
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
   } else
   if (prog->type == PIPE_SHADER_GEOMETRY) {
      /* Map the compiler's output primitive to the hardware enum. */
      switch (info->prop.gp.outputPrim) {
      case PIPE_PRIM_LINE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP;
         break;
      case PIPE_PRIM_POINTS:
      default:
         assert(info->prop.gp.outputPrim == PIPE_PRIM_POINTS);
         prog->gp.prim_type = NV50_3D_GP_OUTPUT_PRIMITIVE_TYPE_POINTS;
         break;
      }
      prog->gp.vert_count = CLAMP(info->prop.gp.maxVertices, 1, 1024);
   }

   if (prog->type == PIPE_SHADER_COMPUTE) {
      /* Compute keeps the symbol table for kernel entry lookup. */
      prog->cp.syms = info->bin.syms;
      prog->cp.num_syms = info->bin.numSyms;
   } else {
      FREE(info->bin.syms);
   }

   if (prog->pipe.stream_output.num_outputs)
      prog->so = nv50_program_create_strmout_state(info,
                                                   &prog->pipe.stream_output);

   pipe_debug_message(debug, SHADER_INFO,
                      "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d",
                      prog->type, info->bin.tlsSpace, info->bin.smemSize,
                      prog->max_gpr, info->bin.instructions,
                      info->bin.codeSize);

out:
   /* The cloned NIR shader is owned by this function. */
   if (info->bin.sourceRep == PIPE_SHADER_IR_NIR)
      ralloc_free((void *)info->bin.source);
   FREE(info);
   return !ret;
}
/**
 * Initialise the driver-common part of an r600 pipe_screen.
 *
 * Queries winsys device info, builds the renderer string, wires up the
 * common pipe_screen vtable entries, reads R600_DEBUG/R600_TEX_ANISO
 * environment options and initialises shared mutexes.  With R600_DEBUG
 * containing "info", a dump of the device capabilities is printed.
 * Returns true (no failing steps in this function).
 */
bool r600_common_screen_init(struct r600_common_screen *rscreen,
			     struct radeon_winsys *ws)
{
	char llvm_string[32] = {}, kernel_version[128] = {};
	struct utsname uname_data;

	ws->query_info(ws, &rscreen->info);

	/* Kernel release goes into the renderer string if available. */
	if (uname(&uname_data) == 0)
		snprintf(kernel_version, sizeof(kernel_version),
			 " / %s", uname_data.release);

#if HAVE_LLVM
	/* HAVE_LLVM encodes major/minor as 0xMMmm. */
	snprintf(llvm_string, sizeof(llvm_string),
		 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
		 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
#endif

	snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
		 "%s (DRM %i.%i.%i%s%s)",
		 r600_get_chip_name(rscreen), rscreen->info.drm_major,
		 rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
		 kernel_version, llvm_string);

	/* Common pipe_screen vtable entries. */
	rscreen->b.get_name = r600_get_name;
	rscreen->b.get_vendor = r600_get_vendor;
	rscreen->b.get_device_vendor = r600_get_device_vendor;
	rscreen->b.get_compute_param = r600_get_compute_param;
	rscreen->b.get_paramf = r600_get_paramf;
	rscreen->b.get_timestamp = r600_get_timestamp;
	rscreen->b.fence_finish = r600_fence_finish;
	rscreen->b.fence_reference = r600_fence_reference;
	rscreen->b.resource_destroy = u_resource_destroy_vtbl;
	rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
	rscreen->b.query_memory_info = r600_query_memory_info;

	/* Use the UVD video hooks only when the hardware block exists. */
	if (rscreen->info.has_uvd) {
		rscreen->b.get_video_param = rvid_get_video_param;
		rscreen->b.is_video_format_supported = rvid_is_format_supported;
	} else {
		rscreen->b.get_video_param = r600_get_video_param;
		rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
	}

	r600_init_screen_texture_functions(rscreen);
	r600_init_screen_query_functions(rscreen);

	rscreen->ws = ws;
	rscreen->family = rscreen->info.family;
	rscreen->chip_class = rscreen->info.chip_class;
	rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
	/* -1 (the default) disables forced anisotropy; cap override at 16x. */
	rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
	if (rscreen->force_aniso >= 0) {
		printf("radeon: Forcing anisotropy filter to %ix\n",
		       /* round down to a power of two */
		       1 << util_logbase2(rscreen->force_aniso));
	}

	util_format_s3tc_init();
	pipe_mutex_init(rscreen->aux_context_lock);
	pipe_mutex_init(rscreen->gpu_load_mutex);

	/* R600_DEBUG=info: dump the device capability report. */
	if (rscreen->debug_flags & DBG_INFO) {
		printf("pci_id = 0x%x\n", rscreen->info.pci_id);
		printf("family = %i (%s)\n", rscreen->info.family,
		       r600_get_chip_name(rscreen));
		printf("chip_class = %i\n", rscreen->info.chip_class);
		printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
		printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
		printf("max_alloc_size = %i MB\n",
		       (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
		printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
		printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
		printf("has_sdma = %i\n", rscreen->info.has_sdma);
		printf("has_uvd = %i\n", rscreen->info.has_uvd);
		printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
		printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
		printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
		printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
		       rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
		printf("has_userptr = %i\n", rscreen->info.has_userptr);
		printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
		printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
		printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
		printf("max_se = %i\n", rscreen->info.max_se);
		printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
		printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
		printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
		printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
		printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
		printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
		printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
	}
	return true;
}
/**
 * Compile the program's TGSI tokens to nv50 machine code.
 *
 * Sets up an nv50_ir_prog_info descriptor, invokes the code generator,
 * and transfers the produced binary, relocation data and register/TLS
 * usage into @prog.  Fragment-shader control flags are derived from the
 * compiler-reported shader properties.
 * Returns TRUE on success, FALSE on allocation or translation failure.
 */
boolean
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
{
   /* Marker for varying slots that end up unassigned: 0x40 for vertex
    * shader outputs, 0x80 for the other stages. */
   const uint8_t unassigned =
      (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80;
   struct nv50_ir_prog_info *pi;
   int status;

   pi = CALLOC_STRUCT(nv50_ir_prog_info);
   if (!pi)
      return FALSE;

   /* Describe the input program and compilation target. */
   pi->type = prog->type;
   pi->target = chipset;
   pi->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
   pi->bin.source = (void *)prog->pipe.tokens;

   /* User clip planes live in constant buffer slot 15 at offset 0. */
   pi->io.ucpCBSlot = 15;
   pi->io.ucpBase = 0;
   pi->io.genUserClip = prog->vp.clpd_nr;

   pi->assignSlots = nv50_program_assign_varying_slots;
   pi->driverPriv = prog;

   /* Default every optional varying to "unused"; the slot-assignment
    * callback fills in the ones the shader really writes. */
   prog->vp.bfc[0] = 0xff;
   prog->vp.bfc[1] = 0xff;
   prog->vp.edgeflag = 0xff;
   prog->vp.clpd[0] = unassigned;
   prog->vp.clpd[1] = unassigned;
   prog->vp.psiz = unassigned;
   prog->gp.primid = 0x80;

#ifdef DEBUG
   /* Allow environment overrides for optimization level / debug flags. */
   pi->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
   pi->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
#else
   pi->optLevel = 3;
#endif

   status = nv50_ir_generate_code(pi);
   if (status) {
      NOUVEAU_ERR("shader translation failed: %i\n", status);
      goto out;
   }

   /* The symbol table is not needed for graphics shaders. */
   FREE(pi->bin.syms);

   /* Hand the generated binary and its metadata over to the program. */
   prog->code = pi->bin.code;
   prog->code_size = pi->bin.codeSize;
   prog->fixups = pi->bin.relocData;
   /* maxGPR counts 32-bit registers; hardware allocates pairs, with a
    * floor of 4. */
   prog->max_gpr = MAX2(4, (pi->bin.maxGPR >> 1) + 1);
   prog->tls_space = pi->bin.tlsSpace;

   if (prog->type == PIPE_SHADER_FRAGMENT) {
      if (pi->prop.fp.writesDepth) {
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
         prog->fp.flags[1] = 0x11;
      }
      if (pi->prop.fp.usesDiscard)
         prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
   }

   if (prog->pipe.stream_output.num_outputs)
      prog->so = nv50_program_create_strmout_state(pi,
                                                   &prog->pipe.stream_output);

out:
   FREE(pi);
   return status == 0;
}