/** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen). */ struct pipe_screen * llvmpipe_create_screen(struct sw_winsys *winsys) { struct llvmpipe_screen *screen; util_cpu_detect(); #ifdef DEBUG LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 ); #endif LP_PERF = debug_get_flags_option("LP_PERF", lp_perf_flags, 0 ); screen = CALLOC_STRUCT(llvmpipe_screen); if (!screen) return NULL; if (!lp_jit_screen_init(screen)) { FREE(screen); return NULL; } screen->winsys = winsys; screen->base.destroy = llvmpipe_destroy_screen; screen->base.get_name = llvmpipe_get_name; screen->base.get_vendor = llvmpipe_get_vendor; screen->base.get_device_vendor = llvmpipe_get_vendor; // TODO should be the CPU vendor screen->base.get_param = llvmpipe_get_param; screen->base.get_shader_param = llvmpipe_get_shader_param; screen->base.get_paramf = llvmpipe_get_paramf; screen->base.is_format_supported = llvmpipe_is_format_supported; screen->base.context_create = llvmpipe_create_context; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; screen->base.fence_reference = llvmpipe_fence_reference; screen->base.fence_finish = llvmpipe_fence_finish; screen->base.get_timestamp = llvmpipe_get_timestamp; llvmpipe_init_screen_resource_funcs(&screen->base); screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0; #ifdef PIPE_SUBSYSTEM_EMBEDDED screen->num_threads = 0; #endif screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS); screen->rast = lp_rast_create(screen->num_threads); if (!screen->rast) { lp_jit_screen_cleanup(screen); FREE(screen); return NULL; } (void) mtx_init(&screen->rast_mutex, mtx_plain); return &screen->base; }
/** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen). */ struct pipe_screen * llvmpipe_create_screen(struct llvmpipe_winsys *winsys) { struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen); #ifdef DEBUG LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 ); #endif if (!screen) return NULL; screen->winsys = winsys; screen->base.destroy = llvmpipe_destroy_screen; screen->base.get_name = llvmpipe_get_name; screen->base.get_vendor = llvmpipe_get_vendor; screen->base.get_param = llvmpipe_get_param; screen->base.get_paramf = llvmpipe_get_paramf; screen->base.is_format_supported = llvmpipe_is_format_supported; screen->base.surface_buffer_create = llvmpipe_surface_buffer_create; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; llvmpipe_init_screen_texture_funcs(&screen->base); llvmpipe_init_screen_buffer_funcs(&screen->base); lp_jit_screen_init(screen); return &screen->base; }
void _nine_debug_printf( unsigned long flag, const char *func, const char *fmt, ... ) { static boolean first = TRUE; static unsigned long dbg_flags = DBG_ERROR | DBG_WARN; unsigned long tid = 0; if (first) { first = FALSE; dbg_flags |= debug_get_flags_option("NINE_DEBUG", nine_debug_flags, 0); } #if defined(HAVE_PTHREAD) # if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \ (__GLIBC__ >= 3 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 12)) if (dbg_flags & DBG_TID) tid = pthread_self(); # endif #endif if (dbg_flags & flag) { const char *f = func ? strrchr(func, '_') : NULL; va_list ap; /* inside a class this will print nine:tid:classinlowercase:func: while * outside a class (rarely used) it will just print nine:tid:func * the reason for lower case is simply to match the filenames, as it * will also strip off the "Nine" */ if (f && strncmp(func, "Nine", 4) == 0) { char klass[96]; /* no class name is this long */ char *ptr = klass; for (func += 4; func != f; ++func) { *ptr++ = tolower(*func); } *ptr = '\0'; if (tid) _debug_printf("nine:0x%08lx:%s:%s: ", tid, klass, ++f); else _debug_printf("nine:%s:%s: ", klass, ++f); } else if (func) { if (tid) _debug_printf("nine:0x%08lx:%s ", tid, func); else _debug_printf("nine:%s ", func); } va_start(ap, fmt); _debug_vprintf(fmt, ap); va_end(ap); } }
bool r600_common_screen_init(struct r600_common_screen *rscreen, struct radeon_winsys *ws) { ws->query_info(ws, &rscreen->info); rscreen->b.get_name = r600_get_name; rscreen->b.get_vendor = r600_get_vendor; rscreen->b.get_compute_param = r600_get_compute_param; rscreen->b.get_paramf = r600_get_paramf; rscreen->b.get_driver_query_info = r600_get_driver_query_info; rscreen->b.get_timestamp = r600_get_timestamp; rscreen->b.fence_finish = r600_fence_finish; rscreen->b.fence_reference = r600_fence_reference; rscreen->b.fence_signalled = r600_fence_signalled; rscreen->b.resource_destroy = u_resource_destroy_vtbl; if (rscreen->info.has_uvd) { rscreen->b.get_video_param = rvid_get_video_param; rscreen->b.is_video_format_supported = rvid_is_format_supported; } else { rscreen->b.get_video_param = r600_get_video_param; rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; } r600_init_screen_texture_functions(rscreen); rscreen->ws = ws; rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0); if (!r600_init_tiling(rscreen)) { return false; } util_format_s3tc_init(); pipe_mutex_init(rscreen->aux_context_lock); if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) { rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, 4096); if (rscreen->trace_bo) { rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL, PIPE_TRANSFER_UNSYNCHRONIZED); } } return true; }
struct pipe_screen * ilo_screen_create(struct intel_winsys *ws) { struct ilo_screen *is; const struct intel_winsys_info *info; ilo_debug = debug_get_flags_option("ILO_DEBUG", ilo_debug_flags, 0); is = CALLOC_STRUCT(ilo_screen); if (!is) return NULL; is->winsys = ws; intel_winsys_enable_reuse(is->winsys); info = intel_winsys_get_info(is->winsys); if (!init_dev(&is->dev, info)) { FREE(is); return NULL; } util_format_s3tc_init(); is->base.destroy = ilo_screen_destroy; is->base.get_name = ilo_get_name; is->base.get_vendor = ilo_get_vendor; is->base.get_param = ilo_get_param; is->base.get_paramf = ilo_get_paramf; is->base.get_shader_param = ilo_get_shader_param; is->base.get_video_param = ilo_get_video_param; is->base.get_compute_param = ilo_get_compute_param; is->base.get_timestamp = ilo_get_timestamp; is->base.flush_frontbuffer = NULL; is->base.fence_reference = ilo_fence_reference; is->base.fence_signalled = ilo_fence_signalled; is->base.fence_finish = ilo_fence_finish; is->base.get_driver_query_info = NULL; ilo_init_format_functions(is); ilo_init_context_functions(is); ilo_init_resource_functions(is); return &is->base; }
bool r600_common_screen_init(struct r600_common_screen *rscreen, struct radeon_winsys *ws) { ws->query_info(ws, &rscreen->info); rscreen->b.fence_finish = r600_fence_finish; rscreen->b.fence_reference = r600_fence_reference; rscreen->b.fence_signalled = r600_fence_signalled; rscreen->ws = ws; rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0); if (!r600_init_tiling(rscreen)) { return false; } util_format_s3tc_init(); pipe_mutex_init(rscreen->aux_context_lock); return true; }
/** * Create a new svga_screen object */ struct pipe_screen * svga_screen_create(struct svga_winsys_screen *sws) { struct svga_screen *svgascreen; struct pipe_screen *screen; SVGA3dDevCapResult result; boolean use_vs30, use_ps30; #ifdef DEBUG SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 ); #endif svgascreen = CALLOC_STRUCT(svga_screen); if (!svgascreen) goto error1; svgascreen->debug.force_level_surface_view = debug_get_bool_option("SVGA_FORCE_LEVEL_SURFACE_VIEW", FALSE); svgascreen->debug.force_surface_view = debug_get_bool_option("SVGA_FORCE_SURFACE_VIEW", FALSE); svgascreen->debug.force_sampler_view = debug_get_bool_option("SVGA_FORCE_SAMPLER_VIEW", FALSE); svgascreen->debug.no_surface_view = debug_get_bool_option("SVGA_NO_SURFACE_VIEW", FALSE); svgascreen->debug.no_sampler_view = debug_get_bool_option("SVGA_NO_SAMPLER_VIEW", FALSE); screen = &svgascreen->screen; screen->destroy = svga_destroy_screen; screen->get_name = svga_get_name; screen->get_vendor = svga_get_vendor; screen->get_param = svga_get_param; screen->get_shader_param = svga_get_shader_param; screen->get_paramf = svga_get_paramf; screen->is_format_supported = svga_is_format_supported; screen->context_create = svga_context_create; screen->fence_reference = svga_fence_reference; screen->fence_signalled = svga_fence_signalled; screen->fence_finish = svga_fence_finish; screen->get_driver_query_info = svga_get_driver_query_info; svgascreen->sws = sws; svga_init_screen_resource_functions(svgascreen); if (sws->get_hw_version) { svgascreen->hw_version = sws->get_hw_version(sws); } else { svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1; } use_ps30 = sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) && result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE; use_vs30 = sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) && result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE; /* we require Shader model 3.0 or later */ if (!use_ps30 || !use_vs30) goto error2; /* * The D16, D24X8, and D24S8 formats always do an implicit shadow compare * when sampled from, where as the DF16, DF24, and D24S8_INT do not. So * we prefer the later when available. * * This mimics hardware vendors extensions for D3D depth sampling. See also * http://aras-p.info/texts/D3D9GPUHacks.html */ { boolean has_df16, has_df24, has_d24s8_int; SVGA3dSurfaceFormatCaps caps; SVGA3dSurfaceFormatCaps mask; mask.value = 0; mask.zStencil = 1; mask.texture = 1; svgascreen->depth.z16 = SVGA3D_Z_D16; svgascreen->depth.x8z24 = SVGA3D_Z_D24X8; svgascreen->depth.s8z24 = SVGA3D_Z_D24S8; svga_get_format_cap(svgascreen, SVGA3D_Z_DF16, &caps); has_df16 = (caps.value & mask.value) == mask.value; svga_get_format_cap(svgascreen, SVGA3D_Z_DF24, &caps); has_df24 = (caps.value & mask.value) == mask.value; svga_get_format_cap(svgascreen, SVGA3D_Z_D24S8_INT, &caps); has_d24s8_int = (caps.value & mask.value) == mask.value; /* XXX: We might want some other logic here. * Like if we only have d24s8_int we should * emulate the other formats with that. */ if (has_df16) { svgascreen->depth.z16 = SVGA3D_Z_DF16; } if (has_df24) { svgascreen->depth.x8z24 = SVGA3D_Z_DF24; } if (has_d24s8_int) { svgascreen->depth.s8z24 = SVGA3D_Z_D24S8_INT; } } /* Query device caps */ if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, &result)) svgascreen->haveLineStipple = FALSE; else svgascreen->haveLineStipple = result.u; if (!sws->get_cap(sws, SVGA3D_DEVCAP_LINE_AA, &result)) svgascreen->haveLineSmooth = FALSE; else svgascreen->haveLineSmooth = result.u; if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, &result)) svgascreen->maxLineWidth = 1.0F; else svgascreen->maxLineWidth = result.f; if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, &result)) svgascreen->maxLineWidthAA = 1.0F; else svgascreen->maxLineWidthAA = result.f; if (0) debug_printf("svga: haveLineStip %u " "haveLineSmooth %u maxLineWidth %f\n", svgascreen->haveLineStipple, svgascreen->haveLineSmooth, svgascreen->maxLineWidth); if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, &result)) { svgascreen->maxPointSize = 1.0F; } else { /* Keep this to a reasonable size to avoid failures in * conform/pntaa.c: */ svgascreen->maxPointSize = MIN2(result.f, 80.0f); } /* The SVGA3D device always supports 4 targets at this time, regardless * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return. */ svgascreen->max_color_buffers = 4; pipe_mutex_init(svgascreen->tex_mutex); pipe_mutex_init(svgascreen->swc_mutex); svga_screen_cache_init(svgascreen); return screen; error2: FREE(svgascreen); error1: return NULL; }
bool r600_common_screen_init(struct r600_common_screen *rscreen, struct radeon_winsys *ws) { char llvm_string[32] = {}; ws->query_info(ws, &rscreen->info); #if HAVE_LLVM snprintf(llvm_string, sizeof(llvm_string), ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); #endif snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string), "%s (DRM %i.%i.%i%s)", r600_get_chip_name(rscreen), rscreen->info.drm_major, rscreen->info.drm_minor, rscreen->info.drm_patchlevel, llvm_string); rscreen->b.get_name = r600_get_name; rscreen->b.get_vendor = r600_get_vendor; rscreen->b.get_device_vendor = r600_get_device_vendor; rscreen->b.get_compute_param = r600_get_compute_param; rscreen->b.get_paramf = r600_get_paramf; rscreen->b.get_driver_query_info = r600_get_driver_query_info; rscreen->b.get_timestamp = r600_get_timestamp; rscreen->b.fence_finish = r600_fence_finish; rscreen->b.fence_reference = r600_fence_reference; rscreen->b.resource_destroy = u_resource_destroy_vtbl; rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory; if (rscreen->info.has_uvd) { rscreen->b.get_video_param = rvid_get_video_param; rscreen->b.is_video_format_supported = rvid_is_format_supported; } else { rscreen->b.get_video_param = r600_get_video_param; rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; } r600_init_screen_texture_functions(rscreen); rscreen->ws = ws; rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0); if (!r600_init_tiling(rscreen)) { return false; } util_format_s3tc_init(); pipe_mutex_init(rscreen->aux_context_lock); pipe_mutex_init(rscreen->gpu_load_mutex); if (((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 28) || rscreen->info.drm_major == 3) && (rscreen->debug_flags & DBG_TRACE_CS)) { rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, 4096); if (rscreen->trace_bo) { rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL, PIPE_TRANSFER_UNSYNCHRONIZED); } } if (rscreen->debug_flags & DBG_INFO) { printf("pci_id = 0x%x\n", rscreen->info.pci_id); printf("family = %i\n", rscreen->info.family); printf("chip_class = %i\n", rscreen->info.chip_class); printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20)); printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20)); printf("max_sclk = %i\n", rscreen->info.max_sclk); printf("max_compute_units = %i\n", rscreen->info.max_compute_units); printf("max_se = %i\n", rscreen->info.max_se); printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); printf("drm = %i.%i.%i\n", rscreen->info.drm_major, rscreen->info.drm_minor, rscreen->info.drm_patchlevel); printf("has_uvd = %i\n", rscreen->info.has_uvd); printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version); printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends); printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq); printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config); printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes); printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes); printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address); printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma); printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map); printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid); printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid); printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid); }
/** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen). */ struct pipe_screen * llvmpipe_create_screen(struct sw_winsys *winsys) { struct llvmpipe_screen *screen; util_cpu_detect(); #if defined(PIPE_ARCH_X86) && HAVE_LLVM < 0x0302 /* require SSE2 due to LLVM PR6960. */ if (!util_cpu_caps.has_sse2) return NULL; #endif #ifdef DEBUG LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 ); #endif LP_PERF = debug_get_flags_option("LP_PERF", lp_perf_flags, 0 ); screen = CALLOC_STRUCT(llvmpipe_screen); if (!screen) return NULL; screen->winsys = winsys; screen->base.destroy = llvmpipe_destroy_screen; screen->base.get_name = llvmpipe_get_name; screen->base.get_vendor = llvmpipe_get_vendor; screen->base.get_param = llvmpipe_get_param; screen->base.get_shader_param = llvmpipe_get_shader_param; screen->base.get_paramf = llvmpipe_get_paramf; screen->base.is_format_supported = llvmpipe_is_format_supported; screen->base.context_create = llvmpipe_create_context; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; screen->base.fence_reference = llvmpipe_fence_reference; screen->base.fence_signalled = llvmpipe_fence_signalled; screen->base.fence_finish = llvmpipe_fence_finish; screen->base.get_timestamp = llvmpipe_get_timestamp; llvmpipe_init_screen_resource_funcs(&screen->base); lp_jit_screen_init(screen); screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0; #ifdef PIPE_SUBSYSTEM_EMBEDDED screen->num_threads = 0; #endif screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS); screen->rast = lp_rast_create(screen->num_threads); if (!screen->rast) { lp_jit_screen_cleanup(screen); FREE(screen); return NULL; } pipe_mutex_init(screen->rast_mutex); util_format_s3tc_init(); return &screen->base; }
struct pipe_context * cell_create_context(struct pipe_screen *screen, void *priv ) { struct cell_context *cell; uint i; /* some fields need to be 16-byte aligned, so align the whole object */ cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); if (!cell) return NULL; memset(cell, 0, sizeof(*cell)); cell->winsys = NULL; /* XXX: fixme - get this from screen? */ cell->pipe.winsys = NULL; cell->pipe.screen = screen; cell->pipe.priv = priv; cell->pipe.destroy = cell_destroy_context; cell->pipe.clear = cell_clear; cell->pipe.flush = cell_flush; #if 0 cell->pipe.begin_query = cell_begin_query; cell->pipe.end_query = cell_end_query; cell->pipe.wait_query = cell_wait_query; #endif cell_init_draw_functions(cell); cell_init_state_functions(cell); cell_init_shader_functions(cell); cell_init_surface_functions(cell); cell_init_vertex_functions(cell); cell_init_texture_transfer_funcs(cell); cell->draw = cell_draw_create(cell); /* Create cache of fragment ops generated code */ cell->fragment_ops_cache = util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL); cell_init_vbuf(cell); draw_set_rasterize_stage(cell->draw, cell->vbuf); /* convert all points/lines to tris for the time being */ draw_wide_point_threshold(cell->draw, 0.0); draw_wide_line_threshold(cell->draw, 0.0); /* get env vars or read config file to get debug flags */ cell->debug_flags = debug_get_flags_option("CELL_DEBUG", cell_debug_flags, 0 ); for (i = 0; i < CELL_NUM_BUFFERS; i++) cell_fence_init(&cell->fenced_buffers[i].fence); /* * SPU stuff */ /* This call only works with SDK 3.0. Anyone still using 2.1??? */ cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1); cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); if (cell->debug_flags) { printf("Cell: found %d Cell(s) with %u SPUs\n", cell->num_cells, cell->num_spus); } if (getenv("CELL_NUM_SPUS")) { cell->num_spus = atoi(getenv("CELL_NUM_SPUS")); assert(cell->num_spus > 0); } cell_start_spus(cell); cell_init_batch_buffers(cell); /* make sure SPU initializations are done before proceeding */ cell_flush_int(cell, CELL_FLUSH_WAIT); return &cell->pipe; }
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, const struct pipe_screen_config *config) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i; if (!sscreen) { return NULL; } sscreen->ws = ws; ws->query_info(ws, &sscreen->info); if (sscreen->info.chip_class >= GFX9) { sscreen->se_tile_repeat = 32 * sscreen->info.max_se; } else { ac_get_raster_config(&sscreen->info, &sscreen->pa_sc_raster_config, &sscreen->pa_sc_raster_config_1, &sscreen->se_tile_repeat); } sscreen->debug_flags = debug_get_flags_option("R600_DEBUG", debug_options, 0); sscreen->debug_flags |= debug_get_flags_option("AMD_DEBUG", debug_options, 0); /* Set functions first. */ sscreen->b.context_create = si_pipe_create_context; sscreen->b.destroy = si_destroy_screen; sscreen->b.set_max_shader_compiler_threads = si_set_max_shader_compiler_threads; sscreen->b.is_parallel_shader_compilation_finished = si_is_parallel_shader_compilation_finished; si_init_screen_get_functions(sscreen); si_init_screen_buffer_functions(sscreen); si_init_screen_fence_functions(sscreen); si_init_screen_state_functions(sscreen); si_init_screen_texture_functions(sscreen); si_init_screen_query_functions(sscreen); /* Set these flags in debug_flags early, so that the shader cache takes * them into account. */ if (driQueryOptionb(config->options, "glsl_correct_derivatives_after_discard")) sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL); if (driQueryOptionb(config->options, "radeonsi_enable_sisched")) sscreen->debug_flags |= DBG(SI_SCHED); if (sscreen->debug_flags & DBG(INFO)) ac_print_gpu_info(&sscreen->info); slab_create_parent(&sscreen->pool_transfers, sizeof(struct si_transfer), 64); sscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); if (sscreen->force_aniso >= 0) { printf("radeonsi: Forcing anisotropy filter to %ix\n", /* round down to a power of two */ 1 << util_logbase2(sscreen->force_aniso)); } (void) mtx_init(&sscreen->aux_context_lock, mtx_plain); (void) mtx_init(&sscreen->gpu_load_mutex, mtx_plain); si_init_gs_info(sscreen); if (!si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } si_disk_cache_create(sscreen); /* Determine the number of shader compiler threads. */ hw_threads = sysconf(_SC_NPROCESSORS_ONLN); if (hw_threads >= 12) { num_comp_hi_threads = hw_threads * 3 / 4; num_comp_lo_threads = hw_threads / 3; } else if (hw_threads >= 6) { num_comp_hi_threads = hw_threads - 2; num_comp_lo_threads = hw_threads / 2; } else if (hw_threads >= 2) { num_comp_hi_threads = hw_threads - 1; num_comp_lo_threads = hw_threads / 2; } else { num_comp_hi_threads = 1; num_comp_lo_threads = 1; } num_comp_hi_threads = MIN2(num_comp_hi_threads, ARRAY_SIZE(sscreen->compiler)); num_comp_lo_threads = MIN2(num_comp_lo_threads, ARRAY_SIZE(sscreen->compiler_lowp)); if (!util_queue_init(&sscreen->shader_compiler_queue, "sh", 64, num_comp_hi_threads, UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) { si_destroy_shader_cache(sscreen); FREE(sscreen); return NULL; } if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority, "shlo", 64, num_comp_lo_threads, UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY | UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) { si_destroy_shader_cache(sscreen); FREE(sscreen); return NULL; } if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) si_init_perfcounters(sscreen); /* Determine tessellation ring info. */ bool double_offchip_buffers = sscreen->info.chip_class >= CIK && sscreen->info.family != CHIP_CARRIZO && sscreen->info.family != CHIP_STONEY; /* This must be one less than the maximum number due to a hw limitation. * Various hardware bugs in SI, CIK, and GFX9 need this. */ unsigned max_offchip_buffers_per_se; /* Only certain chips can use the maximum value. */ if (sscreen->info.family == CHIP_VEGA12 || sscreen->info.family == CHIP_VEGA20) max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; else max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63; unsigned max_offchip_buffers = max_offchip_buffers_per_se * sscreen->info.max_se; unsigned offchip_granularity; /* Hawaii has a bug with offchip buffers > 256 that can be worked * around by setting 4K granularity. */ if (sscreen->info.family == CHIP_HAWAII) { sscreen->tess_offchip_block_dw_size = 4096; offchip_granularity = V_03093C_X_4K_DWORDS; } else { sscreen->tess_offchip_block_dw_size = 8192; offchip_granularity = V_03093C_X_8K_DWORDS; } sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se; assert(((sscreen->tess_factor_ring_size / 4) & C_030938_SIZE) == 0); sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4; if (sscreen->info.chip_class >= CIK) { if (sscreen->info.chip_class >= VI) --max_offchip_buffers; sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | S_03093C_OFFCHIP_GRANULARITY(offchip_granularity); } else { assert(offchip_granularity == V_03093C_X_8K_DWORDS); sscreen->vgt_hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers); } /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs * on SI. Some CLEAR_STATE cause asic hang on radeon kernel, etc. * SPI_VS_OUT_CONFIG. So only enable CI CLEAR_STATE on amdgpu kernel.*/ sscreen->has_clear_state = sscreen->info.chip_class >= CIK && sscreen->info.drm_major == 3; sscreen->has_distributed_tess = sscreen->info.chip_class >= VI && sscreen->info.max_se >= 2; sscreen->has_draw_indirect_multi = (sscreen->info.family >= CHIP_POLARIS10) || (sscreen->info.chip_class == VI && sscreen->info.pfp_fw_version >= 121 && sscreen->info.me_fw_version >= 87) || (sscreen->info.chip_class == CIK && sscreen->info.pfp_fw_version >= 211 && sscreen->info.me_fw_version >= 173) || (sscreen->info.chip_class == SI && sscreen->info.pfp_fw_version >= 79 && sscreen->info.me_fw_version >= 142); sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI && sscreen->info.max_se >= 2 && !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER)); sscreen->assume_no_z_fights = driQueryOptionb(config->options, "radeonsi_assume_no_z_fights"); sscreen->commutative_blend_add = driQueryOptionb(config->options, "radeonsi_commutative_blend_add"); { #define OPT_BOOL(name, dflt, description) \ sscreen->options.name = \ driQueryOptionb(config->options, "radeonsi_"#name); #include "si_debug_options.h" } sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 && sscreen->info.family <= CHIP_POLARIS12) || sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2; /* Only enable primitive binning on APUs by default. */ sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2; sscreen->dfsm_allowed = sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2; /* Process DPBB enable flags. */ if (sscreen->debug_flags & DBG(DPBB)) { sscreen->dpbb_allowed = true; if (sscreen->debug_flags & DBG(DFSM)) sscreen->dfsm_allowed = true; } /* Process DPBB disable flags. */ if (sscreen->debug_flags & DBG(NO_DPBB)) { sscreen->dpbb_allowed = false; sscreen->dfsm_allowed = false; } else if (sscreen->debug_flags & DBG(NO_DFSM)) { sscreen->dfsm_allowed = false; } /* While it would be nice not to have this flag, we are constrained * by the reality that LLVM 5.0 doesn't have working VGPR indexing * on GFX9. */ sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI; /* Some chips have RB+ registers, but don't support RB+. Those must * always disable it. */ if (sscreen->info.family == CHIP_STONEY || sscreen->info.chip_class >= GFX9) { sscreen->has_rbplus = true; sscreen->rbplus_allowed = !(sscreen->debug_flags & DBG(NO_RB_PLUS)) && (sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA12 || sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2); } sscreen->dcc_msaa_allowed = !(sscreen->debug_flags & DBG(NO_DCC_MSAA)); sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI; (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0; sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_VMEM_L1; if (sscreen->info.chip_class <= VI) { sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2; sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) sscreen->debug_flags |= DBG_ALL_SHADERS; /* Syntax: * EQAA=s,z,c * Example: * EQAA=8,4,2 * That means 8 coverage samples, 4 Z/S samples, and 2 color samples. * Constraints: * s >= z >= c (ignoring this only wastes memory) * s = [2..16] * z = [2..8] * c = [2..8] * * Only MSAA color and depth buffers are overriden. */ if (sscreen->info.has_eqaa_surface_allocator) { const char *eqaa = debug_get_option("EQAA", NULL); unsigned s,z,f; if (eqaa && sscanf(eqaa, "%u,%u,%u", &s, &z, &f) == 3 && s && z && f) { sscreen->eqaa_force_coverage_samples = s; sscreen->eqaa_force_z_samples = z; sscreen->eqaa_force_color_samples = f; } } for (i = 0; i < num_comp_hi_threads; i++) si_init_compiler(sscreen, &sscreen->compiler[i]); for (i = 0; i < num_comp_lo_threads; i++) si_init_compiler(sscreen, &sscreen->compiler_lowp[i]); /* Create the auxiliary context. This must be done last. */ sscreen->aux_context = si_create_context( &sscreen->b, sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0); if (sscreen->options.aux_debug) { struct u_log_context *log = CALLOC_STRUCT(u_log_context); u_log_context_init(log); sscreen->aux_context->set_log_context(sscreen->aux_context, log); } if (sscreen->debug_flags & DBG(TEST_DMA)) si_test_dma(sscreen); if (sscreen->debug_flags & DBG(TEST_DMA_PERF)) { si_test_dma_perf(sscreen); } if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) | DBG(TEST_VMFAULT_SDMA) | DBG(TEST_VMFAULT_SHADER))) si_test_vmfault(sscreen); if (sscreen->debug_flags & DBG(TEST_GDS)) si_test_gds((struct si_context*)sscreen->aux_context); if (sscreen->debug_flags & DBG(TEST_GDS_MM)) { si_test_gds_memory_management((struct si_context*)sscreen->aux_context, 32 * 1024, 4, RADEON_DOMAIN_GDS); } if (sscreen->debug_flags & DBG(TEST_GDS_OA_MM)) { si_test_gds_memory_management((struct si_context*)sscreen->aux_context, 4, 1, RADEON_DOMAIN_OA); } return &sscreen->b; }
/** * Create a new brw_screen object */ struct pipe_screen * brw_screen_create(struct brw_winsys_screen *sws) { struct brw_screen *bscreen; struct brw_chipset chipset; #ifdef DEBUG BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM; BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0); #endif memset(&chipset, 0, sizeof chipset); chipset.pci_id = sws->pci_id; switch (chipset.pci_id) { case PCI_CHIP_I965_G: case PCI_CHIP_I965_Q: case PCI_CHIP_I965_G_1: case PCI_CHIP_I946_GZ: case PCI_CHIP_I965_GM: case PCI_CHIP_I965_GME: chipset.is_965 = TRUE; break; case PCI_CHIP_GM45_GM: case PCI_CHIP_IGD_E_G: case PCI_CHIP_Q45_G: case PCI_CHIP_G45_G: case PCI_CHIP_G41_G: case PCI_CHIP_B43_G: chipset.is_g4x = TRUE; break; case PCI_CHIP_ILD_G: case PCI_CHIP_ILM_G: chipset.is_igdng = TRUE; break; default: debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", __FUNCTION__, chipset.pci_id); return NULL; } bscreen = CALLOC_STRUCT(brw_screen); if (!bscreen) return NULL; bscreen->chipset = chipset; bscreen->sws = sws; bscreen->base.winsys = NULL; bscreen->base.destroy = brw_destroy_screen; bscreen->base.get_name = brw_get_name; bscreen->base.get_vendor = brw_get_vendor; bscreen->base.get_param = brw_get_param; bscreen->base.get_shader_param = brw_get_shader_param; bscreen->base.get_paramf = brw_get_paramf; bscreen->base.is_format_supported = brw_is_format_supported; bscreen->base.context_create = brw_create_context; bscreen->base.fence_reference = brw_fence_reference; bscreen->base.fence_signalled = brw_fence_signalled; bscreen->base.fence_finish = brw_fence_finish; brw_init_screen_resource_functions(bscreen); brw_screen_tex_surface_init(bscreen); bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE) != NULL; return &bscreen->base; }
struct pipe_context * trace_context_create(struct trace_screen *tr_scr, struct pipe_context *pipe) { struct trace_context *tr_ctx; if(!pipe) goto error1; if(!trace_enabled()) goto error1; tr_ctx = CALLOC_STRUCT(trace_context); if(!tr_ctx) goto error1; tr_ctx->base.winsys = NULL; tr_ctx->base.priv = pipe->priv; /* expose wrapped priv data */ tr_ctx->base.screen = &tr_scr->base; tr_ctx->draw_blocker = debug_get_flags_option("RBUG_BLOCK", rbug_blocker_flags, 0); pipe_mutex_init(tr_ctx->draw_mutex); pipe_condvar_init(tr_ctx->draw_cond); pipe_mutex_init(tr_ctx->list_mutex); make_empty_list(&tr_ctx->shaders); tr_ctx->base.destroy = trace_context_destroy; tr_ctx->base.draw_arrays = trace_context_draw_arrays; tr_ctx->base.draw_elements = trace_context_draw_elements; tr_ctx->base.draw_range_elements = trace_context_draw_range_elements; tr_ctx->base.create_query = trace_context_create_query; tr_ctx->base.destroy_query = trace_context_destroy_query; tr_ctx->base.begin_query = trace_context_begin_query; tr_ctx->base.end_query = trace_context_end_query; tr_ctx->base.get_query_result = trace_context_get_query_result; tr_ctx->base.create_blend_state = trace_context_create_blend_state; tr_ctx->base.bind_blend_state = trace_context_bind_blend_state; tr_ctx->base.delete_blend_state = trace_context_delete_blend_state; tr_ctx->base.create_sampler_state = trace_context_create_sampler_state; tr_ctx->base.bind_fragment_sampler_states = trace_context_bind_fragment_sampler_states; tr_ctx->base.bind_vertex_sampler_states = trace_context_bind_vertex_sampler_states; tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state; tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state; tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state; tr_ctx->base.delete_rasterizer_state = trace_context_delete_rasterizer_state; tr_ctx->base.create_depth_stencil_alpha_state = trace_context_create_depth_stencil_alpha_state; tr_ctx->base.bind_depth_stencil_alpha_state = trace_context_bind_depth_stencil_alpha_state; tr_ctx->base.delete_depth_stencil_alpha_state = trace_context_delete_depth_stencil_alpha_state; tr_ctx->base.create_fs_state = trace_context_create_fs_state; tr_ctx->base.bind_fs_state = trace_context_bind_fs_state; tr_ctx->base.delete_fs_state = trace_context_delete_fs_state; tr_ctx->base.create_vs_state = trace_context_create_vs_state; tr_ctx->base.bind_vs_state = trace_context_bind_vs_state; tr_ctx->base.delete_vs_state = trace_context_delete_vs_state; tr_ctx->base.set_blend_color = trace_context_set_blend_color; tr_ctx->base.set_stencil_ref = trace_context_set_stencil_ref; tr_ctx->base.set_clip_state = trace_context_set_clip_state; tr_ctx->base.set_constant_buffer = trace_context_set_constant_buffer; tr_ctx->base.set_framebuffer_state = trace_context_set_framebuffer_state; tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple; tr_ctx->base.set_scissor_state = trace_context_set_scissor_state; tr_ctx->base.set_viewport_state = trace_context_set_viewport_state; tr_ctx->base.set_fragment_sampler_textures = trace_context_set_fragment_sampler_textures; tr_ctx->base.set_vertex_sampler_textures = trace_context_set_vertex_sampler_textures; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; if (pipe->surface_copy) tr_ctx->base.surface_copy = trace_context_surface_copy; if (pipe->surface_fill) tr_ctx->base.surface_fill = trace_context_surface_fill; tr_ctx->base.clear = trace_context_clear; tr_ctx->base.flush = trace_context_flush; tr_ctx->base.is_texture_referenced = trace_is_texture_referenced; tr_ctx->base.is_buffer_referenced = trace_is_buffer_referenced; tr_ctx->pipe = pipe; trace_screen_add_to_list(tr_scr, contexts, tr_ctx); return &tr_ctx->base; error1: return pipe; }
/** * Create a new svga_screen object */ struct pipe_screen * svga_screen_create(struct svga_winsys_screen *sws) { struct svga_screen *svgascreen; struct pipe_screen *screen; #ifdef DEBUG SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 ); #endif svgascreen = CALLOC_STRUCT(svga_screen); if (!svgascreen) goto error1; svgascreen->debug.force_level_surface_view = debug_get_bool_option("SVGA_FORCE_LEVEL_SURFACE_VIEW", FALSE); svgascreen->debug.force_surface_view = debug_get_bool_option("SVGA_FORCE_SURFACE_VIEW", FALSE); svgascreen->debug.force_sampler_view = debug_get_bool_option("SVGA_FORCE_SAMPLER_VIEW", FALSE); svgascreen->debug.no_surface_view = debug_get_bool_option("SVGA_NO_SURFACE_VIEW", FALSE); svgascreen->debug.no_sampler_view = debug_get_bool_option("SVGA_NO_SAMPLER_VIEW", FALSE); screen = &svgascreen->screen; screen->destroy = svga_destroy_screen; screen->get_name = svga_get_name; screen->get_vendor = svga_get_vendor; screen->get_device_vendor = svga_get_vendor; // TODO actual device vendor screen->get_param = svga_get_param; screen->get_shader_param = svga_get_shader_param; screen->get_paramf = svga_get_paramf; screen->get_timestamp = NULL; screen->is_format_supported = svga_is_format_supported; screen->context_create = svga_context_create; screen->fence_reference = svga_fence_reference; screen->fence_finish = svga_fence_finish; screen->get_driver_query_info = svga_get_driver_query_info; svgascreen->sws = sws; svga_init_screen_resource_functions(svgascreen); if (sws->get_hw_version) { svgascreen->hw_version = sws->get_hw_version(sws); } else { svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1; } /* * The D16, D24X8, and D24S8 formats always do an implicit shadow compare * when sampled from, where as the DF16, DF24, and D24S8_INT do not. So * we prefer the later when available. * * This mimics hardware vendors extensions for D3D depth sampling. See also * http://aras-p.info/texts/D3D9GPUHacks.html */ { boolean has_df16, has_df24, has_d24s8_int; SVGA3dSurfaceFormatCaps caps; SVGA3dSurfaceFormatCaps mask; mask.value = 0; mask.zStencil = 1; mask.texture = 1; svgascreen->depth.z16 = SVGA3D_Z_D16; svgascreen->depth.x8z24 = SVGA3D_Z_D24X8; svgascreen->depth.s8z24 = SVGA3D_Z_D24S8; svga_get_format_cap(svgascreen, SVGA3D_Z_DF16, &caps); has_df16 = (caps.value & mask.value) == mask.value; svga_get_format_cap(svgascreen, SVGA3D_Z_DF24, &caps); has_df24 = (caps.value & mask.value) == mask.value; svga_get_format_cap(svgascreen, SVGA3D_Z_D24S8_INT, &caps); has_d24s8_int = (caps.value & mask.value) == mask.value; /* XXX: We might want some other logic here. * Like if we only have d24s8_int we should * emulate the other formats with that. */ if (has_df16) { svgascreen->depth.z16 = SVGA3D_Z_DF16; } if (has_df24) { svgascreen->depth.x8z24 = SVGA3D_Z_DF24; } if (has_d24s8_int) { svgascreen->depth.s8z24 = SVGA3D_Z_D24S8_INT; } } /* Query device caps */ if (sws->have_vgpu10) { svgascreen->haveProvokingVertex = get_bool_cap(sws, SVGA3D_DEVCAP_DX_PROVOKING_VERTEX, FALSE); svgascreen->haveLineSmooth = TRUE; svgascreen->maxPointSize = 80.0F; svgascreen->max_color_buffers = SVGA3D_DX_MAX_RENDER_TARGETS; /* Multisample samples per pixel */ svgascreen->ms_samples = get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0); /* Maximum number of constant buffers */ svgascreen->max_const_buffers = get_uint_cap(sws, SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS, 1); assert(svgascreen->max_const_buffers <= SVGA_MAX_CONST_BUFS); } else { /* VGPU9 */ unsigned vs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, SVGA3DVSVERSION_NONE); unsigned fs_ver = get_uint_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, SVGA3DPSVERSION_NONE); /* we require Shader model 3.0 or later */ if (fs_ver < SVGA3DPSVERSION_30 || vs_ver < SVGA3DVSVERSION_30) { goto error2; } svgascreen->haveProvokingVertex = FALSE; svgascreen->haveLineSmooth = get_bool_cap(sws, SVGA3D_DEVCAP_LINE_AA, FALSE); svgascreen->maxPointSize = get_float_cap(sws, SVGA3D_DEVCAP_MAX_POINT_SIZE, 1.0f); /* Keep this to a reasonable size to avoid failures in conform/pntaa.c */ svgascreen->maxPointSize = MIN2(svgascreen->maxPointSize, 80.0f); /* The SVGA3D device always supports 4 targets at this time, regardless * of what querying SVGA3D_DEVCAP_MAX_RENDER_TARGETS might return. */ svgascreen->max_color_buffers = 4; /* Only support one constant buffer */ svgascreen->max_const_buffers = 1; /* No multisampling */ svgascreen->ms_samples = 0; } /* common VGPU9 / VGPU10 caps */ svgascreen->haveLineStipple = get_bool_cap(sws, SVGA3D_DEVCAP_LINE_STIPPLE, FALSE); svgascreen->maxLineWidth = get_float_cap(sws, SVGA3D_DEVCAP_MAX_LINE_WIDTH, 1.0f); svgascreen->maxLineWidthAA = get_float_cap(sws, SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH, 1.0f); if (0) { debug_printf("svga: haveProvokingVertex %u\n", svgascreen->haveProvokingVertex); debug_printf("svga: haveLineStip %u " "haveLineSmooth %u maxLineWidth %f\n", svgascreen->haveLineStipple, svgascreen->haveLineSmooth, svgascreen->maxLineWidth); debug_printf("svga: maxPointSize %g\n", svgascreen->maxPointSize); } pipe_mutex_init(svgascreen->tex_mutex); pipe_mutex_init(svgascreen->swc_mutex); svga_screen_cache_init(svgascreen); return screen; error2: FREE(svgascreen); error1: return NULL; }
bool r600_common_screen_init(struct r600_common_screen *rscreen, struct radeon_winsys *ws) { char llvm_string[32] = {}, kernel_version[128] = {}; struct utsname uname_data; ws->query_info(ws, &rscreen->info); if (uname(&uname_data) == 0) snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release); #if HAVE_LLVM snprintf(llvm_string, sizeof(llvm_string), ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); #endif snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string), "%s (DRM %i.%i.%i%s%s)", r600_get_chip_name(rscreen), rscreen->info.drm_major, rscreen->info.drm_minor, rscreen->info.drm_patchlevel, kernel_version, llvm_string); rscreen->b.get_name = r600_get_name; rscreen->b.get_vendor = r600_get_vendor; rscreen->b.get_device_vendor = r600_get_device_vendor; rscreen->b.get_compute_param = r600_get_compute_param; rscreen->b.get_paramf = r600_get_paramf; rscreen->b.get_timestamp = r600_get_timestamp; rscreen->b.fence_finish = r600_fence_finish; rscreen->b.fence_reference = r600_fence_reference; rscreen->b.resource_destroy = u_resource_destroy_vtbl; rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory; rscreen->b.query_memory_info = r600_query_memory_info; if (rscreen->info.has_uvd) { rscreen->b.get_video_param = rvid_get_video_param; rscreen->b.is_video_format_supported = rvid_is_format_supported; } else { rscreen->b.get_video_param = r600_get_video_param; rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; } r600_init_screen_texture_functions(rscreen); r600_init_screen_query_functions(rscreen); rscreen->ws = ws; rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0); rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); if (rscreen->force_aniso >= 0) { printf("radeon: Forcing anisotropy filter to %ix\n", /* round down to a power of two */ 1 << util_logbase2(rscreen->force_aniso)); } util_format_s3tc_init(); pipe_mutex_init(rscreen->aux_context_lock); pipe_mutex_init(rscreen->gpu_load_mutex); if (rscreen->debug_flags & DBG_INFO) { printf("pci_id = 0x%x\n", rscreen->info.pci_id); printf("family = %i (%s)\n", rscreen->info.family, r600_get_chip_name(rscreen)); printf("chip_class = %i\n", rscreen->info.chip_class); printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024)); printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024)); printf("max_alloc_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024)); printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory); printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2); printf("has_sdma = %i\n", rscreen->info.has_sdma); printf("has_uvd = %i\n", rscreen->info.has_uvd); printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version); printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config); printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq); printf("drm = %i.%i.%i\n", rscreen->info.drm_major, rscreen->info.drm_minor, rscreen->info.drm_patchlevel); printf("has_userptr = %i\n", rscreen->info.has_userptr); printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes); printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock); printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units); printf("max_se = %i\n", rscreen->info.max_se); printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map); printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid); printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks); printf("num_render_backends = %i\n", rscreen->info.num_render_backends); printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes); printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes); } return true; }
struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) { struct r600_screen *rscreen = CALLOC_STRUCT(r600_screen); if (rscreen == NULL) { return NULL; } /* Set functions first. */ rscreen->b.b.context_create = r600_create_context; rscreen->b.b.destroy = r600_destroy_screen; rscreen->b.b.get_param = r600_get_param; rscreen->b.b.get_shader_param = r600_get_shader_param; rscreen->b.b.resource_create = r600_resource_create; if (!r600_common_screen_init(&rscreen->b, ws)) { FREE(rscreen); return NULL; } if (rscreen->b.info.chip_class >= EVERGREEN) { rscreen->b.b.is_format_supported = evergreen_is_format_supported; } else { rscreen->b.b.is_format_supported = r600_is_format_supported; } rscreen->b.debug_flags |= debug_get_flags_option("R600_DEBUG", r600_debug_options, 0); if (debug_get_bool_option("R600_DEBUG_COMPUTE", FALSE)) rscreen->b.debug_flags |= DBG_COMPUTE; if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; if (debug_get_bool_option("R600_HYPERZ", FALSE)) rscreen->b.debug_flags |= DBG_HYPERZ; if (debug_get_bool_option("R600_LLVM", FALSE)) rscreen->b.debug_flags |= DBG_LLVM; if (rscreen->b.family == CHIP_UNKNOWN) { fprintf(stderr, "r600: Unknown chipset 0x%04X\n", rscreen->b.info.pci_id); FREE(rscreen); return NULL; } /* Figure out streamout kernel support. */ switch (rscreen->b.chip_class) { case R600: if (rscreen->b.family < CHIP_RS780) { rscreen->b.has_streamout = rscreen->b.info.drm_minor >= 14; } else { rscreen->b.has_streamout = rscreen->b.info.drm_minor >= 23; } break; case R700: rscreen->b.has_streamout = rscreen->b.info.drm_minor >= 17; break; case EVERGREEN: case CAYMAN: rscreen->b.has_streamout = rscreen->b.info.drm_minor >= 14; break; default: rscreen->b.has_streamout = FALSE; break; } /* MSAA support. */ switch (rscreen->b.chip_class) { case R600: case R700: rscreen->has_msaa = rscreen->b.info.drm_minor >= 22; rscreen->has_compressed_msaa_texturing = false; break; case EVERGREEN: rscreen->has_msaa = rscreen->b.info.drm_minor >= 19; rscreen->has_compressed_msaa_texturing = rscreen->b.info.drm_minor >= 24; break; case CAYMAN: rscreen->has_msaa = rscreen->b.info.drm_minor >= 19; rscreen->has_compressed_msaa_texturing = true; break; default: rscreen->has_msaa = FALSE; rscreen->has_compressed_msaa_texturing = false; } rscreen->b.has_cp_dma = rscreen->b.info.drm_minor >= 27 && !(rscreen->b.debug_flags & DBG_NO_CP_DMA); rscreen->global_pool = compute_memory_pool_new(rscreen); /* Create the auxiliary context. This must be done last. */ rscreen->b.aux_context = rscreen->b.b.context_create(&rscreen->b.b, NULL); #if 0 /* This is for testing whether aux_context and buffer clearing work correctly. */ struct pipe_resource templ = {}; templ.width0 = 4; templ.height0 = 2048; templ.depth0 = 1; templ.array_size = 1; templ.target = PIPE_TEXTURE_2D; templ.format = PIPE_FORMAT_R8G8B8A8_UNORM; templ.usage = PIPE_USAGE_DEFAULT; struct r600_resource *res = r600_resource(rscreen->screen.resource_create(&rscreen->screen, &templ)); unsigned char *map = ws->buffer_map(res->cs_buf, NULL, PIPE_TRANSFER_WRITE); memset(map, 0, 256); r600_screen_clear_buffer(rscreen, &res->b.b, 4, 4, 0xCC); r600_screen_clear_buffer(rscreen, &res->b.b, 8, 4, 0xDD); r600_screen_clear_buffer(rscreen, &res->b.b, 12, 4, 0xEE); r600_screen_clear_buffer(rscreen, &res->b.b, 20, 4, 0xFF); r600_screen_clear_buffer(rscreen, &res->b.b, 32, 20, 0x87); ws->buffer_wait(res->buf, RADEON_USAGE_WRITE); int i; for (i = 0; i < 256; i++) { printf("%02X", map[i]); if (i % 16 == 15) printf("\n"); } #endif return &rscreen->b.b; }
/** * Create a new svga_screen object */ struct pipe_screen * svga_screen_create(struct svga_winsys_screen *sws) { struct svga_screen *svgascreen; struct pipe_screen *screen; SVGA3dDevCapResult result; boolean use_vs30, use_ps30; #ifdef DEBUG SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 ); #endif svgascreen = CALLOC_STRUCT(svga_screen); if (!svgascreen) goto error1; svgascreen->debug.force_level_surface_view = debug_get_bool_option("SVGA_FORCE_LEVEL_SURFACE_VIEW", FALSE); svgascreen->debug.force_surface_view = debug_get_bool_option("SVGA_FORCE_SURFACE_VIEW", FALSE); svgascreen->debug.force_sampler_view = debug_get_bool_option("SVGA_FORCE_SAMPLER_VIEW", FALSE); svgascreen->debug.no_surface_view = debug_get_bool_option("SVGA_NO_SURFACE_VIEW", FALSE); svgascreen->debug.no_sampler_view = debug_get_bool_option("SVGA_NO_SAMPLER_VIEW", FALSE); screen = &svgascreen->screen; screen->destroy = svga_destroy_screen; screen->get_name = svga_get_name; screen->get_vendor = svga_get_vendor; screen->get_param = svga_get_param; screen->get_shader_param = svga_get_shader_param; screen->get_paramf = svga_get_paramf; screen->is_format_supported = svga_is_format_supported; screen->context_create = svga_context_create; screen->fence_reference = svga_fence_reference; screen->fence_signalled = svga_fence_signalled; screen->fence_finish = svga_fence_finish; svgascreen->sws = sws; svga_init_screen_resource_functions(svgascreen); if (sws->get_hw_version) { svgascreen->hw_version = sws->get_hw_version(sws); } else { svgascreen->hw_version = SVGA3D_HWVERSION_WS65_B1; } use_ps30 = sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) && result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE; use_vs30 = sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) && result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE; /* we require Shader model 3.0 or later */ if (!use_ps30 || !use_vs30) goto error2; /* * The D16, D24X8, and D24S8 formats always do an implicit shadow compare * when sampled from, where as the DF16, DF24, and D24S8_INT do not. So * we prefer the later when available. * * This mimics hardware vendors extensions for D3D depth sampling. See also * http://aras-p.info/texts/D3D9GPUHacks.html */ { boolean has_df16, has_df24, has_d24s8_int; SVGA3dSurfaceFormatCaps caps; SVGA3dSurfaceFormatCaps mask; mask.value = 0; mask.zStencil = 1; mask.texture = 1; svgascreen->depth.z16 = SVGA3D_Z_D16; svgascreen->depth.x8z24 = SVGA3D_Z_D24X8; svgascreen->depth.s8z24 = SVGA3D_Z_D24S8; svga_get_format_cap(svgascreen, SVGA3D_Z_DF16, &caps); has_df16 = (caps.value & mask.value) == mask.value; svga_get_format_cap(svgascreen, SVGA3D_Z_DF24, &caps); has_df24 = (caps.value & mask.value) == mask.value; svga_get_format_cap(svgascreen, SVGA3D_Z_D24S8_INT, &caps); has_d24s8_int = (caps.value & mask.value) == mask.value; /* XXX: We might want some other logic here. * Like if we only have d24s8_int we should * emulate the other formats with that. */ if (has_df16) { svgascreen->depth.z16 = SVGA3D_Z_DF16; } if (has_df24) { svgascreen->depth.x8z24 = SVGA3D_Z_DF24; } if (has_d24s8_int) { svgascreen->depth.s8z24 = SVGA3D_Z_D24S8_INT; } } pipe_mutex_init(svgascreen->tex_mutex); pipe_mutex_init(svgascreen->swc_mutex); svga_screen_cache_init(svgascreen); return screen; error2: FREE(svgascreen); error1: return NULL; }