struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); if (!sscreen) { return NULL; } /* Set functions first. */ sscreen->b.b.context_create = si_create_context; sscreen->b.b.destroy = si_destroy_screen; sscreen->b.b.get_param = si_get_param; sscreen->b.b.get_shader_param = si_get_shader_param; sscreen->b.b.is_format_supported = si_is_format_supported; sscreen->b.b.resource_create = r600_resource_create_common; si_init_screen_state_functions(sscreen); if (!r600_common_screen_init(&sscreen->b, ws) || !si_init_gs_info(sscreen) || !si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", FALSE)) si_init_perfcounters(sscreen); sscreen->b.has_cp_dma = true; sscreen->b.has_streamout = true; pipe_mutex_init(sscreen->shader_parts_mutex); sscreen->use_monolithic_shaders = HAVE_LLVM < 0x0308 || (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0; if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE)) sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; /* Create the auxiliary context. This must be done last. */ sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL, 0); if (sscreen->b.debug_flags & DBG_TEST_DMA) r600_test_dma(&sscreen->b); return &sscreen->b.b; }
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); unsigned num_cpus, num_compiler_threads, i; if (!sscreen) { return NULL; } /* Set functions first. */ sscreen->b.b.context_create = si_create_context; sscreen->b.b.destroy = si_destroy_screen; sscreen->b.b.get_param = si_get_param; sscreen->b.b.get_shader_param = si_get_shader_param; sscreen->b.b.resource_create = r600_resource_create_common; si_init_screen_state_functions(sscreen); if (!r600_common_screen_init(&sscreen->b, ws) || !si_init_gs_info(sscreen) || !si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) si_init_perfcounters(sscreen); /* Hawaii has a bug with offchip buffers > 256 that can be worked * around by setting 4K granularity. */ sscreen->tess_offchip_block_dw_size = sscreen->b.family == CHIP_HAWAII ? 4096 : 8192; sscreen->has_distributed_tess = sscreen->b.chip_class >= VI && sscreen->b.info.max_se >= 2; sscreen->b.has_cp_dma = true; sscreen->b.has_streamout = true; pipe_mutex_init(sscreen->shader_parts_mutex); sscreen->use_monolithic_shaders = HAVE_LLVM < 0x0308 || (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0; if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; /* Only enable as many threads as we have target machines and CPUs. */ num_cpus = sysconf(_SC_NPROCESSORS_ONLN); num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm)); for (i = 0; i < num_compiler_threads; i++) sscreen->tm[i] = si_create_llvm_target_machine(sscreen); util_queue_init(&sscreen->shader_compiler_queue, "si_shader", 32, num_compiler_threads); /* Create the auxiliary context. This must be done last. */ sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL, 0); if (sscreen->b.debug_flags & DBG_TEST_DMA) r600_test_dma(&sscreen->b); return &sscreen->b.b; }
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, const struct pipe_screen_config *config) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i; if (!sscreen) { return NULL; } sscreen->ws = ws; ws->query_info(ws, &sscreen->info); if (sscreen->info.chip_class >= GFX9) { sscreen->se_tile_repeat = 32 * sscreen->info.max_se; } else { ac_get_raster_config(&sscreen->info, &sscreen->pa_sc_raster_config, &sscreen->pa_sc_raster_config_1, &sscreen->se_tile_repeat); } sscreen->debug_flags = debug_get_flags_option("R600_DEBUG", debug_options, 0); sscreen->debug_flags |= debug_get_flags_option("AMD_DEBUG", debug_options, 0); /* Set functions first. */ sscreen->b.context_create = si_pipe_create_context; sscreen->b.destroy = si_destroy_screen; sscreen->b.set_max_shader_compiler_threads = si_set_max_shader_compiler_threads; sscreen->b.is_parallel_shader_compilation_finished = si_is_parallel_shader_compilation_finished; si_init_screen_get_functions(sscreen); si_init_screen_buffer_functions(sscreen); si_init_screen_fence_functions(sscreen); si_init_screen_state_functions(sscreen); si_init_screen_texture_functions(sscreen); si_init_screen_query_functions(sscreen); /* Set these flags in debug_flags early, so that the shader cache takes * them into account. */ if (driQueryOptionb(config->options, "glsl_correct_derivatives_after_discard")) sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL); if (driQueryOptionb(config->options, "radeonsi_enable_sisched")) sscreen->debug_flags |= DBG(SI_SCHED); if (sscreen->debug_flags & DBG(INFO)) ac_print_gpu_info(&sscreen->info); slab_create_parent(&sscreen->pool_transfers, sizeof(struct si_transfer), 64); sscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); if (sscreen->force_aniso >= 0) { printf("radeonsi: Forcing anisotropy filter to %ix\n", /* round down to a power of two */ 1 << util_logbase2(sscreen->force_aniso)); } (void) mtx_init(&sscreen->aux_context_lock, mtx_plain); (void) mtx_init(&sscreen->gpu_load_mutex, mtx_plain); si_init_gs_info(sscreen); if (!si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } si_disk_cache_create(sscreen); /* Determine the number of shader compiler threads. */ hw_threads = sysconf(_SC_NPROCESSORS_ONLN); if (hw_threads >= 12) { num_comp_hi_threads = hw_threads * 3 / 4; num_comp_lo_threads = hw_threads / 3; } else if (hw_threads >= 6) { num_comp_hi_threads = hw_threads - 2; num_comp_lo_threads = hw_threads / 2; } else if (hw_threads >= 2) { num_comp_hi_threads = hw_threads - 1; num_comp_lo_threads = hw_threads / 2; } else { num_comp_hi_threads = 1; num_comp_lo_threads = 1; } num_comp_hi_threads = MIN2(num_comp_hi_threads, ARRAY_SIZE(sscreen->compiler)); num_comp_lo_threads = MIN2(num_comp_lo_threads, ARRAY_SIZE(sscreen->compiler_lowp)); if (!util_queue_init(&sscreen->shader_compiler_queue, "sh", 64, num_comp_hi_threads, UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) { si_destroy_shader_cache(sscreen); FREE(sscreen); return NULL; } if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority, "shlo", 64, num_comp_lo_threads, UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY | UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) { si_destroy_shader_cache(sscreen); FREE(sscreen); return NULL; } if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) si_init_perfcounters(sscreen); /* Determine tessellation ring info. */ bool double_offchip_buffers = sscreen->info.chip_class >= CIK && sscreen->info.family != CHIP_CARRIZO && sscreen->info.family != CHIP_STONEY; /* This must be one less than the maximum number due to a hw limitation. * Various hardware bugs in SI, CIK, and GFX9 need this. */ unsigned max_offchip_buffers_per_se; /* Only certain chips can use the maximum value. */ if (sscreen->info.family == CHIP_VEGA12 || sscreen->info.family == CHIP_VEGA20) max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; else max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63; unsigned max_offchip_buffers = max_offchip_buffers_per_se * sscreen->info.max_se; unsigned offchip_granularity; /* Hawaii has a bug with offchip buffers > 256 that can be worked * around by setting 4K granularity. */ if (sscreen->info.family == CHIP_HAWAII) { sscreen->tess_offchip_block_dw_size = 4096; offchip_granularity = V_03093C_X_4K_DWORDS; } else { sscreen->tess_offchip_block_dw_size = 8192; offchip_granularity = V_03093C_X_8K_DWORDS; } sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se; assert(((sscreen->tess_factor_ring_size / 4) & C_030938_SIZE) == 0); sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4; if (sscreen->info.chip_class >= CIK) { if (sscreen->info.chip_class >= VI) --max_offchip_buffers; sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | S_03093C_OFFCHIP_GRANULARITY(offchip_granularity); } else { assert(offchip_granularity == V_03093C_X_8K_DWORDS); sscreen->vgt_hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers); } /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs * on SI. Some CLEAR_STATE cause asic hang on radeon kernel, etc. * SPI_VS_OUT_CONFIG. So only enable CI CLEAR_STATE on amdgpu kernel.*/ sscreen->has_clear_state = sscreen->info.chip_class >= CIK && sscreen->info.drm_major == 3; sscreen->has_distributed_tess = sscreen->info.chip_class >= VI && sscreen->info.max_se >= 2; sscreen->has_draw_indirect_multi = (sscreen->info.family >= CHIP_POLARIS10) || (sscreen->info.chip_class == VI && sscreen->info.pfp_fw_version >= 121 && sscreen->info.me_fw_version >= 87) || (sscreen->info.chip_class == CIK && sscreen->info.pfp_fw_version >= 211 && sscreen->info.me_fw_version >= 173) || (sscreen->info.chip_class == SI && sscreen->info.pfp_fw_version >= 79 && sscreen->info.me_fw_version >= 142); sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI && sscreen->info.max_se >= 2 && !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER)); sscreen->assume_no_z_fights = driQueryOptionb(config->options, "radeonsi_assume_no_z_fights"); sscreen->commutative_blend_add = driQueryOptionb(config->options, "radeonsi_commutative_blend_add"); { #define OPT_BOOL(name, dflt, description) \ sscreen->options.name = \ driQueryOptionb(config->options, "radeonsi_"#name); #include "si_debug_options.h" } sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 && sscreen->info.family <= CHIP_POLARIS12) || sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2; /* Only enable primitive binning on APUs by default. */ sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2; sscreen->dfsm_allowed = sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2; /* Process DPBB enable flags. */ if (sscreen->debug_flags & DBG(DPBB)) { sscreen->dpbb_allowed = true; if (sscreen->debug_flags & DBG(DFSM)) sscreen->dfsm_allowed = true; } /* Process DPBB disable flags. */ if (sscreen->debug_flags & DBG(NO_DPBB)) { sscreen->dpbb_allowed = false; sscreen->dfsm_allowed = false; } else if (sscreen->debug_flags & DBG(NO_DFSM)) { sscreen->dfsm_allowed = false; } /* While it would be nice not to have this flag, we are constrained * by the reality that LLVM 5.0 doesn't have working VGPR indexing * on GFX9. */ sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI; /* Some chips have RB+ registers, but don't support RB+. Those must * always disable it. */ if (sscreen->info.family == CHIP_STONEY || sscreen->info.chip_class >= GFX9) { sscreen->has_rbplus = true; sscreen->rbplus_allowed = !(sscreen->debug_flags & DBG(NO_RB_PLUS)) && (sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA12 || sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2); } sscreen->dcc_msaa_allowed = !(sscreen->debug_flags & DBG(NO_DCC_MSAA)); sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI; (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0; sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_VMEM_L1; if (sscreen->info.chip_class <= VI) { sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2; sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) sscreen->debug_flags |= DBG_ALL_SHADERS; /* Syntax: * EQAA=s,z,c * Example: * EQAA=8,4,2 * That means 8 coverage samples, 4 Z/S samples, and 2 color samples. * Constraints: * s >= z >= c (ignoring this only wastes memory) * s = [2..16] * z = [2..8] * c = [2..8] * * Only MSAA color and depth buffers are overriden. */ if (sscreen->info.has_eqaa_surface_allocator) { const char *eqaa = debug_get_option("EQAA", NULL); unsigned s,z,f; if (eqaa && sscanf(eqaa, "%u,%u,%u", &s, &z, &f) == 3 && s && z && f) { sscreen->eqaa_force_coverage_samples = s; sscreen->eqaa_force_z_samples = z; sscreen->eqaa_force_color_samples = f; } } for (i = 0; i < num_comp_hi_threads; i++) si_init_compiler(sscreen, &sscreen->compiler[i]); for (i = 0; i < num_comp_lo_threads; i++) si_init_compiler(sscreen, &sscreen->compiler_lowp[i]); /* Create the auxiliary context. This must be done last. */ sscreen->aux_context = si_create_context( &sscreen->b, sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0); if (sscreen->options.aux_debug) { struct u_log_context *log = CALLOC_STRUCT(u_log_context); u_log_context_init(log); sscreen->aux_context->set_log_context(sscreen->aux_context, log); } if (sscreen->debug_flags & DBG(TEST_DMA)) si_test_dma(sscreen); if (sscreen->debug_flags & DBG(TEST_DMA_PERF)) { si_test_dma_perf(sscreen); } if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) | DBG(TEST_VMFAULT_SDMA) | DBG(TEST_VMFAULT_SHADER))) si_test_vmfault(sscreen); if (sscreen->debug_flags & DBG(TEST_GDS)) si_test_gds((struct si_context*)sscreen->aux_context); if (sscreen->debug_flags & DBG(TEST_GDS_MM)) { si_test_gds_memory_management((struct si_context*)sscreen->aux_context, 32 * 1024, 4, RADEON_DOMAIN_GDS); } if (sscreen->debug_flags & DBG(TEST_GDS_OA_MM)) { si_test_gds_memory_management((struct si_context*)sscreen->aux_context, 4, 1, RADEON_DOMAIN_OA); } return &sscreen->b; }