struct pipe_context *
dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
{
   struct dd_context *dctx;

   if (!pipe)
      return NULL;

   dctx = CALLOC_STRUCT(dd_context);
   if (!dctx)
      goto fail;

   dctx->pipe = pipe;
   dctx->base.priv = pipe->priv; /* expose wrapped priv data */
   dctx->base.screen = &dscreen->base;
   dctx->base.stream_uploader = pipe->stream_uploader;
   dctx->base.const_uploader = pipe->const_uploader;

   dctx->base.destroy = dd_context_destroy;

   CTX_INIT(render_condition);
   CTX_INIT(create_query);
   CTX_INIT(create_batch_query);
   CTX_INIT(destroy_query);
   CTX_INIT(begin_query);
   CTX_INIT(end_query);
   CTX_INIT(get_query_result);
   CTX_INIT(set_active_query_state);
   CTX_INIT(create_blend_state);
   CTX_INIT(bind_blend_state);
   CTX_INIT(delete_blend_state);
   CTX_INIT(create_sampler_state);
   CTX_INIT(bind_sampler_states);
   CTX_INIT(delete_sampler_state);
   CTX_INIT(create_rasterizer_state);
   CTX_INIT(bind_rasterizer_state);
   CTX_INIT(delete_rasterizer_state);
   CTX_INIT(create_depth_stencil_alpha_state);
   CTX_INIT(bind_depth_stencil_alpha_state);
   CTX_INIT(delete_depth_stencil_alpha_state);
   CTX_INIT(create_fs_state);
   CTX_INIT(bind_fs_state);
   CTX_INIT(delete_fs_state);
   CTX_INIT(create_vs_state);
   CTX_INIT(bind_vs_state);
   CTX_INIT(delete_vs_state);
   CTX_INIT(create_gs_state);
   CTX_INIT(bind_gs_state);
   CTX_INIT(delete_gs_state);
   CTX_INIT(create_tcs_state);
   CTX_INIT(bind_tcs_state);
   CTX_INIT(delete_tcs_state);
   CTX_INIT(create_tes_state);
   CTX_INIT(bind_tes_state);
   CTX_INIT(delete_tes_state);
   CTX_INIT(create_compute_state);
   CTX_INIT(bind_compute_state);
   CTX_INIT(delete_compute_state);
   CTX_INIT(create_vertex_elements_state);
   CTX_INIT(bind_vertex_elements_state);
   CTX_INIT(delete_vertex_elements_state);
   CTX_INIT(set_blend_color);
   CTX_INIT(set_stencil_ref);
   CTX_INIT(set_sample_mask);
   CTX_INIT(set_min_samples);
   CTX_INIT(set_clip_state);
   CTX_INIT(set_constant_buffer);
   CTX_INIT(set_framebuffer_state);
   CTX_INIT(set_polygon_stipple);
   CTX_INIT(set_scissor_states);
   CTX_INIT(set_viewport_states);
   CTX_INIT(set_sampler_views);
   CTX_INIT(set_tess_state);
   CTX_INIT(set_shader_buffers);
   CTX_INIT(set_shader_images);
   CTX_INIT(set_vertex_buffers);
   CTX_INIT(create_stream_output_target);
   CTX_INIT(stream_output_target_destroy);
   CTX_INIT(set_stream_output_targets);
   CTX_INIT(create_sampler_view);
   CTX_INIT(sampler_view_destroy);
   CTX_INIT(create_surface);
   CTX_INIT(surface_destroy);
   CTX_INIT(transfer_map);
   CTX_INIT(transfer_flush_region);
   CTX_INIT(transfer_unmap);
   CTX_INIT(buffer_subdata);
   CTX_INIT(texture_subdata);
   CTX_INIT(texture_barrier);
   CTX_INIT(memory_barrier);
   CTX_INIT(resource_commit);
   /* create_video_codec */
   /* create_video_buffer */
   /* set_compute_resources */
   /* set_global_binding */
   CTX_INIT(get_sample_position);
   CTX_INIT(invalidate_resource);
   CTX_INIT(get_device_reset_status);
   CTX_INIT(set_device_reset_callback);
   CTX_INIT(dump_debug_state);
   CTX_INIT(emit_string_marker);
   CTX_INIT(create_texture_handle);
   CTX_INIT(delete_texture_handle);
   CTX_INIT(make_texture_handle_resident);
   CTX_INIT(create_image_handle);
   CTX_INIT(delete_image_handle);
   CTX_INIT(make_image_handle_resident);

   dd_init_draw_functions(dctx);

   u_log_context_init(&dctx->log);
   if (pipe->set_log_context)
      pipe->set_log_context(pipe, &dctx->log);

   dctx->draw_state.sample_mask = ~0;

   if (dscreen->mode == DD_DETECT_HANGS_PIPELINED) {
      dctx->fence = pipe_buffer_create(dscreen->screen, PIPE_BIND_CUSTOM,
                                       PIPE_USAGE_STAGING, 4);
      if (!dctx->fence)
         goto fail;

      dctx->mapped_fence = pipe_buffer_map(pipe, dctx->fence,
                                           PIPE_TRANSFER_READ_WRITE |
                                           PIPE_TRANSFER_PERSISTENT |
                                           PIPE_TRANSFER_COHERENT,
                                           &dctx->fence_transfer);
      if (!dctx->mapped_fence)
         goto fail;

      *dctx->mapped_fence = 0;

      (void) mtx_init(&dctx->mutex, mtx_plain);
      dctx->thread = u_thread_create(dd_thread_pipelined_hang_detect, dctx);
      if (!dctx->thread) {
         mtx_destroy(&dctx->mutex);
         goto fail;
      }
   }

   return &dctx->base;

fail:
   if (dctx) {
      if (dctx->mapped_fence)
         pipe_transfer_unmap(pipe, dctx->fence_transfer);
      pipe_resource_reference(&dctx->fence, NULL);
      FREE(dctx);
   }
   pipe->destroy(pipe);
   return NULL;
}
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
                                           const struct pipe_screen_config *config)
{
   struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
   unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i;

   if (!sscreen) {
      return NULL;
   }

   sscreen->ws = ws;
   ws->query_info(ws, &sscreen->info);

   if (sscreen->info.chip_class >= GFX9) {
      sscreen->se_tile_repeat = 32 * sscreen->info.max_se;
   } else {
      ac_get_raster_config(&sscreen->info,
                           &sscreen->pa_sc_raster_config,
                           &sscreen->pa_sc_raster_config_1,
                           &sscreen->se_tile_repeat);
   }

   sscreen->debug_flags = debug_get_flags_option("R600_DEBUG",
                                                 debug_options, 0);
   sscreen->debug_flags |= debug_get_flags_option("AMD_DEBUG",
                                                  debug_options, 0);

   /* Set functions first. */
   sscreen->b.context_create = si_pipe_create_context;
   sscreen->b.destroy = si_destroy_screen;
   sscreen->b.set_max_shader_compiler_threads =
      si_set_max_shader_compiler_threads;
   sscreen->b.is_parallel_shader_compilation_finished =
      si_is_parallel_shader_compilation_finished;

   si_init_screen_get_functions(sscreen);
   si_init_screen_buffer_functions(sscreen);
   si_init_screen_fence_functions(sscreen);
   si_init_screen_state_functions(sscreen);
   si_init_screen_texture_functions(sscreen);
   si_init_screen_query_functions(sscreen);

   /* Set these flags in debug_flags early, so that the shader cache takes
    * them into account.
    */
   if (driQueryOptionb(config->options,
                       "glsl_correct_derivatives_after_discard"))
      sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL);
   if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
      sscreen->debug_flags |= DBG(SI_SCHED);

   if (sscreen->debug_flags & DBG(INFO))
      ac_print_gpu_info(&sscreen->info);

   slab_create_parent(&sscreen->pool_transfers,
                      sizeof(struct si_transfer), 64);

   sscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
   if (sscreen->force_aniso >= 0) {
      printf("radeonsi: Forcing anisotropy filter to %ix\n",
             /* round down to a power of two */
             1 << util_logbase2(sscreen->force_aniso));
   }

   (void) mtx_init(&sscreen->aux_context_lock, mtx_plain);
   (void) mtx_init(&sscreen->gpu_load_mutex, mtx_plain);

   si_init_gs_info(sscreen);
   if (!si_init_shader_cache(sscreen)) {
      FREE(sscreen);
      return NULL;
   }

   si_disk_cache_create(sscreen);

   /* Determine the number of shader compiler threads. */
   hw_threads = sysconf(_SC_NPROCESSORS_ONLN);

   if (hw_threads >= 12) {
      num_comp_hi_threads = hw_threads * 3 / 4;
      num_comp_lo_threads = hw_threads / 3;
   } else if (hw_threads >= 6) {
      num_comp_hi_threads = hw_threads - 2;
      num_comp_lo_threads = hw_threads / 2;
   } else if (hw_threads >= 2) {
      num_comp_hi_threads = hw_threads - 1;
      num_comp_lo_threads = hw_threads / 2;
   } else {
      num_comp_hi_threads = 1;
      num_comp_lo_threads = 1;
   }

   num_comp_hi_threads = MIN2(num_comp_hi_threads,
                              ARRAY_SIZE(sscreen->compiler));
   num_comp_lo_threads = MIN2(num_comp_lo_threads,
                              ARRAY_SIZE(sscreen->compiler_lowp));

   if (!util_queue_init(&sscreen->shader_compiler_queue, "sh",
                        64, num_comp_hi_threads,
                        UTIL_QUEUE_INIT_RESIZE_IF_FULL |
                        UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) {
      si_destroy_shader_cache(sscreen);
      FREE(sscreen);
      return NULL;
   }

   if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
                        "shlo", 64, num_comp_lo_threads,
                        UTIL_QUEUE_INIT_RESIZE_IF_FULL |
                        UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY |
                        UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
      si_destroy_shader_cache(sscreen);
      FREE(sscreen);
      return NULL;
   }

   if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
      si_init_perfcounters(sscreen);
   /* Determine tessellation ring info. */
   bool double_offchip_buffers = sscreen->info.chip_class >= CIK &&
                                 sscreen->info.family != CHIP_CARRIZO &&
                                 sscreen->info.family != CHIP_STONEY;
   /* This must be one less than the maximum number due to a hw limitation.
    * Various hardware bugs in SI, CIK, and GFX9 need this.
    */
   unsigned max_offchip_buffers_per_se;

   /* Only certain chips can use the maximum value. */
   if (sscreen->info.family == CHIP_VEGA12 ||
       sscreen->info.family == CHIP_VEGA20)
      max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
   else
      max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;

   unsigned max_offchip_buffers = max_offchip_buffers_per_se *
                                  sscreen->info.max_se;
   unsigned offchip_granularity;

   /* Hawaii has a bug with offchip buffers > 256 that can be worked
    * around by setting 4K granularity.
    */
   if (sscreen->info.family == CHIP_HAWAII) {
      sscreen->tess_offchip_block_dw_size = 4096;
      offchip_granularity = V_03093C_X_4K_DWORDS;
   } else {
      sscreen->tess_offchip_block_dw_size = 8192;
      offchip_granularity = V_03093C_X_8K_DWORDS;
   }

   sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
   assert(((sscreen->tess_factor_ring_size / 4) & C_030938_SIZE) == 0);

   sscreen->tess_offchip_ring_size = max_offchip_buffers *
                                     sscreen->tess_offchip_block_dw_size * 4;

   if (sscreen->info.chip_class >= CIK) {
      if (sscreen->info.chip_class >= VI)
         --max_offchip_buffers;
      sscreen->vgt_hs_offchip_param =
         S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
         S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
   } else {
      assert(offchip_granularity == V_03093C_X_8K_DWORDS);
      sscreen->vgt_hs_offchip_param =
         S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
   }

   /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
    * on SI. Some CLEAR_STATE registers (e.g. SPI_VS_OUT_CONFIG) also cause
    * ASIC hangs on the radeon kernel driver, so only enable CI CLEAR_STATE
    * on the amdgpu kernel driver.
    */
   sscreen->has_clear_state = sscreen->info.chip_class >= CIK &&
                              sscreen->info.drm_major == 3;

   sscreen->has_distributed_tess =
      sscreen->info.chip_class >= VI &&
      sscreen->info.max_se >= 2;

   sscreen->has_draw_indirect_multi =
      (sscreen->info.family >= CHIP_POLARIS10) ||
      (sscreen->info.chip_class == VI &&
       sscreen->info.pfp_fw_version >= 121 &&
       sscreen->info.me_fw_version >= 87) ||
      (sscreen->info.chip_class == CIK &&
       sscreen->info.pfp_fw_version >= 211 &&
       sscreen->info.me_fw_version >= 173) ||
      (sscreen->info.chip_class == SI &&
       sscreen->info.pfp_fw_version >= 79 &&
       sscreen->info.me_fw_version >= 142);

   sscreen->has_out_of_order_rast =
      sscreen->info.chip_class >= VI &&
      sscreen->info.max_se >= 2 &&
      !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
   sscreen->assume_no_z_fights =
      driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
   sscreen->commutative_blend_add =
      driQueryOptionb(config->options, "radeonsi_commutative_blend_add");

   {
#define OPT_BOOL(name, dflt, description) \
      sscreen->options.name = \
         driQueryOptionb(config->options, "radeonsi_"#name);
#include "si_debug_options.h"
   }

   sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 ||
                                   sscreen->info.family == CHIP_RAVEN;
   sscreen->has_msaa_sample_loc_bug =
      (sscreen->info.family >= CHIP_POLARIS10 &&
       sscreen->info.family <= CHIP_POLARIS12) ||
      sscreen->info.family == CHIP_VEGA10 ||
      sscreen->info.family == CHIP_RAVEN;
   sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
                                   sscreen->info.family == CHIP_RAVEN;
   sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2;

   /* Only enable primitive binning on APUs by default. */
   sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN ||
                           sscreen->info.family == CHIP_RAVEN2;

   sscreen->dfsm_allowed = sscreen->info.family == CHIP_RAVEN ||
                           sscreen->info.family == CHIP_RAVEN2;

   /* Process DPBB enable flags. */
   if (sscreen->debug_flags & DBG(DPBB)) {
      sscreen->dpbb_allowed = true;
      if (sscreen->debug_flags & DBG(DFSM))
         sscreen->dfsm_allowed = true;
   }

   /* Process DPBB disable flags. */
   if (sscreen->debug_flags & DBG(NO_DPBB)) {
      sscreen->dpbb_allowed = false;
      sscreen->dfsm_allowed = false;
   } else if (sscreen->debug_flags & DBG(NO_DFSM)) {
      sscreen->dfsm_allowed = false;
   }

   /* While it would be nice not to have this flag, we are constrained
    * by the reality that LLVM 5.0 doesn't have working VGPR indexing
    * on GFX9.
    */
   sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI;

   /* Some chips have RB+ registers, but don't support RB+. Those must
    * always disable it.
    */
   if (sscreen->info.family == CHIP_STONEY ||
       sscreen->info.chip_class >= GFX9) {
      sscreen->has_rbplus = true;

      sscreen->rbplus_allowed =
         !(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
         (sscreen->info.family == CHIP_STONEY ||
          sscreen->info.family == CHIP_VEGA12 ||
          sscreen->info.family == CHIP_RAVEN ||
          sscreen->info.family == CHIP_RAVEN2);
   }

   sscreen->dcc_msaa_allowed =
      !(sscreen->debug_flags & DBG(NO_DCC_MSAA));

   sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;

   (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
   sscreen->use_monolithic_shaders =
      (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;

   sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
                                     SI_CONTEXT_INV_VMEM_L1;
   if (sscreen->info.chip_class <= VI) {
      sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
      sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
   }

   if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
      sscreen->debug_flags |= DBG_ALL_SHADERS;

   /* Syntax:
    *     EQAA=s,z,c
    * Example:
    *     EQAA=8,4,2
    *
    * That means 8 coverage samples, 4 Z/S samples, and 2 color samples.
    * Constraints:
    *     s >= z >= c (ignoring this only wastes memory)
    *     s = [2..16]
    *     z = [2..8]
    *     c = [2..8]
    *
    * Only MSAA color and depth buffers are overridden.
    */
   if (sscreen->info.has_eqaa_surface_allocator) {
      const char *eqaa = debug_get_option("EQAA", NULL);
      unsigned s, z, f;

      if (eqaa && sscanf(eqaa, "%u,%u,%u", &s, &z, &f) == 3 && s && z && f) {
         sscreen->eqaa_force_coverage_samples = s;
         sscreen->eqaa_force_z_samples = z;
         sscreen->eqaa_force_color_samples = f;
      }
   }

   for (i = 0; i < num_comp_hi_threads; i++)
      si_init_compiler(sscreen, &sscreen->compiler[i]);
   for (i = 0; i < num_comp_lo_threads; i++)
      si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);

   /* Create the auxiliary context. This must be done last. */
   sscreen->aux_context = si_create_context(
      &sscreen->b, sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0);
   if (sscreen->options.aux_debug) {
      struct u_log_context *log = CALLOC_STRUCT(u_log_context);
      u_log_context_init(log);
      sscreen->aux_context->set_log_context(sscreen->aux_context, log);
   }

   if (sscreen->debug_flags & DBG(TEST_DMA))
      si_test_dma(sscreen);

   if (sscreen->debug_flags & DBG(TEST_DMA_PERF)) {
      si_test_dma_perf(sscreen);
   }

   if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
                               DBG(TEST_VMFAULT_SDMA) |
                               DBG(TEST_VMFAULT_SHADER)))
      si_test_vmfault(sscreen);

   if (sscreen->debug_flags & DBG(TEST_GDS))
      si_test_gds((struct si_context*)sscreen->aux_context);

   if (sscreen->debug_flags & DBG(TEST_GDS_MM)) {
      si_test_gds_memory_management((struct si_context*)sscreen->aux_context,
                                    32 * 1024, 4, RADEON_DOMAIN_GDS);
   }
   if (sscreen->debug_flags & DBG(TEST_GDS_OA_MM)) {
      si_test_gds_memory_management((struct si_context*)sscreen->aux_context,
                                    4, 1, RADEON_DOMAIN_OA);
   }

   return &sscreen->b;
}