Example #1
static void si_destroy_screen(struct pipe_screen* pscreen)
{
	struct si_screen *sscreen = (struct si_screen *)pscreen;

	/* Check for NULL before the array initializer below dereferences
	 * sscreen. */
	if (!sscreen)
		return;

	struct si_shader_part *parts[] = {
		sscreen->vs_prologs,
		sscreen->vs_epilogs,
		sscreen->tcs_epilogs,
		sscreen->ps_prologs,
		sscreen->ps_epilogs
	};
	unsigned i;

	if (!sscreen->b.ws->unref(sscreen->b.ws))
		return;

	/* Free shader parts. */
	for (i = 0; i < ARRAY_SIZE(parts); i++) {
		while (parts[i]) {
			struct si_shader_part *part = parts[i];

			parts[i] = part->next;
			radeon_shader_binary_clean(&part->binary);
			FREE(part);
		}
	}
	pipe_mutex_destroy(sscreen->shader_parts_mutex);
	si_destroy_shader_cache(sscreen);
	r600_destroy_common_screen(&sscreen->b);
}
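
The freeing loop above is the classic pop-and-free walk over a singly linked list: the head pointer is advanced before each node is released, so the walk never touches freed memory. A minimal, self-contained sketch of the same pattern, using generic names rather than the driver's types:

#include <stdlib.h>

struct node {
	struct node *next;
};

/* Pop each head node and free it until the list is empty; the list
 * pointer itself ends up NULL, so a repeated call is a harmless no-op. */
static void free_list(struct node **list)
{
	while (*list) {
		struct node *n = *list;

		*list = n->next;
		free(n);
	}
}

int main(void)
{
	struct node *head = NULL;

	/* Build a three-element list, then tear it down. */
	for (int i = 0; i < 3; i++) {
		struct node *n = calloc(1, sizeof(*n));
		n->next = head;
		head = n;
	}
	free_list(&head);
	return 0;
}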
Example #2
/*
 * pipe_screen
 */
static void si_destroy_screen(struct pipe_screen* pscreen)
{
	struct si_screen *sscreen = (struct si_screen *)pscreen;
	struct si_shader_part *parts[] = {
		sscreen->vs_prologs,
		sscreen->tcs_epilogs,
		sscreen->gs_prologs,
		sscreen->ps_prologs,
		sscreen->ps_epilogs
	};
	unsigned i;

	if (!sscreen->ws->unref(sscreen->ws))
		return;

	mtx_destroy(&sscreen->aux_context_lock);

	struct u_log_context *aux_log = ((struct si_context *)sscreen->aux_context)->log;
	if (aux_log) {
		sscreen->aux_context->set_log_context(sscreen->aux_context, NULL);
		u_log_context_destroy(aux_log);
		FREE(aux_log);
	}

	sscreen->aux_context->destroy(sscreen->aux_context);

	util_queue_destroy(&sscreen->shader_compiler_queue);
	util_queue_destroy(&sscreen->shader_compiler_queue_low_priority);

	for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++)
		si_destroy_compiler(&sscreen->compiler[i]);

	for (i = 0; i < ARRAY_SIZE(sscreen->compiler_lowp); i++)
		si_destroy_compiler(&sscreen->compiler_lowp[i]);

	/* Free shader parts. */
	for (i = 0; i < ARRAY_SIZE(parts); i++) {
		while (parts[i]) {
			struct si_shader_part *part = parts[i];

			parts[i] = part->next;
			ac_shader_binary_clean(&part->binary);
			FREE(part);
		}
	}
	mtx_destroy(&sscreen->shader_parts_mutex);
	si_destroy_shader_cache(sscreen);

	si_destroy_perfcounters(sscreen);
	si_gpu_load_kill_thread(sscreen);

	mtx_destroy(&sscreen->gpu_load_mutex);

	slab_destroy_parent(&sscreen->pool_transfers);

	disk_cache_destroy(sscreen->disk_shader_cache);
	sscreen->ws->destroy(sscreen->ws);
	FREE(sscreen);
}
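
Note how the early `unref` return guards the whole teardown: when several frontends share one screen, only the caller that drops the last winsys reference runs the destructor body. A distilled sketch of that guard, with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

struct shared { int refcount; };

/* Returns true only when the last reference was just dropped. */
static bool shared_unref(struct shared *s)
{
	return --s->refcount == 0;
}

static void shared_destroy(struct shared *s)
{
	if (!shared_unref(s))
		return; /* other holders remain; nothing to do */
	printf("last reference gone, freeing everything\n");
}

int main(void)
{
	struct shared obj = { .refcount = 2 };

	shared_destroy(&obj); /* no-op: one reference still held */
	shared_destroy(&obj); /* runs the real teardown */
	return 0;
}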
Example #3
static void si_destroy_screen(struct pipe_screen* pscreen)
{
	struct si_screen *sscreen = (struct si_screen *)pscreen;

	/* Check for NULL before the array initializer below dereferences
	 * sscreen. */
	if (!sscreen)
		return;

	struct si_shader_part *parts[] = {
		sscreen->vs_prologs,
		sscreen->vs_epilogs,
		sscreen->tcs_epilogs,
		sscreen->gs_prologs,
		sscreen->ps_prologs,
		sscreen->ps_epilogs
	};
	unsigned i;

	if (!sscreen->b.ws->unref(sscreen->b.ws))
		return;

	if (util_queue_is_initialized(&sscreen->shader_compiler_queue))
		util_queue_destroy(&sscreen->shader_compiler_queue);

	for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++)
		if (sscreen->tm[i])
			LLVMDisposeTargetMachine(sscreen->tm[i]);

	/* Free shader parts. */
	for (i = 0; i < ARRAY_SIZE(parts); i++) {
		while (parts[i]) {
			struct si_shader_part *part = parts[i];

			parts[i] = part->next;
			radeon_shader_binary_clean(&part->binary);
			FREE(part);
		}
	}
	pipe_mutex_destroy(sscreen->shader_parts_mutex);
	si_destroy_shader_cache(sscreen);
	r600_destroy_common_screen(&sscreen->b);
}
Example #4
struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
					   const struct pipe_screen_config *config)
{
	struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
	unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads, i;

	if (!sscreen) {
		return NULL;
	}

	sscreen->ws = ws;
	ws->query_info(ws, &sscreen->info);

	if (sscreen->info.chip_class >= GFX9) {
		sscreen->se_tile_repeat = 32 * sscreen->info.max_se;
	} else {
		ac_get_raster_config(&sscreen->info,
				     &sscreen->pa_sc_raster_config,
				     &sscreen->pa_sc_raster_config_1,
				     &sscreen->se_tile_repeat);
	}

	sscreen->debug_flags = debug_get_flags_option("R600_DEBUG",
						      debug_options, 0);
	sscreen->debug_flags |= debug_get_flags_option("AMD_DEBUG",
						       debug_options, 0);

	/* Set functions first. */
	sscreen->b.context_create = si_pipe_create_context;
	sscreen->b.destroy = si_destroy_screen;
	sscreen->b.set_max_shader_compiler_threads =
		si_set_max_shader_compiler_threads;
	sscreen->b.is_parallel_shader_compilation_finished =
		si_is_parallel_shader_compilation_finished;

	si_init_screen_get_functions(sscreen);
	si_init_screen_buffer_functions(sscreen);
	si_init_screen_fence_functions(sscreen);
	si_init_screen_state_functions(sscreen);
	si_init_screen_texture_functions(sscreen);
	si_init_screen_query_functions(sscreen);

	/* Set these flags in debug_flags early, so that the shader cache takes
	 * them into account.
	 */
	if (driQueryOptionb(config->options,
			    "glsl_correct_derivatives_after_discard"))
		sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL);
	if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
		sscreen->debug_flags |= DBG(SI_SCHED);

	if (sscreen->debug_flags & DBG(INFO))
		ac_print_gpu_info(&sscreen->info);

	slab_create_parent(&sscreen->pool_transfers,
			   sizeof(struct si_transfer), 64);

	sscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
	if (sscreen->force_aniso >= 0) {
		printf("radeonsi: Forcing anisotropy filter to %ix\n",
		       /* round down to a power of two */
		       1 << util_logbase2(sscreen->force_aniso));
	}
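
	/* For example, R600_TEX_ANISO=10 forces 8x filtering: MIN2(16, 10) is
	 * 10, and 1 << util_logbase2(10) rounds it down to 8. The default of
	 * -1 fails the force_aniso >= 0 check, leaving anisotropy unforced. */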

	(void) mtx_init(&sscreen->aux_context_lock, mtx_plain);
	(void) mtx_init(&sscreen->gpu_load_mutex, mtx_plain);

	si_init_gs_info(sscreen);
	if (!si_init_shader_cache(sscreen)) {
		FREE(sscreen);
		return NULL;
	}

	si_disk_cache_create(sscreen);

	/* Determine the number of shader compiler threads. */
	hw_threads = sysconf(_SC_NPROCESSORS_ONLN);

	if (hw_threads >= 12) {
		num_comp_hi_threads = hw_threads * 3 / 4;
		num_comp_lo_threads = hw_threads / 3;
	} else if (hw_threads >= 6) {
		num_comp_hi_threads = hw_threads - 2;
		num_comp_lo_threads = hw_threads / 2;
	} else if (hw_threads >= 2) {
		num_comp_hi_threads = hw_threads - 1;
		num_comp_lo_threads = hw_threads / 2;
	} else {
		num_comp_hi_threads = 1;
		num_comp_lo_threads = 1;
	}
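
	/* For example: 16 hardware threads give 12 high- and 5 low-priority
	 * compiler threads; 8 give 6 and 4; 4 give 3 and 2 (before the
	 * ARRAY_SIZE clamps below). */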

	num_comp_hi_threads = MIN2(num_comp_hi_threads,
				   ARRAY_SIZE(sscreen->compiler));
	num_comp_lo_threads = MIN2(num_comp_lo_threads,
				   ARRAY_SIZE(sscreen->compiler_lowp));

	if (!util_queue_init(&sscreen->shader_compiler_queue, "sh",
			     64, num_comp_hi_threads,
			     UTIL_QUEUE_INIT_RESIZE_IF_FULL |
			     UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) {
		si_destroy_shader_cache(sscreen);
		FREE(sscreen);
		return NULL;
	}

	if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
			     "shlo",
			     64, num_comp_lo_threads,
			     UTIL_QUEUE_INIT_RESIZE_IF_FULL |
			     UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY |
			     UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
		/* The high-priority queue was already created above;
		 * destroy it too on this error path. */
		util_queue_destroy(&sscreen->shader_compiler_queue);
		si_destroy_shader_cache(sscreen);
		FREE(sscreen);
		return NULL;
	}

	if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false))
		si_init_perfcounters(sscreen);

	/* Determine tessellation ring info. */
	bool double_offchip_buffers = sscreen->info.chip_class >= CIK &&
				      sscreen->info.family != CHIP_CARRIZO &&
				      sscreen->info.family != CHIP_STONEY;
	/* This must be one less than the maximum number due to a hw limitation.
	 * Various hardware bugs in SI, CIK, and GFX9 need this.
	 */
	unsigned max_offchip_buffers_per_se;

	/* Only certain chips can use the maximum value. */
	if (sscreen->info.family == CHIP_VEGA12 ||
	    sscreen->info.family == CHIP_VEGA20)
		max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
	else
		max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63;

	unsigned max_offchip_buffers = max_offchip_buffers_per_se *
				       sscreen->info.max_se;
	unsigned offchip_granularity;

	/* Hawaii has a bug with offchip buffers > 256 that can be worked
	 * around by setting 4K granularity.
	 */
	if (sscreen->info.family == CHIP_HAWAII) {
		sscreen->tess_offchip_block_dw_size = 4096;
		offchip_granularity = V_03093C_X_4K_DWORDS;
	} else {
		sscreen->tess_offchip_block_dw_size = 8192;
		offchip_granularity = V_03093C_X_8K_DWORDS;
	}

	sscreen->tess_factor_ring_size = 32768 * sscreen->info.max_se;
	assert(((sscreen->tess_factor_ring_size / 4) & C_030938_SIZE) == 0);
	sscreen->tess_offchip_ring_size = max_offchip_buffers *
					  sscreen->tess_offchip_block_dw_size * 4;
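
	/* Worked example (hypothetical 4-SE part, double_offchip_buffers,
	 * not Vega12/20): max_offchip_buffers = 127 * 4 = 508 blocks of
	 * 8192 dwords (32 KiB) each, so the off-chip ring is
	 * 508 * 32768 bytes (~15.9 MiB) and the tess factor ring is
	 * 32768 * 4 = 128 KiB. */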

	if (sscreen->info.chip_class >= CIK) {
		if (sscreen->info.chip_class >= VI)
			--max_offchip_buffers;
		sscreen->vgt_hs_offchip_param =
			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
	} else {
		assert(offchip_granularity == V_03093C_X_8K_DWORDS);
		sscreen->vgt_hs_offchip_param =
			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
	}

	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
	 * on SI. Some CLEAR_STATE packets also hang the ASIC on the radeon
	 * kernel driver (e.g. when touching SPI_VS_OUT_CONFIG), so only
	 * enable CIK CLEAR_STATE on the amdgpu kernel driver.
	 */
	sscreen->has_clear_state = sscreen->info.chip_class >= CIK &&
				   sscreen->info.drm_major == 3;

	sscreen->has_distributed_tess =
		sscreen->info.chip_class >= VI &&
		sscreen->info.max_se >= 2;

	sscreen->has_draw_indirect_multi =
		(sscreen->info.family >= CHIP_POLARIS10) ||
		(sscreen->info.chip_class == VI &&
		 sscreen->info.pfp_fw_version >= 121 &&
		 sscreen->info.me_fw_version >= 87) ||
		(sscreen->info.chip_class == CIK &&
		 sscreen->info.pfp_fw_version >= 211 &&
		 sscreen->info.me_fw_version >= 173) ||
		(sscreen->info.chip_class == SI &&
		 sscreen->info.pfp_fw_version >= 79 &&
		 sscreen->info.me_fw_version >= 142);

	sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI &&
					 sscreen->info.max_se >= 2 &&
					 !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
	sscreen->assume_no_z_fights =
		driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
	sscreen->commutative_blend_add =
		driQueryOptionb(config->options, "radeonsi_commutative_blend_add");

	{
#define OPT_BOOL(name, dflt, description) \
		sscreen->options.name = \
			driQueryOptionb(config->options, "radeonsi_"#name);
#include "si_debug_options.h"
	}

	sscreen->has_gfx9_scissor_bug = sscreen->info.family == CHIP_VEGA10 ||
					sscreen->info.family == CHIP_RAVEN;
	sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
					    sscreen->info.family <= CHIP_POLARIS12) ||
					   sscreen->info.family == CHIP_VEGA10 ||
					   sscreen->info.family == CHIP_RAVEN;
	sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
					sscreen->info.family == CHIP_RAVEN;
	sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2;

	/* Only enable primitive binning on APUs by default. */
	sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN ||
				sscreen->info.family == CHIP_RAVEN2;

	sscreen->dfsm_allowed = sscreen->info.family == CHIP_RAVEN ||
				sscreen->info.family == CHIP_RAVEN2;

	/* Process DPBB enable flags. */
	if (sscreen->debug_flags & DBG(DPBB)) {
		sscreen->dpbb_allowed = true;
		if (sscreen->debug_flags & DBG(DFSM))
			sscreen->dfsm_allowed = true;
	}

	/* Process DPBB disable flags. */
	if (sscreen->debug_flags & DBG(NO_DPBB)) {
		sscreen->dpbb_allowed = false;
		sscreen->dfsm_allowed = false;
	} else if (sscreen->debug_flags & DBG(NO_DFSM)) {
		sscreen->dfsm_allowed = false;
	}

	/* While it would be nice not to have this flag, we are constrained
	 * by the reality that LLVM 5.0 doesn't have working VGPR indexing
	 * on GFX9.
	 */
	sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI;

	/* Some chips have RB+ registers, but don't support RB+. Those must
	 * always disable it.
	 */
	if (sscreen->info.family == CHIP_STONEY ||
	    sscreen->info.chip_class >= GFX9) {
		sscreen->has_rbplus = true;

		sscreen->rbplus_allowed =
			!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
			(sscreen->info.family == CHIP_STONEY ||
			 sscreen->info.family == CHIP_VEGA12 ||
			 sscreen->info.family == CHIP_RAVEN ||
			 sscreen->info.family == CHIP_RAVEN2);
	}

	sscreen->dcc_msaa_allowed =
		!(sscreen->debug_flags & DBG(NO_DCC_MSAA));

	sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;

	(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
	sscreen->use_monolithic_shaders =
		(sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;

	sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
					  SI_CONTEXT_INV_VMEM_L1;
	if (sscreen->info.chip_class <= VI) {
		sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
		sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
	}

	if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
		sscreen->debug_flags |= DBG_ALL_SHADERS;

	/* Syntax:
	 *     EQAA=s,z,c
	 * Example:
	 *     EQAA=8,4,2
	 *
	 * That means 8 coverage samples, 4 Z/S samples, and 2 color samples.
	 * Constraints:
	 *     s >= z >= c (ignoring this only wastes memory)
	 *     s = [2..16]
	 *     z = [2..8]
	 *     c = [2..8]
	 *
	 * Only MSAA color and depth buffers are overridden.
	 */
	if (sscreen->info.has_eqaa_surface_allocator) {
		const char *eqaa = debug_get_option("EQAA", NULL);
		unsigned s, z, f;

		if (eqaa && sscanf(eqaa, "%u,%u,%u", &s, &z, &f) == 3 && s && z && f) {
			sscreen->eqaa_force_coverage_samples = s;
			sscreen->eqaa_force_z_samples = z;
			sscreen->eqaa_force_color_samples = f;
		}
	}

	for (i = 0; i < num_comp_hi_threads; i++)
		si_init_compiler(sscreen, &sscreen->compiler[i]);
	for (i = 0; i < num_comp_lo_threads; i++)
		si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);

	/* Create the auxiliary context. This must be done last. */
	sscreen->aux_context = si_create_context(
		&sscreen->b, sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0);
	if (sscreen->options.aux_debug) {
		struct u_log_context *log = CALLOC_STRUCT(u_log_context);
		u_log_context_init(log);
		sscreen->aux_context->set_log_context(sscreen->aux_context, log);
	}

	if (sscreen->debug_flags & DBG(TEST_DMA))
		si_test_dma(sscreen);

	if (sscreen->debug_flags & DBG(TEST_DMA_PERF))
		si_test_dma_perf(sscreen);

	if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
				      DBG(TEST_VMFAULT_SDMA) |
				      DBG(TEST_VMFAULT_SHADER)))
		si_test_vmfault(sscreen);

	if (sscreen->debug_flags & DBG(TEST_GDS))
		si_test_gds((struct si_context*)sscreen->aux_context);

	if (sscreen->debug_flags & DBG(TEST_GDS_MM)) {
		si_test_gds_memory_management((struct si_context*)sscreen->aux_context,
					      32 * 1024, 4, RADEON_DOMAIN_GDS);
	}
	if (sscreen->debug_flags & DBG(TEST_GDS_OA_MM)) {
		si_test_gds_memory_management((struct si_context*)sscreen->aux_context,
					      4, 1, RADEON_DOMAIN_OA);
	}

	return &sscreen->b;
}
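
For context, a sketch of the create/use/destroy lifecycle from the caller's side. `my_winsys_create` is a hypothetical stand-in for a real winsys constructor (e.g. the amdgpu winsys entry point); the rest only uses the vtable members shown in the examples above:

/* Hypothetical helper: a real frontend obtains the winsys from its
 * DRM fd (e.g. via the amdgpu winsys entry point). */
struct radeon_winsys *my_winsys_create(int fd);

static void example_lifecycle(int fd, const struct pipe_screen_config *config)
{
	struct radeon_winsys *ws = my_winsys_create(fd);
	if (!ws)
		return;

	struct pipe_screen *screen = radeonsi_screen_create(ws, config);
	if (!screen) {
		/* Creation failed; the winsys is still the caller's
		 * to clean up. */
		ws->destroy(ws);
		return;
	}

	/* ... use the screen ... */

	/* Tear down through the vtable; this dispatches to
	 * si_destroy_screen, which unrefs and finally destroys ws. */
	screen->destroy(screen);
}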