Beispiel #1
0
void si_init_config(struct radv_physical_device *physical_device,
		    struct radv_cmd_buffer *cmd_buffer)
{
	unsigned num_rb = MIN2(physical_device->rad_info.num_render_backends, 16);
	unsigned rb_mask = physical_device->rad_info.enabled_rb_mask;
	unsigned raster_config, raster_config_1;
	int i;
	struct radeon_winsys_cs *cs = cmd_buffer->cs;
	radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
	radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1));
	radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1));

	radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
	radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

	/* FIXME calculate these values somehow ??? */
	radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
	radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
	radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);

	radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
	radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);

	radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
	radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
	if (physical_device->rad_info.chip_class < CIK)
		radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
				      S_008A14_CLIP_VTX_REORDER_ENA(1));

	radeon_set_context_reg(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
	radeon_set_context_reg(cs, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);

	radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);

	for (i = 0; i < 16; i++) {
		radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
		radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
	}

	switch (physical_device->rad_info.family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		raster_config = 0x2a00126a;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_VERDE:
		raster_config = 0x0000124a;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_OLAND:
		raster_config = 0x00000082;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_HAINAN:
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_BONAIRE:
		raster_config = 0x16000012;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_HAWAII:
		raster_config = 0x3a00161a;
		raster_config_1 = 0x0000002e;
		break;
	case CHIP_FIJI:
		if (physical_device->rad_info.cik_macrotile_mode_array[0] == 0x000000e8) {
			/* old kernels with old tiling config */
			raster_config = 0x16000012;
			raster_config_1 = 0x0000002a;
		} else {
			raster_config = 0x3a00161a;
			raster_config_1 = 0x0000002e;
		}
		break;
	case CHIP_POLARIS10:
		raster_config = 0x16000012;
		raster_config_1 = 0x0000002a;
		break;
	case CHIP_POLARIS11:
		raster_config = 0x16000012;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_TONGA:
		raster_config = 0x16000012;
		raster_config_1 = 0x0000002a;
		break;
	case CHIP_ICELAND:
		if (num_rb == 1)
			raster_config = 0x00000000;
		else
			raster_config = 0x00000002;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_CARRIZO:
		raster_config = 0x00000002;
		raster_config_1 = 0x00000000;
		break;
	case CHIP_KAVERI:
		/* KV should be 0x00000002, but that causes problems with radeon */
		raster_config = 0x00000000; /* 0x00000002 */
		raster_config_1 = 0x00000000;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_STONEY:
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	default:
		fprintf(stderr,
			"radeonsi: Unknown GPU, using 0 for raster_config\n");
		raster_config = 0x00000000;
		raster_config_1 = 0x00000000;
		break;
	}

	/* Always use the default config when all backends are enabled
	 * (or when we failed to determine the enabled backends).
	 */
	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
		radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
				       raster_config);
		if (physical_device->rad_info.chip_class >= CIK)
			radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
					       raster_config_1);
	} else {
		si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1);
	}

	radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
	radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
	radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
			       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
	radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
	radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
			       S_028034_BR_X(16384) | S_028034_BR_Y(16384));

	radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
	radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
	radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
	radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);

	radeon_set_context_reg(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
	radeon_set_context_reg(cs, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
	radeon_set_context_reg(cs, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
	radeon_set_context_reg(cs, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));

	radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
	radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
	radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
	radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE,
			       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
			       S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));

	radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
	radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
	radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);

	if (physical_device->rad_info.chip_class >= CIK) {
		radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
		radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
		radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));

		if (physical_device->rad_info.num_good_compute_units /
		    (physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) {
			/* Too few available compute units per SH. Disallowing
			 * VS to run on CU0 could hurt us more than late VS
			 * allocation would help.
			 *
			 * LATE_ALLOC_VS = 2 is the highest safe number.
			 */
			radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
			radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
			radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
		} else {
			/* Set LATE_ALLOC_VS == 31. It should be less than
			 * the number of scratch waves. Limitations:
			 * - VS can't execute on CU0.
			 * - If HS writes outputs to LDS, LS can't execute on CU0.
			 */
			radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
			radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
			radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
		}

		radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
	}

	if (physical_device->rad_info.chip_class >= VI) {
		radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL,
				       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
				       S_028424_OVERWRITE_COMBINER_WATERMARK(4));
		radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
		radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
		radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
				       S_028B50_ACCUM_ISOLINE(32) |
				       S_028B50_ACCUM_TRI(11) |
				       S_028B50_ACCUM_QUAD(11) |
				       S_028B50_DONUT_SPLIT(16));
	} else {
		radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
		radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
	}

	if (physical_device->rad_info.family == CHIP_STONEY)
		radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0);

	si_init_compute(physical_device, cs);
}
Beispiel #2
0
static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	unsigned nparams, i, vgpr_comp_cnt;
	uint64_t va;

	si_pm4_delete_state(rctx, vs, shader->pm4);
	pm4 = shader->pm4 = si_pm4_alloc_state(rctx);

	if (pm4 == NULL)
		return;

	/* Certain attributes (position, psize, etc.) don't count as params.
	 * VS is required to export at least one param and r600_shader_from_tgsi()
	 * takes care of adding a dummy export.
	 */
	for (nparams = 0, i = 0 ; i < shader->shader.noutput; i++) {
		switch (shader->shader.output[i].name) {
		case TGSI_SEMANTIC_CLIPVERTEX:
		case TGSI_SEMANTIC_POSITION:
		case TGSI_SEMANTIC_PSIZE:
			break;
		default:
			nparams++;
		}
	}
	if (nparams < 1)
		nparams = 1;

	si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
		       S_0286C4_VS_EXPORT_COUNT(nparams - 1));

	si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
		       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
		       S_02870C_POS1_EXPORT_FORMAT(shader->shader.nr_pos_exports > 1 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE) |
		       S_02870C_POS2_EXPORT_FORMAT(shader->shader.nr_pos_exports > 2 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE) |
		       S_02870C_POS3_EXPORT_FORMAT(shader->shader.nr_pos_exports > 3 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE));

	va = r600_resource_va(ctx->screen, (void *)shader->bo);
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
	si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
	si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);

	num_user_sgprs = SI_VS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	if (num_user_sgprs > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 2;
	}
	assert(num_sgprs <= 104);

	vgpr_comp_cnt = shader->shader.uses_instanceid ? 3 : 0;

	si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
		       S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B128_SGPRS((num_sgprs - 1) / 8) |
		       S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt));
	si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
		       S_00B12C_USER_SGPR(num_user_sgprs) |
		       S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
		       S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
		       S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
		       S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
		       S_00B12C_SO_EN(!!shader->selector->so.num_outputs));

	if (rctx->b.chip_class >= CIK) {
		si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
			       S_00B118_CU_EN(0xffff));
		si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS,
			       S_00B11C_LIMIT(0));
	}

	si_pm4_bind_state(rctx, vs, shader->pm4);
	rctx->b.flags |= R600_CONTEXT_INV_SHADER_CACHE;
}