void si_init_config(struct radv_physical_device *physical_device, struct radv_cmd_buffer *cmd_buffer) { unsigned num_rb = MIN2(physical_device->rad_info.num_render_backends, 16); unsigned rb_mask = physical_device->rad_info.enabled_rb_mask; unsigned raster_config, raster_config_1; int i; struct radeon_winsys_cs *cs = cmd_buffer->cs; radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1)); radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1)); radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? */ radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40); radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2); radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0); if (physical_device->rad_info.chip_class < CIK) radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); radeon_set_context_reg(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); radeon_set_context_reg(cs, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); for (i = 0; i < 16; i++) { radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0); radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0)); } switch (physical_device->rad_info.family) { case CHIP_TAHITI: case CHIP_PITCAIRN: raster_config = 0x2a00126a; raster_config_1 = 0x00000000; break; case CHIP_VERDE: raster_config = 0x0000124a; raster_config_1 = 0x00000000; break; case CHIP_OLAND: raster_config = 0x00000082; raster_config_1 = 0x00000000; break; case CHIP_HAINAN: raster_config = 0x00000000; raster_config_1 = 0x00000000; break; case CHIP_BONAIRE: raster_config = 0x16000012; raster_config_1 = 0x00000000; break; case CHIP_HAWAII: raster_config = 0x3a00161a; raster_config_1 = 0x0000002e; break; case CHIP_FIJI: if (physical_device->rad_info.cik_macrotile_mode_array[0] == 0x000000e8) { /* old kernels with old tiling config */ raster_config = 0x16000012; raster_config_1 = 0x0000002a; } else { raster_config = 0x3a00161a; raster_config_1 = 0x0000002e; } break; case CHIP_POLARIS10: raster_config = 0x16000012; raster_config_1 = 0x0000002a; break; case CHIP_POLARIS11: raster_config = 0x16000012; raster_config_1 = 0x00000000; break; case CHIP_TONGA: raster_config = 0x16000012; raster_config_1 = 0x0000002a; break; case CHIP_ICELAND: if (num_rb == 1) raster_config = 0x00000000; else raster_config = 0x00000002; raster_config_1 = 0x00000000; break; case CHIP_CARRIZO: raster_config = 0x00000002; raster_config_1 = 0x00000000; break; case CHIP_KAVERI: /* KV should be 0x00000002, but that causes problems with radeon */ raster_config = 0x00000000; /* 0x00000002 */ raster_config_1 = 0x00000000; break; case CHIP_KABINI: case CHIP_MULLINS: case CHIP_STONEY: raster_config = 0x00000000; raster_config_1 = 0x00000000; break; default: fprintf(stderr, "radeonsi: Unknown GPU, using 0 for raster_config\n"); raster_config = 0x00000000; raster_config_1 = 0x00000000; break; } /* Always use the default config when all backends are enabled * (or when we failed to determine the enabled backends). */ if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config); if (physical_device->rad_info.chip_class >= CIK) radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } else { si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1); } radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0); radeon_set_context_reg(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0)); radeon_set_context_reg(cs, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0)); radeon_set_context_reg(cs, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0)); radeon_set_context_reg(cs, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0)); radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0); radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE)); radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0); radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0); radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0); if (physical_device->rad_info.chip_class >= CIK) { radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff)); radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); if (physical_device->rad_info.num_good_compute_units / (physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) { /* Too few available compute units per SH. Disallowing * VS to run on CU0 could hurt us more than late VS * allocation would help. * * LATE_ALLOC_VS = 2 is the highest safe number. */ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); } else { /* Set LATE_ALLOC_VS == 31. It should be less than * the number of scratch waves. Limitations: * - VS can't execute on CU0. * - If HS writes outputs to LDS, LS can't execute on CU0. */ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe)); radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe)); radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); } radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); } if (physical_device->rad_info.chip_class >= VI) { radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL, S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | S_028424_OVERWRITE_COMBINER_WATERMARK(4)); radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT(16)); } else { radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); } if (physical_device->rad_info.family == CHIP_STONEY) radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0); si_init_compute(physical_device, cs); }
static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *shader) { struct r600_context *rctx = (struct r600_context *)ctx; struct si_pm4_state *pm4; unsigned num_sgprs, num_user_sgprs; unsigned nparams, i, vgpr_comp_cnt; uint64_t va; si_pm4_delete_state(rctx, vs, shader->pm4); pm4 = shader->pm4 = si_pm4_alloc_state(rctx); if (pm4 == NULL) return; /* Certain attributes (position, psize, etc.) don't count as params. * VS is required to export at least one param and r600_shader_from_tgsi() * takes care of adding a dummy export. */ for (nparams = 0, i = 0 ; i < shader->shader.noutput; i++) { switch (shader->shader.output[i].name) { case TGSI_SEMANTIC_CLIPVERTEX: case TGSI_SEMANTIC_POSITION: case TGSI_SEMANTIC_PSIZE: break; default: nparams++; } } if (nparams < 1) nparams = 1; si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(nparams - 1)); si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | S_02870C_POS1_EXPORT_FORMAT(shader->shader.nr_pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | S_02870C_POS2_EXPORT_FORMAT(shader->shader.nr_pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | S_02870C_POS3_EXPORT_FORMAT(shader->shader.nr_pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); va = r600_resource_va(ctx->screen, (void *)shader->bo); si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8); si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40); num_user_sgprs = SI_VS_NUM_USER_SGPR; num_sgprs = shader->num_sgprs; if (num_user_sgprs > num_sgprs) { /* Last 2 reserved SGPRs are used for VCC */ num_sgprs = num_user_sgprs + 2; } assert(num_sgprs <= 104); vgpr_comp_cnt = shader->shader.uses_instanceid ? 3 : 0; si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, S_00B128_VGPRS((shader->num_vgprs - 1) / 4) | S_00B128_SGPRS((num_sgprs - 1) / 8) | S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt)); si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, S_00B12C_USER_SGPR(num_user_sgprs) | S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) | S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) | S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) | S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) | S_00B12C_SO_EN(!!shader->selector->so.num_outputs)); if (rctx->b.chip_class >= CIK) { si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0)); } si_pm4_bind_state(rctx, vs, shader->pm4); rctx->b.flags |= R600_CONTEXT_INV_SHADER_CACHE; }