void si_init_config(struct radv_physical_device *physical_device, struct radv_cmd_buffer *cmd_buffer) { unsigned num_rb = MIN2(physical_device->rad_info.num_render_backends, 16); unsigned rb_mask = physical_device->rad_info.enabled_rb_mask; unsigned raster_config, raster_config_1; int i; struct radeon_winsys_cs *cs = cmd_buffer->cs; radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1)); radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1)); radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? */ radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40); radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2); radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0); if (physical_device->rad_info.chip_class < CIK) radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); radeon_set_context_reg(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210); radeon_set_context_reg(cs, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98); radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0); for (i = 0; i < 16; i++) { radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0); radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0)); } switch (physical_device->rad_info.family) { case CHIP_TAHITI: case CHIP_PITCAIRN: raster_config = 0x2a00126a; raster_config_1 = 0x00000000; break; case CHIP_VERDE: raster_config = 0x0000124a; raster_config_1 = 0x00000000; break; case CHIP_OLAND: raster_config = 0x00000082; raster_config_1 = 0x00000000; break; case CHIP_HAINAN: raster_config = 0x00000000; raster_config_1 = 0x00000000; break; case CHIP_BONAIRE: raster_config = 0x16000012; raster_config_1 = 0x00000000; break; case CHIP_HAWAII: raster_config = 0x3a00161a; raster_config_1 = 0x0000002e; break; case CHIP_FIJI: if (physical_device->rad_info.cik_macrotile_mode_array[0] == 0x000000e8) { /* old kernels with old tiling config */ raster_config = 0x16000012; raster_config_1 = 0x0000002a; } else { raster_config = 0x3a00161a; raster_config_1 = 0x0000002e; } break; case CHIP_POLARIS10: raster_config = 0x16000012; raster_config_1 = 0x0000002a; break; case CHIP_POLARIS11: raster_config = 0x16000012; raster_config_1 = 0x00000000; break; case CHIP_TONGA: raster_config = 0x16000012; raster_config_1 = 0x0000002a; break; case CHIP_ICELAND: if (num_rb == 1) raster_config = 0x00000000; else raster_config = 0x00000002; raster_config_1 = 0x00000000; break; case CHIP_CARRIZO: raster_config = 0x00000002; raster_config_1 = 0x00000000; break; case CHIP_KAVERI: /* KV should be 0x00000002, but that causes problems with radeon */ raster_config = 0x00000000; /* 0x00000002 */ raster_config_1 = 0x00000000; break; case CHIP_KABINI: case CHIP_MULLINS: case CHIP_STONEY: raster_config = 0x00000000; raster_config_1 = 0x00000000; break; default: fprintf(stderr, "radeonsi: Unknown GPU, using 0 for raster_config\n"); raster_config = 0x00000000; raster_config_1 = 0x00000000; break; } /* Always use the default config when all backends are enabled * (or when we failed to determine the enabled backends). */ if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config); if (physical_device->rad_info.chip_class >= CIK) radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } else { si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1); } radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0); radeon_set_context_reg(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0)); radeon_set_context_reg(cs, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0)); radeon_set_context_reg(cs, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0)); radeon_set_context_reg(cs, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0)); radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0); radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE)); radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0); radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0); radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0); if (physical_device->rad_info.chip_class >= CIK) { radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0); radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff)); radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff)); if (physical_device->rad_info.num_good_compute_units / (physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) { /* Too few available compute units per SH. Disallowing * VS to run on CU0 could hurt us more than late VS * allocation would help. * * LATE_ALLOC_VS = 2 is the highest safe number. */ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff)); radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff)); radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2)); } else { /* Set LATE_ALLOC_VS == 31. It should be less than * the number of scratch waves. Limitations: * - VS can't execute on CU0. * - If HS writes outputs to LDS, LS can't execute on CU0. */ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe)); radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe)); radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31)); } radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); } if (physical_device->rad_info.chip_class >= VI) { radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL, S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | S_028424_OVERWRITE_COMBINER_WATERMARK(4)); radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT(16)); } else { radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); } if (physical_device->rad_info.family == CHIP_STONEY) radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0); si_init_compute(physical_device, cs); }
static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *shader) { struct r600_context *rctx = (struct r600_context *)ctx; struct si_pm4_state *pm4; unsigned i, exports_ps, num_cout, spi_ps_in_control, db_shader_control; unsigned num_sgprs, num_user_sgprs; unsigned spi_baryc_cntl = 0, spi_ps_input_ena, spi_shader_z_format; uint64_t va; si_pm4_delete_state(rctx, ps, shader->pm4); pm4 = shader->pm4 = si_pm4_alloc_state(rctx); if (pm4 == NULL) return; db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) | S_02880C_ALPHA_TO_MASK_DISABLE(rctx->fb_cb0_is_integer); for (i = 0; i < shader->shader.ninput; i++) { switch (shader->shader.input[i].name) { case TGSI_SEMANTIC_POSITION: if (shader->shader.input[i].centroid) { /* SPI_BARYC_CNTL.POS_FLOAT_LOCATION * Possible vaules: * 0 -> Position = pixel center (default) * 1 -> Position = pixel centroid * 2 -> Position = iterated sample number XXX: * What does this mean? */ spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(1); } /* Fall through */ case TGSI_SEMANTIC_FACE: continue; } } for (i = 0; i < shader->shader.noutput; i++) { if (shader->shader.output[i].name == TGSI_SEMANTIC_POSITION) db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1); if (shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL) db_shader_control |= S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(1); } if (shader->shader.uses_kill || shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) db_shader_control |= S_02880C_KILL_ENABLE(1); exports_ps = 0; num_cout = 0; for (i = 0; i < shader->shader.noutput; i++) { if (shader->shader.output[i].name == TGSI_SEMANTIC_POSITION || shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (shader->shader.output[i].name == TGSI_SEMANTIC_COLOR) { if (shader->shader.fs_write_all) num_cout = shader->shader.nr_cbufs; else num_cout++; } } if (!exports_ps) { /* always at least export 1 component per pixel */ exports_ps = 2; } spi_ps_in_control = S_0286D8_NUM_INTERP(shader->shader.ninterp) | S_0286D8_BC_OPTIMIZE_DISABLE(1); si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl); spi_ps_input_ena = shader->spi_ps_input_ena; /* we need to enable at least one of them, otherwise we hang the GPU */ assert(G_0286CC_PERSP_SAMPLE_ENA(spi_ps_input_ena) || G_0286CC_PERSP_CENTER_ENA(spi_ps_input_ena) || G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena) || G_0286CC_PERSP_PULL_MODEL_ENA(spi_ps_input_ena) || G_0286CC_LINEAR_SAMPLE_ENA(spi_ps_input_ena) || G_0286CC_LINEAR_CENTER_ENA(spi_ps_input_ena) || G_0286CC_LINEAR_CENTROID_ENA(spi_ps_input_ena) || G_0286CC_LINE_STIPPLE_TEX_ENA(spi_ps_input_ena)); si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, spi_ps_input_ena); si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, spi_ps_input_ena); si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control); if (G_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(db_shader_control)) spi_shader_z_format = V_028710_SPI_SHADER_32_GR; else if (G_02880C_Z_EXPORT_ENABLE(db_shader_control)) spi_shader_z_format = V_028710_SPI_SHADER_32_R; else spi_shader_z_format = 0; si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, spi_shader_z_format); si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, shader->spi_shader_col_format); si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask); va = r600_resource_va(ctx->screen, (void *)shader->bo); si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40); num_user_sgprs = SI_PS_NUM_USER_SGPR; num_sgprs = shader->num_sgprs; /* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */ if ((num_user_sgprs + 1) > num_sgprs) { /* Last 2 reserved SGPRs are used for VCC */ num_sgprs = num_user_sgprs + 1 + 2; } assert(num_sgprs <= 104); si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, S_00B028_VGPRS((shader->num_vgprs - 1) / 4) | S_00B028_SGPRS((num_sgprs - 1) / 8)); si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, S_00B02C_EXTRA_LDS_SIZE(shader->lds_size) | S_00B02C_USER_SGPR(num_user_sgprs)); if (rctx->b.chip_class >= CIK) { si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); } si_pm4_set_reg(pm4, R_02880C_DB_SHADER_CONTROL, db_shader_control); shader->cb0_is_integer = rctx->fb_cb0_is_integer; shader->sprite_coord_enable = rctx->sprite_coord_enable; si_pm4_bind_state(rctx, ps, shader->pm4); rctx->b.flags |= R600_CONTEXT_INV_SHADER_CACHE; }