/* * Core functions */ static void cayman_gpu_init(struct radeon_device *rdev) { u32 gb_addr_config = 0; u32 mc_shared_chmap, mc_arb_ramcfg; u32 cgts_tcc_disable; u32 sx_debug_1; u32 smx_dc_ctl0; u32 cgts_sm_ctrl_reg; u32 hdp_host_path_cntl; u32 tmp; u32 disabled_rb_mask; int i, j; switch (rdev->family) { case CHIP_CAYMAN: rdev->config.cayman.max_shader_engines = 2; rdev->config.cayman.max_pipes_per_simd = 4; rdev->config.cayman.max_tile_pipes = 8; rdev->config.cayman.max_simds_per_se = 12; rdev->config.cayman.max_backends_per_se = 4; rdev->config.cayman.max_texture_channel_caches = 8; rdev->config.cayman.max_gprs = 256; rdev->config.cayman.max_threads = 256; rdev->config.cayman.max_gs_threads = 32; rdev->config.cayman.max_stack_entries = 512; rdev->config.cayman.sx_num_of_sets = 8; rdev->config.cayman.sx_max_export_size = 256; rdev->config.cayman.sx_max_export_pos_size = 64; rdev->config.cayman.sx_max_export_smx_size = 192; rdev->config.cayman.max_hw_contexts = 8; rdev->config.cayman.sq_num_cf_insts = 2; rdev->config.cayman.sc_prim_fifo_size = 0x100; rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30; rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130; gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_ARUBA: default: rdev->config.cayman.max_shader_engines = 1; rdev->config.cayman.max_pipes_per_simd = 4; rdev->config.cayman.max_tile_pipes = 2; if ((rdev->pdev->device == 0x9900) || (rdev->pdev->device == 0x9901) || (rdev->pdev->device == 0x9905) || (rdev->pdev->device == 0x9906) || (rdev->pdev->device == 0x9907) || (rdev->pdev->device == 0x9908) || (rdev->pdev->device == 0x9909) || (rdev->pdev->device == 0x9910) || (rdev->pdev->device == 0x9917)) { rdev->config.cayman.max_simds_per_se = 6; rdev->config.cayman.max_backends_per_se = 2; } else if ((rdev->pdev->device == 0x9903) || (rdev->pdev->device == 0x9904) || (rdev->pdev->device == 0x990A) || (rdev->pdev->device == 0x9913) || (rdev->pdev->device == 0x9918)) { rdev->config.cayman.max_simds_per_se = 4; rdev->config.cayman.max_backends_per_se = 2; } else if ((rdev->pdev->device == 0x9919) || (rdev->pdev->device == 0x9990) || (rdev->pdev->device == 0x9991) || (rdev->pdev->device == 0x9994) || (rdev->pdev->device == 0x99A0)) { rdev->config.cayman.max_simds_per_se = 3; rdev->config.cayman.max_backends_per_se = 1; } else { rdev->config.cayman.max_simds_per_se = 2; rdev->config.cayman.max_backends_per_se = 1; } rdev->config.cayman.max_texture_channel_caches = 2; rdev->config.cayman.max_gprs = 256; rdev->config.cayman.max_threads = 256; rdev->config.cayman.max_gs_threads = 32; rdev->config.cayman.max_stack_entries = 512; rdev->config.cayman.sx_num_of_sets = 8; rdev->config.cayman.sx_max_export_size = 256; rdev->config.cayman.sx_max_export_pos_size = 64; rdev->config.cayman.sx_max_export_smx_size = 192; rdev->config.cayman.max_hw_contexts = 8; rdev->config.cayman.sq_num_cf_insts = 2; rdev->config.cayman.sc_prim_fifo_size = 0x40; rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30; rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130; gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN; break; } /* Initialize HDP */ for (i = 0, j = 0; i < 32; i++, j += 0x18) { WREG32((0x2c14 + j), 0x00000000); WREG32((0x2c18 + j), 0x00000000); WREG32((0x2c1c + j), 0x00000000); WREG32((0x2c20 + j), 0x00000000); WREG32((0x2c24 + j), 0x00000000); } WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff)); evergreen_fix_pci_max_read_req_size(rdev); mc_shared_chmap = RREG32(MC_SHARED_CHMAP); mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; if (rdev->config.cayman.mem_row_size_in_kb > 4) rdev->config.cayman.mem_row_size_in_kb = 4; /* XXX use MC settings? */ rdev->config.cayman.shader_engine_tile_size = 32; rdev->config.cayman.num_gpus = 1; rdev->config.cayman.multi_gpu_tile_size = 64; tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT; rdev->config.cayman.num_tile_pipes = (1 << tmp); tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT; rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256; tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT; rdev->config.cayman.num_shader_engines = tmp + 1; tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT; rdev->config.cayman.num_gpus = tmp + 1; tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT; rdev->config.cayman.multi_gpu_tile_size = 1 << tmp; tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT; rdev->config.cayman.mem_row_size_in_kb = 1 << tmp; /* setup tiling info dword. gb_addr_config is not adequate since it does * not have bank info, so create a custom tiling dword. * bits 3:0 num_pipes * bits 7:4 num_banks * bits 11:8 group_size * bits 15:12 row_size */ rdev->config.cayman.tile_config = 0; switch (rdev->config.cayman.num_tile_pipes) { case 1: default: rdev->config.cayman.tile_config |= (0 << 0); break; case 2: rdev->config.cayman.tile_config |= (1 << 0); break; case 4: rdev->config.cayman.tile_config |= (2 << 0); break; case 8: rdev->config.cayman.tile_config |= (3 << 0); break; } /* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */ if (rdev->flags & RADEON_IS_IGP) rdev->config.cayman.tile_config |= 1 << 4; else { switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) { case 0: /* four banks */ rdev->config.cayman.tile_config |= 0 << 4; break; case 1: /* eight banks */ rdev->config.cayman.tile_config |= 1 << 4; break; case 2: /* sixteen banks */ default: rdev->config.cayman.tile_config |= 2 << 4; break; } } rdev->config.cayman.tile_config |= ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; rdev->config.cayman.tile_config |= ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; tmp = 0; for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) { u32 rb_disable_bitmap; WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i)); WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i)); rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16; tmp <<= 4; tmp |= rb_disable_bitmap; } /* enabled rb are just the one not disabled :) */ disabled_rb_mask = tmp; WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES); WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES); WREG32(GB_ADDR_CONFIG, gb_addr_config); WREG32(DMIF_ADDR_CONFIG, gb_addr_config); WREG32(HDP_ADDR_CONFIG, gb_addr_config); tmp = gb_addr_config & NUM_PIPES_MASK; tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines, CAYMAN_MAX_BACKENDS, disabled_rb_mask); WREG32(GB_BACKEND_MAP, tmp); cgts_tcc_disable = 0xffff0000; for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++) cgts_tcc_disable &= ~(1 << (16 + i)); WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable); WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable); WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable); WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable); /* reprogram the shader complex */ cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG); for (i = 0; i < 16; i++) WREG32(CGTS_SM_CTRL_REG, OVERRIDE); WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg); /* set HW defaults for 3D engine */ WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); sx_debug_1 = RREG32(SX_DEBUG_1); sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS; WREG32(SX_DEBUG_1, sx_debug_1); smx_dc_ctl0 = RREG32(SMX_DC_CTL0); smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff); smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets); WREG32(SMX_DC_CTL0, smx_dc_ctl0); WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE); /* need to be explicitly zero-ed */ WREG32(VGT_OFFCHIP_LDS_BASE, 0); WREG32(SQ_LSTMP_RING_BASE, 0); WREG32(SQ_HSTMP_RING_BASE, 0); WREG32(SQ_ESTMP_RING_BASE, 0); WREG32(SQ_GSTMP_RING_BASE, 0); WREG32(SQ_VSTMP_RING_BASE, 0); WREG32(SQ_PSTMP_RING_BASE, 0); WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO); WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) | POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) | SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1))); WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) | SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) | SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size))); WREG32(VGT_NUM_INSTANCES, 1); WREG32(CP_PERFMON_CNTL, 0); WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) | FETCH_FIFO_HIWATER(0x4) | DONE_FIFO_HIWATER(0xe0) | ALU_UPDATE_FIFO_HIWATER(0x8))); WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4)); WREG32(SQ_CONFIG, (VC_ENABLE | EXPORT_SRC_C | GFX_PRIO(0) | CS1_PRIO(0) | CS2_PRIO(1))); WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE); WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) | FORCE_EOV_MAX_REZ_CNT(255))); WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) | AUTO_INVLD_EN(ES_AND_GS_AUTO)); WREG32(VGT_GS_VERTEX_REUSE, 16); WREG32(PA_SC_LINE_STIPPLE_STATE, 0); WREG32(CB_PERF_CTR0_SEL_0, 0); WREG32(CB_PERF_CTR0_SEL_1, 0); WREG32(CB_PERF_CTR1_SEL_0, 0); WREG32(CB_PERF_CTR1_SEL_1, 0); WREG32(CB_PERF_CTR2_SEL_0, 0); WREG32(CB_PERF_CTR2_SEL_1, 0); WREG32(CB_PERF_CTR3_SEL_0, 0); WREG32(CB_PERF_CTR3_SEL_1, 0); tmp = RREG32(HDP_MISC_CNTL); tmp |= HDP_FLUSH_INVALIDATE_CACHE; WREG32(HDP_MISC_CNTL, tmp); hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL); WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl); WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3)); udelay(50); }
static void si_write_harvested_raster_configs(struct radv_physical_device *physical_device, struct radeon_winsys_cs *cs, unsigned raster_config, unsigned raster_config_1) { unsigned sh_per_se = MAX2(physical_device->rad_info.max_sh_per_se, 1); unsigned num_se = MAX2(physical_device->rad_info.max_se, 1); unsigned rb_mask = physical_device->rad_info.enabled_rb_mask; unsigned num_rb = MIN2(physical_device->rad_info.num_render_backends, 16); unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2); unsigned rb_per_se = num_rb / num_se; unsigned se_mask[4]; unsigned se; se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; assert(num_se == 1 || num_se == 2 || num_se == 4); assert(sh_per_se == 1 || sh_per_se == 2); assert(rb_per_pkr == 1 || rb_per_pkr == 2); /* XXX: I can't figure out what the *_XSEL and *_YSEL * fields are for, so I'm leaving them as their default * values. */ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || (!se_mask[2] && !se_mask[3]))) { raster_config_1 &= C_028354_SE_PAIR_MAP; if (!se_mask[0] && !se_mask[1]) { raster_config_1 |= S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3); } else { raster_config_1 |= S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0); } } for (se = 0; se < num_se; se++) { unsigned raster_config_se = raster_config; unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); unsigned pkr1_mask = pkr0_mask << rb_per_pkr; int idx = (se / 2) * 2; if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { raster_config_se &= C_028350_SE_MAP; if (!se_mask[idx]) { raster_config_se |= S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3); } else { raster_config_se |= S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0); } } pkr0_mask &= rb_mask; pkr1_mask &= rb_mask; if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { raster_config_se &= C_028350_PKR_MAP; if (!pkr0_mask) { raster_config_se |= S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3); } else { raster_config_se |= S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0); } } if (rb_per_se >= 2) { unsigned rb0_mask = 1 << (se * rb_per_se); unsigned rb1_mask = rb0_mask << 1; rb0_mask &= rb_mask; rb1_mask &= rb_mask; if (!rb0_mask || !rb1_mask) { raster_config_se &= C_028350_RB_MAP_PKR0; if (!rb0_mask) { raster_config_se |= S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3); } else { raster_config_se |= S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0); } } if (rb_per_se > 2) { rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); rb1_mask = rb0_mask << 1; rb0_mask &= rb_mask; rb1_mask &= rb_mask; if (!rb0_mask || !rb1_mask) { raster_config_se &= C_028350_RB_MAP_PKR1; if (!rb0_mask) { raster_config_se |= S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3); } else { raster_config_se |= S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0); } } } } /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ if (physical_device->rad_info.chip_class < CIK) radeon_set_config_reg(cs, GRBM_GFX_INDEX, SE_INDEX(se) | SH_BROADCAST_WRITES | INSTANCE_BROADCAST_WRITES); else radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1)); radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se); if (physical_device->rad_info.chip_class >= CIK) radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ if (physical_device->rad_info.chip_class < CIK) radeon_set_config_reg(cs, GRBM_GFX_INDEX, SE_BROADCAST_WRITES | SH_BROADCAST_WRITES | INSTANCE_BROADCAST_WRITES); else radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1)); }