/* debugfs show: dump GPCPLL cfg/coeff registers and the frequency computed
 * from them.  Holds clk_mutex; bails out if the GPU is powered down. */
static int pll_reg_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u32 reg, m, n, pl, f;

	mutex_lock(&g->clk.clk_mutex);
	/* Registers are only accessible while the clock HW is powered. */
	if (!g->clk.clk_hw_on) {
		seq_printf(s, "gk20a powered down - no access to registers\n");
		mutex_unlock(&g->clk.clk_mutex);
		return 0;
	}

	reg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	seq_printf(s, "cfg = 0x%x : %s : %s\n", reg,
		   trim_sys_gpcpll_cfg_enable_v(reg) ? "enabled" : "disabled",
		   trim_sys_gpcpll_cfg_pll_lock_v(reg) ? "locked" : "unlocked");

	reg = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	m = trim_sys_gpcpll_coeff_mdiv_v(reg);
	n = trim_sys_gpcpll_coeff_ndiv_v(reg);
	pl = trim_sys_gpcpll_coeff_pldiv_v(reg);
	/* PLL output = ref clock * N / (M * post divider) */
	f = g->clk.gpc_pll.clk_in * n / (m * pl_to_div[pl]);
	seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
	/* GPU clock is reported as half the PLL output (per the f/2 below) */
	seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
	mutex_unlock(&g->clk.clk_mutex);
	return 0;
}
/* One-time LTC floorsweeping init: discover and program the active LTC
 * count, configure dstg, and mask off LTC interrupt sources. */
void gm20b_ltc_init_fs_state(struct gk20a *g)
{
	u32 reg;

	gk20a_dbg_info("initialize gm20b l2");

	g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r());
	g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r());
	gk20a_dbg_info("%d ltcs out of %d", g->ltc_count, g->max_ltc_count);

	gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(),
		g->ltc_count);
	gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(),
		g->ltc_count);

	/* Read the unicast ltc0/lts0 cfg0, set the vdc 4:2 disable bit, and
	 * write it back through the broadcast (ltcs/ltss) register. */
	gk20a_writel(g, ltc_ltcs_ltss_dstg_cfg0_r(),
		gk20a_readl(g, ltc_ltc0_lts0_dstg_cfg0_r()) |
		ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m());

	/* Disable LTC interrupts */
	reg = gk20a_readl(g, ltc_ltcs_ltss_intr_r());
	reg &= ~ltc_ltcs_ltss_intr_en_evicted_cb_m();
	reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_access_m();
	reg &= ~ltc_ltcs_ltss_intr_en_illegal_compstat_m();
	gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg);
}
/* Initialize the GPC MMU by mirroring relevant FB MMU control and debug
 * settings into the per-GPC broadcast registers. */
static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
{
	u32 temp;

	gk20a_dbg_info("initialize gpc mmu");

	if (!g->ops.privsecurity) {
		/* Bypass MMU check for non-secure boot. For
		 * secure-boot,this register write has no-effect */
		gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff);
	}

	/* Copy the FB MMU control value, keeping only the fields the GPC MMU
	 * control register understands. */
	temp = gk20a_readl(g, fb_mmu_ctrl_r());
	temp &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() |
		gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() |
		gr_gpcs_pri_mmu_ctrl_vol_fault_m() |
		gr_gpcs_pri_mmu_ctrl_comp_fault_m() |
		gr_gpcs_pri_mmu_ctrl_miss_gran_m() |
		gr_gpcs_pri_mmu_ctrl_cache_mode_m() |
		gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
		gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
		gr_gpcs_pri_mmu_ctrl_mmu_disable_m();
	gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), temp);

	/* Clear the PM unit/request masks. */
	gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
	gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);

	/* Mirror FB MMU debug control/wr/rd into the GPC side. */
	gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(),
			gk20a_readl(g, fb_mmu_debug_ctrl_r()));
	gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(),
			gk20a_readl(g, fb_mmu_debug_wr_r()));
	gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(),
			gk20a_readl(g, fb_mmu_debug_rd_r()));

	gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(),
			gk20a_readl(g, fb_fbhub_num_active_ltcs_r()));
}
/* debugfs get: measure the current GPC clock rate by counting GPCCLK cycles
 * over a fixed number (ncycle) of reference-clock cycles.
 * Returns -EBUSY if the counter was still advancing between two reads. */
static int monitor_get(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	struct clk_gk20a *clk = &g->clk;
	u32 ncycle = 100; /* count GPCCLK for ncycle of clkin */
	u32 clkin = clk->gpc_pll.clk_in;
	u32 count1, count2;

	gk20a_busy(g->dev);

	/* Reset, then arm the clock counter for ncycle reference cycles. */
	gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
		     trim_gpc_clk_cntr_ncgpcclk_cfg_reset_asserted_f());
	gk20a_writel(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0),
		     trim_gpc_clk_cntr_ncgpcclk_cfg_enable_asserted_f() |
		     trim_gpc_clk_cntr_ncgpcclk_cfg_write_en_asserted_f() |
		     trim_gpc_clk_cntr_ncgpcclk_cfg_noofipclks_f(ncycle));
	/* start */

	/* It should take about 8us to finish 100 cycle of 12MHz.
	   But longer than 100us delay is required here. */
	gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cfg_r(0));
	udelay(2000);

	count1 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
	udelay(100);
	/* Second read detects a measurement that is still in progress. */
	count2 = gk20a_readl(g, trim_gpc_clk_cntr_ncgpcclk_cnt_r(0));
	*val = (u64)(trim_gpc_clk_cntr_ncgpcclk_cnt_value_v(count2) * clkin / ncycle);
	gk20a_idle(g->dev);

	if (count1 != count2)
		return -EBUSY;
	return 0;
}
/* Disable the GPCPLL.  If allow_slide is set and the PLL is running, first
 * slide N down to the minimum VCO rate, then switch the output mux to bypass
 * before finally turning the PLL off.  Always returns 0. */
static int clk_disable_gpcpll(struct gk20a *g, int allow_slide)
{
	u32 cfg, coeff, m, nlo;
	struct clk_gk20a *clk = &g->clk;

	/* slide to VCO min */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	if (allow_slide && trim_sys_gpcpll_cfg_enable_v(cfg)) {
		coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
		m = trim_sys_gpcpll_coeff_mdiv_v(coeff);
		/* smallest N that keeps the VCO at/above its minimum */
		nlo = DIV_ROUND_UP(m * gpc_pll_params.min_vco,
				   clk->gpc_pll.clk_in);
		clk_slide_gpc_pll(g, nlo);
	}

	/* put PLL in bypass before disabling it */
	cfg = gk20a_readl(g, trim_sys_sel_vco_r());
	cfg = set_field(cfg, trim_sys_sel_vco_gpc2clk_out_m(),
			trim_sys_sel_vco_gpc2clk_out_bypass_f());
	gk20a_writel(g, trim_sys_sel_vco_r(), cfg);

	/* disable PLL */
	cfg = gk20a_readl(g, trim_sys_gpcpll_cfg_r());
	cfg = set_field(cfg, trim_sys_gpcpll_cfg_enable_m(),
			trim_sys_gpcpll_cfg_enable_no_f());
	gk20a_writel(g, trim_sys_gpcpll_cfg_r(), cfg);
	/* read back to make sure the write landed */
	gk20a_readl(g, trim_sys_gpcpll_cfg_r());

	clk->gpc_pll.enabled = false;
	return 0;
}
/* Program the beta circular-buffer size (clamped to the board maximum) into
 * the DS constraint logic and every PPC's CBM/SWDX registers. */
static void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val;
	u32 cb_size = data * 4;	/* incoming value is in units of 4 */

	gk20a_dbg_fn("");

	if (cb_size > gr->attrib_cb_size)
		cb_size = gr->attrib_cb_size;

	gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
		(gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
		 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~0)) |
		 gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size));

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
			ppc_index++) {

			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index);

			/* per-PPC size scales with the TPC count behind it */
			val = set_field(val,
				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
				gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
					gr->pes_tpc_count[ppc_index][gpc_index]));

			gk20a_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index, val);

			/* NOTE(review): indexing swdx_tc_beta_cb_size by
			 * (ppc_index + gpc_index) collides for e.g. (0,1) and
			 * (1,0) — verify the intended index formula against
			 * the register manual. */
			val = gk20a_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
						ppc_index + gpc_index));

			val = set_field(val,
				gr_gpcs_swdx_tc_beta_cb_size_v_m(),
				gr_gpcs_swdx_tc_beta_cb_size_v_f(cb_size *
					gr->gpc_ppc_count[gpc_index]));
			val = set_field(val,
				gr_gpcs_swdx_tc_beta_cb_size_div3_m(),
				gr_gpcs_swdx_tc_beta_cb_size_div3_f((cb_size *
					gr->gpc_ppc_count[gpc_index])/3));

			gk20a_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
						ppc_index + gpc_index), val);
		}
	}
}
/* Flushes the compression bit cache as well as "data".
 * Note: the name here is a bit of a misnomer. ELPG uses this
 * internally... but ELPG doesn't have to be on to do it manually.
 */
static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
{
	u32 data;
	s32 retry = 100;

	gk20a_dbg_fn("");

	/* Make sure all previous writes are committed to the L2. There's no
	   guarantee that writes are to DRAM. This will be a sysmembar internal
	   to the L2. */
	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
		ltc_ltcs_ltss_g_elpg_flush_pending_f());
	do {
		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());

		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
			gk20a_dbg_info("g_elpg_flush 0x%x", data);
			retry--;
			usleep_range(20, 40);
		} else
			break;
	} while (retry >= 0 || !tegra_platform_is_silicon());

	/* On pre-silicon platforms the retry budget is deliberately ignored
	 * by the loop condition, so retry can go negative even though the
	 * flush eventually completed; only warn on real silicon (matches
	 * gm20b_ltc_g_elpg_flush_locked). */
	if (retry < 0 && tegra_platform_is_silicon())
		gk20a_warn(dev_from_gk20a(g),
			   "g_elpg_flush too many retries");
}
/* Enable use of the PDB big page size in the FB MMU control register. */
static void gm20b_fb_set_mmu_page_size(struct gk20a *g)
{
	u32 ctrl = gk20a_readl(g, fb_mmu_ctrl_r());

	ctrl |= fb_mmu_ctrl_use_pdb_big_page_size_true_f();
	gk20a_writel(g, fb_mmu_ctrl_r(), ctrl);
}
/* Program the alpha circular-buffer size (clamped to the board maximum) into
 * the DS constraint logic, the PD a/b distribution config, and each PPC. */
static void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct gr_gk20a *gr = &g->gr;
	u32 gpc_index, ppc_index, stride, val;
	u32 pd_ab_max_output;
	u32 alpha_cb_size = data * 4;	/* incoming value is in units of 4 */

	gk20a_dbg_fn("");
	/* if (NO_ALPHA_BETA_TIMESLICE_SUPPORT_DEF)
		return; */

	if (alpha_cb_size > gr->alpha_cb_size)
		alpha_cb_size = gr->alpha_cb_size;

	gk20a_writel(g, gr_ds_tga_constraintlogic_r(),
		(gk20a_readl(g, gr_ds_tga_constraintlogic_r()) &
		 ~gr_ds_tga_constraintlogic_alpha_cbsize_f(~0)) |
		 gr_ds_tga_constraintlogic_alpha_cbsize_f(alpha_cb_size));

	/* convert the cb size into pd_ab_dist_cfg1 output granularity */
	pd_ab_max_output = alpha_cb_size *
		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
		gr_pd_ab_dist_cfg1_max_output_granularity_v();

	gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
		gr_pd_ab_dist_cfg1_max_batches_init_f());

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0; ppc_index < gr->gpc_ppc_count[gpc_index];
			ppc_index++) {

			val = gk20a_readl(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index);

			/* per-PPC size scales with the TPC count behind it */
			val = set_field(val,
				gr_gpc0_ppc0_cbm_alpha_cb_size_v_m(),
				gr_gpc0_ppc0_cbm_alpha_cb_size_v_f(alpha_cb_size *
					gr->pes_tpc_count[ppc_index][gpc_index]));

			gk20a_writel(g, gr_gpc0_ppc0_cbm_alpha_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index, val);
		}
	}
}
/* Dump one channel's state: CCSR status plus the pushbuffer pointers,
 * syncpoint and semaphore words read from the channel's RAMFC instance
 * block.  No-op if the instance block has no CPU mapping. */
static void gk20a_debug_show_channel(struct gk20a *g,
				     struct gk20a_debug_output *o,
				     struct channel_gk20a *ch)
{
	u32 channel = gk20a_readl(g, ccsr_channel_r(ch->hw_chid));
	u32 status = ccsr_channel_status_v(channel);
	u32 syncpointa, syncpointb;
	void *inst_ptr;

	inst_ptr = ch->inst_block.cpuva;
	if (!inst_ptr)
		return;

	syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w());
	syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w());

	gk20a_debug_output(o, "%d-%s, pid %d: ", ch->hw_chid,
			ch->g->dev->name,
			ch->pid);
	gk20a_debug_output(o, "%s in use %s %s\n",
			ccsr_channel_enable_v(channel) ? "" : "not",
			ccsr_chan_status_str[status],
			ccsr_channel_busy_v(channel) ? "busy" : "not busy");
	/* 64-bit pushbuffer pointers are split across lo/hi RAMFC words. */
	gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx "
			"FETCH: %016llx\nHEADER: %08x COUNT: %08x\n"
			"SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n",
		(u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_top_level_get_w()) +
		((u64)gk20a_mem_rd32(inst_ptr,
			ram_fc_pb_top_level_get_hi_w()) << 32ULL),
		(u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_w()) +
		((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()) << 32ULL),
		(u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_w()) +
		((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()) << 32ULL),
		(u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_w()) +
		((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()) << 32ULL),
		gk20a_mem_rd32(inst_ptr, ram_fc_pb_header_w()),
		gk20a_mem_rd32(inst_ptr, ram_fc_pb_count_w()),
		syncpointa,
		syncpointb,
		gk20a_mem_rd32(inst_ptr, ram_fc_semaphorea_w()),
		gk20a_mem_rd32(inst_ptr, ram_fc_semaphoreb_w()),
		gk20a_mem_rd32(inst_ptr, ram_fc_semaphorec_w()),
		gk20a_mem_rd32(inst_ptr, ram_fc_semaphored_w()));

#ifdef CONFIG_TEGRA_GK20A
	/* If the channel is blocked on a syncpoint wait, show which one.
	 * NOTE(review): status values 3 and 8 are treated as "still waiting"
	 * — presumably pending/acquire states; confirm against the CCSR
	 * status encoding. */
	if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
		&& (pbdma_syncpointb_wait_switch_v(syncpointb) ==
			pbdma_syncpointb_wait_switch_en_v()))
		gk20a_debug_output(o, "%s on syncpt %u (%s) val %u\n",
			(status == 3 || status == 8) ? "Waiting" : "Waited",
			pbdma_syncpointb_syncpt_index_v(syncpointb),
			nvhost_syncpt_get_name(g->host1x_dev,
				pbdma_syncpointb_syncpt_index_v(syncpointb)),
			pbdma_syncpointa_payload_v(syncpointa));
#endif

	gk20a_debug_output(o, "\n");
}
/* Enable full compression-tag-line mode in the FB MMU and report that the
 * mode is in use.  (A previous comment here wrongly said "set large page
 * size" — copy/paste from gm20b_fb_set_mmu_page_size.) */
static bool gm20b_fb_set_use_full_comp_tag_line(struct gk20a *g)
{
	u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());

	fb_mmu_ctrl |= fb_mmu_ctrl_use_full_comp_tag_line_true_f();
	gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);

	return true;
}
/* Program the NV_THERM block: event thresholds, the two gradual-slowdown
 * stepping tables, and enable gradual slowdown.  Always returns 0. */
static int gk20a_init_therm_setup_hw(struct gk20a *g)
{
	u32 v;

	/* program NV_THERM registers */
	gk20a_writel(g, therm_use_a_r(), NV_THERM_USE_A_INIT);
	gk20a_writel(g, therm_evt_ext_therm_0_r(),
		NV_THERM_EVT_EXT_THERM_0_INIT);
	gk20a_writel(g, therm_evt_ext_therm_1_r(),
		NV_THERM_EVT_EXT_THERM_1_INIT);
	gk20a_writel(g, therm_evt_ext_therm_2_r(),
		NV_THERM_EVT_EXT_THERM_2_INIT);

	/* Table 0: progressively deeper slowdown (1.5x, 2x, 4x, 8x, 8x). */
	gk20a_writel(g, therm_grad_stepping_table_r(0),
		therm_grad_stepping_table_slowdown_factor0_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by1p5_f()) |
		therm_grad_stepping_table_slowdown_factor1_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by2_f()) |
		therm_grad_stepping_table_slowdown_factor2_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by4_f()) |
		therm_grad_stepping_table_slowdown_factor3_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
		therm_grad_stepping_table_slowdown_factor4_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()));
	/* Table 1: maximum slowdown (8x) across all steps. */
	gk20a_writel(g, therm_grad_stepping_table_r(1),
		therm_grad_stepping_table_slowdown_factor0_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
		therm_grad_stepping_table_slowdown_factor1_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
		therm_grad_stepping_table_slowdown_factor2_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
		therm_grad_stepping_table_slowdown_factor3_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()) |
		therm_grad_stepping_table_slowdown_factor4_f(therm_grad_stepping_table_slowdown_factor0_fpdiv_by8_f()));

	v = gk20a_readl(g, therm_clk_timing_r(0));
	v |= therm_clk_timing_grad_slowdown_enabled_f();
	gk20a_writel(g, therm_clk_timing_r(0), v);

	v = gk20a_readl(g, therm_config2_r());
	v |= therm_config2_grad_enable_f(1);
	v |= therm_config2_slowdown_factor_extended_f(1);
	gk20a_writel(g, therm_config2_r(), v);

	gk20a_writel(g, therm_grad_stepping1_r(),
			therm_grad_stepping1_pdiv_duration_f(32));

	v = gk20a_readl(g, therm_grad_stepping0_r());
	v |= therm_grad_stepping0_feature_enable_f();
	gk20a_writel(g, therm_grad_stepping0_r(), v);

	return 0;
}
/* Return the mask of usable TPCs in the given GPC.  The fuse register marks
 * floorswept TPCs with set bits, so invert it and clip to the maximum TPC
 * count per GPC. */
static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
{
	struct gr_gk20a *gr = &g->gr;
	u32 tpc_mask = (0x1 << gr->max_tpc_per_gpc_count) - 1;
	u32 fuse_val = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(gpc_index));

	return ~fuse_val & tpc_mask;
}
/*
 * Set the maximum number of L2 ways that may hold the "EVICT_LAST" class.
 */
static void gk20a_ltc_set_max_ways_evict_last(struct gk20a *g, u32 max_ways)
{
	u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_r());

	reg &= ~ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(~0);
	reg |= ltc_ltcs_ltss_tstg_set_mgmt_max_ways_evict_last_f(max_ways);

	gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_r(), reg);
}
/* Read the SM architecture register (via GPC0/TPC0) and cache the SPA/SM
 * versions and warp count in the GPU characteristics. */
static void gr_gm20b_detect_sm_arch(struct gk20a *g)
{
	u32 arch = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());

	g->gpu_characteristics.sm_arch_spa_version =
		gr_gpc0_tpc0_sm_arch_spa_version_v(arch);
	g->gpu_characteristics.sm_arch_sm_version =
		gr_gpc0_tpc0_sm_arch_sm_version_v(arch);
	g->gpu_characteristics.sm_arch_warp_count =
		gr_gpc0_tpc0_sm_arch_warp_count_v(arch);
}
/* Scale a CBC base value by the number of active LTCs: doubled for two
 * LTCs, unchanged for one; any other count is logged as invalid and the
 * base is returned unmodified. */
u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
{
	u32 active = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());

	switch (active) {
	case 2:
		return base * 2;
	case 1:
		return base;
	default:
		gk20a_err(dev_from_gk20a(g),
			"Invalid number of active ltcs: %08x\n", active);
		return base;
	}
}
/* Compute the total L2 cache size in bytes from the active ways/sets read
 * out of tstg_cfg1, the active LTC count, and per-chip constants. */
static int gk20a_determine_L2_size_bytes(struct gk20a *g)
{
	const u32 gpuid = GK20A_GPUID(g->gpu_characteristics.arch,
				      g->gpu_characteristics.impl);
	u32 lts_per_ltc;
	u32 ways;
	u32 sets;
	u32 bytes_per_line;
	u32 active_ltcs;
	u32 cache_size;

	u32 tmp;
	u32 active_sets_value;

	tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
	ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));

	/* active sets is an enum: all / half / quarter */
	active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
	if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
		sets = 64;
	} else if (active_sets_value ==
		   ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
		sets = 32;
	} else if (active_sets_value ==
		   ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
		sets = 16;
	} else {
		dev_err(dev_from_gk20a(g),
			"Unknown constant %u for active sets",
		       (unsigned)active_sets_value);
		sets = 0;
	}

	active_ltcs = g->gr.num_fbps;

	/* chip-specific values */
	switch (gpuid) {
	case GK20A_GPUID_GK20A:
		lts_per_ltc = 1;
		bytes_per_line = 128;
		break;
	default:
		dev_err(dev_from_gk20a(g), "Unknown GPU id 0x%02x\n",
			(unsigned)gpuid);
		lts_per_ltc = 0;
		bytes_per_line = 0;
	}

	cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;

	return cache_size;
}
/* LTC interrupt service routine: for every LTC flagged in the MC summary,
 * log each slice's interrupt status and write it back to acknowledge. */
void gm20b_ltc_isr(struct gk20a *g)
{
	u32 mc_intr, ltc_intr;
	int ltc, slice;

	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
	gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x",
		  mc_intr);
	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		/* one summary bit per LTC */
		if ((mc_intr & 1 << ltc) == 0)
			continue;
		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
					   proj_ltc_stride_v() * ltc +
					   proj_lts_stride_v() * slice);
			gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
				  ltc, slice, ltc_intr);
			/* write the status back to clear it; NOTE(review):
			 * relies on write-1-to-clear semantics — confirm in
			 * the register manual */
			gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
				   proj_ltc_stride_v() * ltc +
				   proj_lts_stride_v() * slice,
				 ltc_intr);
		}
	}
}
/* Reset and restart the priv ring: force the gpc2clk bypass divider to its
 * default, pulse the priv ring reset, reload SLCG gating and kick the ring
 * master.  (Note: another definition of this function appears later in this
 * concatenated source, from a different chip file.) */
void gk20a_reset_priv_ring(struct gk20a *g)
{
	u32 data;

	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	data = set_field(data,
			trim_sys_gpc2clk_out_bypdiv_m(),
			trim_sys_gpc2clk_out_bypdiv_f(0));
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);

	gk20a_reset(g, mc_enable_priv_ring_enabled_f());

	if (g->ops.clock_gating.slcg_priring_load_gating_prod)
		g->ops.clock_gating.slcg_priring_load_gating_prod(g,
				g->slcg_enabled);

	/* NOTE(review): magic command 0x4 — presumably "enumerate and start
	 * ring"; confirm against the ring master manual */
	gk20a_writel(g,pri_ringmaster_command_r(),
			0x4);

	gk20a_writel(g, pri_ringstation_sys_decode_config_r(),
			0x2);
	/* read back to flush the posted write */
	gk20a_readl(g, pri_ringstation_sys_decode_config_r());
}
/* Issue an ELPG flush to all LTCs (broadcast write) and poll each LTC's
 * flush-pending bit individually until every one has completed.  Caller
 * holds the required lock (hence "_locked"). */
void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
{
	u32 data;
	bool done[g->ltc_count];	/* per-LTC completion flags (VLA) */
	s32 retry = 100;
	int i;
	int num_done = 0;
	/* byte distance between consecutive per-LTC g_elpg registers */
	u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();

	gk20a_dbg_fn("");

	trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);

	for (i = 0; i < g->ltc_count; i++)
		done[i] = 0;

	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
	do {
		for (i = 0; i < g->ltc_count; i++) {
			if (done[i])
				continue;

			data = gk20a_readl(g,
					ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);

			if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
				gk20a_dbg_info("g_elpg_flush 0x%x", data);
			} else {
				done[i] = 1;
				num_done++;
			}
		}

		if (num_done < g->ltc_count) {
			retry--;
			udelay(5);
		} else
			break;
	/* pre-silicon platforms poll forever (no retry limit) */
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0 && tegra_platform_is_silicon())
		gk20a_warn(dev_from_gk20a(g),
			   "g_elpg_flush too many retries");

	trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
}
/* One-time clock HW setup: configure the gpc2clk output path (indiv4 sdiv
 * mode, vcodiv by 1, bypass divider at default).  Always returns 0. */
static int gk20a_init_clk_setup_hw(struct gk20a *g)
{
	u32 data;

	gk20a_dbg_fn("");

	data = gk20a_readl(g, trim_sys_gpc2clk_out_r());
	/* update all three divider fields in one read-modify-write */
	data = set_field(data,
			trim_sys_gpc2clk_out_sdiv14_m() |
			trim_sys_gpc2clk_out_vcodiv_m() |
			trim_sys_gpc2clk_out_bypdiv_m(),
			trim_sys_gpc2clk_out_sdiv14_indiv4_mode_f() |
			trim_sys_gpc2clk_out_vcodiv_by1_f() |
			trim_sys_gpc2clk_out_bypdiv_f(0));
	gk20a_writel(g, trim_sys_gpc2clk_out_r(), data);

	return 0;
}
/* Reset and restart the priv ring (variant without the gpc2clk divider
 * programming; skipped entirely on the linsim platform). */
void gk20a_reset_priv_ring(struct gk20a *g)
{
	/* the simulator has no priv ring to reset */
	if (tegra_platform_is_linsim())
		return;

	gk20a_reset(g, mc_enable_priv_ring_enabled_f());

	if (g->ops.clock_gating.slcg_priring_load_gating_prod)
		g->ops.clock_gating.slcg_priring_load_gating_prod(g,
				g->slcg_enabled);

	/* NOTE(review): magic command 0x4 — presumably "enumerate and start
	 * ring"; confirm against the ring master manual */
	gk20a_writel(g,pri_ringmaster_command_r(),
			0x4);

	gk20a_writel(g, pri_ringstation_sys_decode_config_r(),
			0x2);
	/* read back to flush the posted write */
	gk20a_readl(g, pri_ringstation_sys_decode_config_r());
}
/* Sync the L2 bypass-mode register with the debugfs-controlled flag:
 * when ltc_enabled_debug differs from the current state, enable or disable
 * L2 bypass accordingly and record the new state. */
static void gk20a_ltc_sync_debugfs(struct gk20a *g)
{
	u32 bypass_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();

	spin_lock(&g->debugfs_lock);
	if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) {
		u32 mgmt2 = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());

		if (g->mm.ltc_enabled_debug)
			/* bypass disabled (normal caching ops) */
			mgmt2 &= ~bypass_f;
		else
			/* bypass enabled (no caching) */
			mgmt2 |= bypass_f;

		gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), mgmt2);
		g->mm.ltc_enabled = g->mm.ltc_enabled_debug;
	}
	spin_unlock(&g->debugfs_lock);
}
/* Compute the total L2 cache size in bytes from the active ways/sets read
 * out of tstg_cfg1 and the active LTC count (gm20b fixed constants:
 * 2 LTS per LTC, 128-byte lines). */
static int gm20b_determine_L2_size_bytes(struct gk20a *g)
{
	u32 lts_per_ltc;
	u32 ways;
	u32 sets;
	u32 bytes_per_line;
	u32 active_ltcs;
	u32 cache_size;

	u32 tmp;
	u32 active_sets_value;

	tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
	ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));

	/* active sets is an enum: all / half / quarter */
	active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
	if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
		sets = 64;
	} else if (active_sets_value ==
		   ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
		sets = 32;
	} else if (active_sets_value ==
		   ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
		sets = 16;
	} else {
		dev_err(dev_from_gk20a(g),
			"Unknown constant %u for active sets",
		       (unsigned)active_sets_value);
		sets = 0;
	}

	active_ltcs = g->gr.num_fbps;

	/* chip-specific values */
	lts_per_ltc = 2;
	bytes_per_line = 128;

	cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;

	return cache_size;
}
/* Priv ring interrupt service: log the status registers, reset the ring on
 * fatal status bits, then acknowledge the interrupt through the ring master
 * command register and wait (bounded) for the ack to be consumed. */
void gk20a_priv_ring_isr(struct gk20a *g)
{
	u32 status0, status1;
	u32 cmd;
	s32 retry = 100;

	status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
	status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

	gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
		"status1: 0x%08x", status0, status1);

	/* NOTE(review): 0x1|0x2|0x4 are treated as "reset required" status
	 * bits — confirm which error conditions these encode */
	if (status0 & (0x1 | 0x2 | 0x4)) {
		gk20a_reset_priv_ring(g);
	}

	/* acknowledge the interrupt via the command register */
	cmd = gk20a_readl(g, pri_ringmaster_command_r());
	cmd = set_field(cmd, pri_ringmaster_command_cmd_m(),
		pri_ringmaster_command_cmd_ack_interrupt_f());
	gk20a_writel(g, pri_ringmaster_command_r(), cmd);

	/* poll until the command field returns to no_cmd, bounded by retry */
	do {
		cmd = pri_ringmaster_command_cmd_v(
			gk20a_readl(g, pri_ringmaster_command_r()));
		usleep_range(20, 40);
	} while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry);

	if (retry <= 0)
		gk20a_warn(dev_from_gk20a(g),
			"priv ringmaster cmd ack too many retries");

	status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
	status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

	gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
		" status1: 0x%08x", status0, status1);
}
/* Dump VPR and WPR configuration to the error log.  The info registers are
 * indexed: the index field is programmed once, then consecutive reads walk
 * through the fields.  NOTE(review): the repeated reads of the same register
 * rely on the index auto-incrementing per read — confirm in the FB manual. */
static void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g)
{
	u32 val;

	/* print vpr and wpr info */
	val = gk20a_readl(g, fb_mmu_vpr_info_r());
	val &= ~0x3;	/* clear the 2-bit index field */
	val |= fb_mmu_vpr_info_index_addr_lo_v();
	gk20a_writel(g, fb_mmu_vpr_info_r(), val);
	gk20a_err(dev_from_gk20a(g),
		"VPR: %08x %08x %08x %08x",
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()));

	val = gk20a_readl(g, fb_mmu_wpr_info_r());
	val &= ~0xf;	/* clear the 4-bit index field */
	val |= (fb_mmu_wpr_info_index_allow_read_v());
	gk20a_writel(g, fb_mmu_wpr_info_r(), val);
	gk20a_err(dev_from_gk20a(g),
		"WPR: %08x %08x %08x %08x %08x %08x",
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()));
}
/* Full debug dump: power on the GPU, then print the state of every PBDMA,
 * every engine, and every in-use channel, and power back down. */
static void gk20a_debug_show_dump(struct platform_device *pdev,
		struct gk20a_debug_output *o)
{
	struct gk20a_platform *platform = gk20a_get_platform(pdev);
	struct gk20a *g = platform->g;
	struct fifo_gk20a *f = &g->fifo;
	u32 chid;
	int i, err;

	err = gk20a_busy(g->dev);
	if (err) {
		gk20a_debug_output(o, "failed to power on gpu: %d\n", err);
		return;
	}

	/* per-PBDMA status and pushbuffer pointers */
	for (i = 0; i < fifo_pbdma_status__size_1_v(); i++) {
		u32 status = gk20a_readl(g, fifo_pbdma_status_r(i));
		u32 chan_status = fifo_pbdma_status_chan_status_v(status);

		gk20a_debug_output(o, "%s pbdma %d: ", g->dev->name, i);
		gk20a_debug_output(o,
				"id: %d (%s), next_id: %d (%s) status: %s\n",
				fifo_pbdma_status_id_v(status),
				fifo_pbdma_status_id_type_v(status) ?
					"tsg" : "channel",
				fifo_pbdma_status_next_id_v(status),
				fifo_pbdma_status_next_id_type_v(status) ?
					"tsg" : "channel",
				chan_status_str[chan_status]);
		gk20a_debug_output(o, "PUT: %016llx GET: %016llx "
				"FETCH: %08x HEADER: %08x\n",
			(u64)gk20a_readl(g, pbdma_put_r(i)) +
			((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL),
			(u64)gk20a_readl(g, pbdma_get_r(i)) +
			((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL),
			gk20a_readl(g, pbdma_gp_fetch_r(i)),
			gk20a_readl(g, pbdma_pb_header_r(i)));
	}

	gk20a_debug_output(o, "\n");

	/* per-engine status */
	for (i = 0; i < fifo_engine_status__size_1_v(); i++) {
		u32 status = gk20a_readl(g, fifo_engine_status_r(i));
		u32 ctx_status = fifo_engine_status_ctx_status_v(status);

		gk20a_debug_output(o, "%s eng %d: ", g->dev->name, i);
		gk20a_debug_output(o,
				"id: %d (%s), next_id: %d (%s), ctx: %s ",
				fifo_engine_status_id_v(status),
				fifo_engine_status_id_type_v(status) ?
					"tsg" : "channel",
				fifo_engine_status_next_id_v(status),
				fifo_engine_status_next_id_type_v(status) ?
					"tsg" : "channel",
				ctx_status_str[ctx_status]);

		if (fifo_engine_status_faulted_v(status))
			gk20a_debug_output(o, "faulted ");
		if (fifo_engine_status_engine_v(status))
			gk20a_debug_output(o, "busy ");
		gk20a_debug_output(o, "\n");
	}
	gk20a_debug_output(o, "\n");

	/* every in-use channel */
	for (chid = 0; chid < f->num_channels; chid++) {
		if (f->channel[chid].in_use) {
			struct channel_gk20a *gpu_ch = &f->channel[chid];
			gk20a_debug_show_channel(g, o, gpu_ch);
		}
	}
	gk20a_idle(g->dev);
}
/* Run a compression-bit-cache operation (clear/clean/invalidate) across all
 * LTCs, then poll each LTC/slice until the operation bit drops.
 * Returns 0 on success, -EBUSY on a poll timeout (silicon only). */
int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
		       u32 min, u32 max)
{
	int err = 0;
	struct gr_gk20a *gr = &g->gr;
	u32 ltc, slice, ctrl1, val, hw_op = 0;
	s32 retry = 200;
	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));

	gk20a_dbg_fn("");

	trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);

	/* nothing to do if the compbit store was never allocated */
	if (gr->compbit_store.mem.size == 0)
		return 0;

	mutex_lock(&g->mm.l2_op_lock);

	if (op == gk20a_cbc_op_clear) {
		/* clear also needs the comptag range programmed */
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
			ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
			ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
	} else if (op == gk20a_cbc_op_clean) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
	} else if (op == gk20a_cbc_op_invalidate) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
	} else {
		BUG_ON(1);
	}

	/* kick off the op on all LTCs via the broadcast register */
	gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
		     gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);

	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		for (slice = 0; slice < slices_per_ltc; slice++) {

			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
				ltc * proj_ltc_stride_v() +
				slice * proj_lts_stride_v();

			retry = 200;
			do {
				val = gk20a_readl(g, ctrl1);
				if (!(val & hw_op))
					break;
				retry--;
				udelay(5);
			/* pre-silicon polls without a retry limit */
			} while (retry >= 0 ||
					!tegra_platform_is_silicon());

			if (retry < 0 && tegra_platform_is_silicon()) {
				gk20a_err(dev_from_gk20a(g),
					   "comp tag clear timeout\n");
				err = -EBUSY;
				goto out;
			}
		}
	}
out:
	trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
	mutex_unlock(&g->mm.l2_op_lock);
	return err;
}
/* * Performs a full flush of the L2 cache. */ void gm20b_flush_ltc(struct gk20a *g) { unsigned long timeout; int ltc; #define __timeout_init() \ do { \ timeout = jiffies + HZ; \ } while (0) #define __timeout_check() \ do { \ if (tegra_platform_is_silicon() && \ time_after(jiffies, timeout)) { \ gk20a_err(dev_from_gk20a(g), "L2 flush timeout!"); \ break; \ } \ } while (0) /* Clean... */ gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt1_r(), ltc_ltcs_ltss_tstg_cmgmt1_clean_pending_f() | ltc_ltcs_ltss_tstg_cmgmt1_max_cycles_between_cleans_3_f() | ltc_ltcs_ltss_tstg_cmgmt1_clean_wait_for_fb_to_pull_true_f() | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_last_class_true_f() | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_normal_class_true_f() | ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f()); /* Wait on each LTC individually. */ for (ltc = 0; ltc < g->ltc_count; ltc++) { u32 op_pending; __timeout_init(); do { int cmgmt1 = ltc_ltc0_ltss_tstg_cmgmt1_r() + ltc * proj_ltc_stride_v(); op_pending = gk20a_readl(g, cmgmt1); __timeout_check(); } while (op_pending & ltc_ltc0_ltss_tstg_cmgmt1_clean_pending_f()); } /* And invalidate. */ gk20a_writel(g, ltc_ltcs_ltss_tstg_cmgmt0_r(), ltc_ltcs_ltss_tstg_cmgmt0_invalidate_pending_f() | ltc_ltcs_ltss_tstg_cmgmt0_max_cycles_between_invalidates_3_f() | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_last_class_true_f() | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_normal_class_true_f() | ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f()); /* Wait on each LTC individually. */ for (ltc = 0; ltc < g->ltc_count; ltc++) { u32 op_pending; __timeout_init(); do { int cmgmt0 = ltc_ltc0_ltss_tstg_cmgmt0_r() + ltc * proj_ltc_stride_v(); op_pending = gk20a_readl(g, cmgmt0); __timeout_check(); } while (op_pending & ltc_ltc0_ltss_tstg_cmgmt0_invalidate_pending_f()); } }
/* Size and allocate the compression-bit backing store, then initialize the
 * comptag allocator.  Returns 0 on success or a negative errno. */
static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
{
	/* max memory size (MB) to cover */
	u32 max_size = gr->max_comptag_mem;
	/* one tag line covers 128KB */
	u32 max_comptag_lines = max_size << 3;

	u32 hw_max_comptag_lines =
		ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();

	u32 cbc_param =
		gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
	u32 comptags_per_cacheline =
		ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
	/* cache line size is encoded as a shift of 512 bytes */
	u32 cacheline_size =
		512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
	u32 slices_per_ltc =
		ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param);

	u32 compbit_backing_size;

	int err;

	gk20a_dbg_fn("");

	if (max_comptag_lines == 0)
		return 0;

	if (max_comptag_lines > hw_max_comptag_lines)
		max_comptag_lines = hw_max_comptag_lines;

	compbit_backing_size =
		DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
		cacheline_size * slices_per_ltc * g->ltc_count;

	/* aligned to 2KB * ltc_count */
	compbit_backing_size +=
		g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();

	/* must be a multiple of 64KB */
	compbit_backing_size = roundup(compbit_backing_size, 64*1024);

	/* recompute the line count actually covered by the rounded size */
	max_comptag_lines =
		(compbit_backing_size * comptags_per_cacheline) /
		(cacheline_size * slices_per_ltc * g->ltc_count);

	if (max_comptag_lines > hw_max_comptag_lines)
		max_comptag_lines = hw_max_comptag_lines;

	gk20a_dbg_info("compbit backing store size : %d",
		compbit_backing_size);
	gk20a_dbg_info("max comptag lines : %d",
		max_comptag_lines);

	/* linsim needs physically-contiguous backing memory */
	if (tegra_platform_is_linsim())
		err = gk20a_ltc_alloc_phys_cbc(g, compbit_backing_size);
	else
		err = gk20a_ltc_alloc_virt_cbc(g, compbit_backing_size);

	if (err)
		return err;

	err = gk20a_comptag_allocator_init(&gr->comp_tags, max_comptag_lines);
	if (err)
		return err;

	gr->comptags_per_cacheline = comptags_per_cacheline;
	gr->slices_per_ltc = slices_per_ltc;
	gr->cacheline_size = cacheline_size;

	return 0;
}