static int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; int attr_buffer_size; u32 cb_buffer_size = gr->bundle_cb_default_size * gr_scc_bundle_cb_size_div_256b_byte_granularity_v(); u32 pagepool_buffer_size = gr_scc_pagepool_total_pages_hwmax_value_v() * gr_scc_pagepool_total_pages_byte_granularity_v(); gk20a_dbg_fn(""); attr_buffer_size = g->ops.gr.calc_global_ctx_buffer_size(g); gk20a_dbg_info("cb_buffer_size : %d", cb_buffer_size); gr->global_ctx_buffer[CIRCULAR].mem.size = cb_buffer_size; gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size); gr->global_ctx_buffer[PAGEPOOL].mem.size = pagepool_buffer_size; gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size); gr->global_ctx_buffer[ATTRIBUTE].mem.size = attr_buffer_size; gk20a_dbg_info("priv access map size : %d", gr->ctx_vars.priv_access_map_size); gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size = gr->ctx_vars.priv_access_map_size; return 0; }
/*
 * One-time gm20b L2 (LTC) floorsweep setup: read the LTC counts,
 * program the active-LTC registers, disable the VDC 4:2 compression
 * path, and mask all LTC interrupt sources.
 */
void gm20b_ltc_init_fs_state(struct gk20a *g)
{
    u32 intr_en;

    gk20a_dbg_info("initialize gm20b l2");

    g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r());
    g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r());
    gk20a_dbg_info("%d ltcs out of %d", g->ltc_count, g->max_ltc_count);

    gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), g->ltc_count);
    gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(), g->ltc_count);

    /* Read one unit's dstg cfg, set the vdc_4to2 disable bit, and
     * broadcast-write it back to all LTC/LTS units. */
    gk20a_writel(g, ltc_ltcs_ltss_dstg_cfg0_r(),
        gk20a_readl(g, ltc_ltc0_lts0_dstg_cfg0_r()) |
        ltc_ltcs_ltss_dstg_cfg0_vdc_4to2_disable_m());

    /* Disable LTC interrupts */
    intr_en = gk20a_readl(g, ltc_ltcs_ltss_intr_r());
    intr_en &= ~(ltc_ltcs_ltss_intr_en_evicted_cb_m() |
             ltc_ltcs_ltss_intr_en_illegal_compstat_access_m() |
             ltc_ltcs_ltss_intr_en_illegal_compstat_m());
    gk20a_writel(g, ltc_ltcs_ltss_intr_r(), intr_en);
}
static void fb_gm20b_init_fs_state(struct gk20a *g) { gk20a_dbg_info("initialize gm20b fb"); gk20a_writel(g, fb_fbhub_num_active_ltcs_r(), g->ltc_count); }
/* Flushes the compression bit cache as well as "data".
 * Note: the name here is a bit of a misnomer. ELPG uses this
 * internally... but ELPG doesn't have to be on to do it manually.
 */
static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
{
    u32 data;
    s32 retry = 100;

    gk20a_dbg_fn("");

    /* Make sure all previous writes are committed to the L2. There's no
       guarantee that writes are to DRAM. This will be a sysmembar internal
       to the L2. */
    gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
             ltc_ltcs_ltss_g_elpg_flush_pending_f());
    do {
        data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());

        if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
            ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
            gk20a_dbg_info("g_elpg_flush 0x%x", data);
            retry--;
            usleep_range(20, 40);
        } else
            break;
    /* Off silicon (simulator/FPGA) the flush can legitimately take far
     * longer than 100 polls, so keep waiting until it completes. */
    } while (retry >= 0 || !tegra_platform_is_silicon());

    /* On non-silicon the loop above only exits once the flush is done,
     * so a negative retry there is expected — warn only on silicon
     * (matches gm20b_ltc_g_elpg_flush_locked). */
    if (retry < 0 && tegra_platform_is_silicon())
        gk20a_warn(dev_from_gk20a(g),
                "g_elpg_flush too many retries");
}
/*
 * Initialize the GPC MMU by mirroring the FB MMU configuration and
 * debug state into the per-GPC MMU registers.
 */
static void gr_gm20b_init_gpc_mmu(struct gk20a *g)
{
    u32 mmu_ctrl;

    gk20a_dbg_info("initialize gpc mmu");

    if (!g->ops.privsecurity) {
        /* Bypass MMU check for non-secure boot. For secure boot
         * this register write has no effect. */
        gk20a_writel(g, fb_priv_mmu_phy_secure_r(), 0xffffffff);
    }

    /* Copy only the relevant FB MMU control fields into the GPC MMU
     * so both MMUs run with an identical configuration. */
    mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
    mmu_ctrl &= gr_gpcs_pri_mmu_ctrl_vm_pg_size_m() |
            gr_gpcs_pri_mmu_ctrl_use_pdb_big_page_size_m() |
            gr_gpcs_pri_mmu_ctrl_vol_fault_m() |
            gr_gpcs_pri_mmu_ctrl_comp_fault_m() |
            gr_gpcs_pri_mmu_ctrl_miss_gran_m() |
            gr_gpcs_pri_mmu_ctrl_cache_mode_m() |
            gr_gpcs_pri_mmu_ctrl_mmu_aperture_m() |
            gr_gpcs_pri_mmu_ctrl_mmu_vol_m() |
            gr_gpcs_pri_mmu_ctrl_mmu_disable_m();
    gk20a_writel(g, gr_gpcs_pri_mmu_ctrl_r(), mmu_ctrl);

    /* No PM unit/request filtering. */
    gk20a_writel(g, gr_gpcs_pri_mmu_pm_unit_mask_r(), 0);
    gk20a_writel(g, gr_gpcs_pri_mmu_pm_req_mask_r(), 0);

    /* Mirror the FB MMU debug control/write/read state. */
    gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(),
             gk20a_readl(g, fb_mmu_debug_ctrl_r()));
    gk20a_writel(g, gr_gpcs_pri_mmu_debug_wr_r(),
             gk20a_readl(g, fb_mmu_debug_wr_r()));
    gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(),
             gk20a_readl(g, fb_mmu_debug_rd_r()));

    /* Propagate the active LTC count seen by the FB hub. */
    gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(),
             gk20a_readl(g, fb_fbhub_num_active_ltcs_r()));
}
/*
 * Patch the bundle circular-buffer base/size into the channel context
 * (both SCC and SWDX copies), then program the pd_ab_dist_cfg2 token
 * and state limits derived from the default bundle CB size.
 */
static void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
            struct channel_ctx_gk20a *ch_ctx,
            u64 addr, u64 size, bool patch)
{
    u32 state_limit;

    /* SCC copy of the bundle CB base/size. */
    gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(),
        gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
    gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(),
        gr_scc_bundle_cb_size_div_256b_f(size) |
        gr_scc_bundle_cb_size_valid_true_f(), patch);

    /* SWDX mirror of the same buffer. */
    gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
        gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
    gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
        gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
        gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);

    /* data for state_limit */
    state_limit = (g->gr.bundle_cb_default_size *
        gr_scc_bundle_cb_size_div_256b_byte_granularity_v()) /
        gr_pd_ab_dist_cfg2_state_limit_scc_bundle_granularity_v();
    /* Never exceed the GPM fifo depth. */
    state_limit = min_t(u32, state_limit, g->gr.min_gpm_fifo_depth);

    gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
        g->gr.bundle_cb_token_limit, state_limit);

    gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(),
        gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
        gr_pd_ab_dist_cfg2_state_limit_f(state_limit), patch);
}
/*
 * Priv ring interrupt service routine (gm20b variant): decode the
 * ringmaster status registers, reset the ring on fatal faults, log
 * SYS/GPC priv write errors, then ack the interrupt and poll for the
 * ack to be consumed.
 */
void gk20a_priv_ring_isr(struct gk20a *g)
{
    u32 status0, status1;
    u32 cmd;
    s32 retry = 100;

    /* The priv ring is not modelled on linsim; nothing to service. */
    if (tegra_platform_is_linsim())
        return;

    status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
    status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

    gk20a_dbg(gpu_dbg_intr, "ringmaster intr status0: 0x%08x,"
        "status1: 0x%08x", status0, status1);

    /* NOTE(review): bits 0x1|0x2|0x4 appear to be fatal ring fault
     * conditions warranting a full ring reset — confirm the exact bit
     * meanings against the ringmaster manuals. */
    if (status0 & (0x1 | 0x2 | 0x4)) {
        gk20a_reset_priv_ring(g);
    }

    /* SYS priv write error: dump address/data/info/code. The raw
     * offsets 0x122120..0x12212c are presumably the SYS priv error
     * registers — verify against the hw headers. */
    if (status0 & 0x100) {
        gk20a_dbg(gpu_dbg_intr, "SYS write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x",
            gk20a_readl(g, 0x122120), gk20a_readl(g, 0x122124),
            gk20a_readl(g, 0x122128), gk20a_readl(g, 0x12212c));
    }

    /* GPC priv write error: same layout at the GPC aperture. */
    if (status1 & 0x1) {
        gk20a_dbg(gpu_dbg_intr, "GPC write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x",
            gk20a_readl(g, 0x128120), gk20a_readl(g, 0x128124),
            gk20a_readl(g, 0x128128), gk20a_readl(g, 0x12812c));
    }

    /* Ack the interrupt back to the ringmaster. */
    cmd = gk20a_readl(g, pri_ringmaster_command_r());
    cmd = set_field(cmd, pri_ringmaster_command_cmd_m(),
        pri_ringmaster_command_cmd_ack_interrupt_f());
    gk20a_writel(g, pri_ringmaster_command_r(), cmd);

    /* Poll until the command field returns to no_cmd, i.e. the ack has
     * been consumed, with a bounded number of retries. */
    do {
        cmd = pri_ringmaster_command_cmd_v(
            gk20a_readl(g, pri_ringmaster_command_r()));
        usleep_range(20, 40);
    } while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry);

    if (retry <= 0)
        gk20a_warn(dev_from_gk20a(g),
            "priv ringmaster cmd ack too many retries");

    /* Re-read and log the (hopefully now clear) status registers. */
    status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
    status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

    gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
        " status1: 0x%08x", status0, status1);
}
/*
 * Priv ring interrupt service routine (gk20a variant): read the
 * ringmaster status registers, reset the ring on fault bits, then ack
 * the interrupt and poll for the ack to be consumed.
 */
void gk20a_priv_ring_isr(struct gk20a *g)
{
    u32 status0, status1;
    u32 cmd;
    s32 retry = 100;

    status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
    status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

    gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
        "status1: 0x%08x", status0, status1);

    /* NOTE(review): bits 0x1|0x2|0x4 appear to be fatal ring fault
     * conditions warranting a full ring reset — confirm against the
     * ringmaster manuals. */
    if (status0 & (0x1 | 0x2 | 0x4)) {
        gk20a_reset_priv_ring(g);
    }

    /* Ack the interrupt back to the ringmaster. */
    cmd = gk20a_readl(g, pri_ringmaster_command_r());
    cmd = set_field(cmd, pri_ringmaster_command_cmd_m(),
        pri_ringmaster_command_cmd_ack_interrupt_f());
    gk20a_writel(g, pri_ringmaster_command_r(), cmd);

    /* Poll until the command field returns to no_cmd, i.e. the ack has
     * been consumed, with a bounded number of retries. */
    do {
        cmd = pri_ringmaster_command_cmd_v(
            gk20a_readl(g, pri_ringmaster_command_r()));
        usleep_range(20, 40);
    } while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry);

    if (retry <= 0)
        gk20a_warn(dev_from_gk20a(g),
            "priv ringmaster cmd ack too many retries");

    /* Re-read and log the (hopefully now clear) status registers. */
    status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
    status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

    gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
        " status1: 0x%08x", status0, status1);
}
/*
 * Select and install the HAL matching the detected GPU. Returns 0 on
 * success, -ENODEV for an unrecognized chip id.
 */
int gpu_init_hal(struct gk20a *g)
{
    /* Combined architecture + implementation value identifies the chip. */
    u32 gpu_id = g->gpu_characteristics.arch +
             g->gpu_characteristics.impl;

    if (gpu_id != GK20A_GPUID_GK20A) {
        gk20a_err(&g->dev->dev, "no support for %x", gpu_id);
        return -ENODEV;
    }

    gk20a_dbg_info("gk20a detected");
    gk20a_init_hal(g);
    return 0;
}
/*
 * Flush the compression bit cache on every active LTC and wait for all
 * flushes to complete. The flush is kicked via a single broadcast
 * write; completion is then polled per-LTC.
 */
void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
{
    u32 data;
    /* NOTE(review): VLA on the kernel stack — ltc_count is small in
     * practice, but kernel style prefers a fixed upper bound. */
    bool done[g->ltc_count];
    s32 retry = 100;
    int i;
    int num_done = 0;
    /* Per-LTC register stride, derived from two consecutive LTC
     * apertures. */
    u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() -
             ltc_ltc0_ltss_g_elpg_r();

    gk20a_dbg_fn("");

    trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);

    for (i = 0; i < g->ltc_count; i++)
        done[i] = 0;

    /* Broadcast the flush request to all LTCs at once. */
    gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
             ltc_ltcs_ltss_g_elpg_flush_pending_f());
    do {
        /* Poll each LTC that has not yet reported completion. */
        for (i = 0; i < g->ltc_count; i++) {
            if (done[i])
                continue;

            data = gk20a_readl(g,
                    ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);

            if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
                gk20a_dbg_info("g_elpg_flush 0x%x", data);
            } else {
                done[i] = 1;
                num_done++;
            }
        }

        if (num_done < g->ltc_count) {
            retry--;
            udelay(5);
        } else
            break;
    /* Off silicon, keep waiting past the retry budget until done. */
    } while (retry >= 0 || !tegra_platform_is_silicon());

    if (retry < 0 && tegra_platform_is_silicon())
        gk20a_warn(dev_from_gk20a(g),
                "g_elpg_flush too many retries");

    trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
}
/*
 * Size and allocate the gm20b compression-bit backing store and the
 * comptag allocator. Geometry (comptags per cacheline, cacheline size,
 * slices per LTC) comes from the cbc_param register. Returns 0 on
 * success or a negative errno from the CBC allocation / allocator init.
 */
static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
{
    /* max memory size (MB) to cover */
    u32 max_size = gr->max_comptag_mem;
    /* one tag line covers 128KB */
    u32 max_comptag_lines = max_size << 3;

    u32 hw_max_comptag_lines =
        ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v();

    u32 cbc_param =
        gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r());
    u32 comptags_per_cacheline =
        ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param);
    u32 cacheline_size =
        512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param);
    u32 slices_per_ltc =
        ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(cbc_param);

    u32 compbit_backing_size;

    int err;

    gk20a_dbg_fn("");

    /* Compression disabled: nothing to set up. */
    if (max_comptag_lines == 0)
        return 0;

    /* Clamp to what the hardware can address. */
    if (max_comptag_lines > hw_max_comptag_lines)
        max_comptag_lines = hw_max_comptag_lines;

    compbit_backing_size =
        DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) *
        cacheline_size * slices_per_ltc * g->ltc_count;

    /* aligned to 2KB * ltc_count */
    compbit_backing_size +=
        g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();

    /* must be a multiple of 64KB */
    compbit_backing_size = roundup(compbit_backing_size, 64*1024);

    /* Recompute the line count the (rounded-up) store can hold. */
    max_comptag_lines =
        (compbit_backing_size * comptags_per_cacheline) /
        (cacheline_size * slices_per_ltc * g->ltc_count);

    if (max_comptag_lines > hw_max_comptag_lines)
        max_comptag_lines = hw_max_comptag_lines;

    gk20a_dbg_info("compbit backing store size : %d",
        compbit_backing_size);
    gk20a_dbg_info("max comptag lines : %d",
        max_comptag_lines);

    /* linsim has no IOMMU path, so use physically contiguous backing. */
    if (tegra_platform_is_linsim())
        err = gk20a_ltc_alloc_phys_cbc(g, compbit_backing_size);
    else
        err = gk20a_ltc_alloc_virt_cbc(g, compbit_backing_size);

    if (err)
        return err;

    err = gk20a_comptag_allocator_init(&gr->comp_tags, max_comptag_lines);
    if (err)
        return err;

    /* Cache the geometry for later comptag math. */
    gr->comptags_per_cacheline = comptags_per_cacheline;
    gr->slices_per_ltc = slices_per_ltc;
    gr->cacheline_size = cacheline_size;

    return 0;
}
/* Calculate and update M/N/PL as well as pll->freq
    ref_clk_f = clk_in_f / src_div = clk_in_f; (src_div = 1 on gk20a)
    u_f = ref_clk_f / M;
    PLL output = vco_f = u_f * N = ref_clk_f * N / M;
    gpc2clk = target clock frequency = vco_f / PL;
    gpcclk = gpc2clk / 2;
 */
static int clk_config_pll(struct clk_gk20a *clk, struct pll *pll,
    struct pll_parms *pll_params, u32 *target_freq, bool best_fit)
{
    u32 min_vco_f, max_vco_f;
    u32 best_M, best_N;
    u32 low_PL, high_PL, best_PL;
    u32 m, n, n2;
    u32 target_vco_f, vco_f;
    u32 ref_clk_f, target_clk_f, u_f;
    u32 delta, lwv, best_delta = ~0;
    int pl;

    BUG_ON(target_freq == NULL);

    gk20a_dbg_fn("request target freq %d MHz", *target_freq);

    ref_clk_f = pll->clk_in;
    target_clk_f = *target_freq;
    max_vco_f = pll_params->max_vco;
    min_vco_f = pll_params->min_vco;
    /* Fallback result if the search finds nothing better. */
    best_M = pll_params->max_M;
    best_N = pll_params->min_N;
    best_PL = pll_params->min_PL;

    /* Aim the VCO 2% above the target to leave headroom. */
    target_vco_f = target_clk_f + target_clk_f / 50;
    if (max_vco_f < target_vco_f)
        max_vco_f = target_vco_f;

    /* Post-divider range that can reach the target from the legal
     * VCO band, clamped to the PLL parameter limits. */
    high_PL = (max_vco_f + target_vco_f - 1) / target_vco_f;
    high_PL = min(high_PL, pll_params->max_PL);
    high_PL = max(high_PL, pll_params->min_PL);

    low_PL = min_vco_f / target_vco_f;
    low_PL = min(low_PL, pll_params->max_PL);
    low_PL = max(low_PL, pll_params->min_PL);

    /* Find Indices of high_PL and low_PL */
    /* (converts divider values into pl_to_div[] table indices) */
    for (pl = 0; pl < 14; pl++) {
        if (pl_to_div[pl] >= low_PL) {
            low_PL = pl;
            break;
        }
    }
    for (pl = 0; pl < 14; pl++) {
        if (pl_to_div[pl] >= high_PL) {
            high_PL = pl;
            break;
        }
    }
    gk20a_dbg_info("low_PL %d(div%d), high_PL %d(div%d)",
        low_PL, pl_to_div[low_PL], high_PL, pl_to_div[high_PL]);

    /* Exhaustive search over PL, M and the feasible N window for the
     * combination whose output is closest to the target. */
    for (pl = low_PL; pl <= high_PL; pl++) {
        target_vco_f = target_clk_f * pl_to_div[pl];

        for (m = pll_params->min_M; m <= pll_params->max_M; m++) {
            u_f = ref_clk_f / m;

            /* u_f only shrinks as m grows: stop when too low. */
            if (u_f < pll_params->min_u)
                break;
            if (u_f > pll_params->max_u)
                continue;

            /* N values bracketing the target VCO for this m. */
            n = (target_vco_f * m) / ref_clk_f;
            n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f;

            if (n > pll_params->max_N)
                break;

            for (; n <= n2; n++) {
                if (n < pll_params->min_N)
                    continue;
                if (n > pll_params->max_N)
                    break;

                vco_f = ref_clk_f * n / m;

                if (vco_f >= min_vco_f && vco_f <= max_vco_f) {
                    /* Rounded output frequency at this PL. */
                    lwv = (vco_f + (pl_to_div[pl] / 2))
                        / pl_to_div[pl];
                    delta = abs(lwv - target_clk_f);

                    if (delta < best_delta) {
                        best_delta = delta;
                        best_M = m;
                        best_N = n;
                        best_PL = pl;

                        if (best_delta == 0 ||
                            /* 0.45% for non best fit */
                            (!best_fit && (vco_f / best_delta > 218))) {
                            goto found_match;
                        }
                        gk20a_dbg_info("delta %d @ M %d, N %d, PL %d",
                            delta, m, n, pl);
                    }
                }
            }
        }
    }

found_match:
    /* best_delta still ~0 means no (M, N, PL) combination was legal. */
    BUG_ON(best_delta == ~0);

    if (best_fit && best_delta != 0)
        gk20a_dbg_clk("no best match for target @ %dMHz on gpc_pll",
            target_clk_f);

    pll->M = best_M;
    pll->N = best_N;
    pll->PL = best_PL;

    /* save current frequency */
    pll->freq = ref_clk_f * pll->N / (pll->M * pl_to_div[pll->PL]);

    *target_freq = pll->freq;

    gk20a_dbg_clk("actual target freq %d MHz, M %d, N %d, PL %d(div%d)",
        *target_freq, pll->M, pll->N, pll->PL, pl_to_div[pll->PL]);

    gk20a_dbg_fn("done");

    return 0;
}
static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) { /* max memory size (MB) to cover */ u32 max_size = gr->max_comptag_mem; /* one tag line covers 128KB */ u32 max_comptag_lines = max_size << 3; u32 hw_max_comptag_lines = ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); u32 cbc_param = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); u32 comptags_per_cacheline = ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); u32 slices_per_fbp = ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(cbc_param); u32 cacheline_size = 512 << ltc_ltcs_ltss_cbc_param_cache_line_size_v(cbc_param); u32 compbit_backing_size; int err; gk20a_dbg_fn(""); if (max_comptag_lines == 0) { gr->compbit_store.size = 0; return 0; } if (max_comptag_lines > hw_max_comptag_lines) max_comptag_lines = hw_max_comptag_lines; /* no hybird fb */ compbit_backing_size = DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * cacheline_size * slices_per_fbp * gr->num_fbps; /* aligned to 2KB * num_fbps */ compbit_backing_size += gr->num_fbps << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); /* must be a multiple of 64KB */ compbit_backing_size = roundup(compbit_backing_size, 64*1024); max_comptag_lines = (compbit_backing_size * comptags_per_cacheline) / cacheline_size * slices_per_fbp * gr->num_fbps; if (max_comptag_lines > hw_max_comptag_lines) max_comptag_lines = hw_max_comptag_lines; gk20a_dbg_info("compbit backing store size : %d", compbit_backing_size); gk20a_dbg_info("max comptag lines : %d", max_comptag_lines); if (IS_ENABLED(CONFIG_GK20A_PHYS_PAGE_TABLES)) err = gk20a_ltc_alloc_phys_cbc(g, compbit_backing_size); else err = gk20a_ltc_alloc_virt_cbc(g, compbit_backing_size); if (err) return err; gk20a_allocator_init(&gr->comp_tags, "comptag", 1, /* start */ max_comptag_lines - 1, /* length*/ 1); /* align */ gr->comptags_per_cacheline = comptags_per_cacheline; gr->slices_per_fbp = slices_per_fbp; gr->cacheline_size = cacheline_size; return 0; }
/*
 * gk20a L2 floorsweep setup: the chip has exactly one LTC, so record
 * that in both the active and maximum counters.
 */
static void gk20a_ltc_init_fs_state(struct gk20a *g)
{
    gk20a_dbg_info("initialize gk20a L2");

    g->ltc_count = 1;
    g->max_ltc_count = 1;
}