/*
 * Allocate the DMA backing store for the compression-bit (compbit) carveout
 * and build a scatter-gather table describing it.
 *
 * Returns 0 on success, -ENOMEM if the DMA allocation fails, or the error
 * from gk20a_get_sgtable_from_pages() (in which case the DMA allocation is
 * released again — the original code leaked it on this path).
 */
static int gk20a_ltc_alloc_virt_cbc(struct gk20a *g, size_t compbit_backing_size)
{
	struct device *d = dev_from_gk20a(g);
	struct gr_gk20a *gr = &g->gr;
	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t iova;
	int err;

	/* The GPU accesses the store via IOVA only; no CPU mapping needed. */
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);

	gr->compbit_store.pages =
		dma_alloc_attrs(d, compbit_backing_size, &iova,
				GFP_KERNEL, &attrs);
	if (!gr->compbit_store.pages) {
		gk20a_err(dev_from_gk20a(g),
			  "failed to allocate backing store for compbit : size %zu",
			  compbit_backing_size);
		return -ENOMEM;
	}

	gr->compbit_store.base_iova = iova;
	gr->compbit_store.size = compbit_backing_size;

	err = gk20a_get_sgtable_from_pages(d, &gr->compbit_store.sgt,
					   gr->compbit_store.pages, iova,
					   compbit_backing_size);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			  "failed to allocate sgt for backing store");
		/* Fix: release the DMA allocation instead of leaking it. */
		dma_free_attrs(d, compbit_backing_size,
			       gr->compbit_store.pages, iova, &attrs);
		gr->compbit_store.pages = NULL;
		return err;
	}

	return 0;
}
/*
 * Dump the VPR (video protected region) and WPR (write protected region)
 * configuration registers to the error log.
 *
 * NOTE(review): each info register is read several times with the same
 * offset; presumably the hardware auto-increments an internal index after
 * each read so successive reads return different fields — TODO confirm
 * against the fb_mmu_*_info register manuals.  Also note that the order in
 * which the gk20a_readl() arguments to gk20a_err() are evaluated is
 * unspecified in C, so the field ordering in the log line may depend on the
 * compiler — worth verifying if the printed layout matters.
 */
static void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g)
{
	u32 val;

	/* print vpr and wpr info */
	/* Select the first VPR info index (addr_lo) in the low bits. */
	val = gk20a_readl(g, fb_mmu_vpr_info_r());
	val &= ~0x3;
	val |= fb_mmu_vpr_info_index_addr_lo_v();
	gk20a_writel(g, fb_mmu_vpr_info_r(), val);
	gk20a_err(dev_from_gk20a(g), "VPR: %08x %08x %08x %08x",
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()));

	/* Select the first WPR info index (allow_read) in the low bits. */
	val = gk20a_readl(g, fb_mmu_wpr_info_r());
	val &= ~0xf;
	val |= (fb_mmu_wpr_info_index_allow_read_v());
	gk20a_writel(g, fb_mmu_wpr_info_r(), val);
	gk20a_err(dev_from_gk20a(g), "WPR: %08x %08x %08x %08x %08x %08x",
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()));
}
/*
 * Query the SM architecture version from the hypervisor and fill in the
 * sm_arch_* fields of the GPU characteristics.  Failures are logged but
 * not fatal: the fields then stay at their zero defaults.
 */
static void vgpu_gr_detect_sm_arch(struct gk20a *g)
{
	struct gk20a_platform *plat = gk20a_get_platform(g->dev);
	u32 attr_val = 0;
	u32 spa_ver;
	u32 sm_ver = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(plat->virt_handle,
				TEGRA_VGPU_ATTRIB_GPC0_TPC0_SM_ARCH,
				&attr_val))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve SM arch");

	spa_ver = gr_gpc0_tpc0_sm_arch_spa_version_v(attr_val);
	if (spa_ver == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v())
		sm_ver = 0x320; /* SM 3.2 */
	else
		gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x",
			  spa_ver);

	/* on Kepler, SM version == SPA version */
	g->gpu_characteristics.sm_arch_spa_version = sm_ver;
	g->gpu_characteristics.sm_arch_sm_version = sm_ver;
	g->gpu_characteristics.sm_arch_warp_count =
		gr_gpc0_tpc0_sm_arch_warp_count_v(attr_val);
}
/*
 * Scale a compbit configuration value by the number of active LTCs.
 * One active LTC returns base unchanged; two returns base doubled; any
 * other count is logged as invalid and base is returned unchanged.
 */
u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
{
	u32 active_ltcs =
		gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());

	switch (active_ltcs) {
	case 2:
		return base * 2;
	case 1:
		return base;
	default:
		gk20a_err(dev_from_gk20a(g),
			"Invalid number of active ltcs: %08x\n", active_ltcs);
		return base;
	}
}
int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info) { struct fifo_gk20a *f = &g->fifo; struct channel_gk20a *ch = &f->channel[info->chid]; gk20a_dbg_fn(""); if (info->type != TEGRA_VGPU_GR_INTR_NOTIFY && info->type != TEGRA_VGPU_GR_INTR_SEMAPHORE) gk20a_err(dev_from_gk20a(g), "gr intr (%d) on ch %u", info->type, info->chid); switch (info->type) { case TEGRA_VGPU_GR_INTR_NOTIFY: wake_up(&ch->notifier_wq); break; case TEGRA_VGPU_GR_INTR_SEMAPHORE: gk20a_channel_event(ch); wake_up(&ch->semaphore_wq); break; case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT: gk20a_set_error_notifier(ch, NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT); break; case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY: gk20a_set_error_notifier(ch, NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY); case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD: break; case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS: gk20a_set_error_notifier(ch, NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_FECS_ERROR: break; case TEGRA_VGPU_GR_INTR_CLASS_ERROR: gk20a_set_error_notifier(ch, NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD: gk20a_set_error_notifier(ch, NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); break; case TEGRA_VGPU_GR_INTR_EXCEPTION: gk20a_set_error_notifier(ch, NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY); break; default: WARN_ON(1); break; } return 0; }
/*
 * Return the FBP enable mask reported by the hypervisor.
 * On query failure the error is logged and 0 is returned.
 */
static u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g)
{
	struct gk20a_platform *plat = gk20a_get_platform(g->dev);
	u32 mask = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(plat->virt_handle,
				TEGRA_VGPU_ATTRIB_FBP_EN_MASK, &mask))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve fbp en mask");

	return mask;
}
/*
 * Return the maximum FBP count reported by the hypervisor.
 * On query failure the error is logged and 0 is returned.
 */
static u32 vgpu_gr_get_max_fbps_count(struct gk20a *g)
{
	struct gk20a_platform *plat = gk20a_get_platform(g->dev);
	u32 num_fbps = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(plat->virt_handle,
				TEGRA_VGPU_ATTRIB_NUM_FBPS, &num_fbps))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve num fbps");

	return num_fbps;
}
/*
 * Select and initialize the HAL for the detected GPU.
 * Returns 0 on success, -ENODEV for an unrecognized arch/impl combination.
 */
int gpu_init_hal(struct gk20a *g)
{
	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;

	if (ver == GK20A_GPUID_GK20A) {
		gk20a_dbg_info("gk20a detected");
		gk20a_init_hal(g);
		return 0;
	}

	gk20a_err(&g->dev->dev, "no support for %x", ver);
	return -ENODEV;
}
/*
 * LTC interrupt service routine: for every LTC flagged in the top-level
 * MC interrupt register, log each slice's interrupt status and write the
 * value back to clear it (write-1-to-clear style register).
 */
void gm20b_ltc_isr(struct gk20a *g)
{
	u32 mc_intr;
	int ltc, slice;

	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
	gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x", mc_intr);

	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		if (!(mc_intr & (1 << ltc)))
			continue;

		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
			u32 reg = ltc_ltc0_lts0_intr_r() +
					proj_ltc_stride_v() * ltc +
					proj_lts_stride_v() * slice;
			u32 ltc_intr = gk20a_readl(g, reg);

			gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
				  ltc, slice, ltc_intr);
			gk20a_writel(g, reg, ltc_intr);
		}
	}
}
/*
 * One-time software setup of the GPC PLL clock state.
 *
 * Idempotent: returns 0 immediately if setup already ran (clk->sw_ready).
 * Returns -EINVAL if the tegra GPU clock or the GPCPLL reference clock
 * cannot be obtained.
 *
 * NOTE(review): the static `initialized` latch persists across driver
 * rebinds, so the initial PLL coefficients are only computed the very
 * first time this runs after module load — presumably intentional so a
 * re-init keeps the last programmed frequency; confirm.
 */
static int gk20a_init_clk_setup_sw(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;
	static int initialized;
	struct clk *ref;
	unsigned long ref_rate;

	gk20a_dbg_fn("");

	if (clk->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	if (!gk20a_clk_get(g))
		return -EINVAL;

	/* Reference clock is the grandparent of the tegra GPU clock. */
	ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
	if (IS_ERR(ref)) {
		gk20a_err(dev_from_gk20a(g),
			"failed to get GPCPLL reference clock");
		return -EINVAL;
	}

	ref_rate = clk_get_rate(ref);

	clk->pll_delay = 300; /* usec */

	clk->gpc_pll.id = GK20A_GPC_PLL;
	/* clk_in is kept in kHz. */
	clk->gpc_pll.clk_in = ref_rate / KHZ;

	/* Decide initial frequency */
	if (!initialized) {
		initialized = 1;
		clk->gpc_pll.M = 1;
		/* Smallest N that puts the VCO at/above its minimum. */
		clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
					clk->gpc_pll.clk_in);
		clk->gpc_pll.PL = 1;
		clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
		clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL];
	}

	mutex_init(&clk->clk_mutex);

	clk->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;
}
static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; int err; gk20a_dbg_fn(""); if (gr->sw_ready) { gk20a_dbg_fn("skip init"); return 0; } gr->g = g; err = vgpu_gr_init_gr_config(g, gr); if (err) goto clean_up; err = vgpu_gr_init_ctx_state(g, gr); if (err) goto clean_up; err = g->ops.ltc.init_comptags(g, gr); if (err) goto clean_up; err = vgpu_gr_alloc_global_ctx_buffers(g); if (err) goto clean_up; mutex_init(&gr->ctx_mutex); gr->remove_support = vgpu_remove_gr_support; gr->sw_ready = true; gk20a_dbg_fn("done"); return 0; clean_up: gk20a_err(dev_from_gk20a(g), "fail"); vgpu_remove_gr_support(gr); return err; }
struct clk *gk20a_clk_get(struct gk20a *g) { if (!g->clk.tegra_clk) { struct clk *clk; char clk_dev_id[32]; struct device *dev = dev_from_gk20a(g); snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev)); clk = clk_get_sys(clk_dev_id, "gpu"); if (IS_ERR(clk)) { gk20a_err(dev, "fail to get tegra gpu clk %s/gpu\n", clk_dev_id); return NULL; } g->clk.tegra_clk = clk; } return g->clk.tegra_clk; }
/*
 * Set the GPC PLL target frequency, clamped to the supported range.
 * If the (clamped) target differs from the current frequency, compute the
 * new PLL configuration.  Returns 0 on success, -EINVAL if the PLL cannot
 * be configured for the requested frequency.
 */
static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
{
	struct clk_gk20a *clk = &g->clk;

	/* Clamp the request into the PLL's supported range. */
	if (freq > gpc_pll_params.max_freq)
		freq = gpc_pll_params.max_freq;
	else if (freq < gpc_pll_params.min_freq)
		freq = gpc_pll_params.min_freq;

	if (freq == old_freq)
		return 0;

	/* gpc_pll.freq is changed to new value here */
	if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
			   &freq, true)) {
		gk20a_err(dev_from_gk20a(g),
			   "failed to set pll target for %d", freq);
		return -EINVAL;
	}

	return 0;
}
static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) { struct gk20a_platform *platform = gk20a_get_platform(g->dev); u32 gpc_index; gk20a_dbg_fn(""); if (vgpu_get_attribute(platform->virt_handle, TEGRA_VGPU_ATTRIB_GPC_COUNT, &gr->gpc_count)) return -ENOMEM; if (vgpu_get_attribute(platform->virt_handle, TEGRA_VGPU_ATTRIB_MAX_TPC_PER_GPC_COUNT, &gr->max_tpc_per_gpc_count)) return -ENOMEM; if (vgpu_get_attribute(platform->virt_handle, TEGRA_VGPU_ATTRIB_MAX_TPC_COUNT, &gr->max_tpc_count)) return -ENOMEM; gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL); if (!gr->gpc_tpc_mask) { gk20a_err(dev_from_gk20a(g), "%s: out of memory\n", __func__); return -ENOMEM; } for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { if (g->ops.gr.get_gpc_tpc_mask) gr->gpc_tpc_mask[gpc_index] = g->ops.gr.get_gpc_tpc_mask(g, gpc_index); } g->ops.gr.bundle_cb_defaults(g); g->ops.gr.cb_size_default(g); g->ops.gr.calc_global_ctx_buffer_size(g); return 0; }
/*
 * Program the GPC PLL to the previously configured target frequency.
 * A no-op when the frequency is unchanged and the PLL is already enabled,
 * or when the clock hardware is powered off.  On failure of the primary
 * programming path, a fallback path is attempted; any remaining error is
 * logged but the PLL is not restored (DVFS may already have changed the
 * voltage) and the error is returned.
 */
static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq)
{
	struct clk_gk20a *clk = &g->clk;
	int ret = 0;

	gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq);

	if ((freq == old_freq) && clk->gpc_pll.enabled)
		return 0;

	/* change frequency only if power is on */
	if (g->clk.clk_hw_on) {
		ret = clk_program_gpc_pll(g, clk, 1);
		if (ret)
			ret = clk_program_gpc_pll(g, clk, 0);
	}

	/* Just report error but not restore PLL since dvfs could already
	   change voltage even when it returns error. */
	if (ret)
		gk20a_err(dev_from_gk20a(g), "failed to set pll to %d", freq);

	return ret;
}
/*
 * Issue a compression-bit-cache (CBC) operation — clear, clean or
 * invalidate — across all LTCs/slices and poll each slice until the
 * operation completes.
 *
 * For a clear, the [min, max] comptag-line bounds are programmed first.
 * Returns 0 on success (or when there is no compbit store), -EBUSY if a
 * slice does not complete within the retry budget on silicon.
 *
 * NOTE(review): on non-silicon platforms the poll loop deliberately spins
 * without a retry limit (the `|| !tegra_platform_is_silicon()` term), and
 * the timeout error path is suppressed — presumably because simulation is
 * slow but always completes; confirm.
 */
int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
		       u32 min, u32 max)
{
	int err = 0;
	struct gr_gk20a *gr = &g->gr;
	u32 ltc, slice, ctrl1, val, hw_op = 0;
	s32 retry = 200;
	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));

	gk20a_dbg_fn("");

	trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);

	/* Nothing to do without a compbit backing store. */
	if (gr->compbit_store.mem.size == 0)
		return 0;

	mutex_lock(&g->mm.l2_op_lock);

	if (op == gk20a_cbc_op_clear) {
		/* Program the comptag-line range to clear. */
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
			ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
			ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
	} else if (op == gk20a_cbc_op_clean) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
	} else if (op == gk20a_cbc_op_invalidate) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
	} else {
		BUG_ON(1);
	}

	/* Kick off the operation on the broadcast register. */
	gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
		     gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);

	/* Poll each slice's unicast register until the op bit clears. */
	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		for (slice = 0; slice < slices_per_ltc; slice++) {

			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
				ltc * proj_ltc_stride_v() +
				slice * proj_lts_stride_v();

			retry = 200;
			do {
				val = gk20a_readl(g, ctrl1);
				if (!(val & hw_op))
					break;
				retry--;
				udelay(5);
			} while (retry >= 0 ||
					!tegra_platform_is_silicon());

			if (retry < 0 && tegra_platform_is_silicon()) {
				gk20a_err(dev_from_gk20a(g),
					   "comp tag clear timeout\n");
				err = -EBUSY;
				goto out;
			}
		}
	}
out:
	trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
	mutex_unlock(&g->mm.l2_op_lock);
	return err;
}
static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) { u32 err, flags; u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() - gr_fecs_falcon_hwcfg_r(); gk20a_dbg_fn(""); if (tegra_platform_is_linsim()) { gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7), gr_fecs_ctxsw_mailbox_value_f(0xc0de7777)); gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7), gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777)); } flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES; g->ops.pmu.lsfloadedfalconid = 0; if (g->ops.pmu.fecsbootstrapdone) { /* this must be recovery so bootstrap fecs and gpccs */ if (!g->ops.securegpccs) { gr_gm20b_load_gpccs_with_bootloader(g); err = g->ops.pmu.load_lsfalcon_ucode(g, (1 << LSF_FALCON_ID_FECS)); } else { /* bind WPR VA inst block */ gr_gk20a_load_falcon_bind_instblk(g); err = g->ops.pmu.load_lsfalcon_ucode(g, (1 << LSF_FALCON_ID_FECS) | (1 << LSF_FALCON_ID_GPCCS)); } if (err) { gk20a_err(dev_from_gk20a(g), "Unable to recover GR falcon"); return err; } } else { /* cold boot or rg exit */ g->ops.pmu.fecsbootstrapdone = true; if (!g->ops.securegpccs) { gr_gm20b_load_gpccs_with_bootloader(g); } else { /* bind WPR VA inst block */ gr_gk20a_load_falcon_bind_instblk(g); err = g->ops.pmu.load_lsfalcon_ucode(g, (1 << LSF_FALCON_ID_GPCCS)); if (err) { gk20a_err(dev_from_gk20a(g), "Unable to boot GPCCS\n"); return err; } } } /*start gpccs */ if (g->ops.securegpccs) { gk20a_writel(g, reg_offset + gr_fecs_cpuctl_alias_r(), gr_gpccs_cpuctl_startcpu_f(1)); } else { gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0)); gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1)); } /* start fecs */ gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0); gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1), 0x1); gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6), 0xffffffff); gk20a_writel(g, gr_fecs_cpuctl_alias_r(), gr_fecs_cpuctl_startcpu_f(1)); gk20a_dbg_fn("done"); return 0; }
int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr) { int err = 0; u32 i, temp; char *size_path = NULL; char *reg_path = NULL; char *value_path = NULL; gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "querying grctx info from chiplib"); g->gr.ctx_vars.dynamic = true; g->gr.netlist = GR_NETLIST_DYNAMIC; /* query sizes and counts */ gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0, &g->gr.ctx_vars.ucode.fecs.inst.count); gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0, &g->gr.ctx_vars.ucode.fecs.data.count); gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0, &g->gr.ctx_vars.ucode.gpccs.inst.count); gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0, &g->gr.ctx_vars.ucode.gpccs.data.count); gk20a_sim_esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp); g->gr.ctx_vars.buffer_size = temp << 2; gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0, &g->gr.ctx_vars.sw_bundle_init.count); gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0, &g->gr.ctx_vars.sw_method_init.count); gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0, &g->gr.ctx_vars.sw_ctx_load.count); switch (0) { /*g->gr.ctx_vars.reg_init_override)*/ #if 0 case NV_REG_STR_RM_GR_REG_INIT_OVERRIDE_PROD_DIFF: sizePath = "GRCTX_NONCTXSW_PROD_DIFF_REG_SIZE"; regPath = "GRCTX_NONCTXSW_PROD_DIFF_REG:REG"; valuePath = "GRCTX_NONCTXSW_PROD_DIFF_REG:VALUE"; break; #endif default: size_path = "GRCTX_NONCTXSW_REG_SIZE"; reg_path = "GRCTX_NONCTXSW_REG:REG"; value_path = "GRCTX_NONCTXSW_REG:VALUE"; break; } gk20a_sim_esc_readl(g, size_path, 0, &g->gr.ctx_vars.sw_non_ctx_load.count); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0, &g->gr.ctx_vars.ctxsw_regs.sys.count); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0, &g->gr.ctx_vars.ctxsw_regs.gpc.count); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0, &g->gr.ctx_vars.ctxsw_regs.tpc.count); #if 0 /* looks to be unused, actually chokes the sim */ gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0, 
&g->gr.ctx_vars.ctxsw_regs.ppc.count); #endif gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0, &g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0, &g->gr.ctx_vars.ctxsw_regs.pm_sys.count); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0, &g->gr.ctx_vars.ctxsw_regs.pm_gpc.count); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0, &g->gr.ctx_vars.ctxsw_regs.pm_tpc.count); err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.inst); err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.data); err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.inst); err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.data); err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_bundle_init); err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_method_init); err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.sw_ctx_load); err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_non_ctx_load); err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.sys); err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.gpc); err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.tpc); err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.zcull_gpc); err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.ppc); err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_sys); err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_gpc); err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_tpc); if (err) goto fail; for (i = 0; i < g->gr.ctx_vars.ucode.fecs.inst.count; i++) gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS", i, &g->gr.ctx_vars.ucode.fecs.inst.l[i]); for (i = 0; i < g->gr.ctx_vars.ucode.fecs.data.count; i++) gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS", i, &g->gr.ctx_vars.ucode.fecs.data.l[i]); for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.inst.count; i++) gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS", i, &g->gr.ctx_vars.ucode.gpccs.inst.l[i]); for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.data.count; i++) 
gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS", i, &g->gr.ctx_vars.ucode.gpccs.data.l[i]); for (i = 0; i < g->gr.ctx_vars.sw_bundle_init.count; i++) { struct av_gk20a *l = g->gr.ctx_vars.sw_bundle_init.l; gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE", i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.sw_method_init.count; i++) { struct av_gk20a *l = g->gr.ctx_vars.sw_method_init.l; gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE", i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.sw_ctx_load.count; i++) { struct aiv_gk20a *l = g->gr.ctx_vars.sw_ctx_load.l; gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX", i, &l[i].index); gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE", i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.sw_non_ctx_load.count; i++) { struct av_gk20a *l = g->gr.ctx_vars.sw_non_ctx_load.l; gk20a_sim_esc_readl(g, reg_path, i, &l[i].addr); gk20a_sim_esc_readl(g, value_path, i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) { struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.sys.l; gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX", i, &l[i].index); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE", i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) { struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.gpc.l; gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX", i, &l[i].index); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE", i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) { struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.tpc.l; gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX", 
i, &l[i].index); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE", i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) { struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.ppc.l; gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX", i, &l[i].index); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE", i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count; i++) { struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l; gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX", i, &l[i].index); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE", i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_sys.count; i++) { struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_sys.l; gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX", i, &l[i].index); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE", i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_gpc.count; i++) { struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_gpc.l; gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX", i, &l[i].index); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE", i, &l[i].value); } for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_tpc.count; i++) { struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_tpc.l; gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR", i, &l[i].addr); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX", i, &l[i].index); gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE", i, &l[i].value); } g->gr.ctx_vars.valid = true; gk20a_sim_esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0, &g->gr.ctx_vars.regs_base_index); gk20a_dbg(gpu_dbg_info | gpu_dbg_fn, "finished querying grctx info from chiplib"); return 0; fail: 
gk20a_err(dev_from_gk20a(g), "failed querying grctx info from chiplib"); return err; }
/*
 * Allocate and commit the graphics context objects for a channel on the
 * virtualized GPU: gr ctx buffer, patch buffer, global ctx buffer
 * mappings, and (once per channel) the golden context image.
 *
 * The channel must already be bound to an address space.  Each resource
 * is allocated at most once per channel; requesting a second class on a
 * channel that already has a gr ctx fails with -EINVAL.
 *
 * Returns 0 on success or a negative errno.  On failure, previously
 * acquired resources are intentionally left in place (see the comment at
 * the `out` label).
 */
static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
				struct nvgpu_alloc_obj_ctx_args *args)
{
	struct gk20a *g = c->g;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct tsg_gk20a *tsg = NULL;
	int err = 0;

	gk20a_dbg_fn("");

	/* an address space needs to have been bound at this point.*/
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(dev_from_gk20a(g),
			   "not bound to address space at time"
			   " of grctx allocation");
		return -EINVAL;
	}

	if (!g->ops.gr.is_valid_class(g, args->class_num)) {
		gk20a_err(dev_from_gk20a(g),
			   "invalid obj class 0x%x", args->class_num);
		err = -EINVAL;
		goto out;
	}
	c->obj_class = args->class_num;

	/* FIXME: add TSG support */
	/* NOTE(review): tsg is looked up but not used below. */
	if (gk20a_is_channel_marked_as_tsg(c))
		tsg = &f->tsg[c->tsgid];

	/* allocate gr ctx buffer */
	if (!ch_ctx->gr_ctx) {
		err = vgpu_gr_alloc_channel_gr_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate gr ctx buffer");
			goto out;
		}
	} else {
		/*TBD: needs to be more subtle about which is
		 * being allocated as some are allowed to be
		 * allocated along same channel */
		gk20a_err(dev_from_gk20a(g),
			"too many classes alloc'd on same channel");
		err = -EINVAL;
		goto out;
	}

	/* commit gr ctx buffer */
	err = vgpu_gr_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "fail to commit gr ctx buffer");
		goto out;
	}

	/* allocate patch buffer */
	if (ch_ctx->patch_ctx.mem.pages == NULL) {
		err = vgpu_gr_alloc_channel_patch_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate patch buffer");
			goto out;
		}
	}

	/* map global buffer to channel gpu_va and commit */
	if (!ch_ctx->global_ctx_buffer_mapped) {
		err = vgpu_gr_map_global_ctx_buffers(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to map global ctx buffer");
			goto out;
		}
		/* Commit under ELPG protection (power feature disabled
		 * around the call). */
		gr_gk20a_elpg_protected_call(g,
			vgpu_gr_commit_global_ctx_buffers(g, c, true));
	}

	/* load golden image */
	if (!c->first_init) {
		err = gr_gk20a_elpg_protected_call(g,
			vgpu_gr_load_golden_ctx_image(g, c));
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to load golden ctx image");
			goto out;
		}
		c->first_init = true;
	}
	c->num_objects++;

	gk20a_dbg_fn("done");
	return 0;
out:
	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
	   can be reused so no need to release them.
	   2. golden image load is a one time thing so if
	   they pass, no need to undo. */
	gk20a_err(dev_from_gk20a(g), "fail");
	return err;
}
/*
 * Issue a compression-bit-cache (CBC) operation — clear, clean or
 * invalidate — across all FBPs/slices and poll each slice until the
 * operation completes or the idle timeout expires.
 *
 * For a clear, the [min, max] comptag-line bounds are programmed first.
 * Returns 0 on success (or when there is no compbit store), -EBUSY on
 * timeout.
 *
 * Fix: the function ended with `return 0;`, silently discarding the
 * -EBUSY recorded on the timeout path; it now returns err.
 */
static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
			      u32 min, u32 max)
{
	int err = 0;
	struct gr_gk20a *gr = &g->gr;
	u32 fbp, slice, ctrl1, val, hw_op = 0;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	u32 delay = GR_IDLE_CHECK_DEFAULT;
	u32 slices_per_fbp =
		ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
			gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));

	gk20a_dbg_fn("");

	/* Nothing to do without a compbit backing store. */
	if (gr->compbit_store.size == 0)
		return 0;

	mutex_lock(&g->mm.l2_op_lock);

	if (op == gk20a_cbc_op_clear) {
		/* Program the comptag-line range to clear. */
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
			ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
			ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
	} else if (op == gk20a_cbc_op_clean) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
	} else if (op == gk20a_cbc_op_invalidate) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
	} else {
		BUG_ON(1);
	}

	/* Kick off the operation on the broadcast register. */
	gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
		     gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);

	/* Poll each slice's unicast register until the op bit clears. */
	for (fbp = 0; fbp < gr->num_fbps; fbp++) {
		for (slice = 0; slice < slices_per_fbp; slice++) {

			delay = GR_IDLE_CHECK_DEFAULT;

			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
				fbp * proj_ltc_stride_v() +
				slice * proj_lts_stride_v();

			do {
				val = gk20a_readl(g, ctrl1);
				if (!(val & hw_op))
					break;

				/* Exponential backoff up to the max. */
				usleep_range(delay, delay * 2);
				delay = min_t(u32,
					delay << 1, GR_IDLE_CHECK_MAX);

			} while (time_before(jiffies, end_jiffies) ||
					!tegra_platform_is_silicon());

			if (!time_before(jiffies, end_jiffies)) {
				gk20a_err(dev_from_gk20a(g),
					   "comp tag clear timeout\n");
				err = -EBUSY;
				goto out;
			}
		}
	}
out:
	mutex_unlock(&g->mm.l2_op_lock);
	return err;
}
/*
 * Dynamically ramp the GPC PLL feedback divider (NDIV) to a new value
 * without glitching: program the ramp step coefficients, enter PLL
 * slowdown mode, latch the new NDIV, enable the dynamic ramp and poll the
 * broadcast debug register until the ramp is synchronized, then exit
 * slowdown mode.
 *
 * Returns 0 on success (or if NDIV is already at the requested value),
 * -ETIMEDOUT if the ramp does not complete within ~500us of polling.
 */
static int clk_slide_gpc_pll(struct gk20a *g, u32 n)
{
	u32 data, coeff;
	u32 nold;
	int ramp_timeout = 500;

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);

	/* do nothing if NDIV is same */
	if (n == nold)
		return 0;

	/* setup */
	/* Ramp step sizes A/B; the 0x2b / 0xb values are presumably
	 * characterized for this part — TODO confirm against clock docs. */
	data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
	data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
			trim_sys_gpcpll_cfg2_pll_stepa_f(0x2b));
	gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
	data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
	data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
			trim_sys_gpcpll_cfg3_pll_stepb_f(0xb));
	gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);

	/* pll slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* new ndiv ready for ramp */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
			trim_sys_gpcpll_coeff_ndiv_f(n));
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

	/* dynamic ramp to new ndiv */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* Poll (1us steps) until the hardware reports the ramp done. */
	do {
		udelay(1);
		ramp_timeout--;
		data = gk20a_readl(
			g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
		if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
			break;
	} while (ramp_timeout > 0);

	/* exit slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
	/* Read back; presumably to flush the posted write — confirm. */
	gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());

	if (ramp_timeout <= 0) {
		gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
		return -ETIMEDOUT;
	}
	return 0;
}