static int gk20a_ltc_alloc_virt_cbc(struct gk20a *g, size_t compbit_backing_size)
{
	struct device *d = dev_from_gk20a(g);
	struct gr_gk20a *gr = &g->gr;
	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t iova;
	int err;

	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);

	gr->compbit_store.pages =
		dma_alloc_attrs(d, compbit_backing_size, &iova,
				GFP_KERNEL, &attrs);
	if (!gr->compbit_store.pages) {
		gk20a_err(dev_from_gk20a(g),
			"failed to allocate backing store for compbit : size %zu",
			compbit_backing_size);
		return -ENOMEM;
	}

	gr->compbit_store.base_iova = iova;
	gr->compbit_store.size = compbit_backing_size;
	err = gk20a_get_sgtable_from_pages(d, &gr->compbit_store.sgt,
					   gr->compbit_store.pages, iova,
					   compbit_backing_size);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"failed to allocate sgt for backing store");
		return err;
	}

	return 0;
}
static void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g)
{
	u32 val;

	/* print vpr and wpr info */
	val = gk20a_readl(g, fb_mmu_vpr_info_r());
	val &= ~0x3;
	val |= fb_mmu_vpr_info_index_addr_lo_v();
	gk20a_writel(g, fb_mmu_vpr_info_r(), val);
	/* the info register index auto-increments on read, so repeated
	 * reads below walk the successive VPR/WPR info entries */
	gk20a_err(dev_from_gk20a(g), "VPR: %08x %08x %08x %08x",
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()));

	val = gk20a_readl(g, fb_mmu_wpr_info_r());
	val &= ~0xf;
	val |= (fb_mmu_wpr_info_index_allow_read_v());
	gk20a_writel(g, fb_mmu_wpr_info_r(), val);
	gk20a_err(dev_from_gk20a(g), "WPR: %08x %08x %08x %08x %08x %08x",
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()));
}
static void vgpu_gr_detect_sm_arch(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 v = 0, raw_version, version = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_GPC0_TPC0_SM_ARCH, &v))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve SM arch");

	raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v);
	if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v())
		version = 0x320; /* SM 3.2 */
	else
		gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x",
			raw_version);

	/* on Kepler, SM version == SPA version */
	g->gpu_characteristics.sm_arch_spa_version = version;
	g->gpu_characteristics.sm_arch_sm_version = version;

	g->gpu_characteristics.sm_arch_warp_count =
		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
}
static int gk20a_determine_L2_size_bytes(struct gk20a *g)
{
	const u32 gpuid = GK20A_GPUID(g->gpu_characteristics.arch,
				      g->gpu_characteristics.impl);
	u32 lts_per_ltc;
	u32 ways;
	u32 sets;
	u32 bytes_per_line;
	u32 active_ltcs;
	u32 cache_size;
	u32 tmp;
	u32 active_sets_value;

	tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
	ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));

	active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
	if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
		sets = 64;
	} else if (active_sets_value ==
		   ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
		sets = 32;
	} else if (active_sets_value ==
		   ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
		sets = 16;
	} else {
		dev_err(dev_from_gk20a(g),
			"Unknown constant %u for active sets",
			(unsigned)active_sets_value);
		sets = 0;
	}

	active_ltcs = g->gr.num_fbps;

	/* chip-specific values */
	switch (gpuid) {
	case GK20A_GPUID_GK20A:
		lts_per_ltc = 1;
		bytes_per_line = 128;
		break;
	default:
		dev_err(dev_from_gk20a(g), "Unknown GPU id 0x%02x\n",
			(unsigned)gpuid);
		lts_per_ltc = 0;
		bytes_per_line = 0;
	}

	cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;

	return cache_size;
}
/* Flushes the compression bit cache as well as "data".
 * Note: the name here is a bit of a misnomer.  ELPG uses this
 * internally... but ELPG doesn't have to be on to do it manually.
 */
static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
{
	u32 data;
	s32 retry = 100;

	gk20a_dbg_fn("");

	/* Make sure all previous writes are committed to the L2. There's no
	   guarantee that writes are to DRAM. This will be a sysmembar internal
	   to the L2. */
	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
	do {
		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());

		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
			gk20a_dbg_info("g_elpg_flush 0x%x", data);
			retry--;
			usleep_range(20, 40);
		} else
			break;
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			   "g_elpg_flush too many retries");
}
int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
{
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = &f->channel[info->chid];

	gk20a_dbg_fn("");

	if (info->type != TEGRA_VGPU_GR_INTR_NOTIFY &&
	    info->type != TEGRA_VGPU_GR_INTR_SEMAPHORE)
		gk20a_err(dev_from_gk20a(g), "gr intr (%d) on ch %u",
			info->type, info->chid);

	switch (info->type) {
	case TEGRA_VGPU_GR_INTR_NOTIFY:
		wake_up(&ch->notifier_wq);
		break;
	case TEGRA_VGPU_GR_INTR_SEMAPHORE:
		gk20a_channel_event(ch);
		wake_up(&ch->semaphore_wq);
		break;
	case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
		break;
	case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
		/* fall through */
	case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
		break;
	case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_FECS_ERROR:
		break;
	case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_EXCEPTION:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	default:
		WARN_ON(1);
		break;
	}

	return 0;
}
u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
{
	u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());

	if (val == 2) {
		return base * 2;
	} else if (val != 1) {
		gk20a_err(dev_from_gk20a(g),
			"Invalid number of active ltcs: %08x\n", val);
	}

	return base;
}
void gk20a_priv_ring_isr(struct gk20a *g)
{
	u32 status0, status1;
	u32 cmd;
	s32 retry = 100;

	if (tegra_platform_is_linsim())
		return;

	status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
	status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

	gk20a_dbg(gpu_dbg_intr, "ringmaster intr status0: 0x%08x,"
		  " status1: 0x%08x", status0, status1);

	if (status0 & (0x1 | 0x2 | 0x4)) {
		gk20a_reset_priv_ring(g);
	}

	if (status0 & 0x100) {
		gk20a_dbg(gpu_dbg_intr,
			"SYS write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x",
			gk20a_readl(g, 0x122120), gk20a_readl(g, 0x122124),
			gk20a_readl(g, 0x122128), gk20a_readl(g, 0x12212c));
	}

	if (status1 & 0x1) {
		gk20a_dbg(gpu_dbg_intr,
			"GPC write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x",
			gk20a_readl(g, 0x128120), gk20a_readl(g, 0x128124),
			gk20a_readl(g, 0x128128), gk20a_readl(g, 0x12812c));
	}

	cmd = gk20a_readl(g, pri_ringmaster_command_r());
	cmd = set_field(cmd, pri_ringmaster_command_cmd_m(),
			pri_ringmaster_command_cmd_ack_interrupt_f());
	gk20a_writel(g, pri_ringmaster_command_r(), cmd);

	do {
		cmd = pri_ringmaster_command_cmd_v(
			gk20a_readl(g, pri_ringmaster_command_r()));
		usleep_range(20, 40);
	} while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry);

	if (retry <= 0)
		gk20a_warn(dev_from_gk20a(g),
			"priv ringmaster cmd ack too many retries");

	status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
	status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

	gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
		       " status1: 0x%08x", status0, status1);
}
static u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 fbp_en_mask = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_FBP_EN_MASK, &fbp_en_mask))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve fbp en mask");

	return fbp_en_mask;
}
static u32 vgpu_gr_get_max_fbps_count(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 max_fbps_count = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_NUM_FBPS, &max_fbps_count))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve num fbps");

	return max_fbps_count;
}
static int vgpu_determine_L2_size_bytes(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 cache_size = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_L2_SIZE, &cache_size))
		dev_err(dev_from_gk20a(g), "unable to get L2 size");

	return cache_size;
}
void gm20b_ltc_isr(struct gk20a *g)
{
	u32 mc_intr, ltc_intr;
	int ltc, slice;

	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
	gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x", mc_intr);
	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		if ((mc_intr & 1 << ltc) == 0)
			continue;
		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
					   proj_ltc_stride_v() * ltc +
					   proj_lts_stride_v() * slice);
			gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
				  ltc, slice, ltc_intr);
			gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
				   proj_ltc_stride_v() * ltc +
				   proj_lts_stride_v() * slice,
				   ltc_intr);
		}
	}
}
static int gk20a_init_clk_setup_sw(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;
	static int initialized;
	struct clk *ref;
	unsigned long ref_rate;

	gk20a_dbg_fn("");

	if (clk->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	if (!gk20a_clk_get(g))
		return -EINVAL;

	ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
	if (IS_ERR(ref)) {
		gk20a_err(dev_from_gk20a(g),
			"failed to get GPCPLL reference clock");
		return -EINVAL;
	}
	ref_rate = clk_get_rate(ref);

	clk->pll_delay = 300; /* usec */

	clk->gpc_pll.id = GK20A_GPC_PLL;
	clk->gpc_pll.clk_in = ref_rate / KHZ;

	/* Decide initial frequency */
	if (!initialized) {
		initialized = 1;
		clk->gpc_pll.M = 1;
		clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
					clk->gpc_pll.clk_in);
		clk->gpc_pll.PL = 1;
		clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
		clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL];
	}

	mutex_init(&clk->clk_mutex);

	clk->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;
}
struct clk *gk20a_clk_get(struct gk20a *g)
{
	if (!g->clk.tegra_clk) {
		struct clk *clk;

		clk = clk_get_sys("tegra_gk20a", "gpu");
		if (IS_ERR(clk)) {
			nvhost_err(dev_from_gk20a(g),
				"fail to get tegra gpu clk tegra_gk20a/gpu");
			return NULL;
		}
		g->clk.tegra_clk = clk;
	}

	return g->clk.tegra_clk;
}
void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
{
	u32 data;
	bool done[g->ltc_count];
	s32 retry = 100;
	int i;
	int num_done = 0;
	u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();

	gk20a_dbg_fn("");

	trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);

	for (i = 0; i < g->ltc_count; i++)
		done[i] = 0;

	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
	do {
		for (i = 0; i < g->ltc_count; i++) {
			if (done[i])
				continue;

			data = gk20a_readl(g,
					ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);

			if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
				gk20a_dbg_info("g_elpg_flush 0x%x", data);
			} else {
				done[i] = 1;
				num_done++;
			}
		}

		if (num_done < g->ltc_count) {
			retry--;
			udelay(5);
		} else
			break;
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0 && tegra_platform_is_silicon())
		gk20a_warn(dev_from_gk20a(g),
			   "g_elpg_flush too many retries");

	trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
}
static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	int err;

	gk20a_dbg_fn("");

	if (gr->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	gr->g = g;

	err = vgpu_gr_init_gr_config(g, gr);
	if (err)
		goto clean_up;

	err = vgpu_gr_init_ctx_state(g, gr);
	if (err)
		goto clean_up;

	err = g->ops.ltc.init_comptags(g, gr);
	if (err)
		goto clean_up;

	err = vgpu_gr_alloc_global_ctx_buffers(g);
	if (err)
		goto clean_up;

	mutex_init(&gr->ctx_mutex);

	gr->remove_support = vgpu_remove_gr_support;
	gr->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;

clean_up:
	gk20a_err(dev_from_gk20a(g), "fail");
	vgpu_remove_gr_support(gr);
	return err;
}
struct clk *gk20a_clk_get(struct gk20a *g)
{
	if (!g->clk.tegra_clk) {
		struct clk *clk;
		char clk_dev_id[32];
		struct device *dev = dev_from_gk20a(g);

		snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev));

		clk = clk_get_sys(clk_dev_id, "gpu");
		if (IS_ERR(clk)) {
			gk20a_err(dev, "fail to get tegra gpu clk %s/gpu\n",
				  clk_dev_id);
			return NULL;
		}
		g->clk.tegra_clk = clk;
	}

	return g->clk.tegra_clk;
}
static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
{
	struct clk_gk20a *clk = &g->clk;

	if (freq > gpc_pll_params.max_freq)
		freq = gpc_pll_params.max_freq;
	else if (freq < gpc_pll_params.min_freq)
		freq = gpc_pll_params.min_freq;

	if (freq != old_freq) {
		/* gpc_pll.freq is changed to new value here */
		if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
				   &freq, true)) {
			gk20a_err(dev_from_gk20a(g),
				"failed to set pll target for %d", freq);
			return -EINVAL;
		}
	}
	return 0;
}
static int gm20b_determine_L2_size_bytes(struct gk20a *g)
{
	u32 lts_per_ltc;
	u32 ways;
	u32 sets;
	u32 bytes_per_line;
	u32 active_ltcs;
	u32 cache_size;
	u32 tmp;
	u32 active_sets_value;

	tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
	ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));

	active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
	if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
		sets = 64;
	} else if (active_sets_value ==
		   ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
		sets = 32;
	} else if (active_sets_value ==
		   ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
		sets = 16;
	} else {
		dev_err(dev_from_gk20a(g),
			"Unknown constant %u for active sets",
			(unsigned)active_sets_value);
		sets = 0;
	}

	active_ltcs = g->gr.num_fbps;

	/* chip-specific values */
	lts_per_ltc = 2;
	bytes_per_line = 128;
	cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;

	return cache_size;
}
static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 gpc_index;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_GPC_COUNT, &gr->gpc_count))
		return -ENOMEM;

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_MAX_TPC_PER_GPC_COUNT,
			&gr->max_tpc_per_gpc_count))
		return -ENOMEM;

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_MAX_TPC_COUNT,
			&gr->max_tpc_count))
		return -ENOMEM;

	gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
	if (!gr->gpc_tpc_mask) {
		gk20a_err(dev_from_gk20a(g), "%s: out of memory\n", __func__);
		return -ENOMEM;
	}

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		if (g->ops.gr.get_gpc_tpc_mask)
			gr->gpc_tpc_mask[gpc_index] =
				g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
	}

	g->ops.gr.bundle_cb_defaults(g);
	g->ops.gr.cb_size_default(g);
	g->ops.gr.calc_global_ctx_buffer_size(g);
	return 0;
}
void gk20a_priv_ring_isr(struct gk20a *g)
{
	u32 status0, status1;
	u32 cmd;
	s32 retry = 100;

	status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
	status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

	gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
		       " status1: 0x%08x", status0, status1);

	if (status0 & (0x1 | 0x2 | 0x4)) {
		gk20a_reset_priv_ring(g);
	}

	cmd = gk20a_readl(g, pri_ringmaster_command_r());
	cmd = set_field(cmd, pri_ringmaster_command_cmd_m(),
			pri_ringmaster_command_cmd_ack_interrupt_f());
	gk20a_writel(g, pri_ringmaster_command_r(), cmd);

	do {
		cmd = pri_ringmaster_command_cmd_v(
			gk20a_readl(g, pri_ringmaster_command_r()));
		usleep_range(20, 40);
	} while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry);

	if (retry <= 0)
		gk20a_warn(dev_from_gk20a(g),
			"priv ringmaster cmd ack too many retries");

	status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
	status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

	gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
		       " status1: 0x%08x", status0, status1);
}
static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq)
{
	struct clk_gk20a *clk = &g->clk;
	int err = 0;

	gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq);

	if ((freq == old_freq) && clk->gpc_pll.enabled)
		return 0;

	/* change frequency only if power is on */
	if (g->clk.clk_hw_on) {
		err = clk_program_gpc_pll(g, clk, 1);
		if (err)
			err = clk_program_gpc_pll(g, clk, 0);
	}

	/* Just report error but not restore PLL since dvfs could already
	   change voltage even when it returns error. */
	if (err)
		gk20a_err(dev_from_gk20a(g), "failed to set pll to %d", freq);
	return err;
}
int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
		       u32 min, u32 max)
{
	int err = 0;
	struct gr_gk20a *gr = &g->gr;
	u32 ltc, slice, ctrl1, val, hw_op = 0;
	s32 retry = 200;
	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));

	gk20a_dbg_fn("");

	trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);

	if (gr->compbit_store.mem.size == 0)
		return 0;

	mutex_lock(&g->mm.l2_op_lock);

	if (op == gk20a_cbc_op_clear) {
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
			ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
			ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
	} else if (op == gk20a_cbc_op_clean) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
	} else if (op == gk20a_cbc_op_invalidate) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
	} else {
		BUG_ON(1);
	}
	gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
		     gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);

	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		for (slice = 0; slice < slices_per_ltc; slice++) {

			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
				ltc * proj_ltc_stride_v() +
				slice * proj_lts_stride_v();

			retry = 200;
			do {
				val = gk20a_readl(g, ctrl1);
				if (!(val & hw_op))
					break;
				retry--;
				udelay(5);
			} while (retry >= 0 ||
					!tegra_platform_is_silicon());

			if (retry < 0 && tegra_platform_is_silicon()) {
				gk20a_err(dev_from_gk20a(g),
					  "comp tag clear timeout\n");
				err = -EBUSY;
				goto out;
			}
		}
	}
out:
	trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
	mutex_unlock(&g->mm.l2_op_lock);
	return err;
}
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct platform_device *dev = filp->private_data;
	struct gk20a *g = get_gk20a(dev);
	struct nvhost_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
	struct nvhost_gpu_zcull_get_info_args *get_info_args;
	struct nvhost_gpu_zbc_set_table_args *set_table_args;
	struct nvhost_gpu_zbc_query_table_args *query_table_args;
	u8 buf[NVHOST_GPU_IOCTL_MAX_ARG_SIZE];
	struct gr_zcull_info *zcull_info;
	struct zbc_entry *zbc_val;
	struct zbc_query_params *zbc_tbl;
	int i, err = 0;

	nvhost_dbg_fn("");

	if ((_IOC_TYPE(cmd) != NVHOST_GPU_IOCTL_MAGIC) ||
	    (_IOC_NR(cmd) == 0) ||
	    (_IOC_NR(cmd) > NVHOST_GPU_IOCTL_LAST))
		return -EFAULT;

	BUG_ON(_IOC_SIZE(cmd) > NVHOST_GPU_IOCTL_MAX_ARG_SIZE);

	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	if (!g->gr.sw_ready) {
		gk20a_busy(g->dev);
		gk20a_idle(g->dev);
	}

	switch (cmd) {
	case NVHOST_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
		get_ctx_size_args = (struct nvhost_gpu_zcull_get_ctx_size_args *)buf;

		get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr);

		break;
	case NVHOST_GPU_IOCTL_ZCULL_GET_INFO:
		get_info_args = (struct nvhost_gpu_zcull_get_info_args *)buf;

		memset(get_info_args, 0,
		       sizeof(struct nvhost_gpu_zcull_get_info_args));

		zcull_info = kzalloc(sizeof(struct gr_zcull_info), GFP_KERNEL);
		if (zcull_info == NULL)
			return -ENOMEM;

		err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info);
		if (err)
			break;

		get_info_args->width_align_pixels = zcull_info->width_align_pixels;
		get_info_args->height_align_pixels = zcull_info->height_align_pixels;
		get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots;
		get_info_args->aliquot_total = zcull_info->aliquot_total;
		get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier;
		get_info_args->region_header_size = zcull_info->region_header_size;
		get_info_args->subregion_header_size = zcull_info->subregion_header_size;
		get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels;
		get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels;
		get_info_args->subregion_count = zcull_info->subregion_count;

		if (zcull_info)
			kfree(zcull_info);
		break;
	case NVHOST_GPU_IOCTL_ZBC_SET_TABLE:
		set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf;

		zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL);
		if (zbc_val == NULL)
			return -ENOMEM;

		zbc_val->format = set_table_args->format;
		zbc_val->type = set_table_args->type;

		switch (zbc_val->type) {
		case GK20A_ZBC_TYPE_COLOR:
			for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
				zbc_val->color_ds[i] = set_table_args->color_ds[i];
				zbc_val->color_l2[i] = set_table_args->color_l2[i];
			}
			break;
		case GK20A_ZBC_TYPE_DEPTH:
			zbc_val->depth = set_table_args->depth;
			break;
		default:
			err = -EINVAL;
		}

		if (!err) {
			gk20a_busy(dev);
			err = gk20a_gr_zbc_set_table(g, &g->gr, zbc_val);
			gk20a_idle(dev);
		}

		if (zbc_val)
			kfree(zbc_val);
		break;
	case NVHOST_GPU_IOCTL_ZBC_QUERY_TABLE:
		query_table_args = (struct nvhost_gpu_zbc_query_table_args *)buf;

		zbc_tbl = kzalloc(sizeof(struct zbc_query_params), GFP_KERNEL);
		if (zbc_tbl == NULL)
			return -ENOMEM;

		zbc_tbl->type = query_table_args->type;
		zbc_tbl->index_size = query_table_args->index_size;

		err = gr_gk20a_query_zbc(g, &g->gr, zbc_tbl);

		if (!err) {
			switch (zbc_tbl->type) {
			case GK20A_ZBC_TYPE_COLOR:
				for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
					query_table_args->color_ds[i] = zbc_tbl->color_ds[i];
					query_table_args->color_l2[i] = zbc_tbl->color_l2[i];
				}
				break;
			case GK20A_ZBC_TYPE_DEPTH:
				query_table_args->depth = zbc_tbl->depth;
				break;
			case GK20A_ZBC_TYPE_INVALID:
				query_table_args->index_size = zbc_tbl->index_size;
				break;
			default:
				err = -EINVAL;
			}
			if (!err) {
				query_table_args->format = zbc_tbl->format;
				query_table_args->ref_cnt = zbc_tbl->ref_cnt;
			}
		}

		if (zbc_tbl)
			kfree(zbc_tbl);
		break;
	case NVHOST_GPU_IOCTL_GET_CHARACTERISTICS:
		err = gk20a_ctrl_ioctl_gpu_characteristics(
			g, (struct nvhost_gpu_get_characteristics *)buf);
		break;
	default:
		nvhost_err(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));

	return err;
}
int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr)
{
	int err = 0;
	u32 i, temp;
	char *size_path  = NULL;
	char *reg_path   = NULL;
	char *value_path = NULL;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
		  "querying grctx info from chiplib");

	g->gr.ctx_vars.dynamic = true;
	g->gr.netlist = GR_NETLIST_DYNAMIC;

	/* query sizes and counts */
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.fecs.inst.count);
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.fecs.data.count);
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.gpccs.inst.count);
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.gpccs.data.count);
	gk20a_sim_esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp);
	g->gr.ctx_vars.buffer_size = temp << 2;
	gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0,
			    &g->gr.ctx_vars.sw_bundle_init.count);
	gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0,
			    &g->gr.ctx_vars.sw_method_init.count);
	gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0,
			    &g->gr.ctx_vars.sw_ctx_load.count);

	switch (0) { /*g->gr.ctx_vars.reg_init_override)*/
#if 0
	case NV_REG_STR_RM_GR_REG_INIT_OVERRIDE_PROD_DIFF:
		sizePath  = "GRCTX_NONCTXSW_PROD_DIFF_REG_SIZE";
		regPath   = "GRCTX_NONCTXSW_PROD_DIFF_REG:REG";
		valuePath = "GRCTX_NONCTXSW_PROD_DIFF_REG:VALUE";
		break;
#endif
	default:
		size_path  = "GRCTX_NONCTXSW_REG_SIZE";
		reg_path   = "GRCTX_NONCTXSW_REG:REG";
		value_path = "GRCTX_NONCTXSW_REG:VALUE";
		break;
	}

	gk20a_sim_esc_readl(g, size_path, 0,
			    &g->gr.ctx_vars.sw_non_ctx_load.count);

	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.sys.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.gpc.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.tpc.count);
#if 0
	/* looks to be unused, actually chokes the sim */
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.ppc.count);
#endif
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.pm_sys.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.pm_gpc.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.pm_tpc.count);

	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.inst);
	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.data);
	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.inst);
	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.data);
	err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_bundle_init);
	err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_method_init);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.sw_ctx_load);
	err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_non_ctx_load);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.sys);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.gpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.tpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.zcull_gpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.ppc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_sys);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_gpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_tpc);

	if (err)
		goto fail;

	for (i = 0; i < g->gr.ctx_vars.ucode.fecs.inst.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS", i,
				    &g->gr.ctx_vars.ucode.fecs.inst.l[i]);

	for (i = 0; i < g->gr.ctx_vars.ucode.fecs.data.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS", i,
				    &g->gr.ctx_vars.ucode.fecs.data.l[i]);

	for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.inst.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS", i,
				    &g->gr.ctx_vars.ucode.gpccs.inst.l[i]);

	for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.data.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS", i,
				    &g->gr.ctx_vars.ucode.gpccs.data.l[i]);

	for (i = 0; i < g->gr.ctx_vars.sw_bundle_init.count; i++) {
		struct av_gk20a *l = g->gr.ctx_vars.sw_bundle_init.l;

		gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE", i,
				    &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.sw_method_init.count; i++) {
		struct av_gk20a *l = g->gr.ctx_vars.sw_method_init.l;

		gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE", i,
				    &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.sw_ctx_load.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.sw_ctx_load.l;

		gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX", i,
				    &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE", i,
				    &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.sw_non_ctx_load.count; i++) {
		struct av_gk20a *l = g->gr.ctx_vars.sw_non_ctx_load.l;

		gk20a_sim_esc_readl(g, reg_path, i, &l[i].addr);
		gk20a_sim_esc_readl(g, value_path, i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.sys.l;

		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX", i,
				    &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE", i,
				    &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.gpc.l;

		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX", i,
				    &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE", i,
				    &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.tpc.l;

		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX", i,
				    &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE", i,
				    &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.ppc.l;

		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX", i,
				    &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE", i,
				    &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l;

		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX", i,
				    &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE", i,
				    &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_sys.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_sys.l;

		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX", i,
				    &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE", i,
				    &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_gpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_gpc.l;

		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX", i,
				    &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE", i,
				    &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_tpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_tpc.l;

		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR", i,
				    &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX", i,
				    &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE", i,
				    &l[i].value);
	}

	g->gr.ctx_vars.valid = true;

	gk20a_sim_esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0,
			    &g->gr.ctx_vars.regs_base_index);

	gk20a_dbg(gpu_dbg_info | gpu_dbg_fn,
		  "finished querying grctx info from chiplib");
	return 0;
fail:
	gk20a_err(dev_from_gk20a(g),
		  "failed querying grctx info from chiplib");
	return err;
}
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int err = 0;
	struct gk20a_as_share *as_share = filp->private_data;
	struct gk20a *g = gk20a_from_as(as_share->as);

	u8 buf[NVHOST_AS_IOCTL_MAX_ARG_SIZE];

	if ((_IOC_TYPE(cmd) != NVHOST_AS_IOCTL_MAGIC) ||
	    (_IOC_NR(cmd) == 0) ||
	    (_IOC_NR(cmd) > NVHOST_AS_IOCTL_LAST))
		return -EFAULT;

	BUG_ON(_IOC_SIZE(cmd) > NVHOST_AS_IOCTL_MAX_ARG_SIZE);

	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	err = gk20a_busy(g->dev);
	if (err)
		return err;

	switch (cmd) {
	case NVHOST_AS_IOCTL_BIND_CHANNEL:
		trace_gk20a_as_ioctl_bind_channel(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_bind_channel(as_share,
				(struct nvhost_as_bind_channel_args *)buf);
		break;
	case NVHOST32_AS_IOCTL_ALLOC_SPACE:
	{
		struct nvhost32_as_alloc_space_args *args32 =
			(struct nvhost32_as_alloc_space_args *)buf;
		struct nvhost_as_alloc_space_args args;

		args.pages = args32->pages;
		args.page_size = args32->page_size;
		args.flags = args32->flags;
		args.o_a.offset = args32->o_a.offset;
		trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_alloc_space(as_share, &args);
		args32->o_a.offset = args.o_a.offset;
		break;
	}
	case NVHOST_AS_IOCTL_ALLOC_SPACE:
		trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_alloc_space(as_share,
				(struct nvhost_as_alloc_space_args *)buf);
		break;
	case NVHOST_AS_IOCTL_FREE_SPACE:
		trace_gk20a_as_ioctl_free_space(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_free_space(as_share,
				(struct nvhost_as_free_space_args *)buf);
		break;
	case NVHOST_AS_IOCTL_MAP_BUFFER:
		trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_map_buffer(as_share,
				(struct nvhost_as_map_buffer_args *)buf);
		break;
	case NVHOST_AS_IOCTL_MAP_BUFFER_EX:
		trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_map_buffer_ex(as_share,
				(struct nvhost_as_map_buffer_ex_args *)buf);
		break;
	case NVHOST_AS_IOCTL_UNMAP_BUFFER:
		trace_gk20a_as_ioctl_unmap_buffer(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_unmap_buffer(as_share,
				(struct nvhost_as_unmap_buffer_args *)buf);
		break;
	default:
		dev_err(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
		err = -ENOTTY;
		break;
	}

	gk20a_idle(g->dev);

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));

	return err;
}
static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
{
	u32 err, flags;
	u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
			 gr_fecs_falcon_hwcfg_r();

	gk20a_dbg_fn("");

	if (tegra_platform_is_linsim()) {
		gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
			gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
		gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
			gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
	}

	flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
	g->ops.pmu.lsfloadedfalconid = 0;
	if (g->ops.pmu.fecsbootstrapdone) {
		/* this must be recovery so bootstrap fecs and gpccs */
		if (!g->ops.securegpccs) {
			gr_gm20b_load_gpccs_with_bootloader(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
					(1 << LSF_FALCON_ID_FECS));
		} else {
			/* bind WPR VA inst block */
			gr_gk20a_load_falcon_bind_instblk(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
					(1 << LSF_FALCON_ID_FECS) |
					(1 << LSF_FALCON_ID_GPCCS));
		}
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"Unable to recover GR falcon");
			return err;
		}
	} else {
		/* cold boot or rg exit */
		g->ops.pmu.fecsbootstrapdone = true;
		if (!g->ops.securegpccs) {
			gr_gm20b_load_gpccs_with_bootloader(g);
		} else {
			/* bind WPR VA inst block */
			gr_gk20a_load_falcon_bind_instblk(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
					(1 << LSF_FALCON_ID_GPCCS));
			if (err) {
				gk20a_err(dev_from_gk20a(g),
					"Unable to boot GPCCS\n");
				return err;
			}
		}
	}

	/* start gpccs */
	if (g->ops.securegpccs) {
		gk20a_writel(g, reg_offset + gr_fecs_cpuctl_alias_r(),
			gr_gpccs_cpuctl_startcpu_f(1));
	} else {
		gk20a_writel(g, gr_gpccs_dmactl_r(),
			gr_gpccs_dmactl_require_ctx_f(0));
		gk20a_writel(g, gr_gpccs_cpuctl_r(),
			gr_gpccs_cpuctl_startcpu_f(1));
	}
	/* start fecs */
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1), 0x1);
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6), 0xffffffff);
	gk20a_writel(g, gr_fecs_cpuctl_alias_r(),
		gr_fecs_cpuctl_startcpu_f(1));
	gk20a_dbg_fn("done");

	return 0;
}
static int gk20a_init_clk_setup_sw(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;
	static int initialized;
	unsigned long *freqs;
	int err, num_freqs;
	struct clk *ref;
	unsigned long ref_rate;

	nvhost_dbg_fn("");

	if (clk->sw_ready) {
		nvhost_dbg_fn("skip init");
		return 0;
	}

	if (!gk20a_clk_get(g))
		return -EINVAL;

	ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
	if (IS_ERR(ref)) {
		nvhost_err(dev_from_gk20a(g),
			"failed to get GPCPLL reference clock");
		return -EINVAL;
	}
	ref_rate = clk_get_rate(ref);

	clk->pll_delay = 300; /* usec */

	clk->gpc_pll.id = GK20A_GPC_PLL;
	clk->gpc_pll.clk_in = ref_rate / 1000000; /* MHz */

	/* Decide initial frequency */
	if (!initialized) {
		initialized = 1;
		clk->gpc_pll.M = 1;
		clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
					clk->gpc_pll.clk_in);
		clk->gpc_pll.PL = 1;
		clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
		clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL];
	}

	err = tegra_dvfs_get_freqs(clk_get_parent(clk->tegra_clk),
				   &freqs, &num_freqs);
	if (!err) {
		int i, j;

		/* init j for inverse traversal of frequencies */
		j = num_freqs - 1;

		gpu_cooling_freq = kzalloc(
				(1 + num_freqs) * sizeof(*gpu_cooling_freq),
				GFP_KERNEL);

		/* store frequencies in inverse order */
		for (i = 0; i < num_freqs; ++i, --j) {
			gpu_cooling_freq[i].index = i;
			gpu_cooling_freq[i].frequency = freqs[j];
		}

		/* add 'end of table' marker */
		gpu_cooling_freq[i].index = i;
		gpu_cooling_freq[i].frequency = GPUFREQ_TABLE_END;

		/* store number of frequencies */
		num_gpu_cooling_freq = num_freqs + 1;
	}

	mutex_init(&clk->clk_mutex);

	clk->sw_ready = true;

	nvhost_dbg_fn("done");
	return 0;
}
static int clk_slide_gpc_pll(struct gk20a *g, u32 n)
{
	u32 data, coeff;
	u32 nold;
	int ramp_timeout = 500;

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);

	/* do nothing if NDIV is same */
	if (n == nold)
		return 0;

	/* setup */
	data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
	data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
			trim_sys_gpcpll_cfg2_pll_stepa_f(0x2b));
	gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
	data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
	data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
			trim_sys_gpcpll_cfg3_pll_stepb_f(0xb));
	gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);

	/* pll slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* new ndiv ready for ramp */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
			trim_sys_gpcpll_coeff_ndiv_f(n));
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

	/* dynamic ramp to new ndiv */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	do {
		udelay(1);
		ramp_timeout--;
		data = gk20a_readl(
			g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
		if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
			break;
	} while (ramp_timeout > 0);

	/* exit slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
	gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());

	if (ramp_timeout <= 0) {
		gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
		return -ETIMEDOUT;
	}
	return 0;
}
static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
				 struct nvgpu_alloc_obj_ctx_args *args)
{
	struct gk20a *g = c->g;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct tsg_gk20a *tsg = NULL;
	int err = 0;

	gk20a_dbg_fn("");

	/* an address space needs to have been bound at this point.*/
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(dev_from_gk20a(g),
			"not bound to address space at time"
			" of grctx allocation");
		return -EINVAL;
	}

	if (!g->ops.gr.is_valid_class(g, args->class_num)) {
		gk20a_err(dev_from_gk20a(g),
			"invalid obj class 0x%x", args->class_num);
		err = -EINVAL;
		goto out;
	}
	c->obj_class = args->class_num;

	/* FIXME: add TSG support */
	if (gk20a_is_channel_marked_as_tsg(c))
		tsg = &f->tsg[c->tsgid];

	/* allocate gr ctx buffer */
	if (!ch_ctx->gr_ctx) {
		err = vgpu_gr_alloc_channel_gr_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate gr ctx buffer");
			goto out;
		}
	} else {
		/* TBD: needs to be more subtle about which is
		 * being allocated as some are allowed to be
		 * allocated along same channel */
		gk20a_err(dev_from_gk20a(g),
			"too many classes alloc'd on same channel");
		err = -EINVAL;
		goto out;
	}

	/* commit gr ctx buffer */
	err = vgpu_gr_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "fail to commit gr ctx buffer");
		goto out;
	}

	/* allocate patch buffer */
	if (ch_ctx->patch_ctx.mem.pages == NULL) {
		err = vgpu_gr_alloc_channel_patch_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate patch buffer");
			goto out;
		}
	}

	/* map global buffer to channel gpu_va and commit */
	if (!ch_ctx->global_ctx_buffer_mapped) {
		err = vgpu_gr_map_global_ctx_buffers(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to map global ctx buffer");
			goto out;
		}
		gr_gk20a_elpg_protected_call(g,
			vgpu_gr_commit_global_ctx_buffers(g, c, true));
	}

	/* load golden image */
	if (!c->first_init) {
		err = gr_gk20a_elpg_protected_call(g,
			vgpu_gr_load_golden_ctx_image(g, c));
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to load golden ctx image");
			goto out;
		}
		c->first_init = true;
	}

	c->num_objects++;

	gk20a_dbg_fn("done");
	return 0;
out:
	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
	   can be reused so no need to release them.
	   2. golden image load is a one time thing so if
	   they pass, no need to undo. */
	gk20a_err(dev_from_gk20a(g), "fail");
	return err;
}