Example #1
static int gk20a_ltc_alloc_virt_cbc(struct gk20a *g,
				    size_t compbit_backing_size)
{
	struct device *d = dev_from_gk20a(g);
	struct gr_gk20a *gr = &g->gr;
	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t iova;
	int err;

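	/* DMA_ATTR_NO_KERNEL_MAPPING: only the GPU touches this
	 * buffer, so no CPU kernel mapping is created for it */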
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);

	gr->compbit_store.pages =
		dma_alloc_attrs(d, compbit_backing_size, &iova,
				GFP_KERNEL, &attrs);
	if (!gr->compbit_store.pages) {
		gk20a_err(dev_from_gk20a(g),
			  "failed to allocate compbit backing store: size %zu",
			  compbit_backing_size);
		return -ENOMEM;
	}

	gr->compbit_store.base_iova = iova;
	gr->compbit_store.size = compbit_backing_size;
	err = gk20a_get_sgtable_from_pages(d,
				   &gr->compbit_store.sgt,
				   gr->compbit_store.pages, iova,
				   compbit_backing_size);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			  "failed to allocate sgt for backing store");
		/* don't leak the backing store on this error path */
		dma_free_attrs(d, compbit_backing_size,
			       gr->compbit_store.pages, iova, &attrs);
		gr->compbit_store.pages = NULL;
		return err;
	}

	return 0;
}
Example #2
static void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g)
{
	u32 val;
	u32 vpr[4], wpr[6];
	unsigned int i;

	/* print vpr and wpr info; both info registers auto-increment
	 * their index field on every read */
	val = gk20a_readl(g, fb_mmu_vpr_info_r());
	val &= ~0x3;
	val |= fb_mmu_vpr_info_index_addr_lo_v();
	gk20a_writel(g, fb_mmu_vpr_info_r(), val);
	/* read into locals first: the evaluation order of function
	 * arguments is unspecified in C, so reading inside the
	 * gk20a_err() call below could scramble the dump */
	for (i = 0; i < ARRAY_SIZE(vpr); i++)
		vpr[i] = gk20a_readl(g, fb_mmu_vpr_info_r());
	gk20a_err(dev_from_gk20a(g), "VPR: %08x %08x %08x %08x",
		  vpr[0], vpr[1], vpr[2], vpr[3]);

	val = gk20a_readl(g, fb_mmu_wpr_info_r());
	val &= ~0xf;
	val |= fb_mmu_wpr_info_index_allow_read_v();
	gk20a_writel(g, fb_mmu_wpr_info_r(), val);
	for (i = 0; i < ARRAY_SIZE(wpr); i++)
		wpr[i] = gk20a_readl(g, fb_mmu_wpr_info_r());
	gk20a_err(dev_from_gk20a(g), "WPR: %08x %08x %08x %08x %08x %08x",
		  wpr[0], wpr[1], wpr[2], wpr[3], wpr[4], wpr[5]);
}
Example #3
static void vgpu_gr_detect_sm_arch(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 v = 0, raw_version, version = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_GPC0_TPC0_SM_ARCH, &v))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve SM arch");

	raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v);
	if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v())
		version = 0x320; /* SM 3.2 */
	else
		gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x",
			  raw_version);

	/* on Kepler, SM version == SPA version */
	g->gpu_characteristics.sm_arch_spa_version = version;
	g->gpu_characteristics.sm_arch_sm_version = version;

	g->gpu_characteristics.sm_arch_warp_count =
		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
}
Example #4
u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
{
	u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());
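	/* the base is scaled by the number of active LTCs; only one
	 * or two active LTCs are valid here */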
	if (val == 2) {
		return base * 2;
	} else if (val != 1) {
		gk20a_err(dev_from_gk20a(g),
			"Invalid number of active ltcs: %08x\n", val);
	}

	return base;
}
Example #5
int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
{
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = &f->channel[info->chid];

	gk20a_dbg_fn("");
	if (info->type != TEGRA_VGPU_GR_INTR_NOTIFY &&
		info->type != TEGRA_VGPU_GR_INTR_SEMAPHORE)
		gk20a_err(dev_from_gk20a(g), "gr intr (%d) on ch %u",
			info->type, info->chid);

	switch (info->type) {
	case TEGRA_VGPU_GR_INTR_NOTIFY:
		wake_up(&ch->notifier_wq);
		break;
	case TEGRA_VGPU_GR_INTR_SEMAPHORE:
		gk20a_channel_event(ch);
		wake_up(&ch->semaphore_wq);
		break;
	case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
		break;
	case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
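		/* fall through */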
	case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
		break;
	case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_FECS_ERROR:
		break;
	case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_EXCEPTION:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	default:
		WARN_ON(1);
		break;
	}

	return 0;
}
Example #6
static u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 fbp_en_mask = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_FBP_EN_MASK, &fbp_en_mask))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve fbp en mask");

	return fbp_en_mask;
}
Example #7
static u32 vgpu_gr_get_max_fbps_count(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 max_fbps_count = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_NUM_FBPS, &max_fbps_count))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve num fbps");

	return max_fbps_count;
}
Example #8
int gpu_init_hal(struct gk20a *g)
{
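	/* arch and impl are disjoint bit fields of the GPU id, so
	 * their sum forms the full version (0xea for gk20a) */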
	u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
	switch (ver) {
	case GK20A_GPUID_GK20A:
		gk20a_dbg_info("gk20a detected");
		gk20a_init_hal(g);
		break;
	default:
		gk20a_err(&g->dev->dev, "no support for %x", ver);
		return -ENODEV;
	}

	return 0;
}
Example #9
void gm20b_ltc_isr(struct gk20a *g)
{
	u32 mc_intr, ltc_intr;
	int ltc, slice;

	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
	gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x",
		  mc_intr);
	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		if ((mc_intr & 1 << ltc) == 0)
			continue;
		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
					   proj_ltc_stride_v() * ltc +
					   proj_lts_stride_v() * slice);
			gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
				  ltc, slice, ltc_intr);
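			/* write the status back to clear the interrupt */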
			gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
					   proj_ltc_stride_v() * ltc +
					   proj_lts_stride_v() * slice,
				     ltc_intr);
		}
	}
}
Example #10
static int gk20a_init_clk_setup_sw(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;
	static int initialized;
	struct clk *ref;
	unsigned long ref_rate;

	gk20a_dbg_fn("");

	if (clk->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	if (!gk20a_clk_get(g))
		return -EINVAL;

	ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
	if (IS_ERR(ref)) {
		gk20a_err(dev_from_gk20a(g),
			"failed to get GPCPLL reference clock");
		return -EINVAL;
	}
	ref_rate = clk_get_rate(ref);

	clk->pll_delay = 300; /* usec */

	clk->gpc_pll.id = GK20A_GPC_PLL;
	clk->gpc_pll.clk_in = ref_rate / KHZ;

	/* Decide initial frequency */
	if (!initialized) {
		initialized = 1;
		clk->gpc_pll.M = 1;
		clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
					clk->gpc_pll.clk_in);
		clk->gpc_pll.PL = 1;
		clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
		clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL];
	}

	mutex_init(&clk->clk_mutex);

	clk->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;
}
Example #11
static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	int err;

	gk20a_dbg_fn("");

	if (gr->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	gr->g = g;

	err = vgpu_gr_init_gr_config(g, gr);
	if (err)
		goto clean_up;

	err = vgpu_gr_init_ctx_state(g, gr);
	if (err)
		goto clean_up;

	err = g->ops.ltc.init_comptags(g, gr);
	if (err)
		goto clean_up;

	err = vgpu_gr_alloc_global_ctx_buffers(g);
	if (err)
		goto clean_up;

	mutex_init(&gr->ctx_mutex);

	gr->remove_support = vgpu_remove_gr_support;
	gr->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;

clean_up:
	gk20a_err(dev_from_gk20a(g), "fail");
	vgpu_remove_gr_support(gr);
	return err;
}
Example #12
struct clk *gk20a_clk_get(struct gk20a *g)
{
	if (!g->clk.tegra_clk) {
		struct clk *clk;
		char clk_dev_id[32];
		struct device *dev = dev_from_gk20a(g);

		snprintf(clk_dev_id, sizeof(clk_dev_id), "tegra_%s",
			 dev_name(dev));

		clk = clk_get_sys(clk_dev_id, "gpu");
		if (IS_ERR(clk)) {
			gk20a_err(dev, "fail to get tegra gpu clk %s/gpu\n",
				  clk_dev_id);
			return NULL;
		}
		g->clk.tegra_clk = clk;
	}

	return g->clk.tegra_clk;
}
Example #13
static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
{
	struct clk_gk20a *clk = &g->clk;

	if (freq > gpc_pll_params.max_freq)
		freq = gpc_pll_params.max_freq;
	else if (freq < gpc_pll_params.min_freq)
		freq = gpc_pll_params.min_freq;

	if (freq != old_freq) {
		/* gpc_pll.freq is changed to new value here */
		if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
				   &freq, true)) {
			gk20a_err(dev_from_gk20a(g),
				   "failed to set pll target for %d", freq);
			return -EINVAL;
		}
	}
	return 0;
}
Example #14
static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 gpc_index;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_GPC_COUNT, &gr->gpc_count))
		return -ENOMEM;

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_MAX_TPC_PER_GPC_COUNT,
			&gr->max_tpc_per_gpc_count))
		return -ENOMEM;

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_MAX_TPC_COUNT,
			&gr->max_tpc_count))
		return -ENOMEM;

	gr->gpc_tpc_mask = kcalloc(gr->gpc_count, sizeof(u32), GFP_KERNEL);
	if (!gr->gpc_tpc_mask) {
		gk20a_err(dev_from_gk20a(g), "%s: out of memory\n", __func__);
		return -ENOMEM;
	}

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		if (g->ops.gr.get_gpc_tpc_mask)
			gr->gpc_tpc_mask[gpc_index] =
				g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
	}

	g->ops.gr.bundle_cb_defaults(g);
	g->ops.gr.cb_size_default(g);
	g->ops.gr.calc_global_ctx_buffer_size(g);
	return 0;
}
Example #15
static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq)
{
	struct clk_gk20a *clk = &g->clk;
	int err = 0;

	gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq);

	if ((freq == old_freq) && clk->gpc_pll.enabled)
		return 0;

	/* change frequency only if power is on */
	if (g->clk.clk_hw_on) {
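		/* try to slide to the target first (allow_slide = 1);
		 * if that fails, reprogram the PLL from scratch */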
		err = clk_program_gpc_pll(g, clk, 1);
		if (err)
			err = clk_program_gpc_pll(g, clk, 0);
	}

	/* just report the error without restoring the PLL: DVFS may
	 * already have changed the voltage, even on failure */
	if (err)
		gk20a_err(dev_from_gk20a(g),
			"failed to set pll to %d", freq);
	return err;
}
Example #16
int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
		       u32 min, u32 max)
{
	int err = 0;
	struct gr_gk20a *gr = &g->gr;
	u32 ltc, slice, ctrl1, val, hw_op = 0;
	s32 retry = 200;
	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));

	gk20a_dbg_fn("");

	trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);

	if (gr->compbit_store.mem.size == 0)
		return 0;

	mutex_lock(&g->mm.l2_op_lock);

	if (op == gk20a_cbc_op_clear) {
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
			ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
			ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
	} else if (op == gk20a_cbc_op_clean) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
	} else if (op == gk20a_cbc_op_invalidate) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
	} else {
		BUG_ON(1);
	}
	gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
		     gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);

	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		for (slice = 0; slice < slices_per_ltc; slice++) {

			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
				ltc * proj_ltc_stride_v() +
				slice * proj_lts_stride_v();

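			/* poll until this slice retires the operation;
			 * pre-silicon platforms never time out */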
			retry = 200;
			do {
				val = gk20a_readl(g, ctrl1);
				if (!(val & hw_op))
					break;
				retry--;
				udelay(5);

			} while (retry >= 0 ||
					!tegra_platform_is_silicon());

			if (retry < 0 && tegra_platform_is_silicon()) {
				gk20a_err(dev_from_gk20a(g),
					   "comp tag clear timeout\n");
				err = -EBUSY;
				goto out;
			}
		}
	}
out:
	trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
	mutex_unlock(&g->mm.l2_op_lock);
	return err;
}
Example #17
static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
{
	int err = 0;
	u32 flags;
	u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
	  gr_fecs_falcon_hwcfg_r();

	gk20a_dbg_fn("");

	if (tegra_platform_is_linsim()) {
		gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
			gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
		gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
			gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
	}

	flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
	g->ops.pmu.lsfloadedfalconid = 0;
	if (g->ops.pmu.fecsbootstrapdone) {
		/* this must be recovery so bootstrap fecs and gpccs */
		if (!g->ops.securegpccs) {
			gr_gm20b_load_gpccs_with_bootloader(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
					(1 << LSF_FALCON_ID_FECS));
		} else {
			/* bind WPR VA inst block */
			gr_gk20a_load_falcon_bind_instblk(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
				(1 << LSF_FALCON_ID_FECS) |
				(1 << LSF_FALCON_ID_GPCCS));
		}
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"Unable to recover GR falcon");
			return err;
		}

	} else {
		/* cold boot or rg exit */
		g->ops.pmu.fecsbootstrapdone = true;
		if (!g->ops.securegpccs) {
			gr_gm20b_load_gpccs_with_bootloader(g);
		} else {
			/* bind WPR VA inst block */
			gr_gk20a_load_falcon_bind_instblk(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
					(1 << LSF_FALCON_ID_GPCCS));
			if (err) {
				gk20a_err(dev_from_gk20a(g),
						"Unable to boot GPCCS\n");
				return err;
			}
		}
	}

	/* start gpccs */
	if (g->ops.securegpccs) {
		gk20a_writel(g, reg_offset +
			gr_fecs_cpuctl_alias_r(),
			gr_gpccs_cpuctl_startcpu_f(1));
	} else {
		gk20a_writel(g, gr_gpccs_dmactl_r(),
			gr_gpccs_dmactl_require_ctx_f(0));
		gk20a_writel(g, gr_gpccs_cpuctl_r(),
			gr_gpccs_cpuctl_startcpu_f(1));
	}
	/* start fecs */
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1), 0x1);
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6), 0xffffffff);
	gk20a_writel(g, gr_fecs_cpuctl_alias_r(),
			gr_fecs_cpuctl_startcpu_f(1));
	gk20a_dbg_fn("done");

	return 0;
}
Example #18
int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr)
{
	int err = 0;
	u32 i, temp;
	char *size_path  = NULL;
	char *reg_path   = NULL;
	char *value_path = NULL;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
		   "querying grctx info from chiplib");

	g->gr.ctx_vars.dynamic = true;
	g->gr.netlist = GR_NETLIST_DYNAMIC;

	/* query sizes and counts */
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.fecs.inst.count);
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.fecs.data.count);
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.gpccs.inst.count);
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.gpccs.data.count);
	gk20a_sim_esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp);
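	/* convert the word count to bytes */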
	g->gr.ctx_vars.buffer_size = temp << 2;
	gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0,
			    &g->gr.ctx_vars.sw_bundle_init.count);
	gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0,
			    &g->gr.ctx_vars.sw_method_init.count);
	gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0,
			    &g->gr.ctx_vars.sw_ctx_load.count);

	switch (0) { /* g->gr.ctx_vars.reg_init_override */
#if 0
	case NV_REG_STR_RM_GR_REG_INIT_OVERRIDE_PROD_DIFF:
		sizePath   = "GRCTX_NONCTXSW_PROD_DIFF_REG_SIZE";
		regPath    = "GRCTX_NONCTXSW_PROD_DIFF_REG:REG";
		valuePath  = "GRCTX_NONCTXSW_PROD_DIFF_REG:VALUE";
		break;
#endif
	default:
		size_path   = "GRCTX_NONCTXSW_REG_SIZE";
		reg_path    = "GRCTX_NONCTXSW_REG:REG";
		value_path  = "GRCTX_NONCTXSW_REG:VALUE";
		break;
	}

	gk20a_sim_esc_readl(g, size_path, 0,
			    &g->gr.ctx_vars.sw_non_ctx_load.count);

	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.sys.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.gpc.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.tpc.count);
#if 0
	/* looks to be unused, actually chokes the sim */
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.ppc.count);
#endif
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.pm_sys.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.pm_gpc.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.pm_tpc.count);

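	/* a failed allocation yields a zero result; fold all failures
	 * into err and check once below */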
	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.inst);
	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.data);
	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.inst);
	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.data);
	err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_bundle_init);
	err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_method_init);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.sw_ctx_load);
	err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_non_ctx_load);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.sys);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.gpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.tpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.zcull_gpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.ppc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_sys);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_gpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_tpc);

	if (err)
		goto fail;

	for (i = 0; i < g->gr.ctx_vars.ucode.fecs.inst.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS",
				    i, &g->gr.ctx_vars.ucode.fecs.inst.l[i]);

	for (i = 0; i < g->gr.ctx_vars.ucode.fecs.data.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS",
				    i, &g->gr.ctx_vars.ucode.fecs.data.l[i]);

	for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.inst.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS",
				    i, &g->gr.ctx_vars.ucode.gpccs.inst.l[i]);

	for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.data.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS",
				    i, &g->gr.ctx_vars.ucode.gpccs.data.l[i]);

	for (i = 0; i < g->gr.ctx_vars.sw_bundle_init.count; i++) {
		struct av_gk20a *l = g->gr.ctx_vars.sw_bundle_init.l;
		gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.sw_method_init.count; i++) {
		struct av_gk20a *l = g->gr.ctx_vars.sw_method_init.l;
		gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.sw_ctx_load.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.sw_ctx_load.l;
		gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.sw_non_ctx_load.count; i++) {
		struct av_gk20a *l = g->gr.ctx_vars.sw_non_ctx_load.l;
		gk20a_sim_esc_readl(g, reg_path, i, &l[i].addr);
		gk20a_sim_esc_readl(g, value_path, i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.sys.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.gpc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.tpc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.ppc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_sys.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_sys.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_gpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_gpc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_tpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_tpc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE",
				    i, &l[i].value);
	}

	g->gr.ctx_vars.valid = true;

	gk20a_sim_esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0,
			    &g->gr.ctx_vars.regs_base_index);

	gk20a_dbg(gpu_dbg_info | gpu_dbg_fn, "finished querying grctx info from chiplib");
	return 0;
fail:
	gk20a_err(dev_from_gk20a(g),
		  "failed querying grctx info from chiplib");
	/* the only way here is a failed list allocation */
	return -ENOMEM;
}
Example #19
static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c,
				 struct nvgpu_alloc_obj_ctx_args *args)
{
	struct gk20a *g = c->g;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct tsg_gk20a *tsg = NULL;
	int err = 0;

	gk20a_dbg_fn("");

	/* an address space needs to have been bound at this point.*/
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(dev_from_gk20a(g),
			   "not bound to address space at time"
			   " of grctx allocation");
		return -EINVAL;
	}

	if (!g->ops.gr.is_valid_class(g, args->class_num)) {
		gk20a_err(dev_from_gk20a(g),
			   "invalid obj class 0x%x", args->class_num);
		err = -EINVAL;
		goto out;
	}
	c->obj_class = args->class_num;

	/* FIXME: add TSG support */
	if (gk20a_is_channel_marked_as_tsg(c))
		tsg = &f->tsg[c->tsgid];

	/* allocate gr ctx buffer */
	if (!ch_ctx->gr_ctx) {
		err = vgpu_gr_alloc_channel_gr_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate gr ctx buffer");
			goto out;
		}
	} else {
		/*TBD: needs to be more subtle about which is
		 * being allocated as some are allowed to be
		 * allocated along same channel */
		gk20a_err(dev_from_gk20a(g),
			"too many classes alloc'd on same channel");
		err = -EINVAL;
		goto out;
	}

	/* commit gr ctx buffer */
	err = vgpu_gr_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to commit gr ctx buffer");
		goto out;
	}

	/* allocate patch buffer */
	if (ch_ctx->patch_ctx.mem.pages == NULL) {
		err = vgpu_gr_alloc_channel_patch_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate patch buffer");
			goto out;
		}
	}

	/* map global buffer to channel gpu_va and commit */
	if (!ch_ctx->global_ctx_buffer_mapped) {
		err = vgpu_gr_map_global_ctx_buffers(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to map global ctx buffer");
			goto out;
		}
		gr_gk20a_elpg_protected_call(g,
				vgpu_gr_commit_global_ctx_buffers(g, c, true));
	}

	/* load golden image */
	if (!c->first_init) {
		err = gr_gk20a_elpg_protected_call(g,
				vgpu_gr_load_golden_ctx_image(g, c));
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to load golden ctx image");
			goto out;
		}
		c->first_init = true;
	}

	c->num_objects++;

	gk20a_dbg_fn("done");
	return 0;
out:
	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
	   can be reused so no need to release them.
	   2. golden image load is a one time thing so if
	   they pass, no need to undo. */
	gk20a_err(dev_from_gk20a(g), "fail");
	return err;
}
Example #20
static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
			      u32 min, u32 max)
{
	int err = 0;
	struct gr_gk20a *gr = &g->gr;
	u32 fbp, slice, ctrl1, val, hw_op = 0;
	unsigned long end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
	u32 delay = GR_IDLE_CHECK_DEFAULT;
	u32 slices_per_fbp =
		ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
			gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));

	gk20a_dbg_fn("");

	if (gr->compbit_store.size == 0)
		return 0;

	mutex_lock(&g->mm.l2_op_lock);

	if (op == gk20a_cbc_op_clear) {
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
			     ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
			     ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
	} else if (op == gk20a_cbc_op_clean) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
	} else if (op == gk20a_cbc_op_invalidate) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
	} else {
		BUG_ON(1);
	}

	gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
		     gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);

	for (fbp = 0; fbp < gr->num_fbps; fbp++) {
		for (slice = 0; slice < slices_per_fbp; slice++) {

			delay = GR_IDLE_CHECK_DEFAULT;

			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
				fbp * proj_ltc_stride_v() +
				slice * proj_lts_stride_v();

			do {
				val = gk20a_readl(g, ctrl1);
				if (!(val & hw_op))
					break;

				usleep_range(delay, delay * 2);
				delay = min_t(u32, delay << 1,
					GR_IDLE_CHECK_MAX);

			} while (time_before(jiffies, end_jiffies) ||
					!tegra_platform_is_silicon());

			if (!time_before(jiffies, end_jiffies) &&
					tegra_platform_is_silicon()) {
				gk20a_err(dev_from_gk20a(g),
					   "comp tag clear timeout\n");
				err = -EBUSY;
				goto out;
			}
		}
	}
out:
	mutex_unlock(&g->mm.l2_op_lock);
	return err;
}
Example #21
static int clk_slide_gpc_pll(struct gk20a *g, u32 n)
{
	u32 data, coeff;
	u32 nold;
	int ramp_timeout = 500;

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);

	/* do nothing if NDIV is same */
	if (n == nold)
		return 0;

	/* setup */
	data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
	data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
			trim_sys_gpcpll_cfg2_pll_stepa_f(0x2b));
	gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
	data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
	data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
			trim_sys_gpcpll_cfg3_pll_stepb_f(0xb));
	gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);

	/* pll slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* new ndiv ready for ramp */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
			trim_sys_gpcpll_coeff_ndiv_f(n));
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

	/* dynamic ramp to new ndiv */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

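	/* wait up to ~500us (500 iterations of udelay(1)) for the
	 * dynamic ramp to complete */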
	do {
		udelay(1);
		ramp_timeout--;
		data = gk20a_readl(
			g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
		if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
			break;
	} while (ramp_timeout > 0);

	/* exit slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
	gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());

	if (ramp_timeout <= 0) {
		gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
		return -ETIMEDOUT;
	}
	return 0;
}