Example #1
static int gk20a_ltc_alloc_virt_cbc(struct gk20a *g,
				    size_t compbit_backing_size)
{
	struct device *d = dev_from_gk20a(g);
	struct gr_gk20a *gr = &g->gr;
	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t iova;
	int err;

	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);

	gr->compbit_store.pages =
		dma_alloc_attrs(d, compbit_backing_size, &iova,
				GFP_KERNEL, &attrs);
	if (!gr->compbit_store.pages) {
		gk20a_err(dev_from_gk20a(g), "failed to allocate backing store for compbit : size %zu",
				  compbit_backing_size);
		return -ENOMEM;
	}

	gr->compbit_store.base_iova = iova;
	gr->compbit_store.size = compbit_backing_size;
	err = gk20a_get_sgtable_from_pages(d,
				   &gr->compbit_store.sgt,
				   gr->compbit_store.pages, iova,
				   compbit_backing_size);
	if (err) {
		gk20a_err(dev_from_gk20a(g), "failed to allocate sgt for backing store");
		return err;
	}

	return 0;
}
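A minimal teardown sketch for the allocation above, for illustration only: it assumes compbit_store.sgt is a pointer filled in by gk20a_get_sgtable_from_pages and that the same DMA attributes are rebuilt at free time (the driver may provide its own helper for releasing the sgt; names with a _sketch suffix are hypothetical).

static void gk20a_ltc_free_virt_cbc_sketch(struct gk20a *g)
{
	struct device *d = dev_from_gk20a(g);
	struct gr_gk20a *gr = &g->gr;
	DEFINE_DMA_ATTRS(attrs);

	/* must match the attributes used when the memory was allocated */
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);

	if (gr->compbit_store.sgt) {
		sg_free_table(gr->compbit_store.sgt);
		kfree(gr->compbit_store.sgt);
		gr->compbit_store.sgt = NULL;
	}

	if (gr->compbit_store.pages) {
		dma_free_attrs(d, gr->compbit_store.size,
			       gr->compbit_store.pages,
			       gr->compbit_store.base_iova, &attrs);
		gr->compbit_store.pages = NULL;
	}
}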
Example #2
static void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g)
{
	u32 val;

	/* print vpr and wpr info; the info register's index field
	 * auto-increments on each read, so the back-to-back reads below
	 * return the successive info words */
	val = gk20a_readl(g, fb_mmu_vpr_info_r());
	val &= ~0x3;
	val |= fb_mmu_vpr_info_index_addr_lo_v();
	gk20a_writel(g, fb_mmu_vpr_info_r(), val);
	gk20a_err(dev_from_gk20a(g), "VPR: %08x %08x %08x %08x",
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()),
		gk20a_readl(g, fb_mmu_vpr_info_r()));

	val = gk20a_readl(g, fb_mmu_wpr_info_r());
	val &= ~0xf;
	val |= (fb_mmu_wpr_info_index_allow_read_v());
	gk20a_writel(g, fb_mmu_wpr_info_r(), val);
	gk20a_err(dev_from_gk20a(g), "WPR: %08x %08x %08x %08x %08x %08x",
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()),
		gk20a_readl(g, fb_mmu_wpr_info_r()));

}
static void vgpu_gr_detect_sm_arch(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 v = 0, raw_version, version = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_GPC0_TPC0_SM_ARCH, &v))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve SM arch");

	raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v);
	if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v())
		version = 0x320; /* SM 3.2 */
	else
		gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x",
			  raw_version);

	/* on Kepler, SM version == SPA version */
	g->gpu_characteristics.sm_arch_spa_version = version;
	g->gpu_characteristics.sm_arch_sm_version = version;

	g->gpu_characteristics.sm_arch_warp_count =
		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
}
Example #4
static int gk20a_determine_L2_size_bytes(struct gk20a *g)
{
	const u32 gpuid = GK20A_GPUID(g->gpu_characteristics.arch,
				      g->gpu_characteristics.impl);
	u32 lts_per_ltc;
	u32 ways;
	u32 sets;
	u32 bytes_per_line;
	u32 active_ltcs;
	u32 cache_size;

	u32 tmp;
	u32 active_sets_value;

	tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
	ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));

	active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
	if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
		sets = 64;
	} else if (active_sets_value ==
		 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
		sets = 32;
	} else if (active_sets_value ==
		 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
		sets = 16;
	} else {
		dev_err(dev_from_gk20a(g),
			"Unknown constant %u for active sets",
		       (unsigned)active_sets_value);
		sets = 0;
	}

	active_ltcs = g->gr.num_fbps;

	/* chip-specific values */
	switch (gpuid) {
	case GK20A_GPUID_GK20A:
		lts_per_ltc = 1;
		bytes_per_line = 128;
		break;

	default:
		dev_err(dev_from_gk20a(g), "Unknown GPU id 0x%02x\n",
			(unsigned)gpuid);
		lts_per_ltc = 0;
		bytes_per_line = 0;
	}

	cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;

	return cache_size;
}
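As a purely illustrative calculation (the field values are hardware state, not taken from the code above): with one active LTC, 1 LTS per LTC, 16 active ways, all 64 sets, and 128 bytes per line, the function would report 1 * 1 * 16 * 64 * 128 = 131072 bytes, i.e. a 128 KiB L2.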
Example #5
/* Flushes the compression bit cache as well as "data".
 * Note: the name here is a bit of a misnomer.  ELPG uses this
 * internally... but ELPG doesn't have to be on to do it manually.
 */
static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
{
	u32 data;
	s32 retry = 100;

	gk20a_dbg_fn("");

	/* Make sure all previous writes are committed to the L2. There's no
	   guarantee that writes are to DRAM. This will be a sysmembar internal
	   to the L2. */
	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
	do {
		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());

		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
			gk20a_dbg_info("g_elpg_flush 0x%x", data);
			retry--;
			usleep_range(20, 40);
		} else
			break;
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0)
		gk20a_warn(dev_from_gk20a(g),
			    "g_elpg_flush too many retries");

}
int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info)
{
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = &f->channel[info->chid];

	gk20a_dbg_fn("");
	if (info->type != TEGRA_VGPU_GR_INTR_NOTIFY &&
		info->type != TEGRA_VGPU_GR_INTR_SEMAPHORE)
		gk20a_err(dev_from_gk20a(g), "gr intr (%d) on ch %u",
			info->type, info->chid);

	switch (info->type) {
	case TEGRA_VGPU_GR_INTR_NOTIFY:
		wake_up(&ch->notifier_wq);
		break;
	case TEGRA_VGPU_GR_INTR_SEMAPHORE:
		gk20a_channel_event(ch);
		wake_up(&ch->semaphore_wq);
		break;
	case TEGRA_VGPU_GR_INTR_SEMAPHORE_TIMEOUT:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT);
		break;
	case TEGRA_VGPU_GR_INTR_ILLEGAL_NOTIFY:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY);
	case TEGRA_VGPU_GR_INTR_ILLEGAL_METHOD:
		break;
	case TEGRA_VGPU_GR_INTR_ILLEGAL_CLASS:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_FECS_ERROR:
		break;
	case TEGRA_VGPU_GR_INTR_CLASS_ERROR:
		gk20a_set_error_notifier(ch,
					NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_FIRMWARE_METHOD:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	case TEGRA_VGPU_GR_INTR_EXCEPTION:
		gk20a_set_error_notifier(ch,
				NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY);
		break;
	default:
		WARN_ON(1);
		break;
	}

	return 0;
}
Example #7
u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
{
	u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());
	if (val == 2) {
		return base * 2;
	} else if (val != 1) {
		gk20a_err(dev_from_gk20a(g),
			"Invalid number of active ltcs: %08x\n", val);
	}

	return base;
}
Example #8
void gk20a_priv_ring_isr(struct gk20a *g)
{
	u32 status0, status1;
	u32 cmd;
	s32 retry = 100;

	if (tegra_platform_is_linsim())
		return;

	status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
	status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

	gk20a_dbg(gpu_dbg_intr, "ringmaster intr status0: 0x%08x,"
		"status1: 0x%08x", status0, status1);

	if (status0 & (0x1 | 0x2 | 0x4)) {
		gk20a_reset_priv_ring(g);
	}

	if (status0 & 0x100) {
		gk20a_dbg(gpu_dbg_intr, "SYS write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x",
			gk20a_readl(g, 0x122120), gk20a_readl(g, 0x122124), gk20a_readl(g, 0x122128),
			gk20a_readl(g, 0x12212c));
	}

	if (status1 & 0x1) {
		gk20a_dbg(gpu_dbg_intr, "GPC write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x",
			gk20a_readl(g, 0x128120), gk20a_readl(g, 0x128124), gk20a_readl(g, 0x128128),
			gk20a_readl(g, 0x12812c));
	}

	cmd = gk20a_readl(g, pri_ringmaster_command_r());
	cmd = set_field(cmd, pri_ringmaster_command_cmd_m(),
		pri_ringmaster_command_cmd_ack_interrupt_f());
	gk20a_writel(g, pri_ringmaster_command_r(), cmd);

	do {
		cmd = pri_ringmaster_command_cmd_v(
			gk20a_readl(g, pri_ringmaster_command_r()));
		usleep_range(20, 40);
	} while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry);

	if (retry <= 0)
		gk20a_warn(dev_from_gk20a(g),
			"priv ringmaster cmd ack too many retries");

	status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
	status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

	gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
		" status1: 0x%08x", status0, status1);
}
static u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 fbp_en_mask = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_FBP_EN_MASK, &fbp_en_mask))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve fbp en mask");

	return fbp_en_mask;
}
static u32 vgpu_gr_get_max_fbps_count(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 max_fbps_count = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_NUM_FBPS, &max_fbps_count))
		gk20a_err(dev_from_gk20a(g), "failed to retrieve num fbps");

	return max_fbps_count;
}
Example #11
static int vgpu_determine_L2_size_bytes(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 cache_size = 0;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_L2_SIZE, &cache_size))
		dev_err(dev_from_gk20a(g), "unable to get L2 size");

	return cache_size;
}
Example #12
void gm20b_ltc_isr(struct gk20a *g)
{
	u32 mc_intr, ltc_intr;
	int ltc, slice;

	mc_intr = gk20a_readl(g, mc_intr_ltc_r());
	gk20a_err(dev_from_gk20a(g), "mc_ltc_intr: %08x",
		  mc_intr);
	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		if ((mc_intr & 1 << ltc) == 0)
			continue;
		for (slice = 0; slice < g->gr.slices_per_ltc; slice++) {
			ltc_intr = gk20a_readl(g, ltc_ltc0_lts0_intr_r() +
					   proj_ltc_stride_v() * ltc +
					   proj_lts_stride_v() * slice);
			gk20a_err(dev_from_gk20a(g), "ltc%d, slice %d: %08x",
				  ltc, slice, ltc_intr);
			gk20a_writel(g, ltc_ltc0_lts0_intr_r() +
					   proj_ltc_stride_v() * ltc +
					   proj_lts_stride_v() * slice,
				     ltc_intr);
		}
	}
}
Example #13
static int gk20a_init_clk_setup_sw(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;
	static int initialized;
	struct clk *ref;
	unsigned long ref_rate;

	gk20a_dbg_fn("");

	if (clk->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	if (!gk20a_clk_get(g))
		return -EINVAL;

	ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
	if (IS_ERR(ref)) {
		gk20a_err(dev_from_gk20a(g),
			"failed to get GPCPLL reference clock");
		return -EINVAL;
	}
	ref_rate = clk_get_rate(ref);

	clk->pll_delay = 300; /* usec */

	clk->gpc_pll.id = GK20A_GPC_PLL;
	clk->gpc_pll.clk_in = ref_rate / KHZ;

	/* Decide initial frequency */
	if (!initialized) {
		initialized = 1;
		clk->gpc_pll.M = 1;
		clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
					clk->gpc_pll.clk_in);
		clk->gpc_pll.PL = 1;
		clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
		clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL];
	}

	mutex_init(&clk->clk_mutex);

	clk->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;
}
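Purely as an illustration of the initial-frequency arithmetic above (the reference rate and gpc_pll_params values are assumed, not taken from the code): with a 38.4 MHz reference, clk_in = 38400 kHz; if min_vco were 1000000 kHz, then N = DIV_ROUND_UP(1000000, 38400) = 27 and freq = 38400 * 27 = 1036800 kHz before the pl_to_div[PL] post-divider is applied.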
Example #14
struct clk *gk20a_clk_get(struct gk20a *g)
{
	if (!g->clk.tegra_clk) {
		struct clk *clk;

		clk = clk_get_sys("tegra_gk20a", "gpu");
		if (IS_ERR(clk)) {
			nvhost_err(dev_from_gk20a(g),
				"fail to get tegra gpu clk tegra_gk20a/gpu");
			return NULL;
		}
		g->clk.tegra_clk = clk;
	}

	return g->clk.tegra_clk;
}
Example #15
void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
{
	u32 data;
	bool done[g->ltc_count];
	s32 retry = 100;
	int i;
	int num_done = 0;
	u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();

	gk20a_dbg_fn("");

	trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);

	for (i = 0; i < g->ltc_count; i++)
		done[i] = 0;

	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
	do {
		for (i = 0; i < g->ltc_count; i++) {
			if (done[i])
				continue;

			data = gk20a_readl(g,
					ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);

			if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
				gk20a_dbg_info("g_elpg_flush 0x%x", data);
			} else {
				done[i] = 1;
				num_done++;
			}
		}

		if (num_done < g->ltc_count) {
			retry--;
			udelay(5);
		} else
			break;
	} while (retry >= 0 || !tegra_platform_is_silicon());

	if (retry < 0 && tegra_platform_is_silicon())
		gk20a_warn(dev_from_gk20a(g),
			    "g_elpg_flush too many retries");

	trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
}
static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
{
	struct gr_gk20a *gr = &g->gr;
	int err;

	gk20a_dbg_fn("");

	if (gr->sw_ready) {
		gk20a_dbg_fn("skip init");
		return 0;
	}

	gr->g = g;

	err = vgpu_gr_init_gr_config(g, gr);
	if (err)
		goto clean_up;

	err = vgpu_gr_init_ctx_state(g, gr);
	if (err)
		goto clean_up;

	err = g->ops.ltc.init_comptags(g, gr);
	if (err)
		goto clean_up;

	err = vgpu_gr_alloc_global_ctx_buffers(g);
	if (err)
		goto clean_up;

	mutex_init(&gr->ctx_mutex);

	gr->remove_support = vgpu_remove_gr_support;
	gr->sw_ready = true;

	gk20a_dbg_fn("done");
	return 0;

clean_up:
	gk20a_err(dev_from_gk20a(g), "fail");
	vgpu_remove_gr_support(gr);
	return err;
}
Example #17
struct clk *gk20a_clk_get(struct gk20a *g)
{
	if (!g->clk.tegra_clk) {
		struct clk *clk;
		char clk_dev_id[32];
		struct device *dev = dev_from_gk20a(g);

		snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev));

		clk = clk_get_sys(clk_dev_id, "gpu");
		if (IS_ERR(clk)) {
			gk20a_err(dev, "fail to get tegra gpu clk %s/gpu\n",
				  clk_dev_id);
			return NULL;
		}
		g->clk.tegra_clk = clk;
	}

	return g->clk.tegra_clk;
}
Example #18
static int set_pll_target(struct gk20a *g, u32 freq, u32 old_freq)
{
	struct clk_gk20a *clk = &g->clk;

	if (freq > gpc_pll_params.max_freq)
		freq = gpc_pll_params.max_freq;
	else if (freq < gpc_pll_params.min_freq)
		freq = gpc_pll_params.min_freq;

	if (freq != old_freq) {
		/* gpc_pll.freq is changed to new value here */
		if (clk_config_pll(clk, &clk->gpc_pll, &gpc_pll_params,
				   &freq, true)) {
			gk20a_err(dev_from_gk20a(g),
				   "failed to set pll target for %d", freq);
			return -EINVAL;
		}
	}
	return 0;
}
Example #19
static int gm20b_determine_L2_size_bytes(struct gk20a *g)
{
	u32 lts_per_ltc;
	u32 ways;
	u32 sets;
	u32 bytes_per_line;
	u32 active_ltcs;
	u32 cache_size;

	u32 tmp;
	u32 active_sets_value;

	tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_cfg1_r());
	ways = hweight32(ltc_ltc0_lts0_tstg_cfg1_active_ways_v(tmp));

	active_sets_value = ltc_ltc0_lts0_tstg_cfg1_active_sets_v(tmp);
	if (active_sets_value == ltc_ltc0_lts0_tstg_cfg1_active_sets_all_v()) {
		sets = 64;
	} else if (active_sets_value ==
		 ltc_ltc0_lts0_tstg_cfg1_active_sets_half_v()) {
		sets = 32;
	} else if (active_sets_value ==
		 ltc_ltc0_lts0_tstg_cfg1_active_sets_quarter_v()) {
		sets = 16;
	} else {
		dev_err(dev_from_gk20a(g),
			"Unknown constant %u for active sets",
		       (unsigned)active_sets_value);
		sets = 0;
	}

	active_ltcs = g->gr.num_fbps;

	/* chip-specific values */
	lts_per_ltc = 2;
	bytes_per_line = 128;
	cache_size = active_ltcs * lts_per_ltc * ways * sets * bytes_per_line;

	return cache_size;
}
static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
{
	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
	u32 gpc_index;

	gk20a_dbg_fn("");

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_GPC_COUNT, &gr->gpc_count))
		return -ENOMEM;

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_MAX_TPC_PER_GPC_COUNT,
			&gr->max_tpc_per_gpc_count))
		return -ENOMEM;

	if (vgpu_get_attribute(platform->virt_handle,
			TEGRA_VGPU_ATTRIB_MAX_TPC_COUNT,
			&gr->max_tpc_count))
		return -ENOMEM;

	gr->gpc_tpc_mask = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
	if (!gr->gpc_tpc_mask) {
		gk20a_err(dev_from_gk20a(g), "%s: out of memory\n", __func__);
		return -ENOMEM;
	}

	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
		if (g->ops.gr.get_gpc_tpc_mask)
			gr->gpc_tpc_mask[gpc_index] =
				g->ops.gr.get_gpc_tpc_mask(g, gpc_index);
	}

	g->ops.gr.bundle_cb_defaults(g);
	g->ops.gr.cb_size_default(g);
	g->ops.gr.calc_global_ctx_buffer_size(g);
	return 0;
}
Example #21
void gk20a_priv_ring_isr(struct gk20a *g)
{
	u32 status0, status1;
	u32 cmd;
	s32 retry = 100;

	status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
	status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

	gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
		"status1: 0x%08x", status0, status1);

	if (status0 & (0x1 | 0x2 | 0x4)) {
		gk20a_reset_priv_ring(g);
	}

	cmd = gk20a_readl(g, pri_ringmaster_command_r());
	cmd = set_field(cmd, pri_ringmaster_command_cmd_m(),
		pri_ringmaster_command_cmd_ack_interrupt_f());
	gk20a_writel(g, pri_ringmaster_command_r(), cmd);

	do {
		cmd = pri_ringmaster_command_cmd_v(
			gk20a_readl(g, pri_ringmaster_command_r()));
		usleep_range(20, 40);
	} while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry);

	if (retry <= 0)
		gk20a_warn(dev_from_gk20a(g),
			"priv ringmaster cmd ack too many retries");

	status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
	status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());

	gk20a_dbg_info("ringmaster intr status0: 0x%08x,"
		" status1: 0x%08x", status0, status1);
}
Example #22
static int set_pll_freq(struct gk20a *g, u32 freq, u32 old_freq)
{
	struct clk_gk20a *clk = &g->clk;
	int err = 0;

	gk20a_dbg_fn("curr freq: %dMHz, target freq %dMHz", old_freq, freq);

	if ((freq == old_freq) && clk->gpc_pll.enabled)
		return 0;

	/* change frequency only if power is on */
	if (g->clk.clk_hw_on) {
		err = clk_program_gpc_pll(g, clk, 1);
		if (err)
			err = clk_program_gpc_pll(g, clk, 0);
	}

	/* Just report the error without restoring the PLL, since DVFS may
	   have already changed the voltage even when it returns an error. */
	if (err)
		gk20a_err(dev_from_gk20a(g),
			"failed to set pll to %d", freq);
	return err;
}
Example #23
int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
		       u32 min, u32 max)
{
	int err = 0;
	struct gr_gk20a *gr = &g->gr;
	u32 ltc, slice, ctrl1, val, hw_op = 0;
	s32 retry = 200;
	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));

	gk20a_dbg_fn("");

	trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);

	if (gr->compbit_store.mem.size == 0)
		return 0;

	mutex_lock(&g->mm.l2_op_lock);

	if (op == gk20a_cbc_op_clear) {
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
			ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
			ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
	} else if (op == gk20a_cbc_op_clean) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
	} else if (op == gk20a_cbc_op_invalidate) {
		hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
	} else {
		BUG_ON(1);
	}
	gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
		     gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);

	for (ltc = 0; ltc < g->ltc_count; ltc++) {
		for (slice = 0; slice < slices_per_ltc; slice++) {

			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
				ltc * proj_ltc_stride_v() +
				slice * proj_lts_stride_v();

			retry = 200;
			do {
				val = gk20a_readl(g, ctrl1);
				if (!(val & hw_op))
					break;
				retry--;
				udelay(5);

			} while (retry >= 0 ||
					!tegra_platform_is_silicon());

			if (retry < 0 && tegra_platform_is_silicon()) {
				gk20a_err(dev_from_gk20a(g),
					   "comp tag clear timeout\n");
				err = -EBUSY;
				goto out;
			}
		}
	}
out:
	trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
	mutex_unlock(&g->mm.l2_op_lock);
	return err;
}
Example #24
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct platform_device *dev = filp->private_data;
	struct gk20a *g = get_gk20a(dev);
	struct nvhost_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
	struct nvhost_gpu_zcull_get_info_args *get_info_args;
	struct nvhost_gpu_zbc_set_table_args *set_table_args;
	struct nvhost_gpu_zbc_query_table_args *query_table_args;
	u8 buf[NVHOST_GPU_IOCTL_MAX_ARG_SIZE];
	struct gr_zcull_info *zcull_info;
	struct zbc_entry *zbc_val;
	struct zbc_query_params *zbc_tbl;
	int i, err = 0;

	nvhost_dbg_fn("");

	if ((_IOC_TYPE(cmd) != NVHOST_GPU_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVHOST_GPU_IOCTL_LAST))
		return -EFAULT;

	BUG_ON(_IOC_SIZE(cmd) > NVHOST_GPU_IOCTL_MAX_ARG_SIZE);

	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	if (!g->gr.sw_ready) {
		gk20a_busy(g->dev);
		gk20a_idle(g->dev);
	}

	switch (cmd) {
	case NVHOST_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
		get_ctx_size_args = (struct nvhost_gpu_zcull_get_ctx_size_args *)buf;

		get_ctx_size_args->size = gr_gk20a_get_ctxsw_zcull_size(g, &g->gr);

		break;
	case NVHOST_GPU_IOCTL_ZCULL_GET_INFO:
		get_info_args = (struct nvhost_gpu_zcull_get_info_args *)buf;

		memset(get_info_args, 0, sizeof(struct nvhost_gpu_zcull_get_info_args));

		zcull_info = kzalloc(sizeof(struct gr_zcull_info), GFP_KERNEL);
		if (zcull_info == NULL)
			return -ENOMEM;

		err = gr_gk20a_get_zcull_info(g, &g->gr, zcull_info);
		if (err) {
			kfree(zcull_info);
			break;
		}

		get_info_args->width_align_pixels = zcull_info->width_align_pixels;
		get_info_args->height_align_pixels = zcull_info->height_align_pixels;
		get_info_args->pixel_squares_by_aliquots = zcull_info->pixel_squares_by_aliquots;
		get_info_args->aliquot_total = zcull_info->aliquot_total;
		get_info_args->region_byte_multiplier = zcull_info->region_byte_multiplier;
		get_info_args->region_header_size = zcull_info->region_header_size;
		get_info_args->subregion_header_size = zcull_info->subregion_header_size;
		get_info_args->subregion_width_align_pixels = zcull_info->subregion_width_align_pixels;
		get_info_args->subregion_height_align_pixels = zcull_info->subregion_height_align_pixels;
		get_info_args->subregion_count = zcull_info->subregion_count;

		kfree(zcull_info);
		break;
	case NVHOST_GPU_IOCTL_ZBC_SET_TABLE:
		set_table_args = (struct nvhost_gpu_zbc_set_table_args *)buf;

		zbc_val = kzalloc(sizeof(struct zbc_entry), GFP_KERNEL);
		if (zbc_val == NULL)
			return -ENOMEM;

		zbc_val->format = set_table_args->format;
		zbc_val->type = set_table_args->type;

		switch (zbc_val->type) {
		case GK20A_ZBC_TYPE_COLOR:
			for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
				zbc_val->color_ds[i] = set_table_args->color_ds[i];
				zbc_val->color_l2[i] = set_table_args->color_l2[i];
			}
			break;
		case GK20A_ZBC_TYPE_DEPTH:
			zbc_val->depth = set_table_args->depth;
			break;
		default:
			err = -EINVAL;
		}

		if (!err) {
			gk20a_busy(dev);
			err = gk20a_gr_zbc_set_table(g, &g->gr, zbc_val);
			gk20a_idle(dev);
		}

		kfree(zbc_val);
		break;
	case NVHOST_GPU_IOCTL_ZBC_QUERY_TABLE:
		query_table_args = (struct nvhost_gpu_zbc_query_table_args *)buf;

		zbc_tbl = kzalloc(sizeof(struct zbc_query_params), GFP_KERNEL);
		if (zbc_tbl == NULL)
			return -ENOMEM;

		zbc_tbl->type = query_table_args->type;
		zbc_tbl->index_size = query_table_args->index_size;

		err = gr_gk20a_query_zbc(g, &g->gr, zbc_tbl);

		if (!err) {
			switch (zbc_tbl->type) {
			case GK20A_ZBC_TYPE_COLOR:
				for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
					query_table_args->color_ds[i] = zbc_tbl->color_ds[i];
					query_table_args->color_l2[i] = zbc_tbl->color_l2[i];
				}
				break;
			case GK20A_ZBC_TYPE_DEPTH:
				query_table_args->depth = zbc_tbl->depth;
				break;
			case GK20A_ZBC_TYPE_INVALID:
				query_table_args->index_size = zbc_tbl->index_size;
				break;
			default:
				err = -EINVAL;
			}
			if (!err) {
				query_table_args->format = zbc_tbl->format;
				query_table_args->ref_cnt = zbc_tbl->ref_cnt;
			}
		}

		kfree(zbc_tbl);
		break;

	case NVHOST_GPU_IOCTL_GET_CHARACTERISTICS:
		err = gk20a_ctrl_ioctl_gpu_characteristics(
			g, (struct nvhost_gpu_get_characteristics *)buf);
		break;

	default:
		nvhost_err(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) {
		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;
	}

	return err;
}
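A hedged userspace sketch of calling into this dispatcher, shown only to illustrate the copy-in/copy-out flow. The uapi header path and device-node name are assumptions, not taken from the example; the size field of nvhost_gpu_zcull_get_ctx_size_args is the one the handler above fills in.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvhost_gpu_ioctl.h>	/* assumed uapi header location */

int main(void)
{
	struct nvhost_gpu_zcull_get_ctx_size_args args;
	int fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);	/* assumed node name */

	if (fd < 0)
		return 1;

	memset(&args, 0, sizeof(args));

	/* if the command direction includes _IOC_READ, the kernel copies
	 * args back to userspace on success */
	if (ioctl(fd, NVHOST_GPU_IOCTL_ZCULL_GET_CTX_SIZE, &args) == 0)
		printf("zcull ctx size: %llu\n",
		       (unsigned long long)args.size);

	close(fd);
	return 0;
}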
int gr_gk20a_init_ctx_vars_sim(struct gk20a *g, struct gr_gk20a *gr)
{
	int err = 0;
	u32 i, temp;
	char *size_path  = NULL;
	char *reg_path   = NULL;
	char *value_path = NULL;

	gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
		   "querying grctx info from chiplib");

	g->gr.ctx_vars.dynamic = true;
	g->gr.netlist = GR_NETLIST_DYNAMIC;

	/* query sizes and counts */
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.fecs.inst.count);
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.fecs.data.count);
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.gpccs.inst.count);
	gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS_COUNT", 0,
			    &g->gr.ctx_vars.ucode.gpccs.data.count);
	gk20a_sim_esc_readl(g, "GRCTX_ALL_CTX_TOTAL_WORDS", 0, &temp);
	g->gr.ctx_vars.buffer_size = temp << 2;
	gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT_SIZE", 0,
			    &g->gr.ctx_vars.sw_bundle_init.count);
	gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT_SIZE", 0,
			    &g->gr.ctx_vars.sw_method_init.count);
	gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD_SIZE", 0,
			    &g->gr.ctx_vars.sw_ctx_load.count);

	switch (0) { /* g->gr.ctx_vars.reg_init_override */
#if 0
	case NV_REG_STR_RM_GR_REG_INIT_OVERRIDE_PROD_DIFF:
		sizePath   = "GRCTX_NONCTXSW_PROD_DIFF_REG_SIZE";
		regPath    = "GRCTX_NONCTXSW_PROD_DIFF_REG:REG";
		valuePath  = "GRCTX_NONCTXSW_PROD_DIFF_REG:VALUE";
		break;
#endif
	default:
		size_path   = "GRCTX_NONCTXSW_REG_SIZE";
		reg_path    = "GRCTX_NONCTXSW_REG:REG";
		value_path  = "GRCTX_NONCTXSW_REG:VALUE";
		break;
	}

	gk20a_sim_esc_readl(g, size_path, 0,
			    &g->gr.ctx_vars.sw_non_ctx_load.count);

	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.sys.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.gpc.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.tpc.count);
#if 0
	/* looks to be unused, actually chokes the sim */
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.ppc.count);
#endif
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.pm_sys.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.pm_gpc.count);
	gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC_COUNT", 0,
			    &g->gr.ctx_vars.ctxsw_regs.pm_tpc.count);

	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.inst);
	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.fecs.data);
	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.inst);
	err |= !alloc_u32_list_gk20a(&g->gr.ctx_vars.ucode.gpccs.data);
	err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_bundle_init);
	err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_method_init);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.sw_ctx_load);
	err |= !alloc_av_list_gk20a(&g->gr.ctx_vars.sw_non_ctx_load);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.sys);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.gpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.tpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.zcull_gpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.ppc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_sys);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_gpc);
	err |= !alloc_aiv_list_gk20a(&g->gr.ctx_vars.ctxsw_regs.pm_tpc);

	if (err)
		goto fail;

	for (i = 0; i < g->gr.ctx_vars.ucode.fecs.inst.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_FECS",
				    i, &g->gr.ctx_vars.ucode.fecs.inst.l[i]);

	for (i = 0; i < g->gr.ctx_vars.ucode.fecs.data.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_FECS",
				    i, &g->gr.ctx_vars.ucode.fecs.data.l[i]);

	for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.inst.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_INST_GPCCS",
				    i, &g->gr.ctx_vars.ucode.gpccs.inst.l[i]);

	for (i = 0; i < g->gr.ctx_vars.ucode.gpccs.data.count; i++)
		gk20a_sim_esc_readl(g, "GRCTX_UCODE_DATA_GPCCS",
				    i, &g->gr.ctx_vars.ucode.gpccs.data.l[i]);

	for (i = 0; i < g->gr.ctx_vars.sw_bundle_init.count; i++) {
		struct av_gk20a *l = g->gr.ctx_vars.sw_bundle_init.l;
		gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_SW_BUNDLE_INIT:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.sw_method_init.count; i++) {
		struct av_gk20a *l = g->gr.ctx_vars.sw_method_init.l;
		gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_SW_METHOD_INIT:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.sw_ctx_load.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.sw_ctx_load.l;
		gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_SW_CTX_LOAD:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.sw_non_ctx_load.count; i++) {
		struct av_gk20a *l = g->gr.ctx_vars.sw_non_ctx_load.l;
		gk20a_sim_esc_readl(g, reg_path, i, &l[i].addr);
		gk20a_sim_esc_readl(g, value_path, i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.sys.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.sys.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_SYS:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.gpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.gpc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_GPC:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.tpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.tpc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_TPC:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.ppc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.ppc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PPC:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.zcull_gpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.zcull_gpc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_ZCULL_GPC:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_sys.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_sys.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_SYS:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_gpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_gpc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_GPC:VALUE",
				    i, &l[i].value);
	}

	for (i = 0; i < g->gr.ctx_vars.ctxsw_regs.pm_tpc.count; i++) {
		struct aiv_gk20a *l = g->gr.ctx_vars.ctxsw_regs.pm_tpc.l;
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:ADDR",
				    i, &l[i].addr);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:INDEX",
				    i, &l[i].index);
		gk20a_sim_esc_readl(g, "GRCTX_REG_LIST_PM_TPC:VALUE",
				    i, &l[i].value);
	}

	g->gr.ctx_vars.valid = true;

	gk20a_sim_esc_readl(g, "GRCTX_GEN_CTX_REGS_BASE_INDEX", 0,
			    &g->gr.ctx_vars.regs_base_index);

	gk20a_dbg(gpu_dbg_info | gpu_dbg_fn, "finished querying grctx info from chiplib");
	return 0;
fail:
	gk20a_err(dev_from_gk20a(g),
		   "failed querying grctx info from chiplib");
	return err;

}
Example #26
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int err = 0;
	struct gk20a_as_share *as_share = filp->private_data;
	struct gk20a *g = gk20a_from_as(as_share->as);

	u8 buf[NVHOST_AS_IOCTL_MAX_ARG_SIZE];

	if ((_IOC_TYPE(cmd) != NVHOST_AS_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVHOST_AS_IOCTL_LAST))
		return -EFAULT;

	BUG_ON(_IOC_SIZE(cmd) > NVHOST_AS_IOCTL_MAX_ARG_SIZE);

	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	err = gk20a_busy(g->dev);
	if (err)
		return err;

	switch (cmd) {
	case NVHOST_AS_IOCTL_BIND_CHANNEL:
		trace_gk20a_as_ioctl_bind_channel(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_bind_channel(as_share,
			       (struct nvhost_as_bind_channel_args *)buf);

		break;
	case NVHOST32_AS_IOCTL_ALLOC_SPACE:
	{
		struct nvhost32_as_alloc_space_args *args32 =
			(struct nvhost32_as_alloc_space_args *)buf;
		struct nvhost_as_alloc_space_args args;

		args.pages = args32->pages;
		args.page_size = args32->page_size;
		args.flags = args32->flags;
		args.o_a.offset = args32->o_a.offset;
		trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_alloc_space(as_share, &args);
		args32->o_a.offset = args.o_a.offset;
		break;
	}
	case NVHOST_AS_IOCTL_ALLOC_SPACE:
		trace_gk20a_as_ioctl_alloc_space(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_alloc_space(as_share,
				(struct nvhost_as_alloc_space_args *)buf);
		break;
	case NVHOST_AS_IOCTL_FREE_SPACE:
		trace_gk20a_as_ioctl_free_space(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_free_space(as_share,
				(struct nvhost_as_free_space_args *)buf);
		break;
	case NVHOST_AS_IOCTL_MAP_BUFFER:
		trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_map_buffer(as_share,
				(struct nvhost_as_map_buffer_args *)buf);
		break;
	case NVHOST_AS_IOCTL_MAP_BUFFER_EX:
		trace_gk20a_as_ioctl_map_buffer(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_map_buffer_ex(as_share,
				(struct nvhost_as_map_buffer_ex_args *)buf);
		break;
	case NVHOST_AS_IOCTL_UNMAP_BUFFER:
		trace_gk20a_as_ioctl_unmap_buffer(dev_name(dev_from_gk20a(g)));
		err = gk20a_as_ioctl_unmap_buffer(as_share,
				(struct nvhost_as_unmap_buffer_args *)buf);
		break;
	default:
		dev_err(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
		err = -ENOTTY;
		break;
	}

	gk20a_idle(g->dev);

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) {
		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;
	}

	return err;
}
static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
{
	int err = 0;
	u32 flags;
	u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() -
	  gr_fecs_falcon_hwcfg_r();

	gk20a_dbg_fn("");

	if (tegra_platform_is_linsim()) {
		gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7),
			gr_fecs_ctxsw_mailbox_value_f(0xc0de7777));
		gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7),
			gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777));
	}

	flags = PMU_ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES;
	g->ops.pmu.lsfloadedfalconid = 0;
	if (g->ops.pmu.fecsbootstrapdone) {
		/* this must be recovery so bootstrap fecs and gpccs */
		if (!g->ops.securegpccs) {
			gr_gm20b_load_gpccs_with_bootloader(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
					(1 << LSF_FALCON_ID_FECS));
		} else {
			/* bind WPR VA inst block */
			gr_gk20a_load_falcon_bind_instblk(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
				(1 << LSF_FALCON_ID_FECS) |
				(1 << LSF_FALCON_ID_GPCCS));
		}
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"Unable to recover GR falcon");
			return err;
		}

	} else {
		/* cold boot or rg exit */
		g->ops.pmu.fecsbootstrapdone = true;
		if (!g->ops.securegpccs) {
			gr_gm20b_load_gpccs_with_bootloader(g);
		} else {
			/* bind WPR VA inst block */
			gr_gk20a_load_falcon_bind_instblk(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
					(1 << LSF_FALCON_ID_GPCCS));
			if (err) {
				gk20a_err(dev_from_gk20a(g),
						"Unable to boot GPCCS\n");
				return err;
			}
		}
	}

	/*start gpccs */
	if (g->ops.securegpccs) {
		gk20a_writel(g, reg_offset +
			gr_fecs_cpuctl_alias_r(),
			gr_gpccs_cpuctl_startcpu_f(1));
	} else {
		gk20a_writel(g, gr_gpccs_dmactl_r(),
			gr_gpccs_dmactl_require_ctx_f(0));
		gk20a_writel(g, gr_gpccs_cpuctl_r(),
			gr_gpccs_cpuctl_startcpu_f(1));
	}
	/* start fecs */
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), ~0x0);
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1), 0x1);
	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6), 0xffffffff);
	gk20a_writel(g, gr_fecs_cpuctl_alias_r(),
			gr_fecs_cpuctl_startcpu_f(1));
	gk20a_dbg_fn("done");

	return 0;
}
Example #28
static int gk20a_init_clk_setup_sw(struct gk20a *g)
{
	struct clk_gk20a *clk = &g->clk;
	static int initialized;
	unsigned long *freqs;
	int err, num_freqs;
	struct clk *ref;
	unsigned long ref_rate;

	nvhost_dbg_fn("");

	if (clk->sw_ready) {
		nvhost_dbg_fn("skip init");
		return 0;
	}

	if (!gk20a_clk_get(g))
		return -EINVAL;

	ref = clk_get_parent(clk_get_parent(clk->tegra_clk));
	if (IS_ERR(ref)) {
		nvhost_err(dev_from_gk20a(g),
			"failed to get GPCPLL reference clock");
		return -EINVAL;
	}
	ref_rate = clk_get_rate(ref);

	clk->pll_delay = 300; /* usec */

	clk->gpc_pll.id = GK20A_GPC_PLL;
	clk->gpc_pll.clk_in = ref_rate / 1000000; /* MHz */

	/* Decide initial frequency */
	if (!initialized) {
		initialized = 1;
		clk->gpc_pll.M = 1;
		clk->gpc_pll.N = DIV_ROUND_UP(gpc_pll_params.min_vco,
					clk->gpc_pll.clk_in);
		clk->gpc_pll.PL = 1;
		clk->gpc_pll.freq = clk->gpc_pll.clk_in * clk->gpc_pll.N;
		clk->gpc_pll.freq /= pl_to_div[clk->gpc_pll.PL];
	}

	err = tegra_dvfs_get_freqs(clk_get_parent(clk->tegra_clk),
				   &freqs, &num_freqs);
	if (!err) {
		int i, j;

		/* init j for inverse traversal of frequencies */
		j = num_freqs - 1;

		gpu_cooling_freq = kzalloc(
				(1 + num_freqs) * sizeof(*gpu_cooling_freq),
				GFP_KERNEL);
		if (!gpu_cooling_freq)
			return -ENOMEM;

		/* store frequencies in inverse order */
		for (i = 0; i < num_freqs; ++i, --j) {
			gpu_cooling_freq[i].index = i;
			gpu_cooling_freq[i].frequency = freqs[j];
		}

		/* add 'end of table' marker */
		gpu_cooling_freq[i].index = i;
		gpu_cooling_freq[i].frequency = GPUFREQ_TABLE_END;

		/* store number of frequencies */
		num_gpu_cooling_freq = num_freqs + 1;
	}

	mutex_init(&clk->clk_mutex);

	clk->sw_ready = true;

	nvhost_dbg_fn("done");
	return 0;
}
Example #29
static int clk_slide_gpc_pll(struct gk20a *g, u32 n)
{
	u32 data, coeff;
	u32 nold;
	int ramp_timeout = 500;

	/* get old coefficients */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	nold = trim_sys_gpcpll_coeff_ndiv_v(coeff);

	/* do nothing if NDIV is same */
	if (n == nold)
		return 0;

	/* setup */
	data = gk20a_readl(g, trim_sys_gpcpll_cfg2_r());
	data = set_field(data, trim_sys_gpcpll_cfg2_pll_stepa_m(),
			trim_sys_gpcpll_cfg2_pll_stepa_f(0x2b));
	gk20a_writel(g, trim_sys_gpcpll_cfg2_r(), data);
	data = gk20a_readl(g, trim_sys_gpcpll_cfg3_r());
	data = set_field(data, trim_sys_gpcpll_cfg3_pll_stepb_m(),
			trim_sys_gpcpll_cfg3_pll_stepb_f(0xb));
	gk20a_writel(g, trim_sys_gpcpll_cfg3_r(), data);

	/* pll slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_yes_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	/* new ndiv ready for ramp */
	coeff = gk20a_readl(g, trim_sys_gpcpll_coeff_r());
	coeff = set_field(coeff, trim_sys_gpcpll_coeff_ndiv_m(),
			trim_sys_gpcpll_coeff_ndiv_f(n));
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_coeff_r(), coeff);

	/* dynamic ramp to new ndiv */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_yes_f());
	udelay(1);
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);

	do {
		udelay(1);
		ramp_timeout--;
		data = gk20a_readl(
			g, trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_r());
		if (trim_gpc_bcast_gpcpll_ndiv_slowdown_debug_pll_dynramp_done_synced_v(data))
			break;
	} while (ramp_timeout > 0);

	/* exit slowdown mode */
	data = gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_m(),
			trim_sys_gpcpll_ndiv_slowdown_slowdown_using_pll_no_f());
	data = set_field(data,
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_m(),
			trim_sys_gpcpll_ndiv_slowdown_en_dynramp_no_f());
	gk20a_writel(g, trim_sys_gpcpll_ndiv_slowdown_r(), data);
	gk20a_readl(g, trim_sys_gpcpll_ndiv_slowdown_r());

	if (ramp_timeout <= 0) {
		gk20a_err(dev_from_gk20a(g), "gpcpll dynamic ramp timeout");
		return -ETIMEDOUT;
	}
	return 0;
}
static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a  *c,
				struct nvgpu_alloc_obj_ctx_args *args)
{
	struct gk20a *g = c->g;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
	struct tsg_gk20a *tsg = NULL;
	int err = 0;

	gk20a_dbg_fn("");

	/* an address space needs to have been bound at this point.*/
	if (!gk20a_channel_as_bound(c)) {
		gk20a_err(dev_from_gk20a(g),
			   "not bound to address space at time"
			   " of grctx allocation");
		return -EINVAL;
	}

	if (!g->ops.gr.is_valid_class(g, args->class_num)) {
		gk20a_err(dev_from_gk20a(g),
			   "invalid obj class 0x%x", args->class_num);
		err = -EINVAL;
		goto out;
	}
	c->obj_class = args->class_num;

	/* FIXME: add TSG support */
	if (gk20a_is_channel_marked_as_tsg(c))
		tsg = &f->tsg[c->tsgid];

	/* allocate gr ctx buffer */
	if (!ch_ctx->gr_ctx) {
		err = vgpu_gr_alloc_channel_gr_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate gr ctx buffer");
			goto out;
		}
	} else {
		/* TBD: needs to be more subtle about which is
		 * being allocated, as some are allowed to be
		 * allocated along the same channel */
		gk20a_err(dev_from_gk20a(g),
			"too many classes alloc'd on same channel");
		err = -EINVAL;
		goto out;
	}

	/* commit gr ctx buffer */
	err = vgpu_gr_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va);
	if (err) {
		gk20a_err(dev_from_gk20a(g),
			"fail to commit gr ctx buffer");
		goto out;
	}

	/* allocate patch buffer */
	if (ch_ctx->patch_ctx.mem.pages == NULL) {
		err = vgpu_gr_alloc_channel_patch_ctx(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to allocate patch buffer");
			goto out;
		}
	}

	/* map global buffer to channel gpu_va and commit */
	if (!ch_ctx->global_ctx_buffer_mapped) {
		err = vgpu_gr_map_global_ctx_buffers(g, c);
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to map global ctx buffer");
			goto out;
		}
		gr_gk20a_elpg_protected_call(g,
				vgpu_gr_commit_global_ctx_buffers(g, c, true));
	}

	/* load golden image */
	if (!c->first_init) {
		err = gr_gk20a_elpg_protected_call(g,
				vgpu_gr_load_golden_ctx_image(g, c));
		if (err) {
			gk20a_err(dev_from_gk20a(g),
				"fail to load golden ctx image");
			goto out;
		}
		c->first_init = true;
	}

	c->num_objects++;

	gk20a_dbg_fn("done");
	return 0;
out:
	/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
	   can be reused, so there is no need to release them.
	   2. golden image load is a one-time thing, so if it
	   passed there is no need to undo it. */
	gk20a_err(dev_from_gk20a(g), "fail");
	return err;
}