示例#1
0
文件: a4xx_gpu.c 项目: mkrufky/linux
static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hystersis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
	}

	 /* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting*/
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters*/
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem_base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries.. userspace may set
	 * this as well but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;
		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);


	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->pm4->data);
	len = adreno_gpu->pm4->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->pfp->data);
	len = adreno_gpu->pfp->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}
示例#2
0
文件: a4xx_gpu.c 项目: mkrufky/linux
/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @device: The adreno device pointer
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
				0x00000922);
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
				0x00000000);
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
				0x00000001);
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
	/* Early A430's have a timing issue with SP/TP power collapse;
	   disabling HW clock gating prevents it. */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}
/**
 * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode
 * @rb: Pointer to adreno ringbuffer
 * @load_jt: If non zero only load Jump tables
 *
 * Bootstrap ucode for GPU
 * load_jt == 0, bootstrap full microcode
 * load_jt == 1, bootstrap jump tables of microcode
 *
 * For example a bootstrap packet would like below
 * Setup a type3 bootstrap packet
 * PFP size to bootstrap
 * PFP addr to write the PFP data
 * PM4 size to bootstrap
 * PM4 addr to write the PM4 data
 * PFP dwords from microcode to bootstrap
 * PM4 size dwords from microcode to bootstrap
 */
static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb,
					unsigned int load_jt)
{
	unsigned int *cmds, bootstrap_size, rb_size;
	int i = 0;
	int ret;
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr;

	/* Only bootstrap jump tables of ucode */
	if (load_jt) {
		pm4_idx = adreno_dev->gpucore->pm4_jt_idx;
		pm4_addr = adreno_dev->gpucore->pm4_jt_addr;
		pfp_idx = adreno_dev->gpucore->pfp_jt_idx;
		pfp_addr = adreno_dev->gpucore->pfp_jt_addr;
	} else {
		/* Bootstrap full ucode */
		pm4_idx = 1;
		pm4_addr = 0;
		pfp_idx = 1;
		pfp_addr = 0;
	}

	pm4_size = (adreno_dev->pm4_fw_size - pm4_idx);
	pfp_size = (adreno_dev->pfp_fw_size - pfp_idx);

	bootstrap_size = (pm4_size + pfp_size + 5);

	/*
	 * Overwrite the first entry in the jump table with the special
	 * bootstrap opcode
	 */

	if (adreno_is_a4xx(adreno_dev)) {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x400);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			 0x6f0009);

		/*
		 * The support packets (the RMW and INTERRUPT) that are sent
		 * after the bootstrap packet should not be included in the size
		 * of the bootstrap packet but we do need to reserve enough
		 * space for those too
		 */
		rb_size = bootstrap_size + 6;
	} else {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x200);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			 0x6f0005);
		rb_size = bootstrap_size;
	}

	/* clear ME_HALT to start micro engine */
	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);

	cmds = adreno_ringbuffer_allocspace(rb, rb_size);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);
	if (cmds == NULL)
		return -ENOSPC;

	/* Construct the packet that bootsraps the ucode */
	*cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1));
	*cmds++ = pfp_size;
	*cmds++ = pfp_addr;
	*cmds++ = pm4_size;
	*cmds++ = pm4_addr;

/**
 * Theory of operation:
 *
 * In A4x, we cannot have the PFP executing instructions while its instruction
 * RAM is loading. We load the PFP's instruction RAM using type-0 writes
 * from the ME.
 *
 * To make sure the PFP is not fetching instructions at the same time,
 * we put it in a one-instruction loop:
 *    mvc (ME), (ringbuffer)
 * which executes repeatedly until all of the data has been moved from
 * the ring buffer to the ME.
 */
	if (adreno_is_a4xx(adreno_dev)) {
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];

		*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
		*cmds++ = 0x20000000 + A4XX_CP_RB_WPTR;
		*cmds++ = 0xffffffff;
		*cmds++ = 0x00000002;
		*cmds++ = cp_type3_packet(CP_INTERRUPT, 1);
		*cmds++ = 0;

		rb->wptr = rb->wptr - 2;
		adreno_ringbuffer_submit(rb, NULL);
		rb->wptr = rb->wptr + 2;
	} else {
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];
		adreno_ringbuffer_submit(rb, NULL);
	}

	/* idle device to validate bootstrap */
	ret = adreno_spin_idle(device);

	if (ret) {
		KGSL_DRV_ERR(rb->device,
		"microcode bootstrap failed to idle\n");
		kgsl_device_snapshot(device, NULL);
	}

	/* Clear the chicken bit for speed up on A430 and its derivatives */
	if (!adreno_is_a420(adreno_dev))
		kgsl_regwrite(device, A4XX_CP_DEBUG,
					A4XX_CP_DEBUG_DEFAULT & ~(1 << 14));

	return ret;
}