static int a4xx_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu); uint32_t *ptr, len; int i, ret; if (adreno_is_a420(adreno_gpu)) { gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F); gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4); gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818); gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018); gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); } else if (adreno_is_a430(adreno_gpu)) { gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818); gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018); gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); } else { BUG(); } /* Make all blocks contribute to the GPU BUSY perf counter */ gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff); /* Tune the hystersis counters for SP and CP idle detection */ gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10); gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); if (adreno_is_a430(adreno_gpu)) { gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30); } /* Enable the RBBM error reporting bits */ gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001); /* Enable AHB error reporting*/ gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff); /* Enable power counters*/ gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030); /* * Turn on hang detection - this spews a lot of useful information * into the RBBM registers on a hang: */ gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL, (1 << 30) | 0xFFFF); gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR, (unsigned int)(a4xx_gpu->ocmem_base >> 14)); /* Turn on performance counters: */ gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01); /* use the first CP counter for timestamp queries.. userspace may set * this as well but it selects the same counter/countable: */ gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT); if (adreno_is_a430(adreno_gpu)) gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07); /* Disable L2 bypass to avoid UCHE out of bounds errors */ gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000); gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000); gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) | (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0)); /* On A430 enable SP regfile sleep for power savings */ /* TODO downstream does this for !420, so maybe applies for 405 too? */ if (!adreno_is_a420(adreno_gpu)) { gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0, 0x00000441); gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1, 0x00000441); } a4xx_enable_hwcg(gpu); /* * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2 * due to timing issue with HLSQ_TP_CLK_EN */ if (adreno_is_a420(adreno_gpu)) { unsigned int val; val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ); val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK; val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT; gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val); } /* setup access protection: */ gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007); /* RBBM registers */ gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010); gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020); gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040); gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080); gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100); gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200); /* CP registers */ gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800); gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600); /* RB registers */ gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300); /* HLSQ registers */ gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800); /* VPC registers */ gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980); /* SMMU registers */ gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000); gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK); ret = adreno_hw_init(gpu); if (ret) return ret; /* Load PM4: */ ptr = (uint32_t *)(adreno_gpu->pm4->data); len = adreno_gpu->pm4->size / 4; DBG("loading PM4 ucode version: %u", ptr[0]); gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0); for (i = 1; i < len; i++) gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]); /* Load PFP: */ ptr = (uint32_t *)(adreno_gpu->pfp->data); len = adreno_gpu->pfp->size / 4; DBG("loading PFP ucode version: %u", ptr[0]); gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0); for (i = 1; i < len; i++) gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]); /* clear ME_HALT to start micro engine */ gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0); return a4xx_me_init(gpu) ? 0 : -EINVAL; }
/* * a4xx_enable_hwcg() - Program the clock control registers * @device: The adreno device pointer */ static void a4xx_enable_hwcg(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); unsigned int i; for (i = 0; i < 4; i++) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202); for (i = 0; i < 4; i++) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222); for (i = 0; i < 4; i++) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7); for (i = 0; i < 4; i++) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111); for (i = 0; i < 4; i++) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222); for (i = 0; i < 4; i++) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222); for (i = 0; i < 4; i++) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104); for (i = 0; i < 4; i++) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112); for (i = 0; i < 4; i++) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222); /* Disable L1 clocking in A420 due to CCU issues with it */ for (i = 0; i < 4; i++) { if (adreno_is_a420(adreno_gpu)) { gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i), 0x00002020); } else { gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i), 0x00022020); } } for (i = 0; i < 4; i++) { gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i), 0x00000922); } for (i = 0; i < 4; i++) { gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i), 0x00000000); } for (i = 0; i < 4; i++) { gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i), 0x00000001); } gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000); /* Early A430's have a timing issue with SP/TP power collapse; disabling HW clock gating prevents it. */ if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0); else gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0); }
/** * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode * @rb: Pointer to adreno ringbuffer * @load_jt: If non zero only load Jump tables * * Bootstrap ucode for GPU * load_jt == 0, bootstrap full microcode * load_jt == 1, bootstrap jump tables of microcode * * For example a bootstrap packet would like below * Setup a type3 bootstrap packet * PFP size to bootstrap * PFP addr to write the PFP data * PM4 size to bootstrap * PM4 addr to write the PM4 data * PFP dwords from microcode to bootstrap * PM4 size dwords from microcode to bootstrap */ static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb, unsigned int load_jt) { unsigned int *cmds, bootstrap_size, rb_size; int i = 0; int ret; struct kgsl_device *device = rb->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr; /* Only bootstrap jump tables of ucode */ if (load_jt) { pm4_idx = adreno_dev->gpucore->pm4_jt_idx; pm4_addr = adreno_dev->gpucore->pm4_jt_addr; pfp_idx = adreno_dev->gpucore->pfp_jt_idx; pfp_addr = adreno_dev->gpucore->pfp_jt_addr; } else { /* Bootstrap full ucode */ pm4_idx = 1; pm4_addr = 0; pfp_idx = 1; pfp_addr = 0; } pm4_size = (adreno_dev->pm4_fw_size - pm4_idx); pfp_size = (adreno_dev->pfp_fw_size - pfp_idx); bootstrap_size = (pm4_size + pfp_size + 5); /* * Overwrite the first entry in the jump table with the special * bootstrap opcode */ if (adreno_is_a4xx(adreno_dev)) { adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x400); adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, 0x6f0009); /* * The support packets (the RMW and INTERRUPT) that are sent * after the bootstrap packet should not be included in the size * of the bootstrap packet but we do need to reserve enough * space for those too */ rb_size = bootstrap_size + 6; } else { adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x200); adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, 0x6f0005); rb_size = bootstrap_size; } /* clear ME_HALT to start micro engine */ adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0); cmds = adreno_ringbuffer_allocspace(rb, rb_size); if (IS_ERR(cmds)) return PTR_ERR(cmds); if (cmds == NULL) return -ENOSPC; /* Construct the packet that bootsraps the ucode */ *cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1)); *cmds++ = pfp_size; *cmds++ = pfp_addr; *cmds++ = pm4_size; *cmds++ = pm4_addr; /** * Theory of operation: * * In A4x, we cannot have the PFP executing instructions while its instruction * RAM is loading. We load the PFP's instruction RAM using type-0 writes * from the ME. * * To make sure the PFP is not fetching instructions at the same time, * we put it in a one-instruction loop: * mvc (ME), (ringbuffer) * which executes repeatedly until all of the data has been moved from * the ring buffer to the ME. */ if (adreno_is_a4xx(adreno_dev)) { for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) *cmds++ = adreno_dev->pm4_fw[i]; for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) *cmds++ = adreno_dev->pfp_fw[i]; *cmds++ = cp_type3_packet(CP_REG_RMW, 3); *cmds++ = 0x20000000 + A4XX_CP_RB_WPTR; *cmds++ = 0xffffffff; *cmds++ = 0x00000002; *cmds++ = cp_type3_packet(CP_INTERRUPT, 1); *cmds++ = 0; rb->wptr = rb->wptr - 2; adreno_ringbuffer_submit(rb, NULL); rb->wptr = rb->wptr + 2; } else { for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) *cmds++ = adreno_dev->pfp_fw[i]; for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) *cmds++ = adreno_dev->pm4_fw[i]; adreno_ringbuffer_submit(rb, NULL); } /* idle device to validate bootstrap */ ret = adreno_spin_idle(device); if (ret) { KGSL_DRV_ERR(rb->device, "microcode bootstrap failed to idle\n"); kgsl_device_snapshot(device, NULL); } /* Clear the chicken bit for speed up on A430 and its derivatives */ if (!adreno_is_a420(adreno_dev)) kgsl_regwrite(device, A4XX_CP_DEBUG, A4XX_CP_DEBUG_DEFAULT & ~(1 << 14)); return ret; }