/**
 * adreno_context_restore() - generic context restore handler
 * @adreno_dev: the device
 * @context: the context
 *
 * Basic context restore handler that writes the context identifier
 * to the ringbuffer and invalidates the UCHE for the incoming context.
 */
static int adreno_context_restore(struct adreno_device *adreno_dev,
				  struct adreno_context *context)
{
	struct kgsl_device *device;
	unsigned int cmds[8];

	if (adreno_dev == NULL || context == NULL)
		return -EINVAL;

	device = &adreno_dev->dev;

	/* write the context identifier to the ringbuffer */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
	cmds[4] = context->base.id;

	/* Flush the UCHE for the new context */
	cmds[5] = cp_type0_packet(
		adreno_getreg(adreno_dev, ADRENO_REG_UCHE_INVALIDATE0), 2);
	cmds[6] = 0;
	if (adreno_is_a4xx(adreno_dev))
		cmds[7] = 0x12;
	else if (adreno_is_a3xx(adreno_dev))
		cmds[7] = 0x90000000;

	return adreno_ringbuffer_issuecmds(device, context,
				KGSL_CMD_FLAGS_NONE, cmds, 8);
}
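/*
 * For reference, a minimal sketch of the PM4 packet helpers used above,
 * following the usual Adreno command-stream header layout: packet type in
 * bits [31:30], dword count minus one in [29:16], and the opcode or base
 * register index in the low bits. These are illustrative reconstructions,
 * not the driver's verbatim definitions.
 */
#define CP_TYPE0_PKT	((unsigned int)0 << 30)
#define CP_TYPE3_PKT	((unsigned int)3 << 30)

/* type-0: write 'cnt' dwords to consecutive registers starting at regindx */
#define cp_type0_packet(regindx, cnt) \
	(CP_TYPE0_PKT | (((cnt) - 1) << 16) | ((regindx) & 0x7fff))

/* type-3: opcode followed by 'cnt' payload dwords */
#define cp_type3_packet(opcode, cnt) \
	(CP_TYPE3_PKT | (((cnt) - 1) << 16) | (((opcode) & 0xff) << 8))

/*
 * a NOP whose 'cnt' payload dwords are ignored by the CP; used above to
 * embed marker values such as KGSL_CONTEXT_TO_MEM_IDENTIFIER in the ring
 */
#define cp_nop_packet(cnt) \
	(CP_TYPE3_PKT | (((cnt) - 1) << 16) | ((CP_NOP & 0xff) << 8))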
/**
 * adreno_perfcounter_restore() - Restore performance counters
 * @adreno_dev: adreno device to configure
 *
 * Load the physical performance counters with the 64 bit values that
 * were saved on GPU power collapse.
 */
inline void adreno_perfcounter_restore(struct adreno_device *adreno_dev)
{
	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
	struct adreno_perfcount_group *group;
	unsigned int regid, groupid;

	if (counters == NULL)
		return;

	for (groupid = 0; groupid < counters->group_count; groupid++) {
		if (!loadable_perfcounter_group(groupid))
			continue;

		group = &(counters->groups[groupid]);

		/* group/counter iterator */
		for (regid = 0; regid < group->reg_count; regid++) {
			if (!active_countable(group->regs[regid].countable))
				continue;

			adreno_perfcounter_write(adreno_dev, groupid, regid);
		}
	}

	/* Clear the load cmd registers */
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, 0);
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, 0);
	if (adreno_is_a4xx(adreno_dev))
		adreno_writereg(adreno_dev,
			ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, 0);
}
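/*
 * A minimal sketch of the two predicates used by the loop above, under the
 * assumption that unused or broken countables are flagged with magic values
 * and that always-on groups (power and VBIF counters, which are not backed
 * by PERFCTR_LOAD_CMD*) are skipped. The specific group IDs and magic
 * values here are assumptions for illustration.
 */
#define KGSL_PERFCOUNTER_NOT_USED	0xFFFFFFFF
#define KGSL_PERFCOUNTER_BROKEN		0xFFFFFFFE

static inline int active_countable(unsigned int countable)
{
	return ((countable != KGSL_PERFCOUNTER_NOT_USED) &&
		(countable != KGSL_PERFCOUNTER_BROKEN));
}

static inline int loadable_perfcounter_group(unsigned int groupid)
{
	/* power/VBIF groups cannot be loaded via PERFCTR_LOAD_CMD* */
	return ((groupid == KGSL_PERFCOUNTER_GROUP_VBIF_PWR) ||
		(groupid == KGSL_PERFCOUNTER_GROUP_VBIF) ||
		(groupid == KGSL_PERFCOUNTER_GROUP_PWR)) ? 0 : 1;
}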
/**
 * adreno_perfcounter_write() - Write the physical performance
 * counter values.
 * @adreno_dev: Adreno device whose registers are to be written to.
 * @group: group to which the physical counter belongs.
 * @counter: register id of the physical counter to which the value is
 * written.
 *
 * This function loads the 64 bit saved value into the particular physical
 * counter by setting the corresponding bit in the RBBM_PERFCTR_LOAD_CMD*
 * registers.
 */
static void adreno_perfcounter_write(struct adreno_device *adreno_dev,
				     unsigned int group, unsigned int counter)
{
	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
	struct adreno_perfcount_register *reg;
	unsigned int val;

	reg = &(gpudev->perfcounters->groups[group].regs[counter]);

	/* Clear the load cmd registers */
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, 0);
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, 0);
	if (adreno_is_a4xx(adreno_dev))
		adreno_writereg(adreno_dev,
			ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, 0);

	/* Write the saved value to PERFCTR_LOAD_VALUE* registers. */
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
			(uint32_t)reg->value);
	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
			(uint32_t)(reg->value >> 32));

	/*
	 * Set the load bit in PERFCTR_LOAD_CMD for the physical counter
	 * we want to restore. The value in PERFCTR_LOAD_VALUE* is loaded
	 * into the corresponding physical counter.
	 */
	if (reg->load_bit < 32) {
		val = 1 << reg->load_bit;
		adreno_writereg(adreno_dev,
			ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, val);
	} else if (reg->load_bit < 64) {
		val = 1 << (reg->load_bit - 32);
		adreno_writereg(adreno_dev,
			ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, val);
	} else if (reg->load_bit >= 64 && adreno_is_a4xx(adreno_dev)) {
		val = 1 << (reg->load_bit - 64);
		adreno_writereg(adreno_dev,
			ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, val);
	}
}
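/*
 * An illustration of the load_bit bucketing above (hypothetical helper,
 * not part of the driver): load_bit / 32 selects the LOAD_CMD register
 * and load_bit % 32 the bit within it. For example, load_bit = 37 sets
 * bit 5 of PERFCTR_LOAD_CMD1, and load_bit = 66 sets bit 2 of
 * PERFCTR_LOAD_CMD2 (the latter register only exists on A4xx parts).
 */
static inline void load_bit_to_cmd_reg(unsigned int load_bit,
		unsigned int *cmd_index, unsigned int *mask)
{
	*cmd_index = load_bit / 32;	/* 0 => CMD0, 1 => CMD1, 2 => CMD2 */
	*mask = 1u << (load_bit % 32);	/* bit within that register */
}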
/**
 * adreno_context_restore() - generic context restore handler
 * @rb: The RB in which the context is to be restored
 *
 * Basic context restore handler that writes the context identifier
 * to both the per-RB and global memstore slots and invalidates the
 * UCHE for the incoming context.
 */
static void adreno_context_restore(struct adreno_ringbuffer *rb)
{
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	struct adreno_context *drawctxt = rb->drawctxt_active;
	unsigned int cmds[11];
	int ret;

	if (!drawctxt)
		return;

	/*
	 * write the context identifier to the ringbuffer, write to both
	 * the global index and the index of the RB in which the context
	 * operates. The global value will always be reliable since we
	 * could be in the middle of an RB switch, in which case the RB
	 * value may not be accurate
	 */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_RB_OFFSET(rb, current_context);
	cmds[4] = drawctxt->base.id;
	cmds[5] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[6] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
	cmds[7] = drawctxt->base.id;

	/* Flush the UCHE for the new context */
	cmds[8] = cp_type0_packet(
		adreno_getreg(adreno_dev, ADRENO_REG_UCHE_INVALIDATE0), 2);
	cmds[9] = 0;
	if (adreno_is_a4xx(adreno_dev))
		cmds[10] = 0x12;
	else if (adreno_is_a3xx(adreno_dev))
		cmds[10] = 0x90000000;

	ret = adreno_ringbuffer_issuecmds(rb, KGSL_CMD_FLAGS_NONE, cmds, 11);
	if (ret) {
		/*
		 * A failure to submit commands to the ringbuffer means the
		 * RB may be full; in this case wait for idle and use the CPU
		 */
		ret = adreno_idle(device);
		BUG_ON(ret);
		_adreno_context_restore_cpu(rb, drawctxt);
	}
}
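/*
 * A plausible sketch of the CPU fallback invoked above, assuming
 * kgsl_sharedmem_writel() is the usual helper for writing memstore from
 * the CPU; illustrative, not the verbatim implementation. It mirrors the
 * two CP_MEM_WRITEs (per-RB and global slots); the GPU-side UCHE
 * invalidate has no CPU equivalent and is simply skipped here.
 */
static void _adreno_context_restore_cpu(struct adreno_ringbuffer *rb,
		struct adreno_context *drawctxt)
{
	kgsl_sharedmem_writel(rb->device, &rb->device->memstore,
		KGSL_MEMSTORE_RB_OFFSET(rb, current_context),
		drawctxt->base.id);
	kgsl_sharedmem_writel(rb->device, &rb->device->memstore,
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context),
		drawctxt->base.id);
}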
void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = gpu->rb;
	unsigned i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == ctx)
				break;
			/* fall-thru */
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
				CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, submit->cmd[i].iova);
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->fence->seqno);

	if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
		/* Flush HLSQ lazy updates to make sure there is nothing
		 * pending for indirect loads after the timestamp has
		 * passed:
		 */
		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, HLSQ_FLUSH);
		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RING(ring, rbmemptr(adreno_gpu, fence));
	OUT_RING(ring, submit->fence->seqno);

	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

	/* Workaround for missing irq issue on 8x16/a306.  Unsure if the
	 * root cause is a platform issue or some a306 quirk, but this
	 * keeps things humming along:
	 */
	if (adreno_is_a306(adreno_gpu)) {
		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
		OUT_PKT3(ring, CP_INTERRUPT, 1);
		OUT_RING(ring, 0x80000000);
	}

#if 0
	if (adreno_is_a3xx(adreno_gpu)) {
		/* Dummy set-constant to trigger context rollover */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
		OUT_RING(ring, 0x00000000);
	}
#endif

	gpu->funcs->flush(gpu);
}
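/*
 * For reference, a simplified sketch of the OUT_* emitters used above
 * (drm/msm keeps these as inline helpers). They reuse the CP_TYPE*_PKT
 * header encodings sketched earlier; ring-space accounting is omitted
 * here for brevity, so treat this as illustrative rather than the
 * driver's exact definitions.
 */
#define CP_TYPE2_PKT	((unsigned int)2 << 30)

static inline void OUT_RING(struct msm_ringbuffer *ring, uint32_t data)
{
	if (ring->cur == ring->end)	/* wrap at the end of the buffer */
		ring->cur = ring->start;
	*(ring->cur++) = data;
}

static inline void OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx,
		uint16_t cnt)
{
	OUT_RING(ring, CP_TYPE0_PKT | ((cnt - 1) << 16) | (regindx & 0x7fff));
}

/* no-op packet, used above to pad out an IB submission: */
static inline void OUT_PKT2(struct msm_ringbuffer *ring)
{
	OUT_RING(ring, CP_TYPE2_PKT);
}

static inline void OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode,
		uint16_t cnt)
{
	OUT_RING(ring, CP_TYPE3_PKT | ((cnt - 1) << 16) |
		((opcode & 0xff) << 8));
}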
/**
 * _ringbuffer_bootstrap_ucode() - Bootstrap GPU ucode
 * @rb: Pointer to adreno ringbuffer
 * @load_jt: If non zero only load jump tables
 *
 * Bootstrap ucode for GPU
 * load_jt == 0, bootstrap full microcode
 * load_jt == 1, bootstrap jump tables of microcode
 *
 * For example a bootstrap packet would look like below:
 * Setup a type3 bootstrap packet
 * PFP size to bootstrap
 * PFP addr to write the PFP data
 * PM4 size to bootstrap
 * PM4 addr to write the PM4 data
 * PFP dwords from microcode to bootstrap
 * PM4 dwords from microcode to bootstrap
 */
static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb,
					unsigned int load_jt)
{
	unsigned int *cmds, bootstrap_size;
	int i = 0;
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr;

	/* Only bootstrap jump tables of ucode */
	if (load_jt) {
		pm4_idx = adreno_dev->gpucore->pm4_jt_idx;
		pm4_addr = adreno_dev->gpucore->pm4_jt_addr;
		pfp_idx = adreno_dev->gpucore->pfp_jt_idx;
		pfp_addr = adreno_dev->gpucore->pfp_jt_addr;
	} else {
		/* Bootstrap full ucode */
		pm4_idx = 1;
		pm4_addr = 0;
		pfp_idx = 1;
		pfp_addr = 0;
	}

	pm4_size = (adreno_dev->pm4_fw_size - pm4_idx);
	pfp_size = (adreno_dev->pfp_fw_size - pfp_idx);

	/*
	 * Overwrite the first entry in the jump table with the special
	 * bootstrap opcode
	 */
	if (adreno_is_a4xx(adreno_dev)) {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x400);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			0x6f0009);
		bootstrap_size = (pm4_size + pfp_size + 5 + 6);
	} else {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x200);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			0x6f0005);
		bootstrap_size = (pm4_size + pfp_size + 5);
	}

	/* clear ME_HALT to start micro engine */
	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);

	cmds = adreno_ringbuffer_allocspace(rb, bootstrap_size);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);
	if (cmds == NULL)
		return -ENOSPC;

	/* Construct the packet that bootstraps the ucode */
	*cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1));
	*cmds++ = pfp_size;
	*cmds++ = pfp_addr;
	*cmds++ = pm4_size;
	*cmds++ = pm4_addr;

	/*
	 * Theory of operation:
	 *
	 * On A4xx, we cannot have the PFP executing instructions while its
	 * instruction RAM is loading. We load the PFP's instruction RAM
	 * using type-0 writes from the ME.
	 *
	 * To make sure the PFP is not fetching instructions at the same
	 * time, we put it in a one-instruction loop:
	 *	mvc (ME), (ringbuffer)
	 * which executes repeatedly until all of the data has been moved
	 * from the ring buffer to the ME.
	 */
	if (adreno_is_a4xx(adreno_dev)) {
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];

		*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
		*cmds++ = 0x20000000 + A4XX_CP_RB_WPTR;
		*cmds++ = 0xffffffff;
		*cmds++ = 0x00000002;
		*cmds++ = cp_type3_packet(CP_INTERRUPT, 1);
		*cmds++ = 0;

		rb->wptr = rb->wptr - 2;
		adreno_ringbuffer_submit(rb, NULL);
		rb->wptr = rb->wptr + 2;
	} else {
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];
		adreno_ringbuffer_submit(rb, NULL);
	}

	/* idle device to validate bootstrap */
	return adreno_spin_idle(device);
}
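/*
 * Worked dword accounting for the version above (illustrative numbers,
 * not from a real firmware image): with pm4_size = 100 and pfp_size = 50,
 * the non-A4xx path allocates 100 + 50 + 5 = 155 dwords: one
 * CP_BOOTSTRAP_UCODE header, four size/addr dwords, and 150 microcode
 * dwords, so the header's count of (bootstrap_size - 1) payload dwords is
 * exact. On the A4xx path, however, the "+ 6" for the trailing
 * CP_REG_RMW/CP_INTERRUPT support packets is folded into bootstrap_size
 * and therefore into the bootstrap packet's own count as well; the
 * revised version of this function below separates the two with a
 * distinct rb_size for exactly this reason. A hypothetical helper
 * (illustrative only) for the size the header should actually count:
 */
static unsigned int bootstrap_packet_dwords(unsigned int pm4_size,
		unsigned int pfp_size)
{
	/* 1 header + 4 size/addr dwords + microcode payload */
	return 1 + 4 + pm4_size + pfp_size;
}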
/**
 * _ringbuffer_bootstrap_ucode() - Bootstrap GPU ucode
 * @rb: Pointer to adreno ringbuffer
 * @load_jt: If non zero only load jump tables
 *
 * Bootstrap ucode for GPU
 * load_jt == 0, bootstrap full microcode
 * load_jt == 1, bootstrap jump tables of microcode
 *
 * For example a bootstrap packet would look like below:
 * Setup a type3 bootstrap packet
 * PFP size to bootstrap
 * PFP addr to write the PFP data
 * PM4 size to bootstrap
 * PM4 addr to write the PM4 data
 * PFP dwords from microcode to bootstrap
 * PM4 dwords from microcode to bootstrap
 */
static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb,
					unsigned int load_jt)
{
	unsigned int *cmds, bootstrap_size, rb_size;
	int i = 0;
	int ret;
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr;

	/* Only bootstrap jump tables of ucode */
	if (load_jt) {
		pm4_idx = adreno_dev->gpucore->pm4_jt_idx;
		pm4_addr = adreno_dev->gpucore->pm4_jt_addr;
		pfp_idx = adreno_dev->gpucore->pfp_jt_idx;
		pfp_addr = adreno_dev->gpucore->pfp_jt_addr;
	} else {
		/* Bootstrap full ucode */
		pm4_idx = 1;
		pm4_addr = 0;
		pfp_idx = 1;
		pfp_addr = 0;
	}

	pm4_size = (adreno_dev->pm4_fw_size - pm4_idx);
	pfp_size = (adreno_dev->pfp_fw_size - pfp_idx);

	bootstrap_size = (pm4_size + pfp_size + 5);

	/*
	 * Overwrite the first entry in the jump table with the special
	 * bootstrap opcode
	 */
	if (adreno_is_a4xx(adreno_dev)) {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x400);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			0x6f0009);

		/*
		 * The support packets (the RMW and INTERRUPT) that are sent
		 * after the bootstrap packet should not be included in the
		 * size of the bootstrap packet, but we do need to reserve
		 * enough space for those too
		 */
		rb_size = bootstrap_size + 6;
	} else {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x200);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			0x6f0005);
		rb_size = bootstrap_size;
	}

	/* clear ME_HALT to start micro engine */
	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);

	cmds = adreno_ringbuffer_allocspace(rb, rb_size);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);
	if (cmds == NULL)
		return -ENOSPC;

	/* Construct the packet that bootstraps the ucode */
	*cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1));
	*cmds++ = pfp_size;
	*cmds++ = pfp_addr;
	*cmds++ = pm4_size;
	*cmds++ = pm4_addr;

	/*
	 * Theory of operation:
	 *
	 * On A4xx, we cannot have the PFP executing instructions while its
	 * instruction RAM is loading. We load the PFP's instruction RAM
	 * using type-0 writes from the ME.
	 *
	 * To make sure the PFP is not fetching instructions at the same
	 * time, we put it in a one-instruction loop:
	 *	mvc (ME), (ringbuffer)
	 * which executes repeatedly until all of the data has been moved
	 * from the ring buffer to the ME.
	 */
	if (adreno_is_a4xx(adreno_dev)) {
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];

		*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
		*cmds++ = 0x20000000 + A4XX_CP_RB_WPTR;
		*cmds++ = 0xffffffff;
		*cmds++ = 0x00000002;
		*cmds++ = cp_type3_packet(CP_INTERRUPT, 1);
		*cmds++ = 0;

		rb->wptr = rb->wptr - 2;
		adreno_ringbuffer_submit(rb, NULL);
		rb->wptr = rb->wptr + 2;
	} else {
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];
		adreno_ringbuffer_submit(rb, NULL);
	}

	/* idle device to validate bootstrap */
	ret = adreno_spin_idle(device);
	if (ret) {
		KGSL_DRV_ERR(rb->device,
			"microcode bootstrap failed to idle\n");
		kgsl_device_snapshot(device, NULL);
	}

	/* Clear the chicken bit for a speedup on A430 and its derivatives */
	if (!adreno_is_a420(adreno_dev))
		kgsl_regwrite(device, A4XX_CP_DEBUG,
			A4XX_CP_DEBUG_DEFAULT & ~(1 << 14));

	return ret;
}
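/*
 * Illustrative call sites for the bootstrap routine above (hypothetical
 * wrapper, not from the driver): a cold boot bootstraps the full
 * microcode, while a warm restart after power collapse, where the
 * instruction RAM contents survive, only reloads the jump tables.
 */
static int _hypothetical_rb_start(struct adreno_ringbuffer *rb, bool warm)
{
	/* warm start: microcode RAM is intact, refresh jump tables only */
	if (warm)
		return _ringbuffer_bootstrap_ucode(rb, 1);

	/* cold start: load the complete PFP and PM4 images */
	return _ringbuffer_bootstrap_ucode(rb, 0);
}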