/** * adreno_perfcounter_restore() - Restore performance counters * @adreno_dev: adreno device to configure * * Load the physical performance counters with 64 bit value which are * saved on GPU power collapse. */ inline void adreno_perfcounter_restore(struct adreno_device *adreno_dev) { struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); struct adreno_perfcount_group *group; unsigned int regid, groupid; if (counters == NULL) return; for (groupid = 0; groupid < counters->group_count; groupid++) { if (!loadable_perfcounter_group(groupid)) continue; group = &(counters->groups[groupid]); /* group/counter iterator */ for (regid = 0; regid < group->reg_count; regid++) { if (!active_countable(group->regs[regid].countable)) continue; adreno_perfcounter_write(adreno_dev, groupid, regid); } } /* Clear the load cmd registers */ adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, 0); adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, 0); if (adreno_is_a4xx(adreno_dev)) adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, 0); }
/*
 * adreno_ringbuffer_load_pfp_ucode() - Load pfp ucode
 * @device: Pointer to a KGSL device
 * @start: Starting index in pfp ucode to load
 * @end: Ending index of pfp ucode to load (exclusive)
 * @addr: Address in PFP instruction RAM to load the ucode at
 *
 * Stream pfp_fw[start..end) into the PFP ucode data port, starting
 * at instruction RAM offset @addr. Always returns 0.
 */
static inline int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device,
			unsigned int start, unsigned int end, unsigned int addr)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	int i = start;

	/* Set the destination offset; the data port auto-increments */
	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, addr);

	while (i < end) {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
						adreno_dev->pfp_fw[i]);
		i++;
	}

	return 0;
}
/*
 * adreno_ringbuffer_submit() - Submit queued ringbuffer commands to the GPU
 * @rb: Ringbuffer whose write pointer is handed to the hardware
 * @time: Optional out-parameter filled with GPU/CPU timestamps sampled at
 *        submission; pass NULL if no profiling data is needed
 *
 * Publishes rb->wptr to the CP so the GPU starts consuming the commands
 * written since the last submit.
 */
void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
		struct adreno_submit_time *time)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);

	/* A zero wptr here would mean submitting with nothing queued */
	BUG_ON(rb->wptr == 0);

	/* Let the pwrscale policy know that new commands have been
	 submitted. */
	kgsl_pwrscale_busy(rb->device);

	/* Write the changes to CFF if so enabled */
	_cff_write_ringbuffer(rb);

	/*
	 * Read the current GPU ticks and wallclock for most accurate
	 * profiling
	 */
	if (time != NULL) {
		/*
		 * Here we are attempting to create a mapping between the
		 * GPU time domain (alwayson counter) and the CPU time domain
		 * (local_clock) by sampling both values as close together as
		 * possible. This is useful for many types of debugging and
		 * profiling. In order to make this mapping as accurate as
		 * possible, we must turn off interrupts to avoid running
		 * interrupt handlers between the two samples.
		 */
		unsigned long flags;

		local_irq_save(flags);

		/* Not all targets expose an alwayson counter hook */
		if (gpudev->alwayson_counter_read != NULL)
			time->ticks = gpudev->alwayson_counter_read(adreno_dev);
		else
			time->ticks = 0;

		/* Get the kernel clock for time since boot */
		time->ktime = local_clock();

		/* Get the timeofday for the wall time (for the user) */
		getnstimeofday(&time->utime);

		local_irq_restore(flags);
	}

	/* Memory barrier before informing the hardware of new commands */
	mb();

	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr);
}
/*
 * adreno_ringbuffer_submit() - Submit queued ringbuffer commands to the GPU
 * @rb: Ringbuffer whose write pointer is handed to the hardware
 * @time: Optional out-parameter filled with GPU/CPU timestamps sampled at
 *        submission; pass NULL if no profiling data is needed
 *
 * Publishes rb->wptr to the CP, but only when preemption state is CLEAR
 * and @rb is the ringbuffer currently executing on the hardware; otherwise
 * the wptr update is deferred (presumably picked up when preemption
 * completes — confirm against the dispatcher code).
 */
void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
		struct adreno_submit_time *time)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);

	/* A zero wptr here would mean submitting with nothing queued */
	BUG_ON(rb->wptr == 0);

	/* Write the changes to CFF if so enabled */
	_cff_write_ringbuffer(rb);

	/*
	 * Sample the GPU alwayson counter and the CPU clocks back to back
	 * with interrupts off so the GPU/CPU time-domain mapping is as
	 * accurate as possible.
	 */
	if (time != NULL) {
		unsigned long flags;

		local_irq_save(flags);

		/* A3xx has no alwayson counter; report zero ticks there */
		if (!adreno_is_a3xx(adreno_dev)) {
			adreno_readreg64(adreno_dev,
				ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
				ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI,
				&time->ticks);

			/*
			 * Keep only the low 32 bits on A4xx up through A530.
			 * NOTE(review): presumably the HI half is unreliable
			 * on these parts — confirm against hardware errata.
			 */
			if (ADRENO_GPUREV(adreno_dev) >= 400 &&
				ADRENO_GPUREV(adreno_dev) <= ADRENO_REV_A530)
				time->ticks &= 0xFFFFFFFF;
		} else
			time->ticks = 0;

		/* Get the kernel clock for time since boot */
		time->ktime = local_clock();

		/* Get the timeofday for the wall time (for the user) */
		getnstimeofday(&time->utime);

		local_irq_restore(flags);
	}

	/* Memory barrier before informing the hardware of new commands */
	mb();

	/* Only poke the hardware if this RB is live and not mid-preemption */
	if (adreno_preempt_state(adreno_dev, ADRENO_DISPATCHER_PREEMPT_CLEAR) &&
		(adreno_dev->cur_rb == rb)) {
		kgsl_pwrscale_busy(rb->device);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr);
	}
}
/** * adreno_perfcounter_write() - Write the physical performance * counter values. * @adreno_dev - Adreno device whose registers are to be written to. * @group - group to which the physical counter belongs to. * @counter - register id of the physical counter to which the value is * written to. * * This function loads the 64 bit saved value into the particular physical * counter by enabling the corresponding bit in A3XX_RBBM_PERFCTR_LOAD_CMD* * register. */ static void adreno_perfcounter_write(struct adreno_device *adreno_dev, unsigned int group, unsigned int counter) { struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); struct adreno_perfcount_register *reg; unsigned int val; reg = &(gpudev->perfcounters->groups[group].regs[counter]); /* Clear the load cmd registers */ adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, 0); adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, 0); if (adreno_is_a4xx(adreno_dev)) adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, 0); /* Write the saved value to PERFCTR_LOAD_VALUE* registers. */ adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, (uint32_t)reg->value); adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, (uint32_t)(reg->value >> 32)); /* * Set the load bit in PERFCTR_LOAD_CMD for the physical counter * we want to restore. The value in PERFCTR_LOAD_VALUE* is loaded * into the corresponding physical counter. */ if (reg->load_bit < 32) { val = 1 << reg->load_bit; adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, val); } else if (reg->load_bit < 64) { val = 1 << (reg->load_bit - 32); adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, val); } else if (reg->load_bit >= 64 && adreno_is_a4xx(adreno_dev)) { val = 1 << (reg->load_bit - 64); adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, val); } }
/** * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode * @rb: Pointer to adreno ringbuffer * @load_jt: If non zero only load Jump tables * * Bootstrap ucode for GPU * load_jt == 0, bootstrap full microcode * load_jt == 1, bootstrap jump tables of microcode * * For example a bootstrap packet would like below * Setup a type3 bootstrap packet * PFP size to bootstrap * PFP addr to write the PFP data * PM4 size to bootstrap * PM4 addr to write the PM4 data * PFP dwords from microcode to bootstrap * PM4 size dwords from microcode to bootstrap */ static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb, unsigned int load_jt) { unsigned int *cmds, bootstrap_size; int i = 0; struct kgsl_device *device = rb->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr; /* Only bootstrap jump tables of ucode */ if (load_jt) { pm4_idx = adreno_dev->gpucore->pm4_jt_idx; pm4_addr = adreno_dev->gpucore->pm4_jt_addr; pfp_idx = adreno_dev->gpucore->pfp_jt_idx; pfp_addr = adreno_dev->gpucore->pfp_jt_addr; } else { /* Bootstrap full ucode */ pm4_idx = 1; pm4_addr = 0; pfp_idx = 1; pfp_addr = 0; } pm4_size = (adreno_dev->pm4_fw_size - pm4_idx); pfp_size = (adreno_dev->pfp_fw_size - pfp_idx); /* * Overwrite the first entry in the jump table with the special * bootstrap opcode */ if (adreno_is_a4xx(adreno_dev)) { adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x400); adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, 0x6f0009); bootstrap_size = (pm4_size + pfp_size + 5 + 6); } else { adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x200); adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, 0x6f0005); bootstrap_size = (pm4_size + pfp_size + 5); } /* clear ME_HALT to start micro engine */ adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0); cmds = adreno_ringbuffer_allocspace(rb, bootstrap_size); if (IS_ERR(cmds)) return PTR_ERR(cmds); if (cmds == NULL) return 
-ENOSPC; /* Construct the packet that bootsraps the ucode */ *cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1)); *cmds++ = pfp_size; *cmds++ = pfp_addr; *cmds++ = pm4_size; *cmds++ = pm4_addr; /** * Theory of operation: * * In A4x, we cannot have the PFP executing instructions while its instruction * RAM is loading. We load the PFP's instruction RAM using type-0 writes * from the ME. * * To make sure the PFP is not fetching instructions at the same time, * we put it in a one-instruction loop: * mvc (ME), (ringbuffer) * which executes repeatedly until all of the data has been moved from * the ring buffer to the ME. */ if (adreno_is_a4xx(adreno_dev)) { for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) *cmds++ = adreno_dev->pm4_fw[i]; for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) *cmds++ = adreno_dev->pfp_fw[i]; *cmds++ = cp_type3_packet(CP_REG_RMW, 3); *cmds++ = 0x20000000 + A4XX_CP_RB_WPTR; *cmds++ = 0xffffffff; *cmds++ = 0x00000002; *cmds++ = cp_type3_packet(CP_INTERRUPT, 1); *cmds++ = 0; rb->wptr = rb->wptr - 2; adreno_ringbuffer_submit(rb, NULL); rb->wptr = rb->wptr + 2; } else { for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) *cmds++ = adreno_dev->pfp_fw[i]; for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) *cmds++ = adreno_dev->pm4_fw[i]; adreno_ringbuffer_submit(rb, NULL); } /* idle device to validate bootstrap */ return adreno_spin_idle(device); }
/*
 * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode
 * @rb: Pointer to adreno ringbuffer
 * @load_jt: If non zero only load Jump tables
 *
 * Bootstrap ucode for GPU
 * load_jt == 0, bootstrap full microcode
 * load_jt == 1, bootstrap jump tables of microcode
 *
 * For example a bootstrap packet would look like below
 * Setup a type3 bootstrap packet
 * PFP size to bootstrap
 * PFP addr to write the PFP data
 * PM4 size to bootstrap
 * PM4 addr to write the PM4 data
 * PFP dwords from microcode to bootstrap
 * PM4 size dwords from microcode to bootstrap
 */
static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb,
					unsigned int load_jt)
{
	unsigned int *cmds, bootstrap_size, rb_size;
	int i = 0;
	int ret;
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr;

	/* Only bootstrap jump tables of ucode */
	if (load_jt) {
		pm4_idx = adreno_dev->gpucore->pm4_jt_idx;
		pm4_addr = adreno_dev->gpucore->pm4_jt_addr;
		pfp_idx = adreno_dev->gpucore->pfp_jt_idx;
		pfp_addr = adreno_dev->gpucore->pfp_jt_addr;
	} else {
		/* Bootstrap full ucode */
		pm4_idx = 1;
		pm4_addr = 0;
		pfp_idx = 1;
		pfp_addr = 0;
	}

	pm4_size = (adreno_dev->pm4_fw_size - pm4_idx);
	pfp_size = (adreno_dev->pfp_fw_size - pfp_idx);

	/* Bootstrap packet: 4 parameter dwords + both firmware images */
	bootstrap_size = (pm4_size + pfp_size + 5);

	/*
	 * Overwrite the first entry in the jump table with the special
	 * bootstrap opcode
	 */
	if (adreno_is_a4xx(adreno_dev)) {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x400);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			 0x6f0009);

		/*
		 * The support packets (the RMW and INTERRUPT) that are sent
		 * after the bootstrap packet should not be included in the
		 * size of the bootstrap packet but we do need to reserve
		 * enough space for those too
		 */
		rb_size = bootstrap_size + 6;
	} else {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x200);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			 0x6f0005);
		rb_size = bootstrap_size;
	}

	/* clear ME_HALT to start micro engine */
	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);

	cmds = adreno_ringbuffer_allocspace(rb, rb_size);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);
	if (cmds == NULL)
		return -ENOSPC;

	/* Construct the packet that bootstraps the ucode */
	*cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1));
	*cmds++ = pfp_size;
	*cmds++ = pfp_addr;
	*cmds++ = pm4_size;
	*cmds++ = pm4_addr;

/**
 * Theory of operation:
 *
 * In A4x, we cannot have the PFP executing instructions while its instruction
 * RAM is loading. We load the PFP's instruction RAM using type-0 writes
 * from the ME.
 *
 * To make sure the PFP is not fetching instructions at the same time,
 * we put it in a one-instruction loop:
 *    mvc (ME), (ringbuffer)
 * which executes repeatedly until all of the data has been moved from
 * the ring buffer to the ME.
 */
	if (adreno_is_a4xx(adreno_dev)) {
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];

		*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
		*cmds++ = 0x20000000 + A4XX_CP_RB_WPTR;
		*cmds++ = 0xffffffff;
		*cmds++ = 0x00000002;
		*cmds++ = cp_type3_packet(CP_INTERRUPT, 1);
		*cmds++ = 0;

		/*
		 * Submit everything except the last two dwords, then expose
		 * them afterwards so the CP_INTERRUPT fires once the
		 * bootstrap transfer has drained.
		 */
		rb->wptr = rb->wptr - 2;
		adreno_ringbuffer_submit(rb, NULL);
		rb->wptr = rb->wptr + 2;
	} else {
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];
		adreno_ringbuffer_submit(rb, NULL);
	}

	/* idle device to validate bootstrap */
	ret = adreno_spin_idle(device);

	if (ret) {
		/* Capture a snapshot for postmortem if the CP never idled */
		KGSL_DRV_ERR(rb->device,
		"microcode bootstrap failed to idle\n");
		kgsl_device_snapshot(device, NULL);
	}

	/* Clear the chicken bit for speed up on A430 and its derivatives */
	if (!adreno_is_a420(adreno_dev))
		kgsl_regwrite(device, A4XX_CP_DEBUG,
			A4XX_CP_DEBUG_DEFAULT & ~(1 << 14));

	return ret;
}