static void adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb,
				unsigned int numcmds, int wptr_ahead)
{
	int nopcount;
	unsigned int freecmds;
	unsigned int *cmds;
	uint cmds_gpu;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	unsigned long wait_timeout = msecs_to_jiffies(adreno_dev->wait_timeout);
	unsigned long wait_time;

	/* if wptr ahead, fill the remaining with NOPs */
	if (wptr_ahead) {
		/* -1 for header */
		nopcount = rb->sizedwords - rb->wptr - 1;

		cmds = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
		cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*rb->wptr;

		GSL_RB_WRITE(cmds, cmds_gpu, cp_nop_packet(nopcount));

		/* Make sure that rptr is not 0 before submitting
		 * commands at the end of ringbuffer. We do not
		 * want the rptr and wptr to become equal when
		 * the ringbuffer is not empty */
		do {
			GSL_RB_GET_READPTR(rb, &rb->rptr);
		} while (!rb->rptr);

		rb->wptr++;

		adreno_ringbuffer_submit(rb);

		rb->wptr = 0;
	}

	wait_time = jiffies + wait_timeout;
	/* wait for space in ringbuffer */
	while (1) {
		GSL_RB_GET_READPTR(rb, &rb->rptr);

		freecmds = rb->rptr - rb->wptr;

		if (freecmds == 0 || freecmds > numcmds)
			break;

		if (time_after(jiffies, wait_time)) {
			KGSL_DRV_ERR(rb->device,
				"Timed out while waiting for freespace in ringbuffer "
				"rptr: 0x%x, wptr: 0x%x\n", rb->rptr, rb->wptr);
			if (!adreno_dump_and_recover(rb->device))
				wait_time = jiffies + wait_timeout;
			else
				/* GPU is hung and we cannot recover */
				BUG();
		}
	}
}
static int adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb,
				unsigned int numcmds, int wptr_ahead)
{
	int nopcount;
	unsigned int freecmds;
	unsigned int *cmds;
	uint cmds_gpu;

	/* if wptr ahead, fill the remaining with NOPs */
	if (wptr_ahead) {
		/* -1 for header */
		nopcount = rb->sizedwords - rb->wptr - 1;

		cmds = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
		cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*rb->wptr;

		GSL_RB_WRITE(cmds, cmds_gpu, pm4_nop_packet(nopcount));

		/* Make sure that rptr is not 0 before submitting
		 * commands at the end of ringbuffer. We do not
		 * want the rptr and wptr to become equal when
		 * the ringbuffer is not empty */
		do {
			GSL_RB_GET_READPTR(rb, &rb->rptr);
		} while (!rb->rptr);

		rb->wptr++;

		adreno_ringbuffer_submit(rb);

		rb->wptr = 0;
	}

	/* wait for space in ringbuffer */
	do {
		GSL_RB_GET_READPTR(rb, &rb->rptr);

		freecmds = rb->rptr - rb->wptr;

	} while ((freecmds != 0) && (freecmds <= numcmds));

	return 0;
}
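/*
 * For context, a sketch of the caller side of these waitspace variants:
 * in drivers of this era, adreno_ringbuffer_allocspace() decides whether
 * the write pointer would run off the end of the ring (wptr_ahead = 1)
 * or catch up to the read pointer (wptr_ahead = 0) and calls waitspace
 * accordingly. This is reconstructed from the same driver family, not a
 * verbatim copy; GSL_RB_NOP_SIZEDWORDS is the space reserved for the
 * wraparound NOP packet.
 */
static unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
						unsigned int numcmds)
{
	unsigned int *ptr = NULL;

	BUG_ON(numcmds >= rb->sizedwords);

	GSL_RB_GET_READPTR(rb, &rb->rptr);
	/* check for available space */
	if (rb->wptr >= rb->rptr) {
		/* wptr ahead of or equal to rptr */
		/* reserve dwords for the wraparound nop packet */
		if ((rb->wptr + numcmds) > (rb->sizedwords -
				GSL_RB_NOP_SIZEDWORDS))
			adreno_ringbuffer_waitspace(rb, numcmds, 1);
	} else {
		/* wptr behind rptr */
		if ((rb->wptr + numcmds) >= rb->rptr)
			adreno_ringbuffer_waitspace(rb, numcmds, 0);
		/* check the remaining space again: waitspace may have
		 * wrapped wptr back to the top of the ring */
		if ((rb->wptr + numcmds) > (rb->sizedwords -
				GSL_RB_NOP_SIZEDWORDS))
			adreno_ringbuffer_waitspace(rb, numcmds, 1);
	}

	ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
	rb->wptr += numcmds;

	return ptr;
}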
int adreno_ringbuffer_submit_spin_retry(struct adreno_ringbuffer *rb,
		struct adreno_submit_time *time, unsigned int timeout,
		unsigned int retry)
{
	int ret = 0;
	int num_trial = 0;

	/* Pass the caller's submit time through instead of NULL so that
	 * timestamp bookkeeping is not silently dropped */
	adreno_ringbuffer_submit(rb, time);
	do {
		ret = adreno_spin_idle(rb->device, timeout);
		if (ret == -ETIMEDOUT) {
			KGSL_DRV_ERR(rb->device,
				"hw initialization failed to idle after %d msec\n",
				(++num_trial) * timeout);
		} else
			break;
	} while (num_trial < retry);

	return ret;
}
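/*
 * Usage sketch (hypothetical caller): a hardware-init path can give the CP
 * several short windows to go idle instead of one long wait, getting a log
 * line per failed attempt. The function name, timeout, and retry count here
 * are illustrative assumptions, not values from the driver.
 */
static int example_rb_start_check(struct adreno_ringbuffer *rb)
{
	/* Up to 4 attempts of 100 msec each before reporting failure */
	return adreno_ringbuffer_submit_spin_retry(rb, NULL, 100, 4);
}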
static void adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb,
				unsigned int numcmds, int wptr_ahead)
{
	int nopcount;
	unsigned int freecmds;
	unsigned int *cmds;
	uint cmds_gpu;
	unsigned long wait_time;
	unsigned long wait_timeout = msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
	unsigned long wait_time_part;
	unsigned int prev_reg_val[hang_detect_regs_count];

	memset(prev_reg_val, 0, sizeof(prev_reg_val));

	/* if wptr ahead, fill the remaining with NOPs */
	if (wptr_ahead) {
		/* -1 for header */
		nopcount = rb->sizedwords - rb->wptr - 1;

		cmds = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
		cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*rb->wptr;

		GSL_RB_WRITE(cmds, cmds_gpu, cp_nop_packet(nopcount));

		/* Make sure that rptr is not 0 before submitting
		 * commands at the end of ringbuffer. We do not
		 * want the rptr and wptr to become equal when
		 * the ringbuffer is not empty */
		do {
			GSL_RB_GET_READPTR(rb, &rb->rptr);
		} while (!rb->rptr);

		rb->wptr++;

		adreno_ringbuffer_submit(rb);

		rb->wptr = 0;
	}

	wait_time = jiffies + wait_timeout;
	wait_time_part = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART);
	/* wait for space in ringbuffer */
	while (1) {
		GSL_RB_GET_READPTR(rb, &rb->rptr);

		freecmds = rb->rptr - rb->wptr;

		if (freecmds == 0 || freecmds > numcmds)
			break;

		/* Don't wait for the full timeout; detect a hang faster. */
		if (time_after(jiffies, wait_time_part)) {
			wait_time_part = jiffies +
				msecs_to_jiffies(KGSL_TIMEOUT_PART);
			if (adreno_hang_detect(rb->device, prev_reg_val)) {
				KGSL_DRV_ERR(rb->device,
					"Hang detected while waiting for freespace in "
					"ringbuffer rptr: 0x%x, wptr: 0x%x\n",
					rb->rptr, rb->wptr);
				goto err;
			}
		}

		if (time_after(jiffies, wait_time)) {
			KGSL_DRV_ERR(rb->device,
				"Timed out while waiting for freespace in ringbuffer "
				"rptr: 0x%x, wptr: 0x%x\n", rb->rptr, rb->wptr);
			goto err;
		}

		continue;

err:
		if (!adreno_dump_and_recover(rb->device))
			wait_time = jiffies + wait_timeout;
		else
			/* GPU is hung and we cannot recover */
			BUG();
	}
}
/**
 * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode
 * @rb: Pointer to adreno ringbuffer
 * @load_jt: If non-zero, only load the jump tables
 *
 * Bootstrap ucode for GPU
 * load_jt == 0, bootstrap full microcode
 * load_jt == 1, bootstrap jump tables of microcode
 *
 * For example, a bootstrap packet looks like the following:
 * Setup a type3 bootstrap packet
 * PFP size to bootstrap
 * PFP addr to write the PFP data
 * PM4 size to bootstrap
 * PM4 addr to write the PM4 data
 * PFP dwords from microcode to bootstrap
 * PM4 dwords from microcode to bootstrap
 */
static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb,
					unsigned int load_jt)
{
	unsigned int *cmds, bootstrap_size;
	int i = 0;
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr;

	/* Only bootstrap jump tables of ucode */
	if (load_jt) {
		pm4_idx = adreno_dev->gpucore->pm4_jt_idx;
		pm4_addr = adreno_dev->gpucore->pm4_jt_addr;
		pfp_idx = adreno_dev->gpucore->pfp_jt_idx;
		pfp_addr = adreno_dev->gpucore->pfp_jt_addr;
	} else {
		/* Bootstrap full ucode */
		pm4_idx = 1;
		pm4_addr = 0;
		pfp_idx = 1;
		pfp_addr = 0;
	}

	pm4_size = (adreno_dev->pm4_fw_size - pm4_idx);
	pfp_size = (adreno_dev->pfp_fw_size - pfp_idx);

	/*
	 * Overwrite the first entry in the jump table with the special
	 * bootstrap opcode
	 */
	if (adreno_is_a4xx(adreno_dev)) {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x400);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			0x6f0009);
		bootstrap_size = (pm4_size + pfp_size + 5 + 6);
	} else {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x200);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			0x6f0005);
		bootstrap_size = (pm4_size + pfp_size + 5);
	}

	/* clear ME_HALT to start micro engine */
	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);

	cmds = adreno_ringbuffer_allocspace(rb, bootstrap_size);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);
	if (cmds == NULL)
		return -ENOSPC;

	/* Construct the packet that bootstraps the ucode */
	*cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1));
	*cmds++ = pfp_size;
	*cmds++ = pfp_addr;
	*cmds++ = pm4_size;
	*cmds++ = pm4_addr;

	/*
	 * Theory of operation:
	 *
	 * In A4x, we cannot have the PFP executing instructions while its
	 * instruction RAM is loading. We load the PFP's instruction RAM
	 * using type-0 writes from the ME.
	 *
	 * To make sure the PFP is not fetching instructions at the same
	 * time, we put it in a one-instruction loop:
	 *	mvc (ME), (ringbuffer)
	 * which executes repeatedly until all of the data has been moved
	 * from the ring buffer to the ME.
	 */
	if (adreno_is_a4xx(adreno_dev)) {
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];

		*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
		*cmds++ = 0x20000000 + A4XX_CP_RB_WPTR;
		*cmds++ = 0xffffffff;
		*cmds++ = 0x00000002;
		*cmds++ = cp_type3_packet(CP_INTERRUPT, 1);
		*cmds++ = 0;

		rb->wptr = rb->wptr - 2;
		adreno_ringbuffer_submit(rb, NULL);
		rb->wptr = rb->wptr + 2;
	} else {
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];

		adreno_ringbuffer_submit(rb, NULL);
	}

	/* idle device to validate bootstrap */
	return adreno_spin_idle(device);
}
int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram)
{
	int status;
	/*cp_rb_cntl_u cp_rb_cntl; */
	union reg_cp_rb_cntl cp_rb_cntl;
	unsigned int *cmds, rb_cntl;
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	uint cmds_gpu;

	if (rb->flags & KGSL_FLAGS_STARTED)
		return 0;

	if (init_ram) {
		rb->timestamp = 0;
		GSL_RB_INIT_TIMESTAMP(rb);
	}

	kgsl_sharedmem_set(&rb->memptrs_desc, 0, 0,
			sizeof(struct kgsl_rbmemptrs));

	kgsl_sharedmem_set(&rb->buffer_desc, 0, 0xAA,
			(rb->sizedwords << 2));

	adreno_regwrite(device, REG_CP_RB_WPTR_BASE,
			(rb->memptrs_desc.gpuaddr
			+ GSL_RB_MEMPTRS_WPTRPOLL_OFFSET));

	/* setup WPTR delay */
	adreno_regwrite(device, REG_CP_RB_WPTR_DELAY, 0 /*0x70000010 */);

	/* setup REG_CP_RB_CNTL */
	adreno_regread(device, REG_CP_RB_CNTL, &rb_cntl);
	cp_rb_cntl.val = rb_cntl;

	/*
	 * The size of the ringbuffer in the hardware is the log2
	 * representation of the size in quadwords (sizedwords / 2)
	 */
	cp_rb_cntl.f.rb_bufsz = ilog2(rb->sizedwords >> 1);

	/*
	 * Specify the quadwords to read before updating mem RPTR.
	 * Like above, pass the log2 representation of the blocksize
	 * in quadwords.
	 */
	cp_rb_cntl.f.rb_blksz = ilog2(KGSL_RB_BLKSIZE >> 3);

	cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN; /* WPTR polling */
	/* mem RPTR writebacks */
	cp_rb_cntl.f.rb_no_update = GSL_RB_CNTL_NO_UPDATE;

	adreno_regwrite(device, REG_CP_RB_CNTL, cp_rb_cntl.val);

	adreno_regwrite(device, REG_CP_RB_BASE, rb->buffer_desc.gpuaddr);

	adreno_regwrite(device, REG_CP_RB_RPTR_ADDR,
			rb->memptrs_desc.gpuaddr +
			GSL_RB_MEMPTRS_RPTR_OFFSET);

	/* explicitly clear all cp interrupts */
	adreno_regwrite(device, REG_CP_INT_ACK, 0xFFFFFFFF);

	/* setup scratch/timestamp */
	adreno_regwrite(device, REG_SCRATCH_ADDR,
			device->memstore.gpuaddr +
			KGSL_DEVICE_MEMSTORE_OFFSET(soptimestamp));

	adreno_regwrite(device, REG_SCRATCH_UMSK,
			GSL_RB_MEMPTRS_SCRATCH_MASK);

	/* update the eoptimestamp field with the last retired timestamp */
	kgsl_sharedmem_writel(&device->memstore,
			KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp),
			rb->timestamp);

	/* load the CP ucode */
	status = adreno_ringbuffer_load_pm4_ucode(device);
	if (status != 0)
		return status;

	/* load the prefetch parser ucode */
	status = adreno_ringbuffer_load_pfp_ucode(device);
	if (status != 0)
		return status;

	adreno_regwrite(device, REG_CP_QUEUE_THRESHOLDS, 0x000C0804);

	rb->rptr = 0;
	rb->wptr = 0;

	/* clear ME_HALT to start micro engine */
	adreno_regwrite(device, REG_CP_ME_CNTL, 0);

	/* ME_INIT */
	cmds = adreno_ringbuffer_allocspace(rb, 19);
	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19);

	GSL_RB_WRITE(cmds, cmds_gpu, CP_HDR_ME_INIT);
	/* All fields present (bits 9:0) */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff);
	/* Disable/Enable Real-Time Stream processing (present but ignored) */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Enable (2D <-> 3D) implicit synchronization (present but ignored) */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	GSL_RB_WRITE(cmds, cmds_gpu,
		SUBBLOCK_OFFSET(REG_RB_SURFACE_INFO));
	GSL_RB_WRITE(cmds, cmds_gpu,
		SUBBLOCK_OFFSET(REG_PA_SC_WINDOW_OFFSET));
	GSL_RB_WRITE(cmds, cmds_gpu,
		SUBBLOCK_OFFSET(REG_VGT_MAX_VTX_INDX));
	GSL_RB_WRITE(cmds, cmds_gpu,
		SUBBLOCK_OFFSET(REG_SQ_PROGRAM_CNTL));
	GSL_RB_WRITE(cmds, cmds_gpu,
		SUBBLOCK_OFFSET(REG_RB_DEPTHCONTROL));
	GSL_RB_WRITE(cmds, cmds_gpu,
		SUBBLOCK_OFFSET(REG_PA_SU_POINT_SIZE));
	GSL_RB_WRITE(cmds, cmds_gpu,
		SUBBLOCK_OFFSET(REG_PA_SC_LINE_CNTL));
	GSL_RB_WRITE(cmds, cmds_gpu,
		SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE));

	/* Instruction memory size: */
	GSL_RB_WRITE(cmds, cmds_gpu,
		(adreno_encode_istore_size(adreno_dev)
		| adreno_dev->pix_shader_start));
	/* Maximum Contexts */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
	/* Write confirm interval: the CP waits wait_interval * 16 clocks
	 * between polls */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* NQ and External Memory Swap */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Protected mode error checking */
	GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL);
	/* Disable header dumping and Header dump address */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
	/* Header dump size */
	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);

	adreno_ringbuffer_submit(rb);

	/* idle device to validate ME INIT */
	status = adreno_idle(device);

	if (status == 0)
		rb->flags |= KGSL_FLAGS_STARTED;

	return status;
}
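/*
 * Worked example of the CP_RB_CNTL sizing math above, using an assumed
 * (illustrative) 128 KB ring and a 16-byte KGSL_RB_BLKSIZE:
 *
 *   sizedwords = 32768 dwords        (128 KB / 4 bytes per dword)
 *   rb_bufsz   = ilog2(32768 >> 1)   = ilog2(16384) = 14
 *   rb_blksz   = ilog2(16 >> 3)      = ilog2(2)     = 1
 *
 * i.e. the hardware is told "2^14 quadwords of ring" and "write back the
 * memory RPTR every 2^1 quadwords".
 */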
int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
		struct adreno_submit_time *time, unsigned int timeout)
{
	/* Forward the submit time (instead of NULL) so that the caller's
	 * timestamp bookkeeping is preserved */
	adreno_ringbuffer_submit(rb, time);
	return adreno_spin_idle(rb->device, timeout);
}
static void adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb,
				unsigned int numcmds, int wptr_ahead)
{
	int nopcount;
	unsigned int freecmds;
	unsigned int *cmds;
	uint cmds_gpu;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	unsigned long wait_timeout = msecs_to_jiffies(adreno_dev->wait_timeout);
	unsigned long wait_time;
	unsigned long wait_time_part;
	unsigned int msecs_part = KGSL_TIMEOUT_PART;
	unsigned int prev_reg_val[hang_detect_regs_count];

	memset(prev_reg_val, 0, sizeof(prev_reg_val));

	/* if wptr ahead, fill the remaining with NOPs */
	if (wptr_ahead) {
		/* -1 for header */
		nopcount = rb->sizedwords - rb->wptr - 1;

		cmds = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
		cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*rb->wptr;

		GSL_RB_WRITE(cmds, cmds_gpu, cp_nop_packet(nopcount));

		/* spin until rptr is non-zero so rptr == wptr cannot occur
		 * while the ringbuffer is non-empty */
		do {
			GSL_RB_GET_READPTR(rb, &rb->rptr);
		} while (!rb->rptr);

		rb->wptr++;

		adreno_ringbuffer_submit(rb);

		rb->wptr = 0;
	}

	wait_time = jiffies + wait_timeout;
	wait_time_part = jiffies + msecs_to_jiffies(msecs_part);

	/* wait for space in ringbuffer */
	while (1) {
		GSL_RB_GET_READPTR(rb, &rb->rptr);

		freecmds = rb->rptr - rb->wptr;

		if (freecmds == 0 || freecmds > numcmds)
			break;

		/* check for a hang more often than the full timeout */
		if (time_after(jiffies, wait_time_part)) {
			wait_time_part = jiffies +
				msecs_to_jiffies(msecs_part);
			if (adreno_hang_detect(rb->device, prev_reg_val)) {
				KGSL_DRV_ERR(rb->device,
					"Hang detected while waiting for freespace in "
					"ringbuffer rptr: 0x%x, wptr: 0x%x\n",
					rb->rptr, rb->wptr);
				goto err;
			}
		}

		if (time_after(jiffies, wait_time)) {
			KGSL_DRV_ERR(rb->device,
				"Timed out while waiting for freespace in ringbuffer "
				"rptr: 0x%x, wptr: 0x%x\n", rb->rptr, rb->wptr);
			goto err;
		}

		continue;

err:
		if (!adreno_dump_and_recover(rb->device)) {
			wait_time = jiffies + wait_timeout;
		} else {
			/* GPU is hung and we cannot recover */
			BUG();
		}
	}
}
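/*
 * Worked example of the free-space check in the waitspace variants above,
 * assuming an illustrative ring of 1024 dwords:
 *
 *   rptr = 300, wptr = 200, numcmds = 50
 *     freecmds = 300 - 200 = 100 > 50   -> enough room, break
 *   rptr = 220, wptr = 200, numcmds = 50
 *     freecmds = 20, 0 < 20 <= 50       -> keep polling
 *   rptr = 200, wptr = 200
 *     freecmds = 0                      -> ring is empty, break
 *
 * freecmds == 0 can safely mean "empty" only because the wraparound path
 * spins until rptr != 0 before publishing a wptr of 0, so the pointers
 * never become equal while the ring still holds commands.
 */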
/**
 * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode
 * @rb: Pointer to adreno ringbuffer
 * @load_jt: If non-zero, only load the jump tables
 *
 * Bootstrap ucode for GPU
 * load_jt == 0, bootstrap full microcode
 * load_jt == 1, bootstrap jump tables of microcode
 *
 * For example, a bootstrap packet looks like the following:
 * Setup a type3 bootstrap packet
 * PFP size to bootstrap
 * PFP addr to write the PFP data
 * PM4 size to bootstrap
 * PM4 addr to write the PM4 data
 * PFP dwords from microcode to bootstrap
 * PM4 dwords from microcode to bootstrap
 */
static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb,
					unsigned int load_jt)
{
	unsigned int *cmds, bootstrap_size, rb_size;
	int i = 0;
	int ret;
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr;

	/* Only bootstrap jump tables of ucode */
	if (load_jt) {
		pm4_idx = adreno_dev->gpucore->pm4_jt_idx;
		pm4_addr = adreno_dev->gpucore->pm4_jt_addr;
		pfp_idx = adreno_dev->gpucore->pfp_jt_idx;
		pfp_addr = adreno_dev->gpucore->pfp_jt_addr;
	} else {
		/* Bootstrap full ucode */
		pm4_idx = 1;
		pm4_addr = 0;
		pfp_idx = 1;
		pfp_addr = 0;
	}

	pm4_size = (adreno_dev->pm4_fw_size - pm4_idx);
	pfp_size = (adreno_dev->pfp_fw_size - pfp_idx);

	bootstrap_size = (pm4_size + pfp_size + 5);

	/*
	 * Overwrite the first entry in the jump table with the special
	 * bootstrap opcode
	 */
	if (adreno_is_a4xx(adreno_dev)) {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x400);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			0x6f0009);

		/*
		 * The support packets (the RMW and INTERRUPT) that are sent
		 * after the bootstrap packet should not be included in the
		 * size of the bootstrap packet but we do need to reserve
		 * enough space for those too
		 */
		rb_size = bootstrap_size + 6;
	} else {
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
			0x200);
		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
			0x6f0005);
		rb_size = bootstrap_size;
	}

	/* clear ME_HALT to start micro engine */
	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);

	cmds = adreno_ringbuffer_allocspace(rb, rb_size);
	if (IS_ERR(cmds))
		return PTR_ERR(cmds);
	if (cmds == NULL)
		return -ENOSPC;

	/* Construct the packet that bootstraps the ucode */
	*cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1));
	*cmds++ = pfp_size;
	*cmds++ = pfp_addr;
	*cmds++ = pm4_size;
	*cmds++ = pm4_addr;

	/*
	 * Theory of operation:
	 *
	 * In A4x, we cannot have the PFP executing instructions while its
	 * instruction RAM is loading. We load the PFP's instruction RAM
	 * using type-0 writes from the ME.
	 *
	 * To make sure the PFP is not fetching instructions at the same
	 * time, we put it in a one-instruction loop:
	 *	mvc (ME), (ringbuffer)
	 * which executes repeatedly until all of the data has been moved
	 * from the ring buffer to the ME.
	 */
	if (adreno_is_a4xx(adreno_dev)) {
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];

		*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
		*cmds++ = 0x20000000 + A4XX_CP_RB_WPTR;
		*cmds++ = 0xffffffff;
		*cmds++ = 0x00000002;
		*cmds++ = cp_type3_packet(CP_INTERRUPT, 1);
		*cmds++ = 0;

		rb->wptr = rb->wptr - 2;
		adreno_ringbuffer_submit(rb, NULL);
		rb->wptr = rb->wptr + 2;
	} else {
		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
			*cmds++ = adreno_dev->pfp_fw[i];
		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
			*cmds++ = adreno_dev->pm4_fw[i];

		adreno_ringbuffer_submit(rb, NULL);
	}

	/* idle device to validate bootstrap */
	ret = adreno_spin_idle(device);
	if (ret) {
		KGSL_DRV_ERR(rb->device,
			"microcode bootstrap failed to idle\n");
		kgsl_device_snapshot(device, NULL);
	}

	/* Clear the chicken bit for speed up on A430 and its derivatives */
	if (!adreno_is_a420(adreno_dev))
		kgsl_regwrite(device, A4XX_CP_DEBUG,
			A4XX_CP_DEBUG_DEFAULT & ~(1 << 14));

	return ret;
}
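/*
 * For reference, a sketch of the PM4 type-3 header encoding behind the
 * cp_type3_packet()/cp_nop_packet() helpers used above, reconstructed in
 * the shape of the driver's adreno_pm4types.h (treat the exact macro text
 * as an assumption): bits 31:30 carry the packet type, bits 29:16 the
 * payload dword count minus one, and bits 15:8 the opcode.
 */
#define CP_TYPE3_PKT	((unsigned int)3 << 30)

#define cp_type3_packet(opcode, cnt) \
	(CP_TYPE3_PKT | (((cnt) - 1) << 16) | (((opcode) & 0xFF) << 8))

/* A NOP is simply a type-3 packet with the CP_NOP opcode, which is how the
 * waitspace variants pad out the tail of the ring before wrapping */
#define cp_nop_packet(cnt) \
	(CP_TYPE3_PKT | (((cnt) - 1) << 16) | ((CP_NOP & 0xFF) << 8))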