/*
 * program_shader() - emit a CP_IM_LOAD_IMMEDIATE packet that carries a shader
 * @cmds: next free dword of the command buffer being built
 * @vtxfrag: shader selector, stored verbatim as the first payload dword
 *           (callers in this file pass 0 for vertex, 1 for fragment)
 * @shader_pgm: shader instruction words to copy into the packet
 * @dwords: number of 32-bit words in @shader_pgm
 *
 * The packet is a type-3 header, the selector, a start/size dword (start is
 * always 0) and then @dwords words copied from @shader_pgm.
 *
 * Returns the position just past the last dword written.
 */
static unsigned int *program_shader(unsigned int *cmds, int vtxfrag,
				    unsigned int *shader_pgm, int dwords)
{
	unsigned int *payload = cmds + 3;

	cmds[0] = cp_type3_packet(CP_IM_LOAD_IMMEDIATE, 2 + dwords);
	cmds[1] = vtxfrag;
	cmds[2] = ((0 << 16) | dwords);

	/* dwords << 2 converts the word count into a byte count */
	memcpy(payload, shader_pgm, dwords << 2);

	return payload + dwords;
}
/*
 * program_shader() - append a shader program to a command stream
 * @cmds: write position in the command buffer
 * @vtxfrag: 0 = vertex shader, 1 = fragment shader
 * @shader_pgm: instruction words of the (patched) shader
 * @dwords: length of @shader_pgm in 32-bit words
 *
 * Returns the write position following the emitted packet.
 */
static unsigned int *program_shader(unsigned int *cmds, int vtxfrag,
				    unsigned int *shader_pgm, int dwords)
{
	unsigned int *out = cmds;

	/* packet header: two control dwords plus the instruction words */
	*out = cp_type3_packet(CP_IM_LOAD_IMMEDIATE, 2 + dwords);
	out++;

	/* which shader the instructions belong to */
	*out = vtxfrag;
	out++;

	/* instruction start (upper half, always 0) & size in 32-bit words */
	*out = ((0 << 16) | dwords);
	out++;

	/* instruction words follow inline; size is given to memcpy in bytes */
	memcpy(out, shader_pgm, dwords << 2);

	return out + dwords;
}
/*
 * build_chicken_restore_cmds() - build the TP0_CHICKEN restore sequence
 * @drawctxt: draw context whose gpustate buffer holds the commands
 *
 * Starting at tmp_ctx.cmd, emits a wait-for-idle followed by a type-0 write
 * of REG_TP0_CHICKEN. The GPU address of that write's data dword (initially
 * zero) is recorded in tmp_ctx.chicken_restore, and the whole sequence is
 * wrapped into drawctxt->chicken_restore through create_ib1().
 *
 * tmp_ctx.cmd itself is not advanced here; the new end position is only
 * returned to the caller.
 */
static unsigned int *build_chicken_restore_cmds(
					struct adreno_context *drawctxt)
{
	unsigned int *first = tmp_ctx.cmd;
	unsigned int *value_slot = first + 3;
	unsigned int *end = value_slot + 1;

	first[0] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	first[1] = 0;

	first[2] = cp_type0_packet(REG_TP0_CHICKEN, 1);
	/* remember where the register value lives so it can be patched later */
	tmp_ctx.chicken_restore = virt2gpu(value_slot, &drawctxt->gpustate);
	*value_slot = 0x00000000;

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->chicken_restore, first, end);

	return end;
}
int adreno_context_restore(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device; unsigned int cmds[5]; if (adreno_dev == NULL || context == NULL) return -EINVAL; device = &adreno_dev->dev; cmds[0] = cp_nop_packet(1); cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); cmds[3] = device->memstore.gpuaddr + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context); cmds[4] = context->base.id; return adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, cmds, 5); }
/** * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode * @rb: Pointer to adreno ringbuffer * @load_jt: If non zero only load Jump tables * * Bootstrap ucode for GPU * load_jt == 0, bootstrap full microcode * load_jt == 1, bootstrap jump tables of microcode * * For example a bootstrap packet would like below * Setup a type3 bootstrap packet * PFP size to bootstrap * PFP addr to write the PFP data * PM4 size to bootstrap * PM4 addr to write the PM4 data * PFP dwords from microcode to bootstrap * PM4 size dwords from microcode to bootstrap */ static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb, unsigned int load_jt) { unsigned int *cmds, bootstrap_size; int i = 0; struct kgsl_device *device = rb->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr; /* Only bootstrap jump tables of ucode */ if (load_jt) { pm4_idx = adreno_dev->gpucore->pm4_jt_idx; pm4_addr = adreno_dev->gpucore->pm4_jt_addr; pfp_idx = adreno_dev->gpucore->pfp_jt_idx; pfp_addr = adreno_dev->gpucore->pfp_jt_addr; } else { /* Bootstrap full ucode */ pm4_idx = 1; pm4_addr = 0; pfp_idx = 1; pfp_addr = 0; } pm4_size = (adreno_dev->pm4_fw_size - pm4_idx); pfp_size = (adreno_dev->pfp_fw_size - pfp_idx); /* * Overwrite the first entry in the jump table with the special * bootstrap opcode */ if (adreno_is_a4xx(adreno_dev)) { adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x400); adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, 0x6f0009); bootstrap_size = (pm4_size + pfp_size + 5 + 6); } else { adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x200); adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, 0x6f0005); bootstrap_size = (pm4_size + pfp_size + 5); } /* clear ME_HALT to start micro engine */ adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0); cmds = adreno_ringbuffer_allocspace(rb, bootstrap_size); if (IS_ERR(cmds)) return PTR_ERR(cmds); if (cmds == NULL) return 
-ENOSPC; /* Construct the packet that bootsraps the ucode */ *cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1)); *cmds++ = pfp_size; *cmds++ = pfp_addr; *cmds++ = pm4_size; *cmds++ = pm4_addr; /** * Theory of operation: * * In A4x, we cannot have the PFP executing instructions while its instruction * RAM is loading. We load the PFP's instruction RAM using type-0 writes * from the ME. * * To make sure the PFP is not fetching instructions at the same time, * we put it in a one-instruction loop: * mvc (ME), (ringbuffer) * which executes repeatedly until all of the data has been moved from * the ring buffer to the ME. */ if (adreno_is_a4xx(adreno_dev)) { for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) *cmds++ = adreno_dev->pm4_fw[i]; for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) *cmds++ = adreno_dev->pfp_fw[i]; *cmds++ = cp_type3_packet(CP_REG_RMW, 3); *cmds++ = 0x20000000 + A4XX_CP_RB_WPTR; *cmds++ = 0xffffffff; *cmds++ = 0x00000002; *cmds++ = cp_type3_packet(CP_INTERRUPT, 1); *cmds++ = 0; rb->wptr = rb->wptr - 2; adreno_ringbuffer_submit(rb, NULL); rb->wptr = rb->wptr + 2; } else { for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) *cmds++ = adreno_dev->pfp_fw[i]; for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) *cmds++ = adreno_dev->pm4_fw[i]; adreno_ringbuffer_submit(rb, NULL); } /* idle device to validate bootstrap */ return adreno_spin_idle(device); }
static void build_regrestore_cmds(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { unsigned int *start = tmp_ctx.cmd; unsigned int *cmd = start; unsigned int i = 0; unsigned int reg_array_size = 0; const unsigned int *ptr_register_ranges; *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; /* H/W Registers */ /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */ cmd++; #ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES /* Force mismatch */ *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; #else *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; #endif /* Based on chip id choose the registers ranges*/ if (adreno_is_a220(adreno_dev)) { ptr_register_ranges = register_ranges_a220; reg_array_size = ARRAY_SIZE(register_ranges_a220); } else if (adreno_is_a225(adreno_dev)) { ptr_register_ranges = register_ranges_a225; reg_array_size = ARRAY_SIZE(register_ranges_a225); } else { ptr_register_ranges = register_ranges_a20x; reg_array_size = ARRAY_SIZE(register_ranges_a20x); } for (i = 0; i < (reg_array_size/2); i++) { cmd = reg_range(cmd, ptr_register_ranges[i*2], ptr_register_ranges[i*2+1]); } /* Now we know how many register blocks we have, we can compute command * length */ start[2] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, (cmd - start) - 3); /* Enable shadowing for the entire register block. */ #ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES start[4] |= (0 << 24) | (4 << 16); /* Disable shadowing. 
*/ #else start[4] |= (1 << 24) | (4 << 16); #endif /* Need to handle some of the registers separately */ *cmd++ = cp_type0_packet(REG_SQ_GPR_MANAGEMENT, 1); tmp_ctx.reg_values[0] = virt2gpu(cmd, &drawctxt->gpustate); *cmd++ = 0x00040400; *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; *cmd++ = cp_type0_packet(REG_TP0_CHICKEN, 1); tmp_ctx.reg_values[1] = virt2gpu(cmd, &drawctxt->gpustate); *cmd++ = 0x00000000; if (adreno_is_a22x(adreno_dev)) { unsigned int i; unsigned int j = 2; for (i = REG_A220_VSC_BIN_SIZE; i <= REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) { *cmd++ = cp_type0_packet(i, 1); tmp_ctx.reg_values[j] = virt2gpu(cmd, &drawctxt->gpustate); *cmd++ = 0x00000000; j++; } } /* ALU Constants */ *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; #ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES *cmd++ = (0 << 24) | (0 << 16) | 0; /* Disable shadowing */ #else *cmd++ = (1 << 24) | (0 << 16) | 0; #endif *cmd++ = ALU_CONSTANTS; /* Texture Constants */ *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; #ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES /* Disable shadowing */ *cmd++ = (0 << 24) | (1 << 16) | 0; #else *cmd++ = (1 << 24) | (1 << 16) | 0; #endif *cmd++ = TEX_CONSTANTS; /* Boolean Constants */ *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + BOOL_CONSTANTS); *cmd++ = (2 << 16) | 0; /* the next BOOL_CONSTANT dwords is the shadow area for * boolean constants. */ tmp_ctx.bool_shadow = virt2gpu(cmd, &drawctxt->gpustate); cmd += BOOL_CONSTANTS; /* Loop Constants */ *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + LOOP_CONSTANTS); *cmd++ = (3 << 16) | 0; /* the next LOOP_CONSTANTS dwords is the shadow area for * loop constants. */ tmp_ctx.loop_shadow = virt2gpu(cmd, &drawctxt->gpustate); cmd += LOOP_CONSTANTS; /* create indirect buffer command for above command sequence */ create_ib1(drawctxt, drawctxt->reg_restore, start, cmd); tmp_ctx.cmd = cmd; }
static uint32_t adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, struct adreno_context *context, unsigned int flags, unsigned int *cmds, int sizedwords) { struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); unsigned int *ringcmds; unsigned int timestamp; unsigned int total_sizedwords = sizedwords + 6; unsigned int i; unsigned int rcmd_gpu; /* reserve space to temporarily turn off protected mode * error checking if needed */ total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0; total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0; /* 2 dwords to store the start of command sequence */ total_sizedwords += 2; if (adreno_is_a2xx(adreno_dev)) total_sizedwords += 2; /* CP_WAIT_FOR_IDLE */ if (adreno_is_a20x(adreno_dev)) total_sizedwords += 2; /* CACHE_FLUSH */ ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); /* GPU may hang during space allocation, if thats the case the current * context may have hung the GPU */ if (context && context->flags & CTXT_FLAGS_GPU_HANG) { KGSL_CTXT_WARN(rb->device, "Context %p caused a gpu hang. Will not accept commands for context %d\n", context, context->id); return rb->timestamp; } rcmd_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-total_sizedwords); GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER); if (flags & KGSL_CMD_FLAGS_PMODE) { /* disable protected mode error checking */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_SET_PROTECTED_MODE, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, 0); } for (i = 0; i < sizedwords; i++) { GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds); cmds++; } if (flags & KGSL_CMD_FLAGS_PMODE) { /* re-enable protected mode error checking */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_SET_PROTECTED_MODE, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, 1); } rb->timestamp++; timestamp = rb->timestamp; /* HW Workaround for MMU Page fault * due to memory getting free early before * GPU completes it. 
*/ if (adreno_is_a2xx(adreno_dev)) { GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_WAIT_FOR_IDLE, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00); } GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp))); GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); if (adreno_is_a20x(adreno_dev)) { GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH); } if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) { /* Conditional execution based on memory values */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_COND_EXEC, 4)); GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)) >> 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)) >> 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); /* # of conditional command DWORDs */ GSL_RB_WRITE(ringcmds, rcmd_gpu, 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_INTERRUPT, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK); }
/*
 * build_gmem2sys_cmds() - build the GMEM save command sequence
 * @adreno_dev: device, used for chip specific values (a22x vs. others),
 *              the instruction store size and pix_shader_start
 * @drawctxt: draw context that owns the shadow
 * @shadow: GMEM shadow description; commands are written to
 *          shadow->gmem_save_commands
 *
 * copy colour, depth, & stencil buffers from graphics memory to system memory
 *
 * The sequence saves TP0_CHICKEN to the address in tmp_ctx.chicken_restore,
 * loads the gmem2sys vertex/fragment shaders, programs the render state and
 * the RB copy registers and queues a single draw. It is wrapped into
 * shadow->gmem_save with create_ib1().
 *
 * Returns the position just past the last command dword written.
 */
static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
					 struct adreno_context *drawctxt,
					 struct gmem_shadow_t *shadow)
{
	unsigned int *cmds = shadow->gmem_save_commands;
	unsigned int *start = cmds;
	/*
	 * Calculate the new offset based on the adjusted base: the copy
	 * destination base is rounded down to 4K below, so the remainder is
	 * passed separately, expressed in pixels.
	 */
	unsigned int bytesperpixel = format2bytesperpixel[shadow->format];
	unsigned int addr = shadow->gmemshadow.gpuaddr;
	unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel;

	/* Store TP0_CHICKEN register */
	*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmds++ = REG_TP0_CHICKEN;
	*cmds++ = tmp_ctx.chicken_restore;

	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmds++ = 0;

	/* Set TP0_CHICKEN to zero */
	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
	*cmds++ = 0x00000000;

	/* Set PA_SC_AA_CONFIG to 0 */
	*cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1);
	*cmds++ = 0x00000000;

	/* program shader */

	/* load shader vtx constants ... 5 dwords (header + 4 payload) */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
	*cmds++ = (0x1 << 16) | SHADER_CONST_ADDR;
	*cmds++ = 0;
	/* valid(?) vtx constant flag & addr */
	*cmds++ = shadow->quad_vertices.gpuaddr | 0x3;
	/* limit = 12 dwords */
	*cmds++ = 0x00000030;

	/* Invalidate L2 cache to make sure vertices are updated */
	*cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1);
	*cmds++ = 0x1;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
	*cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX);
	*cmds++ = 0x00ffffff;	/* REG_VGT_MAX_VTX_INDX */
	*cmds++ = 0x0;		/* REG_VGT_MIN_VTX_INDX */
	*cmds++ = 0x00000000;	/* REG_VGT_INDX_OFFSET */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SC_AA_MASK);
	*cmds++ = 0x0000ffff;	/* REG_PA_SC_AA_MASK */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLORCONTROL);
	*cmds++ = 0x00000c20;

	/* Repartition shaders */
	*cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
	*cmds++ = 0x180;

	/* Invalidate Vertex & Pixel instruction code address and sizes */
	*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
	*cmds++ = 0x00003F00;

	*cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
	*cmds++ = adreno_encode_istore_size(adreno_dev)
		  | adreno_dev->pix_shader_start;

	/* load the patched vertex shader stream */
	cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN);

	/* Load the patched fragment shader stream */
	cmds =
	    program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN);

	/* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL);
	/* SQ_PROGRAM_CNTL value differs between a22x and the other chips */
	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 0x10018001;
	else
		*cmds++ = 0x10010001;
	*cmds++ = 0x00000008;

	/* resolve */

	/* PA_CL_VTE_CNTL */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_VTE_CNTL);
	/* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */
	*cmds++ = 0x00000b00;

	/* program surface info */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_RB_SURFACE_INFO);
	*cmds++ = shadow->gmem_pitch;	/* pitch, MSAA = 1 */

	/*
	 * RB_COLOR_INFO Endian=none, Linear, Swap=0, Base=gmem_base.
	 * The colour format field is taken from shadow->format.
	 */
	/* gmem base assumed 4K aligned. */
	BUG_ON(tmp_ctx.gmem_base & 0xFFF);
	*cmds++ =
	    (shadow->
	     format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base;

	/* disable Z */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_DEPTHCONTROL);
	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 0x08;
	else
		*cmds++ = 0;

	/*
	 * set REG_PA_SU_SC_MODE_CNTL
	 *              Front_ptype = draw triangles
	 *              Back_ptype = draw triangles
	 *              Provoking vertex = last
	 */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL);
	*cmds++ = 0x00080240;

	/*
	 * Use maximum scissor values -- quad vertices already have the
	 * correct bounds
	 */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL);
	*cmds++ = (0 << 16) | 0;
	*cmds++ = (0x1fff << 16) | (0x1fff);
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL);
	*cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0);
	*cmds++ = (0x1fff << 16) | (0x1fff);

	/*
	 * load the viewport so that z scale = clear depth and
	 * z offset = 0.0f
	 */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE);
	*cmds++ = 0xbf800000;	/* -1.0f */
	*cmds++ = 0x0;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_MASK);
	/*
	 * R = G = B = 1:enabled
	 * NOTE(review): 0xf sets four bits, presumably alpha as well - confirm
	 */
	*cmds++ = 0x0000000f;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK);
	*cmds++ = 0xffffffff;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_WRAPPING_0);
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;

	/*
	 * load the stencil ref value
	 * $AAM - do this later
	 */

	/* load the COPY state */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
	*cmds++ = CP_REG(REG_RB_COPY_CONTROL);
	*cmds++ = 0;		/* RB_COPY_CONTROL */
	*cmds++ = addr & 0xfffff000;	/* RB_COPY_DEST_BASE */
	*cmds++ = shadow->pitch >> 5;	/* RB_COPY_DEST_PITCH */

	/*
	 * Endian=none, Linear, Swap=0, !Dither, MaskWrite:R=G=B=A=1;
	 * the destination format field is taken from shadow->format
	 */
	*cmds++ = 0x0003c008 |
	    (shadow->format << RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT);
	/* Make sure we stay in offsetx field. */
	BUG_ON(offset & 0xfffff000);
	*cmds++ = offset;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_MODECONTROL);
	*cmds++ = 0x6;		/* EDRAM copy */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL);
	*cmds++ = 0x00010000;

	if (adreno_is_a22x(adreno_dev)) {
		/* a22x: extra setup, and a 3 dword CP_DRAW_INDX payload */
		*cmds++ = cp_type3_packet(CP_SET_DRAW_INIT_FLAGS, 1);
		*cmds++ = 0;

		*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
		*cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL);
		*cmds++ = 0x0000000;

		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
		*cmds++ = 0;           /* viz query info. */
		/* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/
		*cmds++ = 0x00004088;
		*cmds++ = 3;	       /* NumIndices=3 */
	} else {
		/* queue the draw packet */
		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 2);
		*cmds++ = 0;		/* viz query info. */
		/* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */
		*cmds++ = 0x00030088;
	}

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, shadow->gmem_save, start, cmds);

	return cmds;
}
/*
 * build_sys2gmem_cmds() - build the GMEM restore command sequence
 * @adreno_dev: device, used for chip specific values (a22x vs. others),
 *              the instruction store size and pix_shader_start
 * @drawctxt: draw context that owns the shadow
 * @shadow: GMEM shadow description; commands are written to
 *          shadow->gmem_restore_commands
 *
 * copy colour, depth, & stencil buffers from system memory to graphics memory
 *
 * The sequence saves TP0_CHICKEN to the address in tmp_ctx.chicken_restore,
 * loads the sys2gmem vertex/fragment shaders, sets up a texture constant
 * that points at the shadow buffer, programs the render state and queues a
 * single draw. It is wrapped into shadow->gmem_restore with create_ib1().
 *
 * Returns the position just past the last command dword written.
 */
static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
					 struct adreno_context *drawctxt,
					 struct gmem_shadow_t *shadow)
{
	unsigned int *cmds = shadow->gmem_restore_commands;
	unsigned int *start = cmds;

	/* Store TP0_CHICKEN register */
	*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmds++ = REG_TP0_CHICKEN;
	*cmds++ = tmp_ctx.chicken_restore;

	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmds++ = 0;

	/* Set TP0_CHICKEN to zero */
	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
	*cmds++ = 0x00000000;

	/* Set PA_SC_AA_CONFIG to 0 */
	*cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1);
	*cmds++ = 0x00000000;
	/* shader constants */

	/* vertex buffer constants */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);

	*cmds++ = (0x1 << 16) | (9 * 6);
	/* valid(?) vtx constant flag & addr */
	*cmds++ = shadow->quad_vertices.gpuaddr | 0x3;
	/* limit = 12 dwords */
	*cmds++ = 0x00000030;
	/* valid(?) vtx constant flag & addr */
	*cmds++ = shadow->quad_texcoords.gpuaddr | 0x3;
	/* limit = 8 dwords */
	*cmds++ = 0x00000020;
	*cmds++ = 0;
	*cmds++ = 0;

	/* Invalidate L2 cache to make sure vertices are updated */
	*cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1);
	*cmds++ = 0x1;

	/* load the vertex shader stream (selector 0 = vertex) */
	cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN);

	/* Repartition shaders */
	*cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
	*cmds++ = 0x180;

	/* Invalidate Vertex & Pixel instruction code address and sizes */
	*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
	*cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */

	*cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
	*cmds++ = adreno_encode_istore_size(adreno_dev)
		  | adreno_dev->pix_shader_start;

	/* Load the patched fragment shader stream */
	cmds =
	    program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN);

	/* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL);
	*cmds++ = 0x10030002;
	*cmds++ = 0x00000008;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SC_AA_MASK);
	*cmds++ = 0x0000ffff;	/* REG_PA_SC_AA_MASK */

	/* this register is only programmed on chips other than a22x */
	if (!adreno_is_a22x(adreno_dev)) {
		/* PA_SC_VIZ_QUERY */
		*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
		*cmds++ = CP_REG(REG_PA_SC_VIZ_QUERY);
		*cmds++ = 0x0;	/*REG_PA_SC_VIZ_QUERY */
	}

	/* RB_COLORCONTROL */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLORCONTROL);
	*cmds++ = 0x00000c20;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
	*cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX);
	*cmds++ = 0x00ffffff;	/* mmVGT_MAX_VTX_INDX */
	*cmds++ = 0x0;		/* mmVGT_MIN_VTX_INDX */
	*cmds++ = 0x00000000;	/* mmVGT_INDX_OFFSET */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL);
	*cmds++ = 0x00000002;	/* mmVGT_VERTEX_REUSE_BLOCK_CNTL */
	*cmds++ = 0x00000002;	/* mmVGT_OUT_DEALLOC_CNTL */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_SQ_INTERPOLATOR_CNTL);
	*cmds++ = 0xffffffff;	/* mmSQ_INTERPOLATOR_CNTL */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SC_AA_CONFIG);
	*cmds++ = 0x00000000;	/* REG_PA_SC_AA_CONFIG */

	/*
	 * set REG_PA_SU_SC_MODE_CNTL
	 * Front_ptype = draw triangles
	 * Back_ptype = draw triangles
	 * Provoking vertex = last
	 */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL);
	*cmds++ = 0x00080240;

	/*
	 * texture constants: start from the sys2gmem_tex_const template and
	 * OR in the shadow's pitch, address/format and dimensions
	 */
	*cmds++ =
	    cp_type3_packet(CP_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1));
	*cmds++ = (0x1 << 16) | (0 * 6);
	/* template length is in dwords; << 2 gives the byte count */
	memcpy(cmds, sys2gmem_tex_const, SYS2GMEM_TEX_CONST_LEN << 2);
	cmds[0] |= (shadow->pitch >> 5) << 22;
	cmds[1] |=
	    shadow->gmemshadow.gpuaddr | surface_format_table[shadow->format];
	/* '<<' binds tighter than '|': (width - 1) | ((height - 1) << 13) */
	cmds[2] |= (shadow->width - 1) | (shadow->height - 1) << 13;
	cmds += SYS2GMEM_TEX_CONST_LEN;

	/* program surface info */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_RB_SURFACE_INFO);
	*cmds++ = shadow->gmem_pitch;	/* pitch, MSAA = 1 */

	/*
	 * RB_COLOR_INFO Endian=none, Linear, Swap=0, Base=gmem_base.
	 * The colour format field is taken from shadow->format.
	 */
	*cmds++ =
	    (shadow->
	     format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base;

	/* RB_DEPTHCONTROL */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_DEPTHCONTROL);

	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 8;		/* disable Z */
	else
		*cmds++ = 0;		/* disable Z */

	/*
	 * Use maximum scissor values -- quad vertices already
	 * have the correct bounds
	 */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL);
	*cmds++ = (0 << 16) | 0;
	*cmds++ = ((0x1fff) << 16) | 0x1fff;
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL);
	*cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0);
	*cmds++ = ((0x1fff) << 16) | 0x1fff;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_VTE_CNTL);
	/* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */
	*cmds++ = 0x00000b00;

	/* load the viewport so that z scale = clear depth and z offset = 0.0f */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE);
	*cmds++ = 0xbf800000;
	*cmds++ = 0x0;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_MASK);
	/*
	 * R = G = B = 1:enabled
	 * NOTE(review): 0xf sets four bits, presumably alpha as well - confirm
	 */
	*cmds++ = 0x0000000f;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK);
	*cmds++ = 0xffffffff;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_WRAPPING_0);
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;

	/*
	 * load the stencil ref value
	 * $AAM - do this later
	 */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_MODECONTROL);
	/* draw pixels with color and depth/stencil component */
	*cmds++ = 0x4;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL);
	*cmds++ = 0x00010000;

	if (adreno_is_a22x(adreno_dev)) {
		/* a22x: extra setup, and a 3 dword CP_DRAW_INDX payload */
		*cmds++ = cp_type3_packet(CP_SET_DRAW_INIT_FLAGS, 1);
		*cmds++ = 0;

		*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
		*cmds++ =
		    CP_REG(REG_A220_RB_LRZ_VSC_CONTROL);
		*cmds++ = 0x0000000;

		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
		*cmds++ = 0;           /* viz query info. */
		/* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/
		*cmds++ = 0x00004088;
		*cmds++ = 3;	       /* NumIndices=3 */
	} else {
		/* queue the draw packet */
		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 2);
		*cmds++ = 0;	/* viz query info. */
		/* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */
		*cmds++ = 0x00030088;
	}

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, shadow->gmem_restore, start, cmds);

	return cmds;
}
static void build_shader_save_restore_cmds(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { unsigned int *cmd = tmp_ctx.cmd; unsigned int *save, *restore, *fixup; unsigned int *startSizeVtx, *startSizePix, *startSizeShared; unsigned int *partition1; unsigned int *shaderBases, *partition2; tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET; tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + _shader_shadow_size(adreno_dev); tmp_ctx.shader_shared = tmp_ctx.shader_pixel + _shader_shadow_size(adreno_dev); restore = cmd; *cmd++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); *cmd++ = 0x00000300; *cmd++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); shaderBases = cmd++; *cmd++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); partition1 = cmd++; *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); *cmd++ = tmp_ctx.shader_vertex + 0x0; startSizeVtx = cmd++; *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); *cmd++ = tmp_ctx.shader_pixel + 0x1; startSizePix = cmd++; *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); *cmd++ = tmp_ctx.shader_shared + 0x2; startSizeShared = cmd++; create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd); /* * fixup SET_SHADER_BASES data * * since self-modifying PM4 code is being used here, a seperate * command buffer is used for this fixup operation, to ensure the * commands are not read by the PM4 engine before the data fields * have been written. 
*/ fixup = cmd; *cmd++ = cp_type0_packet(REG_SCRATCH_REG2, 1); partition2 = cmd++; *cmd++ = cp_type3_packet(CP_REG_RMW, 3); *cmd++ = REG_SCRATCH_REG2; *cmd++ = 0x0FFF0FFF; *cmd++ = adreno_encode_istore_size(adreno_dev); *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_SCRATCH_REG2; *cmd++ = virt2gpu(shaderBases, &drawctxt->gpustate); create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd); save = cmd; *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_SQ_INST_STORE_MANAGMENT; *cmd++ = virt2gpu(partition1, &drawctxt->gpustate); *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_SQ_INST_STORE_MANAGMENT; *cmd++ = virt2gpu(partition2, &drawctxt->gpustate); *cmd++ = cp_type3_packet(CP_IM_STORE, 2); *cmd++ = tmp_ctx.shader_vertex + 0x0; *cmd++ = virt2gpu(startSizeVtx, &drawctxt->gpustate); *cmd++ = cp_type3_packet(CP_IM_STORE, 2); *cmd++ = tmp_ctx.shader_pixel + 0x1; *cmd++ = virt2gpu(startSizePix, &drawctxt->gpustate); *cmd++ = cp_type3_packet(CP_IM_STORE, 2); *cmd++ = tmp_ctx.shader_shared + 0x2; *cmd++ = virt2gpu(startSizeShared, &drawctxt->gpustate); *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; create_ib1(drawctxt, drawctxt->shader_save, save, cmd); tmp_ctx.cmd = cmd; }
/* save h/w regs, alu constants, texture contants, etc. ... * requires: bool_shadow_gpuaddr, loop_shadow_gpuaddr */ static void build_regsave_cmds(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { unsigned int *start = tmp_ctx.cmd; unsigned int *cmd = start; *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; #ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES /* Make sure the HW context has the correct register values * before reading them. */ *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1); *cmd++ = 0; { unsigned int i = 0; unsigned int reg_array_size = 0; const unsigned int *ptr_register_ranges; /* Based on chip id choose the register ranges */ if (adreno_is_a220(adreno_dev)) { ptr_register_ranges = register_ranges_a220; reg_array_size = ARRAY_SIZE(register_ranges_a220); } else if (adreno_is_a225(adreno_dev)) { ptr_register_ranges = register_ranges_a225; reg_array_size = ARRAY_SIZE(register_ranges_a225); } else { ptr_register_ranges = register_ranges_a20x; reg_array_size = ARRAY_SIZE(register_ranges_a20x); } /* Write HW registers into shadow */ for (i = 0; i < (reg_array_size/2) ; i++) { build_reg_to_mem_range(ptr_register_ranges[i*2], ptr_register_ranges[i*2+1], &cmd, drawctxt); } } /* Copy ALU constants */ cmd = reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr) & 0xFFFFE000, REG_SQ_CONSTANT_0, ALU_CONSTANTS); /* Copy Tex constants */ cmd = reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000, REG_SQ_FETCH_0, TEX_CONSTANTS); #else /* Insert a wait for idle packet before reading the registers. * This is to fix a hang/reset seen during stress testing. In this * hang, CP encountered a timeout reading SQ's boolean constant * register. There is logic in the HW that blocks reading of this * register when the SQ block is not idle, which we believe is * contributing to the hang.*/ *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; /* H/w registers are already shadowed; just need to disable shadowing * to prevent corruption. 
*/ *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; *cmd++ = 4 << 16; /* regs, start=0 */ *cmd++ = 0x0; /* count = 0 */ /* ALU constants are already shadowed; just need to disable shadowing * to prevent corruption. */ *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; *cmd++ = 0 << 16; /* ALU, start=0 */ *cmd++ = 0x0; /* count = 0 */ /* Tex constants are already shadowed; just need to disable shadowing * to prevent corruption. */ *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; *cmd++ = 1 << 16; /* Tex, start=0 */ *cmd++ = 0x0; /* count = 0 */ #endif /* Need to handle some of the registers separately */ *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_SQ_GPR_MANAGEMENT; *cmd++ = tmp_ctx.reg_values[0]; *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_TP0_CHICKEN; *cmd++ = tmp_ctx.reg_values[1]; if (adreno_is_a22x(adreno_dev)) { unsigned int i; unsigned int j = 2; for (i = REG_A220_VSC_BIN_SIZE; i <= REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) { *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = i; *cmd++ = tmp_ctx.reg_values[j]; j++; } } /* Copy Boolean constants */ cmd = reg_to_mem(cmd, tmp_ctx.bool_shadow, REG_SQ_CF_BOOLEANS, BOOL_CONSTANTS); /* Copy Loop constants */ cmd = reg_to_mem(cmd, tmp_ctx.loop_shadow, REG_SQ_CF_LOOP, LOOP_CONSTANTS); /* create indirect buffer command for above command sequence */ create_ib1(drawctxt, drawctxt->reg_save, start, cmd); tmp_ctx.cmd = cmd; }
static void a2xx_drawctxt_restore(struct adreno_device *adreno_dev, struct adreno_context *context) { struct kgsl_device *device = &adreno_dev->dev; unsigned int cmds[5]; if (context == NULL) { kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable, adreno_dev->drawctxt_active->id); return; } KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags); cmds[0] = cp_nop_packet(1); cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER; cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2); cmds[3] = device->memstore.gpuaddr + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context); cmds[4] = context->id; adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, cmds, 5); kgsl_mmu_setstate(&device->mmu, context->pagetable, context->id); #ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP kgsl_cffdump_syncmem(NULL, &context->gpustate, context->gpustate.gpuaddr, LCC_SHADOW_SIZE + REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE, false); #endif if (context->flags & CTXT_FLAGS_GMEM_RESTORE) { adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_PMODE, context->context_gmem_shadow.gmem_restore, 3); if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->chicken_restore, 3); } context->flags &= ~CTXT_FLAGS_GMEM_RESTORE; } if (!(context->flags & CTXT_FLAGS_PREAMBLE)) { adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->reg_restore, 3); if (context->flags & CTXT_FLAGS_SHADER_RESTORE) { adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, context->shader_restore, 3); } } if (adreno_is_a20x(adreno_dev)) { cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1); cmds[1] = context->bin_base_offset; adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE, cmds, 2); } }
static void build_shader_save_restore_cmds(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { unsigned int *cmd = tmp_ctx.cmd; unsigned int *save, *restore, *fixup; unsigned int *startSizeVtx, *startSizePix, *startSizeShared; unsigned int *partition1; unsigned int *shaderBases, *partition2; /* compute vertex, pixel and shared instruction shadow GPU addresses */ tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET; tmp_ctx.shader_pixel = tmp_ctx.shader_vertex + _shader_shadow_size(adreno_dev); tmp_ctx.shader_shared = tmp_ctx.shader_pixel + _shader_shadow_size(adreno_dev); /* restore shader partitioning and instructions */ restore = cmd; /* start address */ /* Invalidate Vertex & Pixel instruction code address and sizes */ *cmd++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); *cmd++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */ /* Restore previous shader vertex & pixel instruction bases. */ *cmd++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); shaderBases = cmd++; /* TBD #5: shader bases (from fixup) */ /* write the shader partition information to a scratch register */ *cmd++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); partition1 = cmd++; /* TBD #4a: partition info (from save) */ /* load vertex shader instructions from the shadow. */ *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); *cmd++ = tmp_ctx.shader_vertex + 0x0; /* 0x0 = Vertex */ startSizeVtx = cmd++; /* TBD #1: start/size (from save) */ /* load pixel shader instructions from the shadow. */ *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); *cmd++ = tmp_ctx.shader_pixel + 0x1; /* 0x1 = Pixel */ startSizePix = cmd++; /* TBD #2: start/size (from save) */ /* load shared shader instructions from the shadow. 
*/ *cmd++ = cp_type3_packet(CP_IM_LOAD, 2); *cmd++ = tmp_ctx.shader_shared + 0x2; /* 0x2 = Shared */ startSizeShared = cmd++; /* TBD #3: start/size (from save) */ /* create indirect buffer command for above command sequence */ create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd); /* * fixup SET_SHADER_BASES data * * since self-modifying PM4 code is being used here, a seperate * command buffer is used for this fixup operation, to ensure the * commands are not read by the PM4 engine before the data fields * have been written. */ fixup = cmd; /* start address */ /* write the shader partition information to a scratch register */ *cmd++ = cp_type0_packet(REG_SCRATCH_REG2, 1); partition2 = cmd++; /* TBD #4b: partition info (from save) */ /* mask off unused bits, then OR with shader instruction memory size */ *cmd++ = cp_type3_packet(CP_REG_RMW, 3); *cmd++ = REG_SCRATCH_REG2; /* AND off invalid bits. */ *cmd++ = 0x0FFF0FFF; /* OR in instruction memory size. */ *cmd++ = adreno_encode_istore_size(adreno_dev); /* write the computed value to the SET_SHADER_BASES data field */ *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_SCRATCH_REG2; /* TBD #5: shader bases (to restore) */ *cmd++ = virt2gpu(shaderBases, &drawctxt->gpustate); /* create indirect buffer command for above command sequence */ create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd); /* save shader partitioning and instructions */ save = cmd; /* start address */ *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; /* fetch the SQ_INST_STORE_MANAGMENT register value, * store the value in the data fields of the SET_CONSTANT commands * above. 
*/ *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_SQ_INST_STORE_MANAGMENT; /* TBD #4a: partition info (to restore) */ *cmd++ = virt2gpu(partition1, &drawctxt->gpustate); *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_SQ_INST_STORE_MANAGMENT; /* TBD #4b: partition info (to fixup) */ *cmd++ = virt2gpu(partition2, &drawctxt->gpustate); /* store the vertex shader instructions */ *cmd++ = cp_type3_packet(CP_IM_STORE, 2); *cmd++ = tmp_ctx.shader_vertex + 0x0; /* 0x0 = Vertex */ /* TBD #1: start/size (to restore) */ *cmd++ = virt2gpu(startSizeVtx, &drawctxt->gpustate); /* store the pixel shader instructions */ *cmd++ = cp_type3_packet(CP_IM_STORE, 2); *cmd++ = tmp_ctx.shader_pixel + 0x1; /* 0x1 = Pixel */ /* TBD #2: start/size (to restore) */ *cmd++ = virt2gpu(startSizePix, &drawctxt->gpustate); /* store the shared shader instructions if vertex base is nonzero */ *cmd++ = cp_type3_packet(CP_IM_STORE, 2); *cmd++ = tmp_ctx.shader_shared + 0x2; /* 0x2 = Shared */ /* TBD #3: start/size (to restore) */ *cmd++ = virt2gpu(startSizeShared, &drawctxt->gpustate); *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; /* create indirect buffer command for above command sequence */ create_ib1(drawctxt, drawctxt->shader_save, save, cmd); tmp_ctx.cmd = cmd; }
/*
 * adreno_ringbuffer_addcmds() - copy a command sequence into the
 * ringbuffer and append timestamp bookkeeping packets
 * @rb: ringbuffer to write into
 * @context: context the commands belong to
 * @flags: KGSL_CMD_FLAGS_* bits (PMODE and NO_TS_CMP are tested here)
 * @cmds: command dwords to copy
 * @sizedwords: number of dwords in @cmds
 *
 * The only return visible here is the early exit for a context flagged
 * CTXT_FLAGS_GPU_HANG, which returns rb->timestamp[context_id] without
 * writing anything.
 *
 * NOTE(review): this copy of the definition is truncated - the text stops
 * after the conditional-interrupt block; no submit, final return or
 * closing brace is present before the next definition begins.
 *
 * NOTE(review): @context is dereferenced unconditionally (context->flags)
 * although it is tested against NULL further down with "if (context)";
 * confirm whether callers may pass NULL.
 */
static uint32_t adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
				struct adreno_context *context,
				unsigned int flags, unsigned int *cmds,
				int sizedwords) {
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	unsigned int *ringcmds;
	unsigned int timestamp;
	unsigned int total_sizedwords = sizedwords;
	unsigned int i;
	unsigned int rcmd_gpu;
	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
	unsigned int gpuaddr = rb->device->memstore.gpuaddr;

	/* use the per-context memstore slot only if the context asks for it */
	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)
		context_id = context->id;

	/*
	 * Work out how many dwords to reserve.  Each term below matches one
	 * group of GSL_RB_WRITE calls further down.
	 */
	/* two CP_SET_PROTECTED_MODE packets (2 dwords each) */
	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
	/* CP_COND_EXEC (5 dwords) + CP_INTERRUPT (2 dwords) */
	total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0;
	/* nop packet + KGSL_CMD_IDENTIFIER */
	total_sizedwords += 2;
	/*
	 * a3xx extras; only four of these dwords are written in the visible
	 * part of the function - presumably the rest belong to the missing
	 * tail (TODO confirm).
	 */
	if (adreno_is_a3xx(adreno_dev))
		total_sizedwords += 7;
	/* REG_CP_TIMESTAMP scratchpad write */
	total_sizedwords += 2;
	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) {
		total_sizedwords += 3; /* sop timestamp */
		total_sizedwords += 4; /* eop timestamp */
		total_sizedwords += 3; /* global eop timestamp (CP_MEM_WRITE) */
	} else {
		total_sizedwords += 4; /* global eop timestamp */
	}

	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
	/* refuse further commands from a context flagged as having hung */
	if (context->flags & CTXT_FLAGS_GPU_HANG) {
		KGSL_CTXT_WARN(rb->device,
			"Context %p caused a gpu hang. Will not accept commands for context %d\n",
			context, context->id);
		return rb->timestamp[context_id];
	}

	/* GPU address of the first reserved dword */
	rcmd_gpu = rb->buffer_desc.gpuaddr +
		sizeof(uint)*(rb->wptr-total_sizedwords);

	/* marker for the start of the command sequence */
	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* CP_SET_PROTECTED_MODE with value 0 ahead of the payload */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
	}

	/* the caller's commands, copied verbatim */
	for (i = 0; i < sizedwords; i++) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds);
		cmds++;
	}

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* CP_SET_PROTECTED_MODE with value 1 after the payload */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
	}

	/* the global timestamp is bumped exactly once per submission */
	rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;

	if (context) {
		/* keep the slot selected by context_id in step */
		if (context_id == KGSL_MEMSTORE_GLOBAL)
			rb->timestamp[context_id] =
				rb->timestamp[KGSL_MEMSTORE_GLOBAL];
		else
			rb->timestamp[context_id]++;
	}
	timestamp = rb->timestamp[context_id];

	/* global timestamp into the REG_CP_TIMESTAMP scratchpad */
	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);

	if (adreno_is_a3xx(adreno_dev)) {
		/* event 0x07 followed by a wait for idle */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x07);
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
	}

	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) {
		/* start-of-pipeline timestamp for this context */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

		/* end-of-pipeline timestamp for this context */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

		/* global end-of-pipeline timestamp via a plain memory write */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
	} else {
		/* global end-of-pipeline timestamp with CACHE_FLUSH_TS */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
	}

	if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) {
		/*
		 * CP_COND_EXEC on the ts_cmp_enable / ref_wait_ts memstore
		 * fields; it guards the two-dword CP_INTERRUPT packet below.
		 */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_COND_EXEC, 4));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			(gpuaddr + KGSL_MEMSTORE_OFFSET(
				context_id, ts_cmp_enable)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			(gpuaddr + KGSL_MEMSTORE_OFFSET(
				context_id, ref_wait_ts)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
		/* # of conditional command DWORDs */
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_INTERRUPT, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK);
	}
/*
 * adreno_ringbuffer_addcmds() - copy a command sequence into the
 * ringbuffer and append timestamp bookkeeping packets
 * @rb: ringbuffer to write into
 * @context: context the commands belong to
 * @flags: KGSL_CMD_FLAGS_* bits (PMODE and NO_TS_CMP are tested here)
 * @cmds: command dwords to copy
 * @sizedwords: number of dwords in @cmds
 *
 * The only return visible here is the early exit for a context flagged
 * CTXT_FLAGS_GPU_HANG, which returns rb->timestamp[context_id] without
 * writing anything.
 *
 * NOTE(review): this copy of the definition is truncated - the text stops
 * after the conditional-interrupt block; no submit, final return or
 * closing brace is present before the next definition begins.
 *
 * NOTE(review): @context is dereferenced unconditionally (context->flags)
 * although it is tested against NULL further down with "if (context)";
 * confirm whether callers may pass NULL.
 */
static uint32_t adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
				struct adreno_context *context,
				unsigned int flags, unsigned int *cmds,
				int sizedwords) {
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	unsigned int *ringcmds;
	unsigned int timestamp;
	unsigned int total_sizedwords = sizedwords;
	unsigned int i;
	unsigned int rcmd_gpu;
	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
	unsigned int gpuaddr = rb->device->memstore.gpuaddr;

	/*
	 * if the context was not created with per context timestamp
	 * support, we must use the global timestamp since issueibcmds
	 * will be returning that one.
	 */
	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)
		context_id = context->id;

	/* reserve space to temporarily turn off protected mode
	 * error checking if needed
	 */
	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
	/* CP_COND_EXEC (5 dwords) + CP_INTERRUPT (2 dwords) */
	total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0;
	/* 2 dwords to store the start of command sequence */
	total_sizedwords += 2;

	if (adreno_is_a2xx(adreno_dev))
		total_sizedwords += 2; /* CP_WAIT_FOR_IDLE */

	total_sizedwords += 2; /* scratchpad ts for recovery */
	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) {
		total_sizedwords += 3; /* sop timestamp */
		total_sizedwords += 4; /* eop timestamp */
		total_sizedwords += 3; /* global timestamp without cache
					* flush for non-zero context */
	} else {
		total_sizedwords += 4; /* global timestamp for recovery*/
	}

	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
	/* GPU may hang during space allocation, if thats the case the current
	 * context may have hung the GPU
	 */
	if (context->flags & CTXT_FLAGS_GPU_HANG) {
		KGSL_CTXT_WARN(rb->device,
			"Context %p caused a gpu hang. Will not accept commands for context %d\n",
			context, context->id);
		return rb->timestamp[context_id];
	}

	/* GPU address of the first reserved dword */
	rcmd_gpu = rb->buffer_desc.gpuaddr +
		sizeof(uint)*(rb->wptr-total_sizedwords);

	/* marker for the start of the command sequence */
	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* disable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
	}

	/* the caller's commands, copied verbatim */
	for (i = 0; i < sizedwords; i++) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds);
		cmds++;
	}

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* re-enable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
	}

	/* always increment the global timestamp. once. */
	rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;

	if (context) {
		/* keep the slot selected by context_id in step */
		if (context_id == KGSL_MEMSTORE_GLOBAL)
			rb->timestamp[context_id] =
				rb->timestamp[KGSL_MEMSTORE_GLOBAL];
		else
			rb->timestamp[context_id]++;
	}
	timestamp = rb->timestamp[context_id];

	/* HW Workaround for MMU Page fault
	 * due to memory getting free early before
	 * GPU completes it.
	 */
	if (adreno_is_a2xx(adreno_dev)) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
	}

	/* scratchpad ts for recovery */
	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);

	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) {
		/* start-of-pipeline timestamp */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

		/* end-of-pipeline timestamp */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

		/* global end-of-pipeline timestamp via a plain memory write */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
	} else {
		/* global end-of-pipeline timestamp with CACHE_FLUSH_TS */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
	}

	if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) {
		/* Conditional execution based on memory values */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_COND_EXEC, 4));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			(gpuaddr + KGSL_MEMSTORE_OFFSET(
				context_id, ts_cmp_enable)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			(gpuaddr + KGSL_MEMSTORE_OFFSET(
				context_id, ref_wait_ts)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
		/* # of conditional command DWORDs */
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_INTERRUPT, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK);
	}
/** * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode * @rb: Pointer to adreno ringbuffer * @load_jt: If non zero only load Jump tables * * Bootstrap ucode for GPU * load_jt == 0, bootstrap full microcode * load_jt == 1, bootstrap jump tables of microcode * * For example a bootstrap packet would like below * Setup a type3 bootstrap packet * PFP size to bootstrap * PFP addr to write the PFP data * PM4 size to bootstrap * PM4 addr to write the PM4 data * PFP dwords from microcode to bootstrap * PM4 size dwords from microcode to bootstrap */ static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb, unsigned int load_jt) { unsigned int *cmds, bootstrap_size, rb_size; int i = 0; int ret; struct kgsl_device *device = rb->device; struct adreno_device *adreno_dev = ADRENO_DEVICE(device); unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr; /* Only bootstrap jump tables of ucode */ if (load_jt) { pm4_idx = adreno_dev->gpucore->pm4_jt_idx; pm4_addr = adreno_dev->gpucore->pm4_jt_addr; pfp_idx = adreno_dev->gpucore->pfp_jt_idx; pfp_addr = adreno_dev->gpucore->pfp_jt_addr; } else { /* Bootstrap full ucode */ pm4_idx = 1; pm4_addr = 0; pfp_idx = 1; pfp_addr = 0; } pm4_size = (adreno_dev->pm4_fw_size - pm4_idx); pfp_size = (adreno_dev->pfp_fw_size - pfp_idx); bootstrap_size = (pm4_size + pfp_size + 5); /* * Overwrite the first entry in the jump table with the special * bootstrap opcode */ if (adreno_is_a4xx(adreno_dev)) { adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x400); adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, 0x6f0009); /* * The support packets (the RMW and INTERRUPT) that are sent * after the bootstrap packet should not be included in the size * of the bootstrap packet but we do need to reserve enough * space for those too */ rb_size = bootstrap_size + 6; } else { adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x200); adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, 0x6f0005); rb_size = 
bootstrap_size; } /* clear ME_HALT to start micro engine */ adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0); cmds = adreno_ringbuffer_allocspace(rb, rb_size); if (IS_ERR(cmds)) return PTR_ERR(cmds); if (cmds == NULL) return -ENOSPC; /* Construct the packet that bootsraps the ucode */ *cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1)); *cmds++ = pfp_size; *cmds++ = pfp_addr; *cmds++ = pm4_size; *cmds++ = pm4_addr; /** * Theory of operation: * * In A4x, we cannot have the PFP executing instructions while its instruction * RAM is loading. We load the PFP's instruction RAM using type-0 writes * from the ME. * * To make sure the PFP is not fetching instructions at the same time, * we put it in a one-instruction loop: * mvc (ME), (ringbuffer) * which executes repeatedly until all of the data has been moved from * the ring buffer to the ME. */ if (adreno_is_a4xx(adreno_dev)) { for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) *cmds++ = adreno_dev->pm4_fw[i]; for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) *cmds++ = adreno_dev->pfp_fw[i]; *cmds++ = cp_type3_packet(CP_REG_RMW, 3); *cmds++ = 0x20000000 + A4XX_CP_RB_WPTR; *cmds++ = 0xffffffff; *cmds++ = 0x00000002; *cmds++ = cp_type3_packet(CP_INTERRUPT, 1); *cmds++ = 0; rb->wptr = rb->wptr - 2; adreno_ringbuffer_submit(rb, NULL); rb->wptr = rb->wptr + 2; } else { for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) *cmds++ = adreno_dev->pfp_fw[i]; for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) *cmds++ = adreno_dev->pm4_fw[i]; adreno_ringbuffer_submit(rb, NULL); } /* idle device to validate bootstrap */ ret = adreno_spin_idle(device); if (ret) { KGSL_DRV_ERR(rb->device, "microcode bootstrap failed to idle\n"); kgsl_device_snapshot(device, NULL); } /* Clear the chicken bit for speed up on A430 and its derivatives */ if (!adreno_is_a420(adreno_dev)) kgsl_regwrite(device, A4XX_CP_DEBUG, A4XX_CP_DEBUG_DEFAULT & ~(1 << 14)); return ret; }
static void build_regrestore_cmds(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { unsigned int *start = tmp_ctx.cmd; unsigned int *cmd = start; unsigned int i = 0; unsigned int reg_array_size = 0; const unsigned int *ptr_register_ranges; *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; cmd++; #ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES *cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1; #else *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; #endif if (adreno_is_a220(adreno_dev)) { ptr_register_ranges = register_ranges_a220; reg_array_size = ARRAY_SIZE(register_ranges_a220); } else if (adreno_is_a225(adreno_dev)) { ptr_register_ranges = register_ranges_a225; reg_array_size = ARRAY_SIZE(register_ranges_a225); } else { ptr_register_ranges = register_ranges_a20x; reg_array_size = ARRAY_SIZE(register_ranges_a20x); } for (i = 0; i < (reg_array_size/2); i++) { cmd = reg_range(cmd, ptr_register_ranges[i*2], ptr_register_ranges[i*2+1]); } start[2] = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, (cmd - start) - 3); #ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES start[4] |= (0 << 24) | (4 << 16); #else start[4] |= (1 << 24) | (4 << 16); #endif *cmd++ = cp_type0_packet(REG_SQ_GPR_MANAGEMENT, 1); tmp_ctx.reg_values[0] = virt2gpu(cmd, &drawctxt->gpustate); *cmd++ = 0x00040400; *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; *cmd++ = cp_type0_packet(REG_TP0_CHICKEN, 1); tmp_ctx.reg_values[1] = virt2gpu(cmd, &drawctxt->gpustate); *cmd++ = 0x00000000; if (adreno_is_a20x(adreno_dev)) { *cmd++ = cp_type0_packet(REG_RB_BC_CONTROL, 1); tmp_ctx.reg_values[2] = virt2gpu(cmd, &drawctxt->gpustate); *cmd++ = 0x00000000; } if (adreno_is_a22x(adreno_dev)) { unsigned int i; unsigned int j = 2; for (i = REG_A220_VSC_BIN_SIZE; i <= REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) { *cmd++ = cp_type0_packet(i, 1); tmp_ctx.reg_values[j] = virt2gpu(cmd, &drawctxt->gpustate); *cmd++ = 0x00000000; j++; } } *cmd++ = 
cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; #ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES *cmd++ = (0 << 24) | (0 << 16) | 0; #else *cmd++ = (1 << 24) | (0 << 16) | 0; #endif *cmd++ = ALU_CONSTANTS; *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; #ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES *cmd++ = (0 << 24) | (1 << 16) | 0; #else *cmd++ = (1 << 24) | (1 << 16) | 0; #endif *cmd++ = TEX_CONSTANTS; *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + BOOL_CONSTANTS); *cmd++ = (2 << 16) | 0; tmp_ctx.bool_shadow = virt2gpu(cmd, &drawctxt->gpustate); cmd += BOOL_CONSTANTS; *cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + LOOP_CONSTANTS); *cmd++ = (3 << 16) | 0; tmp_ctx.loop_shadow = virt2gpu(cmd, &drawctxt->gpustate); cmd += LOOP_CONSTANTS; create_ib1(drawctxt, drawctxt->reg_restore, start, cmd); tmp_ctx.cmd = cmd; }
static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev, struct adreno_context *drawctxt, struct gmem_shadow_t *shadow) { unsigned int *cmds = shadow->gmem_restore_commands; unsigned int *start = cmds; if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmds++ = REG_TP0_CHICKEN; *cmds++ = tmp_ctx.chicken_restore; *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0; } *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); *cmds++ = 0x00000000; *cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1); *cmds++ = 0x00000000; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7); *cmds++ = (0x1 << 16) | (9 * 6); *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; *cmds++ = 0x00000030; *cmds++ = shadow->quad_texcoords.gpuaddr | 0x3; *cmds++ = 0x00000020; *cmds++ = 0; *cmds++ = 0; *cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1); *cmds++ = 0x1; cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN); *cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); *cmds++ = adreno_dev->pix_shader_start; *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); *cmds++ = 0x00000300; *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); *cmds++ = adreno_encode_istore_size(adreno_dev) | adreno_dev->pix_shader_start; cmds = program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN); *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL); *cmds++ = 0x10030002; *cmds++ = 0x00000008; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_PA_SC_AA_MASK); *cmds++ = 0x0000ffff; if (!adreno_is_a22x(adreno_dev)) { *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_PA_SC_VIZ_QUERY); *cmds++ = 0x0; } *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_RB_COLORCONTROL); *cmds++ = 0x00000c20; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); *cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX); *cmds++ = 0x00ffffff; *cmds++ = 0x0; *cmds++ = 0x00000000; *cmds++ = 
cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL); *cmds++ = 0x00000002; *cmds++ = 0x00000002; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_SQ_INTERPOLATOR_CNTL); *cmds++ = 0xffffffff; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_PA_SC_AA_CONFIG); *cmds++ = 0x00000000; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL); *cmds++ = 0x00080240; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1)); *cmds++ = (0x1 << 16) | (0 * 6); memcpy(cmds, sys2gmem_tex_const, SYS2GMEM_TEX_CONST_LEN << 2); cmds[0] |= (shadow->pitch >> 5) << 22; cmds[1] |= shadow->gmemshadow.gpuaddr | surface_format_table[shadow->format]; cmds[2] |= (shadow->width - 1) | (shadow->height - 1) << 13; cmds += SYS2GMEM_TEX_CONST_LEN; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_RB_SURFACE_INFO); *cmds++ = shadow->gmem_pitch; *cmds++ = (shadow-> format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_RB_DEPTHCONTROL); if (adreno_is_a22x(adreno_dev)) *cmds++ = 8; else *cmds++ = 0; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL); *cmds++ = (0 << 16) | 0; *cmds++ = ((0x1fff) << 16) | 0x1fff; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL); *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0); *cmds++ = ((0x1fff) << 16) | 0x1fff; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_PA_CL_VTE_CNTL); *cmds++ = 0x00000b00; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE); *cmds++ = 0xbf800000; *cmds++ = 0x0; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_RB_COLOR_MASK); *cmds++ = 0x0000000f; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK); *cmds++ = 0xffffffff; *cmds++ = 
cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_SQ_WRAPPING_0); *cmds++ = 0x00000000; *cmds++ = 0x00000000; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_RB_MODECONTROL); *cmds++ = 0x4; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL); *cmds++ = 0x00010000; if (adreno_is_a22x(adreno_dev)) { *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL); *cmds++ = 0x0000000; *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); *cmds++ = 0; *cmds++ = 0x00004088; *cmds++ = 3; } else { *cmds++ = cp_type3_packet(CP_DRAW_INDX, 2); *cmds++ = 0; *cmds++ = 0x00030088; } create_ib1(drawctxt, shadow->gmem_restore, start, cmds); return cmds; }
static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev, struct adreno_context *drawctxt, struct gmem_shadow_t *shadow) { unsigned int *cmds = shadow->gmem_save_commands; unsigned int *start = cmds; unsigned int bytesperpixel = format2bytesperpixel[shadow->format]; unsigned int addr = shadow->gmemshadow.gpuaddr; unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel; if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) { *cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmds++ = REG_TP0_CHICKEN; *cmds++ = tmp_ctx.chicken_restore; *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmds++ = 0; } *cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1); *cmds++ = 0x00000000; *cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1); *cmds++ = 0x00000000; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); *cmds++ = (0x1 << 16) | SHADER_CONST_ADDR; *cmds++ = 0; *cmds++ = shadow->quad_vertices.gpuaddr | 0x3; *cmds++ = 0x00000030; *cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1); *cmds++ = 0x1; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4); *cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX); *cmds++ = 0x00ffffff; *cmds++ = 0x0; *cmds++ = 0x00000000; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_PA_SC_AA_MASK); *cmds++ = 0x0000ffff; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_RB_COLORCONTROL); *cmds++ = 0x00000c20; *cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1); *cmds++ = adreno_dev->pix_shader_start; *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); *cmds++ = 0x00003F00; *cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1); *cmds++ = adreno_encode_istore_size(adreno_dev) | adreno_dev->pix_shader_start; cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN); cmds = program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN); *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL); if (adreno_is_a22x(adreno_dev)) *cmds++ = 0x10018001; else *cmds++ = 0x10010001; *cmds++ = 0x00000008; 
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_PA_CL_VTE_CNTL); *cmds++ = 0x00000b00; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_RB_SURFACE_INFO); *cmds++ = shadow->gmem_pitch; BUG_ON(tmp_ctx.gmem_base & 0xFFF); *cmds++ = (shadow-> format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_RB_DEPTHCONTROL); if (adreno_is_a22x(adreno_dev)) *cmds++ = 0x08; else *cmds++ = 0; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL); *cmds++ = 0x00080240; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL); *cmds++ = (0 << 16) | 0; *cmds++ = (0x1fff << 16) | (0x1fff); *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL); *cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0); *cmds++ = (0x1fff << 16) | (0x1fff); *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE); *cmds++ = 0xbf800000; *cmds++ = 0x0; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_RB_COLOR_MASK); *cmds++ = 0x0000000f; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK); *cmds++ = 0xffffffff; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3); *cmds++ = CP_REG(REG_SQ_WRAPPING_0); *cmds++ = 0x00000000; *cmds++ = 0x00000000; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6); *cmds++ = CP_REG(REG_RB_COPY_CONTROL); *cmds++ = 0; *cmds++ = addr & 0xfffff000; *cmds++ = shadow->pitch >> 5; *cmds++ = 0x0003c008 | (shadow->format << RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT); BUG_ON(offset & 0xfffff000); *cmds++ = offset; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_RB_MODECONTROL); *cmds++ = 0x6; *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); *cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL); *cmds++ = 0x00010000; if (adreno_is_a22x(adreno_dev)) { *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 
2); *cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL); *cmds++ = 0x0000000; *cmds++ = cp_type3_packet(CP_DRAW_INDX, 3); *cmds++ = 0; *cmds++ = 0x00004088; *cmds++ = 3; } else { *cmds++ = cp_type3_packet(CP_DRAW_INDX, 2); *cmds++ = 0; *cmds++ = 0x00030088; } create_ib1(drawctxt, shadow->gmem_save, start, cmds); return cmds; }
static void build_regsave_cmds(struct adreno_device *adreno_dev, struct adreno_context *drawctxt) { unsigned int *start = tmp_ctx.cmd; unsigned int *cmd = start; *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; #ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES *cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1); *cmd++ = 0; { unsigned int i = 0; unsigned int reg_array_size = 0; const unsigned int *ptr_register_ranges; if (adreno_is_a220(adreno_dev)) { ptr_register_ranges = register_ranges_a220; reg_array_size = ARRAY_SIZE(register_ranges_a220); } else if (adreno_is_a225(adreno_dev)) { ptr_register_ranges = register_ranges_a225; reg_array_size = ARRAY_SIZE(register_ranges_a225); } else { ptr_register_ranges = register_ranges_a20x; reg_array_size = ARRAY_SIZE(register_ranges_a20x); } for (i = 0; i < (reg_array_size/2) ; i++) { build_reg_to_mem_range(ptr_register_ranges[i*2], ptr_register_ranges[i*2+1], &cmd, drawctxt); } } cmd = reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr) & 0xFFFFE000, REG_SQ_CONSTANT_0, ALU_CONSTANTS); cmd = reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000, REG_SQ_FETCH_0, TEX_CONSTANTS); #else *cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); *cmd++ = 0; *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); *cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000; *cmd++ = 4 << 16; *cmd++ = 0x0; *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); *cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000; *cmd++ = 0 << 16; *cmd++ = 0x0; *cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3); *cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000; *cmd++ = 1 << 16; *cmd++ = 0x0; #endif *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_SQ_GPR_MANAGEMENT; *cmd++ = tmp_ctx.reg_values[0]; *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_TP0_CHICKEN; *cmd++ = tmp_ctx.reg_values[1]; if (adreno_is_a20x(adreno_dev)) { *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = REG_RB_BC_CONTROL; *cmd++ = 
tmp_ctx.reg_values[2]; } if (adreno_is_a22x(adreno_dev)) { unsigned int i; unsigned int j = 2; for (i = REG_A220_VSC_BIN_SIZE; i <= REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) { *cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2); *cmd++ = i; *cmd++ = tmp_ctx.reg_values[j]; j++; } } cmd = reg_to_mem(cmd, tmp_ctx.bool_shadow, REG_SQ_CF_BOOLEANS, BOOL_CONSTANTS); cmd = reg_to_mem(cmd, tmp_ctx.loop_shadow, REG_SQ_CF_LOOP, LOOP_CONSTANTS); create_ib1(drawctxt, drawctxt->reg_save, start, cmd); tmp_ctx.cmd = cmd; }
/**
 * adreno_ringbuffer_addcmds() - Wrap a command stream with timestamp
 * bookkeeping and write it into the ringbuffer
 * @rb: Ringbuffer to write into; its timestamp[] array is advanced here
 * @context: Submitting context, or NULL. When non-NULL and created with
 *	CTXT_FLAGS_PER_CONTEXT_TS, the context's own timestamp slot is used;
 *	otherwise KGSL_MEMSTORE_GLOBAL is used
 * @flags: KGSL_CMD_FLAGS_* bits. PMODE brackets the commands with
 *	CP_SET_PROTECTED_MODE off/on, NO_TS_CMP suppresses the trailing
 *	CP_COND_EXEC/CP_INTERRUPT sequence, NOT_KERNEL_CMD suppresses the
 *	leading KGSL_CMD_IDENTIFIER nop packet
 * @cmds: Caller's command dwords; copied one dword at a time
 * @sizedwords: Number of dwords to copy from @cmds
 *
 * The dword budget computed at the top must match, word for word, what the
 * GSL_RB_WRITE() calls below emit for the same combination of @flags,
 * @context and GPU family, because the space is reserved once up front.
 *
 * NOTE(review): the definition is truncated in this file - the body ends
 * after the CP_COND_EXEC section without a closing brace, so the submit and
 * return path is not visible. Presumably the function returns the local
 * "timestamp"; confirm against the complete source.
 */
static uint32_t adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
		struct adreno_context *context,
		unsigned int flags, unsigned int *cmds,
		int sizedwords)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	unsigned int *ringcmds;
	unsigned int timestamp;
	unsigned int total_sizedwords = sizedwords;
	unsigned int i;
	unsigned int rcmd_gpu;
	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
	unsigned int gpuaddr = rb->device->memstore.gpuaddr;

	if (context != NULL) {
		/*
		 * if the context was not created with per context timestamp
		 * support, we must use the global timestamp since issueibcmds
		 * will be returning that one.
		 */
		if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)
			context_id = context->id;
	}

	/*
	 * reserve space to temporarily turn off protected mode
	 * error checking if needed (2 dwords to disable + 2 to re-enable)
	 */
	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
	/* CP_COND_EXEC packet (5 dwords) + CP_INTERRUPT packet (2 dwords) */
	total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0;
	/* nop packet header + KGSL_CMD_IDENTIFIER */
	total_sizedwords += !(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD) ? 2 : 0;

	/*
	 * NOTE(review): 7 dwords are reserved for a3xx, but the visible part
	 * of the body only emits 4 of them (CP_EVENT_WRITE + CP_WAIT_FOR_IDLE);
	 * the remaining 3 are presumably written in the truncated tail.
	 */
	if (adreno_is_a3xx(adreno_dev))
		total_sizedwords += 7;

	total_sizedwords += 2; /* scratchpad ts for recovery */
	if (context) {
		total_sizedwords += 3; /* sop timestamp */
		total_sizedwords += 4; /* eop timestamp */
		total_sizedwords += 3; /* global timestamp without cache
					* flush for non-zero context */
	} else {
		total_sizedwords += 4; /* global timestamp for recovery */
	}

	/*
	 * NOTE(review): the result of adreno_ringbuffer_allocspace() is used
	 * without any error/NULL check here - confirm it cannot fail in this
	 * version of the driver.
	 */
	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
	/*
	 * GPU address of the first reserved dword. The subtraction assumes
	 * rb->wptr already points past the reserved region (presumably
	 * advanced by allocspace - verify).
	 */
	rcmd_gpu = rb->buffer_desc.gpuaddr
		+ sizeof(uint)*(rb->wptr-total_sizedwords);

	if (!(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD)) {
		/* tag the submission as kernel-generated with an identifier nop */
		GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);
	}
	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* disable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
	}

	/* copy the caller's commands into the ring */
	for (i = 0; i < sizedwords; i++) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds);
		cmds++;
	}

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* re-enable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
	}

	/* always increment the global timestamp. once. */
	rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;

	if (context) {
		/*
		 * A context without per-context timestamps resolves to the
		 * global slot, which was already bumped above, so the
		 * assignment is a self-copy rather than a second increment.
		 */
		if (context_id == KGSL_MEMSTORE_GLOBAL)
			rb->timestamp[context_id] =
				rb->timestamp[KGSL_MEMSTORE_GLOBAL];
		else
			rb->timestamp[context_id]++;
	}
	timestamp = rb->timestamp[context_id];

	/* scratchpad ts for recovery */
	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);

	if (adreno_is_a3xx(adreno_dev)) {
		/*
		 * Flush HLSQ lazy updates to make sure there are no
		 * resources pending for indirect loads after the timestamp
		 */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x07); /* HLSQ_FLUSH */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
	}

	if (context) {
		/*
		 * Note: the sop/eop writes below are addressed by context->id,
		 * not context_id, so the context's own memstore slot is
		 * written even when "timestamp" is the global value.
		 */
		/* start-of-pipeline timestamp */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

		/* end-of-pipeline timestamp */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

		/* global eop timestamp, written with a plain CP_MEM_WRITE */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
	} else {
		/* no context: only the global eop timestamp is written */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
	}

	if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) {
		/* Conditional execution based on memory values */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_COND_EXEC, 4));
		/* the two memstore addresses are passed shifted right by 2 */
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(
				context_id, ts_cmp_enable)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(
				context_id, ref_wait_ts)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
		/* # of conditional command DWORDs */
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 2);
		/* the 2 conditional dwords: one CP_INTERRUPT packet */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_INTERRUPT, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK);
	}
static uint32_t adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, unsigned int flags, unsigned int *cmds, int sizedwords) { unsigned int *ringcmds; unsigned int timestamp; unsigned int total_sizedwords = sizedwords + 6; unsigned int i; unsigned int rcmd_gpu; /* reserve space to temporarily turn off protected mode * error checking if needed */ total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0; total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0; total_sizedwords += !(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD) ? 2 : 0; ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); rcmd_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-total_sizedwords); if (!(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD)) { GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER); } if (flags & KGSL_CMD_FLAGS_PMODE) { /* disable protected mode error checking */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_SET_PROTECTED_MODE, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, 0); } for (i = 0; i < sizedwords; i++) { GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds); cmds++; } if (flags & KGSL_CMD_FLAGS_PMODE) { /* re-enable protected mode error checking */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_SET_PROTECTED_MODE, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, 1); } rb->timestamp++; timestamp = rb->timestamp; /* start-of-pipeline and end-of-pipeline timestamps */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS); GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp))); GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) { /* Conditional execution based on memory values */ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_COND_EXEC, 4)); 
GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)) >> 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr + KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)) >> 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp); /* # of conditional command DWORDs */ GSL_RB_WRITE(ringcmds, rcmd_gpu, 2); GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_INTERRUPT, 1)); GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK); }