/**
 * adreno_context_restore() - generic context restore handler
 * @adreno_dev: the device
 * @context: the context
 *
 * Basic context restore handler that writes the context identifier
 * to the ringbuffer and issues pagetable switch commands if necessary.
 */
static int adreno_context_restore(struct adreno_device *adreno_dev,
				  struct adreno_context *context)
{
	struct kgsl_device *device;
	unsigned int cmds[8];

	if (adreno_dev == NULL || context == NULL)
		return -EINVAL;

	device = &adreno_dev->dev;

	/* write the context identifier to the ringbuffer */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
	cmds[4] = context->base.id;
	/* Flush the UCHE for new context */
	cmds[5] = cp_type0_packet(
		adreno_getreg(adreno_dev, ADRENO_REG_UCHE_INVALIDATE0), 2);
	cmds[6] = 0;
	if (adreno_is_a4xx(adreno_dev))
		cmds[7] = 0x12;
	else if (adreno_is_a3xx(adreno_dev))
		cmds[7] = 0x90000000;
	return adreno_ringbuffer_issuecmds(device, context,
				KGSL_CMD_FLAGS_NONE, cmds, 8);
}
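For reference, a minimal sketch of the PM4 header encodings behind the cp_nop_packet()/cp_type0_packet()/cp_type3_packet() helpers used throughout these examples. The example_* names are illustrative stand-ins, assuming the standard PM4 layout used by the real macros in adreno_pm4types.h.

/* illustrative re-implementations, assuming the standard PM4 layout */
static inline unsigned int example_cp_type0_packet(unsigned int regindx,
						   unsigned int cnt)
{
	/* type 0 (bits 31:30 = 0): write cnt registers starting at regindx */
	return ((cnt - 1) << 16) | (regindx & 0x7FFF);
}

static inline unsigned int example_cp_type3_packet(unsigned int opcode,
						   unsigned int cnt)
{
	/* type 3 (bits 31:30 = 3): an opcode plus cnt data dwords */
	return (3u << 30) | ((cnt - 1) << 16) | ((opcode & 0xFF) << 8);
}

static inline unsigned int example_cp_nop_packet(unsigned int cnt)
{
	/* a NOP is simply a type 3 packet with the CP_NOP opcode */
	return example_cp_type3_packet(CP_NOP, cnt);
}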
/**
Example #2
 * adreno_context_restore() - generic context restore handler
 * @rb: The RB in which context is to be restored
 *
 * Basic context restore handler that writes the context identifier
 * to the ringbuffer and issues pagetable switch commands if necessary.
 */
static void adreno_context_restore(struct adreno_ringbuffer *rb)
{
	struct kgsl_device *device = rb->device;
	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	struct adreno_context *drawctxt = rb->drawctxt_active;
	unsigned int cmds[11];
	int ret;

	if (!drawctxt)
		return;
	/*
	 * write the context identifier to the ringbuffer; write to both
	 * the global index and the index of the RB in which the context
	 * operates. The global value will always be reliable, since we
	 * could be in the middle of an RB switch, in which case the RB
	 * value may not be accurate.
	 */
	cmds[0] = cp_nop_packet(1);
	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[3] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_RB_OFFSET(rb, current_context);
	cmds[4] = drawctxt->base.id;
	cmds[5] = cp_type3_packet(CP_MEM_WRITE, 2);
	cmds[6] = device->memstore.gpuaddr +
		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
					current_context);
	cmds[7] = drawctxt->base.id;
	/* Flush the UCHE for new context */
	cmds[8] = cp_type0_packet(
		adreno_getreg(adreno_dev, ADRENO_REG_UCHE_INVALIDATE0), 2);
	cmds[9] = 0;
	if (adreno_is_a4xx(adreno_dev))
		cmds[10] = 0x12;
	else if (adreno_is_a3xx(adreno_dev))
		cmds[10] = 0x90000000;
	ret = adreno_ringbuffer_issuecmds(rb, KGSL_CMD_FLAGS_NONE, cmds, 11);
	if (ret) {
		/*
		 * A failure to submit commands to the ringbuffer means the
		 * RB may be full; in that case wait for idle and use the
		 * CPU path.
		 */
		ret = adreno_idle(device);
		BUG_ON(ret);
		_adreno_context_restore_cpu(rb, drawctxt);
	}
}
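A hypothetical sketch of the CPU fallback taken above when the RB is full. The real _adreno_context_restore_cpu() is not part of this listing; the hostptr-based writes below are an assumption about its shape, not the driver's actual code.

static void example_context_restore_cpu(struct adreno_ringbuffer *rb,
					struct adreno_context *drawctxt)
{
	unsigned int *memstore = rb->device->memstore.hostptr;

	/* poke the same two current_context slots the GPU path fills
	 * with CP_MEM_WRITE packets, only through the kernel mapping */
	memstore[KGSL_MEMSTORE_RB_OFFSET(rb, current_context) >> 2] =
		drawctxt->base.id;
	memstore[KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				      current_context) >> 2] =
		drawctxt->base.id;
}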
Example #3
/* chicken restore */
static unsigned int *build_chicken_restore_cmds(
					struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmds = start;

	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmds++ = 0;

	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
	tmp_ctx.chicken_restore = virt2gpu(cmds, &drawctxt->gpustate);
	*cmds++ = 0x00000000;

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->chicken_restore, start, cmds);

	return cmds;
}
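The builders in this listing all finish with create_ib1(). A sketch of its assumed shape: it patches a three-dword IB1 descriptor (packet header, GPU address of the command stream, length in dwords) that is later kicked off via CP_INDIRECT_BUFFER_PFD, using virt2gpu() to translate a CPU pointer inside the gpustate shadow into its GPU address.

/* assumed shape -- the real helper lives in the a2xx context code */
static void example_create_ib1(struct adreno_context *drawctxt,
			       unsigned int *cmd,
			       unsigned int *start, unsigned int *end)
{
	cmd[0] = cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2);
	cmd[1] = virt2gpu(start, &drawctxt->gpustate);
	cmd[2] = end - start;
}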
Example #4
static uint32_t
adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
                          struct adreno_context *context,
                          unsigned int flags, unsigned int *cmds,
                          int sizedwords)
{
    struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
    unsigned int *ringcmds;
    unsigned int timestamp;
    unsigned int total_sizedwords = sizedwords;
    unsigned int i;
    unsigned int rcmd_gpu;
    unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
    unsigned int gpuaddr = rb->device->memstore.gpuaddr;

    if (context != NULL) {
        /*
         * if the context was not created with per context timestamp
         * support, we must use the global timestamp since issueibcmds
         * will be returning that one.
         */
        if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)
            context_id = context->id;
    }

    /*
     * reserve space to temporarily turn off protected mode
     * error checking if needed
     */
    total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
    total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0;
    total_sizedwords += !(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD) ? 2 : 0;

    if (adreno_is_a3xx(adreno_dev))
        total_sizedwords += 7;

    total_sizedwords += 2; /* scratchpad ts for recovery */
    if (context) {
        total_sizedwords += 3; /* sop timestamp */
        total_sizedwords += 4; /* eop timestamp */
        total_sizedwords += 3; /* global timestamp without cache
					* flush for non-zero context */
    } else {
        total_sizedwords += 4; /* global timestamp for recovery*/
    }

    ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
    rcmd_gpu = rb->buffer_desc.gpuaddr
               + sizeof(uint)*(rb->wptr-total_sizedwords);

    if (!(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD)) {
        GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);
    }
    if (flags & KGSL_CMD_FLAGS_PMODE) {
        /* disable protected mode error checking */
        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
    }

    for (i = 0; i < sizedwords; i++) {
        GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds);
        cmds++;
    }

    if (flags & KGSL_CMD_FLAGS_PMODE) {
        /* re-enable protected mode error checking */
        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
    }

    /* always increment the global timestamp. once. */
    rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;
    if (context) {
        if (context_id == KGSL_MEMSTORE_GLOBAL)
            rb->timestamp[context_id] =
                rb->timestamp[KGSL_MEMSTORE_GLOBAL];
        else
            rb->timestamp[context_id]++;
    }
    timestamp = rb->timestamp[context_id];

    /* scratchpad ts for recovery */
    GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
    GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);

    if (adreno_is_a3xx(adreno_dev)) {
        /*
         * Flush HLSQ lazy updates to make sure there are no
         * resources pending for indirect loads after the timestamp
         */

        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     cp_type3_packet(CP_EVENT_WRITE, 1));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x07); /* HLSQ_FLUSH */
        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
    }

    if (context) {
        /* start-of-pipeline timestamp */
        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     cp_type3_packet(CP_MEM_WRITE, 2));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
                                          KGSL_MEMSTORE_OFFSET(context->id, soptimestamp)));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

        /* end-of-pipeline timestamp */
        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     cp_type3_packet(CP_EVENT_WRITE, 3));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
        GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
                                          KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp)));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     cp_type3_packet(CP_MEM_WRITE, 2));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
                                          KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
                                                  eoptimestamp)));
        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
    } else {
        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     cp_type3_packet(CP_EVENT_WRITE, 3));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
        GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
                                          KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
                                                  eoptimestamp)));
        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
    }

    if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) {
        /* Conditional execution based on memory values */
        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     cp_type3_packet(CP_COND_EXEC, 4));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
                                          KGSL_MEMSTORE_OFFSET(
                                              context_id, ts_cmp_enable)) >> 2);
        GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
                                          KGSL_MEMSTORE_OFFSET(
                                              context_id, ref_wait_ts)) >> 2);
        GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
        /* # of conditional command DWORDs */
        GSL_RB_WRITE(ringcmds, rcmd_gpu, 2);
        GSL_RB_WRITE(ringcmds, rcmd_gpu,
                     cp_type3_packet(CP_INTERRUPT, 1));
        GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK);
    }

    adreno_ringbuffer_submit(rb);

    return timestamp;
}
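The soptimestamp/eoptimestamp/ts_cmp_enable/ref_wait_ts slots written above live in the shared memstore. A sketch of the assumed addressing scheme; the field order and padding of the real struct kgsl_devmemstore may differ.

struct example_devmemstore {
	unsigned int soptimestamp;	/* start-of-pipeline timestamp */
	unsigned int eoptimestamp;	/* end-of-pipeline timestamp */
	unsigned int ts_cmp_enable;	/* arms the CP_COND_EXEC check */
	unsigned int ref_wait_ts;	/* timestamp a waiter sleeps on */
	unsigned int current_context;	/* id written on context restore */
};

/* one record per context id; KGSL_MEMSTORE_GLOBAL is a reserved slot */
#define EXAMPLE_MEMSTORE_OFFSET(ctxt_id, field)			\
	((ctxt_id) * sizeof(struct example_devmemstore) +	\
	 offsetof(struct example_devmemstore, field))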
Example #5
static uint32_t
adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
				struct adreno_context *context,
				unsigned int flags, unsigned int *cmds,
				int sizedwords)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	unsigned int *ringcmds;
	unsigned int timestamp;
	unsigned int total_sizedwords = sizedwords;
	unsigned int i;
	unsigned int rcmd_gpu;
	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
	unsigned int gpuaddr = rb->device->memstore.gpuaddr;

	/*
	 * if the context was not created with per context timestamp
	 * support, we must use the global timestamp since issueibcmds
	 * will be returning that one.
	 */
	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)
		context_id = context->id;

	/*
	 * reserve space to temporarily turn off protected mode
	 * error checking if needed
	 */
	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
	total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0;
	/* 2 dwords to store the start of command sequence */
	total_sizedwords += 2;

	if (adreno_is_a2xx(adreno_dev))
		total_sizedwords += 2; /* CP_WAIT_FOR_IDLE */

	total_sizedwords += 2; /* scratchpad ts for recovery */
	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) {
		total_sizedwords += 3; /* sop timestamp */
		total_sizedwords += 4; /* eop timestamp */
		total_sizedwords += 3; /* global timestamp without cache
					* flush for non-zero context */
	} else {
		total_sizedwords += 4; /* global timestamp for recovery*/
	}

	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
	/*
	 * The GPU may hang during space allocation; if that's the case the
	 * current context may have hung the GPU.
	 */
	if (context->flags & CTXT_FLAGS_GPU_HANG) {
		KGSL_CTXT_WARN(rb->device,
		"Context %p caused a gpu hang. Will not accept commands for context %d\n",
		context, context->id);
		return rb->timestamp[context_id];
	}

	rcmd_gpu = rb->buffer_desc.gpuaddr
		+ sizeof(uint)*(rb->wptr-total_sizedwords);

	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* disable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
	}

	for (i = 0; i < sizedwords; i++) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds);
		cmds++;
	}

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* re-enable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
	}

	/* always increment the global timestamp. once. */
	rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;
	if (context) {
		if (context_id == KGSL_MEMSTORE_GLOBAL)
			rb->timestamp[context_id] =
				rb->timestamp[KGSL_MEMSTORE_GLOBAL];
		else
			rb->timestamp[context_id]++;
	}
	timestamp = rb->timestamp[context_id];

	/*
	 * HW workaround for an MMU page fault caused by memory
	 * getting freed before the GPU is done with it.
	 */
	if (adreno_is_a2xx(adreno_dev)) {
			GSL_RB_WRITE(ringcmds, rcmd_gpu,
					cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
			GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
	}

	/* scratchpad ts for recovery */
	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);

	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) {
		/* start-of-pipeline timestamp */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

		/* end-of-pipeline timestamp */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			      KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				      eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
	} else {
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			      KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				      eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
	}

	if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) {
		/* Conditional execution based on memory values */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_COND_EXEC, 4));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(
				context_id, ts_cmp_enable)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(
				context_id, ref_wait_ts)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
		/* # of conditional command DWORDs */
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_INTERRUPT, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK);
	}

	adreno_ringbuffer_submit(rb);

	return timestamp;
}
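This is why every optional packet above is counted into total_sizedwords before anything is written: allocation is a single reservation against the ring. A rough sketch of the assumed adreno_ringbuffer_allocspace() behaviour; the real code also pads the tail with NOPs and throttles against the GPU read pointer.

static unsigned int *example_allocspace(struct adreno_ringbuffer *rb,
					unsigned int numcmds)
{
	unsigned int *ptr;

	/* wrap to the start when the request would run off the end;
	 * the real code waits until the GPU rptr is out of the way */
	if (rb->wptr + numcmds > rb->sizedwords)
		rb->wptr = 0;

	ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
	rb->wptr += numcmds;
	return ptr;
}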
Example #6
static uint32_t
adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
				struct adreno_context *context,
				unsigned int flags, unsigned int *cmds,
				int sizedwords)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	unsigned int *ringcmds;
	unsigned int timestamp;
	unsigned int total_sizedwords = sizedwords + 6;
	unsigned int i;
	unsigned int rcmd_gpu;

	/*
	 * reserve space to temporarily turn off protected mode
	 * error checking if needed
	 */
	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
	total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0;
	/* 2 dwords to store the start of command sequence */
	total_sizedwords += 2;
	if (adreno_is_a2xx(adreno_dev))
		total_sizedwords += 2; /* CP_WAIT_FOR_IDLE */
	
	if (adreno_is_a20x(adreno_dev))
		total_sizedwords += 2; /* CACHE_FLUSH */

	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
	/*
	 * The GPU may hang during space allocation; if that's the case the
	 * current context may have hung the GPU.
	 */
	if (context && context->flags & CTXT_FLAGS_GPU_HANG) {
		KGSL_CTXT_WARN(rb->device,
		"Context %p caused a gpu hang. Will not accept commands for context %d\n",
		context, context->id);
		return rb->timestamp;
	}

	rcmd_gpu = rb->buffer_desc.gpuaddr
		+ sizeof(uint)*(rb->wptr-total_sizedwords);

	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* disable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
	}

	for (i = 0; i < sizedwords; i++) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds);
		cmds++;
	}

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* re-enable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
	}

	rb->timestamp++;
	timestamp = rb->timestamp;

	/*
	 * HW workaround for an MMU page fault caused by memory
	 * getting freed before the GPU is done with it.
	 */
	if (adreno_is_a2xx(adreno_dev)) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
	}

	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp);
	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
	GSL_RB_WRITE(ringcmds, rcmd_gpu,
		     (rb->device->memstore.gpuaddr +
		      KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp);

	if (adreno_is_a20x(adreno_dev)) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH);
	}

	if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) {
		/* Conditional execution based on memory values */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_COND_EXEC, 4));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr +
			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr +
			KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp);
		/* # of conditional command DWORDs */
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_INTERRUPT, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK);
	}

	adreno_ringbuffer_submit(rb);

	return timestamp;
}
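A hypothetical look at the other half of the CP_COND_EXEC block above (the function name is ours): a waiter arms ts_cmp_enable/ref_wait_ts in the memstore so the CP_INTERRUPT only fires when somebody is actually blocked on a timestamp.

static void example_arm_ts_interrupt(struct kgsl_device *device,
				     unsigned int ts)
{
	unsigned int *memstore = device->memstore.hostptr;

	/* the CP_COND_EXEC packet re-reads these two slots on the GPU */
	memstore[KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts) >> 2] = ts;
	memstore[KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable) >> 2] = 1;
}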
Example #7
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;

	unsigned int i = 0;
	unsigned int reg_array_size = 0;
	const unsigned int *ptr_register_ranges;

	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* H/W Registers */
	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Force mismatch */
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	/* Based on chip id choose the registers ranges*/
	if (adreno_is_a220(adreno_dev)) {
		ptr_register_ranges = register_ranges_a220;
		reg_array_size = ARRAY_SIZE(register_ranges_a220);
	} else if (adreno_is_a225(adreno_dev)) {
		ptr_register_ranges = register_ranges_a225;
		reg_array_size = ARRAY_SIZE(register_ranges_a225);
	} else {
		ptr_register_ranges = register_ranges_a20x;
		reg_array_size = ARRAY_SIZE(register_ranges_a20x);
	}


	for (i = 0; i < (reg_array_size/2); i++) {
		cmd = reg_range(cmd, ptr_register_ranges[i*2],
				ptr_register_ranges[i*2+1]);
	}

	/* Now we know how many register blocks we have, we can compute command
	 * length
	 */
	start[2] =
	    cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, (cmd - start) - 3);
	/* Enable shadowing for the entire register block. */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	start[4] |= (0 << 24) | (4 << 16);	/* Disable shadowing. */
#else
	start[4] |= (1 << 24) | (4 << 16);
#endif

	/* Need to handle some of the registers separately */
	*cmd++ = cp_type0_packet(REG_SQ_GPR_MANAGEMENT, 1);
	tmp_ctx.reg_values[0] = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0x00040400;

	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;
	*cmd++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
	tmp_ctx.reg_values[1] = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0x00000000;

	if (adreno_is_a22x(adreno_dev)) {
		unsigned int i;
		unsigned int j = 2;
		for (i = REG_A220_VSC_BIN_SIZE; i <=
				REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) {
			*cmd++ = cp_type0_packet(i, 1);
			tmp_ctx.reg_values[j] = virt2gpu(cmd,
				&drawctxt->gpustate);
			*cmd++ = 0x00000000;
			j++;
		}
	}

	/* ALU Constants */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	*cmd++ = (0 << 24) | (0 << 16) | 0;	/* Disable shadowing */
#else
	*cmd++ = (1 << 24) | (0 << 16) | 0;
#endif
	*cmd++ = ALU_CONSTANTS;

	/* Texture Constants */
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	/* Disable shadowing */
	*cmd++ = (0 << 24) | (1 << 16) | 0;
#else
	*cmd++ = (1 << 24) | (1 << 16) | 0;
#endif
	*cmd++ = TEX_CONSTANTS;

	/* Boolean Constants */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + BOOL_CONSTANTS);
	*cmd++ = (2 << 16) | 0;

	/* the next BOOL_CONSTANTS dwords are the shadow area for
	 * boolean constants.
	 */
	tmp_ctx.bool_shadow = virt2gpu(cmd, &drawctxt->gpustate);
	cmd += BOOL_CONSTANTS;

	/* Loop Constants */
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + LOOP_CONSTANTS);
	*cmd++ = (3 << 16) | 0;

	/* the next LOOP_CONSTANTS dwords are the shadow area for
	 * loop constants.
	 */
	tmp_ctx.loop_shadow = virt2gpu(cmd, &drawctxt->gpustate);
	cmd += LOOP_CONSTANTS;

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);

	tmp_ctx.cmd = cmd;
}
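The register ranges above are emitted through reg_range(). A sketch of its assumed shape: each range contributes a start-register dword and a count dword to the CP_LOAD_CONSTANT_CONTEXT payload whose total length is patched into start[2] afterwards.

/* assumed shape of the reg_range() helper used above */
static unsigned int *example_reg_range(unsigned int *cmd,
				       unsigned int start, unsigned int end)
{
	*cmd++ = CP_REG(start);		/* h/w register block start addr */
	*cmd++ = end - start + 1;	/* number of registers in the range */
	return cmd;
}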
Example #8
/* copy colour, depth, & stencil buffers from system memory to graphics memory */
static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
					 struct adreno_context *drawctxt,
					 struct gmem_shadow_t *shadow)
{
	unsigned int *cmds = shadow->gmem_restore_commands;
	unsigned int *start = cmds;

	/* Store TP0_CHICKEN register */
	*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmds++ = REG_TP0_CHICKEN;
	*cmds++ = tmp_ctx.chicken_restore;

	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmds++ = 0;

	/* Set TP0_CHICKEN to zero */
	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
	*cmds++ = 0x00000000;

	/* Set PA_SC_AA_CONFIG to 0 */
	*cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1);
	*cmds++ = 0x00000000;
	/* shader constants */

	/* vertex buffer constants */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);

	*cmds++ = (0x1 << 16) | (9 * 6);
	/* valid(?) vtx constant flag & addr */
	*cmds++ = shadow->quad_vertices.gpuaddr | 0x3;
	/* limit = 12 dwords */
	*cmds++ = 0x00000030;
	/* valid(?) vtx constant flag & addr */
	*cmds++ = shadow->quad_texcoords.gpuaddr | 0x3;
	/* limit = 8 dwords */
	*cmds++ = 0x00000020;
	*cmds++ = 0;
	*cmds++ = 0;

	/* Invalidate L2 cache to make sure vertices are updated */
	*cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1);
	*cmds++ = 0x1;

	cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN);

	/* Repartition shaders */
	*cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
	*cmds++ = 0x180;

	/* Invalidate Vertex & Pixel instruction code address and sizes */
	*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
	*cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */

	*cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
	*cmds++ = adreno_encode_istore_size(adreno_dev)
		  | adreno_dev->pix_shader_start;

	/* Load the patched fragment shader stream */
	cmds =
	    program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN);

	/* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL);
	*cmds++ = 0x10030002;
	*cmds++ = 0x00000008;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SC_AA_MASK);
	*cmds++ = 0x0000ffff;	/* REG_PA_SC_AA_MASK */

	if (!adreno_is_a22x(adreno_dev)) {
		/* PA_SC_VIZ_QUERY */
		*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
		*cmds++ = CP_REG(REG_PA_SC_VIZ_QUERY);
		*cmds++ = 0x0;		/*REG_PA_SC_VIZ_QUERY */
	}

	/* RB_COLORCONTROL */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLORCONTROL);
	*cmds++ = 0x00000c20;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
	*cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX);
	*cmds++ = 0x00ffffff;	/* mmVGT_MAX_VTX_INDX */
	*cmds++ = 0x0;		/* mmVGT_MIN_VTX_INDX */
	*cmds++ = 0x00000000;	/* mmVGT_INDX_OFFSET */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL);
	*cmds++ = 0x00000002;	/* mmVGT_VERTEX_REUSE_BLOCK_CNTL */
	*cmds++ = 0x00000002;	/* mmVGT_OUT_DEALLOC_CNTL */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_SQ_INTERPOLATOR_CNTL);
	*cmds++ = 0xffffffff;	/* mmSQ_INTERPOLATOR_CNTL */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SC_AA_CONFIG);
	*cmds++ = 0x00000000;	/* REG_PA_SC_AA_CONFIG */

	/* set REG_PA_SU_SC_MODE_CNTL
	 * Front_ptype = draw triangles
	 * Back_ptype = draw triangles
	 * Provoking vertex = last
	 */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL);
	*cmds++ = 0x00080240;

	/* texture constants */
	*cmds++ =
	    cp_type3_packet(CP_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1));
	*cmds++ = (0x1 << 16) | (0 * 6);
	memcpy(cmds, sys2gmem_tex_const, SYS2GMEM_TEX_CONST_LEN << 2);
	cmds[0] |= (shadow->pitch >> 5) << 22;
	cmds[1] |=
	    shadow->gmemshadow.gpuaddr | surface_format_table[shadow->format];
	cmds[2] |= (shadow->width - 1) | (shadow->height - 1) << 13;
	cmds += SYS2GMEM_TEX_CONST_LEN;

	/* program surface info */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_RB_SURFACE_INFO);
	*cmds++ = shadow->gmem_pitch;	/* pitch, MSAA = 1 */

	/* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0,
	 *                Base=gmem_base
	 */
	*cmds++ = (shadow->format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) |
		  tmp_ctx.gmem_base;

	/* RB_DEPTHCONTROL */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_DEPTHCONTROL);

	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 8;		/* disable Z */
	else
		*cmds++ = 0;		/* disable Z */

	/* Use maximum scissor values -- quad vertices already
	 * have the correct bounds */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL);
	*cmds++ = (0 << 16) | 0;
	*cmds++ = ((0x1fff) << 16) | 0x1fff;
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL);
	*cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0);
	*cmds++ = ((0x1fff) << 16) | 0x1fff;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_VTE_CNTL);
	/* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */
	*cmds++ = 0x00000b00;

	/*load the viewport so that z scale = clear depth and z offset = 0.0f */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE);
	*cmds++ = 0xbf800000;
	*cmds++ = 0x0;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_MASK);
	*cmds++ = 0x0000000f;	/* R = G = B = 1:enabled */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK);
	*cmds++ = 0xffffffff;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_WRAPPING_0);
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;

	/* load the stencil ref value
	 *  $AAM - do this later
	 */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_MODECONTROL);
	/* draw pixels with color and depth/stencil component */
	*cmds++ = 0x4;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL);
	*cmds++ = 0x00010000;

	if (adreno_is_a22x(adreno_dev)) {
		*cmds++ = cp_type3_packet(CP_SET_DRAW_INIT_FLAGS, 1);
		*cmds++ = 0;

		*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
		*cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL);
		*cmds++ = 0x0000000;

		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
		*cmds++ = 0;           /* viz query info. */
		/* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/
		*cmds++ = 0x00004088;
		*cmds++ = 3;	       /* NumIndices=3 */
	} else {
		/* queue the draw packet */
		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 2);
		*cmds++ = 0;		/* viz query info. */
		/* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */
		*cmds++ = 0x00030088;
	}

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, shadow->gmem_restore, start, cmds);

	return cmds;
}
Example #9
/* copy colour, depth, & stencil buffers from graphics memory to system memory */
static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
					 struct adreno_context *drawctxt,
					 struct gmem_shadow_t *shadow)
{
	unsigned int *cmds = shadow->gmem_save_commands;
	unsigned int *start = cmds;
	/* Calculate the new offset based on the adjusted base */
	unsigned int bytesperpixel = format2bytesperpixel[shadow->format];
	unsigned int addr = shadow->gmemshadow.gpuaddr;
	unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel;

	/* Store TP0_CHICKEN register */
	*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmds++ = REG_TP0_CHICKEN;

	*cmds++ = tmp_ctx.chicken_restore;

	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmds++ = 0;

	/* Set TP0_CHICKEN to zero */
	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
	*cmds++ = 0x00000000;

	/* Set PA_SC_AA_CONFIG to 0 */
	*cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1);
	*cmds++ = 0x00000000;

	/* program shader */

	/* load shader vtx constants ... 5 dwords */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
	*cmds++ = (0x1 << 16) | SHADER_CONST_ADDR;
	*cmds++ = 0;
	/* valid(?) vtx constant flag & addr */
	*cmds++ = shadow->quad_vertices.gpuaddr | 0x3;
	/* limit = 12 dwords */
	*cmds++ = 0x00000030;

	/* Invalidate L2 cache to make sure vertices are updated */
	*cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1);
	*cmds++ = 0x1;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
	*cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX);
	*cmds++ = 0x00ffffff;	/* REG_VGT_MAX_VTX_INDX */
	*cmds++ = 0x0;		/* REG_VGT_MIN_VTX_INDX */
	*cmds++ = 0x00000000;	/* REG_VGT_INDX_OFFSET */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SC_AA_MASK);
	*cmds++ = 0x0000ffff;	/* REG_PA_SC_AA_MASK */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLORCONTROL);
	*cmds++ = 0x00000c20;

	/* Repartition shaders */
	*cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
	*cmds++ = 0x180;

	/* Invalidate Vertex & Pixel instruction code address and sizes */
	*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
	*cmds++ = 0x00003F00;

	*cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
	*cmds++ = adreno_encode_istore_size(adreno_dev)
		  | adreno_dev->pix_shader_start;

	/* load the patched vertex shader stream */
	cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN);

	/* Load the patched fragment shader stream */
	cmds =
	    program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN);

	/* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL);
	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 0x10018001;
	else
		*cmds++ = 0x10010001;
	*cmds++ = 0x00000008;

	/* resolve */

	/* PA_CL_VTE_CNTL */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_VTE_CNTL);
	/* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */
	*cmds++ = 0x00000b00;

	/* program surface info */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_RB_SURFACE_INFO);
	*cmds++ = shadow->gmem_pitch;	/* pitch, MSAA = 1 */

	/* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0,
	 *                Base=gmem_base
	 */
	/* gmem base assumed 4K aligned. */
	BUG_ON(tmp_ctx.gmem_base & 0xFFF);
	*cmds++ = (shadow->format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) |
		  tmp_ctx.gmem_base;

	/* disable Z */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_DEPTHCONTROL);
	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 0x08;
	else
		*cmds++ = 0;

	/* set REG_PA_SU_SC_MODE_CNTL
	 *              Front_ptype = draw triangles
	 *              Back_ptype = draw triangles
	 *              Provoking vertex = last
	 */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL);
	*cmds++ = 0x00080240;

	/* Use maximum scissor values -- quad vertices already have the
	 * correct bounds */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL);
	*cmds++ = (0 << 16) | 0;
	*cmds++ = (0x1fff << 16) | (0x1fff);
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL);
	*cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0);
	*cmds++ = (0x1fff << 16) | (0x1fff);

	/* load the viewport so that z scale = clear depth and
	 *  z offset = 0.0f
	 */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE);
	*cmds++ = 0xbf800000;	/* -1.0f */
	*cmds++ = 0x0;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_MASK);
	*cmds++ = 0x0000000f;	/* R = G = B = 1:enabled */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK);
	*cmds++ = 0xffffffff;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_WRAPPING_0);
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;

	/* load the stencil ref value
	 * $AAM - do this later
	 */

	/* load the COPY state */
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
	*cmds++ = CP_REG(REG_RB_COPY_CONTROL);
	*cmds++ = 0;		/* RB_COPY_CONTROL */
	*cmds++ = addr & 0xfffff000;	/* RB_COPY_DEST_BASE */
	*cmds++ = shadow->pitch >> 5;	/* RB_COPY_DEST_PITCH */

	/* Endian=none, Linear, Format=RGBA8888,Swap=0,!Dither,
	 *  MaskWrite:R=G=B=A=1
	 */
	*cmds++ = 0x0003c008 |
	    (shadow->format << RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT);
	/* Make sure we stay in offsetx field. */
	BUG_ON(offset & 0xfffff000);
	*cmds++ = offset;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_MODECONTROL);
	*cmds++ = 0x6;		/* EDRAM copy */

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL);
	*cmds++ = 0x00010000;

	if (adreno_is_a22x(adreno_dev)) {
		*cmds++ = cp_type3_packet(CP_SET_DRAW_INIT_FLAGS, 1);
		*cmds++ = 0;

		*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
		*cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL);
		*cmds++ = 0x0000000;

		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
		*cmds++ = 0;           /* viz query info. */
		/* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/
		*cmds++ = 0x00004088;
		*cmds++ = 3;	       /* NumIndices=3 */
	} else {
		/* queue the draw packet */
		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 2);
		*cmds++ = 0;		/* viz query info. */
		/* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */
		*cmds++ = 0x00030088;
	}

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, shadow->gmem_save, start, cmds);

	return cmds;
}
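For concreteness, a worked instance of the offset computation at the top of build_gmem2sys_cmds, with made-up values: RB_COPY_DEST_BASE must be 4K aligned, so the low bits of the shadow address are re-expressed as a pixel offset.

/* worked instance of the offset math above, with illustrative values */
static unsigned int example_copy_dest_offset(void)
{
	unsigned int addr = 0x10203234;		/* shadow gpuaddr (example) */
	unsigned int bytesperpixel = 4;		/* e.g. RGBA8888 */
	unsigned int base = addr & 0xfffff000;	/* 0x10203000 -> DEST_BASE */

	/* (addr - base) = 0x234 bytes -> 0x234 / 4 = 0x8d pixels */
	return (addr - base) / bytesperpixel;
}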
Example #10
static uint32_t
adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
				unsigned int flags, unsigned int *cmds,
				int sizedwords)
{
	unsigned int *ringcmds;
	unsigned int timestamp;
	unsigned int total_sizedwords = sizedwords + 6;
	unsigned int i;
	unsigned int rcmd_gpu;

	/*
	 * reserve space to temporarily turn off protected mode
	 * error checking if needed
	 */
	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
	total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0;
	total_sizedwords += !(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD) ? 2 : 0;

	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
	rcmd_gpu = rb->buffer_desc.gpuaddr
		+ sizeof(uint)*(rb->wptr-total_sizedwords);

	if (!(flags & KGSL_CMD_FLAGS_NOT_KERNEL_CMD)) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);
	}
	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* disable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
	}

	for (i = 0; i < sizedwords; i++) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds);
		cmds++;
	}

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* re-enable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
	}

	rb->timestamp++;
	timestamp = rb->timestamp;

	/* start-of-pipeline and end-of-pipeline timestamps */
	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp);
	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
	GSL_RB_WRITE(ringcmds, rcmd_gpu,
		     (rb->device->memstore.gpuaddr +
		      KGSL_DEVICE_MEMSTORE_OFFSET(eoptimestamp)));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp);

	if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) {
		/* Conditional execution based on memory values */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_COND_EXEC, 4));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr +
			KGSL_DEVICE_MEMSTORE_OFFSET(ts_cmp_enable)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (rb->device->memstore.gpuaddr +
			KGSL_DEVICE_MEMSTORE_OFFSET(ref_wait_ts)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp);
		/* # of conditional command DWORDs */
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_INTERRUPT, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK);
	}

	adreno_ringbuffer_submit(rb);

	return timestamp;
}
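All of these examples funnel dwords through GSL_RB_WRITE(). A minimal sketch of its assumed shape: store the dword through the kernel mapping and keep the GPU-address shadow in step with the CPU pointer (the real macro also has memory-barrier and debug variants).

/* assumed shape of GSL_RB_WRITE(): ring is an unsigned int * cursor */
#define EXAMPLE_RB_WRITE(ring, gpuaddr, data)		\
	do {						\
		*(ring)++ = (data);			\
		(gpuaddr) += sizeof(unsigned int);	\
	} while (0)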
Example #11
static void
build_shader_save_restore_cmds(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *save, *restore, *fixup;
	unsigned int *startSizeVtx, *startSizePix, *startSizeShared;
	unsigned int *partition1;
	unsigned int *shaderBases, *partition2;

	/* compute vertex, pixel and shared instruction shadow GPU addresses */
	tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET;
	tmp_ctx.shader_pixel = tmp_ctx.shader_vertex
				+ _shader_shadow_size(adreno_dev);
	tmp_ctx.shader_shared = tmp_ctx.shader_pixel
				+  _shader_shadow_size(adreno_dev);

	/* restore shader partitioning and instructions */

	restore = cmd;		/* start address */

	/* Invalidate Vertex & Pixel instruction code address and sizes */
	*cmd++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
	*cmd++ = 0x00000300;	/* 0x100 = Vertex, 0x200 = Pixel */

	/* Restore previous shader vertex & pixel instruction bases. */
	*cmd++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
	shaderBases = cmd++;	/* TBD #5: shader bases (from fixup) */

	/* write the shader partition information to a scratch register */
	*cmd++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
	partition1 = cmd++;	/* TBD #4a: partition info (from save) */

	/* load vertex shader instructions from the shadow. */
	*cmd++ = cp_type3_packet(CP_IM_LOAD, 2);
	*cmd++ = tmp_ctx.shader_vertex + 0x0;	/* 0x0 = Vertex */
	startSizeVtx = cmd++;	/* TBD #1: start/size (from save) */

	/* load pixel shader instructions from the shadow. */
	*cmd++ = cp_type3_packet(CP_IM_LOAD, 2);
	*cmd++ = tmp_ctx.shader_pixel + 0x1;	/* 0x1 = Pixel */
	startSizePix = cmd++;	/* TBD #2: start/size (from save) */

	/* load shared shader instructions from the shadow. */
	*cmd++ = cp_type3_packet(CP_IM_LOAD, 2);
	*cmd++ = tmp_ctx.shader_shared + 0x2;	/* 0x2 = Shared */
	startSizeShared = cmd++;	/* TBD #3: start/size (from save) */

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd);

	/*
	 *  fixup SET_SHADER_BASES data
	 *
	 *  since self-modifying PM4 code is being used here, a separate
	 *  command buffer is used for this fixup operation, to ensure the
	 *  commands are not read by the PM4 engine before the data fields
	 *  have been written.
	 */

	fixup = cmd;		/* start address */

	/* write the shader partition information to a scratch register */
	*cmd++ = cp_type0_packet(REG_SCRATCH_REG2, 1);
	partition2 = cmd++;	/* TBD #4b: partition info (from save) */

	/* mask off unused bits, then OR with shader instruction memory size */
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = REG_SCRATCH_REG2;
	/* AND off invalid bits. */
	*cmd++ = 0x0FFF0FFF;
	/* OR in instruction memory size.  */
	*cmd++ = adreno_encode_istore_size(adreno_dev);

	/* write the computed value to the SET_SHADER_BASES data field */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = REG_SCRATCH_REG2;
	/* TBD #5: shader bases (to restore) */
	*cmd++ = virt2gpu(shaderBases, &drawctxt->gpustate);

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd);

	/* save shader partitioning and instructions */

	save = cmd;		/* start address */

	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* fetch the SQ_INST_STORE_MANAGMENT register value,
	 *  store the value in the data fields of the SET_CONSTANT commands
	 *  above.
	 */
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = REG_SQ_INST_STORE_MANAGMENT;
	/* TBD #4a: partition info (to restore) */
	*cmd++ = virt2gpu(partition1, &drawctxt->gpustate);
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = REG_SQ_INST_STORE_MANAGMENT;
	/* TBD #4b: partition info (to fixup) */
	*cmd++ = virt2gpu(partition2, &drawctxt->gpustate);


	/* store the vertex shader instructions */
	*cmd++ = cp_type3_packet(CP_IM_STORE, 2);
	*cmd++ = tmp_ctx.shader_vertex + 0x0;	/* 0x0 = Vertex */
	/* TBD #1: start/size (to restore) */
	*cmd++ = virt2gpu(startSizeVtx, &drawctxt->gpustate);

	/* store the pixel shader instructions */
	*cmd++ = cp_type3_packet(CP_IM_STORE, 2);
	*cmd++ = tmp_ctx.shader_pixel + 0x1;	/* 0x1 = Pixel */
	/* TBD #2: start/size (to restore) */
	*cmd++ = virt2gpu(startSizePix, &drawctxt->gpustate);

	/* store the shared shader instructions if vertex base is nonzero */

	*cmd++ = cp_type3_packet(CP_IM_STORE, 2);
	*cmd++ = tmp_ctx.shader_shared + 0x2;	/* 0x2 = Shared */
	/* TBD #3: start/size (to restore) */
	*cmd++ = virt2gpu(startSizeShared, &drawctxt->gpustate);


	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	/* create indirect buffer command for above command sequence */
	create_ib1(drawctxt, drawctxt->shader_save, save, cmd);

	tmp_ctx.cmd = cmd;
}
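One note on the fixup sequence above: CP_REG_RMW performs a read-modify-write on a register, which is what lets the saved partition value be combined with the encoded istore size before CP_REG_TO_MEM copies it into the SET_SHADER_BASES data dword. The payload layout below matches how the packet is filled in above.

/*
 * CP_REG_RMW payload, as used in the fixup above:
 *   dword 1: register index   (REG_SCRATCH_REG2)
 *   dword 2: AND mask         (0x0FFF0FFF, strips the invalid bits)
 *   dword 3: OR value         (encoded instruction-store size)
 * net effect: reg = (reg & and_mask) | or_value;
 */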
Example #12
static uint32_t
adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
				struct adreno_context *context,
				unsigned int flags, unsigned int *cmds,
				int sizedwords)
{
	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
	unsigned int *ringcmds;
	unsigned int timestamp;
	unsigned int total_sizedwords = sizedwords;
	unsigned int i;
	unsigned int rcmd_gpu;
	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
	unsigned int gpuaddr = rb->device->memstore.gpuaddr;

	/*
	 * if the context was not created with per context timestamp
	 * support, we must use the global timestamp since issueibcmds
	 * will be returning that one.
	 */
	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)
		context_id = context->id;

	/*
	 * reserve space to temporarily turn off protected mode
	 * error checking if needed
	 */
	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
	total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0;
	/* 2 dwords to store the start of command sequence */
	total_sizedwords += 2;

	if (adreno_is_a3xx(adreno_dev))
		total_sizedwords += 7;

	total_sizedwords += 2; /* scratchpad ts for recovery */
	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) {
		total_sizedwords += 3; /* sop timestamp */
		total_sizedwords += 4; /* eop timestamp */
		total_sizedwords += 3; /* global timestamp without cache
					* flush for non-zero context */
	} else {
		total_sizedwords += 4; /* global timestamp for recovery */
	}

	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
	/*
	 * The GPU may hang during space allocation; if that's the case the
	 * current context may have hung the GPU.
	 */
	if (context->flags & CTXT_FLAGS_GPU_HANG) {
		KGSL_CTXT_WARN(rb->device,
		"Context %p caused a gpu hang. Will not accept commands for context %d\n",
		context, context->id);
		return rb->timestamp[context_id];
	}

	rcmd_gpu = rb->buffer_desc.gpuaddr
		+ sizeof(uint)*(rb->wptr-total_sizedwords);

	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* disable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
	}

	for (i = 0; i < sizedwords; i++) {
		GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds);
		cmds++;
	}

	if (flags & KGSL_CMD_FLAGS_PMODE) {
		/* re-enable protected mode error checking */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
	}

	
	/* always increment the global timestamp. once. */
	rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;
	if (context) {
		if (context_id == KGSL_MEMSTORE_GLOBAL)
			rb->timestamp[context_id] =
				rb->timestamp[KGSL_MEMSTORE_GLOBAL];
		else
			rb->timestamp[context_id]++;
	}
	timestamp = rb->timestamp[context_id];

	
	/* scratchpad ts for recovery */
	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);

	if (adreno_is_a3xx(adreno_dev)) {
		/*
		 * Flush HLSQ lazy updates to make sure there are no
		 * resources pending for indirect loads after the timestamp
		 */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x07); /* HLSQ_FLUSH */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
	}

	if (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) {
		/* start-of-pipeline timestamp */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

		
		/* end-of-pipeline timestamp */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);

		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_MEM_WRITE, 2));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			      KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				      eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
	} else {
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_EVENT_WRITE, 3));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			      KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
				      eoptimestamp)));
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
	}

	if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) {
		/* Conditional execution based on memory values */
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_COND_EXEC, 4));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(
				context_id, ts_cmp_enable)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
			KGSL_MEMSTORE_OFFSET(
				context_id, ref_wait_ts)) >> 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
		
		/* # of conditional command DWORDs */
		GSL_RB_WRITE(ringcmds, rcmd_gpu, 2);
		GSL_RB_WRITE(ringcmds, rcmd_gpu,
			cp_type3_packet(CP_INTERRUPT, 1));
		GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK);
	}

	adreno_ringbuffer_submit(rb);

	return timestamp;
}
Example #13
static void build_regrestore_cmds(struct adreno_device *adreno_dev,
				  struct adreno_context *drawctxt)
{
	unsigned int *start = tmp_ctx.cmd;
	unsigned int *cmd = start;

	unsigned int i = 0;
	unsigned int reg_array_size = 0;
	const unsigned int *ptr_register_ranges;

	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	
	
	/* H/W Registers */
	/* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
	cmd++;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	
	*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
	*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif

	
	if (adreno_is_a220(adreno_dev)) {
		ptr_register_ranges = register_ranges_a220;
		reg_array_size = ARRAY_SIZE(register_ranges_a220);
	} else if (adreno_is_a225(adreno_dev)) {
		ptr_register_ranges = register_ranges_a225;
		reg_array_size = ARRAY_SIZE(register_ranges_a225);
	} else {
		ptr_register_ranges = register_ranges_a20x;
		reg_array_size = ARRAY_SIZE(register_ranges_a20x);
	}


	for (i = 0; i < (reg_array_size/2); i++) {
		cmd = reg_range(cmd, ptr_register_ranges[i*2],
				ptr_register_ranges[i*2+1]);
	}

	/* Now we know how many register blocks we have, we can compute
	 * command length
	 */
	start[2] =
	    cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, (cmd - start) - 3);
	
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	start[4] |= (0 << 24) | (4 << 16);	
#else
	start[4] |= (1 << 24) | (4 << 16);
#endif

	
	*cmd++ = cp_type0_packet(REG_SQ_GPR_MANAGEMENT, 1);
	tmp_ctx.reg_values[0] = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0x00040400;

	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;
	*cmd++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
	tmp_ctx.reg_values[1] = virt2gpu(cmd, &drawctxt->gpustate);
	*cmd++ = 0x00000000;

	if (adreno_is_a20x(adreno_dev)) {
		*cmd++ = cp_type0_packet(REG_RB_BC_CONTROL, 1);
		tmp_ctx.reg_values[2] = virt2gpu(cmd, &drawctxt->gpustate);
		*cmd++ = 0x00000000;
	}

	if (adreno_is_a22x(adreno_dev)) {
		unsigned int i;
		unsigned int j = 2;
		for (i = REG_A220_VSC_BIN_SIZE; i <=
				REG_A220_VSC_PIPE_DATA_LENGTH_7; i++) {
			*cmd++ = cp_type0_packet(i, 1);
			tmp_ctx.reg_values[j] = virt2gpu(cmd,
				&drawctxt->gpustate);
			*cmd++ = 0x00000000;
			j++;
		}
	}

	
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	*cmd++ = (0 << 24) | (0 << 16) | 0;	
#else
	*cmd++ = (1 << 24) | (0 << 16) | 0;
#endif
	*cmd++ = ALU_CONSTANTS;

	
	*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
	*cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
	
	*cmd++ = (0 << 24) | (1 << 16) | 0;
#else
	*cmd++ = (1 << 24) | (1 << 16) | 0;
#endif
	*cmd++ = TEX_CONSTANTS;

	
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + BOOL_CONSTANTS);
	*cmd++ = (2 << 16) | 0;

	tmp_ctx.bool_shadow = virt2gpu(cmd, &drawctxt->gpustate);
	cmd += BOOL_CONSTANTS;

	
	*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + LOOP_CONSTANTS);
	*cmd++ = (3 << 16) | 0;

	tmp_ctx.loop_shadow = virt2gpu(cmd, &drawctxt->gpustate);
	cmd += LOOP_CONSTANTS;

	
	create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);

	tmp_ctx.cmd = cmd;
}
Example #14
static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
					 struct adreno_context *drawctxt,
					 struct gmem_shadow_t *shadow)
{
	unsigned int *cmds = shadow->gmem_restore_commands;
	unsigned int *start = cmds;

	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Store TP0_CHICKEN register */
		*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmds++ = REG_TP0_CHICKEN;
		*cmds++ = tmp_ctx.chicken_restore;

		*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
		*cmds++ = 0;
	}

	
	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
	*cmds++ = 0x00000000;

	
	*cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1);
	*cmds++ = 0x00000000;
	

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);

	*cmds++ = (0x1 << 16) | (9 * 6);
	
	*cmds++ = shadow->quad_vertices.gpuaddr | 0x3;
	
	*cmds++ = 0x00000030;
	
	*cmds++ = shadow->quad_texcoords.gpuaddr | 0x3;
	
	*cmds++ = 0x00000020;
	*cmds++ = 0;
	*cmds++ = 0;

	
	*cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1);
	*cmds++ = 0x1;

	cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN);

	
	*cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
	*cmds++ = adreno_dev->pix_shader_start;

	
	*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
	*cmds++ = 0x00000300; 

	*cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
	*cmds++ = adreno_encode_istore_size(adreno_dev)
		  | adreno_dev->pix_shader_start;

	
	cmds =
	    program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN);

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL);
	*cmds++ = 0x10030002;
	*cmds++ = 0x00000008;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SC_AA_MASK);
	*cmds++ = 0x0000ffff;	

	if (!adreno_is_a22x(adreno_dev)) {
		
		*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
		*cmds++ = CP_REG(REG_PA_SC_VIZ_QUERY);
		*cmds++ = 0x0;		
	}

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLORCONTROL);
	*cmds++ = 0x00000c20;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
	*cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX);
	*cmds++ = 0x00ffffff;	
	*cmds++ = 0x0;		
	*cmds++ = 0x00000000;	

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL);
	*cmds++ = 0x00000002;	
	*cmds++ = 0x00000002;	

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_SQ_INTERPOLATOR_CNTL);
	*cmds++ = 0xffffffff;	

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SC_AA_CONFIG);
	*cmds++ = 0x00000000;	

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL);
	*cmds++ = 0x00080240;

	
	*cmds++ =
	    cp_type3_packet(CP_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1));
	*cmds++ = (0x1 << 16) | (0 * 6);
	memcpy(cmds, sys2gmem_tex_const, SYS2GMEM_TEX_CONST_LEN << 2);
	cmds[0] |= (shadow->pitch >> 5) << 22;
	cmds[1] |=
	    shadow->gmemshadow.gpuaddr | surface_format_table[shadow->format];
	cmds[2] |= (shadow->width - 1) | (shadow->height - 1) << 13;
	cmds += SYS2GMEM_TEX_CONST_LEN;

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_RB_SURFACE_INFO);
	*cmds++ = shadow->gmem_pitch;	

	*cmds++ = (shadow->format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) |
		  tmp_ctx.gmem_base;

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_DEPTHCONTROL);

	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 8;		
	else
		*cmds++ = 0;		

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL);
	*cmds++ = (0 << 16) | 0;
	*cmds++ = ((0x1fff) << 16) | 0x1fff;
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL);
	*cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0);
	*cmds++ = ((0x1fff) << 16) | 0x1fff;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_VTE_CNTL);
	
	*cmds++ = 0x00000b00;

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE);
	*cmds++ = 0xbf800000;
	*cmds++ = 0x0;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_MASK);
	*cmds++ = 0x0000000f;	

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK);
	*cmds++ = 0xffffffff;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_WRAPPING_0);
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_MODECONTROL);
	
	*cmds++ = 0x4;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL);
	*cmds++ = 0x00010000;

	if (adreno_is_a22x(adreno_dev)) {
		*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
		*cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL);
		*cmds++ = 0x0000000;

		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
		*cmds++ = 0;           
		
		*cmds++ = 0x00004088;
		*cmds++ = 3;	       
	} else {
		
		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 2);
		*cmds++ = 0;		
		
		*cmds++ = 0x00030088;
	}

	
	create_ib1(drawctxt, shadow->gmem_restore, start, cmds);

	return cmds;
}
Example #15
static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
					 struct adreno_context *drawctxt,
					 struct gmem_shadow_t *shadow)
{
	unsigned int *cmds = shadow->gmem_save_commands;
	unsigned int *start = cmds;
	
	/* Calculate the new offset based on the adjusted base */
	unsigned int bytesperpixel = format2bytesperpixel[shadow->format];
	unsigned int addr = shadow->gmemshadow.gpuaddr;
	unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel;

	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
		/* Store TP0_CHICKEN register */
		*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
		*cmds++ = REG_TP0_CHICKEN;

		*cmds++ = tmp_ctx.chicken_restore;

		*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
		*cmds++ = 0;
	}

	
	*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
	*cmds++ = 0x00000000;

	
	*cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1);
	*cmds++ = 0x00000000;

	

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
	*cmds++ = (0x1 << 16) | SHADER_CONST_ADDR;
	*cmds++ = 0;
	
	*cmds++ = shadow->quad_vertices.gpuaddr | 0x3;
	
	*cmds++ = 0x00000030;

	
	*cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1);
	*cmds++ = 0x1;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
	*cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX);
	*cmds++ = 0x00ffffff;	
	*cmds++ = 0x0;		
	*cmds++ = 0x00000000;	

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SC_AA_MASK);
	*cmds++ = 0x0000ffff;	

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLORCONTROL);
	*cmds++ = 0x00000c20;

	
	*cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
	*cmds++ = adreno_dev->pix_shader_start;

	
	*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
	*cmds++ = 0x00003F00;

	*cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
	*cmds++ = adreno_encode_istore_size(adreno_dev)
		  | adreno_dev->pix_shader_start;

	
	cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN);

	
	cmds =
	    program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN);

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL);
	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 0x10018001;
	else
		*cmds++ = 0x10010001;
	*cmds++ = 0x00000008;

	

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_VTE_CNTL);
	
	*cmds++ = 0x00000b00;

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_RB_SURFACE_INFO);
	*cmds++ = shadow->gmem_pitch;	

	
	/* gmem base assumed 4K aligned. */
	BUG_ON(tmp_ctx.gmem_base & 0xFFF);
	*cmds++ = (shadow->format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) |
		  tmp_ctx.gmem_base;

	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_DEPTHCONTROL);
	if (adreno_is_a22x(adreno_dev))
		*cmds++ = 0x08;
	else
		*cmds++ = 0;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL);
	*cmds++ = 0x00080240;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL);
	*cmds++ = (0 << 16) | 0;
	*cmds++ = (0x1fff << 16) | (0x1fff);
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL);
	*cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0);
	*cmds++ = (0x1fff << 16) | (0x1fff);

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE);
	*cmds++ = 0xbf800000;	
	*cmds++ = 0x0;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_MASK);
	*cmds++ = 0x0000000f;	

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK);
	*cmds++ = 0xffffffff;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
	*cmds++ = CP_REG(REG_SQ_WRAPPING_0);
	*cmds++ = 0x00000000;
	*cmds++ = 0x00000000;


	
	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
	*cmds++ = CP_REG(REG_RB_COPY_CONTROL);
	*cmds++ = 0;		
	*cmds++ = addr & 0xfffff000;	
	*cmds++ = shadow->pitch >> 5;	

	*cmds++ = 0x0003c008 |
	    (shadow->format << RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT);
	
	/* Make sure we stay in offsetx field. */
	BUG_ON(offset & 0xfffff000);
	*cmds++ = offset;

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_RB_MODECONTROL);
	*cmds++ = 0x6;		

	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
	*cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL);
	*cmds++ = 0x00010000;

	if (adreno_is_a22x(adreno_dev)) {
		*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
		*cmds++ = CP_REG(REG_A220_RB_LRZ_VSC_CONTROL);
		*cmds++ = 0x0000000;

		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
		*cmds++ = 0;           
		
		*cmds++ = 0x00004088;
		*cmds++ = 3;	       
	} else {
		
		*cmds++ = cp_type3_packet(CP_DRAW_INDX, 2);
		*cmds++ = 0;		
		
		*cmds++ = 0x00030088;
	}

	
	create_ib1(drawctxt, shadow->gmem_save, start, cmds);

	return cmds;
}
Example #16
static void
build_shader_save_restore_cmds(struct adreno_device *adreno_dev,
				struct adreno_context *drawctxt)
{
	unsigned int *cmd = tmp_ctx.cmd;
	unsigned int *save, *restore, *fixup;
	unsigned int *startSizeVtx, *startSizePix, *startSizeShared;
	unsigned int *partition1;
	unsigned int *shaderBases, *partition2;

	
	tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET;
	tmp_ctx.shader_pixel = tmp_ctx.shader_vertex
				+ _shader_shadow_size(adreno_dev);
	tmp_ctx.shader_shared = tmp_ctx.shader_pixel
				+  _shader_shadow_size(adreno_dev);

	

	restore = cmd;		

	
	*cmd++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
	*cmd++ = 0x00000300;	

	
	*cmd++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
	shaderBases = cmd++;	

	
	*cmd++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
	partition1 = cmd++;	

	
	*cmd++ = cp_type3_packet(CP_IM_LOAD, 2);
	*cmd++ = tmp_ctx.shader_vertex + 0x0;	
	startSizeVtx = cmd++;	

	
	*cmd++ = cp_type3_packet(CP_IM_LOAD, 2);
	*cmd++ = tmp_ctx.shader_pixel + 0x1;	
	startSizePix = cmd++;	

	
	*cmd++ = cp_type3_packet(CP_IM_LOAD, 2);
	*cmd++ = tmp_ctx.shader_shared + 0x2;	
	startSizeShared = cmd++;	

	
	create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd);

	/*
	 *  fixup SET_SHADER_BASES data
	 *
	 *  since self-modifying PM4 code is being used here, a separate
	 *  command buffer is used for this fixup operation, to ensure the
	 *  commands are not read by the PM4 engine before the data fields
	 *  have been written.
	 */

	fixup = cmd;		

	
	*cmd++ = cp_type0_packet(REG_SCRATCH_REG2, 1);
	partition2 = cmd++;	

	
	*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
	*cmd++ = REG_SCRATCH_REG2;
	
	*cmd++ = 0x0FFF0FFF;
	
	*cmd++ = adreno_encode_istore_size(adreno_dev);

	
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = REG_SCRATCH_REG2;
	
	*cmd++ = virt2gpu(shaderBases, &drawctxt->gpustate);

	
	create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd);

	

	save = cmd;		

	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = REG_SQ_INST_STORE_MANAGMENT;
	
	*cmd++ = virt2gpu(partition1, &drawctxt->gpustate);
	*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
	*cmd++ = REG_SQ_INST_STORE_MANAGMENT;
	
	*cmd++ = virt2gpu(partition2, &drawctxt->gpustate);


	
	*cmd++ = cp_type3_packet(CP_IM_STORE, 2);
	*cmd++ = tmp_ctx.shader_vertex + 0x0;	
	
	*cmd++ = virt2gpu(startSizeVtx, &drawctxt->gpustate);

	
	*cmd++ = cp_type3_packet(CP_IM_STORE, 2);
	*cmd++ = tmp_ctx.shader_pixel + 0x1;	
	
	*cmd++ = virt2gpu(startSizePix, &drawctxt->gpustate);

	

	*cmd++ = cp_type3_packet(CP_IM_STORE, 2);
	*cmd++ = tmp_ctx.shader_shared + 0x2;	
	
	*cmd++ = virt2gpu(startSizeShared, &drawctxt->gpustate);


	*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
	*cmd++ = 0;

	
	create_ib1(drawctxt, drawctxt->shader_save, save, cmd);

	tmp_ctx.cmd = cmd;
}