void gen9_render_copyfunc(struct intel_batchbuffer *batch,
			  drm_intel_context *context,
			  struct igt_buf *src, unsigned src_x, unsigned src_y,
			  unsigned width, unsigned height,
			  struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
{
	uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
	uint32_t scissor_state;
	uint32_t vertex_buffer;
	uint32_t batch_end;

	intel_batchbuffer_flush_with_context(batch, context);

	batch_align(batch, 8);

	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];

	annotation_init(&aub_annotations);

	ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
	ps_sampler_state  = gen8_create_sampler(batch);
	ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel));
	vertex_buffer = gen7_fill_vertex_buffer_data(batch, src,
						     src_x, src_y,
						     dst_x, dst_y,
						     width, height);
	cc.cc_state = gen6_create_cc_state(batch);
	cc.blend_state = gen8_create_blend_state(batch);
	viewport.cc_state = gen6_create_cc_viewport(batch);
	viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch);
	scissor_state = gen6_create_scissor_rect(batch);
	/* TODO: theree is other state which isn't setup */

	assert(batch->ptr < &batch->buffer[4095]);

	batch->ptr = batch->buffer;

	/* Start emitting the commands. The order roughly follows the mesa blorp
	 * order */
	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D |
				GEN9_PIPELINE_SELECTION_MASK);

	gen8_emit_sip(batch);

	gen7_emit_push_constants(batch);

	gen9_emit_state_base_address(batch);

	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	OUT_BATCH(viewport.cc_state);
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	OUT_BATCH(viewport.sf_clip_state);

	gen7_emit_urb(batch);

	gen8_emit_cc(batch);

	gen8_emit_multisample(batch);

	gen8_emit_null_state(batch);

	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	gen7_emit_clip(batch);

	gen8_emit_sf(batch);

	gen8_emit_ps(batch, ps_kernel_off);

	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
	OUT_BATCH(ps_binding_table);

	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS);
	OUT_BATCH(ps_sampler_state);

	OUT_BATCH(GEN6_3DSTATE_SCISSOR_STATE_POINTERS);
	OUT_BATCH(scissor_state);

	gen9_emit_depth(batch);

	gen7_emit_clear(batch);

	gen6_emit_drawing_rectangle(batch, dst);

	gen7_emit_vertex_buffer(batch, vertex_buffer);
	gen6_emit_vertex_elements(batch);

	gen8_emit_vf_topology(batch);
	gen8_emit_primitive(batch, vertex_buffer);

	OUT_BATCH(MI_BATCH_BUFFER_END);

	batch_end = batch_align(batch, 8);
	assert(batch_end < BATCH_STATE_SPLIT);
	annotation_add_batch(&aub_annotations, batch_end);

	dump_batch(batch);

	annotation_flush(&aub_annotations, batch);

	gen6_render_flush(batch, context, batch_end);
	intel_batchbuffer_reset(batch);
}
void gen6_render_copyfunc(struct intel_batchbuffer *batch,
			  struct scratch_buf *src, unsigned src_x, unsigned src_y,
			  unsigned width, unsigned height,
			  struct scratch_buf *dst, unsigned dst_x, unsigned dst_y)
{
	uint32_t wm_state, wm_kernel, wm_table;
	uint32_t cc_vp, cc_blend, offset;
	uint32_t batch_end;

	intel_batchbuffer_flush(batch);

	batch->ptr = batch->buffer + 1024;
	batch_alloc(batch, 64, 64);
	wm_table  = gen6_bind_surfaces(batch, src, dst);
	wm_kernel = gen6_create_kernel(batch);
	wm_state  = gen6_create_sampler(batch,
					SAMPLER_FILTER_NEAREST,
					SAMPLER_EXTEND_NONE);

	cc_vp = gen6_create_cc_viewport(batch);
	cc_blend = gen6_create_cc_blend(batch);

	batch->ptr = batch->buffer;

	gen6_emit_invariant(batch);
	gen6_emit_state_base_address(batch);

	gen6_emit_sip(batch);
	gen6_emit_urb(batch);

	gen6_emit_viewports(batch, cc_vp);
	gen6_emit_vs(batch);
	gen6_emit_gs(batch);
	gen6_emit_clip(batch);
	gen6_emit_wm_constants(batch);
	gen6_emit_null_depth_buffer(batch);

	gen6_emit_drawing_rectangle(batch, dst);
	gen6_emit_cc(batch, cc_blend);
	gen6_emit_sampler(batch, wm_state);
	gen6_emit_sf(batch);
	gen6_emit_wm(batch, wm_kernel);
	gen6_emit_vertex_elements(batch);
	gen6_emit_binding_table(batch, wm_table);

	gen6_emit_vertex_buffer(batch);
	offset = gen6_emit_primitive(batch);

	OUT_BATCH(MI_BATCH_BUFFER_END);
	batch_end = batch_align(batch, 8);

	*(uint32_t*)(batch->buffer + offset) =
		batch_round_upto(batch, VERTEX_SIZE)/VERTEX_SIZE;

	emit_vertex_2s(batch, dst_x + width, dst_y + height);
	emit_vertex_normalized(batch, src_x + width, buf_width(src));
	emit_vertex_normalized(batch, src_y + height, buf_height(src));

	emit_vertex_2s(batch, dst_x, dst_y + height);
	emit_vertex_normalized(batch, src_x, buf_width(src));
	emit_vertex_normalized(batch, src_y + height, buf_height(src));

	emit_vertex_2s(batch, dst_x, dst_y);
	emit_vertex_normalized(batch, src_x, buf_width(src));
	emit_vertex_normalized(batch, src_y, buf_height(src));

	gen6_render_flush(batch, batch_end);
	intel_batchbuffer_reset(batch);
}