Example #1
static void draw_rect_mmap_wc(int fd, struct buf_data *buf, struct rect *rect,
			      uint32_t color)
{
	uint32_t *ptr;
	uint32_t tiling, swizzle;

	gem_set_domain(fd, buf->handle, I915_GEM_DOMAIN_GTT,
		       I915_GEM_DOMAIN_GTT);
	gem_get_tiling(fd, buf->handle, &tiling, &swizzle);

	/* We haven't implemented support for the older tiling methods yet. */
	if (tiling != I915_TILING_NONE)
		igt_require(intel_gen(intel_get_drm_devid(fd)) >= 5);

	ptr = gem_mmap__wc(fd, buf->handle, 0, buf->size,
			   PROT_READ | PROT_WRITE);

	switch (tiling) {
	case I915_TILING_NONE:
		draw_rect_ptr_linear(ptr, buf->stride, rect, color, buf->bpp);
		break;
	case I915_TILING_X:
		draw_rect_ptr_tiled(ptr, buf->stride, swizzle, rect, color,
				    buf->bpp);
		break;
	default:
		igt_assert(false);
		break;
	}

	igt_assert(munmap(ptr, buf->size) == 0);
}
static void
check_test_requirements(int fd, int ringid)
{
	gem_require_ring(fd, ringid);
	igt_skip_on_f(intel_gen(devid) == 6 && ringid == I915_EXEC_BSD,
		      "MI_STORE_DATA broken on gen6 bsd\n");
}
static void run_test(int fd, unsigned ring, unsigned flags)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_relocation_entry reloc[1024];
	struct drm_i915_gem_execbuffer2 execbuf;
	struct igt_hang_ring hang;
	uint32_t *batch, *b;
	int i;

	gem_require_ring(fd, ring);
	igt_skip_on_f(gen == 6 && (ring & ~(3<<13)) == I915_EXEC_BSD,
		      "MI_STORE_DATA broken on gen6 bsd\n");

	gem_quiescent_gpu(fd);

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)obj;
	execbuf.buffer_count = 2;
	execbuf.flags = ring | (1 << 11); /* I915_EXEC_NO_RELOC */
	if (gen < 6)
		execbuf.flags |= I915_EXEC_SECURE;

	memset(obj, 0, sizeof(obj));
	obj[0].handle = gem_create(fd, 4096);
	obj[0].flags |= EXEC_OBJECT_WRITE;
	obj[1].handle = gem_create(fd, 1024*16 + 4096);
	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
	igt_require(__gem_execbuf(fd, &execbuf) == 0);

	obj[1].relocs_ptr = (uintptr_t)reloc;
	obj[1].relocation_count = 1024;

	batch = gem_mmap__cpu(fd, obj[1].handle, 0, 16*1024 + 4096,
			      PROT_WRITE | PROT_READ);
	gem_set_domain(fd, obj[1].handle,
		       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);

	memset(reloc, 0, sizeof(reloc));
	b = batch;
	for (i = 0; i < 1024; i++) {
		uint64_t offset;

		reloc[i].target_handle = obj[0].handle;
		reloc[i].presumed_offset = obj[0].offset;
		reloc[i].offset = (b - batch + 1) * sizeof(*batch);
		reloc[i].delta = i * sizeof(uint32_t);
		reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
		reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;

		offset = obj[0].offset + reloc[i].delta;
		*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
		if (gen >= 8) {
			*b++ = offset;
			*b++ = offset >> 32;
		} else if (gen >= 4) {
Example #4
static int gem_linear_blt(int fd,
                          uint32_t *batch,
                          uint32_t src,
                          uint32_t dst,
                          uint32_t length,
                          struct drm_i915_gem_relocation_entry *reloc)
{
    uint32_t *b = batch;

    *b++ = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
    *b++ = 0x66 << 16 | 1 << 25 | 1 << 24 | (4*1024);
    *b++ = 0;
    *b++ = (length / (4*1024)) << 16 | 1024;
    *b++ = 0;
    reloc->offset = (b-batch-1) * sizeof(uint32_t);
    reloc->delta = 0;
    reloc->target_handle = dst;
    reloc->read_domains = I915_GEM_DOMAIN_RENDER;
    reloc->write_domain = I915_GEM_DOMAIN_RENDER;
    reloc->presumed_offset = 0;
    reloc++;
    if (intel_gen(intel_get_drm_devid(fd)) >= 8)
        *b++ = 0; /* FIXME */

    *b++ = 0;
    *b++ = 4*1024;
    *b++ = 0;
    reloc->offset = (b-batch-1) * sizeof(uint32_t);
    reloc->delta = 0;
    reloc->target_handle = src;
    reloc->read_domains = I915_GEM_DOMAIN_RENDER;
    reloc->write_domain = 0;
    reloc->presumed_offset = 0;
    reloc++;
    if (intel_gen(intel_get_drm_devid(fd)) >= 8)
        *b++ = 0; /* FIXME */

    *b++ = MI_BATCH_BUFFER_END;
    *b++ = 0;

    return (b - batch) * sizeof(uint32_t);
}
Example #5
static void draw_rect_blt(int fd, struct cmd_data *cmd_data,
			  struct buf_data *buf, struct rect *rect,
			  uint32_t color)
{
	drm_intel_bo *dst;
	struct intel_batchbuffer *batch;
	int blt_cmd_len, blt_cmd_tiling, blt_cmd_depth;
	uint32_t devid = intel_get_drm_devid(fd);
	int gen = intel_gen(devid);
	uint32_t tiling, swizzle;
	int pitch;

	gem_get_tiling(fd, buf->handle, &tiling, &swizzle);

	dst = gem_handle_to_libdrm_bo(cmd_data->bufmgr, fd, "", buf->handle);
	igt_assert(dst);

	batch = intel_batchbuffer_alloc(cmd_data->bufmgr, devid);
	igt_assert(batch);

	switch (buf->bpp) {
	case 8:
		blt_cmd_depth = 0;
		break;
	case 16: /* we're assuming 565 */
		blt_cmd_depth = 1 << 24;
		break;
	case 32:
		blt_cmd_depth = 3 << 24;
		break;
	default:
		igt_assert(false);
	}

	blt_cmd_len = (gen >= 8) ?  0x5 : 0x4;
	blt_cmd_tiling = (tiling) ? XY_COLOR_BLT_TILED : 0;
	pitch = (tiling) ? buf->stride / 4 : buf->stride;

	BEGIN_BATCH(6, 1);
	OUT_BATCH(XY_COLOR_BLT_CMD_NOLEN | XY_COLOR_BLT_WRITE_ALPHA |
		  XY_COLOR_BLT_WRITE_RGB | blt_cmd_tiling | blt_cmd_len);
	OUT_BATCH(blt_cmd_depth | (0xF0 << 16) | pitch);
	OUT_BATCH((rect->y << 16) | rect->x);
	OUT_BATCH(((rect->y + rect->h) << 16) | (rect->x + rect->w));
	OUT_RELOC_FENCED(dst, 0, I915_GEM_DOMAIN_RENDER, 0);
	OUT_BATCH(color);
	ADVANCE_BATCH();

	intel_batchbuffer_flush(batch);
	intel_batchbuffer_free(batch);
}
Example #6
static void draw_rect_pwrite_tiled(int fd, struct buf_data *buf,
				   struct rect *rect, uint32_t color,
				   uint32_t swizzle)
{
	int i;
	int tiled_pos, x, y, pixel_size;
	uint8_t tmp[4096];
	int tmp_used = 0, tmp_size;
	bool flush_tmp = false;
	int tmp_start_pos = 0;
	int pixels_written = 0;

	/* We haven't implemented support for the older tiling methods yet. */
	igt_require(intel_gen(intel_get_drm_devid(fd)) >= 5);

	pixel_size = buf->bpp / 8;
	tmp_size = sizeof(tmp) / pixel_size;

	/* Instead of doing one pwrite per pixel, we try to group the maximum
	 * number of consecutive pixels we can into a single pwrite: that's why
	 * we use the "tmp" variables. */
	for (i = 0; i < tmp_size; i++)
		set_pixel(tmp, i, color, buf->bpp);

	for (tiled_pos = 0; tiled_pos < buf->size; tiled_pos += pixel_size) {
		tiled_pos_to_x_y_linear(tiled_pos, buf->stride, swizzle,
					buf->bpp, &x, &y);

		if (x >= rect->x && x < rect->x + rect->w &&
		    y >= rect->y && y < rect->y + rect->h) {
			if (tmp_used == 0)
				tmp_start_pos = tiled_pos;
			tmp_used++;
		} else {
			flush_tmp = true;
		}

		if (tmp_used == tmp_size || (flush_tmp && tmp_used > 0) ||
		    tiled_pos + pixel_size >= buf->size) {
			gem_write(fd, buf->handle, tmp_start_pos, tmp,
				  tmp_used * pixel_size);
			flush_tmp = false;
			pixels_written += tmp_used;
			tmp_used = 0;

			if (pixels_written == rect->w * rect->h)
				break;
		}
	}
}
static void test_connector(const char *test_name,
			   struct kmstest_connector_config *cconf,
			   enum test_flags flags)
{
	const uint32_t *formats;
	int format_count;
	int i;

	igt_get_all_formats(&formats, &format_count);
	for (i = 0; i < format_count; i++) {
		if (intel_gen(intel_get_drm_devid(drm_fd)) < 4
		    && formats[i] == DRM_FORMAT_XRGB2101010) {
			igt_info("gen2/3 don't support 10bpc, skipping\n");
			continue;
		}

		test_format(test_name,
			    cconf, &cconf->connector->modes[0],
			    formats[i], flags);
	}
}
static unsigned get_num_contexts(int fd)
{
	uint64_t ggtt_size;
	unsigned size;
	unsigned count;

	/* Compute the number of contexts we can allocate to fill the GGTT */
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		ggtt_size = 1ull << 32;
	else
		ggtt_size = 1ull << 31;

	size = 64 << 10; /* Most gen require at least 64k for ctx */

	count = 3 * (ggtt_size / size) / 2;
	igt_info("Creating %lld contexts (assuming of size %lld)\n",
		 (long long)count, (long long)size);

	intel_require_memory(count, size, CHECK_RAM | CHECK_SWAP);
	return count;
}
static void write_dword(int fd,
			uint32_t target_handle,
			uint64_t target_offset,
			uint32_t value)
{
	int gen = intel_gen(intel_get_drm_devid(fd));
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_relocation_entry reloc;
	uint32_t buf[16];
	int i;

	memset(obj, 0, sizeof(obj));
	obj[0].handle = target_handle;
	obj[1].handle = gem_create(fd, 4096);

	i = 0;
	buf[i++] = MI_STORE_DWORD_IMM | (gen < 6 ? 1<<22 : 0);
	if (gen >= 8) {
		buf[i++] = target_offset;
		buf[i++] = target_offset >> 32;
	} else if (gen >= 4) {
struct intel_register_map
intel_get_register_map(uint32_t devid)
{
	struct intel_register_map map;
	const int gen = intel_gen(devid);

	if (gen >= 6) {
		map.map = gen6_gt_register_map;
		map.top = 0x180000;
	} else if (IS_BROADWATER(devid) || IS_CRESTLINE(devid)) {
		map.map = gen_bwcl_register_map;
		map.top = 0x80000;
	} else if (gen >= 4) {
		map.map = gen4_register_map;
		map.top = 0x80000;
	} else {
		igt_fail_on("Gen2/3 Ranges are not supported. Please use ""unsafe access.");
	}

	map.alignment_mask = 0x3;

	return map;
}
/**
 * intel_register_write:
 * @reg: register offset
 * @val: value to write
 *
 * 32-bit write to the register at @reg. This function only works after the
 * register access helper has been initialized with intel_register_access_init().
 *
 * Compared to OUTREG(), it can optionally check writes against the register
 * access white lists.
 */
void
intel_register_write(uint32_t reg, uint32_t val)
{
	struct intel_register_range *range;

	igt_assert(mmio_data.inited);

	if (intel_gen(mmio_data.i915_devid) >= 6)
		igt_assert(mmio_data.key != -1);

	if (!mmio_data.safe)
		goto write_out;

	range = intel_get_register_range(mmio_data.map,
					 reg,
					 INTEL_RANGE_WRITE);

	igt_warn_on_f(!range,
		      "Register write blocked for safety "
		      "(*0x%08x = 0x%x)\n", reg, val);

write_out:
	*(volatile uint32_t *)((volatile char *)igt_global_mmio + reg) = val;
}
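/* The helper above only works after the MMIO access layer has been brought
 * up. The sketch below shows one way a caller might drive it; the exact
 * intel_register_access_init() signature differs between IGT releases, so the
 * two-argument form used here is an assumption, as is the poke_register()
 * wrapper itself.
 */
static void poke_register(uint32_t reg, uint32_t val)
{
	/* Map the GPU MMIO BAR; "safe" = 1 enables the white-list checks. */
	igt_assert(intel_register_access_init(intel_get_pci_device(), 1) == 0);

	intel_register_write(reg, val);
	igt_debug("0x%08x now reads back 0x%08x\n",
		  reg, intel_register_read(reg));

	intel_register_access_fini();
}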
static void
copy(int fd, uint32_t dst, uint32_t src, unsigned int error)
{
	uint32_t batch[12];
	struct drm_i915_gem_relocation_entry reloc[2];
	struct drm_i915_gem_exec_object2 obj[3];
	struct drm_i915_gem_execbuffer2 exec;
	uint32_t handle;
	int ret, i=0;

	batch[i++] = XY_SRC_COPY_BLT_CMD |
		  XY_SRC_COPY_BLT_WRITE_ALPHA |
		  XY_SRC_COPY_BLT_WRITE_RGB;
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		batch[i - 1] |= 8;
	else
		batch[i - 1] |= 6;

	batch[i++] = (3 << 24) | /* 32 bits */
		  (0xcc << 16) | /* copy ROP */
		  WIDTH*4;
	batch[i++] = 0; /* dst x1,y1 */
	batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
	batch[i++] = 0; /* dst reloc */
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		batch[i++] = 0;
	batch[i++] = 0; /* src x1,y1 */
	batch[i++] = WIDTH*4;
	batch[i++] = 0; /* src reloc */
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		batch[i++] = 0;
	batch[i++] = MI_BATCH_BUFFER_END;
	batch[i++] = MI_NOOP;

	handle = gem_create(fd, 4096);
	gem_write(fd, handle, 0, batch, sizeof(batch));

	reloc[0].target_handle = dst;
	reloc[0].delta = 0;
	reloc[0].offset = 4 * sizeof(batch[0]);
	reloc[0].presumed_offset = 0;
	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;

	reloc[1].target_handle = src;
	reloc[1].delta = 0;
	reloc[1].offset = 7 * sizeof(batch[0]);
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		reloc[1].offset += sizeof(batch[0]);
	reloc[1].presumed_offset = 0;
	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
	reloc[1].write_domain = 0;

	memset(obj, 0, sizeof(obj));
	exec.buffer_count = 0;
	obj[exec.buffer_count++].handle = dst;
	if (src != dst)
		obj[exec.buffer_count++].handle = src;
	obj[exec.buffer_count].handle = handle;
	obj[exec.buffer_count].relocation_count = 2;
	obj[exec.buffer_count].relocs_ptr = (uintptr_t)reloc;
	exec.buffer_count++;
	exec.buffers_ptr = (uintptr_t)obj;

	exec.batch_start_offset = 0;
	exec.batch_len = i * 4;
	exec.DR1 = exec.DR4 = 0;
	exec.num_cliprects = 0;
	exec.cliprects_ptr = 0;
	exec.flags = HAS_BLT_RING(intel_get_drm_devid(fd)) ? I915_EXEC_BLT : 0;
	i915_execbuffer2_set_context_id(exec, 0);
	exec.rsvd2 = 0;

	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
	if (ret)
		ret = errno;

	if (error == ~0)
		igt_assert_neq(ret, 0);
	else
		igt_assert(ret == error);

	gem_close(fd, handle);
}
Example #13
static uint32_t busy_blt(int fd)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	const int has_64bit_reloc = gen >= 8;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 object[2];
	struct drm_i915_gem_relocation_entry reloc[200], *r;
	uint32_t read, write;
	uint32_t *map;
	int factor = 100;
	int i = 0;

	memset(object, 0, sizeof(object));
	object[0].handle = gem_create(fd, 1024*1024);
	object[1].handle = gem_create(fd, 4096);

	r = memset(reloc, 0, sizeof(reloc));
	map = gem_mmap__cpu(fd, object[1].handle, 0, 4096, PROT_WRITE);
	gem_set_domain(fd, object[1].handle,
		       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);

#define COPY_BLT_CMD		(2<<29|0x53<<22|0x6)
#define BLT_WRITE_ALPHA		(1<<21)
#define BLT_WRITE_RGB		(1<<20)
	while (factor--) {
		/* XY_SRC_COPY */
		map[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (has_64bit_reloc)
			map[i-1] += 2;
		map[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (4*1024);
		map[i++] = 0;
		map[i++] = 256 << 16 | 1024;

		r->offset = i * sizeof(uint32_t);
		r->target_handle = object[0].handle;
		r->read_domains = I915_GEM_DOMAIN_RENDER;
		r->write_domain = I915_GEM_DOMAIN_RENDER;
		r++;
		map[i++] = 0;
		if (has_64bit_reloc)
			map[i++] = 0;

		map[i++] = 0;
		map[i++] = 4096;

		r->offset = i * sizeof(uint32_t);
		r->target_handle = object[0].handle;
		r->read_domains = I915_GEM_DOMAIN_RENDER;
		r->write_domain = 0;
		r++;
		map[i++] = 0;
		if (has_64bit_reloc)
			map[i++] = 0;
	}
	map[i++] = MI_BATCH_BUFFER_END;
	igt_assert(i <= 4096/sizeof(uint32_t));
	igt_assert(r - reloc <= ARRAY_SIZE(reloc));
	munmap(map, 4096);

	object[1].relocs_ptr = (uintptr_t)reloc;
	object[1].relocation_count = r - reloc;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (unsigned long)object;
	execbuf.buffer_count = 2;
	if (gen >= 6)
		execbuf.flags = I915_EXEC_BLT;
	gem_execbuf(fd, &execbuf);

	__gem_busy(fd, object[0].handle, &read, &write);
	igt_assert_eq(read, 1 << write);
	igt_assert_eq(write, gen >= 6 ? I915_EXEC_BLT : I915_EXEC_RENDER);

	igt_debug("Created busy handle %d\n", object[0].handle);
	gem_close(fd, object[1].handle);
	return object[0].handle;
}
Example #14
static int run(int object, int batch, int time, int reps)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 exec[3];
	struct drm_i915_gem_relocation_entry *reloc;
	uint32_t *buf, handle, src, dst;
	int fd, len, gen, size, nreloc;
	int ring, count;

	size = ALIGN(batch * 64, 4096);
	reloc = malloc(sizeof(*reloc)*size/32*2);

	fd = drm_open_driver(DRIVER_INTEL);
	handle = gem_create(fd, size);
	buf = gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE);

	gen = intel_gen(intel_get_drm_devid(fd));
	has_64bit_reloc = gen >= 8;

	src = gem_create(fd, object);
	dst = gem_create(fd, object);

	len = gem_linear_blt(fd, buf, 0, 0, 1, object, reloc);
	if (has_64bit_reloc)
		nreloc = len > 56 ? 4 : 2;
	else
		nreloc = len > 40 ? 4 : 2;

	memset(exec, 0, sizeof(exec));
	exec[0].handle = src;
	exec[1].handle = dst;

	exec[2].handle = handle;
	exec[2].relocs_ptr = (uintptr_t)reloc;
	exec[2].relocation_count = nreloc;

	ring = 0;
	if (gen >= 6)
		ring = I915_EXEC_BLT;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)exec;
	execbuf.buffer_count = 3;
	execbuf.batch_len = len;
	execbuf.flags = ring;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;

	if (__gem_execbuf(fd, &execbuf)) {
		gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
		len = gem_linear_blt(fd, buf, 0, src, dst, object, reloc);
		igt_assert(len == execbuf.batch_len);
		execbuf.flags = ring;
		gem_execbuf(fd, &execbuf);
	}
	gem_sync(fd, handle);

	if (batch > 1) {
		if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT) {
			src = 0;
			dst = 1;
		}

		gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
		for (int i = 1; i < batch; i++) {
			len = gem_linear_blt(fd, buf, len - 8,
					     src, dst, object,
					     reloc + nreloc * i);
		}
		exec[2].relocation_count = nreloc * batch;
		execbuf.batch_len = len;

		gem_execbuf(fd, &execbuf);
		gem_sync(fd, handle);
	}
	if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT)
		execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;

	/* Guess how many loops we need for 0.1s */
	count = baseline((uint64_t)object * batch, 100);

	while (reps--) {
		double min = HUGE_VAL;

		for (int s = 0; s <= time / 100; s++) {
			struct timespec start, end;
			double t;

			clock_gettime(CLOCK_MONOTONIC, &start);
			for (int loop = 0; loop < count; loop++)
				gem_execbuf(fd, &execbuf);
			gem_sync(fd, handle);
			clock_gettime(CLOCK_MONOTONIC, &end);

			t = elapsed(&start, &end);
			if (t < min)
				min = t;
		}

		printf("%7.3f\n", object/(1024*1024.)*batch*count/min);
	}

	close(fd);
	return 0;
}
static void
store_dword_loop(int fd, int ring, int count, int divider)
{
	int i, val = 0;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_relocation_entry reloc[divider];
	uint32_t handle[divider];
	uint32_t *batch[divider];
	uint32_t *target;
	int gen = intel_gen(devid);

	memset(obj, 0, sizeof(obj));
	obj[0].handle = gem_create(fd, 4096);
	target = mmap_coherent(fd, obj[0].handle, 4096);

	memset(reloc, 0, sizeof(reloc));
	for (i = 0; i < divider; i++) {
		uint32_t *b;

		handle[i] = gem_create(fd, 4096);
		batch[i] = mmap_coherent(fd, handle[i], 4096);
		gem_set_domain(fd, handle[i], coherent_domain, coherent_domain);

		b = batch[i];
		*b++ = MI_STORE_DWORD_IMM;
		*b++ = 0;
		*b++ = 0;
		*b++ = 0;
		*b++ = MI_BATCH_BUFFER_END;

		reloc[i].target_handle = obj[0].handle;
		reloc[i].offset = 4;
		if (gen < 8)
			reloc[i].offset += 4;
		reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
		reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
		obj[1].relocation_count = 1;
	}

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)obj;
	execbuf.buffer_count = 2;
	execbuf.flags = ring;

	igt_info("running storedw loop on render with stall every %i batch\n", divider);

	for (i = 0; i < SLOW_QUICK(0x2000, 0x10); i++) {
		int j = i % divider;

		gem_set_domain(fd, handle[j], coherent_domain, coherent_domain);
		batch[j][3] = val;
		obj[1].handle = handle[j];
		obj[1].relocs_ptr = (uintptr_t)&reloc[j];
		gem_execbuf(fd, &execbuf);

		if (j == 0) {
			gem_set_domain(fd, obj[0].handle, coherent_domain, 0);
			igt_assert_f(*target == val,
				     "%d: value mismatch: stored 0x%08x, expected 0x%08x\n",
				     i, *target, val);
		}

		val++;
	}

	gem_set_domain(fd, obj[0].handle, coherent_domain, 0);
	igt_info("completed %d writes successfully, current value: 0x%08x\n",
		 i, target[0]);

	munmap(target, 4096);
	gem_close(fd, obj[0].handle);
	for (i = 0; i < divider; ++i) {
		munmap(batch[i], 4096);
		gem_close(fd, handle[i]);
	}
}
static int negative_reloc_blt(int fd)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 gem_exec[1024][2];
	struct drm_i915_gem_relocation_entry gem_reloc;
	uint32_t buf[1024], *b;
	int i;

	memset(&gem_reloc, 0, sizeof(gem_reloc));
	gem_reloc.offset = 4 * sizeof(uint32_t);
	gem_reloc.presumed_offset = ~0ULL;
	gem_reloc.delta = -4096;
	gem_reloc.target_handle = 0;
	gem_reloc.read_domains = I915_GEM_DOMAIN_RENDER;
	gem_reloc.write_domain = I915_GEM_DOMAIN_RENDER;

	for (i = 0; i < 1024; i++) {
		memset(gem_exec[i], 0, sizeof(gem_exec[i]));

		gem_exec[i][0].handle = gem_create(fd, 4096);
		gem_exec[i][0].flags = EXEC_OBJECT_NEEDS_FENCE;

		b = buf;
		*b++ = XY_COLOR_BLT_CMD_NOLEN |
			((gen >= 8) ? 5 : 4) |
			COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
		*b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096;
		*b++ = 1 << 16 | 0;
		*b++ = 2 << 16 | 1024;
		*b++ = ~0;
		if (gen >= 8)
			*b++ = ~0;
		*b++ = 0xc0ffee ^ i;
		*b++ = MI_BATCH_BUFFER_END;
		if ((b - buf) & 1)
			*b++ = 0;

		gem_exec[i][1].handle = gem_create(fd, 4096);
		gem_write(fd, gem_exec[i][1].handle, 0, buf, (b - buf) * sizeof(uint32_t));
		gem_exec[i][1].relocation_count = 1;
		gem_exec[i][1].relocs_ptr = (uintptr_t)&gem_reloc;
	}

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffer_count = 2;
	execbuf.batch_len = (b - buf) * sizeof(uint32_t);
	execbuf.flags = USE_LUT;
	if (gen >= 6)
		execbuf.flags |= I915_EXEC_BLT;

	for (i = 0; i < 1024; i++) {
		execbuf.buffers_ptr = (uintptr_t)gem_exec[i];
		gem_execbuf(fd, &execbuf);
	}

	for (i = 1024; i--;) {
		gem_read(fd, gem_exec[i][0].handle,
			 i*sizeof(uint32_t), buf + i, sizeof(uint32_t));
		gem_close(fd, gem_exec[i][0].handle);
		gem_close(fd, gem_exec[i][1].handle);
	}

	if (0) {
		for (i = 0; i < 1024; i += 8)
			igt_info("%08x %08x %08x %08x %08x %08x %08x %08x\n",
				 buf[i + 0], buf[i + 1], buf[i + 2], buf[i + 3],
				 buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7]);
	}
	for (i = 0; i < 1024; i++)
		igt_assert_eq(buf[i], 0xc0ffee ^ i);

	return 0;
}
/* Simulates SNA behaviour using negative self-relocations for
 * STATE_BASE_ADDRESS command packets. If they wrap around (to values greater
 * than the total size of the GTT), the GPU will hang.
 * See https://bugs.freedesktop.org/show_bug.cgi?id=78533
 */
static int negative_reloc(int fd, unsigned flags)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 gem_exec[2];
	struct drm_i915_gem_relocation_entry gem_reloc[1000];
	uint64_t gtt_max = get_page_table_size(fd);
	uint32_t buf[1024] = {MI_BATCH_BUFFER_END};
	int i;

#define BIAS (256*1024)

	igt_require(intel_gen(intel_get_drm_devid(fd)) >= 7);

	memset(gem_exec, 0, sizeof(gem_exec));
	gem_exec[0].handle = gem_create(fd, 4096);
	gem_write(fd, gem_exec[0].handle, 0, buf, 8);

	gem_reloc[0].offset = 1024;
	gem_reloc[0].delta = 0;
	gem_reloc[0].target_handle = gem_exec[0].handle;
	gem_reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;

	gem_exec[1].handle = gem_create(fd, 4096);
	gem_write(fd, gem_exec[1].handle, 0, buf, 8);
	gem_exec[1].relocation_count = 1;
	gem_exec[1].relocs_ptr = (uintptr_t)gem_reloc;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)gem_exec;
	execbuf.buffer_count = 2;
	execbuf.batch_len = 8;

	do_or_die(drmIoctl(fd,
			   DRM_IOCTL_I915_GEM_EXECBUFFER2,
			   &execbuf));
	gem_close(fd, gem_exec[1].handle);

	igt_info("Found offset %lld for 4k batch\n", (long long)gem_exec[0].offset);
	/*
	 * Ideally we'd like to be able to control where the kernel is going to
	 * place the buffer. We don't SKIP here because it causes the test
	 * to "randomly" flip-flop between the SKIP and PASS states.
	 */
	if (gem_exec[0].offset < BIAS) {
		igt_info("Offset is below BIAS, not testing anything\n");
		return 0;
	}

	memset(gem_reloc, 0, sizeof(gem_reloc));
	for (i = 0; i < sizeof(gem_reloc)/sizeof(gem_reloc[0]); i++) {
		gem_reloc[i].offset = 8 + 4*i;
		gem_reloc[i].delta = -BIAS*i/1024;
		gem_reloc[i].target_handle = flags & USE_LUT ? 0 : gem_exec[0].handle;
		gem_reloc[i].read_domains = I915_GEM_DOMAIN_COMMAND;
	}

	gem_exec[0].relocation_count = sizeof(gem_reloc)/sizeof(gem_reloc[0]);
	gem_exec[0].relocs_ptr = (uintptr_t)gem_reloc;

	execbuf.buffer_count = 1;
	execbuf.flags = flags & USE_LUT;
	do_or_die(drmIoctl(fd,
			   DRM_IOCTL_I915_GEM_EXECBUFFER2,
			   &execbuf));

	igt_info("Batch is now at offset %lld\n", (long long)gem_exec[0].offset);

	gem_read(fd, gem_exec[0].handle, 0, buf, sizeof(buf));
	gem_close(fd, gem_exec[0].handle);

	for (i = 0; i < sizeof(gem_reloc)/sizeof(gem_reloc[0]); i++)
		igt_assert(buf[2 + i] < gtt_max);

	return 0;
}
static int
copy(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo)
{
	uint32_t batch[12];
	struct drm_i915_gem_relocation_entry reloc[2];
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_execbuffer2 exec;
	uint32_t handle;
	int n, ret, i=0;

	batch[i++] = (XY_SRC_COPY_BLT_CMD |
		    XY_SRC_COPY_BLT_WRITE_ALPHA |
		    XY_SRC_COPY_BLT_WRITE_RGB | 6);
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		batch[i - 1] += 2;
	batch[i++] = (3 << 24) | /* 32 bits */
		  (0xcc << 16) | /* copy ROP */
		  WIDTH*4;
	batch[i++] = 0; /* dst x1,y1 */
	batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
	batch[i++] = 0; /* dst reloc */
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		batch[i++] = 0; /* FIXME */
	batch[i++] = 0; /* src x1,y1 */
	batch[i++] = WIDTH*4;
	batch[i++] = 0; /* src reloc */
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		batch[i++] = 0; /* FIXME */
	batch[i++] = MI_BATCH_BUFFER_END;
	batch[i++] = MI_NOOP;

	handle = gem_create(fd, 4096);
	gem_write(fd, handle, 0, batch, sizeof(batch));

	reloc[0].target_handle = dst;
	reloc[0].delta = 0;
	reloc[0].offset = 4 * sizeof(batch[0]);
	reloc[0].presumed_offset = 0;
	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;

	reloc[1].target_handle = src;
	reloc[1].delta = 0;
	reloc[1].offset = 7 * sizeof(batch[0]);
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		reloc[1].offset += sizeof(batch[0]);
	reloc[1].presumed_offset = 0;
	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
	reloc[1].write_domain = 0;

	obj = calloc(n_bo + 1, sizeof(*obj));
	for (n = 0; n < n_bo; n++)
		obj[n].handle = all_bo[n];
	obj[n].handle = handle;
	obj[n].relocation_count = 2;
	obj[n].relocs_ptr = (uintptr_t)reloc;

	exec.buffers_ptr = (uintptr_t)obj;
	exec.buffer_count = n_bo + 1;
	exec.batch_start_offset = 0;
	exec.batch_len = i * 4;
	exec.DR1 = exec.DR4 = 0;
	exec.num_cliprects = 0;
	exec.cliprects_ptr = 0;
	exec.flags = HAS_BLT_RING(intel_get_drm_devid(fd)) ? I915_EXEC_BLT : 0;
	i915_execbuffer2_set_context_id(exec, 0);
	exec.rsvd2 = 0;

	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
	if (ret)
		ret = errno;

	gem_close(fd, handle);
	free(obj);

	return ret;
}
static int
blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo)
{
	uint32_t batch[12];
	struct drm_i915_gem_relocation_entry reloc[2];
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_execbuffer2 exec;
	uint32_t handle;
	int n, ret, i=0;

	batch[i++] = XY_SRC_COPY_BLT_CMD |
		  XY_SRC_COPY_BLT_WRITE_ALPHA |
		  XY_SRC_COPY_BLT_WRITE_RGB;
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		batch[i - 1] |= 8;
	else
		batch[i - 1] |= 6;
	batch[i++] = (3 << 24) | /* 32 bits */
		  (0xcc << 16) | /* copy ROP */
		  WIDTH*4;
	batch[i++] = 0; /* dst x1,y1 */
	batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
	batch[i++] = 0; /* dst reloc */
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		batch[i++] = 0;
	batch[i++] = 0; /* src x1,y1 */
	batch[i++] = WIDTH*4;
	batch[i++] = 0; /* src reloc */
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		batch[i++] = 0;
	batch[i++] = MI_BATCH_BUFFER_END;
	batch[i++] = MI_NOOP;

	handle = gem_create(fd, 4096);
	gem_write(fd, handle, 0, batch, sizeof(batch));

	reloc[0].target_handle = dst;
	reloc[0].delta = 0;
	reloc[0].offset = 4 * sizeof(batch[0]);
	reloc[0].presumed_offset = 0;
	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;

	reloc[1].target_handle = src;
	reloc[1].delta = 0;
	reloc[1].offset = 7 * sizeof(batch[0]);
	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
		reloc[1].offset += sizeof(batch[0]);
	reloc[1].presumed_offset = 0;
	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
	reloc[1].write_domain = 0;

	memset(&exec, 0, sizeof(exec));
	obj = calloc(n_bo + 1, sizeof(*obj));
	for (n = 0; n < n_bo; n++)
		obj[n].handle = all_bo[n];
	obj[n].handle = handle;
	obj[n].relocation_count = 2;
	obj[n].relocs_ptr = (uintptr_t)reloc;

	exec.buffers_ptr = (uintptr_t)obj;
	exec.buffer_count = n_bo + 1;
	exec.flags = HAS_BLT_RING(intel_get_drm_devid(fd)) ? I915_EXEC_BLT : 0;

	ret = __gem_execbuf(fd, &exec);
	gem_close(fd, handle);
	free(obj);

	return ret;
}