/*
 * Submit a trivial batch and note the offset it reports, then set the
 * stop-rings flag for this ring and submit again: the flag must have been
 * consumed (STOP_RING_NONE) and the object must report the same offset.
 */
static uint64_t submit_batch(int fd, unsigned ring_id)
{
	const uint32_t batch[] = { MI_NOOP, MI_BATCH_BUFFER_END };
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 exec;
	uint64_t presumed_offset;

	gem_require_ring(fd, ring_id);

	exec.handle = gem_create(fd, 4096);
	gem_write(fd, exec.handle, 0, batch, sizeof(batch));
	exec.relocation_count = 0;
	exec.relocs_ptr = 0;
	exec.alignment = 0;
	exec.offset = 0;
	exec.flags = 0;
	exec.rsvd1 = 0;
	exec.rsvd2 = 0;

	execbuf.buffers_ptr = (uintptr_t)&exec;
	execbuf.buffer_count = 1;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = sizeof(batch);
	execbuf.cliprects_ptr = 0;
	execbuf.num_cliprects = 0;
	execbuf.DR1 = 0;
	execbuf.DR4 = 0;
	execbuf.flags = ring_id;
	i915_execbuffer2_set_context_id(execbuf, 0);
	execbuf.rsvd2 = 0;

	gem_execbuf(fd, &execbuf);
	gem_sync(fd, exec.handle);
	presumed_offset = exec.offset;

	igt_set_stop_rings(igt_to_stop_ring_flag(ring_id));

	gem_execbuf(fd, &execbuf);
	gem_sync(fd, exec.handle);

	igt_assert(igt_get_stop_rings() == STOP_RING_NONE);
	igt_assert(presumed_offset == exec.offset);

	gem_close(fd, exec.handle);

	return exec.offset;
}
static void *thread(void *data)
{
	struct thread *t = data;
	uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	uint32_t *ctx;

	memset(&obj, 0, sizeof(obj));
	obj.handle = gem_create(t->fd, 4096);
	gem_write(t->fd, obj.handle, 0, &bbe, sizeof(bbe));

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)&obj;
	execbuf.buffer_count = 1;

	ctx = malloc(t->num_ctx * sizeof(uint32_t));
	igt_assert(ctx);
	memcpy(ctx, t->all_ctx, t->num_ctx * sizeof(uint32_t));
	igt_permute_array(ctx, t->num_ctx, xchg_int);

	/* submit the no-op batch once on every context, in a random order */
	for (unsigned n = 0; n < t->num_ctx; n++) {
		execbuf.rsvd1 = ctx[n];
		gem_execbuf(t->fd, &execbuf);
	}

	free(ctx);
	gem_close(t->fd, obj.handle);
	return NULL;
}
static void run_on_ring(int fd, unsigned ring_id, const char *ring_name)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 execobj;
	struct {
		uint32_t handle;
		uint32_t *batch;
	} obj[2];
	unsigned i;
	char buf[100];

	gem_require_ring(fd, ring_id);
	igt_require(has_softpin(fd));

	for (i = 0; i < 2; i++) {
		obj[i].handle = gem_create(fd, BATCH_SIZE);
		obj[i].batch = mmap_coherent(fd, obj[i].handle, BATCH_SIZE);
		memset(obj[i].batch, 0xff, BATCH_SIZE);
	}

	memset(&execobj, 0, sizeof(execobj));
	execobj.handle = obj[0].handle;
	obj[0].batch[0] = MI_BATCH_BUFFER_END;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)&execobj;
	execbuf.buffer_count = 1;
	execbuf.flags = ring_id;

	/* Execute once to allocate a gtt-offset */
	gem_execbuf(fd, &execbuf);
	execobj.flags = EXEC_OBJECT_PINNED;

	sprintf(buf, "Testing %s cs tlb coherency: ", ring_name);
	for (i = 0; i < BATCH_SIZE/64; i++) {
		execobj.handle = obj[i&1].handle;
		obj[i&1].batch[i*64/4] = MI_BATCH_BUFFER_END;
		execbuf.batch_start_offset = i*64;

		gem_execbuf(fd, &execbuf);
	}

	for (i = 0; i < 2; i++) {
		gem_close(fd, obj[i].handle);
		munmap(obj[i].batch, BATCH_SIZE);
	}
}
static void exec1(int fd, uint32_t handle, uint64_t reloc_ofs,
		  unsigned flags, char *ptr)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 gem_exec[1];
	struct drm_i915_gem_relocation_entry gem_reloc[1];

	gem_reloc[0].offset = reloc_ofs;
	gem_reloc[0].delta = 0;
	gem_reloc[0].target_handle = handle;
	gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
	gem_reloc[0].write_domain = 0;
	gem_reloc[0].presumed_offset = 0;

	gem_exec[0].handle = handle;
	gem_exec[0].relocation_count = 1;
	gem_exec[0].relocs_ptr = (uintptr_t) gem_reloc;
	gem_exec[0].alignment = 0;
	gem_exec[0].offset = 0;
	gem_exec[0].flags = 0;
	gem_exec[0].rsvd1 = 0;
	gem_exec[0].rsvd2 = 0;

	execbuf.buffers_ptr = (uintptr_t)gem_exec;
	execbuf.buffer_count = 1;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = 8;
	execbuf.cliprects_ptr = 0;
	execbuf.num_cliprects = 0;
	execbuf.DR1 = 0;
	execbuf.DR4 = 0;
	execbuf.flags = flags;
	i915_execbuffer2_set_context_id(execbuf, 0);
	execbuf.rsvd2 = 0;

	/* Avoid hitting slowpaths in the reloc processing which might yield a
	 * presumed_offset of -1. Happens when the batch is still busy from the
	 * last round. */
	gem_sync(fd, handle);

	gem_execbuf(fd, &execbuf);

	igt_warn_on(gem_reloc[0].presumed_offset == -1);

	if (use_64bit_relocs) {
		uint64_t tmp;
		if (ptr)
			tmp = *(uint64_t *)(ptr+reloc_ofs);
		else
			gem_read(fd, handle, reloc_ofs, &tmp, sizeof(tmp));
		/* compare the full 64-bit value written by the kernel */
		igt_assert_eq_u64(tmp, gem_reloc[0].presumed_offset);
	} else {
		uint32_t tmp;
		if (ptr)
			tmp = *(uint32_t *)(ptr+reloc_ofs);
		else
			gem_read(fd, handle, reloc_ofs, &tmp, sizeof(tmp));
		igt_assert_eq(tmp, gem_reloc[0].presumed_offset);
	}
}
static void *gem_busyspin(void *arg)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct gem_busyspin *bs = arg;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	unsigned engines[16];
	unsigned nengine;
	unsigned engine;
	int fd;

	fd = drm_open_driver(DRIVER_INTEL);

	nengine = 0;
	for_each_engine(fd, engine)
		if (!ignore_engine(fd, engine))
			engines[nengine++] = engine;

	memset(&obj, 0, sizeof(obj));
	obj.handle = gem_create(fd, 4096);
	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)&obj;
	execbuf.buffer_count = 1;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = 0;
		gem_execbuf(fd, &execbuf);
	}

	while (!done) {
		for (int n = 0; n < nengine; n++) {
			execbuf.flags &= ~ENGINE_FLAGS;
			execbuf.flags |= engines[n];
			gem_execbuf(fd, &execbuf);
		}
		bs->count += nengine;
	}

	close(fd);
	return NULL;
}
static void exec(int fd, uint32_t handle)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 gem_exec[1];

	memset(gem_exec, 0, sizeof(gem_exec));
	gem_exec[0].handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)gem_exec;
	execbuf.buffer_count = 1;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = 4096;

	gem_execbuf(fd, &execbuf);
	gem_sync(fd, handle);
}
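/*
 * Illustrative only: exec() above assumes the handle already contains a
 * terminated batch. A minimal caller, using the same IGT helpers as the rest
 * of this file (OBJECT_SIZE stands in for whatever size the test uses),
 * would look like:
 *
 *	uint32_t bbe = MI_BATCH_BUFFER_END;
 *	uint32_t handle = gem_create(fd, OBJECT_SIZE);
 *
 *	gem_write(fd, handle, 0, &bbe, sizeof(bbe));
 *	exec(fd, handle);
 *	gem_close(fd, handle);
 */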
static void make_busy(int fd, uint32_t handle)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 gem_exec;
	const uint32_t buf[] = {MI_BATCH_BUFFER_END};

	gem_write(fd, handle, 0, buf, sizeof(buf));

	memset(&gem_exec, 0, sizeof(gem_exec));
	gem_exec.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)&gem_exec;
	execbuf.buffer_count = 1;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		/* fall back for kernels without HANDLE_LUT/NO_RELOC support */
		execbuf.flags = 0;
		gem_execbuf(fd, &execbuf);
	}
}
static void dontneed_before_exec(void)
{
	int fd = drm_open_driver(DRIVER_INTEL);
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 exec;
	uint32_t buf[] = { MI_BATCH_BUFFER_END, 0 };

	memset(&execbuf, 0, sizeof(execbuf));
	memset(&exec, 0, sizeof(exec));

	exec.handle = gem_create(fd, OBJECT_SIZE);
	gem_write(fd, exec.handle, 0, buf, sizeof(buf));
	gem_madvise(fd, exec.handle, I915_MADV_DONTNEED);

	execbuf.buffers_ptr = (uintptr_t)&exec;
	execbuf.buffer_count = 1;
	execbuf.batch_len = sizeof(buf);
	gem_execbuf(fd, &execbuf);

	gem_close(fd, exec.handle);
	close(fd);
}
static void fill_ring(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
{
	int i;

	/* The ring we've been using is 128k, and each rendering op
	 * will use at least 8 dwords:
	 *
	 * BATCH_START
	 * BATCH_START offset
	 * MI_FLUSH
	 * STORE_DATA_INDEX
	 * STORE_DATA_INDEX offset
	 * STORE_DATA_INDEX value
	 * MI_USER_INTERRUPT
	 * (padding)
	 *
	 * So iterate just a little more than that -- if we don't fill the ring
	 * doing this, we aren't likely to with this test.
	 */
	igt_debug("Executing execbuf %d times\n", 128*1024/(8*4));
	for (i = 0; i < 128*1024 / (8 * 4); i++)
		gem_execbuf(fd, execbuf);
}
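/*
 * Illustrative only: fill_ring() expects a fully populated execbuf from its
 * caller. A minimal sketch, assuming the same IGT helpers used elsewhere in
 * this file (the function name and the plain no-op batch are hypothetical;
 * the real tests pass in their own batch), looks like this:
 */
static void fill_ring_example(int fd, unsigned ring)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 obj;
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(&obj, 0, sizeof(obj));
	obj.handle = gem_create(fd, 4096);
	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)&obj;
	execbuf.buffer_count = 1;
	execbuf.flags = ring;

	fill_ring(fd, &execbuf);

	gem_sync(fd, obj.handle);	/* wait for the queued batches to drain */
	gem_close(fd, obj.handle);
}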
static int negative_reloc_blt(int fd)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 gem_exec[1024][2];
	struct drm_i915_gem_relocation_entry gem_reloc;
	uint32_t buf[1024], *b;
	int i;

	/* Each batch blits 0xc0ffee ^ i into its target through a relocation
	 * with a -4096 delta; afterwards dword i of target i is read back and
	 * checked against that value. */
	memset(&gem_reloc, 0, sizeof(gem_reloc));
	gem_reloc.offset = 4 * sizeof(uint32_t);
	gem_reloc.presumed_offset = ~0ULL;
	gem_reloc.delta = -4096;
	gem_reloc.target_handle = 0;
	gem_reloc.read_domains = I915_GEM_DOMAIN_RENDER;
	gem_reloc.write_domain = I915_GEM_DOMAIN_RENDER;

	for (i = 0; i < 1024; i++) {
		memset(gem_exec[i], 0, sizeof(gem_exec[i]));

		gem_exec[i][0].handle = gem_create(fd, 4096);
		gem_exec[i][0].flags = EXEC_OBJECT_NEEDS_FENCE;

		b = buf;
		*b++ = XY_COLOR_BLT_CMD_NOLEN |
			((gen >= 8) ? 5 : 4) |
			COLOR_BLT_WRITE_ALPHA |
			XY_COLOR_BLT_WRITE_RGB;
		*b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096;
		*b++ = 1 << 16 | 0;
		*b++ = 2 << 16 | 1024;
		*b++ = ~0;
		if (gen >= 8)
			*b++ = ~0;
		*b++ = 0xc0ffee ^ i;
		*b++ = MI_BATCH_BUFFER_END;
		if ((b - buf) & 1)
			*b++ = 0;

		gem_exec[i][1].handle = gem_create(fd, 4096);
		gem_write(fd, gem_exec[i][1].handle, 0,
			  buf, (b - buf) * sizeof(uint32_t));
		gem_exec[i][1].relocation_count = 1;
		gem_exec[i][1].relocs_ptr = (uintptr_t)&gem_reloc;
	}

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffer_count = 2;
	execbuf.batch_len = (b - buf) * sizeof(uint32_t);
	execbuf.flags = USE_LUT;
	if (gen >= 6)
		execbuf.flags |= I915_EXEC_BLT;

	for (i = 0; i < 1024; i++) {
		execbuf.buffers_ptr = (uintptr_t)gem_exec[i];
		gem_execbuf(fd, &execbuf);
	}

	for (i = 1024; i--;) {
		gem_read(fd, gem_exec[i][0].handle,
			 i*sizeof(uint32_t), buf + i, sizeof(uint32_t));
		gem_close(fd, gem_exec[i][0].handle);
		gem_close(fd, gem_exec[i][1].handle);
	}

	if (0) {
		for (i = 0; i < 1024; i += 8)
			igt_info("%08x %08x %08x %08x %08x %08x %08x %08x\n",
				 buf[i + 0], buf[i + 1], buf[i + 2], buf[i + 3],
				 buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7]);
	}

	for (i = 0; i < 1024; i++)
		igt_assert_eq(buf[i], 0xc0ffee ^ i);

	return 0;
}
static void processes(void)
{
	const struct intel_execution_engine *e;
	unsigned engines[16];
	int num_engines;
	struct rlimit rlim;
	unsigned num_ctx;
	uint32_t name;
	int fd, *fds;

	fd = drm_open_driver(DRIVER_INTEL);
	num_ctx = get_num_contexts(fd);

	num_engines = 0;
	for (e = intel_execution_engines; e->name; e++) {
		if (e->exec_id == 0)
			continue;

		if (!has_engine(fd, e))
			continue;

		if (e->exec_id == I915_EXEC_BSD) {
			int is_bsd2 = e->flags != 0;
			if (gem_has_bsd2(fd) != is_bsd2)
				continue;
		}

		engines[num_engines++] = e->exec_id | e->flags;
		if (num_engines == ARRAY_SIZE(engines))
			break;
	}

	/* tweak rlimits to allow us to create this many files */
	igt_assert(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
	if (rlim.rlim_cur < ALIGN(num_ctx + 1024, 1024)) {
		rlim.rlim_cur = ALIGN(num_ctx + 1024, 1024);
		if (rlim.rlim_cur > rlim.rlim_max)
			rlim.rlim_max = rlim.rlim_cur;
		igt_assert(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
	}

	fds = malloc(num_ctx * sizeof(int));
	igt_assert(fds);
	for (unsigned n = 0; n < num_ctx; n++) {
		fds[n] = drm_open_driver(DRIVER_INTEL);
		if (fds[n] == -1) {
			int err = errno;
			for (unsigned i = n; i--; )
				close(fds[i]);
			free(fds);
			errno = err;
			igt_assert_f(0, "failed to create context %lld/%lld\n",
				     (long long)n, (long long)num_ctx);
		}
	}

	if (1) {
		uint32_t bbe = MI_BATCH_BUFFER_END;

		name = gem_create(fd, 4096);
		gem_write(fd, name, 0, &bbe, sizeof(bbe));
		name = gem_flink(fd, name);
	}

	igt_fork(child, NUM_THREADS) {
		struct drm_i915_gem_execbuffer2 execbuf;
		struct drm_i915_gem_exec_object2 obj;

		memset(&obj, 0, sizeof(obj));
		memset(&execbuf, 0, sizeof(execbuf));
		execbuf.buffers_ptr = (uintptr_t)&obj;
		execbuf.buffer_count = 1;

		igt_permute_array(fds, num_ctx, xchg_int);
		for (unsigned n = 0; n < num_ctx; n++) {
			obj.handle = gem_open(fds[n], name);
			execbuf.flags = engines[n % num_engines];
			gem_execbuf(fds[n], &execbuf);
			gem_close(fds[n], obj.handle);
		}
	}
	igt_waitchildren();

	for (unsigned n = 0; n < num_ctx; n++)
		close(fds[n]);
	free(fds);
	close(fd);
}
static void execN(int fd, uint32_t handle, uint64_t batch_size,
		  unsigned flags, char *ptr)
{
#define reloc_ofs(N, T) ((((N)+1) << 12) - 4*(1 + ((N) == ((T)-1))))
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 gem_exec[1];
	struct drm_i915_gem_relocation_entry *gem_reloc;
	uint64_t n, nreloc = batch_size >> 12;

	gem_reloc = calloc(nreloc, sizeof(*gem_reloc));
	igt_assert(gem_reloc);

	for (n = 0; n < nreloc; n++) {
		gem_reloc[n].offset = reloc_ofs(n, nreloc);
		gem_reloc[n].target_handle = handle;
		gem_reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
		gem_reloc[n].presumed_offset = n ^ 0xbeefdeaddeadbeef;
		if (ptr) {
			if (use_64bit_relocs)
				*(uint64_t *)(ptr + gem_reloc[n].offset) = gem_reloc[n].presumed_offset;
			else
				*(uint32_t *)(ptr + gem_reloc[n].offset) = gem_reloc[n].presumed_offset;
		} else
			gem_write(fd, handle, gem_reloc[n].offset,
				  &gem_reloc[n].presumed_offset,
				  4*(1+use_64bit_relocs));
	}

	memset(gem_exec, 0, sizeof(gem_exec));
	gem_exec[0].handle = handle;
	gem_exec[0].relocation_count = nreloc;
	gem_exec[0].relocs_ptr = (uintptr_t)gem_reloc;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)gem_exec;
	execbuf.buffer_count = 1;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = 8;
	execbuf.flags = flags;

	/* Avoid hitting slowpaths in the reloc processing which might yield a
	 * presumed_offset of -1. Happens when the batch is still busy from the
	 * last round. */
	gem_sync(fd, handle);

	gem_execbuf(fd, &execbuf);
	for (n = 0; n < nreloc; n++)
		igt_warn_on(gem_reloc[n].presumed_offset == -1);

	if (use_64bit_relocs) {
		for (n = 0; n < nreloc; n++) {
			uint64_t tmp;
			if (ptr)
				tmp = *(uint64_t *)(ptr+reloc_ofs(n, nreloc));
			else
				gem_read(fd, handle, reloc_ofs(n, nreloc), &tmp, sizeof(tmp));
			/* compare the full 64-bit value written by the kernel */
			igt_assert_eq_u64(tmp, gem_reloc[n].presumed_offset);
		}
	} else {
		for (n = 0; n < nreloc; n++) {
			uint32_t tmp;
			if (ptr)
				tmp = *(uint32_t *)(ptr+reloc_ofs(n, nreloc));
			else
				gem_read(fd, handle, reloc_ofs(n, nreloc), &tmp, sizeof(tmp));
			igt_assert_eq(tmp, gem_reloc[n].presumed_offset);
		}
	}

	free(gem_reloc);
#undef reloc_ofs
}
static void store_dword_loop(int fd, int ring, int count, int divider)
{
	int i, val = 0;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_relocation_entry reloc[divider];
	uint32_t handle[divider];
	uint32_t *batch[divider];
	uint32_t *target;
	int gen = intel_gen(devid);

	memset(obj, 0, sizeof(obj));
	obj[0].handle = gem_create(fd, 4096);
	target = mmap_coherent(fd, obj[0].handle, 4096);

	memset(reloc, 0, sizeof(reloc));
	for (i = 0; i < divider; i++) {
		uint32_t *b;

		handle[i] = gem_create(fd, 4096);
		batch[i] = mmap_coherent(fd, handle[i], 4096);
		gem_set_domain(fd, handle[i], coherent_domain, coherent_domain);

		b = batch[i];
		*b++ = MI_STORE_DWORD_IMM;
		*b++ = 0;
		*b++ = 0;
		*b++ = 0;
		*b++ = MI_BATCH_BUFFER_END;

		reloc[i].target_handle = obj[0].handle;
		reloc[i].offset = 4;
		if (gen < 8)
			reloc[i].offset += 4;
		reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
		reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;

		obj[1].relocation_count = 1;
	}

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)obj;
	execbuf.buffer_count = 2;
	execbuf.flags = ring;

	igt_info("running storedw loop on render with stall every %i batch\n",
		 divider);

	for (i = 0; i < SLOW_QUICK(0x2000, 0x10); i++) {
		int j = i % divider;

		gem_set_domain(fd, handle[j], coherent_domain, coherent_domain);
		batch[j][3] = val;
		obj[1].handle = handle[j];
		obj[1].relocs_ptr = (uintptr_t)&reloc[j];
		gem_execbuf(fd, &execbuf);

		if (j == 0) {
			gem_set_domain(fd, obj[0].handle, coherent_domain, 0);
			igt_assert_f(*target == val,
				     "%d: value mismatch: stored 0x%08x, expected 0x%08x\n",
				     i, *target, val);
		}

		val++;
	}

	gem_set_domain(fd, obj[0].handle, coherent_domain, 0);
	igt_info("completed %d writes successfully, current value: 0x%08x\n",
		 i, target[0]);

	munmap(target, 4096);
	gem_close(fd, obj[0].handle);
	for (i = 0; i < divider; ++i) {
		munmap(batch[i], 4096);
		gem_close(fd, handle[i]);
	}
}
static uint32_t busy_blt(int fd)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	const int has_64bit_reloc = gen >= 8;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 object[2];
	struct drm_i915_gem_relocation_entry reloc[200], *r;
	uint32_t read, write;
	uint32_t *map;
	int factor = 100;
	int i = 0;

	memset(object, 0, sizeof(object));
	object[0].handle = gem_create(fd, 1024*1024);
	object[1].handle = gem_create(fd, 4096);

	r = memset(reloc, 0, sizeof(reloc));
	map = gem_mmap__cpu(fd, object[1].handle, 0, 4096, PROT_WRITE);
	gem_set_domain(fd, object[1].handle,
		       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);

#define COPY_BLT_CMD		(2<<29|0x53<<22|0x6)
#define BLT_WRITE_ALPHA		(1<<21)
#define BLT_WRITE_RGB		(1<<20)
	while (factor--) {
		/* XY_SRC_COPY */
		map[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB;
		if (has_64bit_reloc)
			map[i-1] += 2;
		map[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (4*1024);
		map[i++] = 0;
		map[i++] = 256 << 16 | 1024;

		r->offset = i * sizeof(uint32_t);
		r->target_handle = object[0].handle;
		r->read_domains = I915_GEM_DOMAIN_RENDER;
		r->write_domain = I915_GEM_DOMAIN_RENDER;
		r++;
		map[i++] = 0;
		if (has_64bit_reloc)
			map[i++] = 0;

		map[i++] = 0;
		map[i++] = 4096;
		r->offset = i * sizeof(uint32_t);
		r->target_handle = object[0].handle;
		r->read_domains = I915_GEM_DOMAIN_RENDER;
		r->write_domain = 0;
		r++;
		map[i++] = 0;
		if (has_64bit_reloc)
			map[i++] = 0;
	}
	map[i++] = MI_BATCH_BUFFER_END;
	igt_assert(i <= 4096/sizeof(uint32_t));
	igt_assert(r - reloc <= ARRAY_SIZE(reloc));
	munmap(map, 4096);

	object[1].relocs_ptr = (uintptr_t)reloc;
	object[1].relocation_count = r - reloc;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (unsigned long)object;
	execbuf.buffer_count = 2;
	if (gen >= 6)
		execbuf.flags = I915_EXEC_BLT;
	gem_execbuf(fd, &execbuf);

	__gem_busy(fd, object[0].handle, &read, &write);
	igt_assert_eq(read, 1 << write);
	igt_assert_eq(write, gen >= 6 ? I915_EXEC_BLT : I915_EXEC_RENDER);

	igt_debug("Created busy handle %d\n", object[0].handle);
	gem_close(fd, object[1].handle);
	return object[0].handle;
}
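/*
 * Illustrative only: a sketch of how the handle returned by busy_blt() would
 * typically be consumed. The object is still the target of the queued blits,
 * so a caller can probe it while the work is in flight and must eventually
 * wait for it and release it; gem_sync()/gem_close() are the same helpers
 * used throughout this file, while the function name below is hypothetical.
 */
static void busy_blt_example(int fd)
{
	uint32_t handle = busy_blt(fd);

	/* ... exercise whatever API is under test while the copies run ... */

	gem_sync(fd, handle);	/* wait for the outstanding blits to retire */
	gem_close(fd, handle);
}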
static int run(int object, int batch, int time, int reps)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 exec[3];
	struct drm_i915_gem_relocation_entry *reloc;
	uint32_t *buf, handle, src, dst;
	int fd, len, gen, size, nreloc;
	int ring, count;

	size = ALIGN(batch * 64, 4096);
	reloc = malloc(sizeof(*reloc)*size/32*2);

	fd = drm_open_driver(DRIVER_INTEL);
	handle = gem_create(fd, size);
	buf = gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE);

	gen = intel_gen(intel_get_drm_devid(fd));
	has_64bit_reloc = gen >= 8;

	src = gem_create(fd, object);
	dst = gem_create(fd, object);

	len = gem_linear_blt(fd, buf, 0, 0, 1, object, reloc);
	if (has_64bit_reloc)
		nreloc = len > 56 ? 4 : 2;
	else
		nreloc = len > 40 ? 4 : 2;

	memset(exec, 0, sizeof(exec));
	exec[0].handle = src;
	exec[1].handle = dst;

	exec[2].handle = handle;
	exec[2].relocs_ptr = (uintptr_t)reloc;
	exec[2].relocation_count = nreloc;

	ring = 0;
	if (gen >= 6)
		ring = I915_EXEC_BLT;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)exec;
	execbuf.buffer_count = 3;
	execbuf.batch_len = len;
	execbuf.flags = ring;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;

	if (__gem_execbuf(fd, &execbuf)) {
		gem_set_domain(fd, handle,
			       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
		len = gem_linear_blt(fd, buf, 0, src, dst, object, reloc);
		igt_assert(len == execbuf.batch_len);
		execbuf.flags = ring;
		gem_execbuf(fd, &execbuf);
	}
	gem_sync(fd, handle);

	if (batch > 1) {
		if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT) {
			src = 0;
			dst = 1;
		}

		gem_set_domain(fd, handle,
			       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
		for (int i = 1; i < batch; i++) {
			len = gem_linear_blt(fd, buf, len - 8,
					     src, dst, object,
					     reloc + nreloc * i);
		}
		exec[2].relocation_count = nreloc * batch;
		execbuf.batch_len = len;

		gem_execbuf(fd, &execbuf);
		gem_sync(fd, handle);
	}
	if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT)
		execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;

	/* Guess how many loops we need for 0.1s */
	count = baseline((uint64_t)object * batch, 100);

	while (reps--) {
		double min = HUGE_VAL;

		for (int s = 0; s <= time / 100; s++) {
			struct timespec start, end;
			double t;

			clock_gettime(CLOCK_MONOTONIC, &start);
			for (int loop = 0; loop < count; loop++)
				gem_execbuf(fd, &execbuf);
			gem_sync(fd, handle);
			clock_gettime(CLOCK_MONOTONIC, &end);

			t = elapsed(&start, &end);
			if (t < min)
				min = t;
		}

		printf("%7.3f\n", object/(1024*1024.)*batch*count/min);
	}

	close(fd);
	return 0;
}