/*
 * Measure execbuf submission latency on one ring: submit a no-op batch
 * as fast as possible for 2s and print the mean microseconds per
 * submission, optionally synchronising after every submission.
 */
static int loop(unsigned ring, int reps, unsigned flags)
{
        struct drm_i915_gem_execbuffer2 execbuf;
        struct drm_i915_gem_exec_object2 gem_exec;
        int fd;

        fd = drm_open_driver(DRIVER_INTEL);

        memset(&gem_exec, 0, sizeof(gem_exec));
        gem_exec.handle = batch(fd);

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)&gem_exec;
        execbuf.buffer_count = 1;
        execbuf.flags = ring;
        execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
        execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
        if (__gem_execbuf(fd, &execbuf)) {
                /* Retry without the optional flags before giving up. */
                execbuf.flags = ring;
                if (__gem_execbuf(fd, &execbuf))
                        return 77; /* skip */
        }

        while (reps--) {
                struct timespec start, end;
                unsigned count = 0;

                gem_set_domain(fd, gem_exec.handle, I915_GEM_DOMAIN_GTT, 0);
                sleep(1); /* wait for the hw to go back to sleep */

                clock_gettime(CLOCK_MONOTONIC, &start);
                do {
                        do_ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
                        count++;
                        if (flags & SYNC)
                                gem_sync(fd, gem_exec.handle);
                        clock_gettime(CLOCK_MONOTONIC, &end);
                } while (elapsed(&start, &end) < 2.);
                gem_sync(fd, gem_exec.handle);
                clock_gettime(CLOCK_MONOTONIC, &end);

                printf("%7.3f\n", 1e6*elapsed(&start, &end)/count);
        }

        return 0;
}
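/*
 * Build a batch of 1024 MI_STORE_DWORD_IMM packets, each carrying its own
 * relocation into a consecutive dword of obj[0]. Gen6 BSD is skipped as
 * MI_STORE_DATA is broken there.
 */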
static void run_test(int fd, unsigned ring, unsigned flags)
{
        const int gen = intel_gen(intel_get_drm_devid(fd));
        const uint32_t bbe = MI_BATCH_BUFFER_END;
        struct drm_i915_gem_exec_object2 obj[2];
        struct drm_i915_gem_relocation_entry reloc[1024];
        struct drm_i915_gem_execbuffer2 execbuf;
        struct igt_hang_ring hang;
        uint32_t *batch, *b;
        int i;

        gem_require_ring(fd, ring);
        igt_skip_on_f(gen == 6 && (ring & ~(3<<13)) == I915_EXEC_BSD,
                      "MI_STORE_DATA broken on gen6 bsd\n");

        gem_quiescent_gpu(fd);

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)obj;
        execbuf.buffer_count = 2;
        execbuf.flags = ring | (1 << 11); /* I915_EXEC_NO_RELOC */
        if (gen < 6) /* MI_STORE_DWORD_IMM needs a privileged batch */
                execbuf.flags |= I915_EXEC_SECURE;

        memset(obj, 0, sizeof(obj));
        obj[0].handle = gem_create(fd, 4096);
        obj[0].flags |= EXEC_OBJECT_WRITE;
        obj[1].handle = gem_create(fd, 1024*16 + 4096);
        gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
        igt_require(__gem_execbuf(fd, &execbuf) == 0);

        obj[1].relocs_ptr = (uintptr_t)reloc;
        obj[1].relocation_count = 1024;

        batch = gem_mmap__cpu(fd, obj[1].handle, 0, 16*1024 + 4096,
                              PROT_WRITE | PROT_READ);
        gem_set_domain(fd, obj[1].handle,
                       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);

        memset(reloc, 0, sizeof(reloc));
        b = batch;
        for (i = 0; i < 1024; i++) {
                uint64_t offset;

                reloc[i].target_handle = obj[0].handle;
                reloc[i].presumed_offset = obj[0].offset;
                reloc[i].offset = (b - batch + 1) * sizeof(*batch);
                reloc[i].delta = i * sizeof(uint32_t);
                reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
                reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;

                offset = obj[0].offset + reloc[i].delta;

                *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
                if (gen >= 8) {
                        *b++ = offset;
                        *b++ = offset >> 32;
                } else if (gen >= 4) {
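/*
 * Busy-spin worker: resubmit a no-op batch to every usable engine in a
 * tight loop until the shared 'done' flag is set, accumulating the
 * submission count in bs->count.
 */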
static void *gem_busyspin(void *arg)
{
        const uint32_t bbe = MI_BATCH_BUFFER_END;
        struct gem_busyspin *bs = arg;
        struct drm_i915_gem_execbuffer2 execbuf;
        struct drm_i915_gem_exec_object2 obj;
        unsigned engines[16];
        unsigned nengine;
        unsigned engine;
        int fd;

        fd = drm_open_driver(DRIVER_INTEL);

        nengine = 0;
        for_each_engine(fd, engine)
                if (!ignore_engine(fd, engine))
                        engines[nengine++] = engine;

        memset(&obj, 0, sizeof(obj));
        obj.handle = gem_create(fd, 4096);
        gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)&obj;
        execbuf.buffer_count = 1;
        execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
        execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
        if (__gem_execbuf(fd, &execbuf)) {
                execbuf.flags = 0;
                gem_execbuf(fd, &execbuf);
        }

        while (!done) {
                for (int n = 0; n < nengine; n++) {
                        execbuf.flags &= ~ENGINE_FLAGS;
                        execbuf.flags |= engines[n];
                        gem_execbuf(fd, &execbuf);
                }
                bs->count += nengine;
        }

        close(fd);
        return NULL;
}
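/* Probe whether the given engine accepts a trivial execbuf. */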
static int has_engine(int fd, const struct intel_execution_engine *e)
{
        uint32_t bbe = MI_BATCH_BUFFER_END;
        struct drm_i915_gem_execbuffer2 execbuf;
        struct drm_i915_gem_exec_object2 exec;
        int ret;

        memset(&exec, 0, sizeof(exec));
        exec.handle = gem_create(fd, 4096);
        gem_write(fd, exec.handle, 0, &bbe, sizeof(bbe));

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)&exec;
        execbuf.buffer_count = 1;
        execbuf.flags = e->exec_id | e->flags;
        ret = __gem_execbuf(fd, &execbuf);

        gem_close(fd, exec.handle);

        return ret == 0;
}
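/*
 * Submit the handle itself as a no-op batch so it picks up an
 * outstanding request; fall back to default execbuf flags on kernels
 * without HANDLE_LUT/NO_RELOC support.
 */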
static void make_busy(int fd, uint32_t handle)
{
        struct drm_i915_gem_execbuffer2 execbuf;
        struct drm_i915_gem_exec_object2 gem_exec;
        const uint32_t buf[] = {MI_BATCH_BUFFER_END};

        gem_write(fd, handle, 0, buf, sizeof(buf));

        memset(&gem_exec, 0, sizeof(gem_exec));
        gem_exec.handle = handle;

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)&gem_exec;
        execbuf.buffer_count = 1;
        execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
        execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
        if (__gem_execbuf(fd, &execbuf)) {
                execbuf.flags = 0;
                gem_execbuf(fd, &execbuf);
        }
}
static void test_execbuf(int fd)
{
        struct drm_i915_gem_execbuffer2 execbuf;
        struct drm_i915_gem_exec_object2 exec;
        uint32_t tmp[] = { MI_BATCH_BUFFER_END };

        memset(&exec, 0, sizeof(exec));
        memset(&execbuf, 0, sizeof(execbuf));

        exec.handle = gem_create(fd, 4096);
        gem_write(fd, exec.handle, 0, tmp, sizeof(tmp));

        execbuf.buffers_ptr = (uintptr_t)&exec;
        execbuf.buffer_count = 1;

        wedge_gpu(fd);

        igt_assert_eq(__gem_execbuf(fd, &execbuf), -EIO);
        gem_close(fd, exec.handle);

        trigger_reset(fd);
}
static bool exec_noop(int fd, uint32_t *handles, unsigned ring, bool write)
{
        struct drm_i915_gem_execbuffer2 execbuf;
        struct drm_i915_gem_exec_object2 exec[3];

        memset(exec, 0, sizeof(exec));
        exec[0].handle = handles[BUSY];
        exec[1].handle = handles[TEST];
        if (write)
                exec[1].flags |= EXEC_OBJECT_WRITE;
        exec[2].handle = handles[BATCH];

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)exec;
        execbuf.buffer_count = 3;
        execbuf.flags = ring;
        igt_debug("Queuing handle for %s on ring %d\n",
                  write ? "writing" : "reading", ring & 0x7);
        return __gem_execbuf(fd, &execbuf) == 0;
}
static void run_test(int fd, int num_fences, int expected_errno,
                     unsigned flags)
{
        struct drm_i915_gem_execbuffer2 execbuf[2];
        struct drm_i915_gem_exec_object2 exec[2][2*MAX_FENCES+3];
        struct drm_i915_gem_relocation_entry reloc[2*MAX_FENCES+2];
        int i, n;
        int loop = 1000;

        if (flags & BUSY_LOAD) {
                bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
                batch = intel_batchbuffer_alloc(bufmgr, devid);

                /* Takes forever otherwise. */
                loop = 50;
        }

        if (flags & INTERRUPTIBLE)
                igt_fork_signal_helper();

        memset(execbuf, 0, sizeof(execbuf));
        memset(exec, 0, sizeof(exec));
        memset(reloc, 0, sizeof(reloc));

        for (n = 0; n < 2*num_fences; n++) {
                uint32_t handle = tiled_bo_create(fd);

                exec[1][2*num_fences - n-1].handle = exec[0][n].handle = handle;
                fill_reloc(&reloc[n], handle);
        }

        for (i = 0; i < 2; i++) {
                for (n = 0; n < num_fences; n++)
                        exec[i][n].flags = EXEC_OBJECT_NEEDS_FENCE;

                exec[i][2*num_fences].handle = batch_create(fd);
                exec[i][2*num_fences].relocs_ptr = (uintptr_t)reloc;
                exec[i][2*num_fences].relocation_count = 2*num_fences;

                execbuf[i].buffers_ptr = (uintptr_t)exec[i];
                execbuf[i].buffer_count = 2*num_fences+1;
                execbuf[i].batch_len = 2*sizeof(uint32_t);
        }

        do {
                if (flags & BUSY_LOAD)
                        emit_dummy_load();

                igt_assert_eq(__gem_execbuf(fd, &execbuf[0]), expected_errno);
                igt_assert_eq(__gem_execbuf(fd, &execbuf[1]), expected_errno);
        } while (--loop);

        if (flags & INTERRUPTIBLE)
                igt_stop_signal_helper();

        /* Cleanup */
        for (n = 0; n < 2*num_fences; n++)
                gem_close(fd, exec[0][n].handle);

        for (i = 0; i < 2; i++)
                gem_close(fd, exec[i][2*num_fences].handle);

        if (flags & BUSY_LOAD) {
                intel_batchbuffer_free(batch);
                drm_intel_bufmgr_destroy(bufmgr);
        }
}
static int run(int object, int batch, int time, int reps)
{
        struct drm_i915_gem_execbuffer2 execbuf;
        struct drm_i915_gem_exec_object2 exec[3];
        struct drm_i915_gem_relocation_entry *reloc;
        uint32_t *buf, handle, src, dst;
        int fd, len, gen, size, nreloc;
        int ring, count;

        size = ALIGN(batch * 64, 4096);
        reloc = malloc(sizeof(*reloc)*size/32*2);

        fd = drm_open_driver(DRIVER_INTEL);
        handle = gem_create(fd, size);
        buf = gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE);

        gen = intel_gen(intel_get_drm_devid(fd));
        has_64bit_reloc = gen >= 8;

        src = gem_create(fd, object);
        dst = gem_create(fd, object);

        len = gem_linear_blt(fd, buf, 0, 0, 1, object, reloc);
        if (has_64bit_reloc)
                nreloc = len > 56 ? 4 : 2;
        else
                nreloc = len > 40 ? 4 : 2;

        memset(exec, 0, sizeof(exec));
        exec[0].handle = src;
        exec[1].handle = dst;

        exec[2].handle = handle;
        exec[2].relocs_ptr = (uintptr_t)reloc;
        exec[2].relocation_count = nreloc;

        ring = 0;
        if (gen >= 6)
                ring = I915_EXEC_BLT;

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = (uintptr_t)exec;
        execbuf.buffer_count = 3;
        execbuf.batch_len = len;
        execbuf.flags = ring;
        execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;

        if (__gem_execbuf(fd, &execbuf)) {
                gem_set_domain(fd, handle,
                               I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
                len = gem_linear_blt(fd, buf, 0, src, dst, object, reloc);
                igt_assert(len == execbuf.batch_len);
                execbuf.flags = ring;
                gem_execbuf(fd, &execbuf);
        }
        gem_sync(fd, handle);

        if (batch > 1) {
                if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT) {
                        src = 0;
                        dst = 1;
                }

                gem_set_domain(fd, handle,
                               I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
                for (int i = 1; i < batch; i++) {
                        len = gem_linear_blt(fd, buf, len - 8,
                                             src, dst, object,
                                             reloc + nreloc * i);
                }
                exec[2].relocation_count = nreloc * batch;
                execbuf.batch_len = len;

                gem_execbuf(fd, &execbuf);
                gem_sync(fd, handle);
        }
        if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT)
                execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;

        /* Guess how many loops we need for 0.1s */
        count = baseline((uint64_t)object * batch, 100);

        while (reps--) {
                double min = HUGE_VAL;

                for (int s = 0; s <= time / 100; s++) {
                        struct timespec start, end;
                        double t;

                        clock_gettime(CLOCK_MONOTONIC, &start);
                        for (int loop = 0; loop < count; loop++)
                                gem_execbuf(fd, &execbuf);
                        gem_sync(fd, handle);
                        clock_gettime(CLOCK_MONOTONIC, &end);

                        t = elapsed(&start, &end);
                        if (t < min)
                                min = t;
                }

                printf("%7.3f\n", object/(1024*1024.)*batch*count/min);
        }

        close(fd);
        return 0;
}
static void copy(int fd, uint32_t dst, uint32_t src, unsigned int error)
{
        uint32_t batch[12];
        struct drm_i915_gem_relocation_entry reloc[2];
        struct drm_i915_gem_exec_object2 obj[3];
        struct drm_i915_gem_execbuffer2 exec;
        uint32_t handle;
        int ret, i=0;

        batch[i++] = XY_SRC_COPY_BLT_CMD |
                  XY_SRC_COPY_BLT_WRITE_ALPHA |
                  XY_SRC_COPY_BLT_WRITE_RGB;
        if (intel_gen(intel_get_drm_devid(fd)) >= 8)
                batch[i - 1] |= 8;
        else
                batch[i - 1] |= 6;

        batch[i++] = (3 << 24) | /* 32 bits */
                  (0xcc << 16) | /* copy ROP */
                  WIDTH*4;
        batch[i++] = 0; /* dst x1,y1 */
        batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
        batch[i++] = 0; /* dst reloc */
        if (intel_gen(intel_get_drm_devid(fd)) >= 8)
                batch[i++] = 0;
        batch[i++] = 0; /* src x1,y1 */
        batch[i++] = WIDTH*4;
        batch[i++] = 0; /* src reloc */
        if (intel_gen(intel_get_drm_devid(fd)) >= 8)
                batch[i++] = 0;
        batch[i++] = MI_BATCH_BUFFER_END;
        batch[i++] = MI_NOOP;

        handle = gem_create(fd, 4096);
        gem_write(fd, handle, 0, batch, sizeof(batch));

        reloc[0].target_handle = dst;
        reloc[0].delta = 0;
        reloc[0].offset = 4 * sizeof(batch[0]);
        reloc[0].presumed_offset = 0;
        reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
        reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;

        reloc[1].target_handle = src;
        reloc[1].delta = 0;
        reloc[1].offset = 7 * sizeof(batch[0]);
        if (intel_gen(intel_get_drm_devid(fd)) >= 8)
                reloc[1].offset += sizeof(batch[0]);
        reloc[1].presumed_offset = 0;
        reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
        reloc[1].write_domain = 0;

        memset(&exec, 0, sizeof(exec));
        memset(obj, 0, sizeof(obj));

        obj[exec.buffer_count++].handle = dst;
        if (src != dst)
                obj[exec.buffer_count++].handle = src;
        obj[exec.buffer_count].handle = handle;
        obj[exec.buffer_count].relocation_count = 2;
        obj[exec.buffer_count].relocs_ptr = (uintptr_t)reloc;
        exec.buffer_count++;

        exec.buffers_ptr = (uintptr_t)obj;
        exec.flags = HAS_BLT_RING(intel_get_drm_devid(fd)) ?
                I915_EXEC_BLT : 0;

        ret = __gem_execbuf(fd, &exec);
        gem_close(fd, handle);

        if (error == ~0)
                igt_assert_neq(ret, 0);
        else
                igt_assert_eq(ret, -error);
}
static int blit(int fd, uint32_t dst, uint32_t src,
                uint32_t *all_bo, int n_bo)
{
        uint32_t batch[12];
        struct drm_i915_gem_relocation_entry reloc[2];
        struct drm_i915_gem_exec_object2 *obj;
        struct drm_i915_gem_execbuffer2 exec;
        uint32_t handle;
        int n, ret, i=0;

        batch[i++] = XY_SRC_COPY_BLT_CMD |
                  XY_SRC_COPY_BLT_WRITE_ALPHA |
                  XY_SRC_COPY_BLT_WRITE_RGB;
        if (intel_gen(intel_get_drm_devid(fd)) >= 8)
                batch[i - 1] |= 8;
        else
                batch[i - 1] |= 6;

        batch[i++] = (3 << 24) | /* 32 bits */
                  (0xcc << 16) | /* copy ROP */
                  WIDTH*4;
        batch[i++] = 0; /* dst x1,y1 */
        batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
        batch[i++] = 0; /* dst reloc */
        if (intel_gen(intel_get_drm_devid(fd)) >= 8)
                batch[i++] = 0;
        batch[i++] = 0; /* src x1,y1 */
        batch[i++] = WIDTH*4;
        batch[i++] = 0; /* src reloc */
        if (intel_gen(intel_get_drm_devid(fd)) >= 8)
                batch[i++] = 0;
        batch[i++] = MI_BATCH_BUFFER_END;
        batch[i++] = MI_NOOP;

        handle = gem_create(fd, 4096);
        gem_write(fd, handle, 0, batch, sizeof(batch));

        reloc[0].target_handle = dst;
        reloc[0].delta = 0;
        reloc[0].offset = 4 * sizeof(batch[0]);
        reloc[0].presumed_offset = 0;
        reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
        reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;

        reloc[1].target_handle = src;
        reloc[1].delta = 0;
        reloc[1].offset = 7 * sizeof(batch[0]);
        if (intel_gen(intel_get_drm_devid(fd)) >= 8)
                reloc[1].offset += sizeof(batch[0]);
        reloc[1].presumed_offset = 0;
        reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
        reloc[1].write_domain = 0;

        memset(&exec, 0, sizeof(exec));
        obj = calloc(n_bo + 1, sizeof(*obj));
        for (n = 0; n < n_bo; n++)
                obj[n].handle = all_bo[n];
        obj[n].handle = handle;
        obj[n].relocation_count = 2;
        obj[n].relocs_ptr = (uintptr_t)reloc;

        exec.buffers_ptr = (uintptr_t)obj;
        exec.buffer_count = n_bo + 1;
        exec.flags = HAS_BLT_RING(intel_get_drm_devid(fd)) ?
                I915_EXEC_BLT : 0;

        ret = __gem_execbuf(fd, &exec);
        gem_close(fd, handle);
        free(obj);

        return ret;
}