static void make_busy(int fd, uint32_t handle) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 obj[2]; struct drm_i915_gem_relocation_entry reloc[2]; uint32_t batch[20]; uint32_t tmp; int count; tmp = gem_create(fd, 1024*1024); obj[0].handle = tmp; obj[0].relocation_count = 0; obj[0].relocs_ptr = 0; obj[0].alignment = 0; obj[0].offset = 0; obj[0].flags = 0; obj[0].rsvd1 = 0; obj[0].rsvd2 = 0; obj[1].handle = handle; obj[1].relocation_count = 2; obj[1].relocs_ptr = (uintptr_t) reloc; obj[1].alignment = 0; obj[1].offset = 0; obj[1].flags = 0; obj[1].rsvd1 = 0; obj[1].rsvd2 = 0; execbuf.buffers_ptr = (uintptr_t)obj; execbuf.buffer_count = 2; execbuf.batch_start_offset = 0; execbuf.batch_len = gem_linear_blt(fd, batch, tmp, tmp, 1024*1024,reloc); execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; execbuf.DR1 = 0; execbuf.DR4 = 0; execbuf.flags = 0; if (HAS_BLT_RING(intel_get_drm_devid(fd))) execbuf.flags |= I915_EXEC_BLT; i915_execbuffer2_set_context_id(execbuf, 0); execbuf.rsvd2 = 0; gem_write(fd, handle, 0, batch, execbuf.batch_len); for (count = 0; count < 10; count++) do_ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); gem_close(fd, tmp); }
static int run(int object, int batch, int time, int reps) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec[3]; struct drm_i915_gem_relocation_entry *reloc; uint32_t *buf, handle, src, dst; int fd, len, gen, size, nreloc; int ring, count; size = ALIGN(batch * 64, 4096); reloc = malloc(sizeof(*reloc)*size/32*2); fd = drm_open_driver(DRIVER_INTEL); handle = gem_create(fd, size); buf = gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE); gen = intel_gen(intel_get_drm_devid(fd)); has_64bit_reloc = gen >= 8; src = gem_create(fd, object); dst = gem_create(fd, object); len = gem_linear_blt(fd, buf, 0, 0, 1, object, reloc); if (has_64bit_reloc) nreloc = len > 56 ? 4 : 2; else nreloc = len > 40 ? 4 : 2; memset(exec, 0, sizeof(exec)); exec[0].handle = src; exec[1].handle = dst; exec[2].handle = handle; exec[2].relocs_ptr = (uintptr_t)reloc; exec[2].relocation_count = nreloc; ring = 0; if (gen >= 6) ring = I915_EXEC_BLT; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)exec; execbuf.buffer_count = 3; execbuf.batch_len = len; execbuf.flags = ring; execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; if (__gem_execbuf(fd, &execbuf)) { gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); len = gem_linear_blt(fd, buf, 0, src, dst, object, reloc); igt_assert(len == execbuf.batch_len); execbuf.flags = ring; gem_execbuf(fd, &execbuf); } gem_sync(fd, handle); if (batch > 1) { if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT) { src = 0; dst = 1; } gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); for (int i = 1; i < batch; i++) { len = gem_linear_blt(fd, buf, len - 8, src, dst, object, reloc + nreloc * i); } exec[2].relocation_count = nreloc * batch; execbuf.batch_len = len; gem_execbuf(fd, &execbuf); gem_sync(fd, handle); } if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT) execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; /* Guess how many loops we need for 0.1s */ count = baseline((uint64_t)object * batch, 100); while (reps--) { double min = HUGE_VAL; for (int s = 0; s <= time / 100; s++) { struct timespec start, end; double t; clock_gettime(CLOCK_MONOTONIC, &start); for (int loop = 0; loop < count; loop++) gem_execbuf(fd, &execbuf); gem_sync(fd, handle); clock_gettime(CLOCK_MONOTONIC, &end); t = elapsed(&start, &end); if (t < min) min = t; } printf("%7.3f\n", object/(1024*1024.)*batch*count/min); } close(fd); return 0; }
static void run(int object_size) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec[3]; struct drm_i915_gem_relocation_entry reloc[4]; uint32_t buf[20]; uint32_t handle, src, dst; int fd, len, count; int ring; fd = drm_open_any(); handle = gem_create(fd, 4096); src = gem_create(fd, object_size); dst = gem_create(fd, object_size); len = gem_linear_blt(buf, src, dst, object_size, reloc); gem_write(fd, handle, 0, buf, len); exec[0].handle = src; exec[0].relocation_count = 0; exec[0].relocs_ptr = 0; exec[0].alignment = 0; exec[0].offset = 0; exec[0].flags = 0; exec[0].rsvd1 = 0; exec[0].rsvd2 = 0; exec[1].handle = dst; exec[1].relocation_count = 0; exec[1].relocs_ptr = 0; exec[1].alignment = 0; exec[1].offset = 0; exec[1].flags = 0; exec[1].rsvd1 = 0; exec[1].rsvd2 = 0; exec[2].handle = handle; exec[2].relocation_count = len > 40 ? 4 : 2; exec[2].relocs_ptr = (uintptr_t)reloc; exec[2].alignment = 0; exec[2].offset = 0; exec[2].flags = 0; exec[2].rsvd1 = 0; exec[2].rsvd2 = 0; ring = 0; if (HAS_BLT_RING(intel_get_drm_devid(fd))) ring = I915_EXEC_BLT; execbuf.buffers_ptr = (uintptr_t)exec; execbuf.buffer_count = 3; execbuf.batch_start_offset = 0; execbuf.batch_len = len; execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; execbuf.DR1 = 0; execbuf.DR4 = 0; execbuf.flags = ring; i915_execbuffer2_set_context_id(execbuf, 0); execbuf.rsvd2 = 0; for (count = 1; count <= 1<<17; count <<= 1) { struct timeval start, end; gettimeofday(&start, NULL); if (gem_exec(fd, &execbuf, count)) exit(1); gem_sync(fd, handle); gettimeofday(&end, NULL); printf("Time to blt %d bytes x %6d: %7.3fµs, %s\n", object_size, count, elapsed(&start, &end, count), bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6)); fflush(stdout); } gem_close(fd, handle); close(fd); }