static uint64_t submit_batch(int fd, unsigned ring_id) { const uint32_t batch[] = { MI_NOOP, MI_BATCH_BUFFER_END }; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec; uint64_t presumed_offset; gem_require_ring(fd, ring_id); exec.handle = gem_create(fd, 4096); gem_write(fd, exec.handle, 0, batch, sizeof(batch)); exec.relocation_count = 0; exec.relocs_ptr = 0; exec.alignment = 0; exec.offset = 0; exec.flags = 0; exec.rsvd1 = 0; exec.rsvd2 = 0; execbuf.buffers_ptr = (uintptr_t)&exec; execbuf.buffer_count = 1; execbuf.batch_start_offset = 0; execbuf.batch_len = sizeof(batch); execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; execbuf.DR1 = 0; execbuf.DR4 = 0; execbuf.flags = ring_id; i915_execbuffer2_set_context_id(execbuf, 0); execbuf.rsvd2 = 0; gem_execbuf(fd, &execbuf); gem_sync(fd, exec.handle); presumed_offset = exec.offset; igt_set_stop_rings(igt_to_stop_ring_flag(ring_id)); gem_execbuf(fd, &execbuf); gem_sync(fd, exec.handle); igt_assert(igt_get_stop_rings() == STOP_RING_NONE); igt_assert(presumed_offset == exec.offset); gem_close(fd, exec.handle); return exec.offset; }
static void destroy_cairo_surface__blit(void *arg) { struct fb_blit_upload *blit = arg; struct igt_fb *fb = blit->fb; unsigned int obj_tiling = fb_mod_to_obj_tiling(fb->tiling); munmap(blit->linear.map, blit->linear.size); fb->cairo_surface = NULL; gem_set_domain(blit->fd, blit->linear.handle, I915_GEM_DOMAIN_GTT, 0); igt_blitter_fast_copy__raw(blit->fd, blit->linear.handle, blit->linear.stride, I915_TILING_NONE, 0, 0, /* src_x, src_y */ fb->width, fb->height, fb->gem_handle, fb->stride, obj_tiling, 0, 0 /* dst_x, dst_y */); gem_sync(blit->fd, blit->linear.handle); gem_close(blit->fd, blit->linear.handle); free(blit); }
static void exec1(int fd, uint32_t handle, uint64_t reloc_ofs, unsigned flags, char *ptr) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 gem_exec[1]; struct drm_i915_gem_relocation_entry gem_reloc[1]; gem_reloc[0].offset = reloc_ofs; gem_reloc[0].delta = 0; gem_reloc[0].target_handle = handle; gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER; gem_reloc[0].write_domain = 0; gem_reloc[0].presumed_offset = 0; gem_exec[0].handle = handle; gem_exec[0].relocation_count = 1; gem_exec[0].relocs_ptr = (uintptr_t) gem_reloc; gem_exec[0].alignment = 0; gem_exec[0].offset = 0; gem_exec[0].flags = 0; gem_exec[0].rsvd1 = 0; gem_exec[0].rsvd2 = 0; execbuf.buffers_ptr = (uintptr_t)gem_exec; execbuf.buffer_count = 1; execbuf.batch_start_offset = 0; execbuf.batch_len = 8; execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; execbuf.DR1 = 0; execbuf.DR4 = 0; execbuf.flags = flags; i915_execbuffer2_set_context_id(execbuf, 0); execbuf.rsvd2 = 0; /* Avoid hitting slowpaths in the reloc processing which might yield a * presumed_offset of -1. Happens when the batch is still busy from the * last round. */ gem_sync(fd, handle); gem_execbuf(fd, &execbuf); igt_warn_on(gem_reloc[0].presumed_offset == -1); if (use_64bit_relocs) { uint64_t tmp; if (ptr) tmp = *(uint64_t *)(ptr+reloc_ofs); else gem_read(fd, handle, reloc_ofs, &tmp, sizeof(tmp)); igt_assert_eq(tmp, gem_reloc[0].presumed_offset); } else { uint32_t tmp; if (ptr) tmp = *(uint32_t *)(ptr+reloc_ofs); else gem_read(fd, handle, reloc_ofs, &tmp, sizeof(tmp)); igt_assert_eq(tmp, gem_reloc[0].presumed_offset); } }
static int loop(unsigned ring, int reps, unsigned flags) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 gem_exec; int fd; fd = drm_open_driver(DRIVER_INTEL); memset(&gem_exec, 0, sizeof(gem_exec)); gem_exec.handle = batch(fd); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)&gem_exec; execbuf.buffer_count = 1; execbuf.flags = ring; execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; if (__gem_execbuf(fd, &execbuf)) { execbuf.flags = ring; if (__gem_execbuf(fd, &execbuf)) return 77; } while (reps--) { struct timespec start, end; unsigned count = 0; gem_set_domain(fd, gem_exec.handle, I915_GEM_DOMAIN_GTT, 0); sleep(1); /* wait for the hw to go back to sleep */ clock_gettime(CLOCK_MONOTONIC, &start); do { do_ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); count++; if (flags & SYNC) gem_sync(fd, gem_exec.handle); clock_gettime(CLOCK_MONOTONIC, &end); } while (elapsed(&start, &end) < 2.); gem_sync(fd, gem_exec.handle); clock_gettime(CLOCK_MONOTONIC, &end); printf("%7.3f\n", 1e6*elapsed(&start, &end)/count); } return 0; }
/*
 * Check the read/write busy information reported by __gem_busy() for an
 * object while the GPU is kept occupied by a long-running batch.
 *
 * fd    - DRM file descriptor
 * ring  - ring under test; also the value expected in the "write" result
 * flags - extra execbuffer flags OR-ed into every ring selector
 *
 * Sequence: queue a read of handle[TEST] behind the busy batch, requeue it
 * as a write, then queue reads on every ring from I915_EXEC_RENDER to
 * I915_EXEC_VEBOX, asserting the reported read mask and write value after
 * each step. Finally wait for the object and assert both are zero.
 */
static void test_ring(int fd, unsigned ring, uint32_t flags)
{
	uint32_t bbe = MI_BATCH_BUFFER_END;
	uint32_t handle[3];
	uint32_t read, write;
	uint32_t active;
	unsigned i;

	gem_require_ring(fd, ring | flags);

	handle[TEST] = gem_create(fd, 4096);
	handle[BATCH] = gem_create(fd, 4096);
	/* The batch object consists of a lone MI_BATCH_BUFFER_END. */
	gem_write(fd, handle[BATCH], 0, &bbe, sizeof(bbe));

	/* Create a long running batch which we can use to hog the GPU */
	handle[BUSY] = busy_blt(fd);

	/* Queue a batch after the busy, it should block and remain "busy" */
	igt_assert(exec_noop(fd, handle, ring | flags, false));
	igt_assert(still_busy(fd, handle[BUSY]));
	__gem_busy(fd, handle[TEST], &read, &write);
	/* Only our ring is reading, nobody is writing. */
	igt_assert_eq(read, 1 << ring);
	igt_assert_eq(write, 0);

	/* Requeue with a write */
	igt_assert(exec_noop(fd, handle, ring | flags, true));
	igt_assert(still_busy(fd, handle[BUSY]));
	__gem_busy(fd, handle[TEST], &read, &write);
	igt_assert_eq(read, 1 << ring);
	igt_assert_eq(write, ring);

	/* Now queue it for a read across all available rings */
	active = 0;
	for (i = I915_EXEC_RENDER; i <= I915_EXEC_VEBOX; i++) {
		/* Only rings that accepted the batch are expected in the mask. */
		if (exec_noop(fd, handle, i | flags, false))
			active |= 1 << i;
	}
	igt_assert(still_busy(fd, handle[BUSY]));
	__gem_busy(fd, handle[TEST], &read, &write);
	igt_assert_eq(read, active);
	igt_assert_eq(write, ring); /* from the earlier write */

	/* Check that our long batch was long enough */
	igt_assert(still_busy(fd, handle[BUSY]));

	/* And make sure it becomes idle again */
	gem_sync(fd, handle[TEST]);
	__gem_busy(fd, handle[TEST], &read, &write);
	igt_assert_eq(read, 0);
	igt_assert_eq(write, 0);

	/*
	 * NOTE(review): this closes handle[TEST]..handle[BATCH]; it releases
	 * handle[BUSY] only if BUSY lies between TEST and BATCH - confirm
	 * against the index definitions.
	 */
	for (i = TEST; i <= BATCH; i++)
		gem_close(fd, handle[i]);
}
static void run_on_ring(int fd, unsigned ring_id, const char *ring_name) { uint32_t handle, handle_new; uint64_t gtt_offset, gtt_offset_new; uint32_t *batch_ptr, *batch_ptr_old; unsigned split; char buf[100]; int i; gem_require_ring(fd, ring_id); sprintf(buf, "testing %s cs tlb coherency: ", ring_name); /* Shut up gcc, too stupid. */ batch_ptr_old = NULL; handle = 0; gtt_offset = 0; for (split = 0; split < BATCH_SIZE/8 - 1; split += 2) { igt_progress(buf, split, BATCH_SIZE/8 - 1); handle_new = gem_create(fd, BATCH_SIZE); batch_ptr = gem_mmap__cpu(fd, handle_new, 0, BATCH_SIZE, PROT_READ | PROT_WRITE); batch_ptr[split*2] = MI_BATCH_BUFFER_END; for (i = split*2 + 2; i < BATCH_SIZE/8; i++) batch_ptr[i] = 0xffffffff; if (split > 0) { gem_sync(fd, handle); gem_close(fd, handle); } igt_assert_eq(exec(fd, handle_new, split, >t_offset_new, 0), 0); if (split > 0) { /* Check that we've managed to collide in the tlb. */ igt_assert(gtt_offset == gtt_offset_new); /* We hang onto the storage of the old batch by keeping * the cpu mmap around. */ munmap(batch_ptr_old, BATCH_SIZE); } handle = handle_new; gtt_offset = gtt_offset_new; batch_ptr_old = batch_ptr; } }
static void exec(int fd, uint32_t handle) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 gem_exec[1]; memset(gem_exec, 0, sizeof(gem_exec)); gem_exec[0].handle = handle; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)gem_exec; execbuf.buffer_count = 1; execbuf.batch_start_offset = 0; execbuf.batch_len = 4096; gem_execbuf(fd, &execbuf); gem_sync(fd, handle); }
static int exec(int fd, uint32_t handle, int loops, unsigned ring_id) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 gem_exec[1]; int ret = 0; gem_exec[0].handle = handle; gem_exec[0].relocation_count = 0; gem_exec[0].relocs_ptr = 0; gem_exec[0].alignment = 0; gem_exec[0].offset = 0; gem_exec[0].flags = 0; gem_exec[0].rsvd1 = 0; gem_exec[0].rsvd2 = 0; execbuf.buffers_ptr = (uintptr_t)gem_exec; execbuf.buffer_count = 1; execbuf.batch_start_offset = 0; execbuf.batch_len = 8; execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; execbuf.DR1 = 0; execbuf.DR4 = 0; execbuf.flags = ring_id; i915_execbuffer2_set_context_id(execbuf, 0); execbuf.rsvd2 = 0; while (loops-- && ret == 0) { ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); } gem_sync(fd, handle); return ret; }
int main(int argc, char **argv) { struct timeval start, end; uint8_t *buf; uint32_t handle; int size = OBJECT_SIZE; int loop, i, tiling; int fd; if (argc > 1) size = atoi(argv[1]); if (size == 0) { fprintf(stderr, "Invalid object size specified\n"); return 1; } buf = malloc(size); memset(buf, 0, size); fd = drm_open_any(); handle = gem_create(fd, size); assert(handle); for (tiling = I915_TILING_NONE; tiling <= I915_TILING_Y; tiling++) { if (tiling != I915_TILING_NONE) { printf("\nSetting tiling mode to %s\n", tiling == I915_TILING_X ? "X" : "Y"); gem_set_tiling(fd, handle, tiling, 512); } if (tiling == I915_TILING_NONE) { gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); { uint32_t *base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); /* mmap read */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); ptr = base; x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to read %dk through a CPU map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap write */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); ptr = base; for (i = 0; i < size/sizeof(*ptr); i++) ptr[i] = i; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to write %dk through a CPU map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); memset(base, 0, size); munmap(base, size); } gettimeofday(&end, NULL); printf("Time to clear %dk through a CPU map: %7.3fµs\n", 
size/1024, elapsed(&start, &end, loop)); gettimeofday(&start, NULL); base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); for (loop = 0; loop < 1000; loop++) memset(base, 0, size); munmap(base, size); gettimeofday(&end, NULL); printf("Time to clear %dk through a cached CPU map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); } /* CPU pwrite */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_write(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pwrite %dk through the CPU: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* CPU pread */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_read(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pread %dk through the CPU: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); } /* prefault into gtt */ { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } /* mmap read */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to read %dk through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap write */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; for (i = 0; i < size/sizeof(*ptr); i++) ptr[i] = i; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to write %dk through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap clear */ gettimeofday(&start, NULL); for (loop = 0; 
loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); memset(base, 0, size); munmap(base, size); } gettimeofday(&end, NULL); printf("Time to clear %dk through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); gettimeofday(&start, NULL);{ uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); for (loop = 0; loop < 1000; loop++) memset(base, 0, size); munmap(base, size); } gettimeofday(&end, NULL); printf("Time to clear %dk through a cached GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap read */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to read %dk (again) through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); if (tiling == I915_TILING_NONE) { /* GTT pwrite */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_write(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pwrite %dk through the GTT: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* GTT pread */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_read(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pread %dk through the GTT: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* GTT pwrite, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_write(fd, handle, 0, buf, size); gem_sync(fd, handle); } gettimeofday(&end, NULL); printf("Time to pwrite %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* GTT pread, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_sync(fd, handle); gem_read(fd, handle, 0, buf, 
size); } gettimeofday(&end, NULL); printf("Time to pread %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* partial writes */ printf("Now partial writes.\n"); size /= 4; /* partial GTT pwrite, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_write(fd, handle, 0, buf, size); gem_sync(fd, handle); } gettimeofday(&end, NULL); printf("Time to pwrite %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* partial GTT pread, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_sync(fd, handle); gem_read(fd, handle, 0, buf, size); } gettimeofday(&end, NULL); printf("Time to pread %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); size *= 4; } } gem_close(fd, handle); close(fd); return 0; }
static void big_exec(int fd, uint32_t handle, int ring) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 *gem_exec; uint32_t ctx_id1, ctx_id2; int num_buffers = gem_available_aperture_size(fd) / 4096; int i; /* Make sure we only fill half of RAM with gem objects. */ igt_require(intel_get_total_ram_mb() * 1024 / 2 > num_buffers * 4); gem_exec = calloc(num_buffers + 1, sizeof(*gem_exec)); igt_assert(gem_exec); memset(gem_exec, 0, (num_buffers + 1) * sizeof(*gem_exec)); ctx_id1 = gem_context_create(fd); ctx_id2 = gem_context_create(fd); gem_exec[0].handle = handle; execbuf.buffers_ptr = (uintptr_t)gem_exec; execbuf.buffer_count = num_buffers + 1; execbuf.batch_start_offset = 0; execbuf.batch_len = 8; execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; execbuf.DR1 = 0; execbuf.DR4 = 0; execbuf.flags = ring; execbuf.rsvd2 = 0; execbuf.buffer_count = 1; i915_execbuffer2_set_context_id(execbuf, ctx_id1); do_ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); for (i = 0; i < num_buffers; i++) { uint32_t tmp_handle = gem_create(fd, 4096); gem_exec[i].handle = tmp_handle; } gem_exec[i].handle = handle; execbuf.buffer_count = i + 1; /* figure out how many buffers we can exactly fit */ while (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf) != 0) { i--; gem_close(fd, gem_exec[i].handle); gem_exec[i].handle = handle; execbuf.buffer_count--; igt_info("trying buffer count %i\n", i - 1); } igt_info("reduced buffer count to %i from %i\n", i - 1, num_buffers); /* double check that it works */ do_ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); i915_execbuffer2_set_context_id(execbuf, ctx_id2); do_ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); gem_sync(fd, handle); }
static void create_cairo_surface__blit(int fd, struct igt_fb *fb) { struct fb_blit_upload *blit; cairo_format_t cairo_format; unsigned int obj_tiling = fb_mod_to_obj_tiling(fb->tiling); int bpp, ret; blit = malloc(sizeof(*blit)); igt_assert(blit); /* * We create a linear BO that we'll map for the CPU to write to (using * cairo). This linear bo will be then blitted to its final * destination, tiling it at the same time. */ bpp = igt_drm_format_to_bpp(fb->drm_format); ret = create_bo_for_fb(fd, fb->width, fb->height, bpp, LOCAL_DRM_FORMAT_MOD_NONE, 0, &blit->linear.handle, &blit->linear.size, &blit->linear.stride); igt_assert(ret == 0); blit->fd = fd; blit->fb = fb; /* Copy fb content to linear BO */ gem_set_domain(fd, blit->linear.handle, I915_GEM_DOMAIN_GTT, 0); igt_blitter_fast_copy__raw(fd, fb->gem_handle, fb->stride, obj_tiling, 0, 0, /* src_x, src_y */ fb->width, fb->height, blit->linear.handle, blit->linear.stride, I915_TILING_NONE, 0, 0 /* dst_x, dst_y */); gem_sync(fd, blit->linear.handle); gem_set_domain(fd, blit->linear.handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); /* Setup cairo context */ blit->linear.map = gem_mmap__cpu(fd, blit->linear.handle, 0, blit->linear.size, PROT_READ | PROT_WRITE); cairo_format = drm_format_to_cairo(fb->drm_format); fb->cairo_surface = cairo_image_surface_create_for_data(blit->linear.map, cairo_format, fb->width, fb->height, blit->linear.stride); cairo_surface_set_user_data(fb->cairo_surface, (cairo_user_data_key_t *)create_cairo_surface__blit, blit, destroy_cairo_surface__blit); }
static void execN(int fd, uint32_t handle, uint64_t batch_size, unsigned flags, char *ptr) { #define reloc_ofs(N, T) ((((N)+1) << 12) - 4*(1 + ((N) == ((T)-1)))) struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 gem_exec[1]; struct drm_i915_gem_relocation_entry *gem_reloc; uint64_t n, nreloc = batch_size >> 12; gem_reloc = calloc(nreloc, sizeof(*gem_reloc)); igt_assert(gem_reloc); for (n = 0; n < nreloc; n++) { gem_reloc[n].offset = reloc_ofs(n, nreloc); gem_reloc[n].target_handle = handle; gem_reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; gem_reloc[n].presumed_offset = n ^ 0xbeefdeaddeadbeef; if (ptr) { if (use_64bit_relocs) *(uint64_t *)(ptr + gem_reloc[n].offset) = gem_reloc[n].presumed_offset; else *(uint32_t *)(ptr + gem_reloc[n].offset) = gem_reloc[n].presumed_offset; } else gem_write(fd, handle, gem_reloc[n].offset, &gem_reloc[n].presumed_offset, 4*(1+use_64bit_relocs)); } memset(gem_exec, 0, sizeof(gem_exec)); gem_exec[0].handle = handle; gem_exec[0].relocation_count = nreloc; gem_exec[0].relocs_ptr = (uintptr_t)gem_reloc; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)gem_exec; execbuf.buffer_count = 1; execbuf.batch_start_offset = 0; execbuf.batch_len = 8; execbuf.flags = flags; /* Avoid hitting slowpaths in the reloc processing which might yield a * presumed_offset of -1. Happens when the batch is still busy from the * last round. 
*/ gem_sync(fd, handle); gem_execbuf(fd, &execbuf); for (n = 0; n < nreloc; n++) igt_warn_on(gem_reloc[n].presumed_offset == -1); if (use_64bit_relocs) { for (n = 0; n < nreloc; n++) { uint64_t tmp; if (ptr) tmp = *(uint64_t *)(ptr+reloc_ofs(n, nreloc)); else gem_read(fd, handle, reloc_ofs(n, nreloc), &tmp, sizeof(tmp)); igt_assert_eq(tmp, gem_reloc[n].presumed_offset); } } else { for (n = 0; n < nreloc; n++) { uint32_t tmp; if (ptr) tmp = *(uint32_t *)(ptr+reloc_ofs(n, nreloc)); else gem_read(fd, handle, reloc_ofs(n, nreloc), &tmp, sizeof(tmp)); igt_assert_eq(tmp, gem_reloc[n].presumed_offset); } } free(gem_reloc); #undef reloc_ofs }
static int run(int object, int batch, int time, int reps) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec[3]; struct drm_i915_gem_relocation_entry *reloc; uint32_t *buf, handle, src, dst; int fd, len, gen, size, nreloc; int ring, count; size = ALIGN(batch * 64, 4096); reloc = malloc(sizeof(*reloc)*size/32*2); fd = drm_open_driver(DRIVER_INTEL); handle = gem_create(fd, size); buf = gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE); gen = intel_gen(intel_get_drm_devid(fd)); has_64bit_reloc = gen >= 8; src = gem_create(fd, object); dst = gem_create(fd, object); len = gem_linear_blt(fd, buf, 0, 0, 1, object, reloc); if (has_64bit_reloc) nreloc = len > 56 ? 4 : 2; else nreloc = len > 40 ? 4 : 2; memset(exec, 0, sizeof(exec)); exec[0].handle = src; exec[1].handle = dst; exec[2].handle = handle; exec[2].relocs_ptr = (uintptr_t)reloc; exec[2].relocation_count = nreloc; ring = 0; if (gen >= 6) ring = I915_EXEC_BLT; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)exec; execbuf.buffer_count = 3; execbuf.batch_len = len; execbuf.flags = ring; execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; if (__gem_execbuf(fd, &execbuf)) { gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); len = gem_linear_blt(fd, buf, 0, src, dst, object, reloc); igt_assert(len == execbuf.batch_len); execbuf.flags = ring; gem_execbuf(fd, &execbuf); } gem_sync(fd, handle); if (batch > 1) { if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT) { src = 0; dst = 1; } gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); for (int i = 1; i < batch; i++) { len = gem_linear_blt(fd, buf, len - 8, src, dst, object, reloc + nreloc * i); } exec[2].relocation_count = nreloc * batch; execbuf.batch_len = len; gem_execbuf(fd, &execbuf); gem_sync(fd, handle); } if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT) execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; /* Guess how many loops we need for 0.1s */ count = baseline((uint64_t)object * batch, 100); while 
(reps--) { double min = HUGE_VAL; for (int s = 0; s <= time / 100; s++) { struct timespec start, end; double t; clock_gettime(CLOCK_MONOTONIC, &start); for (int loop = 0; loop < count; loop++) gem_execbuf(fd, &execbuf); gem_sync(fd, handle); clock_gettime(CLOCK_MONOTONIC, &end); t = elapsed(&start, &end); if (t < min) min = t; } printf("%7.3f\n", object/(1024*1024.)*batch*count/min); } close(fd); return 0; }
static void run(int object_size) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec[3]; struct drm_i915_gem_relocation_entry reloc[4]; uint32_t buf[20]; uint32_t handle, src, dst; int fd, len, count; int ring; fd = drm_open_any(); handle = gem_create(fd, 4096); src = gem_create(fd, object_size); dst = gem_create(fd, object_size); len = gem_linear_blt(buf, src, dst, object_size, reloc); gem_write(fd, handle, 0, buf, len); exec[0].handle = src; exec[0].relocation_count = 0; exec[0].relocs_ptr = 0; exec[0].alignment = 0; exec[0].offset = 0; exec[0].flags = 0; exec[0].rsvd1 = 0; exec[0].rsvd2 = 0; exec[1].handle = dst; exec[1].relocation_count = 0; exec[1].relocs_ptr = 0; exec[1].alignment = 0; exec[1].offset = 0; exec[1].flags = 0; exec[1].rsvd1 = 0; exec[1].rsvd2 = 0; exec[2].handle = handle; exec[2].relocation_count = len > 40 ? 4 : 2; exec[2].relocs_ptr = (uintptr_t)reloc; exec[2].alignment = 0; exec[2].offset = 0; exec[2].flags = 0; exec[2].rsvd1 = 0; exec[2].rsvd2 = 0; ring = 0; if (HAS_BLT_RING(intel_get_drm_devid(fd))) ring = I915_EXEC_BLT; execbuf.buffers_ptr = (uintptr_t)exec; execbuf.buffer_count = 3; execbuf.batch_start_offset = 0; execbuf.batch_len = len; execbuf.cliprects_ptr = 0; execbuf.num_cliprects = 0; execbuf.DR1 = 0; execbuf.DR4 = 0; execbuf.flags = ring; i915_execbuffer2_set_context_id(execbuf, 0); execbuf.rsvd2 = 0; for (count = 1; count <= 1<<17; count <<= 1) { struct timeval start, end; gettimeofday(&start, NULL); if (gem_exec(fd, &execbuf, count)) exit(1); gem_sync(fd, handle); gettimeofday(&end, NULL); printf("Time to blt %d bytes x %6d: %7.3fµs, %s\n", object_size, count, elapsed(&start, &end, count), bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6)); fflush(stdout); } gem_close(fd, handle); close(fd); }