static void draw_rect_mmap_wc(int fd, struct buf_data *buf, struct rect *rect, uint32_t color) { uint32_t *ptr; uint32_t tiling, swizzle; gem_set_domain(fd, buf->handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); gem_get_tiling(fd, buf->handle, &tiling, &swizzle); /* We didn't implement suport for the older tiling methods yet. */ if (tiling != I915_TILING_NONE) igt_require(intel_gen(intel_get_drm_devid(fd)) >= 5); ptr = gem_mmap__wc(fd, buf->handle, 0, buf->size, PROT_READ | PROT_WRITE); switch (tiling) { case I915_TILING_NONE: draw_rect_ptr_linear(ptr, buf->stride, rect, color, buf->bpp); break; case I915_TILING_X: draw_rect_ptr_tiled(ptr, buf->stride, swizzle, rect, color, buf->bpp); break; default: igt_assert(false); break; } igt_assert(munmap(ptr, buf->size) == 0); }
static void destroy_cairo_surface__blit(void *arg) { struct fb_blit_upload *blit = arg; struct igt_fb *fb = blit->fb; unsigned int obj_tiling = fb_mod_to_obj_tiling(fb->tiling); munmap(blit->linear.map, blit->linear.size); fb->cairo_surface = NULL; gem_set_domain(blit->fd, blit->linear.handle, I915_GEM_DOMAIN_GTT, 0); igt_blitter_fast_copy__raw(blit->fd, blit->linear.handle, blit->linear.stride, I915_TILING_NONE, 0, 0, /* src_x, src_y */ fb->width, fb->height, fb->gem_handle, fb->stride, obj_tiling, 0, 0 /* dst_x, dst_y */); gem_sync(blit->fd, blit->linear.handle); gem_close(blit->fd, blit->linear.handle); free(blit); }
static void run_test(int fd, unsigned ring, unsigned flags) { const int gen = intel_gen(intel_get_drm_devid(fd)); const uint32_t bbe = MI_BATCH_BUFFER_END; struct drm_i915_gem_exec_object2 obj[2]; struct drm_i915_gem_relocation_entry reloc[1024]; struct drm_i915_gem_execbuffer2 execbuf; struct igt_hang_ring hang; uint32_t *batch, *b; int i; gem_require_ring(fd, ring); igt_skip_on_f(gen == 6 && (ring & ~(3<<13)) == I915_EXEC_BSD, "MI_STORE_DATA broken on gen6 bsd\n"); gem_quiescent_gpu(fd); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)obj; execbuf.buffer_count = 2; execbuf.flags = ring | (1 << 11); if (gen < 6) execbuf.flags |= I915_EXEC_SECURE; memset(obj, 0, sizeof(obj)); obj[0].handle = gem_create(fd, 4096); obj[0].flags |= EXEC_OBJECT_WRITE; obj[1].handle = gem_create(fd, 1024*16 + 4096); gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe)); igt_require(__gem_execbuf(fd, &execbuf) == 0); obj[1].relocs_ptr = (uintptr_t)reloc; obj[1].relocation_count = 1024; batch = gem_mmap__cpu(fd, obj[1].handle, 0, 16*1024 + 4096, PROT_WRITE | PROT_READ); gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); memset(reloc, 0, sizeof(reloc)); b = batch; for (i = 0; i < 1024; i++) { uint64_t offset; reloc[i].target_handle = obj[0].handle; reloc[i].presumed_offset = obj[0].offset; reloc[i].offset = (b - batch + 1) * sizeof(*batch); reloc[i].delta = i * sizeof(uint32_t); reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; offset = obj[0].offset + reloc[i].delta; *b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); if (gen >= 8) { *b++ = offset; *b++ = offset >> 32; } else if (gen >= 4) {
static void test_huge_bo(int huge, int tiling) { uint64_t huge_object_size, last_offset; char *ptr_cpu; char *cpu_pattern; uint32_t bo; int i; switch (huge) { case -1: huge_object_size = gem_mappable_aperture_size() / 2; break; case 0: huge_object_size = gem_mappable_aperture_size() + PAGE_SIZE; break; default: huge_object_size = gem_aperture_size(fd) + PAGE_SIZE; break; } intel_require_memory(1, huge_object_size, CHECK_RAM); last_offset = huge_object_size - PAGE_SIZE; cpu_pattern = malloc(PAGE_SIZE); igt_assert(cpu_pattern); for (i = 0; i < PAGE_SIZE; i++) cpu_pattern[i] = i; bo = gem_create(fd, huge_object_size); /* Obtain CPU mapping for the object. */ ptr_cpu = gem_mmap__cpu(fd, bo, 0, huge_object_size, PROT_READ | PROT_WRITE); igt_assert(ptr_cpu); gem_set_domain(fd, bo, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); gem_close(fd, bo); /* Write first page through the mapping and assert reading it back * works. */ memcpy(ptr_cpu, cpu_pattern, PAGE_SIZE); igt_assert(memcmp(ptr_cpu, cpu_pattern, PAGE_SIZE) == 0); /* Write last page through the mapping and assert reading it back * works. */ memcpy(ptr_cpu + last_offset, cpu_pattern, PAGE_SIZE); igt_assert(memcmp(ptr_cpu + last_offset, cpu_pattern, PAGE_SIZE) == 0); /* Cross check that accessing two simultaneous pages works. */ igt_assert(memcmp(ptr_cpu, ptr_cpu + last_offset, PAGE_SIZE) == 0); munmap(ptr_cpu, huge_object_size); free(cpu_pattern); }
static void check_bo(int fd, uint32_t handle) { uint32_t *map; int i; igt_debug("Verifying result\n"); map = gem_mmap__cpu(fd, handle, 0, 4096, PROT_READ); gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0); for (i = 0; i < 1024; i++) igt_assert_eq(map[i], i); munmap(map, 4096); }
static void draw_rect_mmap_gtt(int fd, struct buf_data *buf, struct rect *rect, uint32_t color) { uint32_t *ptr; gem_set_domain(fd, buf->handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); ptr = gem_mmap__gtt(fd, buf->handle, buf->size, PROT_READ | PROT_WRITE); draw_rect_ptr_linear(ptr, buf->stride, rect, color, buf->bpp); igt_assert(munmap(ptr, buf->size) == 0); }
static cairo_surface_t *get_cairo_surface(int fd, struct igt_fb *fb) { if (fb->cairo_surface == NULL) { if (fb->tiling == LOCAL_I915_FORMAT_MOD_Y_TILED || fb->tiling == LOCAL_I915_FORMAT_MOD_Yf_TILED) create_cairo_surface__blit(fd, fb); else create_cairo_surface__gtt(fd, fb); } gem_set_domain(fd, fb->gem_handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); igt_assert(cairo_surface_status(fb->cairo_surface) == CAIRO_STATUS_SUCCESS); return fb->cairo_surface; }
static void * mmap_coherent(int fd, uint32_t handle, int size) { int domain; void *ptr; if (gem_has_llc(fd) || !gem_mmap__has_wc(fd)) { domain = I915_GEM_DOMAIN_CPU; ptr = gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE); } else { domain = I915_GEM_DOMAIN_GTT; ptr = gem_mmap__wc(fd, handle, 0, size, PROT_WRITE); } gem_set_domain(fd, handle, domain, domain); return ptr; }
static int loop(unsigned ring, int reps, unsigned flags) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 gem_exec; int fd; fd = drm_open_driver(DRIVER_INTEL); memset(&gem_exec, 0, sizeof(gem_exec)); gem_exec.handle = batch(fd); memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)&gem_exec; execbuf.buffer_count = 1; execbuf.flags = ring; execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; if (__gem_execbuf(fd, &execbuf)) { execbuf.flags = ring; if (__gem_execbuf(fd, &execbuf)) return 77; } while (reps--) { struct timespec start, end; unsigned count = 0; gem_set_domain(fd, gem_exec.handle, I915_GEM_DOMAIN_GTT, 0); sleep(1); /* wait for the hw to go back to sleep */ clock_gettime(CLOCK_MONOTONIC, &start); do { do_ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); count++; if (flags & SYNC) gem_sync(fd, gem_exec.handle); clock_gettime(CLOCK_MONOTONIC, &end); } while (elapsed(&start, &end) < 2.); gem_sync(fd, gem_exec.handle); clock_gettime(CLOCK_MONOTONIC, &end); printf("%7.3f\n", 1e6*elapsed(&start, &end)/count); } return 0; }
static uint32_t create_bo(int fd) { uint32_t handle; uint32_t *data; int i; handle = gem_create(fd, SIZE); gem_set_tiling(fd, handle, I915_TILING_X, WIDTH * sizeof(uint32_t)); /* Write throught the fence to tiled the data. * We then manually detile on reading back through the mmap(wc). */ data = gem_mmap__gtt(fd, handle, SIZE, PROT_READ | PROT_WRITE); for (i = 0; i < WIDTH*HEIGHT; i++) data[i] = i; munmap(data, SIZE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, 0); return handle; }
static void create_cairo_surface__blit(int fd, struct igt_fb *fb) { struct fb_blit_upload *blit; cairo_format_t cairo_format; unsigned int obj_tiling = fb_mod_to_obj_tiling(fb->tiling); int bpp, ret; blit = malloc(sizeof(*blit)); igt_assert(blit); /* * We create a linear BO that we'll map for the CPU to write to (using * cairo). This linear bo will be then blitted to its final * destination, tiling it at the same time. */ bpp = igt_drm_format_to_bpp(fb->drm_format); ret = create_bo_for_fb(fd, fb->width, fb->height, bpp, LOCAL_DRM_FORMAT_MOD_NONE, 0, &blit->linear.handle, &blit->linear.size, &blit->linear.stride); igt_assert(ret == 0); blit->fd = fd; blit->fb = fb; /* Copy fb content to linear BO */ gem_set_domain(fd, blit->linear.handle, I915_GEM_DOMAIN_GTT, 0); igt_blitter_fast_copy__raw(fd, fb->gem_handle, fb->stride, obj_tiling, 0, 0, /* src_x, src_y */ fb->width, fb->height, blit->linear.handle, blit->linear.stride, I915_TILING_NONE, 0, 0 /* dst_x, dst_y */); gem_sync(fd, blit->linear.handle); gem_set_domain(fd, blit->linear.handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); /* Setup cairo context */ blit->linear.map = gem_mmap__cpu(fd, blit->linear.handle, 0, blit->linear.size, PROT_READ | PROT_WRITE); cairo_format = drm_format_to_cairo(fb->drm_format); fb->cairo_surface = cairo_image_surface_create_for_data(blit->linear.map, cairo_format, fb->width, fb->height, blit->linear.stride); cairo_surface_set_user_data(fb->cairo_surface, (cairo_user_data_key_t *)create_cairo_surface__blit, blit, destroy_cairo_surface__blit); }
int main(int argc, char **argv) { struct timeval start, end; uint8_t *buf; uint32_t handle; int size = OBJECT_SIZE; int loop, i, tiling; int fd; if (argc > 1) size = atoi(argv[1]); if (size == 0) { fprintf(stderr, "Invalid object size specified\n"); return 1; } buf = malloc(size); memset(buf, 0, size); fd = drm_open_any(); handle = gem_create(fd, size); assert(handle); for (tiling = I915_TILING_NONE; tiling <= I915_TILING_Y; tiling++) { if (tiling != I915_TILING_NONE) { printf("\nSetting tiling mode to %s\n", tiling == I915_TILING_X ? "X" : "Y"); gem_set_tiling(fd, handle, tiling, 512); } if (tiling == I915_TILING_NONE) { gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); { uint32_t *base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); /* mmap read */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); ptr = base; x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to read %dk through a CPU map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap write */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); ptr = base; for (i = 0; i < size/sizeof(*ptr); i++) ptr[i] = i; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to write %dk through a CPU map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); memset(base, 0, size); munmap(base, size); } gettimeofday(&end, NULL); printf("Time to clear %dk through a CPU map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); gettimeofday(&start, NULL); base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); for (loop = 0; loop < 1000; loop++) memset(base, 0, size); munmap(base, size); gettimeofday(&end, NULL); printf("Time to clear %dk through a cached CPU map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); } /* CPU pwrite */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_write(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pwrite %dk through the CPU: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* CPU pread */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_read(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pread %dk through the CPU: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); } /* prefault into gtt */ { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } /* mmap read */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to read %dk through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap write */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; for (i = 0; i < size/sizeof(*ptr); i++) ptr[i] = i; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to write %dk through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap clear */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); memset(base, 0, size); munmap(base, size); } gettimeofday(&end, NULL); printf("Time to clear %dk through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); gettimeofday(&start, NULL);{ uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); for (loop = 0; loop < 1000; loop++) memset(base, 0, size); munmap(base, size); } gettimeofday(&end, NULL); printf("Time to clear %dk through a cached GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap read */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to read %dk (again) through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); if (tiling == I915_TILING_NONE) { /* GTT pwrite */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_write(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pwrite %dk through the GTT: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* GTT pread */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_read(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pread %dk through the GTT: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* GTT pwrite, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_write(fd, handle, 0, buf, size); gem_sync(fd, handle); } gettimeofday(&end, NULL); printf("Time to pwrite %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* GTT pread, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_sync(fd, handle); gem_read(fd, handle, 0, buf, size); } gettimeofday(&end, NULL); printf("Time to pread %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* partial writes */ printf("Now partial writes.\n"); size /= 4; /* partial GTT pwrite, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_write(fd, handle, 0, buf, size); gem_sync(fd, handle); } gettimeofday(&end, NULL); printf("Time to pwrite %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* partial GTT pread, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_sync(fd, handle); gem_read(fd, handle, 0, buf, size); } gettimeofday(&end, NULL); printf("Time to pread %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); size *= 4; } } gem_close(fd, handle); close(fd); return 0; }
static void set_domain_gtt(int fd, uint32_t handle) { gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); }
static void test_huge_bo(int fd, int huge, int tiling) { uint32_t bo; char *ptr; char *tiled_pattern; char *linear_pattern; uint64_t size, last_offset; int pitch = tiling == I915_TILING_Y ? 128 : 512; int i; switch (huge) { case -1: size = gem_mappable_aperture_size() / 2; break; case 0: size = gem_mappable_aperture_size() + PAGE_SIZE; break; default: size = gem_aperture_size(fd) + PAGE_SIZE; break; } intel_require_memory(1, size, CHECK_RAM); last_offset = size - PAGE_SIZE; /* Create pattern */ bo = gem_create(fd, PAGE_SIZE); if (tiling) gem_set_tiling(fd, bo, tiling, pitch); linear_pattern = gem_mmap__gtt(fd, bo, PAGE_SIZE, PROT_READ | PROT_WRITE); for (i = 0; i < PAGE_SIZE; i++) linear_pattern[i] = i; tiled_pattern = gem_mmap__cpu(fd, bo, 0, PAGE_SIZE, PROT_READ); gem_set_domain(fd, bo, I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT, 0); gem_close(fd, bo); bo = gem_create(fd, size); if (tiling) gem_set_tiling(fd, bo, tiling, pitch); /* Initialise first/last page through CPU mmap */ ptr = gem_mmap__cpu(fd, bo, 0, size, PROT_READ | PROT_WRITE); memcpy(ptr, tiled_pattern, PAGE_SIZE); memcpy(ptr + last_offset, tiled_pattern, PAGE_SIZE); munmap(ptr, size); /* Obtain mapping for the object through GTT. */ ptr = __gem_mmap__gtt(fd, bo, size, PROT_READ | PROT_WRITE); igt_require_f(ptr, "Huge BO GTT mapping not supported.\n"); set_domain_gtt(fd, bo); /* Access through GTT should still provide the CPU written values. */ igt_assert(memcmp(ptr , linear_pattern, PAGE_SIZE) == 0); igt_assert(memcmp(ptr + last_offset, linear_pattern, PAGE_SIZE) == 0); gem_set_tiling(fd, bo, I915_TILING_NONE, 0); igt_assert(memcmp(ptr , tiled_pattern, PAGE_SIZE) == 0); igt_assert(memcmp(ptr + last_offset, tiled_pattern, PAGE_SIZE) == 0); munmap(ptr, size); gem_close(fd, bo); munmap(tiled_pattern, PAGE_SIZE); munmap(linear_pattern, PAGE_SIZE); }
static int run(int object, int batch, int time, int reps) { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec[3]; struct drm_i915_gem_relocation_entry *reloc; uint32_t *buf, handle, src, dst; int fd, len, gen, size, nreloc; int ring, count; size = ALIGN(batch * 64, 4096); reloc = malloc(sizeof(*reloc)*size/32*2); fd = drm_open_driver(DRIVER_INTEL); handle = gem_create(fd, size); buf = gem_mmap__cpu(fd, handle, 0, size, PROT_WRITE); gen = intel_gen(intel_get_drm_devid(fd)); has_64bit_reloc = gen >= 8; src = gem_create(fd, object); dst = gem_create(fd, object); len = gem_linear_blt(fd, buf, 0, 0, 1, object, reloc); if (has_64bit_reloc) nreloc = len > 56 ? 4 : 2; else nreloc = len > 40 ? 4 : 2; memset(exec, 0, sizeof(exec)); exec[0].handle = src; exec[1].handle = dst; exec[2].handle = handle; exec[2].relocs_ptr = (uintptr_t)reloc; exec[2].relocation_count = nreloc; ring = 0; if (gen >= 6) ring = I915_EXEC_BLT; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)exec; execbuf.buffer_count = 3; execbuf.batch_len = len; execbuf.flags = ring; execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT; if (__gem_execbuf(fd, &execbuf)) { gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); len = gem_linear_blt(fd, buf, 0, src, dst, object, reloc); igt_assert(len == execbuf.batch_len); execbuf.flags = ring; gem_execbuf(fd, &execbuf); } gem_sync(fd, handle); if (batch > 1) { if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT) { src = 0; dst = 1; } gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); for (int i = 1; i < batch; i++) { len = gem_linear_blt(fd, buf, len - 8, src, dst, object, reloc + nreloc * i); } exec[2].relocation_count = nreloc * batch; execbuf.batch_len = len; gem_execbuf(fd, &execbuf); gem_sync(fd, handle); } if (execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT) execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC; /* Guess how many loops we need for 0.1s */ count = baseline((uint64_t)object * batch, 100); while (reps--) { double min = HUGE_VAL; for (int s = 0; s <= time / 100; s++) { struct timespec start, end; double t; clock_gettime(CLOCK_MONOTONIC, &start); for (int loop = 0; loop < count; loop++) gem_execbuf(fd, &execbuf); gem_sync(fd, handle); clock_gettime(CLOCK_MONOTONIC, &end); t = elapsed(&start, &end); if (t < min) min = t; } printf("%7.3f\n", object/(1024*1024.)*batch*count/min); } close(fd); return 0; }
static uint32_t busy_blt(int fd) { const int gen = intel_gen(intel_get_drm_devid(fd)); const int has_64bit_reloc = gen >= 8; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 object[2]; struct drm_i915_gem_relocation_entry reloc[200], *r; uint32_t read, write; uint32_t *map; int factor = 100; int i = 0; memset(object, 0, sizeof(object)); object[0].handle = gem_create(fd, 1024*1024); object[1].handle = gem_create(fd, 4096); r = memset(reloc, 0, sizeof(reloc)); map = gem_mmap__cpu(fd, object[1].handle, 0, 4096, PROT_WRITE); gem_set_domain(fd, object[1].handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); #define COPY_BLT_CMD (2<<29|0x53<<22|0x6) #define BLT_WRITE_ALPHA (1<<21) #define BLT_WRITE_RGB (1<<20) while (factor--) { /* XY_SRC_COPY */ map[i++] = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB; if (has_64bit_reloc) map[i-1] += 2; map[i++] = 0xcc << 16 | 1 << 25 | 1 << 24 | (4*1024); map[i++] = 0; map[i++] = 256 << 16 | 1024; r->offset = i * sizeof(uint32_t); r->target_handle = object[0].handle; r->read_domains = I915_GEM_DOMAIN_RENDER; r->write_domain = I915_GEM_DOMAIN_RENDER; r++; map[i++] = 0; if (has_64bit_reloc) map[i++] = 0; map[i++] = 0; map[i++] = 4096; r->offset = i * sizeof(uint32_t); r->target_handle = object[0].handle; r->read_domains = I915_GEM_DOMAIN_RENDER; r->write_domain = 0; r++; map[i++] = 0; if (has_64bit_reloc) map[i++] = 0; } map[i++] = MI_BATCH_BUFFER_END; igt_assert(i <= 4096/sizeof(uint32_t)); igt_assert(r - reloc <= ARRAY_SIZE(reloc)); munmap(map, 4096); object[1].relocs_ptr = (uintptr_t)reloc; object[1].relocation_count = r - reloc; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (unsigned long)object; execbuf.buffer_count = 2; if (gen >= 6) execbuf.flags = I915_EXEC_BLT; gem_execbuf(fd, &execbuf); __gem_busy(fd, object[0].handle, &read, &write); igt_assert_eq(read, 1 << write); igt_assert_eq(write, gen >= 6 ? I915_EXEC_BLT : I915_EXEC_RENDER); igt_debug("Created busy handle %d\n", object[0].handle); gem_close(fd, object[1].handle); return object[0].handle; }
int main(int argc, char **argv) { int fd = drm_open_any(); int size = 0; int busy = 0; int reps = 13; int c, n, s; while ((c = getopt (argc, argv, "bs:r:")) != -1) { switch (c) { case 's': size = atoi(optarg); break; case 'r': reps = atoi(optarg); if (reps < 1) reps = 1; break; case 'b': busy = true; break; default: break; } } if (size == 0) { for (s = 4096; s <= OBJECT_SIZE; s <<= 1) { igt_stats_t stats; igt_stats_init_with_size(&stats, reps); for (n = 0; n < reps; n++) { struct timespec start, end; uint64_t count = 0; clock_gettime(CLOCK_MONOTONIC, &start); do { for (c = 0; c < 1000; c++) { uint32_t handle; handle = gem_create(fd, s); gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); if (busy) make_busy(fd, handle); gem_close(fd, handle); } count += c; clock_gettime(CLOCK_MONOTONIC, &end); } while (end.tv_sec - start.tv_sec < 2); igt_stats_push_float(&stats, count / elapsed(&start, &end)); } printf("%f\n", igt_stats_get_trimean(&stats)); igt_stats_fini(&stats); } } else { for (n = 0; n < reps; n++) { struct timespec start, end; uint64_t count = 0; clock_gettime(CLOCK_MONOTONIC, &start); do { for (c = 0; c < 1000; c++) { uint32_t handle; handle = gem_create(fd, size); gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); if (busy) make_busy(fd, handle); gem_close(fd, handle); } count += c; clock_gettime(CLOCK_MONOTONIC, &end); } while (end.tv_sec - start.tv_sec < 2); printf("%f\n", count / elapsed(&start, &end)); } } return 0; }
static void store_dword_loop(int fd, int ring, int count, int divider) { int i, val = 0; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 obj[2]; struct drm_i915_gem_relocation_entry reloc[divider]; uint32_t handle[divider]; uint32_t *batch[divider]; uint32_t *target; int gen = intel_gen(devid); memset(obj, 0, sizeof(obj)); obj[0].handle = gem_create(fd, 4096); target = mmap_coherent(fd, obj[0].handle, 4096); memset(reloc, 0, sizeof(reloc)); for (i = 0; i < divider; i++) { uint32_t *b; handle[i] = gem_create(fd, 4096); batch[i] = mmap_coherent(fd, handle[i], 4096); gem_set_domain(fd, handle[i], coherent_domain, coherent_domain); b = batch[i]; *b++ = MI_STORE_DWORD_IMM; *b++ = 0; *b++ = 0; *b++ = 0; *b++ = MI_BATCH_BUFFER_END; reloc[i].target_handle = obj[0].handle; reloc[i].offset = 4; if (gen < 8) reloc[i].offset += 4; reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION; reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION; obj[1].relocation_count = 1; } memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)obj; execbuf.buffer_count = 2; execbuf.flags = ring; igt_info("running storedw loop on render with stall every %i batch\n", divider); for (i = 0; i < SLOW_QUICK(0x2000, 0x10); i++) { int j = i % divider; gem_set_domain(fd, handle[j], coherent_domain, coherent_domain); batch[j][3] = val; obj[1].handle = handle[j]; obj[1].relocs_ptr = (uintptr_t)&reloc[j]; gem_execbuf(fd, &execbuf); if (j == 0) { gem_set_domain(fd, obj[0].handle, coherent_domain, 0); igt_assert_f(*target == val, "%d: value mismatch: stored 0x%08x, expected 0x%08x\n", i, *target, val); } val++; } gem_set_domain(fd, obj[0].handle, coherent_domain, 0); igt_info("completed %d writes successfully, current value: 0x%08x\n", i, target[0]); munmap(target, 4096); gem_close(fd, obj[0].handle); for (i = 0; i < divider; ++i) { munmap(batch[i], 4096); gem_close(fd, handle[i]); } }
int main(int argc, char **argv) { int fd = drm_open_driver(DRIVER_INTEL); enum map {CPU, GTT, WC} map = CPU; enum dir {READ, WRITE, CLEAR, FAULT} dir = READ; int tiling = I915_TILING_NONE; struct timespec start, end; void *buf = malloc(OBJECT_SIZE); uint32_t handle; void *ptr, *src, *dst; int reps = 1; int loops; int c; while ((c = getopt (argc, argv, "m:d:r:t:")) != -1) { switch (c) { case 'm': if (strcmp(optarg, "cpu") == 0) map = CPU; else if (strcmp(optarg, "gtt") == 0) map = GTT; else if (strcmp(optarg, "wc") == 0) map = WC; else abort(); break; case 'd': if (strcmp(optarg, "read") == 0) dir = READ; else if (strcmp(optarg, "write") == 0) dir = WRITE; else if (strcmp(optarg, "clear") == 0) dir = CLEAR; else if (strcmp(optarg, "fault") == 0) dir = FAULT; else abort(); break; case 't': if (strcmp(optarg, "x") == 0) tiling = I915_TILING_X; else if (strcmp(optarg, "y") == 0) tiling = I915_TILING_Y; else if (strcmp(optarg, "none") == 0) tiling = I915_TILING_NONE; else abort(); break; case 'r': reps = atoi(optarg); if (reps < 1) reps = 1; break; default: break; } } handle = gem_create(fd, OBJECT_SIZE); switch (map) { case CPU: ptr = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); break; case GTT: ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); break; case WC: ptr = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); break; default: abort(); } gem_set_tiling(fd, handle, tiling, 512); if (dir == READ) { src = ptr; dst = buf; } else { src = buf; dst = ptr; } clock_gettime(CLOCK_MONOTONIC, &start); switch (dir) { case CLEAR: case FAULT: memset(dst, 0, OBJECT_SIZE); break; default: memcpy(dst, src, OBJECT_SIZE); break; } clock_gettime(CLOCK_MONOTONIC, &end); loops = 2 / elapsed(&start, &end); while (reps--) { clock_gettime(CLOCK_MONOTONIC, &start); for (c = 0; c < loops; c++) { int page; switch (dir) { case CLEAR: memset(dst, 0, OBJECT_SIZE); break; case FAULT: munmap(ptr, OBJECT_SIZE); switch (map) { case CPU: ptr = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); break; case GTT: ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_WRITE); break; case WC: ptr = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); break; default: abort(); } for (page = 0; page < OBJECT_SIZE; page += 4096) { uint32_t *x = (uint32_t *)ptr + page/4; __asm__ __volatile__("": : :"memory"); page += *x; /* should be zero! */ } break; default: memcpy(dst, src, OBJECT_SIZE); break; } } clock_gettime(CLOCK_MONOTONIC, &end); printf("%7.3f\n", OBJECT_SIZE / elapsed(&start, &end) * loops / (1024*1024)); } return 0; }
static void gem_userptr_sync(int fd, uint32_t handle) { gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); }
int main(int argc, char **argv) { int fd = drm_open_driver(DRIVER_INTEL); enum map {CPU, GTT, WC} map = CPU; enum dir {READ, WRITE, CLEAR, FAULT} dir = READ; int tiling = I915_TILING_NONE; void *buf = malloc(OBJECT_SIZE); uint32_t handle; void *ptr, *src, *dst; int reps = 13; int c, size; while ((c = getopt (argc, argv, "m:d:r:t:")) != -1) { switch (c) { case 'm': if (strcmp(optarg, "cpu") == 0) map = CPU; else if (strcmp(optarg, "gtt") == 0) map = GTT; else if (strcmp(optarg, "wc") == 0) map = WC; else abort(); break; case 'd': if (strcmp(optarg, "read") == 0) dir = READ; else if (strcmp(optarg, "write") == 0) dir = WRITE; else if (strcmp(optarg, "clear") == 0) dir = CLEAR; else if (strcmp(optarg, "fault") == 0) dir = FAULT; else abort(); break; case 't': if (strcmp(optarg, "x") == 0) tiling = I915_TILING_X; else if (strcmp(optarg, "y") == 0) tiling = I915_TILING_Y; else if (strcmp(optarg, "none") == 0) tiling = I915_TILING_NONE; else abort(); break; case 'r': reps = atoi(optarg); if (reps < 1) reps = 1; break; default: break; } } handle = gem_create(fd, OBJECT_SIZE); switch (map) { case CPU: ptr = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); break; case GTT: ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); break; case WC: ptr = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); break; default: abort(); } gem_set_tiling(fd, handle, tiling, 512); if (dir == READ) { src = ptr; dst = buf; } else { src = buf; dst = ptr; } for (size = 1; size <= OBJECT_SIZE; size <<= 1) { igt_stats_t stats; int n; igt_stats_init_with_size(&stats, reps); for (n = 0; n < reps; n++) { struct timespec start, end; int page; clock_gettime(CLOCK_MONOTONIC, &start); switch (dir) { case CLEAR: memset(dst, 0, size); break; case FAULT: for (page = 0; page < OBJECT_SIZE; page += 4096) { uint32_t *x = (uint32_t *)ptr + page/4; page += *x; /* should be zero! */ } break; default: memcpy(dst, src, size); break; } clock_gettime(CLOCK_MONOTONIC, &end); igt_stats_push(&stats, elapsed(&start, &end)); } printf("%7.3f\n", igt_stats_get_trimean(&stats)/1000); igt_stats_fini(&stats); } return 0; }
int main(int argc, char **argv) { int fd = drm_open_driver(DRIVER_INTEL); int domain = I915_GEM_DOMAIN_GTT; enum dir { READ, WRITE } dir = READ; void *buf = malloc(OBJECT_SIZE); uint32_t handle; int reps = 13; int c, size; while ((c = getopt (argc, argv, "D:d:r:")) != -1) { switch (c) { case 'd': if (strcmp(optarg, "cpu") == 0) domain = I915_GEM_DOMAIN_CPU; else if (strcmp(optarg, "gtt") == 0) domain = I915_GEM_DOMAIN_GTT; break; case 'D': if (strcmp(optarg, "read") == 0) dir = READ; else if (strcmp(optarg, "write") == 0) dir = WRITE; else abort(); break; case 'r': reps = atoi(optarg); if (reps < 1) reps = 1; break; default: break; } } handle = gem_create(fd, OBJECT_SIZE); for (size = 1; size <= OBJECT_SIZE; size <<= 1) { igt_stats_t stats; int n; igt_stats_init_with_size(&stats, reps); for (n = 0; n < reps; n++) { struct timespec start, end; gem_set_domain(fd, handle, domain, domain); clock_gettime(CLOCK_MONOTONIC, &start); if (dir == READ) gem_read(fd, handle, 0, buf, size); else gem_write(fd, handle, 0, buf, size); clock_gettime(CLOCK_MONOTONIC, &end); igt_stats_push(&stats, elapsed(&start, &end)); } printf("%7.3f\n", igt_stats_get_trimean(&stats)/1000); igt_stats_fini(&stats); } return 0; }