/*
 * Per-child test body.
 *
 * Creates a buffer whose size grows with the square of the child index,
 * hands it to busy(), then maps it through the GTT and checks that a
 * randomly chosen dword holds the canary value.
 *
 * child % 2 selects both the tiling mode passed to gem_set_tiling() and
 * whether the mapping is writable: odd children map read/write and store the
 * canary at a random dword before reading, even children map read-only.
 */
static void run(data_t *data, int child)
{
	const int bytes = 4096 * (256 + child * child);
	const int odd = child % 2;
	uint32_t bo = gem_create(data->fd, bytes);
	uint32_t *map;
	uint32_t seen;

	igt_assert(bo);

	/* The parity value is used directly as the tiling mode. */
	if (odd != I915_TILING_NONE)
		gem_set_tiling(data->fd, bo, odd, 4096);

	/* load up the unfaulted bo */
	busy(data, bo, bytes, 100);

	/*
	 * No explicit set-domain call: the test deliberately ignores the API
	 * and relies on the implicit set-to-gtt-domain within the fault
	 * handler.
	 */
	map = gem_mmap__gtt(data->fd, bo, bytes,
			    odd ? PROT_READ | PROT_WRITE : PROT_READ);
	if (odd)
		map[rand() % (bytes / 4)] = canary;

	/* The read index is drawn independently of any index written above. */
	seen = map[rand() % (bytes / 4)];
	munmap(map, bytes);

	igt_assert_eq_u32(seen, canary);
}
static void performance(void) { int n, loop, count; int fd, num_fences; double linear[2], tiled[2]; fd = drm_open_any(); num_fences = gem_available_fences(fd); igt_require(num_fences > 0); for (count = 2; count < 4*num_fences; count *= 2) { struct timeval start, end; uint32_t handle[count]; void *ptr[count]; for (n = 0; n < count; n++) { handle[n] = gem_create(fd, OBJECT_SIZE); ptr[n] = gem_mmap(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE); igt_assert(ptr[n]); } gettimeofday(&start, NULL); for (loop = 0; loop < 1024; loop++) { for (n = 0; n < count; n++) memset(ptr[n], 0, OBJECT_SIZE); } gettimeofday(&end, NULL); linear[count != 2] = count * loop / elapsed(&start, &end); igt_info("Upload rate for %d linear surfaces: %7.3fMiB/s\n", count, linear[count != 2]); for (n = 0; n < count; n++) gem_set_tiling(fd, handle[n], I915_TILING_X, 1024); gettimeofday(&start, NULL); for (loop = 0; loop < 1024; loop++) { for (n = 0; n < count; n++) memset(ptr[n], 0, OBJECT_SIZE); } gettimeofday(&end, NULL); tiled[count != 2] = count * loop / elapsed(&start, &end); igt_info("Upload rate for %d tiled surfaces: %7.3fMiB/s\n", count, tiled[count != 2]); for (n = 0; n < count; n++) { munmap(ptr[n], OBJECT_SIZE); gem_close(fd, handle[n]); } } errno = 0; igt_assert(linear[1] > 0.75 * linear[0]); igt_assert(tiled[1] > 0.75 * tiled[0]); }
static void wc_contention(void) { const int loops = 4096; int n, count; int fd, num_fences; double linear[2], tiled[2]; fd = drm_open_any(); gem_require_mmap_wc(fd); num_fences = gem_available_fences(fd); igt_require(num_fences > 0); for (count = 1; count < 4*num_fences; count *= 2) { struct timeval start, end; struct thread_contention threads[count]; for (n = 0; n < count; n++) { threads[n].handle = gem_create(fd, OBJECT_SIZE); threads[n].loops = loops; threads[n].fd = fd; } gettimeofday(&start, NULL); for (n = 0; n < count; n++) pthread_create(&threads[n].thread, NULL, wc_mmap, &threads[n]); for (n = 0; n < count; n++) pthread_join(threads[n].thread, NULL); gettimeofday(&end, NULL); linear[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096); igt_info("Contended upload rate for %d linear threads/wc: %7.3fMiB/s\n", count, linear[count != 2]); for (n = 0; n < count; n++) gem_set_tiling(fd, threads[n].handle, I915_TILING_X, 1024); gettimeofday(&start, NULL); for (n = 0; n < count; n++) pthread_create(&threads[n].thread, NULL, wc_mmap, &threads[n]); for (n = 0; n < count; n++) pthread_join(threads[n].thread, NULL); gettimeofday(&end, NULL); tiled[count != 2] = count * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096); igt_info("Contended upload rate for %d tiled threads/wc: %7.3fMiB/s\n", count, tiled[count != 2]); for (n = 0; n < count; n++) { gem_close(fd, threads[n].handle); } } errno = 0; igt_assert(linear[1] > 0.75 * linear[0]); igt_assert(tiled[1] > 0.75 * tiled[0]); }
/*
 * Create an OBJECT_SIZE buffer object, set it to X tiling with a stride of
 * WIDTH*4, and return its handle.
 */
static uint32_t tiled_bo_create(int fd)
{
	uint32_t bo = gem_create(fd, OBJECT_SIZE);

	gem_set_tiling(fd, bo, I915_TILING_X, WIDTH*4);

	return bo;
}
/*
 * Allocate the primary surface and fill it with a gray background.
 *
 * @fd:          DRM file descriptor
 * @prim_width:  surface width in pixels
 * @prim_height: surface height in rows
 * @prim_handle: out, handle of the created buffer object
 * @prim_stride: out, row stride in bytes
 * @prim_size:   out, allocation size in bytes
 * @tiled:       non-zero to X-tile the surface
 *
 * Returns 0 on success or -EINVAL if the pixel size is unsupported. A failed
 * mapping is not treated as an error: the surface is then left unfilled and
 * 0 is still returned.
 */
static int prepare_primary_surface(int fd, int prim_width, int prim_height,
				   uint32_t *prim_handle, uint32_t *prim_stride,
				   uint32_t *prim_size, int tiled)
{
	uint32_t bytes_per_pixel = sizeof(uint32_t);
	uint32_t *prim_fb_ptr;

	if (bytes_per_pixel != sizeof(uint32_t)) {
		/* bytes_per_pixel is unsigned, so print it with %u. */
		printf("Bad bytes_per_pixel for primary surface: %u\n",
		       bytes_per_pixel);
		return -EINVAL;
	}

	if (tiled) {
		/*
		 * Unsigned to match the values it is compared against; the
		 * comparisons below were already performed as unsigned.
		 */
		uint32_t v;

		/* Round the tiling up to the next power-of-two and the
		 * region up to the next pot fence size so that this works
		 * on all generations.
		 *
		 * This can still fail if the framebuffer is too large to
		 * be tiled. But then that failure is expected.
		 */
		v = prim_width * bytes_per_pixel;
		for (*prim_stride = 512; *prim_stride < v; *prim_stride *= 2)
			;

		v = *prim_stride * prim_height;
		for (*prim_size = 1024*1024; *prim_size < v; *prim_size *= 2)
			;
	} else {
		/* Scan-out has a 64 byte alignment restriction */
		*prim_stride = (prim_width * bytes_per_pixel + 63) & ~63;
		*prim_size = *prim_stride * prim_height;
	}

	*prim_handle = gem_create(fd, *prim_size);

	if (tiled)
		gem_set_tiling(fd, *prim_handle, I915_TILING_X, *prim_stride);

	prim_fb_ptr = gem_mmap(fd, *prim_handle, *prim_size,
			       PROT_READ | PROT_WRITE);
	if (prim_fb_ptr != NULL) {
		/* Write primary surface with gray background */
		memset(prim_fb_ptr, 0x3f, *prim_size);
		munmap(prim_fb_ptr, *prim_size);
	}

	return 0;
}
/*
 * Create a WIDTH x HEIGHT buffer object of 32-bit values with the requested
 * tiling mode, fill it through a mapping with consecutive dwords starting at
 * val, and return its handle.
 */
static uint32_t create_bo(int fd, uint32_t val, int tiling)
{
	uint32_t bo = gem_create(fd, WIDTH*HEIGHT*4);
	uint32_t *map;
	int dword;

	gem_set_tiling(fd, bo, tiling, WIDTH*4);

	map = gem_mmap(fd, bo, WIDTH*HEIGHT*4, PROT_READ | PROT_WRITE);

	/* Dword n receives val + n (unsigned 32-bit arithmetic). */
	for (dword = 0; dword < WIDTH*HEIGHT; dword++)
		map[dword] = val + dword;

	munmap(map, WIDTH*HEIGHT*4);

	return bo;
}
/*
 * Create an X-tiled buffer object the size of the file-level linear[] array,
 * fill it through a mapping so that dword n holds the value n, and return its
 * handle.
 */
static uint32_t create_bo(int fd)
{
	uint32_t bo = gem_create(fd, sizeof(linear));
	uint32_t *map;
	int dword = 0;

	gem_set_tiling(fd, bo, I915_TILING_X, WIDTH * sizeof(uint32_t));

	map = gem_mmap(fd, bo, sizeof(linear), PROT_READ | PROT_WRITE);

	/* Each dword is set to its own index. */
	while (dword < WIDTH*HEIGHT) {
		map[dword] = dword;
		dword++;
	}

	munmap(map, sizeof(linear));

	return bo;
}
/*
 * Create an X-tiled buffer object of SIZE bytes, fill it through a GTT
 * mapping so that dword n holds the value n, move it to the CPU read domain
 * and return its handle.
 */
static uint32_t create_bo(int fd)
{
	uint32_t bo = gem_create(fd, SIZE);
	uint32_t *gtt;
	int dword = 0;

	gem_set_tiling(fd, bo, I915_TILING_X, WIDTH * sizeof(uint32_t));

	/* Write through the fence to tile the data.
	 * We then manually detile on reading back through the mmap(wc).
	 */
	gtt = gem_mmap__gtt(fd, bo, SIZE, PROT_READ | PROT_WRITE);
	while (dword < WIDTH*HEIGHT) {
		gtt[dword] = dword;
		dword++;
	}
	munmap(gtt, SIZE);

	gem_set_domain(fd, bo, I915_GEM_DOMAIN_CPU, 0);

	return bo;
}
int main(int argc, char **argv) { int fd = drm_open_driver(DRIVER_INTEL); enum map {CPU, GTT, WC} map = CPU; enum dir {READ, WRITE, CLEAR, FAULT} dir = READ; int tiling = I915_TILING_NONE; void *buf = malloc(OBJECT_SIZE); uint32_t handle; void *ptr, *src, *dst; int reps = 13; int c, size; while ((c = getopt (argc, argv, "m:d:r:t:")) != -1) { switch (c) { case 'm': if (strcmp(optarg, "cpu") == 0) map = CPU; else if (strcmp(optarg, "gtt") == 0) map = GTT; else if (strcmp(optarg, "wc") == 0) map = WC; else abort(); break; case 'd': if (strcmp(optarg, "read") == 0) dir = READ; else if (strcmp(optarg, "write") == 0) dir = WRITE; else if (strcmp(optarg, "clear") == 0) dir = CLEAR; else if (strcmp(optarg, "fault") == 0) dir = FAULT; else abort(); break; case 't': if (strcmp(optarg, "x") == 0) tiling = I915_TILING_X; else if (strcmp(optarg, "y") == 0) tiling = I915_TILING_Y; else if (strcmp(optarg, "none") == 0) tiling = I915_TILING_NONE; else abort(); break; case 'r': reps = atoi(optarg); if (reps < 1) reps = 1; break; default: break; } } handle = gem_create(fd, OBJECT_SIZE); switch (map) { case CPU: ptr = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); break; case GTT: ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); break; case WC: ptr = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); break; default: abort(); } gem_set_tiling(fd, handle, tiling, 512); if (dir == READ) { src = ptr; dst = buf; } else { src = buf; dst = ptr; } for (size = 1; size <= OBJECT_SIZE; size <<= 1) { igt_stats_t stats; int n; igt_stats_init_with_size(&stats, reps); for (n = 0; n < reps; n++) { struct timespec start, end; int page; clock_gettime(CLOCK_MONOTONIC, &start); switch (dir) { case CLEAR: memset(dst, 0, size); break; case FAULT: for (page = 0; page < 
OBJECT_SIZE; page += 4096) { uint32_t *x = (uint32_t *)ptr + page/4; page += *x; /* should be zero! */ } break; default: memcpy(dst, src, size); break; } clock_gettime(CLOCK_MONOTONIC, &end); igt_stats_push(&stats, elapsed(&start, &end)); } printf("%7.3f\n", igt_stats_get_trimean(&stats)/1000); igt_stats_fini(&stats); } return 0; }
int main(int argc, char **argv) { struct timeval start, end; uint8_t *buf; uint32_t handle; int size = OBJECT_SIZE; int loop, i, tiling; int fd; if (argc > 1) size = atoi(argv[1]); if (size == 0) { fprintf(stderr, "Invalid object size specified\n"); return 1; } buf = malloc(size); memset(buf, 0, size); fd = drm_open_any(); handle = gem_create(fd, size); assert(handle); for (tiling = I915_TILING_NONE; tiling <= I915_TILING_Y; tiling++) { if (tiling != I915_TILING_NONE) { printf("\nSetting tiling mode to %s\n", tiling == I915_TILING_X ? "X" : "Y"); gem_set_tiling(fd, handle, tiling, 512); } if (tiling == I915_TILING_NONE) { gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); { uint32_t *base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); /* mmap read */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); ptr = base; x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to read %dk through a CPU map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap write */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); ptr = base; for (i = 0; i < size/sizeof(*ptr); i++) ptr[i] = i; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to write %dk through a CPU map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); memset(base, 0, size); munmap(base, size); } gettimeofday(&end, NULL); printf("Time to clear %dk through a CPU map: %7.3fµs\n", 
size/1024, elapsed(&start, &end, loop)); gettimeofday(&start, NULL); base = gem_mmap__cpu(fd, handle, size, PROT_READ | PROT_WRITE); for (loop = 0; loop < 1000; loop++) memset(base, 0, size); munmap(base, size); gettimeofday(&end, NULL); printf("Time to clear %dk through a cached CPU map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); } /* CPU pwrite */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_write(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pwrite %dk through the CPU: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* CPU pread */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_read(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pread %dk through the CPU: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); } /* prefault into gtt */ { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } /* mmap read */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to read %dk through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap write */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; for (i = 0; i < size/sizeof(*ptr); i++) ptr[i] = i; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to write %dk through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap clear */ gettimeofday(&start, NULL); for (loop = 0; 
loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); memset(base, 0, size); munmap(base, size); } gettimeofday(&end, NULL); printf("Time to clear %dk through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); gettimeofday(&start, NULL);{ uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); for (loop = 0; loop < 1000; loop++) memset(base, 0, size); munmap(base, size); } gettimeofday(&end, NULL); printf("Time to clear %dk through a cached GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* mmap read */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { uint32_t *base = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); volatile uint32_t *ptr = base; int x = 0; for (i = 0; i < size/sizeof(*ptr); i++) x += ptr[i]; /* force overtly clever gcc to actually compute x */ ptr[0] = x; munmap(base, size); } gettimeofday(&end, NULL); printf("Time to read %dk (again) through a GTT map: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); if (tiling == I915_TILING_NONE) { /* GTT pwrite */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_write(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pwrite %dk through the GTT: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* GTT pread */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) gem_read(fd, handle, 0, buf, size); gettimeofday(&end, NULL); printf("Time to pread %dk through the GTT: %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* GTT pwrite, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_write(fd, handle, 0, buf, size); gem_sync(fd, handle); } gettimeofday(&end, NULL); printf("Time to pwrite %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* GTT pread, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_sync(fd, handle); gem_read(fd, handle, 0, buf, 
size); } gettimeofday(&end, NULL); printf("Time to pread %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* partial writes */ printf("Now partial writes.\n"); size /= 4; /* partial GTT pwrite, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_write(fd, handle, 0, buf, size); gem_sync(fd, handle); } gettimeofday(&end, NULL); printf("Time to pwrite %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); /* partial GTT pread, including clflush */ gettimeofday(&start, NULL); for (loop = 0; loop < 1000; loop++) { gem_sync(fd, handle); gem_read(fd, handle, 0, buf, size); } gettimeofday(&end, NULL); printf("Time to pread %dk through the GTT (clflush): %7.3fµs\n", size/1024, elapsed(&start, &end, loop)); size *= 4; } } gem_close(fd, handle); close(fd); return 0; }
static void test_huge_copy(int fd, int huge, int tiling_a, int tiling_b) { uint64_t huge_object_size, i; uint32_t bo, *pattern_a, *pattern_b; char *a, *b; switch (huge) { case -2: huge_object_size = gem_mappable_aperture_size() / 4; break; case -1: huge_object_size = gem_mappable_aperture_size() / 2; break; case 0: huge_object_size = gem_mappable_aperture_size() + PAGE_SIZE; break; default: huge_object_size = gem_aperture_size(fd) + PAGE_SIZE; break; } intel_require_memory(2, huge_object_size, CHECK_RAM); pattern_a = malloc(PAGE_SIZE); for (i = 0; i < PAGE_SIZE/4; i++) pattern_a[i] = i; pattern_b = malloc(PAGE_SIZE); for (i = 0; i < PAGE_SIZE/4; i++) pattern_b[i] = ~i; bo = gem_create(fd, huge_object_size); if (tiling_a) gem_set_tiling(fd, bo, tiling_a, tiling_a == I915_TILING_Y ? 128 : 512); a = __gem_mmap__gtt(fd, bo, huge_object_size, PROT_READ | PROT_WRITE); igt_require(a); gem_close(fd, bo); for (i = 0; i < huge_object_size / PAGE_SIZE; i++) memcpy(a + PAGE_SIZE*i, pattern_a, PAGE_SIZE); bo = gem_create(fd, huge_object_size); if (tiling_b) gem_set_tiling(fd, bo, tiling_b, tiling_b == I915_TILING_Y ? 
128 : 512); b = __gem_mmap__gtt(fd, bo, huge_object_size, PROT_READ | PROT_WRITE); igt_require(b); gem_close(fd, bo); for (i = 0; i < huge_object_size / PAGE_SIZE; i++) memcpy(b + PAGE_SIZE*i, pattern_b, PAGE_SIZE); for (i = 0; i < huge_object_size / PAGE_SIZE; i++) { if (i & 1) memcpy(a + i *PAGE_SIZE, b + i*PAGE_SIZE, PAGE_SIZE); else memcpy(b + i *PAGE_SIZE, a + i*PAGE_SIZE, PAGE_SIZE); } for (i = 0; i < huge_object_size / PAGE_SIZE; i++) { if (i & 1) igt_assert(memcmp(pattern_b, a + PAGE_SIZE*i, PAGE_SIZE) == 0); else igt_assert(memcmp(pattern_a, a + PAGE_SIZE*i, PAGE_SIZE) == 0); } munmap(a, huge_object_size); for (i = 0; i < huge_object_size / PAGE_SIZE; i++) { if (i & 1) igt_assert(memcmp(pattern_b, b + PAGE_SIZE*i, PAGE_SIZE) == 0); else igt_assert(memcmp(pattern_a, b + PAGE_SIZE*i, PAGE_SIZE) == 0); } munmap(b, huge_object_size); free(pattern_a); free(pattern_b); }
static void test_huge_bo(int fd, int huge, int tiling) { uint32_t bo; char *ptr; char *tiled_pattern; char *linear_pattern; uint64_t size, last_offset; int pitch = tiling == I915_TILING_Y ? 128 : 512; int i; switch (huge) { case -1: size = gem_mappable_aperture_size() / 2; break; case 0: size = gem_mappable_aperture_size() + PAGE_SIZE; break; default: size = gem_aperture_size(fd) + PAGE_SIZE; break; } intel_require_memory(1, size, CHECK_RAM); last_offset = size - PAGE_SIZE; /* Create pattern */ bo = gem_create(fd, PAGE_SIZE); if (tiling) gem_set_tiling(fd, bo, tiling, pitch); linear_pattern = gem_mmap__gtt(fd, bo, PAGE_SIZE, PROT_READ | PROT_WRITE); for (i = 0; i < PAGE_SIZE; i++) linear_pattern[i] = i; tiled_pattern = gem_mmap__cpu(fd, bo, 0, PAGE_SIZE, PROT_READ); gem_set_domain(fd, bo, I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT, 0); gem_close(fd, bo); bo = gem_create(fd, size); if (tiling) gem_set_tiling(fd, bo, tiling, pitch); /* Initialise first/last page through CPU mmap */ ptr = gem_mmap__cpu(fd, bo, 0, size, PROT_READ | PROT_WRITE); memcpy(ptr, tiled_pattern, PAGE_SIZE); memcpy(ptr + last_offset, tiled_pattern, PAGE_SIZE); munmap(ptr, size); /* Obtain mapping for the object through GTT. */ ptr = __gem_mmap__gtt(fd, bo, size, PROT_READ | PROT_WRITE); igt_require_f(ptr, "Huge BO GTT mapping not supported.\n"); set_domain_gtt(fd, bo); /* Access through GTT should still provide the CPU written values. */ igt_assert(memcmp(ptr , linear_pattern, PAGE_SIZE) == 0); igt_assert(memcmp(ptr + last_offset, linear_pattern, PAGE_SIZE) == 0); gem_set_tiling(fd, bo, I915_TILING_NONE, 0); igt_assert(memcmp(ptr , tiled_pattern, PAGE_SIZE) == 0); igt_assert(memcmp(ptr + last_offset, tiled_pattern, PAGE_SIZE) == 0); munmap(ptr, size); gem_close(fd, bo); munmap(tiled_pattern, PAGE_SIZE); munmap(linear_pattern, PAGE_SIZE); }
static int prepare_sprite_surfaces(int fd, int sprite_width, int sprite_height, uint32_t num_surfaces, uint32_t *sprite_handles, uint32_t *sprite_stride, uint32_t *sprite_size, int tiled) { uint32_t bytes_per_pixel = sizeof(uint32_t); uint32_t *sprite_fb_ptr; int i; if (bytes_per_pixel != sizeof(uint32_t)) { printf("Bad bytes_per_pixel for sprite: %d\n", bytes_per_pixel); return -EINVAL; } if (tiled) { int v; /* Round the tiling up to the next power-of-two and the * region up to the next pot fence size so that this works * on all generations. * * This can still fail if the framebuffer is too large to * be tiled. But then that failure is expected. */ v = sprite_width * bytes_per_pixel; for (*sprite_stride = 512; *sprite_stride < v; *sprite_stride *= 2) ; v = *sprite_stride * sprite_height; for (*sprite_size = 1024*1024; *sprite_size < v; *sprite_size *= 2) ; } else { /* Scan-out has a 64 byte alignment restriction */ *sprite_stride = (sprite_width * bytes_per_pixel + 63) & ~63; *sprite_size = *sprite_stride * sprite_height; } for (i = 0; i < num_surfaces; i++) { // Create the sprite surface sprite_handles[i] = gem_create(fd, *sprite_size); if (tiled) gem_set_tiling(fd, sprite_handles[i], I915_TILING_X, *sprite_stride); // Get pointer to the surface sprite_fb_ptr = gem_mmap(fd, sprite_handles[i], *sprite_size, PROT_READ | PROT_WRITE); if (sprite_fb_ptr != NULL) { // Fill with checkerboard pattern fill_sprite(sprite_width, sprite_height, *sprite_stride, i, sprite_fb_ptr); munmap(sprite_fb_ptr, *sprite_size); } else { i--; while (i >= 0) { gem_close(fd, sprite_handles[i]); i--; } } } return 0; }
int main(int argc, char **argv) { int fd = drm_open_driver(DRIVER_INTEL); enum map {CPU, GTT, WC} map = CPU; enum dir {READ, WRITE, CLEAR, FAULT} dir = READ; int tiling = I915_TILING_NONE; struct timespec start, end; void *buf = malloc(OBJECT_SIZE); uint32_t handle; void *ptr, *src, *dst; int reps = 1; int loops; int c; while ((c = getopt (argc, argv, "m:d:r:t:")) != -1) { switch (c) { case 'm': if (strcmp(optarg, "cpu") == 0) map = CPU; else if (strcmp(optarg, "gtt") == 0) map = GTT; else if (strcmp(optarg, "wc") == 0) map = WC; else abort(); break; case 'd': if (strcmp(optarg, "read") == 0) dir = READ; else if (strcmp(optarg, "write") == 0) dir = WRITE; else if (strcmp(optarg, "clear") == 0) dir = CLEAR; else if (strcmp(optarg, "fault") == 0) dir = FAULT; else abort(); break; case 't': if (strcmp(optarg, "x") == 0) tiling = I915_TILING_X; else if (strcmp(optarg, "y") == 0) tiling = I915_TILING_Y; else if (strcmp(optarg, "none") == 0) tiling = I915_TILING_NONE; else abort(); break; case 'r': reps = atoi(optarg); if (reps < 1) reps = 1; break; default: break; } } handle = gem_create(fd, OBJECT_SIZE); switch (map) { case CPU: ptr = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); break; case GTT: ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); break; case WC: ptr = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); gem_set_domain(fd, handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); break; default: abort(); } gem_set_tiling(fd, handle, tiling, 512); if (dir == READ) { src = ptr; dst = buf; } else { src = buf; dst = ptr; } clock_gettime(CLOCK_MONOTONIC, &start); switch (dir) { case CLEAR: case FAULT: memset(dst, 0, OBJECT_SIZE); break; default: memcpy(dst, src, OBJECT_SIZE); break; } clock_gettime(CLOCK_MONOTONIC, &end); loops = 2 / elapsed(&start, &end); while (reps--) { clock_gettime(CLOCK_MONOTONIC, 
&start); for (c = 0; c < loops; c++) { int page; switch (dir) { case CLEAR: memset(dst, 0, OBJECT_SIZE); break; case FAULT: munmap(ptr, OBJECT_SIZE); switch (map) { case CPU: ptr = gem_mmap__cpu(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); break; case GTT: ptr = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_WRITE); break; case WC: ptr = gem_mmap__wc(fd, handle, 0, OBJECT_SIZE, PROT_WRITE); break; default: abort(); } for (page = 0; page < OBJECT_SIZE; page += 4096) { uint32_t *x = (uint32_t *)ptr + page/4; __asm__ __volatile__("": : :"memory"); page += *x; /* should be zero! */ } break; default: memcpy(dst, src, OBJECT_SIZE); break; } } clock_gettime(CLOCK_MONOTONIC, &end); printf("%7.3f\n", OBJECT_SIZE / elapsed(&start, &end) * loops / (1024*1024)); } return 0; }
static void thread_performance(unsigned mask) { const int loops = 4096; int n, count; int fd, num_fences; double linear[2], tiled[2]; fd = drm_open_any(); num_fences = gem_available_fences(fd); igt_require(num_fences > 0); for (count = 2; count < 4*num_fences; count *= 2) { const int nthreads = (mask & READ ? count : 0) + (mask & WRITE ? count : 0); struct timeval start, end; struct thread_performance readers[count]; struct thread_performance writers[count]; uint32_t handle[count]; void *ptr[count]; for (n = 0; n < count; n++) { handle[n] = gem_create(fd, OBJECT_SIZE); ptr[n] = gem_mmap(fd, handle[n], OBJECT_SIZE, PROT_READ | PROT_WRITE); igt_assert(ptr[n]); if (mask & READ) { readers[n].id = n; readers[n].direction = READ; readers[n].ptr = ptr; readers[n].count = count; readers[n].loops = loops; } if (mask & WRITE) { writers[n].id = count - n - 1; writers[n].direction = WRITE; writers[n].ptr = ptr; writers[n].count = count; writers[n].loops = loops; } } gettimeofday(&start, NULL); for (n = 0; n < count; n++) { if (mask & READ) pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]); if (mask & WRITE) pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]); } for (n = 0; n < count; n++) { if (mask & READ) pthread_join(readers[n].thread, NULL); if (mask & WRITE) pthread_join(writers[n].thread, NULL); } gettimeofday(&end, NULL); linear[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096); igt_info("%s rate for %d linear surfaces, %d threads: %7.3fMiB/s\n", direction_string(mask), count, nthreads, linear[count != 2]); for (n = 0; n < count; n++) gem_set_tiling(fd, handle[n], I915_TILING_X, 1024); gettimeofday(&start, NULL); for (n = 0; n < count; n++) { if (mask & READ) pthread_create(&readers[n].thread, NULL, read_thread_performance, &readers[n]); if (mask & WRITE) pthread_create(&writers[n].thread, NULL, write_thread_performance, &writers[n]); } for (n = 0; n < count; n++) { if (mask & READ) 
pthread_join(readers[n].thread, NULL); if (mask & WRITE) pthread_join(writers[n].thread, NULL); } gettimeofday(&end, NULL); tiled[count != 2] = nthreads * loops / elapsed(&start, &end) / (OBJECT_SIZE / 4096); igt_info("%s rate for %d tiled surfaces, %d threads: %7.3fMiB/s\n", direction_string(mask), count, nthreads, tiled[count != 2]); for (n = 0; n < count; n++) { munmap(ptr[n], OBJECT_SIZE); gem_close(fd, handle[n]); } } errno = 0; igt_assert(linear[1] > 0.75 * linear[0]); igt_assert(tiled[1] > 0.75 * tiled[0]); }