/** * radeon_vce_ib_test - test if VCE IBs are working * * @rdev: radeon_device pointer * @ring: the engine to test on * */ int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) { struct radeon_fence *fence = NULL; int r; r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL); if (r) { DRM_ERROR("radeon: failed to get create msg (%d).\n", r); goto error; } r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence); if (r) { DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); goto error; } r = radeon_fence_wait(fence, false); if (r) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); } else { DRM_INFO("ib test on ring %d succeeded\n", ring->idx); } error: radeon_fence_unref(&fence); return r; }
/** * radeon_vce_ib_test - test if VCE IBs are working * * @rdev: radeon_device pointer * @ring: the engine to test on * */ int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) { struct radeon_fence *fence = NULL; int r; r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL); if (r) { DRM_ERROR("radeon: failed to get create msg (%d).\n", r); goto error; } r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence); if (r) { DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); goto error; } r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies( RADEON_USEC_IB_TEST_TIMEOUT)); if (r < 0) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); } else if (r == 0) { DRM_ERROR("radeon: fence wait timed out.\n"); r = -ETIMEDOUT; } else { DRM_INFO("ib test on ring %d succeeded\n", ring->idx); r = 0; } error: radeon_fence_unref(&fence); return r; }
/** * uvd_v1_0_ib_test - test ib execution * * @rdev: radeon_device pointer * @ring: radeon_ring pointer * * Test if we can successfully execute an IB */ int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) { struct radeon_fence *fence = NULL; int r; r = radeon_set_uvd_clocks(rdev, 53300, 40000); if (r) { DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r); return r; } r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); if (r) { DRM_ERROR("radeon: failed to get create msg (%d).\n", r); goto error; } r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence); if (r) { DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); goto error; } r = radeon_fence_wait(fence, false); if (r) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); goto error; } DRM_INFO("ib test on ring %d succeeded\n", ring->idx); error: radeon_fence_unref(&fence); radeon_set_uvd_clocks(rdev, 0, 0); return r; }
/*
 * Run @n copies of @size bytes from @saddr to @daddr using the copy
 * method selected by @flag, waiting on a fresh fence after each copy.
 *
 * Returns the elapsed time in milliseconds for the whole run, or the
 * error code of the first step that failed.
 */
static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size,
				    uint64_t saddr, uint64_t daddr,
				    int flag, int n)
{
	struct radeon_fence *copy_fence = NULL;
	unsigned long t_begin;
	int pass = 0;
	int ret;

	t_begin = jiffies;
	while (pass < n) {
		if (flag == RADEON_BENCHMARK_COPY_DMA) {
			ret = radeon_fence_create(rdev, &copy_fence,
					radeon_copy_dma_ring_index(rdev));
			if (ret)
				return ret;
			ret = radeon_copy_dma(rdev, saddr, daddr,
					      size / RADEON_GPU_PAGE_SIZE,
					      copy_fence);
		} else if (flag == RADEON_BENCHMARK_COPY_BLIT) {
			ret = radeon_fence_create(rdev, &copy_fence,
					radeon_copy_blit_ring_index(rdev));
			if (ret)
				return ret;
			ret = radeon_copy_blit(rdev, saddr, daddr,
					       size / RADEON_GPU_PAGE_SIZE,
					       copy_fence);
		} else {
			DRM_ERROR("Unknown copy method\n");
			ret = -EINVAL;
		}
		if (ret)
			goto out_unref;

		ret = radeon_fence_wait(copy_fence, false);
		if (ret)
			goto out_unref;

		/* drop this pass's fence before the next one is created */
		radeon_fence_unref(&copy_fence);
		pass++;
	}

	ret = jiffies_to_msecs(jiffies - t_begin);

out_unref:
	if (copy_fence)
		radeon_fence_unref(&copy_fence);
	return ret;
}
/*
 * Move the contents of @bo from @old_mem to @new_mem with a GPU copy.
 *
 * The start offsets of both placements are converted to GPU addresses
 * (VRAM or GTT base added), the copy is issued with a freshly created
 * fence, and the fence is handed to ttm_bo_move_accel_cleanup().
 *
 * Returns the result of ttm_bo_move_accel_cleanup(), the error from
 * radeon_fence_create(), or -EINVAL for an unknown placement or when the
 * CP is not ready. The fence reference is dropped on every path once it
 * has been created.
 */
static int radeon_move_blit(struct ttm_buffer_object *bo,
			    bool evict, int no_wait_reserve, bool no_wait_gpu,
			    struct ttm_mem_reg *new_mem,
			    struct ttm_mem_reg *old_mem)
{
	struct radeon_device *rdev;
	uint64_t old_start, new_start;
	struct radeon_fence *fence;
	int r;

	rdev = radeon_get_rdev(bo->bdev);
	r = radeon_fence_create(rdev, &fence);
	if (unlikely(r)) {
		return r;
	}
	old_start = old_mem->start << PAGE_SHIFT;
	new_start = new_mem->start << PAGE_SHIFT;

	switch (old_mem->mem_type) {
	case TTM_PL_VRAM:
		old_start += rdev->mc.vram_start;
		break;
	case TTM_PL_TT:
		old_start += rdev->mc.gtt_start;
		break;
	default:
		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
		r = -EINVAL;
		goto out_unref;
	}
	switch (new_mem->mem_type) {
	case TTM_PL_VRAM:
		new_start += rdev->mc.vram_start;
		break;
	case TTM_PL_TT:
		new_start += rdev->mc.gtt_start;
		break;
	default:
		/* report the placement that was actually rejected */
		DRM_ERROR("Unknown placement %d\n", new_mem->mem_type);
		r = -EINVAL;
		goto out_unref;
	}
	if (!rdev->cp.ready) {
		DRM_ERROR("Trying to move memory with CP turned off.\n");
		r = -EINVAL;
		goto out_unref;
	}

	BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);

	r = radeon_copy(rdev, old_start, new_start,
			new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
			fence);
	/* FIXME: handle copy error */
	r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
				      evict, no_wait_reserve, no_wait_gpu, new_mem);
out_unref:
	/* drop our reference; previously leaked on the -EINVAL paths */
	radeon_fence_unref(&fence);
	return r;
}
/*
 * Move the contents of @bo from @old_mem to @new_mem with a GPU copy on
 * the ring returned by radeon_copy_ring_index().
 *
 * The start offsets of both placements are converted to GPU addresses
 * (VRAM or GTT base added). The fence pointer is seeded from
 * bo->sync_obj, passed by reference to radeon_copy(), and then handed to
 * ttm_bo_move_accel_cleanup().
 *
 * Returns the result of ttm_bo_move_accel_cleanup(), or -EINVAL for an
 * unknown placement or when the copy ring is not ready.
 */
static int radeon_move_blit(struct ttm_buffer_object *bo,
			    bool evict, bool no_wait_gpu,
			    struct ttm_mem_reg *new_mem,
			    struct ttm_mem_reg *old_mem)
{
	struct radeon_device *rdev;
	uint64_t old_start, new_start;
	struct radeon_fence *fence;
	int r, ridx;

	rdev = radeon_get_rdev(bo->bdev);
	ridx = radeon_copy_ring_index(rdev);
	old_start = old_mem->start << PAGE_SHIFT;
	new_start = new_mem->start << PAGE_SHIFT;

	switch (old_mem->mem_type) {
	case TTM_PL_VRAM:
		old_start += rdev->mc.vram_start;
		break;
	case TTM_PL_TT:
		old_start += rdev->mc.gtt_start;
		break;
	default:
		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
		return -EINVAL;
	}
	switch (new_mem->mem_type) {
	case TTM_PL_VRAM:
		new_start += rdev->mc.vram_start;
		break;
	case TTM_PL_TT:
		new_start += rdev->mc.gtt_start;
		break;
	default:
		/* report the placement that was actually rejected */
		DRM_ERROR("Unknown placement %d\n", new_mem->mem_type);
		return -EINVAL;
	}
	if (!rdev->ring[ridx].ready) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	CTASSERT((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) == 0);

	/* sync other rings */
	fence = bo->sync_obj;
	r = radeon_copy(rdev, old_start, new_start,
			new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
			&fence);
	/* FIXME: handle copy error */
	r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
				      evict, no_wait_gpu, new_mem);
	radeon_fence_unref(&fence);
	return r;
}
/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+).
 *
 * Does nothing when the manager is not enabled. Otherwise drops the
 * fence reference held in every vm_manager.active[] slot, calls
 * radeon_asic_vm_fini() and marks the manager as disabled.
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
	int slot;

	if (rdev->vm_manager.enabled) {
		slot = 0;
		while (slot < RADEON_NUM_VM) {
			radeon_fence_unref(&rdev->vm_manager.active[slot]);
			++slot;
		}

		radeon_asic_vm_fini(rdev);
		rdev->vm_manager.enabled = false;
	}
}
int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib **ib, unsigned size) { struct radeon_fence *fence; unsigned cretry = 0; int r = 0, i, idx; *ib = NULL; /* align size on 256 bytes */ size = ALIGN(size, 256); r = radeon_fence_create(rdev, &fence, ring); if (r) { dev_err(rdev->dev, "failed to create fence for new IB\n"); return r; } radeon_mutex_lock(&rdev->ib_pool.mutex); idx = rdev->ib_pool.head_id; retry: if (cretry > 5) { dev_err(rdev->dev, "failed to get an ib after 5 retry\n"); radeon_mutex_unlock(&rdev->ib_pool.mutex); radeon_fence_unref(&fence); return -ENOMEM; } cretry++; for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { radeon_ib_try_free(rdev, &rdev->ib_pool.ibs[idx]); if (rdev->ib_pool.ibs[idx].fence == NULL) { r = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager, &rdev->ib_pool.ibs[idx].sa_bo, size, 256); if (!r) { *ib = &rdev->ib_pool.ibs[idx]; (*ib)->ptr = rdev->ib_pool.sa_manager.cpu_ptr; (*ib)->ptr += ((*ib)->sa_bo.offset >> 2); (*ib)->gpu_addr = rdev->ib_pool.sa_manager.gpu_addr; (*ib)->gpu_addr += (*ib)->sa_bo.offset; (*ib)->fence = fence; (*ib)->vm_id = 0; (*ib)->is_const_ib = false; /* ib are most likely to be allocated in a ring fashion * thus rdev->ib_pool.head_id should be the id of the * oldest ib */ rdev->ib_pool.head_id = (1 + idx); rdev->ib_pool.head_id &= (RADEON_IB_POOL_SIZE - 1); radeon_mutex_unlock(&rdev->ib_pool.mutex); return 0; } }
/** * radeon_vm_flush - hardware flush the vm * * @rdev: radeon_device pointer * @vm: vm we want to flush * @ring: ring to use for flush * @updates: last vm update that is waited for * * Flush the vm (cayman+). * * Global and local mutex must be locked! */ void radeon_vm_flush(struct radeon_device *rdev, struct radeon_vm *vm, int ring, struct radeon_fence *updates) { uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); struct radeon_vm_id *vm_id = &vm->ids[ring]; if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates || radeon_fence_is_earlier(vm_id->flushed_updates, updates)) { trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id); radeon_fence_unref(&vm_id->flushed_updates); vm_id->flushed_updates = radeon_fence_ref(
/*
 * Move the contents of @bo from @old_mem to @new_mem with a GPU copy.
 *
 * The start offsets of both placements are converted to GPU addresses
 * (VRAM or GTT location added), the copy is issued with a freshly
 * created fence, and the fence is handed to ttm_bo_move_accel_cleanup().
 *
 * Returns the result of ttm_bo_move_accel_cleanup(), the error from
 * radeon_fence_create(), or -EINVAL for an unknown placement or when the
 * CP is not ready. The fence reference is dropped on every path once it
 * has been created.
 */
static int radeon_move_blit(struct ttm_buffer_object *bo,
			    bool evict, int no_wait,
			    struct ttm_mem_reg *new_mem,
			    struct ttm_mem_reg *old_mem)
{
	struct radeon_device *rdev;
	uint64_t old_start, new_start;
	struct radeon_fence *fence;
	int r;

	rdev = radeon_get_rdev(bo->bdev);
	r = radeon_fence_create(rdev, &fence);
	if (unlikely(r)) {
		return r;
	}
	old_start = old_mem->mm_node->start << PAGE_SHIFT;
	new_start = new_mem->mm_node->start << PAGE_SHIFT;

	switch (old_mem->mem_type) {
	case TTM_PL_VRAM:
		old_start += rdev->mc.vram_location;
		break;
	case TTM_PL_TT:
		old_start += rdev->mc.gtt_location;
		break;
	default:
		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
		r = -EINVAL;
		goto out_unref;
	}
	switch (new_mem->mem_type) {
	case TTM_PL_VRAM:
		new_start += rdev->mc.vram_location;
		break;
	case TTM_PL_TT:
		new_start += rdev->mc.gtt_location;
		break;
	default:
		/* report the placement that was actually rejected */
		DRM_ERROR("Unknown placement %d\n", new_mem->mem_type);
		r = -EINVAL;
		goto out_unref;
	}
	if (!rdev->cp.ready) {
		DRM_ERROR("Trying to move memory with CP turned off.\n");
		r = -EINVAL;
		goto out_unref;
	}

	/* NOTE: the result of radeon_copy() is overwritten below */
	r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages, fence);
	r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
				      evict, no_wait, new_mem);
out_unref:
	/* drop our reference; previously leaked on the -EINVAL paths */
	radeon_fence_unref(&fence);
	return r;
}
/* * IB. */ bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib) { bool done = false; /* only free ib which have been emited */ if (ib->fence && ib->fence->emitted) { if (radeon_fence_signaled(ib->fence)) { radeon_fence_unref(&ib->fence); radeon_sa_bo_free(rdev, &ib->sa_bo); done = true; } } return done; }
static void radeon_test_ring_sync2(struct radeon_device *rdev, struct radeon_ring *ringA, struct radeon_ring *ringB, struct radeon_ring *ringC) { struct radeon_fence *fenceA = NULL, *fenceB = NULL; struct radeon_semaphore *semaphore = NULL; bool sigA, sigB; int i, r; r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringA, 64); if (r) { DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); radeon_ring_unlock_commit(rdev, ringA, false); r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA); if (r) goto out_cleanup; r = radeon_ring_lock(rdev, ringB, 64); if (r) { DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); radeon_ring_unlock_commit(rdev, ringB, false); r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB); if (r) goto out_cleanup; msleep(1000); if (radeon_fence_signaled(fenceA)) { DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); goto out_cleanup; } if (radeon_fence_signaled(fenceB)) { DRM_ERROR("Fence B signaled without waiting for semaphore.\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringC, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringC); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); radeon_ring_unlock_commit(rdev, ringC, false); for (i = 0; i < 30; ++i) { msleep(100); sigA = radeon_fence_signaled(fenceA); sigB = radeon_fence_signaled(fenceB); if (sigA || sigB) break; } if (!sigA && !sigB) { DRM_ERROR("Neither fence A nor B has been signaled\n"); goto out_cleanup; } else if (sigA && sigB) { DRM_ERROR("Both fence A and B has been signaled\n"); goto out_cleanup; } DRM_INFO("Fence %c was first signaled\n", sigA ? 
'A' : 'B'); r = radeon_ring_lock(rdev, ringC, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringC); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); radeon_ring_unlock_commit(rdev, ringC, false); msleep(1000); r = radeon_fence_wait(fenceA, false); if (r) { DRM_ERROR("Failed to wait for sync fence A\n"); goto out_cleanup; } r = radeon_fence_wait(fenceB, false); if (r) { DRM_ERROR("Failed to wait for sync fence B\n"); goto out_cleanup; } out_cleanup: radeon_semaphore_free(rdev, &semaphore, NULL); if (fenceA) radeon_fence_unref(&fenceA); if (fenceB) radeon_fence_unref(&fenceB); if (r) pr_warn("Error while testing ring sync (%d)\n", r); }
/*
 * Sync-object unref hook: @sync_obj is treated as a pointer to a
 * radeon_fence pointer and passed on to radeon_fence_unref().
 */
static void radeon_sync_obj_unref(void **sync_obj)
{
	struct radeon_fence **fence_ref = (struct radeon_fence **)sync_obj;

	radeon_fence_unref(fence_ref);
}
/*
 * Move the contents of @bo from @old_mem to @new_mem with a GPU copy on
 * the ring returned by radeon_copy_ring_index().
 *
 * The start offsets of both placements are converted to GPU addresses
 * (VRAM or GTT base added). On R600 and newer, every other ready ring
 * emits a semaphore signal that the copy ring then waits on before the
 * copy is issued. The fence is finally handed to
 * ttm_bo_move_accel_cleanup().
 *
 * Returns the result of ttm_bo_move_accel_cleanup(), the error from
 * radeon_fence_create(), or -EINVAL for an unknown placement or when the
 * copy ring is not ready. The fence reference is dropped on every path
 * once it has been created.
 */
static int radeon_move_blit(struct ttm_buffer_object *bo,
			    bool evict, int no_wait_reserve, bool no_wait_gpu,
			    struct ttm_mem_reg *new_mem,
			    struct ttm_mem_reg *old_mem)
{
	struct radeon_device *rdev;
	uint64_t old_start, new_start;
	struct radeon_fence *fence;
	int r, i, ridx;

	rdev = radeon_get_rdev(bo->bdev);
	/* look the copy ring up once instead of on every use */
	ridx = radeon_copy_ring_index(rdev);
	r = radeon_fence_create(rdev, &fence, ridx);
	if (unlikely(r)) {
		return r;
	}
	old_start = old_mem->start << PAGE_SHIFT;
	new_start = new_mem->start << PAGE_SHIFT;

	switch (old_mem->mem_type) {
	case TTM_PL_VRAM:
		old_start += rdev->mc.vram_start;
		break;
	case TTM_PL_TT:
		old_start += rdev->mc.gtt_start;
		break;
	default:
		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
		r = -EINVAL;
		goto out_unref;
	}
	switch (new_mem->mem_type) {
	case TTM_PL_VRAM:
		new_start += rdev->mc.vram_start;
		break;
	case TTM_PL_TT:
		new_start += rdev->mc.gtt_start;
		break;
	default:
		/* report the placement that was actually rejected */
		DRM_ERROR("Unknown placement %d\n", new_mem->mem_type);
		r = -EINVAL;
		goto out_unref;
	}
	if (!rdev->ring[ridx].ready) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		r = -EINVAL;
		goto out_unref;
	}

	BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);

	/* sync other rings */
	if (rdev->family >= CHIP_R600) {
		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
			/* no need to sync to our own or unused rings */
			if (i == ridx || !rdev->ring[i].ready)
				continue;

			if (!fence->semaphore) {
				r = radeon_semaphore_create(rdev, &fence->semaphore);
				/* FIXME: handle semaphore error */
				if (r)
					continue;
			}

			r = radeon_ring_lock(rdev, &rdev->ring[i], 3);
			/* FIXME: handle ring lock error */
			if (r)
				continue;
			radeon_semaphore_emit_signal(rdev, i, fence->semaphore);
			radeon_ring_unlock_commit(rdev, &rdev->ring[i]);

			r = radeon_ring_lock(rdev, &rdev->ring[ridx], 3);
			/* FIXME: handle ring lock error */
			if (r)
				continue;
			radeon_semaphore_emit_wait(rdev, ridx, fence->semaphore);
			radeon_ring_unlock_commit(rdev, &rdev->ring[ridx]);
		}
	}

	r = radeon_copy(rdev, old_start, new_start,
			new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
			fence);
	/* FIXME: handle copy error */
	r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
				      evict, no_wait_reserve, no_wait_gpu, new_mem);
out_unref:
	/* drop our reference; previously leaked on the -EINVAL paths */
	radeon_fence_unref(&fence);
	return r;
}
void radeon_test_ring_sync2(struct radeon_device *rdev, struct radeon_ring *ringA, struct radeon_ring *ringB, struct radeon_ring *ringC) { struct radeon_fence *fenceA = NULL, *fenceB = NULL; struct radeon_semaphore *semaphore = NULL; int ridxA = radeon_ring_index(rdev, ringA); int ridxB = radeon_ring_index(rdev, ringB); int ridxC = radeon_ring_index(rdev, ringC); bool sigA, sigB; int i, r; r = radeon_fence_create(rdev, &fenceA, ridxA); if (r) { DRM_ERROR("Failed to create sync fence 1\n"); goto out_cleanup; } r = radeon_fence_create(rdev, &fenceB, ridxB); if (r) { DRM_ERROR("Failed to create sync fence 2\n"); goto out_cleanup; } r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringA, 64); if (r) { DRM_ERROR("Failed to lock ring A %d\n", ridxA); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxA, semaphore); radeon_fence_emit(rdev, fenceA); radeon_ring_unlock_commit(rdev, ringA); r = radeon_ring_lock(rdev, ringB, 64); if (r) { DRM_ERROR("Failed to lock ring B %d\n", ridxB); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxB, semaphore); radeon_fence_emit(rdev, fenceB); radeon_ring_unlock_commit(rdev, ringB); mdelay(1000); if (radeon_fence_signaled(fenceA)) { DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); goto out_cleanup; } if (radeon_fence_signaled(fenceB)) { DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringC, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringC); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ridxC, semaphore); radeon_ring_unlock_commit(rdev, ringC); for (i = 0; i < 30; ++i) { mdelay(100); sigA = radeon_fence_signaled(fenceA); sigB = radeon_fence_signaled(fenceB); if (sigA || sigB) break; } if (!sigA && !sigB) { DRM_ERROR("Neither fence A nor B has been signaled\n"); goto out_cleanup; } else if (sigA && sigB) { DRM_ERROR("Both fence A and B 
has been signaled\n"); goto out_cleanup; } DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B'); r = radeon_ring_lock(rdev, ringC, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringC); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ridxC, semaphore); radeon_ring_unlock_commit(rdev, ringC); mdelay(1000); r = radeon_fence_wait(fenceA, false); if (r) { DRM_ERROR("Failed to wait for sync fence A\n"); goto out_cleanup; } r = radeon_fence_wait(fenceB, false); if (r) { DRM_ERROR("Failed to wait for sync fence B\n"); goto out_cleanup; } out_cleanup: if (semaphore) radeon_semaphore_free(rdev, semaphore); if (fenceA) radeon_fence_unref(&fenceA); if (fenceB) radeon_fence_unref(&fenceB); if (r) printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); }
/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */ void radeon_test_moves(struct radeon_device *rdev) { struct radeon_bo *vram_obj = NULL; struct radeon_bo **gtt_obj = NULL; struct radeon_fence *fence = NULL; uint64_t gtt_addr, vram_addr; unsigned i, n, size; int r; size = 1024 * 1024; /* Number of tests = * (Total GTT - IB pool - writeback page - ring buffers) / test size */ n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024; for (i = 0; i < RADEON_NUM_RINGS; ++i) n -= rdev->ring[i].ring_size; if (rdev->wb.wb_obj) n -= RADEON_GPU_PAGE_SIZE; if (rdev->ih.ring_obj) n -= rdev->ih.ring_size; n /= size; gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL); if (!gtt_obj) { DRM_ERROR("Failed to allocate %d pointers\n", n); r = 1; goto out_cleanup; } r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; } r = radeon_bo_reserve(vram_obj, false); if (unlikely(r != 0)) goto out_cleanup; r = radeon_bo_pin(vram_obj, RADEON_GEM_DOMAIN_VRAM, &vram_addr); if (r) { DRM_ERROR("Failed to pin VRAM object\n"); goto out_cleanup; } for (i = 0; i < n; i++) { void *gtt_map, *vram_map; void **gtt_start, **gtt_end; void **vram_start, **vram_end; r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_cleanup; } r = radeon_bo_reserve(gtt_obj[i], false); if (unlikely(r != 0)) goto out_cleanup; r = radeon_bo_pin(gtt_obj[i], RADEON_GEM_DOMAIN_GTT, >t_addr); if (r) { DRM_ERROR("Failed to pin GTT object %d\n", i); goto out_cleanup; } r = radeon_bo_kmap(gtt_obj[i], >t_map); if (r) { DRM_ERROR("Failed to map GTT object %d\n", i); goto out_cleanup; } for (gtt_start = gtt_map, gtt_end = gtt_map + size; gtt_start < gtt_end; gtt_start++) *gtt_start = gtt_start; radeon_bo_kunmap(gtt_obj[i]); r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); if (r) { 
DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i); goto out_cleanup; } r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, fence); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); goto out_cleanup; } r = radeon_fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); goto out_cleanup; } radeon_fence_unref(&fence); r = radeon_bo_kmap(vram_obj, &vram_map); if (r) { DRM_ERROR("Failed to map VRAM object after copy %d\n", i); goto out_cleanup; } for (gtt_start = gtt_map, gtt_end = gtt_map + size, vram_start = vram_map, vram_end = vram_map + size; vram_start < vram_end; gtt_start++, vram_start++) { if (*vram_start != gtt_start) { DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, " "expected 0x%p (GTT/VRAM offset " "0x%16llx/0x%16llx)\n", i, *vram_start, gtt_start, (unsigned long long) (gtt_addr - rdev->mc.gtt_start + (void*)gtt_start - gtt_map), (unsigned long long) (vram_addr - rdev->mc.vram_start + (void*)gtt_start - gtt_map)); radeon_bo_kunmap(vram_obj); goto out_cleanup; } *vram_start = vram_start; } radeon_bo_kunmap(vram_obj); r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); if (r) { DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i); goto out_cleanup; } r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, fence); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); goto out_cleanup; } r = radeon_fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); goto out_cleanup; } radeon_fence_unref(&fence); r = radeon_bo_kmap(gtt_obj[i], >t_map); if (r) { DRM_ERROR("Failed to map GTT object after copy %d\n", i); goto out_cleanup; } for (gtt_start = gtt_map, gtt_end = gtt_map + size, vram_start = vram_map, vram_end = vram_map + size; gtt_start < gtt_end; gtt_start++, vram_start++) { if (*gtt_start != vram_start) { DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, " "expected 0x%p (VRAM/GTT offset " "0x%16llx/0x%16llx)\n", i, *gtt_start, 
vram_start, (unsigned long long) (vram_addr - rdev->mc.vram_start + (void*)vram_start - vram_map), (unsigned long long) (gtt_addr - rdev->mc.gtt_start + (void*)vram_start - vram_map)); radeon_bo_kunmap(gtt_obj[i]); goto out_cleanup; } } radeon_bo_kunmap(gtt_obj[i]); DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n", gtt_addr - rdev->mc.gtt_start); } out_cleanup: if (vram_obj) { if (radeon_bo_is_reserved(vram_obj)) { radeon_bo_unpin(vram_obj); radeon_bo_unreserve(vram_obj); } radeon_bo_unref(&vram_obj); } if (gtt_obj) { for (i = 0; i < n; i++) { if (gtt_obj[i]) { if (radeon_bo_is_reserved(gtt_obj[i])) { radeon_bo_unpin(gtt_obj[i]); radeon_bo_unreserve(gtt_obj[i]); } radeon_bo_unref(>t_obj[i]); } } kfree(gtt_obj); } if (fence) { radeon_fence_unref(&fence); } if (r) { printk(KERN_WARNING "Error while testing BO move.\n"); } }
void radeon_test_ring_sync(struct radeon_device *rdev, struct radeon_ring *ringA, struct radeon_ring *ringB) { struct radeon_fence *fence1 = NULL, *fence2 = NULL; struct radeon_semaphore *semaphore = NULL; int ridxA = radeon_ring_index(rdev, ringA); int ridxB = radeon_ring_index(rdev, ringB); int r; r = radeon_fence_create(rdev, &fence1, ridxA); if (r) { DRM_ERROR("Failed to create sync fence 1\n"); goto out_cleanup; } r = radeon_fence_create(rdev, &fence2, ridxA); if (r) { DRM_ERROR("Failed to create sync fence 2\n"); goto out_cleanup; } r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringA, 64); if (r) { DRM_ERROR("Failed to lock ring A %d\n", ridxA); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ridxA, semaphore); radeon_fence_emit(rdev, fence1); radeon_semaphore_emit_wait(rdev, ridxA, semaphore); radeon_fence_emit(rdev, fence2); radeon_ring_unlock_commit(rdev, ringA); mdelay(1000); if (radeon_fence_signaled(fence1)) { DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringB, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringB); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ridxB, semaphore); radeon_ring_unlock_commit(rdev, ringB); r = radeon_fence_wait(fence1, false); if (r) { DRM_ERROR("Failed to wait for sync fence 1\n"); goto out_cleanup; } mdelay(1000); if (radeon_fence_signaled(fence2)) { DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringB, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringB); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ridxB, semaphore); radeon_ring_unlock_commit(rdev, ringB); r = radeon_fence_wait(fence2, false); if (r) { DRM_ERROR("Failed to wait for sync fence 1\n"); goto out_cleanup; } out_cleanup: if (semaphore) radeon_semaphore_free(rdev, semaphore); if (fence1) 
radeon_fence_unref(&fence1); if (fence2) radeon_fence_unref(&fence2); if (r) printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); }
void radeon_test_ring_sync(struct radeon_device *rdev, struct radeon_ring *ringA, struct radeon_ring *ringB) { struct radeon_fence *fence1 = NULL, *fence2 = NULL; struct radeon_semaphore *semaphore = NULL; int r; r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringA, 64); if (r) { DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); r = radeon_fence_emit(rdev, &fence1, ringA->idx); if (r) { DRM_ERROR("Failed to emit fence 1\n"); radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); r = radeon_fence_emit(rdev, &fence2, ringA->idx); if (r) { DRM_ERROR("Failed to emit fence 2\n"); radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } radeon_ring_unlock_commit(rdev, ringA); DRM_MDELAY(1000); if (radeon_fence_signaled(fence1)) { DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringB, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringB); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); radeon_ring_unlock_commit(rdev, ringB); r = radeon_fence_wait(fence1, false); if (r) { DRM_ERROR("Failed to wait for sync fence 1\n"); goto out_cleanup; } DRM_MDELAY(1000); if (radeon_fence_signaled(fence2)) { DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n"); goto out_cleanup; } r = radeon_ring_lock(rdev, ringB, 64); if (r) { DRM_ERROR("Failed to lock ring B %p\n", ringB); goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); radeon_ring_unlock_commit(rdev, ringB); r = radeon_fence_wait(fence2, false); if (r) { DRM_ERROR("Failed to wait for sync fence 1\n"); goto out_cleanup; } out_cleanup: radeon_semaphore_free(rdev, &semaphore, NULL); if (fence1) radeon_fence_unref(&fence1); if (fence2) radeon_fence_unref(&fence2); 
if (r) DRM_ERROR("Error while testing ring sync (%d).\n", r); }
/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */ static void radeon_do_test_moves(struct radeon_device *rdev, int flag) { struct radeon_bo *vram_obj = NULL; struct radeon_bo **gtt_obj = NULL; uint64_t gtt_addr, vram_addr; unsigned n, size; int i, r, ring; switch (flag) { case RADEON_TEST_COPY_DMA: ring = radeon_copy_dma_ring_index(rdev); break; case RADEON_TEST_COPY_BLIT: ring = radeon_copy_blit_ring_index(rdev); break; default: DRM_ERROR("Unknown copy method\n"); return; } size = 1024 * 1024; /* Number of tests = * (Total GTT - IB pool - writeback page - ring buffers) / test size */ n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024; for (i = 0; i < RADEON_NUM_RINGS; ++i) n -= rdev->ring[i].ring_size; if (rdev->wb.wb_obj) n -= RADEON_GPU_PAGE_SIZE; if (rdev->ih.ring_obj) n -= rdev->ih.ring_size; n /= size; gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL); if (!gtt_obj) { DRM_ERROR("Failed to allocate %d pointers\n", n); r = 1; goto out_cleanup; } r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, NULL, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; } r = radeon_bo_reserve(vram_obj, false); if (unlikely(r != 0)) goto out_unref; r = radeon_bo_pin(vram_obj, RADEON_GEM_DOMAIN_VRAM, &vram_addr); if (r) { DRM_ERROR("Failed to pin VRAM object\n"); goto out_unres; } for (i = 0; i < n; i++) { void *gtt_map, *vram_map; void **gtt_start, **gtt_end; void **vram_start, **vram_end; struct radeon_fence *fence = NULL; r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, NULL, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; } r = radeon_bo_reserve(gtt_obj[i], false); if (unlikely(r != 0)) goto out_lclean_unref; r = radeon_bo_pin(gtt_obj[i], RADEON_GEM_DOMAIN_GTT, >t_addr); if (r) { DRM_ERROR("Failed to pin GTT object %d\n", i); goto out_lclean_unres; } r = radeon_bo_kmap(gtt_obj[i], >t_map); if (r) { DRM_ERROR("Failed to map 
GTT object %d\n", i); goto out_lclean_unpin; } for (gtt_start = gtt_map, gtt_end = gtt_map + size; gtt_start < gtt_end; gtt_start++) *gtt_start = gtt_start; radeon_bo_kunmap(gtt_obj[i]); if (ring == R600_RING_TYPE_DMA_INDEX) r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); else r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); goto out_lclean_unpin; } r = radeon_fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); goto out_lclean_unpin; } radeon_fence_unref(&fence); r = radeon_bo_kmap(vram_obj, &vram_map); if (r) { DRM_ERROR("Failed to map VRAM object after copy %d\n", i); goto out_lclean_unpin; } for (gtt_start = gtt_map, gtt_end = gtt_map + size, vram_start = vram_map, vram_end = vram_map + size; vram_start < vram_end; gtt_start++, vram_start++) { if (*vram_start != gtt_start) { DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, " "expected 0x%p (GTT/VRAM offset " "0x%16llx/0x%16llx)\n", i, *vram_start, gtt_start, (unsigned long long) (gtt_addr - rdev->mc.gtt_start + (void*)gtt_start - gtt_map), (unsigned long long) (vram_addr - rdev->mc.vram_start + (void*)gtt_start - gtt_map)); radeon_bo_kunmap(vram_obj); goto out_lclean_unpin; } *vram_start = vram_start; } radeon_bo_kunmap(vram_obj); if (ring == R600_RING_TYPE_DMA_INDEX) r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); else r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); goto out_lclean_unpin; } r = radeon_fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); goto out_lclean_unpin; } radeon_fence_unref(&fence); r = radeon_bo_kmap(gtt_obj[i], >t_map); if (r) { DRM_ERROR("Failed to map GTT object after copy %d\n", i); goto out_lclean_unpin; } for (gtt_start = gtt_map, gtt_end = gtt_map + 
size, vram_start = vram_map, vram_end = vram_map + size; gtt_start < gtt_end; gtt_start++, vram_start++) { if (*gtt_start != vram_start) { DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, " "expected 0x%p (VRAM/GTT offset " "0x%16llx/0x%16llx)\n", i, *gtt_start, vram_start, (unsigned long long) (vram_addr - rdev->mc.vram_start + (void*)vram_start - vram_map), (unsigned long long) (gtt_addr - rdev->mc.gtt_start + (void*)vram_start - vram_map)); radeon_bo_kunmap(gtt_obj[i]); goto out_lclean_unpin; } } radeon_bo_kunmap(gtt_obj[i]); DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n", gtt_addr - rdev->mc.gtt_start); continue; out_lclean_unpin: radeon_bo_unpin(gtt_obj[i]); out_lclean_unres: radeon_bo_unreserve(gtt_obj[i]); out_lclean_unref: radeon_bo_unref(>t_obj[i]); out_lclean: for (--i; i >= 0; --i) { radeon_bo_unpin(gtt_obj[i]); radeon_bo_unreserve(gtt_obj[i]); radeon_bo_unref(>t_obj[i]); } if (fence) radeon_fence_unref(&fence); break; } radeon_bo_unpin(vram_obj); out_unres: radeon_bo_unreserve(vram_obj); out_unref: radeon_bo_unref(&vram_obj); out_cleanup: kfree(gtt_obj); if (r) { printk(KERN_WARNING "Error while testing BO move.\n"); } }
/**
 * radeon_ib_free - free an IB (Indirect Buffer)
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to free
 *
 * Free an IB (all asics).
 *
 * The IB's semaphore and its sub-allocated buffer are both freed against
 * the IB's fence, after which the IB's own fence reference is dropped.
 */
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* the fence pointer is not modified until the final unref below */
	struct radeon_fence *ib_fence = ib->fence;

	radeon_semaphore_free(rdev, &ib->semaphore, ib_fence);
	radeon_sa_bo_free(rdev, &ib->sa_bo, ib_fence);
	radeon_fence_unref(&ib->fence);
}
void radeon_test_moves(struct radeon_device *rdev) { struct radeon_object *vram_obj = NULL; struct radeon_object **gtt_obj = NULL; struct radeon_fence *fence = NULL; uint64_t gtt_addr, vram_addr; unsigned i, n, size; int r; size = 1024 * 1024; n = ((u32)(rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - RADEON_GPU_PAGE_SIZE - rdev->cp.ring_size)) / size; gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL); if (!gtt_obj) { DRM_ERROR("Failed to allocate %d pointers\n", n); r = 1; goto out_cleanup; } r = radeon_object_create(rdev, NULL, size, true, RADEON_GEM_DOMAIN_VRAM, false, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; } r = radeon_object_pin(vram_obj, RADEON_GEM_DOMAIN_VRAM, &vram_addr); if (r) { DRM_ERROR("Failed to pin VRAM object\n"); goto out_cleanup; } for (i = 0; i < n; i++) { void *gtt_map, *vram_map; void **gtt_start, **gtt_end; void **vram_start, **vram_end; r = radeon_object_create(rdev, NULL, size, true, RADEON_GEM_DOMAIN_GTT, false, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_cleanup; } r = radeon_object_pin(gtt_obj[i], RADEON_GEM_DOMAIN_GTT, >t_addr); if (r) { DRM_ERROR("Failed to pin GTT object %d\n", i); goto out_cleanup; } r = radeon_object_kmap(gtt_obj[i], >t_map); if (r) { DRM_ERROR("Failed to map GTT object %d\n", i); goto out_cleanup; } for (gtt_start = gtt_map, gtt_end = gtt_map + size; gtt_start < gtt_end; gtt_start++) *gtt_start = gtt_start; radeon_object_kunmap(gtt_obj[i]); r = radeon_fence_create(rdev, &fence); if (r) { DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i); goto out_cleanup; } r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, fence); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); goto out_cleanup; } r = radeon_fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); goto out_cleanup; } radeon_fence_unref(&fence); r = radeon_object_kmap(vram_obj, &vram_map); if (r) { DRM_ERROR("Failed 
to map VRAM object after copy %d\n", i); goto out_cleanup; } for (gtt_start = gtt_map, gtt_end = gtt_map + size, vram_start = vram_map, vram_end = vram_map + size; vram_start < vram_end; gtt_start++, vram_start++) { if (*vram_start != gtt_start) { DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, " "expected 0x%p (GTT map 0x%p-0x%p)\n", i, *vram_start, gtt_start, gtt_map, gtt_end); radeon_object_kunmap(vram_obj); goto out_cleanup; } *vram_start = vram_start; } radeon_object_kunmap(vram_obj); r = radeon_fence_create(rdev, &fence); if (r) { DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i); goto out_cleanup; } r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, fence); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); goto out_cleanup; } r = radeon_fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); goto out_cleanup; } radeon_fence_unref(&fence); r = radeon_object_kmap(gtt_obj[i], >t_map); if (r) { DRM_ERROR("Failed to map GTT object after copy %d\n", i); goto out_cleanup; } for (gtt_start = gtt_map, gtt_end = gtt_map + size, vram_start = vram_map, vram_end = vram_map + size; gtt_start < gtt_end; gtt_start++, vram_start++) { if (*gtt_start != vram_start) { DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, " "expected 0x%p (VRAM map 0x%p-0x%p)\n", i, *gtt_start, vram_start, vram_map, vram_end); radeon_object_kunmap(gtt_obj[i]); goto out_cleanup; } } radeon_object_kunmap(gtt_obj[i]); DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n", gtt_addr - rdev->mc.gtt_location); } out_cleanup: if (vram_obj) { radeon_object_unpin(vram_obj); radeon_object_unref(&vram_obj); } if (gtt_obj) { for (i = 0; i < n; i++) { if (gtt_obj[i]) { radeon_object_unpin(gtt_obj[i]); radeon_object_unref(>t_obj[i]); } } kfree(gtt_obj); } if (fence) { radeon_fence_unref(&fence); } if (r) { printk(KERN_WARNING "Error while testing BO move.\n"); } }
void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize, unsigned sdomain, unsigned ddomain) { struct radeon_bo *dobj = NULL; struct radeon_bo *sobj = NULL; struct radeon_fence *fence = NULL; uint64_t saddr, daddr; unsigned long start_jiffies; unsigned long end_jiffies; unsigned long time; unsigned i, n, size; int r; size = bsize; n = 1024; r = radeon_bo_create(rdev, NULL, size, PAGE_SIZE, true, sdomain, &sobj); if (r) { goto out_cleanup; } r = radeon_bo_reserve(sobj, false); if (unlikely(r != 0)) goto out_cleanup; r = radeon_bo_pin(sobj, sdomain, &saddr); radeon_bo_unreserve(sobj); if (r) { goto out_cleanup; } r = radeon_bo_create(rdev, NULL, size, PAGE_SIZE, true, ddomain, &dobj); if (r) { goto out_cleanup; } r = radeon_bo_reserve(dobj, false); if (unlikely(r != 0)) goto out_cleanup; r = radeon_bo_pin(dobj, ddomain, &daddr); radeon_bo_unreserve(dobj); if (r) { goto out_cleanup; } /* r100 doesn't have dma engine so skip the test */ if (rdev->asic->copy_dma) { start_jiffies = jiffies; for (i = 0; i < n; i++) { r = radeon_fence_create(rdev, &fence); if (r) { goto out_cleanup; } r = radeon_copy_dma(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence); if (r) { goto out_cleanup; } r = radeon_fence_wait(fence, false); if (r) { goto out_cleanup; } radeon_fence_unref(&fence); } end_jiffies = jiffies; time = end_jiffies - start_jiffies; time = jiffies_to_msecs(time); if (time > 0) { i = ((n * size) >> 10) / time; printk(KERN_INFO "radeon: dma %u bo moves of %ukb from" " %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10, sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024); }