static bool test_md_zmatmul(void) { int A = 10; int B = 20; int C = 30; long odims[3] = { C, 1, A }; long idims1[3] = { 1, B, A }; long idims2[3] = { C, B, 1 }; complex float* dst1 = md_alloc(3, odims, CFL_SIZE); complex float* dst2 = md_alloc(3, odims, CFL_SIZE); complex float* src1 = md_alloc(3, idims1, CFL_SIZE); complex float* src2 = md_alloc(3, idims2, CFL_SIZE); md_gaussian_rand(3, odims, dst1); md_gaussian_rand(3, odims, dst2); md_gaussian_rand(3, idims1, src1); md_gaussian_rand(3, idims2, src2); md_zmatmul(3, odims, dst1, idims1, src1, idims2, src2); matrix_mult(A, B, C, &MD_CAST_ARRAY2(complex float, 3, odims, dst2, 0, 2), &MD_CAST_ARRAY2(const complex float, 3, idims1, src1, 1, 2), &MD_CAST_ARRAY2(const complex float, 3, idims2, src2, 0, 1)); double err = md_znrmse(3, odims, dst2, dst1); md_free(src1); md_free(src2); md_free(dst1); md_free(dst2); return (err < UT_TOL); }
/*
 * Benchmark: accumulate x into y, either with md_zaxpy2 or with a
 * hand-written loop nest.
 *
 * dims     full dimensions (used for y)
 * flags    bitmask passed to md_select_dims to derive the dimensions of x;
 *          its complement yields the dimensions used for the outer loop count
 * forloop  true: time the explicit loops; false: time md_zaxpy2
 *
 * Returns the difference of the two timestamp() readings around the
 * timed section.
 */
static double bench_generic_add(long dims[DIMS], unsigned int flags, bool forloop)
{
	long xdims[DIMS];
	long ydims[DIMS];
	long cdims[DIMS];

	md_select_dims(DIMS, flags, xdims, dims);
	md_select_dims(DIMS, ~flags, cdims, dims);
	md_select_dims(DIMS, ~0u, ydims, dims);

	long xstrs[DIMS];
	long ystrs[DIMS];

	md_calc_strides(DIMS, xstrs, xdims, CFL_SIZE);
	md_calc_strides(DIMS, ystrs, ydims, CFL_SIZE);

	complex float* x = md_alloc(DIMS, xdims, CFL_SIZE);
	complex float* y = md_alloc(DIMS, ydims, CFL_SIZE);

	md_gaussian_rand(DIMS, xdims, x);
	md_gaussian_rand(DIMS, ydims, y);

	long outer = md_calc_size(DIMS, cdims);
	long inner = md_calc_size(DIMS, xdims);

	double start = timestamp();

	if (!forloop) {

		md_zaxpy2(DIMS, dims, ystrs, y, 1., xstrs, x);

	} else {

		/* Loop order is part of what is being timed; keep it as is. */
		for (long c = 0; c < outer; c++) {

			for (long k = 0; k < inner; k++)
				y[c + k * outer] += x[k];
		}
	}

	double stop = timestamp();

	md_free(x);
	md_free(y);

	return stop - start;
}
/*
 * Benchmark: time one in-place application of a wavelet thresholding
 * proximal operator to random data.
 *
 * version  2 selects prox_wavethresh_create, 3 selects
 *          prox_wavelet3_thresh_create; anything else trips assert(0)
 * scale    multiplies the two 256-sized dimensions
 *
 * Returns the difference of the two timestamp() readings around
 * operator_p_apply.
 */
static double bench_wavelet_thresh(int version, long scale)
{
	long dims[DIMS] = { 1, 256 * scale, 256 * scale, 1, 16, 1, 1, 1 };

	long minsize[DIMS] = { [0 ... DIMS - 1] = 1 };

	/* The first three entries are capped at 16. */
	for (int i = 0; i < 3; i++)
		minsize[i] = MIN(dims[i], 16);

	const struct operator_p_s* p;

	if (2 == version)
		p = prox_wavethresh_create(DIMS, dims, 7, minsize, 1.1, true, false);
	else if (3 == version)
		p = prox_wavelet3_thresh_create(DIMS, dims, 6, minsize, 1.1, true);
	else
		assert(0);

	complex float* data = md_alloc(DIMS, dims, CFL_SIZE);

	md_gaussian_rand(DIMS, dims, data);

	double start = timestamp();

	/* Source and destination are the same buffer. */
	operator_p_apply(p, 0.98, DIMS, dims, data, DIMS, dims, data);

	double stop = timestamp();

	md_free(data);
	operator_p_free(p);

	return stop - start;
}
static double bench_generic_matrix_multiply(long dims[DIMS]) { long dimsX[DIMS]; long dimsY[DIMS]; long dimsZ[DIMS]; md_select_dims(DIMS, 2 * 3 + 17, dimsX, dims); // 1 110 1 md_select_dims(DIMS, 2 * 6 + 17, dimsY, dims); // 1 011 1 md_select_dims(DIMS, 2 * 5 + 17, dimsZ, dims); // 1 101 1 long strsX[DIMS]; long strsY[DIMS]; long strsZ[DIMS]; md_calc_strides(DIMS, strsX, dimsX, CFL_SIZE); md_calc_strides(DIMS, strsY, dimsY, CFL_SIZE); md_calc_strides(DIMS, strsZ, dimsZ, CFL_SIZE); complex float* x = md_alloc(DIMS, dimsX, CFL_SIZE); complex float* y = md_alloc(DIMS, dimsY, CFL_SIZE); complex float* z = md_alloc(DIMS, dimsZ, CFL_SIZE); md_gaussian_rand(DIMS, dimsX, x); md_gaussian_rand(DIMS, dimsY, y); md_clear(DIMS, dimsZ, z, CFL_SIZE); double tic = timestamp(); md_zfmac2(DIMS, dims, strsZ, z, strsX, x, strsY, y); double toc = timestamp(); md_free(x); md_free(y); md_free(z); return toc - tic; }
/*
 * Unit test for md_circular_swap (only the setup part is visible here).
 *
 * Allocates three 10x10x10x10 complex float arrays a, b, c filled by
 * md_gaussian_rand, copies them into d, e, f with md_copy, and then
 * calls md_circular_swap on the list { a, b, c }.
 *
 * NOTE(review): the remainder of this function (comparison, cleanup,
 * return value) is not shown in this excerpt.
 */
static bool test_md_swap(void) { enum { N = 4 }; long dims[N] = { 10, 10, 10, 10 }; complex float* a = md_alloc(N, dims, sizeof(complex float)); complex float* b = md_alloc(N, dims, sizeof(complex float)); complex float* c = md_alloc(N, dims, sizeof(complex float)); md_gaussian_rand(N, dims, a); md_gaussian_rand(N, dims, b); md_gaussian_rand(N, dims, c); complex float* d = md_alloc(N, dims, sizeof(complex float)); complex float* e = md_alloc(N, dims, sizeof(complex float)); complex float* f = md_alloc(N, dims, sizeof(complex float)); md_copy(N, dims, d, a, sizeof(complex float)); md_copy(N, dims, e, b, sizeof(complex float)); md_copy(N, dims, f, c, sizeof(complex float)); md_circular_swap(3, N, dims, (void*[]){ a, b, c }, sizeof(complex float));
/*
 * Benchmark: time one of four reduction routines on random data.
 *
 * s      selects the routine: 0 md_zscalar, 1 md_zscalar_real,
 *        2 md_znorm, 3 md_z1norm; any other value times nothing
 * scale  multiplies the two 256-sized dimensions
 *
 * The result of the timed routine is discarded. Returns the difference
 * of the two timestamp() readings around the call.
 */
static double bench_norm(int s, long scale)
{
	long dims[DIMS] = { 256 * scale, 256 * scale, 1, 16, 1, 1, 1, 1 };

#if 0
	complex float* lhs = md_alloc_gpu(DIMS, dims, CFL_SIZE);
	complex float* rhs = md_alloc_gpu(DIMS, dims, CFL_SIZE);
#else
	complex float* lhs = md_alloc(DIMS, dims, CFL_SIZE);
	complex float* rhs = md_alloc(DIMS, dims, CFL_SIZE);
#endif

	md_gaussian_rand(DIMS, dims, lhs);
	md_gaussian_rand(DIMS, dims, rhs);

	double start = timestamp();

	if (0 == s)
		md_zscalar(DIMS, dims, lhs, rhs);
	else if (1 == s)
		md_zscalar_real(DIMS, dims, lhs, rhs);
	else if (2 == s)
		md_znorm(DIMS, dims, lhs);
	else if (3 == s)
		md_z1norm(DIMS, dims, lhs);

	double stop = timestamp();

	md_free(lhs);
	md_free(rhs);

	return stop - start;
}
/**
 * Fill a complex float array with values drawn from gaussian_rand().
 *
 * @param D     number of dimensions
 * @param dims  dimensions of the array; md_calc_size(D, dims) elements are written
 * @param dst   destination buffer
 *
 * With USE_CUDA, a destination for which cuda_ondevice() is true is
 * filled via a temporary md_alloc buffer that is then copied over
 * with md_copy.
 */
void md_gaussian_rand(unsigned int D, const long dims[D], complex float* dst)
{
#ifdef USE_CUDA
	if (cuda_ondevice(dst)) {

		complex float* tmp = md_alloc(D, dims, sizeof(complex float));

		md_gaussian_rand(D, dims, tmp);
		md_copy(D, dims, dst, tmp, sizeof(complex float));
		md_free(tmp);

		return;
	}
#endif
	/* Loop-invariant: compute the element count once. */
	const long N = md_calc_size(D, dims);

	/*
	 * NOTE(review): an OpenMP parallel-for was disabled on this loop;
	 * presumably gaussian_rand() relies on shared generator state --
	 * confirm before parallelizing.
	 */
	for (long i = 0; i < N; i++) {

		/*
		 * Convert to the element type. The earlier (float) cast forced
		 * a real value, which would drop the imaginary part whenever
		 * gaussian_rand() yields a complex number (its return type is
		 * not visible here; for a real return the result is unchanged).
		 */
		dst[i] = (complex float)gaussian_rand();
	}
}
/*
 * Unit test: md_copy must produce a buffer that md_compare reports as
 * equal to its random source.
 */
static bool test_md_copy(void)
{
	enum { N = 4 };
	long dims[N] = { 10, 10, 10, 10 };

	complex float* src = md_alloc(N, dims, sizeof(complex float));

	md_gaussian_rand(N, dims, src);

	complex float* dup = md_alloc(N, dims, sizeof(complex float));

	md_copy(N, dims, dup, src, sizeof(complex float));

	bool same = md_compare(N, dims, src, dup, sizeof(complex float));

	md_free(src);
	md_free(dup);

	return same;
}
/*
 * Benchmark: time md_transpose exchanging dimensions 0 and 1 of a
 * (2000 * scale) x (2000 * scale) random array into a cleared buffer.
 *
 * Returns the difference of the two timestamp() readings around the call.
 */
static double bench_transpose(long scale)
{
	long dims[DIMS] = { 2000 * scale, 2000 * scale, 1, 1, 1, 1, 1, 1 };

	complex float* in = md_alloc(DIMS, dims, CFL_SIZE);
	complex float* out = md_alloc(DIMS, dims, CFL_SIZE);

	md_gaussian_rand(DIMS, dims, in);
	md_clear(DIMS, dims, out, CFL_SIZE);

	double start = timestamp();

	md_transpose(DIMS, 0, 1, dims, out, dims, in, CFL_SIZE);

	double stop = timestamp();

	md_free(in);
	md_free(out);

	return stop - start;
}
/*
 * Benchmark: time md_resize from a (2000 * scale) x (1000 * scale)
 * random array into a cleared (1000 * scale) x (2000 * scale) array.
 *
 * Returns the difference of the two timestamp() readings around the call.
 */
static double bench_resize(long scale)
{
	long idims[DIMS] = { 2000 * scale, 1000 * scale, 1, 1, 1, 1, 1, 1 };
	long odims[DIMS] = { 1000 * scale, 2000 * scale, 1, 1, 1, 1, 1, 1 };

	complex float* in = md_alloc(DIMS, idims, CFL_SIZE);
	complex float* out = md_alloc(DIMS, odims, CFL_SIZE);

	md_gaussian_rand(DIMS, idims, in);
	md_clear(DIMS, odims, out, CFL_SIZE);

	double start = timestamp();

	md_resize(DIMS, odims, out, idims, in, CFL_SIZE);

	double stop = timestamp();

	md_free(in);
	md_free(out);

	return stop - start;
}
/*
 * Benchmark: time md_copy2 copying a random array of the given
 * dimensions, with source and destination using the same strides
 * computed by md_calc_strides.
 *
 * dims  dimensions of the array to copy
 *
 * Returns the difference of the two timestamp() readings around the call.
 */
static double bench_generic_copy(long dims[DIMS])
{
	long strs[DIMS];

	/* Computed once; a second identical call here was redundant. */
	md_calc_strides(DIMS, strs, dims, CFL_SIZE);

	complex float* x = md_alloc(DIMS, dims, CFL_SIZE);
	complex float* y = md_alloc(DIMS, dims, CFL_SIZE);

	md_gaussian_rand(DIMS, dims, x);

	double tic = timestamp();

	md_copy2(DIMS, dims, strs, y, strs, x, CFL_SIZE);

	double toc = timestamp();

	md_free(x);
	md_free(y);

	return toc - tic;
}
/*
 * Add scaled random noise to each column of vecs and rescale.
 *
 * dims  { column length, number of columns }
 * vecs  dims[0] x dims[1] array, modified in place
 * amt   value divided by each noise column's md_znorm to get its scale factor
 *
 * Each noise column is multiplied by amt / md_znorm(column), the noise
 * is added to vecs, and every resulting column of vecs is multiplied
 * by 1 / md_znorm(column).
 */
static void perturb(const long dims[2], complex float* vecs, float amt)
{
	const long len = dims[0];
	const long cols = dims[1];

	complex float* noise = md_alloc(2, dims, CFL_SIZE);

	md_gaussian_rand(2, dims, noise);

	/* Scale every noise column. */
	for (long c = 0; c < cols; c++) {

		complex float* col = noise + c * len;

		float nrm = md_znorm(1, dims, col);
		complex float factor = amt / nrm;

		md_zsmul(1, dims, col, col, factor);
	}

	md_zadd(2, dims, vecs, vecs, noise);

	/* Rescale every perturbed column. */
	for (long c = 0; c < cols; c++) {

		complex float* col = vecs + c * len;

		float nrm = md_znorm(1, dims, col);
		complex float factor = 1 / nrm;

		md_zsmul(1, dims, col, col, factor);
	}

	md_free(noise);
}