/*
 * Benchmark md_resize: time one resize of a (2000*scale x 1000*scale)
 * random complex array into a (1000*scale x 2000*scale) destination.
 * Returns the elapsed wall-clock time in seconds.
 */
static double bench_resize(long scale)
{
	long dims_in[DIMS]  = { 2000 * scale, 1000 * scale, 1, 1, 1, 1, 1, 1 };
	long dims_out[DIMS] = { 1000 * scale, 2000 * scale, 1, 1, 1, 1, 1, 1 };

	complex float* src = md_alloc(DIMS, dims_in, CFL_SIZE);
	complex float* dst = md_alloc(DIMS, dims_out, CFL_SIZE);

	// random input, zeroed output — only the resize itself is timed
	md_gaussian_rand(DIMS, dims_in, src);
	md_clear(DIMS, dims_out, dst, CFL_SIZE);

	double start = timestamp();
	md_resize(DIMS, dims_out, dst, dims_in, src, CFL_SIZE);
	double stop = timestamp();

	md_free(src);
	md_free(dst);

	return stop - start;
}
/*
 * Low rank thresholding for arbitrary block sizes
 *
 * For each decomposition level l: zero-pad the image to a multiple of the
 * block size, (optionally randomly) cyclically shift it, reshape the blocks
 * into a (block size) x (number of blocks) matrix, apply singular-value
 * thresholding per block, and undo the reshape/shift/padding into dstl.
 */
static void lrthresh_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src)
{
	struct lrthresh_data_s* data = CAST_DOWN(lrthresh_data_s, _data);

	// effective threshold scales the stored lambda by the step size mu
	float lambda = mu * data->lambda;

	// element (not byte) strides of the decomposition array, used to
	// locate the start of each level along LEVEL_DIM
	long strs1[DIMS];
	md_calc_strides(DIMS, strs1, data->dims_decom, 1);

	//#pragma omp parallel for
	for (int l = 0; l < data->levels; l++) {

		complex float* dstl = dst + l * strs1[LEVEL_DIM];
		const complex float* srcl = src + l * strs1[LEVEL_DIM];

		long blkdims[DIMS];
		long shifts[DIMS];
		long unshifts[DIMS];
		long zpad_dims[DIMS];
		long M = 1;	// rows of each block matrix (product of blkdims over mflags)

		for (unsigned int i = 0; i < DIMS; i++) {

			blkdims[i] = data->blkdims[l][i];

			// round dims up to the next multiple of the block size
			zpad_dims[i] = (data->dims[i] + blkdims[i] - 1) / blkdims[i];
			zpad_dims[i] *= blkdims[i];

			if (MD_IS_SET(data->mflags, i))
				M *= blkdims[i];

			// random cyclic shift avoids blocking artifacts; bounded so
			// shifted content stays within the zero-padded extent
			if (data->randshift)
				shifts[i] = rand_lim(MIN(blkdims[i] - 1, zpad_dims[i] - blkdims[i]));
			else
				shifts[i] = 0;

			unshifts[i] = -shifts[i];
		}

		long zpad_strs[DIMS];
		md_calc_strides(DIMS, zpad_strs, zpad_dims, CFL_SIZE);

		long blk_size = md_calc_size(DIMS, blkdims);
		long img_size = md_calc_size(DIMS, zpad_dims);
		long N = blk_size / M;		// columns of each block matrix
		long B = img_size / blk_size;	// number of blocks

		// last level may model noise: treat the whole image as one
		// M x 1 "block" so thresholding acts on the full vector
		if (data->noise && (l == data->levels - 1)) {

			M = img_size;
			N = 1;
			B = 1;
		}

		// padded working copy on the same device as dst
		complex float* tmp = md_alloc_sameplace(DIMS, zpad_dims, CFL_SIZE, dst);

		md_circ_ext(DIMS, zpad_dims, tmp, data->dims, srcl, CFL_SIZE);

		// in-place shift; identity when shifts are all zero
		md_circ_shift(DIMS, zpad_dims, shifts, tmp, tmp, CFL_SIZE);

		long mat_dims[2];
		basorati_dims(DIMS, mat_dims, blkdims, zpad_dims);

		complex float* tmp_mat = md_alloc_sameplace(2, mat_dims, CFL_SIZE, dst);

		// Reshape image into a blk_size x number of blocks matrix
		basorati_matrix(DIMS, blkdims, mat_dims, tmp_mat, zpad_dims, zpad_strs, tmp);

		// per-block singular-value soft thresholding, in place;
		// the cast views the flat buffer as mat_dims[1] matrices of M x N
		batch_svthresh(M, N, mat_dims[1], lambda * GWIDTH(M, N, B), *(complex float (*)[mat_dims[1]][M][N])tmp_mat);

		// for ( int b = 0; b < mat_dims[1]; b++ )
		// svthresh(M, N, lambda * GWIDTH(M, N, B), tmp_mat, tmp_mat);

		// undo the block reshape back into the padded image
		basorati_matrixH(DIMS, blkdims, zpad_dims, zpad_strs, tmp, mat_dims, tmp_mat);

		// undo the random shift, then crop the padding into dstl
		md_circ_shift(DIMS, zpad_dims, unshifts, tmp, tmp, CFL_SIZE);

		md_resize(DIMS, data->dims, dstl, zpad_dims, tmp, CFL_SIZE);

		md_free(tmp);
		md_free(tmp_mat);
	}
}
/*
 * homodyne command-line tool: partial-Fourier reconstruction.
 * Usage (after options): <pfdim> <frac> <input> <output>
 *   -C  clear (zero) the unacquired part of the input first
 *   -P  use an external phase reference instead of estimating one
 */
int main_homodyne(int argc, char* argv[])
{
	bool clear = false;
	const char* phase_ref = NULL;

	int com;
	while (-1 != (com = getopt(argc, argv, "hCP:"))) {

		switch (com) {

		case 'C':
			clear = true;
			break;

		case 'P':
			// duplicated so it survives getopt; freed at the end
			phase_ref = strdup(optarg);
			break;

		case 'h':
			help(argv[0], stdout);
			exit(0);

		default:
			help(argv[0], stderr);
			exit(1);
		}
	}

	// exactly four positional arguments required
	if (argc - optind != 4) {

		usage(argv[0], stderr);
		exit(1);
	}

	const int N = DIMS;
	long dims[N];
	complex float* idata = load_cfl(argv[optind + 2], N, dims);
	complex float* data = create_cfl(argv[optind + 3], N, dims);

	int pfdim = atoi(argv[optind + 0]);	// partial-Fourier dimension
	float frac = atof(argv[optind + 1]);	// acquired fraction of that dimension

	assert((0 <= pfdim) && (pfdim < N));
	assert(frac > 0.);

	long strs[N];
	md_calc_strides(N, strs, dims, CFL_SIZE);

	// weights along the partial-Fourier dimension, filled by comp_weights
	struct wdata wdata;
	wdata.frac = frac;
	wdata.pfdim = pfdim;
	md_select_dims(N, MD_BIT(pfdim), wdata.wdims, dims);
	md_calc_strides(N, wdata.wstrs, wdata.wdims, CFL_SIZE);
	wdata.weights = md_alloc(N, wdata.wdims, CFL_SIZE);
	md_loop(N, wdata.wdims, &wdata, comp_weights);

	long pstrs[N];
	long pdims[N];
	complex float* phase = NULL;

	// phase: either estimated from the low-frequency data (md_free'd
	// later) or memory-mapped from a reference file (unmap_cfl'd later)
	if (NULL == phase_ref) {

		phase = estimate_phase(wdata, FFT_FLAGS, N, dims, idata);
		md_copy_dims(N, pdims, dims);

	} else
		phase = load_cfl(phase_ref, N, pdims);

	md_calc_strides(N, pstrs, pdims, CFL_SIZE);

	complex float* cdata = NULL;
	complex float* idata2 = NULL;

	if (clear) {

		// crop to the acquired fraction and zero-pad back, which
		// zeroes the unacquired region; idata is replaced in place
		long cdims[N];
		md_select_dims(N, ~MD_BIT(pfdim), cdims, dims);
		cdims[pfdim] = (int)(dims[pfdim] * frac);
		cdata = md_alloc(N, cdims, CFL_SIZE);
		idata2 = anon_cfl(NULL, N, dims);
		md_resize(N, cdims, cdata, dims, idata, CFL_SIZE);
		md_resize(N, dims, idata2, cdims, cdata, CFL_SIZE);
		md_free(cdata);
		unmap_cfl(N, dims, idata);
		idata = idata2;
	}

	if ((1 == dims[PHS2_DIM]) || (PHS2_DIM == pfdim)) {

		// single slab (or pfdim coincides with PHS2_DIM): one pass
		homodyne(wdata, FFT_FLAGS, N, dims, strs, data, idata, pstrs, phase);

	} else {

		// otherwise process each PHS2_DIM slice independently
		unsigned int pardim = PHS2_DIM;

		// transform all FFT dims except pfdim up front
		ifftuc(N, dims, MD_CLEAR(FFT_FLAGS, pfdim), data, idata);

		long rdims[N];
		md_select_dims(N, ~MD_BIT(pardim), rdims, dims);
		long rstrs[N];
		md_calc_strides(N, rstrs, rdims, CFL_SIZE);

#pragma omp parallel for
		for (unsigned int i = 0; i < dims[pardim]; i++) {

			// per-thread scratch slice
			complex float* tmp = md_alloc(N, rdims, CFL_SIZE);
			long pos[N];
			md_set_dims(N, pos, 0);
			pos[pardim] = i;

			md_copy_block(N, pos, rdims, tmp, dims, data, CFL_SIZE);
			// only pfdim remains to be homodyne-reconstructed here
			homodyne(wdata, MD_BIT(pfdim), N, rdims, rstrs, tmp, tmp, pstrs, phase);
			md_copy_block(N, pos, dims, data, rdims, tmp, CFL_SIZE);
			md_free(tmp);
		}
	}

	md_free(wdata.weights);

	// release phase according to how it was obtained
	if (NULL == phase_ref)
		md_free(phase);
	else {
		unmap_cfl(N, pdims, phase);
		free((void*)phase_ref);
	}

	unmap_cfl(N, dims, idata);
	unmap_cfl(N, dims, data);
	exit(0);
}
/* * Low rank threhsolding for arbitrary block sizes */ static void lrthresh_apply(const void* _data, float mu, complex float* dst, const complex float* src) { struct lrthresh_data_s* data = (struct lrthresh_data_s*)_data; float lambda = mu * data->lambda; long strs1[DIMS]; md_calc_strides(DIMS, strs1, data->dims_decom, 1); //#pragma omp parallel for for (int l = 0; l < data->levels; l++) { complex float* dstl = dst + l * strs1[LEVEL_DIM]; const complex float* srcl = src + l * strs1[LEVEL_DIM]; // Initialize long blkdims[DIMS]; long shifts[DIMS]; long unshifts[DIMS]; long zpad_dims[DIMS]; long M = 1; for (unsigned int i = 0; i < DIMS; i++) { blkdims[i] = data->blkdims[l][i]; zpad_dims[i] = (data->dims[i] + blkdims[i] - 1) / blkdims[i]; zpad_dims[i] *= blkdims[i]; if (MD_IS_SET(data->mflags, i)) M *= blkdims[i]; if (data->randshift) shifts[i] = rand_lim(MIN(blkdims[i] - 1, zpad_dims[i] - blkdims[i])); else shifts[i] = 0; unshifts[i] = -shifts[i]; } long zpad_strs[DIMS]; md_calc_strides(DIMS, zpad_strs, zpad_dims, CFL_SIZE); long blk_size = md_calc_size( DIMS, blkdims ); long img_size = md_calc_size( DIMS, zpad_dims ); long N = blk_size / M; long B = img_size / blk_size; if (data->noise && (l == data->levels - 1)) { M = img_size; N = 1; B = 1; } // Initialize tmp complex float* tmp_ext; #ifdef USE_CUDA tmp_ext = (data->use_gpu ? md_alloc_gpu : md_alloc)(DIMS, zpad_dims, CFL_SIZE); #else tmp_ext = md_alloc(DIMS, zpad_dims, CFL_SIZE); #endif complex float* tmp; #ifdef USE_CUDA tmp = (data->use_gpu ? md_alloc_gpu : md_alloc)(DIMS, zpad_dims, CFL_SIZE); #else tmp = md_alloc(DIMS, zpad_dims, CFL_SIZE); #endif // Copy to tmp md_circ_ext(DIMS, zpad_dims, tmp_ext, data->dims, srcl, CFL_SIZE); if (data->randshift) md_circ_shift(DIMS, zpad_dims, shifts, tmp, tmp_ext, CFL_SIZE); // Initialize tmp_mat long mat_dims[2]; basorati_dims(DIMS, mat_dims, blkdims, zpad_dims); complex float* tmp_mat; #ifdef USE_CUDA tmp_mat = (data->use_gpu ? 
md_alloc_gpu : md_alloc)(2, mat_dims, CFL_SIZE); #else tmp_mat = md_alloc(2, mat_dims, CFL_SIZE); #endif // Reshape image into a blk_size x number of blocks matrix basorati_matrix(DIMS, blkdims, mat_dims, tmp_mat, zpad_dims, zpad_strs, tmp); batch_svthresh(M, N, mat_dims[1], lambda * GWIDTH(M, N, B), tmp_mat, tmp_mat); // for ( int b = 0; b < mat_dims[1]; b++ ) // svthresh(M, N, lambda * GWIDTH(M, N, B), tmp_mat, tmp_mat); basorati_matrixH(DIMS, blkdims, zpad_dims, zpad_strs, tmp, mat_dims, tmp_mat); // Copy to tmp if (data->randshift) md_circ_shift(DIMS, zpad_dims, unshifts, tmp_ext, tmp, CFL_SIZE); md_resize(DIMS, data->dims, dstl, zpad_dims, tmp_ext, CFL_SIZE); // Free data md_free(tmp); md_free(tmp_ext); md_free(tmp_mat); } }