/**
 * Compute the set of dimensions to parallelize.
 *
 * Starting from the highest candidate dimension, dimensions are added
 * to the result set as long as the remaining per-thread workload stays
 * above CHUNK and the core budget is not exhausted.
 *
 * NOTE(review): `count` is initialized to 1 and never updated, so the
 * (count < CORES) bound is currently inert — confirm whether this is
 * intentional.
 */
unsigned int dims_parallel(unsigned int D, unsigned int io, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D])
{
	unsigned int flags = parallelizable(D, io, N, dims, strs, size);

	unsigned int oflags = 0;
	unsigned int count = 1;

	// total number of elements; divided down as dimensions are claimed
	long reps = md_calc_size(N, dims);

	for (unsigned int i = D; (count < CORES) && (i-- > 0); ) {

		if (!MD_IS_SET(flags, i))
			continue;	// not parallelizable — skip

		reps /= dims[i];

		if (reps < CHUNK)
			break;		// remaining work too small to split further

		oflags = MD_SET(oflags, i);
		//break; // only 1
	}

	return oflags;
}
/*
 * Compute the Sobolev-type k-space weighting used by NOIR:
 *
 *   w = (1 + 220. * Laplace)^-16
 *
 * computed in-place in dst over all non-singleton dimensions.
 */
static void noir_calc_weights(const long dims[3], complex float* dst)
{
	unsigned int flags = 0;

	// weight only along dimensions that are actually present
	for (int d = 0; d < 3; d++)
		if (1 != dims[d])
			flags = MD_SET(flags, d);

	klaplace(3, dims, flags, dst);
	md_zsmul(3, dims, dst, dst, 220.);
	md_zsadd(3, dims, dst, dst, 1.);
	md_zspow(3, dims, dst, dst, -16.);
}
int main_bitmask(int argc, char* argv[]) { bool inverse = false; long flags = 0; const struct opt_s opts[] = { { 'b', false, opt_set, &inverse, "\tdimensions from bitmask" }, }; cmdline(&argc, argv, 0, 1000, usage_str, help_str, ARRAY_SIZE(opts), opts); if ((2 != argc) && inverse) error("exactly one argument needed.\n"); if (!inverse) { for (int i = 1; i < argc; i++) { int d = atoi(argv[i]); assert(d >= 0); flags = MD_SET(flags, d); } printf("%ld\n", flags); } else { int i = 0; flags = atoi(argv[1]); while (flags) { if (flags & 1) printf("%d ", i); flags >>= 1; i++; } printf("\n"); } exit(0); }
struct linop_s* maps2_create(const long coilim_dims[DIMS], const long maps_dims[DIMS], const long img_dims[DIMS], const complex float* maps, bool use_gpu) { long max_dims[DIMS]; unsigned int sens_flags = 0; for (unsigned int i = 0; i < DIMS; i++) if (1 != maps_dims[i]) sens_flags = MD_SET(sens_flags, i); assert(1 == coilim_dims[MAPS_DIM]); assert(1 == img_dims[COIL_DIM]); assert(maps_dims[COIL_DIM] == coilim_dims[COIL_DIM]); assert(maps_dims[MAPS_DIM] == img_dims[MAPS_DIM]); for (unsigned int i = 0; i < DIMS; i++) max_dims[i] = MAX(coilim_dims[i], MAX(maps_dims[i], img_dims[i])); struct maps_data* data = maps_create_data(max_dims, sens_flags, maps, use_gpu); return linop_create(DIMS, coilim_dims, DIMS, img_dims, data, maps_apply, maps_apply_adjoint, maps_apply_normal, maps_apply_pinverse, maps_free_data); }
/*
 * Overlap-and-save convolution, normal-equations variant:
 * forward blocked convolution with src2, masking with msk, then the
 * adjoint convolution, accumulated back into the (padded) input grid.
 *
 * Blocked dimensions are split into pairs (block index, in-block index),
 * doubling the dimension count to 2*N for the intermediate arrays.
 *
 * NOTE(review): the final copy into `dst` is commented out below, so the
 * result accumulated in src1B is discarded and `dst` is never written —
 * confirm whether this function is dead/experimental as seen here.
 */
void overlapandsave2NE(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], complex float* src1, const long dims2[N], complex float* src2, const long mdims[N], complex float* msk)
{
	long dims1B[N];		// padded input dims (one extra block each side)
	long tdims[2 * N];	// expanded (overlapped) temporary dims
	long nodims[2 * N];	// blocked output dims
	long ndims1[2 * N];	// blocked padded-input dims
	long ndims2[2 * N];	// kernel dims in blocked layout
	long shift[2 * N];	// per-dimension read offset into padded input

	unsigned int nflags = 0;

	for (int i = 0; i < N; i++) {

		if (MD_IS_SET(flags, i)) {

			// convolve along the in-block part of this dimension
			nflags = MD_SET(nflags, 2 * i);

			// sizes must be odd kernel / even blocks, blocks tiling exactly
			assert(1 == dims2[i] % 2);
			assert(0 == blk[i] % 2);
			assert(0 == dims1[i] % 2);
			assert(0 == odims[i] % blk[i]);
			assert(0 == dims1[i] % blk[i]);
			assert(dims1[i] == odims[i]);
			assert(dims2[i] <= blk[i]);
			assert(dims1[i] >= dims2[i]);

			// blocked output
			nodims[i * 2 + 1] = odims[i] / blk[i];
			nodims[i * 2 + 0] = blk[i];

			// expanded temporary storage (block plus kernel overlap)
			tdims[i * 2 + 1] = dims1[i] / blk[i];
			tdims[i * 2 + 0] = blk[i] + dims2[i] - 1;

			// blocked input
			// ---|---,---,---|---
			//     +  +++  +
			//     +  +++  +

			// resized (padded) input
			dims1B[i] = dims1[i] + 2 * blk[i];
			ndims1[i * 2 + 1] = dims1[i] / blk[i] + 2; // do we need two full blocks?
			ndims1[i * 2 + 0] = blk[i];

			shift[i * 2 + 1] = 0;
			shift[i * 2 + 0] = blk[i] - (dims2[i] - 1) / 2;

			// kernel
			ndims2[i * 2 + 1] = 1;
			ndims2[i * 2 + 0] = dims2[i];

		} else {

			// untouched dimension: trivial block index, no shift
			nodims[i * 2 + 1] = 1;
			nodims[i * 2 + 0] = odims[i];

			tdims[i * 2 + 1] = 1;
			tdims[i * 2 + 0] = dims1[i];

			ndims1[i * 2 + 1] = 1;
			ndims1[i * 2 + 0] = dims1[i];

			shift[i * 2 + 1] = 0;
			shift[i * 2 + 0] = 0;

			dims1B[i] = dims1[i];

			ndims2[i * 2 + 1] = 1;
			ndims2[i * 2 + 0] = dims2[i];
		}
	}

	complex float* src1B = md_alloc(N, dims1B, CFL_SIZE);
	complex float* tmp = md_alloc(2 * N, tdims, CFL_SIZE);
	complex float* tmpX = md_alloc(N, odims, CFL_SIZE);

	long str1[2 * N];
	long str2[2 * N];

	md_calc_strides(2 * N, str1, ndims1, sizeof(complex float));
	md_calc_strides(2 * N, str2, tdims, sizeof(complex float));

	// byte offset realizing the per-dimension shifts on the padded input
	long off = md_calc_offset(2 * N, str1, shift);

	// zero-pad the input into the enlarged grid
	md_resize_center(N, dims1B, src1B, dims1, src1, sizeof(complex float));

	// we can loop here

	// gather overlapped blocks from the padded input
	md_copy2(2 * N, tdims, str2, tmp, str1, ((void*)src1B) + off, sizeof(complex float));

	// forward convolution per block
	conv(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, nodims, tmpX, tdims, tmp, ndims2, src2);

	long ostr[N];
	long mstr[N];

	md_calc_strides(N, ostr, odims, sizeof(complex float));
	md_calc_strides(N, mstr, mdims, sizeof(complex float));

	// apply the mask (mdims may broadcast via its strides)
	md_zmul2(N, odims, ostr, tmpX, ostr, tmpX, mstr, msk);

	// adjoint convolution back into overlapped block layout
	convH(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2);

	// scatter-accumulate the overlapping blocks into the padded grid
	md_clear(N, dims1B, src1B, sizeof(complex float));
	md_zadd2(2 * N, tdims, str1, ((void*)src1B) + off, str1, ((void*)src1B) + off, str2, tmp);

	// md_resize_center(N, dims1, dst, dims1B, src1B, sizeof(complex float));

	md_free(src1B);
	md_free(tmpX);
	md_free(tmp);
}
/*
 * Configure regularization terms: for each penalty requested in ropts,
 * build the corresponding transform (trafos[nr]) and proximal operator
 * (prox_ops[nr]).
 *
 * NOTE: the closing braces of the per-penalty loop and of this function
 * are not visible in this chunk — the definition continues below.
 */
void opt_reg_configure(unsigned int N, const long img_dims[N], struct opt_reg_s* ropts, const struct operator_p_s* prox_ops[NUM_REGS], const struct linop_s* trafos[NUM_REGS], unsigned int llr_blk, bool randshift, bool use_gpu)
{
	float lambda = ropts->lambda;

	if (-1. == lambda)
		lambda = 0.;

	// if no penalities specified but regularization
	// parameter is given, add a l2 penalty

	struct reg_s* regs = ropts->regs;

	if ((0 == ropts->r) && (lambda > 0.)) {

		regs[0].xform = L2IMG;
		regs[0].xflags = 0u;
		regs[0].jflags = 0u;
		regs[0].lambda = lambda;
		ropts->r = 1;
	}

	int nr_penalties = ropts->r;
	long blkdims[MAX_LEV][DIMS];
	int levels;

	for (int nr = 0; nr < nr_penalties; nr++) {

		// fix up regularization parameter: fall back to global lambda
		if (-1. == regs[nr].lambda)
			regs[nr].lambda = lambda;

		switch (regs[nr].xform) {

		case L1WAV:
			debug_printf(DP_INFO, "l1-wavelet regularization: %f\n", regs[nr].lambda);

			if (0 != regs[nr].jflags)
				debug_printf(DP_WARN, "joint l1-wavelet thresholding not currently supported.\n");

			// minimum wavelet block size per dimension (capped at 16)
			long minsize[DIMS] = { [0 ... DIMS - 1] = 1 };
			minsize[0] = MIN(img_dims[0], 16);
			minsize[1] = MIN(img_dims[1], 16);
			minsize[2] = MIN(img_dims[2], 16);

			unsigned int wflags = 0;

			for (unsigned int i = 0; i < DIMS; i++) {

				// transform only non-singleton, explicitly flagged dims
				if ((1 < img_dims[i]) && MD_IS_SET(regs[nr].xflags, i)) {

					wflags = MD_SET(wflags, i);
					minsize[i] = MIN(img_dims[i], 16);
				}
			}

			trafos[nr] = linop_identity_create(DIMS, img_dims);
			prox_ops[nr] = prox_wavelet3_thresh_create(DIMS, img_dims, wflags, minsize, regs[nr].lambda, randshift);
			break;

		case TV:
			debug_printf(DP_INFO, "TV regularization: %f\n", regs[nr].lambda);

			// gradient operator adds one dimension; threshold jointly over it
			trafos[nr] = linop_grad_create(DIMS, img_dims, regs[nr].xflags);
			prox_ops[nr] = prox_thresh_create(DIMS + 1, linop_codomain(trafos[nr])->dims, regs[nr].lambda, regs[nr].jflags | MD_BIT(DIMS), use_gpu);
			break;

		case LLR:
			debug_printf(DP_INFO, "lowrank regularization: %f\n", regs[nr].lambda);

			// add locally lowrank penalty
			levels = llr_blkdims(blkdims, regs[nr].jflags, img_dims, llr_blk);

			assert(1 == levels);
			assert(levels == img_dims[LEVEL_DIM]);

			for(int l = 0; l < levels; l++)
#if 0
				blkdims[l][MAPS_DIM] = img_dims[MAPS_DIM];
#else
				blkdims[l][MAPS_DIM] = 1;
#endif

			int remove_mean = 0;

			trafos[nr] = linop_identity_create(DIMS, img_dims);
			prox_ops[nr] = lrthresh_create(img_dims, randshift, regs[nr].xflags, (const long (*)[DIMS])blkdims, regs[nr].lambda, false, remove_mean, use_gpu);
			break;

		case MLR:
#if 0
			// FIXME: multiscale low rank changes the output image dimensions
			// and requires the forward linear operator. This should be decoupled...
			debug_printf(DP_INFO, "multi-scale lowrank regularization: %f\n", regs[nr].lambda);

			levels = multilr_blkdims(blkdims, regs[nr].jflags, img_dims, 8, 1);

			img_dims[LEVEL_DIM] = levels;
			max_dims[LEVEL_DIM] = levels;

			for(int l = 0; l < levels; l++)
				blkdims[l][MAPS_DIM] = 1;

			trafos[nr] = linop_identity_create(DIMS, img_dims);
			prox_ops[nr] = lrthresh_create(img_dims, randshift, regs[nr].xflags, (const long (*)[DIMS])blkdims, regs[nr].lambda, false, 0, use_gpu);

			const struct linop_s* decom_op = sum_create( img_dims, use_gpu );
			const struct linop_s* tmp_op = forward_op;
			forward_op = linop_chain(decom_op, forward_op);

			linop_free(decom_op);
			linop_free(tmp_op);
#else
			debug_printf(DP_WARN, "multi-scale lowrank regularization not yet supported: %f\n", regs[nr].lambda);
#endif
			break;

		case IMAGL1:
			debug_printf(DP_INFO, "l1 regularization of imaginary part: %f\n", regs[nr].lambda);

			// multiply by i so the imaginary part becomes the real part
			trafos[nr] = linop_rdiag_create(DIMS, img_dims, 0, &(complex float){ 1.i });
			prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu);
			break;

		case IMAGL2:
			debug_printf(DP_INFO, "l2 regularization of imaginary part: %f\n", regs[nr].lambda);

			trafos[nr] = linop_rdiag_create(DIMS, img_dims, 0, &(complex float){ 1.i });
			prox_ops[nr] = prox_leastsquares_create(DIMS, img_dims, regs[nr].lambda, NULL);
			break;

		case L1IMG:
			debug_printf(DP_INFO, "l1 regularization: %f\n", regs[nr].lambda);

			trafos[nr] = linop_identity_create(DIMS, img_dims);
			prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu);
			break;

		case L2IMG:
			debug_printf(DP_INFO, "l2 regularization: %f\n", regs[nr].lambda);

			trafos[nr] = linop_identity_create(DIMS, img_dims);
			prox_ops[nr] = prox_leastsquares_create(DIMS, img_dims, regs[nr].lambda, NULL);
			break;

		case FTL1:
			debug_printf(DP_INFO, "l1 regularization of Fourier transform: %f\n", regs[nr].lambda);

			trafos[nr] = linop_fft_create(DIMS, img_dims, regs[nr].xflags);
			prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu);
			break;
		}
/*
 * bitmask tool (getopt variant): convert a list of dimension indices
 * into a bitmask, or (with -b MASK) decode a bitmask into indices.
 */
int main_bitmask(int argc, char* argv[])
{
	bool inverse = false;
	unsigned int flags = 0;
	int c;

	while (-1 != (c = getopt(argc, argv, "hb:"))) {

		switch (c) {

		case 'h':
			usage(argv[0], stdout);
			help();
			exit(0);

		case 'b':
			// NOTE(review): atoi cannot represent masks with bit 31
			// set; consider strtoul if larger masks are needed.
			flags = atoi(optarg);
			inverse = true;
			break;

		default:
			usage(argv[0], stderr);
			exit(1);
		}
	}

	// forward mode needs at least one dimension argument
	if ((argc - optind < 1) && !inverse) {

		usage(argv[0], stderr);
		exit(1);
	}

	if (!inverse) {

		// encode: set one bit per listed dimension
		for (int i = optind; i < argc; i++) {

			int d = atoi(argv[i]);
			assert(d >= 0);

			flags = MD_SET(flags, d);
		}

		// FIX: flags is unsigned int — %d is a format/type mismatch
		// (undefined for values above INT_MAX); use %u.
		printf("%u\n", flags);

	} else {

		// decode: print the index of every set bit
		int i = 0;

		while (flags) {

			if (flags & 1)
				printf("%d ", i);

			flags >>= 1;
			i++;
		}

		printf("\n");
	}

	exit(0);
}
struct wavelet_plan_s* prepare_wavelet_plan_filters(int numdims, const long imSize[numdims], unsigned int flags, const long minSize[numdims], int use_gpu, int filter_length, const float filter[4][filter_length]) { // Currently only accept flags=3,7 assert( (3 == flags) || (7 == flags) ); assert((use_gpu == 0) || (use_gpu == 1)); struct wavelet_plan_s* plan = (struct wavelet_plan_s*)xmalloc(sizeof(struct wavelet_plan_s)); plan->use_gpu = use_gpu; plan->imSize = (long*)xmalloc(sizeof(long)*numdims); md_singleton_dims(numdims, plan->imSize); // Get imSize, numPixel, numdims_tr // plan->numdims and flags ignores imSize[i]=1 plan->numdims_tr = 0; plan->numPixel = 1; plan->numPixel_tr = 1; plan->batchSize = 1; plan->flags = 0; int i,i_tr; int d = 0; for (i = 0; i < numdims; i++) { assert(imSize[i] > 0); if (1 != imSize[i]) { plan->imSize[d] = imSize[i]; plan->numPixel *= imSize[i]; if (MD_IS_SET(flags, i)) { plan->numdims_tr++; plan->numPixel_tr*=imSize[i]; } else plan->batchSize*=imSize[i]; if (MD_IS_SET(flags, i)) plan->flags = MD_SET(plan->flags, d); d++; } } plan->numdims = d; // Get imSize_tr, trDims (dimensions that we do wavelet transform), minSize_tr plan->imSize_tr = (long*)xmalloc(sizeof(long) * plan->numdims_tr); plan->trDims = (long*)xmalloc(sizeof(long) * plan->numdims_tr); plan->minSize_tr = (long*)xmalloc(sizeof(long) * plan->numdims_tr); i_tr = 0; for (i = 0; i < numdims; i++) { if (MD_IS_SET(flags, i) && (1 != imSize[i])) { plan->imSize_tr[i_tr] = imSize[i]; plan->trDims[i_tr] = i; assert(minSize[i_tr] > 0); plan->minSize_tr[i_tr] = minSize[i]; i_tr++; } } plan->filterLen = filter_length; #ifdef USE_CUDA if (plan->use_gpu) { prepare_wavelet_filters_gpu(plan,plan->filterLen,&(filter[0][0])); create_numLevels(plan); create_wavelet_sizes(plan); plan->state = 1; plan->randShift_tr = (long*)xmalloc(sizeof(long) * plan->numdims_tr); memset(plan->randShift_tr, 0, sizeof(long) * plan->numdims_tr); prepare_wavelet_temp_gpu(plan); } else #endif { plan->lod = 
filter[0]; plan->hid = filter[1]; plan->lor = filter[2]; plan->hir = filter[3]; create_numLevels(plan); create_wavelet_sizes(plan); plan->state = 1; plan->randShift_tr = (long*)xmalloc(sizeof(long) * plan->numdims_tr); memset(plan->randShift_tr, 0, sizeof(long) * plan->numdims_tr); plan->tmp_mem_tr = (data_t*)xmalloc(sizeof(data_t)*plan->numCoeff_tr*4); } plan->lambda = 1.; return plan; }
/*
 * Generic reconstruction driver for one slice/frame: derives the
 * sampling pattern, prepares sensitivities (from calibration data,
 * precomputed maps, or zero-initialized for NOIR), applies fftmod,
 * selects the output buffer, optionally initializes CUDA, and builds
 * the l1-wavelet threshold operator.
 *
 * NOTE: this definition continues past the end of this chunk — only the
 * setup portion is visible here.
 */
void grecon(struct grecon_conf* param, const long dims1[DIMS], complex float* out1, const long cov1_dims[DIMS], complex float* cov1, const long w1_dims[DIMS], const complex float* weights, complex float* kspace1, bool usegpu)
{
	struct sense_conf* conf = param->sense_conf;

	long ksp1_dims[DIMS];
	md_select_dims(DIMS, ~MAPS_FLAG, ksp1_dims, dims1);

	long pat1_dims[DIMS];
	const complex float* pattern;

	if (NULL == weights) {

		// no weights given: estimate sampling pattern from the k-space data
		md_select_dims(DIMS, ~(COIL_FLAG | MAPS_FLAG), pat1_dims, dims1);
		complex float* tpattern = md_alloc(DIMS, pat1_dims, CFL_SIZE);
		estimate_pattern(DIMS, ksp1_dims, COIL_DIM, tpattern, kspace1);
		pattern = tpattern;

	} else {

		md_copy_dims(DIMS, pat1_dims, w1_dims);
		pattern = weights;
	}

	complex float* sens1;

	if (NULL != param->calib) {

		// compute sensitivities from calibration data
		long img1_dims[DIMS];
		md_select_dims(DIMS, ~COIL_FLAG, img1_dims, dims1);
		complex float* maps1 = md_alloc(DIMS, img1_dims, CFL_SIZE);

		sens1 = md_alloc(DIMS, dims1, CFL_SIZE);
		caltwo(param->calib, dims1, sens1, maps1, cov1_dims, cov1, NULL, NULL);
		crop_sens(dims1, sens1, param->calib->softcrop, param->calib->crop, maps1);
		fixphase(DIMS, dims1, COIL_DIM, sens1, sens1);

		md_free(maps1);

	} else {

		// use precomputed sensitivities passed in via cov1
		sens1 = cov1;
	}

	if (NOIR == param->algo) {

		// NOIR estimates sensitivities itself: start from zeros
		assert(NULL == param->calib);
		assert(1 == dims1[MAPS_DIM]);

		sens1 = md_alloc(DIMS, dims1, CFL_SIZE);
		md_clear(DIMS, dims1, sens1, CFL_SIZE);

		// NOTE(review): kspace1 gets fftmod applied here AND again
		// unconditionally below — confirm the double application is
		// intended for the NOIR path.
		fftmod(DIMS, ksp1_dims, FFT_FLAGS, kspace1, kspace1);
	}

	fftmod(DIMS, dims1, FFT_FLAGS, sens1, sens1);
	fftmod(DIMS, ksp1_dims, FFT_FLAGS, kspace1, kspace1);

	complex float* image1 = NULL;

	long img1_dims[DIMS];
	md_select_dims(DIMS, ~COIL_FLAG, img1_dims, dims1);

	if (param->ksp && (POCS != param->algo)) {

		// reconstruct into a scratch image (k-space output requested)
		image1 = md_alloc(DIMS, img1_dims, CFL_SIZE);
		md_clear(DIMS, img1_dims, image1, CFL_SIZE);

	} else {

		// reconstruct directly into the caller's output buffer
		image1 = out1;
	}

#ifdef USE_CUDA
	int gpun = 0;

	if (usegpu) {

		// round-robin GPU assignment across OpenMP threads
		int nr_cuda_devices = MIN(cuda_devices(), MAX_CUDA_DEVICES);
		gpun = omp_get_thread_num() % nr_cuda_devices;
		cuda_init(gpun);
	}
#endif

	const struct operator_p_s* thresh_op = NULL;

	if (param->l1wav) {

		// minimum wavelet block size per dimension (capped at 16)
		long minsize[DIMS] = { [0 ... DIMS - 1] = 1 };
		minsize[0] = MIN(img1_dims[0], 16);
		minsize[1] = MIN(img1_dims[1], 16);
		minsize[2] = MIN(img1_dims[2], 16);

#ifndef W3
		thresh_op = prox_wavethresh_create(DIMS, img1_dims, FFT_FLAGS, minsize, param->lambda, param->randshift, usegpu);
#else
		unsigned int wflags = 0;

		for (unsigned int i = 0; i < 3; i++)
			if (1 < img1_dims[i])
				wflags = MD_SET(wflags, i);

		thresh_op = prox_wavelet3_thresh_create(DIMS, img1_dims, wflags, minsize, param->lambda, param->randshift);
#endif
	}