/**
 * Set up the 2*N-dimensional output dimensions for a wavelet transform.
 *
 * The lower half of odims starts out as a copy of dims and the upper half
 * as singleton dimensions; both halves are then handed to wavelet_dims_r,
 * starting at the last index (N - 1), which finishes the computation.
 *
 * @param N      number of input dimensions
 * @param flags  bitmask forwarded to wavelet_dims_r
 * @param odims  output dimensions (2 * N entries, written)
 * @param dims   input dimensions (N entries)
 * @param flen   filter length forwarded to wavelet_dims_r
 */
void wavelet_dims(unsigned int N, unsigned int flags, long odims[2 * N], const long dims[N], const long flen)
{
	long* lower_half = &odims[0];
	long* upper_half = &odims[N];

	md_singleton_dims(N, upper_half);
	md_copy_dims(N, lower_half, dims);

	const unsigned int last = N - 1;

	wavelet_dims_r(N, last, flags, odims, dims, flen);
}
void fwtN(unsigned int N, unsigned int flags, const long shifts[N], const long dims[N], const long ostr[2 * N], complex float* out, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen]) { long odims[2 * N]; wavelet_dims(N, flags, odims, dims, flen); assert(md_calc_size(2 * N, odims) >= md_calc_size(N, dims)); // FIXME one of these is unnecessary if we use the output complex float* tmpA = md_alloc_sameplace(2 * N, odims, CFL_SIZE, out); complex float* tmpB = md_alloc_sameplace(2 * N, odims, CFL_SIZE, out); long tidims[2 * N]; md_copy_dims(N, tidims, dims); md_singleton_dims(N, tidims + N); long tistrs[2 * N]; md_calc_strides(2 * N, tistrs, tidims, CFL_SIZE); long todims[2 * N]; md_copy_dims(2 * N, todims, tidims); long tostrs[2 * N]; // maybe we should push the randshift into lower levels //md_copy2(N, dims, tistrs, tmpA, istr, in, CFL_SIZE); md_circ_shift2(N, dims, shifts, tistrs, tmpA, istr, in, CFL_SIZE); for (unsigned int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { todims[0 + i] = odims[0 + i]; todims[N + i] = odims[N + i]; md_calc_strides(2 * N, tostrs, todims, CFL_SIZE); fwt1(2 * N, i, tidims, tostrs, tmpB, (void*)tmpB + tostrs[N + i], tistrs, tmpA, flen, filter); md_copy_dims(2 * N, tidims, todims); md_copy_dims(2 * N, tistrs, tostrs); complex float* swap = tmpA; tmpA = tmpB; tmpB = swap; } } md_copy2(2 * N, todims, ostr, out, tostrs, tmpA, CFL_SIZE); md_free(tmpA); md_free(tmpB); }
/**
 * Apply the proximal operator built by prox_dfwavelet_create to in.
 *
 * The minimum size passed to the operator is the element-wise minimum
 * (md_min_dims) of the first three entries of dims and a coarse scale of 16.
 * The resolution vector is fixed to (1, 1, 1), and dims[TE_DIM] must be 3.
 *
 * @param D       number of dimensions
 * @param dims    dimensions of in and out
 * @param lambda  threshold parameter handed to prox_dfwavelet_create
 * @param out     output buffer
 * @param in      input buffer
 */
static void dfthresh(unsigned int D, const long dims[D], float lambda, complex float* out, const complex float* in)
{
	long coarse_scale[3] = MD_INIT_ARRAY(3, 16);

	long minsize[3];
	md_singleton_dims(3, minsize);
	md_min_dims(3, ~0u, minsize, dims, coarse_scale);

	complex float res[3] = { 1., 1., 1. };

	assert(3 == dims[TE_DIM]);

	const struct operator_p_s* prox = prox_dfwavelet_create(dims, minsize, res, TE_DIM, lambda, false);

	operator_p_apply(prox, 1., D, dims, out, D, dims, in);

	operator_p_free(prox);
}
/** * Efficiently chain two matrix linops by multiplying the actual matrices together. * Stores a copy of the new matrix. * Returns: C = B A * * @param a first matrix (applied to input) * @param b second matrix (applied to output of first matrix) */ struct linop_s* linop_matrix_chain(const struct linop_s* a, const struct linop_s* b) { const struct operator_matrix_s* a_data = linop_get_data(a); const struct operator_matrix_s* b_data = linop_get_data(b); // check compatibility assert(linop_codomain(a)->N == linop_domain(b)->N); assert(md_calc_size(linop_codomain(a)->N, linop_codomain(a)->dims) == md_calc_size(linop_domain(b)->N, linop_domain(b)->dims)); assert(a_data->K_dim != b_data->T_dim); // FIXME error for now -- need to deal with this specially. assert((a_data->T_dim == b_data->K_dim) && (a_data->T == b_data->K)); unsigned int N = linop_domain(a)->N; long max_dims[N]; md_singleton_dims(N, max_dims); max_dims[a_data->T_dim] = a_data->T; max_dims[a_data->K_dim] = a_data->K; max_dims[b_data->T_dim] = b_data->T; long matrix_dims[N]; long matrix_strs[N]; md_select_dims(N, ~MD_BIT(a_data->T_dim), matrix_dims, max_dims); md_calc_strides(N, matrix_strs, matrix_dims, CFL_SIZE); complex float* matrix = md_alloc_sameplace(N, matrix_dims, CFL_SIZE, a_data->mat); md_clear(N, matrix_dims, matrix, CFL_SIZE); md_zfmac2(N, max_dims, matrix_strs, matrix, a_data->mat_iovec->strs, a_data->mat, b_data->mat_iovec->strs, b_data->mat); struct linop_s* c = linop_matrix_create(N, linop_codomain(b)->dims, linop_domain(a)->dims, matrix_dims, matrix); md_free(matrix); return c; }
// FIXME: consider moving this to a more accessible location? static void wthresh(unsigned int D, const long dims[D], float lambda, unsigned int flags, complex float* out, const complex float* in) { long minsize[D]; md_singleton_dims(D, minsize); long course_scale[3] = MD_INIT_ARRAY(3, 16); md_copy_dims(3, minsize, course_scale); unsigned int wflags = 7; // FIXME for (unsigned int i = 0; i < 3; i++) if (dims[i] < minsize[i]) wflags = MD_CLEAR(wflags, i); long strs[D]; md_calc_strides(D, strs, dims, CFL_SIZE); const struct linop_s* w = linop_wavelet_create(D, wflags, dims, strs, minsize, false); const struct operator_p_s* p = prox_unithresh_create(D, w, lambda, flags); operator_p_apply(p, 1., D, dims, out, D, dims, in); operator_p_free(p); }
/** * Operator interface for a true matrix: * out = mat * in * in: [x x x x 1 x x K x x] * mat: [x x x x T x x K x x] * out: [x x x x T x x 1 x x] * where the x's are arbitrary dimensions and T and K may be transposed * * use this interface if K == 1 or T == 1 * * @param N number of dimensions * @param out_dims output dimensions after applying the matrix (codomain) * @param in_dims input dimensions to apply the matrix (domain) * @param T_dim dimension corresponding to the rows of A * @param K_dim dimension corresponding to the columns of A * @param matrix matrix data */ struct linop_s* linop_matrix_altcreate(unsigned int N, const long out_dims[N], const long in_dims[N], const unsigned int T_dim, const unsigned int K_dim, const complex float* matrix) { long matrix_dims[N]; md_singleton_dims(N, matrix_dims); matrix_dims[K_dim] = in_dims[K_dim]; matrix_dims[T_dim] = out_dims[T_dim]; unsigned int T = out_dims[T_dim]; unsigned int K = in_dims[K_dim]; PTR_ALLOC(long[N], max_dims); for (unsigned int i = 0; i < N; i++) { if ((in_dims[i] > 1) && (out_dims[i] == 1)) { (*max_dims)[i] = in_dims[i]; } else if ((in_dims[i] == 1) && (out_dims[i] > 1)) { (*max_dims)[i] = out_dims[i]; } else { assert(in_dims[i] == out_dims[i]); (*max_dims)[i] = in_dims[i]; } } complex float* mat = md_alloc_sameplace(N, matrix_dims, CFL_SIZE, matrix); complex float* matc = md_alloc_sameplace(N, matrix_dims, CFL_SIZE, matrix); md_copy(N, matrix_dims, mat, matrix, CFL_SIZE); md_zconj(N, matrix_dims, matc, mat); complex float* gram = NULL; const struct iovec_s* gram_iovec = compute_gram_matrix(N, T_dim, T, K_dim, K, &gram, matrix_dims, matrix); PTR_ALLOC(struct operator_matrix_s, data); SET_TYPEID(operator_matrix_s, data); data->mat_iovec = iovec_create(N, matrix_dims, CFL_SIZE); data->mat_gram_iovec = gram_iovec; data->max_dims = *max_dims; data->mat = mat; data->mat_conj = matc; data->mat_gram = gram; data->K_dim = K_dim; data->T_dim = T_dim; data->K = K; data->T = T; data->domain_iovec = 
iovec_create(N, in_dims, CFL_SIZE); data->codomain_iovec = iovec_create(N, out_dims, CFL_SIZE); return linop_create(N, out_dims, N, in_dims, CAST_UP(PTR_PASS(data)), linop_matrix_apply, linop_matrix_apply_adjoint, linop_matrix_apply_normal, NULL, linop_matrix_del); }
/**
 * Compute the Gram matrix, A^H A.
 * The result buffer is allocated here and returned through gram.
 * Returns: iovec_s describing the dimensions of the Gram matrix
 *
 * @param N number of dimensions
 * @param T_dim dimension corresponding to the rows of A
 * @param T number of rows of A (codomain)
 * @param K_dim dimension corresponding to the columns of A
 * @param K number of columns of A (domain)
 * @param gram store the result (allocated by this function)
 * @param matrix_dims dimensions of A
 * @param matrix matrix data
 */
const struct iovec_s* compute_gram_matrix(unsigned int N, unsigned int T_dim, unsigned int T, unsigned int K_dim, unsigned int K, complex float** gram, const long matrix_dims[N], const complex float* matrix)
{
	// FIXME this can certainly be simplified...
	// Just be careful to consider the case where the data passed to the operator is a subset of a bigger array

	// One extra dimension (index N) is used as scratch:
	// mat_dims = [T K 1] or [K T 1]
	// trn_dims = [T 1 K] or [1 T K]
	// acc_dims = [1 K K] or [K 1 K]
	// after: gram_dims = [1 K1 K2] --> [K2 K1 1] or [K1 1 K2] --> [K1 K2 1]

	const unsigned int NX = N + 1;

	long mat_dims[NX];
	long mat_strs[NX];

	md_singleton_dims(NX, mat_dims);
	mat_dims[T_dim] = T;
	mat_dims[K_dim] = K;
	md_calc_strides(NX, mat_strs, mat_dims, CFL_SIZE);

	long trn_dims[NX];
	long trn_strs[NX];

	md_singleton_dims(NX, trn_dims);
	trn_dims[T_dim] = T;
	trn_dims[N] = K;
	md_calc_strides(NX, trn_strs, trn_dims, CFL_SIZE);

	long acc_dims[NX];
	long acc_strs[NX];

	md_singleton_dims(NX, acc_dims);
	acc_dims[K_dim] = K;
	acc_dims[N] = K;
	md_calc_strides(NX, acc_strs, acc_dims, CFL_SIZE);

	long loop_dims[NX];

	md_singleton_dims(NX, loop_dims);
	loop_dims[T_dim] = T;
	loop_dims[K_dim] = K;
	loop_dims[N] = K;

	long gram_dims[NX];

	md_singleton_dims(NX, gram_dims);
	gram_dims[T_dim] = K;
	gram_dims[K_dim] = K;

	complex float* acc = md_alloc_sameplace(NX, acc_dims, CFL_SIZE, matrix);
	complex float* mat = md_alloc_sameplace(NX, mat_dims, CFL_SIZE, matrix);
	complex float* trn = md_alloc_sameplace(NX, trn_dims, CFL_SIZE, matrix);

	// trn is mat with the K axis moved to the scratch dimension
	md_copy(N, matrix_dims, mat, matrix, CFL_SIZE);
	md_transpose(NX, K_dim, N, trn_dims, trn, mat_dims, mat, CFL_SIZE);

	// accumulate the product over the T axis
	md_clear(NX, acc_dims, acc, CFL_SIZE);
	md_zfmacc2(NX, loop_dims, acc_strs, acc, mat_strs, mat, trn_strs, trn);

	// move the scratch dimension back into T_dim to obtain an N-dim result
	*gram = md_alloc_sameplace(N, gram_dims, CFL_SIZE, matrix);
	md_transpose(NX, T_dim, N, gram_dims, *gram, acc_dims, acc, CFL_SIZE);

	const struct iovec_s* gram_iovec = iovec_create(N, gram_dims, CFL_SIZE);

	md_free(acc);
	md_free(mat);
	md_free(trn);

	return gram_iovec;
}
int ismrm_read(const char* datafile, long dims[DIMS], _Complex float* buf) { ISMRMRD_Dataset d; ismrmrd_init_dataset(&d, datafile, "/dataset"); ismrmrd_open_dataset(&d, false); assert(DIMS > 5); unsigned int number_of_acquisitions = ismrmrd_get_number_of_acquisitions(&d); long pos[DIMS]; long channels = -1; long slices = 0; long samples = -1; for (unsigned int i = 0; i < DIMS; i++) pos[i] = 0; long strs[DIMS]; long adc_dims[DIMS]; long adc_strs[DIMS]; if (NULL == buf) { md_singleton_dims(DIMS, dims); } else { md_calc_strides(DIMS, strs, dims, CFL_SIZE); md_select_dims(DIMS, READ_FLAG|COIL_FLAG, adc_dims, dims); md_calc_strides(DIMS, adc_strs, adc_dims, CFL_SIZE); } ISMRMRD_Acquisition acq; for (unsigned int i = 0; i < number_of_acquisitions; i++) { ismrmrd_init_acquisition(&acq); ismrmrd_read_acquisition(&d, i, &acq); if (acq.head.flags & (1 << (ISMRMRD_ACQ_IS_NOISE_MEASUREMENT - 1))) continue; if (-1 == channels) { channels = acq.head.available_channels; samples = acq.head.number_of_samples; } pos[1] = acq.head.idx.kspace_encode_step_1; pos[2] = acq.head.idx.kspace_encode_step_2; pos[4] = slices; // acq.head.idx.slice; if (buf != NULL) { assert(pos[1] < dims[1]); assert(pos[2] < dims[2]); assert(pos[4] < dims[4]); assert(dims[0] == acq.head.number_of_samples); assert(dims[3] == acq.head.active_channels); assert(dims[3] == acq.head.available_channels); debug_printf(DP_DEBUG3, ":/%ld %ld/%ld %ld/%ld :/%ld %ld/%ld %d\n", dims[0], pos[1], dims[1], pos[2], dims[2], dims[3], pos[4], dims[4], number_of_acquisitions); md_copy_block2(DIMS, pos, dims, strs, buf, adc_dims, adc_strs, acq.data, CFL_SIZE); } else { dims[1] = MAX(dims[1], pos[1] + 1); dims[2] = MAX(dims[2], pos[2] + 1); } if (acq.head.flags & (1 << (ISMRMRD_ACQ_LAST_IN_SLICE - 1))) slices++; // ismrmrd_free_acquisition(&acq); } if (NULL == buf) { dims[0] = samples; dims[3] = channels; dims[4] = slices; } else { assert(dims[3] == channels); assert(dims[4] == slices); } // printf("Done.\n"); return 0; }
int main_nufft(int argc, char* argv[]) { int c; bool adjoint = false; bool inverse = false; bool toeplitz = false; bool precond = false; bool use_gpu = false; bool two = false; bool calib = false; bool sizeinit = false; bool stoch = false; long coilim_dims[DIMS]; md_singleton_dims(DIMS, coilim_dims); int maxiter = 50; float lambda = 0.00; const char* pat_str = NULL; while (-1 != (c = getopt(argc, argv, "d:m:l:p:aihCto:w:2:c:S"))) { switch (c) { case '2': two = true; break; case 'i': inverse = true; break; case 'a': adjoint = true; break; case 'C': precond = true; break; case 'S': stoch = true; break; case 'c': calib = true; inverse = true; case 'd': sscanf(optarg, "%ld:%ld:%ld", &coilim_dims[0], &coilim_dims[1], &coilim_dims[2]); sizeinit = true; break; case 'm': maxiter = atoi(optarg); break; case 'p': pat_str = strdup(optarg); break; case 'l': lambda = atof(optarg); break; case 't': toeplitz = true; break; case 'h': usage(argv[0], stdout); help(); exit(0); default: usage(argv[0], stderr); exit(1); } } if (argc - optind != 3) { usage(argv[0], stderr); exit(1); } // Read trajectory long traj_dims[2]; complex float* traj = load_cfl(argv[optind + 0], 2, traj_dims); assert(3 == traj_dims[0]); if (!sizeinit) estimate_im_dims(coilim_dims, traj_dims, traj); num_init(); // Load pattern / density compensation (if any) complex float* pat = NULL; long pat_dims[2]; if (pat_str) { pat = load_cfl(pat_str, 2, pat_dims); assert(pat_dims[0] == 1); assert(pat_dims[1] == traj_dims[1]); } if (inverse || adjoint) { long ksp_dims[DIMS]; const complex float* ksp = load_cfl(argv[optind + 1], DIMS, ksp_dims); coilim_dims[COIL_DIM] = ksp_dims[COIL_DIM]; long out_dims[DIMS]; if (calib) { md_singleton_dims(DIMS, out_dims); estimate_im_dims(out_dims, traj_dims, traj); out_dims[COIL_DIM] = ksp_dims[COIL_DIM]; } else { md_copy_dims(DIMS, out_dims, coilim_dims); } complex float* out = create_cfl(argv[optind + 2], DIMS, out_dims); complex float* img = out; if (calib) img = md_alloc(DIMS, 
coilim_dims, CFL_SIZE); md_clear(DIMS, coilim_dims, img, CFL_SIZE); struct iter_conjgrad_conf cgconf = iter_conjgrad_defaults; cgconf.maxiter = maxiter; cgconf.l2lambda = 0.; cgconf.tol = 0; const struct linop_s* nufft_op; // Get nufft_op if (two) #ifdef BERKELEY_SVN nufft_op = nufft2_create(ksp_dims, coilim_dims, traj, pat, toeplitz, precond, &cgconf, use_gpu); #else assert(!two); #endif else
/** * * NUFFT operator initialization * * @param N - number of dimensions * @param ksp_dims - kspace dimension * @param cim_dims - coil images dimension * @param traj - trajectory * @param conf - configuration options * @param use_gpu - use gpu boolean * */ struct linop_s* nufft_create(unsigned int N, const long ksp_dims[N], const long cim_dims[N], const long traj_dims[N], const complex float* traj, const complex float* weights, struct nufft_conf_s conf, bool use_gpu) { struct nufft_data* data = (struct nufft_data*)xmalloc(sizeof(struct nufft_data)); data->N = N; data->use_gpu = use_gpu; data->traj = traj; data->conf = conf; data->width = 3.; data->beta = calc_beta(2., data->width); // get dims assert(md_check_compat(N - 3, 0, ksp_dims + 3, cim_dims + 3)); unsigned int ND = N + 3; data->ksp_dims = xmalloc(ND * sizeof(long)); data->cim_dims = xmalloc(ND * sizeof(long)); data->cml_dims = xmalloc(ND * sizeof(long)); data->img_dims = xmalloc(ND * sizeof(long)); data->trj_dims = xmalloc(ND * sizeof(long)); data->lph_dims = xmalloc(ND * sizeof(long)); data->psf_dims = xmalloc(ND * sizeof(long)); data->wgh_dims = xmalloc(ND * sizeof(long)); data->ksp_strs = xmalloc(ND * sizeof(long)); data->cim_strs = xmalloc(ND * sizeof(long)); data->cml_strs = xmalloc(ND * sizeof(long)); data->img_strs = xmalloc(ND * sizeof(long)); data->trj_strs = xmalloc(ND * sizeof(long)); data->lph_strs = xmalloc(ND * sizeof(long)); data->psf_strs = xmalloc(ND * sizeof(long)); data->wgh_strs = xmalloc(ND * sizeof(long)); md_singleton_dims(ND, data->cim_dims); md_singleton_dims(ND, data->ksp_dims); md_copy_dims(N, data->cim_dims, cim_dims); md_copy_dims(N, data->ksp_dims, ksp_dims); md_select_dims(ND, FFT_FLAGS, data->img_dims, data->cim_dims); assert(3 == traj_dims[0]); assert(traj_dims[1] == ksp_dims[1]); assert(traj_dims[2] == ksp_dims[2]); assert(md_check_compat(N - 3, ~0, traj_dims + 3, ksp_dims + 3)); assert(md_check_bounds(N - 3, ~0, traj_dims + 3, ksp_dims + 3)); md_singleton_dims(ND, 
data->trj_dims); md_copy_dims(N, data->trj_dims, traj_dims); // get strides md_calc_strides(ND, data->cim_strs, data->cim_dims, CFL_SIZE); md_calc_strides(ND, data->img_strs, data->img_dims, CFL_SIZE); md_calc_strides(ND, data->trj_strs, data->trj_dims, CFL_SIZE); md_calc_strides(ND, data->ksp_strs, data->ksp_dims, CFL_SIZE); data->weights = NULL; if (NULL != weights) { md_singleton_dims(ND, data->wgh_dims); md_select_dims(N, ~MD_BIT(0), data->wgh_dims, data->trj_dims); md_calc_strides(ND, data->wgh_strs, data->wgh_dims, CFL_SIZE); complex float* tmp = md_alloc(ND, data->wgh_dims, CFL_SIZE); md_copy(ND, data->wgh_dims, tmp, weights, CFL_SIZE); data->weights = tmp; } complex float* roll = md_alloc(ND, data->img_dims, CFL_SIZE); rolloff_correction(2., data->width, data->beta, data->img_dims, roll); data->roll = roll; complex float* linphase = compute_linphases(N, data->lph_dims, data->img_dims); md_calc_strides(ND, data->lph_strs, data->lph_dims, CFL_SIZE); if (!conf.toeplitz) md_zmul2(ND, data->lph_dims, data->lph_strs, linphase, data->lph_strs, linphase, data->img_strs, data->roll); fftmod(ND, data->lph_dims, FFT_FLAGS, linphase, linphase); fftscale(ND, data->lph_dims, FFT_FLAGS, linphase, linphase); // md_zsmul(ND, data->lph_dims, linphase, linphase, 1. 
/ (float)(data->trj_dims[1] * data->trj_dims[2])); complex float* fftm = md_alloc(ND, data->img_dims, CFL_SIZE); md_zfill(ND, data->img_dims, fftm, 1.); fftmod(ND, data->img_dims, FFT_FLAGS, fftm, fftm); data->fftmod = fftm; data->linphase = linphase; data->psf = NULL; if (conf.toeplitz) { #if 0 md_copy_dims(ND, data->psf_dims, data->lph_dims); #else md_copy_dims(3, data->psf_dims, data->lph_dims); md_copy_dims(ND - 3, data->psf_dims + 3, data->trj_dims + 3); data->psf_dims[N] = data->lph_dims[N]; #endif md_calc_strides(ND, data->psf_strs, data->psf_dims, CFL_SIZE); data->psf = compute_psf2(N, data->psf_dims, data->trj_dims, data->traj, data->weights); } md_copy_dims(ND, data->cml_dims, data->cim_dims); data->cml_dims[N + 0] = data->lph_dims[N + 0]; md_calc_strides(ND, data->cml_strs, data->cml_dims, CFL_SIZE); data->cm2_dims = xmalloc(ND * sizeof(long)); // ! md_copy_dims(ND, data->cm2_dims, data->cim_dims); for (int i = 0; i < 3; i++) data->cm2_dims[i] = (1 == cim_dims[i]) ? 1 : (2 * cim_dims[i]); data->grid = md_alloc(ND, data->cml_dims, CFL_SIZE); data->fft_op = linop_fft_create(ND, data->cml_dims, FFT_FLAGS, use_gpu); return linop_create(N, ksp_dims, N, cim_dims, data, nufft_apply, nufft_apply_adjoint, nufft_apply_normal, NULL, nufft_free_data); }
/* * ADMM (ADMM-2 from Afonso et al.) * * Solves min_x 0.5 || y - Ax ||_2^2 + sum_i f_i(G_i x), where the f_i are * arbitrary convex functions. If Aop is NULL, solves min_x sum_i f_i(G_i x) * * Each iteration requires solving the proximal of f_i, as well as applying * G_i, G_i^H, and G_i^H G_i, all which must be provided in admm_plan_s. */ void admm(struct admm_history_s* history, const struct admm_plan_s* plan, unsigned int D, const long z_dims[D], long N, float* x, const float* x_adj, const struct vec_iter_s* vops, void (*Aop)(void* _data, float* _dst, const float* _src), void* Aop_data, void* obj_eval_data, float (*obj_eval)(const void*, const float*)) { bool fast = plan->fast; double ABSTOL = plan->ABSTOL; double RELTOL = plan->RELTOL; float tau = plan->tau; float mu = plan->mu; unsigned int num_funs = D; long pos = 0; long fake_strs[num_funs]; md_singleton_dims(num_funs, fake_strs); long M = md_calc_offset(num_funs, fake_strs, z_dims); // allocate memory for history history->r_norm = *TYPE_ALLOC(double[plan->maxiter]); history->s_norm = *TYPE_ALLOC(double[plan->maxiter]); history->eps_pri = *TYPE_ALLOC(double[plan->maxiter]); history->eps_dual = *TYPE_ALLOC(double[plan->maxiter]); history->objective = *TYPE_ALLOC(double[plan->maxiter]); history->rho = *TYPE_ALLOC(float[plan->maxiter]); history->relMSE = *TYPE_ALLOC(double[plan->maxiter]); long Mjmax = 0; for(unsigned int i = 0; i < num_funs; i++) Mjmax = MAX(Mjmax, z_dims[i]); struct iter_history_s cghistory; cghistory.numiter = 0; cghistory.relMSE = *TYPE_ALLOC(double[plan->maxitercg]); cghistory.objective = *TYPE_ALLOC(double[plan->maxitercg]); cghistory.resid = *TYPE_ALLOC(double[plan->maxitercg]); // allocate memory for all of our auxiliary variables float* z = vops->allocate(M); float* u = vops->allocate(M); float* rhs = vops->allocate(N); float* r = vops->allocate(M); float* s = vops->allocate(N); float* Gjx_plus_uj = vops->allocate(Mjmax); float* GH_usum = NULL; float* zj_old = NULL; if (!fast) { 
GH_usum = vops->allocate(N); zj_old = vops->allocate(Mjmax); } float* x_err = NULL; if (NULL != plan->image_truth) x_err = vops->allocate(N); if (!fast) { if (NULL != plan->image_truth) debug_printf(DP_DEBUG3, "%3s\t%3s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "iter", "cgiter", "rho", "r norm", "eps pri", "s norm", "eps dual", "obj", "relMSE"); else debug_printf(DP_DEBUG3, "%3s\t%3s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\n", "iter", "cgiter", "rho", "r norm", "eps pri", "s norm", "eps dual", "obj"); } float rho = plan->rho; struct admm_normaleq_data ndata; ndata.N = N; ndata.num_funs = num_funs; ndata.ops = plan->ops; ndata.Aop = Aop; ndata.Aop_data = Aop_data; ndata.rho = 1.; ndata.tmp = vops->allocate(N); struct cg_data_s* cgdata = (struct cg_data_s*) cg_data_init(N, vops); // hogwild int hw_K = 1; int hw_k = 0; unsigned int grad_iter = 0; // keep track of number of gradient evaluations if (plan->do_warmstart) { for (unsigned int j = 0; j < num_funs; j++) { // initialize for j'th function update pos = md_calc_offset(j, fake_strs, z_dims); long Mj = z_dims[j]; plan->ops[j].forward(plan->ops[j].data, Gjx_plus_uj, x); // Gj(x) if (0 == rho) vops->copy(Mj, z + pos, Gjx_plus_uj); else plan->prox_ops[j].prox_fun(plan->prox_ops[j].data, 1. / rho, z + pos, Gjx_plus_uj); vops->sub(Mj, u + pos, Gjx_plus_uj, z + pos); } } else { vops->clear(M, z); vops->clear(M, u); } for (unsigned int i = 0; i < plan->maxiter; i++) { // update x vops->clear(N, rhs); vops->sub(M, r, z, u); for (unsigned int j = 0; j < num_funs; j++) { pos = md_calc_offset(j, fake_strs, z_dims); plan->ops[j].adjoint(plan->ops[j].data, s, r + pos); vops->add(N, rhs, rhs, s); } if ((NULL != Aop) && (NULL != Aop_data)) { vops->xpay(N, rho, rhs, x_adj); ndata.rho = rho; } // x update: use plan->xupdate_fun if specified. 
use conjgrad otherwise if ((NULL != plan->xupdate_fun) && (NULL != plan->xupdate_data)) { plan->xupdate_fun(plan->xupdate_data, rho, x, rhs); grad_iter++; } else { float eps = vops->norm(N, rhs); if (eps > 0.) { conjgrad_hist_prealloc(&cghistory, plan->maxitercg, 0., 1.E-3 * eps, N, &ndata, cgdata, vops, admm_normaleq, x, rhs, plan->image_truth, obj_eval_data, obj_eval); //conjgrad_hist(&cghistory, plan->maxitercg, 0., 1.E-3 * eps, N, &ndata, vops, admm_normaleq, x, rhs, plan->image_truth, obj_eval_data, obj_eval); } else { cghistory.numiter = 0; cghistory.relMSE[0] = 0.; cghistory.objective[0] = 0.; cghistory.resid[0] = 0.; } grad_iter += cghistory.numiter; } if ((NULL != obj_eval) && (NULL != obj_eval_data)) history->objective[i] = obj_eval(obj_eval_data, x); else history->objective[i] = 0.; double n1 = 0.; if (!fast) { vops->clear(N, GH_usum); vops->clear(N, s); vops->clear(M, r); } // z_j prox for (unsigned int j = 0; j < num_funs; j++) { // initialize for j'th function update pos = md_calc_offset(j, fake_strs, z_dims); long Mj = z_dims[j]; plan->ops[j].forward(plan->ops[j].data, Gjx_plus_uj, x); // Gj(x) // over-relaxation: Gjx_hat = alpha * Gj(x) + (1 - alpha) * zj_old if (!fast) { vops->copy(Mj, zj_old, z + pos); vops->copy(Mj, r + pos, Gjx_plus_uj); // rj = Gj(x) n1 = n1 + vops->dot(Mj, r + pos, r + pos); vops->smul(Mj, plan->alpha, Gjx_plus_uj, Gjx_plus_uj); vops->axpy(Mj, Gjx_plus_uj, (1. - plan->alpha), z + pos); } vops->add(Mj, Gjx_plus_uj, Gjx_plus_uj, u + pos); // Gj(x) + uj if (0 == rho) vops->copy(Mj, z + pos, Gjx_plus_uj); else plan->prox_ops[j].prox_fun(plan->prox_ops[j].data, 1. 
/ rho, z + pos, Gjx_plus_uj); vops->sub(Mj, u + pos, Gjx_plus_uj, z + pos); if (!fast) { // rj = rj - zj = Gj(x) - zj vops->sub(Mj, r + pos, r + pos, z + pos); // add next term to s: s = s + Gj^H (zj - zj_old) vops->sub(Mj, zj_old, z + pos, zj_old); plan->ops[j].adjoint(plan->ops[j].data, rhs, zj_old); vops->add(N, s, s, rhs); // GH_usum += G_j^H uj (for updating eps_dual) plan->ops[j].adjoint(plan->ops[j].data, rhs, u + pos); vops->add(N, GH_usum, GH_usum, rhs); } } history->rho[i] = rho; if (!fast) { history->s_norm[i] = rho * vops->norm(N, s); history->r_norm[i] = vops->norm(M, r); n1 = sqrt(n1); double n2 = vops->norm(M, z); history->eps_pri[i] = ABSTOL * sqrt(M) + RELTOL * (n1 > n2 ? n1 : n2); history->eps_dual[i] = ABSTOL * sqrt(N) + RELTOL * rho * vops->norm(N, GH_usum); if (NULL != plan->image_truth) { vops->sub(N, x_err, x, plan->image_truth); history->relMSE[i] = vops->norm(N, x_err) / vops->norm(N, plan->image_truth); } if (NULL != plan->image_truth) debug_printf(DP_DEBUG3, "%3d\t%3d\t%10.4f\t%10.4f\t%10.4f\t%10.4f\t%10.4f\t%10.4f\t%10.4f\n", i, grad_iter, history->rho[i], history->r_norm[i], history->eps_pri[i], history->s_norm[i], history->eps_dual[i], history->objective[i], history->relMSE[i]); else debug_printf(DP_DEBUG3, "%3d\t%3d\t%10.4f\t%10.4f\t%10.4f\t%10.4f\t%10.5f\t%10.4f\n", i, grad_iter, history->rho[i], history->r_norm[i], history->eps_pri[i], history->s_norm[i], history->eps_dual[i], history->objective[i]); if ((grad_iter > plan->maxiter) || ((history->r_norm[i] < history->eps_pri[i]) && (history->s_norm[i] < history->eps_dual[i]))) { history->numiter = i; break; } if (plan->dynamic_rho) { if (history->r_norm[i] > mu * history->s_norm[i]) { rho = rho * tau; vops->smul(M, 1. 
/ tau, u, u); } else if (history->s_norm[i] > mu * history->r_norm[i]) { rho = rho / tau; vops->smul(M, tau, u, u); } } } else { debug_printf(DP_DEBUG3, "### ITER: %d (%d)\n", i, grad_iter); if (grad_iter > plan->maxiter) break; } if (plan->hogwild) { hw_k++; if (hw_k == hw_K) { hw_k = 0; rho *= 2.; hw_K *= 2; vops->smul(M, 0.5, u, u); } } } // cleanup vops->del(z); vops->del(u); vops->del(rhs); vops->del(Gjx_plus_uj); vops->del(r); vops->del(s); if (!fast) { vops->del(GH_usum); vops->del(zj_old); } if (NULL != plan->image_truth) vops->del(x_err); vops->del(ndata.tmp); cg_data_free(cgdata, vops); free(cghistory.resid); free(cghistory.objective); free(cghistory.relMSE); free(history->r_norm); free(history->s_norm); free(history->eps_pri); free(history->eps_dual); free(history->objective); free(history->rho); }
/** * Efficiently chain two matrix linops by multiplying the actual matrices together. * Stores a copy of the new matrix. * Returns: C = B A * * @param a first matrix (applied to input) * @param b second matrix (applied to output of first matrix) */ struct linop_s* linop_matrix_chain(const struct linop_s* a, const struct linop_s* b) { const struct operator_matrix_s* a_data = CAST_DOWN(operator_matrix_s, linop_get_data(a)); const struct operator_matrix_s* b_data = CAST_DOWN(operator_matrix_s, linop_get_data(b)); // check compatibility assert(linop_codomain(a)->N == linop_domain(b)->N); assert(md_check_compat(linop_codomain(a)->N, 0u, linop_codomain(a)->dims, linop_domain(b)->dims)); unsigned int D = linop_domain(a)->N; unsigned long outB_flags = md_nontriv_dims(D, linop_codomain(b)->dims); unsigned long inB_flags = md_nontriv_dims(D, linop_domain(b)->dims); unsigned long delB_flags = inB_flags & ~outB_flags; unsigned int N = a_data->N; assert(N == 2 * D); long in_dims[N]; md_copy_dims(N, in_dims, a_data->in_dims); long matA_dims[N]; md_copy_dims(N, matA_dims, a_data->mat_dims); long matB_dims[N]; md_copy_dims(N, matB_dims, b_data->mat_dims); long out_dims[N]; md_copy_dims(N, out_dims, b_data->out_dims); for (unsigned int i = 0; i < D; i++) { if (MD_IS_SET(delB_flags, i)) { matA_dims[2 * i + 0] = a_data->mat_dims[2 * i + 1]; matA_dims[2 * i + 1] = a_data->mat_dims[2 * i + 0]; in_dims[2 * i + 0] = a_data->in_dims[2 * i + 1]; in_dims[2 * i + 1] = a_data->in_dims[2 * i + 0]; } } long matrix_dims[N]; md_singleton_dims(N, matrix_dims); unsigned long iflags = md_nontriv_dims(N, in_dims); unsigned long oflags = md_nontriv_dims(N, out_dims); unsigned long flags = iflags | oflags; // we combine a and b and sum over dims not in input or output md_max_dims(N, flags, matrix_dims, matA_dims, matB_dims); debug_printf(DP_DEBUG1, "tensor chain: %ld x %ld -> %ld\n", md_calc_size(N, matA_dims), md_calc_size(N, matB_dims), md_calc_size(N, matrix_dims)); complex float* matrix = 
md_alloc(N, matrix_dims, CFL_SIZE); debug_print_dims(DP_DEBUG2, N, matrix_dims); debug_print_dims(DP_DEBUG2, N, in_dims); debug_print_dims(DP_DEBUG2, N, matA_dims); debug_print_dims(DP_DEBUG2, N, matB_dims); debug_print_dims(DP_DEBUG2, N, out_dims); md_ztenmul(N, matrix_dims, matrix, matA_dims, a_data->mat, matB_dims, b_data->mat); // priv2 takes our doubled dimensions struct operator_matrix_s* data = linop_matrix_priv2(N, out_dims, in_dims, matrix_dims, matrix); /* although we internally use different dimensions we define the * correct interface */ struct linop_s* c = linop_create(linop_codomain(b)->N, linop_codomain(b)->dims, linop_domain(a)->N, linop_domain(a)->dims, CAST_UP(data), linop_matrix_apply, linop_matrix_apply_adjoint, linop_matrix_apply_normal, NULL, linop_matrix_del); md_free(matrix); return c; }
int main_twixread(int argc, char* argv[argc]) { long adcs = 0; bool autoc = false; bool linectr = false; bool partctr = false; long dims[DIMS]; md_singleton_dims(DIMS, dims); struct opt_s opts[] = { OPT_LONG('x', &(dims[READ_DIM]), "X", "number of samples (read-out)"), OPT_LONG('y', &(dims[PHS1_DIM]), "Y", "phase encoding steps"), OPT_LONG('z', &(dims[PHS2_DIM]), "Z", "partition encoding steps"), OPT_LONG('s', &(dims[SLICE_DIM]), "S", "number of slices"), OPT_LONG('v', &(dims[AVG_DIM]), "V", "number of averages"), OPT_LONG('c', &(dims[COIL_DIM]), "C", "number of channels"), OPT_LONG('n', &(dims[TIME_DIM]), "N", "number of repetitions"), OPT_LONG('a', &adcs, "A", "total number of ADCs"), OPT_SET('A', &autoc, "automatic [guess dimensions]"), OPT_SET('L', &linectr, "use linectr offset"), OPT_SET('P', &partctr, "use partctr offset"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); if (0 == adcs) adcs = dims[PHS1_DIM] * dims[PHS2_DIM] * dims[SLICE_DIM] * dims[TIME_DIM]; debug_print_dims(DP_DEBUG1, DIMS, dims); int ifd; if (-1 == (ifd = open(argv[1], O_RDONLY))) error("error opening file."); struct hdr_s hdr; bool vd = siemens_meas_setup(ifd, &hdr); long off[DIMS] = { 0 }; if (autoc) { long max[DIMS] = { [COIL_DIM] = 1000 }; long min[DIMS] = { 0 }; // min is always 0 adcs = 0; while (true) { if (-1 == siemens_bounds(vd, ifd, min, max)) break; debug_print_dims(DP_DEBUG3, DIMS, max); adcs++; } for (unsigned int i = 0; i < DIMS; i++) { off[i] = -min[i]; dims[i] = max[i] + off[i]; } debug_printf(DP_DEBUG2, "Dimensions: "); debug_print_dims(DP_DEBUG2, DIMS, dims); debug_printf(DP_DEBUG2, "Offset: "); debug_print_dims(DP_DEBUG2, DIMS, off); siemens_meas_setup(ifd, &hdr); // reset } complex float* out = create_cfl(argv[2], DIMS, dims); md_clear(DIMS, dims, out, CFL_SIZE); long adc_dims[DIMS]; md_select_dims(DIMS, READ_FLAG|COIL_FLAG, adc_dims, dims); void* buf = md_alloc(DIMS, adc_dims, CFL_SIZE); while (adcs--) { long pos[DIMS] = { [0 ... 
DIMS - 1] = 0 }; if (-1 == siemens_adc_read(vd, ifd, linectr, partctr, dims, pos, buf)) { debug_printf(DP_WARN, "Stopping.\n"); break; } for (unsigned int i = 0; i < DIMS; i++) pos[i] += off[i]; debug_print_dims(DP_DEBUG1, DIMS, pos); if (!md_is_index(DIMS, pos, dims)) { debug_printf(DP_WARN, "Index out of bounds.\n"); continue; } md_copy_block(DIMS, pos, dims, out, adc_dims, buf, CFL_SIZE); } md_free(buf); unmap_cfl(DIMS, dims, out); exit(0); }
struct wavelet_plan_s* prepare_wavelet_plan_filters(int numdims, const long imSize[numdims], unsigned int flags, const long minSize[numdims], int use_gpu, int filter_length, const float filter[4][filter_length]) { // Currently only accept flags=3,7 assert( (3 == flags) || (7 == flags) ); assert((use_gpu == 0) || (use_gpu == 1)); struct wavelet_plan_s* plan = (struct wavelet_plan_s*)xmalloc(sizeof(struct wavelet_plan_s)); plan->use_gpu = use_gpu; plan->imSize = (long*)xmalloc(sizeof(long)*numdims); md_singleton_dims(numdims, plan->imSize); // Get imSize, numPixel, numdims_tr // plan->numdims and flags ignores imSize[i]=1 plan->numdims_tr = 0; plan->numPixel = 1; plan->numPixel_tr = 1; plan->batchSize = 1; plan->flags = 0; int i,i_tr; int d = 0; for (i = 0; i < numdims; i++) { assert(imSize[i] > 0); if (1 != imSize[i]) { plan->imSize[d] = imSize[i]; plan->numPixel *= imSize[i]; if (MD_IS_SET(flags, i)) { plan->numdims_tr++; plan->numPixel_tr*=imSize[i]; } else plan->batchSize*=imSize[i]; if (MD_IS_SET(flags, i)) plan->flags = MD_SET(plan->flags, d); d++; } } plan->numdims = d; // Get imSize_tr, trDims (dimensions that we do wavelet transform), minSize_tr plan->imSize_tr = (long*)xmalloc(sizeof(long) * plan->numdims_tr); plan->trDims = (long*)xmalloc(sizeof(long) * plan->numdims_tr); plan->minSize_tr = (long*)xmalloc(sizeof(long) * plan->numdims_tr); i_tr = 0; for (i = 0; i < numdims; i++) { if (MD_IS_SET(flags, i) && (1 != imSize[i])) { plan->imSize_tr[i_tr] = imSize[i]; plan->trDims[i_tr] = i; assert(minSize[i_tr] > 0); plan->minSize_tr[i_tr] = minSize[i]; i_tr++; } } plan->filterLen = filter_length; #ifdef USE_CUDA if (plan->use_gpu) { prepare_wavelet_filters_gpu(plan,plan->filterLen,&(filter[0][0])); create_numLevels(plan); create_wavelet_sizes(plan); plan->state = 1; plan->randShift_tr = (long*)xmalloc(sizeof(long) * plan->numdims_tr); memset(plan->randShift_tr, 0, sizeof(long) * plan->numdims_tr); prepare_wavelet_temp_gpu(plan); } else #endif { plan->lod = 
filter[0]; plan->hid = filter[1]; plan->lor = filter[2]; plan->hir = filter[3]; create_numLevels(plan); create_wavelet_sizes(plan); plan->state = 1; plan->randShift_tr = (long*)xmalloc(sizeof(long) * plan->numdims_tr); memset(plan->randShift_tr, 0, sizeof(long) * plan->numdims_tr); plan->tmp_mem_tr = (data_t*)xmalloc(sizeof(data_t)*plan->numCoeff_tr*4); } plan->lambda = 1.; return plan; }
int main_twixread(int argc, char* argv[argc]) { int c; long adcs = 0; bool autoc = false; bool linectr = false; bool partctr = false; long dims[DIMS]; md_singleton_dims(DIMS, dims); while (-1 != (c = getopt(argc, argv, "x:y:z:s:c:a:n:PLAh"))) { switch (c) { case 'x': dims[READ_DIM] = atoi(optarg); break; case 'y': dims[PHS1_DIM] = atoi(optarg); break; case 'z': dims[PHS2_DIM] = atoi(optarg); break; case 's': dims[SLICE_DIM] = atoi(optarg); break; case 'v': dims[AVG_DIM] = atoi(optarg); break; case 'n': dims[TIME_DIM] = atoi(optarg); break; case 'a': adcs = atoi(optarg); break; case 'A': autoc = true; break; case 'c': dims[COIL_DIM] = atoi(optarg); break; case 'P': partctr = true; break; case 'L': linectr = true; break; case 'h': usage(argv[0], stdout); help(); exit(0); default: usage(argv[0], stderr); exit(1); } } if (argc - optind != 2) { usage(argv[0], stderr); exit(1); } if (0 == adcs) adcs = dims[PHS1_DIM] * dims[PHS2_DIM] * dims[SLICE_DIM] * dims[TIME_DIM]; debug_print_dims(DP_DEBUG1, DIMS, dims); int ifd; if (-1 == (ifd = open(argv[optind + 0], O_RDONLY))) error("error opening file."); struct hdr_s hdr; bool vd = siemens_meas_setup(ifd, &hdr); long off[DIMS] = { 0 }; if (autoc) { long max[DIMS] = { [COIL_DIM] = 1000 }; long min[DIMS] = { 0 }; // min is always 0 adcs = 0; while (true) { if (-1 == siemens_bounds(vd, ifd, min, max)) break; debug_print_dims(DP_DEBUG3, DIMS, max); adcs++; } for (unsigned int i = 0; i < DIMS; i++) { off[i] = -min[i]; dims[i] = max[i] + off[i]; } debug_printf(DP_INFO, "Dimensions: "); debug_print_dims(DP_INFO, DIMS, dims); debug_printf(DP_INFO, "Offset: "); debug_print_dims(DP_INFO, DIMS, off); siemens_meas_setup(ifd, &hdr); // reset } complex float* out = create_cfl(argv[optind + 1], DIMS, dims); md_clear(DIMS, dims, out, CFL_SIZE); long adc_dims[DIMS]; md_select_dims(DIMS, READ_FLAG|COIL_FLAG, adc_dims, dims); void* buf = md_alloc(DIMS, adc_dims, CFL_SIZE); while (adcs--) { long pos[DIMS] = { [0 ... 
DIMS - 1] = 0 }; if (-1 == siemens_adc_read(vd, ifd, linectr, partctr, dims, pos, buf)) { debug_printf(DP_WARN, "Stopping.\n"); break; } for (unsigned int i = 0; i < DIMS; i++) pos[i] += off[i]; debug_print_dims(DP_DEBUG1, DIMS, pos); if (!md_is_index(DIMS, pos, dims)) { debug_printf(DP_WARN, "Index out of bounds.\n"); continue; } md_copy_block(DIMS, pos, dims, out, adc_dims, buf, CFL_SIZE); } md_free(buf); unmap_cfl(DIMS, dims, out); exit(0); }
int main_nufft(int argc, char* argv[]) { int c; bool adjoint = false; bool inverse = false; bool use_gpu = false; bool sizeinit = false; struct nufft_conf_s conf = nufft_conf_defaults; struct iter_conjgrad_conf cgconf = iter_conjgrad_defaults; long coilim_dims[DIMS]; md_singleton_dims(DIMS, coilim_dims); float lambda = 0.; while (-1 != (c = getopt(argc, argv, "d:m:l:aiht"))) { switch (c) { case 'i': inverse = true; break; case 'a': adjoint = true; break; case 'd': sscanf(optarg, "%ld:%ld:%ld", &coilim_dims[0], &coilim_dims[1], &coilim_dims[2]); sizeinit = true; break; case 'm': cgconf.maxiter = atoi(optarg); break; case 'l': lambda = atof(optarg); break; case 't': conf.toeplitz = true; break; case 'h': usage(argv[0], stdout); help(); exit(0); default: usage(argv[0], stderr); exit(1); } } if (argc - optind != 3) { usage(argv[0], stderr); exit(1); } // Read trajectory long traj_dims[DIMS]; complex float* traj = load_cfl(argv[optind + 0], DIMS, traj_dims); assert(3 == traj_dims[0]); num_init(); if (inverse || adjoint) { long ksp_dims[DIMS]; const complex float* ksp = load_cfl(argv[optind + 1], DIMS, ksp_dims); assert(1 == ksp_dims[0]); assert(md_check_compat(DIMS, ~(PHS1_FLAG|PHS2_FLAG), ksp_dims, traj_dims)); md_copy_dims(DIMS - 3, coilim_dims + 3, ksp_dims + 3); if (!sizeinit) { estimate_im_dims(DIMS, coilim_dims, traj_dims, traj); debug_printf(DP_INFO, "Est. image size: %ld %ld %ld\n", coilim_dims[0], coilim_dims[1], coilim_dims[2]); } complex float* img = create_cfl(argv[optind + 2], DIMS, coilim_dims); md_clear(DIMS, coilim_dims, img, CFL_SIZE); const struct linop_s* nufft_op = nufft_create(DIMS, ksp_dims, coilim_dims, traj_dims, traj, NULL, conf, use_gpu); if (inverse) { lsqr(DIMS, &(struct lsqr_conf){ lambda }, iter_conjgrad, &cgconf, nufft_op, NULL, coilim_dims, img, ksp_dims, ksp); } else {