float svthresh_blockproc(const void* _data, const long blkdims[DIMS], complex float* dst, const complex float* src)
{
    const struct svthresh_blockproc_data* data = (const struct svthresh_blockproc_data*)_data;

    long M = 1;
    long N = md_calc_size(DIMS, blkdims);

    for (unsigned int i = 0; i < DIMS; i++) {

        if (MD_IS_SET(data->mflags, i)) {

            M *= blkdims[i];
            N /= blkdims[i];
        }
    }

    if (data->remove_mean == 1)
        svthresh_nomeanu(M, N, data->lambda, dst, src);
    else if (data->remove_mean == 2)
        svthresh_nomeanv(M, N, data->lambda, dst, src);
    else if (data->remove_mean == 0)
        svthresh(M, N, data->lambda, dst, src);
    else
        assert(0);

    return 0;
}
/**
 * Generic function which loops over all dimensions of a set of
 * multi-dimensional arrays and calls a given function for each position.
 * This function tries to parallelize over the dimensions indicated
 * with flags.
 */
void md_parallel_nary(unsigned int C, unsigned int D, const long dim[D], unsigned long flags, const long* str[C], void* ptr[C], void* data, md_nary_fun_t fun)
{
    if (0 == flags) {

        md_nary(C, D, dim, str, ptr, data, fun);
        return;
    }

    int b = ffsl(flags & -flags) - 1;
    assert(MD_IS_SET(flags, b));

    flags = MD_CLEAR(flags, b);

    long dimc[D];
    md_select_dims(D, ~MD_BIT(b), dimc, dim);

    debug_printf(DP_DEBUG4, "Parallelize: %d\n", dim[b]);

    // FIXME: this probably doesn't nest
    // (maybe collect all parallelizable dims into one giant loop?)
    #pragma omp parallel for
    for (long i = 0; i < dim[b]; i++) {

        void* moving_ptr[C];

        for (unsigned int j = 0; j < C; j++)
            moving_ptr[j] = ptr[j] + i * str[j][b];

        md_parallel_nary(C, D, dimc, flags, str, moving_ptr, data, fun);
    }
}
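/*
 * Standalone sketch (not library code) of the bit trick used above:
 * `flags & -flags` isolates the lowest set bit, and ffsl() (the glibc
 * extension already used by md_parallel_nary) returns its 1-based index,
 * so the expression below yields the first dimension to recurse over.
 */
#include <assert.h>
#include <strings.h>

static int lowest_set_bit(unsigned long flags)
{
    assert(0 != flags);

    return ffsl(flags & -flags) - 1;    // e.g. flags = 0x6 -> 1, flags = 0x8 -> 3
}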
/**
 * compute set of dimensions to parallelize
 *
 */
unsigned int dims_parallel(unsigned int D, unsigned int io, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D])
{
    unsigned int flags = parallelizable(D, io, N, dims, strs, size);

    unsigned int i = D;
    unsigned int count = 1;

    long reps = md_calc_size(N, dims);

    unsigned int oflags = 0;

    while ((count < CORES) && (i-- > 0)) {

        if (MD_IS_SET(flags, i)) {

            reps /= dims[i];

            if (reps < CHUNK)
                break;

            oflags = MD_SET(oflags, i);

            //break; // only 1
        }
    }

    return oflags;
}
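/*
 * Worked example (CHUNK value hypothetical): with dims = { 128, 128, 8 }, all
 * three dimensions parallelizable and CHUNK = 4096, the loop starts at the
 * highest dimension: reps = 131072 / 8 = 16384 >= CHUNK, so dimension 2 is
 * selected; next reps = 16384 / 128 = 128 < CHUNK, so the loop stops and only
 * dimension 2 is returned, keeping enough sequential work per thread.
 */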
/*
 * Implements finite difference operator (order 1 for now)
 * using circular shift: diff(x) = x - circshift(x)
 * @param snip keeps the first entry if snip is false; clears the first entry if snip is true
 *
 * optr = [iptr(1); diff(iptr)]
 */
static void md_zfinitediff_core2(unsigned int D, const long dims[D], unsigned int flags, bool snip, complex float* tmp, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr)
{
    md_copy2(D, dims, istrs, tmp, istrs, iptr, sizeof(complex float));

    long zdims[D];
    long center[D];

    md_select_dims(D, ~0, zdims, dims);
    memset(center, 0, D * sizeof(long));

    for (unsigned int i = 0; i < D; i++) {

        if (MD_IS_SET(flags, i)) {

            center[i] = 1; // order

            md_circ_shift2(D, dims, center, ostrs, optr, istrs, tmp, sizeof(complex float));

            zdims[i] = 1;

            if (!snip) // zero out first dimension before subtracting
                md_clear2(D, zdims, ostrs, optr, sizeof(complex float));

            md_zsub2(D, dims, ostrs, optr, istrs, tmp, ostrs, optr);
            md_copy2(D, dims, ostrs, tmp, ostrs, optr, sizeof(complex float));

            if (snip) // zero out first dimension after subtracting
                md_clear2(D, zdims, ostrs, optr, sizeof(complex float));

            center[i] = 0;
            zdims[i] = dims[i];
        }
    }
}
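/*
 * Standalone 1-D sketch (plain C, not library code) of the scheme above:
 * diff(x) = x - circshift(x, 1), with the first entry kept when snip is
 * false and cleared when snip is true, matching optr = [iptr(1); diff(iptr)].
 */
#include <stdbool.h>

static void finitediff_1d(int n, float* out, const float* in, bool snip)
{
    for (int i = 0; i < n; i++)
        out[i] = in[i] - in[(i - 1 + n) % n];   // subtract the circularly shifted copy

    out[0] = snip ? 0.f : in[0];    // first entry: cleared (snip) or kept (!snip)
}

// finitediff_1d(4, out, (float[]){ 1., 3., 6., 10. }, false) yields { 1., 2., 3., 4. }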
int main_reshape(int argc, char* argv[])
{
    cmdline(&argc, argv, 3, 100, usage_str, help_str, 0, NULL);

    num_init();

    unsigned int flags = atoi(argv[1]);
    unsigned int n = bitcount(flags);

    assert((int)n + 3 == argc - 1);

    long in_dims[DIMS];
    long in_strs[DIMS];

    long out_dims[DIMS];
    long out_strs[DIMS];

    complex float* in_data = load_cfl(argv[n + 2], DIMS, in_dims);

    md_calc_strides(DIMS, in_strs, in_dims, CFL_SIZE);
    md_copy_dims(DIMS, out_dims, in_dims);

    unsigned int j = 0;

    for (unsigned int i = 0; i < DIMS; i++)
        if (MD_IS_SET(flags, i))
            out_dims[i] = atoi(argv[j++ + 2]);

    assert(j == n);
    assert(md_calc_size(DIMS, in_dims) == md_calc_size(DIMS, out_dims));

    md_calc_strides(DIMS, out_strs, out_dims, CFL_SIZE);

    for (unsigned int i = 0; i < DIMS; i++)
        if (!(MD_IS_SET(flags, i) || (in_strs[i] == out_strs[i])))
            error("Dimensions are not consistent at index %d.\n", i);

    complex float* out_data = create_cfl(argv[n + 3], DIMS, out_dims);

    md_copy(DIMS, in_dims, out_data, in_data, CFL_SIZE);

    unmap_cfl(DIMS, in_dims, in_data);
    unmap_cfl(DIMS, out_dims, out_data);

    exit(0);
}
/**
 * Generates multiscale low rank block sizes
 *
 * @param blkdims - block sizes to be written
 * @param flags - specifies which dimensions to block; the other dimensions keep the input size
 * @param idims - input dimensions
 * @param blkskip - scale each level by blkskip to generate the next level
 *
 * returns number of levels
 */
long multilr_blkdims(long blkdims[MAX_LEV][DIMS], unsigned long flags, const long idims[DIMS], int blkskip, long initblk)
{
    // Multiscale low rank block sizes
    long tmp_block[DIMS];

    for (unsigned int i = 0; i < DIMS; i++) {

        if (MD_IS_SET(flags, i))
            tmp_block[i] = MIN(initblk, idims[i]);
        else
            tmp_block[i] = idims[i];
    }

    bool done;

    // Loop block_sizes
    long levels = 0;

    do {
        levels++;

        debug_printf(DP_INFO, "[\t");

        for (unsigned int i = 0; i < DIMS; i++) {

            blkdims[levels - 1][i] = tmp_block[i];
            debug_printf(DP_INFO, "%ld\t", blkdims[levels - 1][i]);
        }

        debug_printf(DP_INFO, "]\n");

        done = true;

        for (unsigned int i = 0; i < DIMS; i++) {

            if (MD_IS_SET(flags, i) && (idims[i] != 1)) {

                tmp_block[i] = MIN(tmp_block[i] * blkskip, idims[i]);
                done = done && (blkdims[levels - 1][i] == idims[i]);
            }
        }

    } while (!done);

    return levels;
}
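/*
 * Worked example (values hypothetical): with idims[i] = 64 along a selected
 * dimension, initblk = 8 and blkskip = 2, the loop above produces block sizes
 * 8, 16, 32, 64 along that dimension, i.e. levels = 4. Non-selected
 * dimensions keep the full input size at every level.
 */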
static bool wavelet_check_dims(unsigned int N, unsigned int flags, const long dims[N], const long minsize[N])
{
    for (unsigned int i = 0; i < N; i++)
        if (MD_IS_SET(flags, i))
            if ((minsize[i] <= 2) || (dims[i] < minsize[i]))
                return false;

    return true;
}
void fftshift2(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src)
{
    long pos[N];
    md_set_dims(N, pos, 0);

    for (unsigned int i = 0; i < N; i++)
        if (MD_IS_SET(flags, i))
            pos[i] = dims[i] / 2;

    md_circ_shift2(N, dims, pos, ostrs, dst, istrs, src, CFL_SIZE);
}
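/*
 * Standalone 1-D sketch (not library code) of the shift performed above:
 * a circular shift by dims[i] / 2 moves the zero-frequency sample to the
 * center (for even lengths the forward and backward conventions coincide).
 */
static void fftshift_1d(int n, float* dst, const float* src)
{
    for (int i = 0; i < n; i++)
        dst[(i + n / 2) % n] = src[i];
}

// fftshift_1d(4, dst, (float[]){ 0., 1., 2., 3. }) yields { 2., 3., 0., 1. }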
static void prox_dfwavelet_thresh(const operator_data_t* _data, float thresh, complex float* out, const complex float* in)
{
    struct prox_dfwavelet_data* data = CONTAINER_OF(_data, struct prox_dfwavelet_data, base);

    bool done = false;

    long pos[DIMS];
    md_set_dims(DIMS, pos, 0);

    while (!done) {

        // copy vx, vy, vz
        md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->vx, in, CFL_SIZE);
        pos[data->flow_dim]++;
        md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->vy, in, CFL_SIZE);
        pos[data->flow_dim]++;
        md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->vz, in, CFL_SIZE);
        pos[data->flow_dim] = 0;

        // threshold
        dfwavelet_thresh(data->plan, thresh * data->lambda, thresh * data->lambda, data->vx, data->vy, data->vz, data->vx, data->vy, data->vz);

        // copy vx, vy, vz
        md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->vx, CFL_SIZE);
        pos[data->flow_dim]++;
        md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->vy, CFL_SIZE);
        pos[data->flow_dim]++;
        md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->vz, CFL_SIZE);
        pos[data->flow_dim] = 0;

        // increment pos
        long carryon = 1;

        for (unsigned int i = 0; i < DIMS; i++) {

            if (MD_IS_SET(data->slice_flag & ~MD_BIT(data->flow_dim), i)) {

                pos[i] += carryon;

                if (pos[i] < data->im_dims[i]) {

                    carryon = 0;
                    break;

                } else {

                    carryon = 1;
                    pos[i] = 0;
                }
            }
        }

        done = carryon;
    }
}
static void wavelet_dims_r(unsigned int N, unsigned int n, unsigned int flags, long odims[2 * N], const long dims[N], const long flen)
{
    if (MD_IS_SET(flags, n)) {

        odims[0 + n] = bandsize(dims[n], flen);
        odims[N + n] = 2;
    }

    if (n > 0)
        wavelet_dims_r(N, n - 1, flags, odims, dims, flen);
}
void fwtN(unsigned int N, unsigned int flags, const long shifts[N], const long dims[N], const long ostr[2 * N], complex float* out, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen])
{
    long odims[2 * N];
    wavelet_dims(N, flags, odims, dims, flen);

    assert(md_calc_size(2 * N, odims) >= md_calc_size(N, dims));

    // FIXME one of these is unnecessary if we use the output
    complex float* tmpA = md_alloc_sameplace(2 * N, odims, CFL_SIZE, out);
    complex float* tmpB = md_alloc_sameplace(2 * N, odims, CFL_SIZE, out);

    long tidims[2 * N];
    md_copy_dims(N, tidims, dims);
    md_singleton_dims(N, tidims + N);

    long tistrs[2 * N];
    md_calc_strides(2 * N, tistrs, tidims, CFL_SIZE);

    long todims[2 * N];
    md_copy_dims(2 * N, todims, tidims);

    long tostrs[2 * N];

    // maybe we should push the randshift into lower levels

    //md_copy2(N, dims, tistrs, tmpA, istr, in, CFL_SIZE);
    md_circ_shift2(N, dims, shifts, tistrs, tmpA, istr, in, CFL_SIZE);

    for (unsigned int i = 0; i < N; i++) {

        if (MD_IS_SET(flags, i)) {

            todims[0 + i] = odims[0 + i];
            todims[N + i] = odims[N + i];

            md_calc_strides(2 * N, tostrs, todims, CFL_SIZE);

            fwt1(2 * N, i, tidims, tostrs, tmpB, (void*)tmpB + tostrs[N + i], tistrs, tmpA, flen, filter);

            md_copy_dims(2 * N, tidims, todims);
            md_copy_dims(2 * N, tistrs, tostrs);

            complex float* swap = tmpA;
            tmpA = tmpB;
            tmpB = swap;
        }
    }

    md_copy2(2 * N, todims, ostr, out, tostrs, tmpA, CFL_SIZE);

    md_free(tmpA);
    md_free(tmpB);
}
/**
 * Generates locally low rank block sizes
 *
 * @param blkdims - block sizes to be written
 * @param flags - specifies which dimensions to block; the other dimensions keep the input size
 * @param idims - input dimensions
 * @param llrblk - the block size
 *
 * returns number of levels = 1
 */
long llr_blkdims(long blkdims[MAX_LEV][DIMS], unsigned long flags, const long idims[DIMS], long llrblk)
{
    for (unsigned int i = 0; i < DIMS; i++) {

        if (MD_IS_SET(flags, i))
            blkdims[0][i] = MIN(llrblk, idims[i]);
        else
            blkdims[0][i] = idims[i];
    }

    return 1;
}
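/*
 * Example (hypothetical values): with idims = { 256, 256, 1, 8 } in the first
 * four dimensions, flags selecting dimensions 0 and 1, and llrblk = 16, this
 * writes blkdims[0] = { 16, 16, 1, 8 }: 16x16 spatial blocks, all other
 * dimensions kept at the input size, and a single level is returned.
 */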
void iwtN(unsigned int N, unsigned int flags, const long shifts[N], const long dims[N], const long ostr[N], complex float* out, const long istr[2 * N], const complex float* in, const long flen, const float filter[2][2][flen])
{
    long idims[2 * N];
    wavelet_dims(N, flags, idims, dims, flen);

    assert(md_calc_size(2 * N, idims) >= md_calc_size(N, dims));

    complex float* tmpA = md_alloc_sameplace(2 * N, idims, CFL_SIZE, out);
    complex float* tmpB = md_alloc_sameplace(2 * N, idims, CFL_SIZE, out);

    long tidims[2 * N];
    md_copy_dims(2 * N, tidims, idims);

    long tistrs[2 * N];
    md_calc_strides(2 * N, tistrs, tidims, CFL_SIZE);

    long todims[2 * N];
    md_copy_dims(2 * N, todims, tidims);

    long tostrs[2 * N];

    long ishifts[N];
    for (unsigned int i = 0; i < N; i++)
        ishifts[i] = -shifts[i];

    md_copy2(2 * N, tidims, tistrs, tmpA, istr, in, CFL_SIZE);

    for (int i = N - 1; i >= 0; i--) {  // run backwards to maintain contiguous blocks

        if (MD_IS_SET(flags, i)) {

            todims[0 + i] = dims[0 + i];
            todims[N + i] = 1;

            md_calc_strides(2 * N, tostrs, todims, CFL_SIZE);

            iwt1(2 * N, i, todims, tostrs, tmpB, tistrs, tmpA, (void*)tmpA + tistrs[N + i], flen, filter);

            md_copy_dims(2 * N, tidims, todims);
            md_copy_dims(2 * N, tistrs, tostrs);

            complex float* swap = tmpA;
            tmpA = tmpB;
            tmpB = swap;
        }
    }

    //md_copy2(N, dims, ostr, out, tostrs, tmpA, CFL_SIZE);
    md_circ_shift2(N, dims, ishifts, ostr, out, tostrs, tmpA, CFL_SIZE);

    md_free(tmpA);
    md_free(tmpB);
}
static void embed(unsigned int N, unsigned int flags, long ostr[N], const long dims[N], const long str[N])
{
    unsigned int b = ffs(flags) - 1;

    long dims1[N];
    md_select_dims(N, flags, dims1, dims);

    md_calc_strides(N, ostr, dims1, str[b]);

    for (unsigned int i = 0; i < N; i++)
        if (!MD_IS_SET(flags, i))
            ostr[i] = str[i];
}
/**
 * compute set of parallelizable dimensions
 *
 */
static unsigned int parallelizable(unsigned int D, unsigned int io, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D])
{
    // we assume no input / output overlap
    // (i.e. inputs which are also outputs have to be marked as output)

    // a dimension is parallelizable if all output operations
    // for that dimension are independent

    // for all output operations:
    // check - all other dimensions have strides greater than or equal to
    // the extent of this dimension, or have an extent smaller than or
    // equal to the stride of this dimension

    // no overlap: [222]
    //                  [111111111111]
    //                                [333333333]
    //
    // overlap:    [222]
    //                [1111111111111111]
    //                          [333333333]

    unsigned int flags = (1 << N) - 1;

    for (unsigned int d = 0; d < D; d++) {

        if (MD_IS_SET(io, d)) {

            bool m[N][N];
            compute_enclosures(N, m, dims, *strs[d]);

            //  print_dims(N, dims);
            //  print_dims(N, *strs[d]);

            for (unsigned int i = 0; i < N; i++) {

                unsigned int a = 0;

                for (unsigned int j = 0; j < N; j++)
                    if (m[i][j] || m[j][i])
                        a++;

                //  printf("%d %d %d\n", d, i, a);

                if ((a != N - 1) || ((size_t)labs((*strs[d])[i]) < size[d]))
                    flags = MD_CLEAR(flags, i);
            }
        }
    }

    return flags;
}
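/*
 * Worked example (strides in elements, hypothetical): for an output with
 * dims = { 4, 3 } and strs = { 1, 4 }, each slice along dimension 1 occupies
 * 4 contiguous elements and consecutive slices start 4 elements apart, so no
 * two iterations write the same address and both dimensions stay in the flags.
 * With strs = { 1, 2 } the slices overlap (second diagram above), distinct
 * index combinations hit the same addresses, and the dimensions are cleared.
 */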
void create_wavelet_sizes(struct wavelet_plan_s* plan)
{
    int numdims_tr = plan->numdims_tr;
    int filterLen = plan->filterLen;
    int numLevels_tr = plan->numLevels_tr;
    int numSubCoef;

    plan->waveSizes_tr = (long*)xmalloc(sizeof(long) * numdims_tr * (numLevels_tr + 2));

    // Get number of subbands per level (3 for 2d, 7 for 3d)
    // Set the last bandSize to be imSize
    int d, l;
    int numSubband = 1;

    for (d = 0; d < numdims_tr; d++) {

        plan->waveSizes_tr[d + numdims_tr * (numLevels_tr + 1)] = plan->imSize_tr[d];
        numSubband <<= 1;
    }

    numSubband--;

    // Get numCoeff and waveSizes
    // Each bandSize[l] is (bandSize[l+1] + filterLen - 1)/2
    plan->numCoeff_tr = 0;

    for (l = plan->numLevels_tr; l >= 1; --l) {

        numSubCoef = 1;

        for (d = 0; d < numdims_tr; d++) {

            plan->waveSizes_tr[d + numdims_tr * l] = (plan->waveSizes_tr[d + numdims_tr * (l + 1)] + filterLen - 1) / 2;
            numSubCoef *= plan->waveSizes_tr[d + numdims_tr * l];
        }

        plan->numCoeff_tr += numSubband * numSubCoef;

        if (l == 1)
            plan->numCoarse_tr = numSubCoef;
    }

    numSubCoef = 1;

    for (d = 0; d < numdims_tr; d++) {

        plan->waveSizes_tr[d] = plan->waveSizes_tr[numdims_tr + d];
        numSubCoef *= plan->waveSizes_tr[d];
    }

    plan->numCoeff_tr += numSubCoef;

    // Get actual numCoeff
    plan->numCoeff = plan->numCoeff_tr;

    for (d = 0; d < plan->numdims; d++) {

        if (!MD_IS_SET(plan->flags, d))
            plan->numCoeff *= plan->imSize[d];
    }
}
static complex float* compute_linphases(unsigned int N, long lph_dims[N + 3], const long img_dims[N + 3])
{
    float shifts[8][3];
    int s = 0;

    for (int i = 0; i < 8; i++) {

        bool skip = false;

        for (int j = 0; j < 3; j++) {

            shifts[s][j] = 0.;

            if (MD_IS_SET(i, j)) {

                skip = skip || (1 == img_dims[j]);
                shifts[s][j] = -0.5;
            }
        }

        if (!skip)
            s++;
    }

    unsigned int ND = N + 3;
    md_select_dims(ND, FFT_FLAGS, lph_dims, img_dims);
    lph_dims[N + 0] = s;

    complex float* linphase = md_alloc(ND, lph_dims, CFL_SIZE);

    for (int i = 0; i < s; i++) {

        float shifts2[ND];

        for (unsigned int j = 0; j < ND; j++)
            shifts2[j] = 0.;

        shifts2[0] = shifts[i][0];
        shifts2[1] = shifts[i][1];
        shifts2[2] = shifts[i][2];

        linear_phase(ND, img_dims, shifts2, linphase + i * md_calc_size(ND, img_dims));
    }

    return linphase;
}
static void wavelet3_thresh_apply(const operator_data_t* _data, float mu, complex float* out, const complex float* in)
{
    const struct wavelet3_thresh_s* data = CAST_DOWN(wavelet3_thresh_s, _data);

    long shift[data->N];

    for (unsigned int i = 0; i < data->N; i++)
        shift[i] = 0;

    if (data->randshift) {

        int levels = wavelet_num_levels(data->N, data->flags, data->dims, data->minsize, 4);

        for (unsigned int i = 0; i < data->N; i++)
            if (MD_IS_SET(data->flags, i))
                shift[i] = rand_lim((unsigned int*)&data->rand_state, 1 << levels);
    }

    wavelet3_thresh(data->N, data->lambda * mu, data->flags, shift, data->dims, out, in, data->minsize, 4, wavelet3_dau2);
}
static struct operator_matrix_s* linop_matrix_priv(unsigned int N, const long out_dims[N], const long in_dims[N], const long matrix_dims[N], const complex float* matrix)
{
    unsigned long out_flags = md_nontriv_dims(N, out_dims);
    unsigned long in_flags = md_nontriv_dims(N, in_dims);

    unsigned long del_flags = in_flags & ~out_flags;

    /* we double dimensions for chaining which can lead to
     * matrices with the same input and output dimension */

    long out_dims2[2 * N];
    long mat_dims2[2 * N];
    long in_dims2[2 * N];

    shadow_dims(N, out_dims2, out_dims);
    shadow_dims(N, mat_dims2, matrix_dims);
    shadow_dims(N, in_dims2, in_dims);

    /* move removed input dims into shadow position
     * which makes chaining easier below */
    for (unsigned int i = 0; i < N; i++) {

        if (MD_IS_SET(del_flags, i)) {

            assert(1 == out_dims2[2 * i + 0]);
            assert(mat_dims2[2 * i + 0] == in_dims2[2 * i + 0]);

            mat_dims2[2 * i + 1] = mat_dims2[2 * i + 0];
            mat_dims2[2 * i + 0] = 1;

            in_dims2[2 * i + 1] = in_dims[i];
            in_dims2[2 * i + 0] = 1;
        }
    }

    return linop_matrix_priv2(2 * N, out_dims2, in_dims2, mat_dims2, matrix);
}
/*
 * Implements cumulative sum operator (order 1 for now)
 * using circular shift: cumsum(x) = x + circshift(x,1) + circshift(x,2) + ...
 *
 * optr = cumsum(iptr)
 */
static void md_zcumsum_core2(unsigned int D, const long dims[D], unsigned int flags, complex float* tmp, complex float* tmp2, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr)
{
    //out = dx
    md_copy2(D, dims, ostrs, optr, istrs, iptr, sizeof(complex float));
    md_copy2(D, dims, istrs, tmp, istrs, iptr, sizeof(complex float));

    long zdims[D];
    long center[D];

    md_select_dims(D, ~0, zdims, dims);
    memset(center, 0, D * sizeof(long));

    for (unsigned int i = 0; i < D; i++) {

        if (MD_IS_SET(flags, i)) {

            for (int d = 1; d < dims[i]; d++) {

                // tmp = circshift(tmp, i)
                center[i] = d;
                md_circ_shift2(D, dims, center, istrs, tmp2, istrs, tmp, sizeof(complex float));

                zdims[i] = d;

                // tmp(1:d,:) = 0
                md_clear2(D, zdims, istrs, tmp2, sizeof(complex float));
                //md_zsmul2(D, zdims, istrs, tmp2, istrs, tmp2, 0.);
                //dump_cfl("tmp2", D, dims, tmp2);

                // out = out + tmp
                md_zadd2(D, dims, ostrs, optr, istrs, tmp2, ostrs, optr);
                //md_copy2(D, dims, ostrs, tmp, ostrs, optr, sizeof(complex float));
            }

            md_copy2(D, dims, ostrs, tmp, ostrs, optr, sizeof(complex float));

            center[i] = 0;
            zdims[i] = dims[i];
        }
    }
}
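/*
 * Standalone 1-D sketch (plain C, not library code) of the scheme above:
 * the cumulative sum is accumulated from shifted copies of the input with
 * the wrapped-around entries cleared, mirroring what the md_* calls do
 * along one flagged dimension.
 */
static void cumsum_1d(int n, float* out, const float* in)
{
    for (int i = 0; i < n; i++)
        out[i] = in[i];             // out = x

    for (int d = 1; d < n; d++)     // out += circshift(x, d) with entries 0..d-1 cleared
        for (int i = d; i < n; i++)
            out[i] += in[i - d];
}

// cumsum_1d(4, out, (float[]){ 1., 2., 3., 4. }) yields { 1., 3., 6., 10. }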
float nucnorm_blockproc(const void* _data, const long blkdims[DIMS], complex float* dst, const complex float* src)
{
    UNUSED(dst);

    const struct svthresh_blockproc_data* data = (const struct svthresh_blockproc_data*)_data;

    long M = 1;
    long N = md_calc_size(DIMS, blkdims);

    for (unsigned int i = 0; i < DIMS; i++) {

        if (MD_IS_SET(data->mflags, i)) {

            M *= blkdims[i];
            N /= blkdims[i];
        }
    }

    float G = sqrtf(M) + sqrtf(N);

    return G * nuclearnorm(M, N, src);
}
static fftwf_plan fft_fftwf_plan(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src, bool backwards, bool measure)
{
    unsigned int N = D;

    fftwf_iodim64 dims[N];
    fftwf_iodim64 hmdims[N];

    unsigned int k = 0;
    unsigned int l = 0;

    //FFTW seems to be fine with this
    //assert(0 != flags);

    for (unsigned int i = 0; i < N; i++) {

        if (MD_IS_SET(flags, i)) {

            dims[k].n = dimensions[i];
            dims[k].is = istrides[i] / CFL_SIZE;
            dims[k].os = ostrides[i] / CFL_SIZE;
            k++;

        } else {

            hmdims[l].n = dimensions[i];
            hmdims[l].is = istrides[i] / CFL_SIZE;
            hmdims[l].os = ostrides[i] / CFL_SIZE;
            l++;
        }
    }

    fftwf_plan fftwf;

    #pragma omp critical
    fftwf = fftwf_plan_guru64_dft(k, dims, l, hmdims, (complex float*)src, dst, backwards ? 1 : (-1), measure ? FFTW_MEASURE : FFTW_ESTIMATE);

    return fftwf;
}
static void fftmod2_r(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src, bool inv, double phase)
{
    if (0 == flags) {

        md_zsmul2(N, dims, ostrs, dst, istrs, src, cexp(M_PI * 2.i * (inv ? -phase : phase)));
        return;
    }

    /* this will also currently be slow on the GPU because we do not
     * support strides there on the lowest level */

    unsigned int i = N - 1;
    while (!MD_IS_SET(flags, i))
        i--;

#if 1
    // If only one dimension is left and it is the innermost one,
    // which is contiguous, optimize using md_zfftmod2
    if ((0u == MD_CLEAR(flags, i)) && (1 == md_calc_size(i, dims))
        && (CFL_SIZE == ostrs[i]) && (CFL_SIZE == istrs[i])) {

        md_zfftmod2(N - i, dims + i, ostrs + i, dst, istrs + i, src, inv, phase);
        return;
    }
#endif

    long tdims[N];
    md_select_dims(N, ~MD_BIT(i), tdims, dims);

    #pragma omp parallel for
    for (int j = 0; j < dims[i]; j++)
        fftmod2_r(N, tdims, MD_CLEAR(flags, i), ostrs, (void*)dst + j * ostrs[i], istrs, (void*)src + j * istrs[i], inv, phase + fftmod_phase(dims[i], j));
}
void fd_proj_noninc(const struct linop_s* o, complex float* optr, const complex float* iptr)
{
    struct fdiff_s* data = (struct fdiff_s*)linop_get_data(o); // FIXME: CAST?

    dump_cfl("impre", data->D, data->dims, iptr);

    complex float* tmp2 = md_alloc_sameplace(data->D, data->dims, CFL_SIZE, optr);

    linop_forward_unchecked(o, tmp2, iptr);

    long tmpdim = data->dims[0];
    long dims2[data->D];

    md_select_dims(data->D, ~0u, dims2, data->dims);
    dims2[0] *= 2;

    dump_cfl("dxpre", data->D, data->dims, tmp2);

    md_smin(data->D, dims2, (float*)optr, (float*)tmp2, 0.);

    // add back initial value
    dims2[0] = tmpdim;

    for (unsigned int i = 0; i < data->D; i++) {

        if (MD_IS_SET(data->flags, i)) {

            dims2[i] = 1;
            md_copy2(data->D, dims2, data->str, optr, data->str, tmp2, CFL_SIZE);
            break;
        }
    }

    dump_cfl("dxpost", data->D, data->dims, optr);

    linop_norm_inv_unchecked(o, 0., optr, optr);

    dump_cfl("impost", data->D, data->dims, optr);

    md_free(tmp2);
}
void opt_reg_configure(unsigned int N, const long img_dims[N], struct opt_reg_s* ropts, const struct operator_p_s* prox_ops[NUM_REGS], const struct linop_s* trafos[NUM_REGS], unsigned int llr_blk, bool randshift, bool use_gpu)
{
    float lambda = ropts->lambda;

    if (-1. == lambda)
        lambda = 0.;

    // if no penalties are specified but a regularization
    // parameter is given, add an l2 penalty
    struct reg_s* regs = ropts->regs;

    if ((0 == ropts->r) && (lambda > 0.)) {

        regs[0].xform = L2IMG;
        regs[0].xflags = 0u;
        regs[0].jflags = 0u;
        regs[0].lambda = lambda;
        ropts->r = 1;
    }

    int nr_penalties = ropts->r;
    long blkdims[MAX_LEV][DIMS];
    int levels;

    for (int nr = 0; nr < nr_penalties; nr++) {

        // fix up regularization parameter
        if (-1. == regs[nr].lambda)
            regs[nr].lambda = lambda;

        switch (regs[nr].xform) {

        case L1WAV:
            debug_printf(DP_INFO, "l1-wavelet regularization: %f\n", regs[nr].lambda);

            if (0 != regs[nr].jflags)
                debug_printf(DP_WARN, "joint l1-wavelet thresholding not currently supported.\n");

            long minsize[DIMS] = { [0 ... DIMS - 1] = 1 };
            minsize[0] = MIN(img_dims[0], 16);
            minsize[1] = MIN(img_dims[1], 16);
            minsize[2] = MIN(img_dims[2], 16);

            unsigned int wflags = 0;

            for (unsigned int i = 0; i < DIMS; i++) {

                if ((1 < img_dims[i]) && MD_IS_SET(regs[nr].xflags, i)) {

                    wflags = MD_SET(wflags, i);
                    minsize[i] = MIN(img_dims[i], 16);
                }
            }

            trafos[nr] = linop_identity_create(DIMS, img_dims);
            prox_ops[nr] = prox_wavelet3_thresh_create(DIMS, img_dims, wflags, minsize, regs[nr].lambda, randshift);
            break;

        case TV:
            debug_printf(DP_INFO, "TV regularization: %f\n", regs[nr].lambda);

            trafos[nr] = linop_grad_create(DIMS, img_dims, regs[nr].xflags);
            prox_ops[nr] = prox_thresh_create(DIMS + 1, linop_codomain(trafos[nr])->dims, regs[nr].lambda, regs[nr].jflags | MD_BIT(DIMS), use_gpu);
            break;

        case LLR:
            debug_printf(DP_INFO, "lowrank regularization: %f\n", regs[nr].lambda);

            // add locally lowrank penalty
            levels = llr_blkdims(blkdims, regs[nr].jflags, img_dims, llr_blk);

            assert(1 == levels);
            assert(levels == img_dims[LEVEL_DIM]);

            for (int l = 0; l < levels; l++)
#if 0
                blkdims[l][MAPS_DIM] = img_dims[MAPS_DIM];
#else
                blkdims[l][MAPS_DIM] = 1;
#endif

            int remove_mean = 0;

            trafos[nr] = linop_identity_create(DIMS, img_dims);
            prox_ops[nr] = lrthresh_create(img_dims, randshift, regs[nr].xflags, (const long (*)[DIMS])blkdims, regs[nr].lambda, false, remove_mean, use_gpu);
            break;

        case MLR:
#if 0
            // FIXME: multiscale low rank changes the output image dimensions
            // and requires the forward linear operator. This should be decoupled...
            debug_printf(DP_INFO, "multi-scale lowrank regularization: %f\n", regs[nr].lambda);

            levels = multilr_blkdims(blkdims, regs[nr].jflags, img_dims, 8, 1);

            img_dims[LEVEL_DIM] = levels;
            max_dims[LEVEL_DIM] = levels;

            for (int l = 0; l < levels; l++)
                blkdims[l][MAPS_DIM] = 1;

            trafos[nr] = linop_identity_create(DIMS, img_dims);
            prox_ops[nr] = lrthresh_create(img_dims, randshift, regs[nr].xflags, (const long (*)[DIMS])blkdims, regs[nr].lambda, false, 0, use_gpu);

            const struct linop_s* decom_op = sum_create(img_dims, use_gpu);
            const struct linop_s* tmp_op = forward_op;
            forward_op = linop_chain(decom_op, forward_op);

            linop_free(decom_op);
            linop_free(tmp_op);
#else
            debug_printf(DP_WARN, "multi-scale lowrank regularization not yet supported: %f\n", regs[nr].lambda);
#endif
            break;

        case IMAGL1:
            debug_printf(DP_INFO, "l1 regularization of imaginary part: %f\n", regs[nr].lambda);

            trafos[nr] = linop_rdiag_create(DIMS, img_dims, 0, &(complex float){ 1.i });
            prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu);
            break;

        case IMAGL2:
            debug_printf(DP_INFO, "l2 regularization of imaginary part: %f\n", regs[nr].lambda);

            trafos[nr] = linop_rdiag_create(DIMS, img_dims, 0, &(complex float){ 1.i });
            prox_ops[nr] = prox_leastsquares_create(DIMS, img_dims, regs[nr].lambda, NULL);
            break;

        case L1IMG:
            debug_printf(DP_INFO, "l1 regularization: %f\n", regs[nr].lambda);

            trafos[nr] = linop_identity_create(DIMS, img_dims);
            prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu);
            break;

        case L2IMG:
            debug_printf(DP_INFO, "l2 regularization: %f\n", regs[nr].lambda);

            trafos[nr] = linop_identity_create(DIMS, img_dims);
            prox_ops[nr] = prox_leastsquares_create(DIMS, img_dims, regs[nr].lambda, NULL);
            break;

        case FTL1:
            debug_printf(DP_INFO, "l1 regularization of Fourier transform: %f\n", regs[nr].lambda);

            trafos[nr] = linop_fft_create(DIMS, img_dims, regs[nr].xflags);
            prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu);
            break;
        }
/*
 * Low rank thresholding for arbitrary block sizes
 */
static void lrthresh_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src)
{
    struct lrthresh_data_s* data = CAST_DOWN(lrthresh_data_s, _data);

    float lambda = mu * data->lambda;

    long strs1[DIMS];
    md_calc_strides(DIMS, strs1, data->dims_decom, 1);

    //#pragma omp parallel for
    for (int l = 0; l < data->levels; l++) {

        complex float* dstl = dst + l * strs1[LEVEL_DIM];
        const complex float* srcl = src + l * strs1[LEVEL_DIM];

        long blkdims[DIMS];
        long shifts[DIMS];
        long unshifts[DIMS];
        long zpad_dims[DIMS];
        long M = 1;

        for (unsigned int i = 0; i < DIMS; i++) {

            blkdims[i] = data->blkdims[l][i];
            zpad_dims[i] = (data->dims[i] + blkdims[i] - 1) / blkdims[i];
            zpad_dims[i] *= blkdims[i];

            if (MD_IS_SET(data->mflags, i))
                M *= blkdims[i];

            if (data->randshift)
                shifts[i] = rand_lim(MIN(blkdims[i] - 1, zpad_dims[i] - blkdims[i]));
            else
                shifts[i] = 0;

            unshifts[i] = -shifts[i];
        }

        long zpad_strs[DIMS];
        md_calc_strides(DIMS, zpad_strs, zpad_dims, CFL_SIZE);

        long blk_size = md_calc_size(DIMS, blkdims);
        long img_size = md_calc_size(DIMS, zpad_dims);

        long N = blk_size / M;
        long B = img_size / blk_size;

        if (data->noise && (l == data->levels - 1)) {

            M = img_size;
            N = 1;
            B = 1;
        }

        complex float* tmp = md_alloc_sameplace(DIMS, zpad_dims, CFL_SIZE, dst);

        md_circ_ext(DIMS, zpad_dims, tmp, data->dims, srcl, CFL_SIZE);

        md_circ_shift(DIMS, zpad_dims, shifts, tmp, tmp, CFL_SIZE);

        long mat_dims[2];
        basorati_dims(DIMS, mat_dims, blkdims, zpad_dims);

        complex float* tmp_mat = md_alloc_sameplace(2, mat_dims, CFL_SIZE, dst);

        // Reshape image into a blk_size x number of blocks matrix
        basorati_matrix(DIMS, blkdims, mat_dims, tmp_mat, zpad_dims, zpad_strs, tmp);

        batch_svthresh(M, N, mat_dims[1], lambda * GWIDTH(M, N, B), *(complex float (*)[mat_dims[1]][M][N])tmp_mat);

        //  for ( int b = 0; b < mat_dims[1]; b++ )
        //      svthresh(M, N, lambda * GWIDTH(M, N, B), tmp_mat, tmp_mat);

        basorati_matrixH(DIMS, blkdims, zpad_dims, zpad_strs, tmp, mat_dims, tmp_mat);

        md_circ_shift(DIMS, zpad_dims, unshifts, tmp, tmp, CFL_SIZE);

        md_resize(DIMS, data->dims, dstl, zpad_dims, tmp, CFL_SIZE);

        md_free(tmp);
        md_free(tmp_mat);
    }
}
void overlapandsave2HB(const struct vec_ops* ops, int N, unsigned int flags, const long blk[N], const long dims1[N], complex float* dst, const long odims[N], const complex float* src1, const long dims2[N], const complex float* src2, const long mdims[N], const complex float* msk)
{
    long dims1B[N];

    long tdims[2 * N];
    long nodims[2 * N];
    long ndims2[2 * N];
    long nmdims[2 * N];

    int e = N;

    for (int i = 0; i < N; i++) {

        if (MD_IS_SET(flags, i)) {

            assert(1 == dims2[i] % 2);
            assert(0 == blk[i] % 2);
            assert(0 == dims1[i] % 2);
            assert(0 == odims[i] % blk[i]);
            assert(0 == dims1[i] % blk[i]);
            assert(dims1[i] == odims[i]);
            assert(dims2[i] <= blk[i]);
            assert(dims1[i] >= dims2[i]);
            assert((1 == mdims[i]) || (mdims[i] == dims1[i]));

            // blocked output
            nodims[e] = odims[i] / blk[i];
            nodims[i] = blk[i];

            // expanded temporary storage
            tdims[e] = dims1[i] / blk[i];
            tdims[i] = blk[i] + dims2[i] - 1;

            // blocked input
            // ---|---,---,---|---
            //   +       +++      +
            //   +       +++      +

            if (1 == mdims[i]) {

                nmdims[2 * i + 1] = 1;
                nmdims[2 * i + 0] = 1;

            } else {

                nmdims[2 * i + 1] = mdims[i] / blk[i];
                nmdims[2 * i + 0] = blk[i];
            }

            // resized input
            // minimal padding
            dims1B[i] = dims1[i] + (dims2[i] - 1);

            // kernel
            ndims2[e] = 1;
            ndims2[i] = dims2[i];

            e++;

        } else {

            nodims[i] = odims[i];
            tdims[i] = dims1[i];

            nmdims[2 * i + 1] = 1;
            nmdims[2 * i + 0] = mdims[i];

            dims1B[i] = dims1[i];

            ndims2[i] = dims2[i];
        }
    }

    int NE = e;

    // long S = md_calc_size(N, dims1B, 1);

    long str1[NE];
    long str1B[N];
    md_calc_strides(N, str1B, dims1B, sizeof(complex float));

    e = N;

    for (int i = 0; i < N; i++) {

        str1[i] = str1B[i];

        if (MD_IS_SET(flags, i))
            str1[e++] = str1B[i] * blk[i];
    }

    assert(NE == e);

    long str2[NE];
    md_calc_strides(NE, str2, tdims, sizeof(complex float));

    long ostr[NE];
    long mstr[NE];
    long mstrB[2 * N];

    md_calc_strides(NE, ostr, nodims, sizeof(complex float));
    md_calc_strides(2 * N, mstrB, nmdims, sizeof(complex float));

    e = N;

    for (int i = 0; i < N; i++) {

        mstr[i] = mstrB[2 * i + 0];

        if (MD_IS_SET(flags, i))
            mstr[e++] = mstrB[2 * i + 1];
    }

    assert(NE == e);

    // we can loop here

    assert(NE == N + 3);
    assert(1 == ndims2[N + 0]);
    assert(1 == ndims2[N + 1]);
    assert(1 == ndims2[N + 2]);
    assert(tdims[N + 0] == nodims[N + 0]);
    assert(tdims[N + 1] == nodims[N + 1]);
    assert(tdims[N + 2] == nodims[N + 2]);

    long R = md_calc_size(N, nodims);
    long T = md_calc_size(N, tdims);

    //complex float* src1C = xmalloc(S * sizeof(complex float));
    complex float* src1C = dst;

    md_clear(N, dims1B, src1C, CFL_SIZE); // must be done here

    #pragma omp parallel for collapse(3)
    for (int k = 0; k < nodims[N + 2]; k++) {
        for (int j = 0; j < nodims[N + 1]; j++) {
            for (int i = 0; i < nodims[N + 0]; i++) {

                complex float* tmp = (complex float*)ops->allocate(2 * T);
                complex float* tmpX = (complex float*)ops->allocate(2 * R);

                long off1 = str1[N + 0] * i + str1[N + 1] * j + str1[N + 2] * k;
                long off2 = mstr[N + 0] * i + mstr[N + 1] * j + mstr[N + 2] * k;
                long off3 = ostr[N + 0] * i + ostr[N + 1] * j + ostr[N + 2] * k;

                md_zmul2(N, nodims, ostr, tmpX, ostr, ((const void*)src1) + off3, mstr, ((const void*)msk) + off2);
                convH(N, flags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2);

                #pragma omp critical
                md_zadd2(N, tdims, str1, ((void*)src1C) + off1, str1, ((void*)src1C) + off1, str2, tmp);

                ops->del((void*)tmpX);
                ops->del((void*)tmp);
            }
        }
    }
}
void overlapandsave2NE(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], complex float* src1, const long dims2[N], complex float* src2, const long mdims[N], complex float* msk)
{
    long dims1B[N];

    long tdims[2 * N];
    long nodims[2 * N];
    long ndims1[2 * N];
    long ndims2[2 * N];

    long shift[2 * N];

    unsigned int nflags = 0;

    for (int i = 0; i < N; i++) {

        if (MD_IS_SET(flags, i)) {

            nflags = MD_SET(nflags, 2 * i);

            assert(1 == dims2[i] % 2);
            assert(0 == blk[i] % 2);
            assert(0 == dims1[i] % 2);
            assert(0 == odims[i] % blk[i]);
            assert(0 == dims1[i] % blk[i]);
            assert(dims1[i] == odims[i]);
            assert(dims2[i] <= blk[i]);
            assert(dims1[i] >= dims2[i]);

            // blocked output
            nodims[i * 2 + 1] = odims[i] / blk[i];
            nodims[i * 2 + 0] = blk[i];

            // expanded temporary storage
            tdims[i * 2 + 1] = dims1[i] / blk[i];
            tdims[i * 2 + 0] = blk[i] + dims2[i] - 1;

            // blocked input
            // ---|---,---,---|---
            //   +       +++      +
            //   +       +++      +

            // resized input
            dims1B[i] = dims1[i] + 2 * blk[i];

            ndims1[i * 2 + 1] = dims1[i] / blk[i] + 2; // do we need two full blocks?
            ndims1[i * 2 + 0] = blk[i];

            shift[i * 2 + 1] = 0;
            shift[i * 2 + 0] = blk[i] - (dims2[i] - 1) / 2;

            // kernel
            ndims2[i * 2 + 1] = 1;
            ndims2[i * 2 + 0] = dims2[i];

        } else {

            nodims[i * 2 + 1] = 1;
            nodims[i * 2 + 0] = odims[i];

            tdims[i * 2 + 1] = 1;
            tdims[i * 2 + 0] = dims1[i];

            ndims1[i * 2 + 1] = 1;
            ndims1[i * 2 + 0] = dims1[i];

            shift[i * 2 + 1] = 0;
            shift[i * 2 + 0] = 0;

            dims1B[i] = dims1[i];

            ndims2[i * 2 + 1] = 1;
            ndims2[i * 2 + 0] = dims2[i];
        }
    }

    complex float* src1B = md_alloc(N, dims1B, CFL_SIZE);
    complex float* tmp = md_alloc(2 * N, tdims, CFL_SIZE);
    complex float* tmpX = md_alloc(N, odims, CFL_SIZE);

    long str1[2 * N];
    long str2[2 * N];

    md_calc_strides(2 * N, str1, ndims1, sizeof(complex float));
    md_calc_strides(2 * N, str2, tdims, sizeof(complex float));

    long off = md_calc_offset(2 * N, str1, shift);

    md_resize_center(N, dims1B, src1B, dims1, src1, sizeof(complex float));

    // we can loop here

    md_copy2(2 * N, tdims, str2, tmp, str1, ((void*)src1B) + off, sizeof(complex float));

    conv(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, nodims, tmpX, tdims, tmp, ndims2, src2);

    long ostr[N];
    long mstr[N];

    md_calc_strides(N, ostr, odims, sizeof(complex float));
    md_calc_strides(N, mstr, mdims, sizeof(complex float));

    md_zmul2(N, odims, ostr, tmpX, ostr, tmpX, mstr, msk);

    convH(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2);

    md_clear(N, dims1B, src1B, sizeof(complex float));
    md_zadd2(2 * N, tdims, str1, ((void*)src1B) + off, str1, ((void*)src1B) + off, str2, tmp);

    //
    md_resize_center(N, dims1, dst, dims1B, src1B, sizeof(complex float));

    md_free(src1B);
    md_free(tmpX);
    md_free(tmp);
}
/* O I M G
 * 1 1 1 1   - not used
 * 1 1 A !   - forbidden
 * 1 A 1 !   - forbidden
 * A 1 1 !   - forbidden
 * A A 1 1   - replicated
 * A 1 A 1   - output
 * 1 A A A/A - input
 * A A A A   - batch
 */
static struct operator_matrix_s* linop_matrix_priv2(unsigned int N, const long out_dims[N], const long in_dims[N], const long matrix_dims[N], const complex float* matrix)
{
    // to get assertions and cost estimate
    long max_dims[N];
    md_tenmul_dims(N, max_dims, out_dims, in_dims, matrix_dims);

    PTR_ALLOC(struct operator_matrix_s, data);
    SET_TYPEID(operator_matrix_s, data);

    data->N = N;

    PTR_ALLOC(long[N], out_dims1);
    md_copy_dims(N, *out_dims1, out_dims);
    data->out_dims = *PTR_PASS(out_dims1);

    PTR_ALLOC(long[N], mat_dims1);
    md_copy_dims(N, *mat_dims1, matrix_dims);
    data->mat_dims = *PTR_PASS(mat_dims1);

    PTR_ALLOC(long[N], in_dims1);
    md_copy_dims(N, *in_dims1, in_dims);
    data->in_dims = *PTR_PASS(in_dims1);

    complex float* mat = md_alloc(N, matrix_dims, CFL_SIZE);
    md_copy(N, matrix_dims, mat, matrix, CFL_SIZE);

    data->mat = mat;
    data->mat_gram = NULL;
#ifdef USE_CUDA
    data->mat_gpu = NULL;
    data->mat_gram_gpu = NULL;
#endif

#if 1
    // pre-multiply gram matrix (if there is a cost reduction)
    unsigned long out_flags = md_nontriv_dims(N, out_dims);
    unsigned long in_flags = md_nontriv_dims(N, in_dims);

    unsigned long del_flags = in_flags & ~out_flags;
    unsigned long new_flags = out_flags & ~in_flags;

    /* we double (again) for the gram matrix */
    PTR_ALLOC(long[2 * N], mat_dims2);
    PTR_ALLOC(long[2 * N], in_dims2);
    PTR_ALLOC(long[2 * N], gmt_dims2);
    PTR_ALLOC(long[2 * N], gin_dims2);
    PTR_ALLOC(long[2 * N], grm_dims2);
    PTR_ALLOC(long[2 * N], gout_dims2);

    shadow_dims(N, *gmt_dims2, matrix_dims);
    shadow_dims(N, *mat_dims2, matrix_dims);
    shadow_dims(N, *in_dims2, in_dims);
    shadow_dims(N, *gout_dims2, in_dims);
    shadow_dims(N, *gin_dims2, in_dims);
    shadow_dims(N, *grm_dims2, matrix_dims);

    /* move removed input dims into shadow position
     * for the gram matrix can have an output there */
    for (unsigned int i = 0; i < N; i++) {

        if (MD_IS_SET(del_flags, i)) {

            assert((*mat_dims2)[2 * i + 0] == (*in_dims2)[2 * i + 0]);

            (*mat_dims2)[2 * i + 1] = (*mat_dims2)[2 * i + 0];
            (*mat_dims2)[2 * i + 0] = 1;

            (*in_dims2)[2 * i + 1] = (*gin_dims2)[2 * i + 0];
            (*in_dims2)[2 * i + 0] = 1;
        }
    }

    for (unsigned int i = 0; i < N; i++) {

        if (MD_IS_SET(new_flags, i)) {

            (*grm_dims2)[2 * i + 0] = 1;
            (*grm_dims2)[2 * i + 1] = 1;
        }

        if (MD_IS_SET(del_flags, i)) {

            (*gout_dims2)[2 * i + 1] = (*gin_dims2)[2 * i + 0];
            (*gout_dims2)[2 * i + 0] = 1;

            (*grm_dims2)[2 * i + 0] = in_dims[i];
            (*grm_dims2)[2 * i + 1] = in_dims[i];
        }
    }

    long gmx_dims[2 * N];
    md_tenmul_dims(2 * N, gmx_dims, *gout_dims2, *gin_dims2, *grm_dims2);

    long mult_mat = md_calc_size(N, max_dims);
    long mult_gram = md_calc_size(2 * N, gmx_dims);

    if (mult_gram < 2 * mult_mat) { // FIXME: rethink

        debug_printf(DP_DEBUG2, "Gram matrix: 2x %ld vs %ld\n", mult_mat, mult_gram);

        complex float* mat_gram = md_alloc(2 * N, *grm_dims2, CFL_SIZE);
        md_ztenmulc(2 * N, *grm_dims2, mat_gram, *gmt_dims2, matrix, *mat_dims2, matrix);

        data->mat_gram = mat_gram;
    }

    PTR_FREE(gmt_dims2);
    PTR_FREE(mat_dims2);
    PTR_FREE(in_dims2);

    data->gin_dims = *PTR_PASS(gin_dims2);
    data->gout_dims = *PTR_PASS(gout_dims2);
    data->grm_dims = *PTR_PASS(grm_dims2);
#else
    data->gin_dims = NULL;
    data->gout_dims = NULL;
    data->grm_dims = NULL;
#endif

    return PTR_PASS(data);
}
/**
 * Efficiently chain two matrix linops by multiplying the actual matrices together.
 * Stores a copy of the new matrix.
 * Returns: C = B A
 *
 * @param a first matrix (applied to input)
 * @param b second matrix (applied to output of first matrix)
 */
struct linop_s* linop_matrix_chain(const struct linop_s* a, const struct linop_s* b)
{
    const struct operator_matrix_s* a_data = CAST_DOWN(operator_matrix_s, linop_get_data(a));
    const struct operator_matrix_s* b_data = CAST_DOWN(operator_matrix_s, linop_get_data(b));

    // check compatibility
    assert(linop_codomain(a)->N == linop_domain(b)->N);
    assert(md_check_compat(linop_codomain(a)->N, 0u, linop_codomain(a)->dims, linop_domain(b)->dims));

    unsigned int D = linop_domain(a)->N;

    unsigned long outB_flags = md_nontriv_dims(D, linop_codomain(b)->dims);
    unsigned long inB_flags = md_nontriv_dims(D, linop_domain(b)->dims);

    unsigned long delB_flags = inB_flags & ~outB_flags;

    unsigned int N = a_data->N;
    assert(N == 2 * D);

    long in_dims[N];
    md_copy_dims(N, in_dims, a_data->in_dims);

    long matA_dims[N];
    md_copy_dims(N, matA_dims, a_data->mat_dims);

    long matB_dims[N];
    md_copy_dims(N, matB_dims, b_data->mat_dims);

    long out_dims[N];
    md_copy_dims(N, out_dims, b_data->out_dims);

    for (unsigned int i = 0; i < D; i++) {

        if (MD_IS_SET(delB_flags, i)) {

            matA_dims[2 * i + 0] = a_data->mat_dims[2 * i + 1];
            matA_dims[2 * i + 1] = a_data->mat_dims[2 * i + 0];

            in_dims[2 * i + 0] = a_data->in_dims[2 * i + 1];
            in_dims[2 * i + 1] = a_data->in_dims[2 * i + 0];
        }
    }

    long matrix_dims[N];
    md_singleton_dims(N, matrix_dims);

    unsigned long iflags = md_nontriv_dims(N, in_dims);
    unsigned long oflags = md_nontriv_dims(N, out_dims);
    unsigned long flags = iflags | oflags;

    // we combine a and b and sum over dims not in input or output
    md_max_dims(N, flags, matrix_dims, matA_dims, matB_dims);

    debug_printf(DP_DEBUG1, "tensor chain: %ld x %ld -> %ld\n", md_calc_size(N, matA_dims), md_calc_size(N, matB_dims), md_calc_size(N, matrix_dims));

    complex float* matrix = md_alloc(N, matrix_dims, CFL_SIZE);

    debug_print_dims(DP_DEBUG2, N, matrix_dims);
    debug_print_dims(DP_DEBUG2, N, in_dims);
    debug_print_dims(DP_DEBUG2, N, matA_dims);
    debug_print_dims(DP_DEBUG2, N, matB_dims);
    debug_print_dims(DP_DEBUG2, N, out_dims);

    md_ztenmul(N, matrix_dims, matrix, matA_dims, a_data->mat, matB_dims, b_data->mat);

    // priv2 takes our doubled dimensions
    struct operator_matrix_s* data = linop_matrix_priv2(N, out_dims, in_dims, matrix_dims, matrix);

    /* although we internally use different dimensions we define the
     * correct interface */
    struct linop_s* c = linop_create(linop_codomain(b)->N, linop_codomain(b)->dims, linop_domain(a)->N, linop_domain(a)->dims,
                                     CAST_UP(data), linop_matrix_apply, linop_matrix_apply_adjoint, linop_matrix_apply_normal, NULL, linop_matrix_del);

    md_free(matrix);

    return c;
}