/** * * x = (ATA + uI)^-1 b * */ void sum_apply_pinverse(const void* _data, float rho, complex float* dst, const complex float* src) { struct sum_data* data = (struct sum_data*) _data; if (NULL == data->tmp) { #ifdef USE_CUDA data->tmp = (data->use_gpu ? md_alloc_gpu : md_alloc)(DIMS, data->img_dims, CFL_SIZE); #else data->tmp = md_alloc(DIMS, data->img_dims, CFL_SIZE); #endif } // get average md_clear( DIMS, data->img_dims, data->tmp, sizeof( complex float ) ); md_zadd2( DIMS, data->imgd_dims, data->img_strs, data->tmp, data->img_strs, data->tmp , data->imgd_strs, src ); md_zsmul( DIMS, data->img_dims, data->tmp, data->tmp, 1. / data->levels ); // get non-average md_zsub2( DIMS, data->imgd_dims, data->imgd_strs, dst, data->imgd_strs, src, data->img_strs, data->tmp ); // avg = avg / (1 + rho) md_zsmul( DIMS, data->img_dims, data->tmp, data->tmp, 1. / (1. + rho) ); // nonavg = nonavg / rho md_zsmul( DIMS, data->imgd_dims, dst, dst, 1. / rho ); // dst = avg + nonavg md_zadd2( DIMS, data->imgd_dims, data->imgd_strs, dst, data->imgd_strs, dst, data->img_strs, data->tmp ); }
void casorati_matrixH(unsigned int N, const long dimk[N], const long dim[N], const long str[N], complex float* optr, const long odim[2], const complex float* iptr) { long str2[2 * N]; long strc[2 * N]; long dimc[2 * N]; calc_casorati_geom(N, dimc, str2, dimk, dim, str); assert(odim[0] == md_calc_size(N, dimc)); assert(odim[1] == md_calc_size(N, dimc + N)); md_clear(N, dim, optr, CFL_SIZE); md_calc_strides(2 * N, strc, dimc, CFL_SIZE); md_zadd2(2 * N, dimc, str2, optr, str2, optr, strc, iptr); }
void overlapandadd(int N, const long dims[N], const long blk[N], complex float* dst, complex float* src1, const long dim2[N], complex float* src2) { long ndims[2 * N]; long L[2 * N]; long ndim2[2 * N]; long ndim3[2 * N]; for (int i = 0; i < N; i++) { assert(0 == dims[i] % blk[i]); assert(dim2[i] <= blk[i]); ndims[i * 2 + 1] = dims[i] / blk[i]; ndims[i * 2 + 0] = blk[i]; L[i * 2 + 1] = dims[i] / blk[i]; L[i * 2 + 0] = blk[i] + dim2[i] - 1; ndim2[i * 2 + 1] = 1; ndim2[i * 2 + 0] = dim2[i]; ndim3[i * 2 + 1] = dims[i] / blk[i] + 1; ndim3[i * 2 + 0] = blk[i]; } long T = md_calc_size(2 * N, L); complex float* tmp = xmalloc(T * 8); // conv_causal_extend(2 * N, L, tmp, ndims, src1, ndim2, src2); conv(2 * N, ~0, CONV_EXTENDED, CONV_CAUSAL, L, tmp, ndims, src1, ndim2, src2); // [------++++|||||||| //long str1[2 * N]; long str2[2 * N]; long str3[2 * N]; //md_calc_strides(2 * N, str1, ndims, 8); md_calc_strides(2 * N, str2, L, 8); md_calc_strides(2 * N, str3, ndim3, 8); md_clear(2 * N, ndim3, dst, CFL_SIZE); md_zadd2(2 * N, L, str3, dst, str3, dst, str2, tmp); free(tmp); }
/* * Implements cumulative sum operator (order 1 for now) * using circular shift: cumsum(x) = x + circshift(x,1) + circshift(x,2) + ... * * optr = cumsum(iptr) */ static void md_zcumsum_core2(unsigned int D, const long dims[D], unsigned int flags, complex float* tmp, complex float* tmp2, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr) { //out = dx md_copy2(D, dims, ostrs, optr, istrs, iptr, sizeof(complex float)); md_copy2(D, dims, istrs, tmp, istrs, iptr, sizeof(complex float)); long zdims[D]; long center[D]; md_select_dims(D, ~0, zdims, dims); memset(center, 0, D * sizeof(long)); for (unsigned int i=0; i < D; i++) { if (MD_IS_SET(flags, i)) { for (int d=1; d < dims[i]; d++) { // tmp = circshift(tmp, i) center[i] = d; md_circ_shift2(D, dims, center, istrs, tmp2, istrs, tmp, sizeof(complex float)); zdims[i] = d; // tmp(1:d,:) = 0 md_clear2(D, zdims, istrs, tmp2, sizeof(complex float)); //md_zsmul2(D, zdims, istrs, tmp2, istrs, tmp2, 0.); //dump_cfl("tmp2", D, dims, tmp2); // out = out + tmp md_zadd2(D, dims, ostrs, optr, istrs, tmp2, ostrs, optr); //md_copy2(D, dims, ostrs, tmp, ostrs, optr, sizeof(complex float)); } md_copy2(D, dims, ostrs, tmp, ostrs, optr, sizeof(complex float)); center[i] = 0; zdims[i] = dims[i]; } } }
void overlapandsave2HB(const struct vec_ops* ops, int N, unsigned int flags, const long blk[N], const long dims1[N], complex float* dst, const long odims[N], const complex float* src1, const long dims2[N], const complex float* src2, const long mdims[N], const complex float* msk) { long dims1B[N]; long tdims[2 * N]; long nodims[2 * N]; long ndims2[2 * N]; long nmdims[2 * N]; int e = N; for (int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { assert(1 == dims2[i] % 2); assert(0 == blk[i] % 2); assert(0 == dims1[i] % 2); assert(0 == odims[i] % blk[i]); assert(0 == dims1[i] % blk[i]); assert(dims1[i] == odims[i]); assert(dims2[i] <= blk[i]); assert(dims1[i] >= dims2[i]); assert((1 == mdims[i]) || (mdims[i] == dims1[i])); // blocked output nodims[e] = odims[i] / blk[i]; nodims[i] = blk[i]; // expanded temporary storage tdims[e] = dims1[i] / blk[i]; tdims[i] = blk[i] + dims2[i] - 1; // blocked input // ---|---,---,---|--- // + +++ + // + +++ + if (1 == mdims[i]) { nmdims[2 * i + 1] = 1; nmdims[2 * i + 1] = 1; } else { nmdims[2 * i + 1] = mdims[i] / blk[i]; nmdims[2 * i + 0] = blk[i]; } // resized input // minimal padding dims1B[i] = dims1[i] + (dims2[i] - 1); // kernel ndims2[e] = 1; ndims2[i] = dims2[i]; e++; } else { nodims[i] = odims[i]; tdims[i] = dims1[i]; nmdims[2 * i + 1] = 1; nmdims[2 * i + 0] = mdims[i]; dims1B[i] = dims1[i]; ndims2[i] = dims2[i]; } } int NE = e; // long S = md_calc_size(N, dims1B, 1); long str1[NE]; long str1B[N]; md_calc_strides(N, str1B, dims1B, sizeof(complex float)); e = N; for (int i = 0; i < N; i++) { str1[i] = str1B[i]; if (MD_IS_SET(flags, i)) str1[e++] = str1B[i] * blk[i]; } assert(NE == e); long str2[NE]; md_calc_strides(NE, str2, tdims, sizeof(complex float)); long ostr[NE]; long mstr[NE]; long mstrB[2 * N]; md_calc_strides(NE, ostr, nodims, sizeof(complex float)); md_calc_strides(2 * N, mstrB, nmdims, sizeof(complex float)); e = N; for (int i = 0; i < N; i++) { mstr[i] = mstrB[2 * i + 0]; if (MD_IS_SET(flags, i)) mstr[e++] = mstrB[2 * i + 1]; } assert(NE == e); // we can loop here assert(NE == N + 3); assert(1 == ndims2[N + 0]); assert(1 == ndims2[N + 1]); assert(1 == ndims2[N + 2]); assert(tdims[N + 0] == nodims[N + 0]); assert(tdims[N + 1] == nodims[N + 1]); assert(tdims[N + 2] == nodims[N + 2]); long R = md_calc_size(N, nodims); long T = md_calc_size(N, tdims); //complex float* src1C = xmalloc(S * sizeof(complex float)); complex float* src1C = dst; md_clear(N, dims1B, src1C, CFL_SIZE); // must be done here #pragma omp parallel for collapse(3) for (int k = 0; k < nodims[N + 2]; k++) { for (int j = 0; j < nodims[N + 1]; j++) { for (int i = 0; i < nodims[N + 0]; i++) { complex float* tmp = (complex float*)ops->allocate(2 * T); complex float* tmpX = (complex float*)ops->allocate(2 * R); long off1 = str1[N + 0] * i + str1[N + 1] * j + str1[N + 2] * k; long off2 = mstr[N + 0] * i + mstr[N + 1] * j + mstr[N + 2] * k; long off3 = ostr[N + 0] * i + ostr[N + 1] * j + ostr[N + 2] * k; md_zmul2(N, nodims, ostr, tmpX, ostr, ((const void*)src1) + off3, mstr, ((const void*)msk) + off2); convH(N, flags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2); #pragma omp critical md_zadd2(N, tdims, str1, ((void*)src1C) + off1, str1, ((void*)src1C) + off1, str2, tmp); ops->del((void*)tmpX); ops->del((void*)tmp); }}} }
void overlapandsave2NE(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], complex float* src1, const long dims2[N], complex float* src2, const long mdims[N], complex float* msk) { long dims1B[N]; long tdims[2 * N]; long nodims[2 * N]; long ndims1[2 * N]; long ndims2[2 * N]; long shift[2 * N]; unsigned int nflags = 0; for (int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { nflags = MD_SET(nflags, 2 * i); assert(1 == dims2[i] % 2); assert(0 == blk[i] % 2); assert(0 == dims1[i] % 2); assert(0 == odims[i] % blk[i]); assert(0 == dims1[i] % blk[i]); assert(dims1[i] == odims[i]); assert(dims2[i] <= blk[i]); assert(dims1[i] >= dims2[i]); // blocked output nodims[i * 2 + 1] = odims[i] / blk[i]; nodims[i * 2 + 0] = blk[i]; // expanded temporary storage tdims[i * 2 + 1] = dims1[i] / blk[i]; tdims[i * 2 + 0] = blk[i] + dims2[i] - 1; // blocked input // ---|---,---,---|--- // + +++ + // + +++ + // resized input dims1B[i] = dims1[i] + 2 * blk[i]; ndims1[i * 2 + 1] = dims1[i] / blk[i] + 2; // do we need two full blocks? ndims1[i * 2 + 0] = blk[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = blk[i] - (dims2[i] - 1) / 2; // kernel ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } else { nodims[i * 2 + 1] = 1; nodims[i * 2 + 0] = odims[i]; tdims[i * 2 + 1] = 1; tdims[i * 2 + 0] = dims1[i]; ndims1[i * 2 + 1] = 1; ndims1[i * 2 + 0] = dims1[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = 0; dims1B[i] = dims1[i]; ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } } complex float* src1B = md_alloc(N, dims1B, CFL_SIZE); complex float* tmp = md_alloc(2 * N, tdims, CFL_SIZE); complex float* tmpX = md_alloc(N, odims, CFL_SIZE); long str1[2 * N]; long str2[2 * N]; md_calc_strides(2 * N, str1, ndims1, sizeof(complex float)); md_calc_strides(2 * N, str2, tdims, sizeof(complex float)); long off = md_calc_offset(2 * N, str1, shift); md_resize_center(N, dims1B, src1B, dims1, src1, sizeof(complex float)); // we can loop here md_copy2(2 * N, tdims, str2, tmp, str1, ((void*)src1B) + off, sizeof(complex float)); conv(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, nodims, tmpX, tdims, tmp, ndims2, src2); long ostr[N]; long mstr[N]; md_calc_strides(N, ostr, odims, sizeof(complex float)); md_calc_strides(N, mstr, mdims, sizeof(complex float)); md_zmul2(N, odims, ostr, tmpX, ostr, tmpX, mstr, msk); convH(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2); md_clear(N, dims1B, src1B, sizeof(complex float)); md_zadd2(2 * N, tdims, str1, ((void*)src1B) + off, str1, ((void*)src1B) + off, str2, tmp); // md_resize_center(N, dims1, dst, dims1B, src1B, sizeof(complex float)); md_free(src1B); md_free(tmpX); md_free(tmp); }