void parslices2(grecon_fun_t grecon, void* param, const long dims[DIMS], const long img_strs[DIMS], complex float* image, const long sens_dims[DIMS], const complex float* sens_maps, const long pat_dims[DIMS], const complex float* pattern, const long ksp_strs[DIMS], const complex float* kspace_data, bool output_ksp, bool gpu) { // dimensions and strides int N = DIMS; long ksp_dims[N]; long img_dims[N]; long strs[N]; long sens_strs[N]; long pat_strs[N]; md_calc_strides(N, strs, dims, CFL_SIZE); md_calc_strides(N, sens_strs, sens_dims, CFL_SIZE); md_select_dims(N, output_ksp ? ~MAPS_FLAG : ~COIL_FLAG, img_dims, dims); md_select_dims(N, ~MAPS_FLAG, ksp_dims, dims); if (NULL != pattern) md_calc_strides(N, pat_strs, pat_dims, CFL_SIZE); // dimensions and strides for one slice long dims1[N]; long ksp1_dims[N]; long img1_dims[N]; long sens1_dims[N]; long pat1_dims[N]; long ksp1_strs[N]; long img1_strs[N]; long strs1[N]; long sens1_strs[N]; long pat1_strs[N]; md_select_dims(N, ~READ_FLAG, dims1, dims); md_calc_strides(N, strs1, dims1, CFL_SIZE); md_select_dims(N, ~READ_FLAG, sens1_dims, sens_dims); md_calc_strides(N, sens1_strs, sens1_dims, CFL_SIZE); md_select_dims(N, ~READ_FLAG, ksp1_dims, ksp_dims); md_calc_strides(N, ksp1_strs, ksp1_dims, CFL_SIZE); md_select_dims(N, ~READ_FLAG, img1_dims, img_dims); md_calc_strides(N, img1_strs, img1_dims, CFL_SIZE); if (NULL != pattern) { md_select_dims(N, ~READ_FLAG, pat1_dims, pat_dims); md_calc_strides(N, pat1_strs, pat1_dims, CFL_SIZE); } // estimate_pattern(ksp1_dims, 3, pattern, kspace_data + (ksp_dims[0] / 2) * ksp_strs[0]); // extract pattern form center of readout bool ap_save = num_auto_parallelize; num_auto_parallelize = false; if (gpu) { #ifdef USE_CUDA int nr_cuda_devices = cuda_devices(); omp_set_num_threads(nr_cuda_devices * 2); // fft_set_num_threads(1); #else assert(0); #endif } else { fft_set_num_threads(1); } int counter = 0; #pragma omp parallel for for (int i = 0; i < ksp_dims[READ_DIM]; i++) { complex float* image1 = md_alloc(N, img1_dims, CFL_SIZE); md_clear(N, img1_dims, image1, CFL_SIZE); complex float* kspace1 = md_alloc(N, ksp1_dims, CFL_SIZE); md_copy2(N, ksp1_dims, ksp1_strs, kspace1, ksp_strs, ((char*)kspace_data) + i * ksp_strs[0], CFL_SIZE); complex float* cov1 = md_alloc(N, sens1_dims, CFL_SIZE); md_copy2(N, sens1_dims, sens1_strs, cov1, sens_strs, ((char*)sens_maps) + i * sens_strs[0], CFL_SIZE); complex float* pattern1 = NULL; if (NULL != pattern) { pattern1 = md_alloc(N, pat1_dims, CFL_SIZE); md_copy2(N, pat1_dims, pat1_strs, pattern1, pat_strs, ((char*)pattern) + i * pat_strs[0], CFL_SIZE); } // if (i == 96) grecon(param, dims1, image1, sens1_dims, cov1, pat1_dims, pattern1, kspace1, gpu); md_copy2(N, img1_dims, img_strs, ((char*)image) + i * img_strs[0], img1_strs, image1, CFL_SIZE); if (NULL != pattern) md_free((void*)pattern1); md_free(image1); md_free(kspace1); md_free(cov1); #pragma omp critical { debug_printf(DP_DEBUG2, "%04d/%04ld \r", ++counter, ksp_dims[0]); } } num_auto_parallelize = ap_save; debug_printf(DP_DEBUG2, "\n"); }
void overlapandsave2NEB(const struct vec_ops* ops, int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], const complex float* src1, const long dims2[N], const complex float* src2, const long mdims[N], const complex float* msk) { long dims1B[N]; long tdims[2 * N]; long nodims[2 * N]; long ndims2[2 * N]; long nmdims[2 * N]; int e = N; for (int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { assert(1 == dims2[i] % 2); assert(0 == blk[i] % 2); assert(0 == dims1[i] % 2); assert(0 == odims[i] % blk[i]); assert(0 == dims1[i] % blk[i]); assert(dims1[i] == odims[i]); assert(dims2[i] <= blk[i]); assert(dims1[i] >= dims2[i]); assert((1 == mdims[i]) || (mdims[i] == dims1[i])); // blocked output nodims[e] = odims[i] / blk[i]; nodims[i] = blk[i]; // expanded temporary storage tdims[e] = dims1[i] / blk[i]; tdims[i] = blk[i] + dims2[i] - 1; // blocked input // ---|---,---,---|--- // + +++ + // + +++ + if (1 == mdims[i]) { nmdims[2 * i + 1] = 1; nmdims[2 * i + 1] = 1; } else { nmdims[2 * i + 1] = mdims[i] / blk[i]; nmdims[2 * i + 0] = blk[i]; } // resized input // minimal padding dims1B[i] = dims1[i] + (dims2[i] - 1); // kernel ndims2[e] = 1; ndims2[i] = dims2[i]; e++; } else { nodims[i] = odims[i]; tdims[i] = dims1[i]; nmdims[2 * i + 1] = 1; nmdims[2 * i + 0] = mdims[i]; dims1B[i] = dims1[i]; ndims2[i] = dims2[i]; } } int NE = e; //long S = md_calc_size(N, dims1B, 1); long str1[NE]; long str1B[N]; md_calc_strides(N, str1B, dims1B, sizeof(complex float)); e = N; for (int i = 0; i < N; i++) { str1[i] = str1B[i]; if (MD_IS_SET(flags, i)) str1[e++] = str1B[i] * blk[i]; } assert(NE == e); long str2[NE]; md_calc_strides(NE, str2, tdims, sizeof(complex float)); long ostr[NE]; long mstr[NE]; long mstrB[2 * N]; md_calc_strides(NE, ostr, nodims, sizeof(complex float)); md_calc_strides(2 * N, mstrB, nmdims, sizeof(complex float)); e = N; for (int i = 0; i < N; i++) { mstr[i] = mstrB[2 * i + 0]; if (MD_IS_SET(flags, i)) mstr[e++] = mstrB[2 * i + 1]; } assert(NE == e); const complex float* src1B = src1;//! //complex float* src1B = xmalloc(S * sizeof(complex float)); //md_resizec(N, dims1B, src1B, dims1, src1, sizeof(complex float)); // we can loop here assert(NE == N + 3); assert(1 == ndims2[N + 0]); assert(1 == ndims2[N + 1]); assert(1 == ndims2[N + 2]); assert(tdims[N + 0] == nodims[N + 0]); assert(tdims[N + 1] == nodims[N + 1]); assert(tdims[N + 2] == nodims[N + 2]); long R = md_calc_size(N, nodims); long T = md_calc_size(N, tdims); //complex float* src1C = xmalloc(S * sizeof(complex float)); complex float* src1C = dst; md_clear(N, dims1B, src1C, sizeof(complex float)); // must be done here #pragma omp parallel for collapse(3) for (int k = 0; k < nodims[N + 2]; k++) { for (int j = 0; j < nodims[N + 1]; j++) { for (int i = 0; i < nodims[N + 0]; i++) { complex float* tmp = (complex float*)ops->allocate(2 * T); complex float* tmpX = (complex float*)ops->allocate(2 * R); long off1 = str1[N + 0] * i + str1[N + 1] * j + str1[N + 2] * k; long off2 = mstr[N + 0] * i + mstr[N + 1] * j + mstr[N + 2] * k; md_copy2(N, tdims, str2, tmp, str1, ((const void*)src1B) + off1, sizeof(complex float)); conv(N, flags, CONV_VALID, CONV_SYMMETRIC, nodims, tmpX, tdims, tmp, ndims2, src2); md_zmul2(N, nodims, ostr, tmpX, ostr, tmpX, mstr, ((const void*)msk) + off2); convH(N, flags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2); #pragma omp critical md_zadd2(N, tdims, str1, ((void*)src1C) + off1, str1, ((void*)src1C) + off1, str2, tmp); ops->del((void*)tmpX); ops->del((void*)tmp); } } } //md_resizec(N, dims1, dst, dims1B, src1C, sizeof(complex float)); //free(src1C); //free(src1B); }
void overlapandsave2(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], const complex float* src1, const long dims2[N], const complex float* src2) { long dims1B[N]; long tdims[2 * N]; long nodims[2 * N]; long ndims1[2 * N]; long ndims2[2 * N]; long shift[2 * N]; unsigned int nflags = 0; for (int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { nflags = MD_SET(nflags, 2 * i); assert(1 == dims2[i] % 2); assert(0 == blk[i] % 2); assert(0 == dims1[i] % 2); assert(0 == odims[i] % blk[i]); assert(0 == dims1[i] % blk[i]); assert(dims1[i] == odims[i]); assert(dims2[i] <= blk[i]); assert(dims1[i] >= dims2[i]); // blocked output nodims[i * 2 + 1] = odims[i] / blk[i]; nodims[i * 2 + 0] = blk[i]; // expanded temporary storage tdims[i * 2 + 1] = dims1[i] / blk[i]; tdims[i * 2 + 0] = blk[i] + dims2[i] - 1; // blocked input // ---|---,---,---|--- // + +++ + // + +++ + // resized input dims1B[i] = dims1[i] + 2 * blk[i]; ndims1[i * 2 + 1] = dims1[i] / blk[i] + 2; // do we need two full blocks? ndims1[i * 2 + 0] = blk[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = blk[i] - (dims2[i] - 1) / 2; // kernel ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } else { nodims[i * 2 + 1] = 1; nodims[i * 2 + 0] = odims[i]; tdims[i * 2 + 1] = 1; tdims[i * 2 + 0] = dims1[i]; ndims1[i * 2 + 1] = 1; ndims1[i * 2 + 0] = dims1[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = 0; dims1B[i] = dims1[i]; ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } } complex float* src1B = md_alloc(N, dims1B, CFL_SIZE); md_resize_center(N, dims1B, src1B, dims1, src1, CFL_SIZE); complex float* tmp = md_alloc(2 * N, tdims, CFL_SIZE); long str1[2 * N]; long str2[2 * N]; md_calc_strides(2 * N, str1, ndims1, CFL_SIZE); md_calc_strides(2 * N, str2, tdims, CFL_SIZE); long off = md_calc_offset(2 * N, str1, shift); md_copy2(2 * N, tdims, str2, tmp, str1, ((void*)src1B) + off, CFL_SIZE); md_free(src1B); conv(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, nodims, dst, tdims, tmp, ndims2, src2); md_free(tmp); }
void overlapandsave2NE(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], complex float* src1, const long dims2[N], complex float* src2, const long mdims[N], complex float* msk) { long dims1B[N]; long tdims[2 * N]; long nodims[2 * N]; long ndims1[2 * N]; long ndims2[2 * N]; long shift[2 * N]; unsigned int nflags = 0; for (int i = 0; i < N; i++) { if (MD_IS_SET(flags, i)) { nflags = MD_SET(nflags, 2 * i); assert(1 == dims2[i] % 2); assert(0 == blk[i] % 2); assert(0 == dims1[i] % 2); assert(0 == odims[i] % blk[i]); assert(0 == dims1[i] % blk[i]); assert(dims1[i] == odims[i]); assert(dims2[i] <= blk[i]); assert(dims1[i] >= dims2[i]); // blocked output nodims[i * 2 + 1] = odims[i] / blk[i]; nodims[i * 2 + 0] = blk[i]; // expanded temporary storage tdims[i * 2 + 1] = dims1[i] / blk[i]; tdims[i * 2 + 0] = blk[i] + dims2[i] - 1; // blocked input // ---|---,---,---|--- // + +++ + // + +++ + // resized input dims1B[i] = dims1[i] + 2 * blk[i]; ndims1[i * 2 + 1] = dims1[i] / blk[i] + 2; // do we need two full blocks? ndims1[i * 2 + 0] = blk[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = blk[i] - (dims2[i] - 1) / 2; // kernel ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } else { nodims[i * 2 + 1] = 1; nodims[i * 2 + 0] = odims[i]; tdims[i * 2 + 1] = 1; tdims[i * 2 + 0] = dims1[i]; ndims1[i * 2 + 1] = 1; ndims1[i * 2 + 0] = dims1[i]; shift[i * 2 + 1] = 0; shift[i * 2 + 0] = 0; dims1B[i] = dims1[i]; ndims2[i * 2 + 1] = 1; ndims2[i * 2 + 0] = dims2[i]; } } long R = md_calc_size(N, odims); long T = md_calc_size(2 * N, tdims); long S = md_calc_size(N, dims1B); complex float* src1B = xmalloc(S * sizeof(complex float)); complex float* tmp = xmalloc(T * sizeof(complex float)); complex float* tmpX = xmalloc(R * sizeof(complex float)); long str1[2 * N]; long str2[2 * N]; md_calc_strides(2 * N, str1, ndims1, sizeof(complex float)); md_calc_strides(2 * N, str2, tdims, sizeof(complex float)); long off = md_calc_offset(2 * N, str1, shift); md_resize_center(N, dims1B, src1B, dims1, src1, sizeof(complex float)); // we can loop here md_copy2(2 * N, tdims, str2, tmp, str1, ((void*)src1B) + off, sizeof(complex float)); conv(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, nodims, tmpX, tdims, tmp, ndims2, src2); long ostr[N]; long mstr[N]; md_calc_strides(N, ostr, odims, sizeof(complex float)); md_calc_strides(N, mstr, mdims, sizeof(complex float)); md_zmul2(N, odims, ostr, tmpX, ostr, tmpX, mstr, msk); convH(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2); md_clear(N, dims1B, src1B, sizeof(complex float)); md_zadd2(2 * N, tdims, str1, ((void*)src1B) + off, str1, ((void*)src1B) + off, str2, tmp); // md_resize_center(N, dims1, dst, dims1B, src1B, sizeof(complex float)); free(src1B); free(tmpX); free(tmp); }
void iwt2(unsigned int N, unsigned int flags, const long shifts[N], const long odims[N], const long ostr[N], complex float* out, const long idims[N], const long istr[N], const complex float* in, const long minsize[N], const long flen, const float filter[2][2][flen]) { assert(wavelet_check_dims(N, flags, odims, minsize)); if (0 == flags) { // note: recursion does *not* end here assert(md_check_compat(N, 0u, odims, idims)); md_copy2(N, idims, ostr, out, istr, in, CFL_SIZE); return; } // check input dimensions long idims2[N]; wavelet_coeffs2(N, flags, idims2, odims, minsize, flen); assert(md_check_compat(N, 0u, idims2, idims)); long wdims2[2 * N]; wavelet_dims(N, flags, wdims2, odims, flen); // only consider transform dims... long dims1[N]; md_select_dims(N, flags, dims1, odims); long wdims[2 * N]; wavelet_dims(N, flags, wdims, dims1, flen); long level_coeffs = md_calc_size(2 * N, wdims); // ... which get embedded in dimension b unsigned int b = ffs(flags) - 1; long istr2[2 * N]; md_calc_strides(2 * N, istr2, wdims, istr[b]); // merge with original strides for (unsigned int i = 0; i < N; i++) if (!MD_IS_SET(flags, i)) istr2[i] = istr[i]; assert(idims[b] >= level_coeffs); long offset = (idims[b] - level_coeffs) * (istr[b] / CFL_SIZE); long bands = md_calc_size(N, wdims + N); long coeffs = md_calc_size(N, wdims + 0); // subtract coefficients in high band idims2[b] -= (bands - 1) * coeffs; assert(idims2[b] > 0); debug_printf(DP_DEBUG4, "ifwt2: flags:%d lcoeffs:%ld coeffs:%ld (space:%ld) bands:%ld str:%ld off:%ld\n", flags, level_coeffs, coeffs, idims2[b], bands, istr[b], offset / ostr[b]); // fix me we need temp storage complex float* tmp = md_alloc_sameplace(2 * N, wdims2, CFL_SIZE, out); long tstr[2 * N]; md_calc_strides(2 * N, tstr, wdims2, CFL_SIZE); md_copy2(2 * N, wdims2, tstr, tmp, istr2, in + offset, CFL_SIZE); long shifts0[N]; for (unsigned int i = 0; i < N; i++) shifts0[i] = 0; unsigned int flags2 = wavelet_filter_flags(N, flags, wdims, minsize); assert((0 == offset) == (0u == flags2)); if (0u != flags2) { long idims3[N]; wavelet_coeffs2(N, flags2, idims3, wdims2, minsize, flen); long istr3[N]; embed(N, flags, istr3, idims3, istr); iwt2(N, flags2, shifts0, wdims2, tstr, tmp, idims3, istr3, in, minsize, flen, filter); } iwtN(N, flags, shifts, odims, ostr, out, tstr, tmp, flen, filter); md_free(tmp); }