Beispiel #1
0
/*
 * Apply the reconstruction callback `grecon` separately to every position
 * along the readout dimension and assemble the results in `image`.
 *
 * For each readout index the matching sub-arrays of k-space, sensitivities
 * and (if `pattern` is non-NULL) the sampling pattern are copied into
 * freshly allocated contiguous buffers, `grecon` is called on them, and the
 * result is copied to the corresponding position of `image`.  The loop over
 * readout positions is an OpenMP parallel-for.
 *
 * grecon      - per-slice reconstruction callback
 * param       - opaque argument forwarded to `grecon`
 * dims        - full dimensions; image and k-space dimensions are selected
 *               from it with ~COIL_FLAG / ~MAPS_FLAG
 * img_strs    - strides (bytes) of `image`
 * sens_dims   - dimensions of `sens_maps`
 * pat_dims    - dimensions of `pattern` (only read if pattern != NULL)
 * ksp_strs    - strides (bytes) of `kspace_data`
 * output_ksp  - selects the image dimensions with ~MAPS_FLAG instead of
 *               ~COIL_FLAG
 * gpu         - forwarded to `grecon`; also selects the thread setup
 *
 * NOTE(review): the loop bound uses READ_DIM while all offsets use literal
 * index 0, so this presumably relies on READ_DIM == 0 - confirm.
 * The thread counts changed via omp_set_num_threads / fft_set_num_threads
 * are not restored on exit; only num_auto_parallelize is.
 */
void parslices2(grecon_fun_t grecon, void* param, const long dims[DIMS],
	const long img_strs[DIMS], complex float* image,
	const long sens_dims[DIMS], const complex float* sens_maps,
	const long pat_dims[DIMS], const complex float* pattern,
	const long ksp_strs[DIMS], const complex float* kspace_data,
	bool output_ksp, bool gpu)
{
	int N = DIMS;
	const bool have_pattern = (NULL != pattern);

	// ---- complete data set: dimensions and strides ----

	long ksp_dims[N];
	long img_dims[N];

	long strs[N];
	long sens_strs[N];
	long pat_strs[N];

	md_calc_strides(N, strs, dims, CFL_SIZE);
	md_calc_strides(N, sens_strs, sens_dims, CFL_SIZE);

	if (output_ksp)
		md_select_dims(N, ~MAPS_FLAG, img_dims, dims);
	else
		md_select_dims(N, ~COIL_FLAG, img_dims, dims);

	md_select_dims(N, ~MAPS_FLAG, ksp_dims, dims);

	// ---- single slice (readout removed): dimensions and strides ----

	long dims1[N];
	long strs1[N];

	md_select_dims(N, ~READ_FLAG, dims1, dims);
	md_calc_strides(N, strs1, dims1, CFL_SIZE);

	long sens1_dims[N];
	long sens1_strs[N];

	md_select_dims(N, ~READ_FLAG, sens1_dims, sens_dims);
	md_calc_strides(N, sens1_strs, sens1_dims, CFL_SIZE);

	long ksp1_dims[N];
	long ksp1_strs[N];

	md_select_dims(N, ~READ_FLAG, ksp1_dims, ksp_dims);
	md_calc_strides(N, ksp1_strs, ksp1_dims, CFL_SIZE);

	long img1_dims[N];
	long img1_strs[N];

	md_select_dims(N, ~READ_FLAG, img1_dims, img_dims);
	md_calc_strides(N, img1_strs, img1_dims, CFL_SIZE);

	// pattern-related arrays are only filled in when a pattern is given

	long pat1_dims[N];
	long pat1_strs[N];

	if (have_pattern) {

		md_calc_strides(N, pat_strs, pat_dims, CFL_SIZE);

		md_select_dims(N, ~READ_FLAG, pat1_dims, pat_dims);
		md_calc_strides(N, pat1_strs, pat1_dims, CFL_SIZE);
	}

	// switch off automatic parallelization while slices run in parallel

	bool saved_auto_par = num_auto_parallelize;
	num_auto_parallelize = false;

	if (!gpu) {

		fft_set_num_threads(1);

	} else {

#ifdef USE_CUDA
		int device_count = cuda_devices();
		omp_set_num_threads(device_count * 2);
#else
		assert(0);
#endif
	}

	int counter = 0;
	const long nr_slices = ksp_dims[READ_DIM];

	#pragma omp parallel for
	for (int s = 0; s < nr_slices; s++) {

		// private working copies for this slice

		complex float* slice_img = md_alloc(N, img1_dims, CFL_SIZE);
		md_clear(N, img1_dims, slice_img, CFL_SIZE);

		const char* ksp_src = ((const char*)kspace_data) + s * ksp_strs[0];
		complex float* slice_ksp = md_alloc(N, ksp1_dims, CFL_SIZE);
		md_copy2(N, ksp1_dims, ksp1_strs, slice_ksp, ksp_strs, ksp_src, CFL_SIZE);

		const char* sens_src = ((const char*)sens_maps) + s * sens_strs[0];
		complex float* slice_sens = md_alloc(N, sens1_dims, CFL_SIZE);
		md_copy2(N, sens1_dims, sens1_strs, slice_sens, sens_strs, sens_src, CFL_SIZE);

		complex float* slice_pat = NULL;

		if (have_pattern) {

			const char* pat_src = ((const char*)pattern) + s * pat_strs[0];
			slice_pat = md_alloc(N, pat1_dims, CFL_SIZE);
			md_copy2(N, pat1_dims, pat1_strs, slice_pat, pat_strs, pat_src, CFL_SIZE);
		}

		grecon(param, dims1, slice_img, sens1_dims, slice_sens, pat1_dims, slice_pat, slice_ksp, gpu);

		// write the slice result back into the full output array

		char* img_dst = ((char*)image) + s * img_strs[0];
		md_copy2(N, img1_dims, img_strs, img_dst, img1_strs, slice_img, CFL_SIZE);

		if (have_pattern)
			md_free((void*)slice_pat);

		md_free(slice_img);
		md_free(slice_ksp);
		md_free(slice_sens);

		// progress output; the shared counter is only touched in here
		#pragma omp critical
		{
			debug_printf(DP_DEBUG2, "%04d/%04ld    \r", ++counter, ksp_dims[0]);
		}
	}

	num_auto_parallelize = saved_auto_par;

	debug_printf(DP_DEBUG2, "\n");
}
/*
 * Blocked evaluation of: convolve -> multiply with mask -> adjoint convolve.
 *
 * The dimensions selected by `flags` are split into blocks of size `blk`.
 * For every block the (overlapping) input window is copied into a temporary,
 * convolved with `src2` (conv, CONV_VALID / CONV_SYMMETRIC), multiplied
 * with the matching part of `msk` (md_zmul2), passed through convH with the
 * same kernel, and finally accumulated into `dst` (md_zadd2).
 *
 * ops    - supplies `allocate` / `del` for the per-block temporaries
 * N      - number of dimensions
 * flags  - bitmask of dimensions to block; the asserts below require that
 *          exactly three dimensions are selected (NE == N + 3)
 * blk    - block size per dimension (even, divides dims1 and odims)
 * odims  - output dimensions; must equal dims1 in flagged dimensions
 * dst    - output; it is cleared with extent dims1B = dims1 + dims2 - 1 in
 *          flagged dimensions, so the buffer must be at least that large
 * dims1  - nominal input dimensions
 * src1   - input; it is read with strides computed from dims1B
 *          NOTE(review): this looks like the caller must pass an already
 *          padded buffer - confirm against callers
 * dims2  - kernel dimensions (odd and <= blk in flagged dimensions)
 * src2   - kernel
 * mdims  - mask dimensions (1 or dims1[i] in flagged dimensions)
 * msk    - mask
 */
void overlapandsave2NEB(const struct vec_ops* ops, int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], const complex float* src1, const long dims2[N], const complex float* src2, const long mdims[N], const complex float* msk)
{
    long dims1B[N];

    long tdims[2 * N];
    long nodims[2 * N];
    long ndims2[2 * N];
    long nmdims[2 * N];


    // block-index dimensions of flagged dims are appended starting at N
    int e = N;

    for (int i = 0; i < N; i++) {

        if (MD_IS_SET(flags, i)) {

            assert(1 == dims2[i] % 2);
            assert(0 == blk[i] % 2);
            assert(0 == dims1[i] % 2);
            assert(0 == odims[i] % blk[i]);
            assert(0 == dims1[i] % blk[i]);
            assert(dims1[i] == odims[i]);
            assert(dims2[i] <= blk[i]);
            assert(dims1[i] >= dims2[i]);
            assert((1 == mdims[i]) || (mdims[i] == dims1[i]));

            // blocked output

            nodims[e] = odims[i] / blk[i];
            nodims[i] = blk[i];

            // expanded temporary storage

            tdims[e] = dims1[i] / blk[i];
            tdims[i] = blk[i] + dims2[i] - 1;

            // blocked input

            // ---|---,---,---|---
            //   + +++ +
            //       + +++ +

            // blocked mask: entry 2*i+0 is the extent inside a block,
            // entry 2*i+1 the number of blocks

            if (1 == mdims[i]) {

                // Singleton mask dimension: BOTH entries have to be 1.
                // (Previously index 2*i+1 was assigned twice, leaving
                // nmdims[2*i+0] uninitialized before md_calc_strides.)
                nmdims[2 * i + 1] = 1;
                nmdims[2 * i + 0] = 1;

            } else {

                nmdims[2 * i + 1] = mdims[i] / blk[i];
                nmdims[2 * i + 0] = blk[i];
            }

            // resized input
            // minimal padding
            dims1B[i] = dims1[i] + (dims2[i] - 1);

            // kernel

            ndims2[e] = 1;
            ndims2[i] = dims2[i];

            e++;

        } else {

            nodims[i] = odims[i];
            tdims[i] = dims1[i];
            nmdims[2 * i + 1] = 1;
            nmdims[2 * i + 0] = mdims[i];

            dims1B[i] = dims1[i];
            ndims2[i] = dims2[i];
        }
    }

    int NE = e;

    // strides of the padded input, extended by one block stride per
    // flagged dimension

    long str1[NE];

    long str1B[N];
    md_calc_strides(N, str1B, dims1B, sizeof(complex float));

    e = N;
    for (int i = 0; i < N; i++) {

        str1[i] = str1B[i];

        if (MD_IS_SET(flags, i))
            str1[e++] = str1B[i] * blk[i];
    }
    assert(NE == e);


    long str2[NE];
    md_calc_strides(NE, str2, tdims, sizeof(complex float));


    long ostr[NE];
    long mstr[NE];
    long mstrB[2 * N];

    md_calc_strides(NE, ostr, nodims, sizeof(complex float));
    md_calc_strides(2 * N, mstrB, nmdims, sizeof(complex float));

    // reorder interleaved mask strides into the (N + block dims) layout

    e = N;
    for (int i = 0; i < N; i++) {

        mstr[i] = mstrB[2 * i + 0];

        if (MD_IS_SET(flags, i))
            mstr[e++] = mstrB[2 * i + 1];
    }
    assert(NE == e);


    // the input is used directly; no padded copy is made here
    const complex float* src1B = src1;

    // the explicit triple loop below handles exactly three blocked dims
    assert(NE == N + 3);
    assert(1 == ndims2[N + 0]);
    assert(1 == ndims2[N + 1]);
    assert(1 == ndims2[N + 2]);
    assert(tdims[N + 0] == nodims[N + 0]);
    assert(tdims[N + 1] == nodims[N + 1]);
    assert(tdims[N + 2] == nodims[N + 2]);

    // element counts of one output block and one input window
    long R = md_calc_size(N, nodims);
    long T = md_calc_size(N, tdims);


    // results are accumulated directly in the output buffer
    complex float* src1C = dst;

    md_clear(N, dims1B, src1C, sizeof(complex float));	// must be done here

    #pragma omp parallel for collapse(3)
    for (int k = 0; k < nodims[N + 2]; k++) {
        for (int j = 0; j < nodims[N + 1]; j++) {
            for (int i = 0; i < nodims[N + 0]; i++) {

                // per-block scratch buffers
                // (2 * count: presumably the allocator counts floats - TODO confirm)
                complex float* tmp = (complex float*)ops->allocate(2 * T);
                complex float* tmpX = (complex float*)ops->allocate(2 * R);

                // byte offsets of this block in the input/output and in the mask
                long off1 = str1[N + 0] * i + str1[N + 1] * j + str1[N + 2] * k;
                long off2 = mstr[N + 0] * i + mstr[N + 1] * j + mstr[N + 2] * k;

                md_copy2(N, tdims, str2, tmp, str1, ((const void*)src1B) + off1, sizeof(complex float));
                conv(N, flags, CONV_VALID, CONV_SYMMETRIC, nodims, tmpX, tdims, tmp, ndims2, src2);
                md_zmul2(N, nodims, ostr, tmpX, ostr, tmpX, mstr, ((const void*)msk) + off2);
                convH(N, flags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2);

                // windows of neighbouring blocks overlap in the output,
                // so the accumulation is serialized
                #pragma omp critical
                md_zadd2(N, tdims, str1, ((void*)src1C) + off1, str1, ((void*)src1C) + off1, str2,  tmp);

                ops->del((void*)tmpX);
                ops->del((void*)tmp);
            }
        }
    }
}
/*
 * Convolution of `src1` with kernel `src2`, evaluated block-wise in the
 * dimensions selected by `flags`.
 *
 * Every dimension i is split into two: index 2*i+0 runs inside a block and
 * index 2*i+1 over the blocks (extent 1 for dimensions that are not
 * flagged).  The input is resized to dims1 + 2 * blk (md_resize_center),
 * overlapping windows of extent blk + dims2 - 1 are gathered into a
 * temporary, and a single call to conv (CONV_VALID, CONV_SYMMETRIC) over
 * the 2*N-dimensional arrays writes the result to `dst`.
 *
 * N      - number of dimensions
 * flags  - bitmask of dimensions that are convolved / blocked
 * blk    - block size per dimension (even, divides dims1 and odims)
 * odims  - output dimensions (equal to dims1 in flagged dimensions)
 * dst    - output array
 * dims1  - input dimensions
 * src1   - input array
 * dims2  - kernel dimensions (odd and <= blk in flagged dimensions)
 * src2   - kernel array
 */
void overlapandsave2(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], const complex float* src1, const long dims2[N], const complex float* src2)
{
    long dims1B[N];         // dimensions of the resized input

    long tdims[2 * N];      // overlapping windows (temporary storage)
    long nodims[2 * N];     // blocked output
    long ndims1[2 * N];     // blocked resized input
    long ndims2[2 * N];     // kernel
    long shift[2 * N];      // position of the first window

    unsigned int nflags = 0;

    for (int d = 0; d < N; d++) {

        const int inner = 2 * d + 0;    // position inside a block
        const int outer = 2 * d + 1;    // block index

        // these entries do not depend on whether d is flagged

        ndims2[outer] = 1;
        ndims2[inner] = dims2[d];
        shift[outer] = 0;

        if (!MD_IS_SET(flags, d)) {

            // untouched dimension: one "block" spanning everything

            nodims[outer] = 1;
            nodims[inner] = odims[d];

            tdims[outer] = 1;
            tdims[inner] = dims1[d];

            ndims1[outer] = 1;
            ndims1[inner] = dims1[d];

            shift[inner] = 0;

            dims1B[d] = dims1[d];

            continue;
        }

        nflags = MD_SET(nflags, 2 * d);

        assert(1 == dims2[d] % 2);
        assert(0 == blk[d] % 2);
        assert(0 == dims1[d] % 2);
        assert(0 == odims[d] % blk[d]);
        assert(0 == dims1[d] % blk[d]);
        assert(dims1[d] == odims[d]);
        assert(dims2[d] <= blk[d]);
        assert(dims1[d] >= dims2[d]);

        const long bsize = blk[d];
        const long nblocks = dims1[d] / bsize;

        // blocked output

        nodims[outer] = odims[d] / bsize;
        nodims[inner] = bsize;

        // every window is a block extended by the kernel size minus one

        tdims[outer] = nblocks;
        tdims[inner] = bsize + dims2[d] - 1;

        // ---|---,---,---|---
        //   + +++ +
        //       + +++ +

        // resized input: two additional blocks

        dims1B[d] = dims1[d] + 2 * bsize;

        ndims1[outer] = nblocks + 2;    // do we need two full blocks?
        ndims1[inner] = bsize;

        shift[inner] = bsize - (dims2[d] - 1) / 2;
    }

    // strides of the blocked resized input and of the window buffer

    long in_strs[2 * N];
    long win_strs[2 * N];

    md_calc_strides(2 * N, in_strs, ndims1, CFL_SIZE);
    md_calc_strides(2 * N, win_strs, tdims, CFL_SIZE);

    long first = md_calc_offset(2 * N, in_strs, shift);

    // resized copy of the input

    complex float* padded = md_alloc(N, dims1B, CFL_SIZE);

    md_resize_center(N, dims1B, padded, dims1, src1, CFL_SIZE);

    // gather the overlapping windows

    complex float* windows = md_alloc(2 * N, tdims, CFL_SIZE);

    md_copy2(2 * N, tdims, win_strs, windows, in_strs, ((char*)padded) + first, CFL_SIZE);

    md_free(padded);

    conv(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, nodims, dst, tdims, windows, ndims2, src2);

    md_free(windows);
}
/*
 * Block-wise evaluation of: convolve -> multiply with mask -> convH.
 *
 * The input is resized to dims1 + 2 * blk in the flagged dimensions
 * (md_resize_center) and overlapping windows of extent blk + dims2 - 1 are
 * gathered.  The windows are convolved with `src2` (conv), the result is
 * multiplied element-wise with `msk` (md_zmul2) and passed through convH
 * with the same kernel.  The windows are then added back (md_zadd2) into
 * the cleared resized buffer, which is finally resized to dims1 and stored
 * in `dst`.
 *
 * N      - number of dimensions
 * flags  - bitmask of dimensions that are convolved / blocked
 * blk    - block size per dimension (even, divides dims1 and odims)
 * odims  - dimensions of the intermediate convolution result
 *          (equal to dims1 in flagged dimensions)
 * dst    - output array with dimensions dims1
 * dims1  - input dimensions
 * src1   - input array
 * dims2  - kernel dimensions (odd and <= blk in flagged dimensions)
 * src2   - kernel array
 * mdims  - mask dimensions
 * msk    - mask array
 */
void overlapandsave2NE(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], complex float* src1, const long dims2[N], complex float* src2, const long mdims[N], complex float* msk)
{
    long dims1B[N];         // dimensions of the resized input

    long tdims[2 * N];      // overlapping windows
    long nodims[2 * N];     // blocked output
    long ndims1[2 * N];     // blocked resized input
    long ndims2[2 * N];     // kernel
    long shift[2 * N];      // position of the first window

    unsigned int nflags = 0;

    for (int d = 0; d < N; d++) {

        const int in_blk = 2 * d;       // position inside a block
        const int blk_no = 2 * d + 1;   // block index

        if (MD_IS_SET(flags, d)) {

            nflags = MD_SET(nflags, 2 * d);

            assert(1 == dims2[d] % 2);
            assert(0 == blk[d] % 2);
            assert(0 == dims1[d] % 2);
            assert(0 == odims[d] % blk[d]);
            assert(0 == dims1[d] % blk[d]);
            assert(dims1[d] == odims[d]);
            assert(dims2[d] <= blk[d]);
            assert(dims1[d] >= dims2[d]);

            const long halo = dims2[d] - 1;

            // blocked output
            nodims[in_blk] = blk[d];
            nodims[blk_no] = odims[d] / blk[d];

            // windows: one block plus the kernel overlap
            tdims[in_blk] = blk[d] + halo;
            tdims[blk_no] = dims1[d] / blk[d];

            // ---|---,---,---|---
            //   + +++ +
            //       + +++ +

            // resized input with two extra blocks
            dims1B[d] = dims1[d] + 2 * blk[d];

            ndims1[in_blk] = blk[d];
            ndims1[blk_no] = dims1[d] / blk[d] + 2; // do we need two full blocks?

            shift[in_blk] = blk[d] - halo / 2;

        } else {

            // dimension is passed through as a single block

            nodims[in_blk] = odims[d];
            nodims[blk_no] = 1;

            tdims[in_blk] = dims1[d];
            tdims[blk_no] = 1;

            dims1B[d] = dims1[d];

            ndims1[in_blk] = dims1[d];
            ndims1[blk_no] = 1;

            shift[in_blk] = 0;
        }

        // identical for both cases

        shift[blk_no] = 0;

        ndims2[in_blk] = dims2[d];
        ndims2[blk_no] = 1;
    }

    const size_t elsize = sizeof(complex float);

    long out_elems = md_calc_size(N, odims);
    long win_elems = md_calc_size(2 * N, tdims);
    long pad_elems = md_calc_size(N, dims1B);

    complex float* padded = xmalloc(pad_elems * elsize);
    complex float* windows = xmalloc(win_elems * elsize);
    complex float* filtered = xmalloc(out_elems * elsize);

    long in_strs[2 * N];
    long win_strs[2 * N];

    md_calc_strides(2 * N, in_strs, ndims1, elsize);
    md_calc_strides(2 * N, win_strs, tdims, elsize);

    long first = md_calc_offset(2 * N, in_strs, shift);

    md_resize_center(N, dims1B, padded, dims1, src1, elsize);

    // we can loop here

    // forward: gather windows and convolve

    md_copy2(2 * N, tdims, win_strs, windows, in_strs, ((char*)padded) + first, elsize);

    conv(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, nodims, filtered, tdims, windows, ndims2, src2);

    // apply the mask in place

    long ostr[N];
    long mstr[N];

    md_calc_strides(N, ostr, odims, elsize);
    md_calc_strides(N, mstr, mdims, elsize);

    md_zmul2(N, odims, ostr, filtered, ostr, filtered, mstr, msk);

    // back: convH into the windows, then accumulate them in the cleared buffer

    convH(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, tdims, windows, nodims, filtered, ndims2, src2);

    md_clear(N, dims1B, padded, elsize);
    md_zadd2(2 * N, tdims, in_strs, ((char*)padded) + first, in_strs, ((char*)padded) + first, win_strs, windows);

    // resize back to the original dimensions

    md_resize_center(N, dims1, dst, dims1B, padded, elsize);

    free(padded);
    free(filtered);
    free(windows);
}
Beispiel #5
0
/*
 * One level of a multi-level transform from the coefficient array `in`
 * to `out`; the debug message calls it "ifwt2", i.e. presumably the inverse
 * wavelet transform - TODO confirm.
 *
 * With flags == 0 the input is simply copied to the output.  Otherwise the
 * coefficients of the current level are copied from the end of `in` into a
 * temporary, the function recurses for the remaining flags (flags2) if
 * there are any, and iwtN is called on the temporary to produce `out`.
 *
 * N        - number of dimensions
 * flags    - bitmask of dimensions to transform
 * shifts   - forwarded to iwtN for this level; recursive calls use all zeros
 * odims    - output dimensions
 * ostr     - output strides (bytes)
 * out      - output array; also decides where the temporary is allocated
 * idims    - input (coefficient) dimensions
 * istr     - input strides (bytes)
 * in       - input coefficients
 * minsize  - forwarded to the wavelet_* helpers
 * flen     - filter length
 * filter   - filter coefficients, forwarded to iwtN
 */
void iwt2(unsigned int N, unsigned int flags, const long shifts[N], const long odims[N], const long ostr[N], complex float* out, const long idims[N], const long istr[N], const complex float* in, const long minsize[N], const long flen, const float filter[2][2][flen])
{
	assert(wavelet_check_dims(N, flags, odims, minsize));

	if (0 == flags) {	// note: recursion does *not* end here

		// nothing to transform: plain strided copy from input to output

		assert(md_check_compat(N, 0u, odims, idims));

		md_copy2(N, idims, ostr, out, istr, in, CFL_SIZE);

		return;
	}

	// check input dimensions: idims must be compatible with what
	// wavelet_coeffs2 computes for this output size

	long idims2[N];
	wavelet_coeffs2(N, flags, idims2, odims, minsize, flen);

	assert(md_check_compat(N, 0u, idims2, idims));

	// 2*N-dimensional layout of this level for the full output dims;
	// this is also the shape of the temporary buffer below

	long wdims2[2 * N];
	wavelet_dims(N, flags, wdims2, odims, flen);

	// only consider transform dims...

	long dims1[N];
	md_select_dims(N, flags, dims1, odims);

	long wdims[2 * N];
	wavelet_dims(N, flags, wdims, dims1, flen);
	long level_coeffs = md_calc_size(2 * N, wdims);

	// ... which get embedded in dimension b
	// (b is the lowest dimension set in flags)

	unsigned int b = ffs(flags) - 1;

	// strides for reading this level's coefficients, built on the input
	// stride of dimension b

	long istr2[2 * N];
	md_calc_strides(2 * N, istr2, wdims, istr[b]);

	// merge with original strides (dimensions that are not transformed)

	for (unsigned int i = 0; i < N; i++)
		if (!MD_IS_SET(flags, i))
			istr2[i] = istr[i];

	assert(idims[b] >= level_coeffs);

	// this level's coefficients are the last level_coeffs entries along
	// dimension b; offset is counted in elements, not bytes

	long offset = (idims[b] - level_coeffs) * (istr[b] / CFL_SIZE);

	// wdims + N: second half of wdims, wdims + 0: first half
	long bands = md_calc_size(N, wdims + N);
	long coeffs = md_calc_size(N, wdims + 0);

	// subtract coefficients in high band

	idims2[b] -= (bands - 1) * coeffs;

	assert(idims2[b] > 0);

	// NOTE(review): offset is derived from istr[b] but divided by ostr[b]
	// in this message, and the unsigned flags are printed with %d - confirm
	debug_printf(DP_DEBUG4, "ifwt2: flags:%d lcoeffs:%ld coeffs:%ld (space:%ld) bands:%ld str:%ld off:%ld\n", flags, level_coeffs, coeffs, idims2[b], bands, istr[b], offset / ostr[b]);

	// temporary storage for this level (original note: "fix me")
	complex float* tmp = md_alloc_sameplace(2 * N, wdims2, CFL_SIZE, out);

	long tstr[2 * N];
	md_calc_strides(2 * N, tstr, wdims2, CFL_SIZE);

	// gather this level's coefficients into the temporary

	md_copy2(2 * N, wdims2, tstr, tmp, istr2, in + offset, CFL_SIZE);

	// recursive calls do not use shifts

	long shifts0[N];
	for (unsigned int i = 0; i < N; i++)
		shifts0[i] = 0;

	// dimensions that still need further levels; no remaining flags must
	// coincide with a zero offset

	unsigned int flags2 = wavelet_filter_flags(N, flags, wdims, minsize);
	assert((0 == offset) == (0u == flags2));

	if (0u != flags2) {

		// next level: the recursive call writes into tmp (dims wdims2,
		// strides tstr) and reads from the start of `in`

		long idims3[N];
		wavelet_coeffs2(N, flags2, idims3, wdims2, minsize, flen);

		long istr3[N];
		embed(N, flags, istr3, idims3, istr);

		iwt2(N, flags2, shifts0, wdims2, tstr, tmp, idims3, istr3, in, minsize, flen, filter);
	}

	// this level: transform the temporary into the output

	iwtN(N, flags, shifts, odims, ostr, out, tstr, tmp, flen, filter);

	md_free(tmp);
}