Пример #1
0
Файл: sum.c Проект: grlee77/bart
/**
 * Apply the regularized inverse
 *
 * x = (ATA + uI)^-1 b
 *
 * with b = src, x = dst and u = rho.
 *
 * The input is split into its average (accumulated into data->tmp using
 * the img_strs strides and divided by data->levels) and the remainder
 * (src minus that average). The average is scaled by 1 / (1 + rho), the
 * remainder by 1 / rho, and the two parts are added back into dst.
 *
 * data->tmp is allocated on first use and is not released here.
 * rho is used as a divisor and is not checked against zero.
 */
void sum_apply_pinverse(const void* _data, float rho, complex float* dst, const complex float* src)
{
	struct sum_data* data = (struct sum_data*) _data;

	// create the scratch buffer for the average on first use
	if (NULL == data->tmp) {

#ifdef USE_CUDA
		data->tmp = (data->use_gpu ? md_alloc_gpu : md_alloc)(DIMS, data->img_dims, CFL_SIZE);
#else
		data->tmp = md_alloc(DIMS, data->img_dims, CFL_SIZE);
#endif
	}

	complex float* avg = data->tmp;

	// avg = sum of src (accumulated via img_strs) / levels
	md_clear(DIMS, data->img_dims, avg, sizeof(complex float));
	md_zadd2(DIMS, data->imgd_dims, data->img_strs, avg, data->img_strs, avg, data->imgd_strs, src);
	md_zsmul(DIMS, data->img_dims, avg, avg, 1. / data->levels);

	// dst = src - avg (the non-average part)
	md_zsub2(DIMS, data->imgd_dims, data->imgd_strs, dst, data->imgd_strs, src, data->img_strs, avg);

	// non-average part: dst = dst / rho
	md_zsmul(DIMS, data->imgd_dims, dst, dst, 1. / rho);

	// average part: avg = avg / (1 + rho)
	md_zsmul(DIMS, data->img_dims, avg, avg, 1. / (1. + rho));

	// dst = scaled non-average + scaled average
	md_zadd2(DIMS, data->imgd_dims, data->imgd_strs, dst, data->imgd_strs, dst, data->img_strs, avg);
}
Пример #2
0
/**
 * Accumulate a matrix with dimensions odim[2] back into an N-dimensional
 * array (the "H" suffix suggests this is the adjoint of the Casorati
 * matrix construction - NOTE(review): confirm against casorati_matrix).
 *
 * The 2N-dimensional geometry (dimc, str2) comes from calc_casorati_geom().
 * optr is cleared over dim and every element of iptr is then added into
 * optr at the position given by the str2 strides.
 *
 * @param N     number of dimensions of the array
 * @param dimk  dimensions passed to calc_casorati_geom() together with dim and str
 * @param dim   dimensions of the output array
 * @param str   strides of the output array
 * @param optr  output array, overwritten
 * @param odim  matrix dimensions; must equal the sizes of the two halves of dimc
 * @param iptr  input matrix, read with contiguous strides over dimc
 */
void casorati_matrixH(unsigned int N, const long dimk[N], const long dim[N], const long str[N], complex float* optr, const long odim[2], const complex float* iptr)
{
	long dimc[2 * N];
	long str2[2 * N];

	calc_casorati_geom(N, dimc, str2, dimk, dim, str);

	// the two halves of dimc must match the rows and columns of the matrix
	assert(odim[0] == md_calc_size(N, dimc));
	assert(odim[1] == md_calc_size(N, dimc + N));

	// contiguous strides for reading the matrix
	long strc[2 * N];
	md_calc_strides(2 * N, strc, dimc, CFL_SIZE);

	// start from zero, then accumulate each matrix entry into the array
	md_clear(N, dim, optr, CFL_SIZE);
	md_zadd2(2 * N, dimc, str2, optr, str2, optr, strc, iptr);
}
Пример #3
0
/**
 * Block-wise convolution using overlap-and-add.
 *
 * Every dimension i of src1 is split into dims[i] / blk[i] blocks of
 * length blk[i]. Each block is convolved with src2 (CONV_EXTENDED,
 * CONV_CAUSAL), giving extended blocks of length blk[i] + dim2[i] - 1.
 * The extended blocks are then added into dst so that the tail of each
 * block overlaps the start of the next one.
 *
 * @param N     number of dimensions
 * @param dims  dimensions of src1; each must be a multiple of blk
 * @param blk   block size per dimension
 * @param dst   output; cleared and written with blocked dimensions
 *              (blk[i], dims[i] / blk[i] + 1), i.e. it must provide one
 *              extra block per dimension
 * @param src1  input data
 * @param dim2  dimensions of the kernel; each must be <= blk
 * @param src2  kernel
 */
void overlapandadd(int N, const long dims[N], const long blk[N], complex float* dst, complex float* src1, const long dim2[N], complex float* src2)
{
    long ndims[2 * N];
    long L[2 * N];
    long ndim2[2 * N];
    long ndim3[2 * N];

    for (int i = 0; i < N; i++) {

        assert(0 == dims[i] % blk[i]);
        assert(dim2[i] <= blk[i]);

        // blocked input: (position in block, block index)
        ndims[i * 2 + 1] = dims[i] / blk[i];
        ndims[i * 2 + 0] = blk[i];

        // extended blocks produced by the convolution
        L[i * 2 + 1] = dims[i] / blk[i];
        L[i * 2 + 0] = blk[i] + dim2[i] - 1;

        // kernel is not blocked
        ndim2[i * 2 + 1] = 1;
        ndim2[i * 2 + 0] = dim2[i];

        // blocked output with one additional block for the last tail
        ndim3[i * 2 + 1] = dims[i] / blk[i] + 1;
        ndim3[i * 2 + 0] = blk[i];
    }

    // element size is CFL_SIZE, as for the md_clear below (was a literal 8)
    long T = md_calc_size(2 * N, L);
    complex float* tmp = xmalloc(T * CFL_SIZE);

    conv(2 * N, ~0, CONV_EXTENDED, CONV_CAUSAL, L, tmp, ndims, src1, ndim2, src2);
    // [------++++||||||||

    long str2[2 * N];
    long str3[2 * N];

    md_calc_strides(2 * N, str2, L, CFL_SIZE);
    md_calc_strides(2 * N, str3, ndim3, CFL_SIZE);

    // iterate over the extended blocks (L) but address dst with the
    // strides of the non-extended blocking, so the tails overlap
    md_clear(2 * N, ndim3, dst, CFL_SIZE);
    md_zadd2(2 * N, L, str3, dst, str3, dst, str2, tmp);

    free(tmp);
}
Пример #4
0
/*
 * Implements cumulative sum operator (order 1 for now)
 * using circular shift: cumsum(x) = x + circshift(x,1) + circshift(x,2) + ...
 *
 * optr = cumsum(iptr)
 *
 * The sum is taken along every dimension selected in flags, one
 * dimension after the other. For each shift d the wrapped-around
 * entries (the first d positions along the dimension) are zeroed
 * before the shifted copy is added to the output.
 *
 * tmp and tmp2 are caller-provided scratch buffers. Both are addressed
 * with istrs throughout this function.
 */
static void md_zcumsum_core2(unsigned int D, const long dims[D], unsigned int flags, complex float* tmp, complex float* tmp2, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr)
{
	// out = in; tmp holds the data that is shifted
	md_copy2(D, dims, ostrs, optr, istrs, iptr, sizeof(complex float));
	md_copy2(D, dims, istrs, tmp, istrs, iptr, sizeof(complex float));

	long zdims[D];
	long center[D];

	md_select_dims(D, ~0, zdims, dims);
	memset(center, 0, D * sizeof(long));

	for (unsigned int i = 0; i < D; i++) {

		if (!(MD_IS_SET(flags, i)))
			continue;

		for (long d = 1; d < dims[i]; d++) {

			// tmp2 = tmp circularly shifted by d along dimension i
			center[i] = d;
			md_circ_shift2(D, dims, center, istrs, tmp2, istrs, tmp, sizeof(complex float));

			// zero the first d entries along dimension i (wrapped-around data)
			zdims[i] = d;
			md_clear2(D, zdims, istrs, tmp2, sizeof(complex float));

			// out = out + tmp2
			md_zadd2(D, dims, ostrs, optr, istrs, tmp2, ostrs, optr);
		}

		// The result for this dimension is the input for the next one.
		// tmp is read with istrs above, so it has to be written with
		// istrs as well (this used ostrs before, which only works when
		// both stride sets are identical).
		md_copy2(D, dims, istrs, tmp, ostrs, optr, sizeof(complex float));

		// restore the helper arrays for the next dimension
		center[i] = 0;
		zdims[i] = dims[i];
	}
}
Пример #5
0
/**
 * Block-wise masked adjoint convolution (overlap-and-save geometry).
 *
 * For every block of src1: multiply the block with the mask msk, apply
 * convH() with kernel src2 (CONV_VALID, CONV_SYMMETRIC) and add the
 * extended result into dst at the position of the block.
 *
 * Dimensions selected in flags are split into blocks of size blk[i];
 * the block indices are appended as extra dimensions N, N + 1, ...
 * Exactly three dimensions must be selected (asserted below), because
 * the block loop is written as three nested loops.
 *
 * @param ops    provides allocate()/del() for the per-block scratch buffers
 * @param N      number of dimensions
 * @param flags  bitmask of the blocked (convolved) dimensions
 * @param blk    block sizes (even, dividing dims1 and odims)
 * @param dims1  dimensions of the result before padding
 * @param dst    output; it is cleared and accumulated over the padded
 *               dimensions dims1[i] + dims2[i] - 1 and therefore has to
 *               provide that much storage
 * @param odims  dimensions of src1 (must equal dims1 in blocked dimensions)
 * @param src1   input
 * @param dims2  kernel dimensions (odd and <= blk in blocked dimensions)
 * @param src2   kernel
 * @param mdims  mask dimensions (1 or dims1[i] in blocked dimensions)
 * @param msk    mask
 */
void overlapandsave2HB(const struct vec_ops* ops, int N, unsigned int flags, const long blk[N], const long dims1[N], complex float* dst, const long odims[N], const complex float* src1, const long dims2[N], const complex float* src2, const long mdims[N], const complex float* msk)
{
	long dims1B[N];

	long tdims[2 * N];
	long nodims[2 * N];
	long ndims2[2 * N];
	long nmdims[2 * N];


	// index of the next appended block dimension
	int e = N;

	for (int i = 0; i < N; i++) {

		if (MD_IS_SET(flags, i)) {

			assert(1 == dims2[i] % 2);
			assert(0 == blk[i] % 2);
			assert(0 == dims1[i] % 2);
			assert(0 == odims[i] % blk[i]);
			assert(0 == dims1[i] % blk[i]);
			assert(dims1[i] == odims[i]);
			assert(dims2[i] <= blk[i]);
			assert(dims1[i] >= dims2[i]);
			assert((1 == mdims[i]) || (mdims[i] == dims1[i]));

			// blocked output

			nodims[e] = odims[i] / blk[i];
			nodims[i] = blk[i];

			// expanded temporary storage

			tdims[e] = dims1[i] / blk[i];
			tdims[i] = blk[i] + dims2[i] - 1;

			// blocked input

			// ---|---,---,---|---
			//   + +++ +
			//       + +++ +

			// blocked mask, stored as (position in block, block index) pairs

			if (1 == mdims[i]) {

				// Both entries have to be set: nmdims is read by
				// md_calc_strides() below. The second assignment used
				// index 2 * i + 1 again and left 2 * i + 0 uninitialized.
				nmdims[2 * i + 1] = 1;
				nmdims[2 * i + 0] = 1;

			} else {

				nmdims[2 * i + 1] = mdims[i] / blk[i];
				nmdims[2 * i + 0] = blk[i];
			}

			// resized input
			// minimal padding
			dims1B[i] = dims1[i] + (dims2[i] - 1);

			// kernel

			ndims2[e] = 1;
			ndims2[i] = dims2[i];

			e++;

		} else {

			nodims[i] = odims[i];
			tdims[i] = dims1[i];
			nmdims[2 * i + 1] = 1;
			nmdims[2 * i + 0] = mdims[i];

			dims1B[i] = dims1[i];
			ndims2[i] = dims2[i];
		}
	}

	int NE = e;

	// strides into the padded result: the block dimensions step by
	// blk[i] elements of the corresponding original dimension

	long str1[NE];

	long str1B[N];
	md_calc_strides(N, str1B, dims1B, sizeof(complex float));

	e = N;
	for (int i = 0; i < N; i++) {

		str1[i] = str1B[i];

		if (MD_IS_SET(flags, i))
			str1[e++] = str1B[i] * blk[i];
	}
	assert(NE == e);



	// strides of the per-block temporary
	long str2[NE];
	md_calc_strides(NE, str2, tdims, sizeof(complex float));


	long ostr[NE];
	long mstr[NE];
	long mstrB[2 * N];

	md_calc_strides(NE, ostr, nodims, sizeof(complex float));
	md_calc_strides(2 * N, mstrB, nmdims, sizeof(complex float));

	// reorder the interleaved mask strides into the
	// (original dimensions..., block dimensions...) layout

	e = N;
	for (int i = 0; i < N; i++) {

		mstr[i] = mstrB[2 * i + 0];

		if (MD_IS_SET(flags, i))
			mstr[e++] = mstrB[2 * i + 1];
	}
	assert(NE == e);
	
	// we can loop here
	assert(NE == N + 3);
	assert(1 == ndims2[N + 0]);
	assert(1 == ndims2[N + 1]);
	assert(1 == ndims2[N + 2]);
	assert(tdims[N + 0] == nodims[N + 0]);
	assert(tdims[N + 1] == nodims[N + 1]);
	assert(tdims[N + 2] == nodims[N + 2]);

	// sizes of a single block (first N dimensions only)
	long R = md_calc_size(N, nodims);
	long T = md_calc_size(N, tdims);


	// the result is accumulated directly in dst
	complex float* src1C = dst;

	md_clear(N, dims1B, src1C, CFL_SIZE);	// must be done here

	#pragma omp parallel for collapse(3)
	for (int k = 0; k < nodims[N + 2]; k++) {
	for (int j = 0; j < nodims[N + 1]; j++) {
	for (int i = 0; i < nodims[N + 0]; i++) {

		    // per-block scratch buffers
		    // NOTE(review): the factor 2 presumably counts floats per complex value - confirm with vec_ops
		    complex float* tmp = (complex float*)ops->allocate(2 * T);
		    complex float* tmpX = (complex float*)ops->allocate(2 * R);

		    // byte offsets of block (i, j, k) in result, mask and input
		    long off1 = str1[N + 0] * i + str1[N + 1] * j + str1[N + 2] * k;
		    long off2 = mstr[N + 0] * i + mstr[N + 1] * j + mstr[N + 2] * k;
		    long off3 = ostr[N + 0] * i + ostr[N + 1] * j + ostr[N + 2] * k;

		    md_zmul2(N, nodims, ostr, tmpX, ostr, ((const void*)src1) + off3, mstr, ((const void*)msk) + off2);
		    convH(N, flags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2);

		    // extended blocks overlap in the result, so the accumulation is serialized
		    #pragma omp critical
		    md_zadd2(N, tdims, str1, ((void*)src1C) + off1, str1, ((void*)src1C) + off1, str2,  tmp);

		    ops->del((void*)tmpX);
		    ops->del((void*)tmp);
	}}}
}
Пример #6
0
/**
 * Block-wise convolution, masking and adjoint convolution in one pass
 * (overlap-and-save geometry).
 *
 * Steps, as performed below:
 *   1. src1 is padded (centered) by one block on each side of every
 *      blocked dimension.
 *   2. Overlapping windows of length blk + dims2 - 1 are copied into tmp.
 *   3. tmpX = conv(tmp, src2), CONV_VALID / CONV_SYMMETRIC.
 *   4. tmpX is multiplied by the mask msk.
 *   5. tmp = convH(tmpX, src2).
 *   6. The windows are added back into the cleared padded buffer, which
 *      is then cropped (centered) into dst.
 *
 * Every dimension is represented by a pair (position in block, block
 * index); dimensions not selected in flags have a single block.
 *
 * @param N      number of dimensions
 * @param flags  bitmask of the blocked (convolved) dimensions
 * @param blk    block sizes
 * @param odims  dimensions of the intermediate result and the mask product
 * @param dst    output, dimensions dims1
 * @param dims1  dimensions of src1
 * @param src1   input
 * @param dims2  kernel dimensions
 * @param src2   kernel
 * @param mdims  mask dimensions
 * @param msk    mask
 */
void overlapandsave2NE(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], complex float* src1, const long dims2[N], complex float* src2, const long mdims[N], complex float* msk)
{
	long dims1B[N];

	long nodims[2 * N];
	long tdims[2 * N];
	long ndims1[2 * N];
	long ndims2[2 * N];

	long shift[2 * N];

	unsigned int nflags = 0;

	for (int i = 0; i < N; i++) {

		const int in = i * 2 + 0;	// position inside a block
		const int bl = i * 2 + 1;	// block index

		// the kernel is never split into blocks and there is
		// never a shift along the block index

		ndims2[bl] = 1;
		ndims2[in] = dims2[i];

		shift[bl] = 0;

		if (!(MD_IS_SET(flags, i))) {

			// not blocked: one block spanning the whole dimension

			nodims[bl] = 1;
			nodims[in] = odims[i];

			tdims[bl] = 1;
			tdims[in] = dims1[i];

			ndims1[bl] = 1;
			ndims1[in] = dims1[i];

			shift[in] = 0;

			dims1B[i] = dims1[i];

			continue;
		}

		nflags = MD_SET(nflags, 2 * i);

		assert(1 == dims2[i] % 2);
		assert(0 == blk[i] % 2);
		assert(0 == dims1[i] % 2);
		assert(0 == odims[i] % blk[i]);
		assert(0 == dims1[i] % blk[i]);
		assert(dims1[i] == odims[i]);
		assert(dims2[i] <= blk[i]);
		assert(dims1[i] >= dims2[i]);

		// blocked output

		nodims[bl] = odims[i] / blk[i];
		nodims[in] = blk[i];

		// expanded temporary storage

		tdims[bl] = dims1[i] / blk[i];
		tdims[in] = blk[i] + dims2[i] - 1;

		// blocked input
		//
		// ---|---,---,---|---
		//   + +++ +
		//       + +++ +

		// input padded by one block on each side

		dims1B[i] = dims1[i] + 2 * blk[i];

		ndims1[bl] = dims1[i] / blk[i] + 2; // do we need two full blocks?
		ndims1[in] = blk[i];

		// the first window starts half a kernel width before the data
		shift[in] = blk[i] - (dims2[i] - 1) / 2;
	}

	complex float* src1B = md_alloc(N, dims1B, CFL_SIZE);
	complex float* tmp = md_alloc(2 * N, tdims, CFL_SIZE);
	complex float* tmpX = md_alloc(N, odims, CFL_SIZE);

	long str1[2 * N];
	long str2[2 * N];
	long ostr[N];
	long mstr[N];

	md_calc_strides(2 * N, str1, ndims1, sizeof(complex float));
	md_calc_strides(2 * N, str2, tdims, sizeof(complex float));
	md_calc_strides(N, ostr, odims, sizeof(complex float));
	md_calc_strides(N, mstr, mdims, sizeof(complex float));

	// start of the first window inside the padded buffer (byte offset)
	long off = md_calc_offset(2 * N, str1, shift);
	void* win = ((void*)src1B) + off;

	md_resize_center(N, dims1B, src1B, dims1, src1, sizeof(complex float));

	// we can loop here

	// gather the overlapping windows
	md_copy2(2 * N, tdims, str2, tmp, str1, win, sizeof(complex float));

	// forward convolution, mask, adjoint convolution
	conv(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, nodims, tmpX, tdims, tmp, ndims2, src2);

	md_zmul2(N, odims, ostr, tmpX, ostr, tmpX, mstr, msk);

	convH(2 * N, nflags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2);

	// scatter-add the windows back into the padded buffer
	md_clear(N, dims1B, src1B, sizeof(complex float));
	md_zadd2(2 * N, tdims, str1, win, str1, win, str2, tmp);

	// crop the padding
	md_resize_center(N, dims1, dst, dims1B, src1B, sizeof(complex float));

	md_free(tmpX);
	md_free(tmp);
	md_free(src1B);
}