Ejemplo n.º 1
0
/**
 * Block-processing callback that applies singular value thresholding
 * to a single block.
 *
 * The block is treated as an M x N matrix: M is the product of the block
 * dimensions selected by data->mflags, N the product of all remaining ones.
 * data->remove_mean picks the variant (0: svthresh, 1: svthresh_nomeanu,
 * 2: svthresh_nomeanv); any other value trips an assertion.
 *
 * @param _data   pointer to a struct svthresh_blockproc_data
 * @param blkdims dimensions of the block
 * @param dst     destination handed to the thresholding routine
 * @param src     source handed to the thresholding routine
 *
 * @return always 0
 */
float svthresh_blockproc(const void* _data, const long blkdims[DIMS], complex float* dst, const complex float* src)
{
	const struct svthresh_blockproc_data* conf = _data;

	long rows = 1;
	long cols = md_calc_size(DIMS, blkdims);

	// move every flagged dimension from the column count to the row count
	for (unsigned int d = 0; d < DIMS; d++) {

		if (!MD_IS_SET(conf->mflags, d))
			continue;

		rows *= blkdims[d];
		cols /= blkdims[d];
	}

	switch (conf->remove_mean) {

	case 0:
		svthresh(rows, cols, conf->lambda, dst, src);
		break;

	case 1:
		svthresh_nomeanu(rows, cols, conf->lambda, dst, src);
		break;

	case 2:
		svthresh_nomeanv(rows, cols, conf->lambda, dst, src);
		break;

	default:
		assert(0);
	}

	return 0;
}
Ejemplo n.º 2
0
/**
 * Generic function which loops over all dimensions of a set of
 * multi-dimensional arrays and calls a given function for each position.
 * It tries to parallelize over the dimensions indicated with flags.
 *
 * Each call peels off the lowest set bit b of flags, runs an OpenMP
 * parallel loop over dim[b] and recurses with that dimension reduced
 * (via md_select_dims) and the bit cleared. Once no flag is left the
 * work is handed to md_nary().
 *
 * @param C     number of arrays
 * @param D     number of dimensions
 * @param dim   dimensions
 * @param flags bitmask of the dimensions to parallelize over
 * @param str   per-array strides (str[j][b] is added to ptr[j] per step, i.e. byte strides)
 * @param ptr   per-array base pointers
 * @param data  opaque pointer passed through to fun
 * @param fun   function called for each position
 */
void md_parallel_nary(unsigned int C, unsigned int D, const long dim[D], unsigned long flags, const long* str[C], void* ptr[C], void* data, md_nary_fun_t fun)
{
	if (0 == flags) {

		md_nary(C, D, dim, str, ptr, data, fun);
		return;
	}

	// index of the lowest set flag
	int b = ffsl(flags & -flags) - 1;
	assert(MD_IS_SET(flags, b));

	flags = MD_CLEAR(flags, b);

	long dimc[D];
	md_select_dims(D, ~MD_BIT(b), dimc, dim);

	// dim[b] is a long, so it has to be printed with %ld
	debug_printf(DP_DEBUG4, "Parallelize: %ld\n", dim[b]);

	// FIXME: this probably doesn't nest
	// (maybe collect all parallelizable dims into one giant loop?)
	#pragma omp parallel for
	for (long i = 0; i < dim[b]; i++) {

		void* moving_ptr[C];

		for (unsigned int j = 0; j < C; j++)
			moving_ptr[j] = ptr[j] + i * str[j][b];

		md_parallel_nary(C, D, dimc, flags, str, moving_ptr, data, fun);
	}
}
Ejemplo n.º 3
0
/**
 * Compute the set of dimensions to parallelize.
 *
 * Starting from the parallelizable dimensions reported by
 * parallelizable(), dimensions are taken from the highest index
 * downwards. The scan stops as soon as the work remaining per
 * iteration (product of all dimensions not yet taken) would drop
 * below CHUNK.
 *
 * @param D    number of operands
 * @param io   bitmask over operands, forwarded to parallelizable()
 * @param N    number of dimensions
 * @param dims dimensions
 * @param strs per-operand strides
 * @param size per-operand element sizes
 *
 * @return bitmask (over dimensions) of the dimensions to parallelize
 */
unsigned int dims_parallel(unsigned int D, unsigned int io, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D])
{
	unsigned int flags = parallelizable(D, io, N, dims, strs, size);

	// flags and dims are indexed by dimension (0 .. N - 1), not by
	// operand (0 .. D - 1), so the scan has to start at N. Starting at D
	// read dims[] out of bounds for D > N and skipped the upper
	// dimensions for D < N.
	unsigned int i = N;

	// NOTE: count is never incremented, so this only gates on CORES > 1
	unsigned int count = 1;

	long reps = md_calc_size(N, dims);

	unsigned int oflags = 0;

	while ((count < CORES) && (i-- > 0)) {

		if (MD_IS_SET(flags, i)) {

			reps /= dims[i];

			if (reps < CHUNK)
				break;

			oflags = MD_SET(oflags, i);

			//break; // only 1
		}
	}

	return oflags;
}
Ejemplo n.º 4
0
/*
 * First-order finite difference operator implemented with a circular
 * shift: diff(x) = x - circshift(x), applied in turn along every
 * dimension selected by flags.
 *
 * optr = [iptr(1); diff(iptr)]
 *
 * @param snip  false: keep the first entry; true: clear the first entry
 * @param tmp   scratch buffer, addressed with istrs
 */
static void md_zfinitediff_core2(unsigned int D, const long dims[D], unsigned int flags, bool snip, complex float* tmp, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr)
{
	md_copy2(D, dims, istrs, tmp, istrs, iptr, sizeof(complex float));

	long first[D];	// dims with the active dimension collapsed to 1
	long shift[D];	// circular shift, non-zero only in the active dimension

	md_select_dims(D, ~0, first, dims);

	for (unsigned int k = 0; k < D; k++)
		shift[k] = 0;

	for (unsigned int k = 0; k < D; k++) {

		if (!MD_IS_SET(flags, k))
			continue;

		shift[k] = 1; // order

		md_circ_shift2(D, dims, shift, ostrs, optr, istrs, tmp, sizeof(complex float));

		first[k] = 1;

		// clearing before the subtraction keeps the first entry
		if (!snip)
			md_clear2(D, first, ostrs, optr, sizeof(complex float));

		md_zsub2(D, dims, ostrs, optr, istrs, tmp, ostrs, optr);
		md_copy2(D, dims, ostrs, tmp, ostrs, optr, sizeof(complex float));

		// clearing after the subtraction drops the first entry
		if (snip)
			md_clear2(D, first, ostrs, optr, sizeof(complex float));

		// restore the helpers for the next dimension
		shift[k] = 0;
		first[k] = dims[k];
	}
}
Ejemplo n.º 5
0
int main_reshape(int argc, char* argv[])
{
	cmdline(&argc, argv, 3, 100, usage_str, help_str, 0, NULL);

	num_init();

	unsigned int flags = atoi(argv[1]);
	unsigned int n = bitcount(flags);

	assert((int)n + 3 == argc - 1);

	long in_dims[DIMS];
	long in_strs[DIMS];

	long out_dims[DIMS];
	long out_strs[DIMS];

	complex float* in_data = load_cfl(argv[n + 2], DIMS, in_dims);

	md_calc_strides(DIMS, in_strs, in_dims, CFL_SIZE);

	md_copy_dims(DIMS, out_dims, in_dims);
	
	unsigned int j = 0;

	for (unsigned int i = 0; i < DIMS; i++)
		if (MD_IS_SET(flags, i))
			out_dims[i] = atoi(argv[j++ + 2]);

	assert(j == n);
	assert(md_calc_size(DIMS, in_dims) == md_calc_size(DIMS, out_dims));

	md_calc_strides(DIMS, out_strs, out_dims, CFL_SIZE);
	
	for (unsigned int i = 0; i < DIMS; i++)
		if (!(MD_IS_SET(flags, i) || (in_strs[i] == out_strs[i]))) 
			error("Dimensions are not consistent at index %d.\n");


	complex float* out_data = create_cfl(argv[n + 3], DIMS, out_dims);

	md_copy(DIMS, in_dims, out_data, in_data, CFL_SIZE);

	unmap_cfl(DIMS, in_dims, in_data);
	unmap_cfl(DIMS, out_dims, out_data);
	exit(0);
}
Ejemplo n.º 6
0
Archivo: lrthresh.c Proyecto: hcmh/bart
/**
 * Generates multiscale low rank block sizes
 *
 * @param blkdims - block sizes to be written
 * @param flags  - specifies which dimensions to do the blocks. The other dimensions will be the same as input
 * @param idims - input dimensions
 * @param blkskip - scale each level by blkskip to generate the next level
 *
 * returns number of levels
 */
long multilr_blkdims(long blkdims[MAX_LEV][DIMS], unsigned long flags, const long idims[DIMS], int blkskip, long initblk)
{
	// Multiscale low rank block sizes
	long tmp_block[DIMS];

	for (unsigned int i = 0; i < DIMS; i++) {

		if (MD_IS_SET(flags, i))
			tmp_block[i] = MIN(initblk, idims[i]);
		else
			tmp_block[i] = idims[i];
	}

	bool done;
	// Loop block_sizes
	long levels = 0;

	do {
		levels++;
		debug_printf(DP_INFO, "[\t");

		for (unsigned int i = 0; i < DIMS; i++) {

			blkdims[levels - 1][i] = tmp_block[i];
			debug_printf(DP_INFO, "%ld\t", blkdims[levels-1][i]);
		}

		debug_printf(DP_INFO, "]\n");


		done = true;

		for (unsigned int i = 0; i < DIMS; i++) {

			if (MD_IS_SET(flags, i) && (idims[i] != 1)) {

				tmp_block[i] = MIN(tmp_block[i] * blkskip, idims[i]);
				done = done && (blkdims[levels - 1][i] == idims[i]);
			}
		}
		
	} while(!done);

	return levels;
}
Ejemplo n.º 7
0
Archivo: wavelet.c Proyecto: hcmh/bart
/**
 * Check the dimensions selected by flags against their minimum size.
 *
 * @return false if any selected dimension has minsize <= 2 or is
 *         smaller than its minsize, true otherwise
 */
static bool wavelet_check_dims(unsigned int N, unsigned int flags, const long dims[N], const long minsize[N])
{
	bool ok = true;

	for (unsigned int d = 0; ok && (d < N); d++) {

		if (!MD_IS_SET(flags, d))
			continue;

		ok = (minsize[d] > 2) && (dims[d] >= minsize[d]);
	}

	return ok;
}
Ejemplo n.º 8
0
/**
 * Circularly shift src into dst by dims[i] / 2 along every dimension
 * selected by flags; the shift is 0 in all other dimensions.
 *
 * @param N     number of dimensions
 * @param dims  dimensions
 * @param flags bitmask of the dimensions to shift
 * @param ostrs output strides
 * @param dst   destination
 * @param istrs input strides
 * @param src   source
 */
void fftshift2(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src)
{
	long half[N];

	md_set_dims(N, half, 0);

	for (unsigned int d = 0; d < N; d++) {

		if (!MD_IS_SET(flags, d))
			continue;

		half[d] = dims[d] / 2;
	}

	md_circ_shift2(N, dims, half, ostrs, dst, istrs, src, CFL_SIZE);
}
Ejemplo n.º 9
0
/**
 * Apply dfwavelet_thresh() to every group of three flow components.
 *
 * For each position over the dimensions in slice_flag (excluding
 * flow_dim), the slices at flow_dim = 0, 1, 2 are copied into
 * data->vx, data->vy, data->vz, thresholded in place with
 * thresh * data->lambda, and copied back to the same positions of out.
 */
static void prox_dfwavelet_thresh(const operator_data_t* _data, float thresh, complex float* out, const complex float* in)
{
	struct prox_dfwavelet_data* data = CONTAINER_OF(_data, struct prox_dfwavelet_data, base);

	long pos[DIMS];
	md_set_dims(DIMS, pos, 0);

	bool done = false;

	do {
		// gather vx, vy, vz from consecutive positions along flow_dim
		md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->vx, in, CFL_SIZE);
		pos[data->flow_dim]++;
		md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->vy, in, CFL_SIZE);
		pos[data->flow_dim]++;
		md_slice(DIMS, data->slice_flag, pos, data->im_dims, data->vz, in, CFL_SIZE);
		pos[data->flow_dim] = 0;

		// threshold in place
		dfwavelet_thresh(data->plan, thresh * data->lambda, thresh * data->lambda, data->vx, data->vy, data->vz, data->vx, data->vy, data->vz);

		// scatter vx, vy, vz back to the same positions of the output
		md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->vx, CFL_SIZE);
		pos[data->flow_dim]++;
		md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->vy, CFL_SIZE);
		pos[data->flow_dim]++;
		md_copy_block(DIMS, pos, data->im_dims, out, data->tim_dims, data->vz, CFL_SIZE);
		pos[data->flow_dim] = 0;

		// advance pos like an odometer over the sliced dimensions
		// (flow_dim excluded); wrapping in all of them ends the loop
		bool wrapped = true;

		for (unsigned int i = 0; i < DIMS; i++) {

			if (!MD_IS_SET(data->slice_flag & ~MD_BIT(data->flow_dim), i))
				continue;

			pos[i] += 1;

			if (pos[i] < data->im_dims[i]) {

				wrapped = false;
				break;
			}

			pos[i] = 0;
		}

		done = wrapped;

	} while (!done);
}
Ejemplo n.º 10
0
/**
 * Fill in the wavelet output dimensions for dimensions n, n - 1, ..., 0.
 *
 * For every dimension k selected by flags, odims[k] is set to
 * bandsize(dims[k], flen) and odims[N + k] to 2. Entries of
 * unselected dimensions are not touched.
 */
static void wavelet_dims_r(unsigned int N, unsigned int n, unsigned int flags, long odims[2 * N], const long dims[N], const long flen)
{
	// walk from n down to 0 (same order as the recursive formulation)
	for (unsigned int k = n; ; k--) {

		if (MD_IS_SET(flags, k)) {

			odims[0 + k] = bandsize(dims[k], flen);
			odims[N + k] = 2;
		}

		if (0 == k)
			break;
	}
}
Ejemplo n.º 11
0
/**
 * Forward wavelet transform along the dimensions selected by flags.
 *
 * The input is first circularly shifted by shifts into a temporary
 * buffer. Then fwt1() is applied once per selected dimension,
 * ping-ponging between two temporary buffers, and the result is
 * copied to out using ostr.
 *
 * @param N      number of dimensions of the input
 * @param flags  bitmask of the dimensions to transform
 * @param shifts circular shift applied before the transform
 * @param dims   input dimensions
 * @param ostr   output strides (2 * N entries)
 * @param out    output
 * @param istr   input strides
 * @param in     input
 * @param flen   filter length
 * @param filter filter coefficients, passed through to fwt1()
 */
void fwtN(unsigned int N, unsigned int flags, const long shifts[N], const long dims[N], const long ostr[2 * N], complex float* out, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen])
{
	long odims[2 * N];
	wavelet_dims(N, flags, odims, dims, flen);

	assert(md_calc_size(2 * N, odims) >= md_calc_size(N, dims));

	// FIXME one of these is unnecessary if we use the output

	complex float* tmpA = md_alloc_sameplace(2 * N, odims, CFL_SIZE, out);
	complex float* tmpB = md_alloc_sameplace(2 * N, odims, CFL_SIZE, out);

	long tidims[2 * N];
	md_copy_dims(N, tidims, dims);
	md_singleton_dims(N, tidims + N);

	long tistrs[2 * N];
	md_calc_strides(2 * N, tistrs, tidims, CFL_SIZE);

	long todims[2 * N];
	md_copy_dims(2 * N, todims, tidims);

	// Start out with the input layout: if no dimension is selected the
	// loop below never assigns tostrs, and the final copy would
	// otherwise read uninitialized strides.
	long tostrs[2 * N];
	md_copy_dims(2 * N, tostrs, tistrs);

	// maybe we should push the randshift into lower levels

	//md_copy2(N, dims, tistrs, tmpA, istr, in, CFL_SIZE);
	md_circ_shift2(N, dims, shifts, tistrs, tmpA, istr, in, CFL_SIZE);

	for (unsigned int i = 0; i < N; i++) {

		if (MD_IS_SET(flags, i)) {

			todims[0 + i] = odims[0 + i];
			todims[N + i] = odims[N + i];

			md_calc_strides(2 * N, tostrs, todims, CFL_SIZE);

			fwt1(2 * N, i, tidims, tostrs, tmpB, (void*)tmpB + tostrs[N + i], tistrs, tmpA, flen, filter);

			// the output of this step is the input of the next one
			md_copy_dims(2 * N, tidims, todims);
			md_copy_dims(2 * N, tistrs, tostrs);

			complex float* swap = tmpA;
			tmpA = tmpB;
			tmpB = swap;
		}
	}

	md_copy2(2 * N, todims, ostr, out, tostrs, tmpA, CFL_SIZE);

	md_free(tmpA);
	md_free(tmpB);
}
Ejemplo n.º 12
0
Archivo: lrthresh.c Proyecto: hcmh/bart
/**
 * Generates locally low rank block sizes
 *
 * @param blkdims - block sizes to be written (only level 0 is filled)
 * @param flags  - specifies which dimensions to do the blocks. The other dimensions will be the same as input
 * @param idims - input dimensions
 * @param llrblk - the block size (clamped to the input dimension)
 *
 * returns number of levels = 1
 */
long llr_blkdims(long blkdims[MAX_LEV][DIMS], unsigned long flags, const long idims[DIMS], long llrblk)
{
	long* level0 = blkdims[0];

	for (unsigned int d = 0; d < DIMS; d++)
		level0[d] = MD_IS_SET(flags, d) ? MIN(llrblk, idims[d]) : idims[d];

	return 1;
}
Ejemplo n.º 13
0
/**
 * Inverse wavelet transform along the dimensions selected by flags.
 *
 * The input is copied into a temporary buffer. Then iwt1() is applied
 * once per selected dimension (highest index first), ping-ponging
 * between two temporary buffers. The result is written to out with
 * the circular shift -shifts applied.
 *
 * @param N      number of dimensions of the output
 * @param flags  bitmask of the dimensions to transform
 * @param shifts circular shift; its negation is applied after the transform
 * @param dims   output dimensions
 * @param ostr   output strides
 * @param out    output
 * @param istr   input strides (2 * N entries)
 * @param in     input
 * @param flen   filter length
 * @param filter filter coefficients, passed through to iwt1()
 */
void iwtN(unsigned int N, unsigned int flags, const long shifts[N], const long dims[N], const long ostr[N], complex float* out, const long istr[2 * N], const complex float* in, const long flen, const float filter[2][2][flen])
{
	long idims[2 * N];
	wavelet_dims(N, flags, idims, dims, flen);

	assert(md_calc_size(2 * N, idims) >= md_calc_size(N, dims));

	complex float* tmpA = md_alloc_sameplace(2 * N, idims, CFL_SIZE, out);
	complex float* tmpB = md_alloc_sameplace(2 * N, idims, CFL_SIZE, out);

	long tidims[2 * N];
	md_copy_dims(2 * N, tidims, idims);

	long tistrs[2 * N];
	md_calc_strides(2 * N, tistrs, tidims, CFL_SIZE);

	long todims[2 * N];
	md_copy_dims(2 * N, todims, tidims);

	// Start out with the input layout: if no dimension is selected the
	// loop below never assigns tostrs, and the final shift would
	// otherwise read uninitialized strides.
	long tostrs[2 * N];
	md_copy_dims(2 * N, tostrs, tistrs);

	long ishifts[N];
	for (unsigned int i = 0; i < N; i++)
		ishifts[i] = -shifts[i];

	md_copy2(2 * N, tidims, tistrs, tmpA, istr, in, CFL_SIZE);

	for (int i = N - 1; i >= 0; i--) {	// run backwards to maintain contiguous blocks

		if (MD_IS_SET(flags, i)) {

			todims[0 + i] = dims[0 + i];
			todims[N + i] = 1;

			md_calc_strides(2 * N, tostrs, todims, CFL_SIZE);

			iwt1(2 * N, i, todims, tostrs, tmpB, tistrs, tmpA, (void*)tmpA + tistrs[N + i], flen, filter);

			// the output of this step is the input of the next one
			md_copy_dims(2 * N, tidims, todims);
			md_copy_dims(2 * N, tistrs, tostrs);

			complex float* swap = tmpA;
			tmpA = tmpB;
			tmpB = swap;
		}
	}

	//md_copy2(N, dims, ostr, out, tostrs, tmpA, CFL_SIZE);
	md_circ_shift2(N, dims, ishifts, ostr, out, tostrs, tmpA, CFL_SIZE);

	md_free(tmpA);
	md_free(tmpB);
}
Ejemplo n.º 14
0
Archivo: wavelet.c Proyecto: hcmh/bart
/**
 * Build the stride vector ostr: the dimensions selected by flags get
 * strides computed by md_calc_strides() from the selected dimensions,
 * using the stride of the lowest selected dimension as element size.
 * All other dimensions keep their stride from str.
 *
 * NOTE(review): with flags == 0, ffs() returns 0 and the index wraps
 * around; callers presumably always pass at least one flag.
 */
static void embed(unsigned int N, unsigned int flags, long ostr[N], const long dims[N], const long str[N])
{
	// index of the lowest selected dimension
	unsigned int lowest = ffs(flags) - 1;

	long sel_dims[N];
	md_select_dims(N, flags, sel_dims, dims);

	md_calc_strides(N, ostr, sel_dims, str[lowest]);

	for (unsigned int d = 0; d < N; d++) {

		if (MD_IS_SET(flags, d))
			continue;

		ostr[d] = str[d];
	}
}
Ejemplo n.º 15
0
/**
 * Compute the set of parallelizable dimensions.
 *
 * We assume no input / output overlap (i.e. inputs which are also
 * outputs have to be marked as output in io).
 *
 * A dimension is parallelizable if all output operations for that
 * dimension are independent. For every output operand we check that
 * all other dimensions either have strides greater than or equal to
 * the extent of this dimension, or have an extent smaller than or
 * equal to the stride of this dimension:
 *
 * no overlap: [222]
 *                   [111111111111]
 *                                [333333333]
 *    overlap: [222]
 *                   [1111111111111111]
 *                                [333333333]
 *
 * In addition, the absolute stride of the dimension must not be
 * smaller than the element size of the operand.
 *
 * @return bitmask over the N dimensions
 */
static unsigned int parallelizable(unsigned int D, unsigned int io, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D])
{
	// start with every dimension allowed and knock out the bad ones
	unsigned int flags = (1 << N) - 1;

	for (unsigned int op = 0; op < D; op++) {

		// only output operands restrict parallelization
		if (!MD_IS_SET(io, op))
			continue;

		bool encl[N][N];
		compute_enclosures(N, encl, dims, *strs[op]);

		for (unsigned int i = 0; i < N; i++) {

			// number of dimensions related to i in either direction
			unsigned int related = 0;

			for (unsigned int j = 0; j < N; j++)
				related += (encl[i][j] || encl[j][i]) ? 1 : 0;

			bool separated = (related == N - 1);
			bool wide_enough = ((size_t)labs((*strs[op])[i]) >= size[op]);

			if (!separated || !wide_enough)
				flags = MD_CLEAR(flags, i);
		}
	}

	return flags;
}
Ejemplo n.º 16
0
/**
 * Allocate and fill plan->waveSizes_tr and compute the coefficient
 * counts of the plan.
 *
 * waveSizes_tr holds (numLevels_tr + 2) rows of numdims_tr entries:
 * the last row is the image size, each row l (numLevels_tr >= l >= 1)
 * is (row l + 1 + filterLen - 1) / 2, and row 0 is a copy of row 1.
 *
 * Sets plan->numCoeff_tr, plan->numCoarse_tr and plan->numCoeff.
 */
void create_wavelet_sizes(struct wavelet_plan_s* plan)
{
	const int nd = plan->numdims_tr;
	const int flen = plan->filterLen;
	const int nlev = plan->numLevels_tr;

	plan->waveSizes_tr = (long*)xmalloc(sizeof(long) * nd * (nlev + 2));

	// The last row is the image size. While looping over the dimensions
	// also count the subbands per level: 2^nd - 1 (3 for 2d, 7 for 3d).
	int numSubband = 1;

	for (int d = 0; d < nd; d++) {

		plan->waveSizes_tr[d + nd * (nlev + 1)] = plan->imSize_tr[d];
		numSubband <<= 1;
	}

	numSubband--;

	// Detail coefficients: each band size is (next band size + flen - 1) / 2
	plan->numCoeff_tr = 0;

	for (int l = nlev; l >= 1; --l) {

		int bandCoef = 1;

		for (int d = 0; d < nd; d++) {

			plan->waveSizes_tr[d + nd * l] = (plan->waveSizes_tr[d + nd * (l + 1)] + flen - 1) / 2;
			bandCoef *= plan->waveSizes_tr[d + nd * l];
		}

		plan->numCoeff_tr += numSubband * bandCoef;

		if (1 == l)
			plan->numCoarse_tr = bandCoef;
	}

	// Row 0 has the same size as row 1 and contributes one more band
	int coarseCoef = 1;

	for (int d = 0; d < nd; d++) {

		plan->waveSizes_tr[d] = plan->waveSizes_tr[nd + d];
		coarseCoef *= plan->waveSizes_tr[d];
	}

	plan->numCoeff_tr += coarseCoef;

	// Actual number of coefficients: multiply by every dimension
	// that is not selected in plan->flags
	plan->numCoeff = plan->numCoeff_tr;

	for (int d = 0; d < plan->numdims; d++) {

		if (MD_IS_SET(plan->flags, d))
			continue;

		plan->numCoeff *= plan->imSize[d];
	}
}
Ejemplo n.º 17
0
/**
 * Allocate and fill a set of linear phases.
 *
 * All 8 combinations of a shift of 0 or -0.5 along the first three
 * dimensions are enumerated; combinations that shift along a
 * dimension of size 1 are dropped. lph_dims is set to the FFT_FLAGS
 * dimensions of img_dims with the number of remaining combinations
 * in dimension N. One linear_phase() is written per combination.
 *
 * @return buffer allocated with md_alloc()
 */
static complex float* compute_linphases(unsigned int N, long lph_dims[N + 3], const long img_dims[N + 3])
{
	float shifts[8][3];
	int s = 0;	// number of combinations kept so far

	for (int combo = 0; combo < 8; combo++) {

		bool skip = false;

		for (int j = 0; j < 3; j++) {

			const bool half = MD_IS_SET(combo, j);

			shifts[s][j] = half ? -0.5 : 0.;

			if (half && (1 == img_dims[j]))
				skip = true;
		}

		// a skipped combination is overwritten by the next one
		if (!skip)
			s++;
	}

	unsigned int ND = N + 3;

	md_select_dims(ND, FFT_FLAGS, lph_dims, img_dims);
	lph_dims[N + 0] = s;

	complex float* linphase = md_alloc(ND, lph_dims, CFL_SIZE);

	for (int i = 0; i < s; i++) {

		float shifts2[ND];

		for (unsigned int j = 0; j < ND; j++)
			shifts2[j] = (j < 3) ? shifts[i][j] : 0.;

		linear_phase(ND, img_dims, shifts2,
				linphase + i * md_calc_size(ND, img_dims));
	}

	return linphase;
}
Ejemplo n.º 18
0
/**
 * Operator callback: wavelet thresholding with threshold
 * data->lambda * mu.
 *
 * If data->randshift is set, a shift drawn with rand_lim() below
 * (1 << levels) is used in every dimension selected by data->flags;
 * otherwise all shifts are 0.
 */
static void wavelet3_thresh_apply(const operator_data_t* _data, float mu, complex float* out, const complex float* in)
{
	const struct wavelet3_thresh_s* data = CAST_DOWN(wavelet3_thresh_s, _data);

	long shift[data->N];

	for (unsigned int d = 0; d < data->N; d++)
		shift[d] = 0;

	if (data->randshift) {

		int levels = wavelet_num_levels(data->N, data->flags, data->dims, data->minsize, 4);

		for (unsigned int d = 0; d < data->N; d++) {

			if (!MD_IS_SET(data->flags, d))
				continue;

			shift[d] = rand_lim((unsigned int*)&data->rand_state, 1 << levels);
		}
	}

	wavelet3_thresh(data->N, data->lambda * mu, data->flags, shift, data->dims,
		out, in, data->minsize, 4, wavelet3_dau2);
}
Ejemplo n.º 19
0
Archivo: someops.c Proyecto: hcmh/bart
/**
 * Set up a matrix operator with doubled ("shadowed") dimensions and
 * forward to linop_matrix_priv2().
 *
 * Dimensions are doubled for chaining, which can lead to matrices
 * with the same input and output dimension. Input dimensions that are
 * removed by the operator (non-trivial in the input, trivial in the
 * output) are moved into the shadow position 2 * i + 1, which makes
 * chaining easier.
 */
static struct operator_matrix_s* linop_matrix_priv(unsigned int N, const long out_dims[N], const long in_dims[N], const long matrix_dims[N], const complex float* matrix)
{
	unsigned long out_flags = md_nontriv_dims(N, out_dims);
	unsigned long in_flags = md_nontriv_dims(N, in_dims);

	// dimensions present in the input but gone in the output
	unsigned long del_flags = in_flags & ~out_flags;

	long out_dims2[2 * N];
	long mat_dims2[2 * N];
	long in_dims2[2 * N];

	shadow_dims(N, out_dims2, out_dims);
	shadow_dims(N, mat_dims2, matrix_dims);
	shadow_dims(N, in_dims2, in_dims);

	for (unsigned int i = 0; i < N; i++) {

		if (!MD_IS_SET(del_flags, i))
			continue;

		const unsigned int main_pos = 2 * i + 0;
		const unsigned int shadow_pos = 2 * i + 1;

		assert(1 == out_dims2[main_pos]);
		assert(mat_dims2[main_pos] == in_dims2[main_pos]);

		mat_dims2[shadow_pos] = mat_dims2[main_pos];
		mat_dims2[main_pos] = 1;

		in_dims2[shadow_pos] = in_dims[i];
		in_dims2[main_pos] = 1;
	}

	return linop_matrix_priv2(2 * N, out_dims2, in_dims2, mat_dims2, matrix);
}
Ejemplo n.º 20
0
/*
 * Cumulative sum operator (order 1 for now) implemented with
 * circular shifts:
 * cumsum(x) = x + circshift(x,1) + circshift(x,2) + ...
 * where the wrapped-around part of every shifted copy is cleared.
 * Applied in turn along every dimension selected by flags.
 *
 * optr = cumsum(iptr)
 *
 * tmp and tmp2 are scratch buffers, addressed with istrs.
 */
static void md_zcumsum_core2(unsigned int D, const long dims[D], unsigned int flags, complex float* tmp, complex float* tmp2, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr)
{
	// out = in, tmp = in
	md_copy2(D, dims, ostrs, optr, istrs, iptr, sizeof(complex float));
	md_copy2(D, dims, istrs, tmp, istrs, iptr, sizeof(complex float));

	long head[D];	// dims with the active dimension cut down to the shift
	long shift[D];	// circular shift, non-zero only in the active dimension

	md_select_dims(D, ~0, head, dims);

	for (unsigned int k = 0; k < D; k++)
		shift[k] = 0;

	for (unsigned int k = 0; k < D; k++) {

		if (!MD_IS_SET(flags, k))
			continue;

		for (int d = 1; d < dims[k]; d++) {

			// tmp2 = circshift(tmp, d) along dimension k
			shift[k] = d;
			md_circ_shift2(D, dims, shift, istrs, tmp2, istrs, tmp, sizeof(complex float));

			// tmp2(1:d,:) = 0
			head[k] = d;
			md_clear2(D, head, istrs, tmp2, sizeof(complex float));

			// out = out + tmp2
			md_zadd2(D, dims, ostrs, optr, istrs, tmp2, ostrs, optr);
		}

		// the result along this dimension is the input for the next one
		md_copy2(D, dims, ostrs, tmp, ostrs, optr, sizeof(complex float));

		shift[k] = 0;
		head[k] = dims[k];
	}
}
Ejemplo n.º 21
0
/**
 * Block-processing callback that evaluates the scaled nuclear norm of
 * a single block.
 *
 * The block is treated as an M x N matrix: M is the product of the block
 * dimensions selected by data->mflags, N the product of all remaining ones.
 *
 * @param _data   pointer to a struct svthresh_blockproc_data
 * @param blkdims dimensions of the block
 * @param dst     unused
 * @param src     block data
 *
 * @return (sqrt(M) + sqrt(N)) * nuclearnorm(M, N, src)
 */
float nucnorm_blockproc(const void* _data, const long blkdims[DIMS], complex float* dst, const complex float* src)
{
	UNUSED(dst);

	const struct svthresh_blockproc_data* conf = _data;

	long rows = 1;
	long cols = md_calc_size(DIMS, blkdims);

	// move every flagged dimension from the column count to the row count
	for (unsigned int d = 0; d < DIMS; d++) {

		if (!MD_IS_SET(conf->mflags, d))
			continue;

		rows *= blkdims[d];
		cols /= blkdims[d];
	}

	float scale = sqrtf(rows) + sqrtf(cols);

	return scale * nuclearnorm(rows, cols, src);
}
Ejemplo n.º 22
0
Archivo: fft.c Proyecto: hcmh/bart
/**
 * Create an FFTW plan via the guru64 interface.
 *
 * Dimensions selected by flags become transform dimensions, all
 * others become batch ("howmany") dimensions. Strides are converted
 * to units of CFL_SIZE. Plan creation is wrapped in an OpenMP
 * critical section.
 *
 * @param backwards sign +1 if true, -1 otherwise
 * @param measure   FFTW_MEASURE if true, FFTW_ESTIMATE otherwise
 */
static fftwf_plan fft_fftwf_plan(unsigned int D, const long dimensions[D], unsigned long flags, const long ostrides[D], complex float* dst, const long istrides[D], const complex float* src, bool backwards, bool measure)
{
	fftwf_iodim64 trafo[D];
	fftwf_iodim64 batch[D];
	unsigned int ntrafo = 0;
	unsigned int nbatch = 0;

	//FFTW seems to be fine with flags == 0
	//assert(0 != flags);

	for (unsigned int i = 0; i < D; i++) {

		// append to whichever list this dimension belongs to
		fftwf_iodim64* entry = MD_IS_SET(flags, i) ? &trafo[ntrafo++] : &batch[nbatch++];

		entry->n = dimensions[i];
		entry->is = istrides[i] / CFL_SIZE;
		entry->os = ostrides[i] / CFL_SIZE;
	}

	const int sign = backwards ? 1 : (-1);

	fftwf_plan plan;

	#pragma omp critical
	plan = fftwf_plan_guru64_dft(ntrafo, trafo, nbatch, batch, (complex float*)src, dst,
				sign, measure ? FFTW_MEASURE : FFTW_ESTIMATE);

	return plan;
}
Ejemplo n.º 23
0
Archivo: fft.c Proyecto: hcmh/bart
/**
 * Recursive helper for fftmod2.
 *
 * Each call handles the highest dimension still set in flags: it loops
 * over that dimension, adds fftmod_phase(dims[i], j) to the accumulated
 * phase and recurses with the flag cleared. Once no flag is left, the
 * remaining block is multiplied by exp(2 pi i phase) (negated phase
 * if inv is set).
 */
static void fftmod2_r(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src, bool inv, double phase)
{
	if (0 == flags) {

		md_zsmul2(N, dims, ostrs, dst, istrs, src, cexp(M_PI * 2.i * (inv ? -phase : phase)));
		return;
	}


	/* this will also currently be slow on the GPU because we do not
	 * support strides there on the lowest level */

	// highest flagged dimension
	unsigned int i;

	for (i = N - 1; !MD_IS_SET(flags, i); i--)
		;

	const unsigned long rest = MD_CLEAR(flags, i);

#if 1
	// If there is only one dimension left and it is the innermost
	// which is contiguous, optimize using md_zfftmod2

	if ((0u == rest) && (1 == md_calc_size(i, dims))
		&& (CFL_SIZE == ostrs[i]) && (CFL_SIZE == istrs[i])) {

		md_zfftmod2(N - i, dims + i, ostrs + i, dst, istrs + i, src, inv, phase);
		return;
	}
#endif

	long tdims[N];
	md_select_dims(N, ~MD_BIT(i), tdims, dims);

	#pragma omp parallel for
	for (int j = 0; j < dims[i]; j++)
		fftmod2_r(N, tdims, rest,
			ostrs, (void*)dst + j * ostrs[i], istrs, (void*)src + j * istrs[i],
			inv, phase + fftmod_phase(dims[i], j));
}
Ejemplo n.º 24
0
/**
 * Projection built from the finite-difference operator o.
 *
 * Steps, as performed below: apply the forward operator, pass the
 * result through md_smin() with 0. (treating the complex data as
 * pairs of floats), copy the initial slice along the first flagged
 * dimension back from the forward result, then apply
 * linop_norm_inv_unchecked() with 0. in place.
 *
 * Intermediate results are written out with dump_cfl().
 */
void fd_proj_noninc(const struct linop_s* o, complex float* optr, const complex float* iptr)
{
	struct fdiff_s* data = (struct fdiff_s*)linop_get_data(o);	// FIXME: CAST?

	dump_cfl("impre", data->D, data->dims, iptr);

	complex float* diff = md_alloc_sameplace(data->D, data->dims, CFL_SIZE, optr);
	linop_forward_unchecked(o, diff, iptr);

	const long dim0 = data->dims[0];

	long fdims[data->D];
	md_select_dims(data->D, ~0u, fdims, data->dims);

	// view the complex array as floats: twice as many entries in dim 0
	fdims[0] *= 2;

	dump_cfl("dxpre", data->D, data->dims, diff);

	md_smin(data->D, fdims, (float*)optr, (float*)diff, 0.);

	// back to complex dimensions
	fdims[0] = dim0;

	// add back the initial value along the first flagged dimension
	unsigned int first = 0;

	while ((first < data->D) && !MD_IS_SET(data->flags, first))
		first++;

	if (first < data->D) {

		fdims[first] = 1;
		md_copy2(data->D, fdims, data->str, optr, data->str, diff, CFL_SIZE);
	}

	dump_cfl("dxpost", data->D, data->dims, optr);
	linop_norm_inv_unchecked(o, 0., optr, optr);

	dump_cfl("impost", data->D, data->dims, optr);

	md_free(diff);
}
Ejemplo n.º 25
0
void opt_reg_configure(unsigned int N, const long img_dims[N], struct opt_reg_s* ropts, const struct operator_p_s* prox_ops[NUM_REGS], const struct linop_s* trafos[NUM_REGS], unsigned int llr_blk, bool randshift, bool use_gpu)
{
	float lambda = ropts->lambda;

	if (-1. == lambda)
		lambda = 0.;

	// if no penalities specified but regularization
	// parameter is given, add a l2 penalty

	struct reg_s* regs = ropts->regs;

	if ((0 == ropts->r) && (lambda > 0.)) {

		regs[0].xform = L2IMG;
		regs[0].xflags = 0u;
		regs[0].jflags = 0u;
		regs[0].lambda = lambda;
		ropts->r = 1;
	}



	int nr_penalties = ropts->r;
	long blkdims[MAX_LEV][DIMS];
	int levels;


	for (int nr = 0; nr < nr_penalties; nr++) {

		// fix up regularization parameter
		if (-1. == regs[nr].lambda)
			regs[nr].lambda = lambda;

		switch (regs[nr].xform) {

			case L1WAV:
				debug_printf(DP_INFO, "l1-wavelet regularization: %f\n", regs[nr].lambda);

				if (0 != regs[nr].jflags)
					debug_printf(DP_WARN, "joint l1-wavelet thresholding not currently supported.\n");

				long minsize[DIMS] = { [0 ... DIMS - 1] = 1 };
				minsize[0] = MIN(img_dims[0], 16);
				minsize[1] = MIN(img_dims[1], 16);
				minsize[2] = MIN(img_dims[2], 16);


				unsigned int wflags = 0;
				for (unsigned int i = 0; i < DIMS; i++) {

					if ((1 < img_dims[i]) && MD_IS_SET(regs[nr].xflags, i)) {

						wflags = MD_SET(wflags, i);
						minsize[i] = MIN(img_dims[i], 16);
					}
				}

				trafos[nr] = linop_identity_create(DIMS, img_dims);
				prox_ops[nr] = prox_wavelet3_thresh_create(DIMS, img_dims, wflags, minsize, regs[nr].lambda, randshift);
				break;

			case TV:
				debug_printf(DP_INFO, "TV regularization: %f\n", regs[nr].lambda);

				trafos[nr] = linop_grad_create(DIMS, img_dims, regs[nr].xflags);
				prox_ops[nr] = prox_thresh_create(DIMS + 1,
						linop_codomain(trafos[nr])->dims,
						regs[nr].lambda, regs[nr].jflags | MD_BIT(DIMS), use_gpu);
				break;

			case LLR:
				debug_printf(DP_INFO, "lowrank regularization: %f\n", regs[nr].lambda);

				// add locally lowrank penalty
				levels = llr_blkdims(blkdims, regs[nr].jflags, img_dims, llr_blk);

				assert(1 == levels);
				assert(levels == img_dims[LEVEL_DIM]);

				for(int l = 0; l < levels; l++)
#if 0
					blkdims[l][MAPS_DIM] = img_dims[MAPS_DIM];
#else
				blkdims[l][MAPS_DIM] = 1;
#endif

				int remove_mean = 0;

				trafos[nr] = linop_identity_create(DIMS, img_dims);
				prox_ops[nr] = lrthresh_create(img_dims, randshift, regs[nr].xflags, (const long (*)[DIMS])blkdims, regs[nr].lambda, false, remove_mean, use_gpu);
				break;

			case MLR:
#if 0
				// FIXME: multiscale low rank changes the output image dimensions 
				// and requires the forward linear operator. This should be decoupled...
				debug_printf(DP_INFO, "multi-scale lowrank regularization: %f\n", regs[nr].lambda);

				levels = multilr_blkdims(blkdims, regs[nr].jflags, img_dims, 8, 1);

				img_dims[LEVEL_DIM] = levels;
				max_dims[LEVEL_DIM] = levels;

				for(int l = 0; l < levels; l++)
					blkdims[l][MAPS_DIM] = 1;

				trafos[nr] = linop_identity_create(DIMS, img_dims);
				prox_ops[nr] = lrthresh_create(img_dims, randshift, regs[nr].xflags, (const long (*)[DIMS])blkdims, regs[nr].lambda, false, 0, use_gpu);

				const struct linop_s* decom_op = sum_create( img_dims, use_gpu );
				const struct linop_s* tmp_op = forward_op;
				forward_op = linop_chain(decom_op, forward_op);

				linop_free(decom_op);
				linop_free(tmp_op);
#else
				debug_printf(DP_WARN, "multi-scale lowrank regularization not yet supported: %f\n", regs[nr].lambda);
#endif

				break;

			case IMAGL1:
				debug_printf(DP_INFO, "l1 regularization of imaginary part: %f\n", regs[nr].lambda);

				trafos[nr] = linop_rdiag_create(DIMS, img_dims, 0, &(complex float){ 1.i });
				prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu);
				break;

			case IMAGL2:
				debug_printf(DP_INFO, "l2 regularization of imaginary part: %f\n", regs[nr].lambda);

				trafos[nr] = linop_rdiag_create(DIMS, img_dims, 0, &(complex float){ 1.i });
				prox_ops[nr] = prox_leastsquares_create(DIMS, img_dims, regs[nr].lambda, NULL);
				break;

			case L1IMG:
				debug_printf(DP_INFO, "l1 regularization: %f\n", regs[nr].lambda);

				trafos[nr] = linop_identity_create(DIMS, img_dims);
				prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu);
				break;

			case L2IMG:
				debug_printf(DP_INFO, "l2 regularization: %f\n", regs[nr].lambda);

				trafos[nr] = linop_identity_create(DIMS, img_dims);
				prox_ops[nr] = prox_leastsquares_create(DIMS, img_dims, regs[nr].lambda, NULL);
				break;

			case FTL1:
				debug_printf(DP_INFO, "l1 regularization of Fourier transform: %f\n", regs[nr].lambda);

				trafos[nr] = linop_fft_create(DIMS, img_dims, regs[nr].xflags);
				prox_ops[nr] = prox_thresh_create(DIMS, img_dims, regs[nr].lambda, regs[nr].jflags, use_gpu);
				break;
		}
Ejemplo n.º 26
0
Archivo: lrthresh.c Proyecto: hcmh/bart
/*
 * Low rank thresholding for arbitrary block sizes
 *
 * For every level l the input is padded to a multiple of the block
 * size, optionally shifted by a random offset, rearranged into a
 * matrix of blocks, thresholded with batch_svthresh(), and then
 * rearranged, unshifted and resized back into the output.
 *
 * @param _data operator data, cast to struct lrthresh_data_s
 * @param mu    scaling; the threshold is based on mu * data->lambda
 * @param dst   output, one image per level
 * @param src   input, one image per level
 */
static void lrthresh_apply(const operator_data_t* _data, float mu, complex float* dst, const complex float* src)
{
	struct lrthresh_data_s* data = CAST_DOWN(lrthresh_data_s, _data);

	float lambda = mu * data->lambda;

	// computed with size 1 and used below as offsets on complex float
	// pointers, so presumably strides in elements - TODO confirm
	long strs1[DIMS];
	md_calc_strides(DIMS, strs1, data->dims_decom, 1);

//#pragma omp parallel for
	for (int l = 0; l < data->levels; l++) {

		// input and output of this level
		complex float* dstl = dst + l * strs1[LEVEL_DIM];
		const complex float* srcl = src + l * strs1[LEVEL_DIM];

		long blkdims[DIMS];
		long shifts[DIMS];
		long unshifts[DIMS];
		long zpad_dims[DIMS];
		long M = 1;

		for (unsigned int i = 0; i < DIMS; i++) {

			blkdims[i] = data->blkdims[l][i];

			// round the image size up to a multiple of the block size
			zpad_dims[i] = (data->dims[i] + blkdims[i] - 1) / blkdims[i];
			zpad_dims[i] *= blkdims[i];

			// M: product of the block sizes of the flagged dimensions
			if (MD_IS_SET(data->mflags, i))
				M *= blkdims[i];

			if (data->randshift)
				shifts[i] = rand_lim(MIN(blkdims[i] - 1, zpad_dims[i] - blkdims[i]));
			else
				shifts[i] = 0;

			// opposite shift, applied after thresholding
			unshifts[i] = -shifts[i];
		}

		long zpad_strs[DIMS];
		md_calc_strides(DIMS, zpad_strs, zpad_dims, CFL_SIZE);

		long blk_size = md_calc_size(DIMS, blkdims);
		long img_size = md_calc_size(DIMS, zpad_dims);
		long N = blk_size / M;		// remaining block entries per M
		long B = img_size / blk_size;	// number of blocks

		// with data->noise set, the last level uses M = whole image, N = B = 1
		if (data->noise && (l == data->levels - 1)) {

			M = img_size;
			N = 1;
			B = 1;
		}


		complex float* tmp = md_alloc_sameplace(DIMS, zpad_dims, CFL_SIZE, dst);

		// extend the level input to the padded size
		md_circ_ext(DIMS, zpad_dims, tmp, data->dims, srcl, CFL_SIZE);

		// NOTE(review): source and destination are the same buffer here
		md_circ_shift(DIMS, zpad_dims, shifts, tmp, tmp, CFL_SIZE);


		long mat_dims[2];
		basorati_dims(DIMS, mat_dims, blkdims, zpad_dims);

		complex float* tmp_mat = md_alloc_sameplace(2, mat_dims, CFL_SIZE, dst);

		// Reshape image into a blk_size x number of blocks matrix

		basorati_matrix(DIMS, blkdims, mat_dims, tmp_mat, zpad_dims, zpad_strs, tmp);

		// threshold all mat_dims[1] blocks, each viewed as an M x N matrix
		batch_svthresh(M, N, mat_dims[1], lambda * GWIDTH(M, N, B), *(complex float (*)[mat_dims[1]][M][N])tmp_mat);

		//	for ( int b = 0; b < mat_dims[1]; b++ )
		//	svthresh(M, N, lambda * GWIDTH(M, N, B), tmp_mat, tmp_mat);

		// back from the block matrix to the padded image
		basorati_matrixH(DIMS, blkdims, zpad_dims, zpad_strs, tmp, mat_dims, tmp_mat);

		// undo the shift
		md_circ_shift(DIMS, zpad_dims, unshifts, tmp, tmp, CFL_SIZE);

		// back from the padded size to the image size, into the level output
		md_resize(DIMS, data->dims, dstl, zpad_dims, tmp, CFL_SIZE);

		md_free(tmp);
		md_free(tmp_mat);
	}
}
Ejemplo n.º 27
0
/**
 * Block-wise operation over three flagged dimensions: for every block,
 * the matching part of src1 is multiplied with msk, passed through
 * convH() together with the kernel src2, and the result is accumulated
 * into dst.
 *
 * Exactly three dimensions must be selected in flags (asserted below).
 *
 * @param ops    supplies allocate()/del() for the per-block temporaries
 * @param N      number of dimensions of all arrays
 * @param flags  bitmask of the dimensions which are split into blocks
 * @param blk    block sizes (only read for flagged dimensions)
 * @param dims1  dimensions before padding; must equal odims and be a
 *               multiple of blk along flagged dimensions (asserted)
 * @param dst    output; it is cleared and accumulated into using the
 *               padded extents dims1[i] + dims2[i] - 1 along flagged
 *               dimensions, so it presumably has to provide that much
 *               storage - TODO confirm against callers
 * @param odims  dimensions used to index src1
 * @param src1   input
 * @param dims2  kernel dimensions (odd and <= blk along flagged dims)
 * @param src2   kernel
 * @param mdims  mask dimensions; along flagged dimensions either 1 or
 *               dims1[i] (asserted)
 * @param msk    mask multiplied onto each input block
 */
void overlapandsave2HB(const struct vec_ops* ops, int N, unsigned int flags, const long blk[N], const long dims1[N], complex float* dst, const long odims[N], const complex float* src1, const long dims2[N], const complex float* src2, const long mdims[N], const complex float* msk)
{
	long dims1B[N];

	long tdims[2 * N];
	long nodims[2 * N];
	long ndims2[2 * N];
	long nmdims[2 * N];


	// block counts of flagged dimensions are appended starting at index N
	int e = N;

	for (int i = 0; i < N; i++) {

		if (MD_IS_SET(flags, i)) {

			assert(1 == dims2[i] % 2);
			assert(0 == blk[i] % 2);
			assert(0 == dims1[i] % 2);
			assert(0 == odims[i] % blk[i]);
			assert(0 == dims1[i] % blk[i]);
			assert(dims1[i] == odims[i]);
			assert(dims2[i] <= blk[i]);
			assert(dims1[i] >= dims2[i]);
			assert((1 == mdims[i]) || (mdims[i] == dims1[i]));

			// blocked output

			nodims[e] = odims[i] / blk[i];
			nodims[i] = blk[i];

			// expanded temporary storage

			tdims[e] = dims1[i] / blk[i];
			tdims[i] = blk[i] + dims2[i] - 1;

			// blocked input

			// ---|---,---,---|---
			//   + +++ +
			//       + +++ +

			if (1 == mdims[i]) {

				// Both entries of the pair have to be set: every
				// element of nmdims is read by md_calc_strides()
				// below. (Previously index 2 * i + 1 was written
				// twice and 2 * i + 0 was left uninitialized.)
				nmdims[2 * i + 1] = 1;
				nmdims[2 * i + 0] = 1;

			} else {

				nmdims[2 * i + 1] = mdims[i] / blk[i];
				nmdims[2 * i + 0] = blk[i];
			}

			// resized input
			// minimal padding
			dims1B[i] = dims1[i] + (dims2[i] - 1);

			// kernel

			ndims2[e] = 1;
			ndims2[i] = dims2[i];

			e++;

		} else {

			nodims[i] = odims[i];
			tdims[i] = dims1[i];
			nmdims[2 * i + 1] = 1;
			nmdims[2 * i + 0] = mdims[i];

			dims1B[i] = dims1[i];
			ndims2[i] = dims2[i];
		}
	}

	int NE = e;

	// strides into the padded accumulation buffer; the extra entries
	// step from one block to the next along a flagged dimension
	long str1[NE];

	long str1B[N];
	md_calc_strides(N, str1B, dims1B, sizeof(complex float));

	e = N;
	for (int i = 0; i < N; i++) {

		str1[i] = str1B[i];

		if (MD_IS_SET(flags, i))
			str1[e++] = str1B[i] * blk[i];
	}
	assert(NE == e);



	long str2[NE];
	md_calc_strides(NE, str2, tdims, sizeof(complex float));


	long ostr[NE];
	long mstr[NE];
	long mstrB[2 * N];

	md_calc_strides(NE, ostr, nodims, sizeof(complex float));
	md_calc_strides(2 * N, mstrB, nmdims, sizeof(complex float));

	// reorder the interleaved mask strides into the
	// "in-block first, block index appended" layout used above
	e = N;
	for (int i = 0; i < N; i++) {

		mstr[i] = mstrB[2 * i + 0];

		if (MD_IS_SET(flags, i))
			mstr[e++] = mstrB[2 * i + 1];
	}
	assert(NE == e);

	// we can loop here
	assert(NE == N + 3);
	assert(1 == ndims2[N + 0]);
	assert(1 == ndims2[N + 1]);
	assert(1 == ndims2[N + 2]);
	assert(tdims[N + 0] == nodims[N + 0]);
	assert(tdims[N + 1] == nodims[N + 1]);
	assert(tdims[N + 2] == nodims[N + 2]);

	// number of elements of one output block / one expanded block
	long R = md_calc_size(N, nodims);
	long T = md_calc_size(N, tdims);


	// the result is accumulated directly in dst
	complex float* src1C = dst;

	md_clear(N, dims1B, src1C, CFL_SIZE);	// must be done here

	#pragma omp parallel for collapse(3)
	for (int k = 0; k < nodims[N + 2]; k++) {
	for (int j = 0; j < nodims[N + 1]; j++) {
	for (int i = 0; i < nodims[N + 0]; i++) {

		complex float* tmp = (complex float*)ops->allocate(2 * T);
		complex float* tmpX = (complex float*)ops->allocate(2 * R);

		// byte offsets of block (i, j, k) in accumulator, mask and input
		long off1 = str1[N + 0] * i + str1[N + 1] * j + str1[N + 2] * k;
		long off2 = mstr[N + 0] * i + mstr[N + 1] * j + mstr[N + 2] * k;
		long off3 = ostr[N + 0] * i + ostr[N + 1] * j + ostr[N + 2] * k;

		md_zmul2(N, nodims, ostr, tmpX, ostr, ((const void*)src1) + off3, mstr, ((const void*)msk) + off2);
		convH(N, flags, CONV_VALID, CONV_SYMMETRIC, tdims, tmp, nodims, tmpX, ndims2, src2);

		// expanded blocks of neighbouring iterations overlap in the
		// accumulator, hence the accumulation is serialized
		#pragma omp critical
		md_zadd2(N, tdims, str1, ((void*)src1C) + off1, str1, ((void*)src1C) + off1, str2,  tmp);

		ops->del((void*)tmpX);
		ops->del((void*)tmp);
	}}}
}
Ejemplo n.º 28
0
/**
 * Blocked convolution round trip: src1 is padded and cut into overlapping
 * blocks along the flagged dimensions, conv() is applied with the kernel
 * src2, the result is multiplied with msk, convH() is applied with the
 * same kernel and the blocks are summed back. The centered result of
 * size dims1 is written to dst.
 *
 * All arrays use an interleaved layout of 2 * N dimensions internally:
 * entry 2 * d + 0 is the extent inside a block and entry 2 * d + 1 the
 * number of blocks along dimension d.
 *
 * @param N      number of dimensions
 * @param flags  bitmask of the dimensions which are split into blocks
 * @param blk    block sizes (only read for flagged dimensions)
 * @param odims  dimensions of the intermediate (convolved) array
 * @param dst    output of size dims1
 * @param dims1  dimensions of src1
 * @param src1   input
 * @param dims2  kernel dimensions
 * @param src2   kernel
 * @param mdims  dimensions of msk
 * @param msk    mask multiplied onto the intermediate array
 */
void overlapandsave2NE(int N, unsigned int flags, const long blk[N], const long odims[N], complex float* dst, const long dims1[N], complex float* src1, const long dims2[N], complex float* src2, const long mdims[N], complex float* msk)
{
	long pad_dims[N];		// padded input

	long ext_dims[2 * N];		// expanded (overlapping) blocks
	long oblk_dims[2 * N];		// blocked output
	long iblk_dims[2 * N];		// blocked padded input
	long krn_dims[2 * N];		// kernel

	long start[2 * N];		// position of the first expanded block

	unsigned int conv_flags = 0;

	for (int d = 0; d < N; d++) {

		const int lo = d * 2 + 0;
		const int hi = d * 2 + 1;

		// identical for blocked and non-blocked dimensions

		krn_dims[hi] = 1;
		krn_dims[lo] = dims2[d];

		start[hi] = 0;

		if (!MD_IS_SET(flags, d)) {

			// dimension is passed through as a single block

			oblk_dims[hi] = 1;
			oblk_dims[lo] = odims[d];

			ext_dims[hi] = 1;
			ext_dims[lo] = dims1[d];

			iblk_dims[hi] = 1;
			iblk_dims[lo] = dims1[d];

			start[lo] = 0;

			pad_dims[d] = dims1[d];

			continue;
		}

		conv_flags = MD_SET(conv_flags, lo);

		assert(1 == dims2[d] % 2);
		assert(0 == blk[d] % 2);
		assert(0 == dims1[d] % 2);
		assert(0 == odims[d] % blk[d]);
		assert(0 == dims1[d] % blk[d]);
		assert(dims1[d] == odims[d]);
		assert(dims2[d] <= blk[d]);
		assert(dims1[d] >= dims2[d]);

		// blocked output

		oblk_dims[hi] = odims[d] / blk[d];
		oblk_dims[lo] = blk[d];

		// expanded temporary storage: each block is enlarged
		// by the kernel size minus one
		//
		// ---|---,---,---|---
		//   + +++ +
		//       + +++ +

		ext_dims[hi] = dims1[d] / blk[d];
		ext_dims[lo] = blk[d] + dims2[d] - 1;

		// resized input: one additional block on each side

		pad_dims[d] = dims1[d] + 2 * blk[d];

		iblk_dims[hi] = dims1[d] / blk[d] + 2; // do we need two full blocks?
		iblk_dims[lo] = blk[d];

		start[lo] = blk[d] - (dims2[d] - 1) / 2;
	}

	complex float* padded = md_alloc(N, pad_dims, CFL_SIZE);
	complex float* ext = md_alloc(2 * N, ext_dims, CFL_SIZE);
	complex float* mid = md_alloc(N, odims, CFL_SIZE);

	long pad_strs[2 * N];
	long ext_strs[2 * N];

	md_calc_strides(2 * N, pad_strs, iblk_dims, sizeof(complex float));
	md_calc_strides(2 * N, ext_strs, ext_dims, sizeof(complex float));

	long off = md_calc_offset(2 * N, pad_strs, start);

	// window into the padded buffer at which the first expanded block starts
	void* window = ((void*)padded) + off;

	md_resize_center(N, pad_dims, padded, dims1, src1, sizeof(complex float));

	// we can loop here

	// gather overlapping blocks and convolve

	md_copy2(2 * N, ext_dims, ext_strs, ext, pad_strs, window, sizeof(complex float));

	conv(2 * N, conv_flags, CONV_VALID, CONV_SYMMETRIC, oblk_dims, mid, ext_dims, ext, krn_dims, src2);

	// apply mask in place

	long ostr[N];
	long mstr[N];

	md_calc_strides(N, ostr, odims, sizeof(complex float));
	md_calc_strides(N, mstr, mdims, sizeof(complex float));

	md_zmul2(N, odims, ostr, mid, ostr, mid, mstr, msk);

	// go back with convH() and sum the overlapping blocks

	convH(2 * N, conv_flags, CONV_VALID, CONV_SYMMETRIC, ext_dims, ext, oblk_dims, mid, krn_dims, src2);

	md_clear(N, pad_dims, padded, sizeof(complex float));
	md_zadd2(2 * N, ext_dims, pad_strs, window, pad_strs, window, ext_strs, ext);

	// crop the padding

	md_resize_center(N, dims1, dst, pad_dims, padded, sizeof(complex float));

	md_free(padded);
	md_free(mid);
	md_free(ext);
}
Ejemplo n.º 29
0
Archivo: someops.c Proyecto: hcmh/bart
/* O I M G
 * 1 1 1 1   - not used
 * 1 1 A !   - forbidden
 * 1 A 1 !   - forbidden
 * A 1 1 !   - forbidden
 * A A 1 1   - replicated
 * A 1 A 1   - output
 * 1 A A A/A - input
 * A A A A   - batch
 */
static struct operator_matrix_s* linop_matrix_priv2(unsigned int N, const long out_dims[N], const long in_dims[N], const long matrix_dims[N], const complex float* matrix)
{
	// to get assertions and cost estimate

	long max_dims[N];
	md_tenmul_dims(N, max_dims, out_dims, in_dims, matrix_dims);


	PTR_ALLOC(struct operator_matrix_s, data);
	SET_TYPEID(operator_matrix_s, data);

	data->N = N;

	PTR_ALLOC(long[N], out_dims1);
	md_copy_dims(N, *out_dims1, out_dims);
	data->out_dims = *PTR_PASS(out_dims1);

	PTR_ALLOC(long[N], mat_dims1);
	md_copy_dims(N, *mat_dims1, matrix_dims);
	data->mat_dims = *PTR_PASS(mat_dims1);

	PTR_ALLOC(long[N], in_dims1);
	md_copy_dims(N, *in_dims1, in_dims);
	data->in_dims = *PTR_PASS(in_dims1);


	complex float* mat = md_alloc(N, matrix_dims, CFL_SIZE);

	md_copy(N, matrix_dims, mat, matrix, CFL_SIZE);

	data->mat = mat;
	data->mat_gram = NULL;
#ifdef USE_CUDA
	data->mat_gpu = NULL;
	data->mat_gram_gpu = NULL;
#endif

#if 1
	// pre-multiply gram matrix (if there is a cost reduction)

	unsigned long out_flags = md_nontriv_dims(N, out_dims);
	unsigned long in_flags = md_nontriv_dims(N, in_dims);

	unsigned long del_flags = in_flags & ~out_flags;
	unsigned long new_flags = out_flags & ~in_flags;

	/* we double (again) for the gram matrix
	 */

	PTR_ALLOC(long[2 * N], mat_dims2);
	PTR_ALLOC(long[2 * N], in_dims2);
	PTR_ALLOC(long[2 * N], gmt_dims2);
	PTR_ALLOC(long[2 * N], gin_dims2);
	PTR_ALLOC(long[2 * N], grm_dims2);
	PTR_ALLOC(long[2 * N], gout_dims2);

	shadow_dims(N, *gmt_dims2, matrix_dims);
	shadow_dims(N, *mat_dims2, matrix_dims);
	shadow_dims(N, *in_dims2, in_dims);
	shadow_dims(N, *gout_dims2, in_dims);
	shadow_dims(N, *gin_dims2, in_dims);
	shadow_dims(N, *grm_dims2, matrix_dims);

	/* move removed input dims into shadow position
	 * for the gram matrix can have an output there
	 */
	for (unsigned int i = 0; i < N; i++) {

		if (MD_IS_SET(del_flags, i)) {

			assert((*mat_dims2)[2 * i + 0] == (*in_dims2)[2 * i + 0]);

			(*mat_dims2)[2 * i + 1] = (*mat_dims2)[2 * i + 0];
			(*mat_dims2)[2 * i + 0] = 1;

			(*in_dims2)[2 * i + 1] = (*gin_dims2)[2 * i + 0];
			(*in_dims2)[2 * i + 0] = 1;
		}
	}

	for (unsigned int i = 0; i < N; i++) {

		if (MD_IS_SET(new_flags, i)) {

			(*grm_dims2)[2 * i + 0] = 1;
			(*grm_dims2)[2 * i + 1] = 1;
		}

		if (MD_IS_SET(del_flags, i)) {

			(*gout_dims2)[2 * i + 1] = (*gin_dims2)[2 * i + 0];
			(*gout_dims2)[2 * i + 0] = 1;

			(*grm_dims2)[2 * i + 0] = in_dims[i];
			(*grm_dims2)[2 * i + 1] = in_dims[i];
		}
	}


	long gmx_dims[2 * N];
	md_tenmul_dims(2 * N, gmx_dims, *gout_dims2, *gin_dims2, *grm_dims2);

	long mult_mat = md_calc_size(N, max_dims);
	long mult_gram = md_calc_size(2 * N, gmx_dims);

	if (mult_gram < 2 * mult_mat) {	// FIXME: rethink

		debug_printf(DP_DEBUG2, "Gram matrix: 2x %ld vs %ld\n", mult_mat, mult_gram);

		complex float* mat_gram = md_alloc(2 * N, *grm_dims2, CFL_SIZE);

		md_ztenmulc(2 * N, *grm_dims2, mat_gram, *gmt_dims2, matrix, *mat_dims2, matrix);

		data->mat_gram = mat_gram;
	}

	PTR_FREE(gmt_dims2);
	PTR_FREE(mat_dims2);
	PTR_FREE(in_dims2);

	data->gin_dims = *PTR_PASS(gin_dims2);
	data->gout_dims = *PTR_PASS(gout_dims2);
	data->grm_dims = *PTR_PASS(grm_dims2);
#else
	data->gin_dims = NULL;
	data->gout_dims = NULL;
	data->grm_dims = NULL;
#endif

	return PTR_PASS(data);
}
Ejemplo n.º 30
0
Archivo: someops.c Proyecto: hcmh/bart
/**
 * Chain two matrix linops by multiplying their matrices, so that the
 * result is a single matrix linop. The product matrix is copied into
 * the new operator; the temporary used here is freed before returning.
 * Returns: C = B A
 *
 * @param a first matrix (applied to input)
 * @param b second matrix (applied to output of first matrix)
 */
struct linop_s* linop_matrix_chain(const struct linop_s* a, const struct linop_s* b)
{
	const struct operator_matrix_s* first = CAST_DOWN(operator_matrix_s, linop_get_data(a));
	const struct operator_matrix_s* second = CAST_DOWN(operator_matrix_s, linop_get_data(b));

	// check compatibility
	assert(linop_codomain(a)->N == linop_domain(b)->N);
	assert(md_check_compat(linop_codomain(a)->N, 0u, linop_codomain(a)->dims, linop_domain(b)->dims));

	unsigned int D = linop_domain(a)->N;

	// dimensions which b consumes, i.e. present in its input but not its output

	unsigned long b_out_flags = md_nontriv_dims(D, linop_codomain(b)->dims);
	unsigned long b_in_flags = md_nontriv_dims(D, linop_domain(b)->dims);

	unsigned long b_del_flags = b_in_flags & ~b_out_flags;

	// the operator data stores every dimension twice

	unsigned int N = first->N;
	assert(N == 2 * D);

	long in_dims[N];
	long out_dims[N];
	long matA_dims[N];
	long matB_dims[N];

	md_copy_dims(N, in_dims, first->in_dims);
	md_copy_dims(N, matA_dims, first->mat_dims);
	md_copy_dims(N, matB_dims, second->mat_dims);
	md_copy_dims(N, out_dims, second->out_dims);

	// for dims consumed by b, exchange the two entries of each pair in a

	for (unsigned int d = 0; d < D; d++) {

		if (!MD_IS_SET(b_del_flags, d))
			continue;

		const unsigned int lo = 2 * d + 0;
		const unsigned int hi = 2 * d + 1;

		matA_dims[lo] = first->mat_dims[hi];
		matA_dims[hi] = first->mat_dims[lo];

		in_dims[lo] = first->in_dims[hi];
		in_dims[hi] = first->in_dims[lo];
	}


	long matrix_dims[N];
	md_singleton_dims(N, matrix_dims);

	unsigned long iflags = md_nontriv_dims(N, in_dims);
	unsigned long oflags = md_nontriv_dims(N, out_dims);
	unsigned long keep_flags = iflags | oflags;

	// we combine a and b and sum over dims not in input or output

	md_max_dims(N, keep_flags, matrix_dims, matA_dims, matB_dims);

	debug_printf(DP_DEBUG1, "tensor chain: %ld x %ld -> %ld\n",
			md_calc_size(N, matA_dims), md_calc_size(N, matB_dims), md_calc_size(N, matrix_dims));


	complex float* product = md_alloc(N, matrix_dims, CFL_SIZE);

	debug_print_dims(DP_DEBUG2, N, matrix_dims);
	debug_print_dims(DP_DEBUG2, N, in_dims);
	debug_print_dims(DP_DEBUG2, N, matA_dims);
	debug_print_dims(DP_DEBUG2, N, matB_dims);
	debug_print_dims(DP_DEBUG2, N, out_dims);

	md_ztenmul(N, matrix_dims, product, matA_dims, first->mat, matB_dims, second->mat);

	// priv2 takes our doubled dimensions

	struct operator_matrix_s* chained = linop_matrix_priv2(N, out_dims, in_dims, matrix_dims, product);

	/* although we internally use different dimensions we define the
	 * correct interface
	 */
	struct linop_s* result = linop_create(linop_codomain(b)->N, linop_codomain(b)->dims,
			linop_domain(a)->N, linop_domain(a)->dims, CAST_UP(chained),
			linop_matrix_apply, linop_matrix_apply_adjoint,
			linop_matrix_apply_normal, NULL, linop_matrix_del);

	md_free(product);

	return result;
}