Example #1
/**
 * Generic function which loops over all dimensions of a set of
 * multi-dimensional arrays and calls a given function for each position.
 * This function tries to parallelize over the dimensions indicated
 * with flags.
 */
void md_parallel_nary(unsigned int C, unsigned int D, const long dim[D], unsigned long flags, const long* str[C], void* ptr[C], void* data, md_nary_fun_t fun)
{
	if (0 == flags) {

		md_nary(C, D, dim, str, ptr, data, fun);
		return;
	}

	int b = ffsl(flags & -flags) - 1;
	assert(MD_IS_SET(flags, b));

	flags = MD_CLEAR(flags, b);

	long dimc[D];
	md_select_dims(D, ~MD_BIT(b), dimc, dim);

	debug_printf(DP_DEBUG4, "Parallelize: %ld\n", dim[b]);

	// FIXME: this probably doesn't nest
	// (maybe collect all parallelizable dims into one giant loop?)
	#pragma omp parallel for
	for (long i = 0; i < dim[b]; i++) {

		void* moving_ptr[C];

		for (unsigned int j = 0; j < C; j++)
			moving_ptr[j] = ptr[j] + i * str[j][b];

		md_parallel_nary(C, D, dimc, flags, str, moving_ptr, data, fun);
	}
}
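
A minimal, self-contained sketch of the flag-peeling pattern used above (and in several of the examples below): isolate the lowest set bit, clear it with MD_CLEAR, and repeat until no flags remain. The MD_BIT/MD_IS_SET/MD_CLEAR macros are redefined locally here under the assumption that they are plain bit-mask macros, and lowest_set_bit is a hypothetical stand-in for the ffsl(flags & -flags) - 1 idiom above.

#include <stdio.h>

#define MD_BIT(x)       (1ul << (x))
#define MD_IS_SET(f, b) ((f) & MD_BIT(b))
#define MD_CLEAR(f, b)  ((f) & ~MD_BIT(b))

// index of the lowest set bit; equivalent to ffsl(flags & -flags) - 1 for nonzero flags
static int lowest_set_bit(unsigned long flags)
{
	int b = 0;
	while (!MD_IS_SET(flags, b))
		b++;
	return b;
}

int main(void)
{
	unsigned long flags = MD_BIT(1) | MD_BIT(3) | MD_BIT(6);	// dimensions 1, 3 and 6 flagged

	while (0 != flags) {

		int b = lowest_set_bit(flags);
		flags = MD_CLEAR(flags, b);

		printf("process dimension %d, remaining flags 0x%lx\n", b, flags);
	}

	return 0;
}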
Example #2
File: tv.c Project: andruw17/bart
void tv_adjoint(unsigned int D, const long dims[D], unsigned int flags, complex float* out, const complex float* in)
{
	unsigned int N = bitcount(flags);

	assert(N == dims[D - 1]);	// we use the highest dim to store our different partial derivatives

	unsigned int flags2 = flags;

	complex float* tmp = md_alloc_sameplace(D - 1, dims, CFL_SIZE, out);

	md_clear(D - 1, dims, out, CFL_SIZE);
	md_clear(D - 1, dims, tmp, CFL_SIZE);

	for (unsigned int i = 0; i < N; i++) {

		unsigned int lsb = ffs(flags2) - 1;
		flags2 = MD_CLEAR(flags2, lsb);

		md_zfdiff_backwards(D - 1, dims, lsb, tmp, in + i * md_calc_size(D - 1, dims));
	
		md_zadd(D - 1, dims, out, out, tmp);
	}

	md_free(tmp);

	assert(0 == flags2);
}
Example #3
static long wavelet_filter_flags(unsigned int N, long flags, const long dims[N], const long min[N])
{
	for (unsigned int i = 0; i < N; i++)
		if (dims[i] < min[i])	// CHECK
			flags = MD_CLEAR(flags, i);

	return flags;
}
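
For illustration (values assumed, not from the source): with N = 4, dims = { 128, 128, 1, 8 }, min = { 16, 16, 16, 1 } and flags = 7, only dims[2] = 1 falls below its minimum, so bit 2 is cleared and the function returns 3.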
Example #4
/**
 * compute set of parallelizable dimensions
 *
 */
static unsigned int parallelizable(unsigned int D, unsigned int io, unsigned int N, const long dims[N], long (*strs[D])[N], size_t size[D])
{
	// we assume no input / output overlap
	// (i.e. inputs which are also outputs have to be marked as output)

	// a dimension is parallelizable if all output operations
	// for that dimension are independent

	// for all output operations:
	// check - all other dimensions have strides greater than or equal to
	// the extent of this dimension, or have an extent smaller than or
	// equal to the stride of this dimension

	// no overlap: [222]
	//                   [111111111111]
	//                                [333333333]
	//    overlap: [222]
	//                   [1111111111111111]
	//                                [333333333]

	unsigned int flags = (1 << N) - 1;

	for (unsigned int d = 0; d < D; d++) {

		if (MD_IS_SET(io, d)) {

			bool m[N][N];
			compute_enclosures(N, m, dims, *strs[d]);

	//		print_dims(N, dims);
	//		print_dims(N, *strs[d]);

			for (unsigned int i = 0; i < N; i++) {

				unsigned int a = 0;

				for (unsigned int j = 0; j < N; j++)
					if (m[i][j] || m[j][i])
						a++;

	//			printf("%d %d %d\n", d, i, a);

				if ((a != N - 1) || ((size_t)labs((*strs[d])[i]) < size[d]))
					flags = MD_CLEAR(flags, i);
			}
		}
	}

	return flags;
}
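
A worked illustration of the criterion in the comments (numbers assumed): for complex float data with dims = { 4, 2 } and byte strides { 8, 32 }, the extent of dimension 0 is 4 * 8 = 32 bytes, which equals the stride of dimension 1, so slices along dimension 1 never overlap and dimension 1 stays parallelizable. With strides { 8, 16 } instead, slice j = 0 of dimension 1 writes offsets { 0, 8, 16, 24 } and slice j = 1 writes { 16, 24, 32, 40 }, so the writes overlap and the bit for dimension 1 would be cleared.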
Example #5
File: fft.c Project: hcmh/bart
static void fftmod2_r(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src, bool inv, double phase)
{
	if (0 == flags) {

		md_zsmul2(N, dims, ostrs, dst, istrs, src, cexp(M_PI * 2.i * (inv ? -phase : phase)));
		return;
	}


	/* this will also currently be slow on the GPU because we do not
	 * support strides there on the lowest level */

	unsigned int i = N - 1;
	while (!MD_IS_SET(flags, i))
		i--;

#if 1
	// If there is only one dimension left and it is the innermost
	// and contiguous, optimize using md_zfftmod2

	if ((0u == MD_CLEAR(flags, i)) && (1 == md_calc_size(i, dims))
		&& (CFL_SIZE == ostrs[i]) && (CFL_SIZE == istrs[i])) {

		md_zfftmod2(N - i, dims + i, ostrs + i, dst, istrs + i, src, inv, phase);
		return;
	}
#endif

	long tdims[N];
	md_select_dims(N, ~MD_BIT(i), tdims, dims);

	#pragma omp parallel for
	for (int j = 0; j < dims[i]; j++)
		fftmod2_r(N, tdims, MD_CLEAR(flags, i),
			ostrs, (void*)dst + j * ostrs[i], istrs, (void*)src + j * istrs[i],
			inv, phase + fftmod_phase(dims[i], j));
}
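
To make the shortcut condition concrete (an assumed case, not from the source): if i is the only remaining flagged dimension, every dimension below i has size 1 (so md_calc_size(i, dims) == 1), and both strides at i equal CFL_SIZE, then the data from dimension i upwards forms one contiguous block and a single md_zfftmod2 call over dims[i..N-1] replaces the stride-based recursion.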
Example #6
File: tv.c Project: andruw17/bart
void tv_op(unsigned int D, const long dims[D], unsigned int flags, complex float* out, const complex float* in)
{
	unsigned int N = bitcount(flags);

	assert(N == dims[D - 1]);	// we use the highest dim to store our different partial derivatives

	unsigned int flags2 = flags;

	for (unsigned int i = 0; i < N; i++) {

		unsigned int lsb = ffs(flags2) - 1;
		flags2 = MD_CLEAR(flags2, lsb);

		md_zfdiff(D - 1, dims, lsb, out + i * md_calc_size(D - 1, dims), in);
	}

	assert(0 == flags2);
}
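
For example (assuming FFT_FLAGS covers dimensions 0-2, as in bart's mri.h): calling tv_op with flags = FFT_FLAGS requires dims[D - 1] == 3, and the loop writes the forward finite difference along dimension 0 into slice 0 of the last dimension, along dimension 1 into slice 1, and along dimension 2 into slice 2; tv_adjoint in Example #2 sums the corresponding backward differences back into a single image.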
Example #7
// FIXME: consider moving this to a more accessible location?
static void wthresh(unsigned int D, const long dims[D], float lambda, unsigned int flags, complex float* out, const complex float* in)
{
	long minsize[D];
	md_singleton_dims(D, minsize);

	long course_scale[3] = MD_INIT_ARRAY(3, 16);
	md_copy_dims(3, minsize, course_scale);

	unsigned int wflags = 7; // FIXME

	for (unsigned int i = 0; i < 3; i++)
		if (dims[i] < minsize[i])
			wflags = MD_CLEAR(wflags, i);

	long strs[D];
	md_calc_strides(D, strs, dims, CFL_SIZE);

	const struct linop_s* w = linop_wavelet_create(D, wflags, dims, strs, minsize, false);
	const struct operator_p_s* p = prox_unithresh_create(D, w, lambda, flags);

	operator_p_apply(p, 1., D, dims, out, D, dims, in);

	operator_p_free(p);
}
Example #8
static unsigned long clear_singletons(unsigned int N, const long dims[N], unsigned long flags)
{
       return (0 == N) ? flags : clear_singletons(N - 1, dims, (1 == dims[N - 1]) ? MD_CLEAR(flags, N - 1) : flags);
}
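A quick trace (values assumed): with N = 4, dims = { 128, 128, 1, 8 } and flags = 0xf, the recursion keeps bit 3, clears bit 2 because dims[2] == 1, keeps bits 1 and 0, and returns 0xb.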
int main_homodyne(int argc, char* argv[])
{
	bool clear = false;
	const char* phase_ref = NULL;

	int com;
	while (-1 != (com = getopt(argc, argv, "hCP:"))) {

		switch (com) {

		case 'C':
			clear = true;
			break;

		case 'P':
			phase_ref = strdup(optarg);
			break;

		case 'h':
			help(argv[0], stdout);
			exit(0);

		default:
			help(argv[0], stderr);
			exit(1);
		}
	}

	if (argc - optind != 4) {
		usage(argv[0], stderr);
		exit(1);
	}

	const int N = DIMS;
	long dims[N];
	complex float* idata = load_cfl(argv[optind + 2], N, dims);
	complex float* data = create_cfl(argv[optind + 3], N, dims);

	int pfdim = atoi(argv[optind + 0]);
	float frac = atof(argv[optind + 1]);

	assert((0 <= pfdim) && (pfdim < N));
	assert(frac > 0.);


	long strs[N];
	md_calc_strides(N, strs, dims, CFL_SIZE);

	struct wdata wdata;
	wdata.frac = frac;
	wdata.pfdim = pfdim;
	md_select_dims(N, MD_BIT(pfdim), wdata.wdims, dims);
	md_calc_strides(N, wdata.wstrs, wdata.wdims, CFL_SIZE);
	wdata.weights = md_alloc(N, wdata.wdims, CFL_SIZE);

	md_loop(N, wdata.wdims, &wdata, comp_weights);

	long pstrs[N];
	long pdims[N];
	complex float* phase = NULL;

	if (NULL == phase_ref) {

		phase = estimate_phase(wdata, FFT_FLAGS, N, dims, idata);
		md_copy_dims(N, pdims, dims);
	}
	else
		phase = load_cfl(phase_ref, N, pdims);

	md_calc_strides(N, pstrs, pdims, CFL_SIZE);

	complex float* cdata = NULL;
	complex float* idata2 = NULL;

	if (clear) {

		long cdims[N];
		md_select_dims(N, ~MD_BIT(pfdim), cdims, dims);
		cdims[pfdim] = (int)(dims[pfdim] * frac);

		cdata = md_alloc(N, cdims, CFL_SIZE);
		idata2 = anon_cfl(NULL, N, dims);

		md_resize(N, cdims, cdata, dims, idata, CFL_SIZE);
		md_resize(N, dims, idata2, cdims, cdata, CFL_SIZE);

		md_free(cdata);
		unmap_cfl(N, dims, idata);
		idata = idata2;

	}


	if ((1 == dims[PHS2_DIM]) || (PHS2_DIM == pfdim)) {

		homodyne(wdata, FFT_FLAGS, N, dims, strs, data, idata, pstrs, phase);

	} else {

		unsigned int pardim = PHS2_DIM;

		ifftuc(N, dims, MD_CLEAR(FFT_FLAGS, pfdim), data, idata);

		long rdims[N];
		md_select_dims(N, ~MD_BIT(pardim), rdims, dims);
		long rstrs[N];
		md_calc_strides(N, rstrs, rdims, CFL_SIZE);

#pragma omp parallel for
		for (unsigned int i = 0; i < dims[pardim]; i++) {

			complex float* tmp = md_alloc(N, rdims, CFL_SIZE);
			long pos[N];
			md_set_dims(N, pos, 0);
			pos[pardim] = i;

			md_copy_block(N, pos, rdims, tmp, dims, data, CFL_SIZE);
			homodyne(wdata, MD_BIT(pfdim), N, rdims, rstrs, tmp, tmp, pstrs, phase);
			md_copy_block(N, pos, dims, data, rdims, tmp, CFL_SIZE);
			md_free(tmp);
		}
	}

	md_free(wdata.weights);
	if (NULL == phase_ref)
		md_free(phase);
	else {
		unmap_cfl(N, pdims, phase);
		free((void*)phase_ref);
	}

	unmap_cfl(N, dims, idata);
	unmap_cfl(N, dims, data);

	exit(0);
}
Example #10
int main_homodyne(int argc, char* argv[])
{
	mini_cmdline(argc, argv, 4, usage_str, help_str);

	const int N = DIMS;
	long dims[N];
	complex float* idata = load_cfl(argv[3], N, dims);
	complex float* data = create_cfl(argv[4], N, dims);

	int pfdim = atoi(argv[1]);
	float frac = atof(argv[2]);

	assert((0 <= pfdim) && (pfdim < N));
	assert(frac > 0.);


	long strs[N];
	md_calc_strides(N, strs, dims, CFL_SIZE);

	struct wdata wdata;
	wdata.frac = frac;
	wdata.pfdim = pfdim;
	md_select_dims(N, MD_BIT(pfdim), wdata.wdims, dims);
	md_calc_strides(N, wdata.wstrs, wdata.wdims, CFL_SIZE);
	wdata.weights = md_alloc(N, wdata.wdims, CFL_SIZE);

	md_loop(N, wdata.wdims, &wdata, comp_weights);

	if ((1 == dims[PHS2_DIM]) || (PHS2_DIM == pfdim)) {

		homodyne(wdata, FFT_FLAGS, N, dims, strs, data, idata);

	} else {

		unsigned int pardim = PHS2_DIM;

		ifftuc(N, dims, MD_CLEAR(FFT_FLAGS, pfdim), data, idata);

		long rdims[N];
		md_select_dims(N, ~MD_BIT(pardim), rdims, dims);
		long rstrs[N];
		md_calc_strides(N, rstrs, rdims, CFL_SIZE);

#pragma omp parallel for
		for (unsigned int i = 0; i < dims[pardim]; i++) {

			complex float* tmp = md_alloc(N, rdims, CFL_SIZE);
			long pos[N];
			md_set_dims(N, pos, 0);
			pos[pardim] = i;

			md_copy_block(N, pos, rdims, tmp, dims, data, CFL_SIZE);
			homodyne(wdata, MD_BIT(pfdim), N, rdims, rstrs, tmp, tmp);
			md_copy_block(N, pos, dims, data, rdims, tmp, CFL_SIZE);
			md_free(tmp);
		}
	}

	md_free(wdata.weights);

	unmap_cfl(N, dims, idata);
	unmap_cfl(N, dims, data);

	exit(0);
}