/*
 * One level of the forward wavelet transform along dimension d.
 *
 * The N-dimensional problem is collapsed into a 3D view
 * { dims below d, dims[d], dims above d }, which requires the
 * dimensions on either side of d to be contiguous in memory
 * (checked by the md_calc_blockdim asserts below).
 *
 * N      number of dimensions
 * d      dimension to transform along
 * dims   input dimensions
 * ostr   strides for the output bands
 * low    output low-pass band (size bandsize(dims[d], flen) along d)
 * hgh    output high-pass band (same size as low)
 * istr   strides for the input
 * in     input array
 * flen   filter length
 * filter filter bank; filter[0][0]/filter[0][1] are the analysis
 *        low-/high-pass filters (filter[1][*] are used by iwt1)
 */
void fwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* low, complex float* hgh, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen])
{
	debug_printf(DP_DEBUG4, "fwt1: %d/%d\n", d, N);
	debug_print_dims(DP_DEBUG4, N, dims);

	assert(dims[d] >= 2);

	// output band dims: same as input except along d, which shrinks to the band size
	long odims[N];
	md_copy_dims(N, odims, dims);
	odims[d] = bandsize(dims[d], flen);

	debug_print_dims(DP_DEBUG4, N, odims);

	long o = d + 1;
	long u = N - o;

	// 0 1 2 3 4 5 6|7
	// --d-- * --u--|N
	// ---o---

	// dims below d and above d must each be one contiguous block so we can merge them
	assert(d == md_calc_blockdim(d, dims + 0, istr + 0, CFL_SIZE));
	assert(u == md_calc_blockdim(u, dims + o, istr + o, CFL_SIZE * md_calc_size(o, dims)));

	assert(d == md_calc_blockdim(d, odims + 0, ostr + 0, CFL_SIZE));
	assert(u == md_calc_blockdim(u, odims + o, ostr + o, CFL_SIZE * md_calc_size(o, odims)));

	// merge dims into a 3D view: { below-d, d itself, above-d }
	long wdims[3] = { md_calc_size(d, dims), dims[d], md_calc_size(u, dims + o) };
	long wistr[3] = { CFL_SIZE, istr[d], CFL_SIZE * md_calc_size(o, dims) };
	long wostr[3] = { CFL_SIZE, ostr[d], CFL_SIZE * md_calc_size(o, odims) };

#ifdef USE_CUDA
	if (cuda_ondevice(in)) {

		assert(cuda_ondevice(low));
		assert(cuda_ondevice(hgh));

		// copy the analysis filter taps to the device for the CUDA kernels
		float* flow = md_gpu_move(1, MD_DIMS(flen), filter[0][0], FL_SIZE);
		float* fhgh = md_gpu_move(1, MD_DIMS(flen), filter[0][1], FL_SIZE);

		wl3_cuda_down3(wdims, wostr, low, wistr, in, flen, flow);
		wl3_cuda_down3(wdims, wostr, hgh, wistr, in, flen, fhgh);

		md_free(flow);
		md_free(fhgh);

		return;
	}
#endif

	// no clear needed
	wavelet_down3(wdims, wostr, low, wistr, in, flen, filter[0][0]);
	wavelet_down3(wdims, wostr, hgh, wistr, in, flen, filter[0][1]);
}
/*
 * One level of the inverse wavelet transform along dimension d.
 *
 * Mirror of fwt1: the low- and high-pass bands (sized bandsize(dims[d], flen)
 * along d) are merged back into out, again via a collapsed 3D view that
 * requires contiguity on either side of d.
 *
 * N      number of dimensions
 * d      dimension to transform along
 * dims   output dimensions
 * ostr   strides for the output
 * out    reconstructed output
 * istr   strides for the input bands
 * low    input low-pass band
 * hgh    input high-pass band
 * flen   filter length
 * filter filter bank; filter[1][0]/filter[1][1] are the synthesis
 *        low-/high-pass filters
 */
void iwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* out, const long istr[N], const complex float* low, const complex float* hgh, const long flen, const float filter[2][2][flen])
{
	debug_printf(DP_DEBUG4, "ifwt1: %d/%d\n", d, N);
	debug_print_dims(DP_DEBUG4, N, dims);

	assert(dims[d] >= 2);

	// input band dims: same as output except along d (band size)
	long idims[N];
	md_copy_dims(N, idims, dims);
	idims[d] = bandsize(dims[d], flen);

	debug_print_dims(DP_DEBUG4, N, idims);

	long o = d + 1;
	long u = N - o;

	// 0 1 2 3 4 5 6|7
	// --d-- * --u--|N
	// ---o---

	// dims below d and above d must each be one contiguous block so we can merge them
	assert(d == md_calc_blockdim(d, dims + 0, ostr + 0, CFL_SIZE));
	assert(u == md_calc_blockdim(u, dims + o, ostr + o, CFL_SIZE * md_calc_size(o, dims)));

	assert(d == md_calc_blockdim(d, idims + 0, istr + 0, CFL_SIZE));
	assert(u == md_calc_blockdim(u, idims + o, istr + o, CFL_SIZE * md_calc_size(o, idims)));

	// merge dims into a 3D view: { below-d, d itself, above-d }
	long wdims[3] = { md_calc_size(d, dims), dims[d], md_calc_size(u, dims + o) };
	long wistr[3] = { CFL_SIZE, istr[d], CFL_SIZE * md_calc_size(o, idims) };
	long wostr[3] = { CFL_SIZE, ostr[d], CFL_SIZE * md_calc_size(o, dims) };

	// clear out once up front: both band contributions below are merged
	// (accumulated) into the same output, so it must start from zero
	// NOTE(review): the original comment here ("we cannot clear") contradicted
	// the call it annotated; the clear is intentional and happens exactly once.
	md_clear(3, wdims, out, CFL_SIZE);	// we clear once, then merge both band outputs

#ifdef USE_CUDA
	if (cuda_ondevice(out)) {

		assert(cuda_ondevice(low));
		assert(cuda_ondevice(hgh));

		// copy the synthesis filter taps to the device for the CUDA kernels
		float* flow = md_gpu_move(1, MD_DIMS(flen), filter[1][0], FL_SIZE);
		float* fhgh = md_gpu_move(1, MD_DIMS(flen), filter[1][1], FL_SIZE);

		wl3_cuda_up3(wdims, wostr, out, wistr, low, flen, flow);
		wl3_cuda_up3(wdims, wostr, out, wistr, hgh, flen, fhgh);

		md_free(flow);
		md_free(fhgh);

		return;
	}
#endif

	wavelet_up3(wdims, wostr, out, wistr, low, flen, filter[1][0]);
	wavelet_up3(wdims, wostr, out, wistr, hgh, flen, filter[1][1]);
}
/*
 * Normal operator A^H A for the matrix linop.
 *
 * If a precomputed Gram matrix is available it is applied directly
 * (with a lazily-created device copy on the CUDA path); otherwise the
 * normal operator is realized as forward followed by adjoint.
 */
static void linop_matrix_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data);

	if (NULL == data->mat_gram) {

		// no Gram matrix precomputed: dst = A^H (A src)
		complex float* work = md_alloc_sameplace(data->N, data->out_dims, CFL_SIZE, src);

		linop_matrix_apply(_data, work, src);
		linop_matrix_apply_adjoint(_data, dst, work);

		md_free(work);
		return;
	}

	const complex float* gram = data->mat_gram;

#ifdef USE_CUDA
	if (cuda_ondevice(src)) {

		// lazily cache a device copy of the Gram matrix
		if (NULL == data->mat_gram_gpu)
			data->mat_gram_gpu = md_gpu_move(2 * data->N, data->grm_dims, data->mat_gram, CFL_SIZE);

		gram = data->mat_gram_gpu;
	}
#endif

	md_ztenmul(2 * data->N, data->gout_dims, dst, data->gin_dims, src, data->grm_dims, gram);
}
/*
 * Return the sampling pattern, as a device pointer when gpu is set.
 *
 * The GPU copy is created on first use and cached in the data struct
 * (const is cast away for this lazy-init write only).
 */
static const complex float* get_pat(const struct sampling_data_s* data, bool gpu)
{
	if (!gpu)
		return data->pattern;

	struct sampling_data_s* wdata = (struct sampling_data_s*)data;

	if (NULL == wdata->gpu_pattern)
		wdata->gpu_pattern = md_gpu_move(DIMS, data->pat_dims, data->pattern, CFL_SIZE);

	return wdata->gpu_pattern;
}
/*
 * Return the fmac tensor, as a device pointer when gpu is set.
 *
 * The GPU copy is created on first use and cached in the data struct
 * (const is cast away for this lazy-init write only).
 */
static const complex float* get_tensor(const struct fmac_data* data, bool gpu)
{
	if (!gpu)
		return data->tensor;

	struct fmac_data* wdata = (struct fmac_data*)data;

	if (NULL == wdata->gpu_tensor)
		wdata->gpu_tensor = md_gpu_move(data->N, data->tdims, data->tensor, CFL_SIZE);

	return wdata->gpu_tensor;
}
/*
 * Adjoint of the (complex) diagonal operator: pointwise multiply src
 * by the diagonal — conjugated for the complex case, plain for rmul.
 *
 * On the CUDA path a device copy of the diagonal is created lazily and
 * cached in the data struct.
 */
static void cdiag_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	const struct cdiag_s* data = CAST_DOWN(cdiag_s, _data);

	const complex float* diag = data->diag;

#ifdef USE_CUDA
	if (cuda_ondevice(src)) {

		struct cdiag_s* wdata = (struct cdiag_s*)data;	// cast away const for lazy-init write

		if (NULL == wdata->gpu_diag)
			wdata->gpu_diag = md_gpu_move(data->N, data->dims, data->diag, CFL_SIZE);

		diag = wdata->gpu_diag;
	}
#endif

	// real diagonal multiplies without conjugation; complex uses the conjugate
	if (data->rmul)
		md_zrmul2(data->N, data->dims, data->strs, dst, data->strs, src, data->dstrs, diag);
	else
		md_zmulc2(data->N, data->dims, data->strs, dst, data->strs, src, data->dstrs, diag);
}
/*
 * Adjoint of the matrix linop: dst = A^H src via a conjugated tensor
 * multiplication.
 *
 * On the CUDA path a device copy of the matrix is created lazily and
 * cached in the data struct.
 */
static void linop_matrix_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data);

	const complex float* matrix = data->mat;

#ifdef USE_CUDA
	if (cuda_ondevice(src)) {

		// lazily cache a device copy of the matrix
		if (NULL == data->mat_gpu)
			data->mat_gpu = md_gpu_move(data->N, data->mat_dims, data->mat, CFL_SIZE);

		matrix = data->mat_gpu;
	}
#endif

	md_ztenmulc(data->N, data->in_dims, dst, data->out_dims, src, data->mat_dims, matrix);
}
struct noir_data* noir_init(const long dims[DIMS], const complex float* mask, const complex float* psf, bool rvc, bool use_gpu) { #ifdef USE_CUDA md_alloc_fun_t my_alloc = use_gpu ? md_alloc_gpu : md_alloc; #else assert(!use_gpu); md_alloc_fun_t my_alloc = md_alloc; #endif PTR_ALLOC(struct noir_data, data); data->rvc = rvc; md_copy_dims(DIMS, data->dims, dims); md_select_dims(DIMS, FFT_FLAGS|COIL_FLAG|CSHIFT_FLAG, data->sign_dims, dims); md_calc_strides(DIMS, data->sign_strs, data->sign_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS|COIL_FLAG|MAPS_FLAG, data->coil_dims, dims); md_calc_strides(DIMS, data->coil_strs, data->coil_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS|MAPS_FLAG|CSHIFT_FLAG, data->imgs_dims, dims); md_calc_strides(DIMS, data->imgs_strs, data->imgs_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS|COIL_FLAG, data->data_dims, dims); md_calc_strides(DIMS, data->data_strs, data->data_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS, data->mask_dims, dims); md_calc_strides(DIMS, data->mask_strs, data->mask_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS, data->wght_dims, dims); md_calc_strides(DIMS, data->wght_strs, data->wght_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS|CSHIFT_FLAG, data->ptrn_dims, dims); md_calc_strides(DIMS, data->ptrn_strs, data->ptrn_dims, CFL_SIZE); complex float* weights = md_alloc(DIMS, data->wght_dims, CFL_SIZE); noir_calc_weights(dims, weights); fftmod(DIMS, data->wght_dims, FFT_FLAGS, weights, weights); fftscale(DIMS, data->wght_dims, FFT_FLAGS, weights, weights); data->weights = weights; #ifdef USE_CUDA if (use_gpu) { data->weights = md_gpu_move(DIMS, data->wght_dims, weights, CFL_SIZE); } #endif complex float* ptr = my_alloc(DIMS, data->ptrn_dims, CFL_SIZE); md_copy(DIMS, data->ptrn_dims, ptr, psf, CFL_SIZE); fftmod(DIMS, data->ptrn_dims, FFT_FLAGS, ptr, ptr); data->pattern = ptr; complex float* msk = my_alloc(DIMS, data->mask_dims, CFL_SIZE); if (NULL == mask) { assert(!use_gpu); md_zfill(DIMS, data->mask_dims, msk, 
1.); } else { md_copy(DIMS, data->mask_dims, msk, mask, CFL_SIZE); } // fftmod(DIMS, data->mask_dims, 7, msk, msk); fftscale(DIMS, data->mask_dims, FFT_FLAGS, msk, msk); data->mask = msk; data->sens = my_alloc(DIMS, data->coil_dims, CFL_SIZE); data->xn = my_alloc(DIMS, data->imgs_dims, CFL_SIZE); data->tmp = my_alloc(DIMS, data->sign_dims, CFL_SIZE); return data; }