Пример #1
0
int main_transpose(int argc, char* argv[])
{
	mini_cmdline(argc, argv, 4, usage_str, help_str);

	int N = DIMS;
	long idims[N];

	int dim1 = atoi(argv[1]);
	int dim2 = atoi(argv[2]);

	assert((0 <= dim1) && (dim1 < N));
	assert((0 <= dim2) && (dim2 < N));

	complex float* idata = load_cfl(argv[3], N, idims);

	long odims[N];
	md_transpose_dims(N, dim1, dim2, odims, idims);

	complex float* odata = create_cfl(argv[4], N, odims);

	md_transpose(N, dim1, dim2, odims, odata, idims, idata, sizeof(complex float));

	unmap_cfl(N, idims, idata);
	unmap_cfl(N, odims, odata);
	exit(0);
}
Пример #2
0
static bool test_md_transpose(void)
{
	enum { N = 4 };
	long dims[N] = { 10, 10, 10, 10 };

	complex float* a = md_alloc(N, dims, sizeof(complex float));

	md_gaussian_rand(N, dims, a);

	complex float* b = md_alloc(N, dims, sizeof(complex float));
	complex float* c = md_alloc(N, dims, sizeof(complex float));

	md_transpose(N, 0, 2, dims, b, dims, a, sizeof(complex float));
	md_transpose(N, 0, 2, dims, c, dims, b, sizeof(complex float));

	bool eq = md_compare(N, dims, a, c, sizeof(complex float));

	md_free(a);
	md_free(b);
	md_free(c);

	return eq;
}
Пример #3
0
static double bench_transpose(long scale)
{
	long dims[DIMS] = { 2000 * scale, 2000 * scale, 1, 1, 1, 1, 1, 1 };

	complex float* x = md_alloc(DIMS, dims, CFL_SIZE);
	complex float* y = md_alloc(DIMS, dims, CFL_SIZE);
	
	md_gaussian_rand(DIMS, dims, x);
	md_clear(DIMS, dims, y, CFL_SIZE);

	double tic = timestamp();

	md_transpose(DIMS, 0, 1, dims, y, dims, x, CFL_SIZE);

	double toc = timestamp();

	md_free(x);
	md_free(y);
	
	return toc - tic;
}
Пример #4
0
static void linop_matrix_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	const struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data);

	unsigned int N = data->mat_iovec->N;
	// FIXME check all the cases where computation can be done with blas
	
	//debug_printf(DP_DEBUG1, "compute normal\n");
	if (cgemm_forward_standard(data)) {

		long max_dims_gram[N];
		md_copy_dims(N, max_dims_gram, data->domain_iovec->dims);
		max_dims_gram[data->T_dim] = data->K;

		long tmp_dims[N];
		long tmp_str[N];
		md_copy_dims(N, tmp_dims, max_dims_gram);
		tmp_dims[data->K_dim] = 1;
		md_calc_strides(N, tmp_str, tmp_dims, CFL_SIZE);

		complex float* tmp = md_alloc_sameplace(N, data->domain_iovec->dims, CFL_SIZE, dst);

		md_clear(N, data->domain_iovec->dims, tmp, CFL_SIZE);
		md_zfmac2(N, max_dims_gram, tmp_str, tmp, data->domain_iovec->strs, src, data->mat_gram_iovec->strs, data->mat_gram);
		md_transpose(N, data->T_dim, data->K_dim, data->domain_iovec->dims, dst, tmp_dims, tmp, CFL_SIZE);

		md_free(tmp);

	} else {

		long L = md_calc_size(data->T_dim, data->domain_iovec->dims);

		blas_cgemm('N', 'T', L, data->K, data->K, 1.,
				L, (const complex float (*)[])src,
				data->K, (const complex float (*)[])data->mat_gram,
				0., L, (complex float (*)[])dst);
	}

}
Пример #5
0
/**
 * Compute the Gram matrix, A^H A.
 * Stores the result in @param gram, which is allocated by the function
 * Returns: iovec_s corresponding to the gram matrix dimensions
 *
 * @param N number of dimensions
 * @param T_dim dimension corresponding to the rows of A
 * @param T number of rows of A (codomain)
 * @param K_dim dimension corresponding to the columns of A
 * @param K number of columns of A (domain)
 * @param gram store the result (allocated by this function)
 * @param matrix_dims dimensions of A
 * @param matrix matrix data
 */
const struct iovec_s* compute_gram_matrix(unsigned int N, unsigned int T_dim, unsigned int T, unsigned int K_dim, unsigned int K, complex float** gram, const long matrix_dims[N], const complex float* matrix)
{
	// FIXME this can certainly be simplfied...
	// Just be careful to consider the case where the data passed to the operator is a subset of a bigger array
	

	// B_dims = [T K 1]  or  [K T 1]
	// C_dims = [T 1 K]  or  [1 T K]
	// A_dims = [1 K K]  or  [K 1 K]
	// after: gram_dims = [1 K1 K2] --> [K2 K1 1]  or  [K1 1 K2] --> [K1 K2 1]

	long A_dims[N + 1];
	long B_dims[N + 1];
	long C_dims[N + 1];
	long fake_gram_dims[N + 1];

	long A_str[N + 1];
	long B_str[N + 1];
	long C_str[N + 1];
	long max_dims[N + 1];

	md_singleton_dims(N + 1, A_dims);
	md_singleton_dims(N + 1, B_dims);
	md_singleton_dims(N + 1, C_dims);
	md_singleton_dims(N + 1, fake_gram_dims);
	md_singleton_dims(N + 1, max_dims);

	A_dims[K_dim] = K;
	A_dims[N] = K;

	B_dims[T_dim] = T;
	B_dims[K_dim] = K;

	C_dims[T_dim] = T;
	C_dims[N] = K;

	max_dims[T_dim] = T;
	max_dims[K_dim] = K;
	max_dims[N] = K;

	fake_gram_dims[T_dim] = K;
	fake_gram_dims[K_dim] = K;

	md_calc_strides(N + 1, A_str, A_dims, CFL_SIZE);
	md_calc_strides(N + 1, B_str, B_dims, CFL_SIZE);
	md_calc_strides(N + 1, C_str, C_dims, CFL_SIZE);

	complex float* tmpA = md_alloc_sameplace(N + 1 , A_dims, CFL_SIZE, matrix);
	complex float* tmpB = md_alloc_sameplace(N + 1, B_dims, CFL_SIZE, matrix);
	complex float* tmpC = md_alloc_sameplace(N + 1, C_dims, CFL_SIZE, matrix);

	md_copy(N, matrix_dims, tmpB, matrix, CFL_SIZE);
	//md_copy(N, matrix_dims, tmpC, matrix, CFL_SIZE);

	md_transpose(N + 1, K_dim, N, C_dims, tmpC, B_dims, tmpB, CFL_SIZE);
	md_clear(N + 1, A_dims, tmpA, CFL_SIZE);
	md_zfmacc2(N + 1, max_dims, A_str, tmpA, B_str, tmpB, C_str, tmpC);

	*gram = md_alloc_sameplace(N, fake_gram_dims, CFL_SIZE, matrix);
	md_transpose(N + 1, T_dim, N, fake_gram_dims, *gram, A_dims, tmpA, CFL_SIZE); 


	const struct iovec_s* s =  iovec_create(N, fake_gram_dims, CFL_SIZE);

	md_free(tmpA);
	md_free(tmpB);
	md_free(tmpC);

	return s;
}