Esempio n. 1
0
static __inline  cuDoubleComplex cuCfma(cuDoubleComplex x, cuDoubleComplex y, cuDoubleComplex d)
{
  double real_res;
  double imag_res;
  real_res = cuCreal(x) * cuCreal(y) + cuCreal(d);
  imag_res = cuCreal(x) * cuCimag(y) + cuCimag(d);
  real_res =  -(cuCimag(x) * cuCimag(y)) + real_res;
  imag_res = cuCimag(x) * cuCreal(y) + imag_res;
  return make_cuDoubleComplex(real_res, imag_res);
}
Esempio n. 2
0
void CuBlasMatrix::set(int row, int col, std::complex<double> e)
{
    Q_ASSERT(row < rows);
    Q_ASSERT(col < cols);
    if(deviceDataChanged)
        updateHostData();
    cuDoubleComplex tmp = make_cuDoubleComplex(e.real(), e.imag());
    hostRaw[col * rows + row] = tmp;
    hostDataChanged = true;
}
Esempio n. 3
0
//Generates a sparse matrix in CSR format
//Taken from the CUSPARSE sample project
void genTridiag(int *I, int *J, cuDoubleComplex *val, int N, int nz)
{
	I[0] = 0, J[0] = 0, J[1] = 1;
	val[0] = make_cuDoubleComplex(rand() / RAND_MAX + 10.0f, rand() / RAND_MAX + 10.0f);
	val[1] = make_cuDoubleComplex(rand() / RAND_MAX, rand() / RAND_MAX);
	int start;

	for (int i = 1; i < N; i++)
	{
		if (i > 1)
		{
			I[i] = I[i - 1] + 3;
		}
		else
		{
			I[1] = 2;
		}

		start = (i - 1) * 3 + 2;
		J[start] = i - 1;
		J[start + 1] = i;

		if (i < N - 1)
		{
			J[start + 2] = i + 1;
		}

		val[start] = val[start - 1];
		val[start + 1] = make_cuDoubleComplex(rand() / RAND_MAX + 10.0f, rand() / RAND_MAX + 10.0f);

		if (i < N - 1)
		{
			val[start + 2] = make_cuDoubleComplex(rand() / RAND_MAX, rand() / RAND_MAX);
		}
	}

	I[N] = nz;
}
Esempio n. 4
0
static __inline  cuDoubleComplex cuCdiv(cuDoubleComplex x, cuDoubleComplex y)
{
  cuDoubleComplex quot;
  double s = fabs(cuCreal(y)) + fabs(cuCimag(y));
  double oos = 1.00000000000000000000000000000000000000000000000000000e+00 / s;
  double ars = cuCreal(x) * oos;
  double ais = cuCimag(x) * oos;
  double brs = cuCreal(y) * oos;
  double bis = cuCimag(y) * oos;
  s = brs * brs + bis * bis;
  oos = 1.00000000000000000000000000000000000000000000000000000e+00 / s;
  quot = make_cuDoubleComplex((ars * brs + ais * bis) * oos, (ais * brs - ars * bis) * oos);
  return quot;
}
Esempio n. 5
0
int _tmain(int argc, _TCHAR* argv[])
{
	int M = 0, N = 0, nz = 0, *I = NULL, *J = NULL;
	cuDoubleComplex *val = NULL;
	cuDoubleComplex *x, *y;
	cuDoubleComplex *d_x, *d_y;
	double duration, duration_setup;


	std::clock_t setup_clock;
	setup_clock = std::clock();
	// This will pick the best possible CUDA capable device
	cudaDeviceProp deviceProp;
	int devID = findCudaDevice(argc, (const char **)argv);

	if (devID < 0)
	{
		printf("no devices found...\n");
		exit(EXIT_SUCCESS);
	}

	checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));

	// Statistics about the GPU device
	printf("> GPU device has %d Multi-Processors, SM %d.%d compute capabilities\n\n",
		deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor);

	int version = (deviceProp.major * 0x10 + deviceProp.minor);

	if (version < 0x11)
	{
		printf("Requires a minimum CUDA compute 1.1 capability\n");
		cudaDeviceReset();
		exit(EXIT_SUCCESS);
	}

	M = N = 8388608; //2 ^ 23
	//M = N = 4194304; //2 ^ 22
	//M = N = 2097152; //2 ^ 21
	//M = N = 1048576; //2 ^ 20
	//M = N = 524288; //2 ^ 19

	nz = N * 8;
	I = (int *)malloc(sizeof(int)*(N + 1));
	J = (int *)malloc(sizeof(int)*nz);
	val = (cuDoubleComplex *)malloc(sizeof(cuDoubleComplex)*nz);
	genTridiag(I, J, val, N, nz);

	x = (cuDoubleComplex*)malloc(sizeof(cuDoubleComplex)* N);
	y = (cuDoubleComplex*)malloc(sizeof(cuDoubleComplex)* N);

	//create an array for the answer array (Y) and set all of the answers to 0 for the test (could do random)
	for (int i = 0; i < N; i++)
	{
		y[i] = make_cuDoubleComplex(0.0, 0.0);
	}

	//Get handle to the CUBLAS context
	cublasHandle_t cublasHandle = 0;
	cublasStatus_t cublasStatus;
	cublasStatus = cublasCreate(&cublasHandle);

	checkCudaErrors(cublasStatus);

	//Get handle to the CUSPARSE context
	cusparseHandle_t cusparseHandle = 0;
	cusparseStatus_t cusparseStatus;
	cusparseStatus = cusparseCreate(&cusparseHandle);

	checkCudaErrors(cusparseStatus);

	//Get handle to a CUSPARSE matrix descriptor
	cusparseMatDescr_t descr = 0;
	cusparseStatus = cusparseCreateMatDescr(&descr);

	checkCudaErrors(cusparseStatus);

	//Get handle to a matrix_solve_info object
	cusparseSolveAnalysisInfo_t info = 0;
	cusparseStatus = cusparseCreateSolveAnalysisInfo(&info);

	checkCudaErrors(cusparseStatus);

	cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
	cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);

	duration_setup = (std::clock() - setup_clock) / (double)CLOCKS_PER_SEC;
	printf("setup_time: %f\r\n", duration_setup);

	std::clock_t start;
	start = std::clock();
	checkCudaErrors(cudaMalloc((void **)&d_x, N*sizeof(float)));
	checkCudaErrors(cudaMalloc((void **)&d_y, N*sizeof(float)));

	cudaMemcpy(d_x, x, N*sizeof(float), cudaMemcpyHostToDevice);
	cudaMemcpy(d_y, y, N*sizeof(float), cudaMemcpyHostToDevice);

	//Analyze the matrix. The info variable is needed to perform additional operations on the matrix
	cusparseStatus = cusparseZcsrsv_analysis(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nz, descr, val, J, I, info);
	//Uses infor gathered from the matrix to solve the matrix.
	cusparseStatus = cusparseZcsrsv_solve(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, 0, descr, val, J, I, info, d_x, d_y);

	//Get the result back from the device
	cudaMemcpy(x, d_x, N*sizeof(float), cudaMemcpyDeviceToHost);
	cudaMemcpy(y, d_y, N*sizeof(float), cudaMemcpyDeviceToHost);

	duration = (std::clock() - start) / (double)CLOCKS_PER_SEC;
	printf("time ellapsed: %f", duration);

	//free up memory
	cusparseDestroy(cusparseHandle);
	cublasDestroy(cublasHandle);
	free(I);
	free(J);
	free(val);
	free(x);
	cudaFree(d_x);
	cudaDeviceReset();

	//Wait for user input so they can see the results
	char* s = (char*)malloc(sizeof(char) * 8);
	scanf(s);

	exit(0);
}
Esempio n. 6
0
__SDH__ cdouble make_cdouble(double x, double y) { return make_cuDoubleComplex(x, y); }
Esempio n. 7
0
__SDH__ cdouble make_cdouble(cfloat    c) { return make_cuDoubleComplex(c.x,c.y);   }
Esempio n. 8
0
__SDH__ cdouble make_cdouble(double    x) { return make_cuDoubleComplex(x,0);       }
Esempio n. 9
0
__SDH__ cdouble make_cdouble(unsigned  x) { return make_cuDoubleComplex(x,0);       }
Esempio n. 10
0
static __inline  cuDoubleComplex cuComplexFloatToDouble(cuFloatComplex c)
{
  return make_cuDoubleComplex((double)cuCrealf(c), (double)cuCimagf(c));
}
Esempio n. 11
0
static __inline  cuDoubleComplex cuCmul(cuDoubleComplex x, cuDoubleComplex y)
{
  cuDoubleComplex prod;
  prod = make_cuDoubleComplex(cuCreal(x) * cuCreal(y) - cuCimag(x) * cuCimag(y), cuCreal(x) * cuCimag(y) + cuCimag(x) * cuCreal(y));
  return prod;
}
Esempio n. 12
0
static __inline  cuDoubleComplex cuCsub(cuDoubleComplex x, cuDoubleComplex y)
{
  return make_cuDoubleComplex(cuCreal(x) - cuCreal(y), cuCimag(x) - cuCimag(y));
}
Esempio n. 13
0
static __inline  cuDoubleComplex cuConj(cuDoubleComplex x)
{
  return make_cuDoubleComplex(cuCreal(x),  -cuCimag(x));
}
Esempio n. 14
0
__SDH__ cdouble make_cdouble(cfloat c) {
    return make_cuDoubleComplex(static_cast<double>(c.x), c.y);
}
Esempio n. 15
0
__SDH__ cdouble make_cdouble(double x) {
    return make_cuDoubleComplex(static_cast<double>(x), 0);
}