Пример #1
0
int main()
{
	//Magma initialization
	magma_init();
	//Declaration of local variables
	double *a, *b, *dev_a, results=0;
	const int N=4098;
	int i,j;
	magma_int_t info=0, lda=N, ngpu=2;


	//Memory Allocation Segment
	magma_malloc_pinned((void**) &a,(N*N)*sizeof(double));
	magma_malloc_pinned((void**) &b,(N*N)*sizeof(double));

	
	//Generate two copies of Symmetric Positive Definite Matrix
	for(i=0;i<N;i++)
	{
		for(j=0;j<i;j++)
		{
			a[i*N+j] = 1e-9* (double)rand();
			a[j*N+i] = a[i*N+j];
			b[i*N+j] = a[i*N+j];
			b[j*N+i] = b[i*N+j];
		}
		a[i*N+i] = 1e-9*(double)rand() +  1000.0;
		b[i*N+i] = a[i*N+i];
	}

	//Call custom Magma Cholesky for obtaining results
	rr_dpotrf_m(ngpu,MagmaUpper,N,a,N,&info);

	//Call Standard Magma Cholesky for result validation
	magma_dpotrf(MagmaUpper,N,b,N,&info);
	if(info != 0)
	{
		printf("magma_dpotrf original returned error %d: %s. \n",(int) info, magma_strerror(info));
	}

	//Validate the results; Compute the RMS error value.
	for(i=0;i<N;i++)
		for(j=0;j<N;j++)
			results = results + (a[i*N+j] - b[i*N+j]) * (a[i*N+j] - b[i*N+j]);
	
	//Display the results of the test
	if(results < 1e-5)
		printf("The two functions have identical results\n");
	else
		printf("The custom function had significant errors. The RMS value was %g\n",results);
	magma_free_pinned(a);
	magma_free_pinned(b);
	magma_finalize();
	return 0;

}
Пример #2
0
SEXP magQR(SEXP a)
{
   SEXP gpu = GET_SLOT(a, install("gpu")),
        b = PROTECT(NEW_OBJECT(MAKE_CLASS("magmaQR")));
   int *DIMA = INTEGER(GET_DIM(a)), M = DIMA[0], N = DIMA[1],
       MIN_MN = (M < N ? M : N), NB = magma_get_dgeqrf_nb(M), *pivot, info;
   double *A, *tau;

   A = REAL(SET_VECTOR_ELT(b, 0, AS_NUMERIC(duplicate(a))));
   SET_VECTOR_ELT(b, 1, ScalarInteger(MIN_MN));
   tau = REAL(SET_VECTOR_ELT(b, 2, NEW_NUMERIC(MIN_MN)));
   pivot = INTEGER(SET_VECTOR_ELT(b, 3, NEW_INTEGER(N)));

   int i;
   for(i = 1; i <= N; i++) *pivot++ = i;

   if(LOGICAL_VALUE(gpu)) {
      int LENT = (2*MIN_MN + (N+31)/32*32)*NB;
      double *dA, *dT, *work;

      SET_SLOT(b, install("work"), NEW_NUMERIC(LENT));
      work = REAL(GET_SLOT(b, install("work")));

      magma_malloc((void**)&dA, (M*N)*sizeof(double));
      magma_malloc((void**)&dT, LENT*sizeof(double));

      magma_dsetmatrix(M, N, A, M, dA, M);
      magma_dgeqrf3_gpu(M, N, dA, M, tau, dT, &info);
      magma_dgetmatrix(M, N, dA, M, A, M);
      magma_dgetvector(LENT, dT, 1, work, 1);

      magma_free(dA);
      magma_free(dT);
   } else {
      int LWORK = N * NB;
      double *hA, *hwork;

      magma_malloc_pinned((void**)&hA, (M*N)*sizeof(double));
      magma_malloc_pinned((void**)&hwork, LWORK*sizeof(double));

      lapackf77_dlacpy(MagmaUpperLowerStr, &M, &N, A, &M, hA, &M);
      magma_dgeqrf_ooc(M, N, hA, M, tau, hwork, LWORK, &info);
      lapackf77_dlacpy(MagmaUpperLowerStr, &M, &N, hA, &M, A, &M);

      magma_free_pinned(hA);
      magma_free_pinned(hwork);
   }

   if(info < 0) error("illegal argument %d in 'magQR'", -1 * info);

   UNPROTECT(1);

   return b;
}
Пример #3
0
	K* hostmalloc(size_t size) {
		K* ptr;
		if(MAGMA_SUCCESS !=
		    magma_malloc_pinned( (void**)&ptr, size*sizeof(K) )) {
			std::cerr << "MAGMA ERROR: magma_malloc_pinned failed\n";
		    exit(-1);
		}
		return ptr;
	}
Пример #4
0
SEXP magChol(SEXP a)
{
   SEXP gpu = GET_SLOT(a, install("gpu")),
        b = PROTECT(NEW_OBJECT(MAKE_CLASS("magma")));
   int *DIMA = INTEGER(GET_DIM(a)), N = DIMA[0], N2 = N * N, LDB = N, info;
   double *B;

   if(DIMA[1] != N) error("non-square matrix");

   b = SET_SLOT(b, install(".Data"), AS_NUMERIC(a));
   SET_SLOT(b, install("gpu"), duplicate(gpu));
   B = REAL(b);
   
   if(LOGICAL_VALUE(gpu)) {
      double *dB;

      magma_malloc((void**)&dB, N2*sizeof(double));

      magma_dsetmatrix(N, N, B, LDB, dB, LDB);
      magma_dpotrf_gpu(magma_uplo_const('U'), N, dB, LDB, &info);
      magma_dgetmatrix(N, N, dB, LDB, B, LDB);

      magma_free(dB);
   } else {
      double *hB;

      magma_malloc_pinned((void**)&hB, N2*sizeof(double));
      lapackf77_dlacpy(MagmaUpperStr, &N, &N, B, &LDB, hB, &LDB);
      magma_dpotrf(magma_uplo_const('U'), N, hB, N, &info);
      lapackf77_dlacpy(MagmaUpperStr, &N, &N, hB, &LDB, B, &LDB);

      magma_free_pinned(hB);
   }

   if(info < 0) error("illegal argument %d in 'magChol", -1 * info);
   else if(info > 0) error("leading minor of order %d is not positive definite", info);

   int i, j;
   for(j = 0; j < N; j++) {
      for(i = j + 1; i < N; i++) {
         B[i + j * N] = 0.0;
      }
   }

   UNPROTECT(1);

   return b;
}
Пример #5
0
SEXP magLU(SEXP a)
{
   SEXP gpu = GET_SLOT(a, install("gpu")),
        b = PROTECT(NEW_OBJECT(MAKE_CLASS("magmaLU")));
   int *DIMA = INTEGER(GET_DIM(a)), M = DIMA[0], N = DIMA[1], LDA = M,
       MIN_MN = M < N ? M : N, *ipiv, info;
   double *A = REAL(PROTECT(AS_NUMERIC(a)));

   b = SET_SLOT(b, install(".Data"), AS_NUMERIC(a));
   SET_SLOT(b, install("pivot"), NEW_INTEGER(MIN_MN));
   ipiv = INTEGER(GET_SLOT(b, install("pivot")));
   SET_SLOT(b, install("gpu"), duplicate(gpu));

   if(LOGICAL_VALUE(gpu)) {
      double *dA;

      magma_malloc((void**)&dA, (M*N)*sizeof(double));

      magma_dsetmatrix(M, N, A, LDA, dA, LDA);
      magma_dgetrf_gpu(M, N, dA, LDA, ipiv, &info);
      magma_dgetmatrix(M, N, dA, LDA, REAL(b), LDA);

      magma_free(dA);
   } else {
      double *hA;

      magma_malloc_pinned((void**)&hA, (M*N)*sizeof(double));

      lapackf77_dlacpy(MagmaUpperLowerStr, &M, &N, A, &LDA, hA, &LDA);
      magma_dgetrf(M, N, hA, LDA, ipiv, &info);
      lapackf77_dlacpy(MagmaUpperLowerStr, &M, &N, hA, &LDA, REAL(b), &LDA);

      magma_free_pinned(hA);
   }

   if(info < 0) error("illegal argument %d in 'magLU'", -1 * info);
   else if(info > 0) error("factor U is singular");

   UNPROTECT(2);

   return b;
}
Пример #6
0
SEXP magmaCholeskyFinal_m(SEXP A, SEXP n, SEXP NB, SEXP zeroTri, SEXP ngpu, SEXP lowerTri)
{
	magma_init();
	int ndevices;
	double *h_R;
	
	ndevices = INTEGER_VALUE(ngpu);
        int idevice;
        for(idevice=0; idevice < ndevices; idevice++)
        {
                magma_setdevice(idevice);
                if(CUBLAS_STATUS_SUCCESS != cublasInit())
                {
                        printf("Error: gpu %d: cublasInit failed\n", idevice);
                        magma_finalize();
                        exit(-1);
                }
        }
//	magma_print_devices();
	
	int In, INB;
	In = INTEGER_VALUE(n);
	INB = INTEGER_VALUE(NB);
	double *PA = NUMERIC_POINTER(A);
	int i,j;

	//magma_timestr_t start, end;
	double gpu_time;
	printf("Inside magma_dpotrf_m");
	/*for(i = 0; i < 5; i++)
	{
		for(j = 0; j < 5; j++)
		{
			printf("%.8f ", PA[i+j*In]);
		}
		printf("\n");
	}	*/
	magma_int_t N, status, info, nGPU, n2, lda;
	clock_t t1, t2;
	N = In;
	status = 0;
	int nGPUs = ndevices;

        lda = N;
        n2 = lda*N;

	if ( MAGMA_SUCCESS != magma_malloc_pinned( (void**) &h_R, (n2)*sizeof(double) )) {      
        fprintf( stderr, "!!!! magma_malloc_pinned failed for: %s\n", h_R ); 
        magma_finalize();                                                  
        exit(-1);   
     	}

	lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, PA, &lda, h_R, &lda );
	//printf("Modified by Vinay in 2 GPU\n");
	//INB = magma_get_dpotrf_nb(N);
//	INB = 224;
//	printf("INB = %d\n", INB);
	//ngpu = ndevices;
//	printf("ngpu = %d\n", ngpu);
	//max_size = INB*(1+N/(INB*ndevices))*INB*((N+INB-1)/INB);
//	printf("max_size = %d\n", max_size);
	//int imax_size = max_size;
	//double *dA;
	//magma_dmalloc_pinned((void**)&dA, In*In*sizeof(double));
	
	//ldda = (1+N/(INB*ndevices))*INB;
//	printf("ldda = %d\n", ldda);
	//magma_dsetmatrix_1D_row_bcyclic(N, N, PA, N, dA, ldda, ngpu, INB);
	//magma_dpotrf_mgpu(ngpu, MagmaLower, N, dA, ldda, &info);
	int lTri;
	lTri = INTEGER_VALUE(lowerTri);
	if(lTri){
		t1 = clock();
		magma_dpotrf_m(nGPUs, MagmaLower, N, h_R, N, &info);
		t2 = clock ();
	}
	else{
		t1 = clock();
		magma_dpotrf_m(nGPUs, MagmaUpper, N, h_R, N, &info);
		t2 = clock ();
	}
	gpu_time = (double) (t2-t1)/(CLOCKS_PER_SEC) ; // Magma time
	printf (" magma_dpotrf_m time : %f sec. \n", gpu_time );
	if(info != 0)
	{
		printf("magma_dpotrf returned error %d: %s.\n", (int) info, magma_strerror(info));
	}
	
	//magma_dgetmatrix_1D_row_bcyclic(N, N, dA, ldda, PA, N, ngpu, INB);
	//for(dev = 0; dev < ndevices; dev++)
	//{
		//magma_setdevice(dev);
		//cudaFree(dA[dev]);
	//}
	lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_R, &lda, PA, &lda );
	magma_free_pinned(h_R);
	magma_finalize();
	cublasShutdown();

	int IZeroTri;
        IZeroTri = INTEGER_VALUE(zeroTri);
	if(IZeroTri & lTri) {
		for(i = 1; i < In; i++)
        	{
       			for(j=0; j< i; j++)
                	{
                       		PA[i*In+j] = 0.0;
                	}
        	}
	}
	else if(IZeroTri){
		for(i = 0; i < In; i++)
                {
                        for(j=i+1; j < In; j++)
                        {
                                PA[i*In+j] = 0.0;
                        }
                }
	}
	return(R_NilValue);
}
Пример #7
0
SEXP magmaCholeskyFinal(SEXP A, SEXP n, SEXP NB, SEXP id, SEXP zeroTri, SEXP lowerTri)
{
	magma_init();
//	magma_print_devices();
	
	double *h_R;
	int In, INB, ID;
	In = INTEGER_VALUE(n);
	INB = INTEGER_VALUE(NB);
	ID = INTEGER_VALUE(id);
	double *PA = NUMERIC_POINTER(A);
	int i,j;

	magma_int_t N, n2, lda, status, info, max_size;
	N=In;
   	lda = N;
   	n2 = lda*N;
  	

/*	for(i = 0; i < In; i++)
	{
		for(j = 0; j < In; j++)
		{
			printf("%.8f ", PA[i+j*In]);
		}
		printf("\n");
	}	*/

	if ( MAGMA_SUCCESS != magma_malloc_pinned( (void**) &h_R, (n2)*sizeof(double) )) {      
        fprintf( stderr, "!!!! magma_malloc_pinned failed for: %s\n", h_R ); 
        magma_finalize();                                                  
        exit(-1);   
     	}
        

        lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, PA, &lda, h_R, &lda );
	
	N = In;
	status = 0;
	magma_setdevice(ID);
	//printf("Modified by Vinay in one GPU\n");
	//INB = magma_get_dpotrf_nb(N);
//	INB = 224;
//	printf("INB = %d\n", INB);
	//ngpu = ndevices;
//	printf("ngpu = %d\n", ngpu);
	//max_size = INB*(1+N/(INB*ndevices))*INB*((N+INB-1)/INB);
//	printf("max_size = %d\n", max_size);
	//int imax_size = max_size;
	//double *dA;
	//magma_dmalloc_pinned((void**)&dA, In*In*sizeof(double));
	//ldda = (1+N/(INB*ndevices))*INB;
//	printf("ldda = %d\n", ldda);
	//magma_dsetmatrix_1D_row_bcyclic(N, N, PA, N, dA, ldda, ngpu, INB);
	//magma_dpotrf_mgpu(ngpu, MagmaLower, N, dA, ldda, &info);
	int lTri;
	lTri = INTEGER_VALUE(lowerTri);
	if(lTri)
		magma_dpotrf(MagmaLower, N, h_R, N, &info);
	else
		magma_dpotrf(MagmaUpper, N, h_R, N, &info);
	if(info != 0)
	{
		printf("magma_dpotrf returned error %d: %s.\n", (int) info, magma_strerror(info));
	}
		
	lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_R, &lda, PA, &lda );
	//magma_dgetmatrix_1D_row_bcyclic(N, N, dA, ldda, PA, N, ngpu, INB);
	//for(dev = 0; dev < ndevices; dev++)
	//{
		//magma_setdevice(dev);
		//cudaFree(dA[dev]);
	//}
	magma_free_pinned(h_R);
	magma_finalize();
	cublasShutdown();
	/*
	int IZeroTri;
        IZeroTri = INTEGER_VALUE(zeroTri);
	if(IZeroTri & lTri) {
		for(i = 1; i < In; i++)
        	{
       			for(j=0; j< i; j++)
                	{
                       		PA[i*In+j] = 0.0;
                	}
        	}
	}
	else if(IZeroTri)
		for(i = 0; i < In; i++)
                {
                        for(j=i+1; j < In; j++)
                        {
                                PA[i*In+j] = 0.0;
                        }
                }*/
	return(R_NilValue);
}