Exemplo n.º 1
0
void magmaf_dpotri_gpu(
    magma_uplo_t *uplo, magma_int_t *n,
    devptr_t *dA, magma_int_t *ldda,
    magma_int_t *info )
{
    magma_dpotri_gpu(
        *uplo, *n,
        magma_ddevptr(dA), *ldda,
        info );
}
Exemplo n.º 2
0
double *cholesky_gpu(double *ml, int m) {
 	magma_int_t mm = m*m;
 	magma_int_t info;
 	double *a;
 	double *d_a ;
 	magma_err_t err; 
 	err = magma_dmalloc_cpu ( &a , mm );
 	err = magma_dmalloc ( &d_a , mm );

 	magma_dsetmatrix ( m, m, ml, m, d_a , m );

 	magma_dpotrf_gpu('L',m,d_a,m,&info);
 	magma_dpotri_gpu('L',m,d_a,m,&info);

 	magma_dgetmatrix ( m, m, d_a , m, a, m );
 	magma_free (d_a );
 
 	return a;
}
Exemplo n.º 3
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing dpotrf
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    double *h_A, *h_R;
    double *d_A;
    double c_neg_one = MAGMA_D_NEG_ONE;
    magma_int_t N, n2, lda, ldda, info;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    double      work[1], error;
    magma_int_t status = 0;

    magma_opts opts;
    parse_opts( argc, argv, &opts );
    opts.lapack |= opts.check;  // check (-c) implies lapack (-l)
    
    double tol = opts.tolerance * lapackf77_dlamch("E");
    
    printf("    N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||R||_F / ||A||_F\n");
    printf("=================================================================\n");
    for( int i = 0; i < opts.ntest; ++i ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[i];
            lda    = N;
            n2     = lda*N;
            ldda   = ((N+31)/32)*32;
            gflops = FLOPS_DPOTRI( N ) / 1e9;
            
            TESTING_MALLOC_CPU( h_A, double, n2 );
            TESTING_MALLOC_PIN( h_R, double, n2 );
            TESTING_MALLOC_DEV( d_A, double, ldda*N );
            
            /* Initialize the matrix */
            lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
            magma_dmake_hpd( N, h_A, lda );
            lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            /* factorize matrix */
            magma_dsetmatrix( N, N, h_A, lda, d_A, ldda );
            magma_dpotrf_gpu( opts.uplo, N, d_A, ldda, &info );
            
            // check for exact singularity
            //magma_dgetmatrix( N, N, d_A, ldda, h_R, lda );
            //h_R[ 10 + 10*lda ] = MAGMA_D_MAKE( 0.0, 0.0 );
            //magma_dsetmatrix( N, N, h_R, lda, d_A, ldda );
            
            gpu_time = magma_wtime();
            magma_dpotri_gpu( opts.uplo, N, d_A, ldda, &info );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0)
                printf("magma_dpotri_gpu returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                lapackf77_dpotrf( &opts.uplo, &N, h_A, &lda, &info );
                
                cpu_time = magma_wtime();
                lapackf77_dpotri( &opts.uplo, &N, h_A, &lda, &info );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0)
                    printf("lapackf77_dpotri returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                /* =====================================================================
                   Check the result compared to LAPACK
                   =================================================================== */
                magma_dgetmatrix( N, N, d_A, ldda, h_R, lda );
                error = lapackf77_dlange("f", &N, &N, h_A, &lda, work);
                blasf77_daxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione);
                error = lapackf77_dlange("f", &N, &N, h_R, &lda, work) / error;
                printf("%5d   %7.2f (%7.2f)   %7.2f (%7.2f)   %8.2e%s\n",
                       (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time,
                       error, (error < tol ? "" : "  failed") );
                status |= ! (error < tol);
            }
            else {
                printf("%5d     ---   (  ---  )   %7.2f (%7.2f)     ---\n",
                       (int) N, gpu_perf, gpu_time );
            }
            
            TESTING_FREE_CPU( h_A );
            TESTING_FREE_PIN( h_R );
            TESTING_FREE_DEV( d_A );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return status;
}
Exemplo n.º 4
0
SEXP magma_dpoMatrix_solve(SEXP x)
{
#ifdef HIPLAR_WITH_MAGMA
    SEXP Chol = magma_dpoMatrix_chol(x);
    SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dpoMatrix")));
    int *dims = INTEGER(GET_SLOT(x, Matrix_DimSym)), info;

    SET_SLOT(val, Matrix_factorSym, allocVector(VECSXP, 0));
    slot_dup(val, Chol, Matrix_uploSym);
    slot_dup(val, Chol, Matrix_xSym);
    slot_dup(val, Chol, Matrix_DimSym);
    SET_SLOT(val, Matrix_DimNamesSym,
	     duplicate(GET_SLOT(x, Matrix_DimNamesSym)));
    double *A = REAL(GET_SLOT(val, Matrix_xSym));
		int N = *dims;	
		int lda = N;
		const char *uplo = uplo_P(val);
		
		if(GPUFlag == 0) {
			
			F77_CALL(dpotri)(uplo_P(val), dims, A, dims, &info);
		
		}
		else if(GPUFlag == 1 && Interface == 0) {
#ifdef HIPLAR_DBG
			R_ShowMessage("DBG: Solving using magma_dpotri");
#endif
			magma_dpotri(uplo[0], N, A, lda, &info);
		}
		else if(GPUFlag == 1 && Interface == 1){
			double *d_A;
			cublasStatus retStatus;
			cublasAlloc( N * lda , sizeof(double), (void**)&d_A);
#ifdef HIPLAR_DBG
			R_ShowMessage("DBG: Solving using magma_dpotri_gpu");
#endif		
			/* Error Checking */
			retStatus = cublasGetError ();
			if (retStatus != CUBLAS_STATUS_SUCCESS) 
				error(_("CUBLAS: Error in Memory Allocation"));
			/********************************************/

			cublasSetVector( N * lda, sizeof(double), A, 1, d_A, 1);

			/* Error Checking */
			retStatus = cublasGetError ();
			if (retStatus != CUBLAS_STATUS_SUCCESS) 
				error(_("CUBLAS: Error in Data Transfer to Device"));
			/********************************************/

			magma_dpotri_gpu(uplo[0], N, d_A, lda, &info);

			cublasGetVector(N * lda, sizeof(double), d_A, 1, val, 1);
			
			/* Error Checking */
			retStatus = cublasGetError ();
			if (retStatus != CUBLAS_STATUS_SUCCESS) 
				error(_("CUBLAS: Error in Data Transfer from Device"));
			/********************************************/
			
			cublasFree(d_A);
		}
		else
			error(_("MAGMA/LAPACK/Interface Flag not defined correctly"));
	
		if (info) {
			if(info > 0)
				error(_("the leading minor of order %d is not positive definite"),
						info);
			else /* should never happen! */
				error(_("Lapack routine %s returned error code %d"), "dpotrf", info);
		}
		
		UNPROTECT(1);
    return val;
#endif
	return R_NilValue;
}
Exemplo n.º 5
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing dpotrf
*/
int main( int argc, char** argv) 
{
    TESTING_CUDA_INIT();

    magma_timestr_t  start, end;
    double      flops, gpu_perf, cpu_perf;
    double *h_A, *h_R;
    double *d_A;
    magma_int_t N = 0, n2, lda, ldda;
    magma_int_t size[10] = {1024,2048,3072,4032,5184,6016,7040,8064,9088,10112};
    
    magma_int_t i, info;
    const char *uplo     = MagmaUpperStr;
    double c_neg_one = MAGMA_D_NEG_ONE;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    double      work[1], matnorm;
    
    if (argc != 1){
        for(i = 1; i<argc; i++){        
            if (strcmp("-N", argv[i])==0)
                N = atoi(argv[++i]);
        }
        if (N>0) size[0] = size[9] = N;
        else exit(1);
    }
    else {
        printf("\nUsage: \n");
        printf("  testing_dpotri_gpu -N %d\n\n", 1024);
    }

    /* Allocate host memory for the matrix */
    n2   = size[9] * size[9];
    ldda = ((size[9]+31)/32) * 32;
    TESTING_MALLOC(    h_A, double, n2);
    TESTING_HOSTALLOC( h_R, double, n2);
    TESTING_DEVALLOC(  d_A, double, ldda*size[9] );

    printf("  N    CPU GFlop/s    GPU GFlop/s    ||R||_F / ||A||_F\n");
    printf("========================================================\n");
    for(i=0; i<10; i++){
        N   = size[i];
        lda = N; 
        n2  = lda*N;
        flops = FLOPS_DPOTRI( (double)N ) / 1000000;
        
        ldda = ((N+31)/32)*32;

        /* Initialize the matrix */
        lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
        /* Symmetrize and increase the diagonal */
        {
            magma_int_t i, j;
            for(i=0; i<N; i++) {
                MAGMA_D_SET2REAL( h_A[i*lda+i], ( MAGMA_D_REAL(h_A[i*lda+i]) + 1.*N ) );
                for(j=0; j<i; j++)
                    h_A[i*lda+j] = (h_A[j*lda+i]);
            }
        }
        lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );

        /* ====================================================================
           Performs operation using MAGMA 
           =================================================================== */
        //cublasSetMatrix( N, N, sizeof(double), h_A, lda, d_A, ldda);
        //magma_dpotrf_gpu(uplo[0], N, d_A, ldda, &info);

        /* factorize matrix */
        magma_dsetmatrix( N, N, h_A, lda, d_A, ldda );
        magma_dpotrf_gpu(uplo[0], N, d_A, ldda, &info);
        
        // check for exact singularity
        //magma_dgetmatrix( N, N, d_A, ldda, h_R, lda );
        //h_R[ 10 + 10*lda ] = MAGMA_D_MAKE( 0.0, 0.0 );
        //magma_dsetmatrix( N, N, h_R, lda, d_A, ldda );
        
        start = get_current_time();
        magma_dpotri_gpu(uplo[0], N, d_A, ldda, &info);
        end = get_current_time();
        if (info != 0)
            printf("magma_dpotri_gpu returned error %d\n", (int) info);

        gpu_perf = flops / GetTimerValue(start, end);
        
        /* =====================================================================
           Performs operation using LAPACK 
           =================================================================== */
        lapackf77_dpotrf(uplo, &N, h_A, &lda, &info);
        
        start = get_current_time();
        lapackf77_dpotri(uplo, &N, h_A, &lda, &info);
        end = get_current_time();
        if (info != 0)
            printf("lapackf77_dpotri returned error %d\n", (int) info);
        
        cpu_perf = flops / GetTimerValue(start, end);
      
        /* =====================================================================
           Check the result compared to LAPACK
           =================================================================== */
        magma_dgetmatrix( N, N, d_A, ldda, h_R, lda );
        matnorm = lapackf77_dlange("f", &N, &N, h_A, &lda, work);
        blasf77_daxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione);
        printf("%5d    %6.2f         %6.2f        %e\n", 
               (int) size[i], cpu_perf, gpu_perf,
               lapackf77_dlange("f", &N, &N, h_R, &lda, work) / matnorm);
        
        if (argc != 1)
            break;
    }

    /* Memory clean up */
    TESTING_FREE( h_A );
    TESTING_HOSTFREE( h_R );
    TESTING_DEVFREE( d_A );

    /* Shutdown */
    TESTING_CUDA_FINALIZE();
}