Esempio n. 1
0
void magmaf_dgetri_gpu(
    magma_int_t *n,
    devptr_t *dA, magma_int_t *ldda,
    magma_int_t *ipiv,
    devptr_t *dwork, magma_int_t *lwork,
    magma_int_t *info )
{
    magma_dgetri_gpu(
        *n,
        magma_ddevptr(dA), *ldda,
        ipiv,
        magma_ddevptr(dwork), *lwork,
        info );
}
Esempio n. 2
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing dgetrf
*/
int main( int argc, char** argv )
{
    TESTING_INIT();

    real_Double_t   gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    double *h_A, *h_R, *work;
    magmaDouble_ptr d_A, dwork;
    double c_neg_one = MAGMA_D_NEG_ONE;
    magma_int_t N, n2, lda, ldda, info, lwork, ldwork;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    double tmp;
    double error, rwork[1];
    magma_int_t *ipiv;
    magma_int_t status = 0;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    opts.lapack |= opts.check;  // check (-c) implies lapack (-l)
    
    // need looser bound (3000*eps instead of 30*eps) for tests
    // TODO: should compute ||I - A*A^{-1}|| / (n*||A||*||A^{-1}||)
    opts.tolerance = max( 3000., opts.tolerance );
    double tol = opts.tolerance * lapackf77_dlamch("E");
    
    printf("    N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||R||_F / (N*||A||_F)\n");
    printf("=================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            lda    = N;
            n2     = lda*N;
            ldda   = ((N+31)/32)*32;
            ldwork = N * magma_get_dgetri_nb( N );
            gflops = FLOPS_DGETRI( N ) / 1e9;
            
            // query for workspace size
            lwork = -1;
            lapackf77_dgetri( &N, NULL, &lda, NULL, &tmp, &lwork, &info );
            if (info != 0)
                printf("lapackf77_dgetri returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            lwork = int( MAGMA_D_REAL( tmp ));
            
            TESTING_MALLOC_CPU( ipiv,  magma_int_t,        N      );
            TESTING_MALLOC_CPU( work,  double, lwork  );
            TESTING_MALLOC_CPU( h_A,   double, n2     );
            
            TESTING_MALLOC_PIN( h_R,   double, n2     );
            
            TESTING_MALLOC_DEV( d_A,   double, ldda*N );
            TESTING_MALLOC_DEV( dwork, double, ldwork );
            
            /* Initialize the matrix */
            lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
            error = lapackf77_dlange( "f", &N, &N, h_A, &lda, rwork );  // norm(A)
            
            /* Factor the matrix. Both MAGMA and LAPACK will use this factor. */
            magma_dsetmatrix( N, N, h_A, lda, d_A, 0, ldda, opts.queue );
            magma_dgetrf_gpu( N, N, d_A, 0, ldda, ipiv, opts.queue, &info );
            magma_dgetmatrix( N, N, d_A, 0, ldda, h_A, lda, opts.queue );
            if ( info != 0 )
                printf("magma_dgetrf_gpu returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            // check for exact singularity
            //h_A[ 10 + 10*lda ] = MAGMA_D_MAKE( 0.0, 0.0 );
            //magma_dsetmatrix( N, N, h_A, lda, d_A, 0, ldda, opts.queue );
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            magma_dgetri_gpu( N, d_A, 0, ldda, ipiv, dwork, 0, ldwork, opts.queues2, &info );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0)
                printf("magma_dgetri_gpu returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            magma_dgetmatrix( N, N, d_A, 0, ldda, h_R, lda, opts.queue );
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                lapackf77_dgetri( &N, h_A, &lda, ipiv, work, &lwork, &info );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0)
                    printf("lapackf77_dgetri returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                /* =====================================================================
                   Check the result compared to LAPACK
                   =================================================================== */
                blasf77_daxpy( &n2, &c_neg_one, h_A, &ione, h_R, &ione );
                error = lapackf77_dlange( "f", &N, &N, h_R, &lda, rwork ) / (N*error);
                
                printf( "%5d   %7.2f (%7.2f)   %7.2f (%7.2f)   %8.2e   %s\n",
                        (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time,
                        error, (error < tol ? "ok" : "failed"));
                status += ! (error < tol);
            }
            else {
                printf( "%5d     ---   (  ---  )   %7.2f (%7.2f)     ---\n",
                        (int) N, gpu_perf, gpu_time );
            }
            
            TESTING_FREE_CPU( ipiv  );
            TESTING_FREE_CPU( work  );
            TESTING_FREE_CPU( h_A   );
            
            TESTING_FREE_PIN( h_R   );
            
            TESTING_FREE_DEV( d_A   );
            TESTING_FREE_DEV( dwork );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return status;
}
Esempio n. 3
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing dgetri
*/
int main( int argc, char** argv )
{
    TESTING_INIT();

    // constants
    const double c_zero    = MAGMA_D_ZERO;
    const double c_one     = MAGMA_D_ONE;
    const double c_neg_one = MAGMA_D_NEG_ONE;
    
    real_Double_t   gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    double *h_A, *h_Ainv, *h_R, *work;
    magmaDouble_ptr d_A, dwork;
    magma_int_t N, n2, lda, ldda, info, lwork, ldwork;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    double tmp;
    double error, rwork[1];
    magma_int_t *ipiv;
    magma_int_t status = 0;
    
    magma_opts opts;
    opts.parse_opts( argc, argv );
    
    double tol = opts.tolerance * lapackf77_dlamch("E");
    
    printf("%%   N   CPU Gflop/s (sec)   GPU Gflop/s (sec)   ||I - A*A^{-1}||_1 / (N*cond(A))\n");
    printf("%%===============================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            lda    = N;
            n2     = lda*N;
            ldda   = magma_roundup( N, opts.align );  // multiple of 32 by default
            ldwork = N * magma_get_dgetri_nb( N );
            gflops = FLOPS_DGETRI( N ) / 1e9;
            
            // query for workspace size
            lwork = -1;
            lapackf77_dgetri( &N, NULL, &lda, NULL, &tmp, &lwork, &info );
            if (info != 0) {
                printf("lapackf77_dgetri returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            lwork = magma_int_t( MAGMA_D_REAL( tmp ));
            
            TESTING_MALLOC_CPU( ipiv,   magma_int_t,        N      );
            TESTING_MALLOC_CPU( work,   double, lwork  );
            TESTING_MALLOC_CPU( h_A,    double, n2     );
            TESTING_MALLOC_CPU( h_Ainv, double, n2     );
            TESTING_MALLOC_CPU( h_R,    double, n2     );
            
            TESTING_MALLOC_DEV( d_A,    double, ldda*N );
            TESTING_MALLOC_DEV( dwork,  double, ldwork );
            
            /* Initialize the matrix */
            lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
            
            /* Factor the matrix. Both MAGMA and LAPACK will use this factor. */
            magma_dsetmatrix( N, N, h_A, lda, d_A, ldda, opts.queue );
            magma_dgetrf_gpu( N, N, d_A, ldda, ipiv, &info );
            magma_dgetmatrix( N, N, d_A, ldda, h_Ainv, lda, opts.queue );
            if (info != 0) {
                printf("magma_dgetrf_gpu returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            
            // check for exact singularity
            //h_Ainv[ 10 + 10*lda ] = MAGMA_D_MAKE( 0.0, 0.0 );
            //magma_dsetmatrix( N, N, h_Ainv, lda, d_A, ldda, opts.queue );
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            magma_dgetri_gpu( N, d_A, ldda, ipiv, dwork, ldwork, &info );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0) {
                printf("magma_dgetri_gpu returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                lapackf77_dgetri( &N, h_Ainv, &lda, ipiv, work, &lwork, &info );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0) {
                    printf("lapackf77_dgetri returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                }
                printf( "%5d   %7.2f (%7.2f)   %7.2f (%7.2f)",
                        (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
            }
            else {
                printf( "%5d     ---   (  ---  )   %7.2f (%7.2f)",
                        (int) N, gpu_perf, gpu_time );
            }
            
            /* =====================================================================
               Check the result
               =================================================================== */
            if ( opts.check ) {
                magma_dgetmatrix( N, N, d_A, ldda, h_Ainv, lda, opts.queue );
                
                // compute 1-norm condition number estimate, following LAPACK's zget03
                double normA, normAinv, rcond;
                normA    = lapackf77_dlange( "1", &N, &N, h_A,    &lda, rwork );
                normAinv = lapackf77_dlange( "1", &N, &N, h_Ainv, &lda, rwork );
                if ( normA <= 0 || normAinv <= 0 ) {
                    rcond = 0;
                    error = 1 / (tol/opts.tolerance);  // == 1/eps
                }
                else {
                    rcond = (1 / normA) / normAinv;
                    // R = I
                    // R -= A*A^{-1}
                    // err = ||I - A*A^{-1}|| / ( N ||A||*||A^{-1}|| ) = ||R|| * rcond / N, using 1-norm
                    lapackf77_dlaset( "full", &N, &N, &c_zero, &c_one, h_R, &lda );
                    blasf77_dgemm( "no", "no", &N, &N, &N,
                                   &c_neg_one, h_A,    &lda,
                                               h_Ainv, &lda,
                                   &c_one,     h_R,    &lda );
                    error = lapackf77_dlange( "1", &N, &N, h_R, &lda, rwork );
                    error = error * rcond / N;
                }
                
                bool okay = (error < tol);
                status += ! okay;
                printf( "   %8.2e   %s\n",
                        error, (okay ? "ok" : "failed"));
            }
            else {
                printf( "\n" );
            }
            
            TESTING_FREE_CPU( ipiv   );
            TESTING_FREE_CPU( work   );
            TESTING_FREE_CPU( h_A    );
            TESTING_FREE_CPU( h_Ainv );
            TESTING_FREE_CPU( h_R    );
            
            TESTING_FREE_DEV( d_A    );
            TESTING_FREE_DEV( dwork  );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    opts.cleanup();
    TESTING_FINALIZE();
    return status;
}
Esempio n. 4
0
SEXP magma_dgeMatrix_solve(SEXP a)
{
#ifdef HIPLAR_WITH_MAGMA
    /*  compute the 1-norm of the matrix, which is needed
	later for the computation of the reciprocal condition number. */
    double aNorm = magma_get_norm(a, "1");

    /* the LU decomposition : */
		/* Given that we may be performing this operation
		 * on the GPU we may put in an optimisation here
		 * where if we call the LU solver we, we do not require
		 * the decomposition to be transferred back to CPU. This is TODO
		 */
    SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dgeMatrix"))),
	lu = magma_dgeMatrix_LU_(a, TRUE);
    int *dims = INTEGER(GET_SLOT(lu, Matrix_DimSym)),
	*pivot = INTEGER(GET_SLOT(lu, Matrix_permSym));

    /* prepare variables for the dgetri calls */
    double *x, tmp;
    int	info, lwork = -1;

    if (dims[0] != dims[1]) error(_("Solve requires a square matrix"));
    slot_dup(val, lu, Matrix_xSym);
    x = REAL(GET_SLOT(val, Matrix_xSym));
    slot_dup(val, lu, Matrix_DimSym);
		int N2 = dims[0] * dims[0];

    if(dims[0]) /* the dimension is not zero */
    {
			/* is the matrix is *computationally* singular ? */
			double rcond;
			F77_CALL(dgecon)("1", dims, x, dims, &aNorm, &rcond,
					(double *) R_alloc(4*dims[0], sizeof(double)),
					(int *) R_alloc(dims[0], sizeof(int)), &info);
			if (info)
				error(_("error [%d] from Lapack 'dgecon()'"), info);
			if(rcond < DOUBLE_EPS)
				error(_("Lapack dgecon(): system computationally singular, reciprocal condition number = %g"),
						rcond);

			/* only now try the inversion and check if the matrix is *exactly* singular: */
			// This is also a work space query. This is not an option in magma

			F77_CALL(dgetri)(dims, x, dims, pivot, &tmp, &lwork, &info);
			lwork = (int) tmp;
			
			if( GPUFlag == 0){
				

				F77_CALL(dgetri)(dims, x, dims, pivot,
						(double *) R_alloc((size_t) lwork, sizeof(double)),
						&lwork, &info);

#ifdef HIPLAR_DBG
				R_ShowMessage("DBG: Solve using LU using dgetri;");
#endif
			}
			else if(GPUFlag == 1) {
				
				double *d_x, *dwork; 
				cublasStatus retStatus;			
	
#ifdef HIPLAR_DBG
				R_ShowMessage("Solve using LU using magma_dgetri;");
#endif
				cublasAlloc(N2, sizeof(double), (void**)&d_x);

				//cublasAlloc(N2 , sizeof(double), (void**)&dtmp);
				/* Error Checking */
				retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error in Memory Allocation on Device"));
				/********************************************/

				cublasSetVector( N2, sizeof(double), x, 1, d_x, 1);

				/* Error Checking */
				retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error in Data Transfer to Device"));
				/********************************************/
				lwork = dims[0] * magma_get_dgetri_nb( dims[0] );
				
					cublasAlloc(lwork, sizeof(double), (void**)&dwork);

				/* Error Checking */
				retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error in Memory Allocation on Device"));
				/********************************************/

				magma_dgetri_gpu(dims[0], d_x, dims[0], pivot, dwork , lwork, &info);

				cublasGetVector(N2, sizeof(double), d_x, 1, x, 1);

				/* Error Checking */
				retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error in Data From to Device"));
				/********************************************/

				cublasFree(dwork);
				cublasFree(d_x);

				/* Error Checking */
				retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error freeing memory"));
				/********************************************/
					
			}
			else
				error(_("GPUFlag not set correctly"));

			if (info)
				error(_("Lapack routine dgetri: system is exactly singular"));
    }
    UNPROTECT(1);
    return val;
#endif
	    return R_NilValue;
}