/*
 * Pass-by-pointer entry point for magma_zgetrf.
 *
 * The scalar arguments (m, n, lda) arrive as pointers and are dereferenced
 * here; A, ipiv and info are forwarded to magma_zgetrf unchanged.
 * NOTE(review): the pointer-only signature suggests this exists for Fortran
 * callers -- confirm against the build's name-mangling setup.
 */
void MAGMAF_ZGETRF( magma_int_t *m, magma_int_t *n, cuDoubleComplex *A, magma_int_t *lda, magma_int_t *ipiv, magma_int_t *info)
{
    const magma_int_t rows    = *m;
    const magma_int_t cols    = *n;
    const magma_int_t leading = *lda;

    magma_zgetrf( rows, cols, A, leading, ipiv, info );
}
int magma_solve ( int *dA_dim, int *lWork, double2 *A, int *ipiv, int *N ){ // Check inputs // fprintf (stderr, "Using MAGMA solve\n" ); fprintf (stderr, " dA_dim: %i\n", *dA_dim ); fprintf (stderr, " N: %i\n", *N ); fprintf (stderr, " lWork: %i\n", *lWork ); cuInit(0); cublasInit(); printout_devices(); cublasStatus status; double2 *d_A, *work; status = cublasAlloc ( *dA_dim, sizeof(double2), (void**)&d_A ); if ( status != CUBLAS_STATUS_SUCCESS ){ fprintf (stderr, "ERROR: device memory allocation error (d_A)\n" ); fprintf (stderr, "ERROR: dA_dim: %i\n", dA_dim ); } cudaError_t err; err = cudaMallocHost ( (void**)&work, *lWork * sizeof(double2) ); if(err != cudaSuccess){ fprintf (stderr, "ERROR: cudaMallocHost error (work)\n" ); } int info[1]; TimeStruct start, end; start = get_current_time (); magma_zgetrf ( N, N, A, N, ipiv, work, d_A, info ); end = get_current_time (); double gpu_perf; gpu_perf = 4.*2.*(*N)*(*N)*(*N)/(3.*1000000*GetTimerValue(start,end)); if ( info[0] != 0 ){ fprintf (stderr, "ERROR: magma_zgetrf failed\n" ); } printf(" GPU performance: %6.2f GFlop/s\n", gpu_perf); int stat = 0; return stat; }
/*
 * Pass-by-pointer entry point for magma_zgetrf operating on double2 data.
 *
 * m, n and lda are dereferenced and handed to magma_zgetrf by value; the
 * matrix A, the pivot array ipiv and the info pointer are forwarded as-is.
 */
void MAGMA_ZGETRF( magma_int_t *m, magma_int_t *n, double2 *A, magma_int_t *lda, magma_int_t *ipiv, magma_int_t *info)
{
    const magma_int_t rows    = *m;
    const magma_int_t cols    = *n;
    const magma_int_t leading = *lda;

    magma_zgetrf( rows, cols, A, leading, ipiv, info );
}
/* //////////////////////////////////////////////////////////////////////////// -- Testing zgetrf */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0; double error; magmaDoubleComplex *h_A; magma_int_t *ipiv; magma_int_t M, N, n2, lda, info, min_mn; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); double tol = opts.tolerance * lapackf77_dlamch("E"); printf("ngpu %d\n", (int) opts.ngpu ); if ( opts.check == 2 ) { printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |Ax-b|/(N*|A|*|x|)\n"); } else { printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |PA-LU|/(N*|A|)\n"); } printf("=========================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { M = opts.msize[itest]; N = opts.nsize[itest]; min_mn = min(M, N); lda = M; n2 = lda*N; gflops = FLOPS_ZGETRF( M, N ) / 1e9; TESTING_MALLOC_CPU( ipiv, magma_int_t, min_mn ); TESTING_MALLOC_PIN( h_A, magmaDoubleComplex, n2 ); /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { init_matrix( M, N, h_A, lda ); cpu_time = magma_wtime(); lapackf77_zgetrf(&M, &N, h_A, &lda, ipiv, &info); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_zgetrf returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ==================================================================== Performs operation using MAGMA =================================================================== */ init_matrix( M, N, h_A, lda ); gpu_time = magma_wtime(); magma_zgetrf( M, N, h_A, lda, ipiv, &info); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_zgetrf returned error %d: %s.\n", (int) info, magma_strerror( info )); /* 
===================================================================== Check the factorization =================================================================== */ if ( opts.lapack ) { printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)", (int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time ); } else { printf("%5d %5d --- ( --- ) %7.2f (%7.2f)", (int) M, (int) N, gpu_perf, gpu_time ); } if ( opts.check == 2 ) { error = get_residual( M, N, h_A, lda, ipiv ); printf(" %8.2e %s\n", error, (error < tol ? "ok" : "failed")); status += ! (error < tol); } else if ( opts.check ) { error = get_LU_error( M, N, h_A, lda, ipiv ); printf(" %8.2e %s\n", error, (error < tol ? "ok" : "failed")); status += ! (error < tol); } else { printf(" --- \n"); } TESTING_FREE_CPU( ipiv ); TESTING_FREE_PIN( h_A ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }