/** Purpose ------- CGETRF_NOPIV computes an LU factorization of a general M-by-N matrix A without pivoting. The factorization has the form A = L * U where L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n). This is the right-looking Level 3 BLAS version of the algorithm. Arguments --------- @param[in] m INTEGER The number of rows of the matrix A. M >= 0. @param[in] n INTEGER The number of columns of the matrix A. N >= 0. @param[in,out] A COMPLEX array, dimension (LDA,N) On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,M). @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. @ingroup magma_cgesv_comp ********************************************************************/ extern "C" magma_int_t magma_cgetrf_nopiv( magma_int_t m, magma_int_t n, magmaFloatComplex *A, magma_int_t lda, magma_int_t *info) { #define A(i_,j_) (A + (i_) + (j_)*lda) magmaFloatComplex c_one = MAGMA_C_ONE; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magma_int_t min_mn, i__3, i__4; magma_int_t j, jb, nb, iinfo; A -= 1 + lda; /* Function Body */ *info = 0; if (m < 0) { *info = -1; } else if (n < 0) { *info = -2; } else if (lda < max(1,m)) { *info = -4; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (m == 0 || n == 0) { return *info; } /* Determine the block size for this environment. */ nb = 128; min_mn = min(m,n); if (nb <= 1 || nb >= min_mn) { /* Use unblocked code. */ magma_cgetf2_nopiv( m, n, A(1,1), lda, info ); } else { /* Use blocked code. */ for (j = 1; j <= min_mn; j += nb) { jb = min( min_mn - j + 1, nb ); /* Factor diagonal and subdiagonal blocks and test for exact singularity. */ i__3 = m - j + 1; //magma_cgetf2_nopiv( i__3, jb, A(j,j), lda, &iinfo ); i__3 -= jb; magma_cgetf2_nopiv( jb, jb, A(j,j), lda, &iinfo ); blasf77_ctrsm( "R", "U", "N", "N", &i__3, &jb, &c_one, A(j,j), &lda, A(j+jb,j), &lda ); /* Adjust INFO */ if (*info == 0 && iinfo > 0) *info = iinfo + j - 1; if (j + jb <= n) { /* Compute block row of U. */ i__3 = n - j - jb + 1; blasf77_ctrsm( "Left", "Lower", "No transpose", "Unit", &jb, &i__3, &c_one, A(j,j), &lda, A(j,j+jb), &lda ); if (j + jb <= m) { /* Update trailing submatrix. */ i__3 = m - j - jb + 1; i__4 = n - j - jb + 1; blasf77_cgemm( "No transpose", "No transpose", &i__3, &i__4, &jb, &c_neg_one, A(j+jb,j), &lda, A(j,j+jb), &lda, &c_one, A(j+jb,j+jb), &lda ); } } } } return *info; } /* magma_cgetrf_nopiv */
extern "C" magma_int_t magma_cgetrf_nopiv(magma_int_t *m, magma_int_t *n, cuFloatComplex *a, magma_int_t *lda, magma_int_t *info) { /* -- MAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver November 2012 Purpose ======= CGETRF_NOPIV computes an LU factorization of a general M-by-N matrix A without pivoting. The factorization has the form A = L * U where L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n). This is the right-looking Level 3 BLAS version of the algorithm. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. ===================================================================== */ cuFloatComplex c_one = MAGMA_C_ONE; magma_int_t a_dim1, a_offset, min_mn, i__3, i__4; cuFloatComplex z__1; magma_int_t j, jb, nb, iinfo; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return *info; } /* Determine the block size for this environment. */ nb = 128; min_mn = min(*m,*n); if (nb <= 1 || nb >= min_mn) { /* Use unblocked code. */ magma_cgetf2_nopiv(m, n, &a[a_offset], lda, info); } else { /* Use blocked code. */ for (j = 1; j <= min_mn; j += nb) { /* Computing MIN */ i__3 = min_mn - j + 1; jb = min(i__3,nb); /* Factor diagonal and subdiagonal blocks and test for exact singularity. */ i__3 = *m - j + 1; //magma_cgetf2_nopiv(&i__3, &jb, &a[j + j * a_dim1], lda, &iinfo); i__3 -= jb; magma_cgetf2_nopiv(&jb, &jb, &a[j + j * a_dim1], lda, &iinfo); blasf77_ctrsm("R", "U", "N", "N", &i__3, &jb, &c_one, &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda); /* Adjust INFO */ if (*info == 0 && iinfo > 0) *info = iinfo + j - 1; if (j + jb <= *n) { /* Compute block row of U. */ i__3 = *n - j - jb + 1; blasf77_ctrsm("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &c_one, &a[j + j * a_dim1], lda, &a[j + (j+jb)*a_dim1], lda); if (j + jb <= *m) { /* Update trailing submatrix. */ i__3 = *m - j - jb + 1; i__4 = *n - j - jb + 1; z__1 = MAGMA_C_NEG_ONE; blasf77_cgemm("No transpose", "No transpose", &i__3, &i__4, &jb, &z__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, &c_one, &a[j + jb + (j + jb) * a_dim1], lda); } } } } return *info; } /* magma_cgetrf_nopiv */
/* //////////////////////////////////////////////////////////////////////////// -- Testing cgetrf */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0; float error; magmaFloatComplex *h_A; magma_int_t *ipiv; magma_int_t M, N, n2, lda, info, min_mn; magma_int_t status = 0; magma_opts opts; opts.parse_opts( argc, argv ); float tol = opts.tolerance * lapackf77_slamch("E"); printf("%% ngpu %d, version %d\n", (int) opts.ngpu, (int) opts.version ); if ( opts.check == 2 ) { printf("%% M N CPU Gflop/s (sec) GPU Gflop/s (sec) |Ax-b|/(N*|A|*|x|)\n"); } else { printf("%% M N CPU Gflop/s (sec) GPU Gflop/s (sec) |PA-LU|/(N*|A|)\n"); } printf("%%========================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { M = opts.msize[itest]; N = opts.nsize[itest]; min_mn = min(M, N); lda = M; n2 = lda*N; gflops = FLOPS_CGETRF( M, N ) / 1e9; TESTING_MALLOC_CPU( ipiv, magma_int_t, min_mn ); TESTING_MALLOC_PIN( h_A, magmaFloatComplex, n2 ); /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { init_matrix( opts, M, N, h_A, lda ); cpu_time = magma_wtime(); lapackf77_cgetrf( &M, &N, h_A, &lda, ipiv, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) { printf("lapackf77_cgetrf returned error %d: %s.\n", (int) info, magma_strerror( info )); } } /* ==================================================================== Performs operation using MAGMA =================================================================== */ init_matrix( opts, M, N, h_A, lda ); if ( opts.version == 2 || opts.version == 3 ) { // no pivoting versions, so set ipiv to identity for (magma_int_t i=0; i < min_mn; ++i ) { ipiv[i] = i+1; } } gpu_time = magma_wtime(); if ( opts.version == 1 ) { magma_cgetrf( M, N, h_A, lda, ipiv, &info ); } else if ( opts.version == 2 ) { magma_cgetrf_nopiv( M, N, h_A, lda, &info ); } else if ( opts.version == 3 ) { magma_cgetf2_nopiv( M, N, h_A, lda, &info ); } gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) { printf("magma_cgetrf returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== Check the factorization =================================================================== */ if ( opts.lapack ) { printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)", (int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time ); } else { printf("%5d %5d --- ( --- ) %7.2f (%7.2f)", (int) M, (int) N, gpu_perf, gpu_time ); } if ( opts.check == 2 ) { error = get_residual( opts, M, N, h_A, lda, ipiv ); printf(" %8.2e %s\n", error, (error < tol ? "ok" : "failed")); status += ! (error < tol); } else if ( opts.check ) { error = get_LU_error( opts, M, N, h_A, lda, ipiv ); printf(" %8.2e %s\n", error, (error < tol ? "ok" : "failed")); status += ! (error < tol); } else { printf(" --- \n"); } TESTING_FREE_CPU( ipiv ); TESTING_FREE_PIN( h_A ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } opts.cleanup(); TESTING_FINALIZE(); return status; }