/* //////////////////////////////////////////////////////////////////////////// -- Testing zgeqlf */ int main( int argc, char** argv) { TESTING_INIT(); const double d_neg_one = MAGMA_D_NEG_ONE; const double d_one = MAGMA_D_ONE; const magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; const magmaDoubleComplex c_one = MAGMA_Z_ONE; const magmaDoubleComplex c_zero = MAGMA_Z_ZERO; const magma_int_t ione = 1; real_Double_t gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0; double Anorm, error=0, error2=0; magmaDoubleComplex *h_A, *h_R, *tau, *h_work, tmp[1]; magma_int_t M, N, n2, lda, lwork, info, min_mn, nb; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); double tol = opts.tolerance * lapackf77_dlamch("E"); printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |L - Q^H*A| |I - Q^H*Q|\n"); printf("===============================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { M = opts.msize[itest]; N = opts.nsize[itest]; min_mn = min(M, N); lda = M; n2 = lda*N; nb = magma_get_zgeqlf_nb(M); gflops = FLOPS_ZGEQLF( M, N ) / 1e9; // query for workspace size lwork = -1; lapackf77_zgeqlf(&M, &N, NULL, &M, NULL, tmp, &lwork, &info); lwork = (magma_int_t)MAGMA_Z_REAL( tmp[0] ); lwork = max( lwork, N*nb ); lwork = max( lwork, 2*nb*nb); TESTING_MALLOC_CPU( tau, magmaDoubleComplex, min_mn ); TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, n2 ); TESTING_MALLOC_CPU( h_work, magmaDoubleComplex, lwork ); TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, n2 ); /* Initialize the matrix */ lapackf77_zlarnv( &ione, ISEED, &n2, h_A ); lapackf77_zlacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_zgeqlf( M, N, h_R, lda, tau, h_work, 
lwork, &info); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_zgeqlf returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Check the result, following zqlt01 except using the reduced Q. This works for any M,N (square, tall, wide). =================================================================== */ if ( opts.check ) { magma_int_t ldq = M; magma_int_t ldl = min_mn; magmaDoubleComplex *Q, *L; double *work; TESTING_MALLOC_CPU( Q, magmaDoubleComplex, ldq*min_mn ); // M by K TESTING_MALLOC_CPU( L, magmaDoubleComplex, ldl*N ); // K by N TESTING_MALLOC_CPU( work, double, min_mn ); // copy M by K matrix V to Q (copying diagonal, which isn't needed) and // copy K by N matrix L lapackf77_zlaset( "Full", &min_mn, &N, &c_zero, &c_zero, L, &ldl ); if ( M >= N ) { // for M=5, N=3: A = [ V V V ] <= V full block (M-N by K) // K=N [ V V V ] // [ ----- ] // [ L V V ] <= V triangle (N by K, copying diagonal too) // [ L L V ] <= L triangle (K by N) // [ L L L ] magma_int_t M_N = M - N; lapackf77_zlacpy( "Full", &M_N, &min_mn, h_R, &lda, Q, &ldq ); lapackf77_zlacpy( "Upper", &N, &min_mn, &h_R[M_N], &lda, &Q[M_N], &ldq ); lapackf77_zlacpy( "Lower", &min_mn, &N, &h_R[M_N], &lda, L, &ldl ); } else { // for M=3, N=5: A = [ L L | L V V ] <= V triangle (K by K) // K=M [ L L | L L V ] <= L triangle (K by M) // [ L L | L L L ] // ^^^============= L full block (K by N-M) magma_int_t N_M = N - M; lapackf77_zlacpy( "Upper", &M, &min_mn, &h_R[N_M*lda], &lda, Q, &ldq ); lapackf77_zlacpy( "Full", &min_mn, &N_M, h_R, &lda, L, &ldl ); lapackf77_zlacpy( "Lower", &min_mn, &M, &h_R[N_M*lda], &lda, &L[N_M*ldl], &ldl ); } // generate M by K matrix Q, where K = min(M,N) lapackf77_zungql( &M, &min_mn, &min_mn, Q, &ldq, tau, h_work, &lwork, &info ); assert( info == 0 ); // error = || L - Q^H*A || / (N * ||A||) blasf77_zgemm( "Conj", "NoTrans", &min_mn, &N, &M, &c_neg_one, Q, 
&ldq, h_A, &lda, &c_one, L, &ldl ); Anorm = lapackf77_zlange( "1", &M, &N, h_A, &lda, work ); error = lapackf77_zlange( "1", &min_mn, &N, L, &ldl, work ); if ( N > 0 && Anorm > 0 ) error /= (N*Anorm); // set L = I (K by K identity), then L = I - Q^H*Q // error = || I - Q^H*Q || / N lapackf77_zlaset( "Upper", &min_mn, &min_mn, &c_zero, &c_one, L, &ldl ); blasf77_zherk( "Upper", "Conj", &min_mn, &M, &d_neg_one, Q, &ldq, &d_one, L, &ldl ); error2 = lapackf77_zlanhe( "1", "Upper", &min_mn, L, &ldl, work ); if ( N > 0 ) error2 /= N; TESTING_FREE_CPU( Q ); Q = NULL; TESTING_FREE_CPU( L ); L = NULL; TESTING_FREE_CPU( work ); work = NULL; } /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); lapackf77_zgeqlf( &M, &N, h_A, &lda, tau, h_work, &lwork, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapack_zgeqlf returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== Print performance and error. =================================================================== */ printf("%5d %5d ", (int) M, (int) N ); if ( opts.lapack ) { printf( "%7.2f (%7.2f)", cpu_perf, cpu_time ); } else { printf(" --- ( --- )" ); } printf( " %7.2f (%7.2f) ", gpu_perf, gpu_time ); if ( opts.check ) { bool okay = (error < tol && error2 < tol); status += ! okay; printf( "%11.2e %11.2e %s\n", error, error2, (okay ? "ok" : "failed") ); } else { printf( " ---\n" ); } TESTING_FREE_CPU( tau ); TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_work ); TESTING_FREE_PIN( h_R ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
/**
    Purpose
    -------
    ZUNGTR generates a complex unitary matrix Q which is defined as the
    product of n-1 elementary reflectors of order N, as returned by
    ZHETRD:

    if UPLO = MagmaUpper, Q = H(n-1) . . . H(2) H(1),

    if UPLO = MagmaLower, Q = H(1) H(2) . . . H(n-1).

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
      -     = MagmaUpper: Upper triangle of A contains elementary reflectors
                          from ZHETRD;
      -     = MagmaLower: Lower triangle of A contains elementary reflectors
                          from ZHETRD.

    @param[in]
    n       INTEGER
            The order of the matrix Q. N >= 0.

    @param[in,out]
    A       COMPLEX_16 array, dimension (LDA,N)
            On entry, the vectors which define the elementary reflectors,
            as returned by ZHETRD.
            On exit, the N-by-N unitary matrix Q.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A. LDA >= max(1,N).

    @param[in]
    tau     COMPLEX_16 array, dimension (N-1)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by ZHETRD.

    @param[out]
    work    (workspace) COMPLEX_16 array, dimension (LWORK)
            On exit, if INFO = 0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The dimension of the array WORK. LWORK >= max(1,N-1).
            For optimum performance LWORK >= N*NB, where NB is
            the optimal blocksize.
    \n
            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    @param[in]
    dT      COMPLEX_16 array on the GPU device.
            DT contains the T matrices used in blocking the elementary
            reflectors H(i) as returned by magma_zhetrd.

    @param[in]
    nb      INTEGER
            This is the block size used in ZHETRD, and correspondingly
            the size of the T matrices, used in the factorization, and
            stored in DT.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value

    @ingroup magma_zheev_comp
    ********************************************************************/
extern "C" magma_int_t
magma_zungtr(
    magma_uplo_t uplo, magma_int_t n,
    magmaDoubleComplex *A, magma_int_t lda,
    magmaDoubleComplex *tau,
    magmaDoubleComplex *work, magma_int_t lwork,
    magmaDoubleComplex *dT, magma_int_t nb,
    magma_int_t *info)
{
    /* Address of element (i,j), 0-based, column-major with leading dim lda. */
    #define A(i,j) (A + (j)*lda+ (i))

    magma_int_t i__1;
    magma_int_t i, j;
    magma_int_t iinfo;
    magma_int_t upper, lwkopt, lquery;

    *info = 0;
    lquery = (lwork == -1);
    upper = (uplo == MagmaUpper);
    if (! upper && uplo != MagmaLower) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,n)) {
        *info = -4;
    } else if (lwork < max(1, n-1) && ! lquery) {
        *info = -7;
    }

    lwkopt = max(1, n) * nb;
    if (*info == 0) {
        work[0] = magma_zmake_lwork( lwkopt );
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info));
        return *info;
    } else if (lquery) {
        /* Workspace query: work[0] already holds the answer. */
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        work[0] = MAGMA_Z_ONE;
        return *info;
    }

    if (upper) {
        /*  Q was determined by a call to ZHETRD with UPLO = MagmaUpper
            Shift the vectors which define the elementary reflectors one
            column to the left, and set the last row and column of Q to
            those of the unit matrix.

            LAPACK (1-based): DO J = 1, N-1;  DO I = 1, J-1
            0-based:          j = 0 .. n-2;   i = 0 .. j-1
            The inner bound must therefore be i < j, so that the entry just
            above the diagonal, which ZUNGQL reads as part of the reflector,
            is shifted as well.                                           */
        for (j = 0; j < n-1; ++j) {
            for (i = 0; i < j; ++i)
                *A(i, j) = *A(i, j + 1);

            *A(n-1, j) = MAGMA_Z_ZERO;
        }
        for (i = 0; i < n-1; ++i) {
            *A(i, n-1) = MAGMA_Z_ZERO;
        }
        *A(n-1, n-1) = MAGMA_Z_ONE;

        /* Generate Q(1:n-1,1:n-1) */
        i__1 = n - 1;
        /* NOTE(review): iinfo is not propagated to *info here, matching
           the reference ZUNGTR. */
        lapackf77_zungql(&i__1, &i__1, &i__1, A(0,0), &lda, tau, work,
                         &lwork, &iinfo);
    } else {
        /*  Q was determined by a call to ZHETRD with UPLO = MagmaLower.
            Shift the vectors which define the elementary reflectors one
            column to the right, and set the first row and column of Q to
            those of the unit matrix.

            LAPACK (1-based): DO J = N, 2, -1;  DO I = J+1, N
            0-based:          j = n-1 .. 1;     i = j+1 .. n-1
            Columns are processed right to left so that each source column
            j-1 is read before it is overwritten.                         */
        for (j = n-1; j > 0; --j) {
            *A(0, j) = MAGMA_Z_ZERO;
            for (i = j+1; i < n; ++i)
                *A(i, j) = *A(i, j - 1);
        }

        /* First column becomes e_1; rows 1 .. n-1 must all be cleared. */
        *A(0, 0) = MAGMA_Z_ONE;
        for (i = 1; i < n; ++i)
            *A(i, 0) = MAGMA_Z_ZERO;

        if (n > 1) {
            /* Generate Q(2:n,2:n) */
            /* NOTE(review): iinfo from magma_zungqr is not propagated to
               *info - confirm whether its failures should be reported. */
            magma_zungqr(n-1, n-1, n-1, A(1, 1), lda, tau, dT, nb, &iinfo);
        }
    }

    work[0] = magma_zmake_lwork( lwkopt );

    return *info;
} /* magma_zungtr */
extern "C" magma_int_t
magma_zungtr(char uplo, magma_int_t n,
             magmaDoubleComplex *a, magma_int_t lda,
             magmaDoubleComplex *tau,
             magmaDoubleComplex *work, magma_int_t lwork,
             magmaDoubleComplex *dT, magma_int_t nb,
             magma_int_t *info)
{
/*  -- MAGMA (version 1.4.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       August 2013

    Purpose
    =======
    ZUNGTR generates a complex unitary matrix Q which is defined as the
    product of n-1 elementary reflectors of order N, as returned by
    ZHETRD:

    if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),

    if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).

    Arguments
    =========
    UPLO    (input) CHARACTER*1
            = 'U': Upper triangle of A contains elementary reflectors
                   from ZHETRD;
            = 'L': Lower triangle of A contains elementary reflectors
                   from ZHETRD.

    N       (input) INTEGER
            The order of the matrix Q. N >= 0.

    A       (input/output) COMPLEX_16 array, dimension (LDA,N)
            On entry, the vectors which define the elementary reflectors,
            as returned by ZHETRD.
            On exit, the N-by-N unitary matrix Q.

    LDA     (input) INTEGER
            The leading dimension of the array A. LDA >= max(1,N).

    TAU     (input) COMPLEX_16 array, dimension (N-1)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by ZHETRD.

    WORK    (workspace/output) COMPLEX_16 array, dimension (LWORK)
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK. LWORK >= max(1,N-1).
            For optimum performance LWORK >= N*NB, where NB is
            the optimal blocksize.

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    DT      (input) COMPLEX_16 array on the GPU device.
            DT contains the T matrices used in blocking the elementary
            reflectors H(i) as returned by magma_zhetrd.

    NB      (input) INTEGER
            This is the block size used in ZHETRD, and correspondingly
            the size of the T matrices, used in the factorization, and
            stored in DT.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
    =====================================================================    */

/* Address of element (i,j), 0-based, column-major with leading dim lda. */
#define a_ref(i,j) ( a + (j)*lda+ (i))

    char uplo_[2] = {uplo, 0};

    magma_int_t i__1;
    magma_int_t i, j;
    magma_int_t iinfo;
    magma_int_t upper, lwkopt, lquery;

    *info = 0;
    lquery = lwork == -1;
    upper = lapackf77_lsame(uplo_, "U");
    if (! upper && ! lapackf77_lsame(uplo_, "L")) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,n)) {
        *info = -4;
    } else if (lwork < max(1, n-1) && ! lquery) {
        *info = -7;
    }

    lwkopt = max(1, n) * nb;
    if (*info == 0) {
        MAGMA_Z_SET2REAL( work[0], lwkopt);
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info));
        return *info;
    } else if (lquery) {
        /* Workspace query: work[0] already holds the answer. */
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        work[0] = MAGMA_Z_ONE;
        return *info;
    }

    if (upper) {
        /*  Q was determined by a call to ZHETRD with UPLO = 'U'
            Shift the vectors which define the elementary reflectors one
            column to the left, and set the last row and column of Q to
            those of the unit matrix.

            LAPACK (1-based): DO J = 1, N-1;  DO I = 1, J-1
            0-based:          j = 0 .. n-2;   i = 0 .. j-1
            The inner bound must therefore be i < j, so that the entry just
            above the diagonal, which ZUNGQL reads as part of the reflector,
            is shifted as well.                                           */
        for (j = 0; j < n-1; ++j) {
            for (i = 0; i < j; ++i)
                *a_ref(i, j) = *a_ref(i, j + 1);

            *a_ref(n-1, j) = MAGMA_Z_ZERO;
        }
        for (i = 0; i < n-1; ++i) {
            *a_ref(i, n-1) = MAGMA_Z_ZERO;
        }
        *a_ref(n-1, n-1) = MAGMA_Z_ONE;

        /* Generate Q(1:n-1,1:n-1) */
        i__1 = n - 1;
        /* NOTE(review): iinfo is not propagated to *info here, matching
           the reference ZUNGTR. */
        lapackf77_zungql(&i__1, &i__1, &i__1, a_ref(0,0), &lda, tau, work,
                         &lwork, &iinfo);
    } else {
        /*  Q was determined by a call to ZHETRD with UPLO = 'L'.
            Shift the vectors which define the elementary reflectors one
            column to the right, and set the first row and column of Q to
            those of the unit matrix.

            LAPACK (1-based): DO J = N, 2, -1;  DO I = J+1, N
            0-based:          j = n-1 .. 1;     i = j+1 .. n-1
            Columns are processed right to left so that each source column
            j-1 is read before it is overwritten.                         */
        for (j = n-1; j > 0; --j) {
            *a_ref(0, j) = MAGMA_Z_ZERO;
            for (i = j+1; i < n; ++i)
                *a_ref(i, j) = *a_ref(i, j - 1);
        }

        /* First column becomes e_1; rows 1 .. n-1 must all be cleared. */
        *a_ref(0, 0) = MAGMA_Z_ONE;
        for (i = 1; i < n; ++i)
            *a_ref(i, 0) = MAGMA_Z_ZERO;

        if (n > 1) {
            /* Generate Q(2:n,2:n) */
            /* NOTE(review): iinfo from magma_zungqr is not propagated to
               *info - confirm whether its failures should be reported. */
            magma_zungqr(n-1, n-1, n-1, a_ref(1, 1), lda, tau, dT, nb, &iinfo);
        }
    }

    MAGMA_Z_SET2REAL( work[0], lwkopt);

    return *info;
} /* magma_zungtr */