/** Purpose ------- CUNGHR generates a COMPLEX unitary matrix Q which is defined as the product of IHI-ILO elementary reflectors of order N, as returned by CGEHRD: Q = H(ilo) H(ilo+1) . . . H(ihi-1). Arguments --------- @param[in] n INTEGER The order of the matrix Q. N >= 0. @param[in] ilo INTEGER @param[in] ihi INTEGER ILO and IHI must have the same values as in the previous call of CGEHRD. Q is equal to the unit matrix except in the submatrix Q(ilo+1:ihi,ilo+1:ihi). 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. @param[in,out] A COMPLEX array, dimension (LDA,N) On entry, the vectors which define the elementary reflectors, as returned by CGEHRD. On exit, the N-by-N unitary matrix Q. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[in] tau COMPLEX array, dimension (N-1) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEHRD. @param[in] dT COMPLEX array on the GPU device. DT contains the T matrices used in blocking the elementary reflectors H(i), e.g., this can be the 9th argument of magma_cgehrd. @param[in] nb INTEGER This is the block size used in CGEHRD, and correspondingly the size of the T matrices, used in the factorization, and stored in DT. @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value @ingroup magma_cgeev_comp ********************************************************************/ extern "C" magma_int_t magma_cunghr(magma_int_t n, magma_int_t ilo, magma_int_t ihi, magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex *tau, magmaFloatComplex *dT, magma_int_t nb, magma_int_t *info) { #define A(i,j) (A + (j)*lda+ (i)) magma_int_t i, j, nh, iinfo; *info = 0; nh = ihi - ilo; if (n < 0) *info = -1; else if (ilo < 1 || ilo > max(1,n)) *info = -2; else if (ihi < min(ilo,n) || ihi > n) *info = -3; else if (lda < max(1,n)) *info = -5; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (n == 0) return *info; /* Shift the vectors which define the elementary reflectors one column to the right, and set the first ilo and the last n-ihi rows and columns to those of the unit matrix */ for (j = ihi-1; j >= ilo; --j) { for (i = 0; i < j; ++i) *A(i, j) = MAGMA_C_ZERO; for (i = j+1; i < ihi; ++i) *A(i, j) = *A(i, j - 1); for (i = ihi; i < n; ++i) *A(i, j) = MAGMA_C_ZERO; } for (j = 0; j < ilo; ++j) { for (i = 0; i < n; ++i) *A(i, j) = MAGMA_C_ZERO; *A(j, j) = MAGMA_C_ONE; } for (j = ihi; j < n; ++j) { for (i = 0; i < n; ++i) *A(i, j) = MAGMA_C_ZERO; *A(j, j) = MAGMA_C_ONE; } if (nh > 0) { /* Generate Q(ilo+1:ihi,ilo+1:ihi) */ magma_cungqr(nh, nh, nh, A(ilo, ilo), lda, tau+ilo-1, dT, nb, &iinfo); } return *info; } /* magma_cunghr */
/* //////////////////////////////////////////////////////////////////////////// -- Testing cungqr */ int main( int argc, char** argv ) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; float error, work[1]; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magmaFloatComplex *hA, *hR, *tau, *h_work; magmaFloatComplex *dA, *dT; magma_int_t m, n, k; magma_int_t n2, lda, ldda, lwork, min_mn, nb, info; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); float tol = opts.tolerance * lapackf77_slamch("E"); opts.lapack |= opts.check; // check (-c) implies lapack (-l) printf("Running version %d; available are (specified through --version num):\n", (int) opts.version); printf("1 - uses precomputed clarft matrices (default)\n"); printf("2 - recomputes the clarft matrices on the fly\n\n"); printf(" m n k CPU GFlop/s (sec) GPU GFlop/s (sec) ||R|| / ||A||\n"); printf("=========================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; if ( m < n || n < k ) { printf( "%5d %5d %5d skipping because m < n or n < k\n", (int) m, (int) n, (int) k ); continue; } lda = m; ldda = ((m + 31)/32)*32; n2 = lda*n; min_mn = min(m, n); nb = magma_get_cgeqrf_nb( m ); lwork = (m + 2*n+nb)*nb; gflops = FLOPS_CUNGQR( m, n, k ) / 1e9; TESTING_MALLOC_PIN( h_work, magmaFloatComplex, lwork ); TESTING_MALLOC_PIN( hR, magmaFloatComplex, lda*n ); TESTING_MALLOC_CPU( hA, magmaFloatComplex, lda*n ); TESTING_MALLOC_CPU( tau, magmaFloatComplex, min_mn ); TESTING_MALLOC_DEV( dA, magmaFloatComplex, ldda*n ); TESTING_MALLOC_DEV( dT, magmaFloatComplex, ( 2*min_mn + ((n + 31)/32)*32 )*nb ); lapackf77_clarnv( &ione, ISEED, &n2, hA ); lapackf77_clacpy( MagmaUpperLowerStr, &m, &n, hA, &lda, hR, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ // first, get QR factors magma_csetmatrix( m, n, hA, lda, dA, ldda ); magma_cgeqrf_gpu( m, n, dA, ldda, tau, dT, &info ); if (info != 0) printf("magma_cgeqrf_gpu returned error %d: %s.\n", (int) info, magma_strerror( info )); magma_cgetmatrix( m, n, dA, ldda, hR, lda ); gpu_time = magma_wtime(); if (opts.version == 1) magma_cungqr( m, n, k, hR, lda, tau, dT, nb, &info ); else magma_cungqr2(m, n, k, hR, lda, tau, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_cungqr_gpu returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { error = lapackf77_clange("f", &m, &n, hA, &lda, work ); lapackf77_cgeqrf( &m, &n, hA, &lda, tau, h_work, &lwork, &info ); if (info != 0) printf("lapackf77_cgeqrf returned error %d: %s.\n", (int) info, magma_strerror( info )); cpu_time = magma_wtime(); lapackf77_cungqr( &m, &n, &k, hA, &lda, tau, h_work, &lwork, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_cungqr returned error %d: %s.\n", (int) info, magma_strerror( info )); // compute relative error |R|/|A| := |Q_magma - Q_lapack|/|A| blasf77_caxpy( &n2, &c_neg_one, hA, &ione, hR, &ione ); error = lapackf77_clange("f", &m, &n, hR, &lda, work) / error; printf("%5d %5d %5d %7.1f (%7.2f) %7.1f (%7.2f) %8.2e %s\n", (int) m, (int) n, (int) k, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); status += ! (error < tol); } else { printf("%5d %5d %5d --- ( --- ) %7.1f (%7.2f) --- \n", (int) m, (int) n, (int) k, gpu_perf, gpu_time ); } TESTING_FREE_PIN( h_work ); TESTING_FREE_PIN( hR ); TESTING_FREE_CPU( hA ); TESTING_FREE_CPU( tau ); TESTING_FREE_DEV( dA ); TESTING_FREE_DEV( dT ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
extern "C" magma_int_t magma_cunghr( magma_int_t n, magma_int_t ilo, magma_int_t ihi, magmaFloatComplex *a, magma_int_t lda, magmaFloatComplex *tau, magmaFloatComplex_ptr dT, size_t dT_offset, magma_int_t nb, magma_queue_t queue, magma_int_t *info ) { /* -- clMAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date November 2014 Purpose ======= CUNGHR generates a COMPLEX unitary matrix Q which is defined as the product of IHI-ILO elementary reflectors of order N, as returned by CGEHRD: Q = H(ilo) H(ilo+1) . . . H(ihi-1). Arguments ========= N (input) INTEGER The order of the matrix Q. N >= 0. ILO (input) INTEGER IHI (input) INTEGER ILO and IHI must have the same values as in the previous call of CGEHRD. Q is equal to the unit matrix except in the submatrix Q(ilo+1:ihi,ilo+1:ihi). 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the vectors which define the elementary reflectors, as returned by CGEHRD. On exit, the N-by-N unitary matrix Q. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). TAU (input) COMPLEX array, dimension (N-1) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEHRD. DT (input) COMPLEX array on the GPU device. DT contains the T matrices used in blocking the elementary reflectors H(i), e.g., this can be the 9th argument of magma_cgehrd. NB (input) INTEGER This is the block size used in CGEHRD, and correspondingly the size of the T matrices, used in the factorization, and stored in DT. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== */ #define a_ref(i,j) (a + (j)*lda+ (i)) magma_int_t i, j, nh, iinfo; *info = 0; nh = ihi - ilo; if (n < 0) *info = -1; else if (ilo < 1 || ilo > max(1,n)) *info = -2; else if (ihi < min(ilo,n) || ihi > n) *info = -3; else if (lda < max(1,n)) *info = -5; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (n == 0) return *info; /* Shift the vectors which define the elementary reflectors one column to the right, and set the first ilo and the last n-ihi rows and columns to those of the unit matrix */ for (j = ihi-1; j >= ilo; --j) { for (i = 0; i < j; ++i) *a_ref(i, j) = MAGMA_C_ZERO; for (i = j+1; i < ihi; ++i) *a_ref(i, j) = *a_ref(i, j - 1); for (i = ihi; i < n; ++i) *a_ref(i, j) = MAGMA_C_ZERO; } for (j = 0; j < ilo; ++j) { for (i = 0; i < n; ++i) *a_ref(i, j) = MAGMA_C_ZERO; *a_ref(j, j) = MAGMA_C_ONE; } for (j = ihi; j < n; ++j) { for (i = 0; i < n; ++i) *a_ref(i, j) = MAGMA_C_ZERO; *a_ref(j, j) = MAGMA_C_ONE; } if (nh > 0) { /* Generate Q(ilo+1:ihi,ilo+1:ihi) */ magma_cungqr(nh, nh, nh, a_ref(ilo, ilo), lda, tau+ilo-1, dT, dT_offset, nb, queue, &iinfo); } return *info; } /* magma_cunghr */
extern "C" magma_int_t magma_cungtr(char uplo, magma_int_t n, magmaFloatComplex *a, magma_int_t lda, magmaFloatComplex *tau, magmaFloatComplex *work, magma_int_t lwork, magmaFloatComplex *dT, magma_int_t nb, magma_int_t *info) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= CUNGTR generates a complex unitary matrix Q which is defined as the product of n-1 elementary reflectors of order N, as returned by CHETRD: if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A contains elementary reflectors from CHETRD; = 'L': Lower triangle of A contains elementary reflectors from CHETRD. N (input) INTEGER The order of the matrix Q. N >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the vectors which define the elementary reflectors, as returned by CHETRD. On exit, the N-by-N unitary matrix Q. LDA (input) INTEGER The leading dimension of the array A. LDA >= N. TAU (input) COMPLEX array, dimension (N-1) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CHETRD. WORK (workspace/output) COMPLEX array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= N-1. For optimum performance LWORK >= N*NB, where NB is the optimal blocksize. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. DT (input) COMPLEX array on the GPU device. DT contains the T matrices used in blocking the elementary reflectors H(i) as returned by magma_chetrd. NB (input) INTEGER This is the block size used in CHETRD, and correspondingly the size of the T matrices, used in the factorization, and stored in DT. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== */ #define a_ref(i,j) ( a + (j)*lda+ (i)) char uplo_[2] = {uplo, 0}; magma_int_t i__1; magma_int_t i, j; magma_int_t iinfo; magma_int_t upper, lwkopt, lquery; *info = 0; lquery = lwork == -1; upper = lapackf77_lsame(uplo_, "U"); if (! upper && ! lapackf77_lsame(uplo_, "L")) { *info = -1; } else if (n < 0) { *info = -2; } else if (lda < max(1,n)) { *info = -4; } else /* if(complicated condition) */ { /* Computing MAX */ if (lwork < max(1, n-1) && ! lquery) { *info = -7; } } lwkopt = max(1, n) * nb; if (*info == 0) { MAGMA_C_SET2REAL( work[0], lwkopt); } if (*info != 0) { magma_xerbla( __func__, -(*info)); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { work[0] = MAGMA_C_ONE; return *info; } if (upper) { /* Q was determined by a call to CHETRD with UPLO = 'U' Shift the vectors which define the elementary reflectors one column to the left, and set the last row and column of Q to those of the unit matrix */ for (j = 0; j < n-1; ++j) { for (i = 0; i < j-1; ++i) *a_ref(i, j) = *a_ref(i, j + 1); *a_ref(n-1, j) = MAGMA_C_ZERO; } for (i = 0; i < n-1; ++i) { *a_ref(i, n-1) = MAGMA_C_ZERO; } *a_ref(n-1, n-1) = MAGMA_C_ONE; /* Generate Q(1:n-1,1:n-1) */ i__1 = n - 1; lapackf77_cungql(&i__1, &i__1, &i__1, a_ref(0,0), &lda, tau, work, &lwork, &iinfo); } else { /* Q was determined by a call to CHETRD with UPLO = 'L'. Shift the vectors which define the elementary reflectors one column to the right, and set the first row and column of Q to those of the unit matrix */ for (j = n-1; j > 0; --j) { *a_ref(0, j) = MAGMA_C_ZERO; for (i = j; i < n-1; ++i) *a_ref(i, j) = *a_ref(i, j - 1); } *a_ref(0, 0) = MAGMA_C_ONE; for (i = 1; i < n-1; ++i) *a_ref(i, 0) = MAGMA_C_ZERO; if (n > 1) { /* Generate Q(2:n,2:n) */ magma_cungqr(n-1, n-1, n-1, a_ref(1, 1), lda, tau, dT, nb, &iinfo); } } MAGMA_C_SET2REAL( work[0], lwkopt); return *info; } /* magma_cungtr */