/** Purpose ------- CGEQR2 computes a QR factorization of a complex m by n matrix A: A = Q * R. This expert routine requires two more arguments than the standard cgeqr2, namely, dT and ddA, explained below. The storage for A is also not as in the LAPACK's cgeqr2 routine (see below). The first is used to output the triangular n x n factor T of the block reflector used in the factorization. The second holds the diagonal nxn blocks of A, i.e., the diagonal submatrices of R. This routine implements the left looking QR. Arguments --------- @param[in] m INTEGER The number of rows of the matrix A. M >= 0. @param[in] n INTEGER The number of columns of the matrix A. N >= 0. @param[in,out] dA COMPLEX array, dimension (LDA,N) On entry, the m by n matrix A. On exit, the unitary matrix Q as a product of elementary reflectors (see Further Details). \n the elements on and above the diagonal of the array contain the min(m,n) by n upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the unitary matrix Q as a product of elementary reflectors (see Further Details). @param[in] ldda INTEGER The leading dimension of the array A. LDA >= max(1,M). @param[out] dtau COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). @param[out] dT COMPLEX array, dimension N x N. Stores the triangular N x N factor T of the block reflector used in the factorization. The lower triangular part is 0. @param[out] ddA COMPLEX array, dimension N x N. Stores the elements of the upper N x N diagonal block of A. LAPACK stores this array in A. There are 0s below the diagonal. @param dwork (workspace) DOUBLE_PRECISION array, dimension (3 N) @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value Further Details --------------- The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i). @ingroup magma_cgeqrf_comp ********************************************************************/ extern "C" magma_int_t magma_cgeqr2x2_gpu( magma_int_t m, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dtau, magmaFloatComplex_ptr dT, magmaFloatComplex_ptr ddA, magmaFloat_ptr dwork, magma_int_t *info) { #define dA(i_,j_) (dA + (j_)*(ldda) + (i_)) magma_int_t i, k; magmaFloatComplex *work = (magmaFloatComplex *)dwork; magmaFloat_ptr dnorm = dwork + 4*(n); *info = 0; if (m < 0) { *info = -1; } else if (n < 0) { *info = -2; } else if (ldda < max(1,m)) { *info = -4; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Compute the norms of the trailing columns */ k = min(m,n); magmablas_scnrm2_cols(m, k, dA(0,0), ldda, dnorm); for (i = 0; i < k; ++i) { /* 1. Apply H' to A(:,i) from the left 2. Adjust the dnorm[i] to hold the norm of A(i:m,i) */ if (i > 0) { magma_clarfbx_gpu(m, i, dA(0, 0), ldda, dT, k, dA(0, i), work); magmablas_scnrm2_adjust(i, dnorm+i, dA(0, i)); } /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) 1. 1 is not yet put on the diagonal of A 2. Elements above the diagonal are copied in ddA and the ones in A are set to zero 3. update T */ magma_clarfgtx_gpu(m-i, dA(i, i), dA(min(i+1,m), i), dtau+i, dnorm+i, ddA + i + i*(n), i, dA(i,0), ldda, dT, k, work); } return *info; } /* magma_cgeqr2 */
extern "C" magma_err_t magma_cgeqr2x3_gpu(magma_int_t *m, magma_int_t *n, magmaFloatComplex_ptr dA, size_t dA_offset, magma_int_t *ldda, magmaFloatComplex_ptr dtau, size_t dtau_offset, magmaFloatComplex_ptr dT, size_t dT_offset, magmaFloatComplex_ptr ddA, size_t ddA_offset, magmaFloat_ptr dwork, size_t dwork_offset, magma_int_t *info, magma_queue_t queue) { /* -- clMAGMA (version 1.1.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date January 2014 Purpose ======= CGEQR2 computes a QR factorization of a complex m by n matrix A: A = Q * R. This expert routine requires two more arguments than the standard cgeqr2, namely, dT and ddA, explained below. The storage for A is also not as in the LAPACK's cgeqr2 routine (see below). The first is used to output the triangular n x n factor T of the block reflector used in the factorization. The second holds the diagonal nxn blocks of A, i.e., the diagonal submatrices of R. This routine implements the left looking QR. This version adds internal blocking. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the m by n matrix A. On exit, the unitary matrix Q as a product of elementary reflectors (see Further Details). the elements on and above the diagonal of the array contain the min(m,n) by n upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the unitary matrix Q as a product of elementary reflectors (see Further Details). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). TAU (output) COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). dT (output) COMPLEX array, dimension N x N. Stores the triangular N x N factor T of the block reflector used in the factorization. The lower triangular part is 0. ddA (output) COMPLEX array, dimension N x N. Stores the elements of the upper N x N diagonal block of A. LAPACK stores this array in A. There are 0s below the diagonal. RWORK (workspace) DOUBLE_PRECISION array, dimension (3 N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i). ===================================================================== */ //#define da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1)) #define da_ref(a_1,a_2) dA, (dA_offset + ((a_2)*(*ldda) + (a_1))) #define BLOCK_SIZE 32 //#define BLOCK_SIZE 16 static magma_int_t i, k; //float *dnorm = dwork; magmaFloat_ptr dnorm = dwork; size_t dnorm_offset = dwork_offset; //magmaFloatComplex *work = (magmaFloatComplex *)(dwork+2*(*n)); magmaFloatComplex_ptr work = (magmaFloatComplex_ptr)dwork; size_t work_offset = dwork_offset + 2*(*n); *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldda < max(1,*m)) { *info = -4; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Compute the norms of the trailing columns */ k = min(*m,*n); magmablas_scnrm2(*m, k, da_ref(0,0), *ldda, dnorm, dnorm_offset, queue); for (int b=0; b < k; b += BLOCK_SIZE) { for (i = b; i < min(k, b+BLOCK_SIZE); ++i) { /* Apply H' to A(:,i) from the left */ if ( i-b > 0){ magma_queue_sync(queue); magma_clarfbx_gpu(*m-b, i-b, da_ref(b, b), *ldda, dT, (dT_offset+b+b*k), k, da_ref(b, i), work, work_offset, queue); } /* Adjust the dnorm[i] to hold the norm of A(i:m,i) */ if ( i > 0 ){ magma_queue_sync(queue); magmablas_scnrm2_adjust(i, dnorm, dnorm_offset+i, da_ref(0, i), queue); } /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) 1. 1 is not yet put on the diagonal of A 2. Elements above the diagonal are copied in ddA and the ones in A are set to zero 3. update T */ magma_clarfgtx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau, dtau_offset+i, dnorm, dnorm_offset+i, ddA, ddA_offset + i + i*(*n), i, da_ref(i,0), *ldda, dT, dT_offset, k, work, work_offset, queue); } /* Apply the transformations to the trailing matrix. */ magma_clarfb2_gpu( *m-b, k-i, BLOCK_SIZE, da_ref(b, b), *ldda, dT, dT_offset+b+b*k, k, da_ref(b, i), *ldda, work, work_offset, k-i, queue); } magma_queue_sync(queue); return *info; } /* magma_cgeqr2 */