extern "C" magma_int_t magma_zgeqr2x3_gpu(magma_int_t *m, magma_int_t *n, magmaDoubleComplex *dA, magma_int_t *ldda, magmaDoubleComplex *dtau, magmaDoubleComplex *dT, magmaDoubleComplex *ddA, double *dwork, magma_int_t *info) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= ZGEQR2 computes a QR factorization of a complex m by n matrix A: A = Q * R. This expert routine requires two more arguments than the standard zgeqr2, namely, dT and ddA, explained below. The storage for A is also not as in the LAPACK's zgeqr2 routine (see below). The first is used to output the triangular n x n factor T of the block reflector used in the factorization. The second holds the diagonal nxn blocks of A, i.e., the diagonal submatrices of R. This routine implements the left looking QR. This version adds internal blocking. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) COMPLEX_16 array, dimension (LDA,N) On entry, the m by n matrix A. On exit, the unitary matrix Q as a product of elementary reflectors (see Further Details). the elements on and above the diagonal of the array contain the min(m,n) by n upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the unitary matrix Q as a product of elementary reflectors (see Further Details). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). TAU (output) COMPLEX_16 array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). dT (output) COMPLEX_16 array, dimension N x N. Stores the triangular N x N factor T of the block reflector used in the factorization. The lower triangular part is 0. ddA (output) COMPLEX_16 array, dimension N x N. Stores the elements of the upper N x N diagonal block of A. 
LAPACK stores this array in A. There are 0s below the diagonal. RWORK (workspace) DOUBLE_PRECISION array, dimension (3 N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i). ===================================================================== */ #define da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1)) #define BLOCK_SIZE 32 magma_int_t i, k; double *dnorm = dwork; magmaDoubleComplex *work = (magmaDoubleComplex *)(dwork+2*(*n)); *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldda < max(1,*m)) { *info = -4; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Compute the norms of the trailing columns */ k = min(*m,*n); magmablas_dznrm2_cols(*m, k, da_ref(0,0), *ldda, dnorm); for (int b=0; b < k; b += BLOCK_SIZE) { for (i = b; i < min(k, b+BLOCK_SIZE); ++i) { /* Apply H' to A(:,i) from the left */ if ( i-b > 0) magma_zlarfbx_gpu(*m-b, i-b, da_ref(b, b), *ldda, dT+b+b*k, k, da_ref(b, i), work); /* Adjust the dnorm[i] to hold the norm of A(i:m,i) */ if ( i > 0 ) magmablas_dznrm2_adjust(i, dnorm+i, da_ref(0, i)); /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) 1. 1 is not yet put on the diagonal of A 2. Elements above the diagonal are copied in ddA and the ones in A are set to zero 3. update T */ magma_zlarfgtx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau+i, dnorm+i, ddA + i + i*(*n), i, da_ref(i,0), *ldda, dT, k, work); } /* Apply the transformations to the trailing matrix. 
*/ //magma_zlarfb2_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, magma_zlarfb2_gpu( *m-b, k-i, BLOCK_SIZE, da_ref(b, b), *ldda, dT+b+b*k, k, da_ref(b, i), *ldda, work, k-i); } return *info; } /* magma_zgeqr2 */
/**
    Purpose
    -------
    ZGEQR2 computes a QR factorization of a complex m by n matrix A:
    A = Q * R.

    This expert routine requires two more arguments than the standard
    zgeqr2, namely, dT and ddA, explained below. The storage for A is
    also not as in the LAPACK's zgeqr2 routine (see below).

    The first is used to output the triangular n x n factor T of the
    block reflector used in the factorization. The second holds the
    diagonal nxn blocks of A, i.e., the diagonal submatrices of R.
    This routine implements the left looking QR.

    This version adds internal blocking.

    Arguments
    ---------
    @param[in]
    m       INTEGER
            The number of rows of the matrix A.  M >= 0.

    @param[in]
    n       INTEGER
            The number of columns of the matrix A.  N >= 0.

    @param[in,out]
    dA      COMPLEX_16 array, dimension (LDDA,N)
            On entry, the m by n matrix A.
            On exit, the unitary matrix Q as a product of elementary
            reflectors (see Further Details).
    \n
            the elements on and above the diagonal of the array
            contain the min(m,n) by n upper trapezoidal matrix R (R is
            upper triangular if m >= n); the elements below the diagonal,
            with the array TAU, represent the unitary matrix Q as a
            product of elementary reflectors (see Further Details).

    @param[in]
    ldda    INTEGER
            The leading dimension of the array A.  LDDA >= max(1,M).

    @param[out]
    dtau    COMPLEX_16 array, dimension (min(M,N))
            The scalar factors of the elementary reflectors (see Further
            Details).

    @param[out]
    dT      COMPLEX_16 array, dimension N x N.
            Stores the triangular N x N factor T of the block reflector
            used in the factorization. The lower triangular part is 0.

    @param[out]
    ddA     COMPLEX_16 array, dimension N x N.
            Stores the elements of the upper N x N diagonal block of A.
            LAPACK stores this array in A. There are 0s below the diagonal.

    @param
    dwork   (workspace) DOUBLE PRECISION array, dimension (3 N)

    @param[out]
    info    INTEGER
      -     = 0: successful exit
      -     < 0: if INFO = -i, the i-th argument had an illegal value

    Further Details
    ---------------
    The matrix Q is represented as a product of elementary reflectors

       Q = H(1) H(2) . . . H(k), where k = min(m,n).

    Each H(i) has the form

       H(i) = I - tau * v * v'

    where tau is a complex scalar, and v is a complex vector with
    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
    and tau in TAU(i).

    @ingroup magma_zgeqrf_aux
    ********************************************************************/
extern "C" magma_int_t
magma_zgeqr2x3_gpu(
    magma_int_t m, magma_int_t n,
    magmaDoubleComplex_ptr dA, magma_int_t ldda,
    magmaDoubleComplex_ptr dtau,
    magmaDoubleComplex_ptr dT,
    magmaDoubleComplex_ptr ddA,
    magmaDouble_ptr dwork,
    magma_int_t *info)
{
/* Column-major element access into dA with leading dimension ldda. */
#define dA(i_,j_) (dA + (i_) + (j_)*ldda)
/* Panel width for the internal blocking of the left-looking QR. */
#define BLOCK_SIZE 32

    magma_int_t b, i, min_mn;

    /* dwork layout: the first 2*n doubles are reserved for column norms
       (dnorm); the remainder is reinterpreted as complex workspace
       (dwork2) for the larfbx/larfgtx/larfb2 helper kernels. */
    magmaDouble_ptr dnorm = dwork;
    magmaDoubleComplex_ptr dwork2 = (magmaDoubleComplex_ptr)(dwork + 2*n);

    /* Argument checking (LAPACK-style negative info codes). */
    *info = 0;
    if (m < 0) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (ldda < max(1,m)) {
        *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* All GPU work is issued on a queue created on the current device;
       the queue is destroyed before returning. */
    magma_queue_t queue;
    magma_device_t cdev;
    magma_getdevice( &cdev );
    magma_queue_create( cdev, &queue );

    /* Compute the norms of the trailing columns */
    min_mn = min(m,n);
    // magmablas_dznrm2_cols( m, min_mn, dA(0,0), ldda, dnorm, queue );

    /* Left-looking blocked QR: factor one BLOCK_SIZE-wide panel at a
       time, then apply its block reflector to the trailing columns. */
    for (b=0; b < min_mn; b += BLOCK_SIZE) {
        for (i = b; i < min(min_mn, b+BLOCK_SIZE); ++i) {
            /*   Apply H' to A(:,i) from the left */
            if ( i-b > 0)
                magma_zlarfbx_gpu( m-b, i-b, dA(b, b), ldda,
                                   dT+b+b*min_mn, min_mn, dA(b, i),
                                   dwork2, queue );

            /* Adjust the dnorm[i] to hold the norm of A(i:m,i).
               This version recomputes the column norm from scratch
               instead of the (commented-out) downdating adjustment. */
            //if ( i > 0 )
            //    magmablas_dznrm2_adjust( i, dnorm+i, dA(0, i), queue );
            magmablas_dznrm2_cols( m-i, 1, dA(i,i), ldda, dnorm+i, queue );

            /*  Generate elementary reflector H(i) to annihilate A(i+1:m,i)
                1. 1 is not yet put on the diagonal of A
                2. Elements above the diagonal are copied in ddA and
                   the ones in A are set to zero
                3. update T */
            magma_zlarfgtx_gpu(m-i, dA(i, i), dA(min(i+1,m), i),
                               dtau+i, dnorm+i, ddA + i + i*(n), i,
                               dA(i,0), ldda, dT, min_mn, dwork2, queue);
        }

        /* Apply the transformations to the trailing matrix.
           (after the inner loop, i == min(min_mn, b+BLOCK_SIZE), i.e. the
           first column past the just-factored panel) */
        //magma_zlarfb2_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
        magma_zlarfb2_gpu(
                           m-b, min_mn-i, BLOCK_SIZE,
                           dA(b, b), ldda, dT+b+b*min_mn, min_mn,
                           dA(b, i), ldda, dwork2, min_mn-i, queue );
    }

    magma_queue_destroy( queue );

    return *info;
} /* magma_zgeqr2 */