Пример #1
0
/**
    Purpose
    -------
    CGEQR2 computes a QR factorization of a complex m by n matrix A:
    A = Q * R.

    This expert routine requires two more arguments than the standard
    cgeqr2, namely, dT and ddA, explained below. The storage for A is
    also not as in the LAPACK's cgeqr2 routine (see below).

    The first is used to output the triangular
    n x n factor T of the block reflector used in the factorization.
    The second holds the diagonal nxn blocks of A, i.e., the diagonal
    submatrices of R. This routine implements the left looking QR.

    Arguments
    ---------
    @param[in]
    m       INTEGER
            The number of rows of the matrix A.  M >= 0.

    @param[in]
    n       INTEGER
            The number of columns of the matrix A.  N >= 0.

    @param[in,out]
    dA      COMPLEX array, dimension (LDA,N)
            On entry, the m by n matrix A.
            On exit, the unitary matrix Q as a
            product of elementary reflectors (see Further Details).
    \n
            the elements on and above the diagonal of the array
            contain the min(m,n) by n upper trapezoidal matrix R (R is
            upper triangular if m >= n); the elements below the diagonal,
            with the array TAU, represent the unitary matrix Q as a
            product of elementary reflectors (see Further Details).

    @param[in]
    ldda    INTEGER
            The leading dimension of the array A.  LDA >= max(1,M).

    @param[out]
    dtau    COMPLEX array, dimension (min(M,N))
            The scalar factors of the elementary reflectors (see Further
            Details).

    @param[out]
    dT      COMPLEX array, dimension N x N.
            Stores the triangular N x N factor T of the block reflector
            used in the factorization. The lower triangular part is 0.

    @param[out]
    ddA     COMPLEX array, dimension N x N.
            Stores the elements of the upper N x N diagonal block of A.
            LAPACK stores this array in A. There are 0s below the diagonal.

    @param
    dwork   (workspace) DOUBLE_PRECISION array, dimension (3 N)

    @param[out]
    info    INTEGER
      -     = 0: successful exit
      -     < 0: if INFO = -i, the i-th argument had an illegal value

    Further Details
    ---------------
    The matrix Q is represented as a product of elementary reflectors

       Q = H(1) H(2) . . . H(k), where k = min(m,n).

    Each H(i) has the form

       H(i) = I - tau * v * v'

    where tau is a complex scalar, and v is a complex vector with
    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
    and tau in TAU(i).

    @ingroup magma_cgeqrf_comp
    ********************************************************************/
extern "C" magma_int_t
magma_cgeqr2x2_gpu(
    magma_int_t m, magma_int_t n,
    magmaFloatComplex_ptr dA, magma_int_t ldda,
    magmaFloatComplex_ptr dtau,
    magmaFloatComplex_ptr dT,
    magmaFloatComplex_ptr ddA,
    magmaFloat_ptr dwork,
    magma_int_t *info)
{
    #define dA(i_,j_) (dA + (j_)*(ldda) + (i_))
    
    magma_int_t i, k;
    
    magmaFloatComplex *work = (magmaFloatComplex *)dwork;
    magmaFloat_ptr dnorm = dwork + 4*(n);


    *info = 0;
    if (m < 0) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (ldda < max(1,m)) {
        *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Compute the norms of the trailing columns */
    k = min(m,n);
    magmablas_scnrm2_cols(m, k, dA(0,0), ldda, dnorm);

    for (i = 0; i < k; ++i) {
        /*   1. Apply H' to A(:,i) from the left
             2. Adjust the dnorm[i] to hold the norm of A(i:m,i) */
        if (i > 0) {
            magma_clarfbx_gpu(m, i, dA(0, 0), ldda,
                              dT, k, dA(0, i), work);
            magmablas_scnrm2_adjust(i, dnorm+i, dA(0, i));
        }

        /*  Generate elementary reflector H(i) to annihilate A(i+1:m,i)
            1. 1 is not yet put on the diagonal of A
            2. Elements above the diagonal are copied in ddA and the ones
               in A are set to zero
            3. update T */
        magma_clarfgtx_gpu(m-i, dA(i, i), dA(min(i+1,m), i), dtau+i,
                           dnorm+i, ddA + i + i*(n), i,
                           dA(i,0), ldda,  dT, k, work);
    }

    return *info;
} /* magma_cgeqr2 */
Пример #2
0
/**
    Purpose
    -------
    CGEQP3 computes a QR factorization with column pivoting of a
    matrix A:  A*P = Q*R  using Level 3 BLAS.

    Arguments
    ---------
    @param[in]
    m       INTEGER
            The number of rows of the matrix A. M >= 0.

    @param[in]
    n       INTEGER
            The number of columns of the matrix A.  N >= 0.

    @param[in,out]
    dA      COMPLEX array on the GPU, dimension (LDDA,N)
            On entry, the M-by-N matrix A.
            On exit, the upper triangle of the array contains the
            min(M,N)-by-N upper trapezoidal matrix R; the elements below
            the diagonal, together with the array TAU, represent the
            unitary matrix Q as a product of min(M,N) elementary
            reflectors.

    @param[in]
    ldda    INTEGER
            The leading dimension of the array A. LDDA >= max(1,M).

    @param[in,out]
    jpvt    INTEGER array, dimension (N)
            On entry, if JPVT(J).ne.0, the J-th column of A is permuted
            to the front of A*P (a leading column); if JPVT(J)=0,
            the J-th column of A is a free column.
            On exit, if JPVT(J)=K, then the J-th column of A*P was the
            the K-th column of A.

    @param[out]
    tau     COMPLEX array, dimension (min(M,N))
            The scalar factors of the elementary reflectors.

    @param[out]
    dwork   (workspace) COMPLEX array on the GPU, dimension (MAX(1,LWORK))
            On exit, if INFO=0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The dimension of the array WORK.
            For [sd]geqp3, LWORK >= (N+1)*NB + 2*N;
            for [cz]geqp3, LWORK >= (N+1)*NB,
            where NB is the optimal blocksize.
    \n
            Note: unlike the CPU interface of this routine, the GPU interface
            does not support a workspace query.

    @param
    rwork   (workspace, for [cz]geqp3 only) REAL array, dimension (2*N)

    @param[out]
    info    INTEGER
      -     = 0: successful exit.
      -     < 0: if INFO = -i, the i-th argument had an illegal value.

    Further Details
    ---------------
    The matrix Q is represented as a product of elementary reflectors

        Q = H(1) H(2) . . . H(k), where k = min(m,n).

    Each H(i) has the form

        H(i) = I - tau * v * v'

    where tau is a complex scalar, and v is a complex vector
    with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
    A(i+1:m,i), and tau in TAU(i).

    @ingroup magma_cgeqp3_comp
    ********************************************************************/
extern "C" magma_int_t
magma_cgeqp3_gpu(
    magma_int_t m, magma_int_t n,
    magmaFloatComplex_ptr dA, magma_int_t ldda,
    magma_int_t *jpvt, magmaFloatComplex *tau,
    magmaFloatComplex_ptr dwork, magma_int_t lwork,
    #ifdef COMPLEX
    float *rwork,
    #endif
    magma_int_t *info )
{
    #define dA(i_, j_) (dA + (i_) + (j_)*ldda)

    const magmaFloatComplex c_zero = MAGMA_C_ZERO;
    const magma_int_t ione = 1;

    //magma_int_t na;
    magma_int_t n_j;
    magma_int_t j, jb, nb, sm, sn, fjb, nfxd, minmn;
    magma_int_t topbmn, lwkopt;
    
    *info = 0;
    if (m < 0) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (ldda < max(1,m)) {
        *info = -4;
    }
    
    nb = magma_get_cgeqp3_nb( m, n );
    minmn = min(m,n);
    if (*info == 0) {
        if (minmn == 0) {
            lwkopt = 1;
        } else {
            lwkopt = (n + 1)*nb;
            #ifdef REAL
            lwkopt += 2*n;
            #endif
        }
        if (lwork < lwkopt) {
            *info = -8;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    if (minmn == 0)
        return *info;

    #ifdef REAL
    float *rwork = dwork + (n + 1)*nb;
    #endif
    magmaFloatComplex_ptr df;
    if (MAGMA_SUCCESS != magma_cmalloc( &df, (n+1)*nb )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    
    magmaFloat_ptr dlsticcs;
    if (MAGMA_SUCCESS != magma_smalloc( &dlsticcs, 1+256*(n+255)/256 )) {
        magma_free( df );
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    magma_queue_t queue;
    magma_device_t cdev;
    magma_getdevice( &cdev );
    magma_queue_create( cdev, &queue );

    magmablas_claset( MagmaFull, n+1, nb, c_zero, c_zero, df, n+1, queue );

    nfxd = 0;
    /* Move initial columns up front.
     * Note jpvt uses 1-based indices for historical compatibility. */
    for (j = 0; j < n; ++j) {
        if (jpvt[j] != 0) {
            if (j != nfxd) {
                blasf77_cswap(&m, dA(0, j), &ione, dA(0, nfxd), &ione);  // TODO: ERROR, matrix not on CPU!
                jpvt[j]    = jpvt[nfxd];
                jpvt[nfxd] = j + 1;
            }
            else {
                jpvt[j] = j + 1;
            }
            ++nfxd;
        }
        else {
            jpvt[j] = j + 1;
        }
    }

    /*
        // TODO:
           Factorize fixed columns
           =======================
           Compute the QR factorization of fixed columns and update
           remaining columns.
    if (nfxd > 0) {
        na = min(m,nfxd);
        lapackf77_cgeqrf(&m, &na, dA, &ldda, tau, dwork, &lwork, info);
        if (na < n) {
            n_j = n - na;
            lapackf77_cunmqr( MagmaLeftStr, MagmaConjTransStr, &m, &n_j, &na,
                              dA, &ldda, tau, dA(0, na), &ldda,
                              dwork, &lwork, info );
        }
    }*/
    
    /*  Factorize free columns */
    if (nfxd < minmn) {
        sm = m - nfxd;
        sn = n - nfxd;
        //sminmn = minmn - nfxd;
        
        /* Initialize partial column norms. */
        magmablas_scnrm2_cols( sm, sn, dA(nfxd,nfxd), ldda, &rwork[nfxd], queue );
        magma_scopymatrix( sn, 1, &rwork[nfxd], sn, &rwork[n+nfxd], sn, queue );
        
        j = nfxd;
        //if (nb < sminmn)
        {
            /* Use blocked code initially. */
            
            /* Compute factorization: while loop. */
            topbmn = minmn; // - nb;
            while(j < topbmn) {
                jb = min(nb, topbmn - j);
                
                /* Factorize JB columns among columns J:N. */
                n_j = n - j;
                
                //magma_claqps_gpu    // this is a cpp-file
                magma_claqps2_gpu   // this is a cuda-file
                    ( m, n_j, j, jb, &fjb,
                      dA(0, j), ldda,
                      &jpvt[j], &tau[j], &rwork[j], &rwork[n + j],
                      dwork,
                      &df[jb], n_j,
                      dlsticcs, queue );
                
                j += fjb;  /* fjb is actual number of columns factored */
            }
        }
        
        /*
        // Use unblocked code to factor the last or only block.
        if (j < minmn) {
            n_j = n - j;
            if (j > nfxd) {
                magma_cgetmatrix( m-j, n_j,
                                  dA(j,j), ldda,
                                   A(j,j), lda, queue );
            }
            lapackf77_claqp2(&m, &n_j, &j, dA(0, j), &ldda, &jpvt[j],
                             &tau[j], &rwork[j], &rwork[n+j], dwork );
        }*/
    }

    magma_queue_destroy( queue );
    
    magma_free( df );
    magma_free( dlsticcs );

    return *info;
} /* magma_cgeqp3_gpu */
Пример #3
0
extern "C" magma_int_t
magma_cgeqr2x_gpu(magma_int_t *m, magma_int_t *n, magmaFloatComplex *dA,
                  magma_int_t *ldda, magmaFloatComplex *dtau,
                  magmaFloatComplex *dT, magmaFloatComplex *ddA,
                  float *dwork, magma_int_t *info)
{
/*  -- MAGMA (version 1.4.1) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       December 2013

    Purpose
    =======
    CGEQR2 computes a QR factorization of a complex m by n matrix A:
    A = Q * R.

    This expert routine requires two more arguments than the standard
    cgeqr2, namely, dT and ddA, explained below. The storage for A is
    also not as in the LAPACK's cgeqr2 routine (see below).

    The first is used to output the triangular
    n x n factor T of the block reflector used in the factorization.
    The second holds the diagonal nxn blocks of A, i.e., the diagonal
    submatrices of R.

    This version implements the right-looking QR.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix A.  M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A.  N >= 0.

    A       (input/output) COMPLEX array, dimension (LDA,N)
            On entry, the m by n matrix A.
            On exit, the unitary matrix Q as a
            product of elementary reflectors (see Further Details).

            the elements on and above the diagonal of the array
            contain the min(m,n) by n upper trapezoidal matrix R (R is
            upper triangular if m >= n); the elements below the diagonal,
            with the array TAU, represent the unitary matrix Q as a
            product of elementary reflectors (see Further Details).

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,M).

    TAU     (output) COMPLEX array, dimension (min(M,N))
            The scalar factors of the elementary reflectors (see Further
            Details).

    dT      (output) COMPLEX array, dimension N x N.
            Stores the triangular N x N factor T of the block reflector
            used in the factorization. The lower triangular part is 0.

    ddA     (output) COMPLEX array, dimension N x N.
            Stores the elements of the upper N x N diagonal block of A.
            LAPACK stores this array in A. There are 0s below the diagonal.

    WORK    (workspace) COMPLEX array, dimension (N)

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: if INFO = -i, the i-th argument had an illegal value

    Further Details
    ===============
    The matrix Q is represented as a product of elementary reflectors

       Q = H(1) H(2) . . . H(k), where k = min(m,n).

    Each H(i) has the form

       H(i) = I - tau * v * v'

    where tau is a complex scalar, and v is a complex vector with
    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
    and tau in TAU(i).
    =====================================================================    */

    #define  da_ref(a_1,a_2) ( dA+(a_2)*(*ldda) + (a_1))
    
    magma_int_t i, k;

    float *dnorm = dwork;
    magmaFloatComplex *work = (magmaFloatComplex *)(dwork+2*(*n));

    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*ldda < max(1,*m)) {
        *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Compute the norms of the trailing columns */
    k = min(*m,*n);
    magmablas_scnrm2_cols(*m, k, da_ref(0,0), *ldda, dnorm);

    for (i = 0; i < k; ++i) {
        /*  Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
        magma_clarfgx_gpu(*m-i, da_ref(i, i), da_ref(min(i+1,*m), i), dtau+i, dnorm+i,
                          ddA + i + i*(*n), i);
        
        if (i < *n) {
            /* Apply H(i)' to A(i:m,i+1:n) from the left */
            magma_clarfx_gpu(*m-i, *n-i-1, da_ref(i, i), dtau+i,
                             //da_ref(i, i+1), *ldda, dnorm+i+1,
                             da_ref(i, 0), *ldda, dnorm+i+1,
                             dT, i, work );
        }
    }

    return *info;
} /* magma_cgeqr2 */