Example #1
0
/***************************************************************************//**
 *
 **/
void plasma_pdlacpy(plasma_context_t *plasma)
{
    PLASMA_enum uplo;
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int X, Y;
    int m, n;
    int next_m;
    int next_n;
    int ldam, ldbm;

    plasma_unpack_args_5(uplo, A, B, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    switch (uplo) {
    /*
     *  PlasmaUpper
     */
    case PlasmaUpper:
        m = 0;
        n = PLASMA_RANK;
        while (n >= A.nt) {
            m++;
            n = n - A.nt + m;
        }

        while (m < A.mt) {
            next_m = m;
            next_n = n;

            next_n += PLASMA_SIZE;
            while (next_n >= A.nt && next_m < A.mt) {
                next_m++;
                next_n = next_n - A.nt + next_m;
            }

            X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            ldam = BLKLDD(A, m);
            ldbm = BLKLDD(B, m);
            CORE_dlacpy(
                m == n ? uplo : PlasmaUpperLower,
                X, Y,
                A(m, n), ldam,
                B(m, n), ldbm);

            n = next_n;
            m = next_m;
        }
        break;
    /*
     *  PlasmaLower
     */
    case PlasmaLower:
        n = 0;
        m = PLASMA_RANK;
        while (m >= A.mt) {
            n++;
            m = m - A.mt + n;
        }

        while (n < A.nt) {
            next_m = m;
            next_n = n;

            next_m += PLASMA_SIZE;
            while (next_m >= A.mt && next_n < A.nt) {
                next_n++;
                next_m = next_m - A.mt + next_n;
            }

            X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            ldam = BLKLDD(A, m);
            ldbm = BLKLDD(B, m);
            CORE_dlacpy(
                m == n ? uplo : PlasmaUpperLower,
                X, Y,
                A(m, n), ldam,
                B(m, n), ldbm);

            n = next_n;
            m = next_m;
        }
        break;
    /*
     *  PlasmaUpperLower
     */
    case PlasmaUpperLower:
    default:
        n = 0;
        m = PLASMA_RANK;
        while (m >= A.mt) {
            n++;
            m = m - A.mt;
        }

        while (n < A.nt) {
            next_m = m;
            next_n = n;

            next_m += PLASMA_SIZE;
            while (next_m >= A.mt && next_n < A.nt) {
                next_n++;
                next_m = next_m - A.mt;
            }

            X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            ldam = BLKLDD(A, m);
            ldbm = BLKLDD(B, m);
            CORE_dlacpy(
                PlasmaUpperLower,
                X, Y,
                A(m, n), ldam,
                B(m, n), ldbm);

            n = next_n;
            m = next_m;
        }
        break;
    }
}
Example #2
0
/**
    Purpose
    -------
    DGETRF_INCPIV computes an LU factorization of a general M-by-N tile A
    using partial pivoting with row interchanges.

    The factorization has the form

      A = P * L * U

    where P is a permutation matrix, L is lower triangular with unit
    diagonal elements (lower trapezoidal if m > n), and U is upper
    triangular (upper trapezoidal if m < n).

    This is the right-looking Level 2.5 BLAS version of the algorithm.

    Arguments
    ---------
    @param[in]
    m       INTEGER
            The number of rows of the matrix A.  M >= 0.

    @param[in]
    n       INTEGER
            The number of columns of the matrix A.  N >= 0.

    @param[in]
    ib      INTEGER
            The inner-blocking size.  IB >= 0.

    @param[in,out]
    hA      DOUBLE_PRECISION array, dimension(LDHA, N), on cpu.
            On entry, only the M-by-IB first panel needs to be identical to dA(1..M, 1..IB).
            On exit, the content is incomplete. Shouldn't be used.

    @param[in]
    ldha    INTEGER
            The leading dimension of the array hA.  LDHA >= max(1,M).

    @param[in,out]
    dA      DOUBLE_PRECISION array, dimension(LDDA, N), on gpu.
            On entry, the M-by-N tile to be factored.
            On exit, the factors L and U from the factorization
            A = P*L*U; the unit diagonal elements of L are not stored.

    @param[in]
    ldda    INTEGER
            The leading dimension of the array dA.  LDDA >= max(1,M).

    @param[out]
    hL      DOUBLE_PRECISION array, dimension(LDHL, min(M,N)), on vpu.
            On exit, contains in the upper part the IB-by-K lower triangular tile,
            and in the lower part IB-by-min(M,N) the inverse of the top part.

    @param[in]
    ldhl    INTEGER
            The leading dimension of the array hL.  LDHL >= max(1,2*IB).

    @param[out]
    dL      DOUBLE_PRECISION array, dimension(LDDL, K), on gpu.
            On exit, contains in the upper part the IB-by-min(M,N) lower triangular tile,
            and in the lower part IB-by-min(M,N) the inverse of the top part.

    @param[in]
    lddl    INTEGER
            The leading dimension of the array dL.  LDDL >= max(1,2*IB).

    @param[out]
    ipiv    INTEGER array, dimension min(M,N), on the cpu.
            The pivot indices array.

    @param[out]
    dWORK   DOUBLE_PRECISION array, dimension(LDDWORK, 2*IB), on gpu.
            Workspace.

    @param[in]
    lddwork INTEGER
            The leading dimension of the array dWORK.  LDDWORK >= max(NB, 1).

    @param[out]
    info    INTEGER
            - PLASMA_SUCCESS successful exit
            - < 0 if INFO = -k, the k-th argument had an illegal value
            - > 0 if INFO = k, U(k,k) is exactly zero. The factorization
                has been completed, but the factor U is exactly
                singular, and division by zero will occur if it is used
                to solve a system of equations.

    @ingroup magma_dgesv_comp
    ********************************************************************/
extern "C" magma_int_t
magma_dgetrf_incpiv_gpu( magma_order_t order, magma_int_t m, magma_int_t n, magma_int_t ib,
                         double *hA, magma_int_t ldha, double *dA, magma_int_t ldda,
                         double *hL, magma_int_t ldhl, double *dL, magma_int_t lddl,
                         magma_int_t *ipiv,
                         double *dwork, magma_int_t lddwork,
                         magma_int_t *info)
{
#define AT(i,j) (dAT + (i)*ib*ldda + (j)*ib)
#define hA(i,j) (hA  + (i)*ib + (j)*ib*ldha)
#define hL(j)   (hL  + (j)*ib*ldhl         )
#define hL2(j)  (hL2 + (j)*ib*ldhl         )
#define dL(j)   (dL  + (j)*ib*lddl         )
#define dL2(j)  (dL2 + (j)*ib*lddl         )

    double c_one     = MAGMA_D_ONE;
    double c_neg_one = MAGMA_D_NEG_ONE;

    magma_int_t iinfo;
    magma_int_t maxm, mindim;
    magma_int_t i, rows, cols, s, ii, sb;
    double *dAT;
#ifndef WITHOUTTRTRI
    double *dL2 = dL + ib;
    double *hL2 = hL + ib;
#endif

    /* Check arguments */
    *info = 0;
    if (m < 0)
        *info = -1;
    else if (n < 0)
        *info = -2;
    else if (ldda < max(1,m))
        *info = -4;

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Quick return if possible */
    if (m == 0 || n == 0)
        return *info;

    /* Function Body */
    mindim = min(m, n);
    s      = mindim / ib;

    if ( ib >= mindim ) {
        /* Use CPU code. */
        lapackf77_dgetrf(&m, &n, hA, &ldha, ipiv, info);

#ifndef WITHOUTTRTRI
        CORE_dlacpy(PlasmaUpperLower, mindim, mindim,
                    (double*)hA, ldha,
                    (double*)hL2, ldhl );

        CORE_dtrtri( PlasmaLower, PlasmaUnit, mindim,
                     (double*)hL2, ldhl, info );
        if (*info != 0 ) {
            fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info);
        }

        magma_dsetmatrix( mindim, mindim, hL2, ldhl, dL2, lddl );
#endif

        if ( order == MagmaRowMajor ) {
            magma_dsetmatrix( m, n, hA, ldha, dwork, lddwork );
            magmablas_dtranspose( m, n, dwork, lddwork, dA, ldda );
        } else {
            magma_dsetmatrix( m, n, hA, ldha, dA, ldda );
        }
    }
    else {
        /* Use hybrid blocked code. */
        maxm = ((m + 31)/32)*32;

        if ( order == MagmaColMajor ) {
            magmablas_dgetmo_in( dA, dAT, ldda, m, n );
        } else {
            dAT = dA;
        }

        for( i=0; i < s; i++ ) {
            ii = i * ib;
            sb = min(ib, mindim-ii);
            cols = maxm - ii;

            if ( i > 0 ) {
                // download i-th panel
                magmablas_dtranspose( sb, m, AT(0,i), ldda, dwork, maxm );
                magma_dgetmatrix( m, sb, dwork, maxm, hA(0, i), ldha );

                // make sure that gpu queue is empty
                //magma_device_sync();
#ifndef WITHOUTTRTRI
                magma_dtrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             n - (ii+sb), ib,
                             c_one, dL2(i-1),    lddl,
                                    AT(i-1,i+1), ldda );
#else
                magma_dtrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit,
                             n - (ii+sb), ib,
                             c_one, AT(i-1,i-1), ldda,
                                    AT(i-1,i+1), ldda );
#endif
                magma_dgemm( MagmaNoTrans, MagmaNoTrans,
                             n-(ii+sb), m-ii, ib,
                             c_neg_one, AT(i-1,i+1), ldda,
                                        AT(i,  i-1), ldda,
                             c_one,     AT(i,  i+1), ldda );
            }

            // do the cpu part
            rows = m - ii;
            lapackf77_dgetrf( &rows, &sb, hA(i, i), &ldha, ipiv+ii, &iinfo);
            if ( (*info == 0) && (iinfo > 0) )
                *info = iinfo + ii;

            {
                int j;
                int fin = ii + sb;
                for (j=ii; j < fin; j++) {
                    ipiv[j] = ii + ipiv[j];
                }
            }
            magmablas_dlaswp( n-ii, AT(0, i), ldda, ii+1, ii+sb, ipiv, 1 );

#ifndef WITHOUTTRTRI
            CORE_dlacpy(PlasmaLower, sb, sb,
                        (double*)hA(i, i), ldha,
                        (double*)hL2(i), ldhl );

            CORE_dtrtri( PlasmaLower, PlasmaUnit, sb,
                         (double*)hL2(i), ldhl, info );
            if (*info != 0 ) {
                fprintf(stderr, "ERROR, trtri returned with info = %d\n", *info);
            }
            magma_dsetmatrix( sb, sb, hL2(i), ldhl, dL2(i), lddl );
#endif
            // upload i-th panel
            magma_dsetmatrix( rows, sb, hA(i, i), ldha, dwork, cols );
            magmablas_dtranspose( rows, sb, dwork, cols, AT(i,i), ldda );

            // do the small non-parallel computations
            if ( s > (i+1) ) {
#ifndef WITHOUTTRTRI
                magma_dtrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             sb, sb,
                             c_one, dL2(i),     lddl,
                                    AT(i, i+1), ldda);
#else
                magma_dtrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit,
                             sb, sb,
                             c_one, AT(i, i  ), ldda,
                                    AT(i, i+1), ldda);
#endif
                magma_dgemm( MagmaNoTrans, MagmaNoTrans,
                             sb, m-(ii+sb), sb,
                             c_neg_one, AT(i,   i+1), ldda,
                                        AT(i+1, i  ), ldda,
                             c_one,     AT(i+1, i+1), ldda );
            }
            else {
                /* Update of the last panel */
#ifndef WITHOUTTRTRI
                magma_dtrmm( MagmaRight, MagmaLower, MagmaTrans, MagmaUnit,
                             n-mindim, sb,
                             c_one, dL2(i),     lddl,
                                    AT(i, i+1), ldda);
#else
                magma_dtrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaUnit,
                             n-mindim, sb,
                             c_one, AT(i, i  ), ldda,
                                    AT(i, i+1), ldda);
#endif
                /* m-(ii+sb) should be always 0 */
                magma_dgemm( MagmaNoTrans, MagmaNoTrans,
                             n-mindim, m-(ii+sb), sb,
                             c_neg_one, AT(i,   i+1), ldda,
                                        AT(i+1, i  ), ldda,
                             c_one,     AT(i+1, i+1), ldda );
            }
        }

        if ( order == MagmaColMajor ) {
            magmablas_dgetmo_out( dA, dAT, ldda, m, n );
        }
    }
    return *info;
}