Пример #1
0
/***************************************************************************//**
 *  Parallel tile symmetric rank-2k update - static scheduling
 *
 *  The arguments (uplo, trans, alpha, A, B, beta, C, sequence, request) are
 *  unpacked from the context. The routine returns without doing any work when
 *  the sequence status is not PLASMA_SUCCESS. `request` is unpacked but not
 *  otherwise used here.
 *
 *  Only tile pairs (m, n) with m >= n are visited. They are enumerated column
 *  by column (column n holds rows n .. C.mt-1); this call starts at position
 *  PLASMA_RANK of that enumeration and advances by PLASMA_SIZE each step
 *  (presumably the worker's rank and the number of workers - confirm against
 *  the macro definitions).
 *
 *  For each visited pair:
 *    - m == n: the diagonal tile C(m, m) is updated with CORE_csyr2k;
 *    - m != n: tile C(m, n) (PlasmaLower) or C(n, m) (any other uplo) is
 *      updated with two CORE_cgemm calls per k-step, one for the A*B term and
 *      one for the B*A term.
 *  In every case `beta` scales C on the first k-step only; later steps
 *  accumulate with a factor of one.
 **/
void plasma_pcsyr2k(plasma_context_t *plasma)
{
    PLASMA_enum uplo;
    PLASMA_enum trans;
    PLASMA_Complex32_t alpha;
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_Complex32_t beta;
    PLASMA_desc C;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int m, n, k;
    int next_m;
    int next_n;
    int ldam, ldan, ldak;
    int ldbm, ldbn, ldbk;
    int ldcm, ldcn;
    int tempkn, tempkm, tempmm, tempnn;

    PLASMA_Complex32_t zone = (PLASMA_Complex32_t)1.0;
    PLASMA_Complex32_t zbeta;

    plasma_unpack_args_9(uplo, trans, alpha, A, B, beta, C, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /*
     *  Find the first (m, n) for this call. Moving to the next column
     *  restarts the row index at the diagonal, hence the "+n".
     */
    n = 0;
    m = PLASMA_RANK;
    while (m >= C.mt && n < C.nt) {
        n++;
        m = m-C.mt+n;
    }

    while (n < C.nt) {
        /* Compute the following (m, n) pair before working on this one. */
        next_n = n;
        next_m = m + PLASMA_SIZE;
        while (next_m >= C.mt && next_n < C.nt) {
            next_n++;
            next_m = next_m - C.mt + next_n;
        }

        /* The last tile row/column may be narrower than a full tile. */
        tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
        tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;

        ldcn = BLKLDD(C, n);
        ldcm = BLKLDD(C, m);

        if (m == n) {
            /*
             *  PlasmaNoTrans
             */
            if (trans == PlasmaNoTrans) {
                ldam = BLKLDD(A, m);
                ldbm = BLKLDD(B, m);
                for (k = 0; k < A.nt; k++) {
                    tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                    zbeta = k == 0 ? beta : zone;
                    CORE_csyr2k(
                        uplo, trans,
                        tempnn, tempkn,
                        alpha, A(m, k), ldam,
                               B(m, k), ldbm,
                        zbeta, C(m, m), ldcm);
                }
            }
            /*
             *  Plasma[Conj]Trans
             */
            else {
                for (k = 0; k < A.mt; k++) {
                    tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                    ldak = BLKLDD(A, k);
                    ldbk = BLKLDD(B, k);
                    zbeta = k == 0 ? beta : zone;
                    CORE_csyr2k(
                        uplo, trans,
                        tempnn, tempkm,
                        alpha, A(k, m), ldak,
                               B(k, m), ldbk,
                        zbeta, C(m, m), ldcm);
                }
            }
        }
        else {
            if (trans == PlasmaNoTrans) {
                ldam = BLKLDD(A, m);
                ldan = BLKLDD(A, n);
                ldbm = BLKLDD(B, m);
                ldbn = BLKLDD(B, n);
                /*
                 *  PlasmaNoTrans / PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    for (k = 0; k < A.nt; k++) {
                        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                        zbeta = k == 0 ? beta : zone;
                        CORE_cgemm(
                            trans, PlasmaTrans,
                            tempmm, tempnn, tempkn,
                            alpha, A(m, k), ldam,
                                   B(n, k), ldbn,
                            zbeta, C(m, n), ldcm);

                        CORE_cgemm(
                            trans, PlasmaTrans,
                            tempmm, tempnn, tempkn,
                            alpha, B(m, k), ldbm,
                                   A(n, k), ldan,
                            zone,  C(m, n), ldcm);
                    }
                }
                /*
                 *  PlasmaNoTrans / PlasmaUpper
                 */
                else {
                    for (k = 0; k < A.nt; k++) {
                        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                        zbeta = k == 0 ? beta : zone;
                        CORE_cgemm(
                            trans, PlasmaTrans,
                            tempnn, tempmm, tempkn,
                            alpha, A(n, k), ldan,
                                   B(m, k), ldbm,
                            zbeta, C(n, m), ldcn);

                        CORE_cgemm(
                            trans, PlasmaTrans,
                            tempnn, tempmm, tempkn,
                            alpha, B(n, k), ldbn,
                                   A(m, k), ldam,
                            zone,  C(n, m), ldcn);
                    }
                }
            }
            else {
                /*
                 *  Plasma[Conj]Trans / PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    for (k = 0; k < A.mt; k++) {
                        ldak = BLKLDD(A, k);
                        ldbk = BLKLDD(B, k);
                        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                        zbeta = k == 0 ? beta : zone;
                        CORE_cgemm(
                            trans, PlasmaNoTrans,
                            tempmm, tempnn, tempkm,
                            alpha, A(k, m), ldak,
                                   B(k, n), ldbk,
                            zbeta, C(m, n), ldcm);

                        CORE_cgemm(
                            trans, PlasmaNoTrans,
                            tempmm, tempnn, tempkm,
                            alpha, B(k, m), ldbk,
                                   A(k, n), ldak,
                            zone,  C(m, n), ldcm);
                    }
                }
                /*
                 *  Plasma[Conj]Trans / PlasmaUpper
                 */
                else {
                    for (k = 0; k < A.mt; k++) {
                        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                        ldak = BLKLDD(A, k);
                        ldbk = BLKLDD(B, k);
                        zbeta = k == 0 ? beta : zone;
                        /*
                         *  The output tile is C(n, m), so its leading
                         *  dimension is the one of tile row n (ldcn), the
                         *  same value the second call below passes.
                         */
                        CORE_cgemm(
                            trans, PlasmaNoTrans,
                            tempnn, tempmm, tempkm,
                            alpha, A(k, n), ldak,
                                   B(k, m), ldbk,
                            zbeta, C(n, m), ldcn);

                        CORE_cgemm(
                            trans, PlasmaNoTrans,
                            tempnn, tempmm, tempkm,
                            alpha, B(k, n), ldbk,
                                   A(k, m), ldak,
                            zone,  C(n, m), ldcn);
                    }
                }
            }
        }
        m = next_m;
        n = next_n;
    }
}
Пример #2
0
/***************************************************************************//**
 *  Parallel tile matrix-matrix multiplication - static scheduling
 *
 *  The arguments (transA, transB, alpha, A, B, beta, C, sequence, request)
 *  are unpacked from the context; nothing is computed when the sequence
 *  status is not PLASMA_SUCCESS.
 *
 *  The tiles of C are enumerated column by column. This call starts at
 *  position PLASMA_RANK of that enumeration and advances by PLASMA_SIZE
 *  after every tile. Each visited tile C(row, col) is built by a sequence of
 *  CORE_zgemm calls along the inner dimension: A.nt steps when A is not
 *  transposed, A.mt steps otherwise. `beta` is applied on the first step
 *  only; the remaining steps accumulate with a factor of one.
 **/
void plasma_pzgemm(plasma_context_t *plasma)
{
    PLASMA_enum transA;
    PLASMA_enum transB;
    PLASMA_Complex64_t alpha;
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_Complex64_t beta;
    PLASMA_desc C;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    PLASMA_Complex64_t one = (PLASMA_Complex64_t)1.0;
    PLASMA_Complex64_t scale_c;

    int row, col, p;
    int row_next, col_next;
    int rows, cols, depth;
    int lda_row, lda_p, ldb_col, ldb_p, ldc_row;

    plasma_unpack_args_9(transA, transB, alpha, A, B, beta, C, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Fold the starting position into a (row, col) tile coordinate. */
    col = 0;
    row = PLASMA_RANK;
    while (row >= C.mt && col < C.nt) {
        row -= C.mt;
        col++;
    }

    for (; col < C.nt; row = row_next, col = col_next) {
        /* Coordinate of the tile handled after this one. */
        row_next = row + PLASMA_SIZE;
        col_next = col;
        while (row_next >= C.mt && col_next < C.nt) {
            row_next -= C.mt;
            col_next++;
        }

        /* Edge tiles may be smaller than a full tile. */
        if (row == C.mt-1)
            rows = C.m - row*C.mb;
        else
            rows = C.mb;

        if (col == C.nt-1)
            cols = C.n - col*C.nb;
        else
            cols = C.nb;

        ldc_row = BLKLDD(C, row);

        /* First inner step scales C by beta; it is reset to one afterwards. */
        scale_c = beta;

        if (transA == PlasmaNoTrans && transB == PlasmaNoTrans) {
            /*
             *  A: PlasmaNoTrans / B: PlasmaNoTrans
             */
            lda_row = BLKLDD(A, row);
            for (p = 0; p < A.nt; p++) {
                depth = A.nb;
                if (p == A.nt-1)
                    depth = A.n - p*A.nb;
                ldb_p = BLKLDD(B, p);
                CORE_zgemm(
                    transA, transB,
                    rows, cols, depth,
                    alpha,   A(row, p), lda_row,
                             B(p, col), ldb_p,
                    scale_c, C(row, col), ldc_row);
                scale_c = one;
            }
        }
        else if (transA == PlasmaNoTrans) {
            /*
             *  A: PlasmaNoTrans / B: Plasma[Conj]Trans
             */
            lda_row = BLKLDD(A, row);
            ldb_col = BLKLDD(B, col);
            for (p = 0; p < A.nt; p++) {
                depth = A.nb;
                if (p == A.nt-1)
                    depth = A.n - p*A.nb;
                CORE_zgemm(
                    transA, transB,
                    rows, cols, depth,
                    alpha,   A(row, p), lda_row,
                             B(col, p), ldb_col,
                    scale_c, C(row, col), ldc_row);
                scale_c = one;
            }
        }
        else if (transB == PlasmaNoTrans) {
            /*
             *  A: Plasma[Conj]Trans / B: PlasmaNoTrans
             */
            for (p = 0; p < A.mt; p++) {
                depth = A.mb;
                if (p == A.mt-1)
                    depth = A.m - p*A.mb;
                lda_p = BLKLDD(A, p);
                ldb_p = BLKLDD(B, p);
                CORE_zgemm(
                    transA, transB,
                    rows, cols, depth,
                    alpha,   A(p, row), lda_p,
                             B(p, col), ldb_p,
                    scale_c, C(row, col), ldc_row);
                scale_c = one;
            }
        }
        else {
            /*
             *  A: Plasma[Conj]Trans / B: Plasma[Conj]Trans
             */
            ldb_col = BLKLDD(B, col);
            for (p = 0; p < A.mt; p++) {
                depth = A.mb;
                if (p == A.mt-1)
                    depth = A.m - p*A.mb;
                lda_p = BLKLDD(A, p);
                CORE_zgemm(
                    transA, transB,
                    rows, cols, depth,
                    alpha,   A(p, row), lda_p,
                             B(col, p), ldb_col,
                    scale_c, C(row, col), ldc_row);
                scale_c = one;
            }
        }
    }
}
Пример #3
0
/***************************************************************************//**
 *  Parallel tile symmetric matrix-matrix multiplication - static scheduling
 *
 *  The arguments (side, uplo, alpha, A, B, beta, C, sequence, request) are
 *  unpacked from the context; nothing is computed when the sequence status
 *  is not PLASMA_SUCCESS.
 *
 *  The tiles of C are enumerated column by column. This call starts at
 *  position PLASMA_RANK of that enumeration and advances by PLASMA_SIZE
 *  after every tile. For each visited tile C(row, col) the inner index p
 *  runs over C.mt tile rows (PlasmaLeft) or C.nt tile columns (any other
 *  side):
 *    - when p hits the diagonal tile of A, CORE_dsymm is called on A(p, p);
 *    - otherwise CORE_dgemm is called, reading the tile of A from the
 *      triangle selected by uplo and transposing it when the required tile
 *      lies in the other triangle.
 *  `beta` is applied on the first inner step only; the remaining steps
 *  accumulate with a factor of one.
 **/
void plasma_pdsymm(plasma_context_t *plasma)
{
    PLASMA_enum side;
    PLASMA_enum uplo;
    double alpha;
    PLASMA_desc A;
    PLASMA_desc B;
    double beta;
    PLASMA_desc C;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    double one = (double)1.0;
    double scale_c;

    int row, col, p;
    int row_next, col_next;
    int rows, cols, depth;
    int ld_a, ld_ap, ld_b, ld_c;
    int lower;

    plasma_unpack_args_9(side, uplo, alpha, A, B, beta, C, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Any uplo other than PlasmaLower is handled as the upper case. */
    lower = (uplo == PlasmaLower);

    /* Fold the starting position into a (row, col) tile coordinate. */
    col = 0;
    row = PLASMA_RANK;
    while (row >= C.mt && col < C.nt) {
        row -= C.mt;
        col++;
    }

    for (; col < C.nt; row = row_next, col = col_next) {
        /* Coordinate of the tile handled after this one. */
        row_next = row + PLASMA_SIZE;
        col_next = col;
        while (row_next >= C.mt && col_next < C.nt) {
            row_next -= C.mt;
            col_next++;
        }

        /* Edge tiles may be smaller than a full tile. */
        if (row == C.mt-1)
            rows = C.m - row*C.mb;
        else
            rows = C.mb;

        if (col == C.nt-1)
            cols = C.n - col*C.nb;
        else
            cols = C.nb;

        ld_c = BLKLDD(C, row);

        /* First inner step scales C by beta; it is reset to one afterwards. */
        scale_c = beta;

        if (side == PlasmaLeft) {
            /*
             *  PlasmaLeft: A multiplies B from the left.
             */
            ld_a = BLKLDD(A, row);
            for (p = 0; p < C.mt; p++) {
                depth = C.mb;
                if (p == C.mt-1)
                    depth = C.m - p*C.mb;
                ld_ap = BLKLDD(A, p);
                ld_b  = BLKLDD(B, p);

                if (p == row) {
                    /* Diagonal tile of A. */
                    CORE_dsymm(
                        side, uplo,
                        rows, cols,
                        alpha,   A(p, p), ld_ap,
                                 B(p, col), ld_b,
                        scale_c, C(row, col), ld_c);
                }
                else if ((p < row) == lower) {
                    /*
                     *  A(row, p) lies in the triangle selected by uplo
                     *  (below the diagonal for Lower, above it for Upper).
                     */
                    CORE_dgemm(
                        PlasmaNoTrans, PlasmaNoTrans,
                        rows, cols, depth,
                        alpha,   A(row, p), ld_a,
                                 B(p, col), ld_b,
                        scale_c, C(row, col), ld_c);
                }
                else {
                    /* Use the mirrored tile A(p, row), transposed. */
                    CORE_dgemm(
                        PlasmaTrans, PlasmaNoTrans,
                        rows, cols, depth,
                        alpha,   A(p, row), ld_ap,
                                 B(p, col), ld_b,
                        scale_c, C(row, col), ld_c);
                }
                scale_c = one;
            }
        }
        else {
            /*
             *  PlasmaRight: A multiplies B from the right.
             */
            ld_a = BLKLDD(A, col);
            ld_b = BLKLDD(B, row);
            for (p = 0; p < C.nt; p++) {
                depth = C.nb;
                if (p == C.nt-1)
                    depth = C.n - p*C.nb;
                ld_ap = BLKLDD(A, p);

                if (p == col) {
                    /* Diagonal tile of A. */
                    CORE_dsymm(
                        side, uplo,
                        rows, cols,
                        alpha,   A(p, p), ld_ap,
                                 B(row, p), ld_b,
                        scale_c, C(row, col), ld_c);
                }
                else if ((p < col) == lower) {
                    /*
                     *  The needed tile A(p, col) is outside the triangle
                     *  selected by uplo: use A(col, p) transposed.
                     */
                    CORE_dgemm(
                        PlasmaNoTrans, PlasmaTrans,
                        rows, cols, depth,
                        alpha,   B(row, p), ld_b,
                                 A(col, p), ld_a,
                        scale_c, C(row, col), ld_c);
                }
                else {
                    /* A(p, col) lies in the triangle selected by uplo. */
                    CORE_dgemm(
                        PlasmaNoTrans, PlasmaNoTrans,
                        rows, cols, depth,
                        alpha,   B(row, p), ld_b,
                                 A(p, col), ld_ap,
                        scale_c, C(row, col), ld_c);
                }
                scale_c = one;
            }
        }
    }
}