Example #1
0
/***************************************************************************//**
 *  Parallel tile triangular matrix inverse - dynamic scheduling
 **/
void plasma_pztrtri_quark(PLASMA_enum uplo, PLASMA_enum diag, PLASMA_desc A,
                          PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int ldam, ldan;
    int tempkn, tempmm, tempnn;

    PLASMA_Complex64_t zone  = (PLASMA_Complex64_t) 1.0;
    PLASMA_Complex64_t mzone = (PLASMA_Complex64_t)-1.0;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
    /*
     *  PlasmaLower
     */
    if (uplo == PlasmaLower) {
        for (n = 0; n < A.nt; n++) {
            tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            ldan = BLKLDD(A, n);
            for (m = n+1; m < A.mt; m++) {
                tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                ldam = BLKLDD(A, m);
                QUARK_CORE_ztrsm(
                    plasma->quark, &task_flags,
                    PlasmaRight, uplo, PlasmaNoTrans, diag,
                    tempmm, tempnn, A.mb,
                    mzone, A(n, n), ldan,
                           A(m, n), ldam);
            }
            for (m = n+1; m < A.mt; m++) {
                tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                ldam = BLKLDD(A, m);
                for (k = 0; k < n; k++) {
                    tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                    QUARK_CORE_zgemm(
                        plasma->quark, &task_flags,
                        PlasmaNoTrans, PlasmaNoTrans,
                        tempmm, tempkn, tempnn, A.mb,
                        zone, A(m, n), ldam,
                              A(n, k), ldan,
                        zone, A(m, k), ldam);
                }
            }
            for (m = 0; m < n; m++) {
                tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                QUARK_CORE_ztrsm(
                    plasma->quark, &task_flags,
                    PlasmaLeft, uplo, PlasmaNoTrans, diag,
                    tempnn, tempmm, A.mb,
                    zone, A(n, n), ldan,
                          A(n, m), ldan);
            }
            QUARK_CORE_ztrtri(
                plasma->quark, &task_flags,
                uplo, diag,
                tempnn, A.mb,
                A(n, n), ldan,
                sequence, request, A.nb*n);
        }
    }
    /*
     *  PlasmaUpper
     */
    else {
        for (m = 0; m < A.mt; m++) {
            tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            ldam = BLKLDD(A, m);
            for (n = m+1; n < A.nt; n++) {
                tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
                QUARK_CORE_ztrsm(
                    plasma->quark, &task_flags,
                    PlasmaLeft, uplo, PlasmaNoTrans, diag,
                    tempmm, tempnn, A.mb,
                    mzone, A(m, m), ldam,
                           A(m, n), ldam);
            }
            for (n = 0; n < m; n++) {
                tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
                ldan = BLKLDD(A, n);
                for (k = m+1; k < A.nt; k++) {
                    tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                    QUARK_CORE_zgemm(
                        plasma->quark, &task_flags,
                        PlasmaNoTrans, PlasmaNoTrans,
                        tempnn, tempkn, tempmm, A.mb,
                        zone, A(n, m), ldan,
                              A(m, k), ldam,
                        zone, A(n, k), ldan);
                }
                QUARK_CORE_ztrsm(
                    plasma->quark, &task_flags,
                    PlasmaRight, uplo, PlasmaNoTrans, diag,
                    tempnn, tempmm, A.mb,
                    zone, A(m, m), ldam,
                          A(n, m), ldan);
            }
            QUARK_CORE_ztrtri(
                plasma->quark, &task_flags,
                uplo, diag,
                tempmm, A.mb,
                A(m, m), ldam,
                sequence, request, A.mb*m);
        }
    }
}
Example #2
0
/***************************************************************************//**
 *  Parallel tile LU factorization with no pivoting - dynamic scheduling
 **/
void plasma_pzgetrf_nopiv_quark(PLASMA_desc A,
                                PLASMA_sequence *sequence,
                                PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n, ib;
    int ldak, ldam;
    int tempkm, tempkn, tempmm, tempnn;

    PLASMA_Complex64_t zone  = (PLASMA_Complex64_t) 1.0;
    PLASMA_Complex64_t mzone = (PLASMA_Complex64_t)-1.0;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;

    for (k = 0; k < min(A.mt, A.nt); k++) {
        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        ldak = BLKLDD(A, k);
        QUARK_CORE_zgetrf_nopiv(
            plasma->quark, &task_flags,
            tempkm, tempkn, ib, A.mb,
            A(k, k), ldak,
            sequence, request, A.mb*k);

        for (m = k+1; m < A.mt; m++) {
            tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            ldam = BLKLDD(A, m);
            QUARK_CORE_ztrsm(
                plasma->quark, &task_flags,
                PlasmaRight, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit,
                tempmm, tempkn, A.mb,
                zone, A(k, k), ldak,
                      A(m, k), ldam);
        }
        for (n = k+1; n < A.nt; n++) {
            tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            QUARK_CORE_ztrsm(
                plasma->quark, &task_flags,
                PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaUnit,
                tempkm, tempnn, A.mb,
                zone, A(k, k), ldak,
                      A(k, n), ldak);

            for (m = k+1; m < A.mt; m++) {
                tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                ldam = BLKLDD(A, m);
                QUARK_CORE_zgemm(
                    plasma->quark, &task_flags,
                    PlasmaNoTrans, PlasmaNoTrans,
                    tempmm, tempnn, A.mb, A.mb,
                    mzone, A(m, k), ldam,
                           A(k, n), ldak,
                    zone,  A(m, n), ldam);
            }
        }
    }
}