Пример #1
0
/***************************************************************************//**
 *  Parallel construction of Q using tile V (application to identity) - dynamic scheduling
 **/
void plasma_pzunglq_quark(PLASMA_desc A, PLASMA_desc Q, PLASMA_desc T,
                          PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int ldak, ldqm;
    int tempnn, tempmm, tempkmin, tempkn;
    int tempAkm, tempAkn;
    int ib;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;
    for (k = min(A.mt, A.nt)-1; k >= 0; k--) {
        tempAkm  = k == A.mt-1 ? A.m-k*A.mb : A.mb;
        tempAkn  = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        tempkmin = min( tempAkn, tempAkm );
        tempkn   = k == Q.nt-1 ? Q.n-k*Q.nb : Q.nb;
        ldak = BLKLDD(A, k);
        for (n = Q.nt-1; n > k; n--) {
            tempnn = n == Q.nt-1 ? Q.n-n*Q.nb : Q.nb;
            for (m = 0; m < Q.mt; m++) {
                tempmm = m == Q.mt-1 ? Q.m-m*Q.mb : Q.mb;
                ldqm = BLKLDD(Q, m);
                QUARK_CORE_ztsmlq(
                    plasma->quark, &task_flags,
                    PlasmaRight, PlasmaNoTrans,
                    tempmm, Q.nb, tempmm, tempnn, tempAkm, ib, T.nb,
                    Q(m, k), ldqm,
                    Q(m, n), ldqm,
                    A(k, n), ldak,
                    T(k, n), T.mb);
            }
        }
        for (m = 0; m < Q.mt; m++) {
            tempmm = m == Q.mt-1 ? Q.m-m*Q.mb : Q.mb;
            ldqm = BLKLDD(Q, m);
            QUARK_CORE_zunmlq(
                plasma->quark, &task_flags,
                PlasmaRight, PlasmaNoTrans,
                tempmm, tempkn, tempkmin, ib, T.nb,
                A(k, k), ldak,
                T(k, k), T.mb,
                Q(m, k), ldqm);
        }
    }
}
Пример #2
0
/***************************************************************************//**
 *  Parallel tile BAND Tridiagonal Reduction - dynamic scheduler
 **/
void plasma_pzherbt_quark(PLASMA_enum uplo, 
                          PLASMA_desc A, PLASMA_desc T,
                          PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n, i, j;
    int ldak, ldam, ldan, ldaj, ldai;
    int tempkn, tempmm, tempnn, tempjj;
    int ib;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;

    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;
    if (uplo == PlasmaLower) {
       for (k = 0; k < A.nt-1; k++){
           tempkn = k+1 == A.nt-1 ? A.n-(k+1)*A.nb : A.nb;
           ldak = BLKLDD(A, k+1);
           QUARK_CORE_zgeqrt(
               plasma->quark, &task_flags,
               tempkn, A.nb, ib, T.nb,
               A(k+1, k), ldak,
               T(k+1, k), T.mb);

           /* LEFT and RIGHT on the symmetric diagonal block */
           QUARK_CORE_zherfb(
               plasma->quark, &task_flags,
               PlasmaLower,
               tempkn, tempkn, ib, T.nb,
               A(k+1,   k), ldak,
               T(k+1,   k), T.mb,
               A(k+1, k+1), ldak);

           /* RIGHT on the remaining tiles until the bottom */
           for (m = k+2; m < A.mt ; m++) {
               tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
               ldam = BLKLDD(A, m);
               QUARK_CORE_zunmqr(
                   plasma->quark, &task_flags,
                   PlasmaRight, PlasmaNoTrans,
                   tempmm, A.nb, tempkn, ib, T.nb,
                   A(k+1,   k), ldak,
                   T(k+1,   k), T.mb,
                   A(m  , k+1), ldam);
           }

           for (m = k+2; m < A.mt; m++) {
               tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
               ldam = BLKLDD(A, m);
               QUARK_CORE_ztsqrt(
                   plasma->quark, &task_flags,
                   tempmm, A.nb, ib, T.nb,
                   A(k+1, k), ldak,
                   A(m  , k), ldam,
                   T(m  , k), T.mb);

               /* LEFT */
               for (i = k+2; i < m; i++) {
                   ldai = BLKLDD(A, i);
                   QUARK_CORE_ztsmqr_hetra1(
                       plasma->quark, &task_flags,
                       PlasmaLeft, PlasmaConjTrans,
                       A.mb, A.nb, tempmm, A.nb, A.nb, ib, T.nb,
                       A(i, k+1), ldai,
                       A(m,   i), ldam,
                       A(m,   k), ldam,
                       T(m,   k), T.mb);
               }

               /* RIGHT */
               for (j = m+1; j < A.mt ; j++) {
                   tempjj = j == A.mt-1 ? A.m-j*A.mb : A.mb;
                   ldaj = BLKLDD(A, j);
                   QUARK_CORE_ztsmqr(
                       plasma->quark, &task_flags,
                       PlasmaRight, PlasmaNoTrans,
                       tempjj, A.nb, tempjj, tempmm, A.nb, ib, T.nb,
                       A(j, k+1), ldaj,
                       A(j,   m), ldaj,
                       A(m,   k), ldam,
                       T(m,   k), T.mb);
               }
       
               /* LEFT->RIGHT */
               QUARK_CORE_ztsmqr_corner(
                   plasma->quark, &task_flags,
                   A.nb, A.nb, tempmm, A.nb, tempmm, tempmm, A.nb, ib, T.nb,
                   A(k+1, k+1), ldak,
                   A(m  , k+1), ldam,
                   A(m  ,   m), ldam,
                   A(m  ,   k), ldam,
                   T(m  ,   k), T.mb);
           }
       }
    }
    else {
       for (k = 0; k < A.nt-1; k++){
           tempkn = k+1 == A.nt-1 ? A.n-(k+1)*A.nb : A.nb;
           ldak = BLKLDD(A, k+1);
           QUARK_CORE_zgelqt(
               plasma->quark, &task_flags,
               A.nb, tempkn, ib, T.nb,
               A(k, k+1), A.nb,
               T(k, k+1), T.mb);

           /* RIGHT and LEFT on the symmetric diagonal block             */
           QUARK_CORE_zherfb(
               plasma->quark, &task_flags,
               PlasmaUpper,
               tempkn, tempkn, ib, T.nb,
               A(k,   k+1), A.nb,
               T(k,   k+1), T.mb,
               A(k+1, k+1), ldak);

           /* LEFT on the remaining tiles until the left side */
           for (n = k+2; n < A.nt ; n++) {
               tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
               QUARK_CORE_zunmlq(
                   plasma->quark, &task_flags,
                   PlasmaLeft, PlasmaNoTrans,
                   A.nb, tempnn, tempkn, ib, T.nb,
                   A(k,   k+1), A.nb,
                   T(k,   k+1), T.mb,
                   A(k+1,   n), ldak);
           }

           for (n = k+2; n < A.nt; n++) {
               tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
               ldan = BLKLDD(A, n);
               QUARK_CORE_ztslqt(
                   plasma->quark, &task_flags,
                   A.nb, tempnn, ib, T.nb,
                   A(k, k+1), A.nb,
                   A(k,   n), A.nb,
                   T(k,   n), T.mb);

               /* RIGHT */
               for (i = k+2; i < n; i++) {
                   ldai = BLKLDD(A, i);
                   QUARK_CORE_ztsmlq_hetra1(
                       plasma->quark, &task_flags,
                       PlasmaRight, PlasmaConjTrans,
                       A.mb, A.nb, A.nb, tempnn, A.nb, ib, T.nb,
                       A(k+1, i), ldak,
                       A(i,   n), ldai,
                       A(k,   n), A.nb,
                       T(k,   n), T.mb);
               }

               /* LEFT */
               for (j = n+1; j < A.nt ; j++) {
                   tempjj = j == A.nt-1 ? A.n-j*A.nb : A.nb;
                   ldaj = BLKLDD(A, j);
                   QUARK_CORE_ztsmlq(
                       plasma->quark, &task_flags,
                       PlasmaLeft, PlasmaNoTrans,
                       A.nb, tempjj, tempnn, tempjj, A.nb, ib, T.nb,
                       A(k+1, j), ldak,
                       A(n,   j), ldan,
                       A(k,   n), A.nb,
                       T(k,   n), T.mb);
               }
       
               /* RIGHT->LEFT */
               QUARK_CORE_ztsmlq_corner(
                   plasma->quark, &task_flags,
                   A.nb, A.nb, A.nb, tempnn, tempnn, tempnn, A.nb, ib, T.nb,
                   A(k+1, k+1), ldak,
                   A(k+1,   n), ldak,
                   A(n  ,   n), ldan,
                   A(k  ,   n), A.nb,
                   T(k  ,   n), T.mb);
           }
       }
    }
}
Пример #3
0
/***************************************************************************//**
 *  Parallel application of Q using tile V - LQ factorization (reduction
 *  Householder) - dynamic scheduling
 **/
void plasma_pzunmlqrh_quark(PLASMA_enum side, PLASMA_enum trans,
        PLASMA_desc A, PLASMA_desc B, PLASMA_desc T, int BS,
        PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int K, N, RD, lastRD;
    int ldaN, ldak;
    int ldbN, ldbm, ldbNRD;
    int tempNn, tempkm, tempnn, tempmm, tempNRDn, tempkmin;
    int ib;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;
    K = min(A.mt, A.nt);

    if (side == PlasmaLeft ) {
        if (trans == PlasmaNoTrans) {
            /*
             *  PlasmaLeft / PlasmaNoTrans
             */
            for (k = 0; k < K; k++) {
                tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                ldak = BLKLDD(A, k);
                for (N = k; N < A.nt; N += BS) {
                    tempNn   = N == A.nt-1 ? A.n-N*A.nb : A.nb;
                    tempkmin = min(tempkm,tempNn);
                    ldaN = BLKLDD(A, N);
                    ldbN = BLKLDD(B, N);
                    for (n = 0; n < B.nt; n++) {
                        tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                        QUARK_CORE_zunmlq(
                                plasma->quark, &task_flags,
                                side, trans,
                                tempNn, tempnn,
                                tempkmin, ib, T.nb,
                                A(k, N), ldak,
                                T(k, N), T.mb,
                                B(N, n), ldbN);
                    }
                    for (m = N+1; m < min(N+BS, A.nt); m++) {
                        tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                        ldbm = BLKLDD(B, m);
                        for (n = 0; n < B.nt; n++) {
                            tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                            QUARK_CORE_ztsmlq(
                                    plasma->quark, &task_flags,
                                    side, trans,
                                    B.nb, tempnn, tempmm, tempnn,
                                    tempkm, ib, T.nb,
                                    B(N, n), ldbN,
                                    B(m, n), ldbm,
                                    A(k, m), ldak,
                                    T(k, m), T.mb);
                        }
                    }
                }
                for (RD = BS; RD < A.nt-k; RD *= 2) {
                    for (N = k; N+RD < A.nt; N += 2*RD) {
                        tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
                        ldbN   = BLKLDD(B, N   );
                        ldbNRD = BLKLDD(B, N+RD);
                        for (n = 0; n < B.nt; n++) {
                            tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                            QUARK_CORE_zttmlq(
                                    plasma->quark, &task_flags,
                                    side, trans,
                                    B.mb, tempnn, tempNRDn, tempnn,
                                    tempkm, ib, T.nb,
                                    B (N,    n), ldbN,
                                    B (N+RD, n), ldbNRD,
                                    A (k, N+RD), ldak,
                                    T2(k, N+RD), T.mb);
                        }
                    }
                }
            }
        } else {
            /*
             *  PlasmaLeft / PlasmaConjTrans
             */
            for (k = K-1; k >= 0; k--) {
                tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                ldak = BLKLDD(A, k);
                lastRD = 0;
                for (RD = BS; RD < A.nt-k; RD *= 2)
                    lastRD = RD;
                for (RD = lastRD; RD >= BS; RD /= 2) {
                    for (N = k; N+RD < A.nt; N += 2*RD) {
                        tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
                        ldbN   = BLKLDD(B, N   );
                        ldbNRD = BLKLDD(B, N+RD);
                        for (n = 0; n < B.nt; n++) {
                            tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                            QUARK_CORE_zttmlq(
                                    plasma->quark, &task_flags,
                                    side, trans,
                                    B.nb, tempnn, tempNRDn, tempnn,
                                    tempkm, ib, T.nb,
                                    B (N,    n), ldbN,
                                    B (N+RD, n), ldbNRD,
                                    A (k, N+RD), ldak,
                                    T2(k, N+RD), T.mb);
                        }
                    }
                }
                for (N = k; N < A.nt; N += BS) {
                    tempNn   = N == A.nt-1 ? A.n-N*A.nb : A.nb;
                    tempkmin = min(tempkm,tempNn);
                    ldaN = BLKLDD(A, N);
                    ldbN = BLKLDD(B, N);
                    for (m = min(N+BS, A.nt)-1; m > N; m--) {
                        tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                        ldbm = BLKLDD(B, m);
                        for (n = 0; n < B.nt; n++) {
                            tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                            QUARK_CORE_ztsmlq(
                                    plasma->quark, &task_flags,
                                    side, trans,
                                    B.mb, tempnn, tempmm, tempnn,
                                    tempkm, ib, T.nb,
                                    B(N, n), ldbN,
                                    B(m, n), ldbm,
                                    A(k, m), ldak,
                                    T(k, m), T.mb);
                        }
                    }
                    for (n = 0; n < B.nt; n++) {
                        tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                        QUARK_CORE_zunmlq(
                                plasma->quark, &task_flags,
                                side, trans,
                                tempNn, tempnn,
                                tempkmin, ib, T.nb,
                                A(k, N), ldak,
                                T(k, N), T.mb,
                                B(N, n), ldbN);
                    }
                }
            }

        }
    } else {
        if (trans == PlasmaNoTrans) {
            /*
             *  PlasmaRight / PlasmaNoTrans
             */
              for (k = K-1; k >= 0; k--) {
                  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                  ldak = BLKLDD(A, k);
                  lastRD = 0;
                  for (RD = BS; RD < A.nt-k; RD *= 2)
                      lastRD = RD;
                  for (RD = lastRD; RD >= BS; RD /= 2) {
                      for (N = k; N+RD < A.nt; N += 2*RD) {
                          tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
                          for (m = 0; m < B.mt; m++) {
                              ldbm   = BLKLDD(B, m);
                              tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                              QUARK_CORE_zttmlq(
                                      plasma->quark, &task_flags,
                                      side, trans,
                                      tempmm, B.nb, tempmm, tempNRDn,
                                      tempkm, ib, T.nb,
                                      B (m, N   ), ldbm,
                                      B (m, N+RD), ldbm,
                                      A (k, N+RD), ldak,
                                      T2(k, N+RD), T.mb);
                          }
                      }
                  }
                  for (N = k; N < A.nt; N += BS) {
                      tempNn   = N == A.nt-1 ? A.n-N*A.nb : A.nb;
                      tempkmin = min(tempkm,tempNn);
                      for (n = min(N+BS, A.nt)-1; n > N; n--) {
                          tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                          for (m = 0; m < B.mt; m++) {
                              tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                              ldbm = BLKLDD(B, m);
                              QUARK_CORE_ztsmlq(
                                      plasma->quark, &task_flags,
                                      side, trans,
                                      tempmm, B.nb, tempmm, tempnn,
                                      tempkm, ib, T.nb,
                                      B(m, N), ldbm,
                                      B(m, n), ldbm,
                                      A(k, n), ldak,
                                      T(k, n), T.mb);
                          }
                      }
                      for (m = 0; m < B.mt; m++) {
                          tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                          ldbm = BLKLDD(B, m);
                          QUARK_CORE_zunmlq(
                                  plasma->quark, &task_flags,
                                  side, trans,
                                  tempmm, tempNn,
                                  tempkmin, ib, T.nb,
                                  A(k, N), ldak,
                                  T(k, N), T.mb,
                                  B(m, N), ldbm);
                      }
                  }
              }
        } else {
            /*
             *  PlasmaRight / PlasmaConjTrans
             */
            for (k = 0; k < K; k++) {
                tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                ldak = BLKLDD(A, k);
                for (N = k; N < A.nt; N += BS) {
                    tempNn = N == A.nt-1 ? A.n-N*A.nb : A.nb;
                    tempkmin = min(tempkm,tempNn);
                    ldaN = BLKLDD(A, N);
                    for (m = 0; m < B.mt; m++) {
                        ldbm = BLKLDD(B, m);
                        tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                        QUARK_CORE_zunmlq(
                                plasma->quark, &task_flags,
                                side, trans,
                                tempmm, tempNn,
                                tempkmin, ib, T.nb,
                                A(k, N), ldaN,
                                T(k, N), T.mb,
                                B(m, N), ldbm);
                    }
                    for (n = N+1; n < min(N+BS, A.nt); n++) {
                        tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                        for (m = 0; m < B.mt; m++) {
                            tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                            ldbm = BLKLDD(B, m);
                            QUARK_CORE_ztsmlq(
                                    plasma->quark, &task_flags,
                                    side, trans,
                                    tempmm, tempNn, tempmm, tempnn,
                                    tempkm, ib, T.nb,
                                    B(m, N), ldbm,
                                    B(m, n), ldbm,
                                    A(k, n), ldak,
                                    T(k, n), T.mb);
                        }
                    }
                }
                for (RD = BS; RD < A.nt-k; RD *= 2) {
                    for (N = k; N+RD < A.nt; N += 2*RD) {
                        tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
                        for (m = 0; m < B.mt; m++) {
                            tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                            ldbm   = BLKLDD(B, m);
                            QUARK_CORE_zttmlq(
                                    plasma->quark, &task_flags,
                                    side, trans,
                                    tempmm, B.nb, tempmm, tempNRDn,
                                    tempkm, ib, T.nb,
                                    B (m, N   ), ldbm,
                                    B (m, N+RD), ldbm,
                                    A (k, N+RD), ldak,
                                    T2(k, N+RD), T.mb);
                        }
                    }
                }
            }
        }
    }
}
Пример #4
0
/***************************************************************************//**
 *  Parallel application of Q using tile V - LQ factorization - dynamic scheduling
 **/
void plasma_pzunmlq_quark(PLASMA_enum side, PLASMA_enum trans,
        PLASMA_desc A, PLASMA_desc B, PLASMA_desc T,
        PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int ldak, ldan, ldbk, ldbm;
    int tempmm, tempnn, tempkn, tempkm, tempkmin;
    int ib, minMT, minM;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;
    if (A.m > A.n) {
        minM  = A.n;
        minMT = A.nt;
    } else {
        minM  = A.m;
        minMT = A.mt;
    }

    if (side == PlasmaLeft ) {
        if (trans == PlasmaNoTrans) {
            /*
             *  PlasmaLeft / PlasmaNoTrans
             */
            for (k = 0; k < minMT; k++) {
                tempkm   = k == B.mt -1 ? B.m -k*B.mb : B.mb;
                tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
                ldak = BLKLDD(A, k);
                ldbk = BLKLDD(B, k);
                for (n = 0; n < B.nt; n++) {
                    tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                    QUARK_CORE_zunmlq(
                            plasma->quark, &task_flags,
                            side, trans,
                            tempkm, tempnn, tempkmin, ib, T.nb,
                            A(k, k), ldak,
                            T(k, k), T.mb,
                            B(k, n), ldbk);
                }
                for (m = k+1; m < B.mt; m++) {
                    tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                    ldbm = BLKLDD(B, m);
                    for (n = 0; n < B.nt; n++) {
                        tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                        QUARK_CORE_ztsmlq(
                                plasma->quark, &task_flags,
                                side, trans,
                                B.mb, tempnn, tempmm, tempnn, tempkmin, ib, T.nb,
                                B(k, n), ldbk,
                                B(m, n), ldbm,
                                A(k, m), ldak,
                                T(k, m), T.mb);
                    }
                }
            }
        }
        else {
            /*
             *  PlasmaLeft / PlasmaConjTrans
             */
            for (k = minMT-1; k >= 0; k--) {
                tempkm   = k == B.mt -1 ? B.m -k*B.mb : B.mb;
                tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
                ldak = BLKLDD(A, k);
                ldbk = BLKLDD(B, k);
                for (m = B.mt-1; m > k; m--) {
                    tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                    ldbm = BLKLDD(B, m);
                    for (n = 0; n < B.nt; n++) {
                        tempnn   = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                        QUARK_CORE_ztsmlq(
                                plasma->quark, &task_flags,
                                side, trans,
                                B.mb, tempnn, tempmm, tempnn, tempkmin, ib, T.nb,
                                B(k, n), ldbk,
                                B(m, n), ldbm,
                                A(k, m), ldak,
                                T(k, m), T.mb);
                    }
                }
                for (n = 0; n < B.nt; n++) {
                    tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                    QUARK_CORE_zunmlq(
                            plasma->quark, &task_flags,
                            side, trans,
                            tempkm, tempnn, tempkmin, ib, T.nb,
                            A(k, k), ldak,
                            T(k, k), T.mb,
                            B(k, n), ldbk);
                }
            }
        }
    }
    else {
        if (trans == PlasmaNoTrans) {
            /*
             *  PlasmaRight / PlasmaNoTrans
             */
            for (k = minMT-1; k >= 0; k--) {
                tempkn   = k == B.nt -1 ? B.n -k*B.nb : B.nb;
                tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
                ldak = BLKLDD(A, k);
                for (n = B.nt-1; n > k; n--) {
                    tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                    ldan = BLKLDD(A, n);
                    for (m = 0; m < B.mt; m++) {
                        tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                        ldbm = BLKLDD(B, m);
                        QUARK_CORE_ztsmlq(
                                plasma->quark, &task_flags,
                                side, trans,
                                tempmm, B.nb, tempmm, tempnn, tempkmin, ib, T.nb,
                                B(m, k), ldbm,
                                B(m, n), ldbm,
                                A(k, n), ldak,
                                T(k, n), T.mb);
                    }
                }
                for (m = 0; m < B.mt; m++) {
                    tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                    ldbm = BLKLDD(B, m);
                    QUARK_CORE_zunmlq(
                            plasma->quark, &task_flags,
                            side, trans,
                            tempmm, tempkn, tempkmin, ib, T.nb,
                            A(k, k), ldak,
                            T(k, k), T.mb,
                            B(m, k), ldbm);
                }
            }
        }
        else {
            /*
             *  PlasmaRight / PlasmaConjTrans
             */
            for (k = 0; k < minMT; k++) {
                tempkn   = k == B.nt -1 ? B.n -k*B.nb : B.nb;
                tempkmin = k == minMT-1 ? minM-k*A.mb : A.mb;
                ldak = BLKLDD(A, k);
                for (m = 0; m < B.mt; m++) {
                    tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                    ldbm = BLKLDD(B, m);
                    QUARK_CORE_zunmlq(
                            plasma->quark, &task_flags,
                            side, trans,
                            tempmm, tempkn, tempkmin, ib, T.nb,
                            A(k, k), ldak,
                            T(k, k), T.mb,
                            B(m, k), ldbm);
                }
                for (n = k+1; n < B.nt; n++) {
                    tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                    for (m = 0; m < B.mt; m++) {
                        tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
                        ldbm = BLKLDD(B, m);
                        QUARK_CORE_ztsmlq(
                                plasma->quark, &task_flags,
                                side, trans,
                                tempmm, B.nb, tempmm, tempnn, tempkmin, ib, T.nb,
                                B(m, k), ldbm,
                                B(m, n), ldbm,
                                A(k, n), ldak,
                                T(k, n), T.mb);
                    }
                }
            }
        }
    }
}
Пример #5
0
/***************************************************************************//**
 *  Parallel construction of Q using tile V (application to identity;
 *  reduction Householder) - dynamic scheduling
 **/
void plasma_pzunglqrh_quark(PLASMA_desc A, PLASMA_desc Q, PLASMA_desc T, int BS,
                            PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int K, N, RD, lastRD;
    int ldak;
    int ldqm;
    int tempkm, tempNn, tempnn, tempmm, tempNRDn, tempkmin;
    int ib;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;
    K = min(A.mt, A.nt);

    for (k = K-1; k >= 0; k--) {
        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
        ldak = BLKLDD(A, k);
        lastRD = 0;
        for (RD = BS; RD < A.nt-k; RD *= 2)
            lastRD = RD;
        for (RD = lastRD; RD >= BS; RD /= 2) {
            for (N = k;
                 N+RD < A.nt-1; /* No reduction with rightmost single-column subdomain */
                 N += 2*RD) {
                tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
                for (m = 0; m < Q.mt; m++) {
                    tempmm = m == Q.mt-1 ? Q.m-m*Q.mb : Q.mb;
                    ldqm   = BLKLDD(Q, m   );
                    QUARK_CORE_zttmlq(
                                      plasma->quark, &task_flags,
                                      PlasmaRight, PlasmaNoTrans,
                                      tempmm, Q.nb, tempmm, tempNRDn,
                                      tempkm, ib, T.nb,
                                      Q (m, N   ), ldqm,
                                      Q (m, N+RD), ldqm,
                                      A (k, N+RD), ldak,
                                      T2(k, N+RD), T.mb); 
                }
            }
        }
        for (N = k;
             N < A.nt-1 || N == k;  /* No rightmost single-column */
             N += BS) {
            tempNn = N == A.nt-1 ? A.n-N*A.nb : A.nb;
            tempkmin = min(tempkm, tempNn);
            for (n = N+BS-1 == A.nt-2 ? A.nt-1 : min(N+BS-1, A.nt-1); /* Suck in rightmost single-column domain */
                 n >= N+1;
                 n--) {
                tempnn = n == Q.nt-1 ? Q.n-n*Q.nb : Q.nb;
                
                for (m = 0; m < Q.mt; m++) {
                    tempmm = m == Q.mt-1 ? Q.m-m*Q.mb : Q.mb;
                    ldqm = BLKLDD(Q, m);
                    QUARK_CORE_ztsmlq(
                                      plasma->quark, &task_flags,
                                      PlasmaRight, PlasmaNoTrans,
                                      tempmm, Q.nb, tempmm, tempnn,
                                      tempkm, ib, T.nb,
                                      Q(m, N), ldqm,
                                      Q(m, n), ldqm,
                                      A(k, n), ldak,
                                      T(k, n), T.mb);
                }
            }
            for (m = 0; m < Q.mt; m++) {
                tempmm = m == Q.mt-1 ? Q.m-m*Q.mb : Q.mb;
                ldqm = BLKLDD(Q, m);
                QUARK_CORE_zunmlq(
                                  plasma->quark, &task_flags,
                                  PlasmaRight, PlasmaNoTrans,
                                  tempmm, tempNn, 
                                  tempkmin, ib, T.nb,
                                  A(k, N), ldak,
                                  T(k, N), T.mb,
                                  Q(m, N), ldqm);
            }
        }
    }
}
Пример #6
0
/***************************************************************************//**
 *  Parallel tile LQ factorization (reduction Householder) - dynamic scheduling
 **/
void plasma_pzgelqfrh_quark(PLASMA_desc A, PLASMA_desc T, int BS,
                            PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int K, N, RD;
    int ldak, ldam;
    int tempkm, tempNn, tempmm, tempnn, tempNRDn;
    int ib;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;
    K = min(A.mt, A.nt);
    for (k = 0; k < K; k++) {

        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
        ldak = BLKLDD(A, k);
//      for (N = k; N < A.nt; N += BS) {
        for (N = k;
             N < A.nt-1 || N == k;  // No rightmost single-column subdomain
             N += BS) {
            tempNn = N == A.nt-1 ? A.n-N*A.nb : A.nb;
            QUARK_CORE_zgelqt(
                plasma->quark, &task_flags,
                tempkm, tempNn, ib, T.nb,
                A(k, N), ldak,
                T(k, N), T.mb);

            for (m = k+1; m < A.mt; m++) {
                tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                ldam = BLKLDD(A, m);
                QUARK_CORE_zunmlq(
                    plasma->quark, &task_flags,
                    PlasmaRight, PlasmaConjTrans,
                    tempmm, tempNn, tempNn, ib, T.nb,
                    A(k, N), ldak,
                    T(k, N), T.mb,
                    A(m, N), ldam);
            }
//          for (n = N+1; n < N+BS && n < A.nt; n++) {
            for (n = N+1;
                 (n < N+BS && n < A.nt) || n == A.nt-1; // Suck in rightmost single-column domain
                 n++) {
                tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
                QUARK_CORE_ztslqt(
                    plasma->quark, &task_flags,
                    tempkm, tempnn, ib, T.nb,
                    A(k, N), ldak,
                    A(k, n), ldak,
                    T(k, n), T.mb);

                for (m = k+1; m < A.mt; m++) {
                    tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                    ldam = BLKLDD(A, m);
                    QUARK_CORE_ztsmlq(
                        plasma->quark, &task_flags,
                        PlasmaRight, PlasmaConjTrans,
                        tempmm, A.nb, tempmm, tempnn, A.mb, ib, T.nb,
                        A(m, N), ldam,
                        A(m, n), ldam,
                        A(k, n), ldak,
                        T(k, n), T.mb);
                }
            }
        }
        for (RD = BS; RD < A.nt-k; RD *= 2) {
//          for (N = k; N+RD < A.nt; N += 2*RD) {
            for (N = k;
                 N+RD < A.nt-1; // No reduction with rightmost single-column subdomain
                 N += 2*RD) {
                tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
                QUARK_CORE_zttlqt(
                    plasma->quark, &task_flags,
                    tempkm, tempNRDn, ib, T.nb,
                    A (k, N   ), ldak,
                    A (k, N+RD), ldak,
                    T2(k, N+RD), T.mb);

                for (m = k+1; m < A.mt; m++) {
                    tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                    ldam = BLKLDD(A, m);
                    QUARK_CORE_zttmlq(
                        plasma->quark, &task_flags,
                        PlasmaRight, PlasmaConjTrans,
                        tempmm, A.nb, tempmm, tempNRDn, A.mb, ib, T.nb,
                        A (m, N   ), ldam,
                        A (m, N+RD), ldam,
                        A (k, N+RD), ldak,
                        T2(k, N+RD), T.mb);
                }
            }
        }
    }
}
Пример #7
0
/***************************************************************************//**
 *  Parallel tile LQ factorization - dynamic scheduling
 **/
void plasma_pzgelqf_quark(PLASMA_desc A, PLASMA_desc T,
                          PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int ldak, ldam;
    int tempkm, tempkn, tempmm, tempnn;
    int ib;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;
    for (k = 0; k < min(A.mt, A.nt); k++) {
        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        ldak = BLKLDD(A, k);
        QUARK_CORE_zgelqt(
            plasma->quark, &task_flags,
            tempkm, tempkn, ib, T.nb,
            A(k, k), ldak,
            T(k, k), T.mb);

        for (m = k+1; m < A.mt; m++) {
            tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            ldam = BLKLDD(A, m);
            QUARK_CORE_zunmlq(
                plasma->quark, &task_flags,
                PlasmaRight, PlasmaConjTrans,
                tempmm, tempkn, tempkn, ib, T.nb,
                A(k, k), ldak,
                T(k, k), T.mb,
                A(m, k), ldam);
        }
        for (n = k+1; n < A.nt; n++) {
            tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            QUARK_CORE_ztslqt(
                plasma->quark, &task_flags,
                tempkm, tempnn, ib, T.nb,
                A(k, k), ldak,
                A(k, n), ldak,
                T(k, n), T.mb);

            for (m = k+1; m < A.mt; m++) {
                tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                ldam = BLKLDD(A, m);
                QUARK_CORE_ztsmlq(
                    plasma->quark, &task_flags,
                    PlasmaRight, PlasmaConjTrans,
                    tempmm, A.nb, tempmm, tempnn, A.mb, ib, T.nb,
                    A(m, k), ldam,
                    A(m, n), ldam,
                    A(k, n), ldak,
                    T(k, n), T.mb);
            }
        }
    }
}