/***************************************************************************//**
 *  Parallel tile symmetric rank-2k update - static scheduling
 *
 *  Arguments are unpacked from the context with plasma_unpack_args_9, in this
 *  order: uplo, trans, alpha, A, B, beta, C, sequence, request.
 *  The routine returns immediately when sequence->status is not
 *  PLASMA_SUCCESS. `request` is unpacked but not otherwise used here.
 *
 *  Work distribution: the walk over tiles starts at offset PLASMA_RANK and
 *  advances by PLASMA_SIZE (presumably this thread's rank and the number of
 *  participating threads - confirm against the scheduler). When the row index
 *  runs past the last tile row, the walk moves to the next tile column and
 *  restarts from that column's diagonal tile, so only pairs with m >= n are
 *  visited. For uplo == PlasmaLower the updated tile is C(m, n); for any other
 *  uplo value the mirrored tile C(n, m) is updated instead.
 *
 *  Per tile:
 *    - diagonal tiles (m == n) are updated with CORE_csyr2k;
 *    - off-diagonal tiles are updated with two CORE_cgemm calls per step k,
 *      one for the A*B term and one for the B*A term.
 *  beta scales C only on the first step (k == 0); later steps accumulate
 *  with a factor of one.
 **/
void plasma_pcsyr2k(plasma_context_t *plasma)
{
    PLASMA_enum uplo;
    PLASMA_enum trans;
    PLASMA_Complex32_t alpha;
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_Complex32_t beta;
    PLASMA_desc C;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int m, n, k;
    int next_m;
    int next_n;
    int ldam, ldan, ldak;
    int ldbm, ldbn, ldbk;
    int ldcm, ldcn;
    int tempkn, tempkm, tempmm, tempnn;

    PLASMA_Complex32_t zone = (PLASMA_Complex32_t)1.0;
    PLASMA_Complex32_t zbeta;

    plasma_unpack_args_9(uplo, trans, alpha, A, B, beta, C, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Locate the first (m, n) pair owned by this thread. */
    n = 0;
    m = PLASMA_RANK;
    while (m >= C.mt && n < C.nt) {
        n++;
        m = m-C.mt+n;
    }

    while (n < C.nt) {
        /* Pre-compute the next (m, n) pair before processing the current one. */
        next_n = n;
        next_m = m + PLASMA_SIZE;
        while (next_m >= C.mt && next_n < C.nt) {
            next_n++;
            next_m = next_m - C.mt + next_n;
        }

        /* The last tile row/column may be smaller than the nominal tile size. */
        tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
        tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;

        /* Leading dimensions of tiles in tile rows n and m of C. */
        ldcn = BLKLDD(C, n);
        ldcm = BLKLDD(C, m);

        if (m == n) {
            /*
             *  PlasmaNoTrans
             */
            if (trans == PlasmaNoTrans) {
                ldam = BLKLDD(A, m);
                ldbm = BLKLDD(B, m);
                for (k = 0; k < A.nt; k++) {
                    tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                    zbeta = k == 0 ? beta : zone;
                    CORE_csyr2k(
                        uplo, trans,
                        tempnn, tempkn,
                        alpha, A(m, k), ldam,
                               B(m, k), ldbm,
                        zbeta, C(m, m), ldcm);
                }
            }
            /*
             *  Plasma[Conj]Trans
             */
            else {
                for (k = 0; k < A.mt; k++) {
                    tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                    ldak = BLKLDD(A, k);
                    ldbk = BLKLDD(B, k);
                    zbeta = k == 0 ? beta : zone;
                    CORE_csyr2k(
                        uplo, trans,
                        tempnn, tempkm,
                        alpha, A(k, m), ldak,
                               B(k, m), ldbk,
                        zbeta, C(m, m), ldcm);
                }
            }
        }
        else {
            if (trans == PlasmaNoTrans) {
                ldam = BLKLDD(A, m);
                ldan = BLKLDD(A, n);
                ldbm = BLKLDD(B, m);
                ldbn = BLKLDD(B, n);
                /*
                 *  PlasmaNoTrans / PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    for (k = 0; k < A.nt; k++) {
                        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                        zbeta = k == 0 ? beta : zone;
                        CORE_cgemm(
                            trans, PlasmaTrans,
                            tempmm, tempnn, tempkn,
                            alpha, A(m, k), ldam,
                                   B(n, k), ldbn,
                            zbeta, C(m, n), ldcm);

                        CORE_cgemm(
                            trans, PlasmaTrans,
                            tempmm, tempnn, tempkn,
                            alpha, B(m, k), ldbm,
                                   A(n, k), ldan,
                            zone,  C(m, n), ldcm);
                    }
                }
                /*
                 *  PlasmaNoTrans / PlasmaUpper
                 */
                else {
                    for (k = 0; k < A.nt; k++) {
                        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
                        zbeta = k == 0 ? beta : zone;
                        CORE_cgemm(
                            trans, PlasmaTrans,
                            tempnn, tempmm, tempkn,
                            alpha, A(n, k), ldan,
                                   B(m, k), ldbm,
                            zbeta, C(n, m), ldcn);

                        CORE_cgemm(
                            trans, PlasmaTrans,
                            tempnn, tempmm, tempkn,
                            alpha, B(n, k), ldbn,
                                   A(m, k), ldam,
                            zone,  C(n, m), ldcn);
                    }
                }
            }
            else {
                /*
                 *  Plasma[Conj]Trans / PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    for (k = 0; k < A.mt; k++) {
                        ldak = BLKLDD(A, k);
                        ldbk = BLKLDD(B, k);
                        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                        zbeta = k == 0 ? beta : zone;
                        CORE_cgemm(
                            trans, PlasmaNoTrans,
                            tempmm, tempnn, tempkm,
                            alpha, A(k, m), ldak,
                                   B(k, n), ldbk,
                            zbeta, C(m, n), ldcm);

                        CORE_cgemm(
                            trans, PlasmaNoTrans,
                            tempmm, tempnn, tempkm,
                            alpha, B(k, m), ldbk,
                                   A(k, n), ldak,
                            zone,  C(m, n), ldcm);
                    }
                }
                /*
                 *  Plasma[Conj]Trans / PlasmaUpper
                 */
                else {
                    for (k = 0; k < A.mt; k++) {
                        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
                        ldak = BLKLDD(A, k);
                        ldbk = BLKLDD(B, k);
                        zbeta = k == 0 ? beta : zone;
                        /* C(n, m) lives in tile row n, so its leading
                         * dimension is ldcn in both calls (the first call
                         * previously passed ldcm). */
                        CORE_cgemm(
                            trans, PlasmaNoTrans,
                            tempnn, tempmm, tempkm,
                            alpha, A(k, n), ldak,
                                   B(k, m), ldbk,
                            zbeta, C(n, m), ldcn);

                        CORE_cgemm(
                            trans, PlasmaNoTrans,
                            tempnn, tempmm, tempkm,
                            alpha, B(k, n), ldbk,
                                   A(k, m), ldak,
                            zone,  C(n, m), ldcn);
                    }
                }
            }
        }
        m = next_m;
        n = next_n;
    }
}
/***************************************************************************//**
 *  Parallel tile matrix-matrix multiplication - static scheduling
 *
 *  Arguments are unpacked from the context with plasma_unpack_args_9, in this
 *  order: transA, transB, alpha, A, B, beta, C, sequence, request.
 *  Nothing is done when sequence->status is not PLASMA_SUCCESS.
 *
 *  Tiles of C are walked column by column, starting at offset PLASMA_RANK and
 *  advancing by PLASMA_SIZE. Each visited tile C(row, col) is built up by one
 *  CORE_zgemm call per step along the inner dimension; beta is applied on the
 *  first step only, later steps accumulate with a factor of one.
 **/
void plasma_pzgemm(plasma_context_t *plasma)
{
    PLASMA_enum transA;
    PLASMA_enum transB;
    PLASMA_Complex64_t alpha;
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_Complex64_t beta;
    PLASMA_desc C;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    PLASMA_Complex64_t one = (PLASMA_Complex64_t)1.0;
    PLASMA_Complex64_t scale;

    int row, col, step;
    int row_next, col_next;
    int rows, cols, depth;
    int lda_row, lda_step, ldb_col, ldb_step, ldc_row;

    plasma_unpack_args_9(transA, transB, alpha, A, B, beta, C, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* First tile owned by this thread. */
    col = 0;
    row = PLASMA_RANK;
    while (row >= C.mt && col < C.nt) {
        col++;
        row -= C.mt;
    }

    for (; col < C.nt; row = row_next, col = col_next) {
        /* Work out the following tile up front. */
        row_next = row;
        col_next = col;
        row_next += PLASMA_SIZE;
        while (row_next >= C.mt && col_next < C.nt) {
            col_next++;
            row_next -= C.mt;
        }

        /* Edge tiles may be smaller than the nominal tile size. */
        rows = C.mb;
        if (row == C.mt-1)
            rows = C.m - row*C.mb;

        cols = C.nb;
        if (col == C.nt-1)
            cols = C.n - col*C.nb;

        ldc_row = BLKLDD(C, row);

        if (transA == PlasmaNoTrans && transB == PlasmaNoTrans) {
            /*
             *  A: PlasmaNoTrans / B: PlasmaNoTrans
             */
            lda_row = BLKLDD(A, row);
            for (step = 0; step < A.nt; step++) {
                depth = A.nb;
                if (step == A.nt-1)
                    depth = A.n - step*A.nb;
                ldb_step = BLKLDD(B, step);
                if (step == 0)
                    scale = beta;
                else
                    scale = one;
                CORE_zgemm(
                    transA, transB,
                    rows, cols, depth,
                    alpha, A(row, step), lda_row,
                           B(step, col), ldb_step,
                    scale, C(row, col), ldc_row);
            }
        }
        else if (transA == PlasmaNoTrans) {
            /*
             *  A: PlasmaNoTrans / B: Plasma[Conj]Trans
             */
            lda_row = BLKLDD(A, row);
            ldb_col = BLKLDD(B, col);
            for (step = 0; step < A.nt; step++) {
                depth = A.nb;
                if (step == A.nt-1)
                    depth = A.n - step*A.nb;
                if (step == 0)
                    scale = beta;
                else
                    scale = one;
                CORE_zgemm(
                    transA, transB,
                    rows, cols, depth,
                    alpha, A(row, step), lda_row,
                           B(col, step), ldb_col,
                    scale, C(row, col), ldc_row);
            }
        }
        else if (transB == PlasmaNoTrans) {
            /*
             *  A: Plasma[Conj]Trans / B: PlasmaNoTrans
             */
            for (step = 0; step < A.mt; step++) {
                depth = A.mb;
                if (step == A.mt-1)
                    depth = A.m - step*A.mb;
                lda_step = BLKLDD(A, step);
                ldb_step = BLKLDD(B, step);
                if (step == 0)
                    scale = beta;
                else
                    scale = one;
                CORE_zgemm(
                    transA, transB,
                    rows, cols, depth,
                    alpha, A(step, row), lda_step,
                           B(step, col), ldb_step,
                    scale, C(row, col), ldc_row);
            }
        }
        else {
            /*
             *  A: Plasma[Conj]Trans / B: Plasma[Conj]Trans
             */
            ldb_col = BLKLDD(B, col);
            for (step = 0; step < A.mt; step++) {
                depth = A.mb;
                if (step == A.mt-1)
                    depth = A.m - step*A.mb;
                lda_step = BLKLDD(A, step);
                if (step == 0)
                    scale = beta;
                else
                    scale = one;
                CORE_zgemm(
                    transA, transB,
                    rows, cols, depth,
                    alpha, A(step, row), lda_step,
                           B(col, step), ldb_col,
                    scale, C(row, col), ldc_row);
            }
        }
    }
}
/***************************************************************************//**
 *  Parallel tile symmetric matrix-matrix multiplication - static scheduling
 *
 *  Arguments are unpacked from the context with plasma_unpack_args_9, in this
 *  order: side, uplo, alpha, A, B, beta, C, sequence, request.
 *  Nothing is done when sequence->status is not PLASMA_SUCCESS.
 *
 *  Tiles of C are walked column by column, starting at offset PLASMA_RANK and
 *  advancing by PLASMA_SIZE. For each visited tile C(row, col):
 *    - side == PlasmaLeft : steps run over the tile rows of C and combine
 *      tiles of A (left operand) with B(step, col);
 *    - any other side     : steps run over the tile columns of C and combine
 *      B(row, step) with tiles of A (right operand).
 *  The diagonal tile A(step, step) is applied with CORE_dsymm. Off-diagonal
 *  tiles of A are read directly when they lie in the triangle selected by
 *  uplo (PlasmaLower, or the opposite triangle for any other value), and are
 *  otherwise taken as the transpose of the mirrored tile. beta is applied on
 *  the first step only; later steps accumulate with a factor of one.
 **/
void plasma_pdsymm(plasma_context_t *plasma)
{
    PLASMA_enum side;
    PLASMA_enum uplo;
    double alpha;
    PLASMA_desc A;
    PLASMA_desc B;
    double beta;
    PLASMA_desc C;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    double one = (double)1.0;
    double scale;

    int row, col, step;
    int row_next, col_next;
    int rows, cols, depth;
    int lda_fixed, lda_step, ldb_tile, ldc_row;
    int lower;

    plasma_unpack_args_9(side, uplo, alpha, A, B, beta, C, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* First tile owned by this thread. */
    col = 0;
    row = PLASMA_RANK;
    while (row >= C.mt && col < C.nt) {
        col++;
        row -= C.mt;
    }

    for (; col < C.nt; row = row_next, col = col_next) {
        /* Work out the following tile up front. */
        row_next = row;
        col_next = col;
        row_next += PLASMA_SIZE;
        while (row_next >= C.mt && col_next < C.nt) {
            col_next++;
            row_next -= C.mt;
        }

        /* Edge tiles may be smaller than the nominal tile size. */
        rows = C.mb;
        if (row == C.mt-1)
            rows = C.m - row*C.mb;

        cols = C.nb;
        if (col == C.nt-1)
            cols = C.n - col*C.nb;

        ldc_row = BLKLDD(C, row);
        lower = (uplo == PlasmaLower);

        if (side == PlasmaLeft) {
            /*
             *  PlasmaLeft / PlasmaLower or PlasmaUpper
             */
            lda_fixed = BLKLDD(A, row);
            for (step = 0; step < C.mt; step++) {
                depth = C.mb;
                if (step == C.mt-1)
                    depth = C.m - step*C.mb;
                lda_step = BLKLDD(A, step);
                ldb_tile = BLKLDD(B, step);
                if (step == 0)
                    scale = beta;
                else
                    scale = one;

                if (step == row) {
                    /* Diagonal tile of A. */
                    CORE_dsymm(
                        side, uplo,
                        rows, cols,
                        alpha, A(step, step), lda_step,
                               B(step, col), ldb_tile,
                        scale, C(row, col), ldc_row);
                }
                else if (lower == (step < row)) {
                    /* A(row, step) lies in the referenced triangle. */
                    CORE_dgemm(
                        PlasmaNoTrans, PlasmaNoTrans,
                        rows, cols, depth,
                        alpha, A(row, step), lda_fixed,
                               B(step, col), ldb_tile,
                        scale, C(row, col), ldc_row);
                }
                else {
                    /* Use the transpose of the mirrored tile A(step, row). */
                    CORE_dgemm(
                        PlasmaTrans, PlasmaNoTrans,
                        rows, cols, depth,
                        alpha, A(step, row), lda_step,
                               B(step, col), ldb_tile,
                        scale, C(row, col), ldc_row);
                }
            }
        }
        else {
            /*
             *  PlasmaRight / PlasmaLower or PlasmaUpper
             */
            lda_fixed = BLKLDD(A, col);
            ldb_tile = BLKLDD(B, row);
            for (step = 0; step < C.nt; step++) {
                depth = C.nb;
                if (step == C.nt-1)
                    depth = C.n - step*C.nb;
                lda_step = BLKLDD(A, step);
                if (step == 0)
                    scale = beta;
                else
                    scale = one;

                if (step == col) {
                    /* Diagonal tile of A. */
                    CORE_dsymm(
                        side, uplo,
                        rows, cols,
                        alpha, A(step, step), lda_step,
                               B(row, step), ldb_tile,
                        scale, C(row, col), ldc_row);
                }
                else if (lower == (step < col)) {
                    /* Use the transpose of the mirrored tile A(col, step). */
                    CORE_dgemm(
                        PlasmaNoTrans, PlasmaTrans,
                        rows, cols, depth,
                        alpha, B(row, step), ldb_tile,
                               A(col, step), lda_fixed,
                        scale, C(row, col), ldc_row);
                }
                else {
                    /* A(step, col) lies in the referenced triangle. */
                    CORE_dgemm(
                        PlasmaNoTrans, PlasmaNoTrans,
                        rows, cols, depth,
                        alpha, B(row, step), ldb_tile,
                               A(step, col), lda_step,
                        scale, C(row, col), ldc_row);
                }
            }
        }
    }
}