void THByteBlas_gemm8(const int is_a_transposed, const int is_b_transposed, const int is_c_transposed, const int m, const int n, const int k, const uint8_t* a, const uint8_t* b, uint8_t* c, const int lda, const int ldb, const int ldc, const int a_offset, const int b_offset, const int c_offset, const int c_mult, const int c_shift) { qgemm_(is_a_transposed, is_b_transposed, is_c_transposed, m, n, k, a, b, c, lda, ldb, ldc, a_offset, b_offset, c_offset, c_mult, c_shift); }
/* Subroutine */ void qpbtrf_(char *uplo, int *n, int *kd, LONG DOUBLE * #endif ab, int *ldab, int *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DPBTRF computes the Cholesky factorization of a real symmetric positive definite band matrix A. The factorization has the form A = U**T * U, if UPLO = 'U', or A = L * L**T, if UPLO = 'L', where U is an upper triangular matrix and L is lower triangular. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of superdiagonals of the matrix A if UPLO = 'U', or the number of subdiagonals if UPLO = 'L'. KD >= 0. AB (input/output) LONG DOUBLE PRECISION array, dimension (LDAB,N) On entry, the upper or lower triangle of the symmetric band matrix A, stored in the first KD+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for MAX(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=MIN(n,j+kd). On exit, if INFO = 0, the triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T of the band matrix A, in the same storage format as A. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed. Further Details =============== The band storage scheme is illustrated by the following example, when N = 6, KD = 2, and UPLO = 'U': On entry: On exit: * * a13 a24 a35 a46 * * u13 u24 u35 u46 * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 Similarly, if UPLO = 'L' the format of A is as follows: On entry: On exit: a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * a31 a42 a53 a64 * * l31 l42 l53 l64 * * Array elements marked * are not used by the routine. Contributed by Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static int c__1 = 1; static int c_n1 = -1; static LONG DOUBLE c_b18 = 1.; static LONG DOUBLE c_b21 = -1.; static int c__33 = 33; /* System generated locals */ int i__1, i__2, i__3, i__4; /* Local variables */ static LONG DOUBLE work[1056] /* was [33][32] */; static int i, j; #ifdef PETSC_PREFIX_SUFFIX extern /* Subroutine */ void dgemm_(char *, char *, int *, int *, #endif #ifdef Q_C_PREFIX_SUFFIX extern /* Subroutine */ void qgemm(char *, char *, int *, int *, #endif #ifdef Q_NORMAL_PREFIX_SUFFIX extern /* Subroutine */ void qgemm_(char *, char *, int *, int *, #endif int *, LONG DOUBLE *, LONG DOUBLE *, int *, LONG DOUBLE *, int *, LONG DOUBLE *, LONG DOUBLE *, int *); extern long int lsame_(char *, char *); #ifdef PETSC_PREFIX_SUFFIX extern /* Subroutine */ void dtrsm_(char *, char *, char *, char *, #endif #ifdef Q_C_PREFIX_SUFFIX extern /* Subroutine */ void qtrsm(char *, char *, char *, char *, #endif #ifdef Q_NORMAL_PREFIX_SUFFIX extern /* Subroutine */ void qtrsm_(char *, char *, char *, char *, #endif int *, int *, LONG DOUBLE *, LONG DOUBLE *, int *, LONG DOUBLE *, int *); static int i2, i3; #ifdef PETSC_PREFIX_SUFFIX extern /* Subroutine */ void dsyrk_(char *, char *, int *, int *, #endif #ifdef Q_C_PREFIX_SUFFIX extern /* Subroutine */ void qsyrk(char *, char *, int *, int *, #endif #ifdef Q_NORMAL_PREFIX_SUFFIX extern /* Subroutine */ void qsyrk_(char *, char *, int *, int *, #endif LONG DOUBLE *, LONG DOUBLE *, int *, LONG DOUBLE *, LONG DOUBLE *, #ifdef PETSC_PREFIX_SUFFIX int *), dpbtf2_(char *, int *, int *, #endif #ifdef Q_C_PREFIX_SUFFIX int *), qpbtf2(char *, int *, int *, #endif #ifdef Q_NORMAL_PREFIX_SUFFIX int *), qpbtf2_(char *, int *, int *, #endif #ifdef PETSC_PREFIX_SUFFIX LONG DOUBLE *, int *, int *), dpotf2_(char *, #endif #ifdef Q_C_PREFIX_SUFFIX LONG DOUBLE *, int *, int *), qpotf2(char *, #endif #ifdef Q_NORMAL_PREFIX_SUFFIX LONG DOUBLE *, int *, int *), qpotf2_(char *, #endif int *, LONG DOUBLE *, int *, int *); static int ib, nb, ii, jj; extern /* Subroutine */ void xerbla_(char *, int *); extern int ilaenv_(int *, char *, char *, int *, int *, int *, int *, long int, long int); #define WORK(I) work[(I)] #define WAS(I) was[(I)] #define AB(I,J) ab[(I)-1 + ((J)-1)* ( *ldab)] *info = 0; if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*ldab < *kd + 1) { *info = -5; } if (*info != 0) { i__1 = -(*info); xerbla_("DPBTRF", &i__1); return; } /* Quick return if possible */ if (*n == 0) { return; } /* Determine the block size for this environment */ nb = ilaenv_(&c__1, "DPBTRF", uplo, n, kd, &c_n1, &c_n1, 6L, 1L); /* The block size must not exceed the semi-bandwidth KD, and must not exceed the limit set by the size of the local array WORK. */ nb = MIN(nb,32); if (nb <= 1 || nb > *kd) { /* Use unblocked code */ #ifdef PETSC_PREFIX_SUFFIX dpbtf2_(uplo, n, kd, &AB(1,1), ldab, info); #endif #ifdef Q_C_PREFIX_SUFFIX qpbtf2(uplo, n, kd, &AB(1,1), ldab, info); #endif #ifdef Q_NORMAL_PREFIX_SUFFIX qpbtf2_(uplo, n, kd, &AB(1,1), ldab, info); #endif } else { /* Use blocked code */ if (lsame_(uplo, "U")) { /* Compute the Cholesky factorization of a symmetric ban d matrix, given the upper triangle of the matrix in ban d storage. Zero the upper triangle of the work array. */ i__1 = nb; for (j = 1; j <= nb; ++j) { i__2 = j - 1; for (i = 1; i <= j-1; ++i) { WORK(i + j * 33 - 34) = 0.; /* L10: */ } /* L20: */ } /* Process the band matrix one diagonal block at a time. */ i__1 = *n; i__2 = nb; for (i = 1; nb < 0 ? i >= *n : i <= *n; i += nb) { /* Computing MIN */ i__3 = nb, i__4 = *n - i + 1; ib = MIN(i__3,i__4); /* Factorize the diagonal block */ i__3 = *ldab - 1; #ifdef PETSC_PREFIX_SUFFIX dpotf2_(uplo, &ib, &AB(*kd+1,i), &i__3, &ii) #endif #ifdef Q_C_PREFIX_SUFFIX qpotf2(uplo, &ib, &AB(*kd+1,i), &i__3, &ii) #endif #ifdef Q_NORMAL_PREFIX_SUFFIX qpotf2_(uplo, &ib, &AB(*kd+1,i), &i__3, &ii) #endif ; if (ii != 0) { *info = i + ii - 1; goto L150; } if (i + ib <= *n) { /* Update the relevant part of the trailin g submatrix. If A11 denotes the diagonal block which has just been factorized, then we need to update the remaining blocks in the diagram: A11 A12 A13 A22 A23 A33 The numbers of rows and columns in the partitioning are IB, I2, I3 respectively. The blocks A12, A22 and A23 are empty if IB = KD. The upper tri angle of A13 lies outside the band. Computing MIN */ i__3 = *kd - ib, i__4 = *n - i - ib + 1; i2 = MIN(i__3,i__4); /* Computing MIN */ i__3 = ib, i__4 = *n - i - *kd + 1; i3 = MIN(i__3,i__4); if (i2 > 0) { /* Update A12 */ i__3 = *ldab - 1; i__4 = *ldab - 1; #ifdef PETSC_PREFIX_SUFFIX dtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib, #endif #ifdef Q_C_PREFIX_SUFFIX qtrsm("Left", "Upper", "Transpose", "Non-unit", &ib, #endif #ifdef Q_NORMAL_PREFIX_SUFFIX qtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib, #endif &i2, &c_b18, &AB(*kd+1,i), & i__3, &AB(*kd+1-ib,i+ib), &i__4); /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; #ifdef PETSC_PREFIX_SUFFIX dsyrk_("Upper", "Transpose", &i2, &ib, &c_b21, &AB(*kd+1-ib,i+ib), &i__3, & #endif #ifdef Q_C_PREFIX_SUFFIX qsyrk("Upper", "Transpose", &i2, &ib, &c_b21, &AB(*kd+1-ib,i+ib), &i__3, & #endif #ifdef Q_NORMAL_PREFIX_SUFFIX qsyrk_("Upper", "Transpose", &i2, &ib, &c_b21, &AB(*kd+1-ib,i+ib), &i__3, & #endif c_b18, &AB(*kd+1,i+ib), & i__4); } if (i3 > 0) { /* Copy the lower triangle of A13 i nto the work array. */ i__3 = i3; for (jj = 1; jj <= i3; ++jj) { i__4 = ib; for (ii = jj; ii <= ib; ++ii) { WORK(ii + jj * 33 - 34) = AB(ii-jj+1,jj+i+*kd-1); /* L30: */ } /* L40: */ } /* Update A13 (in the work array). */ i__3 = *ldab - 1; #ifdef PETSC_PREFIX_SUFFIX dtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib, #endif #ifdef Q_C_PREFIX_SUFFIX qtrsm("Left", "Upper", "Transpose", "Non-unit", &ib, #endif #ifdef Q_NORMAL_PREFIX_SUFFIX qtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib, #endif &i3, &c_b18, &AB(*kd+1,i), & i__3, work, &c__33); /* Update A23 */ if (i2 > 0) { i__3 = *ldab - 1; i__4 = *ldab - 1; #ifdef PETSC_PREFIX_SUFFIX dgemm_("Transpose", "No Transpose", &i2, &i3, &ib, #endif #ifdef Q_C_PREFIX_SUFFIX qgemm("Transpose", "No Transpose", &i2, &i3, &ib, #endif #ifdef Q_NORMAL_PREFIX_SUFFIX qgemm_("Transpose", "No Transpose", &i2, &i3, &ib, #endif &c_b21, &AB(*kd+1-ib,i+ib), &i__3, work, &c__33, &c_b18, & AB(ib+1,i+*kd), &i__4); } /* Update A33 */ i__3 = *ldab - 1; #ifdef PETSC_PREFIX_SUFFIX dsyrk_("Upper", "Transpose", &i3, &ib, &c_b21, work, & #endif #ifdef Q_C_PREFIX_SUFFIX qsyrk("Upper", "Transpose", &i3, &ib, &c_b21, work, & #endif #ifdef Q_NORMAL_PREFIX_SUFFIX qsyrk_("Upper", "Transpose", &i3, &ib, &c_b21, work, & #endif c__33, &c_b18, &AB(*kd+1,i+*kd), &i__3); /* Copy the lower triangle of A13 b ack into place. */ i__3 = i3; for (jj = 1; jj <= i3; ++jj) { i__4 = ib; for (ii = jj; ii <= ib; ++ii) { AB(ii-jj+1,jj+i+*kd-1) = WORK(ii + jj * 33 - 34); /* L50: */ } /* L60: */ } }