void matrixmultiply(float32 ** c, float32 ** a, float32 ** b, int32 n) { char side, uplo; float32 alpha; side = 'L'; uplo = 'L'; alpha = 1.0; ssymm_(&side, &uplo, &n, &n, &alpha, a[0], &n, b[0], &n, &alpha, c[0], &n); }
int f2c_ssymm(char* side, char* uplo, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb, real* beta, real* C, integer* ldc) { ssymm_(side, uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc); return 0; }
/* Subroutine */ int ssgt01_(integer *itype, char *uplo, integer *n, integer * m, real *a, integer *lda, real *b, integer *ldb, real *z__, integer * ldz, real *d__, real *work, real *result) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1; /* Local variables */ static integer i__; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static real anorm; extern /* Subroutine */ int ssymm_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *), slansy_(char *, char *, integer *, real *, integer *, real *); static real ulp; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 modified August 1997, a new parameter M is added to the calling sequence. Purpose ======= SSGT01 checks a decomposition of the form A Z = B Z D or A B Z = Z D or B A Z = Z D where A is a symmetric matrix, B is symmetric positive definite, Z is orthogonal, and D is diagonal. One of the following test ratios is computed: ITYPE = 1: RESULT(1) = | A Z - B Z D | / ( |A| |Z| n ulp ) ITYPE = 2: RESULT(1) = | A B Z - Z D | / ( |A| |Z| n ulp ) ITYPE = 3: RESULT(1) = | B A Z - Z D | / ( |A| |Z| n ulp ) Arguments ========= ITYPE (input) INTEGER The form of the symmetric generalized eigenproblem. = 1: A*z = (lambda)*B*z = 2: A*B*z = (lambda)*z = 3: B*A*z = (lambda)*z UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrices A and B is stored. = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The order of the matrix A. N >= 0. M (input) INTEGER The number of eigenvalues found. 0 <= M <= N. A (input) REAL array, dimension (LDA, N) The original symmetric matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input) REAL array, dimension (LDB, N) The original symmetric positive definite matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). Z (input) REAL array, dimension (LDZ, M) The computed eigenvectors of the generalized eigenproblem. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N). D (input) REAL array, dimension (M) The computed eigenvalues of the generalized eigenproblem. WORK (workspace) REAL array, dimension (N*N) RESULT (output) REAL array, dimension (1) The test ratio as described above. ===================================================================== Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --d__; --work; --result; /* Function Body */ result[1] = 0.f; if (*n <= 0) { return 0; } ulp = slamch_("Epsilon"); /* Compute product of 1-norms of A and Z. */ anorm = slansy_("1", uplo, n, &a[a_offset], lda, &work[1]) * slange_("1", n, m, &z__[z_offset], ldz, &work[1]); if (anorm == 0.f) { anorm = 1.f; } if (*itype == 1) { /* Norm of AZ - BZD */ ssymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &z__[z_offset], ldz, &c_b7, &work[1], n); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { sscal_(n, &d__[i__], &z___ref(1, i__), &c__1); /* L10: */ } ssymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &z__[z_offset], ldz, &c_b12, &work[1], n); result[1] = slange_("1", n, m, &work[1], n, &work[1]) / anorm / (*n * ulp); } else if (*itype == 2) { /* Norm of ABZ - ZD */ ssymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &z__[z_offset], ldz, &c_b7, &work[1], n); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { sscal_(n, &d__[i__], &z___ref(1, i__), &c__1); /* L20: */ } ssymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &work[1], n, & c_b12, &z__[z_offset], ldz); result[1] = slange_("1", n, m, &z__[z_offset], ldz, &work[1]) / anorm / (*n * ulp); } else if (*itype == 3) { /* Norm of BAZ - ZD */ ssymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &z__[z_offset], ldz, &c_b7, &work[1], n); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { sscal_(n, &d__[i__], &z___ref(1, i__), &c__1); /* L30: */ } ssymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &work[1], n, & c_b12, &z__[z_offset], ldz); result[1] = slange_("1", n, m, &z__[z_offset], ldz, &work[1]) / anorm / (*n * ulp); } return 0; /* End of SSGT01 */ } /* ssgt01_ */
/* Subroutine */ int ssyt22_(integer *itype, char *uplo, integer *n, integer * m, integer *kband, real *a, integer *lda, real *d__, real *e, real *u, integer *ldu, real *v, integer *ldv, real *tau, real *work, real * result) { /* System generated locals */ integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1; real r__1, r__2; /* Local variables */ static real unfl; static integer j; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static real anorm; extern /* Subroutine */ int sort01_(char *, integer *, integer *, real *, integer *, real *, integer *, real *); static real wnorm; extern /* Subroutine */ int ssymm_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static integer jj, nn; extern doublereal slamch_(char *); static integer jj1, jj2; extern doublereal slansy_(char *, char *, integer *, real *, integer *, real *); static real ulp; static integer nnp1; /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= SSYT22 generally checks a decomposition of the form A U = U S where A is symmetric, the columns of U are orthonormal, and S is diagonal (if KBAND=0) or symmetric tridiagonal (if KBAND=1). If ITYPE=1, then U is represented as a dense matrix, otherwise the U is expressed as a product of Householder transformations, whose vectors are stored in the array "V" and whose scaling constants are in "TAU"; we shall use the letter "V" to refer to the product of Householder transformations (which should be equal to U). Specifically, if ITYPE=1, then: RESULT(1) = | U' A U - S | / ( |A| m ulp ) *and* RESULT(2) = | I - U'U | / ( m ulp ) Arguments ========= ITYPE INTEGER Specifies the type of tests to be performed. 1: U expressed as a dense orthogonal matrix: RESULT(1) = | A - U S U' | / ( |A| n ulp ) *and* RESULT(2) = | I - UU' | / ( n ulp ) UPLO CHARACTER If UPLO='U', the upper triangle of A will be used and the (strictly) lower triangle will not be referenced. If UPLO='L', the lower triangle of A will be used and the (strictly) upper triangle will not be referenced. Not modified. N INTEGER The size of the matrix. If it is zero, SSYT22 does nothing. It must be at least zero. Not modified. M INTEGER The number of columns of U. If it is zero, SSYT22 does nothing. It must be at least zero. Not modified. KBAND INTEGER The bandwidth of the matrix. It may only be zero or one. If zero, then S is diagonal, and E is not referenced. If one, then S is symmetric tri-diagonal. Not modified. A REAL array, dimension (LDA , N) The original (unfactored) matrix. It is assumed to be symmetric, and only the upper (UPLO='U') or only the lower (UPLO='L') will be referenced. Not modified. LDA INTEGER The leading dimension of A. It must be at least 1 and at least N. Not modified. D REAL array, dimension (N) The diagonal of the (symmetric tri-) diagonal matrix. Not modified. E REAL array, dimension (N) The off-diagonal of the (symmetric tri-) diagonal matrix. E(1) is ignored, E(2) is the (1,2) and (2,1) element, etc. Not referenced if KBAND=0. Not modified. U REAL array, dimension (LDU, N) If ITYPE=1 or 3, this contains the orthogonal matrix in the decomposition, expressed as a dense matrix. If ITYPE=2, then it is not referenced. Not modified. LDU INTEGER The leading dimension of U. LDU must be at least N and at least 1. Not modified. V REAL array, dimension (LDV, N) If ITYPE=2 or 3, the lower triangle of this array contains the Householder vectors used to describe the orthogonal matrix in the decomposition. If ITYPE=1, then it is not referenced. Not modified. LDV INTEGER The leading dimension of V. LDV must be at least N and at least 1. Not modified. TAU REAL array, dimension (N) If ITYPE >= 2, then TAU(j) is the scalar factor of v(j) v(j)' in the Householder transformation H(j) of the product U = H(1)...H(n-2) If ITYPE < 2, then TAU is not referenced. Not modified. WORK REAL array, dimension (2*N**2) Workspace. Modified. RESULT REAL array, dimension (2) The values computed by the two tests described above. The values are currently limited to 1/ulp, to avoid overflow. RESULT(1) is always modified. RESULT(2) is modified only if LDU is at least N. Modified. ===================================================================== Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --d__; --e; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1 * 1; v -= v_offset; --tau; --work; --result; /* Function Body */ result[1] = 0.f; result[2] = 0.f; if (*n <= 0 || *m <= 0) { return 0; } unfl = slamch_("Safe minimum"); ulp = slamch_("Precision"); /* Do Test 1 Norm of A: Computing MAX */ r__1 = slansy_("1", uplo, n, &a[a_offset], lda, &work[1]); anorm = dmax(r__1,unfl); /* Compute error matrix: ITYPE=1: error = U' A U - S */ ssymm_("L", uplo, n, m, &c_b6, &a[a_offset], lda, &u[u_offset], ldu, & c_b7, &work[1], n); nn = *n * *n; nnp1 = nn + 1; sgemm_("T", "N", m, m, n, &c_b6, &u[u_offset], ldu, &work[1], n, &c_b7, & work[nnp1], n); i__1 = *m; for (j = 1; j <= i__1; ++j) { jj = nn + (j - 1) * *n + j; work[jj] -= d__[j]; /* L10: */ } if (*kband == 1 && *n > 1) { i__1 = *m; for (j = 2; j <= i__1; ++j) { jj1 = nn + (j - 1) * *n + j - 1; jj2 = nn + (j - 2) * *n + j; work[jj1] -= e[j - 1]; work[jj2] -= e[j - 1]; /* L20: */ } } wnorm = slansy_("1", uplo, m, &work[nnp1], n, &work[1]); if (anorm > wnorm) { result[1] = wnorm / anorm / (*m * ulp); } else { if (anorm < 1.f) { /* Computing MIN */ r__1 = wnorm, r__2 = *m * anorm; result[1] = dmin(r__1,r__2) / anorm / (*m * ulp); } else { /* Computing MIN */ r__1 = wnorm / anorm, r__2 = (real) (*m); result[1] = dmin(r__1,r__2) / (*m * ulp); } } /* Do Test 2 Compute U'U - I */ if (*itype == 1) { i__1 = (*n << 1) * *n; sort01_("Columns", n, m, &u[u_offset], ldu, &work[1], &i__1, &result[ 2]); } return 0; /* End of SSYT22 */ } /* ssyt22_ */
/* Subroutine */ int ssygst_(integer *itype, char *uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer k, kb, nb; extern logical lsame_(char *, char *); logical upper; extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), ssymm_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real * , real *, integer *), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real * , integer *), ssygs2_(integer *, char *, integer *, real *, integer *, real *, integer *, integer * ), ssyr2k_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SSYGST reduces a real symmetric-definite generalized eigenproblem */ /* to standard form. */ /* If ITYPE = 1, the problem is A*x = lambda*B*x, */ /* and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) */ /* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ /* B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. */ /* B must have been previously factorized as U**T*U or L*L**T by SPOTRF. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */ /* = 2 or 3: compute U*A*U**T or L**T*A*L. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored and B is factored as */ /* U**T*U; */ /* = 'L': Lower triangle of A is stored and B is factored as */ /* L*L**T. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the transformed matrix, stored in the */ /* same format as A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input) REAL array, dimension (LDB,N) */ /* The triangular factor from the Cholesky factorization of B, */ /* as returned by SPOTRF. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (*itype < 1 || *itype > 3) { *info = -1; } else if (! upper && ! lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("SSYGST", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = ilaenv_(&c__1, "SSYGST", uplo, n, &c_n1, &c_n1, &c_n1); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ ssygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); } else { /* Use blocked code */ if (*itype == 1) { if (upper) { /* Compute inv(U')*A*inv(U) */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(k:n,k:n) */ ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; strsm_("Left", uplo, "Transpose", "Non-unit", &kb, & i__3, &c_b14, &b[k + k * b_dim1], ldb, &a[k + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a[k + k * a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, &c_b14, &a[k + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b19, &a[k + (k + kb) * a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a[k + k * a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, &c_b14, &a[k + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; strsm_("Right", uplo, "No transpose", "Non-unit", &kb, &i__3, &c_b14, &b[k + kb + (k + kb) * b_dim1] , ldb, &a[k + (k + kb) * a_dim1], lda); } /* L10: */ } } else { /* Compute inv(L)*A*inv(L') */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(k:n,k:n) */ ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; strsm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b[k + k * b_dim1], ldb, &a[k + kb + k * a_dim1], lda); i__3 = *n - k - kb + 1; ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a[k + k * a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & c_b14, &a[k + kb + k * a_dim1], lda); i__3 = *n - k - kb + 1; ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b19, &a[ k + kb + k * a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a[k + k * a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & c_b14, &a[k + kb + k * a_dim1], lda); i__3 = *n - k - kb + 1; strsm_("Left", uplo, "No transpose", "Non-unit", & i__3, &kb, &c_b14, &b[k + kb + (k + kb) * b_dim1], ldb, &a[k + kb + k * a_dim1], lda); } /* L20: */ } } } else { if (upper) { /* Compute U*A*U' */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; strmm_("Left", uplo, "No transpose", "Non-unit", &i__3, & kb, &c_b14, &b[b_offset], ldb, &a[k * a_dim1 + 1], lda) ; i__3 = k - 1; ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a[k + k * a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[ k * a_dim1 + 1], lda); i__3 = k - 1; ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b14, &a[k * a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[a_offset], lda); i__3 = k - 1; ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a[k + k * a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[ k * a_dim1 + 1], lda); i__3 = k - 1; strmm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b[k + k * b_dim1], ldb, &a[k * a_dim1 + 1], lda); ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); /* L30: */ } } else { /* Compute L'*A*L */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; strmm_("Right", uplo, "No transpose", "Non-unit", &kb, & i__3, &c_b14, &b[b_offset], ldb, &a[k + a_dim1], lda); i__3 = k - 1; ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a[k + k * a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + a_dim1], lda); i__3 = k - 1; ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b14, &a[k + a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[ a_offset], lda); i__3 = k - 1; ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a[k + k * a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + a_dim1], lda); i__3 = k - 1; strmm_("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, &c_b14, &b[k + k * b_dim1], ldb, &a[k + a_dim1], lda); ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); /* L40: */ } } } } return 0; /* End of SSYGST */ } /* ssygst_ */
void ssymm(char side, char uplo, int m, int n, float alpha, float *a, int lda, float *b, int ldb, float beta, float *c, int ldc) { ssymm_( &side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt; int r, n_repeats; side_t side; uplo_t uploa; f77_char f77_side; f77_char f77_uploa; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 4; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uploa = BLIS_LOWER; //uploa = BLIS_UPPER; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, a ); bli_obj_set_uplo( uploa, a ); // Randomize A, make it densely Hermitian, and zero the unstored // triangle to ensure the implementation reads only from the stored // region. bli_randm( &a ); bli_mkherm( &a ); bli_mktrim( &a ); /* bli_obj_toggle_uplo( a ); bli_obj_inc_diag_off( 1, a ); bli_setm( &BLIS_ZERO, &a ); bli_obj_inc_diag_off( -1, a ); bli_obj_toggle_uplo( a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, a ); bli_scalm( &BLIS_TWO, &a ); bli_scalm( &BLIS_TWO, &a ); */ bli_setsc( (2.0/1.0), 1.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_hemm( side, &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); ssymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); chemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zhemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%9.5f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_hemm_blis" ); #else printf( "data_hemm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
/* Subroutine */ int ssygst_(integer *itype, char *uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SSYGST reduces a real symmetric-definite generalized eigenproblem to standard form. If ITYPE = 1, the problem is A*x = lambda*B*x, and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. B must have been previously factorized as U**T*U or L*L**T by SPOTRF. Arguments ========= ITYPE (input) INTEGER = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); = 2 or 3: compute U*A*U**T or L**T*A*L. UPLO (input) CHARACTER = 'U': Upper triangle of A is stored and B is factored as U**T*U; = 'L': Lower triangle of A is stored and B is factored as L*L**T. N (input) INTEGER The order of the matrices A and B. N >= 0. A (input/output) REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the transformed matrix, stored in the same format as A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input) REAL array, dimension (LDB,N) The triangular factor from the Cholesky factorization of B, as returned by SPOTRF. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static real c_b14 = 1.f; static real c_b16 = -.5f; static real c_b19 = -1.f; static real c_b52 = .5f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ static integer k; extern logical lsame_(char *, char *); static logical upper; extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), ssymm_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real * , real *, integer *), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real * , integer *); static integer kb, nb; extern /* Subroutine */ int ssygs2_(integer *, char *, integer *, real *, integer *, real *, integer *, integer *), ssyr2k_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), xerbla_( char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (*itype < 1 || *itype > 3) { *info = -1; } else if (! upper && ! lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("SSYGST", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = ilaenv_(&c__1, "SSYGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ ssygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); } else { /* Use blocked code */ if (*itype == 1) { if (upper) { /* Compute inv(U')*A*inv(U) */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(k:n,k:n) */ ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; strsm_("Left", uplo, "Transpose", "Non-unit", &kb, & i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, k + kb), lda); i__3 = *n - k - kb + 1; ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k), lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref( k, k + kb), lda); i__3 = *n - k - kb + 1; ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b19, &a_ref( k, k + kb), lda, &b_ref(k, k + kb), ldb, & c_b14, &a_ref(k + kb, k + kb), lda); i__3 = *n - k - kb + 1; ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k), lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref( k, k + kb), lda); i__3 = *n - k - kb + 1; strsm_("Right", uplo, "No transpose", "Non-unit", &kb, &i__3, &c_b14, &b_ref(k + kb, k + kb), ldb, & a_ref(k, k + kb), lda); } /* L10: */ } } else { /* Compute inv(L)*A*inv(L') */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(k:n,k:n) */ ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; strsm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b_ref(k, k), ldb, &a_ref(k + kb, k), lda); i__3 = *n - k - kb + 1; ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k) , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref( k + kb, k), lda); i__3 = *n - k - kb + 1; ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b19, & a_ref(k + kb, k), lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(k + kb, k + kb), lda); i__3 = *n - k - kb + 1; ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k) , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref( k + kb, k), lda); i__3 = *n - k - kb + 1; strsm_("Left", uplo, "No transpose", "Non-unit", & i__3, &kb, &c_b14, &b_ref(k + kb, k + kb), ldb, &a_ref(k + kb, k), lda); } /* L20: */ } } } else { if (upper) { /* Compute U*A*U' */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; strmm_("Left", uplo, "No transpose", "Non-unit", &i__3, & kb, &c_b14, &b[b_offset], ldb, &a_ref(1, k), lda); i__3 = k - 1; ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda); i__3 = k - 1; ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b14, &a_ref( 1, k), lda, &b_ref(1, k), ldb, &c_b14, &a[ a_offset], lda); i__3 = k - 1; ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda); i__3 = k - 1; strmm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b_ref(k, k), ldb, &a_ref(1, k), lda); ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); /* L30: */ } } else { /* Compute L'*A*L */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; strmm_("Right", uplo, "No transpose", "Non-unit", &kb, & i__3, &c_b14, &b[b_offset], ldb, &a_ref(k, 1), lda); i__3 = k - 1; ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda); i__3 = k - 1; ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b14, &a_ref(k, 1), lda, &b_ref(k, 1), ldb, &c_b14, &a[a_offset], lda); i__3 = k - 1; ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda); i__3 = k - 1; strmm_("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, 1), lda); ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); /* L40: */ } } } } return 0; /* End of SSYGST */ } /* ssygst_ */
void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const integer M, const integer N, const float alpha, const float *A, const integer lda, const float *B, const integer ldb, const float beta, float *C, const integer ldc) { char SD, UL; #ifdef F77_CHAR F77_CHAR F77_SD, F77_UL; #else #define F77_SD &SD #define F77_UL &UL #endif #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc extern integer CBLAS_CallFromC; extern integer RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_ssymm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_ssymm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif ssymm_(F77_SD, F77_UL, &F77_M, &F77_N, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_ssymm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_ssymm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif ssymm_(F77_SD, F77_UL, &F77_N, &F77_M, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); } else cblas_xerbla(1, "cblas_ssymm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; }