/*------------------------------------------------------------------------ * Check the factorization of the matrix A2 */ static int check_factorization(int N, float *A1, float *A2, int LDA, int uplo, float eps) { float alpha = 1.0; float Anorm, Rnorm, result; int info_factorization; int i,j; float *Residual = (float *)malloc(N*N*sizeof(float)); float *L1 = (float *)malloc(N*N*sizeof(float)); float *L2 = (float *)malloc(N*N*sizeof(float)); float *work = (float *)malloc(N*sizeof(float)); memset((void*)L1, 0, N*N*sizeof(float)); memset((void*)L2, 0, N*N*sizeof(float)); LAPACKE_slacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N); /* Dealing with L'L or U'U */ LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L1, N); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L2, N); if (uplo == PlasmaUpper) cblas_strmm(CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N); else cblas_strmm(CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N); /* Compute the Residual || A - L'L|| */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); result = Rnorm / ( Anorm * N * eps ); printf("============\n"); printf("Checking the Cholesky Factorization \n"); printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n", result); if ( isnan(result) || isinf(result) || (result > 60.0) ){ printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else{ printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(Residual); free(L1); free(L2); free(work); return info_factorization; }
void F77_strmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn, int *m, int *n, float *alpha, float *a, int *lda, float *b, int *ldb) { int i,j,LDA,LDB; float *A, *B; enum CBLAS_SIDE side; enum CBLAS_DIAG diag; enum CBLAS_UPLO uplo; enum CBLAS_TRANSPOSE trans; get_uplo_type(uplow,&uplo); get_transpose_type(transp,&trans); get_diag_type(diagn,&diag); get_side_type(rtlf,&side); if (*order == TEST_ROW_MJR) { if (side == CblasLeft) { LDA = *m+1; A = ( float* )malloc( (*m)*LDA*sizeof( float ) ); for( i=0; i<*m; i++ ) for( j=0; j<*m; j++ ) A[i*LDA+j]=a[j*(*lda)+i]; } else{ LDA = *n+1; A = ( float* )malloc( (*n)*LDA*sizeof( float ) ); for( i=0; i<*n; i++ ) for( j=0; j<*n; j++ ) A[i*LDA+j]=a[j*(*lda)+i]; } LDB = *n+1; B = ( float* )malloc( (*m)*LDB*sizeof( float ) ); for( i=0; i<*m; i++ ) for( j=0; j<*n; j++ ) B[i*LDB+j]=b[j*(*ldb)+i]; cblas_strmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, *alpha, A, LDA, B, LDB ); for( j=0; j<*n; j++ ) for( i=0; i<*m; i++ ) b[j*(*ldb)+i]=B[i*LDB+j]; free(A); free(B); } else if (*order == TEST_COL_MJR) cblas_strmm(CblasColMajor, side, uplo, trans, diag, *m, *n, *alpha, a, *lda, b, *ldb); else cblas_strmm(UNDEFINED, side, uplo, trans, diag, *m, *n, *alpha, a, *lda, b, *ldb); }
void STARPU_STRMM(const char *side, const char *uplo, const char *transA, const char *diag, const int m, const int n, const float alpha, const float *A, const int lda, float *B, const int ldb) { enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight; enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower; enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans; enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit; cblas_strmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb); }
JNIEXPORT void JNICALL Java_uncomplicate_neanderthal_CBLAS_strmm (JNIEnv *env, jclass clazz, jint Order, jint Side, jint Uplo, jint TransA, jint Diag, jint M, jint N, jfloat alpha, jobject A, jint lda, jobject B, jint ldb) { float *cA = (float *) (*env)->GetDirectBufferAddress(env, A); float *cB = (float *) (*env)->GetDirectBufferAddress(env, B); cblas_strmm(Order, Side, Uplo, TransA, Diag, M, N, alpha, cA, lda, cB, ldb); };
inline void trmm( CBLAS_ORDER const order, CBLAS_SIDE const side, CBLAS_UPLO const uplo, CBLAS_TRANSPOSE const transA, CBLAS_DIAG const unit, int const M, int const N, float const *A, int const lda, float* B, int const incB ) { cblas_strmm(order, side, uplo, transA, unit, M, N, 1.0, A, lda, B, incB ); }
static int check_transformation(int itype, int uplo, int N, float *A1, float *A2, int LDA, float *B2, int LDB, float eps) { float alpha = 1.0; float Anorm, Rnorm, result; int info_transformation; int i, j; char *str; float *Residual = (float *)malloc(N*N*sizeof(float)); float *Aorig = (float *)malloc(N*N*sizeof(float)); float *work = (float *)malloc(N*sizeof(float)); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'a', N, N, A1, LDA, Residual, N); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, Aorig, N); /* Rebuild the symmetry of A2 */ if (uplo == PlasmaLower) { for (j = 0; j < N; j++) for (i = j+1; i < N; i++) Aorig[j+i*N] = (Aorig[i+j*N]); } else { for (i = 0; i < N; i++) for (j = i+1; j < N; j++) Aorig[j+i*N] = (Aorig[i+j*N]); } if (itype == 1) { if (uplo == PlasmaLower) { str = "L*A2*L'"; cblas_strmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_strmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } else{ str = "U'*A2*U"; cblas_strmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_strmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } } else { if (uplo == PlasmaLower) { str = "inv(L')*A2*inv(L)"; cblas_strsm(CblasColMajor, CblasLeft, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_strsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } else{ str = "inv(U)*A2*inv(U')"; cblas_strsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_strsm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } } /* Compute the Residual ( A1 - W ) */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = Aorig[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); result = Rnorm / (Anorm * N *eps); printf("============\n"); printf("Checking the global transformation \n"); printf("-- ||A1-%s||_oo/(||A1||_oo.N.eps) = %e \n", str, result ); if (isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- Transformation is suspicious ! \n"); info_transformation = 1; } else { printf("-- Transformation is CORRECT ! \n"); info_transformation = 0; } free(Residual); free(Aorig); free(work); return info_transformation; }
int CORE_stsrfb(int side, int trans, int direct, int storev, int M1, int N1, int M2, int N2, int K, float *A1, int LDA1, float *A2, int LDA2, float *V, int LDV, float *T, int LDT, float *WORK, int LDWORK) { static float zone = 1.0; static float mzone = -1.0; int j; /* Check input arguments */ if (M1 < 0) { coreblas_error(5, "Illegal value of M1"); return -5; } if (N1 < 0) { coreblas_error(6, "Illegal value of N1"); return -6; } if ( (M2 < 0) || ( (M2 != M1) && (side == PlasmaRight) ) ){ coreblas_error(7, "Illegal value of M2"); return -7; } if ( (N2 < 0) || ( (N2 != N1) && (side == PlasmaLeft) ) ){ coreblas_error(8, "Illegal value of N2"); return -8; } if (K < 0) { coreblas_error(9, "Illegal value of K"); return -9; } /* Quick return */ if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0)) return PLASMA_SUCCESS; if (storev == PlasmaColumnwise) { if (direct == PlasmaForward) { if (side == PlasmaLeft) { /* * B = A1 + V' * A2 */ LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), K, N1, A1, LDA1, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasTrans, CblasNoTrans, K, N2, M2, (zone), V, LDV, A2, LDA2, (zone), WORK, LDWORK); /* * A2 = A2 - V*T*B -> B = T*B, A2 = A2 - V*B */ cblas_strmm( CblasColMajor, CblasLeft, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2, (zone), T, LDT, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2, N2, K, (mzone), V, LDV, WORK, LDWORK, (zone), A2, LDA2); /* * A1 = A1 - B */ for(j = 0; j < N1; j++) { cblas_saxpy( K, (mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } } /* * Columnwise / Forward / Right */ else { /* * B = A1 + A2 * V */ LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), M1, K, A1, LDA1, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2, K, N2, (zone), A2, LDA2, V, LDV, (zone), WORK, LDWORK); /* * A2 = A2 - B*T*V' -> B = B*T, A2 = A2 - B*V' */ cblas_strmm( CblasColMajor, CblasRight, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M1, K, (zone), T, LDT, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasTrans, M2, N2, K, (mzone), WORK, LDWORK, V, LDV, (zone), A2, LDA2); /* * A1 = A1 - B */ for(j = 0; j < K; j++) { cblas_saxpy( M1, (mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } } } else { coreblas_error(3, "Not implemented (ColMajor / Backward / Left or Right)"); return PLASMA_ERR_NOT_SUPPORTED; } } else { if (direct == PlasmaForward) { /* * Rowwise / Forward / Left */ if (side == PlasmaLeft) { /* * B = A1 + V * A2 */ LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), K, N1, A1, LDA1, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, K, N2, M2, (zone), V, LDV, A2, LDA2, (zone), WORK, LDWORK); /* * A2 = A2 - V'*T*B -> B = T*B, A2 = A2 - V'*B */ cblas_strmm( CblasColMajor, CblasLeft, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2, (zone), T, LDT, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasTrans, CblasNoTrans, M2, N2, K, (mzone), V, LDV, WORK, LDWORK, (zone), A2, LDA2); /* * A1 = A1 - B */ for(j=0; j<N1; j++) { cblas_saxpy( K, (mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } } /* * Rowwise / Forward / Right */ else { /* * B = A1 + A2 * V' */ LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), M1, K, A1, LDA1, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasTrans, M2, K, N2, (zone), A2, LDA2, V, LDV, (zone), WORK, LDWORK); /* * A2 = A2 - B*T*V -> B = B*T, A2 = A2 - B*V' */ cblas_strmm( CblasColMajor, CblasRight, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M1, K, (zone), T, LDT, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2, N2, K, (mzone), WORK, LDWORK, V, LDV, (zone), A2, LDA2); /* * A1 = A1 - B */ for(j = 0; j < K; j++) { cblas_saxpy( M1, (mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } } } else { coreblas_error(3, "Not implemented (RowMajor / Backward / Left or Right)"); return PLASMA_ERR_NOT_SUPPORTED; } } return PLASMA_SUCCESS; }