示例#1
0
/*------------------------------------------------------------------------
 *  Check the factorization of the matrix A2
 */
static int check_factorization(int N, float *A1, float *A2, int LDA, int uplo, float eps)
{
    float alpha = 1.0;
    float Anorm, Rnorm, result;
    int info_factorization;
    int i,j;

    float *Residual = (float *)malloc(N*N*sizeof(float));
    float *L1       = (float *)malloc(N*N*sizeof(float));
    float *L2       = (float *)malloc(N*N*sizeof(float));
    float *work                 = (float *)malloc(N*sizeof(float));

    memset((void*)L1, 0, N*N*sizeof(float));
    memset((void*)L2, 0, N*N*sizeof(float));

    LAPACKE_slacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);

    /* Dealing with L'L or U'U  */
    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L1, N);
    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L2, N);
    if (uplo == PlasmaUpper)
       cblas_strmm(CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
    else
       cblas_strmm(CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);

    /* Compute the Residual || A - L'L|| */
    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++)
           Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];

    Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N,   work);
    Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1,       LDA, work);

    result = Rnorm / ( Anorm * N * eps );
    printf("============\n");
    printf("Checking the Cholesky Factorization \n");
    printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n", result);

    if ( isnan(result) || isinf(result) || (result > 60.0) ){
        printf("-- Factorization is suspicious ! \n");
        info_factorization = 1;
    }
    else{
        printf("-- Factorization is CORRECT ! \n");
        info_factorization = 0;
    }

    free(Residual); free(L1); free(L2); free(work);

    return info_factorization;
}
示例#2
0
void F77_strmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
              int *m, int *n, float *alpha, float *a, int *lda, float *b, 
              int *ldb) {
  int i,j,LDA,LDB;
  float *A, *B;
  enum CBLAS_SIDE side;
  enum CBLAS_DIAG diag;
  enum CBLAS_UPLO uplo;
  enum CBLAS_TRANSPOSE trans;

  get_uplo_type(uplow,&uplo);
  get_transpose_type(transp,&trans);
  get_diag_type(diagn,&diag);
  get_side_type(rtlf,&side);

  if (*order == TEST_ROW_MJR) {
     if (side == CblasLeft) {
        LDA = *m+1;
        A   = ( float* )malloc( (*m)*LDA*sizeof( float ) );
        for( i=0; i<*m; i++ )
           for( j=0; j<*m; j++ )
              A[i*LDA+j]=a[j*(*lda)+i];
     }
     else{
        LDA = *n+1;
        A   = ( float* )malloc( (*n)*LDA*sizeof( float ) );
        for( i=0; i<*n; i++ )
           for( j=0; j<*n; j++ )
              A[i*LDA+j]=a[j*(*lda)+i];
     }
     LDB = *n+1;
     B   = ( float* )malloc( (*m)*LDB*sizeof( float ) );
     for( i=0; i<*m; i++ )
        for( j=0; j<*n; j++ )
           B[i*LDB+j]=b[j*(*ldb)+i];
     cblas_strmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, *alpha, 
		 A, LDA, B, LDB );
     for( j=0; j<*n; j++ )
        for( i=0; i<*m; i++ )
           b[j*(*ldb)+i]=B[i*LDB+j];
     free(A);
     free(B);
  }
  else if (*order == TEST_COL_MJR)
     cblas_strmm(CblasColMajor, side, uplo, trans, diag, *m, *n, *alpha, 
		   a, *lda, b, *ldb);
  else
     cblas_strmm(UNDEFINED, side, uplo, trans, diag, *m, *n, *alpha, 
		   a, *lda, b, *ldb);
}
void STARPU_STRMM(const char *side, const char *uplo, const char *transA,
                 const char *diag, const int m, const int n,
                 const float alpha, const float *A, const int lda,
                 float *B, const int ldb)
{
	enum CBLAS_SIDE side_ = (toupper(side[0]) == 'L')?CblasLeft:CblasRight;
	enum CBLAS_UPLO uplo_ = (toupper(uplo[0]) == 'U')?CblasUpper:CblasLower;
	enum CBLAS_TRANSPOSE transA_ = (toupper(transA[0]) == 'N')?CblasNoTrans:CblasTrans;
	enum CBLAS_DIAG diag_ = (toupper(diag[0]) == 'N')?CblasNonUnit:CblasUnit;

	cblas_strmm(CblasColMajor, side_, uplo_, transA_, diag_, m, n, alpha, A, lda, B, ldb);
}
示例#4
0
JNIEXPORT void JNICALL Java_uncomplicate_neanderthal_CBLAS_strmm
(JNIEnv *env, jclass clazz,
 jint Order, jint Side,
 jint Uplo, jint TransA, jint Diag,
 jint M, jint N,
 jfloat alpha,
 jobject A, jint lda,
 jobject B, jint ldb) {

  float *cA = (float *) (*env)->GetDirectBufferAddress(env, A);
  float *cB = (float *) (*env)->GetDirectBufferAddress(env, B);
  cblas_strmm(Order, Side, Uplo, TransA, Diag, M, N, alpha, cA, lda, cB, ldb);
};
示例#5
0
文件: trmm.hpp 项目: Blonder/Shark
inline void trmm(
	CBLAS_ORDER const order,
	CBLAS_SIDE const side,
	CBLAS_UPLO const uplo,
	CBLAS_TRANSPOSE const transA,
	CBLAS_DIAG const unit,
	int const M,
	int const N,
	float const *A, int const lda,
        float* B, int const incB
) {
	cblas_strmm(order, side, uplo, transA, unit, M, N, 
		1.0,
		A, lda,
	        B, incB
	);
}
示例#6
0
static int check_transformation(int itype, int uplo, int N, float *A1, float *A2, int LDA, float *B2, int LDB, float eps)
{
    float alpha = 1.0;
    float Anorm, Rnorm, result;
    int info_transformation;
    int i, j;
    char *str;

    float *Residual = (float *)malloc(N*N*sizeof(float));
    float *Aorig    = (float *)malloc(N*N*sizeof(float));
    float *work                 = (float *)malloc(N*sizeof(float));

    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'a',                N, N, A1, LDA, Residual, N);
    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, Aorig,    N);
    
    /* Rebuild the symmetry of A2 */
    if (uplo == PlasmaLower) {
        for (j = 0; j < N; j++)
            for (i = j+1; i < N; i++)
                Aorig[j+i*N] = (Aorig[i+j*N]); 
    } else {
        for (i = 0; i < N; i++)
            for (j = i+1; j < N; j++)
                Aorig[j+i*N] = (Aorig[i+j*N]); 
    }

    if (itype == 1) {
        if (uplo == PlasmaLower) {
            str = "L*A2*L'";
            cblas_strmm(CblasColMajor, CblasLeft,  CblasLower, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_strmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
        }
        else{
            str = "U'*A2*U";
            cblas_strmm(CblasColMajor, CblasLeft,  CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_strmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
        }
    }
    else {
        if (uplo == PlasmaLower) {
            str = "inv(L')*A2*inv(L)";
            cblas_strsm(CblasColMajor, CblasLeft,  CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_strsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
        }
        else{
            str = "inv(U)*A2*inv(U')";
            cblas_strsm(CblasColMajor, CblasLeft,  CblasUpper, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_strsm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            
        }
    }
    
    /* Compute the Residual ( A1 - W ) */
    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++)
            Residual[j*N+i] = Aorig[j*N+i] - Residual[j*N+i];
    
    Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N,   work);
    Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1,       LDA, work);
    
    result = Rnorm / (Anorm * N *eps);
    printf("============\n");
    printf("Checking the global transformation \n");
    printf("-- ||A1-%s||_oo/(||A1||_oo.N.eps) = %e \n", str, result );

    if (isnan(result) || isinf(result) || (result > 60.0) ) {
        printf("-- Transformation is suspicious ! \n");
        info_transformation = 1;
    }
    else {
        printf("-- Transformation is CORRECT ! \n");
        info_transformation = 0;
    }

    free(Residual); free(Aorig); free(work);

    return info_transformation;
}
示例#7
0
int CORE_stsrfb(int side, int trans, int direct, int storev,
                int M1, int N1, int M2, int N2, int K,
                float *A1, int LDA1,
                float *A2, int LDA2,
                float *V, int LDV,
                float *T, int LDT,
                float *WORK, int LDWORK)
{
    static float zone  =  1.0;
    static float mzone = -1.0;

    int j;

    /* Check input arguments */
    if (M1 < 0) {
        coreblas_error(5, "Illegal value of M1");
        return -5;
    }
    if (N1 < 0) {
        coreblas_error(6, "Illegal value of N1");
        return -6;
    }
    if ( (M2 < 0) || 
         ( (M2 != M1) && (side == PlasmaRight) ) ){
        coreblas_error(7, "Illegal value of M2");
        return -7;
    }
    if ( (N2 < 0) || 
         ( (N2 != N1) && (side == PlasmaLeft) ) ){
        coreblas_error(8, "Illegal value of N2");
        return -8;
    }
    if (K < 0) {
        coreblas_error(9, "Illegal value of K");
        return -9;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0))
        return PLASMA_SUCCESS;

    if (storev == PlasmaColumnwise) {
        if (direct == PlasmaForward) {
            if (side == PlasmaLeft) {
                /*
                 * B = A1 + V' * A2
                 */
                LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    K, N1,
                    A1, LDA1, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasTrans, CblasNoTrans,
                    K, N2, M2,
                    (zone), V, LDV,
                    A2, LDA2,
                    (zone), WORK, LDWORK);
                /*
                 * A2 = A2 - V*T*B -> B = T*B, A2 = A2 - V*B
                 */
                cblas_strmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
                    (zone), T, LDT, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasNoTrans,
                    M2, N2, K,
                    (mzone), V, LDV,
                    WORK, LDWORK,
                    (zone), A2, LDA2);
                /*
                 * A1 = A1 - B
                 */
                for(j = 0; j < N1; j++) {
                    cblas_saxpy(
                        K, (mzone),
                        &WORK[LDWORK*j], 1,
                        &A1[LDA1*j], 1);
                }
            }
            /*
             * Columnwise / Forward / Right
             */
            else {
                /*
                 * B = A1 + A2 * V
                 */
                LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    M1, K,
                    A1, LDA1, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasNoTrans,
                    M2, K, N2,
                    (zone), A2, LDA2,
                    V, LDV,
                    (zone), WORK, LDWORK);
                /*
                 * A2 = A2 - B*T*V' -> B = B*T, A2 = A2 - B*V'
                 */
                cblas_strmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, M1, K,
                    (zone), T, LDT, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasTrans,
                    M2, N2, K,
                    (mzone), WORK, LDWORK,
                    V, LDV,
                    (zone), A2, LDA2);
                /*
                 * A1 = A1 - B
                 */
                for(j = 0; j < K; j++) {
                    cblas_saxpy(
                        M1, (mzone),
                        &WORK[LDWORK*j], 1,
                        &A1[LDA1*j], 1);
                }
            }
        }
        else {
            coreblas_error(3, "Not implemented (ColMajor / Backward / Left or Right)");
            return PLASMA_ERR_NOT_SUPPORTED;
        }
    }
    else {
        if (direct == PlasmaForward) {
            /*
             * Rowwise / Forward / Left
             */
            if (side == PlasmaLeft) {
                /*
                 * B = A1 + V * A2
                 */
                LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    K, N1,
                    A1, LDA1, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasNoTrans,
                    K, N2, M2,
                    (zone), V, LDV,
                    A2, LDA2,
                    (zone), WORK, LDWORK);
                /*
                 * A2 = A2 - V'*T*B -> B = T*B, A2 = A2 - V'*B
                 */
                cblas_strmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
                    (zone), T, LDT, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasTrans, CblasNoTrans,
                    M2, N2, K,
                    (mzone), V, LDV,
                    WORK, LDWORK,
                    (zone), A2, LDA2);
                /*
                 * A1 = A1 - B
                 */
                for(j=0; j<N1; j++) {
                    cblas_saxpy(
                        K, (mzone),
                        &WORK[LDWORK*j], 1,
                        &A1[LDA1*j], 1);
                }
            }
            /*
             * Rowwise / Forward / Right
             */
            else {
                /*
                 * B = A1 + A2 * V'
                 */
                LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    M1, K,
                    A1, LDA1, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasTrans,
                    M2, K, N2,
                    (zone), A2, LDA2,
                    V, LDV,
                    (zone), WORK, LDWORK);
                /*
                 * A2 = A2 - B*T*V -> B = B*T, A2 = A2 - B*V'
                 */
                cblas_strmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, M1, K,
                    (zone), T, LDT, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasNoTrans,
                    M2, N2, K,
                    (mzone), WORK, LDWORK,
                    V, LDV,
                    (zone), A2, LDA2);
                /*
                 * A1 = A1 - B
                 */
                for(j = 0; j < K; j++) {
                    cblas_saxpy(
                        M1, (mzone),
                        &WORK[LDWORK*j], 1,
                        &A1[LDA1*j], 1);
                }
            }
        }
        else {
            coreblas_error(3, "Not implemented (RowMajor / Backward / Left or Right)");
            return PLASMA_ERR_NOT_SUPPORTED;
        }
    }
    return PLASMA_SUCCESS;
}