예제 #1
0
/*------------------------------------------------------------------------
 *  Check the factorization of the matrix A2
 */
static int check_factorization(int N, float *A1, float *A2, int LDA, int uplo, float eps)
{
    float alpha = 1.0;
    float Anorm, Rnorm, result;
    int info_factorization;
    int i,j;

    float *Residual = (float *)malloc(N*N*sizeof(float));
    float *L1       = (float *)malloc(N*N*sizeof(float));
    float *L2       = (float *)malloc(N*N*sizeof(float));
    float *work                 = (float *)malloc(N*sizeof(float));

    memset((void*)L1, 0, N*N*sizeof(float));
    memset((void*)L2, 0, N*N*sizeof(float));

    LAPACKE_slacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);

    /* Dealing with L'L or U'U  */
    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L1, N);
    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L2, N);
    if (uplo == PlasmaUpper)
       cblas_strmm(CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
    else
       cblas_strmm(CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);

    /* Compute the Residual || A - L'L|| */
    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++)
           Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];

    Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N,   work);
    Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1,       LDA, work);

    result = Rnorm / ( Anorm * N * eps );
    printf("============\n");
    printf("Checking the Cholesky Factorization \n");
    printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n", result);

    if ( isnan(result) || isinf(result) || (result > 60.0) ){
        printf("-- Factorization is suspicious ! \n");
        info_factorization = 1;
    }
    else{
        printf("-- Factorization is CORRECT ! \n");
        info_factorization = 0;
    }

    free(Residual); free(L1); free(L2); free(work);

    return info_factorization;
}
lapack_int LAPACKE_slacpy( int matrix_layout, char uplo, lapack_int m,
                           lapack_int n, const float* a, lapack_int lda,
                           float* b, lapack_int ldb )
{
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_slacpy", -1 );
        return -1;
    }
#ifndef LAPACK_DISABLE_NAN_CHECK
    /* Optionally check input matrices for NaNs */
    if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) {
        return -5;
    }
#endif
    return LAPACKE_slacpy_work( matrix_layout, uplo, m, n, a, lda, b, ldb );
}
예제 #3
0
static int
RunTest(int *iparam, float *dparam, real_Double_t *t_) 
{
    float *A, *Acpy = NULL, *b, *x;
    real_Double_t       t;
    int                *piv;
    int m     = iparam[TIMING_M];
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = m;
    int ldb   = m;

    /* Allocate Data */
    A   = (float *)malloc(lda*n*sizeof(float));
    piv = (int *)malloc( min(m, n) * sizeof(int));
    
    /* Check if unable to allocate memory */
    if ( !A || !piv ){
        printf("Out of Memory \n ");
        return -1;
    }
    
    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );
    
    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /*     PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */
    /* }  */
    
    /* Initialize Data */
    /*LAPACKE_slarnv_work(1, ISEED, n*lda, A);*/
    PLASMA_splrnt(m, n, A, lda, 3456);

    /* Save AT in lapack layout for check */
    if ( check && (m == n) ) {
        Acpy = (float *)malloc(lda*n*sizeof(float));
        LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', m, n, A, lda, Acpy, lda);
    }

    t = -cWtime();
    PLASMA_sgetrf( m, n, A, lda, piv );
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if ( check && (m == n) )
      {
        b  = (float *)malloc(ldb*nrhs *sizeof(float));
        x  = (float *)malloc(ldb*nrhs *sizeof(float));

        LAPACKE_slarnv_work(1, ISEED, ldb*nrhs, x);
        LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', n, nrhs, x, ldb, b, ldb);

        PLASMA_sgetrs( PlasmaNoTrans, n, nrhs, A, lda, piv, x, ldb );

        dparam[TIMING_RES] = s_check_solution(m, n, nrhs, Acpy, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), 
                                              &(dparam[TIMING_XNORM]));

        free( Acpy ); free( b ); free( x );
      }

    free( A );
    free( piv );
    PLASMA_Finalize();

    return 0;
}
예제 #4
0
int testing_ssygst(int argc, char **argv)
{
    /* Check for number of arguments*/
    if (argc != 3) {
        USAGE("HEGST", "N LDA LDB",
              "   - N    : size of the matrices A and B\n"
              "   - LDA  : leading dimension of the matrix A\n"
              "   - LDB  : leading dimension of the matrix B\n");
        return -1;
    }

    float eps = LAPACKE_slamch_work('e');
    int    N     = atoi(argv[0]);
    int    LDA   = atoi(argv[1]);
    int    LDB   = atoi(argv[2]);
    int    info_transformation, info_factorization;
    int    i, u;
    int    LDAxN = LDA*N;
    int    LDBxN = LDB*N;

    float *A1    = (float *)malloc(LDAxN*sizeof(float));
    float *A2    = (float *)malloc(LDAxN*sizeof(float));
    float *B1    = (float *)malloc(LDBxN*sizeof(float));
    float *B2    = (float *)malloc(LDBxN*sizeof(float));
    float *Ainit = (float *)malloc(LDAxN*sizeof(float));
    float *Binit = (float *)malloc(LDBxN*sizeof(float));

    /* Check if unable to allocate memory */
    if ((!A1)||(!A2)||(!B1)||(!B2)||(!Ainit)||(!Binit)){
        printf("Out of Memory \n ");
        return -2;
    }

    /*----------------------------------------------------------
    *  TESTING SSYGST
    */

    /* Initialize A1 and A2 */
    PLASMA_splgsy(0., N, A1, LDA, 5198);
    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A1, LDA, Ainit, LDA);

    /* Initialize B1 and B2 */
    PLASMA_splgsy((float)N, N, B1, LDB, 4231);
    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', N, N, B1, LDB, Binit, LDB);

    printf("\n");
    printf("------ TESTS FOR PLASMA SSYGST ROUTINE -------  \n");
    printf("        Size of the Matrix %d by %d\n", N, N);
    printf("\n");
    printf(" The matrices A and B are randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n",eps);
    printf(" Computational tests pass if scaled residuals are less than 60.\n");

    /*----------------------------------------------------------
     *  TESTING SSYGST
     */

    for (i=0; i<3; i++) {
        for (u=0; u<2; u++) {
            memcpy(A2, Ainit, LDAxN*sizeof(float));
            memcpy(B2, Binit, LDBxN*sizeof(float));

            PLASMA_spotrf(uplo[u], N, B2, LDB);
            PLASMA_ssygst(itype[i], uplo[u], N, A2, LDA, B2, LDB);
        
            /* Check the Cholesky factorization and the transformation */
            info_factorization = check_factorization(N, B1, B2, LDB, uplo[u], eps);
            info_transformation = check_transformation(itype[i], uplo[u], N, A1, A2, LDA, B2, LDB, eps);
        
            if ( (info_transformation == 0) && (info_factorization == 0) ) {
                printf("***************************************************\n");
                printf(" ---- TESTING SSYGST (%s, %s) ....... PASSED !\n", itypestr[i], uplostr[u]);
                printf("***************************************************\n");
            }
            else {
                printf("************************************************\n");
                printf(" - TESTING SSYGST (%s, %s) ... FAILED !\n", itypestr[i], uplostr[u]);
                printf("************************************************\n");
            }
        }
    }
        
    free(A1); 
    free(A2); 
    free(B1); 
    free(B2);
    free(Ainit); 
    free(Binit);

    return 0;
}
예제 #5
0
static int check_transformation(int itype, int uplo, int N, float *A1, float *A2, int LDA, float *B2, int LDB, float eps)
{
    float alpha = 1.0;
    float Anorm, Rnorm, result;
    int info_transformation;
    int i, j;
    char *str;

    float *Residual = (float *)malloc(N*N*sizeof(float));
    float *Aorig    = (float *)malloc(N*N*sizeof(float));
    float *work                 = (float *)malloc(N*sizeof(float));

    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'a',                N, N, A1, LDA, Residual, N);
    LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, Aorig,    N);
    
    /* Rebuild the symmetry of A2 */
    if (uplo == PlasmaLower) {
        for (j = 0; j < N; j++)
            for (i = j+1; i < N; i++)
                Aorig[j+i*N] = (Aorig[i+j*N]); 
    } else {
        for (i = 0; i < N; i++)
            for (j = i+1; j < N; j++)
                Aorig[j+i*N] = (Aorig[i+j*N]); 
    }

    if (itype == 1) {
        if (uplo == PlasmaLower) {
            str = "L*A2*L'";
            cblas_strmm(CblasColMajor, CblasLeft,  CblasLower, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_strmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
        }
        else{
            str = "U'*A2*U";
            cblas_strmm(CblasColMajor, CblasLeft,  CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_strmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
        }
    }
    else {
        if (uplo == PlasmaLower) {
            str = "inv(L')*A2*inv(L)";
            cblas_strsm(CblasColMajor, CblasLeft,  CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_strsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
        }
        else{
            str = "inv(U)*A2*inv(U')";
            cblas_strsm(CblasColMajor, CblasLeft,  CblasUpper, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_strsm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            
        }
    }
    
    /* Compute the Residual ( A1 - W ) */
    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++)
            Residual[j*N+i] = Aorig[j*N+i] - Residual[j*N+i];
    
    Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N,   work);
    Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1,       LDA, work);
    
    result = Rnorm / (Anorm * N *eps);
    printf("============\n");
    printf("Checking the global transformation \n");
    printf("-- ||A1-%s||_oo/(||A1||_oo.N.eps) = %e \n", str, result );

    if (isnan(result) || isinf(result) || (result > 60.0) ) {
        printf("-- Transformation is suspicious ! \n");
        info_transformation = 1;
    }
    else {
        printf("-- Transformation is CORRECT ! \n");
        info_transformation = 0;
    }

    free(Residual); free(Aorig); free(work);

    return info_transformation;
}
예제 #6
0
int CORE_stsrfb(int side, int trans, int direct, int storev,
                int M1, int N1, int M2, int N2, int K,
                float *A1, int LDA1,
                float *A2, int LDA2,
                float *V, int LDV,
                float *T, int LDT,
                float *WORK, int LDWORK)
{
    static float zone  =  1.0;
    static float mzone = -1.0;

    int j;

    /* Check input arguments */
    if (M1 < 0) {
        coreblas_error(5, "Illegal value of M1");
        return -5;
    }
    if (N1 < 0) {
        coreblas_error(6, "Illegal value of N1");
        return -6;
    }
    if ( (M2 < 0) || 
         ( (M2 != M1) && (side == PlasmaRight) ) ){
        coreblas_error(7, "Illegal value of M2");
        return -7;
    }
    if ( (N2 < 0) || 
         ( (N2 != N1) && (side == PlasmaLeft) ) ){
        coreblas_error(8, "Illegal value of N2");
        return -8;
    }
    if (K < 0) {
        coreblas_error(9, "Illegal value of K");
        return -9;
    }

    /* Quick return */
    if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0))
        return PLASMA_SUCCESS;

    if (storev == PlasmaColumnwise) {
        if (direct == PlasmaForward) {
            if (side == PlasmaLeft) {
                /*
                 * B = A1 + V' * A2
                 */
                LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    K, N1,
                    A1, LDA1, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasTrans, CblasNoTrans,
                    K, N2, M2,
                    (zone), V, LDV,
                    A2, LDA2,
                    (zone), WORK, LDWORK);
                /*
                 * A2 = A2 - V*T*B -> B = T*B, A2 = A2 - V*B
                 */
                cblas_strmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
                    (zone), T, LDT, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasNoTrans,
                    M2, N2, K,
                    (mzone), V, LDV,
                    WORK, LDWORK,
                    (zone), A2, LDA2);
                /*
                 * A1 = A1 - B
                 */
                for(j = 0; j < N1; j++) {
                    cblas_saxpy(
                        K, (mzone),
                        &WORK[LDWORK*j], 1,
                        &A1[LDA1*j], 1);
                }
            }
            /*
             * Columnwise / Forward / Right
             */
            else {
                /*
                 * B = A1 + A2 * V
                 */
                LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    M1, K,
                    A1, LDA1, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasNoTrans,
                    M2, K, N2,
                    (zone), A2, LDA2,
                    V, LDV,
                    (zone), WORK, LDWORK);
                /*
                 * A2 = A2 - B*T*V' -> B = B*T, A2 = A2 - B*V'
                 */
                cblas_strmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, M1, K,
                    (zone), T, LDT, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasTrans,
                    M2, N2, K,
                    (mzone), WORK, LDWORK,
                    V, LDV,
                    (zone), A2, LDA2);
                /*
                 * A1 = A1 - B
                 */
                for(j = 0; j < K; j++) {
                    cblas_saxpy(
                        M1, (mzone),
                        &WORK[LDWORK*j], 1,
                        &A1[LDA1*j], 1);
                }
            }
        }
        else {
            coreblas_error(3, "Not implemented (ColMajor / Backward / Left or Right)");
            return PLASMA_ERR_NOT_SUPPORTED;
        }
    }
    else {
        if (direct == PlasmaForward) {
            /*
             * Rowwise / Forward / Left
             */
            if (side == PlasmaLeft) {
                /*
                 * B = A1 + V * A2
                 */
                LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    K, N1,
                    A1, LDA1, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasNoTrans,
                    K, N2, M2,
                    (zone), V, LDV,
                    A2, LDA2,
                    (zone), WORK, LDWORK);
                /*
                 * A2 = A2 - V'*T*B -> B = T*B, A2 = A2 - V'*B
                 */
                cblas_strmm(
                    CblasColMajor, CblasLeft, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
                    (zone), T, LDT, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasTrans, CblasNoTrans,
                    M2, N2, K,
                    (mzone), V, LDV,
                    WORK, LDWORK,
                    (zone), A2, LDA2);
                /*
                 * A1 = A1 - B
                 */
                for(j=0; j<N1; j++) {
                    cblas_saxpy(
                        K, (mzone),
                        &WORK[LDWORK*j], 1,
                        &A1[LDA1*j], 1);
                }
            }
            /*
             * Rowwise / Forward / Right
             */
            else {
                /*
                 * B = A1 + A2 * V'
                 */
                LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    M1, K,
                    A1, LDA1, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasTrans,
                    M2, K, N2,
                    (zone), A2, LDA2,
                    V, LDV,
                    (zone), WORK, LDWORK);
                /*
                 * A2 = A2 - B*T*V -> B = B*T, A2 = A2 - B*V'
                 */
                cblas_strmm(
                    CblasColMajor, CblasRight, CblasUpper,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, M1, K,
                    (zone), T, LDT, WORK, LDWORK);

                cblas_sgemm(
                    CblasColMajor, CblasNoTrans, CblasNoTrans,
                    M2, N2, K,
                    (mzone), WORK, LDWORK,
                    V, LDV,
                    (zone), A2, LDA2);
                /*
                 * A1 = A1 - B
                 */
                for(j = 0; j < K; j++) {
                    cblas_saxpy(
                        M1, (mzone),
                        &WORK[LDWORK*j], 1,
                        &A1[LDA1*j], 1);
                }
            }
        }
        else {
            coreblas_error(3, "Not implemented (RowMajor / Backward / Left or Right)");
            return PLASMA_ERR_NOT_SUPPORTED;
        }
    }
    return PLASMA_SUCCESS;
}