lapack_int LAPACKE_dlacpy( int matrix_layout, char uplo, lapack_int m,
                           lapack_int n, const double* a, lapack_int lda,
                           double* b, lapack_int ldb )
{
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_dlacpy", -1 );
        return -1;
    }
#ifndef LAPACK_DISABLE_NAN_CHECK
    if( LAPACKE_get_nancheck() ) {
        /* Optionally check input matrices for NaNs */
        if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) {
            return -5;
        }
    }
#endif
    return LAPACKE_dlacpy_work( matrix_layout, uplo, m, n, a, lda, b, ldb );
}
int check_factorization(int M, int N, double *A1, double *A2, int LDA, double *Q)
{
    double Anorm, Rnorm;
    double alpha, beta;
    int info_factorization;
    int i,j;
    double eps;

    eps = LAPACKE_dlamch_work('e');

    double *Ql       = (double *)malloc(M*N*sizeof(double));
    double *Residual = (double *)malloc(M*N*sizeof(double));
    double *work              = (double *)malloc(max(M,N)*sizeof(double));

    alpha=1.0;
    beta=0.0;

    if (M >= N) {
        /* Extract the R */
        double *R = (double *)malloc(N*N*sizeof(double));
        memset((void*)R, 0, N*N*sizeof(double));
        LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N);

        /* Perform Ql=Q*R */
        memset((void*)Ql, 0, M*N*sizeof(double));
        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, (alpha), Q, LDA, R, N, (beta), Ql, M);
        free(R);
    }
    else {
        /* Extract the L */
        double *L = (double *)malloc(M*M*sizeof(double));
        memset((void*)L, 0, M*M*sizeof(double));
        LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M);

    /* Perform Ql=LQ */
        memset((void*)Ql, 0, M*N*sizeof(double));
        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, (alpha), L, M, Q, LDA, (beta), Ql, M);
        free(L);
    }

    /* Compute the Residual */
    for (i = 0; i < M; i++)
        for (j = 0 ; j < N; j++)
            Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i];

    Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Residual, M, work);
    Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A2, LDA, work);

    if (M >= N) {
        printf("============\n");
        printf("Checking the QR Factorization \n");
        printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
    }
    else {
        printf("============\n");
        printf("Checking the LQ Factorization \n");
        printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
    }

    if (isnan(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 10.0) ) {
        printf("-- Factorization is suspicious ! \n");
        info_factorization = 1;
    }
    else {
        printf("-- Factorization is CORRECT ! \n");
        info_factorization = 0;
    }

    free(work); free(Ql); free(Residual);

    return info_factorization;
}
Exemple #3
0
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_) 
{
    double *A, *b, *x;
    double *Acpy = NULL;
    double *bcpy = NULL;
    real_Double_t       t;
    int                *piv;
    int n       = iparam[TIMING_N];
    int nrhs    = iparam[TIMING_NRHS];
    int check   = iparam[TIMING_CHECK];
    int                 lda = n;
    int                 ldb = n;
    int                iter = 0;
    
    /* Allocate Data */
    A = (double *)malloc(lda*n*   sizeof(double));
    b = (double *)malloc(ldb*nrhs*sizeof(double));
    x = (double *)malloc(ldb*nrhs*sizeof(double));
    piv = (int *)malloc( n*sizeof(int));

    /* Check if unable to allocate memory */
    if ( (!A) || (!b) || (!x) || (!piv) ) {
        printf("Out of Memory \n ");
        return -1;
    }
    
    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
        PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    /* } */

     /* Initialiaze Data */
    LAPACKE_dlarnv_work(1, ISEED, lda*n,    A);
    LAPACKE_dlarnv_work(1, ISEED, ldb*nrhs, b);

    /* Save A and b  */
    if (check) {
        Acpy = (double *)malloc(lda*n*   sizeof(double));
        bcpy = (double *)malloc(ldb*nrhs*sizeof(double));
        LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', n, n,    A, lda, Acpy, lda);
        LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', n, nrhs, b, ldb, bcpy, ldb);
      }

    t = -cWtime();
    PLASMA_dsgesv( n, nrhs, A, lda, piv, b, ldb, x, ldb, &iter );
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if (check)
      {
        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, Acpy, lda, bcpy, x, ldb,
                                             &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), 
                                             &(dparam[TIMING_XNORM]));
        free(Acpy); free(bcpy);
      }

    free( piv );
    free( x );
    free( b );
    free( A );

    PLASMA_Finalize();

    return 0;
}
Exemple #4
0
static inline int
CORE_dpamm_w(PLASMA_enum side, PLASMA_enum trans, PLASMA_enum uplo,
             int M, int N, int K, int L,
             int vi2, int vi3,
             const double *A1, int LDA1,
                   double *A2, int LDA2,
             const double *V, int LDV,
                   double *W, int LDW)
{

   /*
    * W = A1 + op(V) * A2  or  W = A1 + A2 * op(V)
    */

    int j;
    static double zone  =  1.0;
    static double zzero =  0.0;

    if (side == PlasmaLeft) {

        if (((trans == PlasmaTrans) && (uplo == CblasUpper)) ||
            ((trans == PlasmaNoTrans) && (uplo == CblasLower))) {

            /*
             * W = A1 + V' * A2
             */

            /* W = A2_2 */
            LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
                lapack_const(PlasmaUpperLower),
                L, N,
                &A2[K-L], LDA2, W, LDW);

            /* W = V_2' * W + V_1' * A2_1 (ge+tr, top L rows of V') */
            if (L > 0) {
                /* W = V_2' * W */
                cblas_dtrmm(
                    CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, L, N,
                    (zone), &V[vi2], LDV,
                    W, LDW);

                /* W = W + V_1' * A2_1 */
                if (K > L) {
                    cblas_dgemm(
                        CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
                        L, N, K-L,
                        (zone), V, LDV,
                        A2, LDA2,
                        (zone), W, LDW);
                }
            }

            /* W_2 = V_3' * A2: (ge, bottom M-L rows of V') */
            if (M > L) {
                cblas_dgemm(
                    CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans,
                    (M-L), N, K,
                    (zone), &V[vi3], LDV,
                    A2, LDA2,
                    (zzero), &W[L], LDW);
            }

            /* W = A1 + W */
            for(j = 0; j < N; j++) {
                cblas_daxpy(
                        M, (zone),
                        &A1[LDA1*j], 1,
                        &W[LDW*j], 1);
            }
        }
        else {
            printf("Left Upper/NoTrans & Lower/ConjTrans not implemented yet\n");
            return PLASMA_ERR_NOT_SUPPORTED;

        }
    }
    else { //side right

        if (((trans == PlasmaTrans) && (uplo == CblasUpper)) ||
            ((trans == PlasmaNoTrans) && (uplo == CblasLower))) {
            printf("Right Upper/ConjTrans & Lower/NoTrans not implemented yet\n");
            return PLASMA_ERR_NOT_SUPPORTED;

        }
        else {

            /*
             * W = A1 + A2 * V
             */

            if (L > 0) {

                /* W = A2_2 */
                LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,
                    lapack_const(PlasmaUpperLower),
                    M, L,
                    &A2[LDA2*(K-L)], LDA2, W, LDW);

                /* W = W * V_2 --> W = A2_2 * V_2 */
                cblas_dtrmm(
                    CblasColMajor, CblasRight, (CBLAS_UPLO)uplo,
                    (CBLAS_TRANSPOSE)trans, CblasNonUnit, M, L,
                    (zone), &V[vi2], LDV,
                    W, LDW);

                /* W = W + A2_1 * V_1 */
                if (K > L) {
                    cblas_dgemm(
                        CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)trans,
                        M, L, K-L,
                        (zone), A2, LDA2,
                        V, LDV,
                        (zone), W, LDW);
                }

            }

            /* W = W + A2 * V_3 */
            if (N > L) {
                cblas_dgemm(
                    CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)trans,
                    M, N-L, K,
                    (zone), A2, LDA2,
                    &V[vi3], LDV,
                    (zzero), &W[LDW*L], LDW);
            }

            /* W = A1 + W */
            for (j = 0; j < N; j++) {
                cblas_daxpy(
                        M, (zone),
                        &A1[LDA1*j], 1,
                        &W[LDW*j], 1);
            }
        }
    }

    return PLASMA_SUCCESS;
}
Exemple #5
0
int testing_dsygv(int argc, char **argv)
{
    /* Check for number of arguments*/
    if (argc != 3) {
        USAGE("HEGV", "N LDA LDB",
              "   - N    : size of the matrices A and B\n"
              "   - LDA  : leading dimension of the matrix A\n"
              "   - LDB  : leading dimension of the matrix B\n");
        return -1;
    }

    double      eps = LAPACKE_dlamch_work('e');
    PLASMA_enum vec = PlasmaNoVec;
    int    N        = atoi(argv[0]);
    int    LDA      = atoi(argv[1]);
    int    LDB      = atoi(argv[2]);
    int    LDQ      = LDA;
    int    LDAxN    = LDA*N;
    int    LDBxN    = LDB*N;
    int    LDQxN    = LDQ*N;

    int info_ortho     = 0;
    int info_solution  = 0;
    int info_reduction = 0;
    int i, u;

    double *A1    = (double *)malloc(LDAxN*sizeof(double));
    double *A2    = (double *)malloc(LDAxN*sizeof(double));
    double *B1    = (double *)malloc(LDBxN*sizeof(double));
    double *B2    = (double *)malloc(LDBxN*sizeof(double));
    double *Q     = (double *)malloc(LDQxN*sizeof(double));
    double *Ainit = (double *)malloc(LDAxN*sizeof(double));
    double *Binit = (double *)malloc(LDBxN*sizeof(double));
    double *W1 = (double *)malloc(N*sizeof(double));
    double *W2 = (double *)malloc(N*sizeof(double));
    double *work = (double *)malloc(3*N* sizeof(double));
    PLASMA_desc *T;

    /* Check if unable to allocate memory */
    if ((!A1)||(!A2)||(!B1)||(!B2)||(!Q)||(!Ainit)||(!Binit)){
        printf("Out of Memory \n ");
        return -2;
    }

    /*
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, 120);
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, 20);
    */
    
    PLASMA_Enable(PLASMA_WARNINGS);
    PLASMA_Enable(PLASMA_ERRORS);
    PLASMA_Alloc_Workspace_dsygv(N, N, &T);

    /*----------------------------------------------------------
    *  TESTING DSYGV
    */

    /* Initialize A1 and Ainit */
    PLASMA_dplgsy(0., N, A1, LDA, 5198);
    LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A1, LDA, Ainit, LDA);

    /* Initialize B1 and Binit */
    PLASMA_dplgsy((double)N, N, B1, LDB, 4321 );
    LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, B1, LDB, Binit, LDB);

    printf("\n");
    printf("------ TESTS FOR PLASMA DSYGV ROUTINE -------  \n");
    printf("        Size of the Matrix %d by %d\n", N, N);
    printf("\n");
    printf(" The matrix A is randomly generated for each test.\n");
    printf("============\n");
    printf(" The relative machine precision (eps) is to be %e \n",eps);
    printf(" Computational tests pass if scaled residuals are less than 60.\n");

    /*----------------------------------------------------------
     *  TESTING DSYGV
     */

    for (i=0; i<3; i++) {
        for (u=0; u<2; u++) {
            LAPACKE_dlaset_work(LAPACK_COL_MAJOR, 'A', LDA, N, 0., 1., Q, LDA);

            memcpy(A2, Ainit, LDAxN*sizeof(double));
            memcpy(B2, Binit, LDBxN*sizeof(double));

            PLASMA_dsygv(itype[i], vec, uplo[u], N, A2, LDA, B2, LDB, W2, T, Q, LDQ);

            /* Check the orthogonality, reduction and the eigen solutions */
            if (vec == PlasmaVec)
                info_ortho = check_orthogonality(N, N, Q, LDA, eps);
            /* 
             * WARNING: For now, Q is associated to Band tridiagonal reduction and 
             * not to the final tridiagonal reduction, so we can not call the check
             */
            if (0)
                info_reduction = check_reduction(itype[i], uplo[u], N, 1, A1, A2, LDA, B2, LDB, Q, eps);

            memcpy(A1, Ainit, LDAxN*sizeof(double));
            memcpy(B1, Binit, LDBxN*sizeof(double));

            LAPACKE_dsygv( LAPACK_COL_MAJOR, 
                     itype[i], lapack_const(vec), lapack_const(uplo[u]),
                     N, A1, LDA, B1, LDB, W1);

            /*info_solution  = check_solution(N, N, N, A1, LDA, B1, B2, LDB, eps);*/
            info_solution = check_solution(N, W1, W2, eps);
         
            if ( (info_ortho == 0) & (info_reduction == 0) & (info_solution == 0)) {
                printf("***************************************************\n");
                printf(" ---- TESTING DSYGV (%s, %s) ...................... PASSED !\n", itypestr[i], uplostr[u]);
                printf("***************************************************\n");
            }
            else {
                printf("************************************************\n");
                printf(" - TESTING DSYGV (%s, %s) ... FAILED !\n", itypestr[i], uplostr[u]);
                printf("************************************************\n");
            } 
        }
    }

    PLASMA_Dealloc_Handle_Tile(&T);
    free(A1); 
    free(A2); 
    free(B1); 
    free(B2); 
    free(Q); 
    free(Ainit); 
    free(Binit); 
    free(W1);
    free(W2);
    free(work);

    return 0;
}
Exemple #6
0
static int check_reduction(int itype, int uplo, int N, int bw, 
                           double *A1, double *A2, int LDA, 
                           double *B2, int LDB, double *Q, double eps )
{
    double alpha = 1.0;
    double beta  = 0.0;
    double Anorm, Rnorm, result;
    int info_reduction;
    int i, j;
    char *str;
    
    double *Aorig    = (double *)malloc(N*N*sizeof(double));
    double *Residual = (double *)malloc(N*N*sizeof(double));
    double *T        = (double *)malloc(N*N*sizeof(double));
    double *work                 = (double *)malloc(N*sizeof(double));

    memset((void*)T, 0, N*N*sizeof(double));
    LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, ' ', N, N, A1, LDA, Residual, N);

    /* Rebuild the T */
    LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, T, N);

    if (uplo == PlasmaLower) {
        /* Set the reflectors to 0 */
        for (i = bw+1; i < N; i++)
            for (j = 0 ; (j < N) && (j < i-bw); j++)
                T[j*N+i] = 0.;

        /* Copy the lower part to the upper part to rebuild the symmetry */
        for (i = 0; i < N; i++)
            for (j = 0 ; j < i; j++)
                T[i*N+j] = (T[j*N+i]);
    } else {
        /* Set the reflectors to 0 */
        for (j = bw+1; j < N; j++)
            for (i = 0 ; (i < N) && (i < j-bw); i++)
                T[j*N+i] = 0.;
        /* Copy the upper part to the lower part to rebuild the symmetry */
        for (i = 0; i < N; i++)
            for (j = i+1 ; j < N; j++)
                T[i*N+j] = (T[j*N+i]);
    }

    memset((void*)Aorig, 0, N*N*sizeof(double));

    if (itype == 1) {
        if (uplo == PlasmaLower) {
            str = "L*Q*T*Q'*L'";
   
            /* Compute Aorig=Q*T*Q' */
            cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,   N, N, N, (alpha), Q,     LDA, T, N,   (beta), Aorig, N);
            cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, N, N, N, (alpha), Aorig, N,   Q, LDA, (beta), T,     N);

            LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, PlasmaUpperLower, N, N, T, N, Aorig, N);

            cblas_dtrmm(CblasColMajor, CblasLeft,  CblasLower, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); 

        }
        else {
            str = "U'*Q*T*Q'*U";

            /* Compute Aorig=Q'*T*Q */
            cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, N, N, (alpha), Q,     LDA, T, N,   (beta), Aorig, N);
            cblas_dgemm(CblasColMajor, CblasNoTrans,   CblasNoTrans, N, N, N, (alpha), Aorig, N,   Q, LDA, (beta), T,     N);

            LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, T, N, Aorig, N);

            cblas_dtrmm(CblasColMajor, CblasLeft,  CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);  
        }
    }
    else {
        if (uplo == PlasmaLower) {
            str = "inv(L')*Q*A2*Q'*inv(L)";
            
            /* Compute Aorig=Q*T*Q' */
            cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,   N, N, N, (alpha), Q,     LDA, T, N,   (beta), Aorig, N);
            cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, N, N, N, (alpha), Aorig, N,   Q, LDA, (beta), T, N   );

            LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, T, N, Aorig, N);

            cblas_dtrsm(CblasColMajor, CblasLeft,  CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_dtrsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
        }
        else{
            str = "inv(U)*Q*A2*Q'*inv(U')";

            /* Compute Aorig=Q'*T*Q */
            cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, N, N, (alpha), Q,     LDA, T, N,   (beta), Aorig, N);
            cblas_dgemm(CblasColMajor, CblasNoTrans,   CblasNoTrans, N, N, N, (alpha), Aorig, N,   Q, LDA, (beta), T,     N);
      
            LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, T, N, Aorig, N);
          
            cblas_dtrsm(CblasColMajor, CblasLeft,  CblasUpper, CblasNoTrans,   CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
            cblas_dtrsm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);   
        }
    }
   
    /* Compute the Residual */
    for (i = 0; i < N; i++)
        for (j = 0 ; j < N; j++)
            Residual[j*N+i] = A1[j*LDA+i]-Aorig[j*N+i];
    
    Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N,   work);
    Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A2,       LDA, work);
    
    result = Rnorm / (Anorm * N *eps);
    printf("============\n");
    printf("Checking the tridiagonal reduction \n");
    printf("-- ||A-%s||_oo/(||A||_oo.N.eps) = %e \n", str, result );
    
    if (isnan(result) || isinf(result) || (result > 60.0) ) {
        printf("-- Reduction is suspicious ! \n");
        info_reduction = 1;
    }
    else {
        printf("-- Reduction is CORRECT ! \n");
        info_reduction = 0;
    }
    
    free(Aorig); free(Residual); free(T); free(work);
    
    return info_reduction;
}