示例#1
0
static double
RunTest(real_Double_t *t_, struct user_parameters* params)
{
    double t;
    PLASMA_desc *descT;
    int64_t N     = params->matrix_size;
    int64_t IB    = params->iblocksize;
    int64_t NB    = params->blocksize;
    int check     = params->check;
    double check_res = 0;

    /* Allocate Data */
    PLASMA_desc *descA = NULL;
    double *ptr = (double*)malloc(N * N * sizeof(double));
    PLASMA_Desc_Create(&descA, ptr, PlasmaRealDouble, NB, NB, NB*NB, N, N, 0, 0, N, N);

#pragma omp parallel
    {
#pragma omp single
        {
    plasma_pdpltmg_quark(*descA, 5373 );
        }
    }

    /* Save A for check */
    double *A = NULL;
    if ( check ) {
        A = (double*)malloc(N * N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descA, (void*)A, N);
    }

    /* Allocate Workspace */
    plasma_alloc_ibnb_tile(N, N, PlasmaRealDouble, &descT, IB, NB);

    /* Do the computations */
    START_TIMING();
#pragma omp parallel
    {
#pragma omp single
        {
    plasma_pdgeqrf_quark( *descA, *descT , IB);
        }
    }
    STOP_TIMING();

    /* Check the solution */
    if ( check )
    {
        /* Allocate B for check */
        PLASMA_desc *descB = NULL;
        double* ptr = (double*)malloc(N * sizeof(double));
        PLASMA_Desc_Create(&descB, ptr, PlasmaRealDouble, NB, NB, NB*NB, N, 1, 0, 0, N, 1);

        /* Initialize and save B */
        plasma_pdpltmg_seq(*descB, 2264 );
        double *B = (double*)malloc(N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descB, (void*)B, N);

        /* Compute the solution */
        PLASMA_dgeqrs_Tile( descA, descT, descB , IB);

        /* Copy solution to X */
        double *X = (double*)malloc(N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descB, (void*)X, N);

        check_res = d_check_solution(N, N, 1, A, N, B, X, N);

        /* Free checking structures */
        PASTE_CODE_FREE_MATRIX( descB );
        free( A );
        free( B );
        free( X );
    }

    /* Free data */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PASTE_CODE_FREE_MATRIX( descA );

    return check_res;
}
示例#2
0
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_) 
{
    double *A = NULL, *AT, *b = NULL, *bT, *x;
    PLASMA_desc        *descA, *descB, *descT;
    real_Double_t       t;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );
  
#if defined(PLASMA_CUDA)
  core_cublas_init();
#endif

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
        PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /*     PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */
    /* }  */
    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);
    
    /* Allocate Data */
    AT  = (double *)malloc(nt*nt*nb2*sizeof(double));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(0);
    }
  
#if defined(PLASMA_CUDA)
    cudaHostRegister((void*)AT, nt*nt*nb2*sizeof(double), cudaHostRegisterPortable);
#endif

    /* Initialiaze Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_dlarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_dgels_Tile(n, n, &descT);
  
#if defined(PLASMA_CUDA)
    cudaHostRegister((void*)descT->mat, descT->lm*descT->ln*sizeof(double), cudaHostRegisterPortable);
#endif

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (double *)malloc(lda*n    *sizeof(double));
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    t = -cWtime();
    PLASMA_dgeqrf_Tile( descA, descT );
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if ( check )
      {
        b  = (double *)malloc(ldb*nrhs *sizeof(double));
        bT = (double *)malloc(nt*nb2   *sizeof(double));
        x  = (double *)malloc(ldb*nrhs *sizeof(double));

        LAPACKE_dlarnv_work(1, ISEED, nt*nb2, bT);
        PLASMA_Desc_Create(&descB, bT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);

        PLASMA_dgeqrs_Tile( descA, descT, descB );

        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                             &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), 
                                             &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A ); 
        free( b ); 
        free( bT ); 
        free( x );
      }

    /* Allocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descT);

    PLASMA_Desc_Destroy(&descA);

    free( AT );
    PLASMA_Finalize();
#if defined(PLASMA_CUDA)
#endif
    return 0;
}
示例#3
0
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_) 
{
    double *A, *b, *x;
    double *Acpy = NULL;
    double *bcpy = NULL;
    real_Double_t       t;
    int                *piv;
    int n       = iparam[TIMING_N];
    int nrhs    = iparam[TIMING_NRHS];
    int check   = iparam[TIMING_CHECK];
    int                 lda = n;
    int                 ldb = n;
    int                iter = 0;
    
    /* Allocate Data */
    A = (double *)malloc(lda*n*   sizeof(double));
    b = (double *)malloc(ldb*nrhs*sizeof(double));
    x = (double *)malloc(ldb*nrhs*sizeof(double));
    piv = (int *)malloc( n*sizeof(int));

    /* Check if unable to allocate memory */
    if ( (!A) || (!b) || (!x) || (!piv) ) {
        printf("Out of Memory \n ");
        return -1;
    }
    
    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
        PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    /* } */

     /* Initialiaze Data */
    LAPACKE_dlarnv_work(1, ISEED, lda*n,    A);
    LAPACKE_dlarnv_work(1, ISEED, ldb*nrhs, b);

    /* Save A and b  */
    if (check) {
        Acpy = (double *)malloc(lda*n*   sizeof(double));
        bcpy = (double *)malloc(ldb*nrhs*sizeof(double));
        LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', n, n,    A, lda, Acpy, lda);
        LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', n, nrhs, b, ldb, bcpy, ldb);
      }

    t = -cWtime();
    PLASMA_dsgesv( n, nrhs, A, lda, piv, b, ldb, x, ldb, &iter );
    t += cWtime();
    *t_ = t;
    
    /* Check the solution */
    if (check)
      {
        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, Acpy, lda, bcpy, x, ldb,
                                             &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), 
                                             &(dparam[TIMING_XNORM]));
        free(Acpy); free(bcpy);
      }

    free( piv );
    free( x );
    free( b );
    free( A );

    PLASMA_Finalize();

    return 0;
}
示例#4
0
static double
RunTest(real_Double_t *t_, struct user_parameters* params)
{
    double  t;
    int64_t N     = params->matrix_size;
    int64_t NB    = params->blocksize;
    int check     = params->check;
    int uplo = PlasmaUpper;
    double check_res = 0;

    /* Allocate Data */
    PLASMA_desc *descA = NULL;
    double* ptr = malloc(N * N * sizeof(double));
    PLASMA_Desc_Create(&descA, ptr, PlasmaRealDouble, NB, NB, NB*NB, N, N, 0, 0, N, N);

#pragma omp parallel 
{
#pragma omp single 
{
    plasma_pdplgsy_quark( (double)N, *descA, 51 );
        }
    }

    /* Save A for check */
    double *A = NULL;
    if(check) {
        A = (double*)malloc(N * N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descA, (void*)A, N);
    }

    /* PLASMA DPOSV */
    START_TIMING();
#pragma omp parallel 
{
#pragma omp single 
{
    plasma_pdpotrf_quark(uplo, *descA);
        }
    }
    STOP_TIMING();

    /* Check the solution */
    if ( check )
    {
        PLASMA_desc *descB = NULL;
        double* ptr = (double*)malloc(N * sizeof(double));
        PLASMA_Desc_Create(&descB, ptr, PlasmaRealDouble, NB, NB, NB*NB, N, 1, 0, 0, N, 1);

        plasma_pdpltmg_seq(* descB, 7672 );
        double* B = (double*)malloc(N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descB, (void*)B, N);

        PLASMA_dpotrs_Tile( uplo, descA, descB );

        double* X = (double*)malloc(N * sizeof(double));
        plasma_pdtile_to_lapack_quark(*descB, (void*)X, N);

        check_res = d_check_solution(N, N, 1, A, N, B, X, N);

        PASTE_CODE_FREE_MATRIX( descB );
        free( A );
        free( B );
        free( X );
    }

    PASTE_CODE_FREE_MATRIX( descA );

    return check_res;
}
示例#5
0
static int
RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    double *A = NULL, *AT, *b = NULL, *bT, *x = NULL, *xT;
    real_Double_t       t;
    PLASMA_desc        *descA, *descB, *descX;
    int nb, nb2, nt;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    int iter;

    /* Initialize Plasma */ 
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /*if ( !iparam[TIMING_AUTOTUNING] ) {*/
        PLASMA_Disable(PLASMA_AUTOTUNING);
        PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    /* } else { */
    /*     PLASMA_Get(PLASMA_TILE_SIZE,        &iparam[TIMING_NB] ); */
    /* }  */
    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    nt  = n / nb + ((n % nb == 0) ? 0 : 1);
    
    /* Allocate Data */
    AT = (double *)malloc(nt*nt*nb2*sizeof(double));
    bT = (double *)malloc(nt*nb2   *sizeof(double));
    xT = (double *)malloc(nt*nb2   *sizeof(double));

    /* Check if unable to allocate memory */
    if ( (!AT) || (!bT) || (!xT) ) {
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Initialize AT and bT for Symmetric Positif Matrix */
    PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n,    0, 0, n, n);
    PLASMA_Desc_Create(&descB, bT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
    PLASMA_Desc_Create(&descX, xT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
    PLASMA_dplgsy_Tile((double)n, descA, 51 );
    LAPACKE_dlarnv_work(1, ISEED, nt*nb2, bT);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
        A = (double *)malloc(lda*n    *sizeof(double));
        b = (double *)malloc(ldb*nrhs *sizeof(double));
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
        PLASMA_Tile_to_Lapack(descB, (void*)b, n);
    }

    /* PLASMA DSPOSV */
    t = -cWtime();
    PLASMA_dsposv_Tile(PlasmaUpper, descA, descB, descX, &iter);
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if (check)
      {
        x = (double *)malloc(ldb*nrhs *sizeof(double));
        PLASMA_Tile_to_Lapack(descX, (void*)x, n);

        dparam[TIMING_RES] = d_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                             &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), 
                                             &(dparam[TIMING_XNORM]));
        free(A); free(b); free(x);
      }

    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);
    PLASMA_Desc_Destroy(&descX);

    free(AT); free(bT); free(xT);

    PLASMA_Finalize();

    return 0;
}