Esempio n. 1
0
magma_int_t
magma_capplypastix( magma_c_vector b, magma_c_vector *x, 
                    magma_c_preconditioner *precond ){

#if defined(HAVE_PASTIX)

    #if defined(PRECISION_d)

        pastix_int_t      ncol;               /* Size of the matrix                                        */
        pastix_int_t     *colptr      = NULL; /* Indexes of first element of each column in row and values */
        pastix_int_t     *rows        = NULL; /* Row of each element of the matrix                         */
        pastix_float_t   *values      = NULL; /* Value of each element of the matrix                       */
        pastix_float_t   *rhs         = NULL; /* right hand side                                           */
        pastix_int_t     *iparm;  /* integer parameters for pastix                             */
        float           *dparm;  /* floating parameters for pastix                            */
        pastix_int_t     *perm        = NULL; /* Permutation tabular                                       */
        pastix_int_t     *invp        = NULL; /* Reverse permutation tabular                               */

        magma_c_vector b_h;

        magma_c_vtransfer( b, &b_h, b.memory_location, Magma_CPU);

        rhs = (pastix_float_t*) b_h.val;
        ncol = precond->M.num_rows;
        colptr = (pastix_int_t*) precond->M.col;
        rows = (pastix_int_t*) precond->M.row;
        values = (pastix_float_t*) precond->M.val;
        iparm = precond->iparm;
        dparm = precond->dparm;

        perm = (pastix_int_t*)precond->int_array_1; 
        invp = (pastix_int_t*)precond->int_array_1; 

        /*******************************************/
        /*     Step 5 - Solve                      */
        /* For each one of your Right-hand-side    */
        /* members.                                */
        /* Also consider using multiple            */
        /* right-hand-side members.                */
        /*******************************************/
        iparm[IPARM_START_TASK] = API_TASK_SOLVE;
        iparm[IPARM_END_TASK]   = API_TASK_REFINEMENT;


        pastix(&(precond->pastix_data), MPI_COMM_WORLD,
             ncol, colptr, rows, values,
             perm, invp, b_h.val, 1, iparm, dparm);

        // fix that x is not allocated every time
        //  in case of many iterations, it might be faster to use
        // magma_csetvector( ncol, 
        //                                    b_h.val, 1, x->val, 1 );
        magma_c_vfree( x );
        magma_c_vtransfer( b_h, x, Magma_CPU, b.memory_location);

        magma_c_vfree( &b_h);

    #else
        printf( "error: only real supported yet.\n");
    #endif

#else
        printf( "error: pastix not available.\n");
#endif

    return MAGMA_SUCCESS;

}
Esempio n. 2
0
magma_int_t
magma_cpastixsetup( magma_c_sparse_matrix A, magma_c_vector b,
                        magma_c_preconditioner *precond ){

#if defined(HAVE_PASTIX)

    #if defined(PRECISION_d)

        pastix_data_t    *pastix_data = NULL; /* Pointer to a storage structure needed by pastix           */
        pastix_int_t      ncol;               /* Size of the matrix                                        */
        pastix_int_t     *colptr      = NULL; /* Indexes of first element of each column in row and values */
        pastix_int_t     *rows        = NULL; /* Row of each element of the matrix                         */
        pastix_float_t   *values      = NULL; /* Value of each element of the matrix                       */
        pastix_float_t   *rhs         = NULL; /* right hand side                                           */
        pastix_int_t     *iparm = NULL;  /* integer parameters for pastix                             */
        float           *dparm = NULL;  /* floating parameters for pastix                            */
        pastix_int_t     *perm        = NULL; /* Permutation tabular                                       */
        pastix_int_t     *invp        = NULL; /* Reverse permutation tabular                               */
        pastix_int_t      mat_type;

        magma_c_sparse_matrix A_h1, B;
        magma_c_vector diag, c_t, b_h;
        magma_c_vinit( &c_t, Magma_CPU, A.num_rows, MAGMA_C_ZERO );
        magma_c_vinit( &diag, Magma_CPU, A.num_rows, MAGMA_C_ZERO );
        magma_c_vtransfer( b, &b_h, A.memory_location, Magma_CPU);

        if( A.storage_type != Magma_CSR ){
            magma_c_mtransfer( A, &A_h1, A.memory_location, Magma_CPU);
            magma_c_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR);
        }
        else{
            magma_c_mtransfer( A, &B, A.memory_location, Magma_CPU);
        }


        rhs = (pastix_float_t*) b_h.val;
        ncol = B.num_rows;
        colptr = B.row;
        rows = B.col;
        values = (pastix_float_t*) B.val;

        mat_type = API_SYM_NO;

        iparm = (pastix_int_t*)malloc(IPARM_SIZE*sizeof(pastix_int_t));
        dparm = (pastix_float_t*)malloc(DPARM_SIZE*sizeof(pastix_float_t));

        /*******************************************/
        /* Initialize parameters to default values */
        /*******************************************/
        iparm[IPARM_MODIFY_PARAMETER]    = API_NO;
        pastix(&pastix_data, MPI_COMM_WORLD,
             ncol, colptr, rows, values,
             perm, invp, rhs, 1, iparm, dparm);
        iparm[IPARM_THREAD_NBR]          = 16;
        iparm[IPARM_SYM]                 = mat_type;
        iparm[IPARM_FACTORIZATION]       = API_FACT_LU;
        iparm[IPARM_VERBOSE]             = API_VERBOSE_YES;
        iparm[IPARM_ORDERING]            = API_ORDER_SCOTCH;
        iparm[IPARM_INCOMPLETE]          = API_NO;
        iparm[IPARM_RHS_MAKING]          = API_RHS_B;
        //iparm[IPARM_AMALGAMATION]         = 5;
        iparm[IPARM_LEVEL_OF_FILL]       = 0;
        /*  if (incomplete == 1)
            {
            dparm[DPARM_EPSILON_REFINEMENT] = 1e-7;
            }
        */


        /*
         * Matrix needs :
         *    - to be in fortran numbering
         *    - to have only the lower triangular part in symmetric case
         *    - to have a graph with a symmetric structure in unsymmetric case
         * If those criteria are not matched, the csc will be reallocated and changed. 
         */
        iparm[IPARM_MATRIX_VERIFICATION] = API_YES;

        perm = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t));
        invp = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t));

        /*******************************************/
        /*      Step 1 - Ordering / Scotch         */
        /*  Perform it only when the pattern of    */
        /*  matrix change.                         */
        /*  eg: mesh refinement                    */
        /*  In many cases users can simply go from */
        /*  API_TASK_ORDERING to API_TASK_ANALYSE  */
        /*  in one call.                           */
        /*******************************************/
        /*******************************************/
        /*      Step 2 - Symbolic factorization    */
        /*  Perform it only when the pattern of    */
        /*  matrix change.                         */
        /*******************************************/
        /*******************************************/
        /* Step 3 - Mapping and Compute scheduling */
        /*  Perform it only when the pattern of    */
        /*  matrix change.                         */
        /*******************************************/
        /*******************************************/
        /*     Step 4 - Numerical Factorisation    */
        /* Perform it each time the values of the  */
        /* matrix changed.                         */
        /*******************************************/

        iparm[IPARM_START_TASK] = API_TASK_ORDERING;
        iparm[IPARM_END_TASK]   = API_TASK_NUMFACT;

        pastix(&pastix_data, MPI_COMM_WORLD,
             ncol, colptr, rows, values,
             perm, invp, NULL, 1, iparm, dparm);

        precond->int_array_1 = (magma_int_t*) perm;
        precond->int_array_2 = (magma_int_t*) invp;

        precond->M.val = (magmaFloatComplex*) values;
        precond->M.col = (magma_int_t*) colptr;
        precond->M.row = (magma_int_t*) rows;
        precond->M.num_rows = A.num_rows;
        precond->M.num_cols = A.num_cols;
        precond->M.memory_location = Magma_CPU;
        precond->pastix_data = pastix_data;
        precond->iparm = iparm;
        precond->dparm = dparm;

        if( A.storage_type != Magma_CSR){
            magma_c_mfree( &A_h1 );
        }   
        magma_c_vfree( &b_h);
        magma_c_mfree( &B );

    #else
        printf( "error: only real supported yet.\n");
    #endif

#else
        printf( "error: pastix not available.\n");
#endif

    return MAGMA_SUCCESS;

}
Esempio n. 3
0
/* ////////////////////////////////////////////////////////////////////////////
   -- testing sparse matrix vector product
*/
int main(  int argc, char** argv )
{
    TESTING_INIT();
    magma_queue_t queue;
    magma_queue_create( /*devices[ opts->device ],*/ &queue );

    magma_c_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL;
    hA_SELLP.blocksize = 8;
    hA_SELLP.alignment = 8;
    real_Double_t start, end, res;
    magma_int_t *pntre;

    magmaFloatComplex c_one  = MAGMA_C_MAKE(1.0, 0.0);
    magmaFloatComplex c_zero = MAGMA_C_MAKE(0.0, 0.0);
    
    magma_int_t i, j;
    for( i = 1; i < argc; ++i ) {
        if ( strcmp("--blocksize", argv[i]) == 0 ) {
            hA_SELLP.blocksize = atoi( argv[++i] );
        } else if ( strcmp("--alignment", argv[i]) == 0 ) {
            hA_SELLP.alignment = atoi( argv[++i] );
        } else
            break;
    }
    printf( "\n#    usage: ./run_cspmv"
        " [ --blocksize %d --alignment %d (for SELLP) ]"
        " matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment );

    while(  i < argc ) {

        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            magma_cm_5stencil(  laplace_size, &hA, queue );
        } else {                        // file-matrix test
            magma_c_csr_mtx( &hA,  argv[i], queue );
        }

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                            (int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz );

        real_Double_t FLOPS = 2.0*hA.nnz/1e9;

        magma_c_vector hx, hy, dx, dy, hrefvec, hcheck;

        // init CPU vectors
        magma_c_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue );
        magma_c_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue );

        // init DEV vectors
        magma_c_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue );
        magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );

        #ifdef MAGMA_WITH_MKL
            // calling MKL with CSR
            pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) );
            pntre[0] = 0;
            for (j=0; j<hA.num_rows; j++ ) {
                pntre[j] = hA.row[j+1];
            }
             MKL_INT num_rows = hA.num_rows;
             MKL_INT num_cols = hA.num_cols;
             MKL_INT nnz = hA.nnz;

            MKL_INT *col;
            TESTING_MALLOC_CPU( col, MKL_INT, nnz );
            for( magma_int_t t=0; t < hA.nnz; ++t ) {
                col[ t ] = hA.col[ t ];
            }
            MKL_INT *row;
            TESTING_MALLOC_CPU( row, MKL_INT, num_rows );
            for( magma_int_t t=0; t < hA.num_rows; ++t ) {
                row[ t ] = hA.col[ t ];
            }
    
            start = magma_wtime();
            for (j=0; j<10; j++ ) {
                mkl_ccsrmv( "N", &num_rows, &num_cols, 
                            MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), 
                            col, row, pntre, 
                                                    MKL_ADDR(hx.val), 
                            MKL_ADDR(&c_zero),        MKL_ADDR(hy.val) );
            }
            end = magma_wtime();
            printf( "\n > MKL  : %.2e seconds %.2e GFLOP/s    (CSR).\n",
                                            (end-start)/10, FLOPS*10/(end-start) );

            TESTING_FREE_CPU( row );
            TESTING_FREE_CPU( col );
            free(pntre);
        #endif // MAGMA_WITH_MKL

        // copy matrix to GPU
        magma_c_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );        
        // SpMV on GPU (CSR) -- this is the reference!
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_c_spmv( c_one, dA, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (standard CSR).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        magma_c_mfree(&dA, queue );
        magma_c_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue );

        // convert to ELL and copy to GPU
        magma_c_mconvert(  hA, &hA_ELL, Magma_CSR, Magma_ELL, queue );
        magma_c_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue );
        magma_c_mfree(&hA_ELL, queue );
        magma_c_vfree( &dy, queue );
        magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
        // SpMV on GPU (ELL)
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_c_spmv( c_one, dA_ELL, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (standard ELL).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        magma_c_mfree(&dA_ELL, queue );
        magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]);
        if ( res < .000001 )
            printf("# tester spmv ELL:  ok\n");
        else
            printf("# tester spmv ELL:  failed\n");
        magma_c_vfree( &hcheck, queue );

        // convert to SELLP and copy to GPU
        magma_c_mconvert(  hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue );
        magma_c_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue );
        magma_c_mfree(&hA_SELLP, queue );
        magma_c_vfree( &dy, queue );
        magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
        // SpMV on GPU (SELLP)
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_c_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (SELLP).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );

        magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv SELL-P:  ok\n");
        else
            printf("# tester spmv SELL-P:  failed\n");
        magma_c_vfree( &hcheck, queue );

        magma_c_mfree(&dA_SELLP, queue );


        // SpMV on GPU (CUSPARSE - CSR)
        // CUSPARSE context //

        cusparseHandle_t cusparseHandle = 0;
        cusparseStatus_t cusparseStatus;
        cusparseStatus = cusparseCreate(&cusparseHandle);
        cusparseSetStream( cusparseHandle, queue );

        cusparseMatDescr_t descr = 0;
        cusparseStatus = cusparseCreateMatDescr(&descr);

        cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
        cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
        magmaFloatComplex alpha = c_one;
        magmaFloatComplex beta = c_zero;
        magma_c_vfree( &dy, queue );
        magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );

        // copy matrix to GPU
        magma_c_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );

        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            cusparseStatus =
            cusparseCcsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, 
                        hA.num_rows, hA.num_cols, hA.nnz, &alpha, descr, 
                        dA.dval, dA.drow, dA.dcol, dx.dval, &beta, dy.dval);
        end = magma_sync_wtime( queue );
        if (cusparseStatus != 0)    printf("error in cuSPARSE CSR\n");
        printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s    (CSR).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        cusparseMatDescr_t descrA;
        cusparseStatus = cusparseCreateMatDescr(&descrA);
         if (cusparseStatus != 0)    printf("error\n");
        cusparseHybMat_t hybA;
        cusparseStatus = cusparseCreateHybMat( &hybA );
         if (cusparseStatus != 0)    printf("error\n");

        magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv cuSPARSE CSR:  ok\n");
        else
            printf("# tester spmv cuSPARSE CSR:  failed\n");
        magma_c_vfree( &hcheck, queue );
        magma_c_vfree( &dy, queue );
        magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
       
        cusparseCcsr2hyb(cusparseHandle,  hA.num_rows, hA.num_cols,
                        descrA, dA.dval, dA.drow, dA.dcol,
                        hybA, 0, CUSPARSE_HYB_PARTITION_AUTO);

        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            cusparseStatus =
            cusparseChybmv( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, 
               &alpha, descrA, hybA,
               dx.dval, &beta, dy.dval);
        end = magma_sync_wtime( queue );
        if (cusparseStatus != 0)    printf("error in cuSPARSE HYB\n");
        printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s    (HYB).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );

        magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv cuSPARSE HYB:  ok\n");
        else
            printf("# tester spmv cuSPARSE HYB:  failed\n");
        magma_c_vfree( &hcheck, queue );

        cusparseDestroyMatDescr( descrA );
        cusparseDestroyHybMat( hybA );
        cusparseDestroy( cusparseHandle );

        magma_c_mfree(&dA, queue );



        printf("\n\n");


        // free CPU memory
        magma_c_mfree(&hA, queue );
        magma_c_vfree(&hx, queue );
        magma_c_vfree(&hy, queue );
        magma_c_vfree(&hrefvec, queue );
        // free GPU memory
        magma_c_vfree(&dx, queue );
        magma_c_vfree(&dy, queue );

        i++;

    }
    
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return 0;
}