예제 #1
0
/**
    Adds the scalar `add` to every stored entry of A whose column index
    equals its row index, modifying A in place.

    A matrix that already has memory_location Magma_CPU and storage_type
    Magma_CSRCOO is updated directly. Any other matrix is passed through
    magma_z_mtransfer (to Magma_CPU) and magma_z_mconvert (to Magma_CSRCOO),
    updated by a recursive call, and then converted/transferred back to the
    storage type and memory location it had on entry.

    @param[in,out] A      matrix whose diagonal entries are shifted
    @param[in]     add    value added to each diagonal entry
    @param[in]     queue  queue forwarded to the transfer/convert/free helpers

    @return MAGMA_SUCCESS; the return codes of the helpers are not inspected.
*/
extern "C" magma_int_t
magma_zmdiagadd(
    magma_z_sparse_matrix *A, 
    magmaDoubleComplex add,
    magma_queue_t queue )
{
    if ( A->memory_location != Magma_CPU || A->storage_type != Magma_CSRCOO ) {
        // Round trip: work on a CSRCOO copy, then rebuild A in its
        // original layout and location.
        const magma_location_t origin_location = A->memory_location;
        const magma_storage_t  origin_storage  = A->storage_type;
        magma_z_sparse_matrix staged, coo;

        magma_z_mtransfer( *A, &staged, A->memory_location, Magma_CPU, queue );
        magma_z_mconvert( staged, &coo, staged.storage_type, Magma_CSRCOO, queue );

        magma_zmdiagadd( &coo, add, queue );

        // A is released before it is refilled from the updated copy.
        magma_z_mfree( &staged, queue );
        magma_z_mfree( A, queue );
        magma_z_mconvert( coo, &staged, Magma_CSRCOO, origin_storage, queue );
        magma_z_mtransfer( staged, A, Magma_CPU, origin_location, queue );
        magma_z_mfree( &staged, queue );
        magma_z_mfree( &coo, queue );

        return MAGMA_SUCCESS;
    }

    // Direct path: rowidx[k]/col[k] give the coordinates of entry k.
    for ( magma_int_t k = 0; k < A->nnz; ++k ) {
        if ( A->rowidx[k] != A->col[k] )
            continue;
        A->val[k] = A->val[k] + add;
    }
    return MAGMA_SUCCESS;
}
예제 #2
0
/**
    Runs solver_par->maxiter sweeps of magma_zbajac_csr on the system A x = b
    and records residuals and runtime in solver_par.

    A is transferred with source location Magma_CPU, so it is presumably
    expected to live on the host (TODO confirm against callers). It is split
    by magma_zcsrsplit with block size 256 into two matrices that are then
    transferred to Magma_DEV.

    @param[in]     A           system matrix
    @param[in]     b           right-hand side
    @param[in,out] x           iterate, updated by magma_zbajac_csr
    @param[in,out] solver_par  maxiter is read; solver, info, init_res,
                               final_res, runtime, numiter, res_vec and
                               timing are written

    @return MAGMA_SUCCESS always; solver_par->info is 0 when the final
            residual is smaller than the initial one and -1 otherwise.
*/
magma_int_t
magma_zbaiter( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x,  
           magma_z_solver_par *solver_par )
{
    magma_z_sparse_matrix dA, hD, hR, dD, dR;
    double res_norm;

    // solver feedback defaults
    solver_par->solver = Magma_BAITER;
    solver_par->info = 0;

    magma_z_mtransfer( A, &dA, Magma_CPU, Magma_DEV );

    // residual before iterating
    magma_zresidual( dA, b, *x, &res_norm );
    solver_par->init_res = res_norm;
    solver_par->res_vec = NULL;
    solver_par->timing = NULL;

    // split A and move both parts to the device
    magma_zcsrsplit( 256, A, &hD, &hR );
    magma_z_mtransfer( hD, &dD, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( hR, &dR, Magma_CPU, Magma_DEV );

    const magma_int_t local_sweeps = 1;

    magma_device_sync();
    const real_Double_t t_begin = magma_wtime();

    // fixed number of sweeps; there is no convergence test inside the loop
    for( int sweep = 0; sweep < solver_par->maxiter; ++sweep ) {
        magma_zbajac_csr( local_sweeps, dD, dR, b, x );
    }

    magma_device_sync();
    const real_Double_t t_end = magma_wtime();
    solver_par->runtime = (real_Double_t) t_end - t_begin;

    magma_zresidual( dA, b, *x, &res_norm );
    solver_par->final_res = res_norm;
    solver_par->numiter = solver_par->maxiter;

    // info reports whether the residual went down
    solver_par->info =
        ( solver_par->init_res > solver_par->final_res ) ? 0 : -1;

    magma_z_mfree(&hD);
    magma_z_mfree(&hR);
    magma_z_mfree(&dD);
    magma_z_mfree(&dR);
    magma_z_mfree(&dA);

    return MAGMA_SUCCESS;
}   /* magma_zbaiter */
예제 #3
0
/**
    Builds the vector d holding the reciprocals of the diagonal entries of A.

    A is transferred to Magma_CPU (and converted to Magma_CSR when it is
    stored in another format); the diagonal entry of each row is located by
    scanning the row, and its reciprocal is stored. The result is transferred
    to A.memory_location.

    Rows without a stored diagonal entry keep the value MAGMA_Z_ZERO that the
    work vector was initialised with. A stored diagonal entry equal to zero
    is reported on stdout; the division is still carried out.

    @param[in]  A      input matrix
    @param[out] d      vector of reciprocal diagonal entries
    @param[in]  queue  queue forwarded to the helper routines

    @return MAGMA_SUCCESS; helper return codes are not inspected.
*/
extern "C" magma_int_t
magma_zjacobisetup_diagscal(
    magma_z_sparse_matrix A, magma_z_vector *d,
    magma_queue_t queue )
{
    magma_int_t i;

    magma_z_sparse_matrix A_h1, B;
    magma_z_vector diag;
    magma_z_vinit( &diag, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue );

    // B: copy of A with memory location Magma_CPU and storage type Magma_CSR
    if ( A.storage_type != Magma_CSR) {
        magma_z_mtransfer( A, &A_h1, A.memory_location, Magma_CPU, queue );
        magma_z_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR, queue );
    }
    else {
        magma_z_mtransfer( A, &B, A.memory_location, Magma_CPU, queue );
    }
    for( magma_int_t rowindex=0; rowindex<B.num_rows; rowindex++ ) {
        magma_int_t start = (B.drow[rowindex]);
        magma_int_t end = (B.drow[rowindex+1]);
        for( i=start; i<end; i++ ) {
            if ( B.dcol[i]==rowindex ) {
                // Test the matrix entry itself: the reciprocal of a zero
                // entry is not zero, so checking 1/a_ii cannot detect it, and
                // a purely imaginary a_ii has a reciprocal with zero real part.
                if ( MAGMA_Z_REAL( B.val[i] ) == 0
                     && MAGMA_Z_IMAG( B.val[i] ) == 0 )
                    printf(" error: zero diagonal element in row %d!\n", 
                                                                (int) rowindex);
                diag.val[rowindex] = 1.0/B.val[i];
            }
        }
    }
    magma_z_vtransfer( diag, d, Magma_CPU, A.memory_location, queue );

    // A_h1 only exists when a format conversion was needed
    if ( A.storage_type != Magma_CSR) {
        magma_z_mfree( &A_h1, queue );
    }
    magma_z_mfree( &B, queue );
    magma_z_vfree( &diag, queue );
 
    return MAGMA_SUCCESS;
}
예제 #4
0
/**
    Jacobi driver: prepares the iteration with magma_zjacobisetup, runs
    magma_zjacobiiter with solver_par->maxiter iterations and stores
    residuals and runtime in solver_par.

    @param[in]     A           system matrix
    @param[in]     b           right-hand side
    @param[in,out] x           iterate
    @param[in,out] solver_par  maxiter is read; solver, info, init_res,
                               final_res, runtime, numiter, res_vec and
                               timing are written
    @param[in]     queue       queue forwarded to the helper routines

    @return MAGMA_SUCCESS always; solver_par->info is MAGMA_DIVERGENCE when
            the final residual is not smaller than the initial one.
*/
extern "C" magma_int_t
magma_zjacobi(
    magma_z_sparse_matrix A, 
    magma_z_vector b, 
    magma_z_vector *x,  
    magma_z_solver_par *solver_par,
    magma_queue_t queue )
{
    // remember the current kernel stream; it is reinstated before returning
    magma_queue_t saved_stream;
    magmablasGetKernelStream( &saved_stream );

    // solver feedback defaults
    solver_par->solver = Magma_JACOBI;
    solver_par->info = MAGMA_SUCCESS;

    double res_norm;
    magma_zresidual( A, b, *x, &res_norm, queue );
    solver_par->init_res = res_norm;
    solver_par->res_vec = NULL;
    solver_par->timing = NULL;

    const magmaDoubleComplex zero      = MAGMA_Z_ZERO;
    const magmaDoubleComplex one       = MAGMA_Z_ONE;
    const magmaDoubleComplex minus_one = MAGMA_Z_NEG_ONE;
    const magma_int_t n = A.num_rows;

    // r = A x - b and its norm; the norm is computed but not used afterwards
    magma_z_vector r;
    magma_z_vinit( &r, Magma_DEV, n, zero, queue );
    magma_z_spmv( one, A, *x, zero, r, queue );
    magma_zaxpy( n, minus_one, b.dval, 1, r.dval, 1 );
    const double initial_norm = magma_dznrm2( n, r.dval, 1 );
    (void) initial_norm;

    // iteration matrix M and vector c
    magma_z_sparse_matrix M;
    magma_z_vector c;
    magma_zjacobisetup( A, b, &M, &c, queue );

    // only maxiter is set on the inner parameter set
    magma_z_solver_par inner_par;
    inner_par.maxiter = solver_par->maxiter;

    const real_Double_t t_begin = magma_sync_wtime( queue );
    magma_zjacobiiter( M, c, x, &inner_par, queue ); 
    const real_Double_t t_end = magma_sync_wtime( queue );

    solver_par->runtime = (real_Double_t) t_end - t_begin;
    magma_zresidual( A, b, *x, &res_norm, queue );
    solver_par->final_res = res_norm;
    solver_par->numiter = solver_par->maxiter;

    solver_par->info = ( solver_par->init_res > solver_par->final_res )
                     ? MAGMA_SUCCESS
                     : MAGMA_DIVERGENCE;

    magma_z_mfree( &M, queue );
    magma_z_vfree( &c, queue );
    magma_z_vfree( &r, queue );

    magmablasSetKernelStream( saved_stream );
    return MAGMA_SUCCESS;
}   /* magma_zjacobi */
예제 #5
0
/**
    Prepares the Jacobi iteration for A x = b.

    Working on a Magma_CPU / Magma_CSR copy of A, every row is divided by its
    diagonal entry and the diagonal entry itself is set to zero; b is divided
    entry-wise by the same diagonal. The scaled matrix is passed through
    magma_z_csr_compressor, converted back to the storage type of A when that
    is not Magma_CSR, and transferred to A.memory_location as M. The scaled
    right-hand side is returned as c.

    A stored diagonal entry equal to zero is reported on stdout; the division
    is still carried out. Rows without a stored diagonal entry are divided by
    the MAGMA_Z_ZERO the work vector was initialised with.

    @param[in]  A      system matrix
    @param[in]  b      right-hand side
    @param[out] M      scaled matrix with zeroed diagonal
    @param[out] c      scaled right-hand side
    @param[in]  queue  queue forwarded to the helper routines

    @return MAGMA_SUCCESS; helper return codes are not inspected.
*/
extern "C" magma_int_t
magma_zjacobisetup(
    magma_z_sparse_matrix A, magma_z_vector b, 
    magma_z_sparse_matrix *M, magma_z_vector *c,
    magma_queue_t queue )
{
    magma_int_t i;

    magma_z_sparse_matrix A_h1, A_h2, B, C;
    magma_z_vector diag, c_t, b_h;
    magma_z_vinit( &c_t, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue );
    magma_z_vinit( &diag, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue );
    magma_z_vtransfer( b, &b_h, A.memory_location, Magma_CPU, queue );

    // B: copy of A with memory location Magma_CPU and storage type Magma_CSR
    if ( A.storage_type != Magma_CSR ) {
        magma_z_mtransfer( A, &A_h1, A.memory_location, Magma_CPU, queue );
        magma_z_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR, queue );
    }
    else {
        magma_z_mtransfer( A, &B, A.memory_location, Magma_CPU, queue );
    }
    for( magma_int_t rowindex=0; rowindex<B.num_rows; rowindex++ ) {
        magma_int_t start = (B.drow[rowindex]);
        magma_int_t end = (B.drow[rowindex+1]);
        // first pass: pick up the diagonal entry of this row
        for( i=start; i<end; i++ ) {
            if ( B.dcol[i]==rowindex ) {
                diag.val[rowindex] = B.val[i];
                // A complex entry is zero only when both parts vanish; testing
                // the real part alone misreports purely imaginary diagonals.
                if ( MAGMA_Z_REAL( diag.val[rowindex]) == 0
                     && MAGMA_Z_IMAG( diag.val[rowindex]) == 0 )
                    printf(" error: zero diagonal element in row %d!\n", 
                                                               (int) rowindex);
            }
        }
        // second pass: scale the row and clear the diagonal position
        for( i=start; i<end; i++ ) {
            B.val[i] = B.val[i] / diag.val[rowindex];
            if ( B.dcol[i]==rowindex ) {
                B.val[i] = MAGMA_Z_MAKE( 0., 0. );
            }
        }
        c_t.val[rowindex] = b_h.val[rowindex] / diag.val[rowindex];

    }

    magma_z_csr_compressor(&B.val, &B.drow, &B.dcol, 
                           &C.val, &C.drow, &C.dcol, &B.num_rows, queue );  

    // the compressor only fills the three arrays; set the metadata by hand
    C.num_rows = B.num_rows;
    C.num_cols = B.num_cols;
    C.memory_location = B.memory_location;
    C.nnz = C.drow[B.num_rows];
    C.storage_type = B.storage_type;
    if ( A.storage_type != Magma_CSR) {
        // alignment/blocksize are set on the target before the conversion
        A_h2.alignment = A.alignment;
        A_h2.blocksize = A.blocksize;
        magma_z_mconvert( C, &A_h2, Magma_CSR, A_h1.storage_type, queue );
        magma_z_mtransfer( A_h2, M, Magma_CPU, A.memory_location, queue );
    }
    else {
        magma_z_mtransfer( C, M, Magma_CPU, A.memory_location, queue );
    }     
    magma_z_vtransfer( c_t, c, Magma_CPU, A.memory_location, queue );

    // A_h1 / A_h2 only exist when a format conversion was needed
    if ( A.storage_type != Magma_CSR) {
        magma_z_mfree( &A_h1, queue );
        magma_z_mfree( &A_h2, queue );   
    }   
    magma_z_mfree( &B, queue );
    magma_z_mfree( &C, queue );  
    magma_z_vfree( &diag, queue );
    magma_z_vfree( &c_t, queue );
    magma_z_vfree( &b_h, queue );

    return MAGMA_SUCCESS;
}
예제 #6
0
/* ////////////////////////////////////////////////////////////////////////////
   -- running magma_zbaiter

   Command line:  [ --mscale k ] [ --maxiter n ]  matrix-file ...

   Options are parsed until the first argument that is not a recognised
   option; every remaining argument is read as a matrix with
   magma_z_csr_mtx, scaled, and solved with magma_zbaiter for a right-hand
   side of ones and a zero initial guess.

   An option given as the very last argument has no value to consume; this
   is reported and no matrices are processed.
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    magma_z_solver_par solver_par;
    magma_z_preconditioner precond_par;
    solver_par.maxiter = 1000;
    solver_par.verbose = 0;
    solver_par.num_eigenvalues = 0;
    int scale = 0;
    magma_scale_t scaling = Magma_NOSCALE;
    
    magma_z_sparse_matrix A;
    magma_z_vector x, b;

    magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0);

    int i;
    for( i = 1; i < argc; ++i ) {
        const int is_maxiter = ( strcmp("--maxiter", argv[i]) == 0 );
        const int is_mscale  = ( strcmp("--mscale", argv[i]) == 0 );

        if ( !is_maxiter && !is_mscale )
            break;              // first non-option argument: matrices start

        // Both options take a value; argv[argc] is a null pointer and must
        // never be handed to atoi().
        if ( i+1 >= argc ) {
            printf( "error: option %s requires a value\n", argv[i] );
            i = argc;           // nothing left that could name a matrix
            break;
        }

        if ( is_maxiter ) {
            solver_par.maxiter = atoi( argv[++i] );
        } else {
            scale = atoi( argv[++i] );
            switch( scale ) {
                case 0: scaling = Magma_NOSCALE; break;
                case 1: scaling = Magma_UNITDIAG; break;
                case 2: scaling = Magma_UNITROW; break;
                default: break; // unknown code: keep the current scaling
            }
        }
    }
    printf( "\n#    usage: ./run_zbaiter"
        " [ "
        " --mscale %d (0=no, 1=unitdiag, 2=unitrownrm)"
        " --maxiter %d ]"
        " matrices \n\n",
        (int) scale,
        (int) solver_par.maxiter);

    magma_zsolverinfo_init( &solver_par, &precond_par );

    while(  i < argc ){

        magma_z_csr_mtx( &A,  argv[i]  ); 

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                            (int) A.num_rows,(int) A.num_cols,(int) A.nnz );

        // scale the matrix
        magma_zmscale( &A, scaling );

        // right-hand side of ones, zero initial guess
        magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
        magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );

        magma_zbaiter( A, b, &x, &solver_par );

        magma_zsolverinfo( &solver_par, &precond_par );


        magma_z_mfree(&A); 
        magma_z_vfree(&x);
        magma_z_vfree(&b);
        
        i++;
    }

    magma_zsolverinfo_free( &solver_par, &precond_par );

    TESTING_FINALIZE();

    return 0;
}
예제 #7
0
/* ////////////////////////////////////////////////////////////////////////////
   -- testing csr matrix add

   Command line:  <matrix A> <matrix B>
   where each matrix is either a file name (read with magma_z_csr_mtx) or
   the pair "LAPLACE2D <n>" (generated with magma_zm_5stencil).

   The test forms C = A + B and then B2 = C - A with magma_zcuspaxpy and
   reports whether magma_zmdiff( B, B2 ) is below 1e-6.

   Returns -1 without running the test when an operand is missing.
*/
int main(  int argc, char** argv )
{
    TESTING_INIT();
    
    magma_queue_t queue;
    magma_queue_create( /*devices[ opts->device ],*/ &queue );

    real_Double_t res;
    magma_z_sparse_matrix A, B, B2, A_d, B_d, C_d;

    magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex mone = MAGMA_Z_MAKE(-1.0, 0.0);

    // Walk the argument list once with the same rules used below to make
    // sure both operands exist before any argv entry is dereferenced.
    {
        int pos = 1;
        int operands = 0;
        while ( operands < 2 && pos < argc ) {
            const int is_laplace =
                ( pos+1 < argc && strcmp("LAPLACE2D", argv[pos]) == 0 );
            pos += is_laplace ? 2 : 1;
            operands++;
        }
        if ( operands < 2 ) {
            printf( "# usage: %s <matrix> <matrix>"
                    "   (<matrix> = file name | LAPLACE2D <n>)\n",
                    ( argc > 0 ) ? argv[0] : "tester" );
            magma_queue_destroy( queue );
            TESTING_FINALIZE();
            return -1;
        }
    }

    magma_int_t i=1;

    if ( i+1 < argc && strcmp("LAPLACE2D", argv[i]) == 0 ) {   // Laplace test
        i++;
        magma_int_t laplace_size = atoi( argv[i] );
        magma_zm_5stencil(  laplace_size, &A, queue );
    } else {                        // file-matrix test
        magma_z_csr_mtx( &A,  argv[i], queue );
    }
    printf( "# matrix info: %d-by-%d with %d nonzeros\n",
                        (int) A.num_rows,(int) A.num_cols,(int) A.nnz );
    i++;

    if ( i+1 < argc && strcmp("LAPLACE2D", argv[i]) == 0 ) {   // Laplace test
        i++;
        magma_int_t laplace_size = atoi( argv[i] );
        magma_zm_5stencil(  laplace_size, &B, queue );
    } else {                        // file-matrix test
        magma_z_csr_mtx( &B,  argv[i], queue );
    }
    printf( "# matrix info: %d-by-%d with %d nonzeros\n",
                        (int) B.num_rows,(int) B.num_cols,(int) B.nnz );


    magma_z_mtransfer( A, &A_d, Magma_CPU, Magma_DEV, queue );
    magma_z_mtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue );

    // C_d = A_d + B_d
    magma_zcuspaxpy( &one, A_d, &one, B_d, &C_d, queue );

    // B_d is reused as the output of the second sum
    magma_z_mfree(&B_d, queue );

    // B_d = C_d - A_d, which should reproduce B
    magma_zcuspaxpy( &mone, A_d, &one, C_d, &B_d, queue );
    
    magma_z_mtransfer( B_d, &B2, Magma_DEV, Magma_CPU, queue );

    magma_z_mfree(&A_d, queue );
    magma_z_mfree(&B_d, queue );
    magma_z_mfree(&C_d, queue );

    // check difference
    magma_zmdiff( B, B2, &res, queue );
    printf("# ||A-B||_F = %8.2e\n", res);
    if ( res < .000001 )
        printf("# tester matrix add:  ok\n");
    else
        printf("# tester matrix add:  failed\n");

    magma_z_mfree(&A, queue ); 
    magma_z_mfree(&B, queue ); 
    magma_z_mfree(&B2, queue ); 

    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return 0;
}
예제 #8
0
/* ////////////////////////////////////////////////////////////////////////////
   -- running magma_zgmres

   Command line:
     [ --format k ] [ --blocksize n ] [ --alignment n ] [ --mscale k ]
     [ --verbose k ] [ --ortho k ] [ --restart n ] [ --maxiter n ]
     [ --tol eps ]  matrix-file ...

   Options are parsed until the first argument that is not a recognised
   option; every remaining argument is read as a matrix with
   magma_z_csr_mtx. For each matrix the right-hand side is b = A * ones and
   the system is solved with magma_zgmres from a zero initial guess.

   An option given as the very last argument has no value to consume; this
   is reported and no matrices are processed.
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    magma_z_solver_par solver_par;
    magma_z_preconditioner precond_par;
    solver_par.epsilon = 10e-16;
    solver_par.maxiter = 1000;
    solver_par.restart = 30;
    solver_par.num_eigenvalues = 0;
    solver_par.ortho = Magma_CGS;
    solver_par.verbose = 0;
    int format = 0;
    int ortho = 0;
    int scale = 0;
    magma_scale_t scaling = Magma_NOSCALE;

    magma_z_sparse_matrix A, B, B_d;
    magma_z_vector x, b;
    B.blocksize = 8;
    B.alignment = 8;

    magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0);

    // Every recognised option consumes exactly one following value.
    static const char *const valued_options[] = {
        "--format", "--mscale", "--blocksize", "--alignment", "--verbose",
        "--ortho", "--restart", "--maxiter", "--tol"
    };
    const int num_options =
        (int) ( sizeof(valued_options) / sizeof(valued_options[0]) );

    B.storage_type = Magma_CSR;
    int i;
    for( i = 1; i < argc; ++i ) {
        const char *opt = argv[i];

        int known = 0;
        for( int k = 0; k < num_options; ++k ) {
            if ( strcmp( valued_options[k], opt ) == 0 )
                known = 1;
        }
        if ( !known )
            break;              // first non-option argument: matrices start

        // argv[argc] is a null pointer and must never reach atoi()/sscanf().
        if ( i+1 >= argc ) {
            printf( "error: option %s requires a value\n", opt );
            i = argc;           // nothing left that could name a matrix
            break;
        }
        const char *value = argv[++i];

        if ( strcmp("--format", opt) == 0 ) {
            format = atoi( value );
            switch( format ) {
                case 0: B.storage_type = Magma_CSR; break;
                case 1: B.storage_type = Magma_ELL; break;
                case 2: B.storage_type = Magma_ELLRT; break;
                case 3: B.storage_type = Magma_SELLP; break;
                default: break; // unknown code: keep the current format
            }
        } else if ( strcmp("--mscale", opt) == 0 ) {
            scale = atoi( value );
            switch( scale ) {
                case 0: scaling = Magma_NOSCALE; break;
                case 1: scaling = Magma_UNITDIAG; break;
                case 2: scaling = Magma_UNITROW; break;
                default: break; // unknown code: keep the current scaling
            }
        } else if ( strcmp("--blocksize", opt) == 0 ) {
            B.blocksize = atoi( value );
        } else if ( strcmp("--alignment", opt) == 0 ) {
            B.alignment = atoi( value );
        } else if ( strcmp("--verbose", opt) == 0 ) {
            solver_par.verbose = atoi( value );
        } else if ( strcmp("--ortho", opt) == 0 ) {
            ortho = atoi( value );
            switch( ortho ) {
                case 0: solver_par.ortho = Magma_CGS; break;
                case 1: solver_par.ortho = Magma_MGS; break;
                case 2: solver_par.ortho = Magma_FUSED_CGS; break;
                default: break; // unknown code: keep the current scheme
            }
        } else if ( strcmp("--restart", opt) == 0 ) {
            solver_par.restart = atoi( value );
        } else if ( strcmp("--maxiter", opt) == 0 ) {
            solver_par.maxiter = atoi( value );
        } else if ( strcmp("--tol", opt) == 0 ) {
            sscanf( value, "%lf", &solver_par.epsilon );
        }
    }
    printf( "\n#    usage: ./run_zgmres"
        " [ --format %d (0=CSR, 1=ELL 2=ELLRT, 3=SELLP)"
        " [ --blocksize %d --alignment %d ]"
        " --mscale %d (0=no, 1=unitdiag, 2=unitrownrm)"
        " --verbose %d (0=summary, k=details every k iterations)"
        " --restart %d --maxiter %d --tol %.2e"
        " --ortho %d (0=CGS, 1=MGS, 2=FUSED_CGS) ]"
        " matrices \n\n", format, (int) B.blocksize, (int) B.alignment,
        (int) scale,
        (int) solver_par.verbose, 
        (int) solver_par.restart, (int) solver_par.maxiter, 
                                    solver_par.epsilon, ortho );

    magma_zsolverinfo_init( &solver_par, &precond_par );

    while(  i < argc ){

        magma_z_csr_mtx( &A,  argv[i]  ); 

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                            (int) A.num_rows,(int) A.num_cols,(int) A.nnz );

        // scale matrix
        magma_zmscale( &A, scaling );

        // convert to the requested format and move to the device
        magma_z_mconvert( A, &B, Magma_CSR, B.storage_type );
        magma_z_mtransfer( B, &B_d, Magma_CPU, Magma_DEV );

        // vectors and initial guess
        magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
        magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
        magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
        magma_z_vfree(&x);
        magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );

        magma_zgmres( B_d, b, &x, &solver_par );

        magma_zsolverinfo( &solver_par, &precond_par );

        magma_z_mfree(&B_d);
        magma_z_mfree(&B);
        magma_z_mfree(&A); 
        magma_z_vfree(&x);
        magma_z_vfree(&b);

        i++;
    }
        
    magma_zsolverinfo_free( &solver_par, &precond_par );

    TESTING_FINALIZE();
    return 0;
}
예제 #9
0
/* ////////////////////////////////////////////////////////////////////////////
   -- testing any solver 
*/
int main(  int argc, char** argv )
{
    TESTING_INIT();

    magma_zopts zopts;
    magma_queue_t queue;
    magma_queue_create( /*devices[ opts->device ],*/ &queue );
    
    int i=1;
    magma_zparse_opts( argc, argv, &zopts, &i, queue );


    real_Double_t res;
    magma_z_sparse_matrix A, A2, A3, A4, A5;

    while(  i < argc ) {

        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            magma_zm_5stencil(  laplace_size, &A, queue );
        } else {                        // file-matrix test
            magma_z_csr_mtx( &A,  argv[i], queue );
        }

        printf( "# matrix info: %d-by-%d with %d nonzeros\n",
                            (int) A.num_rows,(int) A.num_cols,(int) A.nnz );

        // filename for temporary matrix storage
        const char *filename = "testmatrix.mtx";

        // write to file
        write_z_csrtomtx( A, filename, queue );

        // read from file
        magma_z_csr_mtx( &A2, filename, queue );

        // delete temporary matrix
        unlink( filename );
                
        //visualize
        printf("A2:\n");
        magma_z_mvisu( A2, queue );
        
        //visualize
        magma_z_mconvert(A2, &A4, Magma_CSR, Magma_CSRL, queue );
        printf("A4:\n");
        magma_z_mvisu( A4, queue );
        magma_z_mconvert(A4, &A5, Magma_CSR, Magma_ELL, queue );
        printf("A5:\n");
        magma_z_mvisu( A5, queue );

        // pass it to another application and back
        magma_int_t m, n;
        magma_index_t *row, *col;
        magmaDoubleComplex *val;
        magma_zcsrget( A2, &m, &n, &row, &col, &val, queue );
        magma_zcsrset( m, n, row, col, val, &A3, queue );

        magma_zmdiff( A, A2, &res, queue );
        printf("# ||A-B||_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester IO:  ok\n");
        else
            printf("# tester IO:  failed\n");

        magma_zmdiff( A, A3, &res, queue );
        printf("# ||A-B||_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester matrix interface:  ok\n");
        else
            printf("# tester matrix interface:  failed\n");

        magma_z_mfree(&A, queue ); 
        magma_z_mfree(&A2, queue ); 
        magma_z_mfree(&A4, queue ); 
        magma_z_mfree(&A5, queue ); 


        i++;
    }
    
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return 0;
}
예제 #10
0
/**
    Transposes A into B using cusparseZcsr2csc.

    Three cases are handled:
      - Magma_CSR on Magma_DEV: the conversion is done directly;
      - Magma_CSR on Magma_CPU: A is transferred to the device, transposed
        by a recursive call, and the result is transferred back;
      - any other storage type: A is converted to Magma_CSR, transposed by a
        recursive call, and converted back to the storage type of A.

    NOTE(review): A.num_rows is passed to cuSPARSE for both dimensions and
    the output buffers are sized like A, so this looks correct for square
    matrices only -- confirm before using it on rectangular input.

    @param[in]  A      matrix to transpose
    @param[out] B      transposed matrix; fill_mode is swapped between
                       Magma_LOWER and Magma_UPPER, Magma_FULL is kept
    @param[in]  queue  queue forwarded to the helpers and set as the
                       cuSPARSE stream

    @return MAGMA_SUCCESS; cuSPARSE failures are only reported on stdout.
*/
extern "C" magma_int_t
magma_z_cucsrtranspose(
    magma_z_sparse_matrix A, 
    magma_z_sparse_matrix *B,
    magma_queue_t queue )
{
    if( A.storage_type == Magma_CSR && A.memory_location == Magma_DEV ) {

        // C is a device scratch copy of A whose arrays receive the
        // csr2csc output.
        magma_z_sparse_matrix C;
        magma_z_mtransfer( A, &C, Magma_DEV, Magma_DEV, queue );

        // CUSPARSE context //
        cusparseHandle_t handle;
        cusparseStatus_t cusparseStatus;
        cusparseStatus = cusparseCreate(&handle);
        cusparseSetStream( handle, queue );
        if (cusparseStatus != 0)    printf("error in Handle.\n");

        // Each call is checked on its own so that a failure for descrA is
        // not hidden by a later success for descrB (and vice versa).
        cusparseMatDescr_t descrA;
        cusparseMatDescr_t descrB;
        cusparseStatus = cusparseCreateMatDescr(&descrA);
        if (cusparseStatus != 0)    printf("error in MatrDescr.\n");
        cusparseStatus = cusparseCreateMatDescr(&descrB);
        if (cusparseStatus != 0)    printf("error in MatrDescr.\n");

        cusparseStatus =
        cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL);
        if (cusparseStatus != 0)    printf("error in MatrType.\n");
        cusparseStatus =
        cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL);
        if (cusparseStatus != 0)    printf("error in MatrType.\n");

        cusparseStatus =
        cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO);
        if (cusparseStatus != 0)    printf("error in IndexBase.\n");
        cusparseStatus =
        cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO);
        if (cusparseStatus != 0)    printf("error in IndexBase.\n");

        // The row and column arrays of C are passed in swapped positions
        // (C.dcol, C.drow) relative to those of A (A.drow, A.dcol).
        cusparseStatus = 
        cusparseZcsr2csc( handle, A.num_rows, A.num_rows, A.nnz,
                         A.dval, A.drow, A.dcol, C.dval, C.dcol, C.drow,
                         CUSPARSE_ACTION_NUMERIC, 
                         CUSPARSE_INDEX_BASE_ZERO);
        if (cusparseStatus != 0)    
                printf("error in transpose: %d.\n", cusparseStatus);

        cusparseDestroyMatDescr( descrA );
        cusparseDestroyMatDescr( descrB );
        cusparseDestroy( handle );
        // end CUSPARSE context //
        
        magma_z_mtransfer( C, B, Magma_DEV, Magma_DEV, queue );   

        // B now holds the result; release the scratch copy, which was
        // previously leaked on every call.
        magma_z_mfree( &C, queue );
        
        if( A.fill_mode == Magma_FULL ){
             B->fill_mode = Magma_FULL;
        }
        else if( A.fill_mode == Magma_LOWER ){
             B->fill_mode = Magma_UPPER;
        }
        else if ( A.fill_mode == Magma_UPPER ){
             B->fill_mode = Magma_LOWER;
        }

        return MAGMA_SUCCESS;
        
    }else if( A.storage_type == Magma_CSR && A.memory_location == Magma_CPU ){
               
        // host CSR: do the work on a device copy
        magma_z_sparse_matrix A_d, B_d;

        magma_z_mtransfer( A, &A_d, A.memory_location, Magma_DEV, queue );
        magma_z_cucsrtranspose( A_d, &B_d, queue );
        magma_z_mtransfer( B_d, B, Magma_DEV, A.memory_location, queue );
        
        magma_z_mfree( &A_d, queue );
        magma_z_mfree( &B_d, queue );
        
        return MAGMA_SUCCESS;
                
    }else {

        // other formats: go through CSR and convert the result back
        magma_z_sparse_matrix ACSR, BCSR;
        
        magma_z_mconvert( A, &ACSR, A.storage_type, Magma_CSR, queue );
        magma_z_cucsrtranspose( ACSR, &BCSR, queue );
        magma_z_mconvert( BCSR, B, Magma_CSR, A.storage_type, queue );
       
        magma_z_mfree( &ACSR, queue );
        magma_z_mfree( &BCSR, queue );

        return MAGMA_SUCCESS;
    }
}
예제 #11
0
/**
    Sets up an iterative incomplete-LU preconditioner for A.

    Steps visible in this routine:
      1. A is copied to the host and converted to Magma_CSR;
         magma_zilustruct is applied with precond->levels.
      2. The Magma_CSRL part and the transposed Magma_CSRU part are
         converted to Magma_CSRCOO and transferred to the device.
      3. magma_zailu_csr_s is called precond->sweeps times.
      4. The factors are merged back into one host matrix with
         magma_z_LUmergein; the merged matrix is stored on the device as
         precond->M.
      5. The lower/upper parts of the merged matrix are stored on the device
         as precond->L / precond->U, and the first output of
         magma_zcsrsplit( 256, ... ) for each as precond->LD / precond->UD.
      6. cuSPARSE triangular-solve analysis is run for precond->L (lower,
         unit diagonal) and precond->U (upper, non-unit diagonal); the
         results are kept in precond->cuinfoL / precond->cuinfoU.

    @param[in]     A        system matrix
    @param[in,out] precond  levels and sweeps are read; M, L, U, LD, UD,
                            cuinfoL and cuinfoU are written

    @return MAGMA_SUCCESS; helper return codes are not inspected and cuSPARSE
            failures are only reported on stdout.
*/
magma_int_t
magma_zailusetup( magma_z_sparse_matrix A, magma_z_preconditioner *precond ){

    magma_z_sparse_matrix hAh, hA, hAL, hALCOO, hAU, hAUT, hAUCOO, dAL, dAU, 
                                        hL, hU, dL, dU, DL, RL, DU, RU;

    // host CSR copy of the original matrix
    magma_z_mtransfer(A, &hAh, A.memory_location, Magma_CPU);
    magma_z_mconvert( hAh, &hA, hAh.storage_type, Magma_CSR );
    magma_z_mfree(&hAh);

    // in case using fill-in
    magma_zilustruct( &hA, precond->levels);

    // need only lower triangular
    // This has to be an assignment: the former `==` was a comparison with
    // no effect and left the field unset before the conversion, unlike the
    // matching assignment further down in this routine.
    hAL.diagorder_type = Magma_UNITY;
    magma_z_mconvert( hA, &hAL, Magma_CSR, Magma_CSRL );
    magma_z_mconvert( hAL, &hALCOO, Magma_CSR, Magma_CSRCOO );
    magma_z_mtransfer( hALCOO, &dAL, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( hALCOO, &dAU, Magma_CPU, Magma_DEV );

    // need only upper triangular (transposed before the COO conversion)
    magma_z_mconvert( hA, &hAU, Magma_CSR, Magma_CSRU );
    magma_z_cucsrtranspose(  hAU, &hAUT );
    magma_z_mconvert( hAUT, &hAUCOO, Magma_CSR, Magma_CSRCOO );
    magma_z_mtransfer( hAUCOO, &dL, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( hAUCOO, &dU, Magma_CPU, Magma_DEV );

    magma_z_mfree(&hALCOO);
    magma_z_mfree(&hAL);
    magma_z_mfree(&hAUCOO);
    magma_z_mfree(&hAUT);
    magma_z_mfree(&hAU);

    // fixed number of sweeps
    for(int i=0; i<precond->sweeps; i++){
        magma_zailu_csr_s( dAL, dAU, dL, dU );

    }

    magma_z_mtransfer( dL, &hL, Magma_DEV, Magma_CPU );
    magma_z_mtransfer( dU, &hU, Magma_DEV, Magma_CPU );

    // merge the factors back into hA
    magma_z_LUmergein( hL, hU, &hA);

    magma_z_mtransfer( hA, &precond->M, Magma_CPU, Magma_DEV );

    magma_z_mfree(&dL);
    magma_z_mfree(&dU);
    magma_z_mfree(&dAL);
    magma_z_mfree(&dAU);

    // extract the lower/upper parts of the merged matrix; hAL and hAU were
    // released above and are reused here
    hAL.diagorder_type = Magma_UNITY;
    magma_z_mconvert(hA, &hAL, Magma_CSR, Magma_CSRL);
    hAL.storage_type = Magma_CSR;
    magma_z_mconvert(hA, &hAU, Magma_CSR, Magma_CSRU);
    hAU.storage_type = Magma_CSR;
    magma_z_mfree(&hA);

    magma_z_mfree(&hL);
    magma_z_mfree(&hU);

    magma_zcsrsplit( 256, hAL, &DL, &RL );
    magma_zcsrsplit( 256, hAU, &DU, &RU );

    magma_z_mtransfer( DL, &precond->LD, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( DU, &precond->UD, Magma_CPU, Magma_DEV );

    // for cusparse uncomment this
    magma_z_mtransfer( hAL, &precond->L, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( hAU, &precond->U, Magma_CPU, Magma_DEV );

    // for ba-solve uncomment this
/*
    if( RL.nnz != 0 )
        magma_z_mtransfer( RL, &precond->L, Magma_CPU, Magma_DEV );
    else{ 
        precond->L.nnz = 0;
        precond->L.val = NULL;
        precond->L.col = NULL;
        precond->L.row = NULL;
        precond->L.blockinfo = NULL;
    }

    if( RU.nnz != 0 )
        magma_z_mtransfer( RU, &precond->U, Magma_CPU, Magma_DEV );
    else{ 
        precond->U.nnz = 0;
        precond->U.val = NULL;
        precond->U.col = NULL;
        precond->U.row = NULL;
        precond->U.blockinfo = NULL;
    }
*/
    magma_z_mfree(&hAL);
    magma_z_mfree(&hAU);
    magma_z_mfree(&DL);
    magma_z_mfree(&RL);
    magma_z_mfree(&DU);
    magma_z_mfree(&RU);

    // CUSPARSE context //
    cusparseHandle_t cusparseHandle;
    cusparseStatus_t cusparseStatus;

    cusparseStatus = cusparseCreate(&cusparseHandle);
     if(cusparseStatus != 0)    printf("error in Handle.\n");

    // descriptor for L: triangular, unit diagonal, lower fill
    cusparseMatDescr_t descrL;
    cusparseStatus = cusparseCreateMatDescr(&descrL);
     if(cusparseStatus != 0)    printf("error in MatrDescr.\n");

    cusparseStatus =
    cusparseSetMatType(descrL,CUSPARSE_MATRIX_TYPE_TRIANGULAR);
     if(cusparseStatus != 0)    printf("error in MatrType.\n");

    cusparseStatus =
    cusparseSetMatDiagType (descrL, CUSPARSE_DIAG_TYPE_UNIT);
     if(cusparseStatus != 0)    printf("error in DiagType.\n");

    cusparseStatus =
    cusparseSetMatIndexBase(descrL,CUSPARSE_INDEX_BASE_ZERO);
     if(cusparseStatus != 0)    printf("error in IndexBase.\n");

    cusparseStatus =
    cusparseSetMatFillMode(descrL,CUSPARSE_FILL_MODE_LOWER);
     if(cusparseStatus != 0)    printf("error in fillmode.\n");


    cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoL); 
     if(cusparseStatus != 0)    printf("error in info.\n");

    cusparseStatus =
    cusparseZcsrsv_analysis(cusparseHandle, 
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows, 
        precond->L.nnz, descrL, 
        precond->L.val, precond->L.row, precond->L.col, precond->cuinfoL );
     if(cusparseStatus != 0)    printf("error in analysis.\n");

    cusparseDestroyMatDescr( descrL );

    // descriptor for U: triangular, non-unit diagonal, upper fill
    cusparseMatDescr_t descrU;
    cusparseStatus = cusparseCreateMatDescr(&descrU);
     if(cusparseStatus != 0)    printf("error in MatrDescr.\n");

    cusparseStatus =
    cusparseSetMatType(descrU,CUSPARSE_MATRIX_TYPE_TRIANGULAR);
     if(cusparseStatus != 0)    printf("error in MatrType.\n");

    cusparseStatus =
    cusparseSetMatDiagType (descrU, CUSPARSE_DIAG_TYPE_NON_UNIT);
     if(cusparseStatus != 0)    printf("error in DiagType.\n");

    cusparseStatus =
    cusparseSetMatIndexBase(descrU,CUSPARSE_INDEX_BASE_ZERO);
     if(cusparseStatus != 0)    printf("error in IndexBase.\n");

    cusparseStatus =
    cusparseSetMatFillMode(descrU,CUSPARSE_FILL_MODE_UPPER);
     if(cusparseStatus != 0)    printf("error in fillmode.\n");

    cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoU); 
     if(cusparseStatus != 0)    printf("error in info.\n");

    cusparseStatus =
    cusparseZcsrsv_analysis(cusparseHandle, 
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows, 
        precond->U.nnz, descrU, 
        precond->U.val, precond->U.row, precond->U.col, precond->cuinfoU );
     if(cusparseStatus != 0)    printf("error in analysis.\n");

    cusparseDestroyMatDescr( descrU );
    cusparseDestroy( cusparseHandle );

    return MAGMA_SUCCESS;

}
예제 #12
0
/**
    Sets up an iterative incomplete-Cholesky preconditioner for A.

    Steps visible in this routine:
      1. A is copied to the host and converted to Magma_CSR;
         magma_zilustruct is applied with precond->levels.
      2. The Magma_CSRL part is converted to Magma_CSRCOO and transferred to
         the device twice (dAL and dL).
      3. magma_zaic_csr_s is called precond->sweeps times.
      4. The resulting factor is brought back to the host as CSR and stored
         on the device as precond->M.
      5. magma_zcsrsplit( 256, ... ) of the factor gives precond->LD and
         precond->L; the same split of its transpose gives precond->UD and
         precond->U.
      6. cuSPARSE triangular-solve analysis is run twice on precond->M, once
         with CUSPARSE_OPERATION_NON_TRANSPOSE (cuinfoL) and once with
         CUSPARSE_OPERATION_TRANSPOSE (cuinfoU); both descriptors use lower
         fill mode and a non-unit diagonal.

    @param[in]     A        system matrix
    @param[in,out] precond  levels and sweeps are read; M, L, U, LD, UD,
                            cuinfoL and cuinfoU are written

    @return MAGMA_SUCCESS; helper return codes are not inspected and cuSPARSE
            failures are only reported on stdout.
*/
magma_int_t
magma_zaiccsetup( magma_z_sparse_matrix A, magma_z_preconditioner *precond ){


    magma_z_sparse_matrix hAh, hA, hAL, hALCOO, dAL, hL, dL, DL, RL;



    // host CSR copy of the original matrix
    magma_z_mtransfer(A, &hAh, A.memory_location, Magma_CPU);
    magma_z_mconvert( hAh, &hA, hAh.storage_type, Magma_CSR );
    magma_z_mfree(&hAh);

    // in case using fill-in
    magma_zilustruct( &hA, precond->levels);

    // lower triangular part in CSRCOO, twice on the device
    magma_z_mconvert( hA, &hAL, Magma_CSR, Magma_CSRL );
    magma_z_mconvert( hAL, &hALCOO, Magma_CSR, Magma_CSRCOO );

    magma_z_mtransfer( hALCOO, &dAL, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( hALCOO, &dL, Magma_CPU, Magma_DEV );
    magma_z_mfree(&hALCOO);
    magma_z_mfree(&hAL);
    magma_z_mfree(&hA);

    // fixed number of sweeps
    for(int i=0; i<precond->sweeps; i++){
        magma_zaic_csr_s( dAL, dL );

    }
    magma_z_mtransfer( dL, &hL, Magma_DEV, Magma_CPU );

    magma_z_mfree(&dL);
    magma_z_mfree(&dAL);

    // hAL was released above and is reused for the CSR form of the factor
    magma_z_mconvert(hL, &hAL, hL.storage_type, Magma_CSR);

    // for CUSPARSE
    magma_z_mtransfer( hAL, &precond->M, Magma_CPU, Magma_DEV );

    magma_zcsrsplit( 256, hAL, &DL, &RL );

    magma_z_mtransfer( DL, &precond->LD, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( RL, &precond->L, Magma_CPU, Magma_DEV );

    // DL and RL are filled again by the second split below; release the
    // first pair now, otherwise its host memory is leaked.
    magma_z_mfree(&DL);
    magma_z_mfree(&RL);

    magma_z_mfree(&hL);

    // same split for the transposed factor
    magma_z_cucsrtranspose(   hAL, &hL );

    magma_zcsrsplit( 256, hL, &DL, &RL );

    magma_z_mtransfer( DL, &precond->UD, Magma_CPU, Magma_DEV );
    magma_z_mtransfer( RL, &precond->U, Magma_CPU, Magma_DEV );

    magma_z_mfree(&hAL);
    magma_z_mfree(&hL);

    magma_z_mfree(&DL);
    magma_z_mfree(&RL);


    // CUSPARSE context //
    cusparseHandle_t cusparseHandle;
    cusparseStatus_t cusparseStatus;
    cusparseStatus = cusparseCreate(&cusparseHandle);
     if(cusparseStatus != 0)    printf("error in Handle.\n");

    // descriptor for the solve with M: triangular, non-unit, lower fill
    cusparseMatDescr_t descrL;
    cusparseStatus = cusparseCreateMatDescr(&descrL);
     if(cusparseStatus != 0)    printf("error in MatrDescr.\n");

    cusparseStatus =
    cusparseSetMatType(descrL,CUSPARSE_MATRIX_TYPE_TRIANGULAR);
     if(cusparseStatus != 0)    printf("error in MatrType.\n");

    cusparseStatus =
    cusparseSetMatDiagType (descrL, CUSPARSE_DIAG_TYPE_NON_UNIT);
     if(cusparseStatus != 0)    printf("error in DiagType.\n");

    cusparseStatus =
    cusparseSetMatIndexBase(descrL,CUSPARSE_INDEX_BASE_ZERO);
     if(cusparseStatus != 0)    printf("error in IndexBase.\n");

    cusparseStatus =
    cusparseSetMatFillMode(descrL,CUSPARSE_FILL_MODE_LOWER);
     if(cusparseStatus != 0)    printf("error in fillmode.\n");


    cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoL); 
     if(cusparseStatus != 0)    printf("error in info.\n");

    cusparseStatus =
    cusparseZcsrsv_analysis(cusparseHandle, 
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, 
        precond->M.nnz, descrL, 
        precond->M.val, precond->M.row, precond->M.col, precond->cuinfoL );
     if(cusparseStatus != 0)    printf("error in analysis L.\n");

    cusparseDestroyMatDescr( descrL );

    // descriptor for the transposed solve: the same matrix M is analysed
    // with CUSPARSE_OPERATION_TRANSPOSE, so the fill mode stays LOWER
    cusparseMatDescr_t descrU;
    cusparseStatus = cusparseCreateMatDescr(&descrU);
     if(cusparseStatus != 0)    printf("error in MatrDescr.\n");

    cusparseStatus =
    cusparseSetMatType(descrU,CUSPARSE_MATRIX_TYPE_TRIANGULAR);
     if(cusparseStatus != 0)    printf("error in MatrType.\n");

    cusparseStatus =
    cusparseSetMatDiagType (descrU, CUSPARSE_DIAG_TYPE_NON_UNIT);
     if(cusparseStatus != 0)    printf("error in DiagType.\n");

    cusparseStatus =
    cusparseSetMatIndexBase(descrU,CUSPARSE_INDEX_BASE_ZERO);
     if(cusparseStatus != 0)    printf("error in IndexBase.\n");

    cusparseStatus =
    cusparseSetMatFillMode(descrU,CUSPARSE_FILL_MODE_LOWER);
     if(cusparseStatus != 0)    printf("error in fillmode.\n");

    cusparseStatus = cusparseCreateSolveAnalysisInfo(&precond->cuinfoU); 
     if(cusparseStatus != 0)    printf("error in info.\n");

    cusparseStatus =
    cusparseZcsrsv_analysis(cusparseHandle, 
        CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows, 
        precond->M.nnz, descrU, 
        precond->M.val, precond->M.row, precond->M.col, precond->cuinfoU );
     if(cusparseStatus != 0)    printf("error in analysis U.\n");

    cusparseDestroyMatDescr( descrU );
    cusparseDestroy( cusparseHandle );

    return MAGMA_SUCCESS;

}
예제 #13
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Debugging file
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    magma_z_solver_par solver_par;
    magma_z_preconditioner precond_par;
    solver_par.epsilon = 10e-16;
    solver_par.maxiter = 1000;
    solver_par.verbose = 0;
    solver_par.restart = 30;
    solver_par.num_eigenvalues = 0;
    solver_par.ortho = Magma_CGS;
    
    magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0);

    magma_z_sparse_matrix A, B, B_d;
    magma_z_vector x, b;

    // generate matrix of desired structure and size
    magma_int_t n=100;   // size is n*n
    magma_int_t nn = n*n;
    magma_int_t offdiags = 2;
    magma_index_t *diag_offset;
    magmaDoubleComplex *diag_vals;
    magma_zmalloc_cpu( &diag_vals, offdiags+1 );
    magma_index_malloc_cpu( &diag_offset, offdiags+1 );
    diag_offset[0] = 0;
    diag_offset[1] = 1;
    diag_offset[2] = n;
    diag_vals[0] = MAGMA_Z_MAKE( 4.1, 0.0 );
    diag_vals[1] = MAGMA_Z_MAKE( -1.0, 0.0 );
    diag_vals[2] = MAGMA_Z_MAKE( -1.0, 0.0 );
    magma_zmgenerator( nn, offdiags, diag_offset, diag_vals, &A );

    // convert marix into desired format
    B.storage_type = Magma_SELLC;
    B.blocksize = 8;
    B.alignment = 8;
    // scale matrix
    magma_zmscale( &A, Magma_UNITDIAG );

    magma_z_mconvert( A, &B, Magma_CSR, B.storage_type );
    magma_z_mtransfer( B, &B_d, Magma_CPU, Magma_DEV );


    // test CG ####################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // solver
    magma_zcg_res( B_d, b, &x, &solver_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PCG Jacobi ############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_JACOBI;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpcg( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PCG IC ################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ICC;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpcg( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);


    // test PCG IC ################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ICC;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpcg( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test BICGSTAB ####################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // solver
    magma_zbicgstab( B_d, b, &x, &solver_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PBICGSTAB Jacobi ############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_JACOBI;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);
/*
    // test PBICGSTAB ILU ###############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ILU;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PBICGSTAB ILU ###############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);printf("here\n");
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ILU;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test GMRES ####################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // solver
    magma_zgmres( B_d, b, &x, &solver_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PGMRES Jacobi ############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_JACOBI;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpgmres( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);*/

    // test PGMRES ILU ###############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ILU;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpgmres( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);


    printf("all tests passed.\n");




    magma_z_mfree(&B_d);
    magma_z_mfree(&B);
    magma_z_mfree(&A); 


    TESTING_FINALIZE();
    return 0;
}
예제 #14
0
/* ////////////////////////////////////////////////////////////////////////////
   -- testing sparse matrix vector product
*/
int main(  int argc, char** argv )
{
    TESTING_INIT();
    magma_queue_t queue;
    magma_queue_create( /*devices[ opts->device ],*/ &queue );

    magma_z_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL;
    hA_SELLP.blocksize = 8;
    hA_SELLP.alignment = 8;
    real_Double_t start, end, res;
    magma_int_t *pntre;

    magmaDoubleComplex c_one  = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex c_zero = MAGMA_Z_MAKE(0.0, 0.0);
    
    magma_int_t i, j;
    for( i = 1; i < argc; ++i ) {
        if ( strcmp("--blocksize", argv[i]) == 0 ) {
            hA_SELLP.blocksize = atoi( argv[++i] );
        } else if ( strcmp("--alignment", argv[i]) == 0 ) {
            hA_SELLP.alignment = atoi( argv[++i] );
        } else
            break;
    }
    printf( "\n#    usage: ./run_zspmv"
        " [ --blocksize %d --alignment %d (for SELLP) ]"
        " matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment );

    while(  i < argc ) {

        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            magma_zm_5stencil(  laplace_size, &hA, queue );
        } else {                        // file-matrix test
            magma_z_csr_mtx( &hA,  argv[i], queue );
        }

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                            (int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz );

        real_Double_t FLOPS = 2.0*hA.nnz/1e9;

        magma_z_vector hx, hy, dx, dy, hrefvec, hcheck;

        // init CPU vectors
        magma_z_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue );
        magma_z_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue );

        // init DEV vectors
        magma_z_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue );
        magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );

        #ifdef MAGMA_WITH_MKL
            // calling MKL with CSR
            pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) );
            pntre[0] = 0;
            for (j=0; j<hA.num_rows; j++ ) {
                pntre[j] = hA.row[j+1];
            }
             MKL_INT num_rows = hA.num_rows;
             MKL_INT num_cols = hA.num_cols;
             MKL_INT nnz = hA.nnz;

            MKL_INT *col;
            TESTING_MALLOC_CPU( col, MKL_INT, nnz );
            for( magma_int_t t=0; t < hA.nnz; ++t ) {
                col[ t ] = hA.col[ t ];
            }
            MKL_INT *row;
            TESTING_MALLOC_CPU( row, MKL_INT, num_rows );
            for( magma_int_t t=0; t < hA.num_rows; ++t ) {
                row[ t ] = hA.col[ t ];
            }
    
            start = magma_wtime();
            for (j=0; j<10; j++ ) {
                mkl_zcsrmv( "N", &num_rows, &num_cols, 
                            MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), 
                            col, row, pntre, 
                                                    MKL_ADDR(hx.val), 
                            MKL_ADDR(&c_zero),        MKL_ADDR(hy.val) );
            }
            end = magma_wtime();
            printf( "\n > MKL  : %.2e seconds %.2e GFLOP/s    (CSR).\n",
                                            (end-start)/10, FLOPS*10/(end-start) );

            TESTING_FREE_CPU( row );
            TESTING_FREE_CPU( col );
            free(pntre);
        #endif // MAGMA_WITH_MKL

        // copy matrix to GPU
        magma_z_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );        
        // SpMV on GPU (CSR) -- this is the reference!
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_z_spmv( c_one, dA, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (standard CSR).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        magma_z_mfree(&dA, queue );
        magma_z_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue );

        // convert to ELL and copy to GPU
        magma_z_mconvert(  hA, &hA_ELL, Magma_CSR, Magma_ELL, queue );
        magma_z_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue );
        magma_z_mfree(&hA_ELL, queue );
        magma_z_vfree( &dy, queue );
        magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
        // SpMV on GPU (ELL)
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_z_spmv( c_one, dA_ELL, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (standard ELL).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        magma_z_mfree(&dA_ELL, queue );
        magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]);
        if ( res < .000001 )
            printf("# tester spmv ELL:  ok\n");
        else
            printf("# tester spmv ELL:  failed\n");
        magma_z_vfree( &hcheck, queue );

        // convert to SELLP and copy to GPU
        magma_z_mconvert(  hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue );
        magma_z_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue );
        magma_z_mfree(&hA_SELLP, queue );
        magma_z_vfree( &dy, queue );
        magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
        // SpMV on GPU (SELLP)
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_z_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (SELLP).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );

        magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv SELL-P:  ok\n");
        else
            printf("# tester spmv SELL-P:  failed\n");
        magma_z_vfree( &hcheck, queue );

        magma_z_mfree(&dA_SELLP, queue );


        // SpMV on GPU (CUSPARSE - CSR)
        // CUSPARSE context //

        cusparseHandle_t cusparseHandle = 0;
        cusparseStatus_t cusparseStatus;
        cusparseStatus = cusparseCreate(&cusparseHandle);
        cusparseSetStream( cusparseHandle, queue );

        cusparseMatDescr_t descr = 0;
        cusparseStatus = cusparseCreateMatDescr(&descr);

        cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
        cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
        magmaDoubleComplex alpha = c_one;
        magmaDoubleComplex beta = c_zero;
        magma_z_vfree( &dy, queue );
        magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );

        // copy matrix to GPU
        magma_z_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );

        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            cusparseStatus =
            cusparseZcsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, 
                        hA.num_rows, hA.num_cols, hA.nnz, &alpha, descr, 
                        dA.dval, dA.drow, dA.dcol, dx.dval, &beta, dy.dval);
        end = magma_sync_wtime( queue );
        if (cusparseStatus != 0)    printf("error in cuSPARSE CSR\n");
        printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s    (CSR).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        cusparseMatDescr_t descrA;
        cusparseStatus = cusparseCreateMatDescr(&descrA);
         if (cusparseStatus != 0)    printf("error\n");
        cusparseHybMat_t hybA;
        cusparseStatus = cusparseCreateHybMat( &hybA );
         if (cusparseStatus != 0)    printf("error\n");

        magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv cuSPARSE CSR:  ok\n");
        else
            printf("# tester spmv cuSPARSE CSR:  failed\n");
        magma_z_vfree( &hcheck, queue );
        magma_z_vfree( &dy, queue );
        magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
       
        cusparseZcsr2hyb(cusparseHandle,  hA.num_rows, hA.num_cols,
                        descrA, dA.dval, dA.drow, dA.dcol,
                        hybA, 0, CUSPARSE_HYB_PARTITION_AUTO);

        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            cusparseStatus =
            cusparseZhybmv( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, 
               &alpha, descrA, hybA,
               dx.dval, &beta, dy.dval);
        end = magma_sync_wtime( queue );
        if (cusparseStatus != 0)    printf("error in cuSPARSE HYB\n");
        printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s    (HYB).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );

        magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv cuSPARSE HYB:  ok\n");
        else
            printf("# tester spmv cuSPARSE HYB:  failed\n");
        magma_z_vfree( &hcheck, queue );

        cusparseDestroyMatDescr( descrA );
        cusparseDestroyHybMat( hybA );
        cusparseDestroy( cusparseHandle );

        magma_z_mfree(&dA, queue );



        printf("\n\n");


        // free CPU memory
        magma_z_mfree(&hA, queue );
        magma_z_vfree(&hx, queue );
        magma_z_vfree(&hy, queue );
        magma_z_vfree(&hrefvec, queue );
        // free GPU memory
        magma_z_vfree(&dx, queue );
        magma_z_vfree(&dy, queue );

        i++;

    }
    
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return 0;
}
예제 #15
0
/*
    Sets up a PaStiX LU factorization of A as a preconditioner.

    A is copied to the CPU (and converted to CSR if it is stored in another
    format), PaStiX is initialized with its default parameters and then run
    from the ordering task through the numerical factorization.  The
    permutation arrays, the matrix arrays handed to PaStiX, the PaStiX handle
    and the iparm/dparm parameter arrays are stored in precond.

    Only built when HAVE_PASTIX and PRECISION_d are defined; otherwise an
    error message is printed and nothing is set up.

    @param A        input matrix
    @param b        right-hand side, passed to the PaStiX initialization call
    @param precond  preconditioner structure receiving the factorization data
    @param queue    queue used for the transfers and conversions

    @return MAGMA_SUCCESS in all cases
*/
extern "C" magma_int_t
magma_zpastixsetup(
    magma_z_sparse_matrix A, magma_z_vector b,
    magma_z_preconditioner *precond,
    magma_queue_t queue )
{
    #if defined(HAVE_PASTIX)

    #if defined(PRECISION_d)

        pastix_data_t    *pastix_data = NULL; /* Pointer to a storage structure needed by pastix           */
        pastix_int_t      ncol;               /* Size of the matrix                                        */
        pastix_int_t     *colptr      = NULL; /* Indexes of first element of each column in row and values */
        pastix_int_t     *rows        = NULL; /* Row of each element of the matrix                         */
        pastix_float_t   *values      = NULL; /* Value of each element of the matrix                       */
        pastix_float_t   *rhs         = NULL; /* right hand side                                           */
        pastix_int_t     *iparm = NULL;  /* integer parameters for pastix                             */
        double           *dparm = NULL;  /* floating parameters for pastix                            */
        pastix_int_t     *perm        = NULL; /* Permutation tabular                                       */
        pastix_int_t     *invp        = NULL; /* Reverse permutation tabular                               */
        pastix_int_t      mat_type;

        magma_z_sparse_matrix A_h1, B;
        magma_z_vector b_h;
        magma_z_vtransfer( b, &b_h, A.memory_location, Magma_CPU, queue );

        // B is the CPU copy of A in CSR format
        if ( A.storage_type != Magma_CSR ) {
            magma_z_mtransfer( A, &A_h1, A.memory_location, Magma_CPU, queue );
            magma_z_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR, queue );
        }
        else {
            magma_z_mtransfer( A, &B, A.memory_location, Magma_CPU, queue );
        }


        // The row offsets of B are passed as PaStiX column pointers and the
        // column indices as PaStiX row indices.
        // NOTE(review): this presumably hands the CSR arrays over as the CSC
        // form of the transposed matrix -- confirm against the apply routine.
        rhs = (pastix_float_t*) b_h.dval;
        ncol = B.num_rows;
        colptr = B.drow;
        rows = B.dcol;
        values = (pastix_float_t*) B.dval;

        mat_type = API_SYM_NO;

        iparm = (pastix_int_t*)malloc(IPARM_SIZE*sizeof(pastix_int_t));
        // dparm is declared as double*, so size the allocation accordingly
        dparm = (double*)malloc(DPARM_SIZE*sizeof(double));

        /*******************************************/
        /* Initialize parameters to default values */
        /*******************************************/
        // perm and invp are still NULL for this initialization call
        iparm[IPARM_MODIFY_PARAMETER]    = API_NO;
        pastix(&pastix_data, MPI_COMM_WORLD,
             ncol, colptr, rows, values,
             perm, invp, rhs, 1, iparm, dparm);
        iparm[IPARM_THREAD_NBR]          = 16;
        iparm[IPARM_SYM]                 = mat_type;
        iparm[IPARM_FACTORIZATION]       = API_FACT_LU;
        iparm[IPARM_VERBOSE]             = API_VERBOSE_YES;
        iparm[IPARM_ORDERING]            = API_ORDER_SCOTCH;
        iparm[IPARM_INCOMPLETE]          = API_NO;
        iparm[IPARM_RHS_MAKING]          = API_RHS_B;
        //iparm[IPARM_AMALGAMATION]         = 5;
        iparm[IPARM_LEVEL_OF_FILL]       = 0;
        /*  if (incomplete == 1)
            {
            dparm[DPARM_EPSILON_REFINEMENT] = 1e-7;
            }
        */


        /*
         * Matrix needs :
         *    - to be in fortran numbering
         *    - to have only the lower triangular part in symmetric case
         *    - to have a graph with a symmetric structure in unsymmetric case
         * If those criteria are not matched, the csc will be reallocated and changed. 
         */
        iparm[IPARM_MATRIX_VERIFICATION] = API_YES;

        perm = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t));
        invp = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t));

        /*******************************************/
        /* One call covers PaStiX steps 1 to 4:    */
        /*   1 - ordering (Scotch)                 */
        /*   2 - symbolic factorization            */
        /*   3 - mapping and compute scheduling    */
        /*   4 - numerical factorisation           */
        /* Steps 1-3 only need to be repeated when */
        /* the pattern of the matrix changes, step */
        /* 4 each time its values change.          */
        /*******************************************/

        iparm[IPARM_START_TASK] = API_TASK_ORDERING;
        iparm[IPARM_END_TASK]   = API_TASK_NUMFACT;

        pastix(&pastix_data, MPI_COMM_WORLD,
             ncol, colptr, rows, values,
             perm, invp, NULL, 1, iparm, dparm);

        precond->int_array_1 = (magma_int_t*) perm;
        precond->int_array_2 = (magma_int_t*) invp;

        // precond->M now refers to the arrays of B
        precond->M.dval = (magmaDoubleComplex*) values;
        precond->M.dcol = (magma_int_t*) colptr;
        precond->M.drow = (magma_int_t*) rows;
        precond->M.num_rows = A.num_rows;
        precond->M.num_cols = A.num_cols;
        precond->M.memory_location = Magma_CPU;
        precond->pastix_data = pastix_data;
        precond->iparm = iparm;
        precond->dparm = dparm;

        if ( A.storage_type != Magma_CSR) {
            magma_z_mfree( &A_h1, queue );
        }   
        magma_z_vfree( &b_h, queue );
        // B is intentionally not freed here: its arrays are referenced by
        // precond->M (see above) and, like perm, invp, iparm and dparm, have
        // to stay valid for as long as the preconditioner is in use.

    #else
        printf( "error: only double precision supported yet.\n");
    #endif

#else
        printf( "error: pastix not available.\n");
#endif

    return MAGMA_SUCCESS;
}
예제 #16
0
/**
 * Sparse matrix addition on the device through cuSPARSE csrgeam:
 * the result of combining alpha*A and beta*B is stored in AB.
 *
 * @param alpha  host pointer to the scalar applied to A (the cuSPARSE
 *               handle is put into CUSPARSE_POINTER_MODE_HOST)
 * @param A      first operand; must reside on the device in CSR or CSRCOO
 * @param beta   host pointer to the scalar applied to B
 * @param B      second operand; must reside on the device in CSR or CSRCOO
 * @param AB     output matrix, filled by a device-to-device transfer of the
 *               temporary result
 * @param queue  queue used as the cuSPARSE stream and for MAGMA helpers
 *
 * @return MAGMA_ERR_DEVICE_ALLOC if a device allocation fails,
 *         MAGMA_SUCCESS otherwise.
 *
 * NOTE(review): unsupported inputs (host matrices, other storage formats)
 * only print a message and still return MAGMA_SUCCESS; callers cannot
 * detect this case from the return value. Kept for backward compatibility.
 * NOTE(review): the return codes of the cuSPARSE geam calls are not
 * inspected, and only the last descriptor-creation status is checked.
 */
extern "C" magma_int_t
magma_zcuspaxpy(
    magmaDoubleComplex *alpha, magma_z_sparse_matrix A,
    magmaDoubleComplex *beta, magma_z_sparse_matrix B,
    magma_z_sparse_matrix *AB,
    magma_queue_t queue )
{
    if (    A.memory_location == Magma_DEV
            && B.memory_location == Magma_DEV
            && ( A.storage_type == Magma_CSR ||
                 A.storage_type == Magma_CSRCOO )
            && ( B.storage_type == Magma_CSR ||
                 B.storage_type == Magma_CSRCOO ) ) {

        // temporary result matrix; every pointer starts out NULL so that
        // magma_z_mfree() is safe on any exit path
        magma_z_sparse_matrix C;
        C.num_rows = A.num_rows;
        C.num_cols = A.num_cols;
        C.nnz = 0;
        C.storage_type = A.storage_type;
        C.memory_location = A.memory_location;
        C.val = NULL;
        C.col = NULL;
        C.row = NULL;
        C.rowidx = NULL;
        C.blockinfo = NULL;
        C.diag = NULL;
        C.dval = NULL;
        C.dcol = NULL;
        C.drow = NULL;
        C.drowidx = NULL;
        C.ddiag = NULL;

        magma_int_t stat_dev = 0;

        // CUSPARSE context //
        cusparseHandle_t handle;
        cusparseStatus_t cusparseStatus;
        cusparseStatus = cusparseCreate(&handle);
        cusparseSetStream( handle, queue );
        if (cusparseStatus != 0)    printf("error in Handle.\n");

        cusparseMatDescr_t descrA;
        cusparseMatDescr_t descrB;
        cusparseMatDescr_t descrC;
        cusparseStatus = cusparseCreateMatDescr(&descrA);
        cusparseStatus = cusparseCreateMatDescr(&descrB);
        cusparseStatus = cusparseCreateMatDescr(&descrC);
        if (cusparseStatus != 0)    printf("error in MatrDescr.\n");

        cusparseStatus =
            cusparseSetMatType(descrA,CUSPARSE_MATRIX_TYPE_GENERAL);
        cusparseSetMatType(descrB,CUSPARSE_MATRIX_TYPE_GENERAL);
        cusparseSetMatType(descrC,CUSPARSE_MATRIX_TYPE_GENERAL);
        if (cusparseStatus != 0)    printf("error in MatrType.\n");

        cusparseStatus =
            cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO);
        cusparseSetMatIndexBase(descrB,CUSPARSE_INDEX_BASE_ZERO);
        cusparseSetMatIndexBase(descrC,CUSPARSE_INDEX_BASE_ZERO);
        if (cusparseStatus != 0)    printf("error in IndexBase.\n");

        // add alpha*A and beta*B on the device
        cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST);

        // the row pointer of C must exist before the symbolic phase
        // writes into it
        stat_dev += magma_index_malloc( &C.drow, (A.num_rows + 1) );

        if( stat_dev == 0 ) {
            // The nonzero count is received in a host variable of the index
            // type and widened afterwards: magma_int_t may be wider than
            // magma_index_t, so C.nnz must not be written through a cast.
            magma_index_t nnz_t = 0;

            // symbolic phase: fills the device row pointer of C and
            // returns the total number of nonzeros
            cusparseXcsrgeamNnz(handle,A.num_rows, A.num_cols,
                                descrA, A.nnz, A.drow, A.dcol,
                                descrB, B.nnz, B.drow, B.dcol,
                                descrC, C.drow, &nnz_t);
            C.nnz = (magma_int_t) nnz_t;

            stat_dev += magma_index_malloc( &C.dcol, C.nnz );
            stat_dev += magma_zmalloc( &C.dval, C.nnz );
        }

        if( stat_dev == 0 ) {
            // numeric phase: fills column indices and values of C
            cusparseZcsrgeam(handle, A.num_rows, A.num_cols,
                             alpha,
                             descrA, A.nnz,
                             A.dval, A.drow, A.dcol,
                             beta,
                             descrB, B.nnz,
                             B.dval, B.drow, B.dcol,
                             descrC,
                             C.dval, C.drow, C.dcol);
        }

        // release the cuSPARSE objects on every path, including the
        // allocation-failure path
        cusparseDestroyMatDescr( descrA );
        cusparseDestroyMatDescr( descrB );
        cusparseDestroyMatDescr( descrC );
        cusparseDestroy( handle );
        // end CUSPARSE context //

        if( stat_dev != 0 ) {
            magma_z_mfree( &C, queue );
            return MAGMA_ERR_DEVICE_ALLOC;
        }

        magma_z_mtransfer( C, AB, Magma_DEV, Magma_DEV, queue );
        magma_z_mfree( &C, queue );

        return MAGMA_SUCCESS;
    }
    else {

        printf("error: CSRSPAXPY only supported on device and CSR format.\n");

        return MAGMA_SUCCESS;
    }
}
예제 #17
0
/**
 * Scales a sparse matrix in place.
 *
 * For a host matrix in CSRCOO format the scaling is applied directly:
 * a factor tmp[i] is computed per row and every entry (i,j) is multiplied
 * by tmp[j] * tmp[i].
 *   - Magma_NOSCALE : the matrix is left untouched.
 *   - Magma_UNITROW : tmp[i] = 1/sqrt( sum over row i of Re(a_ij)^2 ).
 *   - Magma_UNITDIAG: tmp[i] = 1/sqrt( Re(a_ii) ).
 *   - anything else : an error message is printed, the matrix is untouched.
 *
 * Any other location/format is copied to the host, converted to CSRCOO,
 * scaled by a recursive call and then converted/transferred back into A
 * with its original storage type and location.
 *
 * @param A        matrix to scale (modified in place)
 * @param scaling  requested scaling mode
 * @param queue    queue handed to the MAGMA transfer/convert/free helpers
 *
 * @return MAGMA_SUCCESS, or the status reported by magma_zmalloc_cpu when
 *         the work array cannot be allocated. In that case A is unchanged.
 *
 * NOTE(review): the row norm uses only the real parts of the entries and
 * the factor array has num_rows entries but is indexed by column as well,
 * which presumably assumes a square matrix -- confirm with callers.
 * NOTE(review): a zero diagonal entry is only reported; the division still
 * takes place, as in the original code.
 */
extern "C" magma_int_t
magma_zmscale(
    magma_z_sparse_matrix *A, 
    magma_scale_t scaling,
    magma_queue_t queue )
{
    if ( A->memory_location == Magma_CPU && A->storage_type == Magma_CSRCOO ) {
        if ( scaling == Magma_NOSCALE ) {
            // no scale
            return MAGMA_SUCCESS;
        }
        if ( scaling != Magma_UNITROW && scaling != Magma_UNITDIAG ) {
            printf( "error: scaling not supported\n" );
            return MAGMA_SUCCESS;
        }

        // one scaling factor per row
        magmaDoubleComplex *tmp = NULL;
        magma_int_t info = magma_zmalloc_cpu( &tmp, A->num_rows );
        if ( info != MAGMA_SUCCESS ) {
            // without the work array nothing can be scaled; A is untouched
            return info;
        }

        for( magma_int_t z=0; z<A->num_rows; z++ ) {
            double weight = 0.0;
            if ( scaling == Magma_UNITROW ) {
                // scale to unit rownorm: sum of squared real parts
                for( magma_int_t f=A->row[z]; f<A->row[z+1]; f++ ) {
                    weight += MAGMA_Z_REAL(A->val[f])*MAGMA_Z_REAL(A->val[f]);
                }
            }
            else {
                // scale to unit diagonal: pick the diagonal entry of row z
                magmaDoubleComplex s = MAGMA_Z_MAKE( 0.0, 0.0 );
                for( magma_int_t f=A->row[z]; f<A->row[z+1]; f++ ) {
                    if ( A->col[f]== z ) {
                        s = A->val[f];
                    }
                }
                if ( s == MAGMA_Z_MAKE( 0.0, 0.0 ) )
                    printf("error: zero diagonal element.\n");
                weight = MAGMA_Z_REAL( s );
            }
            tmp[z] = MAGMA_Z_MAKE( 1.0/sqrt( weight ), 0.0 );
        }

        // apply the factors of both the column and the row of each entry
        for( magma_int_t z=0; z<A->nnz; z++ ) {
            A->val[z] = A->val[z] * tmp[A->col[z]] * tmp[A->rowidx[z]];
        }
        magma_free_cpu( tmp );

        return MAGMA_SUCCESS; 
    }
    else {

        magma_z_sparse_matrix hA, CSRA;
        magma_storage_t A_storage = A->storage_type;
        magma_location_t A_location = A->memory_location;
        magma_z_mtransfer( *A, &hA, A->memory_location, Magma_CPU, queue );
        magma_z_mconvert( hA, &CSRA, hA.storage_type, Magma_CSRCOO, queue );

        // CSRA is a host CSRCOO matrix, so this call takes the branch above
        magma_int_t info = magma_zmscale( &CSRA, scaling, queue );

        magma_z_mfree( &hA, queue );
        if ( info == MAGMA_SUCCESS ) {
            // replace A only once the scaled copy is known to be valid
            magma_z_mfree( A, queue );
            magma_z_mconvert( CSRA, &hA, Magma_CSRCOO, A_storage, queue );
            magma_z_mtransfer( hA, A, Magma_CPU, A_location, queue );
            magma_z_mfree( &hA, queue );
        }
        magma_z_mfree( &CSRA, queue );    

        return info; 
    }
}