Beispiel #1
0
/*
    Runs solver_par->maxiter sweeps of the fixed-point iteration

        x  <-  c - M * x

    M          iteration matrix (passed to magma_z_spmv)
    c          constant vector added after every product
    x          on entry the starting iterate, on return the last iterate
    solver_par only the maxiter field is read here
    queue      forwarded to the sparse routines

    Always returns MAGMA_SUCCESS.

    The iterate is updated by exchanging vector descriptors instead of
    copying data, so the buffer x->dval refers to on return can be the one
    allocated in this routine, in which case the buffer released at the end
    is the one x referred to on entry.
*/
extern "C" magma_int_t
magma_zjacobiiter(
    magma_z_sparse_matrix M, magma_z_vector c, magma_z_vector *x,  
    magma_z_solver_par *solver_par,
    magma_queue_t queue )
{
    // remember the kernel stream active on entry; it is reinstated on exit
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );

    // scalars used by the BLAS-like calls below
    const magmaDoubleComplex zero    = MAGMA_Z_ZERO;
    const magmaDoubleComplex plus_1  = MAGMA_Z_ONE;
    const magmaDoubleComplex minus_1 = MAGMA_Z_NEG_ONE;

    const magma_int_t n = M.num_rows;

    // work vector that receives the next iterate
    magma_z_vector next;
    magma_z_vinit( &next, Magma_DEV, n, zero, queue );

    magma_int_t sweep = 0;
    while ( sweep < solver_par->maxiter ) {
        // next = c - M * x
        magma_z_spmv( minus_1, M, *x, zero, next, queue );
        magma_zaxpy( n, plus_1, c.dval, 1, next.dval, 1 );

        // exchange the descriptors: x holds the new iterate, next becomes
        // the scratch space for the following sweep
        magma_z_vector previous = *x;
        *x   = next;
        next = previous;

        sweep++;
    }

    magma_z_vfree( &next, queue );

    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_zjacobiiter */
/*
    Applies the preconditioner selected by precond->solver to b and stores
    the result in x.

    A        system matrix; only A.num_rows is read here
    b        input vector
    x        output vector
    precond  preconditioner description (solver type plus its data)
    queue    forwarded to the preconditioner routines

    Returns MAGMA_SUCCESS, or MAGMA_ERR_NOT_SUPPORTED (after printing a
    message) when precond->solver is none of the types handled below.

    Magma_ILU and Magma_ICC are accepted but nothing is applied for them:
    x is left untouched and MAGMA_SUCCESS is returned. The previous version
    allocated and immediately released a temporary device vector in these
    branches; that dead allocation has been removed.
*/
extern "C" magma_int_t
magma_z_applyprecond(
    magma_z_sparse_matrix A, 
    magma_z_vector b, 
    magma_z_vector *x, 
    magma_z_preconditioner *precond,
    magma_queue_t queue )
{
    // remember the kernel stream active on entry; it is reinstated on exit
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );

    magma_int_t info = MAGMA_SUCCESS;

    if ( precond->solver == Magma_JACOBI ) {
        magma_zjacobi_diagscal( A.num_rows, precond->d, b, x, queue );
    }
    else if ( precond->solver == Magma_PASTIX ) {
        magma_zapplypastix( b, x, precond, queue );
    }
    else if ( precond->solver == Magma_ILU 
           || precond->solver == Magma_ICC ) {
        // intentionally empty: no triangular solves are performed here,
        // so x keeps whatever it contained on entry
    }
    else if ( precond->solver == Magma_NONE ) {
        magma_zcopy( b.num_rows, b.dval, 1, x->dval, 1 );      //  x = b
    }
    else {
        printf( "error: preconditioner type not yet supported.\n" );
        info = MAGMA_ERR_NOT_SUPPORTED;
    }

    magmablasSetKernelStream( orig_queue );
    return info;
}
Beispiel #3
0
/*
    Builds the constant vector of the Jacobi iteration from the right-hand
    side b and the vector d.

    b      right-hand side; its memory_location selects the code path
    d      vector b is divided by (elementwise) on the CPU path
    c      output vector
    queue  forwarded to the vector routines

    CPU path:    c[k] = b[k] / d[k]; d is transferred with b's memory
                 location, i.e. it is treated as residing where b resides.
    Device path: handled by magma_zjacobisetup_vector_gpu (presumably the
                 same elementwise operation -- confirm in that routine).
    Any other memory location: nothing is done.

    Always returns MAGMA_SUCCESS.
*/
extern "C" magma_int_t
magma_zjacobisetup_vector(
    magma_z_vector b, magma_z_vector d, 
    magma_z_vector *c,
    magma_queue_t queue )
{
    // device-resident data: hand over to the GPU routine
    if ( b.memory_location == Magma_DEV ) {
        magma_z_vector scratch;
        magma_z_vinit( &scratch, Magma_DEV, b.num_rows, MAGMA_Z_ZERO, queue );
        magma_zjacobisetup_vector_gpu( 
                    b.num_rows, b, d, *c, &scratch, queue );
        magma_z_vfree( &scratch, queue );
        return MAGMA_SUCCESS;
    }

    // neither device nor host: nothing to do
    if ( b.memory_location != Magma_CPU ) {
        return MAGMA_SUCCESS;
    }

    // host path
    magma_z_vector diag_host, quotient, rhs_host;
    magma_z_vinit( &quotient, Magma_CPU, b.num_rows, MAGMA_Z_ZERO, queue );

    magma_z_vtransfer( b, &rhs_host, b.memory_location, Magma_CPU, queue );
    magma_z_vtransfer( d, &diag_host, b.memory_location, Magma_CPU, queue );

    magma_int_t k = 0;
    while ( k < b.num_rows ) {
        quotient.val[k] = rhs_host.val[k] / diag_host.val[k];
        k++;
    }

    magma_z_vtransfer( quotient, c, Magma_CPU, b.memory_location, queue ); 

    magma_z_vfree( &diag_host, queue );
    magma_z_vfree( &quotient, queue );
    magma_z_vfree( &rhs_host, queue );

    return MAGMA_SUCCESS;
}
Beispiel #4
0
/*
    Extracts the diagonal of A and stores its elementwise inverse in d.

    A      input matrix; it is copied to the host and, if it is not stored
           as Magma_CSR, converted to CSR first
    d      output vector, transferred to A.memory_location
    queue  forwarded to the matrix / vector routines

    Rows without a stored diagonal entry keep the value the host vector was
    initialised with (MAGMA_Z_ZERO is passed to magma_z_vinit).

    A diagonal entry whose real part is zero is reported on stdout; the
    inverse is still stored and MAGMA_SUCCESS is still returned.

    The test is performed on the matrix entry itself. It used to be applied
    to the already inverted value, so an exactly zero diagonal (whose
    inverse is inf/NaN, not 0) was never reported.
*/
extern "C" magma_int_t
magma_zjacobisetup_diagscal(
    magma_z_sparse_matrix A, magma_z_vector *d,
    magma_queue_t queue )
{
    magma_int_t i;

    magma_z_sparse_matrix A_h1, B;
    magma_z_vector diag;
    magma_z_vinit( &diag, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue );

    // B = host copy of A in CSR
    if ( A.storage_type != Magma_CSR) {
        magma_z_mtransfer( A, &A_h1, A.memory_location, Magma_CPU, queue );
        magma_z_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR, queue );
    }
    else {
        magma_z_mtransfer( A, &B, A.memory_location, Magma_CPU, queue );
    }

    for( magma_int_t rowindex=0; rowindex<B.num_rows; rowindex++ ) {
        magma_int_t start = (B.drow[rowindex]);
        magma_int_t end = (B.drow[rowindex+1]);
        for( i=start; i<end; i++ ) {
            if ( B.dcol[i]==rowindex ) {
                // check the entry BEFORE inverting it
                if ( MAGMA_Z_REAL( B.val[i] ) == 0 )
                    printf(" error: zero diagonal element in row %d!\n", 
                                                                (int) rowindex);
                diag.val[rowindex] = 1.0/B.val[i];
            }
        }
    }
    magma_z_vtransfer( diag, d, Magma_CPU, A.memory_location, queue );

    // A_h1 only exists when a format conversion was necessary
    if ( A.storage_type != Magma_CSR) {
        magma_z_mfree( &A_h1, queue );
    }
    magma_z_mfree( &B, queue );
    magma_z_vfree( &diag, queue );
 
    return MAGMA_SUCCESS;
}
Beispiel #5
0
/*
    Iterative refinement: repeatedly solves A z = r with the inner solver
    given by precond_par and updates x <- x + z, r <- b - A x.

    A            system matrix
    b            right-hand side
    x            solution vector; it is reset to zero before iterating
    solver_par   in:  maxiter, epsilon, verbose
                 out: solver, numiter, info, init_res, final_res, iter_res,
                      runtime and, if verbose > 0, res_vec / timing
    precond_par  inner solver, forwarded to magma_z_precond
    queue        forwarded to the sparse routines

    Stopping threshold: r0 = max( ||b||^2 * epsilon, ATOLERANCE ); the loop
    ends as soon as ||r|| < r0 or numiter reaches maxiter.

    solver_par->info is MAGMA_SUCCESS when the loop ended before maxiter,
    MAGMA_SLOW_CONVERGENCE when it did not but the residual decreased, and
    MAGMA_DIVERGENCE otherwise. The function itself always returns
    MAGMA_SUCCESS.

    The early exit (right-hand side already below the threshold) now
    releases the two work vectors; previously they were leaked there.
*/
extern "C" magma_int_t
magma_ziterref(
    magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x,  
    magma_z_solver_par *solver_par, magma_z_preconditioner *precond_par,
    magma_queue_t queue )
{
    // remember the kernel stream active on entry; it is reinstated on exit
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );

    // prepare solver feedback
    solver_par->solver = Magma_ITERREF;
    solver_par->numiter = 0;
    solver_par->info = MAGMA_SUCCESS;

    double residual;
    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->init_res = residual;    // overwritten with || b || below

    // some useful variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE, 
                                                c_mone = MAGMA_Z_NEG_ONE;
    
    magma_int_t dofs = A.num_rows;

    // workspace
    magma_z_vector r,z;
    magma_z_vinit( &r, Magma_DEV, dofs, c_zero, queue );
    magma_z_vinit( &z, Magma_DEV, dofs, c_zero, queue );

    // solver variables
    double nom, nom0, r0;

    // solver setup
    magma_zscal( dofs, c_zero, x->dval, 1) ;                    // x = 0

    magma_zcopy( dofs, b.dval, 1, r.dval, 1 );                    // r = b
    nom0 = magma_dznrm2(dofs, r.dval, 1);                       // nom0 = || r ||
    nom = nom0 * nom0;
    solver_par->init_res = nom0;

    if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) 
        r0 = ATOLERANCE;
    if ( nom < r0 ) {
        // nothing to iterate on: release the workspace before leaving
        magma_z_vfree(&r, queue );
        magma_z_vfree(&z, queue );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_SUCCESS;
    }
    
    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }
    
    // start iteration
    for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; 
                                                    solver_par->numiter++ ) {

        // the residual is scaled by 1/nom before the inner solve and the
        // correction is scaled back by nom afterwards
        magma_zscal( dofs, MAGMA_Z_MAKE(1./nom, 0.), r.dval, 1) ;  // scale it
        magma_z_precond( A, r, &z, precond_par, queue );  // inner solver:  A * z = r
        magma_zscal( dofs, MAGMA_Z_MAKE(nom, 0.), z.dval, 1) ;  // scale it
        magma_zaxpy(dofs,  c_one, z.dval, 1, x->dval, 1);        // x = x + z
        magma_z_spmv( c_mone, A, *x, c_zero, r, queue );              // r = - A x
        magma_zaxpy(dofs,  c_one, b.dval, 1, r.dval, 1);         // r = r + b
        nom = magma_dznrm2(dofs, r.dval, 1);                    // nom = || r || 

        // record every verbose-th residual / time stamp
        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) nom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if (  nom  < r0 ) {
            break;
        }
    } 
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->final_res = residual;
    solver_par->iter_res = nom;

    if ( solver_par->numiter < solver_par->maxiter) {
        solver_par->info = MAGMA_SUCCESS;
    }
    else {
        // iteration limit reached: store the last residual / time stamp
        // (same rule as inside the loop), then classify the outcome
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) nom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        if ( solver_par->init_res > solver_par->final_res )
            solver_par->info = MAGMA_SLOW_CONVERGENCE;
        else
            solver_par->info = MAGMA_DIVERGENCE;
    }   
    magma_z_vfree(&r, queue );
    magma_z_vfree(&z, queue );


    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_ziterref */
Beispiel #6
0
/*
    Prepares a PaStiX factorization of A and stores the handles needed to
    apply it later in precond.

    A        system matrix; copied to the host and converted to CSR if
             necessary
    b        right-hand side; a host copy is passed to the first pastix call
    precond  receives perm / invp (int_array_1 / int_array_2), the matrix
             arrays (M), pastix_data, iparm and dparm
    queue    forwarded to the matrix / vector routines

    The body is compiled only when HAVE_PASTIX and PRECISION_d are defined;
    otherwise a message is printed. MAGMA_SUCCESS is returned in all cases.

    Two host vectors that were allocated but neither used nor released have
    been removed.
*/
extern "C" magma_int_t
magma_zpastixsetup(
    magma_z_sparse_matrix A, magma_z_vector b,
    magma_z_preconditioner *precond,
    magma_queue_t queue )
{
    #if defined(HAVE_PASTIX)

    #if defined(PRECISION_d)

        pastix_data_t    *pastix_data = NULL; /* Pointer to a storage structure needed by pastix           */
        pastix_int_t      ncol;               /* Size of the matrix                                        */
        pastix_int_t     *colptr      = NULL; /* Indexes of first element of each column in row and values */
        pastix_int_t     *rows        = NULL; /* Row of each element of the matrix                         */
        pastix_float_t   *values      = NULL; /* Value of each element of the matrix                       */
        pastix_float_t   *rhs         = NULL; /* right hand side                                           */
        pastix_int_t     *iparm = NULL;  /* integer parameters for pastix                             */
        double           *dparm = NULL;  /* floating parameters for pastix                            */
        pastix_int_t     *perm        = NULL; /* Permutation tabular                                       */
        pastix_int_t     *invp        = NULL; /* Reverse permutation tabular                               */
        pastix_int_t      mat_type;

        magma_z_sparse_matrix A_h1, B;
        magma_z_vector b_h;
        magma_z_vtransfer( b, &b_h, A.memory_location, Magma_CPU, queue );

        // B = host copy of A in CSR
        if ( A.storage_type != Magma_CSR ) {
            magma_z_mtransfer( A, &A_h1, A.memory_location, Magma_CPU, queue );
            magma_z_mconvert( A_h1, &B, A_h1.storage_type, Magma_CSR, queue );
        }
        else {
            magma_z_mtransfer( A, &B, A.memory_location, Magma_CPU, queue );
        }


        // the CSR row pointer is handed to pastix as its column pointer and
        // the CSR column indices as its row indices
        rhs = (pastix_float_t*) b_h.dval;
        ncol = B.num_rows;
        colptr = B.drow;
        rows = B.dcol;
        values = (pastix_float_t*) B.dval;

        mat_type = API_SYM_NO;

        // NOTE(review): the results of these allocations (and of perm / invp
        // below) are not checked before use
        iparm = (pastix_int_t*)malloc(IPARM_SIZE*sizeof(pastix_int_t));
        dparm = (pastix_float_t*)malloc(DPARM_SIZE*sizeof(pastix_float_t));

        /*******************************************/
        /* Initialize parameters to default values */
        /*******************************************/
        iparm[IPARM_MODIFY_PARAMETER]    = API_NO;
        pastix(&pastix_data, MPI_COMM_WORLD,
             ncol, colptr, rows, values,
             perm, invp, rhs, 1, iparm, dparm);
        iparm[IPARM_THREAD_NBR]          = 16;
        iparm[IPARM_SYM]                 = mat_type;
        iparm[IPARM_FACTORIZATION]       = API_FACT_LU;
        iparm[IPARM_VERBOSE]             = API_VERBOSE_YES;
        iparm[IPARM_ORDERING]            = API_ORDER_SCOTCH;
        iparm[IPARM_INCOMPLETE]          = API_NO;
        iparm[IPARM_RHS_MAKING]          = API_RHS_B;
        //iparm[IPARM_AMALGAMATION]         = 5;
        iparm[IPARM_LEVEL_OF_FILL]       = 0;
        /*  if (incomplete == 1)
            {
            dparm[DPARM_EPSILON_REFINEMENT] = 1e-7;
            }
        */


        /*
         * Matrix needs :
         *    - to be in fortran numbering
         *    - to have only the lower triangular part in symmetric case
         *    - to have a graph with a symmetric structure in unsymmetric case
         * If those criteria are not matched, the csc will be reallocated and changed. 
         */
        iparm[IPARM_MATRIX_VERIFICATION] = API_YES;

        perm = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t));
        invp = (pastix_int_t*)malloc(ncol*sizeof(pastix_int_t));

        /*******************************************/
        /*      Step 1 - Ordering / Scotch         */
        /*  Perform it only when the pattern of    */
        /*  matrix change.                         */
        /*  eg: mesh refinement                    */
        /*  In many cases users can simply go from */
        /*  API_TASK_ORDERING to API_TASK_ANALYSE  */
        /*  in one call.                           */
        /*******************************************/
        /*******************************************/
        /*      Step 2 - Symbolic factorization    */
        /*  Perform it only when the pattern of    */
        /*  matrix change.                         */
        /*******************************************/
        /*******************************************/
        /* Step 3 - Mapping and Compute scheduling */
        /*  Perform it only when the pattern of    */
        /*  matrix change.                         */
        /*******************************************/
        /*******************************************/
        /*     Step 4 - Numerical Factorisation    */
        /* Perform it each time the values of the  */
        /* matrix changed.                         */
        /*******************************************/

        // steps 1-4 are executed by a single call
        iparm[IPARM_START_TASK] = API_TASK_ORDERING;
        iparm[IPARM_END_TASK]   = API_TASK_NUMFACT;

        pastix(&pastix_data, MPI_COMM_WORLD,
             ncol, colptr, rows, values,
             perm, invp, NULL, 1, iparm, dparm);

        // hand everything needed for the later solve over to precond
        precond->int_array_1 = (magma_int_t*) perm;
        precond->int_array_2 = (magma_int_t*) invp;

        // NOTE(review): values / colptr / rows were taken from B, and B is
        // passed to magma_z_mfree below -- confirm that precond->M is not
        // dereferenced after this function returns, or keep B alive.
        // Also note colptr is stored in M.dcol and rows in M.drow.
        precond->M.dval = (magmaDoubleComplex*) values;
        precond->M.dcol = (magma_int_t*) colptr;
        precond->M.drow = (magma_int_t*) rows;
        precond->M.num_rows = A.num_rows;
        precond->M.num_cols = A.num_cols;
        precond->M.memory_location = Magma_CPU;
        precond->pastix_data = pastix_data;
        precond->iparm = iparm;
        precond->dparm = dparm;

        // A_h1 only exists when a format conversion was necessary
        if ( A.storage_type != Magma_CSR) {
            magma_z_mfree( &A_h1, queue );
        }   
        magma_z_vfree( &b_h, queue );
        magma_z_mfree( &B, queue );

    #else
        printf( "error: only double precision supported yet.\n");
    #endif

#else
        printf( "error: pastix not available.\n");
#endif

    return MAGMA_SUCCESS;
}
Beispiel #7
0
/*
    Jacobi solver driver: builds the iteration matrix and constant vector
    with magma_zjacobisetup, runs solver_par->maxiter sweeps with
    magma_zjacobiiter and reports the outcome in solver_par.

    A           system matrix
    b           right-hand side
    x           in: initial guess, out: last iterate
    solver_par  in:  maxiter
                out: solver, info, init_res, final_res, numiter, runtime;
                     res_vec and timing are set to NULL
    queue       forwarded to the sparse routines

    solver_par->info is MAGMA_SUCCESS when the final residual is smaller
    than the initial one and MAGMA_DIVERGENCE otherwise; the function
    itself always returns MAGMA_SUCCESS.

    The previous version additionally computed r = A x - b and its norm in a
    temporary device vector without ever using the result; that dead work
    (one allocation, one SpMV, one axpy, one norm) has been removed.
*/
extern "C" magma_int_t
magma_zjacobi(
    magma_z_sparse_matrix A, 
    magma_z_vector b, 
    magma_z_vector *x,  
    magma_z_solver_par *solver_par,
    magma_queue_t queue )
{
    // remember the kernel stream active on entry; it is reinstated on exit
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );

    // prepare solver feedback
    solver_par->solver = Magma_JACOBI;
    solver_par->info = MAGMA_SUCCESS;

    real_Double_t tempo1, tempo2;
    double residual;
    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->init_res = residual;
    // NOTE(review): if the caller allocated these arrays beforehand they
    // become unreachable here -- confirm against the callers
    solver_par->res_vec = NULL;
    solver_par->timing = NULL;

    // Jacobi setup
    magma_z_sparse_matrix M;
    magma_z_vector c;
    magma_zjacobisetup( A, b, &M, &c, queue );

    // the iterator only needs the iteration count
    magma_z_solver_par jacobiiter_par;
    jacobiiter_par.maxiter = solver_par->maxiter;

    tempo1 = magma_sync_wtime( queue );

    // Jacobi iterator
    magma_zjacobiiter( M, c, x, &jacobiiter_par, queue ); 

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->final_res = residual;
    solver_par->numiter = solver_par->maxiter;

    if ( solver_par->init_res > solver_par->final_res )
        solver_par->info = MAGMA_SUCCESS;
    else
        solver_par->info = MAGMA_DIVERGENCE;

    magma_z_mfree( &M, queue );
    magma_z_vfree( &c, queue );

    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_zjacobi */
Beispiel #8
0
/*
    Builds the data of the Jacobi iteration for the system A x = b.

    A      system matrix
    b      right-hand side
    M      output: A with every row divided by its diagonal entry and the
           diagonal entry itself set to zero, compressed by
           magma_z_csr_compressor, converted back to A's storage format and
           transferred to A.memory_location
    c      output: b divided elementwise by the diagonal of A, transferred
           to A.memory_location
    queue  forwarded to the matrix / vector routines

    A diagonal entry with zero real part is reported on stdout; the division
    is carried out regardless. Always returns MAGMA_SUCCESS.
*/
extern "C" magma_int_t
magma_zjacobisetup(
    magma_z_sparse_matrix A, magma_z_vector b, 
    magma_z_sparse_matrix *M, magma_z_vector *c,
    magma_queue_t queue )
{
    magma_int_t k;

    magma_z_sparse_matrix host_copy, reformatted, csr, packed;
    magma_z_vector pivot, rhs_scaled, rhs_host;
    magma_z_vinit( &rhs_scaled, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue );
    magma_z_vinit( &pivot, Magma_CPU, A.num_rows, MAGMA_Z_ZERO, queue );
    magma_z_vtransfer( b, &rhs_host, A.memory_location, Magma_CPU, queue );

    // csr = host copy of A in CSR
    if ( A.storage_type == Magma_CSR ) {
        magma_z_mtransfer( A, &csr, A.memory_location, Magma_CPU, queue );
    }
    else {
        magma_z_mtransfer( A, &host_copy, A.memory_location, Magma_CPU, queue );
        magma_z_mconvert( host_copy, &csr, host_copy.storage_type, Magma_CSR, 
                          queue );
    }

    for( magma_int_t row=0; row<csr.num_rows; row++ ) {
        const magma_int_t first = (csr.drow[row]);
        const magma_int_t last  = (csr.drow[row+1]);

        // pass 1: locate the diagonal entry of this row
        for( k=first; k<last; k++ ) {
            if ( csr.dcol[k]!=row )
                continue;
            pivot.val[row] = csr.val[k];
            if ( MAGMA_Z_REAL( pivot.val[row]) == 0 )
                printf(" error: zero diagonal element in row %d!\n", 
                                                               (int) row);
        }

        // pass 2: scale the off-diagonal entries, clear the diagonal one
        for( k=first; k<last; k++ ) {
            if ( csr.dcol[k]==row ) {
                csr.val[k] = MAGMA_Z_MAKE( 0., 0. );
            }
            else {
                csr.val[k] = csr.val[k] / pivot.val[row];
            }
        }

        rhs_scaled.val[row] = rhs_host.val[row] / pivot.val[row];
    }

    // packed receives the compressed arrays; its descriptor fields are
    // filled in from csr afterwards
    magma_z_csr_compressor(&csr.val, &csr.drow, &csr.dcol, 
                           &packed.val, &packed.drow, &packed.dcol, 
                           &csr.num_rows, queue );  

    packed.num_rows = csr.num_rows;
    packed.num_cols = csr.num_cols;
    packed.memory_location = csr.memory_location;
    packed.nnz = packed.drow[csr.num_rows];
    packed.storage_type = csr.storage_type;

    // back to the caller's storage format and memory location
    if ( A.storage_type == Magma_CSR ) {
        magma_z_mtransfer( packed, M, Magma_CPU, A.memory_location, queue );
    }
    else {
        reformatted.alignment = A.alignment;
        reformatted.blocksize = A.blocksize;
        magma_z_mconvert( packed, &reformatted, Magma_CSR, 
                          host_copy.storage_type, queue );
        magma_z_mtransfer( reformatted, M, Magma_CPU, A.memory_location, 
                           queue );
    }     
    magma_z_vtransfer( rhs_scaled, c, Magma_CPU, A.memory_location, queue );

    // host_copy / reformatted only exist when a conversion was necessary
    if ( A.storage_type != Magma_CSR) {
        magma_z_mfree( &host_copy, queue );
        magma_z_mfree( &reformatted, queue );   
    }   
    magma_z_mfree( &csr, queue );
    magma_z_mfree( &packed, queue );  
    magma_z_vfree( &pivot, queue );
    magma_z_vfree( &rhs_scaled, queue );
    magma_z_vfree( &rhs_host, queue );

    return MAGMA_SUCCESS;
}
Beispiel #9
0
/* ////////////////////////////////////////////////////////////////////////////
   -- running magma_zbaiter

   Command line:  [ --mscale k ] [ --maxiter n ] matrix-file ...

   Options are read until the first argument that is not a known option;
   every remaining argument is treated as a matrix file and solved with a
   right-hand side of ones and a zero initial guess.

   Each option needs a value. An option given as the very last argument
   used to read past the end of argv; it is now reported and the program
   ends with exit status -1.
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    magma_z_solver_par solver_par;
    magma_z_preconditioner precond_par;
    solver_par.maxiter = 1000;
    solver_par.verbose = 0;
    solver_par.num_eigenvalues = 0;
    int scale = 0;
    magma_scale_t scaling = Magma_NOSCALE;
    
    magma_z_sparse_matrix A;
    magma_z_vector x, b;

    magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0);

    int i;
    for( i = 1; i < argc; ++i ) {
        const int is_maxiter = ( strcmp("--maxiter", argv[i]) == 0 );
        const int is_mscale  = ( strcmp("--mscale",  argv[i]) == 0 );

        if ( ! is_maxiter && ! is_mscale )
            break;      // first non-option argument: start of matrix list

        // both options consume the following argument
        if ( i + 1 >= argc ) {
            printf( "error: option %s requires a value.\n", argv[i] );
            TESTING_FINALIZE();
            return -1;
        }
        const char *value = argv[++i];

        if ( is_maxiter ) {
            solver_par.maxiter = atoi( value );
        }
        else {
            scale = atoi( value );
            switch( scale ) {
                case 0: scaling = Magma_NOSCALE; break;
                case 1: scaling = Magma_UNITDIAG; break;
                case 2: scaling = Magma_UNITROW; break;
                default: break;     // unknown value: keep current scaling
            }
        }
    }
    printf( "\n#    usage: ./run_zbaiter"
        " [ "
        " --mscale %d (0=no, 1=unitdiag, 2=unitrownrm)"
        " --maxiter %d ]"
        " matrices \n\n",
        (int) scale,
        (int) solver_par.maxiter);

    magma_zsolverinfo_init( &solver_par, &precond_par );

    while(  i < argc ){

        magma_z_csr_mtx( &A,  argv[i]  ); 

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                            (int) A.num_rows,(int) A.num_cols,(int) A.nnz );

        // scale matrix
        magma_zmscale( &A, scaling );

        // right-hand side of ones, zero initial guess
        magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
        magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );

        magma_zbaiter( A, b, &x, &solver_par );

        magma_zsolverinfo( &solver_par, &precond_par );


        magma_z_mfree(&A); 
        magma_z_vfree(&x);
        magma_z_vfree(&b);
        
        i++;
    }

    magma_zsolverinfo_free( &solver_par, &precond_par );

    TESTING_FINALIZE();

    return 0;
}
Beispiel #10
0
/*
    Preconditioned conjugate gradient iteration for A x = b.

    A            system matrix
    b            right-hand side
    x            solution vector; it is reset to zero before iterating
    solver_par   in:  maxiter, epsilon, verbose
                 out: solver, numiter, info, init_res, iter_res, final_res,
                      runtime and, if verbose > 0, res_vec / timing
    precond_par  forwarded to magma_z_applyprecond_left / _right

    Returns MAGMA_SUCCESS, or -100 when (p, A p) <= 0 for the initial
    search direction (a message is printed in that case).

    solver_par->info is 0 when the loop ended before maxiter, -2 when it
    did not but the residual decreased, and -1 otherwise.

    Changes with respect to the previous version:
      - both early exits now release the five work vectors (they were
        leaked before);
      - res starts as the initial residual norm, so iter_res is defined
        even when the loop body never executes.
*/
magma_int_t
magma_zpcg( magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x,  
            magma_z_solver_par *solver_par, 
            magma_z_preconditioner *precond_par ){

    // prepare solver feedback
    solver_par->solver = Magma_PCG;
    solver_par->numiter = 0;
    solver_par->info = 0;

    // local variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE;
    
    magma_int_t dofs = A.num_rows;

    // GPU workspace
    magma_z_vector r, rt, p, q, h;
    magma_z_vinit( &r, Magma_DEV, dofs, c_zero );
    magma_z_vinit( &rt, Magma_DEV, dofs, c_zero );
    magma_z_vinit( &p, Magma_DEV, dofs, c_zero );
    magma_z_vinit( &q, Magma_DEV, dofs, c_zero );
    magma_z_vinit( &h, Magma_DEV, dofs, c_zero );
    
    // solver variables
    magmaDoubleComplex alpha, beta;
    double nom, nom0, r0, gammaold, gammanew, den, res;

    // solver setup
    magma_zscal( dofs, c_zero, x->val, 1) ;                     // x = 0
    magma_zcopy( dofs, b.val, 1, r.val, 1 );                    // r = b

    // preconditioner
    magma_z_applyprecond_left( A, r, &rt, precond_par );
    magma_z_applyprecond_right( A, rt, &h, precond_par );

    magma_zcopy( dofs, h.val, 1, p.val, 1 );                    // p = h
    nom = MAGMA_Z_REAL( magma_zdotc(dofs, r.val, 1, h.val, 1) );          
    nom0 = magma_dznrm2( dofs, r.val, 1 );                                                 
    res = nom0;                         // defined even if no iteration runs
    magma_z_spmv( c_one, A, p, c_zero, q );                     // q = A p
    den = MAGMA_Z_REAL( magma_zdotc(dofs, p.val, 1, q.val, 1) );// den = p dot q
    solver_par->init_res = nom0;
    
    if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) 
        r0 = ATOLERANCE;

    // early exits: already converged (checked first), or operator not
    // positive definite; the workspace is released on both
    if ( nom < r0 || den <= 0.0 ) {
        magma_int_t early_info = MAGMA_SUCCESS;
        if ( !( nom < r0 ) ) {
            // check positive definite
            printf("Operator A is not postive definite. (Ar,r) = %f\n", den);
            early_info = -100;
        }
        magma_z_vfree(&r);
        magma_z_vfree(&rt);
        magma_z_vfree(&p);
        magma_z_vfree(&q);
        magma_z_vfree(&h);
        return early_info;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    magma_device_sync(); tempo1=magma_wtime();
    if( solver_par->verbose > 0 ){
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    
    // start iteration
    for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; 
                                                    solver_par->numiter++ ){
        // preconditioner
        magma_z_applyprecond_left( A, r, &rt, precond_par );
        magma_z_applyprecond_right( A, rt, &h, precond_par );

        gammanew = MAGMA_Z_REAL( magma_zdotc(dofs, r.val, 1, h.val, 1) );   
                                                            // gn = < r,h>

        // gammaold is first written at the end of iteration 1 and is only
        // read from iteration 2 on
        if( solver_par->numiter==1 ){
            magma_zcopy( dofs, h.val, 1, p.val, 1 );                    // p = h            
        }else{
            beta = MAGMA_Z_MAKE(gammanew/gammaold, 0.);       // beta = gn/go
            magma_zscal(dofs, beta, p.val, 1);            // p = beta*p
            magma_zaxpy(dofs, c_one, h.val, 1, p.val, 1); // p = p + h 
        }

        magma_z_spmv( c_one, A, p, c_zero, q );           // q = A p
        den = MAGMA_Z_REAL(magma_zdotc(dofs, p.val, 1, q.val, 1));    
                // den = p dot q 

        alpha = MAGMA_Z_MAKE(gammanew/den, 0.);
        magma_zaxpy(dofs,  alpha, p.val, 1, x->val, 1);     // x = x + alpha p
        magma_zaxpy(dofs, -alpha, q.val, 1, r.val, 1);      // r = r - alpha q
        gammaold = gammanew;

        res = magma_dznrm2( dofs, r.val, 1 );

        // record every verbose-th residual / time stamp
        if( solver_par->verbose > 0 ){
            magma_device_sync(); tempo2=magma_wtime();
            if( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }


        if (  res/nom0  < solver_par->epsilon ) {
            break;
        }
    } 
    magma_device_sync(); tempo2=magma_wtime();
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    magma_zresidual( A, b, *x, &residual );
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if( solver_par->numiter < solver_par->maxiter){
        solver_par->info = 0;
    }
    else{
        // iteration limit reached: store the last residual / time stamp
        // (same rule as inside the loop), then classify the outcome
        if( solver_par->verbose > 0 ){
            if( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        if( solver_par->init_res > solver_par->final_res )
            solver_par->info = -2;
        else
            solver_par->info = -1;
    }
    magma_z_vfree(&r);
    magma_z_vfree(&rt);
    magma_z_vfree(&p);
    magma_z_vfree(&q);
    magma_z_vfree(&h);

    return MAGMA_SUCCESS;
}   /* magma_zpcg */
Beispiel #11
0
/* ////////////////////////////////////////////////////////////////////////////
   -- running magma_zgmres

   Command line:  [ options ] matrix-file ...

   Options are read until the first argument that is not a known option;
   every remaining argument is treated as a matrix file. For each matrix
   the right-hand side is b = A * ones and the initial guess is zero.

   Every option needs a value. An option given as the very last argument
   used to read past the end of argv; it is now reported and the program
   ends with exit status -1.
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    magma_z_solver_par solver_par;
    magma_z_preconditioner precond_par;
    solver_par.epsilon = 10e-16;
    solver_par.maxiter = 1000;
    solver_par.restart = 30;
    solver_par.num_eigenvalues = 0;
    solver_par.ortho = Magma_CGS;
    solver_par.verbose = 0;
    int format = 0;
    int ortho = 0;
    int scale = 0;
    magma_scale_t scaling = Magma_NOSCALE;

    magma_z_sparse_matrix A, B, B_d;
    magma_z_vector x, b;
    B.blocksize = 8;
    B.alignment = 8;

    magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0);

    B.storage_type = Magma_CSR;

    // every recognized option is followed by exactly one value
    static const char * const known_opts[] = {
        "--format", "--mscale", "--blocksize", "--alignment", "--verbose",
        "--ortho", "--restart", "--maxiter", "--tol" };
    const int num_known = (int)( sizeof(known_opts) / sizeof(known_opts[0]) );

    int i;
    for( i = 1; i < argc; ++i ) {
        const char *opt = argv[i];

        int recognized = 0;
        for( int k = 0; k < num_known; ++k ) {
            if ( strcmp( known_opts[k], opt ) == 0 ) {
                recognized = 1;
                break;
            }
        }
        if ( ! recognized )
            break;      // first non-option argument: start of matrix list

        if ( i + 1 >= argc ) {
            printf( "error: option %s requires a value.\n", opt );
            TESTING_FINALIZE();
            return -1;
        }
        const char *value = argv[++i];

        if ( strcmp("--format", opt) == 0 ) {
            format = atoi( value );
            switch( format ) {
                case 0: B.storage_type = Magma_CSR; break;
                case 1: B.storage_type = Magma_ELL; break;
                case 2: B.storage_type = Magma_ELLRT; break;
                case 3: B.storage_type = Magma_SELLP; break;
                default: break;     // unknown value: keep current format
            }
        } else if ( strcmp("--mscale", opt) == 0 ) {
            scale = atoi( value );
            switch( scale ) {
                case 0: scaling = Magma_NOSCALE; break;
                case 1: scaling = Magma_UNITDIAG; break;
                case 2: scaling = Magma_UNITROW; break;
                default: break;     // unknown value: keep current scaling
            }
        } else if ( strcmp("--blocksize", opt) == 0 ) {
            B.blocksize = atoi( value );
        } else if ( strcmp("--alignment", opt) == 0 ) {
            B.alignment = atoi( value );
        } else if ( strcmp("--verbose", opt) == 0 ) {
            solver_par.verbose = atoi( value );
        } else if ( strcmp("--ortho", opt) == 0 ) {
            ortho = atoi( value );
            switch( ortho ) {
                case 0: solver_par.ortho = Magma_CGS; break;
                case 1: solver_par.ortho = Magma_MGS; break;
                case 2: solver_par.ortho = Magma_FUSED_CGS; break;
                default: break;     // unknown value: keep current scheme
            }
        } else if ( strcmp("--restart", opt) == 0 ) {
            solver_par.restart = atoi( value );
        } else if ( strcmp("--maxiter", opt) == 0 ) {
            solver_par.maxiter = atoi( value );
        } else {    // "--tol"
            sscanf( value, "%lf", &solver_par.epsilon );
        }
    }
    printf( "\n#    usage: ./run_zgmres"
        " [ --format %d (0=CSR, 1=ELL 2=ELLRT, 3=SELLP)"
        " [ --blocksize %d --alignment %d ]"
        " --mscale %d (0=no, 1=unitdiag, 2=unitrownrm)"
        " --verbose %d (0=summary, k=details every k iterations)"
        " --restart %d --maxiter %d --tol %.2e"
        " --ortho %d (0=CGS, 1=MGS, 2=FUSED_CGS) ]"
        " matrices \n\n", format, (int) B.blocksize, (int) B.alignment,
        (int) scale,
        (int) solver_par.verbose, 
        (int) solver_par.restart, (int) solver_par.maxiter, 
                                    solver_par.epsilon, ortho );

    magma_zsolverinfo_init( &solver_par, &precond_par );

    while(  i < argc ){

        magma_z_csr_mtx( &A,  argv[i]  ); 

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                            (int) A.num_rows,(int) A.num_cols,(int) A.nnz );

        // scale matrix
        magma_zmscale( &A, scaling );

        // convert to the requested format and copy to the device
        magma_z_mconvert( A, &B, Magma_CSR, B.storage_type );
        magma_z_mtransfer( B, &B_d, Magma_CPU, Magma_DEV );

        // vectors and initial guess
        magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
        magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
        magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
        magma_z_vfree(&x);
        magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );

        magma_zgmres( B_d, b, &x, &solver_par );

        magma_zsolverinfo( &solver_par, &precond_par );

        magma_z_mfree(&B_d);
        magma_z_mfree(&B);
        magma_z_mfree(&A); 
        magma_z_vfree(&x);
        magma_z_vfree(&b);

        i++;
    }
        
    magma_zsolverinfo_free( &solver_par, &precond_par );

    TESTING_FINALIZE();
    return 0;
}
Beispiel #12
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Debugging file
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    magma_z_solver_par solver_par;
    magma_z_preconditioner precond_par;
    solver_par.epsilon = 10e-16;
    solver_par.maxiter = 1000;
    solver_par.verbose = 0;
    solver_par.restart = 30;
    solver_par.num_eigenvalues = 0;
    solver_par.ortho = Magma_CGS;
    
    magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0);

    magma_z_sparse_matrix A, B, B_d;
    magma_z_vector x, b;

    // generate matrix of desired structure and size
    magma_int_t n=100;   // size is n*n
    magma_int_t nn = n*n;
    magma_int_t offdiags = 2;
    magma_index_t *diag_offset;
    magmaDoubleComplex *diag_vals;
    magma_zmalloc_cpu( &diag_vals, offdiags+1 );
    magma_index_malloc_cpu( &diag_offset, offdiags+1 );
    diag_offset[0] = 0;
    diag_offset[1] = 1;
    diag_offset[2] = n;
    diag_vals[0] = MAGMA_Z_MAKE( 4.1, 0.0 );
    diag_vals[1] = MAGMA_Z_MAKE( -1.0, 0.0 );
    diag_vals[2] = MAGMA_Z_MAKE( -1.0, 0.0 );
    magma_zmgenerator( nn, offdiags, diag_offset, diag_vals, &A );

    // convert marix into desired format
    B.storage_type = Magma_SELLC;
    B.blocksize = 8;
    B.alignment = 8;
    // scale matrix
    magma_zmscale( &A, Magma_UNITDIAG );

    magma_z_mconvert( A, &B, Magma_CSR, B.storage_type );
    magma_z_mtransfer( B, &B_d, Magma_CPU, Magma_DEV );


    // test CG ####################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // solver
    magma_zcg_res( B_d, b, &x, &solver_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PCG Jacobi ############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_JACOBI;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpcg( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PCG IC ################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ICC;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpcg( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);


    // test PCG IC ################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ICC;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpcg( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test BICGSTAB ####################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // solver
    magma_zbicgstab( B_d, b, &x, &solver_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PBICGSTAB Jacobi ############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_JACOBI;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);
/*
    // test PBICGSTAB ILU ###############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ILU;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PBICGSTAB ILU ###############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);printf("here\n");
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ILU;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test GMRES ####################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // solver
    magma_zgmres( B_d, b, &x, &solver_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PGMRES Jacobi ############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_JACOBI;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpgmres( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);*/

    // test PGMRES ILU ###############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ILU;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpgmres( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);


    printf("all tests passed.\n");




    magma_z_mfree(&B_d);
    magma_z_mfree(&B);
    magma_z_mfree(&A); 


    TESTING_FINALIZE();
    return 0;
}
Beispiel #13
0
/* ////////////////////////////////////////////////////////////////////////////
   -- testing sparse matrix vector product
*/
int main(  int argc, char** argv )
{
    TESTING_INIT();
    magma_queue_t queue;
    magma_queue_create( /*devices[ opts->device ],*/ &queue );

    magma_z_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL;
    hA_SELLP.blocksize = 8;
    hA_SELLP.alignment = 8;
    real_Double_t start, end, res;
    magma_int_t *pntre;

    magmaDoubleComplex c_one  = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex c_zero = MAGMA_Z_MAKE(0.0, 0.0);
    
    magma_int_t i, j;
    for( i = 1; i < argc; ++i ) {
        if ( strcmp("--blocksize", argv[i]) == 0 ) {
            hA_SELLP.blocksize = atoi( argv[++i] );
        } else if ( strcmp("--alignment", argv[i]) == 0 ) {
            hA_SELLP.alignment = atoi( argv[++i] );
        } else
            break;
    }
    printf( "\n#    usage: ./run_zspmv"
        " [ --blocksize %d --alignment %d (for SELLP) ]"
        " matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment );

    while(  i < argc ) {

        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            magma_zm_5stencil(  laplace_size, &hA, queue );
        } else {                        // file-matrix test
            magma_z_csr_mtx( &hA,  argv[i], queue );
        }

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                            (int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz );

        real_Double_t FLOPS = 2.0*hA.nnz/1e9;

        magma_z_vector hx, hy, dx, dy, hrefvec, hcheck;

        // init CPU vectors
        magma_z_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue );
        magma_z_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue );

        // init DEV vectors
        magma_z_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue );
        magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );

        #ifdef MAGMA_WITH_MKL
            // calling MKL with CSR
            pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) );
            pntre[0] = 0;
            for (j=0; j<hA.num_rows; j++ ) {
                pntre[j] = hA.row[j+1];
            }
             MKL_INT num_rows = hA.num_rows;
             MKL_INT num_cols = hA.num_cols;
             MKL_INT nnz = hA.nnz;

            MKL_INT *col;
            TESTING_MALLOC_CPU( col, MKL_INT, nnz );
            for( magma_int_t t=0; t < hA.nnz; ++t ) {
                col[ t ] = hA.col[ t ];
            }
            MKL_INT *row;
            TESTING_MALLOC_CPU( row, MKL_INT, num_rows );
            for( magma_int_t t=0; t < hA.num_rows; ++t ) {
                row[ t ] = hA.col[ t ];
            }
    
            start = magma_wtime();
            for (j=0; j<10; j++ ) {
                mkl_zcsrmv( "N", &num_rows, &num_cols, 
                            MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val), 
                            col, row, pntre, 
                                                    MKL_ADDR(hx.val), 
                            MKL_ADDR(&c_zero),        MKL_ADDR(hy.val) );
            }
            end = magma_wtime();
            printf( "\n > MKL  : %.2e seconds %.2e GFLOP/s    (CSR).\n",
                                            (end-start)/10, FLOPS*10/(end-start) );

            TESTING_FREE_CPU( row );
            TESTING_FREE_CPU( col );
            free(pntre);
        #endif // MAGMA_WITH_MKL

        // copy matrix to GPU
        magma_z_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );        
        // SpMV on GPU (CSR) -- this is the reference!
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_z_spmv( c_one, dA, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (standard CSR).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        magma_z_mfree(&dA, queue );
        magma_z_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue );

        // convert to ELL and copy to GPU
        magma_z_mconvert(  hA, &hA_ELL, Magma_CSR, Magma_ELL, queue );
        magma_z_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue );
        magma_z_mfree(&hA_ELL, queue );
        magma_z_vfree( &dy, queue );
        magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
        // SpMV on GPU (ELL)
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_z_spmv( c_one, dA_ELL, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (standard ELL).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        magma_z_mfree(&dA_ELL, queue );
        magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]);
        if ( res < .000001 )
            printf("# tester spmv ELL:  ok\n");
        else
            printf("# tester spmv ELL:  failed\n");
        magma_z_vfree( &hcheck, queue );

        // convert to SELLP and copy to GPU
        magma_z_mconvert(  hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue );
        magma_z_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue );
        magma_z_mfree(&hA_SELLP, queue );
        magma_z_vfree( &dy, queue );
        magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
        // SpMV on GPU (SELLP)
        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            magma_z_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue );
        end = magma_sync_wtime( queue );
        printf( " > MAGMA: %.2e seconds %.2e GFLOP/s    (SELLP).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );

        magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv SELL-P:  ok\n");
        else
            printf("# tester spmv SELL-P:  failed\n");
        magma_z_vfree( &hcheck, queue );

        magma_z_mfree(&dA_SELLP, queue );


        // SpMV on GPU (CUSPARSE - CSR)
        // CUSPARSE context //

        cusparseHandle_t cusparseHandle = 0;
        cusparseStatus_t cusparseStatus;
        cusparseStatus = cusparseCreate(&cusparseHandle);
        cusparseSetStream( cusparseHandle, queue );

        cusparseMatDescr_t descr = 0;
        cusparseStatus = cusparseCreateMatDescr(&descr);

        cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
        cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);
        magmaDoubleComplex alpha = c_one;
        magmaDoubleComplex beta = c_zero;
        magma_z_vfree( &dy, queue );
        magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );

        // copy matrix to GPU
        magma_z_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );

        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            cusparseStatus =
            cusparseZcsrmv(cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, 
                        hA.num_rows, hA.num_cols, hA.nnz, &alpha, descr, 
                        dA.dval, dA.drow, dA.dcol, dx.dval, &beta, dy.dval);
        end = magma_sync_wtime( queue );
        if (cusparseStatus != 0)    printf("error in cuSPARSE CSR\n");
        printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s    (CSR).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );
        cusparseMatDescr_t descrA;
        cusparseStatus = cusparseCreateMatDescr(&descrA);
         if (cusparseStatus != 0)    printf("error\n");
        cusparseHybMat_t hybA;
        cusparseStatus = cusparseCreateHybMat( &hybA );
         if (cusparseStatus != 0)    printf("error\n");

        magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv cuSPARSE CSR:  ok\n");
        else
            printf("# tester spmv cuSPARSE CSR:  failed\n");
        magma_z_vfree( &hcheck, queue );
        magma_z_vfree( &dy, queue );
        magma_z_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
       
        cusparseZcsr2hyb(cusparseHandle,  hA.num_rows, hA.num_cols,
                        descrA, dA.dval, dA.drow, dA.dcol,
                        hybA, 0, CUSPARSE_HYB_PARTITION_AUTO);

        start = magma_sync_wtime( queue );
        for (j=0; j<10; j++)
            cusparseStatus =
            cusparseZhybmv( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, 
               &alpha, descrA, hybA,
               dx.dval, &beta, dy.dval);
        end = magma_sync_wtime( queue );
        if (cusparseStatus != 0)    printf("error in cuSPARSE HYB\n");
        printf( " > CUSPARSE: %.2e seconds %.2e GFLOP/s    (HYB).\n",
                                        (end-start)/10, FLOPS*10/(end-start) );

        magma_z_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
        res = 0.0;
        for(magma_int_t k=0; k<hA.num_rows; k++ )
            res=res + MAGMA_Z_REAL(hcheck.val[k]) - MAGMA_Z_REAL(hrefvec.val[k]);
        printf("# |x-y|_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("# tester spmv cuSPARSE HYB:  ok\n");
        else
            printf("# tester spmv cuSPARSE HYB:  failed\n");
        magma_z_vfree( &hcheck, queue );

        cusparseDestroyMatDescr( descrA );
        cusparseDestroyHybMat( hybA );
        cusparseDestroy( cusparseHandle );

        magma_z_mfree(&dA, queue );



        printf("\n\n");


        // free CPU memory
        magma_z_mfree(&hA, queue );
        magma_z_vfree(&hx, queue );
        magma_z_vfree(&hy, queue );
        magma_z_vfree(&hrefvec, queue );
        // free GPU memory
        magma_z_vfree(&dx, queue );
        magma_z_vfree(&dy, queue );

        i++;

    }
    
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return 0;
}
Beispiel #14
0
/*
    Conjugate Gradient solver for A * x = b.

    The content of x on entry is discarded: x is set to zero before the
    iteration starts and holds the approximate solution on return.

    Arguments
      A           system matrix
      b           right-hand side
      x           in: vector of length A.num_rows; out: approximate solution
      solver_par  in: epsilon, maxiter, verbose
                  out: solver, numiter, info, init_res and - unless one of
                  the early exits below is taken - runtime and final_res;
                  res_vec / timing are filled when verbose > 0
      queue       queue passed on to the sparse routines

    Returns MAGMA_NONSPD when the initial (A p, p) is not positive, and
    MAGMA_SUCCESS otherwise. Slow convergence or divergence is reported only
    through solver_par->info, not through the return value.
*/
extern "C" magma_int_t
magma_zcg(
    magma_z_sparse_matrix A, magma_z_vector b, magma_z_vector *x,  
    magma_z_solver_par *solver_par,
    magma_queue_t queue )
{
    // remember the kernel stream of the old dense routines so it can be
    // restored on every exit path
    magma_queue_t orig_queue;
    magmablasGetKernelStream( &orig_queue );

    // prepare solver feedback
    solver_par->solver = Magma_CG;
    solver_par->numiter = 0;
    solver_par->info = MAGMA_SUCCESS; 

    // local variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE;
    
    magma_int_t dofs = A.num_rows;

    // GPU workspace
    magma_z_vector r, p, q;
    magma_z_vinit( &r, Magma_DEV, dofs, c_zero, queue );
    magma_z_vinit( &p, Magma_DEV, dofs, c_zero, queue );
    magma_z_vinit( &q, Magma_DEV, dofs, c_zero, queue );
    
    // solver variables
    magmaDoubleComplex alpha, beta;
    double nom, nom0, r0, betanom, betanomsq, den;

    // solver setup
    magma_zscal( dofs, c_zero, x->dval, 1) ;                     // x = 0
    magma_zcopy( dofs, b.dval, 1, r.dval, 1 );                    // r = b
    magma_zcopy( dofs, b.dval, 1, p.dval, 1 );                    // p = b
    nom0 = betanom = magma_dznrm2( dofs, r.dval, 1 );           
    nom  = nom0 * nom0;                                // nom = r' * r
    magma_z_spmv( c_one, A, p, c_zero, q, queue );                     // q = A p
    den = MAGMA_Z_REAL( magma_zdotc(dofs, p.dval, 1, q.dval, 1) );// den = p dot q
    solver_par->init_res = nom0;
    
    // NOTE(review): r0 is derived from the squared norm (nom), while the
    // iteration below compares it against the plain norm (betanom) - confirm
    // that this is the intended stopping criterion.
    if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) 
        r0 = ATOLERANCE;
    if ( nom < r0 ) {
        // nothing to iterate on: release the workspace before leaving
        magma_z_vfree(&r, queue );
        magma_z_vfree(&p, queue );
        magma_z_vfree(&q, queue );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_SUCCESS;
    }
    // check positive definite
    if (den <= 0.0) {
        printf("Operator A is not postive definite. (Ar,r) = %f\n", den);
        // record the failure before returning (this assignment used to sit
        // behind the return statement and was never executed)
        solver_par->info = MAGMA_NONSPD;
        magma_z_vfree(&r, queue );
        magma_z_vfree(&p, queue );
        magma_z_vfree(&q, queue );
        magmablasSetKernelStream( orig_queue );
        return MAGMA_NONSPD;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    
    // start iteration
    for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; 
                                                    solver_par->numiter++ ) {
        alpha = MAGMA_Z_MAKE(nom/den, 0.);
        magma_zaxpy(dofs,  alpha, p.dval, 1, x->dval, 1);     // x = x + alpha p
        magma_zaxpy(dofs, -alpha, q.dval, 1, r.dval, 1);      // r = r - alpha q
        betanom = magma_dznrm2(dofs, r.dval, 1);             // betanom = || r ||
        betanomsq = betanom * betanom;                      // betanoms = r' * r

        // every verbose-th iteration stores residual norm and elapsed time
        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if (  betanom  < r0 ) {
            break;
        }

        beta = MAGMA_Z_MAKE(betanomsq/nom, 0.);           // beta = betanoms/nom
        magma_zscal(dofs, beta, p.dval, 1);                // p = beta*p
        magma_zaxpy(dofs, c_one, r.dval, 1, p.dval, 1);     // p = p + r 
        magma_z_spmv( c_one, A, p, c_zero, q, queue );           // q = A p
        den = MAGMA_Z_REAL(magma_zdotc(dofs, p.dval, 1, q.dval, 1));    
                // den = p dot q
        nom = betanomsq;
    } 
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    magma_zresidual( A, b, *x, &residual, queue );
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter) {
        solver_par->info = MAGMA_SUCCESS;
    } else {
        // iteration limit reached: store the last residual / time sample,
        // then classify the run by comparing initial and final residual
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) betanom;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose] 
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        if ( solver_par->init_res > solver_par->final_res ) {
            solver_par->info = MAGMA_SLOW_CONVERGENCE;
        } else {
            solver_par->info = MAGMA_DIVERGENCE;
        }
    }
    magma_z_vfree(&r, queue );
    magma_z_vfree(&p, queue );
    magma_z_vfree(&q, queue );

    magmablasSetKernelStream( orig_queue );
    return MAGMA_SUCCESS;
}   /* magma_zcg */
Beispiel #15
0
/* ////////////////////////////////////////////////////////////////////////////
   -- testing zdot
*/
/* Benchmark comparing five ways of computing num_vecs dot products of
   length n against one common vector: repeated magma_zdotc (CUDOT),
   magma_zgemv (CUGEMV), magmablas_zgemv (MAGMAGEMV), magma_zmdotc (MDOT)
   and magma_zgemvmdot (MDGM). For every n one line with the average runtime
   per repetition and the resulting GFLOPS is printed. Command line
   arguments are not evaluated. */
int main( int argc, char** argv)
{
    TESTING_INIT();

    printf("#================================================================================================================================================\n");
    printf("\n");
    printf("            |                            runtime                             |                              GFLOPS\n");
    printf("#n num_vecs |  CUDOT       CUGEMV       MAGMAGEMV       MDOT       MDGM      |      CUDOT       CUGEMV      MAGMAGEMV       MDOT       MDGM      \n");
    printf("#------------------------------------------------------------------------------------------------------------------------------------------------\n");
    printf("\n");

    for( magma_int_t num_vecs=5; num_vecs<6; num_vecs+=1 ){

        for( magma_int_t n=10000; n<100000001; n=n+10000 ){

            magma_z_vector a, b, x, y, skp;
            int iters = 10;
            double computations = (2.* n * iters * num_vecs); 

            magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0);
            magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0);
            magmaDoubleComplex alpha;

            #define ENABLE_TIMER
            #ifdef ENABLE_TIMER
            double mdot1, mdot2, mdgm1, mdgm2, magmagemv1, magmagemv2, cugemv1, cugemv2, cudot1, cudot2;
            double mdot_time, mdgm_time, magmagemv_time, cugemv_time, cudot_time;
            #endif

            // a holds the num_vecs vectors of length n back to back
            magma_z_vinit( &a, Magma_DEV, n*num_vecs, one );
            // b is the common vector; every routine below reads n entries
            // of it (length n in zdotc, x operand of the transposed n-by-
            // num_vecs gemv), so it has to provide n entries, not num_vecs
            magma_z_vinit( &b, Magma_DEV, n, one );
            int min_ten = min(num_vecs, 15);
            magma_z_vinit( &x, Magma_DEV, min_ten*n, one );
            magma_z_vinit( &y, Magma_DEV, min_ten*n, one );
            magma_z_vinit( &skp, Magma_DEV, num_vecs, zero );

            // warm up
            magma_zgemvmdot( n, num_vecs, a.val, b.val, x.val, y.val, skp.val );

            // CUDOT
            #ifdef ENABLE_TIMER
            magma_device_sync(); cudot1=magma_wtime();
            #endif
            for( int h=0; h<iters; h++){
                for( int l=0; l<num_vecs; l++)
                    alpha = magma_zdotc(n, a.val, 1, b.val, 1);
            }
            #ifdef ENABLE_TIMER
            magma_device_sync(); cudot2=magma_wtime();
            cudot_time=cudot2-cudot1;
            #endif
            // CUGeMV
            #ifdef ENABLE_TIMER
            magma_device_sync(); cugemv1=magma_wtime();
            #endif
            for( int h=0; h<iters; h++){
                magma_zgemv(MagmaTrans, n, num_vecs, one, a.val, n, b.val, 1, zero, skp.val, 1);
            }
            #ifdef ENABLE_TIMER
            magma_device_sync(); cugemv2=magma_wtime();
            cugemv_time=cugemv2-cugemv1;
            #endif
            // MAGMAGeMV
            #ifdef ENABLE_TIMER
            magma_device_sync(); magmagemv1=magma_wtime();
            #endif
            for( int h=0; h<iters; h++){
                magmablas_zgemv(MagmaTrans, n, num_vecs, one, a.val, n, b.val, 1, zero, skp.val, 1);
            }
            #ifdef ENABLE_TIMER
            magma_device_sync(); magmagemv2=magma_wtime();
            magmagemv_time=magmagemv2-magmagemv1;
            #endif
            // MDOT
            #ifdef ENABLE_TIMER
            magma_device_sync(); mdot1=magma_wtime();
            #endif
            for( int h=0; h<iters; h++){
                // the num_vecs = 5 products are issued as 2 + 2 + 1
                //magma_zmdotc( n, num_vecs, a.val, b.val, x.val, y.val, skp.val );
                magma_zmdotc( n, 2, a.val, b.val, x.val, y.val, skp.val );
                magma_zmdotc( n, 2, a.val, b.val, x.val, y.val, skp.val );
                magma_zmdotc( n, 1, a.val, b.val, x.val, y.val, skp.val );
            }
            #ifdef ENABLE_TIMER
            magma_device_sync(); mdot2=magma_wtime();
            mdot_time=mdot2-mdot1;
            #endif
            // MDGM
            #ifdef ENABLE_TIMER
            magma_device_sync(); mdgm1=magma_wtime();
            #endif
            for( int h=0; h<iters; h++){
                magma_zgemvmdot( n, num_vecs, a.val, b.val, x.val, y.val, skp.val );
            }
            #ifdef ENABLE_TIMER
            magma_device_sync(); mdgm2=magma_wtime();
            mdgm_time=mdgm2-mdgm1;
            #endif

            //magma_zprint_gpu(num_vecs,1,skp.val,num_vecs);

            //Chronometry  
            #ifdef ENABLE_TIMER
            // magma_int_t need not be int, so cast explicitly for %d
            printf("%d  %d  %e  %e  %e  %e  %e  %e  %e  %e  %e  %e\n", 
                    (int) n, (int) num_vecs, 
                    cudot_time/iters, 
                    (cugemv_time)/iters, 
                    (magmagemv_time)/iters,
                    (mdot_time)/iters,
                    (mdgm_time)/iters,
                    (double)(computations)/(cudot_time*(1.e+09)), 
                    (double)(computations)/(cugemv_time*(1.e+09)),
                    (double)(computations)/(magmagemv_time*(1.e+09)),
                    (double)(computations)/(mdot_time*(1.e+09)),
                    (double)(computations)/(mdgm_time*(1.e+09)) );
            #endif

            magma_z_vfree(&a);
            magma_z_vfree(&b);
            magma_z_vfree(&x);
            magma_z_vfree(&y);
            magma_z_vfree(&skp);

        }

        printf("#================================================================================================================================================\n");
        printf("\n");
        printf("\n");

    }

    TESTING_FINALIZE();
    return 0;
}