Ejemplo n.º 1
0
int main (int argc, const char * argv[])
{
    typedef float               ScalarType;
    typedef boost::numeric::ublas::compressed_matrix<ScalarType>        MatrixType;
    typedef boost::numeric::ublas::vector<ScalarType>                   VectorType;
    typedef viennacl::compressed_matrix<ScalarType>                     GPUMatrixType;
    typedef viennacl::vector<ScalarType>                                GPUVectorType;
  
    MatrixType M;

    //
    // Read system matrix from file
    //
    #ifdef _MSC_VER
    if (!viennacl::io::read_matrix_market_file(M, "../../examples/testdata/mat65k.mtx"))
    #else
    if (!viennacl::io::read_matrix_market_file(M, "../examples/testdata/mat65k.mtx"))
    #endif
    {
      std::cerr<<"ERROR: Could not read matrix file " << std::endl;
      exit(EXIT_FAILURE);
    }
    
    std::cout << "Size of matrix: " << M.size1() << std::endl;
    std::cout << "Avg. Entries per row: " << M.nnz() / static_cast<double>(M.size1()) << std::endl;
    
    //
    // Use uniform load vector:
    //
    VectorType rhs(M.size2());
    for (size_t i=0; i<rhs.size(); ++i)
      rhs(i) = 1;

    GPUMatrixType  gpu_M(M.size1(), M.size2());
    GPUVectorType  gpu_rhs(M.size1());
    viennacl::copy(M, gpu_M);
    viennacl::copy(rhs, gpu_rhs);
    
    ///////////////////////////////// Tests to follow /////////////////////////////

    viennacl::linalg::bicgstab_tag solver_tag(1e-10, 50); //for simplicity and reasonably short execution times we use only 50 iterations here

    //
    // Reference: No preconditioner:
    //
    std::cout << "--- Reference 1: Pure BiCGStab on CPU ---" << std::endl;
    VectorType result = viennacl::linalg::solve(M, rhs, solver_tag);
    std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl;
    VectorType residual = viennacl::linalg::prod(M, result) - rhs;
    std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl;

    std::cout << "--- Reference 2: Pure BiCGStab on GPU ---" << std::endl;
    GPUVectorType gpu_result = viennacl::linalg::solve(gpu_M, gpu_rhs, solver_tag);
    std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl;
    GPUVectorType gpu_residual = viennacl::linalg::prod(gpu_M, gpu_result) - gpu_rhs;
    std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(gpu_residual) / viennacl::linalg::norm_2(gpu_rhs) << std::endl;
    
    
    //
    // Reference: ILUT preconditioner:
    //
    std::cout << "--- Reference 2: BiCGStab with ILUT on CPU ---" << std::endl;
    std::cout << " * Preconditioner setup..." << std::endl;
    viennacl::linalg::ilut_precond<MatrixType> ilut(M, viennacl::linalg::ilut_tag());
    std::cout << " * Iterative solver run..." << std::endl;
    run_solver(M, rhs, solver_tag, ilut);
    
    
    //
    // Test 1: SPAI with CPU:
    //
    std::cout << "--- Test 1: CPU-based SPAI ---" << std::endl;  
    std::cout << " * Preconditioner setup..." << std::endl;
    viennacl::linalg::spai_precond<MatrixType> spai_cpu(M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2));
    std::cout << " * Iterative solver run..." << std::endl;
    run_solver(M, rhs, solver_tag, spai_cpu);
    
    //
    // Test 2: FSPAI with CPU:
    //      
    std::cout << "--- Test 2: CPU-based FSPAI ---" << std::endl;  
    std::cout << " * Preconditioner setup..." << std::endl;
    viennacl::linalg::fspai_precond<MatrixType> fspai_cpu(M, viennacl::linalg::fspai_tag());
    std::cout << " * Iterative solver run..." << std::endl;
    run_solver(M, rhs, solver_tag, fspai_cpu);
    
    //
    // Test 3: SPAI with GPU:
    //      
    std::cout << "--- Test 3: GPU-based SPAI ---" << std::endl;  
    std::cout << " * Preconditioner setup..." << std::endl;
    viennacl::linalg::spai_precond<GPUMatrixType> spai_gpu(gpu_M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2));
    std::cout << " * Iterative solver run..." << std::endl;
    run_solver(gpu_M, gpu_rhs, solver_tag, spai_gpu);
    
    return EXIT_SUCCESS;
}
Ejemplo n.º 2
0
/**
*  The main steps in this tutorial are the following:
*  - Setup the systems
*  - Run solvers without preconditioner and with ILUT preconditioner for comparison
*  - Run solver with SPAI preconditioner on CPU
*  - Run solver with SPAI preconditioner on GPU
*  - Run solver with factored SPAI preconditioner on CPU
*  - Run solver with factored SPAI preconditioner on GPU
*
**/
int main (int, const char **)
{
  typedef float               ScalarType;
  typedef boost::numeric::ublas::compressed_matrix<ScalarType>        MatrixType;
  typedef boost::numeric::ublas::vector<ScalarType>                   VectorType;
  typedef viennacl::compressed_matrix<ScalarType>                     GPUMatrixType;
  typedef viennacl::vector<ScalarType>                                GPUVectorType;

  /**
  *  If you have multiple OpenCL-capable devices in your system, we pick the second device for this tutorial.
  **/
#ifdef VIENNACL_WITH_OPENCL
  // Optional: Customize OpenCL backend
  viennacl::ocl::platform pf = viennacl::ocl::get_platforms()[0];
  std::vector<viennacl::ocl::device> const & devices = pf.devices();

  // Optional: Set first device to first context:
  viennacl::ocl::setup_context(0, devices[0]);

  // Optional: Set second device for second context (use the same device for the second context if only one device available):
  if (devices.size() > 1)
    viennacl::ocl::setup_context(1, devices[1]);
  else
    viennacl::ocl::setup_context(1, devices[0]);

  std::cout << viennacl::ocl::current_device().info() << std::endl;
  viennacl::context ctx(viennacl::ocl::get_context(1));
#else
  viennacl::context ctx;
#endif

  /**
  * Create uBLAS-based sparse matrix and read system matrix from file
  **/
  MatrixType M;

  if (!viennacl::io::read_matrix_market_file(M, "../examples/testdata/mat65k.mtx"))
  {
    std::cerr<<"ERROR: Could not read matrix file " << std::endl;
    exit(EXIT_FAILURE);
  }

  std::cout << "Size of matrix: " << M.size1() << std::endl;
  std::cout << "Avg. Entries per row: " << double(M.nnz()) / static_cast<double>(M.size1()) << std::endl;

  /**
  *   Use a constant load vector for simplicity
  **/
  VectorType rhs(M.size2());
  for (std::size_t i=0; i<rhs.size(); ++i)
    rhs(i) = ScalarType(1);

  /**
  *   Create the ViennaCL matrix and vector and initialize with uBLAS data:
  **/
  GPUMatrixType  gpu_M(M.size1(), M.size2(), ctx);
  GPUVectorType  gpu_rhs(M.size1(), ctx);
  viennacl::copy(M, gpu_M);
  viennacl::copy(rhs, gpu_rhs);

  /**
  *  <h2>Solver Runs</h2>
  *  We use a relative tolerance of \f$ 10^{-10} \f$ with a maximum of 50 iterations for each use case.
  *  Usually more than 50 solver iterations are required for convergence, but this choice ensures shorter execution times and suffices for this tutorial.
  **/

  viennacl::linalg::bicgstab_tag solver_tag(1e-10, 50); //for simplicity and reasonably short execution times we use only 50 iterations here

  /**
  *  The first reference is to use no preconditioner (CPU and GPU):
  **/
  std::cout << "--- Reference 1: Pure BiCGStab on CPU ---" << std::endl;
  VectorType result = viennacl::linalg::solve(M, rhs, solver_tag);
  std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl;
  VectorType residual = viennacl::linalg::prod(M, result) - rhs;
  std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl;

  std::cout << "--- Reference 2: Pure BiCGStab on GPU ---" << std::endl;
  GPUVectorType gpu_result = viennacl::linalg::solve(gpu_M, gpu_rhs, solver_tag);
  std::cout << " * Solver iterations: " << solver_tag.iters() << std::endl;
  GPUVectorType gpu_residual = viennacl::linalg::prod(gpu_M, gpu_result);
  gpu_residual -= gpu_rhs;
  std::cout << " * Rel. Residual: " << viennacl::linalg::norm_2(gpu_residual) / viennacl::linalg::norm_2(gpu_rhs) << std::endl;


  /**
  * The second reference is a standard ILUT preconditioner (only CPU):
  **/
  std::cout << "--- Reference 2: BiCGStab with ILUT on CPU ---" << std::endl;
  std::cout << " * Preconditioner setup..." << std::endl;
  viennacl::linalg::ilut_precond<MatrixType> ilut(M, viennacl::linalg::ilut_tag());
  std::cout << " * Iterative solver run..." << std::endl;
  run_solver(M, rhs, solver_tag, ilut);


  /**
  * <h2>Step 1: SPAI with CPU</h2>
  **/
  std::cout << "--- Test 1: CPU-based SPAI ---" << std::endl;
  std::cout << " * Preconditioner setup..." << std::endl;
  viennacl::linalg::spai_precond<MatrixType> spai_cpu(M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2));
  std::cout << " * Iterative solver run..." << std::endl;
  run_solver(M, rhs, solver_tag, spai_cpu);

  /**
  * <h2>Step 2: FSPAI with CPU</h2>
  **/
  std::cout << "--- Test 2: CPU-based FSPAI ---" << std::endl;
  std::cout << " * Preconditioner setup..." << std::endl;
  viennacl::linalg::fspai_precond<MatrixType> fspai_cpu(M, viennacl::linalg::fspai_tag());
  std::cout << " * Iterative solver run..." << std::endl;
  run_solver(M, rhs, solver_tag, fspai_cpu);

  /**
  * <h2>Step 3: SPAI with GPU</h2>
  **/
  std::cout << "--- Test 3: GPU-based SPAI ---" << std::endl;
  std::cout << " * Preconditioner setup..." << std::endl;
  viennacl::linalg::spai_precond<GPUMatrixType> spai_gpu(gpu_M, viennacl::linalg::spai_tag(1e-3, 3, 5e-2));
  std::cout << " * Iterative solver run..." << std::endl;
  run_solver(gpu_M, gpu_rhs, solver_tag, spai_gpu);

  /**
  * <h2>Step 4: FSPAI with GPU</h2>
  **/
  std::cout << "--- Test 4: GPU-based FSPAI ---" << std::endl;
  std::cout << " * Preconditioner setup..." << std::endl;
  viennacl::linalg::fspai_precond<GPUMatrixType> fspai_gpu(gpu_M, viennacl::linalg::fspai_tag());
  std::cout << " * Iterative solver run..." << std::endl;
  run_solver(gpu_M, gpu_rhs, solver_tag, fspai_gpu);

  /**
  *   That's it! Print success message and exit.
  **/
  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;
}
Ejemplo n.º 3
0
/*---------------------------------------*/
void bilut(csr_t *csr, bilu_host_t *h_bilu,
int nb, int p, double tol, double *avgfil) {
/*---------------------------------------*/
  int i,j,k,j1,j2;
  int *ia = csr->ia;
  int *ja = csr->ja;
  double *a = csr->a;
/*---------------------------------------*/
/*---------- diagonal blocks */
  Calloc(h_bilu->bdiag, nb, csr_t);
/*------ nnz in all diag blocks */
  int nnzds = 0;
/*------ for each block diag */
  for (i=0; i<nb; i++) {
    int nnzd = 0;
    j1 = h_bilu->noff[i];
    j2 = h_bilu->noff[i] + h_bilu->nrow[i];
/*---------- nnz of block diag i */
    for (j=j1; j<j2; j++)
      for (k=ia[j]; k<ia[j+1]; k++)
        if (ja[k-1] > j1 && ja[k-1] <= j2)
          nnzd ++;
/*---------- alloc diag block csr */
    nnzds += nnzd;
    malloc_csr(j2-j1, nnzd, h_bilu->bdiag+i);
    int *ia2   = h_bilu->bdiag[i].ia;
    int *ja2   = h_bilu->bdiag[i].ja;
    double *a2 = h_bilu->bdiag[i].a;
/*------------- build block diag i */
    ia2[0] = 1;
    //ptr for ja, a
    int p = 0; 
    for (j=j1; j<j2; j++) {
      int rownnz = 0;
      for (k=ia[j]; k<ia[j+1]; k++)
        if (ja[k-1] > j1 && ja[k-1] <= j2) {
          rownnz ++;
          a2[p] = a[k-1];
          ja2[p] = ja[k-1] - j1;
          p++;
        }
      ia2[j+1-j1] = ia2[j-j1] + rownnz;
    }
  }
  printf("number of blocks %d\n", nb);
  printf("  %f in diag blocks\n",\
  (double)nnzds/(double)csr->nnz);
/*------- iluk for each block diag */
  printf("begin bilut(%d,%.2e) ...\n", p, tol);
  Calloc(h_bilu->blu, nb, lu_t);
  double *fil;
  Malloc(fil, nb, double);
  for (i=0; i<nb; i++) {
    if (ilut(&h_bilu->bdiag[i], &h_bilu->blu[i],
        tol, p)) {
       printf("BILUT error in block %d\n", i);
       exit(-1);
    }
    int nnzl = h_bilu->blu[i].l->nnz;
    int nnzu = h_bilu->blu[i].u->nnz;
    int nnzdi = h_bilu->bdiag[i].nnz;
    fil[i] = (nnzl+nnzu) / (double)(nnzdi);
  }
/*-------- average fill factor */
  (*avgfil) = 0.0;
  for (i=0; i<nb; i++)
    (*avgfil) += fil[i];
  (*avgfil) /= nb;
  printf("  done, avg fillfactor %f\n",*avgfil);
  free(fil);
}