int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

  if (argc > 2) {
    set_omp_threads_paralution(atoi(argv[2]));
  } 

  info_paralution();

  LocalVector<double> x;
  LocalVector<double> rhs;

  LocalMatrix<double> mat;


  mat.ReadFileMTX(std::string(argv[1]));
  mat.info();

  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());

  x.info();
  rhs.info();

  rhs.Ones();
  
  mat.Apply(rhs, &x);

  std::cout << "dot=" << x.Dot(rhs) << std::endl;

  mat.ConvertToELL();
  mat.info();

  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
  mat.info();

  rhs.Ones();
  
  mat.Apply(rhs, &x);

  std::cout << "dot=" << x.Dot(rhs) << std::endl;

  stop_paralution();

  return 0;
}
Ejemplo n.º 2
0
int main(int argc, char* argv[]) {

    if (argc == 1) {
        std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
        exit(1);
    }

    init_paralution();

    if (argc > 2) {
        set_omp_threads_paralution(atoi(argv[2]));
    }

    info_paralution();
    //    int ii;

    LocalVector<double> x;
    LocalVector<double> rhs;

    LocalMatrix<double> mat;

    struct timeval ti1,ti2;//timer

    mat.ReadFileMTX(std::string(argv[1]));
    mat.info();

    x.Allocate("x", mat.get_nrow());
    rhs.Allocate("rhs", mat.get_nrow());

    x.info();
    rhs.info();

    rhs.Ones();

    gettimeofday(&ti1,NULL); /* read starttime in t1 */
    mat.Apply(rhs, &x);
    gettimeofday(&ti2,NULL); /* read endtime in t2 */

    fflush(stderr);
    fprintf(stderr, "\nTime cost host spmv code microseconds: %ld microseconds\n",
            ((ti2.tv_sec - ti1.tv_sec)*1000000L
             +ti2.tv_usec) - ti1.tv_usec
            );

    std::cout << "\ndot=" << x.Dot(rhs) << std::endl;

    mat.ConvertToBCSR();
    mat.info();

    mat.MoveToAccelerator();
    x.MoveToAccelerator();
    rhs.MoveToAccelerator();
    mat.info();

    rhs.Ones();
//    exit(1);

    gettimeofday(&ti1,NULL); /* read starttime in t1 */
    mat.Apply(rhs, &x);
    gettimeofday(&ti2,NULL); /* read endtime in t2 */

    fflush(stderr);
    fprintf(stderr, "\nTime cost for accelerator spmv  microseconds: %ld microseconds\n",
            ((ti2.tv_sec - ti1.tv_sec)*1000000L
             +ti2.tv_usec) - ti1.tv_usec
            );

    std::cout << "\ndot=" << x.Dot(rhs) << std::endl;

    stop_paralution();

    return 0;
}
Ejemplo n.º 3
0
int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

  if (argc > 2) {
    set_omp_threads_paralution(atoi(argv[2]));
  } 

  info_paralution();

  LocalVector<double> b, b_old, *b_k, *b_k1, *b_tmp;
  LocalMatrix<double> mat;

  mat.ReadFileMTX(std::string(argv[1]));

  // Gershgorin spectrum approximation
  double glambda_min, glambda_max;

  // Power method spectrum approximation
  double plambda_min, plambda_max;

  // Maximum number of iteration for the power method
  int iter_max = 10000;

  double tick, tack;

  // Gershgorin approximation of the eigenvalues
  mat.Gershgorin(glambda_min, glambda_max);
  std::cout << "Gershgorin : Lambda min = " << glambda_min
            << "; Lambda max = " << glambda_max << std::endl;


  mat.MoveToAccelerator();
  b.MoveToAccelerator();
  b_old.MoveToAccelerator();


  b.Allocate("b_k+1", mat.get_nrow());
  b_k1 = &b;

  b_old.Allocate("b_k", mat.get_nrow());
  b_k = &b_old;  

  b_k->Ones();

  mat.info();

  tick = paralution_time();

  // compute lambda max
  for (int i=0; i<=iter_max; ++i) {

    mat.Apply(*b_k, b_k1);

    //    std::cout << b_k1->Dot(*b_k) << std::endl;
    b_k1->Scale(double(1.0)/b_k1->Norm());

    b_tmp = b_k1;
    b_k1 = b_k;
    b_k = b_tmp;

  }

  // get lambda max (Rayleigh quotient)
  mat.Apply(*b_k, b_k1);
  plambda_max = b_k1->Dot(*b_k) ;

  tack = paralution_time();
  std::cout << "Power method (lambda max) execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  mat.AddScalarDiagonal(double(-1.0)*plambda_max);


  b_k->Ones();

  tick = paralution_time();

  // compute lambda min
  for (int i=0; i<=iter_max; ++i) {

    mat.Apply(*b_k, b_k1);

    //    std::cout << b_k1->Dot(*b_k) + plambda_max << std::endl;
    b_k1->Scale(double(1.0)/b_k1->Norm());

    b_tmp = b_k1;
    b_k1 = b_k;
    b_k = b_tmp;

  }

  // get lambda min (Rayleigh quotient)
  mat.Apply(*b_k, b_k1);
  plambda_min = (b_k1->Dot(*b_k) + plambda_max);

  // back to the original matrix
  mat.AddScalarDiagonal(plambda_max);

  tack = paralution_time();
  std::cout << "Power method (lambda min) execution:" << (tack-tick)/1000000 << " sec" << std::endl;


  std::cout << "Power method Lambda min = " << plambda_min
            << "; Lambda max = " << plambda_max 
            << "; iter=2x" << iter_max << std::endl;

  LocalVector<double> x;
  LocalVector<double> rhs;

  x.CloneBackend(mat);
  rhs.CloneBackend(mat);

  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());

  // Chebyshev iteration
  Chebyshev<LocalMatrix<double>, LocalVector<double>, double > ls;

  rhs.Ones();
  x.Zeros(); 

  ls.SetOperator(mat);

  ls.Set(plambda_min, plambda_max);

  ls.Build();

  tick = paralution_time();

  ls.Solve(rhs, &x);

  tack = paralution_time();
  std::cout << "Solver execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  // PCG + Chebyshev polynomial
  CG<LocalMatrix<double>, LocalVector<double>, double > cg;
  AIChebyshev<LocalMatrix<double>, LocalVector<double>, double > p;

  // damping factor
  plambda_min = plambda_max / 7;
  p.Set(3, plambda_min, plambda_max);
  rhs.Ones();
  x.Zeros(); 

  cg.SetOperator(mat);
  cg.SetPreconditioner(p);

  cg.Build();

  tick = paralution_time();

  cg.Solve(rhs, &x);

  tack = paralution_time();
  std::cout << "Solver execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  stop_paralution();

  return 0;
}
Ejemplo n.º 4
0
// Current wall-clock time in microseconds, read via gettimeofday().
static double wall_clock_usec() {
  struct timeval now;
  gettimeofday(&now, NULL);
  return now.tv_sec*1000000.0+(now.tv_usec);
}

// DPCG solver driver.
//
// The positional arguments that are read depend on the compile-time
// configuration:
//
//   default : <matrix> <initial_guess> <rhs>
//             (argv[2] is a placeholder; it is only read in BUBFLO builds)
//   GUUS    : <matrix> <Z_matrix> <rhs> <reference_solution>
//   BUBFLO  : <matrix> <initial_guess> <rhs> <phi> <xdim> <setlssd>
//             <defvex_perdirec> <lvst_offst>
//             (<phi> is only read when <setlssd> is non-zero)
//
// Other switches: GPURUN moves the operands to the accelerator, SCALIN
// applies the diagonal scaling calls before the solve, MATDIA converts the
// matrix to DIA after the solver is built.
//
// The OpenMP thread count is fixed at 8; it is not taken from the command
// line.
//
// Returns 1 (after printing the usage line) when fewer arguments are given
// than the configuration reads, 0 otherwise.
int main(int argc, char* argv[]) {

  // Every argv[i] read below must exist. Checking only "argc == 1" let
  // argv[3] (and argv[4..8] in the GUUS / BUBFLO builds) be read past the
  // supplied arguments.
#if defined(BUBFLO)
  const int min_argc = 9;   // argv[1] .. argv[8]
  const char* const usage_args =
    " <matrix> <initial_guess> <rhs> <phi> <xdim> <setlssd> <defvex_perdirec> <lvst_offst>";
#elif defined(GUUS)
  const int min_argc = 5;   // argv[1] .. argv[4]
  const char* const usage_args =
    " <matrix> <Z_matrix> <rhs> <reference_solution>";
#else
  const int min_argc = 4;   // argv[1] .. argv[3]
  const char* const usage_args =
    " <matrix> <initial_guess> <rhs>";
#endif

  if (argc < min_argc) {
    std::cerr << (argc > 0 ? argv[0] : "") << usage_args << std::endl;
    return 1;
  }

  init_paralution();

  set_omp_threads_paralution(8);

  info_paralution();

  // tick/tack: start/end stamps in microseconds.
  // lprep, b, s: level-set preparation, build and solve durations in seconds.
  double tick, tack, b=0.0, s=0.0, lprep=0.0;
  DPCG<LocalMatrix<double>, LocalVector<double>, double > ls;
#ifdef BUBFLO
  double *phi_ptr=NULL;
  int *bubmap_ptr=NULL;
  int maxbmap=0;
  const int xdim=atoi(argv[5]);
  const int setlssd=atoi(argv[6]);
  const int defvex_perdirec=atoi(argv[7]);
  const int lvst_offst=atoi(argv[8]);
  // Only one extent is supplied on the command line and it is used for all
  // three directions in the bubmap_create / Set_alldims calls below, so the
  // grid is treated as a cube. ydim and zdim used to be read uninitialized
  // in the phisize computation.
  const int ydim=xdim;
  const int zdim=xdim;
  const int phisize=(xdim+2*lvst_offst)*(ydim+2*lvst_offst)*(zdim+2*lvst_offst);
#endif
  LocalVector<double> x;
  LocalVector<double> refsol;
  LocalVector<double> refones;
  LocalVector<double> chk_r;
  LocalVector<double> rhs;
  LocalMatrix<double> mat;
  LocalVector<double> Dinvhalf_min;
  LocalVector<double> Dinvhalf_plus;
#ifdef GUUS
  LocalMatrix<double> Zin;
#endif
  mat.ReadFileMTX(std::string(argv[1]));
  mat.info();
#ifdef GUUS
  Zin.ReadFileMTX(std::string(argv[2]));
  Zin.info();
#endif
  x.Allocate("x", mat.get_nrow());
  refsol.Allocate("refsol", mat.get_nrow());
  refones.Allocate("refones", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());
  chk_r.Allocate("chk_r", mat.get_nrow());
#ifdef BUBFLO
  x.ReadFileASCII(std::string(argv[2]));
#endif
  rhs.ReadFileASCII(std::string(argv[3]));
#ifdef GUUS
  x.SetRandom(0.0,1.0,1000);
  refsol.ReadFileASCII(std::string(argv[4]));
  refones.Ones();
#endif

#ifdef GPURUN
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
  chk_r.MoveToAccelerator();
  Dinvhalf_min.MoveToAccelerator();
  Dinvhalf_plus.MoveToAccelerator();
#endif

  // ---- level-set preparation (only does work in BUBFLO builds) -------------
  tick = wall_clock_usec();

#ifdef BUBFLO
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));

    // NOTE(review): the raw buffers obtained here are never released in this
    // function. bubmap_ptr is still needed by MakeZLSSD further down;
    // phi_ptr is not used after bubmap_create. Confirm who owns them and
    // how they must be freed.
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    bubmap_create(phi_ptr, bubmap_ptr, xdim, ydim, zdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
  }
  ls.Setxdim(xdim);
  ls.SetNVectors_eachdirec(defvex_perdirec+1, defvex_perdirec+2, defvex_perdirec+3);
  ls.Set_alldims(xdim, ydim, zdim);
  ls.Setlvst_offst(lvst_offst);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
#endif
  tack = wall_clock_usec();
  lprep=(tack-tick)/1000000;
  std::cout << "levelset_prep" << lprep << " sec" << std::endl;

  // ---- solver set-up ------------------------------------------------------
  tick = wall_clock_usec();

#ifdef SCALIN
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_min, -1);
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_plus, 1);

  mat.DiagonalMatrixMult(Dinvhalf_min);
  mat.DiagonalMatrixMult_fromL(Dinvhalf_min);

  //x.PointWiseMult(Dinvhalf_plus);
  rhs.PointWiseMult(Dinvhalf_min);
#endif
#ifdef GUUS
  ls.SetZ(Zin);
#endif
  ls.SetOperator(mat);
  ls.Init(0.0, 1e-6, 1e8, 200000);

#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif
  ls.Build();
#ifdef MATDIA
  mat.ConvertToDIA();
#endif

  tack = wall_clock_usec();
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b+lprep << " sec" << std::endl;

  mat.info();

  // ---- solve --------------------------------------------------------------
  tick = wall_clock_usec();

  ls.Solve(rhs, &x);

  tack = wall_clock_usec();
  s=(tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;

#ifdef SCALIN
  x.PointWiseMult(Dinvhalf_min);
#endif

#ifdef GUUS
  // ---- solution dump and checks against the reference ----------------------
  x.MoveToHost();
  x.WriteFileASCII("x_neumann.rec");
#ifdef GPURUN
  // mat, rhs and chk_r are only moved to the accelerator in GPURUN builds,
  // so x is only moved back there in that case; this keeps all operands of
  // the residual computation on one backend.
  x.MoveToAccelerator();
#endif
  const double sol_norm=x.Norm();
  // NOTE(review): in a SCALIN build mat and rhs have been scaled above while
  // x has already had PointWiseMult(Dinvhalf_min) applied - confirm that
  // this residual is the intended quantity in that configuration.
  mat.Apply(x, &chk_r);
  chk_r.ScaleAdd(double(-1.0), rhs);
  std::cout<<"\n Real Residual Norm is "<<chk_r.Norm();
  std::cout<<"\n Norm of Solution is "<<sol_norm<<std::endl;
  std::cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<std::endl;
  std::cout<<"\n Norm of Ones is "<<refones.Norm()<<std::endl;
  // refsol and refones are never moved off the host, so compare there.
  x.MoveToHost();
  refones.AddScale(x,double(-1.0));
  x.AddScale(refsol,double(-1.0));
  const double diff_norm=x.Norm();
  const double ones_norm=refones.Norm();
  std::cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<(diff_norm/sol_norm)<<std::endl;
  std::cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<(ones_norm/sol_norm)<<std::endl;
#endif
  ls.Clear();

  stop_paralution();

  return 0;
}