/**
 * Single-precision CSR solve exported with C linkage.
 *
 * Copies the caller's CSR arrays and right-hand side into library-owned
 * objects, runs a CG solver with a MultiColoredILU preconditioner starting
 * from a zero initial guess, and copies the result into _x.
 *
 * @param row_offset CSR row-offset array (presumably N + 1 entries; confirm against caller).
 * @param col        CSR column-index array (nnz entries).
 * @param val        CSR value array (nnz entries).
 * @param nnz        Number of stored non-zeros.
 * @param N          Matrix dimension; the matrix is allocated as N x N.
 * @param _rhs       Right-hand side, N entries; copied, not modified here.
 * @param _x         Output buffer, N entries; receives the computed solution.
 *
 * A variant that wraps the caller's buffers directly (SetDataPtr /
 * SetDataPtrCSR, released with LeaveDataPtr / LeaveDataPtrCSR) and a
 * mat.Check() call were disabled in the original and remain unused here.
 */
extern "C" __declspec(dllexport) void solveCSRSingle(int *row_offset, int *col, float *val,
	const int nnz, const int N, float *_rhs, float *_x) {

	typedef LocalVector<float> Vector;
	typedef LocalMatrix<float> Matrix;

	std::string label("s");
	Vector solution;
	Vector b;
	Matrix A;

	// Library-owned storage: zero initial guess, then copies of the inputs.
	solution.Allocate(label, N);
	solution.Zeros();
	b.Allocate(label, N);
	A.AllocateCSR(label, nnz, N, N);
	A.CopyFromCSR(row_offset, col, val);
	b.CopyFromData(_rhs);

	// Hand everything to the accelerator backend before the solver is built.
	A.MoveToAccelerator();
	solution.MoveToAccelerator();
	b.MoveToAccelerator();

	CG<Matrix, Vector, float> solver;
	MultiColoredILU<Matrix, Vector, float> precond;
	solver.SetOperator(A);
	solver.SetPreconditioner(precond);
	solver.Build();

	solver.Solve(b, &solution);

	// Bring the data back to the host so the result can be copied out.
	A.MoveToHost();
	solution.MoveToHost();
	b.MoveToHost();

	solution.CopyToData(_x);

	// Explicit cleanup, in the same order as before: data objects, then solver.
	A.Clear();
	solution.Clear();
	b.Clear();

	solver.Clear();
}
/**
 * Double-precision CSR solve exported with C linkage.
 *
 * Copies the caller's CSR arrays and right-hand side into library-owned
 * objects, runs a CG solver with a MultiColoredILU preconditioner starting
 * from a zero initial guess, and copies the result into _x.
 *
 * @param row_offset CSR row-offset array (presumably N + 1 entries; confirm against caller).
 * @param col        CSR column-index array (nnz entries).
 * @param val        CSR value array (nnz entries).
 * @param nnz        Number of stored non-zeros.
 * @param N          Matrix dimension; the matrix is allocated as N x N.
 * @param _rhs       Right-hand side, N entries; copied, not modified here.
 * @param _x         Output buffer, N entries; receives the computed solution.
 */
extern "C" __declspec(dllexport) void solveCSRDouble(int *row_offset, int *col, double *val,
	const int nnz, const int N, double *_rhs, double *_x) {
	typedef LocalVector<double> Vector;
	typedef LocalMatrix<double> Matrix;

	std::string label("s");
	Vector solution;
	Vector b;
	Matrix A;

	// Library-owned storage: zero initial guess, then copies of the inputs.
	solution.Allocate(label, N);
	solution.Zeros();
	b.Allocate(label, N);
	A.AllocateCSR(label, nnz, N, N);
	A.CopyFromCSR(row_offset, col, val);
	b.CopyFromData(_rhs);

	// Hand everything to the accelerator backend before the solver is built.
	A.MoveToAccelerator();
	solution.MoveToAccelerator();
	b.MoveToAccelerator();

	CG<Matrix, Vector, double> solver;
	MultiColoredILU<Matrix, Vector, double> precond;
	solver.SetOperator(A);
	solver.SetPreconditioner(precond);
	solver.Build();

	solver.Solve(b, &solution);

	// Bring the data back to the host so the result can be copied out.
	A.MoveToHost();
	solution.MoveToHost();
	b.MoveToHost();

	solution.CopyToData(_x);

	// Explicit cleanup, in the same order as before: data objects, then solver.
	A.Clear();
	solution.Clear();
	b.Clear();

	solver.Clear();
}
/// Returns the current wall-clock time in microseconds, read with gettimeofday().
static double dpcg_bench_now_usec() {
  struct timeval now;
  gettimeofday(&now, NULL);
  return now.tv_sec*1000000.0 + now.tv_usec;
}

#ifdef BUBFLO
/// Reads the PHI vector from phi_file and fills a bubble map through bubmap_create().
///
/// The raw buffers are detached from their vectors with LeaveDataPtr(), so on
/// return *phi_ptr and *bubmap_ptr point at storage that this function no
/// longer manages.
///
/// NOTE(review): neither buffer is released anywhere in this file's visible
/// code, and every call overwrites the previous pointers - looks like a leak
/// per solver run; confirm whether MakeZLSSD() keeps the map before freeing.
///
/// @param phi_file   path of the ASCII file holding PHI
/// @param phisize    number of entries allocated for PHI
/// @param nrow       number of matrix rows (size of the bubble map)
/// @param xdim       grid size, passed for all three directions
/// @param lvst_offst offset forwarded to bubmap_create()
/// @param phi_ptr    receives the detached PHI buffer
/// @param bubmap_ptr receives the detached bubble-map buffer
/// @param maxbmap    output argument forwarded to bubmap_create()
static void dpcg_bench_make_bubmap(const char *phi_file, int phisize, int nrow,
                                   int xdim, int lvst_offst,
                                   double **phi_ptr, int **bubmap_ptr, int *maxbmap) {
  LocalVector<double> phi;
  LocalVector<int> bubmap;
  phi.Allocate("PHI", phisize);
  bubmap.Allocate("bubmap", nrow);
  phi.ReadFileASCII(std::string(phi_file));

  bubmap.LeaveDataPtr(bubmap_ptr);
  phi.LeaveDataPtr(phi_ptr);

  bubmap_create(*phi_ptr, *bubmap_ptr, xdim, xdim, xdim, nrow, maxbmap, lvst_offst);
  phi.Clear();
}
#endif

#ifdef GUUS
/// Prints the norm of the computed solution and its relative distance to the
/// reference solution and to the all-ones vector.
///
/// Side effects: x is moved to the host and overwritten via
/// x.AddScale(refsol, -1); refones is overwritten via refones.AddScale(x, -1)
/// (callers reset it with Ones() before the next run).
static void dpcg_bench_report_norms(LocalVector<double> &x,
                                    LocalVector<double> &refsol,
                                    LocalVector<double> &refones) {
  x.MoveToHost();
  const double sol_norm = x.Norm();
  std::cout << "\n Norm of Solution is " << sol_norm << std::endl;
  std::cout << "\n Norm of Reference Solution is " << refsol.Norm() << std::endl;
  refones.AddScale(x, -1.0);
  x.AddScale(refsol, -1.0);

  const double diff_norm = x.Norm();
  const double ones_norm = refones.Norm();
  std::cout << "\n Relative Norm of Calculated Solution w.r.t. Reference is " << (diff_norm/sol_norm) << std::endl;
  std::cout << "\n Relative Norm of Calculated Solution w.r.t. Ones is " << (ones_norm/sol_norm) << std::endl;
}
#endif

/// Benchmark driver: reads a matrix from argv[1] and runs the DPCG solver with
/// eight preconditioner configurations in turn, printing build and solve
/// wall-clock times for each.
///
/// Build flags that change what is compiled in:
///   GUUS   - argv[2] is read as matrix Z (passed to ls.SetZ), argv[3] as the
///            right-hand side, argv[4] as a reference solution; x starts random.
///   BUBFLO - argv[2] is the initial guess, argv[3] the right-hand side,
///            argv[4] PHI, argv[5..8] = xdim, setlssd, defvex_perdirec, lvst_offst.
///   GPURUN - matrix and vectors are moved to the accelerator.
///   SCALIN - the matrix is scaled with its inverse square-root diagonal.
///   MATDIA - the matrix is converted to DIA format after each Build().
int main(int argc, char* argv[]) {

  // Highest argv index read below depends on the build flags; validate it up
  // front instead of only rejecting the no-argument case.
#if defined(BUBFLO)
  const int min_argc = 9;
#elif defined(GUUS)
  const int min_argc = 5;
#else
  const int min_argc = 2;
#endif

  if (argc < min_argc) {
    std::cerr << argv[0] << " <matrix> <initial_guess> <rhs> [Num threads]" << std::endl;
    std::cerr << "expected at least " << (min_argc - 1) << " argument(s), got " << (argc - 1) << std::endl;
    exit(1);
  }

  init_paralution();

  // The thread count is fixed; the command-line override was disabled.
  set_omp_threads_paralution(8);
  info_paralution();

  const char *const rule = "-----------------------------------------------";

  double tick, tack, b, s;
  double *phi_ptr=NULL;
  int *bubmap_ptr=NULL;
  int phisize=0, maxbmap=0, setlssd=0, lvst_offst=0;
  int xdim=0, ydim=0, zdim=0, defvex_perdirec=0;
#ifdef BUBFLO
  xdim=atoi(argv[5]);
  setlssd=atoi(argv[6]);
  defvex_perdirec=atoi(argv[7]);
  lvst_offst=atoi(argv[8]);
  // ydim/zdim used to be read uninitialised here. bubmap_create() is always
  // called with xdim for all three directions, so a cubic grid is assumed.
  // NOTE(review): confirm the grid is cubic.
  ydim=xdim;
  zdim=xdim;
  phisize=(xdim+2*lvst_offst)*(ydim+2*lvst_offst)*(zdim+2*lvst_offst);
#endif
  LocalVector<double> x;

  LocalVector<double> rhs;
  LocalMatrix<double> mat;
  LocalVector<double> Dinvhalf_min;
  LocalVector<double> Dinvhalf_plus;
#ifdef GUUS
  LocalMatrix<double> Zin;
  LocalVector<double> refsol;
  LocalVector<double> refones;
#endif
  mat.ReadFileMTX(std::string(argv[1]));
  mat.info();
#ifdef GUUS
  Zin.ReadFileMTX(std::string(argv[2]));
  Zin.info();
  refsol.Allocate("refsol", mat.get_nrow());
  refones.Allocate("refones", mat.get_nrow());
  refsol.ReadFileASCII(std::string(argv[4]));
  refones.Ones();
#endif
  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());

  // Linear solver and the preconditioners exercised below.
  DPCG<LocalMatrix<double>, LocalVector<double>, double > ls;
  MultiElimination<LocalMatrix<double>, LocalVector<double>, double > p;
  Jacobi<LocalMatrix<double>, LocalVector<double>, double > j_p;
  MultiColoredILU<LocalMatrix<double>, LocalVector<double>, double > mcilu_p;
  ILU<LocalMatrix<double>, LocalVector<double>, double > ilu_p;
  MultiColoredSGS<LocalMatrix<double>, LocalVector<double>, double > mcsgs_p;
  FSAI<LocalMatrix <double>, LocalVector<double>, double > fsai_p ;
  SPAI<LocalMatrix <double>, LocalVector <double>, double > spai_p ;

#ifdef GPURUN
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif

#ifdef SCALIN
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_min, -1);
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_plus, 1);

  mat.DiagonalMatrixMult(Dinvhalf_min);
  mat.DiagonalMatrixMult_fromL(Dinvhalf_min);

  // NOTE(review): rhs has not been read yet at this point and is re-read from
  // file before every solve below, so this scaling does not appear to reach
  // the right-hand side that is actually solved - confirm.
  rhs.PointWiseMult(Dinvhalf_min);
#endif

  /////////////////////////////////////////////////////////////////
  // Run 1: multi-colored SGS preconditioner
  std::cout << rule << std::endl;
  std::cout << "DPCG solver MCSGS" << std::endl;
#ifdef GUUS
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif

#ifdef BUBFLO
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif

  tick = dpcg_bench_now_usec();

#ifdef BUBFLO
  if(setlssd){
    // Only this first run replaces the initial guess with random values.
    x.SetRandom(0.0,1.0,1000);
    dpcg_bench_make_bubmap(argv[4], phisize, mat.get_nrow(), xdim, lvst_offst,
                           &phi_ptr, &bubmap_ptr, &maxbmap);
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  // Setlvst_offst is only called in this first run.
  ls.Setlvst_offst(lvst_offst);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif

  ls.SetOperator(mat);
  ls.SetPreconditioner (mcsgs_p) ;
  mcsgs_p.SetPrecondMatrixFormat(HYB);

  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif

  ls.Build();
#ifdef MATDIA
  mat.ConvertToDIA();
#endif
  tack = dpcg_bench_now_usec();
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;

  mat.info();

  tick = dpcg_bench_now_usec();

  ls.Solve(rhs, &x);

  tack = dpcg_bench_now_usec();
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS
  dpcg_bench_report_norms(x, refsol, refones);
#endif
  ls.Clear();

  /////////////////////////////////////////////////////////////////
  // Run 2: FSAI preconditioner
  std::cout << rule << std::endl;
  std::cout << "DPCG solver FSAI" << std::endl;
#ifdef GUUS
  // refones only exists in GUUS builds, so the reset has to live inside the guard.
  refones.Ones();
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif

#ifdef BUBFLO
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif

  tick = dpcg_bench_now_usec();

#ifdef BUBFLO
  if(setlssd){
    dpcg_bench_make_bubmap(argv[4], phisize, mat.get_nrow(), xdim, lvst_offst,
                           &phi_ptr, &bubmap_ptr, &maxbmap);
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif
  fsai_p.Set (2) ;

  ls.SetOperator(mat);
  ls.SetPreconditioner (fsai_p) ;
  fsai_p.SetPrecondMatrixFormat(HYB);

  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif

#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif

  ls.Build();
#ifdef MATDIA
  mat.ConvertToDIA();
#endif

  tack = dpcg_bench_now_usec();
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;

  tick = dpcg_bench_now_usec();

  ls.Solve(rhs, &x);

  tack = dpcg_bench_now_usec();
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS
  dpcg_bench_report_norms(x, refsol, refones);
#endif
  ls.Clear();

  /////////////////////////////////////////////////////////////////
  // Run 3: ILU(p) preconditioner
  std::cout << rule << std::endl;
  std::cout << "DPCG solver ILU-p" << std::endl;
#ifdef GUUS
  refones.Ones();
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif

#ifdef BUBFLO
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif

  tick = dpcg_bench_now_usec();

#ifdef BUBFLO
  if(setlssd){
    dpcg_bench_make_bubmap(argv[4], phisize, mat.get_nrow(), xdim, lvst_offst,
                           &phi_ptr, &bubmap_ptr, &maxbmap);
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif

  ilu_p.Set(0);
  ls.SetOperator(mat);
  ls.SetPreconditioner(ilu_p);
  // NOTE(review): iteration cap is 20000 here but 200000 in every other run - confirm intent.
  ls.Init(0.0, 1e-6, 1e8, 20000);
  ls.RecordResidualHistory();
#ifdef GPURUN
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif

#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif
  ls.Build();
#ifdef MATDIA
  mat.ConvertToDIA();
#endif
  tack = dpcg_bench_now_usec();
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;

  tick = dpcg_bench_now_usec();

  ls.Solve(rhs, &x);

  tack = dpcg_bench_now_usec();
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;

#ifdef SCALIN
  // Only this run rescales the solution after solving.
  x.PointWiseMult(Dinvhalf_min);
#endif
  x.MoveToHost();
#ifdef GUUS
  dpcg_bench_report_norms(x, refsol, refones);
#endif

  ls.Clear();

  /////////////////////////////////////////////////////////////////
  // Run 4: multi-elimination with Jacobi
  std::cout << rule << std::endl;
  std::cout << "DPCG solver ME-ILU-J" << std::endl;
#ifdef GUUS
  refones.Ones();
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif

#ifdef BUBFLO
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif
  tick = dpcg_bench_now_usec();

#ifdef BUBFLO
  if(setlssd){
    dpcg_bench_make_bubmap(argv[4], phisize, mat.get_nrow(), xdim, lvst_offst,
                           &phi_ptr, &bubmap_ptr, &maxbmap);
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif
  p.Set(j_p, 1);

  ls.SetOperator(mat);
  ls.SetPreconditioner(p);

  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif

#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif

  ls.Build();
#ifdef MATDIA
  mat.ConvertToDIA();
#endif
  tack = dpcg_bench_now_usec();
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;

  mat.info();

  tick = dpcg_bench_now_usec();

  ls.Solve(rhs, &x);

  tack = dpcg_bench_now_usec();
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS
  dpcg_bench_report_norms(x, refsol, refones);
#endif
  ls.Clear();

  /////////////////////////////////////////////////////////////////
  // Run 5: labelled ME-ILU-SGS.
  // NOTE(review): mcsgs_p is attached directly, the multi-elimination wrapper
  // is not used here - confirm the label matches the intended configuration.
  std::cout << rule << std::endl;
  std::cout << "DPCG solver ME-ILU-SGS" << std::endl;
#ifdef GUUS
  refones.Ones();
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif

#ifdef BUBFLO
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif

  tick = dpcg_bench_now_usec();

#ifdef BUBFLO
  if(setlssd){
    dpcg_bench_make_bubmap(argv[4], phisize, mat.get_nrow(), xdim, lvst_offst,
                           &phi_ptr, &bubmap_ptr, &maxbmap);
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif
  ls.SetOperator(mat);
  ls.SetPreconditioner(mcsgs_p);

  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif
#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif

  ls.Build();
#ifdef MATDIA
  mat.ConvertToDIA();
#endif
  tack = dpcg_bench_now_usec();
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;

  tick = dpcg_bench_now_usec();

  ls.Solve(rhs, &x);

  tack = dpcg_bench_now_usec();
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS
  dpcg_bench_report_norms(x, refsol, refones);
#endif
  ls.Clear();

  /////////////////////////////////////////////////////////////////
  // Run 6: multi-elimination with multi-colored ILU
  std::cout << rule << std::endl;
  std::cout << "DPCG solver ME-ILU-ILU(0,1)" << std::endl;
#ifdef GUUS
  refones.Ones();
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
#ifdef BUBFLO
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif

  tick = dpcg_bench_now_usec();

#ifdef BUBFLO
  if(setlssd){
    dpcg_bench_make_bubmap(argv[4], phisize, mat.get_nrow(), xdim, lvst_offst,
                           &phi_ptr, &bubmap_ptr, &maxbmap);
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif
  mcilu_p.Set(0);
  p.Set(mcilu_p, 1, 0.0);

  ls.SetOperator(mat);
  ls.SetPreconditioner(p);
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif
#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif

  ls.Build();
#ifdef MATDIA
  mat.ConvertToDIA();
#endif
  tack = dpcg_bench_now_usec();
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;

  mat.info();

  tick = dpcg_bench_now_usec();

  ls.Solve(rhs, &x);

  tack = dpcg_bench_now_usec();
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS
  dpcg_bench_report_norms(x, refsol, refones);
#endif
  ls.Clear();

  /////////////////////////////////////////////////////////////////
  // Run 7: multi-colored ILU(0,1)
  std::cout << rule << std::endl;
  std::cout << "DPCG solver ILU(0,1)" << std::endl;
#ifdef GUUS
  refones.Ones();
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
  // In this run the timer starts before the BUBFLO input files are read.
  tick = dpcg_bench_now_usec();

#ifdef BUBFLO
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
  if(setlssd){
    dpcg_bench_make_bubmap(argv[4], phisize, mat.get_nrow(), xdim, lvst_offst,
                           &phi_ptr, &bubmap_ptr, &maxbmap);
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif

  mcilu_p.Set(0,1);
  ls.SetOperator(mat);
  ls.SetPreconditioner(mcilu_p);

  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif
#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif

  ls.Build();
#ifdef MATDIA
  mat.ConvertToDIA();
#endif
  tack = dpcg_bench_now_usec();
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;

  tick = dpcg_bench_now_usec();

  ls.Solve(rhs, &x);

  tack = dpcg_bench_now_usec();
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS
  dpcg_bench_report_norms(x, refsol, refones);
#endif
  ls.Clear();

  /////////////////////////////////////////////////////////////////
  // Run 8: Jacobi preconditioner
  std::cout << rule << std::endl;
  std::cout << "DPCG solver jacobi" << std::endl;
#ifdef GUUS
  refones.Ones();
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
#ifdef BUBFLO
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif

  tick = dpcg_bench_now_usec();

#ifdef BUBFLO
  if(setlssd){
    dpcg_bench_make_bubmap(argv[4], phisize, mat.get_nrow(), xdim, lvst_offst,
                           &phi_ptr, &bubmap_ptr, &maxbmap);
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif

  ls.SetOperator(mat);
  ls.SetPreconditioner(j_p);
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif
#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif

  ls.Build();
#ifdef MATDIA
  mat.ConvertToDIA();
#endif
  tack = dpcg_bench_now_usec();
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;

  mat.info();

  tick = dpcg_bench_now_usec();

  ls.Solve(rhs, &x);

  tack = dpcg_bench_now_usec();
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS
  dpcg_bench_report_norms(x, refsol, refones);
#endif
  ls.Clear();

  std::cout<<"########################################################################"<<std::endl;
  std::cout<<"Everything complete stopping paralution now."<<std::endl;
  stop_paralution();

  return 0;
}
// Example #4
// 0
/// Single-run DPCG driver: reads a matrix (argv[1]) and right-hand side
/// (argv[3]), builds and runs the DPCG solver once, and prints the
/// preparation, build and solve wall-clock times.
///
/// Build flags that change what is compiled in:
///   GUUS   - argv[2] is read as matrix Z (passed to ls.SetZ), argv[4] as a
///            reference solution; x starts random and norms are reported.
///   BUBFLO - argv[2] is the initial guess, argv[4] PHI,
///            argv[5..8] = xdim, setlssd, defvex_perdirec, lvst_offst.
///   GPURUN - matrix and vectors are moved to the accelerator.
///   SCALIN - the system is scaled with the inverse square-root diagonal.
///   MATDIA - the matrix is converted to DIA format after Build().
int main(int argc, char* argv[]) {

  // Highest argv index read below depends on the build flags; argv[3] is read
  // unconditionally. Validate up front instead of only rejecting argc == 1.
#if defined(BUBFLO)
  const int min_argc = 9;
#elif defined(GUUS)
  const int min_argc = 5;
#else
  const int min_argc = 4;
#endif

  if (argc < min_argc) {
    std::cerr << argv[0] << " <matrix> <initial_guess> <rhs> [Num threads]" << std::endl;
    std::cerr << "expected at least " << (min_argc - 1) << " argument(s), got " << (argc - 1) << std::endl;
    exit(1);
  }

  init_paralution();

  // The thread count is fixed; the command-line override was disabled.
  set_omp_threads_paralution(8);

  info_paralution();

  struct timeval now;
  double tick, tack, b=0.0, s=0.0, lprep=0.0, sol_norm=0.0, diff_norm=0.0, ones_norm=0.0;
  double *phi_ptr=NULL;
  int *bubmap_ptr=NULL;
  int phisize=0, maxbmap=0, setlssd=0, lvst_offst=0;
  int xdim=0, ydim=0, zdim=0, defvex_perdirec=0;
  DPCG<LocalMatrix<double>, LocalVector<double>, double > ls;
#ifdef BUBFLO
  xdim=atoi(argv[5]);
  setlssd=atoi(argv[6]);
  defvex_perdirec=atoi(argv[7]);
  lvst_offst=atoi(argv[8]);
  // ydim/zdim used to be read uninitialised here. Set_alldims() and
  // bubmap_create() below are both given xdim for all three directions, so a
  // cubic grid is assumed. NOTE(review): confirm the grid is cubic.
  ydim=xdim;
  zdim=xdim;
  phisize=(xdim+2*lvst_offst)*(ydim+2*lvst_offst)*(zdim+2*lvst_offst);
#endif
  LocalVector<double> x;
  LocalVector<double> refsol;
  LocalVector<double> refones;
  LocalVector<double> chk_r;
  LocalVector<double> rhs;
  LocalMatrix<double> mat;
  LocalVector<double> Dinvhalf_min;
  LocalVector<double> Dinvhalf_plus;
#ifdef GUUS
  LocalMatrix<double> Zin;
#endif
  mat.ReadFileMTX(std::string(argv[1]));
  mat.info();
#ifdef GUUS
  Zin.ReadFileMTX(std::string(argv[2]));
  Zin.info();
#endif
  x.Allocate("x", mat.get_nrow());
  refsol.Allocate("refsol", mat.get_nrow());
  refones.Allocate("refones", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());
  chk_r.Allocate("chk_r", mat.get_nrow());
#ifdef BUBFLO
  x.ReadFileASCII(std::string(argv[2]));
#endif
  rhs.ReadFileASCII(std::string(argv[3]));
#ifdef GUUS
  x.SetRandom(0.0,1.0,1000);
  refsol.ReadFileASCII(std::string(argv[4]));
  refones.Ones();
#endif

#ifdef GPURUN
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
  chk_r.MoveToAccelerator();
  Dinvhalf_min.MoveToAccelerator();
  Dinvhalf_plus.MoveToAccelerator();
#endif

  // --- Level-set / deflation-vector preparation (timed as lprep) ---
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));

    // The raw buffers are detached from the vectors here.
    // NOTE(review): neither pointer is released in the visible code - confirm
    // who is expected to free them.
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
  }
  ls.Setxdim(xdim);
  ls.SetNVectors_eachdirec(defvex_perdirec+1, defvex_perdirec+2, defvex_perdirec+3);
  ls.Set_alldims(xdim, xdim, xdim);
  ls.Setlvst_offst(lvst_offst);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
#endif
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  lprep=(tack-tick)/1000000;
  std::cout << "levelset_prep" << lprep << " sec" << std::endl;

  // --- Solver setup and build (timed as b) ---
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef SCALIN
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_min, -1);
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_plus, 1);

  mat.DiagonalMatrixMult(Dinvhalf_min);
  mat.DiagonalMatrixMult_fromL(Dinvhalf_min);

  rhs.PointWiseMult(Dinvhalf_min);
#endif
#ifdef GUUS
  ls.SetZ(Zin);
#endif
  ls.SetOperator(mat);
  ls.Init(0.0, 1e-6, 1e8, 200000);

#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif
  ls.Build();
#ifdef MATDIA
  mat.ConvertToDIA();
#endif

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b+lprep << " sec" << std::endl;

  mat.info();

  // --- Solve (timed as s) ---
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s=(tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  // "Building" above includes lprep, so the total has to include it as well;
  // previously the total left it out and did not equal Building + Solver.
  std::cout << "Total execution:" << s+b+lprep << " sec" << std::endl;

#ifdef SCALIN
  x.PointWiseMult(Dinvhalf_min);
#endif

#ifdef GUUS
  x.MoveToHost();
  x.WriteFileASCII("x_neumann.rec");
  x.MoveToAccelerator();
  sol_norm=x.Norm();
  // chk_r = rhs - mat * x
  mat.Apply(x, &chk_r);
  chk_r.ScaleAdd(double(-1.0), rhs);
  std::cout<<"\n Real Residual Norm is "<<chk_r.Norm();
  std::cout<<"\n Norm of Solution is "<<sol_norm<<std::endl;
  std::cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<std::endl;
  std::cout<<"\n Norm of Ones is "<<refones.Norm()<<std::endl;
  x.MoveToHost();
  refones.AddScale(x,-1.0);
  x.AddScale(refsol,-1.0);
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  std::cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<(diff_norm/sol_norm)<<std::endl;
  std::cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<(ones_norm/sol_norm)<<std::endl;
#endif
  ls.Clear();

  stop_paralution();

  return 0;
}