/**
 * Solve a square sparse system in double precision on the host.
 *
 * The matrix is given in CSR form (row_offset / col / val, nnz stored entries,
 * N rows and N columns). The right-hand side is read from _rhs and the result
 * is written to _x. The solve uses CG with a Jacobi preconditioner and starts
 * from a zero initial guess.
 *
 * NOTE(review): _rhs and _x are presumably caller-owned arrays of length N —
 * confirm against the calling side.
 */
extern "C" __declspec(dllexport) void CPUsolveCSRDouble(int *row_offset, int *col, double *val,
	const int nnz, const int N, double *_rhs, double *_x) {

	// Every PARALUTION object created here receives the same label.
	std::string label = "s";

	LocalVector<double> solution;
	LocalVector<double> rightHand;
	LocalMatrix<double> system;

	// Operator: N x N CSR storage filled from the caller's arrays.
	system.AllocateCSR(label, nnz, N, N);
	system.CopyFromCSR(row_offset, col, val);

	// Right-hand side taken from the caller's buffer.
	rightHand.Allocate(label, N);
	rightHand.CopyFromData(_rhs);

	// Initial guess: all zeros.
	solution.Allocate(label, N);
	solution.Zeros();

	CG<LocalMatrix<double>, LocalVector<double>, double> solver;
	Jacobi<LocalMatrix<double>, LocalVector<double>, double> jacobi;

	solver.SetOperator(system);
	solver.SetPreconditioner(jacobi);
	solver.Build();
	solver.Solve(rightHand, &solution);

	// Hand the result back before the local objects are released.
	solution.CopyToData(_x);

	system.Clear();
	solution.Clear();
	rightHand.Clear();

	solver.Clear();
}
TEST_F(beamFEATest, TransformsLocalToGlobalCoords) {
    // When the local axes coincide with the global axes, the transformation
    // produced by calcAelem is expected to be the identity matrix.
    Eigen::Vector3d nx(1.0, 0.0, 0.0);
    Eigen::Vector3d nz(0.0, 0.0, 1.0);

    assembleK3D.calcAelem(nx, nz);

    LocalMatrix Aelem = assembleK3D.getAelem();

    LocalMatrix expected;
    expected.setIdentity();

    // Compare every coefficient. Single-index access bounded by rows() only
    // visits the first rows() coefficients in storage order (one row or one
    // column), which would leave the rest of the matrix unchecked.
    const size_t n_rows = static_cast<size_t>(Aelem.rows());
    const size_t n_cols = static_cast<size_t>(Aelem.cols());
    for (size_t i = 0; i < n_rows; ++i) {
        for (size_t j = 0; j < n_cols; ++j) {
            EXPECT_DOUBLE_EQ(expected(i, j), Aelem(i, j));
        }
    }
}
/**
 * Returns the kernel of inMat.transpose(), computed with a full-pivoting
 * LU decomposition.
 *
 * When the kernel comes back as a single all-zero column (the original author
 * notes this is what is returned for a kernel of dimension zero), a matrix
 * with inMat.rows() rows and zero columns is returned instead.
 */
LocalMatrix chemReductionGIA::orthcomp( LocalMatrix & inMat )
{
	const Eigen::FullPivLU<LocalMatrix> transposed_lu(inMat.transpose());

	LocalMatrix null_space = transposed_lu.kernel();

	// A lone zero column is a placeholder, not a real basis vector:
	// report "no columns" in that case.
	const bool single_column = ( null_space.cols() == 1 );
	if ( single_column && null_space.col(0).norm() == 0.0 )
		return LocalMatrix::Zero(inMat.rows(), 0);

	return null_space;
}
/**
 * Solve a square sparse system in single precision.
 *
 * The matrix is given in CSR form (row_offset / col / val, nnz stored entries,
 * N rows and N columns). The right-hand side is read from _rhs and the result
 * is written to _x. Matrix and vectors are moved to the accelerator backend
 * before the solver is built, then moved back to the host before the result is
 * copied out. The solve uses CG with a MultiColoredILU preconditioner and
 * starts from a zero initial guess.
 *
 * NOTE(review): _rhs and _x are presumably caller-owned arrays of length N —
 * confirm against the calling side.
 */
extern "C" __declspec(dllexport) void solveCSRSingle(int *row_offset, int *col, float *val, 
	const int nnz, const int N, float *_rhs, float *_x) {

	// Every PARALUTION object created here receives the same label.
	std::string label = "s";

	LocalVector<float> solution;
	LocalVector<float> rightHand;
	LocalMatrix<float> system;

	// Initial guess: all zeros.
	solution.Allocate(label, N);
	solution.Zeros();

	// Right-hand side storage, then the operator filled from the CSR arrays.
	rightHand.Allocate(label, N);
	system.AllocateCSR(label, nnz, N, N);
	system.CopyFromCSR(row_offset, col, val);
	rightHand.CopyFromData(_rhs);

	// Disabled in the original: matrix check and a variant that adopts the
	// caller's buffers instead of copying them.
	//   system.Check();
	//   rightHand.SetDataPtr(&_rhs, label, N);
	//   solution.SetDataPtr(&_x, label, N);
	//   system.SetDataPtrCSR(&row_offset, &col, &val, label, nnz, N, N);

	system.MoveToAccelerator();
	solution.MoveToAccelerator();
	rightHand.MoveToAccelerator();

	CG<LocalMatrix<float>, LocalVector<float>, float> solver;
	MultiColoredILU<LocalMatrix<float>, LocalVector<float>, float> mcilu;

	solver.SetOperator(system);
	solver.SetPreconditioner(mcilu);
	solver.Build();

	solver.Solve(rightHand, &solution);

	// Bring everything back to the host before copying out.
	system.MoveToHost();
	solution.MoveToHost();
	rightHand.MoveToHost();

	// Disabled in the original: counterpart of the buffer-adopting variant.
	//   system.LeaveDataPtrCSR(&row_offset, &col, &val);
	//   rightHand.LeaveDataPtr(&_rhs);
	//   solution.LeaveDataPtr(&_x);

	solution.CopyToData(_x);

	system.Clear();
	solution.Clear();
	rightHand.Clear();

	solver.Clear();
}
// Example no. 5
// 0
int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

  if (argc > 2) {
    set_omp_threads_paralution(atoi(argv[2]));
  } 

  info_paralution();

  LocalVector<double> x;
  LocalVector<double> rhs;

  LocalMatrix<double> mat;

  mat.ReadFileMTX(std::string(argv[1]));

  // Compute and apply (R)CMK ordering
  LocalVector<int> cmk;
  //  mat.CMK(&cmk);
  mat.RCMK(&cmk);
  mat.Permute(cmk);

  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();

  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());

  // Linear Solver
  CG<LocalMatrix<double>, LocalVector<double>, double > ls;

  // Preconditioner
  ILU<LocalMatrix<double>, LocalVector<double>, double > p;

  double tick, tack;

  rhs.Ones();
  x.Zeros(); 

  ls.SetOperator(mat);
  ls.SetPreconditioner(p);

  ls.Build();

  mat.info();

  tick = paralution_time();

  ls.Solve(rhs, &x);

  tack = paralution_time();
  std::cout << "Solver execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  // Revert CMK ordering on solution vector
  x.PermuteBackward(cmk);

  stop_paralution();

  return 0;
}
// Example no. 6
// 0
    void update_rotation(const std::vector<Node> &nodes, const BeamElement& elem, LocalMatrix &rotation, LocalMatrix &rotation_transposed) {
        rotation.setZero();
        rotation_transposed.setZero();
        const int nn1 = elem.get_node_numbers()[0];
        const int nn2 = elem.get_node_numbers()[1];

        // calculate unit normal vector along local x-direction
        Eigen::Vector3d nx = nodes[nn2] - nodes[nn1];
        nx.normalize();

        // calculate unit normal vector along y-direction
        const Eigen::Vector3d ny = elem.get_props().normal_vec.normalized();

        // calculate the unit normal vector in local z direction
        Eigen::Vector3d nz = nx.cross(ny);
        nz.normalize();

        // update rotation matrix
        rotation(0, 0) = nx(0);
        rotation(0, 1) = nx(1);
        rotation(0, 2) = nx(2);
        rotation(1, 0) = ny(0);
        rotation(1, 1) = ny(1);
        rotation(1, 2) = ny(2);
        rotation(2, 0) = nz(0);
        rotation(2, 1) = nz(1);
        rotation(2, 2) = nz(2);

        rotation(3, 3) = nx(0);
        rotation(3, 4) = nx(1);
        rotation(3, 5) = nx(2);
        rotation(4, 3) = ny(0);
        rotation(4, 4) = ny(1);
        rotation(4, 5) = ny(2);
        rotation(5, 3) = nz(0);
        rotation(5, 4) = nz(1);
        rotation(5, 5) = nz(2);

        rotation(6, 6) = nx(0);
        rotation(6, 7) = nx(1);
        rotation(6, 8) = nx(2);
        rotation(7, 6) = ny(0);
        rotation(7, 7) = ny(1);
        rotation(7, 8) = ny(2);
        rotation(8, 6) = nz(0);
        rotation(8, 7) = nz(1);
        rotation(8, 8) = nz(2);

        rotation(9, 9) = nx(0);
        rotation(9, 10) = nx(1);
        rotation(9, 11) = nx(2);
        rotation(10, 9) = ny(0);
        rotation(10, 10) = ny(1);
        rotation(10, 11) = ny(2);
        rotation(11, 9) = nz(0);
        rotation(11, 10) = nz(1);
        rotation(11, 11) = nz(2);

        // update transposed rotation matrix
        rotation_transposed(0, 0) = nx(0);
        rotation_transposed(0, 1) = ny(0);
        rotation_transposed(0, 2) = nz(0);
        rotation_transposed(1, 0) = nx(1);
        rotation_transposed(1, 1) = ny(1);
        rotation_transposed(1, 2) = nz(1);
        rotation_transposed(2, 0) = nx(2);
        rotation_transposed(2, 1) = ny(2);
        rotation_transposed(2, 2) = nz(2);

        rotation_transposed(3, 3) = nx(0);
        rotation_transposed(3, 4) = ny(0);
        rotation_transposed(3, 5) = nz(0);
        rotation_transposed(4, 3) = nx(1);
        rotation_transposed(4, 4) = ny(1);
        rotation_transposed(4, 5) = nz(1);
        rotation_transposed(5, 3) = nx(2);
        rotation_transposed(5, 4) = ny(2);
        rotation_transposed(5, 5) = nz(2);

        rotation_transposed(6, 6) = nx(0);
        rotation_transposed(6, 7) = ny(0);
        rotation_transposed(6, 8) = nz(0);
        rotation_transposed(7, 6) = nx(1);
        rotation_transposed(7, 7) = ny(1);
        rotation_transposed(7, 8) = nz(1);
        rotation_transposed(8, 6) = nx(2);
        rotation_transposed(8, 7) = ny(2);
        rotation_transposed(8, 8) = nz(2);

        rotation_transposed(9, 9) = nx(0);
        rotation_transposed(9, 10) = ny(0);
        rotation_transposed(9, 11) = nz(0);
        rotation_transposed(10, 9) = nx(1);
        rotation_transposed(10, 10) = ny(1);
        rotation_transposed(10, 11) = nz(1);
        rotation_transposed(11, 9) = nx(2);
        rotation_transposed(11, 10) = ny(2);
        rotation_transposed(11, 11) = nz(2);
    }
int main(int argc, char* argv[]) {

    if (argc == 1) {
        std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
        exit(1);
    }

    init_paralution();

    if (argc > 2) {
        set_omp_threads_paralution(atoi(argv[2]));
    }

    info_paralution();
    //    int ii;

    LocalVector<double> x;
    LocalVector<double> rhs;

    LocalMatrix<double> mat;

    struct timeval ti1,ti2;//timer

    mat.ReadFileMTX(std::string(argv[1]));
    mat.info();

    x.Allocate("x", mat.get_nrow());
    rhs.Allocate("rhs", mat.get_nrow());

    x.info();
    rhs.info();

    rhs.Ones();

    gettimeofday(&ti1,NULL); /* read starttime in t1 */
    mat.Apply(rhs, &x);
    gettimeofday(&ti2,NULL); /* read endtime in t2 */

    fflush(stderr);
    fprintf(stderr, "\nTime cost host spmv code microseconds: %ld microseconds\n",
            ((ti2.tv_sec - ti1.tv_sec)*1000000L
             +ti2.tv_usec) - ti1.tv_usec
            );

    std::cout << "\ndot=" << x.Dot(rhs) << std::endl;

    mat.ConvertToBCSR();
    mat.info();

    mat.MoveToAccelerator();
    x.MoveToAccelerator();
    rhs.MoveToAccelerator();
    mat.info();

    rhs.Ones();
//    exit(1);

    gettimeofday(&ti1,NULL); /* read starttime in t1 */
    mat.Apply(rhs, &x);
    gettimeofday(&ti2,NULL); /* read endtime in t2 */

    fflush(stderr);
    fprintf(stderr, "\nTime cost for accelerator spmv  microseconds: %ld microseconds\n",
            ((ti2.tv_sec - ti1.tv_sec)*1000000L
             +ti2.tv_usec) - ti1.tv_usec
            );

    std::cout << "\ndot=" << x.Dot(rhs) << std::endl;

    stop_paralution();

    return 0;
}
int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

  if (argc > 2) {
    set_omp_threads_paralution(atoi(argv[2]));
  } 

  info_paralution();

  LocalVector<double> b, b_old, *b_k, *b_k1, *b_tmp;
  LocalMatrix<double> mat;

  mat.ReadFileMTX(std::string(argv[1]));

  // Gershgorin spectrum approximation
  double glambda_min, glambda_max;

  // Power method spectrum approximation
  double plambda_min, plambda_max;

  // Maximum number of iteration for the power method
  int iter_max = 10000;

  double tick, tack;

  // Gershgorin approximation of the eigenvalues
  mat.Gershgorin(glambda_min, glambda_max);
  std::cout << "Gershgorin : Lambda min = " << glambda_min
            << "; Lambda max = " << glambda_max << std::endl;


  mat.MoveToAccelerator();
  b.MoveToAccelerator();
  b_old.MoveToAccelerator();


  b.Allocate("b_k+1", mat.get_nrow());
  b_k1 = &b;

  b_old.Allocate("b_k", mat.get_nrow());
  b_k = &b_old;  

  b_k->Ones();

  mat.info();

  tick = paralution_time();

  // compute lambda max
  for (int i=0; i<=iter_max; ++i) {

    mat.Apply(*b_k, b_k1);

    //    std::cout << b_k1->Dot(*b_k) << std::endl;
    b_k1->Scale(double(1.0)/b_k1->Norm());

    b_tmp = b_k1;
    b_k1 = b_k;
    b_k = b_tmp;

  }

  // get lambda max (Rayleigh quotient)
  mat.Apply(*b_k, b_k1);
  plambda_max = b_k1->Dot(*b_k) ;

  tack = paralution_time();
  std::cout << "Power method (lambda max) execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  mat.AddScalarDiagonal(double(-1.0)*plambda_max);


  b_k->Ones();

  tick = paralution_time();

  // compute lambda min
  for (int i=0; i<=iter_max; ++i) {

    mat.Apply(*b_k, b_k1);

    //    std::cout << b_k1->Dot(*b_k) + plambda_max << std::endl;
    b_k1->Scale(double(1.0)/b_k1->Norm());

    b_tmp = b_k1;
    b_k1 = b_k;
    b_k = b_tmp;

  }

  // get lambda min (Rayleigh quotient)
  mat.Apply(*b_k, b_k1);
  plambda_min = (b_k1->Dot(*b_k) + plambda_max);

  // back to the original matrix
  mat.AddScalarDiagonal(plambda_max);

  tack = paralution_time();
  std::cout << "Power method (lambda min) execution:" << (tack-tick)/1000000 << " sec" << std::endl;


  std::cout << "Power method Lambda min = " << plambda_min
            << "; Lambda max = " << plambda_max 
            << "; iter=2x" << iter_max << std::endl;

  LocalVector<double> x;
  LocalVector<double> rhs;

  x.CloneBackend(mat);
  rhs.CloneBackend(mat);

  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());

  // Chebyshev iteration
  Chebyshev<LocalMatrix<double>, LocalVector<double>, double > ls;

  rhs.Ones();
  x.Zeros(); 

  ls.SetOperator(mat);

  ls.Set(plambda_min, plambda_max);

  ls.Build();

  tick = paralution_time();

  ls.Solve(rhs, &x);

  tack = paralution_time();
  std::cout << "Solver execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  // PCG + Chebyshev polynomial
  CG<LocalMatrix<double>, LocalVector<double>, double > cg;
  AIChebyshev<LocalMatrix<double>, LocalVector<double>, double > p;

  // damping factor
  plambda_min = plambda_max / 7;
  p.Set(3, plambda_min, plambda_max);
  rhs.Ones();
  x.Zeros(); 

  cg.SetOperator(mat);
  cg.SetPreconditioner(p);

  cg.Build();

  tick = paralution_time();

  cg.Solve(rhs, &x);

  tack = paralution_time();
  std::cout << "Solver execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  stop_paralution();

  return 0;
}
int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> <initial_guess> <rhs> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

//   if (argc > 4) {
//     set_omp_threads_paralution(atoi(argv[]));
//   } 
  set_omp_threads_paralution(8);
  info_paralution();

  struct timeval now;
  double tick, tack, b,s, sol_norm, diff_norm, ones_norm;
  double *phi_ptr=NULL;
  int *bubmap_ptr=NULL, phisize, maxbmap, setlssd, lvst_offst;
  int xdim, ydim, zdim, defvex_perdirec;
#ifdef BUBFLO
  xdim=atoi(argv[5]);
  setlssd=atoi(argv[6]);
  defvex_perdirec=atoi(argv[7]);
  lvst_offst=atoi(argv[8]);
  phisize=(xdim+2*lvst_offst)*(ydim+2*lvst_offst)*(zdim+2*lvst_offst);
#endif  
  LocalVector<double> x;
  
  LocalVector<double> rhs;
  LocalMatrix<double> mat;
  LocalVector<double> Dinvhalf_min;
  LocalVector<double> Dinvhalf_plus;
#ifdef GUUS  
  LocalMatrix<double> Zin;
  LocalVector<double> refsol;
  LocalVector<double> refones;
#endif  
  mat.ReadFileMTX(std::string(argv[1]));
  mat.info();
#ifdef GUUS  
  Zin.ReadFileMTX(std::string(argv[2]));
  Zin.info();
  refsol.Allocate("refsol", mat.get_nrow());
  refones.Allocate("refones", mat.get_nrow());
  //refsol.Ones();
  refsol.ReadFileASCII(std::string(argv[4]));
  refones.Ones();
#endif  
  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());
  
  
  // Linear Solver
  DPCG<LocalMatrix<double>, LocalVector<double>, double > ls;
  MultiElimination<LocalMatrix<double>, LocalVector<double>, double > p;
  Jacobi<LocalMatrix<double>, LocalVector<double>, double > j_p;
  MultiColoredILU<LocalMatrix<double>, LocalVector<double>, double > mcilu_p;
  ILU<LocalMatrix<double>, LocalVector<double>, double > ilu_p;
  MultiColoredSGS<LocalMatrix<double>, LocalVector<double>, double > mcsgs_p;
  FSAI<LocalMatrix <double>, LocalVector<double>, double > fsai_p ;
  SPAI<LocalMatrix <double>, LocalVector <double>, double > spai_p ;
  


#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
  
#ifdef SCALIN
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_min, -1);
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_plus, 1);
  
  mat.DiagonalMatrixMult(Dinvhalf_min);
  mat.DiagonalMatrixMult_fromL(Dinvhalf_min);
  
  //x.PointWiseMult(Dinvhalf_plus);
  rhs.PointWiseMult(Dinvhalf_min);
#endif
  
    /////////////////////////////////////////////////////////////////  
   std::cout << "-----------------------------------------------" << std::endl;
   std::cout << "DPCG solver MCSGS" << std::endl;
 #ifdef GUUS
   rhs.ReadFileASCII(std::string(argv[3]));
   x.SetRandom(0.0,1.0,1000);
   ls.SetZ(Zin);
 #endif
   
 #ifdef BUBFLO  
   x.ReadFileASCII(std::string(argv[2]));
   rhs.ReadFileASCII(std::string(argv[3]));
 #endif
 
   gettimeofday(&now, NULL);
   tick = now.tv_sec*1000000.0+(now.tv_usec);
   
 #ifdef BUBFLO  
   if(setlssd){
     LocalVector<double> phi;
     LocalVector<int> bubmap;
     phi.Allocate("PHI", phisize);
     bubmap.Allocate("bubmap",mat.get_nrow());
     phi.ReadFileASCII(std::string(argv[4]));
     
     bubmap.LeaveDataPtr(&bubmap_ptr);
     phi.LeaveDataPtr(&phi_ptr);
 
     x.SetRandom(0.0,1.0,1000);
     bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
     phi.Clear();
     
   }
   ls.Setxdim(xdim);
   ls.SetNVectors(defvex_perdirec);
   ls.Setlvst_offst(lvst_offst);
   ls.SetZlssd(setlssd);
   mat.ConvertToCSR();  
 #endif
   
   ls.SetOperator(mat);
   ls.SetPreconditioner (mcsgs_p) ;
   mcsgs_p.SetPrecondMatrixFormat(HYB);
   
  
   ls.Init(0.0, 1e-6, 1e8, 200000);
 #ifdef BUBFLO  	
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
   if(setlssd)
     ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available	
 #endif
     
   
 
   ls.Build();
 #ifdef MATDIA  
   mat.ConvertToDIA();
 #endif  
   gettimeofday(&now, NULL);
   tack = now.tv_sec*1000000.0+(now.tv_usec);
   b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
   
 
   mat.info();
 
   gettimeofday(&now, NULL);
   tick = now.tv_sec*1000000.0+(now.tv_usec);
 
   ls.Solve(rhs, &x);
 
   gettimeofday(&now, NULL);
   tack = now.tv_sec*1000000.0+(now.tv_usec);
   s= (tack-tick)/1000000;
   std::cout << "Solver execution:" << s << " sec" << std::endl;
   std::cout << "Total execution:" << s+b << " sec" << std::endl;
 #ifdef GUUS 
   x.MoveToHost();
   sol_norm=x.Norm();
   cout<<"\n Norm of Solution is "<<sol_norm<<endl;
   cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
   refones.AddScale(x,(double)-1.0f);
   x.AddScale(refsol,(double)-1.0f);
   
   
   diff_norm=x.Norm();
   ones_norm=refones.Norm();
   cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
   cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
 #endif  
   //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
  ls.Clear();
  /////////////////////////////////////////////////////////////////  
 
  /////////////////////////////////////////////////////////////////  
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver FSAI" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif

  
#ifdef BUBFLO  
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);
  
#ifdef BUBFLO  
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();  
#endif    
  fsai_p.Set (2) ;
  
  ls.SetOperator(mat);
  ls.SetPreconditioner (fsai_p) ;
  fsai_p.SetPrecondMatrixFormat(HYB);
  
 
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
  
  
#ifdef BUBFLO  
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif  
  

  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif
  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
  

//   mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS
    x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
#endif  
  //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
  ls.Clear();
// //   
  
///////////////////////////////////////////////////////////////  
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver ILU-p" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
  
#ifdef BUBFLO  
   x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif  
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);
  
#ifdef BUBFLO
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();  
#endif
  
  ilu_p.Set(0);
  ls.SetOperator(mat);
  ls.SetPreconditioner(ilu_p);
  ls.Init(0.0, 1e-6, 1e8, 20000);
  ls.RecordResidualHistory();
//  mat.ConvertToCSR();  
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  

#ifdef BUBFLO  
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
  
  //ls.Verbose(2);
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);
  
  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;

#ifdef SCALIN
  x.PointWiseMult(Dinvhalf_min);
#endif
  x.MoveToHost();
//   x.WriteFileASCII("x_solution_shell_scal.rec");
#ifdef GUUS
 // ls.RecordHistory("res_ongpu_ilu-p.rec");
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  x.MoveToHost();
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
#endif  
  
  ls.Clear();
  
/////////////////////////////////////////////////////////////////
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver ME-ILU-J" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif  

#ifdef BUBFLO
   x.ReadFileASCII(std::string(argv[2]));
   rhs.ReadFileASCII(std::string(argv[3]));
#endif
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);
 
#ifdef BUBFLO   
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif
  p.Set(j_p, 1);
  
  ls.SetOperator(mat);
  ls.SetPreconditioner(p);
  
  
  
  
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
  
#ifdef BUBFLO  
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif  
  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
  

  mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS  
  x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
#endif  
  //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
  ls.Clear();

///////////////////////////////////////////////////////////////  
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver ME-ILU-SGS" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
  
#ifdef BUBFLO  
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO  
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();  
#endif  
  //p.Init(mcsgs_p, 1);
  ls.SetOperator(mat);
  ls.SetPreconditioner(mcsgs_p);

  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
#ifdef BUBFLO  
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif
    
  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
  

//   mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS  
x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
#endif  
  //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
  ls.Clear();
// 
// /////////////////////////////////////////////////////////////////  
//   
// /////////////////////////////////////////////////////////////////  
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver ME-ILU-ILU(0,1)" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
#ifdef BUBFLO  
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();    
#endif  
//   mcilu_p.Init(0);
//   
//   p.Init(mcilu_p, 1, 0.0);
  mcilu_p.Set(0);
  p.Set(mcilu_p, 1, 0.0);

  ls.SetOperator(mat);
  ls.SetPreconditioner(p);
//   p.SetPrecondMatrixFormat(HYB);
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
  #ifdef BUBFLO  
//   ls.SetNVectors(4);
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif    
//   
  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
// 
//   ls.Verbose(2);
  mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);
  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS  
x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
  //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
#endif  
  ls.Clear();

// /////////////////////////////////////////////////////////////////    
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver ILU(0,1)" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO  
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif  
  
  mcilu_p.Set(0,1);
  ls.SetOperator(mat);
  ls.SetPreconditioner(mcilu_p);
  
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
#ifdef BUBFLO  
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available	
#endif
    
  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
  

//   mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS  
x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
//   x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
#endif  
  ls.Clear();
/////////////////////////////////////////////////////////////////    
  
// /////////////////////////////////////////////////////////////////  
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver jacobi" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
#ifdef BUBFLO  
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();    
#endif  


  ls.SetOperator(mat);
  ls.SetPreconditioner(j_p);
//   p.SetPrecondMatrixFormat(HYB);
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif    
#ifdef BUBFLO  
//   ls.SetNVectors(4);
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif    
//   
  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
// 
//   ls.Verbose(2);
  mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);
  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS  
x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
  //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
#endif  
  ls.Clear();
//   
  cout<<"########################################################################"<<endl;
  cout<<"Everything complete stopping paralution now."<<endl;
  stop_paralution();

  return 0;
}
int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

  if (argc > 2) {
    set_omp_threads_paralution(atoi(argv[2]));
  } 

  info_paralution();

  LocalVector<double> x;
  LocalVector<double> rhs;

  LocalMatrix<double> mat;


  mat.ReadFileMTX(std::string(argv[1]));
  mat.info();

  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());

  x.info();
  rhs.info();

  rhs.Ones();
  
  mat.Apply(rhs, &x);

  std::cout << "dot=" << x.Dot(rhs) << std::endl;

  mat.ConvertToELL();
  mat.info();

  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
  mat.info();

  rhs.Ones();
  
  mat.Apply(rhs, &x);

  std::cout << "dot=" << x.Dot(rhs) << std::endl;

  stop_paralution();

  return 0;
}
Esempio n. 11
0
// Solves a linear system with the DPCG solver and prints preparation, build
// and solve timings.
//
// Command-line arguments, as read by the code below:
//   argv[1]  matrix file, read with ReadFileMTX
//   argv[3]  right-hand side, read with ReadFileASCII
// With GUUS defined:
//   argv[2]  matrix handed to the solver through SetZ(), read with ReadFileMTX
//   argv[4]  reference solution, read with ReadFileASCII
// With BUBFLO defined:
//   argv[2]  initial guess for x, read with ReadFileASCII
//   argv[4]  phi vector, read only when setlssd is non-zero
//   argv[5]  xdim
//   argv[6]  setlssd
//   argv[7]  defvex_perdirec
//   argv[8]  lvst_offst
//
// Returns 0 on completion; exits with status 1 when too few arguments are
// supplied for the compiled configuration.
int main(int argc, char* argv[]) {

  // The highest argv index that is dereferenced depends on the build flags:
  // argv[3] always, argv[4] with GUUS, argv[8] with BUBFLO. Reject shorter
  // command lines up front instead of reading past the supplied arguments.
#if defined(BUBFLO)
  const int min_argc = 9;
#elif defined(GUUS)
  const int min_argc = 5;
#else
  const int min_argc = 4;
#endif

  if (argc < min_argc) {
    std::cerr << argv[0] << " <matrix> <initial_guess> <rhs> [Num threads]" << std::endl;
    std::cerr << "expected at least " << (min_argc - 1) << " arguments, got "
              << (argc - 1) << std::endl;
    exit(1);
  }

  init_paralution();

  // The thread count is fixed; the command-line override is disabled.
//   if (argc > 4) {
//     set_omp_threads_paralution(atoi(argv[5]));
//   } 
  set_omp_threads_paralution(8);
  
  info_paralution();

  struct timeval now;
  double tick = 0.0, tack = 0.0;
  double b = 0.0, s = 0.0, lprep = 0.0;
  double sol_norm = 0.0, diff_norm = 0.0, ones_norm = 0.0;
  double *phi_ptr = NULL;
  int *bubmap_ptr = NULL;
  int phisize = 0, maxbmap = 0, setlssd = 0, lvst_offst = 0;
  int xdim = 0, ydim = 0, zdim = 0, defvex_perdirec = 0;
  DPCG<LocalMatrix<double>, LocalVector<double>, double > ls;
#ifdef BUBFLO  
  xdim=atoi(argv[5]);
  setlssd=atoi(argv[6]);
  defvex_perdirec=atoi(argv[7]);
  lvst_offst=atoi(argv[8]);
  // Only xdim is supplied on the command line and every other call in this
  // function passes xdim for all three directions, so the remaining
  // dimensions follow it. Without this, phisize would be computed from
  // indeterminate values.
  ydim=xdim;
  zdim=xdim;
  phisize=(xdim+2*lvst_offst)*(ydim+2*lvst_offst)*(zdim+2*lvst_offst);
#endif  
  LocalVector<double> x;
  LocalVector<double>refsol;
  LocalVector<double>refones;
  LocalVector<double>chk_r;
  LocalVector<double> rhs;
  LocalMatrix<double> mat;
  LocalVector<double> Dinvhalf_min;
  LocalVector<double> Dinvhalf_plus;
#ifdef GUUS  
  LocalMatrix<double> Zin;
#endif  
  mat.ReadFileMTX(std::string(argv[1]));
  mat.info();
#ifdef GUUS  
  Zin.ReadFileMTX(std::string(argv[2]));
  Zin.info();
  
#endif  
  // All work vectors have one entry per matrix row.
  x.Allocate("x", mat.get_nrow());
  refsol.Allocate("refsol", mat.get_nrow());
  refones.Allocate("refones", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());
  chk_r.Allocate("chk_r", mat.get_nrow());
#ifdef BUBFLO
   x.ReadFileASCII(std::string(argv[2]));
#endif   
  rhs.ReadFileASCII(std::string(argv[3]));
#ifdef GUUS
  x.SetRandom(0.0,1.0,1000);
  refsol.ReadFileASCII(std::string(argv[4]));
  refones.Ones();
#endif  

  
  //refsol.Ones();

  // GPU build: move the operator and the work vectors to the accelerator.
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
  chk_r.MoveToAccelerator();
  Dinvhalf_min.MoveToAccelerator();
  Dinvhalf_plus.MoveToAccelerator();
  
#endif  
  
  // Start of the "levelset_prep" timing window (microseconds).
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO  
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    // Take the raw buffers out of the vectors so they can be handed to
    // bubmap_create() and later to MakeZLSSD().
    // NOTE(review): neither bubmap_ptr nor phi_ptr is released anywhere in
    // this function -- confirm who owns them after LeaveDataPtr().
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors_eachdirec(defvex_perdirec+1, defvex_perdirec+2, defvex_perdirec+3);
  ls.Set_alldims(xdim, xdim, xdim);
  ls.Setlvst_offst(lvst_offst);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  lprep=(tack-tick)/1000000;
  std::cout << "levelset_prep" << lprep << " sec" << std::endl;

  // Linear solver: start of the build timing window.
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);
 
#ifdef SCALIN
  
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_min, -1);
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_plus, 1);
  
  // Apply Dinvhalf_min to the matrix from both sides and to the right-hand
  // side; the solution is multiplied by Dinvhalf_min again after the solve.
  mat.DiagonalMatrixMult(Dinvhalf_min);
  mat.DiagonalMatrixMult_fromL(Dinvhalf_min);
  
  //x.PointWiseMult(Dinvhalf_plus);
  rhs.PointWiseMult(Dinvhalf_min);
//   rhs.Scale(0.3);
#endif
#ifdef GUUS  
   ls.SetZ(Zin);
#endif   
  ls.SetOperator(mat);
  ls.Init(0.0, 1e-6, 1e8, 200000);
//  ls.RecordResidualHistory();
  

#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif    

  ls.Build();
#ifdef MATDIA  
   mat.ConvertToDIA();
#endif  

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  // The reported build time includes the level-set preparation above.
  std::cout << "Building:" << b+lprep << " sec" << std::endl;
  
//   ls.Verbose(2);

  mat.info();

  // Start of the solve timing window.
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s=(tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
  
#ifdef SCALIN
  x.PointWiseMult(Dinvhalf_min);
#endif

  
#ifdef GUUS  
//   x.WriteFileASCII("x_solution_shell_inv_neumann.rec");
  //ls.RecordHistory("res__ongpu_tns.rec");
  // Write the solution from the host, then move it back for the residual check.
  x.MoveToHost();
  x.WriteFileASCII("x_neumann.rec");
  x.MoveToAccelerator();
  sol_norm=x.Norm();
  // chk_r = rhs - mat*x
  mat.Apply(x, &chk_r); 
  chk_r.ScaleAdd(double(-1.0), rhs);
  cout<<"\n Real Residual Norm is "<<chk_r.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  cout<<"\n Norm of Ones is "<<refones.Norm()<<endl;
  x.MoveToHost();
  // refones becomes (ones - x) and x becomes (x - refsol); both differences
  // are reported relative to the norm of the computed solution.
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
#endif  
  ls.Clear();
  


  stop_paralution();

  return 0;
}