Пример #1
0
bool LocalVector<ValueType>::Check(void) const {

    LOG_DEBUG(this, "LocalVector::Check()",
              "");

    bool check = false;

    if (this->is_accel() == true) {

        LocalVector<ValueType> vec;
        vec.CopyFrom(*this);

        check = vec.Check();

        LOG_VERBOSE_INFO(2, "*** warning: LocalVector::Check() is performed on the host");

    } else {

        check = this->vector_->Check();

    }

    return check;

}
Пример #2
0
TEST(DictTrieTest, Test1) {
  string s1, s2;
  DictTrie trie(DICT_FILE);
  ASSERT_LT(trie.GetMinWeight() + 15.6479, 0.001);
  string word("来到");
  Unicode uni;
  ASSERT_TRUE(TransCode::Decode(word, uni));
  DictUnit nodeInfo;
  nodeInfo.word = uni;
  nodeInfo.tag = "v";
  nodeInfo.weight = -8.87033;
  s1 << nodeInfo;
  s2 << (*trie.Find(uni.begin(), uni.end()));

  EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2);
  word = "清华大学";
  LocalVector<pair<size_t, const DictUnit*> > res;
  const char * words[] = {"清", "清华", "清华大学"};
  for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
    ASSERT_TRUE(TransCode::Decode(words[i], uni));
    res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end())));
    //resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end());
  }
  vector<pair<size_t, const DictUnit*> > vec;
  vector<struct Dag> dags;
  ASSERT_TRUE(TransCode::Decode(word, uni));
  trie.Find(uni.begin(), uni.end(), dags);
  ASSERT_EQ(dags.size(), uni.size());
  ASSERT_NE(dags.size(), 0u);
  s1 << res;
  s2 << dags[0].nexts;
  ASSERT_EQ(s1, s2);
  
}
extern "C" __declspec(dllexport) void CPUsolveCSRDouble(int *row_offset, int *col, double *val,
	const int nnz, const int N, double *_rhs, double *_x) {

	std::string name = "s";
	LocalVector<double> x;
	LocalVector<double> rhs;
	LocalMatrix<double> mat;

	x.Allocate(name, N);
	x.Zeros();
	rhs.Allocate(name, N);
	mat.AllocateCSR(name, nnz, N, N);
	mat.CopyFromCSR(row_offset, col, val);
	rhs.CopyFromData(_rhs);

	CG<LocalMatrix<double>, LocalVector<double>, double> ls;
	Jacobi<LocalMatrix<double>, LocalVector<double>, double> p;
	ls.SetOperator(mat);
	ls.SetPreconditioner(p);
	ls.Build();
	ls.Solve(rhs, &x);

	x.CopyToData(_x);
	mat.Clear();
	x.Clear();
	rhs.Clear();

	ls.Clear();
}
Пример #4
0
void LocalVector<ValueType>::ScaleAdd2(const ValueType alpha, const LocalVector<ValueType> &x, const ValueType beta, const LocalVector<ValueType> &y, const ValueType gamma) {

    LOG_DEBUG(this, "LocalVector::ScaleAdd2()",
              alpha << " " << beta);

    assert(this->get_size() == x.get_size());
    assert(this->get_size() == y.get_size());
    assert( ( (this->vector_ == this->vector_host_)  && (x.vector_ == x.vector_host_)  && (y.vector_ == y.vector_host_)) ||
            ( (this->vector_ == this->vector_accel_) && (x.vector_ == x.vector_accel_) && (y.vector_ == y.vector_accel_)) );

    if (this->get_size() > 0 ) {
        this->vector_->ScaleAdd2(alpha, *x.vector_, beta, *y.vector_, gamma);
    }

}
Пример #5
0
void LocalVector<ValueType>::PointWiseMult(const LocalVector<ValueType> &x, const LocalVector<ValueType> &y) {

    LOG_DEBUG(this, "LocalVector::(x, y)",
              "");

    assert(this->get_size() == x.get_size());
    assert(this->get_size() == y.get_size());
    assert( ( (this->vector_ == this->vector_host_)  && (x.vector_ == x.vector_host_)  && (y.vector_ == y.vector_host_)) ||
            ( (this->vector_ == this->vector_accel_) && (x.vector_ == x.vector_accel_) && (y.vector_ == y.vector_accel_)) );

    if (this->get_size() > 0 ) {
        this->vector_->PointWiseMult(*x.vector_, *y.vector_);
    }

}
Пример #6
0
TEST(LocalVector, test1) {
  LocalVector<size_t> vec;
  ASSERT_EQ(vec.size(), 0u);
  ASSERT_EQ(vec.capacity(), LOCAL_VECTOR_BUFFER_SIZE);
  ASSERT_TRUE(vec.empty());
  size_t size = 129;
  for(size_t i = 0; i < size; i++) {
    vec.push_back(i);
  }
  ASSERT_EQ(vec.size(), size);
  ASSERT_EQ(vec.capacity(), 256u);
  ASSERT_FALSE(vec.empty());
  LocalVector<size_t> vec2(vec);
  ASSERT_EQ(vec2.capacity(), vec.capacity());
  ASSERT_EQ(vec2.size(), vec.size());
}
Пример #7
0
void LocalVector<ValueType>::CopyFromPermute(const LocalVector<ValueType> &src,
                                             const LocalVector<int> &permutation) {

    LOG_DEBUG(this, "LocalVector::CopyFromPermute()",
              "");

    assert(&src != NULL);
    assert(&src != this);
    assert(&permutation != NULL);
    assert(permutation.get_size() == this->get_size());
    assert(this->get_size() == src.get_size());
    assert( ( (this->vector_ == this->vector_host_)  && (src.vector_ == src.vector_host_)  && (permutation.vector_ == permutation.vector_host_)) ||
            ( (this->vector_ == this->vector_accel_) && (src.vector_ == src.vector_accel_) && (permutation.vector_ == permutation.vector_accel_)) );

    if (this->get_size() > 0 ) {
        this->vector_->CopyFromPermute(*src.vector_, *permutation.vector_);
    }

}
Пример #8
0
std::pair<LocalPoint,LocalVector> secondOrderAccurate(
						     const GlobalPoint& startingPos,
						     const GlobalVector& startingDir,
						     double rho, const Plane& plane)
{

  typedef Basic2DVector<float>  Vector2D;

  // translate problem to local frame of the plane
  LocalPoint lPos = plane.toLocal(startingPos);
  LocalVector lDir = plane.toLocal(startingDir);

  LocalVector yPrime = plane.toLocal( GlobalVector(0,0,1.f));
  float sinPhi=0, cosPhi=0;
  Vector2D pos;
  Vector2D dir;

  sinPhi = yPrime.y();
  cosPhi = yPrime.x();
  pos = Vector2D( lPos.x()*cosPhi + lPos.y()*sinPhi,
		  -lPos.x()*sinPhi + lPos.y()*cosPhi);
  dir = Vector2D( lDir.x()*cosPhi + lDir.y()*sinPhi,
		    -lDir.x()*sinPhi + lDir.y()*cosPhi);

  double d = -lPos.z();
  double x = pos.x() + dir.x()/lDir.z()*d - 0.5*rho*d*d;
  double y = pos.y() + dir.y()/lDir.z()*d;


  LocalPoint thePos( x*cosPhi - y*sinPhi,
		     x*sinPhi + y*cosPhi, 0);
  float px = dir.x()+rho*d;
  LocalVector theDir( px*cosPhi - dir.y()*sinPhi,
		     px*sinPhi + dir.y()*cosPhi, lDir.z());
  
  return std::pair<LocalPoint,LocalVector>(thePos,theDir);

}
Пример #9
0
void LocalVector<ValueType>::ScaleAdd(const ValueType alpha, const LocalVector<ValueType> &x) {

    LOG_DEBUG(this, "LocalVector::ScaleAdd()",
              alpha);

    assert(this->get_size() == x.get_size());
    assert( ( (this->vector_ == this->vector_host_)  && (x.vector_ == x.vector_host_)) ||
            ( (this->vector_ == this->vector_accel_) && (x.vector_ == x.vector_accel_)) );

    if (this->get_size() > 0 ) {
        this->vector_->ScaleAdd(alpha, *x.vector_);
    }

}
Пример #10
0
void LocalVector<ValueType>::PartialSum(const LocalVector<ValueType> &x) {

    LOG_DEBUG(this, "LocalVector::PartialSum()",
              "");

    assert(this->get_size() == x.get_size());
    assert( ( (this->vector_ == this->vector_host_)  && (x.vector_ == x.vector_host_)) ||
            ( (this->vector_ == this->vector_accel_) && (x.vector_ == x.vector_accel_)) );

    if (this->get_size() > 0 ) {
        this->vector_->PartialSum(*x.vector_);
    }

}
Пример #11
0
void LocalVector<ValueType>::PermuteBackward(const LocalVector<int> &permutation) {

    LOG_DEBUG(this, "LocalVector::PermuteBackward()",
              "");

    assert(&permutation != NULL);
    assert(permutation.get_size() == this->get_size());
    assert( ( (this->vector_ == this->vector_host_)  && (permutation.vector_ == permutation.vector_host_)) ||
            ( (this->vector_ == this->vector_accel_) && (permutation.vector_ == permutation.vector_accel_)) );

    if (this->get_size() > 0 ) {
        this->vector_->PermuteBackward(*permutation.vector_);
    }

}
Пример #12
0
ValueType LocalVector<ValueType>::DotNonConj(const LocalVector<ValueType> &x) const {

    LOG_DEBUG(this, "LocalVector::DotNonConj()",
              "");

    assert(this->get_size() == x.get_size());
    assert( ( (this->vector_ == this->vector_host_)  && (x.vector_ == x.vector_host_)) ||
            ( (this->vector_ == this->vector_accel_) && (x.vector_ == x.vector_accel_)) );

    if (this->get_size() > 0 ) {
        return this->vector_->DotNonConj(*x.vector_);
    } else {
        return ValueType(0.0);
    }

}
Пример #13
0
void FV1InnerBoundaryElemDisc<TDomain>::
add_jac_A_elem(LocalMatrix& J, const LocalVector& u, GridObject* elem, const MathVector<dim> vCornerCoords[])
{
	// get finite volume geometry
	const static TFVGeom& fvgeom = GeomProvider<TFVGeom>::get();

	for (size_t i = 0; i < fvgeom.num_bf(); ++i)
	{
		// get current BF
		const typename TFVGeom::BF& bf = fvgeom.bf(i);

		// get associated node and subset index
		const int co = bf.node_id();
		int si = fvgeom.subset_index();

		// get solution at the corner of the bf
		size_t nFct = u.num_fct();
		std::vector<LocalVector::value_type> uAtCorner(nFct);
		for (size_t fct = 0; fct < nFct; fct++)
			uAtCorner[fct] = u(fct,co);

		// get corner coordinates
		const MathVector<dim>& cc = bf.global_corner(0);

		FluxDerivCond fdc;
		if (!fluxDensityDerivFct(uAtCorner, elem, cc, si, fdc))
			UG_THROW("FV1InnerBoundaryElemDisc::add_jac_A_elem:"
							" Call to fluxDensityDerivFct resulted did not succeed.");
		
		// scale with volume of BF
		for (size_t j=0; j<fdc.fluxDeriv.size(); j++)
			for (size_t k=0; k<fdc.fluxDeriv[j].size(); k++)
				fdc.fluxDeriv[j][k].second *= bf.volume();
		
		// add to Jacobian
		for (size_t j=0; j<fdc.fluxDeriv.size(); j++)
		{
			for (size_t k=0; k<fdc.fluxDeriv[j].size(); k++)
			{
				if (fdc.from[j] != InnerBoundaryConstants::_IGNORE_)
					J(fdc.from[j], co, fdc.fluxDeriv[j][k].first, co) += fdc.fluxDeriv[j][k].second;
				if (fdc.to[j] != InnerBoundaryConstants::_IGNORE_)
					J(fdc.to[j], co, fdc.fluxDeriv[j][k].first, co)	-= fdc.fluxDeriv[j][k].second;
			}
		}	
	}
}
Пример #14
0
void FV1InnerBoundaryElemDisc<TDomain>::
add_def_A_elem(LocalVector& d, const LocalVector& u, GridObject* elem, const MathVector<dim> vCornerCoords[])
{
	// get finite volume geometry
	static TFVGeom& fvgeom = GeomProvider<TFVGeom>::get();

	// loop Boundary Faces
	for (size_t i = 0; i < fvgeom.num_bf(); ++i)
	{
		// get current BF
		const typename TFVGeom::BF& bf = fvgeom.bf(i);
		
		// get associated node and subset index
		const int co = bf.node_id();
		int si = fvgeom.subset_index();

		// get solution at the corner of the bf
		size_t nFct = u.num_fct();
		std::vector<LocalVector::value_type> uAtCorner(nFct);
		for (size_t fct = 0; fct < nFct; fct++)
			uAtCorner[fct] = u(fct,co);

		// get corner coordinates
		const MathVector<dim>& cc = bf.global_corner(0);

		// get flux densities in that node
		FluxCond fc;
		if (!fluxDensityFct(uAtCorner, elem, cc, si, fc))
		{
			UG_THROW("FV1InnerBoundaryElemDisc::add_def_A_elem:"
						" Call to fluxDensityFct did not succeed.");
		}

		// scale with volume of BF
		for (size_t j=0; j<fc.flux.size(); j++) fc.flux[j] *= bf.volume();

		// add to defect
		for (size_t j=0; j<fc.flux.size(); j++)
		{
			if (fc.from[j] != InnerBoundaryConstants::_IGNORE_) d(fc.from[j], co) += fc.flux[j];
			if (fc.to[j] != InnerBoundaryConstants::_IGNORE_) d(fc.to[j], co) -= fc.flux[j];
		}
	}
}
Пример #15
0
void LocalVector<ValueType>::CopyFrom(const LocalVector<ValueType> &src,
                                      const int src_offset,
                                      const int dst_offset,
                                      const int size) {

    LOG_DEBUG(this, "LocalVector::CopyFrom()",
              "src_offset=" << src_offset <<
              " dst_offset=" << dst_offset <<
              " size=" << size);

    assert(&src != NULL);
    assert(&src != this);
    assert(src_offset < src.get_size());
    assert(dst_offset < this->get_size());

    assert( ( (this->vector_ == this->vector_host_)  && (src.vector_ == src.vector_host_)) ||
            ( (this->vector_ == this->vector_accel_) && (src.vector_ == src.vector_accel_)) );

    this->vector_->CopyFrom(*src.vector_, src_offset, dst_offset, size);

}
Пример #16
0
void LocalVector<ValueType>::ScaleAddScale(const ValueType alpha, const LocalVector<ValueType> &x, const ValueType beta,
                                           const int src_offset,
                                           const int dst_offset,
                                           const int size) {

    LOG_DEBUG(this, "LocalVector::ScaleAddScale()",
              alpha << " " << beta << " " <<
              "src_offset=" << src_offset <<
              " dst_offset=" << dst_offset <<
              " size=" << size);

    assert(&x != NULL);
    assert(src_offset < x.get_size());
    assert(dst_offset < this->get_size());
    assert( ( (this->vector_ == this->vector_host_)  && (x.vector_ == x.vector_host_)) ||
            ( (this->vector_ == this->vector_accel_) && (x.vector_ == x.vector_accel_)) );

    if (this->get_size() > 0 ) {
        this->vector_->ScaleAddScale(alpha, *x.vector_, beta,
                                     src_offset, dst_offset, size);
    }

}
Пример #17
0
int main(int argc, char* argv[]) {

  init_paralution();

  info_paralution();

  LocalVector<double> x;
  LocalVector<double> rhs;

  LocalStencil<double> stencil(Laplace2D);

  stencil.SetGrid(100); // 100x100

  x.Allocate("x", stencil.get_nrow());
  rhs.Allocate("rhs", stencil.get_nrow());

  // Linear Solver
  CG<LocalStencil<double>, LocalVector<double>, double > ls;

  rhs.Ones();
  x.Zeros(); 

  ls.SetOperator(stencil);

  ls.Build();

  stencil.info();

  double tick, tack;
  tick = paralution_time();

  ls.Solve(rhs, &x);

  tack = paralution_time();
  std::cout << "Solver execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  ls.Clear();

  stop_paralution();

  return 0;
}
Пример #18
0
int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> <initial_guess> <rhs> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

//   if (argc > 4) {
//     set_omp_threads_paralution(atoi(argv[5]));
//   } 
  set_omp_threads_paralution(8);
  
  info_paralution();

  struct timeval now;
  double tick, tack, b=0.0f,s=0.0f, lprep=0.0f, sol_norm, diff_norm, ones_norm;
  double *phi_ptr=NULL;
  int *bubmap_ptr=NULL, phisize, maxbmap, setlssd, lvst_offst;
  int xdim, ydim, zdim, defvex_perdirec, defvex_perdirec_y, defvex_perdirec_z;
  DPCG<LocalMatrix<double>, LocalVector<double>, double > ls;
#ifdef BUBFLO  
  xdim=atoi(argv[5]);
  setlssd=atoi(argv[6]);
  defvex_perdirec=atoi(argv[7]);
  lvst_offst=atoi(argv[8]);
  phisize=(xdim+2*lvst_offst)*(ydim+2*lvst_offst)*(zdim+2*lvst_offst);
#endif  
  LocalVector<double> x;
  LocalVector<double>refsol;
  LocalVector<double>refones;
  LocalVector<double>chk_r;
  LocalVector<double> rhs;
  LocalMatrix<double> mat;
  LocalVector<double> Dinvhalf_min;
  LocalVector<double> Dinvhalf_plus;
#ifdef GUUS  
  LocalMatrix<double> Zin;
#endif  
  mat.ReadFileMTX(std::string(argv[1]));
  mat.info();
#ifdef GUUS  
  Zin.ReadFileMTX(std::string(argv[2]));
  Zin.info();
  
#endif  
  x.Allocate("x", mat.get_nrow());
  refsol.Allocate("refsol", mat.get_nrow());
  refones.Allocate("refones", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());
  chk_r.Allocate("chk_r", mat.get_nrow());
#ifdef BUBFLO
   x.ReadFileASCII(std::string(argv[2]));
#endif   
  rhs.ReadFileASCII(std::string(argv[3]));
#ifdef GUUS
  x.SetRandom(0.0,1.0,1000);
  refsol.ReadFileASCII(std::string(argv[4]));
  refones.Ones();
#endif  

  
  //refsol.Ones();
// 
//   // Uncomment for GPU
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
  chk_r.MoveToAccelerator();
  Dinvhalf_min.MoveToAccelerator();
  Dinvhalf_plus.MoveToAccelerator();
  
#endif  
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO  
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors_eachdirec(defvex_perdirec+1, defvex_perdirec+2, defvex_perdirec+3);
  ls.Set_alldims(xdim, xdim, xdim);
  ls.Setlvst_offst(lvst_offst);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  lprep=(tack-tick)/1000000;
  std::cout << "levelset_prep" << lprep << " sec" << std::endl;
  // Linear Solver
//   return 0;
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);
 
#ifdef SCALIN
  
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_min, -1);
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_plus, 1);
  
  mat.DiagonalMatrixMult(Dinvhalf_min);
  mat.DiagonalMatrixMult_fromL(Dinvhalf_min);
  
  //x.PointWiseMult(Dinvhalf_plus);
  rhs.PointWiseMult(Dinvhalf_min);
//   rhs.Scale(0.3);
#endif
#ifdef GUUS  
   ls.SetZ(Zin);
#endif   
  ls.SetOperator(mat);
  ls.Init(0.0, 1e-6, 1e8, 200000);
//  ls.RecordResidualHistory();
  

#ifdef BUBFLO
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif    
//   
//  stop_paralution();
//  return 0;
  ls.Build();
#ifdef MATDIA  
   mat.ConvertToDIA();
#endif  

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b+lprep << " sec" << std::endl;
  
//   ls.Verbose(2);

  mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s=(tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
  
#ifdef SCALIN
  x.PointWiseMult(Dinvhalf_min);
#endif

//   

  
#ifdef GUUS  
//   x.WriteFileASCII("x_solution_shell_inv_neumann.rec");
  //ls.RecordHistory("res__ongpu_tns.rec");
  x.MoveToHost();
  x.WriteFileASCII("x_neumann.rec");
  x.MoveToAccelerator();
  sol_norm=x.Norm();
  mat.Apply(x, &chk_r); 
  chk_r.ScaleAdd(double(-1.0), rhs);
  cout<<"\n Real Residual Norm is "<<chk_r.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  cout<<"\n Norm of Ones is "<<refones.Norm()<<endl;
  x.MoveToHost();
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
#endif  
  ls.Clear();
  


  stop_paralution();

  return 0;
}
Пример #19
0
int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

  if (argc > 2) {
    set_omp_threads_paralution(atoi(argv[2]));
  } 

  info_paralution();

  LocalVector<double> x;
  LocalVector<double> rhs;

  LocalMatrix<double> mat;

  mat.ReadFileMTX(std::string(argv[1]));

  // Compute and apply (R)CMK ordering
  LocalVector<int> cmk;
  //  mat.CMK(&cmk);
  mat.RCMK(&cmk);
  mat.Permute(cmk);

  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();

  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());

  // Linear Solver
  CG<LocalMatrix<double>, LocalVector<double>, double > ls;

  // Preconditioner
  ILU<LocalMatrix<double>, LocalVector<double>, double > p;

  double tick, tack;

  rhs.Ones();
  x.Zeros(); 

  ls.SetOperator(mat);
  ls.SetPreconditioner(p);

  ls.Build();

  mat.info();

  tick = paralution_time();

  ls.Solve(rhs, &x);

  tack = paralution_time();
  std::cout << "Solver execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  // Revert CMK ordering on solution vector
  x.PermuteBackward(cmk);

  stop_paralution();

  return 0;
}
Пример #20
0
int main(int argc, char* argv[]) {

    if (argc == 1) {
        std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
        exit(1);
    }

    init_paralution();

    if (argc > 2) {
        set_omp_threads_paralution(atoi(argv[2]));
    }

    info_paralution();
    //    int ii;

    LocalVector<double> x;
    LocalVector<double> rhs;

    LocalMatrix<double> mat;

    struct timeval ti1,ti2;//timer

    mat.ReadFileMTX(std::string(argv[1]));
    mat.info();

    x.Allocate("x", mat.get_nrow());
    rhs.Allocate("rhs", mat.get_nrow());

    x.info();
    rhs.info();

    rhs.Ones();

    gettimeofday(&ti1,NULL); /* read starttime in t1 */
    mat.Apply(rhs, &x);
    gettimeofday(&ti2,NULL); /* read endtime in t2 */

    fflush(stderr);
    fprintf(stderr, "\nTime cost host spmv code microseconds: %ld microseconds\n",
            ((ti2.tv_sec - ti1.tv_sec)*1000000L
             +ti2.tv_usec) - ti1.tv_usec
            );

    std::cout << "\ndot=" << x.Dot(rhs) << std::endl;

    mat.ConvertToBCSR();
    mat.info();

    mat.MoveToAccelerator();
    x.MoveToAccelerator();
    rhs.MoveToAccelerator();
    mat.info();

    rhs.Ones();
//    exit(1);

    gettimeofday(&ti1,NULL); /* read starttime in t1 */
    mat.Apply(rhs, &x);
    gettimeofday(&ti2,NULL); /* read endtime in t2 */

    fflush(stderr);
    fprintf(stderr, "\nTime cost for accelerator spmv  microseconds: %ld microseconds\n",
            ((ti2.tv_sec - ti1.tv_sec)*1000000L
             +ti2.tv_usec) - ti1.tv_usec
            );

    std::cout << "\ndot=" << x.Dot(rhs) << std::endl;

    stop_paralution();

    return 0;
}
Пример #21
0
void FV1InnerBoundaryElemDisc<TDomain>::
compute_err_est_A_elem(const LocalVector& u, GridObject* elem, const MathVector<dim> vCornerCoords[], const number& scale)
{
	// get error estimator
	err_est_type* err_est_data = dynamic_cast<err_est_type*>(this->m_spErrEstData.get());

	// cast this elem to side_type of error estimator
	typename SideAndElemErrEstData<TDomain>::side_type* side =
		dynamic_cast<typename SideAndElemErrEstData<TDomain>::side_type*>(elem);
	if (!side)
	{
		UG_THROW("Error in DependentNeumannBoundaryFV1<TDomain>::compute_err_est_A_elem():\n"
				 "Element that error assembling routine is called for has the wrong type.");
	}

	// global IPs
	ReferenceObjectID roid = side->reference_object_id();
	size_t numSideIPs = err_est_data->get(0)->num_side_ips(roid);
	MathVector<dim>* globIPs = err_est_data->get(0)->side_global_ips(side, vCornerCoords);

	// loop IPs
	try
	{
		for (size_t sip = 0; sip < numSideIPs; sip++)
		{
			// get values of u at ip (interpolate)
			size_t nFct = u.num_fct();
			std::vector<LocalVector::value_type> uAtIP = std::vector<LocalVector::value_type>(nFct);

			for (size_t fct = 0; fct < nFct; fct++)
			{
				uAtIP[fct] = 0.0;
				for (size_t sh = 0; sh < m_shapeValues.num_sh(); sh++)
					uAtIP[fct] += u(fct,sh) * m_shapeValues.shapeAtSideIP(sh,sip);
			}

			// ip coordinates
			const MathVector<dim>& ipCoords = globIPs[sip];

			// elem subset
			int si = this->subset_handler().get_subset_index(side);

			FluxCond fc;
			if (!fluxDensityFct(uAtIP, elem, ipCoords, si, fc))
			{
				UG_THROW("FV1InnerBoundaryElemDisc::compute_err_est_A_elem:"
							" Call to fluxDensityFct did not succeed.");
			}

			// subtract from estimator values
			// sign must be opposite of that in add_def_A_elem
			// as the difference between this and the actual flux of the unknown is calculated
			for (size_t j=0; j<fc.flux.size(); j++)
			{
				if (fc.from[j] != InnerBoundaryConstants::_IGNORE_)
					(*err_est_data->get(this->m_fctGrp[fc.from[j]])) (side,sip) -= scale * fc.flux[j];
				if (fc.to[j] != InnerBoundaryConstants::_IGNORE_)
					(*err_est_data->get(this->m_fctGrp[fc.to[j]])) (side,sip) += scale * fc.flux[j];
			}
		}
	}
	UG_CATCH_THROW("Values for the error estimator could not be assembled at every IP." << std::endl
			<< "Maybe wrong type of ErrEstData object? This implementation needs: MultipleSideAndElemErrEstData.");
}
int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

  if (argc > 2) {
    set_omp_threads_paralution(atoi(argv[2]));
  } 

  info_paralution();

  LocalVector<double> x;
  LocalVector<double> rhs;

  LocalMatrix<double> mat;


  mat.ReadFileMTX(std::string(argv[1]));
  mat.info();

  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());

  x.info();
  rhs.info();

  rhs.Ones();
  
  mat.Apply(rhs, &x);

  std::cout << "dot=" << x.Dot(rhs) << std::endl;

  mat.ConvertToELL();
  mat.info();

  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
  mat.info();

  rhs.Ones();
  
  mat.Apply(rhs, &x);

  std::cout << "dot=" << x.Dot(rhs) << std::endl;

  stop_paralution();

  return 0;
}
Пример #23
0
int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

  if (argc > 2) {
    set_omp_threads_paralution(atoi(argv[2]));
  } 

  info_paralution();

  LocalVector<double> b, b_old, *b_k, *b_k1, *b_tmp;
  LocalMatrix<double> mat;

  mat.ReadFileMTX(std::string(argv[1]));

  // Gershgorin spectrum approximation
  double glambda_min, glambda_max;

  // Power method spectrum approximation
  double plambda_min, plambda_max;

  // Maximum number of iteration for the power method
  int iter_max = 10000;

  double tick, tack;

  // Gershgorin approximation of the eigenvalues
  mat.Gershgorin(glambda_min, glambda_max);
  std::cout << "Gershgorin : Lambda min = " << glambda_min
            << "; Lambda max = " << glambda_max << std::endl;


  mat.MoveToAccelerator();
  b.MoveToAccelerator();
  b_old.MoveToAccelerator();


  b.Allocate("b_k+1", mat.get_nrow());
  b_k1 = &b;

  b_old.Allocate("b_k", mat.get_nrow());
  b_k = &b_old;  

  b_k->Ones();

  mat.info();

  tick = paralution_time();

  // compute lambda max
  for (int i=0; i<=iter_max; ++i) {

    mat.Apply(*b_k, b_k1);

    //    std::cout << b_k1->Dot(*b_k) << std::endl;
    b_k1->Scale(double(1.0)/b_k1->Norm());

    b_tmp = b_k1;
    b_k1 = b_k;
    b_k = b_tmp;

  }

  // get lambda max (Rayleigh quotient)
  mat.Apply(*b_k, b_k1);
  plambda_max = b_k1->Dot(*b_k) ;

  tack = paralution_time();
  std::cout << "Power method (lambda max) execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  mat.AddScalarDiagonal(double(-1.0)*plambda_max);


  b_k->Ones();

  tick = paralution_time();

  // compute lambda min
  for (int i=0; i<=iter_max; ++i) {

    mat.Apply(*b_k, b_k1);

    //    std::cout << b_k1->Dot(*b_k) + plambda_max << std::endl;
    b_k1->Scale(double(1.0)/b_k1->Norm());

    b_tmp = b_k1;
    b_k1 = b_k;
    b_k = b_tmp;

  }

  // get lambda min (Rayleigh quotient)
  mat.Apply(*b_k, b_k1);
  plambda_min = (b_k1->Dot(*b_k) + plambda_max);

  // back to the original matrix
  mat.AddScalarDiagonal(plambda_max);

  tack = paralution_time();
  std::cout << "Power method (lambda min) execution:" << (tack-tick)/1000000 << " sec" << std::endl;


  std::cout << "Power method Lambda min = " << plambda_min
            << "; Lambda max = " << plambda_max 
            << "; iter=2x" << iter_max << std::endl;

  LocalVector<double> x;
  LocalVector<double> rhs;

  x.CloneBackend(mat);
  rhs.CloneBackend(mat);

  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());

  // Chebyshev iteration
  Chebyshev<LocalMatrix<double>, LocalVector<double>, double > ls;

  rhs.Ones();
  x.Zeros(); 

  ls.SetOperator(mat);

  ls.Set(plambda_min, plambda_max);

  ls.Build();

  tick = paralution_time();

  ls.Solve(rhs, &x);

  tack = paralution_time();
  std::cout << "Solver execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  // PCG + Chebyshev polynomial
  CG<LocalMatrix<double>, LocalVector<double>, double > cg;
  AIChebyshev<LocalMatrix<double>, LocalVector<double>, double > p;

  // damping factor
  plambda_min = plambda_max / 7;
  p.Set(3, plambda_min, plambda_max);
  rhs.Ones();
  x.Zeros(); 

  cg.SetOperator(mat);
  cg.SetPreconditioner(p);

  cg.Build();

  tick = paralution_time();

  cg.Solve(rhs, &x);

  tack = paralution_time();
  std::cout << "Solver execution:" << (tack-tick)/1000000 << " sec" << std::endl;

  stop_paralution();

  return 0;
}
int main(int argc, char* argv[]) {

  if (argc == 1) { 
    std::cerr << argv[0] << " <matrix> <initial_guess> <rhs> [Num threads]" << std::endl;
    exit(1);
  }

  init_paralution();

//   if (argc > 4) {
//     set_omp_threads_paralution(atoi(argv[]));
//   } 
  set_omp_threads_paralution(8);
  info_paralution();

  struct timeval now;
  double tick, tack, b,s, sol_norm, diff_norm, ones_norm;
  double *phi_ptr=NULL;
  int *bubmap_ptr=NULL, phisize, maxbmap, setlssd, lvst_offst;
  int xdim, ydim, zdim, defvex_perdirec;
#ifdef BUBFLO
  xdim=atoi(argv[5]);
  setlssd=atoi(argv[6]);
  defvex_perdirec=atoi(argv[7]);
  lvst_offst=atoi(argv[8]);
  phisize=(xdim+2*lvst_offst)*(ydim+2*lvst_offst)*(zdim+2*lvst_offst);
#endif  
  LocalVector<double> x;
  
  LocalVector<double> rhs;
  LocalMatrix<double> mat;
  LocalVector<double> Dinvhalf_min;
  LocalVector<double> Dinvhalf_plus;
#ifdef GUUS  
  LocalMatrix<double> Zin;
  LocalVector<double> refsol;
  LocalVector<double> refones;
#endif  
  mat.ReadFileMTX(std::string(argv[1]));
  mat.info();
#ifdef GUUS  
  Zin.ReadFileMTX(std::string(argv[2]));
  Zin.info();
  refsol.Allocate("refsol", mat.get_nrow());
  refones.Allocate("refones", mat.get_nrow());
  //refsol.Ones();
  refsol.ReadFileASCII(std::string(argv[4]));
  refones.Ones();
#endif  
  x.Allocate("x", mat.get_nrow());
  rhs.Allocate("rhs", mat.get_nrow());
  
  
  // Linear Solver
  DPCG<LocalMatrix<double>, LocalVector<double>, double > ls;
  MultiElimination<LocalMatrix<double>, LocalVector<double>, double > p;
  Jacobi<LocalMatrix<double>, LocalVector<double>, double > j_p;
  MultiColoredILU<LocalMatrix<double>, LocalVector<double>, double > mcilu_p;
  ILU<LocalMatrix<double>, LocalVector<double>, double > ilu_p;
  MultiColoredSGS<LocalMatrix<double>, LocalVector<double>, double > mcsgs_p;
  FSAI<LocalMatrix <double>, LocalVector<double>, double > fsai_p ;
  SPAI<LocalMatrix <double>, LocalVector <double>, double > spai_p ;
  


#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
  
#ifdef SCALIN
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_min, -1);
  mat.ExtractInverseDiagonal_sqrt(&Dinvhalf_plus, 1);
  
  mat.DiagonalMatrixMult(Dinvhalf_min);
  mat.DiagonalMatrixMult_fromL(Dinvhalf_min);
  
  //x.PointWiseMult(Dinvhalf_plus);
  rhs.PointWiseMult(Dinvhalf_min);
#endif
  
    /////////////////////////////////////////////////////////////////  
   std::cout << "-----------------------------------------------" << std::endl;
   std::cout << "DPCG solver MCSGS" << std::endl;
 #ifdef GUUS
   rhs.ReadFileASCII(std::string(argv[3]));
   x.SetRandom(0.0,1.0,1000);
   ls.SetZ(Zin);
 #endif
   
 #ifdef BUBFLO  
   x.ReadFileASCII(std::string(argv[2]));
   rhs.ReadFileASCII(std::string(argv[3]));
 #endif
 
   gettimeofday(&now, NULL);
   tick = now.tv_sec*1000000.0+(now.tv_usec);
   
 #ifdef BUBFLO  
   if(setlssd){
     LocalVector<double> phi;
     LocalVector<int> bubmap;
     phi.Allocate("PHI", phisize);
     bubmap.Allocate("bubmap",mat.get_nrow());
     phi.ReadFileASCII(std::string(argv[4]));
     
     bubmap.LeaveDataPtr(&bubmap_ptr);
     phi.LeaveDataPtr(&phi_ptr);
 
     x.SetRandom(0.0,1.0,1000);
     bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
     phi.Clear();
     
   }
   ls.Setxdim(xdim);
   ls.SetNVectors(defvex_perdirec);
   ls.Setlvst_offst(lvst_offst);
   ls.SetZlssd(setlssd);
   mat.ConvertToCSR();  
 #endif
   
   ls.SetOperator(mat);
   ls.SetPreconditioner (mcsgs_p) ;
   mcsgs_p.SetPrecondMatrixFormat(HYB);
   
  
   ls.Init(0.0, 1e-6, 1e8, 200000);
 #ifdef BUBFLO  	
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
   if(setlssd)
     ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available	
 #endif
     
   
 
   ls.Build();
 #ifdef MATDIA  
   mat.ConvertToDIA();
 #endif  
   gettimeofday(&now, NULL);
   tack = now.tv_sec*1000000.0+(now.tv_usec);
   b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
   
 
   mat.info();
 
   gettimeofday(&now, NULL);
   tick = now.tv_sec*1000000.0+(now.tv_usec);
 
   ls.Solve(rhs, &x);
 
   gettimeofday(&now, NULL);
   tack = now.tv_sec*1000000.0+(now.tv_usec);
   s= (tack-tick)/1000000;
   std::cout << "Solver execution:" << s << " sec" << std::endl;
   std::cout << "Total execution:" << s+b << " sec" << std::endl;
 #ifdef GUUS 
   x.MoveToHost();
   sol_norm=x.Norm();
   cout<<"\n Norm of Solution is "<<sol_norm<<endl;
   cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
   refones.AddScale(x,(double)-1.0f);
   x.AddScale(refsol,(double)-1.0f);
   
   
   diff_norm=x.Norm();
   ones_norm=refones.Norm();
   cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
   cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
 #endif  
   //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
  ls.Clear();
  /////////////////////////////////////////////////////////////////  
 
  /////////////////////////////////////////////////////////////////  
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver FSAI" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif

  
#ifdef BUBFLO  
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);
  
#ifdef BUBFLO  
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();  
#endif    
  fsai_p.Set (2) ;
  
  ls.SetOperator(mat);
  ls.SetPreconditioner (fsai_p) ;
  fsai_p.SetPrecondMatrixFormat(HYB);
  
 
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
  
  
#ifdef BUBFLO  
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif  
  

  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif
  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
  

//   mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS
    x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
#endif  
  //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
  ls.Clear();
// //   
  
///////////////////////////////////////////////////////////////  
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver ILU-p" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
  
#ifdef BUBFLO  
   x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif  
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);
  
#ifdef BUBFLO
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();  
#endif
  
  ilu_p.Set(0);
  ls.SetOperator(mat);
  ls.SetPreconditioner(ilu_p);
  ls.Init(0.0, 1e-6, 1e8, 20000);
  ls.RecordResidualHistory();
//  mat.ConvertToCSR();  
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  

#ifdef BUBFLO  
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
  
  //ls.Verbose(2);
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);
  
  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;

#ifdef SCALIN
  x.PointWiseMult(Dinvhalf_min);
#endif
  x.MoveToHost();
//   x.WriteFileASCII("x_solution_shell_scal.rec");
#ifdef GUUS
 // ls.RecordHistory("res_ongpu_ilu-p.rec");
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  x.MoveToHost();
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
#endif  
  
  ls.Clear();
  
/////////////////////////////////////////////////////////////////
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver ME-ILU-J" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif  

#ifdef BUBFLO
   x.ReadFileASCII(std::string(argv[2]));
   rhs.ReadFileASCII(std::string(argv[3]));
#endif
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);
 
#ifdef BUBFLO   
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif
  p.Set(j_p, 1);
  
  ls.SetOperator(mat);
  ls.SetPreconditioner(p);
  
  
  
  
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
  
#ifdef BUBFLO  
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif  
  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
  

  mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS  
  x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
#endif  
  //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
  ls.Clear();

///////////////////////////////////////////////////////////////  
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver ME-ILU-SGS" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
  
#ifdef BUBFLO  
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO  
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();  
#endif  
  //p.Init(mcsgs_p, 1);
  ls.SetOperator(mat);
  ls.SetPreconditioner(mcsgs_p);

  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
#ifdef BUBFLO  
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif
    
  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
  

//   mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS  
x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
#endif  
  //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
  ls.Clear();
// 
// /////////////////////////////////////////////////////////////////  
//   
// /////////////////////////////////////////////////////////////////  
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver ME-ILU-ILU(0,1)" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
#ifdef BUBFLO  
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();    
#endif  
//   mcilu_p.Init(0);
//   
//   p.Init(mcilu_p, 1, 0.0);
  mcilu_p.Set(0);
  p.Set(mcilu_p, 1, 0.0);

  ls.SetOperator(mat);
  ls.SetPreconditioner(p);
//   p.SetPrecondMatrixFormat(HYB);
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
  #ifdef BUBFLO  
//   ls.SetNVectors(4);
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif    
//   
  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
// 
//   ls.Verbose(2);
  mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);
  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS  
x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
  //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
#endif  
  ls.Clear();

// /////////////////////////////////////////////////////////////////    
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver ILU(0,1)" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO  
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();
#endif  
  
  mcilu_p.Set(0,1);
  ls.SetOperator(mat);
  ls.SetPreconditioner(mcilu_p);
  
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif  
#ifdef BUBFLO  
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available	
#endif
    
  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
  

//   mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);

  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS  
x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
//   x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
#endif  
  ls.Clear();
/////////////////////////////////////////////////////////////////    
  
// /////////////////////////////////////////////////////////////////  
  std::cout << "-----------------------------------------------" << std::endl;
  std::cout << "DPCG solver jacobi" << std::endl;
  refones.Ones();
#ifdef GUUS  
  rhs.ReadFileASCII(std::string(argv[3]));
  x.SetRandom(0.0,1.0,1000);
  ls.SetZ(Zin);
#endif
#ifdef BUBFLO  
  x.ReadFileASCII(std::string(argv[2]));
  rhs.ReadFileASCII(std::string(argv[3]));
#endif
  
  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

#ifdef BUBFLO
  if(setlssd){
    LocalVector<double> phi;
    LocalVector<int> bubmap;
    phi.Allocate("PHI", phisize);
    bubmap.Allocate("bubmap",mat.get_nrow());
    phi.ReadFileASCII(std::string(argv[4]));
    
    bubmap.LeaveDataPtr(&bubmap_ptr);
    phi.LeaveDataPtr(&phi_ptr);

    //x.SetRandom(0.0,1.0,1000);
    bubmap_create(phi_ptr, bubmap_ptr, xdim, xdim, xdim, mat.get_nrow(), &maxbmap, lvst_offst);
    phi.Clear();
    
  }
  ls.Setxdim(xdim);
  ls.SetNVectors(defvex_perdirec);
  ls.SetZlssd(setlssd);
  mat.ConvertToCSR();    
#endif  


  ls.SetOperator(mat);
  ls.SetPreconditioner(j_p);
//   p.SetPrecondMatrixFormat(HYB);
  ls.Init(0.0, 1e-6, 1e8, 200000);
#ifdef GPURUN  
  mat.MoveToAccelerator();
  x.MoveToAccelerator();
  rhs.MoveToAccelerator();
#endif    
#ifdef BUBFLO  
//   ls.SetNVectors(4);
  ls.MakeZ_CSR(); // requires xdim_ and novecni_ and zlssd_ to be set
  if(setlssd)
    ls.MakeZLSSD(bubmap_ptr, maxbmap); // bubmap must be ready and maxbmap available
#endif    
//   
  
  ls.Build();
#ifdef MATDIA  
  mat.ConvertToDIA();
#endif  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  b=(tack-tick)/1000000;
  std::cout << "Building:" << b << " sec" << std::endl;
// 
//   ls.Verbose(2);
  mat.info();

  gettimeofday(&now, NULL);
  tick = now.tv_sec*1000000.0+(now.tv_usec);

  ls.Solve(rhs, &x);
  
  gettimeofday(&now, NULL);
  tack = now.tv_sec*1000000.0+(now.tv_usec);
  s= (tack-tick)/1000000;
  std::cout << "Solver execution:" << s << " sec" << std::endl;
  std::cout << "Total execution:" << s+b << " sec" << std::endl;
#ifdef GUUS  
x.MoveToHost();
  sol_norm=x.Norm();
  cout<<"\n Norm of Solution is "<<sol_norm<<endl;
  cout<<"\n Norm of Reference Solution is "<<refsol.Norm()<<endl;
  refones.AddScale(x,(double)-1.0f);
  x.AddScale(refsol,(double)-1.0f);
  
  
  diff_norm=x.Norm();
  ones_norm=refones.Norm();
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Reference is "<<((double)diff_norm/(double)sol_norm)<<endl;
  cout<<"\n Relative Norm of Calculated Solution w.r.t. Ones is "<<((double)ones_norm/(double)sol_norm)<<endl;
  //x.WriteFileASCII("x_solution1e3shell_ilu01.rec");
#endif  
  ls.Clear();
//   
  cout<<"########################################################################"<<endl;
  cout<<"Everything complete stopping paralution now."<<endl;
  stop_paralution();

  return 0;
}
extern "C" __declspec(dllexport) void solveCSRSingle(int *row_offset, int *col, float *val, 
	const int nnz, const int N, float *_rhs, float *_x) {

	std::string name = "s";
	LocalVector<float> x;
	LocalVector<float> rhs;
	LocalMatrix<float> mat;

	x.Allocate(name, N);
	x.Zeros();
	rhs.Allocate(name, N);
	mat.AllocateCSR(name, nnz, N, N);
	mat.CopyFromCSR(row_offset, col, val);
	rhs.CopyFromData(_rhs);
//	mat.Check();

/*	rhs.SetDataPtr(&_rhs, name, N);
	x.SetDataPtr(&_x, name, N);
	mat.SetDataPtrCSR(&row_offset, &col, &val, name, nnz, N, N);
*/

	mat.MoveToAccelerator();
	x.MoveToAccelerator();
	rhs.MoveToAccelerator();
	CG<LocalMatrix<float>, LocalVector<float>, float> ls;
	MultiColoredILU<LocalMatrix<float>, LocalVector<float>, float> p;
	ls.SetOperator(mat);
	ls.SetPreconditioner(p);
	ls.Build();

	ls.Solve(rhs, &x);

	mat.MoveToHost();
	x.MoveToHost();
	rhs.MoveToHost();

	/*
	mat.LeaveDataPtrCSR(&row_offset, &col, &val);
	rhs.LeaveDataPtr(&_rhs);
	x.LeaveDataPtr(&_x);
*/

	x.CopyToData(_x);
	mat.Clear();
	x.Clear();
	rhs.Clear();
	
	ls.Clear();
}