void run_test_cases( BOOST_EXPLICIT_TEMPLATE_TYPE(Block) )
{
  // Runs the bitset_test<> checks below against dynamic_bitset<Block>.
  // Every scenario builds its operands inside its own scope and passes them
  // to the matching Tests:: helper (or to a plain assert).
  typedef boost::dynamic_bitset<Block> bitset_type;
  typedef bitset_test<bitset_type> Tests;
  // NOTE: bitset_type::size_type is written out in full wherever it is needed;
  // a local typedef for it is unusable with Borland 5.5.1.

  std::string long_string = get_long_string();
  std::size_t ulong_digits = std::numeric_limits<unsigned long>::digits;

  //---------------------------------------------------------------------
  // b.empty()
  {
    bitset_type bs;
    Tests::empty(bs);
  }
  {
    bitset_type bs(1, 1ul);
    Tests::empty(bs);
  }
  {
    // one and a half blocks wide
    bitset_type bs(bitset_type::bits_per_block
                   + bitset_type::bits_per_block/2, 15ul);
    Tests::empty(bs);
  }
  //---------------------------------------------------------------------
  // b.to_ulong()
  {
    bitset_type bs;
    Tests::to_ulong(bs);
  }
  {
    bitset_type bs(std::string("1"));
    Tests::to_ulong(bs);
  }
  {
    bitset_type bs(bitset_type::bits_per_block,
                   static_cast<unsigned long>(-1));
    Tests::to_ulong(bs);
  }
  {
    // one bit narrower than unsigned long
    std::string ones(ulong_digits - 1, '1');
    bitset_type bs(ones);
    Tests::to_ulong(bs);
  }
  {
    // exactly as wide as unsigned long
    std::string ones(ulong_digits, '1');
    bitset_type bs(ones);
    Tests::to_ulong(bs);
  }
  {
    // overflow case
    bitset_type bs(long_string);
    Tests::to_ulong(bs);
  }
  //---------------------------------------------------------------------
  // to_string(b, str)
  {
    bitset_type bs;
    Tests::to_string(bs);
  }
  {
    bitset_type bs(std::string("0"));
    Tests::to_string(bs);
  }
  {
    bitset_type bs(long_string);
    Tests::to_string(bs);
  }
  //---------------------------------------------------------------------
  // b.count()
  {
    bitset_type bs;
    Tests::count(bs);
  }
  {
    bitset_type bs(std::string("0"));
    Tests::count(bs);
  }
  {
    bitset_type bs(std::string("1"));
    Tests::count(bs);
  }
  {
    bitset_type bs(8, 255ul);
    Tests::count(bs);
  }
  {
    bitset_type bs(long_string);
    Tests::count(bs);
  }
  //---------------------------------------------------------------------
  // b.size()
  {
    bitset_type bs;
    Tests::size(bs);
  }
  {
    bitset_type bs(std::string("0"));
    Tests::size(bs);
  }
  {
    bitset_type bs(long_string);
    Tests::size(bs);
  }
  //---------------------------------------------------------------------
  // b.any()
  {
    bitset_type bs;
    Tests::any(bs);
  }
  {
    bitset_type bs(std::string("0"));
    Tests::any(bs);
  }
  {
    bitset_type bs(long_string);
    Tests::any(bs);
  }
  //---------------------------------------------------------------------
  // b.none()
  {
    bitset_type bs;
    Tests::none(bs);
  }
  {
    bitset_type bs(std::string("0"));
    Tests::none(bs);
  }
  {
    bitset_type bs(long_string);
    Tests::none(bs);
  }
  //---------------------------------------------------------------------
  // a.is_subset_of(b)
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::subset(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("0"));
    bitset_type rhs(std::string("0"));
    Tests::subset(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("1"));
    Tests::subset(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    Tests::subset(lhs, rhs);
  }
  {
    // operands differ in the middle bit (left one flipped)
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    lhs[long_string.size()/2].flip();
    Tests::subset(lhs, rhs);
  }
  {
    // operands differ in the middle bit (right one flipped)
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    rhs[long_string.size()/2].flip();
    Tests::subset(lhs, rhs);
  }
  //---------------------------------------------------------------------
  // a.is_proper_subset_of(b)
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::proper_subset(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("0"));
    bitset_type rhs(std::string("0"));
    Tests::proper_subset(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("1"));
    Tests::proper_subset(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    Tests::proper_subset(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    lhs[long_string.size()/2].flip();
    Tests::proper_subset(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    rhs[long_string.size()/2].flip();
    Tests::proper_subset(lhs, rhs);
  }
  //---------------------------------------------------------------------
  // intersects
  {
    // both operands empty
    bitset_type lhs;
    bitset_type rhs;
    Tests::intersects(lhs, rhs);
  }
  {
    bitset_type lhs;
    bitset_type rhs(5, 8ul);
    Tests::intersects(lhs, rhs);
  }
  {
    bitset_type lhs(8, 0ul);
    bitset_type rhs(15, 0ul);
    rhs[9] = 1;
    Tests::intersects(lhs, rhs);
  }
  {
    bitset_type lhs(15, 0ul);
    bitset_type rhs(22, 0ul);
    lhs[14] = rhs[14] = 1;
    Tests::intersects(lhs, rhs);
  }
  //---------------------------------------------------------------------
  // find_first
  {
    // empty bitset
    bitset_type bs;
    Tests::find_first(bs);
  }
  {
    // bitset of size 1
    bitset_type bs(1, 1ul);
    Tests::find_first(bs);
  }
  {
    // all-0s bitset
    bitset_type bs(4 * bitset_type::bits_per_block, 0ul);
    Tests::find_first(bs);
  }
  {
    // first bit on
    bitset_type bs(1, 1ul);
    Tests::find_first(bs);
  }
  {
    // last bit on
    bitset_type bs(4 * bitset_type::bits_per_block - 1, 0ul);
    bs.set(bs.size() - 1);
    Tests::find_first(bs);
  }
  //---------------------------------------------------------------------
  // find_next
  {
    // empty bitset
    bitset_type bs;

    Tests::find_next(bs, 0);
    Tests::find_next(bs, 1);
    Tests::find_next(bs, 200);
    Tests::find_next(bs, bs.npos);
  }
  {
    // bitset of size 1 (find_next can never find)
    bitset_type bs(1, 1ul);

    Tests::find_next(bs, 0);
    Tests::find_next(bs, 1);
    Tests::find_next(bs, 200);
    Tests::find_next(bs, bs.npos);
  }
  {
    // all-1s bitset
    bitset_type bs(16 * bitset_type::bits_per_block);
    bs.set();

    // probe every start position up to a few past the end, then npos
    const typename bitset_type::size_type past_end = 5 + bs.size();
    for (typename bitset_type::size_type pos = 0; pos <= past_end; ++pos) {
      Tests::find_next(bs, pos);
    }
    Tests::find_next(bs, bs.npos);
  }
  {
    // 1s only at the first and last position of every block
    const int block_count = 32;
    const int bits_in_block = bitset_type::bits_per_block;

    bitset_type bs(block_count * bits_in_block);
    typename bitset_type::size_type pos = bits_in_block - 1;
    for ( ; pos < bs.size(); pos += bits_in_block) {
      bs.set(pos);
      typename bitset_type::size_type block_start = pos - (bits_in_block - 1);
      bs.set(block_start);
    }

    // probe every start position up to a few past the end, then npos
    const typename bitset_type::size_type past_end = 5 + bs.size();
    for (pos = 0; pos <= past_end; ++pos) {
      Tests::find_next(bs, pos);
    }
    Tests::find_next(bs, bs.npos);
  }
  {
    // alternating 1s and 0s, starting with a 1 at position 0
    const typename bitset_type::size_type num_bits = 1000;
    bitset_type bs(num_bits);

    typename bitset_type::size_type pos = 0;
    for ( ; pos < num_bits; ++pos) {
      bs[pos] = (pos%2 == 0);
    }

    // probe every start position up to a few past the end, then npos
    const typename bitset_type::size_type past_end = 5 + bs.size();
    for (pos = 0; pos <= past_end; ++pos) {
      Tests::find_next(bs, pos);
    }
    Tests::find_next(bs, bs.npos);
  }
  //---------------------------------------------------------------------
  // operator==
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_equal(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("0"));
    bitset_type rhs(std::string("0"));
    Tests::operator_equal(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("1"));
    Tests::operator_equal(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    Tests::operator_equal(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    lhs[long_string.size()/2].flip();
    Tests::operator_equal(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    rhs[long_string.size()/2].flip();
    Tests::operator_equal(lhs, rhs);
  }
  //---------------------------------------------------------------------
  // operator!=
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_not_equal(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("0"));
    bitset_type rhs(std::string("0"));
    Tests::operator_not_equal(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("1"));
    Tests::operator_not_equal(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    Tests::operator_not_equal(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    lhs[long_string.size()/2].flip();
    Tests::operator_not_equal(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    rhs[long_string.size()/2].flip();
    Tests::operator_not_equal(lhs, rhs);
  }
  //---------------------------------------------------------------------
  // operator<
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_less_than(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("0"));
    bitset_type rhs(std::string("0"));
    Tests::operator_less_than(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("1"));
    Tests::operator_less_than(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("10"));
    bitset_type rhs(std::string("11"));
    Tests::operator_less_than(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    Tests::operator_less_than(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    lhs[long_string.size()/2].flip();
    Tests::operator_less_than(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    rhs[long_string.size()/2].flip();
    Tests::operator_less_than(lhs, rhs);
  }
  // the ordering must agree with the unsigned long values 4 and 5
  {
    bitset_type lhs(3, 4ul);
    bitset_type rhs(3, 5ul);
    assert(lhs < rhs);
  }
  {
    bitset_type lhs(3, 4ul);
    bitset_type rhs(3, 4ul);
    assert(!(lhs < rhs));
  }
  {
    bitset_type lhs(3, 5ul);
    bitset_type rhs(3, 4ul);
    assert(!(lhs < rhs));
  }
  //---------------------------------------------------------------------
  // operator<=
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_less_than_eq(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("0"));
    bitset_type rhs(std::string("0"));
    Tests::operator_less_than_eq(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("1"));
    Tests::operator_less_than_eq(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    Tests::operator_less_than_eq(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    lhs[long_string.size()/2].flip();
    Tests::operator_less_than_eq(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    rhs[long_string.size()/2].flip();
    Tests::operator_less_than_eq(lhs, rhs);
  }
  // the ordering must agree with the unsigned long values 4 and 5
  {
    bitset_type lhs(3, 4ul);
    bitset_type rhs(3, 5ul);
    assert(lhs <= rhs);
  }
  {
    bitset_type lhs(3, 4ul);
    bitset_type rhs(3, 4ul);
    assert(lhs <= rhs);
  }
  {
    bitset_type lhs(3, 5ul);
    bitset_type rhs(3, 4ul);
    assert(!(lhs <= rhs));
  }
  //---------------------------------------------------------------------
  // operator>
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_greater_than(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("0"));
    bitset_type rhs(std::string("0"));
    Tests::operator_greater_than(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("1"));
    Tests::operator_greater_than(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    Tests::operator_greater_than(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    lhs[long_string.size()/2].flip();
    Tests::operator_greater_than(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    rhs[long_string.size()/2].flip();
    Tests::operator_greater_than(lhs, rhs);
  }
  // the ordering must agree with the unsigned long values 4 and 5
  {
    bitset_type lhs(3, 4ul);
    bitset_type rhs(3, 5ul);
    assert(!(lhs > rhs));
  }
  {
    bitset_type lhs(3, 4ul);
    bitset_type rhs(3, 4ul);
    assert(!(lhs > rhs));
  }
  {
    bitset_type lhs(3, 5ul);
    bitset_type rhs(3, 4ul);
    assert(lhs > rhs);
  }
  //---------------------------------------------------------------------
  // operator>=
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_greater_than_eq(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("0"));
    bitset_type rhs(std::string("0"));
    Tests::operator_greater_than_eq(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("1"));
    Tests::operator_greater_than_eq(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    Tests::operator_greater_than_eq(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    lhs[long_string.size()/2].flip();
    Tests::operator_greater_than_eq(lhs, rhs);
  }
  {
    bitset_type lhs(long_string);
    bitset_type rhs(long_string);
    rhs[long_string.size()/2].flip();
    Tests::operator_greater_than_eq(lhs, rhs);
  }
  // the ordering must agree with the unsigned long values 4 and 5
  {
    bitset_type lhs(3, 4ul);
    bitset_type rhs(3, 5ul);
    assert(!(lhs >= rhs));
  }
  {
    bitset_type lhs(3, 4ul);
    bitset_type rhs(3, 4ul);
    assert(lhs >= rhs);
  }
  {
    bitset_type lhs(3, 5ul);
    bitset_type rhs(3, 4ul);
    assert(lhs >= rhs);
  }
  //---------------------------------------------------------------------
  // b.test(pos)
  {
    // pos >= b.size()
    bitset_type bs;
    Tests::test_bit(bs, 0);
  }
  {
    // pos < b.size()
    bitset_type bs(std::string("0"));
    Tests::test_bit(bs, 0);
  }
  {
    // pos == b.size() / 2
    bitset_type bs(long_string);
    Tests::test_bit(bs, long_string.size()/2);
  }
  //---------------------------------------------------------------------
  // b << pos
  {
    // pos == 0
    std::size_t shift = 0;
    bitset_type bs(std::string("1010"));
    Tests::operator_shift_left(bs, shift);
  }
  {
    // pos == size()/2
    std::size_t shift = long_string.size() / 2;
    bitset_type bs(long_string);
    Tests::operator_shift_left(bs, shift);
  }
  {
    // pos >= n
    std::size_t shift = long_string.size();
    bitset_type bs(long_string);
    Tests::operator_shift_left(bs, shift);
  }
  //---------------------------------------------------------------------
  // b >> pos
  {
    // pos == 0
    std::size_t shift = 0;
    bitset_type bs(std::string("1010"));
    Tests::operator_shift_right(bs, shift);
  }
  {
    // pos == size()/2
    std::size_t shift = long_string.size() / 2;
    bitset_type bs(long_string);
    Tests::operator_shift_right(bs, shift);
  }
  {
    // pos >= n
    std::size_t shift = long_string.size();
    bitset_type bs(long_string);
    Tests::operator_shift_right(bs, shift);
  }
  //---------------------------------------------------------------------
  // a & b
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_and(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("0"));
    Tests::operator_and(lhs, rhs);
  }
  {
    bitset_type lhs(long_string.size(), 0);
    bitset_type rhs(long_string);
    Tests::operator_and(lhs, rhs);
  }
  {
    bitset_type lhs(long_string.size(), 1);
    bitset_type rhs(long_string);
    Tests::operator_and(lhs, rhs);
  }
  //---------------------------------------------------------------------
  // a | b
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_or(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("0"));
    Tests::operator_or(lhs, rhs);
  }
  {
    bitset_type lhs(long_string.size(), 0);
    bitset_type rhs(long_string);
    Tests::operator_or(lhs, rhs);
  }
  {
    bitset_type lhs(long_string.size(), 1);
    bitset_type rhs(long_string);
    Tests::operator_or(lhs, rhs);
  }
  //---------------------------------------------------------------------
  // a ^ b
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_xor(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("0"));
    Tests::operator_xor(lhs, rhs);
  }
  {
    bitset_type lhs(long_string.size(), 0);
    bitset_type rhs(long_string);
    Tests::operator_xor(lhs, rhs);
  }
  {
    bitset_type lhs(long_string.size(), 1);
    bitset_type rhs(long_string);
    Tests::operator_xor(lhs, rhs);
  }
  //---------------------------------------------------------------------
  // a - b
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_sub(lhs, rhs);
  }
  {
    bitset_type lhs(std::string("1"));
    bitset_type rhs(std::string("0"));
    Tests::operator_sub(lhs, rhs);
  }
  {
    bitset_type lhs(long_string.size(), 0);
    bitset_type rhs(long_string);
    Tests::operator_sub(lhs, rhs);
  }
  {
    bitset_type lhs(long_string.size(), 1);
    bitset_type rhs(long_string);
    Tests::operator_sub(lhs, rhs);
  }
}
// Allocates `count` doubles and sets every one of them to 0.
// Prints a message on stderr and terminates the program if malloc fails,
// so callers never see a NULL buffer.
static double *alloc_zeroed_doubles(size_t count)
{
	double *buf = (double*)malloc(count*sizeof(double));
	size_t i;

	if (buf == NULL) {
		fprintf(stderr, "failed to allocate %lu doubles\n", (unsigned long)count);
		exit(EXIT_FAILURE);
	}
	for (i=0; i<count; ++i) {
		buf[i] = 0.;
	}
	return buf;
}

// Program entry point.
// Builds a uniform mesh of ne elements with np Gauss-Lobatto nodes each on
// [0, 10], assembles the mass/stiffness/face operators, initializes the field
// and advances it to time `tend` with a classical 4-stage Runge-Kutta scheme,
// then prints a summary and writes the field through save_field().
// np, ne, t_sta and t_end are not declared here; they come from elsewhere in
// the file. Command-line arguments are ignored. Returns 0 on completion.
int main(int argc, char **argv){

	double tend = 1E2, speed = 1.;
	// double tend = 1E-1, speed = 1.;
	char *init_type="mixed2";
	double *roots, *weights, *ll, *dl, xmin, xmax, 
		   deltax, jac, xr, xl, cfl, dt, rtime, min_dx;
	int ii, jj, kk, ee, idx, eres;
	long nstep;
	double *dx, *mesh; 
	double *smat, *xx, *qq, *qtemp, *k1, *k2, *k3, *k4, *minv_vec, *mmat, *dv, 
		   *mf, *ib, *df, *fstar;

	(void)argc;
	(void)argv;

	// initialize 
	// fortran index structure array[ii,jj,ee] where size(array) = (np, np, ne)
	// c 1d index structure array = [ee*np*np + jj*np + ii]
	// every buffer comes back zero-filled, so the accumulations below
	// (mmat, smat) start from 0 without any further reset loops
	roots   = alloc_zeroed_doubles(np);
	weights = alloc_zeroed_doubles(np);
	ll      = alloc_zeroed_doubles(np);
	dl      = alloc_zeroed_doubles(np);
	dx      = alloc_zeroed_doubles(ne);
	mesh    = alloc_zeroed_doubles(ne+1);

	smat     = alloc_zeroed_doubles(np*np);    // [jj np, ii np]
	xx       = alloc_zeroed_doubles(ne*np);    // [ee ne, ii np]
	qq       = alloc_zeroed_doubles(ne*np);    // [ee ne, ii np]
	qtemp    = alloc_zeroed_doubles(ne*np);    // [ee ne, ii np]
	k1       = alloc_zeroed_doubles(ne*np);    // [ee ne, ii np]
	k2       = alloc_zeroed_doubles(ne*np);    // [ee ne, ii np]
	k3       = alloc_zeroed_doubles(ne*np);    // [ee ne, ii np]
	k4       = alloc_zeroed_doubles(ne*np);    // [ee ne, ii np]
	minv_vec = alloc_zeroed_doubles(ne*np);    // [ee ne, ii np]
	mmat     = alloc_zeroed_doubles(ne*np*np); // [ee ne, jj np, ii np]
	dv       = alloc_zeroed_doubles(ne*np*np); // [ee ne, jj np, ii np]
	mf       = alloc_zeroed_doubles(2*np);     // [jj 2,  ii np]
	ib       = alloc_zeroed_doubles(2*np);     // [jj 2,  ii np]
	fstar    = alloc_zeroed_doubles(2*ne);     // [jj 2,  ii ne]; not referenced again in this function
	df       = alloc_zeroed_doubles(ne*2*np);  // [ee ne, jj 2, ii np]

	// mesh setup
	xmin = 0.;
	xmax = 10.;
	deltax = (xmax-xmin)/(double)ne;
	mesh[ne] = xmax;
	for(ee=0;ee<ne;++ee) {
		mesh[ee] = xmin+ee*deltax;
	}
	
	// gauss lobatto quadrature point, weight setup
	gausslobatto_quadrature(np, roots, weights);

	// coordinates and element size
	min_dx = xmax - xmin; // initial guess
	for(ee=0;ee<ne;ee++){
		xl = mesh[ee];
		xr = mesh[ee+1];
		dx[ee] = xr-xl; // size of each element
		if(dx[ee] < min_dx){
			min_dx = dx[ee]; // finding minimum dx
		}
		for(ii=0;ii<np;ii++){
			idx = ee*np+ii;
			// map the reference node roots[ii] in [-1,1] onto [xl, xr]
			xx[idx] = xl + 0.5*(1+roots[ii])*dx[ee];
		}
	}

	// mass matrix (accumulated into the zero-filled mmat)
	for(ee=0;ee<ne;ee++){
		jac = fabs(dx[ee])/2;
		for(kk=0;kk<np;kk++){
			lagrange(roots[kk], ll, roots);
			for(jj=0;jj<np;jj++){
				for(ii=0;ii<np;ii++){
					idx = ee*np*np+jj*np+ii;
					// mass matrix mmat[ne][np][np] in 1d index representation
					mmat[idx] += jac*weights[kk]*ll[ii]*ll[jj];
				}
			}
		}
	}

	// stiffness matrix (accumulated into the zero-filled smat)
	for(kk=0;kk<np;kk++){
		lagrange(roots[kk], ll, roots);
		lagrange_deriv(roots[kk], dl, roots);
		for(jj=0;jj<np;jj++){
			for(ii=0;ii<np;ii++){
				idx = jj*np+ii;
				// stiffness matrix smat[np][np] in 1d index representation
				smat[idx] += weights[kk]*ll[jj]*dl[ii];
			}
		}
	}

	// face integration
	lagrange(-1,mf,   roots); // mf[ii] for ii=0..np-1 represents element left face integration
	lagrange( 1,mf+np,roots); // mf[ii] for ii=np..2*np-1 represents element right face integration

	// boundary interpolation
	lagrange(-1,ib,   roots); // element left edge interpolation
	lagrange( 1,ib+np,roots); // element right edge interpolation

	// divergence operators
	// every entry of dv (ne*np*np) and df (ne*2*np) is assigned below, so no
	// reset is needed; the former reset wrote ne*np*2 entries of dv instead of
	// df, which ran past the end of dv when np == 1
	for(ee=0;ee<ne;ee++){
		for(jj=0;jj<np;jj++){
			// it turn out that mmat is diagonal. i.e., ii != jj, mmat[ee][jj][ii] = 0
			// the inverse of mmat is just the inverse of the diagonal components
			// here, we are extracting the inverse diagonal components only
			minv_vec[ee*np+jj] = 1./mmat[ee*np*np+jj*np+jj];
		}
		for(jj=0;jj<np;jj++){
			for(ii=0;ii<np;ii++){
				dv[ee*np*np+jj*np+ii] = minv_vec[ee*np+ii]*smat[jj*np+ii];
			}
		}
		for(jj=0;jj<2;jj++){
			for(ii=0;ii<np;ii++){
				df[ee*np*2+jj*np+ii]  = minv_vec[ee*np+ii]*mf[jj*np+ii];
			}
		}

	}
	
	// initialize qq field
	initialize(qq, xx, xmax, xmin, init_type);
	cfl = 1./(np*np);
	dt = cfl * min_dx / fabs(speed);
	rtime = 0.;
	nstep = 0;

	printf("Start Time Integration\n");

	// Runge-Kutta 4th order Time integration loop
	
	t_sta = clock();

	while(rtime < tend){
		// shorten the last step so the loop lands exactly on tend
		dt = fmin(dt, tend-rtime);

		rhs(qq,	   k1, dv, df, ib, speed);

		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+0.5*dt*k1[ii];
		rhs(qtemp, k2, dv, df, ib, speed);

		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+0.5*dt*k2[ii];
		rhs(qtemp, k3, dv, df, ib, speed);
		
		for(ii=0;ii<ne*np;ii++)
			qtemp[ii] = qq[ii]+dt*k3[ii];
		rhs(qtemp, k4, dv, df, ib, speed);

		for(ii=0;ii<ne*np;ii++)
			qq[ii] += 1./6.*dt*(k1[ii]+2*k2[ii]+2*k3[ii]+k4[ii]);

		rtime += dt;
		nstep += 1;
		if(nstep%10000 == 0) 
			printf("nstep = %10ld, %5.1f%% complete\n", nstep, rtime/tend*100);
	}

	// timeloop ends here;

	printf("Integration complete\n");

	// eres is handed to save_field; it is chosen smaller for larger ne
	if(ne > 200){
		eres = 2;
	}
	else if (ne > 60){
		eres = 3;
	}
	else if (ne > 30){
		eres = 6;
	}
	else {
		eres = 10;
	}

	// final report
	printf("-----------------------------------------------\n");
	printf("code type   : c serial\n");
	printf("Final time  : %13.5e\n", rtime);
	printf("CFL         : %13.5e\n", cfl);
	printf("DOF         : %13d\n", ne*np);
	printf("No. of Elem : %13d\n", ne);
	printf("Order       : %13d\n", np);
	printf("eres        : %13d\n", eres);
	printf("time steps  : %13ld\n", nstep);
	printf("-----------------------------------------------\n");

	save_field(xx, qq, ne, roots, eres);

	t_end = clock();
	// clock() counts in units of 1/CLOCKS_PER_SEC seconds; convert to msec
	// instead of assuming one tick is a microsecond
	printf("Motion time = %f msec\n", (double)(t_end - t_sta)*1000.0/CLOCKS_PER_SEC);

	free(roots);
	free(weights);
	free(ll);
	free(dl);
	free(dx);
	free(mesh);
	free(smat);
	free(xx);
	free(qq);
	free(qtemp);
	free(k1);
	free(k2);
	free(k3);
	free(k4);
	free(minv_vec);
	free(mmat);
	free(dv);
	free(mf);
	free(ib);
	free(fstar);
	free(df);

	return 0;
}
    // Builds and solves the normal equations of the least-squares fit
    // w = sum_k c_k * x^{p_k0} * y^{p_k1} * z^{p_k2}, where p_k = mPowers[k].
    //
    // numSamples : number of samples in each of the four arrays.
    // trgSamples : trgSamples[0..2] hold the x, y and z values of the samples,
    //              trgSamples[3] holds the w values being fitted.
    //
    // mCoefficients is allocated here with mNumPowers entries.  On success it
    // receives the solution and mSolved is set to true; if the linear solver
    // fails, the coefficients are zeroed and mSolved is set to false.
    void PolynomialFit4<Real>::DoLeastSquaresFit ( int numSamples,
            Real* trgSamples[4] )
    {
        // The matrix and vector for a linear system that determines the
        // coefficients of the fitted polynomial.
        GMatrix<Real> mat( mNumPowers, mNumPowers ); // initially zero
        GVector<Real> rhs( mNumPowers ); // initially zero
        mCoefficients = new1<Real>( mNumPowers );

        int row, col;
        for ( int i = 0; i < numSamples; ++i )
        {
            // Compute relevant powers of x, y and z.  Entry 0 of each power
            // table is never written here; it is presumably set to 1 where
            // the tables are created -- TODO confirm.
            Real x = trgSamples[0][i];
            Real y = trgSamples[1][i];
            Real z = trgSamples[2][i];
            Real w = trgSamples[3][i];
            int j;
            for ( j = 1; j <= 2 * mMaxXPower; ++j )
            {
                mXPowers[j] = mXPowers[j - 1] * x;
            }
            for ( j = 1; j <= 2 * mMaxYPower; ++j )
            {
                mYPowers[j] = mYPowers[j - 1] * y;
            }
            for ( j = 1; j <= 2 * mMaxZPower; ++j )
            {
                mZPowers[j] = mZPowers[j - 1] * z;
            }

            for ( row = 0; row < mNumPowers; ++row )
            {
                // Update the upper-triangular portion of the symmetric matrix.
                Real xp, yp, zp;
                for ( col = row; col < mNumPowers; ++col )
                {
                    xp = mXPowers[mPowers[row][0] + mPowers[col][0]];
                    yp = mYPowers[mPowers[row][1] + mPowers[col][1]];
                    // The z exponent must index the z-power table; reading
                    // mYPowers here would substitute y for z in the fit.
                    zp = mZPowers[mPowers[row][2] + mPowers[col][2]];
                    mat[row][col] += xp * yp * zp;
                }

                // Update the right-hand side of the system.
                xp = mXPowers[mPowers[row][0]];
                yp = mYPowers[mPowers[row][1]];
                zp = mZPowers[mPowers[row][2]];
                rhs[row] += xp * yp * zp * w;
            }
        }

        // Copy the upper-triangular portion of the symmetric matrix to the
        // lower-triangular portion.
        for ( row = 0; row < mNumPowers; ++row )
        {
            for ( col = 0; col < row; ++col )
            {
                mat[row][col] = mat[col][row];
            }
        }

        // Precondition by normalizing the sums.
        Real invNumSamples = ( ( Real )1 ) / ( Real )numSamples;
        for ( row = 0; row < mNumPowers; ++row )
        {
            for ( col = 0; col < mNumPowers; ++col )
            {
                mat[row][col] *= invNumSamples;
            }
            rhs[row] *= invNumSamples;
        }

        if ( LinearSystem<Real>().Solve( mat, rhs, mCoefficients ) )
        {
            mSolved = true;
        }
        else
        {
            // No solution: report an all-zero polynomial.
            memset( mCoefficients, 0, mNumPowers * sizeof( Real ) );
            mSolved = false;
        }
    }
    void NaturalSpline1<Real>::CreatePeriodicSpline ()
    {
        mB = new1<Real>( mNumSegments );
        mC = new1<Real>( mNumSegments );
        mD = new1<Real>( mNumSegments );

#if 1
        // Solving the system using a standard linear solver appears to be
        // numerically stable.
        const int size = 4 * mNumSegments;
        GMatrix<Real> mat( size, size );
        GVector<Real> rhs( size );
        int i, j, k;
        Real delta, delta2, delta3;
        for ( i = 0, j = 0; i < mNumSegments - 1; ++i, j += 4 )
        {
            delta = mTimes[i + 1] - mTimes[i];
            delta2 = delta * delta;
            delta3 = delta * delta2;

            mat[j + 0][j + 0] = ( Real )1;
            mat[j + 0][j + 1] = ( Real )0;
            mat[j + 0][j + 2] = ( Real )0;
            mat[j + 0][j + 3] = ( Real )0;
            mat[j + 1][j + 0] = ( Real )1;
            mat[j + 1][j + 1] = delta;
            mat[j + 1][j + 2] = delta2;
            mat[j + 1][j + 3] = delta3;
            mat[j + 2][j + 0] = ( Real )0;
            mat[j + 2][j + 1] = ( Real )1;
            mat[j + 2][j + 2] = ( ( Real )2 ) * delta;
            mat[j + 2][j + 3] = ( ( Real )3 ) * delta2;
            mat[j + 3][j + 0] = ( Real )0;
            mat[j + 3][j + 1] = ( Real )0;
            mat[j + 3][j + 2] = ( Real )1;
            mat[j + 3][j + 3] = ( ( Real )3 ) * delta;

            k = j + 4;
            mat[j + 0][k + 0] = ( Real )0;
            mat[j + 0][k + 1] = ( Real )0;
            mat[j + 0][k + 2] = ( Real )0;
            mat[j + 0][k + 3] = ( Real )0;
            mat[j + 1][k + 0] = ( Real ) - 1;
            mat[j + 1][k + 1] = ( Real )0;
            mat[j + 1][k + 2] = ( Real )0;
            mat[j + 1][k + 3] = ( Real )0;
            mat[j + 2][k + 0] = ( Real )0;
            mat[j + 2][k + 1] = ( Real ) - 1;
            mat[j + 2][k + 2] = ( Real )0;
            mat[j + 2][k + 3] = ( Real )0;
            mat[j + 3][k + 0] = ( Real )0;
            mat[j + 3][k + 1] = ( Real )0;
            mat[j + 3][k + 2] = ( Real ) - 1;
            mat[j + 3][k + 3] = ( Real )0;
        }

        delta = mTimes[i + 1] - mTimes[i];
        delta2 = delta * delta;
        delta3 = delta * delta2;

        mat[j + 0][j + 0] = ( Real )1;
        mat[j + 0][j + 1] = ( Real )0;
        mat[j + 0][j + 2] = ( Real )0;
        mat[j + 0][j + 3] = ( Real )0;
        mat[j + 1][j + 0] = ( Real )1;
        mat[j + 1][j + 1] = delta;
        mat[j + 1][j + 2] = delta2;
        mat[j + 1][j + 3] = delta3;
        mat[j + 2][j + 0] = ( Real )0;
        mat[j + 2][j + 1] = ( Real )1;
        mat[j + 2][j + 2] = ( ( Real )2 ) * delta;
        mat[j + 2][j + 3] = ( ( Real )3 ) * delta2;
        mat[j + 3][j + 0] = ( Real )0;
        mat[j + 3][j + 1] = ( Real )0;
        mat[j + 3][j + 2] = ( Real )1;
        mat[j + 3][j + 3] = ( ( Real )3 ) * delta;

        k = 0;
        mat[j + 0][k + 0] = ( Real )0;
        mat[j + 0][k + 1] = ( Real )0;
        mat[j + 0][k + 2] = ( Real )0;
        mat[j + 0][k + 3] = ( Real )0;
        mat[j + 1][k + 0] = ( Real ) - 1;
        mat[j + 1][k + 1] = ( Real )0;
        mat[j + 1][k + 2] = ( Real )0;
        mat[j + 1][k + 3] = ( Real )0;
        mat[j + 2][k + 0] = ( Real )0;
        mat[j + 2][k + 1] = ( Real ) - 1;
        mat[j + 2][k + 2] = ( Real )0;
        mat[j + 2][k + 3] = ( Real )0;
        mat[j + 3][k + 0] = ( Real )0;
        mat[j + 3][k + 1] = ( Real )0;
        mat[j + 3][k + 2] = ( Real ) - 1;
        mat[j + 3][k + 3] = ( Real )0;

        for ( i = 0, j = 0; i < mNumSegments; ++i, j += 4 )
        {
            rhs[j + 0] = mA[i];
            rhs[j + 1] = ( Real )0;
            rhs[j + 2] = ( Real )0;
            rhs[j + 3] = ( Real )0;
        }

        GVector<Real> coeff( size );
        bool solved = LinearSystem<Real>().Solve( mat, rhs, coeff );
        assertion( solved, "Failed to solve linear system\n" );
        WM5_UNUSED( solved );

        for ( i = 0, j = 0; i < mNumSegments; ++i )
        {
            j++;
            mB[i] = coeff[j++];
            mC[i] = coeff[j++];
            mD[i] = coeff[j++];
        }
#endif

#if 0
        // Solving the system using the equations derived in the PDF
        // "Fitting a Natural Spline to Samples of the Form (t,f(t))"
        // is ill-conditioned.  TODO: Find a way to row-reduce the matrix of the
        // PDF in a numerically stable manner yet retaining the O(n) asymptotic
        // behavior.

        // Compute the inverses M[i]^{-1}.
        const int numSegmentsM1 = mNumSegments - 1;
        Matrix4<Real>* invM = new1<Matrix4<Real> >( numSegmentsM1 );

        Real delta;
        int i;
        for ( i = 0; i < numSegmentsM1; i++ )
        {
            delta = mTimes[i + 1] - mTimes[i];
            Real invDelta1 = ( ( Real )1 ) / delta;
            Real invDelta2 = invDelta1 / delta;
            Real invDelta3 = invDelta2 / delta;

            Matrix4<Real>& invMi = invM[i];
            invMi[0][0] = ( Real )1;
            invMi[0][1] = ( Real )0;
            invMi[0][2] = ( Real )0;
            invMi[0][3] = ( Real )0;
            invMi[1][0] = ( ( Real )( -3 ) ) * invDelta1;
            invMi[1][1] = ( ( Real )3 ) * invDelta1;
            invMi[1][2] = ( Real )( -2 );
            invMi[1][3] = delta;
            invMi[2][0] = ( ( Real )3 ) * invDelta2;
            invMi[2][1] = ( ( Real )( -3 ) ) * invDelta2;
            invMi[2][2] = ( ( Real )3 ) * invDelta1;
            invMi[2][3] = ( Real )( -2 );
            invMi[3][0] = -invDelta3;
            invMi[3][1] = invDelta3;
            invMi[3][2] = -invDelta2;
            invMi[3][3] = invDelta1;
        }

        // Matrix M[n-1].
        delta = mTimes[i + 1] - mTimes[i];
        Real delta2 = delta * delta;
        Real delta3 = delta2 * delta;
        Matrix4<Real> lastM
        (
            ( Real )1, ( Real )0, ( Real )0, ( Real )0,
            ( Real )1, delta, delta2, delta3,
            ( Real )0, ( Real )1, ( ( Real )2 )*delta, ( ( Real )3 )*delta2,
            ( Real )0, ( Real )0, ( Real )1, ( ( Real )3 )*delta
        );

        // Matrix L.
        Matrix4<Real> LMat
        (
            ( Real )0, ( Real )0, ( Real )0, ( Real )0,
            ( Real )1, ( Real )0, ( Real )0, ( Real )0,
            ( Real )0, ( Real )1, ( Real )0, ( Real )0,
            ( Real )0, ( Real )0, ( Real )1, ( Real )0
        );

        // Vector U.
        Vector<4, Real> U( ( Real )1, ( Real )0, ( Real )0, ( Real )0 );

        // Initialize P = L and Q = f[n-2]*U.
        Matrix4<Real> P = LMat;

        const int numSegmentsM2 = mNumSegments - 2;
        Vector<4, Real> Q = mA[numSegmentsM2] * U;

        // Compute P and Q.
        for ( i = numSegmentsM2; i >= 0; --i )
        {
            // Matrix L*M[i]^{-1}.
            Matrix4<Real> LMInv = LMat * invM[i];

            // Update P.
            P = LMInv * P;

            // Update Q.
            if ( i > 0 )
            {
                Q = mA[i - 1] * U + LMInv * Q;
            }
            else
            {
                Q = mA[numSegmentsM1] * U + LMInv * Q;
            }
        }

        // Final update of P.
        P = lastM - P;

        // Compute P^{-1}.
        Matrix4<Real> invP = P.Inverse();

        // Compute K[n-1].
        Vector<4, Real> coeff = invP * Q;
        mB[numSegmentsM1] = coeff[1];
        mC[numSegmentsM1] = coeff[2];
        mD[numSegmentsM1] = coeff[3];

        // Back substitution for the other K[i].
        for ( i = numSegmentsM2; i >= 0; i-- )
        {
            coeff = invM[i] * ( mA[i] * U + LMat * coeff );
            mB[i] = coeff[1];
            mC[i] = coeff[2];
            mD[i] = coeff[3];
        }

        delete1( invM );
#endif
    }
// Runs the initial "compute shortest path" phase of the D* Lite algorithm.
//
// Two cost tables, g and rhs, are created locally with one int per map cell
// (indexed [y][x]) and initialised to rows*cols+1. The goal cell is seeded
// with rhs = 0 and pushed onto the member queue U. Entries are then popped
// from the front of U and processed until U runs empty, or until the front
// key is no longer smaller than the reference key and the start cell is
// consistent (rhs == g).
//
// Side effects: resets the member km to 0 and modifies the member queue U.
// Returns wayPoint, which this function never fills, so the returned vector
// is always empty.
vector<Grid> Planner::dStarLite(Map &map)
{
    vector<Grid> wayPoint;

    // Initialization
    vector<vector<int> > g(map._rows,vector<int>(map._cols,map._rows*map._cols+1));
    vector<vector<int> > rhs(map._rows,vector<int>(map._cols,map._rows*map._cols+1));
    km =0;
    rhs[map.goal.y][map.goal.x] =0;
    Key goalKey = calculateKey(map.goal,g,rhs,map.start,0);
    U.push_back({map.goal,goalKey});

    // Compute the current shortest path.
    // Keep processing while the first queue entry can still be improved or the
    // start cell is inconsistent. The emptiness check comes first: calling
    // front() on an empty queue is undefined behaviour, and the queue can
    // drain while the start cell is still inconsistent (e.g. unreachable start).
    // NOTE(review): the published D* Lite loop compares against the key of the
    // start cell; this call passes map.goal as the cell - confirm against
    // calculateKey's parameter meaning.
    while(!U.empty() &&
          (U.front().key<calculateKey(map.goal,g,rhs,map.start,0)||(rhs[map.start.y][map.start.x]!=g[map.start.y][map.start.x])))
    {
        // Take the key and position of the first element in the priority queue.
        Key kold = U.front().key;
        Grid u = U.front().point;

        U.pop_front();
        Key knew = calculateKey(u,g,rhs,map.start,km); // key under the current costs

        if (kold<knew)
        {
            // The stored key is out of date (smaller than the fresh one):
            // re-insert the cell with its new key so it is expanded again later.
            // The cell goes in front of the first entry whose key is smaller
            // than knew.
            auto pt =find_if(U.begin(),U.end(),[knew](Uelem &entry){return entry.key<knew;});
            U.insert(pt,{u,knew});
        }
        else if (g[u.y][u.x]>rhs[u.y][u.x])
        {
            // Over-consistent cell: a shorter path was found, so accept rhs
            // as the new cost and propagate to the neighbours.
            g[u.y][u.x] = rhs[u.y][u.x];

            vector<Grid> neightbour = findNeighbour(map, u, 8);
            for(auto &cell:neightbour)
            {
                updateVertex(U,cell,g,rhs,map,km);
            }
        }
        else
        {
            // Under-consistent cell: reset its cost, then update the
            // neighbours and the cell itself.
            // NOTE(review): this reset value is rows*cols, one less than the
            // rows*cols+1 used at initialisation - confirm that is intended.
            g[u.y][u.x] = map._cols*map._rows;
            vector<Grid> neightbour = findNeighbour(map, u, 8);
            for(auto &cell:neightbour)
            {
                updateVertex(U,cell,g,rhs,map,km);
            }
            updateVertex(U,u,g,rhs,map,km);
        }
    }

    return wayPoint;
}
Beispiel #6
0
void check_params( struct user_parameters* params, int matrix_size, 
                   int block_size, double dx, double dy, double *f_,
                   int niter, double *u_, double *unew_) 
{
    double x, y;
    int i, j;
    double *udiff_ =(double*)malloc(matrix_size * matrix_size * sizeof(double));
    double (*udiff)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])udiff_;
    double (*unew)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])unew_;
    double (*u)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])u_;
    double (*f)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])f_;

    // Check for convergence.
    for (j = 0; j < matrix_size; j++) {
        y = (double) (j) / (double) (matrix_size - 1);
        for (i = 0; i < matrix_size; i++) {
            x = (double) (i) / (double) (matrix_size - 1);
            (*udiff)[i][j] = (*unew)[i][j] - u_exact(x, y);
            if( (*udiff)[i][j] > 1.0E-6 ) { 
                printf("error: %d, %d: %f\n", i, j, (*udiff)[i][j]);
            }
        }
    }
    double error = r8mat_rms(matrix_size, matrix_size, udiff_);

    double error1;
    // Set the right hand side array F.
    rhs(matrix_size, matrix_size, f_, block_size);

    for (j = 0; j < matrix_size; j++) {
        for (i = 0; i < matrix_size; i++) {
            if (i == 0 || i == matrix_size - 1 || j == 0 || j == matrix_size - 1) {
                (*unew)[i][j] = (*f)[i][j];
                (*u)[i][j] = (*f)[i][j];
            } else {
                (*unew)[i][j] = 0.0;
                (*u)[i][j] = 0.0;
            }
        }
    }

    sweep_seq(matrix_size, matrix_size, dx, dy, f_, 0, niter, u_, unew_);

    // Check for convergence.
    for (j = 0; j < matrix_size; j++) {
        y = (double) (j) / (double) (matrix_size - 1);
        for (i = 0; i < matrix_size; i++) {
            x = (double) (i) / (double) (matrix_size - 1);
            (*udiff)[i][j] = (*unew)[i][j] - u_exact(x, y);
            if( (*udiff)[i][j] > 1.0E-6 ) { 
                printf("error: %d, %d: %f\n", i, j, (*udiff)[i][j]);
            }
        }
    }
    error1 = r8mat_rms(matrix_size, matrix_size, udiff_);
    params->succeed = fabs(error - error1) < 1.0E-6;
    if(!params->succeed) {
        printf("error = %f, error1 = %f\n", error, error1);
    }
    free(udiff_);
}
Beispiel #7
0
/*
 * Test of the augmented-system solve for a real sparse matrix A (m-by-n).
 *
 * Builds S = [-I A' ; A anorm*I], solves S*X = [0 ; B] with a simplicial
 * factorization, and checks two residuals:
 *   - the residual of the augmented system itself (via resid), and
 *   - norm ((alpha*I + A*A')*x - b), scaled by norm (B) through MAXERR.
 *
 * Returns the largest error recorded by MAXERR. Returns 1 if A is NULL and 0
 * if A is not of xtype CHOLMOD_REAL. Intermediate results are allowed to be
 * NULL; the NULL checks below keep the routine running in that case.
 */
double aug (cholmod_sparse *A)
{
    double r, maxerr = 0, bnorm, anorm ;
    cholmod_sparse *S, *Im, *In, *At, *A1, *A2, *Sup ;
    cholmod_dense *Alpha, *B, *Baug, *X, *W1, *W2, *R, *X2, X2mat ;
    cholmod_factor *L ;
    double *b, *baug, *rx, *w, *x ;
    Int nrow, ncol, nrhs, i, j, d, d2, save, save2 ;
    /* cm->metis_memory is a floating-point setting (it is assigned 2.0 below),
     * so it is saved in a double to be restored without truncation */
    double save3 ;

    if (A == NULL)
    {
	ERROR (CHOLMOD_INVALID, "cm: no A for aug") ;
	return (1) ;
    }

    if (A->xtype != CHOLMOD_REAL)
    {
	return (0) ;
    }

    /* ---------------------------------------------------------------------- */
    /* A is m-by-n, B must be m-by-nrhs */
    /* ---------------------------------------------------------------------- */

    nrow = A->nrow ;
    ncol = A->ncol ;
    B = rhs (A, 5, A->nrow + 7) ;

    /* ---------------------------------------------------------------------- */
    /* create scalars */
    /* ---------------------------------------------------------------------- */

    bnorm = CHOLMOD(norm_dense) (B, 0, cm) ;
    anorm = CHOLMOD(norm_sparse) (A, 1, cm) ;

    /* Alpha is a 1-by-1 dense matrix holding anorm */
    Alpha = CHOLMOD(eye) (1, 1, CHOLMOD_REAL, cm) ;
    if (Alpha != NULL)
    {
	((double *) (Alpha->x)) [0] = anorm ;
    }

    CHOLMOD(print_dense) (M1, "MinusOne", cm) ;
    CHOLMOD(print_dense) (Alpha, "Alpha = norm(A)", cm) ;

    /* ---------------------------------------------------------------------- */
    /* create augmented system, S = [-I A' ; A anorm*I] */
    /* ---------------------------------------------------------------------- */

    Im = CHOLMOD(speye) (nrow, nrow, CHOLMOD_REAL, cm) ;
    In = CHOLMOD(speye) (ncol, ncol, CHOLMOD_REAL, cm) ;
    CHOLMOD(scale) (Alpha, CHOLMOD_SCALAR, Im, cm) ;
    CHOLMOD(scale) (M1, CHOLMOD_SCALAR, In, cm) ;
    At = CHOLMOD(transpose) (A, 2, cm) ;

    /* use one of two equivalent methods; the parity of nrow picks which */
    if (nrow % 2)
    {
	/* S = [[-In A'] ; [A alpha*Im]] */
	A1 = CHOLMOD(horzcat) (In, At, TRUE, cm) ;
	A2 = CHOLMOD(horzcat) (A,  Im, TRUE, cm) ;
	S = CHOLMOD(vertcat) (A1, A2, TRUE, cm) ;
    }
    else
    {
	/* S = [[-In ; A] [A' ; alpha*Im]] */
	A1 = CHOLMOD(vertcat) (In, A, TRUE, cm) ;
	A2 = CHOLMOD(vertcat) (At, Im, TRUE, cm) ;
	S = CHOLMOD(horzcat) (A1, A2, TRUE, cm) ;
    }

    CHOLMOD(free_sparse) (&Im, cm) ;
    CHOLMOD(free_sparse) (&In, cm) ;

    CHOLMOD(print_sparse) (S, "S, augmented system", cm) ;

    /* make a symmetric (upper) copy of S */
    Sup = CHOLMOD(copy) (S, 1, 1, cm) ;

    CHOLMOD(print_sparse) (S, "S, augmented system (upper)", cm) ;
    CHOLMOD(print_sparse) (Sup, "Sup", cm) ;

    /* ---------------------------------------------------------------------- */
    /* create augmented right-hand-side, Baug = [ zeros(ncol,nrhs) ; B ] */
    /* ---------------------------------------------------------------------- */

    b = NULL ;
    d = 0 ;
    nrhs = 0 ;
    d2 = 0 ;
    if (B != NULL)
    {
	nrhs = B->ncol ;
	d = B->d ;
	b = B->x ;
	Baug = CHOLMOD(zeros) (nrow+ncol, nrhs, CHOLMOD_REAL, cm) ;
	if (Baug != NULL)
	{
	    d2 = Baug->d ;
	    baug = Baug->x ;
	    /* copy B into the bottom nrow rows of Baug */
	    for (j = 0 ; j < nrhs ; j++)
	    {
		for (i = 0 ; i < nrow ; i++)
		{
		    baug [(i+ncol)+j*d2] = b [i+j*d] ;
		}
	    }
	}
    }
    else
    {
	Baug = NULL ;
    }

    /* ---------------------------------------------------------------------- */
    /* solve Sx=baug */
    /* ---------------------------------------------------------------------- */

    /* S is symmetric indefinite, so do not use a supernodal LL' */
    save = cm->supernodal ;
    save2 = cm->final_asis ;
    cm->supernodal = CHOLMOD_SIMPLICIAL ;
    cm->final_asis = TRUE ;
    save3 = cm->metis_memory ;
    cm->metis_memory = 2.0 ;
    L = CHOLMOD(analyze) (Sup, cm) ;
    CHOLMOD(factorize) (Sup, L, cm) ;
    X = CHOLMOD(solve) (CHOLMOD_A, L, Baug, cm) ;
    /* restore the settings changed above */
    cm->supernodal = save ;
    cm->final_asis = save2 ;
    cm->metis_memory = save3 ;

    /* ---------------------------------------------------------------------- */
    /* compute the residual */
    /* ---------------------------------------------------------------------- */

    r = resid (Sup, X, Baug) ;
    MAXERR (maxerr, r, 1) ;

    /* ---------------------------------------------------------------------- */
    /* create a shallow submatrix of X, X2 = X (ncol:end, :)  */
    /* ---------------------------------------------------------------------- */

    if (X == NULL)
    {
	X2 = NULL ;
    }
    else
    {
	/* X2 shares X's storage (offset by ncol rows); it is never freed */
	X2 = &X2mat ;
	X2->nrow = nrow ;
	X2->ncol = nrhs ;
	X2->nzmax = X->nzmax ;
	X2->d = X->d ;
	X2->x = ((double *) X->x) + ncol ;
	X2->z = NULL ;
	X2->xtype = X->xtype ;
	X2->dtype = X->dtype ;
    }

    CHOLMOD(print_dense) (X, "X", cm) ;
    CHOLMOD(print_dense) (X2, "X2 = X (ncol:end,:)", cm) ;

    /* ---------------------------------------------------------------------- */
    /* compute norm ((alpha*I + A*A')*x-b) */
    /* ---------------------------------------------------------------------- */

    /* W1 = A'*X2 */
    W1 = CHOLMOD(zeros) (ncol, nrhs, CHOLMOD_REAL, cm) ;
    CHOLMOD(sdmult) (A, TRUE, one, zero, X2, W1, cm) ;

    /* W2 = A*W1 */
    W2 = CHOLMOD(zeros) (nrow, nrhs, CHOLMOD_REAL, cm) ;
    CHOLMOD(sdmult) (A, FALSE, one, zero, W1, W2, cm) ;

    /* R = alpha*x + w2 - b */
    R = CHOLMOD(zeros) (nrow, nrhs, CHOLMOD_REAL, cm) ;

    if (R != NULL && W2 != NULL && X != NULL)
    {
	w = W2->x ;
	rx = R->x ;
	x = X2->x ;
	for (j = 0 ; j < nrhs ; j++)
	{
	    for (i = 0 ; i < nrow ; i++)
	    {
		rx [i+j*nrow] = anorm * x [i+j*d2] + w [i+j*nrow] - b [i+j*d] ;
	    }
	}
    }

    r = CHOLMOD(norm_dense) (R, 1, cm) ;
    MAXERR (maxerr, r, bnorm) ;

    /* ---------------------------------------------------------------------- */
    /* free everything */
    /* ---------------------------------------------------------------------- */

    CHOLMOD(free_sparse) (&At, cm) ;
    CHOLMOD(free_sparse) (&A1, cm) ;
    CHOLMOD(free_sparse) (&A2, cm) ;
    CHOLMOD(free_sparse) (&S, cm) ;
    CHOLMOD(free_sparse) (&Sup, cm) ;
    CHOLMOD(free_factor) (&L, cm) ;
    CHOLMOD(free_dense) (&R, cm) ;
    CHOLMOD(free_dense) (&W1, cm) ;
    CHOLMOD(free_dense) (&W2, cm) ;
    CHOLMOD(free_dense) (&B, cm) ;
    CHOLMOD(free_dense) (&Baug, cm) ;
    CHOLMOD(free_dense) (&X, cm) ;
    CHOLMOD(free_dense) (&Alpha, cm) ;

    progress (0, '.') ;
    return (maxerr) ;
}
// This routine simply glues together many of the routines that are already
// written in the Poisson solver library
//
// phi( 1:SubNumPhysNodes       ) is a scalar quantity.  
//
// E1 ( 1:NumElems, 1:kmax2d ) is a vector quantity.
// E2 ( 1:NumElems, 1:kmax2d ) is a vector quantity.
//
// See also: ConvertEfieldOntoDGbasis
void ComputeElectricField( const double t, const mesh& Mesh, const dTensorBC5& q,
    dTensor2& E1, dTensor2& E2)
{

    //
    const int       mx = q.getsize(1);   assert_eq(mx,dogParamsCart2.get_mx());
    const int       my = q.getsize(2);   assert_eq(my,dogParamsCart2.get_my());
    const int NumElems = q.getsize(3);
    const int     meqn = q.getsize(4);
    const int     kmax = q.getsize(5);

    const int space_order = dogParams.get_space_order();

    // unstructured parameters:
    const int kmax2d    = E2.getsize(2);
    const int NumBndNodes  = Mesh.get_NumBndNodes();
    const int NumPhysNodes = Mesh.get_NumPhysNodes();

    // Quick error check
    if( !Mesh.get_is_submesh() )
    {
        printf("ERROR: mesh needs to have subfactor set to %d\n", space_order);
        printf("Go to Unstructured mesh and remesh the problem\n");
        exit(-1);
    }
    const int SubFactor    = Mesh.get_SubFactor();

    assert_eq( NumElems, Mesh.get_NumElems() );

    // -- Step 1: Compute rho -- //
    dTensor3 rho(NumElems, 1, kmax2d );
    void ComputeDensity( const mesh& Mesh, const dTensorBC5& q, dTensor3& rho );
    ComputeDensity( Mesh, q, rho );

    // -- Step 2: Figure out how large phi needs to be
    int SubNumPhysNodes = 0;
    int SubNumBndNodes  = 0;
    switch( dogParams.get_space_order() )
    {
        case 1:
            SubNumPhysNodes = NumPhysNodes;
            SubNumBndNodes  = NumBndNodes;
            break;

        case 2:
            SubNumPhysNodes = Mesh.get_SubNumPhysNodes();
            SubNumBndNodes  = Mesh.get_SubNumBndNodes();
            if(SubFactor!=2)
            {
                printf("\n");
                printf(" Error: for space_order = %i, need SubFactor = %i\n",space_order,2);
                printf("      SubFactor = %i\n",SubFactor);
                printf("\n");
                exit(1);
            }
            break;

        case 3:
            SubNumPhysNodes = Mesh.get_SubNumPhysNodes();
            SubNumBndNodes  = Mesh.get_SubNumBndNodes();
            if(SubFactor!=3)
            {
                printf("\n");
                printf(" Error: for space_order = %i, need SubFactor = %i\n",space_order,3);
                printf("      SubFactor = %i\n",SubFactor);
                printf("\n");
                exit(1);
            }
            break;

        default:
            printf("\n");
            printf(" ERROR in RunDogpack_unst.cpp: space_order value not supported.\n");
            printf("       space_order = %i\n",space_order);
            printf("\n");
            exit(1);
    }

    // local storage:
    dTensor1 rhs(SubNumPhysNodes);
    dTensor1 phi(SubNumPhysNodes);

    // Get Cholesky factorization matrix R
    //
    // TODO - this should be saved earlier in the code rather than reading
    // from file every time we with to run a Poisson solve!
    //
    SparseCholesky R(SubNumPhysNodes);
    string outputdir = dogParams.get_outputdir();
    R.init(outputdir);
    R.read(outputdir);

    // Create right-hand side vector
    void Rhs2D_unst(const int space_order,
            const mesh& Mesh, const dTensor3& rhs_dg,
            dTensor1& rhs);
    Rhs2D_unst(space_order, Mesh, rho, rhs);

    // Call Poisson solver  
    void PoissonSolver2D_unst(const int space_order,
            const mesh& Mesh,
            const SparseCholesky& R,
            const dTensor1& rhs,
            dTensor1& phi,
            dTensor2& E1,
            dTensor2& E2);
    PoissonSolver2D_unst(space_order, Mesh, R, rhs, phi, E1, E2);

    // Compare errors with the exact Electric field:
    //
    void L2Project_Unst(
        const double time,
        const dTensor2* vel_vec,
        const int istart, 
        const int iend, 
        const int QuadOrder, 
        const int BasisOrder_qin,
        const int BasisOrder_auxin,
        const int BasisOrder_fout,
        const mesh& Mesh, 
        const dTensor3* qin, 
        const dTensor3* auxin, 
        dTensor3* fout, 
        void (*Func)(const double t, const dTensor2* vel_vec, const dTensor2&,const dTensor2&,
            const dTensor2&,dTensor2&));

    const int sorder = dogParams.get_space_order();
    dTensor3 qtmp   (NumElems, 2, kmax2d );  qtmp.setall(0.);
    dTensor3 auxtmp (NumElems, 0, kmax2d );
    dTensor3 ExactE (NumElems, 2, kmax2d );
    L2Project_Unst( t, NULL, 1, NumElems, 
        sorder, sorder, sorder, sorder, Mesh, 
        &qtmp, &auxtmp, &ExactE, 
        &ExactElectricField );

    // Compute errors on these two:
    //
    double err = 0.;
    for( int n=1; n <= NumElems; n++ )
    for( int k=1; k <= kmax2d;   k++ )
    {
        err += Mesh.get_area_prim(n)*pow( ExactE.get(n,1,k) - E1.get(n,k), 2 );
        err += Mesh.get_area_prim(n)*pow( ExactE.get(n,2,k) - E2.get(n,k), 2 );
    }
    printf("error = %2.15e\n", err );

}
Beispiel #9
0
/* Cholesky update/downdate */
cs_long_t demo3 (problem *Prob)
{
    cs_cl *A, *C, *W = NULL, *WW, *WT, *E = NULL, *W2 ;
    cs_long_t n, k, *Li, *Lp, *Wi, *Wp, p1, p2, *p = NULL, ok ;
    cs_complex_t *b, *x, *resid, *y = NULL, *Lx, *Wx, s ;
    double t, t1 ;
    cs_cls *S = NULL ;
    cs_cln *N = NULL ;
    if (!Prob || !Prob->sym || Prob->A->n == 0) return (0) ;
    A = Prob->A ; C = Prob->C ; b = Prob->b ; x = Prob->x ; resid = Prob->resid;
    n = A->n ;
    if (!Prob->sym || n == 0) return (1) ;
    rhs (x, b, n) ;                             /* compute right-hand side */
    printf ("\nchol then update/downdate ") ;
    print_order (1) ;
    y = cs_cl_malloc (n, sizeof (cs_complex_t)) ;
    t = tic () ;
    S = cs_cl_schol (1, C) ;                       /* symbolic Chol, amd(A+A') */
    printf ("\nsymbolic chol time %8.2f\n", toc (t)) ;
    t = tic () ;
    N = cs_cl_chol (C, S) ;                        /* numeric Cholesky */
    printf ("numeric  chol time %8.2f\n", toc (t)) ;
    if (!S || !N || !y) return (done3 (0, S, N, y, W, E, p)) ;
    t = tic () ;
    cs_cl_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_cl_lsolve (N->L, y) ;                       /* y = L\y */
    cs_cl_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_cl_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    printf ("solve    chol time %8.2f\n", toc (t)) ;
    printf ("original: ") ;
    print_resid (1, C, x, b, resid) ;           /* print residual */
    k = n/2 ;                                   /* construct W  */
    W = cs_cl_spalloc (n, 1, n, 1, 0) ;
    if (!W) return (done3 (0, S, N, y, W, E, p)) ;
    Lp = N->L->p ; Li = N->L->i ; Lx = N->L->x ;
    Wp = W->p ; Wi = W->i ; Wx = W->x ;
    Wp [0] = 0 ;
    p1 = Lp [k] ;
    Wp [1] = Lp [k+1] - p1 ;
    s = Lx [p1] ;
    srand (1) ;
    for ( ; p1 < Lp [k+1] ; p1++)
    {
        p2 = p1 - Lp [k] ;
        Wi [p2] = Li [p1] ;
        Wx [p2] = s * rand () / ((double) RAND_MAX) ;
    }
    t = tic () ;
    ok = cs_cl_updown (N->L, +1, W, S->parent) ;   /* update: L*L'+W*W' */
    t1 = toc (t) ;
    printf ("update:   time: %8.2f\n", t1) ;
    if (!ok) return (done3 (0, S, N, y, W, E, p)) ;
    t = tic () ;
    cs_cl_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_cl_lsolve (N->L, y) ;                       /* y = L\y */
    cs_cl_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_cl_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    t = toc (t) ;
    p = cs_cl_pinv (S->pinv, n) ;
    W2 = cs_cl_permute (W, p, NULL, 1) ;           /* E = C + (P'W)*(P'W)' */
    WT = cs_cl_transpose (W2,1) ;
    WW = cs_cl_multiply (W2, WT) ;
    cs_cl_spfree (WT) ;
    cs_cl_spfree (W2) ;
    E = cs_cl_add (C, WW, 1, 1) ;
    cs_cl_spfree (WW) ;
    if (!E || !p) return (done3 (0, S, N, y, W, E, p)) ;
    printf ("update:   time: %8.2f (incl solve) ", t1+t) ;
    print_resid (1, E, x, b, resid) ;           /* print residual */
    cs_cl_nfree (N) ;                              /* clear N */
    t = tic () ;
    N = cs_cl_chol (E, S) ;                        /* numeric Cholesky */
    if (!N) return (done3 (0, S, N, y, W, E, p)) ;
    cs_cl_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_cl_lsolve (N->L, y) ;                       /* y = L\y */
    cs_cl_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_cl_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    t = toc (t) ;
    printf ("rechol:   time: %8.2f (incl solve) ", t) ;
    print_resid (1, E, x, b, resid) ;           /* print residual */
    t = tic () ;
    ok = cs_cl_updown (N->L, -1, W, S->parent) ;   /* downdate: L*L'-W*W' */
    t1 = toc (t) ;
    if (!ok) return (done3 (0, S, N, y, W, E, p)) ;
    printf ("downdate: time: %8.2f\n", t1) ;
    t = tic () ;
    cs_cl_ipvec (S->pinv, b, y, n) ;               /* y = P*b */
    cs_cl_lsolve (N->L, y) ;                       /* y = L\y */
    cs_cl_ltsolve (N->L, y) ;                      /* y = L'\y */
    cs_cl_pvec (S->pinv, y, x, n) ;                /* x = P'*y */
    t = toc (t) ;
    printf ("downdate: time: %8.2f (incl solve) ", t1+t) ;
    print_resid (1, C, x, b, resid) ;           /* print residual */
    return (done3 (1, S, N, y, W, E, p)) ;
} 
TEST(uri_comparison_test, equality_test_capitalized_scheme_with_case_normalization) {
  // Two URIs that differ only in the letter case of the scheme are expected
  // to compare equal (compare() returns 0) at the syntax_based level.
  network::uri lhs("http://www.example.com/");
  network::uri rhs("HTTP://www.example.com/");
  ASSERT_EQ(lhs.compare(rhs, network::uri_comparison_level::syntax_based), 0);
}
TEST(uri_comparison_test, equality_empty_lhs) {
  // A default-constructed URI is expected to differ from a populated one.
  network::uri lhs;
  network::uri rhs("http://www.example.com/");
  ASSERT_NE(lhs, rhs);
}
TEST(uri_comparison_test, equality_test_capitalized_scheme) {
  // At the string_comparison level, URIs that differ only in the letter case
  // of the scheme are expected to compare unequal (non-zero result).
  network::uri lhs("http://www.example.com/");
  network::uri rhs("HTTP://www.example.com/");
  ASSERT_NE(lhs.compare(rhs, network::uri_comparison_level::string_comparison), 0);
}
TEST(uri_comparison_test, less_than_test) {
  // lhs is lexicographically less than rhs (".com/" sorts before ".org/"),
  // so operator< is expected to order lhs first.
  network::uri lhs("http://www.example.com/");
  network::uri rhs("http://www.example.org/");
  ASSERT_LT(lhs, rhs);
}
TEST(uri_comparison_test, inequality_test) {
  // operator!= is expected to yield false for two URIs built from the same
  // string.
  network::uri lhs("http://www.example.com/");
  network::uri rhs("http://www.example.com/");
  ASSERT_FALSE(lhs != rhs);
}
// Fits a cubic spline through the samples (x[i], y[i]) and evaluates it at
// every position in x_vis, writing the results to y_vis[i].
//
// Parameters:
//   x, y       sample positions and values; x is used as the spline knots and
//              the evaluation below assumes it is sorted in increasing order.
//   l_clamped  non-zero replaces the left "d[0] = 0" end condition with the
//   r_clamped  clamped row set up below; likewise r_clamped for the right end.
//   x_vis      evaluation positions, assumed sorted in increasing order
//              (the interval search only ever moves forward).
//   y_vis      output; grown to x_vis.size() if it is shorter, never shrunk.
//
// With fewer than two samples, or with fewer y values than x values, there is
// no interval to interpolate on and y_vis is left untouched. Positions outside
// [x.front(), x.back()] are evaluated with the first or last polynomial.
void DisparityProc::interpolate_natural_cubic_spline (
	// input
	std::vector<double> const & x,
	std::vector<double> const & y,
	int l_clamped,
	int r_clamped,
	std::vector<double> const & x_vis,
	// output
	std::vector<double> & y_vis )
{
  const int N = static_cast<int>(x.size());

  // Guard against inputs for which the index arithmetic below (N-1, N-2,
  // y[i+1]) would leave the valid range.
  if (N < 2 || y.size() < x.size())
    return;

  // Results are stored by index, so the output must hold one value per query.
  if (y_vis.size() < x_vis.size())
    y_vis.resize(x_vis.size());

  // Assemble the system; note that it is a tridiagonal one, so we store it as 3 vectors
  std::vector<double> lower(N-1, 0);
  std::vector<double> diag(N, 0);
  std::vector<double> upper(N-1, 0);
  std::vector<double> rhs(N);

  // Compute difference between data points
  std::vector<double> delta(N-1, 0);
  for (std::size_t i = 0; i < delta.size(); ++i)
    delta[i] = x[i+1] - x[i];

  // First row: clamped condition, or d[0] = 0 (rhs[0] stays 0)
  if (l_clamped) {
    upper[0] = (delta[0])/6;
    diag[0] = (delta[0])/3;
    rhs[0] = (y[1] - y[0])/(delta[0]);
  }
  else
    diag[0] = 1;

  // Interior rows: one equation per interior knot
  for (int i = 1; i < N - 1; ++i) {
    upper[i] = (delta[i])/6;
    lower[i-1] = (delta[i-1])/6;
    diag[i] = (x[i+1] - x[i-1])/3;
    rhs[i] = ((y[i+1] - y[i])/delta[i]) - ((y[i] - y[i-1])/delta[i-1]);
  }

  // Last row: clamped condition, or d[N-1] = 0 (rhs[N-1] stays 0)
  if (r_clamped) {
    diag[N-1] = -(delta[N-2])/3;
    lower[N-2] = -(delta[N-2])/6;
    rhs[N-1] = (y[N-1] - y[N-2])/(delta[N-2]);
  }
  else
    diag[N-1] = 1;

  // Solve the system and store the result in d
  std::vector<double> d(N,0);
  solve_thomas(lower,diag,upper,rhs,d);

  // Evaluate the interpolated curve at x_vis and store data in y_vis.
  // We assume a sorted data list -> maybe implement later.
  // `counter` is the index of the right knot of the interval in use. It starts
  // at 1 so that counter-1 is always a valid index, even for a query left of
  // x[0], and it is capped at N-1 for queries at or beyond the last knot.
  int counter = 1;
  for (std::size_t i = 0; i < x_vis.size(); ++i) {
    while (counter < N - 1 && x_vis[i] >= x[counter])
      ++counter;

    const double h = delta[counter-1];
    y_vis[i] =	d[counter-1] * ((pow(x[counter] - x_vis[i], 3))/(6*h)) +
		d[counter]*((pow(x_vis[i] - x[counter-1], 3))/(6*h)) +
		((y[counter] - y[counter-1])/h - (d[counter] -
		d[counter-1])*(h)/6)*(x_vis[i] - x[counter-1]) + (y[counter-1] -
		d[counter-1]*(h*h/6));
  }
}
Beispiel #16
0
/**
 Recursively builds a regression (sub)tree for the given training data.

 @param trainingData the samples that reached this node
 @param parent the node the new node hangs under, or NULL for the root
 @param features the feature indices that may still be used for splitting (taken by value, so removals only affect this subtree)
 @param nodeID the ID stored in the new node; the same value is passed on to both children
 @return the new node, or NULL if there is no training data, the node could not be created or no split could be computed
*/
RegressionTreeNode* RegressionTree::buildTree(const RegressionData &trainingData,RegressionTreeNode *parent,Vector< UINT > features,UINT nodeID){
    
    const UINT numSamples = trainingData.getNumSamples();
    const UINT numInputs = trainingData.getNumInputDimensions();
    const UINT numTargets = trainingData.getNumTargetDimensions();
    VectorFloat nodeData(numTargets);
    
    //The root sits at depth 0, every other node one level below its parent
    const UINT depth = ( parent != NULL ) ? parent->getDepth() + 1 : 0;
    
    //Nothing to build without training data
    if( numSamples == 0 ){
        return NULL;
    }
    
    //Create and initialise the new node
    RegressionTreeNode *node = new RegressionTreeNode;
    if( node == NULL ){
        return NULL;
    }
    node->initNode( parent, depth, nodeID );
    
    //Stop splitting when no features are left, too few samples remain or the tree is deep enough
    const bool stopSplitting = features.size() == 0 || numSamples < minNumSamplesPerNode || depth >= maxDepth;
    if( stopSplitting ){
        node->setIsLeafNode( true );
        
        //Store the regression data for these samples at the leaf
        computeNodeRegressionData( trainingData, nodeData );
        node->set( numSamples, 0, 0, nodeData );
        
        Regressifier::trainingLog << "Reached leaf node. Depth: " << depth << " NumSamples: " << numSamples << std::endl;
        
        return node;
    }
    
    //Search for the best split; give up on this node if none can be computed
    UINT featureIndex = 0;
    Float threshold = 0;
    Float minError = 0;
    const bool splitFound = computeBestSpilt( trainingData, features, featureIndex, threshold, minError );
    if( !splitFound ){
        delete node;
        return NULL;
    }
    
    Regressifier::trainingLog << "Depth: " << depth << " FeatureIndex: " << featureIndex << " Threshold: " << threshold << " MinError: " << minError << std::endl;
    
    //A small enough error also ends the recursion: store the regression data and return
    if( minError <= minRMSErrorPerNode ){
        computeNodeRegressionData( trainingData, nodeData );
        node->set( numSamples, featureIndex, threshold, nodeData );
        
        Regressifier::trainingLog << "Reached leaf node. Depth: " << depth << " NumSamples: " << numSamples << std::endl;
        
        return node;
    }
    
    //Internal node: record the split
    node->set( numSamples, featureIndex, threshold, nodeData );
    
    //Optionally drop the chosen feature so the subtree below cannot reuse it
    if( removeFeaturesAtEachSpilt ){
        UINT pos = 0;
        while( pos < features.getSize() && features[pos] != featureIndex ){
            pos++;
        }
        if( pos < features.getSize() ){
            features.erase( features.begin()+pos );
        }
    }
    
    //Partition the samples: those the node predicts true for go right, the rest go left
    RegressionData leftData(numInputs,numTargets);
    RegressionData rightData(numInputs,numTargets);
    
    for(UINT s=0; s<numSamples; s++){
        RegressionData &side = node->predict( trainingData[s].getInputVector() ) ? rightData : leftData;
        side.addSample(trainingData[s].getInputVector(), trainingData[s].getTargetVector());
    }
    
    //Recurse into the children, left first
    node->setLeftChild( buildTree( leftData, node, features, nodeID ) );
    node->setRightChild( buildTree( rightData, node, features, nodeID ) );
    
    return node;
}
Beispiel #17
0
double run(struct user_parameters* params)
{
    int matrix_size = params->matrix_size;
    if (matrix_size <= 0) {
        matrix_size = 512;
        params->matrix_size = matrix_size;
    }
    int block_size = params->blocksize;
    if (block_size <= 0) {
        block_size = 128;
        params->blocksize = block_size;
    }
    if ( (matrix_size % block_size) || (matrix_size % block_size) ) {
        params->succeed = 0;
        params->string2display = "*****ERROR: blocsize must divide NX and NY";
        return 0;
    }
    int niter = params->titer;
    if (niter <= 0) {
        niter = 4;
        params->titer = niter;
    }
    int ii,i,jj,j;
    double *f_ = (double*)malloc(matrix_size * matrix_size * sizeof(double));
    double (*f)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])f_;
    double *u_ = (double*)malloc(matrix_size * matrix_size * sizeof(double));
    double *unew_ = (double*)malloc(matrix_size * matrix_size * sizeof(double));
    double (*unew)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])unew_;
    double (*u)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])u_;

    double dx = 1.0 / (double) (matrix_size - 1);
    double dy = 1.0 / (double) (matrix_size - 1);

    rhs(matrix_size, matrix_size, f_, block_size);

    //Set the initial solution estimate UNEW.
    //We are "allowed" to pick up the boundary conditions exactly.
#pragma omp parallel
#pragma omp master
    for (j = 0; j < matrix_size; j+= block_size)
        for (i = 0; i < matrix_size; i+= block_size)
#pragma omp task firstprivate(i,j) private(ii,jj)
            for (jj=j; jj<j+block_size; ++jj)
                for (ii=i; ii<i+block_size; ++ii) {
                    if (ii == 0 || ii == matrix_size - 1 || jj == 0 || jj == matrix_size - 1) {
                        (*unew)[ii][jj] = (*f)[ii][jj];
                        (*u)[ii][jj] = (*f)[ii][jj];
                    } else {
                        (*unew)[ii][jj] = 0.0;
                        (*u)[ii][jj] = 0.0;
                    }
                }
    /// KERNEL INTENSIVE COMPUTATION
    START_TIMER;
#ifndef _OPENMP
    sweep_seq(matrix_size, matrix_size, dx, dy, f_, 0, niter, u_, unew_);
#else
    sweep(matrix_size, matrix_size, dx, dy, f_, 0, niter, u_, unew_, block_size);
#endif
    END_TIMER;

#ifdef _OPENMP
    if(params->check) {
        check_params(params, matrix_size, block_size, dx, dy, f_, niter, u_, unew_) ;
    }
#else
    params->succeed = 1;
#endif
    free(f_);
    free(u_);
    free(unew_);
    return TIMER;
}
// Runs the bitset_test checks for boost::dynamic_bitset<Block> against a small
// set of fixtures: an empty bitset, one-bit bitsets, and a 101-bit bitset with
// alternating bits.  Each fixture lives in its own scope so every check starts
// from freshly constructed objects.
// NOTE(review): `Block` is not declared in the visible lines; presumably it is
// a template parameter declared just before this function -- confirm.
void run_test_cases()
{
  typedef bitset_test< boost::dynamic_bitset<Block> > Tests;

  // 101 characters alternating '0','1',... starting with '0' at index 0.
  std::string long_string(101, '0');
  for (std::size_t i = 0; i < long_string.size(); ++i)
    long_string[i] = '0' + (i % 2);

  // Number of bits in an unsigned long.
  std::size_t ul_size = CHAR_BIT * sizeof(unsigned long);

  //=====================================================================
  // Test b.to_ulong()
  {
    boost::dynamic_bitset<Block> b;
    Tests::to_ulong(b);
  }
  { // exactly as many bits as an unsigned long, all set
    std::string ul_str(ul_size, '1');
    boost::dynamic_bitset<Block> b(ul_str);
    Tests::to_ulong(b);
  }
  { // case overflow
    boost::dynamic_bitset<Block> b(long_string);
    Tests::to_ulong(b);
  }
  //=====================================================================
  // Test to_string(b, str)
  {
    boost::dynamic_bitset<Block> b;
    Tests::to_string(b);
  }
  {
    boost::dynamic_bitset<Block> b(std::string("0"));
    Tests::to_string(b);
  }
  {
    boost::dynamic_bitset<Block> b(long_string);
    Tests::to_string(b);
  }
  //=====================================================================
  // Test b.count()
  {
    boost::dynamic_bitset<Block> b;
    Tests::count(b);
  }
  {
    boost::dynamic_bitset<Block> b(std::string("0"));
    Tests::count(b);
  }
  {
    boost::dynamic_bitset<Block> b(long_string);
    Tests::count(b);
  }
  //=====================================================================
  // Test b.size()
  {
    boost::dynamic_bitset<Block> b;
    Tests::size(b);
  }
  {
    boost::dynamic_bitset<Block> b(std::string("0"));
    Tests::size(b);
  }
  {
    boost::dynamic_bitset<Block> b(long_string);
    Tests::size(b);
  }
  //=====================================================================
  // Test b.any()
  {
    boost::dynamic_bitset<Block> b;
    Tests::any(b);
  }
  {
    boost::dynamic_bitset<Block> b(std::string("0"));
    Tests::any(b);
  }
  {
    boost::dynamic_bitset<Block> b(long_string);
    Tests::any(b);
  }
  //=====================================================================
  // Test b.none()
  {
    boost::dynamic_bitset<Block> b;
    Tests::none(b);
  }
  {
    boost::dynamic_bitset<Block> b(std::string("0"));
    Tests::none(b);
  }
  {
    boost::dynamic_bitset<Block> b(long_string);
    Tests::none(b);
  }
  //=====================================================================
  // Test a.is_subset_of(b)
  // Fixtures: equal operands, then one operand with its middle bit flipped.
  {
    boost::dynamic_bitset<Block> a, b;
    Tests::subset(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0"));
    Tests::subset(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1"));
    Tests::subset(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    Tests::subset(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    a[long_string.size()/2].flip();
    Tests::subset(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    b[long_string.size()/2].flip();
    Tests::subset(a, b);
  }
  //=====================================================================
  // Test a.is_proper_subset_of(b)
  {
    boost::dynamic_bitset<Block> a, b;
    Tests::proper_subset(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0"));
    Tests::proper_subset(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1"));
    Tests::proper_subset(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    Tests::proper_subset(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    a[long_string.size()/2].flip();
    Tests::proper_subset(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    b[long_string.size()/2].flip();
    Tests::proper_subset(a, b);
  }
  //=====================================================================
  // Test operator==
  {
    boost::dynamic_bitset<Block> a, b;
    Tests::operator_equal(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0"));
    Tests::operator_equal(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1"));
    Tests::operator_equal(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    Tests::operator_equal(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    a[long_string.size()/2].flip();
    Tests::operator_equal(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    b[long_string.size()/2].flip();
    Tests::operator_equal(a, b);
  }
  //=====================================================================
  // Test operator!=
  {
    boost::dynamic_bitset<Block> a, b;
    Tests::operator_not_equal(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0"));
    Tests::operator_not_equal(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1"));
    Tests::operator_not_equal(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    Tests::operator_not_equal(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    a[long_string.size()/2].flip();
    Tests::operator_not_equal(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    b[long_string.size()/2].flip();
    Tests::operator_not_equal(a, b);
  }
  //=====================================================================
  // Test operator<
  {
    boost::dynamic_bitset<Block> a, b;
    Tests::operator_less_than(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0"));
    Tests::operator_less_than(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1"));
    Tests::operator_less_than(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    Tests::operator_less_than(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    a[long_string.size()/2].flip();
    Tests::operator_less_than(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    b[long_string.size()/2].flip();
    Tests::operator_less_than(a, b);
  }
  // check for consistency with ulong behaviour
  // (3-bit bitsets built from 4 and 5 must order like the integers 4 and 5)
  {
    boost::dynamic_bitset<Block> a(3, 4ul), b(3, 5ul);
    assert(a < b);
  }
  {
    boost::dynamic_bitset<Block> a(3, 4ul), b(3, 4ul);
    assert(!(a < b));
  }
  {
    boost::dynamic_bitset<Block> a(3, 5ul), b(3, 4ul);
    assert(!(a < b));
  }
  //=====================================================================
  // Test operator<=
  {
    boost::dynamic_bitset<Block> a, b;
    Tests::operator_less_than_eq(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0"));
    Tests::operator_less_than_eq(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1"));
    Tests::operator_less_than_eq(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    Tests::operator_less_than_eq(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    a[long_string.size()/2].flip();
    Tests::operator_less_than_eq(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    b[long_string.size()/2].flip();
    Tests::operator_less_than_eq(a, b);
  }
  // check for consistency with ulong behaviour
  {
    boost::dynamic_bitset<Block> a(3, 4ul), b(3, 5ul);
    assert(a <= b);
  }
  {
    boost::dynamic_bitset<Block> a(3, 4ul), b(3, 4ul);
    assert(a <= b);
  }
  {
    boost::dynamic_bitset<Block> a(3, 5ul), b(3, 4ul);
    assert(!(a <= b));
  }
  //=====================================================================
  // Test operator>
  {
    boost::dynamic_bitset<Block> a, b;
    Tests::operator_greater_than(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0"));
    Tests::operator_greater_than(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1"));
    Tests::operator_greater_than(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    Tests::operator_greater_than(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    a[long_string.size()/2].flip();
    Tests::operator_greater_than(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    b[long_string.size()/2].flip();
    Tests::operator_greater_than(a, b);
  }
  // check for consistency with ulong behaviour
  {
    boost::dynamic_bitset<Block> a(3, 4ul), b(3, 5ul);
    assert(!(a > b));
  }
  {
    boost::dynamic_bitset<Block> a(3, 4ul), b(3, 4ul);
    assert(!(a > b));
  }
  {
    boost::dynamic_bitset<Block> a(3, 5ul), b(3, 4ul);
    assert(a > b);
  }
  //=====================================================================
  // Test operator>=
  {
    boost::dynamic_bitset<Block> a, b;
    Tests::operator_greater_than_eq(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0"));
    Tests::operator_greater_than_eq(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1"));
    Tests::operator_greater_than_eq(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    Tests::operator_greater_than_eq(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    a[long_string.size()/2].flip();
    Tests::operator_greater_than_eq(a, b);
  }
  {
    boost::dynamic_bitset<Block> a(long_string), b(long_string);
    b[long_string.size()/2].flip();
    Tests::operator_greater_than_eq(a, b);
  }
  // check for consistency with ulong behaviour
  {
    boost::dynamic_bitset<Block> a(3, 4ul), b(3, 5ul);
    assert(!(a >= b));
  }
  {
    boost::dynamic_bitset<Block> a(3, 4ul), b(3, 4ul);
    assert(a >= b);
  }
  {
    boost::dynamic_bitset<Block> a(3, 5ul), b(3, 4ul);
    assert(a >= b);
  }
  //=====================================================================
  // Test b.test(pos)
  { // case pos >= b.size()
    boost::dynamic_bitset<Block> b;
    Tests::test_bit(b, 0);
  }
  { // case pos < b.size()
    boost::dynamic_bitset<Block> b(std::string("0"));
    Tests::test_bit(b, 0);
  }
  { // case pos == b.size() / 2
    boost::dynamic_bitset<Block> b(long_string);
    Tests::test_bit(b, long_string.size()/2);
  }
  //=====================================================================
  // Test b << pos
  { // case pos == 0
    std::size_t pos = 0;
    boost::dynamic_bitset<Block> b(std::string("1010"));
    Tests::operator_shift_left(b, pos);
  }
  { // case pos == size()/2
    std::size_t pos = long_string.size() / 2;
    boost::dynamic_bitset<Block> b(long_string);
    Tests::operator_shift_left(b, pos);
  }
  { // case pos >= n
    std::size_t pos = long_string.size();
    boost::dynamic_bitset<Block> b(long_string);
    Tests::operator_shift_left(b, pos);
  }
  //=====================================================================
  // Test b >> pos
  { // case pos == 0
    std::size_t pos = 0;
    boost::dynamic_bitset<Block> b(std::string("1010"));
    Tests::operator_shift_right(b, pos);
  }
  { // case pos == size()/2
    std::size_t pos = long_string.size() / 2;
    boost::dynamic_bitset<Block> b(long_string);
    Tests::operator_shift_right(b, pos);
  }
  { // case pos >= n
    std::size_t pos = long_string.size();
    boost::dynamic_bitset<Block> b(long_string);
    Tests::operator_shift_right(b, pos);
  }
  //=====================================================================
  // Test a & b
  // The binary-operator fixtures pair operands of equal size: empty/empty,
  // "1"/"0", and a size-and-value constructed lhs against long_string.
  {
    boost::dynamic_bitset<Block> lhs, rhs;
    Tests::operator_and(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(std::string("1")), rhs(std::string("0"));
    Tests::operator_and(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(long_string.size(), 0), rhs(long_string);
    Tests::operator_and(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(long_string.size(), 1), rhs(long_string);
    Tests::operator_and(lhs, rhs);
  }
  //=====================================================================
  // Test a | b
  {
    boost::dynamic_bitset<Block> lhs, rhs;
    Tests::operator_or(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(std::string("1")), rhs(std::string("0"));
    Tests::operator_or(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(long_string.size(), 0), rhs(long_string);
    Tests::operator_or(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(long_string.size(), 1), rhs(long_string);
    Tests::operator_or(lhs, rhs);
  }
  //=====================================================================
  // Test a^b
  {
    boost::dynamic_bitset<Block> lhs, rhs;
    Tests::operator_xor(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(std::string("1")), rhs(std::string("0"));
    Tests::operator_xor(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(long_string.size(), 0), rhs(long_string);
    Tests::operator_xor(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(long_string.size(), 1), rhs(long_string);
    Tests::operator_xor(lhs, rhs);
  }
  //=====================================================================
  // Test a-b
  {
    boost::dynamic_bitset<Block> lhs, rhs;
    Tests::operator_sub(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(std::string("1")), rhs(std::string("0"));
    Tests::operator_sub(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(long_string.size(), 0), rhs(long_string);
    Tests::operator_sub(lhs, rhs);
  }
  {
    boost::dynamic_bitset<Block> lhs(long_string.size(), 1), rhs(long_string);
    Tests::operator_sub(lhs, rhs);
  }
  //=====================================================================
  // Test stream operator<< and operator>>
  // x is pre-sized to b.size() before being handed to the round-trip check.
  {
    boost::dynamic_bitset<Block> b;
    boost::dynamic_bitset<Block> x(b.size());
    Tests::stream_read_write(b, x);
  }
  {
    boost::dynamic_bitset<Block> b(std::string("0"));
    boost::dynamic_bitset<Block> x(b.size());
    Tests::stream_read_write(b, x);
  }
  {
    boost::dynamic_bitset<Block> b(long_string);
    boost::dynamic_bitset<Block> x(b.size());
    Tests::stream_read_write(b, x);
  }
}
    void ContinuousArithmeticAsianVecerEngine::calculate() const {
        Real expectedAverage;

        QL_REQUIRE(arguments_.averageType == Average::Arithmetic,
                   "not an Arithmetic average option");
        QL_REQUIRE(arguments_.exercise->type() == Exercise::European,
                   "not an European Option");

        DayCounter rfdc  = process_->riskFreeRate()->dayCounter();
        DayCounter divdc = process_->dividendYield()->dayCounter();
        DayCounter voldc = process_->blackVolatility()->dayCounter();
        Real S_0 = process_->stateVariable()->value();

        // payoff
        ext::shared_ptr<StrikedTypePayoff> payoff =
            ext::dynamic_pointer_cast<StrikedTypePayoff>(arguments_.payoff);
        QL_REQUIRE(payoff, "non-plain payoff given");

        // original time to maturity
        Date maturity = arguments_.exercise->lastDate();

        Real X = payoff->strike();
        QL_REQUIRE(z_min_<=0 && z_max_>=0,
                   "strike (0 for vecer fixed strike asian)  not on Grid");

        Volatility sigma =
            process_->blackVolatility()->blackVol(maturity, X);

        Rate r = process_->riskFreeRate()->
            zeroRate(maturity, rfdc, Continuous, NoFrequency);
        Rate q = process_->dividendYield()->
            zeroRate(maturity, divdc, Continuous, NoFrequency);

        Date today(Settings::instance().evaluationDate());

        QL_REQUIRE(startDate_>=today,
                   "Seasoned Asian not yet implemented");

        // Expiry in Years
        Time T = rfdc.yearFraction(today,
                                   arguments_.exercise->lastDate());
        Time T1 = rfdc.yearFraction(today,
                                    startDate_ );            // Average Begin
        Time T2 = T;            // Average End (In this version only Maturity...)

        if ((T2 - T1) < 0.001) {
            // its a vanilla option. Use vanilla engine
            VanillaOption europeanOption(payoff, arguments_.exercise);
            europeanOption.setPricingEngine(
                        ext::make_shared<AnalyticEuropeanEngine>(process_));
            results_.value = europeanOption.NPV();

        } else {
            Real Theta = 0.5;        // Mixed Scheme: 0.5 = Crank Nicolson
            Real Z_0 = cont_strategy(0,T1,T2,q,r) - std::exp(-r*T) * X /S_0;

            QL_REQUIRE(Z_0>=z_min_ && Z_0<=z_max_,
                       "spot not on grid");

            Real h = (z_max_ - z_min_) / assetSteps_; // Space step size
            Real k = T / timeSteps_;         // Time Step size

            Real sigma2 = sigma * sigma, vecerTerm;

            Array SVec(assetSteps_+1),u_initial(assetSteps_+1),
                  u(assetSteps_+1),rhs(assetSteps_+1);

            for (Natural i= 0; i<= SVec.size()-1;i++) {
                SVec[i] = z_min_ + i * h;     // Value of Underlying on the grid
            }

            // Begin gamma construction
            TridiagonalOperator gammaOp = DPlusDMinus(assetSteps_+1,h);

            Array upperD = gammaOp.upperDiagonal();
            Array lowerD = gammaOp.lowerDiagonal();
            Array Dia    = gammaOp.diagonal();

            // Construct Vecer operator
            TridiagonalOperator explicit_part(gammaOp.size());
            TridiagonalOperator implicit_part(gammaOp.size());

            for (Natural i= 0; i<= SVec.size()-1;i++) {
                u_initial[i] = std::max<Real>(SVec[i] , 0.0); // Call Payoff
            }

            u = u_initial;

            // Start Time Loop

            for (Natural j = 1; j<=timeSteps_;j++) {
                if (Theta != 1.0) { // Explicit Part
                    for (Natural i = 1; i<= SVec.size()-2;i++) {
                        vecerTerm = SVec[i] - std::exp(-q * (T-(j-1)*k))
                                  * cont_strategy(T-(j-1)*k,T1,T2,q,r);
                        gammaOp.setMidRow(i,
                            0.5 * sigma2 * vecerTerm * vecerTerm  * lowerD[i-1],
                            0.5 * sigma2 * vecerTerm * vecerTerm  * Dia[i],
                            0.5 * sigma2 *  vecerTerm * vecerTerm * upperD[i]);
                    }
                    explicit_part = gammaOp.identity(gammaOp.size()) +
                                    (1 - Theta) * k * gammaOp;
                    explicit_part.setFirstRow(1.0,0.0); // Apply before applying
                    explicit_part.setLastRow(-1.0,1.0); // Neumann BC

                    u = explicit_part.applyTo(u);

                    // Apply after applying (Neumann BC)
                    u[assetSteps_] = u[assetSteps_-1] + h;
                    u[0] = 0;
                } // End Explicit Part

                if (Theta != 0.0) {  // Implicit Part
                    for (Natural i = 1; i<= SVec.size()-2;i++) {
                        vecerTerm = SVec[i] - std::exp(-q * (T-j*k)) *
                                    cont_strategy(T-j*k,T1,T2,q,r);
                        gammaOp.setMidRow(i,
                            0.5 * sigma2 * vecerTerm * vecerTerm * lowerD[i-1],
                            0.5 * sigma2 * vecerTerm * vecerTerm  * Dia[i],
                            0.5 * sigma2 * vecerTerm * vecerTerm * upperD[i]);
                    }

                    implicit_part = gammaOp.identity(gammaOp.size()) -
                                    Theta * k * gammaOp;

                    // Apply before solving
                    implicit_part.setFirstRow(1.0,0.0);
                    implicit_part.setLastRow(-1.0,1.0);
                    rhs = u;
                    rhs[0] = 0; // Lower BC
                    rhs[assetSteps_] = h; // Upper BC (Neumann) Delta=1
                    u = implicit_part.solveFor(rhs);
                } // End implicit Part
            } // End Time Loop

            DownRounding Rounding(0);
            Integer lowerI = Integer(Rounding( (Z_0-z_min_)/h));
            // Interpolate solution
            Real pv;

            pv = u[lowerI] + (u[lowerI+1] - u[lowerI]) * (Z_0 - SVec[lowerI])/h;
            results_.value = S_0 * pv;

            if (payoff->optionType()==Option::Put) {
                // Apply Call Put Parity for Asians
                if (r == q) {
                    expectedAverage = S_0;
                } else {
                    expectedAverage =
                        S_0 * (std::exp( (r-q) * T2) -
                               std::exp( (r-q) * T1)) / ((r-q) * (T2-T1));
                }

                Real asianForward = std::exp(-r * T2) * (expectedAverage -  X);
                results_.value = results_.value - asianForward;
            }
        }
    }
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
// Assembles the edge-based contributions for the scalar scalarQ_.
//
// For every locally owned, active edge in partVec_ this builds a 2x2 left-hand
// side and a 2-entry right-hand side from the diffusive flux (including a
// non-orthogonal correction) and the advective flux (a Peclet-blended mix of
// upwind and central interpolation), then passes them to apply_coeff together
// with the edge's two nodes.
//
// lhs is indexed as [0],[1] for the left-node row and [2],[3] for the
// right-node row; rhs is indexed [0] left, [1] right.
void
AssembleScalarEdgeSolverAlgorithm::execute()
{

  stk::mesh::BulkData & bulk_data = realm_.bulk_data();
  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // guards the division by diffIp below; also handed to van_leer
  const double small = 1.0e-16;

  // extract user advection options (allow to potentially change over time)
  const std::string dofName = scalarQ_->name();
  const double hybridFactor = realm_.get_hybrid_factor(dofName);
  const double alpha = realm_.get_alpha_factor(dofName);
  const double alphaUpw = realm_.get_alpha_upw_factor(dofName);
  const double hoUpwind = realm_.get_upw_factor(dofName);
  const bool useLimiter = realm_.primitive_uses_limiter(dofName);

  // one minus flavor
  const double om_alpha = 1.0-alpha;
  const double om_alphaUpw = 1.0-alphaUpw;

  // space for LHS/RHS; always edge connectivity
  const int nodesPerEdge = 2;
  const int lhsSize = nodesPerEdge*nodesPerEdge;
  const int rhsSize = nodesPerEdge;
  std::vector<double> lhs(lhsSize);
  std::vector<double> rhs(rhsSize);
  std::vector<stk::mesh::Entity> connected_nodes(2);

  // area vector; gather into
  std::vector<double> areaVec(nDim);

  // pointer for fast access
  double *p_lhs = &lhs[0];
  double *p_rhs = &rhs[0];
  double *p_areaVec = &areaVec[0];

  // deal with state
  ScalarFieldType &scalarQNp1  = scalarQ_->field_of_state(stk::mesh::StateNP1);
  ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1);

  // define some common selectors
  // (locally owned AND in partVec_ AND not inactive)
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    & stk::mesh::selectUnion(partVec_) 
    & !(realm_.get_inactive_selector());

  stk::mesh::BucketVector const& edge_buckets =
    realm_.get_buckets( stk::topology::EDGE_RANK, s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = edge_buckets.begin();
        ib != edge_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;
    const stk::mesh::Bucket::size_type length   = b.size();

    // pointer to edge area vector and mdot
    const double * av = stk::mesh::field_data(*edgeAreaVec_, b);
    const double * mdot = stk::mesh::field_data(*massFlowRate_, b);

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // zeroing of lhs/rhs
      for ( int i = 0; i < lhsSize; ++i ) {
        p_lhs[i] = 0.0;
      }
      for ( int i = 0; i < rhsSize; ++i ) {
        p_rhs[i] = 0.0;
      }

      // get edge
      stk::mesh::Entity edge = b[k];

      stk::mesh::Entity const * edge_node_rels = bulk_data.begin_nodes(edge);

      // sanity check on number of nodes
      ThrowAssert( bulk_data.num_nodes(edge) == 2 );

      // copy this edge's area vector (nDim entries per edge) and mass flow rate
      for ( int j = 0; j < nDim; ++j )
        p_areaVec[j] = av[k*nDim+j];
      const double tmdot = mdot[k];

      // left and right nodes
      stk::mesh::Entity nodeL = edge_node_rels[0];
      stk::mesh::Entity nodeR = edge_node_rels[1];

      connected_nodes[0] = nodeL;
      connected_nodes[1] = nodeR;

      // extract nodal fields
      const double * coordL = stk::mesh::field_data(*coordinates_, nodeL);
      const double * coordR = stk::mesh::field_data(*coordinates_, nodeR);

      const double * dqdxL = stk::mesh::field_data(*dqdx_, nodeL);
      const double * dqdxR = stk::mesh::field_data(*dqdx_, nodeR);

      const double * vrtmL = stk::mesh::field_data(*velocityRTM_, nodeL);
      const double * vrtmR = stk::mesh::field_data(*velocityRTM_, nodeR);

      const double qNp1L = *stk::mesh::field_data(scalarQNp1, nodeL);
      const double qNp1R = *stk::mesh::field_data(scalarQNp1, nodeR);

      const double densityL = *stk::mesh::field_data(densityNp1, nodeL);
      const double densityR = *stk::mesh::field_data(densityNp1, nodeR);

      const double diffFluxCoeffL = *stk::mesh::field_data(*diffFluxCoeff_, nodeL);
      const double diffFluxCoeffR = *stk::mesh::field_data(*diffFluxCoeff_, nodeR);

      // compute geometry
      //   asq   = A.A          (squared area magnitude)
      //   axdx  = A.dx         (area vector dotted with the edge vector)
      //   udotx = u_avg.dx     (node-averaged velocity dotted with the edge vector)
      double axdx = 0.0;
      double asq = 0.0;
      double udotx = 0.0;
      for ( int j = 0; j < nDim; ++j ) {
        const double axj = p_areaVec[j];
        const double dxj = coordR[j] - coordL[j];
        asq += axj*axj;
        axdx += axj*dxj;
        udotx += 0.5*dxj*(vrtmL[j] + vrtmR[j]);
      }

      // NOTE(review): no guard against axdx == 0 here
      const double inv_axdx = 1.0/axdx;

      // ip props
      const double viscIp = 0.5*(diffFluxCoeffL + diffFluxCoeffR);
      const double diffIp = 0.5*(diffFluxCoeffL/densityL + diffFluxCoeffR/densityR);

      // Peclet factor
      // mapped through pec^2/(5 + pec^2), so the blending weight lies in [0,1)
      double pecfac = hybridFactor*udotx/(diffIp+small);
      pecfac = pecfac*pecfac/(5.0 + pecfac*pecfac);
      const double om_pecfac = 1.0-pecfac;

      // left and right extrapolation; add in diffusion calc
      double dqL = 0.0;
      double dqR = 0.0;
      double nonOrth = 0.0;
      for ( int j = 0; j < nDim; ++j ) {
        const double dxj = coordR[j] - coordL[j];
        dqL += 0.5*dxj*dqdxL[j];
        dqR += 0.5*dxj*dqdxR[j];
        // now non-orth (over-relaxed procedure of Jasak)
        const double axj = p_areaVec[j];
        const double kxj = axj - asq*inv_axdx*dxj;
        const double GjIp = 0.5*(dqdxL[j] + dqdxR[j]);
        nonOrth += -viscIp*kxj*GjIp;
      }

      // add limiter if appropriate
      double limitL = 1.0;
      double limitR = 1.0;
      const double dq = qNp1R - qNp1L;
      if ( useLimiter ) {
        // dqL/dqR carry a factor 0.5, hence the 2.0*2.0 scaling here
        const double dqMl = 2.0*2.0*dqL - dq;
        const double dqMr = 2.0*2.0*dqR - dq;
        limitL = van_leer(dqMl, dq, small);
        limitR = van_leer(dqMr, dq, small);
      }
      
      // extrapolated; for now limit
      const double qIpL = qNp1L + dqL*hoUpwind*limitL;
      const double qIpR = qNp1R - dqR*hoUpwind*limitR;

      //====================================
      // diffusive flux
      //====================================
      double lhsfac = -viscIp*asq*inv_axdx;
      double diffFlux = lhsfac*(qNp1R - qNp1L) + nonOrth;

      // first left
      p_lhs[0] = -lhsfac;
      p_lhs[1] = +lhsfac;
      p_rhs[0] = -diffFlux;

      // now right
      p_lhs[2] = +lhsfac;
      p_lhs[3] = -lhsfac;
      p_rhs[1] = diffFlux;

      //====================================
      // advective flux
      //====================================

      // 2nd order central
      const double qIp = 0.5*( qNp1L + qNp1R );

      // upwind
      // (extrapolated value taken from the left node when tmdot > 0, else right)
      const double qUpwind = (tmdot > 0) ? alphaUpw*qIpL + om_alphaUpw*qIp
          : alphaUpw*qIpR + om_alphaUpw*qIp;

      // generalized central (2nd and 4th order)
      const double qHatL = alpha*qIpL + om_alpha*qIp;
      const double qHatR = alpha*qIpR + om_alpha*qIp;
      const double qCds = 0.5*(qHatL + qHatR);

      // total advection
      const double aflux = tmdot*(pecfac*qUpwind + om_pecfac*qCds);

      // upwind advection (includes 4th); left node
      // 0.5*(tmdot+|tmdot|) is tmdot when positive and zero otherwise
      double alhsfac = 0.5*(tmdot+std::abs(tmdot))*pecfac*alphaUpw
        + 0.5*alpha*om_pecfac*tmdot;
      p_lhs[0] += alhsfac;
      p_lhs[2] -= alhsfac;

      // upwind advection; right node
      // 0.5*(tmdot-|tmdot|) is tmdot when negative and zero otherwise
      alhsfac = 0.5*(tmdot-std::abs(tmdot))*pecfac*alphaUpw
        + 0.5*alpha*om_pecfac*tmdot;
      p_lhs[3] -= alhsfac;
      p_lhs[1] += alhsfac;

      // central; left; collect terms on alpha and alphaUpw
      alhsfac = 0.5*tmdot*(pecfac*om_alphaUpw + om_pecfac*om_alpha);
      p_lhs[0] += alhsfac;
      p_lhs[1] += alhsfac;
      // central; right; collect terms on alpha and alphaUpw
      p_lhs[2] -= alhsfac;
      p_lhs[3] -= alhsfac;

      // total flux left
      p_rhs[0] -= aflux;
      // total flux right
      p_rhs[1] += aflux;

      // hand the edge's local system to apply_coeff
      apply_coeff(connected_nodes, rhs, lhs, __FILE__);

    }
  }
}
/**
 * Imposes Dirichlet boundary values on the velocity unknowns of the Stokes
 * system.
 *
 * The velocity unknowns are visited in the order "all x components, then all
 * y components"; both components of a node share the same dof info.  For an
 * unknown whose boundary mark is 1, 2, 3 or 4 the prescribed value is written
 * into x(i).  For marks 1, 2, 3, 4, 5 and 11 the constraint is then built
 * into the linear system: rhs(i) becomes diag * x(i), the off-diagonal
 * entries of row i are cleared, and every matching entry in column i is moved
 * to the right-hand side and cleared.
 *
 * @param x solution vector; entries of constrained velocity unknowns are
 *          overwritten with (or read as) their boundary values.
 */
void ISOP2P1::boundaryValueStokes(Vector<double> &x)
{
    // Number of degrees of freedom in each space.
    const unsigned int n_dof_v = fem_space_v.n_dof();
    const unsigned int n_dof_p = fem_space_p.n_dof();
    const unsigned int n_total_dof_v = 2 * n_dof_v;
    const std::size_t * rowstart = sp_stokes.get_rowstart_indices();
    const unsigned int * colnum = sp_stokes.get_column_numbers();
    std::cout << "n_dof_v: " << n_dof_v << ", n_dof_p: " << n_dof_p << std::endl;
    std::cout << "n_A: " << sp_stokes.n_rows() << ", m_A: " << sp_stokes.n_cols() << std::endl;

    // Visit the velocity unknowns of every component.
    for (unsigned int i = 0; i < n_total_dof_v; ++i)
    {
        // Decide whether this unknown is an x or a y component and read the
        // boundary mark of the underlying velocity dof.
        const bool is_x_component = (i < n_dof_v);
        const unsigned int dof = is_x_component ? i : i - n_dof_v;
        const int bm = fem_space_v.dofInfo(dof).boundary_mark;

        // Mark 0: nothing to impose.
        if (bm == 0)
            continue;

        // Assign Dirichlet values according to the boundary; x and y
        // components are handled separately.
        // // Boundary conditions for flow past an obstacle.
        // if (bm < 8 && bm > 0 && bm != 6)
        //     x(i) = 0.0;
        // else if (bm == 8 || bm == 9)
        //     if (i < n_dof_v)
        //     {
        // 	PoiseuilleVx poiseuille_vx(0.0, 2.0);
        // 	x(i) = poiseuille_vx.value(fem_space_v.dofInfo(i).interp_point);
        //     }
        //     else
        //     {
        // 	PoiseuilleVy poiseuille_vy;
        // 	x(i) = poiseuille_vy.value(fem_space_v.dofInfo(i - n_dof_v).interp_point);
        //     }

        // Cavity-flow boundary conditions.
        const bool on_cavity_wall = (bm >= 1 && bm <= 4);
        if (on_cavity_wall)
        {
            if (is_x_component)
            {
                DiVx real_vx(viscosity, t + dt);
                x(i) = real_vx.value(fem_space_v.dofInfo(dof).interp_point);
            }
            else
            {
                DiVy real_vy(viscosity, t + dt);
                x(i) = real_vy.value(fem_space_v.dofInfo(dof).interp_point);
            }
        }
        // else if (bm  == 2 || bm == 3)
        //     if (i < n_dof_v)
        //     {
        // 	PoiseuilleVx poiseuille_vx(0.0, 1.0);
        // 	x(i) = poiseuille_vx.value(fem_space_v.dofInfo(i).interp_point);
        //     }
        //     else
        //     {
        // 	PoiseuilleVy poiseuille_vy;
        // 	x(i) = poiseuille_vy.value(fem_space_v.dofInfo(i - n_dof_v).interp_point);
        //     }
        // else if (bm == 11)
        //     if (i < n_dof_v)
        //     {
        // 	RealVx real_vx;
        // 	x(i) = real_vx.value(fem_space_v.dofInfo(i).interp_point);
        //     }
        //     else
        //     {
        // 	RealVy real_vy;
        // 	x(i) = real_vy.value(fem_space_v.dofInfo(i - n_dof_v).interp_point);
        //     }

        // Cavity-flow boundary conditions: only these marks are built into
        // the linear system.
        // // Boundary conditions for flow past an obstacle.
        // if (bm < 10 && bm > 0 && bm != 6)
        const bool is_constrained = on_cavity_wall || bm == 5 || bm == 11;
        if (!is_constrained)
            continue;

        // With the rest of row i and column i zeroed, this right-hand side
        // keeps the computed solution equal to the boundary value.
        rhs(i) = matrix.diag_element(i) * x(i);

        // Walk row i.  The loop starts one past rowstart[i], which skips the
        // diagonal entry.
        const std::size_t row_i_end = rowstart[i + 1];
        for (unsigned int j = rowstart[i] + 1; j < row_i_end; ++j)
        {
            // Zero the j-th stored entry (a storage position, not column j).
            matrix.global_entry(j) -= matrix.global_entry(j);

            // The j-th stored entry lies in column k.
            const unsigned int k = colnum[j];

            // Look for a stored entry (k, i) among row k's off-diagonals.
            const unsigned int *row_k_first = &colnum[rowstart[k] + 1];
            const unsigned int *row_k_last = &colnum[rowstart[k + 1]];
            const unsigned int *hit = std::find(row_k_first, row_k_last, i);
            if (hit == row_k_last)
                continue;

            // Storage position of entry (k, i).
            const unsigned int l = hit - &colnum[rowstart[0]];
            // Unknown i is known, so move the term to the right-hand side:
            // r(k) = r(k) - x(i) * A(k, i).
            rhs(k) -= matrix.global_entry(l)
                * x(i);
            // The moved entry is now zero.
            matrix.global_entry(l) -= matrix.global_entry(l);
        }
    }
    std::cout << "boundary values for Stokes OK!" << std::endl;
}
Beispiel #22
0
void FGAccelerations::ResolveFrictionForces(double dt)
{
  const double invMass = 1.0 / in.Mass;
  const FGMatrix33& Jinv = in.Jinv;
  FGColumnVector3 vdot, wdot;
  vector<LagrangeMultiplier*>& multipliers = *in.MultipliersList;
  size_t n = multipliers.size();

  vFrictionForces.InitMatrix();
  vFrictionMoments.InitMatrix();

  // If no gears are in contact with the ground then return
  if (!n) return;

  vector<double> a(n*n); // Will contain Jac*M^-1*Jac^T
  vector<double> rhs(n);

  // Assemble the linear system of equations
  for (unsigned int i=0; i < n; i++) {
    FGColumnVector3 v1 = invMass * multipliers[i]->ForceJacobian;
    FGColumnVector3 v2 = Jinv * multipliers[i]->MomentJacobian; // Should be J^-T but J is symmetric and so is J^-1

    for (unsigned int j=0; j < i; j++)
      a[i*n+j] = a[j*n+i]; // Takes advantage of the symmetry of Jac^T*M^-1*Jac
    for (unsigned int j=i; j < n; j++)
      a[i*n+j] = DotProduct(v1, multipliers[j]->ForceJacobian)
               + DotProduct(v2, multipliers[j]->MomentJacobian);
  }

  // Assemble the RHS member

  // Translation
  vdot = vUVWdot;
  if (dt > 0.) // Zeroes out the relative movement between the aircraft and the ground
    vdot += (in.vUVW - in.Tec2b * in.TerrainVelocity) / dt;

  // Rotation
  wdot = vPQRdot;
  if (dt > 0.) // Zeroes out the relative movement between the aircraft and the ground
    wdot += (in.vPQR - in.Tec2b * in.TerrainAngularVel) / dt;

  // Prepare the linear system for the Gauss-Seidel algorithm :
  // 1. Compute the right hand side member 'rhs'
  // 2. Divide every line of 'a' and 'rhs' by a[i,i]. This is in order to save
  //    a division computation at each iteration of Gauss-Seidel.
  for (unsigned int i=0; i < n; i++) {
    double d = 1.0 / a[i*n+i];

    rhs[i] = -(DotProduct(multipliers[i]->ForceJacobian, vdot)
              +DotProduct(multipliers[i]->MomentJacobian, wdot))*d;
    for (unsigned int j=0; j < n; j++)
      a[i*n+j] *= d;
  }

  // Resolve the Lagrange multipliers with the projected Gauss-Seidel method
  for (int iter=0; iter < 50; iter++) {
    double norm = 0.;

    for (unsigned int i=0; i < n; i++) {
      double lambda0 = multipliers[i]->value;
      double dlambda = rhs[i];

      for (unsigned int j=0; j < n; j++)
        dlambda -= a[i*n+j]*multipliers[j]->value;

      multipliers[i]->value = Constrain(multipliers[i]->Min, lambda0+dlambda, multipliers[i]->Max);
      dlambda = multipliers[i]->value - lambda0;

      norm += fabs(dlambda);
    }

    if (norm < 1E-5) break;
  }

  // Calculate the total friction forces and moments

  for (unsigned int i=0; i< n; i++) {
    double lambda = multipliers[i]->value;
    vFrictionForces += lambda * multipliers[i]->ForceJacobian;
    vFrictionMoments += lambda * multipliers[i]->MomentJacobian;
  }

  FGColumnVector3 accel = invMass * vFrictionForces;
  FGColumnVector3 omegadot = Jinv * vFrictionMoments;

  vBodyAccel += accel;
  vUVWdot += accel;
  vUVWidot += in.Tb2i * accel;
  vPQRdot += omegadot;
  vPQRidot += omegadot;
}
Beispiel #23
0
// ** Temporary version
int
ClpPdco::pdco( ClpPdcoBase * stuff, Options &options, Info &info, Outfo &outfo)
{
//    D1, D2 are positive-definite diagonal matrices defined from d1, d2.
//           In particular, d2 indicates the accuracy required for
//           satisfying each row of Ax = b.
//
// D1 and D2 (via d1 and d2) provide primal and dual regularization
// respectively.  They ensure that the primal and dual solutions
// (x,r) and (y,z) are unique and bounded.
//
// A scalar d1 is equivalent to d1 = ones(n,1), D1 = diag(d1).
// A scalar d2 is equivalent to d2 = ones(m,1), D2 = diag(d2).
// Typically, d1 = d2 = 1e-4.
// These values perturb phi(x) only slightly  (by about 1e-8) and request
// that A*x = b be satisfied quite accurately (to about 1e-8).
// Set d1 = 1e-4, d2 = 1 for least-squares problems with bound constraints.
// The problem is then
//
//    minimize    phi(x) + 1/2 norm(d1*x)^2 + 1/2 norm(A*x - b)^2
//    subject to  bl <= x <= bu.
//
// More generally, d1 and d2 may be n and m vectors containing any positive
// values (preferably not too small, and typically no larger than 1).
// Bigger elements of d1 and d2 improve the stability of the solver.
//
// At an optimal solution, if x(j) is on its lower or upper bound,
// the corresponding z(j) is positive or negative respectively.
// If x(j) is between its bounds, z(j) = 0.
// If bl(j) = bu(j), x(j) is fixed at that value and z(j) may have
// either sign.
//
// Also, r and y satisfy r = D2 y, so that Ax + D2^2 y = b.
// Thus if d2(i) = 1e-4, the i-th row of Ax = b will be satisfied to
// approximately 1e-8.  This determines how large d2(i) can safely be.
//
//
// EXTERNAL FUNCTIONS:
// options         = pdcoSet;                  provided with pdco.m
// [obj,grad,hess] = pdObj( x );               provided by user
//               y = pdMat( name,mode,m,n,x ); provided by user if pdMat
//                                             is a string, not a matrix
//
// INPUT ARGUMENTS:
// pdObj      is a string containing the name of a function pdObj.m
//            or a function_handle for such a function
//            such that [obj,grad,hess] = pdObj(x) defines
//            obj  = phi(x)              : a scalar,
//            grad = gradient of phi(x)  : an n-vector,
//            hess = diag(Hessian of phi): an n-vector.
//         Examples:
//            If phi(x) is the linear function c'x, pdObj should return
//               [obj,grad,hess] = [c'*x, c, zeros(n,1)].
//            If phi(x) is the entropy function E(x) = sum x(j) log x(j),
//               [obj,grad,hess] = [E(x), log(x)+1, 1./x].
// pdMat      may be an explicit m x n matrix A (preferably sparse!),
//            or a string containing the name of a function pdMat.m
//            or a function_handle for such a function
//            such that y = pdMat( name,mode,m,n,x )
//            returns   y = A*x (mode=1)  or  y = A'*x (mode=2).
//            The input parameter "name" will be the string pdMat.
// b          is an m-vector.
// bl         is an n-vector of lower bounds.  Non-existent bounds
//            may be represented by bl(j) = -Inf or bl(j) <= -1e+20.
// bu         is an n-vector of upper bounds.  Non-existent bounds
//            may be represented by bu(j) =  Inf or bu(j) >=  1e+20.
// d1, d2     may be positive scalars or positive vectors (see above).
// options    is a structure that may be set and altered by pdcoSet
//            (type help pdcoSet).
// x0, y0, z0 provide an initial solution.
// xsize, zsize are estimates of the biggest x and z at the solution.
//            They are used to scale (x,y,z).  Good estimates
//            should improve the performance of the barrier method.
//
//
// OUTPUT ARGUMENTS:
// x          is the primal solution.
// y          is the dual solution associated with Ax + D2 r = b.
// z          is the dual solution associated with bl <= x <= bu.
// inform = 0 if a solution is found;
//        = 1 if too many iterations were required;
//        = 2 if the linesearch failed too often.
// PDitns     is the number of Primal-Dual Barrier iterations required.
// CGitns     is the number of Conjugate-Gradient  iterations required
//            if an iterative solver is used (LSQR).
// time       is the cpu time used.
//----------------------------------------------------------------------

// PRIVATE FUNCTIONS:
//    pdxxxbounds
//    pdxxxdistrib
//    pdxxxlsqr
//    pdxxxlsqrmat
//    pdxxxmat
//    pdxxxmerit
//    pdxxxresid1
//    pdxxxresid2
//    pdxxxstep
//
// GLOBAL VARIABLES:
//    global pdDDD1 pdDDD2 pdDDD3
//
//
// NOTES:
// The matrix A should be reasonably well scaled: norm(A,inf) =~ 1.
// The vector b and objective phi(x) may be of any size, but ensure that
// xsize and zsize are reasonably close to norm(x,inf) and norm(z,inf)
// at the solution.
//
// The files defining pdObj  and pdMat
// must not be called Fname.m or Aname.m!!
//
//
// AUTHOR:
//    Michael Saunders, Systems Optimization Laboratory (SOL),
//    Stanford University, Stanford, California, USA.
//    [email protected]
//
// CONTRIBUTORS:
//    Byunggyoo Kim, SOL, Stanford University.
//    [email protected]
//
// DEVELOPMENT:
// 20 Jun 1997: Original version of pdsco.m derived from pdlp0.m.
// 29 Sep 2002: Original version of pdco.m  derived from pdsco.m.
//              Introduced D1, D2 in place of gamma*I, delta*I
//              and allowed for general bounds bl <= x <= bu.
// 06 Oct 2002: Allowed for fixed variables: bl(j) = bu(j) for any j.
// 15 Oct 2002: Eliminated some work vectors (since m, n might be LARGE).
//              Modularized residuals, linesearch
// 16 Oct 2002: pdxxx..., pdDDD... names rationalized.
//              pdAAA eliminated (global copy of A).
//              Aname is now used directly as an explicit A or a function.
//              NOTE: If Aname is a function, it now has an extra parameter.
// 23 Oct 2002: Fname and Aname can now be function handles.
// 01 Nov 2002: Bug fixed in feval in pdxxxmat.
//-----------------------------------------------------------------------

//  global pdDDD1 pdDDD2 pdDDD3
     double inf = 1.0e30;
     double eps = 1.0e-15;
     double atolold = -1.0, r3ratio = -1.0, Pinf, Dinf, Cinf, Cinf0;

     printf("\n   --------------------------------------------------------");
     printf("\n   pdco.m                            Version of 01 Nov 2002");
     printf("\n   Primal-dual barrier method to minimize a convex function");
     printf("\n   subject to linear constraints Ax + r = b,  bl <= x <= bu");
     printf("\n   --------------------------------------------------------\n");

     int m = numberRows_;
     int n = numberColumns_;
     bool ifexplicit = true;

     CoinDenseVector<double> b(m, rhs_);
     CoinDenseVector<double> x(n, x_);
     CoinDenseVector<double> y(m, y_);
     CoinDenseVector<double> z(n, dj_);
     //delete old arrays
     delete [] rhs_;
     delete [] x_;
     delete [] y_;
     delete [] dj_;
     rhs_ = NULL;
     x_ = NULL;
     y_ = NULL;
     dj_ = NULL;

     // Save stuff so available elsewhere
     pdcoStuff_ = stuff;

     double normb  = b.infNorm();
     double normx0 = x.infNorm();
     double normy0 = y.infNorm();
     double normz0 = z.infNorm();

     printf("\nmax |b | = %8g     max |x0| = %8g", normb , normx0);
     printf(                "      xsize   = %8g", xsize_);
     printf("\nmax |y0| = %8g     max |z0| = %8g", normy0, normz0);
     printf(                "      zsize   = %8g", zsize_);

     //---------------------------------------------------------------------
     // Initialize.
     //---------------------------------------------------------------------
     //true   = 1;
     //false  = 0;
     //zn     = zeros(n,1);
     //int nb     = n + m;
     int CGitns = 0;
     int inform = 0;
     //---------------------------------------------------------------------
     //  Only allow scalar d1, d2 for now
     //---------------------------------------------------------------------
     /*
     if (d1_->size()==1)
         d1_->resize(n, d1_->getElements()[0]);  // Allow scalar d1, d2
     if (d2_->size()==1)
         d2->resize(m, d2->getElements()[0]);  // to mean dk * unit vector
      */
     assert (stuff->sizeD1() == 1);
     double d1 = stuff->getD1();
     double d2 = stuff->getD2();

     //---------------------------------------------------------------------
     // Grab input options.
     //---------------------------------------------------------------------
     int  maxitn    = options.MaxIter;
     double featol    = options.FeaTol;
     double opttol    = options.OptTol;
     double steptol   = options.StepTol;
     int  stepSame  = 1;  /* options.StepSame;   // 1 means stepx == stepz */
     double x0min     = options.x0min;
     double z0min     = options.z0min;
     double mu0       = options.mu0;
     int  LSproblem = options.LSproblem;  // See below
     int  LSmethod  = options.LSmethod;   // 1=Cholesky    2=QR    3=LSQR
     int  itnlim    = options.LSQRMaxIter * CoinMin(m, n);
     double atol1     = options.LSQRatol1;  // Initial  atol
     double atol2     = options.LSQRatol2;  // Smallest atol,unless atol1 is smaller
     double conlim    = options.LSQRconlim;
     //int  wait      = options.wait;

     // LSproblem:
     //  1 = dy          2 = dy shifted, DLS
     // 11 = s          12 =  s shifted, DLS    (dx = Ds)
     // 21 = dx
     // 31 = 3x3 system, symmetrized by Z^{1/2}
     // 32 = 2x2 system, symmetrized by X^{1/2}

     //---------------------------------------------------------------------
     // Set other parameters.
     //---------------------------------------------------------------------
     int  kminor    = 0;      // 1 stops after each iteration
     double eta       = 1e-4;   // Linesearch tolerance for "sufficient descent"
     double maxf      = 10;     // Linesearch backtrack limit (function evaluations)
     double maxfail   = 1;      // Linesearch failure limit (consecutive iterations)
     double bigcenter = 1e+3;   // mu is reduced if center < bigcenter.

     // Parameters for LSQR.
     double atolmin   = eps;    // Smallest atol if linesearch back-tracks
     double btol      = 0;      // Should be small (zero is ok)
     double show      = false;  // Controls lsqr iteration log
     /*
     double gamma     = d1->infNorm();
     double delta     = d2->infNorm();
     */
     double gamma = d1;
     double delta = d2;

     printf("\n\nx0min    = %8g     featol   = %8.1e", x0min, featol);
     printf(                  "      d1max   = %8.1e", gamma);
     printf(  "\nz0min    = %8g     opttol   = %8.1e", z0min, opttol);
     printf(                  "      d2max   = %8.1e", delta);
     printf(  "\nmu0      = %8.1e     steptol  = %8g", mu0  , steptol);
     printf(                  "     bigcenter= %8g"  , bigcenter);

     printf("\n\nLSQR:");
     printf("\natol1    = %8.1e     atol2    = %8.1e", atol1 , atol2 );
     printf(                  "      btol    = %8.1e", btol );
     printf("\nconlim   = %8.1e     itnlim   = %8d"  , conlim, itnlim);
     printf(                  "      show    = %8g"  , show );

// LSmethod  = 3;  ////// Hardwire LSQR
// LSproblem = 1;  ////// and LS problem defining "dy".
     /*
       if wait
         printf("\n\nReview parameters... then type "return"\n")
         keyboard
       end
     */
     if (eta < 0)
          printf("\n\nLinesearch disabled by eta < 0");

     //---------------------------------------------------------------------
     // All parameters have now been set.
     //---------------------------------------------------------------------
     double time    = CoinCpuTime();
     //bool useChol = (LSmethod == 1);
     //bool useQR   = (LSmethod == 2);
     bool direct  = (LSmethod <= 2 && ifexplicit);
     char solver[6];
     strcpy(solver, "  LSQR");


     //---------------------------------------------------------------------
     // Categorize bounds and allow for fixed variables by modifying b.
     //---------------------------------------------------------------------

     int nlow, nupp, nfix;
     int *bptrs[3] = {0};
     getBoundTypes(&nlow, &nupp, &nfix, bptrs );
     int *low = bptrs[0];
     int *upp = bptrs[1];
     int *fix = bptrs[2];

     int nU = n;
     if (nupp == 0) nU = 1;  //Make dummy vectors if no Upper bounds

     //---------------------------------------------------------------------
     //  Get pointers to local copy of model bounds
     //---------------------------------------------------------------------

     CoinDenseVector<double> bl(n, columnLower_);
     double *bl_elts = bl.getElements();
     CoinDenseVector<double> bu(nU, columnUpper_);  // this is dummy if no UB
     double *bu_elts = bu.getElements();

     CoinDenseVector<double> r1(m, 0.0);
     double *r1_elts = r1.getElements();
     CoinDenseVector<double> x1(n, 0.0);
     double *x1_elts = x1.getElements();

     if (nfix > 0) {
          for (int k = 0; k < nfix; k++)
               x1_elts[fix[k]] = bl[fix[k]];
          matVecMult(1, r1, x1);
          b = b - r1;
          // At some stage, might want to look at normfix = norm(r1,inf);
     }

     //---------------------------------------------------------------------
     // Scale the input data.
     // The scaled variables are
     //    xbar     = x/beta,
     //    ybar     = y/zeta,
     //    zbar     = z/zeta.
     // Define
     //    theta    = beta*zeta;
     // The scaled function is
     //    phibar   = ( 1   /theta) fbar(beta*xbar),
     //    gradient = (beta /theta) grad,
     //    Hessian  = (beta2/theta) hess.
     //---------------------------------------------------------------------
     double beta = xsize_;
     if (beta == 0) beta = 1; // beta scales b, x.
     double zeta = zsize_;
     if (zeta == 0) zeta = 1; // zeta scales y, z.
     double theta  = beta * zeta;                          // theta scales obj.
     // (theta could be anything, but theta = beta*zeta makes
     // scaled grad = grad/zeta = 1 approximately if zeta is chosen right.)

     for (int k = 0; k < nlow; k++)
          bl_elts[low[k]] = bl_elts[low[k]] / beta;
     for (int k = 0; k < nupp; k++)
          bu_elts[upp[k]] = bu_elts[upp[k]] / beta;
     d1     = d1 * ( beta / sqrt(theta) );
     d2     = d2 * ( sqrt(theta) / beta );

     double beta2  = beta * beta;
     b.scale( (1.0 / beta) );
     y.scale( (1.0 / zeta) );
     x.scale( (1.0 / beta) );
     z.scale( (1.0 / zeta) );

     //---------------------------------------------------------------------
     // Initialize vectors that are not fully used if bounds are missing.
     //---------------------------------------------------------------------
     CoinDenseVector<double> rL(n, 0.0);
     CoinDenseVector<double> cL(n, 0.0);
     CoinDenseVector<double> z1(n, 0.0);
     CoinDenseVector<double> dx1(n, 0.0);
     CoinDenseVector<double> dz1(n, 0.0);
     CoinDenseVector<double> r2(n, 0.0);

     // Assign upper bd regions (dummy if no UBs)

     CoinDenseVector<double> rU(nU, 0.0);
     CoinDenseVector<double> cU(nU, 0.0);
     CoinDenseVector<double> x2(nU, 0.0);
     CoinDenseVector<double> z2(nU, 0.0);
     CoinDenseVector<double> dx2(nU, 0.0);
     CoinDenseVector<double> dz2(nU, 0.0);

     //---------------------------------------------------------------------
     // Initialize x, y, z, objective, etc.
     //---------------------------------------------------------------------
     CoinDenseVector<double> dx(n, 0.0);
     CoinDenseVector<double> dy(m, 0.0);
     CoinDenseVector<double> Pr(m);
     CoinDenseVector<double> D(n);
     double *D_elts = D.getElements();
     CoinDenseVector<double> w(n);
     double *w_elts = w.getElements();
     CoinDenseVector<double> rhs(m + n);


     //---------------------------------------------------------------------
     // Pull out the element array pointers for efficiency
     //---------------------------------------------------------------------
     double *x_elts  = x.getElements();
     double *x2_elts = x2.getElements();
     double *z_elts  = z.getElements();
     double *z1_elts = z1.getElements();
     double *z2_elts = z2.getElements();

     for (int k = 0; k < nlow; k++) {
          x_elts[low[k]]  = CoinMax( x_elts[low[k]], bl[low[k]]);
          x1_elts[low[k]] = CoinMax( x_elts[low[k]] - bl[low[k]], x0min  );
          z1_elts[low[k]] = CoinMax( z_elts[low[k]], z0min  );
     }
     for (int k = 0; k < nupp; k++) {
          x_elts[upp[k]]  = CoinMin( x_elts[upp[k]], bu[upp[k]]);
          x2_elts[upp[k]] = CoinMax(bu[upp[k]] -  x_elts[upp[k]], x0min  );
          z2_elts[upp[k]] = CoinMax(-z_elts[upp[k]], z0min  );
     }
     //////////////////// Assume hessian is diagonal. //////////////////////

//  [obj,grad,hess] = feval( Fname, (x*beta) );
     x.scale(beta);
     double obj = getObj(x);
     CoinDenseVector<double> grad(n);
     getGrad(x, grad);
     CoinDenseVector<double> H(n);
     getHessian(x , H);
     x.scale((1.0 / beta));

     //double * g_elts = grad.getElements();
     double * H_elts = H.getElements();

     obj /= theta;                       // Scaled obj.
     grad = grad * (beta / theta) + (d1 * d1) * x; // grad includes x regularization.
     H  = H * (beta2 / theta) + (d1 * d1)      ; // H    includes x regularization.


     /*---------------------------------------------------------------------
     // Compute primal and dual residuals:
     // r1 =  b - Aprod(x) - d2*d2*y;
     // r2 =  grad - Atprod(y) + z2 - z1;
     //  rL =  bl - x + x1;
     //  rU =  x + x2 - bu; */
     //---------------------------------------------------------------------
     //  [r1,r2,rL,rU,Pinf,Dinf] = ...
     //      pdxxxresid1( Aname,fix,low,upp, ...
     //                   b,bl,bu,d1,d2,grad,rL,rU,x,x1,x2,y,z1,z2 );
     pdxxxresid1( this, nlow, nupp, nfix, low, upp, fix,
                  b, bl_elts, bu_elts, d1, d2, grad, rL, rU, x, x1, x2, y, z1, z2,
                  r1, r2, &Pinf, &Dinf);
     //---------------------------------------------------------------------
     // Initialize mu and complementarity residuals:
     //    cL   = mu*e - X1*z1.
     //    cU   = mu*e - X2*z2.
     //
     // 25 Jan 2001: Now that b and obj are scaled (and hence x,y,z),
     //              we should be able to use mufirst = mu0 (absolute value).
     //              0.1 worked poorly on StarTest1 with x0min = z0min = 0.1.
     // 29 Jan 2001: We might as well use mu0 = x0min * z0min;
     //              so that most variables are centered after a warm start.
     // 29 Sep 2002: Use mufirst = mu0*(x0min * z0min),
     //              regarding mu0 as a scaling of the initial center.
     //---------------------------------------------------------------------
     //  double mufirst = mu0*(x0min * z0min);
     double mufirst = mu0;   // revert to absolute value
     double mulast  = 0.1 * opttol;
     mulast  = CoinMin( mulast, mufirst );
     double mu      = mufirst;
     double center,  fmerit;
     pdxxxresid2( mu, nlow, nupp, low, upp, cL, cU, x1, x2,
                  z1, z2, &center, &Cinf, &Cinf0 );
     fmerit = pdxxxmerit(nlow, nupp, low, upp, r1, r2, rL, rU, cL, cU );

     // Initialize other things.

     bool  precon   = true;
     double PDitns    = 0;
     //bool converged = false;
     double atol      = atol1;
     atol2     = CoinMax( atol2, atolmin );
     atolmin   = atol2;
     //  pdDDD2    = d2;    // Global vector for diagonal matrix D2

     //  Iteration log.

     int nf      = 0;
     int itncg   = 0;
     int nfail   = 0;

     printf("\n\nItn   mu   stepx   stepz  Pinf  Dinf");
     printf("  Cinf   Objective    nf  center");
     if (direct) {
          printf("\n");
     } else {
          printf("  atol   solver   Inexact\n");
     }

     double regx = (d1 * x).twoNorm();
     double regy = (d2 * y).twoNorm();
     //  regterm = twoNorm(d1.*x)^2  +  norm(d2.*y)^2;
     double regterm = regx * regx + regy * regy;
     double objreg  = obj  +  0.5 * regterm;
     double objtrue = objreg * theta;

     printf("\n%3g                     ", PDitns        );
     printf("%6.1f%6.1f" , log10(Pinf ), log10(Dinf));
     printf("%6.1f%15.7e", log10(Cinf0), objtrue    );
     printf("   %8.1f\n"   , center                   );
     /*
     if kminor
       printf("\n\nStart of first minor itn...\n");
       keyboard
     end
     */
     //---------------------------------------------------------------------
     // Main loop.
     //---------------------------------------------------------------------
     // Lsqr
     ClpLsqr  thisLsqr(this);
     //  while (converged) {
     while(PDitns < maxitn) {
          PDitns = PDitns + 1;

          // 31 Jan 2001: Set atol according to progress, a la Inexact Newton.
          // 07 Feb 2001: 0.1 not small enough for Satellite problem.  Try 0.01.
          // 25 Apr 2001: 0.01 seems wasteful for Star problem.
          //              Now that starting conditions are better, go back to 0.1.

          double r3norm = CoinMax(Pinf,   CoinMax(Dinf,  Cinf));
          atol   = CoinMin(atol,  r3norm * 0.1);
          atol   = CoinMax(atol,  atolmin   );
          info.r3norm = r3norm;

          //-------------------------------------------------------------------
          //  Define a damped Newton iteration for solving f = 0,
          //  keeping  x1, x2, z1, z2 > 0.  We eliminate dx1, dx2, dz1, dz2
          //  to obtain the system
          //
          //     [-H2  A"  ] [ dx ] = [ w ],   H2 = H + D1^2 + X1inv Z1 + X2inv Z2,
          //     [ A   D2^2] [ dy ] = [ r1]    w  = r2 - X1inv(cL + Z1 rL)
          //                                           + X2inv(cU + Z2 rU),
          //
          //  which is equivalent to the least-squares problem
          //
          //     min || [ D A"]dy  -  [  D w   ] ||,   D = H2^{-1/2}.         (*)
          //         || [  D2 ]       [D2inv r1] ||
          //-------------------------------------------------------------------
          for (int k = 0; k < nlow; k++)
               H_elts[low[k]]  = H_elts[low[k]] + z1[low[k]] / x1[low[k]];
          for (int k = 0; k < nupp; k++)
               H[upp[k]]  = H[upp[k]] + z2[upp[k]] / x2[upp[k]];
          w = r2;
          for (int k = 0; k < nlow; k++)
               w[low[k]]  = w[low[k]] - (cL[low[k]] + z1[low[k]] * rL[low[k]]) / x1[low[k]];
          for (int k = 0; k < nupp; k++)
               w[upp[k]]  = w[upp[k]] + (cU[upp[k]] + z2[upp[k]] * rU[upp[k]]) / x2[upp[k]];

          if (LSproblem == 1) {
               //-----------------------------------------------------------------
               //  Solve (*) for dy.
               //-----------------------------------------------------------------
               H      = 1.0 / H;  // H is now Hinv (NOTE!)
               for (int k = 0; k < nfix; k++)
                    H[fix[k]] = 0;
               for (int k = 0; k < n; k++)
                    D_elts[k] = sqrt(H_elts[k]);
               thisLsqr.borrowDiag1(D_elts);
               thisLsqr.diag2_ = d2;

               if (direct) {
                    // Omit direct option for now
               } else {// Iterative solve using LSQR.
                    //rhs     = [ D.*w; r1./d2 ];
                    for (int k = 0; k < n; k++)
                         rhs[k] = D_elts[k] * w_elts[k];
                    for (int k = 0; k < m; k++)
                         rhs[n+k] = r1_elts[k] * (1.0 / d2);
                    double damp    = 0;

                    if (precon) {   // Construct diagonal preconditioner for LSQR
                         matPrecon(d2, Pr, D);
                    }
                    /*
                    	rw(7)        = precon;
                            info.atolmin = atolmin;
                            info.r3norm  = fmerit;  // Must be the 2-norm here.

                            [ dy, istop, itncg, outfo ] = ...
                       pdxxxlsqr( nb,m,"pdxxxlsqrmat",Aname,rw,rhs,damp, ...
                                  atol,btol,conlim,itnlim,show,info );


                    	thisLsqr.input->rhs_vec = &rhs;
                    	thisLsqr.input->sol_vec = &dy;
                    	thisLsqr.input->rel_mat_err = atol;
                    	thisLsqr.do_lsqr(this);
                    	*/
                    //  New version of lsqr

                    int istop;
                    dy.clear();
                    show = false;
                    info.atolmin = atolmin;
                    info.r3norm  = fmerit;  // Must be the 2-norm here.

                    thisLsqr.do_lsqr( rhs, damp, atol, btol, conlim, itnlim,
                                      show, info, dy , &istop, &itncg, &outfo, precon, Pr);
                    if (precon)
                         dy = dy * Pr;

                    if (!precon && itncg > 999999)
                         precon = true;

                    if (istop == 3  ||  istop == 7 )  // conlim or itnlim
                         printf("\n    LSQR stopped early:  istop = //%d", istop);


                    atolold   = outfo.atolold;
                    atol      = outfo.atolnew;
                    r3ratio   = outfo.r3ratio;
               }// LSproblem 1

               //      grad      = pdxxxmat( Aname,2,m,n,dy );   // grad = A"dy
               grad.clear();
               matVecMult(2, grad, dy);
               for (int k = 0; k < nfix; k++)
                    grad[fix[k]] = 0;                            // grad is a work vector
               dx = H * (grad - w);

          } else {
               perror( "This LSproblem not yet implemented\n" );
          }
          //-------------------------------------------------------------------

          CGitns += itncg;

          //-------------------------------------------------------------------
          // dx and dy are now known.  Get dx1, dx2, dz1, dz2.
          //-------------------------------------------------------------------
          for (int k = 0; k < nlow; k++) {
               dx1[low[k]] = - rL[low[k]] + dx[low[k]];
               dz1[low[k]] =  (cL[low[k]] - z1[low[k]] * dx1[low[k]]) / x1[low[k]];
          }
          for (int k = 0; k < nupp; k++) {
               dx2[upp[k]] = - rU[upp[k]] - dx[upp[k]];
               dz2[upp[k]] =  (cU[upp[k]] - z2[upp[k]] * dx2[upp[k]]) / x2[upp[k]];
          }
          //-------------------------------------------------------------------
          // Find the maximum step.
          //--------------------------------------------------------------------
          double stepx1 = pdxxxstep(nlow, low, x1, dx1 );
          double stepx2 = inf;
          if (nupp > 0)
               stepx2 = pdxxxstep(nupp, upp, x2, dx2 );
          double stepz1 = pdxxxstep( z1     , dz1      );
          double stepz2 = inf;
          if (nupp > 0)
               stepz2 = pdxxxstep( z2     , dz2      );
          double stepx  = CoinMin( stepx1, stepx2 );
          double stepz  = CoinMin( stepz1, stepz2 );
          stepx  = CoinMin( steptol * stepx, 1.0 );
          stepz  = CoinMin( steptol * stepz, 1.0 );
          if (stepSame) {                  // For NLPs, force same step
               stepx = CoinMin( stepx, stepz );   // (true Newton method)
               stepz = stepx;
          }

          //-------------------------------------------------------------------
          // Backtracking linesearch.
          //-------------------------------------------------------------------
          bool fail     =  true;
          nf       =  0;

          while (nf < maxf) {
               nf      = nf + 1;
               x       = x        +  stepx * dx;
               y       = y        +  stepz * dy;
               for (int k = 0; k < nlow; k++) {
                    x1[low[k]] = x1[low[k]]  +  stepx * dx1[low[k]];
                    z1[low[k]] = z1[low[k]]  +  stepz * dz1[low[k]];
               }
               for (int k = 0; k < nupp; k++) {
                    x2[upp[k]] = x2[upp[k]]  +  stepx * dx2[upp[k]];
                    z2[upp[k]] = z2[upp[k]]  +  stepz * dz2[upp[k]];
               }
               //      [obj,grad,hess] = feval( Fname, (x*beta) );
               x.scale(beta);
               obj = getObj(x);
               getGrad(x, grad);
               getHessian(x, H);
               x.scale((1.0 / beta));

               obj        /= theta;
               grad       = grad * (beta / theta)  +  d1 * d1 * x;
               H          = H * (beta2 / theta)  +  d1 * d1;

               //      [r1,r2,rL,rU,Pinf,Dinf] = ...
               pdxxxresid1( this, nlow, nupp, nfix, low, upp, fix,
                            b, bl_elts, bu_elts, d1, d2, grad, rL, rU, x, x1, x2,
                            y, z1, z2, r1, r2, &Pinf, &Dinf );
               //double center, Cinf, Cinf0;
               //      [cL,cU,center,Cinf,Cinf0] = ...
               pdxxxresid2( mu, nlow, nupp, low, upp, cL, cU, x1, x2, z1, z2,
                            &center, &Cinf, &Cinf0);
               double fmeritnew = pdxxxmerit(nlow, nupp, low, upp, r1, r2, rL, rU, cL, cU );
               double step      = CoinMin( stepx, stepz );

               if (fmeritnew <= (1 - eta * step)*fmerit) {
                    fail = false;
                    break;
               }

               // Merit function didn"t decrease.
               // Restore variables to previous values.
               // (This introduces a little error, but save lots of space.)

               x       = x        -  stepx * dx;
               y       = y        -  stepz * dy;
               for (int k = 0; k < nlow; k++) {
                    x1[low[k]] = x1[low[k]]  -  stepx * dx1[low[k]];
                    z1[low[k]] = z1[low[k]]  -  stepz * dz1[low[k]];
               }
               for (int k = 0; k < nupp; k++) {
                    x2[upp[k]] = x2[upp[k]]  -  stepx * dx2[upp[k]];
                    z2[upp[k]] = z2[upp[k]]  -  stepz * dz2[upp[k]];
               }
               // Back-track.
               // If it"s the first time,
               // make stepx and stepz the same.

               if (nf == 1 && stepx != stepz) {
                    stepx = step;
               } else if (nf < maxf) {
                    stepx = stepx / 2;
               }
               stepz = stepx;
          }

          if (fail) {
               printf("\n     Linesearch failed (nf too big)");
               nfail += 1;
          } else {
               nfail = 0;
          }

          //-------------------------------------------------------------------
          // Set convergence measures.
          //--------------------------------------------------------------------
          regx = (d1 * x).twoNorm();
          regy = (d2 * y).twoNorm();
          regterm = regx * regx + regy * regy;
          objreg  = obj  +  0.5 * regterm;
          objtrue = objreg * theta;

          bool primalfeas    = Pinf  <=  featol;
          bool dualfeas      = Dinf  <=  featol;
          bool complementary = Cinf0 <=  opttol;
          bool enough        = PDitns >=       4; // Prevent premature termination.
          bool converged     = primalfeas  &  dualfeas  &  complementary  &  enough;

          //-------------------------------------------------------------------
          // Iteration log.
          //-------------------------------------------------------------------
          char str1[100], str2[100], str3[100], str4[100], str5[100];
          sprintf(str1, "\n%3g%5.1f" , PDitns      , log10(mu)   );
          sprintf(str2, "%8.5f%8.5f" , stepx       , stepz       );
          if (stepx < 0.0001 || stepz < 0.0001) {
               sprintf(str2, " %6.1e %6.1e" , stepx       , stepz       );
          }

          sprintf(str3, "%6.1f%6.1f" , log10(Pinf) , log10(Dinf));
          sprintf(str4, "%6.1f%15.7e", log10(Cinf0), objtrue     );
          sprintf(str5, "%3d%8.1f"   , nf          , center      );
          if (center > 99999) {
               sprintf(str5, "%3d%8.1e"   , nf          , center      );
          }
          printf("%s%s%s%s%s", str1, str2, str3, str4, str5);
          if (direct) {
               // relax
          } else {
               printf(" %5.1f%7d%7.3f", log10(atolold), itncg, r3ratio);
          }
          //-------------------------------------------------------------------
          // Test for termination.
          //-------------------------------------------------------------------
          if (kminor) {
               printf( "\nStart of next minor itn...\n");
               //      keyboard;
          }

          if (converged) {
               printf("\n   Converged");
               break;
          } else if (PDitns >= maxitn) {
               printf("\n   Too many iterations");
               inform = 1;
               break;
          } else if (nfail  >= maxfail) {
               printf("\n   Too many linesearch failures");
               inform = 2;
               break;
          } else {

               // Reduce mu, and reset certain residuals.

               double stepmu  = CoinMin( stepx , stepz   );
               stepmu  = CoinMin( stepmu, steptol );
               double muold   = mu;
               mu      = mu   -  stepmu * mu;
               if (center >= bigcenter)
                    mu = muold;

               // mutrad = mu0*(sum(Xz)/n); // 24 May 1998: Traditional value, but
               // mu     = CoinMin(mu,mutrad ); // it seemed to decrease mu too much.

               mu      = CoinMax(mu, mulast); // 13 Jun 1998: No need for smaller mu.
               //      [cL,cU,center,Cinf,Cinf0] = ...
               pdxxxresid2( mu, nlow, nupp, low, upp, cL, cU, x1, x2, z1, z2,
                            &center, &Cinf, &Cinf0 );
               fmerit = pdxxxmerit( nlow, nupp, low, upp, r1, r2, rL, rU, cL, cU );

               // Reduce atol for LSQR (and SYMMLQ).
               // NOW DONE AT TOP OF LOOP.

               atolold = atol;
               // if atol > atol2
               //   atolfac = (mu/mufirst)^0.25;
               //   atol    = CoinMax( atol*atolfac, atol2 );
               // end

               // atol = CoinMin( atol, mu );     // 22 Jan 2001: a la Inexact Newton.
               // atol = CoinMin( atol, 0.5*mu ); // 30 Jan 2001: A bit tighter

               // If the linesearch took more than one function (nf > 1),
               // we assume the search direction needed more accuracy
               // (though this may be true only for LPs).
               // 12 Jun 1998: Ask for more accuracy if nf > 2.
               // 24 Nov 2000: Also if the steps are small.
               // 30 Jan 2001: Small steps might be ok with warm start.
               // 06 Feb 2001: Not necessarily.  Reinstated tests in next line.

               if (nf > 2  ||  CoinMin( stepx, stepz ) <= 0.01)
                    atol = atolold * 0.1;
          }
          //---------------------------------------------------------------------
          // End of main loop.
          //---------------------------------------------------------------------
     }


     for (int k = 0; k < nfix; k++)
          x[fix[k]] = bl[fix[k]];
     z      = z1;
     if (nupp > 0)
          z = z - z2;
     printf("\n\nmax |x| =%10.3f", x.infNorm() );
     printf("    max |y| =%10.3f", y.infNorm() );
     printf("    max |z| =%10.3f", z.infNorm() );
     printf("   scaled");

     x.scale(beta);
     y.scale(zeta);
     z.scale(zeta);   // Unscale x, y, z.

     printf(  "\nmax |x| =%10.3f", x.infNorm() );
     printf("    max |y| =%10.3f", y.infNorm() );
     printf("    max |z| =%10.3f", z.infNorm() );
     printf(" unscaled\n");

     time   = CoinCpuTime() - time;
     char str1[100], str2[100];
     sprintf(str1, "\nPDitns  =%10g", PDitns );
     sprintf(str2, "itns =%10d", CGitns );
     //  printf( [str1 " " solver str2] );
     printf("    time    =%10.1f\n", time);
     /*
     pdxxxdistrib( abs(x),abs(z) );   // Private function

     if (wait)
       keyboard;
     */
//-----------------------------------------------------------------------
// End function pdco.m
//-----------------------------------------------------------------------
     /*  printf("Solution x values:\n\n");
       for (int k=0; k<n; k++)
         printf(" %d   %e\n", k, x[k]);
     */
// Print distribution
     double thresh[9] = { 0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.00001};
     int counts[9] = {0};
     for (int ij = 0; ij < n; ij++) {
          for (int j = 0; j < 9; j++) {
               if(x[ij] < thresh[j]) {
                    counts[j] += 1;
                    break;
               }
          }
     }
     printf ("Distribution of Solution Values\n");
     for (int j = 8; j > 1; j--)
          printf(" %g  to  %g %d\n", thresh[j-1], thresh[j], counts[j]);
     printf("   Less than   %g %d\n", thresh[2], counts[0]);

     return inform;
}
Beispiel #24
0
// Declares a __block-qualified int, assigns it the result of one rhs() call,
// then adds the result of a second rhs() call to it.
void foo() {
  __block int i;
  i = rhs();          // first call: plain assignment
  i = i + rhs();      // second call: accumulate into i
}
int main (int argc, char* argv[]) {

	if(argc != 2){
		cerr << "USAGE: " << argv[0] << " Jmax (= max. level of sparse grid)" << endl;
		exit(1);
	}

	//===============================================================//
	//========= PROBLEM SETUP  =======================//
	//===============================================================//
	
    int d   = 2;
    int d_  = 2;
    int j0  = 2;
    size_t Jmax = atoi(argv[1]);

    //getchar();

    /// Basis initialization
    TrialBasis_Time      basis_per(d,d_,j0);
    TestBasis_Time       basis_int(d,d_,j0);
    Basis_Space 		 basis_intbc(d,0);
    basis_intbc.enforceBoundaryCondition<DirichletBC>();

    Basis2D_Trial basis2d_trial(basis_per,basis_intbc);
    Basis2D_Test  basis2d_test(basis_int,basis_intbc);

    /// Initialization of operator

    // Bilinear Forms
    Convection1D_Time			ConvectionBil_t(basis_per, basis_int);
    Identity1D_Time 		    IdentityBil_t(basis_per, basis_int);
    Identity1D_Space 	        IdentityBil_x(basis_intbc, basis_intbc);
    Laplace1D_Space 	        LaplaceBil_x(basis_intbc, basis_intbc);
    
    RefConvection1D_Time 		RefConvectionBil_t(basis_per.refinementbasis, basis_int.refinementbasis);
    RefIdentity1D_Time 		    RefIdentityBil_t(basis_per.refinementbasis, basis_int.refinementbasis);
    RefIdentity1D_Space 	    RefIdentityBil_x(basis_intbc.refinementbasis, basis_intbc.refinementbasis);
    RefLaplace1D_Space 	        RefLaplaceBil_x(basis_intbc.refinementbasis, basis_intbc.refinementbasis);

    // Transposed Bilinear Forms
    TranspConvection1D_Time 	TranspConvectionBil_t(basis_per, basis_int);
    TranspIdentity1D_Time 		TranspIdentityBil_t(basis_per, basis_int);
    TranspIdentity1D_Space 	    TranspIdentityBil_x(basis_intbc, basis_intbc);
    TranspLaplace1D_Space 	    TranspLaplaceBil_x(basis_intbc, basis_intbc);
    
    RefTranspConvection1D_Time 	RefTranspConvectionBil_t(basis_per.refinementbasis, basis_int.refinementbasis);
    RefTranspIdentity1D_Time 	RefTranspIdentityBil_t(basis_per.refinementbasis, basis_int.refinementbasis);
    RefTranspIdentity1D_Space 	RefTranspIdentityBil_x(basis_intbc.refinementbasis, basis_intbc.refinementbasis);
    RefTranspLaplace1D_Space 	RefTranspLaplaceBil_x(basis_intbc.refinementbasis, basis_intbc.refinementbasis);

    /// Initialization of local operator
    LOp_Conv1D_Time		lOp_Conv1D_t(basis_int, basis_per, RefConvectionBil_t, ConvectionBil_t);
    LOp_Id1D_Time		lOp_Id1D_t  (basis_int, basis_per, RefIdentityBil_t, IdentityBil_t);
    LOp_Id1D_Space		lOp_Id1D_x  (basis_intbc, basis_intbc, RefIdentityBil_x, IdentityBil_x);
    LOp_Lapl1D_Space	lOp_Lapl1D_x(basis_intbc, basis_intbc, RefLaplaceBil_x, LaplaceBil_x);
    
    LOpT_Conv1D_Time	lOpT_Conv1D_t(basis_per, basis_int, RefTranspConvectionBil_t, TranspConvectionBil_t);
    LOpT_Id1D_Time		lOpT_Id1D_t  (basis_per, basis_int, RefTranspIdentityBil_t, TranspIdentityBil_t);
    LOpT_Id1D_Space		lOpT_Id1D_x  (basis_intbc, basis_intbc, RefTranspIdentityBil_x, TranspIdentityBil_x);
    LOpT_Lapl1D_Space	lOpT_Lapl1D_x(basis_intbc, basis_intbc, RefTranspLaplaceBil_x, TranspLaplaceBil_x);

    LOp_Conv_Id_2D		localConvectionIdentityOp2D(lOp_Conv1D_t, lOp_Id1D_x);
    LOp_Id_Lapl_2D		localIdentityLaplaceOp2D(lOp_Id1D_t, lOp_Lapl1D_x);
    
    LOpT_Conv_Id_2D		transpLocalConvectionIdentityOp2D(lOpT_Conv1D_t, lOpT_Id1D_x);
    LOpT_Id_Lapl_2D		transpLocalIdentityLaplaceOp2D(lOpT_Id1D_t, lOpT_Lapl1D_x);

    localConvectionIdentityOp2D.setJ(9);
    localIdentityLaplaceOp2D.setJ(9);
    transpLocalConvectionIdentityOp2D.setJ(9);
    transpLocalIdentityLaplaceOp2D.setJ(9);

    // Use CompoundLocalOperator2D
    COp_Heat            localOperator2D(localConvectionIdentityOp2D,localIdentityLaplaceOp2D);
    COpT_Heat           transpLocalOperator2D(transpLocalConvectionIdentityOp2D,transpLocalIdentityLaplaceOp2D);

    // Use FlexibleCompoundLocalOperator2D
//    vector<AbstractLocalOperator2D<T>* > localOperatorVec, transpLocalOperatorVec;
//    localOperatorVec.push_back(&localConvectionIdentityOp2D);
//    localOperatorVec.push_back(&localIdentityLaplaceOp2D);
//    transpLocalOperatorVec.push_back(&transpLocalConvectionIdentityOp2D);
//    transpLocalOperatorVec.push_back(&transpLocalIdentityLaplaceOp2D);
//    FlexibleCompoundLocalOperator2D       localOperator2D(localOperatorVec);
//    FlexibleCompoundLocalOperator2D  	    transpLocalOperator2D(transpLocalOperatorVec);

    /// Initialization of preconditioner
    LeftPrec2D leftPrec(basis2d_test);
    RightPrec2D rightPrec(basis2d_trial);

    NoPrec2D noPrec;

    /// Initialization of rhs

    /// Right Hand Side:
    ///     No Singular Supports in both dimensions
    DenseVectorT sing_support_x;
    DenseVectorT sing_support_t(n+1);
    for(size_t i = 0; i <= n; ++i){
    	sing_support_t(i+1) = i*l;
    }
    ///      Forcing Functions
    SeparableFunction2D<T> F_fct(f_t, sing_support_t, f_x, sing_support_x);
    ///     Peaks: points and corresponding coefficients
    ///             (heights of jumps in derivatives)
    FullColMatrixT nodeltas;
    SeparableRhsIntegral2D			rhs(basis2d_test, F_fct, nodeltas, nodeltas, 20);
    SeparableRhs           			F(rhs,noPrec);

	//===============================================================//
	//===============  AWGM =========================================//
	//===============================================================//


    /* AWGM PG Parameters Default Values
    double tol = 5e-03;
	double alpha = 0.7;
	size_t max_its = 100;
	size_t max_basissize = 400000;
	bool reset_res = false;
	bool print_info = true;
	bool verbose = true;
	bool plot_solution = false;
	bool verbose_extra = false; //(print added wavelet indizes)
	size_t hashmapsize_trial = 10;
	size_t hashmapsize_test = 10;
	std::string info_filename = "awgm_cgls_conv_info.txt";
	std::string plot_filename = "awgm_cgls_u_plot";
	bool write_intermediary_solutions = false;
    std::string intermediary_solutions_filename = "awgm_cgls_u";
	*/

    /* IS Parameters Default Values
	bool adaptive_tol = true;
	size_t max_its = 100;
	double init_tol = 0.001;
	double res_reduction = 0.01;
	double absolute_tol = 1e-8;
	bool verbose = true;
	*/

    // MultitreeAWGM with default values
    //MT_AWGM multitree_awgm(basis2d_trial, basis2d_test, localOperator2D, transLocalOperator2D,
    //    						F, rightPrec, leftPrec);


    // If you want other parameters
    AWGM_PG_Parameters awgm_parameters;
    IS_Parameters cgls_parameters;
    // .... set them here:
    awgm_parameters.max_its = 0;
    awgm_parameters.tol = 1e-04;
    awgm_parameters.plot_solution = false;
    awgm_parameters.verbose_extra = false;
    awgm_parameters.info_filename = "awgm_ExSaw_SG_mv_conv_info.txt";
    awgm_parameters.plot_filename = "awgm_ExSaw_SG_mv_u_plot";
    awgm_parameters.write_intermediary_solutions = true;
    awgm_parameters.max_basissize = 1000000;

    cgls_parameters.adaptive_tol = true;
    cgls_parameters.init_tol = 1e-4;
    cgls_parameters.res_reduction = 0.01;
    cgls_parameters.max_its = 700;

    MT_AWGM multitree_awgm(basis2d_trial, basis2d_test, localOperator2D, transpLocalOperator2D,
    						F, rightPrec, leftPrec, awgm_parameters, cgls_parameters);


    multitree_awgm.awgm_params.print();
    multitree_awgm.is_params.print();

    multitree_awgm.set_sol(dummy);

    for(size_t J = 2; J < Jmax; ++J){

        stringstream filename;
        filename << "awgm_ExSaw_SG_mv_u_J_" << J;
        multitree_awgm.awgm_params.intermediary_solutions_filename = filename.str();

        /// Initialization of solution vector and initial index sets
        Coefficients<Lexicographical,T,Index2D> u;

        T gamma = 0.2;
        IndexSet<Index2D> LambdaTrial, LambdaTest;
        getSparseGridIndexSet(basis2d_trial,LambdaTrial,J,0,gamma);
        getSparseGridIndexSet(basis2d_test ,LambdaTest ,J,1,gamma);

        Timer time;
        time.start();
        multitree_awgm.solve(u, LambdaTrial, LambdaTest);
        time.stop();
        cout << "Solution took " << time.elapsed() << " seconds" << endl;
    }


    return 0;
}
Beispiel #26
0
/*
Estimates a pose transformation from a set of image points whose
correspondence to the tracker's model points is unknown, by alternating a
soft assignment step and POSIT pose updates inside an annealing loop.

Parameters:
  numImagePoints   - number of entries in imagePoints.
  imagePoints      - image points; MODIFIED IN PLACE: every point is first
                     replaced by imgTransform.transform(point).
  initialTransform - starting pose; its inverse rotation and its translation
                     seed the pose vectors.

Returns the transformation built from the final translation and the inverse
of the rotation spanned by the two final pose vectors. If the 4x4 left-hand
side matrix of the pose system is rank deficient, diagnostics are written to
std::cerr and Transform::identity is returned instead.

Side effects: overwrites the member array mpws (homogeneous weights of the
model points) and always prints the final assignment matrix to std::cerr.

NOTE(review): the Sinkhorn normalization loop has no iteration limit; it runs
until the summed maximum row/column change drops to 1.0e-4 or below.
*/
ModelTracker::Transform ModelTracker::softPosit(unsigned int numImagePoints,ModelTracker::ImgPoint imagePoints[],const Transform& initialTransform)
	{
	typedef Transform::Vector Vector;
	
	/* Pre-transform the image points by the image transformation: */
	for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
		imagePoints[ipi]=imgTransform.transform(imagePoints[ipi]);
	
	/* Assign initial homogeneous weights to the model points: */
	for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
		mpws[mpi]=1.0;
	
	/* Create the assignment matrix (one extra "slack" row and column): */
	Math::Matrix m(numImagePoints+1,numModelPoints+1);
	
	/* Initialize the "slack" rows and columns: */
	double gamma=1.0/double(Math::max(numImagePoints,numModelPoints)+1);
	for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
		m(ipi,numModelPoints)=gamma;
	for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
		m(numImagePoints,mpi)=gamma;
	m(numImagePoints,numModelPoints)=gamma;
	
	/* Initialize the pose vectors: */
	Transform::Rotation inverseOrientation=Geometry::invert(initialTransform.getRotation());
	Vector r1=inverseOrientation.getDirection(0);
	Vector r2=inverseOrientation.getDirection(1);
	Vector t=initialTransform.getTranslation();
	double s=-f/t[2]; // f is a member; presumably the focal length -- confirm
	
	/* Perform the deterministic annealing loop (beta grows by 2.5% per pass): */
	for(double beta=0.005;beta<=0.5;beta*=1.025)
		{
		/* Create the initial assignment matrix based on squared distances between projected object points and image points: */
		for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
			for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
				{
				double d2=Math::sqr((r1*modelPoints[mpi]+t[0])*s-mpws[mpi]*imagePoints[ipi][0])
				         +Math::sqr((r2*modelPoints[mpi]+t[1])*s-mpws[mpi]*imagePoints[ipi][1]);
				m(ipi,mpi)=Math::exp(-beta*(d2-maxMatchDist2));
				
				// DEBUGGING
				// std::cout<<' '<<d2;
				}
		// DEBUGGING
		// std::cout<<std::endl;
		
		/* Normalize the assignment matrix using Sinkhorn's method: */
		double rowMaxDelta,colMaxDelta;
		do
			{
			/* Normalize image point rows (slack row excluded, slack column included): */
			rowMaxDelta=0.0;
			for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
				{
				/* Calculate the row sum: */
				double rowSum=0.0;
				for(unsigned int mpi=0;mpi<numModelPoints+1;++mpi)
					rowSum+=m(ipi,mpi);
				
				/* Normalize the row: */
				for(unsigned int mpi=0;mpi<numModelPoints+1;++mpi)
					{
					double oldM=m(ipi,mpi);
					m(ipi,mpi)/=rowSum;
					rowMaxDelta=Math::max(rowMaxDelta,Math::abs(m(ipi,mpi)-oldM));
					}
				}
			
			/* Normalize model point columns (slack column excluded, slack row included): */
			colMaxDelta=0.0;
			for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
				{
				/* Calculate the column sum: */
				double colSum=0.0;
				for(unsigned int ipi=0;ipi<numImagePoints+1;++ipi)
					colSum+=m(ipi,mpi);
				
				/* Normalize the column: */
				for(unsigned int ipi=0;ipi<numImagePoints+1;++ipi)
					{
					double oldM=m(ipi,mpi);
					m(ipi,mpi)/=colSum;
					colMaxDelta=Math::max(colMaxDelta,Math::abs(m(ipi,mpi)-oldM));
					}
				}
			}
		while(rowMaxDelta+colMaxDelta>1.0e-4);
		
		/* Compute the left-hand side of the pose alignment linear system: */
		Math::Matrix lhs(4,4,0.0);
		for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
			{
			const Point& mp=modelPoints[mpi];
			
			/* Calculate the linear equation weight for the model point: */
			double mpWeight=0.0;
			for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
				mpWeight+=m(ipi,mpi);
			
			/* Enter the model point into the pose alignment linear system: */
			for(int i=0;i<3;++i)
				{
				for(int j=0;j<3;++j)
					lhs(i,j)+=mp[i]*mp[j]*mpWeight;
				lhs(i,3)+=mp[i]*mpWeight;
				}
			for(int j=0;j<3;++j)
				lhs(3,j)+=mp[j]*mpWeight;
			lhs(3,3)+=mpWeight;
			}
		
		/* Invert the left-hand side matrix: */
		Math::Matrix lhsInv;
		try
			{
			lhsInv=lhs.inverseFullPivot();
			}
		catch(const Math::Matrix::RankDeficientError&) // by const reference: no copy, no slicing
			{
			std::cerr<<"Left-hand side matrix is rank deficient"<<std::endl;
			for(int i=0;i<4;++i)
				{
				for(int j=0;j<4;++j)
					std::cerr<<"  "<<lhs(i,j);
				std::cerr<<std::endl;
				}
			
			std::cerr<<"Assignment matrix:"<<std::endl;
			for(unsigned int i=0;i<=numImagePoints;++i)
				{
				for(unsigned int j=0;j<=numModelPoints;++j)
					std::cerr<<"  "<<m(i,j);
				std::cerr<<std::endl;
				}
			
			/* Give up and report "no pose": */
			return Transform::identity;
			}
		
		/* Perform a fixed number of iterations of POSIT: */
		for(unsigned int iteration=0;iteration<2U;++iteration)
			{
			/* Compute the right-hand side of the pose alignment linear system: */
			Math::Matrix rhs(4,2,0.0);
			for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
				{
				const Point& mp=modelPoints[mpi];
				
				/* Enter the model point into the pose alignment linear system: */
				double sumX=0.0;
				double sumY=0.0;
				for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
					{
					sumX+=m(ipi,mpi)*imagePoints[ipi][0];
					sumY+=m(ipi,mpi)*imagePoints[ipi][1];
					}
				sumX*=mpws[mpi];
				sumY*=mpws[mpi];
				
				for(int i=0;i<3;++i)
					{
					rhs(i,0)+=sumX*mp[i];
					rhs(i,1)+=sumY*mp[i];
					}
				rhs(3,0)+=sumX;
				rhs(3,1)+=sumY;
				}
			
			/* Solve the pose alignment system: */
			Math::Matrix pose=lhsInv*rhs;
			for(int i=0;i<3;++i)
				{
				r1[i]=pose(i,0);
				r2[i]=pose(i,1);
				}
			
			/* Orthonormalize the pose vectors: */
			double s1=r1.mag();
			double s2=r2.mag();
			Vector r3=Geometry::normalize(r1^r2);
			Vector mid=r1/s1+r2/s2;
			mid/=mid.mag()*Math::sqrt(2.0);
			Vector mid2=r3^mid;
			r1=mid-mid2;
			r2=mid+mid2;
			s=Math::sqrt(s1*s2);
			t[0]=pose(3,0)/s;
			t[1]=pose(3,1)/s;
			t[2]=-f/s;
			
			/* Update the object points' homogeneous weights: */
			for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
				mpws[mpi]=(r3*modelPoints[mpi])/t[2]+1.0;
			}
		
		// DEBUGGING
		// std::cout<<"Intermediate: "<<Transform(t,Geometry::invert(Transform::Rotation::fromBaseVectors(r1,r2)))<<std::endl;
		}
	
	// DEBUGGING
	std::cerr<<"Final assignment matrix:"<<std::endl;
	for(unsigned int i=0;i<=numImagePoints;++i)
		{
		for(unsigned int j=0;j<=numModelPoints;++j)
			std::cerr<<"  "<<m(i,j);
		std::cerr<<std::endl;
		}
	
	/* Return the result transformation: */
	return Transform(t,Geometry::invert(Transform::Rotation::fromBaseVectors(r1,r2)));
	}
Beispiel #27
0
int
CVODEModel::evaluateRHSFunction(
   double time,
   SundialsAbstractVector* y,
   SundialsAbstractVector* y_dot)
{
   /*
    * Convert Sundials vectors to SAMRAI vectors
    */
   std::shared_ptr<SAMRAIVectorReal<double> > y_samvect(
      Sundials_SAMRAIVector::getSAMRAIVector(y));
   std::shared_ptr<SAMRAIVectorReal<double> > y_dot_samvect(
      Sundials_SAMRAIVector::getSAMRAIVector(y_dot));

   std::shared_ptr<PatchHierarchy> hierarchy(y_samvect->getPatchHierarchy());

   /*
    * Compute max norm of solution vector.
    */
   //std::shared_ptr<HierarchyDataOpsReal<double> > hierops(
   //   new HierarchyCellDataOpsReal<double>(hierarchy));
   //double max_norm = hierops->maxNorm(y_samvect->
   //                                   getComponentDescriptorIndex(0));

   if (d_print_solver_info) {
      pout << "\t\tEval RHS: "
           << "\n   \t\t\ttime = " << time
           << "\n   \t\t\ty_maxnorm = " << y_samvect->maxNorm()
           << endl;
   }

   /*
    * Allocate scratch space and fill ghost cells in the solution vector
    * 1) Create a refine algorithm
    * 2) Register with the algorithm the current & scratch space, along
    *    with a refine operator.
    * 3) Use the refine algorithm to construct a refine schedule
    * 4) Use the refine schedule to fill data on fine level.
    */
   std::shared_ptr<RefineAlgorithm> bdry_fill_alg(
      new RefineAlgorithm());
   std::shared_ptr<RefineOperator> refine_op(d_grid_geometry->
                                               lookupRefineOperator(d_soln_var,
                                                  "CONSERVATIVE_LINEAR_REFINE"));
   bdry_fill_alg->registerRefine(d_soln_scr_id,  // dest
      y_samvect->
      getComponentDescriptorIndex(0),                            // src
      d_soln_scr_id,                            // scratch
      refine_op);

   for (int ln = hierarchy->getFinestLevelNumber(); ln >= 0; --ln) {
      std::shared_ptr<PatchLevel> level(hierarchy->getPatchLevel(ln));
      if (!level->checkAllocated(d_soln_scr_id)) {
         level->allocatePatchData(d_soln_scr_id);
      }

      // Note:  a pointer to "this" tells the refine schedule to invoke
      // the setPhysicalBCs defined in this class.
      std::shared_ptr<RefineSchedule> bdry_fill_alg_schedule(
         bdry_fill_alg->createSchedule(level,
            ln - 1,
            hierarchy,
            this));

      bdry_fill_alg_schedule->fillData(time);
   }

   /*
    * Step through the levels and compute rhs
    */
   for (int ln = hierarchy->getFinestLevelNumber(); ln >= 0; --ln) {
      std::shared_ptr<PatchLevel> level(hierarchy->getPatchLevel(ln));

      for (PatchLevel::iterator ip(level->begin()); ip != level->end(); ++ip) {
         const std::shared_ptr<Patch>& patch = *ip;

         std::shared_ptr<CellData<double> > y(
            SAMRAI_SHARED_PTR_CAST<CellData<double>, PatchData>(
               patch->getPatchData(d_soln_scr_id)));
         std::shared_ptr<SideData<double> > diff(
            SAMRAI_SHARED_PTR_CAST<SideData<double>, PatchData>(
               patch->getPatchData(d_diff_id)));
         std::shared_ptr<CellData<double> > rhs(
            SAMRAI_SHARED_PTR_CAST<CellData<double>, PatchData>(
               patch->getPatchData(y_dot_samvect->getComponentDescriptorIndex(0))));
         TBOX_ASSERT(y);
         TBOX_ASSERT(diff);
         TBOX_ASSERT(rhs);

         const Index ifirst(patch->getBox().lower());
         const Index ilast(patch->getBox().upper());

         const std::shared_ptr<CartesianPatchGeometry> patch_geom(
            SAMRAI_SHARED_PTR_CAST<CartesianPatchGeometry, PatchGeometry>(
               patch->getPatchGeometry()));
         TBOX_ASSERT(patch_geom);
         const double* dx = patch_geom->getDx();

         IntVector ghost_cells(y->getGhostCellWidth());

         /*
          * 1 eqn radiation diffusion
          */
         if (d_dim == Dimension(2)) {
            SAMRAI_F77_FUNC(comprhs2d, COMPRHS2D) (
               ifirst(0), ilast(0),
               ifirst(1), ilast(1),
               ghost_cells(0), ghost_cells(1),
               dx,
               y->getPointer(),
               diff->getPointer(0),
               diff->getPointer(1),
               rhs->getPointer());
         } else if (d_dim == Dimension(3)) {
            SAMRAI_F77_FUNC(comprhs3d, COMPRHS3D) (
               ifirst(0), ilast(0),
               ifirst(1), ilast(1),
               ifirst(2), ilast(2),
               ghost_cells(0), ghost_cells(1),
               ghost_cells(2),
               dx,
               y->getPointer(),
               diff->getPointer(0),
               diff->getPointer(1),
               diff->getPointer(2),
               rhs->getPointer());
         }

      } // loop over patches
   } // loop over levels

   /*
    * Deallocate scratch space.
    */
   for (int ln = hierarchy->getFinestLevelNumber(); ln >= 0; --ln) {
      hierarchy->getPatchLevel(ln)->deallocatePatchData(d_soln_scr_id);
   }

   /*
    * record current time and increment counter for number of RHS
    * evaluations.
    */
   d_current_soln_time = time;
   ++d_number_rhs_eval;

   return 0;
}
//--------------------------------------------------------------------------
//-------- execute ---------------------------------------------------------
//--------------------------------------------------------------------------
void
AssembleContinuityEdgeSolverAlgorithm::execute()
{

  stk::mesh::MetaData & meta_data = realm_.meta_data();

  const int nDim = meta_data.spatial_dimension();

  // extract noc
  const std::string dofName = "pressure";
  const double nocFac
    = (realm_.get_noc_usage(dofName) == true) ? 1.0 : 0.0;

  // time step
  const double dt = realm_.get_time_step();
  const double gamma1 = realm_.get_gamma1();
  const double projTimeScale = dt/gamma1;

  // deal with interpolation procedure
  const double interpTogether = realm_.get_mdot_interp();
  const double om_interpTogether = 1.0-interpTogether;
  
  // space for LHS/RHS; always nodesPerEdge*nodesPerEdge and nodesPerEdge
  std::vector<double> lhs(4);
  std::vector<double> rhs(2);
  std::vector<stk::mesh::Entity> connected_nodes(2);

  // area vector; gather into
  std::vector<double> areaVec(nDim);

  // pointers for fast access
  double *p_lhs = &lhs[0];
  double *p_rhs = &rhs[0];
  double *p_areaVec = &areaVec[0];

  // mesh motion
  std::vector<double> vrtmL(nDim);
  std::vector<double> vrtmR(nDim);
  double * p_vrtmL = &vrtmL[0];
  double * p_vrtmR = &vrtmR[0];

  // deal with state
  VectorFieldType &velocityNp1 = velocity_->field_of_state(stk::mesh::StateNP1);
  ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1);

  // define some common selectors
  stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part()
    &stk::mesh::selectUnion(partVec_);

  stk::mesh::BucketVector const& edge_buckets =
    realm_.get_buckets( stk::topology::EDGE_RANK, s_locally_owned_union );
  for ( stk::mesh::BucketVector::const_iterator ib = edge_buckets.begin();
        ib != edge_buckets.end() ; ++ib ) {
    stk::mesh::Bucket & b = **ib ;
    const stk::mesh::Bucket::size_type length   = b.size();

    // pointer to edge area vector
    const double * av = stk::mesh::field_data(*edgeAreaVec_, b);

    for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) {

      // sanity check on number or nodes
      ThrowAssert( b.num_nodes(k) == 2 );

      stk::mesh::Entity const * edge_node_rels = b.begin_nodes(k);

      // pointer to edge area vector
      for ( int j = 0; j < nDim; ++j )
        p_areaVec[j] = av[k*nDim+j];

      // left and right nodes
      stk::mesh::Entity nodeL = edge_node_rels[0];
      stk::mesh::Entity nodeR = edge_node_rels[1];

      connected_nodes[0] = nodeL;
      connected_nodes[1] = nodeR;

      // extract nodal fields
      const double * coordL = stk::mesh::field_data(*coordinates_, nodeL);
      const double * coordR = stk::mesh::field_data(*coordinates_, nodeR);

      const double * GpdxL = stk::mesh::field_data(*Gpdx_, nodeL);
      const double * GpdxR = stk::mesh::field_data(*Gpdx_, nodeR);

      const double * velocityNp1L = stk::mesh::field_data(velocityNp1, nodeL);
      const double * velocityNp1R = stk::mesh::field_data(velocityNp1, nodeR);

      const double pressureL = *stk::mesh::field_data(*pressure_, nodeL);
      const double pressureR = *stk::mesh::field_data(*pressure_, nodeR);

      const double densityL = *stk::mesh::field_data(densityNp1, nodeL);
      const double densityR = *stk::mesh::field_data(densityNp1, nodeR);

      // copy to velcoity relative to mesh
      for ( int j = 0; j < nDim; ++j ) {
        p_vrtmL[j] = velocityNp1L[j];
        p_vrtmR[j] = velocityNp1R[j];
      }

      // deal with mesh motion
      if ( meshMotion_ ) {
        const double * meshVelocityL = stk::mesh::field_data(*meshVelocity_, nodeL );
        const double * meshVelocityR = stk::mesh::field_data(*meshVelocity_, nodeR );
        for (int j = 0; j < nDim; ++j ) {
          p_vrtmL[j] -= meshVelocityL[j];
          p_vrtmR[j] -= meshVelocityR[j];
        }
      }

      // compute geometry
      double axdx = 0.0;
      double asq = 0.0;
      for ( int j = 0; j < nDim; ++j ) {
        const double axj = p_areaVec[j];
        const double dxj = coordR[j] - coordL[j];
        asq += axj*axj;
        axdx += axj*dxj;
      }

      const double inv_axdx = 1.0/axdx;
      const double rhoIp = 0.5*(densityR + densityL);

      //  mdot
      double tmdot = -projTimeScale*(pressureR - pressureL)*asq*inv_axdx;
      for ( int j = 0; j < nDim; ++j ) {
        const double axj = p_areaVec[j];
        const double dxj = coordR[j] - coordL[j];
        const double kxj = axj - asq*inv_axdx*dxj; // NOC
        const double rhoUjIp = 0.5*(densityR*p_vrtmR[j] + densityL*p_vrtmL[j]);
        const double ujIp = 0.5*(p_vrtmR[j] + p_vrtmL[j]);
        const double GjIp = 0.5*(GpdxR[j] + GpdxL[j]);
        tmdot += (interpTogether*rhoUjIp + om_interpTogether*rhoIp*ujIp + projTimeScale*GjIp)*axj 
          - projTimeScale*kxj*GjIp*nocFac;
      }

      const double lhsfac = -asq*inv_axdx;

      /*
        lhs[0] = IL,IL; lhs[1] = IL,IR; IR,IL; IR,IR
      */

      // first left
      p_lhs[0] = -lhsfac;
      p_lhs[1] = +lhsfac;
      p_rhs[0] = -tmdot/projTimeScale;

      // now right
      p_lhs[2] = +lhsfac;
      p_lhs[3] = -lhsfac;
      p_rhs[1] = tmdot/projTimeScale;

      apply_coeff(connected_nodes, rhs, lhs, __FILE__);

    }
  }
}
Beispiel #29
0
int main(int argc, char* argv[])
{
  BoxLib::Initialize(argc,argv);

  BL_PROFILE_VAR("main()", pmain);

  std::cout << std::setprecision(15);

  solver_type = BoxLib_C;
  bc_type = Periodic;

  Real     a = 0.0;
  Real     b = 1.0;

  // ---- First use the number of processors to decide how many grids you have.
  // ---- We arbitrarily decide to have one grid per MPI process in a uniform
  // ---- cubic domain, so we require that the number of processors be N^3.
  // ---- This requirement is somewhat arbitrary, but convenient for now.

  int nprocs = ParallelDescriptor::NProcs();

  // N is the cube root of the number of processors
  int N(0);
  for(int i(1); i*i*i <= nprocs; ++i) {
    if(i*i*i == nprocs) {
      N = i;
    }
  }

  if(N == 0) {  // not a cube
    if(ParallelDescriptor::IOProcessor()) {
      std::cerr << "**** Error:  nprocs = " << nprocs << " is not currently supported." << std::endl;
    }
    BoxLib::Error("We require that the number of processors be a perfect cube");
  }
  if(ParallelDescriptor::IOProcessor()) {
    std::cout << "N = " << N << std::endl;
  }


  // ---- make a box, then a boxarray with maxSize
  int domain_hi = (N*maxGrid) - 1;
  Box domain(IntVect(0,0,0), IntVect(domain_hi,domain_hi,domain_hi));
  BoxArray bs(domain);
  bs.maxSize(maxGrid);

  // This defines the physical size of the box.  Right now the box is [0,1] in each direction.
  RealBox real_box;
  for (int n = 0; n < BL_SPACEDIM; n++) {
    real_box.setLo(n, 0.0);
    real_box.setHi(n, 1.0);
  }
 
  // This says we are using Cartesian coordinates
  int coord = 0;
  
  // This sets the boundary conditions to be periodic or not
  int is_per[BL_SPACEDIM];
  
  if (bc_type == Dirichlet || bc_type == Neumann) {
    for (int n = 0; n < BL_SPACEDIM; n++) is_per[n] = 0;
  } 
  else {
    for (int n = 0; n < BL_SPACEDIM; n++) is_per[n] = 1;
  }
 
  // This defines a Geometry object which is useful for writing the plotfiles
  Geometry geom(domain,&real_box,coord,is_per);

  for ( int n=0; n<BL_SPACEDIM; n++ ) {
    dx[n] = ( geom.ProbHi(n) - geom.ProbLo(n) )/domain.length(n);
  }

  if (ParallelDescriptor::IOProcessor()) {
     std::cout << "Domain size     : " << N << std::endl;
     std::cout << "Max_grid_size   : " << maxGrid << std::endl;
     std::cout << "Number of grids : " << bs.size() << std::endl;
  }

  // Allocate and define the right hand side.
  MultiFab rhs(bs, Ncomp, 0, Fab_allocate); 
  setup_rhs(rhs, geom, a, b);

  MultiFab alpha(bs, Ncomp, 0, Fab_allocate);
  MultiFab beta[BL_SPACEDIM];
  for ( int n=0; n<BL_SPACEDIM; ++n ) {
    BoxArray bx(bs);
    beta[n].define(bx.surroundingNodes(n), Ncomp, 1, Fab_allocate);
  }

  setup_coeffs(bs, alpha, beta, geom);

  MultiFab anaSoln;
  if (comp_norm) {
    anaSoln.define(bs, Ncomp, 0, Fab_allocate);
    compute_analyticSolution(anaSoln);
  }

  // Allocate the solution array 
  // Set the number of ghost cells in the solution array.
  MultiFab soln(bs, Ncomp, 1, Fab_allocate);

  solve(soln, anaSoln, a, b, alpha, beta, rhs, bs, geom, BoxLib_C);

  BL_PROFILE_VAR_STOP(pmain);

  BoxLib::Finalize();
}
Beispiel #30
0
/**
 * Advances the velocity and pressure fields by one time step using a
 * three-stage split scheme:
 *   STEP 1 - auxiliary velocity increment (deltaAuxVelocity),
 *   STEP 2 - implicit pressure solve,
 *   STEP 3 - velocity correction.
 *
 * On the first call (initFlag set) the pressure matrix and the mass
 * matrix/vector are built; afterwards they are re-assembled every step only
 * when a material interface is present.
 *
 * @param tStep Time step being solved; its state counter is incremented once
 *              the new velocity has been computed.
 */
void
CBS :: solveYourselfAt(TimeStep *tStep)
{
    // Equation counts for the momentum, pressure and prescribed-pressure numberings.
    int momneq = this->giveNumberOfDomainEquations(1, vnum);
    int presneq = this->giveNumberOfDomainEquations(1, pnum);
    int presneq_prescribed = this->giveNumberOfDomainEquations(1, pnumPrescribed);
    double deltaT = tStep->giveTimeIncrement();

    FloatArray rhs(momneq);

    if ( initFlag ) {
        // One-time setup of the work vectors and system matrices.
        deltaAuxVelocity.resize(momneq);

        // Assembled once here; used below as the divisor of the assembled
        // prescribed traction pressure.
        nodalPrescribedTractionPressureConnectivity.resize(presneq_prescribed);
        nodalPrescribedTractionPressureConnectivity.zero();
        this->assembleVectorFromElements( nodalPrescribedTractionPressureConnectivity, tStep,
                                         NumberOfNodalPrescribedTractionPressureAssembler(), VM_Total,
                                         pnumPrescribed, this->giveDomain(1) );


        // Pressure system matrix, scaled by deltaT * theta1 * theta2.
        lhs.reset( classFactory.createSparseMtrx(sparseMtrxType) );
        if ( !lhs ) {
            OOFEM_ERROR("sparse matrix creation failed");
        }

        lhs->buildInternalStructure(this, 1, pnum);

        this->assemble( *lhs, stepWhenIcApply.get(), PressureLhsAssembler(),
                       pnum, this->giveDomain(1) );
        lhs->times(deltaT * theta1 * theta2);

        if ( consistentMassFlag ) {
            // Consistent mass: sparse mass matrix on the velocity numbering.
            mss.reset( classFactory.createSparseMtrx(sparseMtrxType) );
            if ( !mss ) {
                OOFEM_ERROR("sparse matrix creation failed");
            }

            mss->buildInternalStructure(this, 1, vnum);
            this->assemble( *mss, stepWhenIcApply.get(), MassMatrixAssembler(),
                           vnum, this->giveDomain(1) );
        } else {
            // Lumped mass: a vector with one entry per momentum equation.
            mm.resize(momneq);
            mm.zero();
            this->assembleVectorFromElements( mm, tStep, LumpedMassVectorAssembler(), VM_Total,
                                             vnum, this->giveDomain(1) );
        }

        //<RESTRICTED_SECTION>
        // init material interface
        if ( materialInterface ) {
            materialInterface->initialize();
        }

        //</RESTRICTED_SECTION>
        initFlag = 0;
    }
    //<RESTRICTED_SECTION>
    else if ( materialInterface ) {
        // With a material interface the matrices are re-assembled on every
        // step after the first.
        lhs->zero();
        this->assemble( *lhs, stepWhenIcApply.get(), PressureLhsAssembler(),
                       pnum, this->giveDomain(1) );
        lhs->times(deltaT * theta1 * theta2);

        if ( consistentMassFlag ) {
            mss->zero();
            this->assemble( *mss, stepWhenIcApply.get(), MassMatrixAssembler(),
                           vnum, this->giveDomain(1) );
        } else {
            mm.zero();
            this->assembleVectorFromElements( mm, tStep, LumpedMassVectorAssembler(), VM_Total,
                                             vnum, this->giveDomain(1) );
        }
    }

    //</RESTRICTED_SECTION>

    if ( tStep->isTheFirstStep() ) {
        // Initial conditions are applied to the step preceding the first one.
        // The local is deliberately not called stepWhenIcApply, so it cannot
        // shadow the object of that name used above.
        TimeStep *icStep = tStep->givePreviousStep();
        this->applyIC(icStep);
    }

    VelocityField.advanceSolution(tStep);
    PressureField.advanceSolution(tStep);
    FloatArray *velocityVector = VelocityField.giveSolutionVector(tStep);
    FloatArray *prevVelocityVector = VelocityField.giveSolutionVector( tStep->givePreviousStep() );
    FloatArray *pressureVector = PressureField.giveSolutionVector(tStep);
    FloatArray *prevPressureVector = PressureField.giveSolutionVector( tStep->givePreviousStep() );

    velocityVector->resize(momneq);
    pressureVector->resize(presneq);

    // Request the numerical method before the first solve: nMethod is
    // dereferenced in STEP 1 when consistentMassFlag is set, which is earlier
    // than the pressure solve where this request used to sit.
    // NOTE(review): presumably giveNumericalMethod() is what sets up nMethod
    // (its return value is not used here) - confirm.
    this->giveNumericalMethod( this->giveCurrentMetaStep() );

    /* STEP 1 - calculates auxiliary velocities*/
    rhs.zero();
    // Depends on old v:
    this->assembleVectorFromElements( rhs, tStep, IntermediateConvectionDiffusionAssembler(), VM_Total, vnum, this->giveDomain(1) );
    //this->assembleVectorFromElements(mm, tStep, LumpedMassVectorAssembler(), VM_Total, this->giveDomain(1));

    if ( consistentMassFlag ) {
        rhs.times(deltaT);
        // Depends on prescribed v
        this->assembleVectorFromElements( rhs, tStep, PrescribedVelocityRhsAssembler(), VM_Total, vnum, this->giveDomain(1) );
        nMethod->solve(*mss, rhs, deltaAuxVelocity);
    } else {
        // Lumped mass: entry-wise division replaces the linear solve.
        for ( int i = 1; i <= momneq; i++ ) {
            deltaAuxVelocity.at(i) = deltaT * rhs.at(i) / mm.at(i);
        }
    }

    /* STEP 2 - calculates pressure (implicit solver) */
    this->prescribedTractionPressure.resize(presneq_prescribed);
    this->prescribedTractionPressure.zero();
    this->assembleVectorFromElements( prescribedTractionPressure, tStep,
                                     DensityPrescribedTractionPressureAssembler(), VM_Total,
                                     pnumPrescribed, this->giveDomain(1) );
    // Divide each assembled entry by the matching connectivity entry.
    for ( int i = 1; i <= presneq_prescribed; i++ ) {
        prescribedTractionPressure.at(i) /= nodalPrescribedTractionPressureConnectivity.at(i);
    }

    // DensityRhsVelocityTerms needs this: Current velocity without correction;
    * velocityVector = * prevVelocityVector;
    velocityVector->add(this->theta1, deltaAuxVelocity);

    // Depends on old V + deltaAuxV * theta1 and p:
    rhs.resize(presneq);
    rhs.zero();
    this->assembleVectorFromElements( rhs, tStep, DensityRhsAssembler(), VM_Total,
                                     pnum, this->giveDomain(1) );
    nMethod->solve(*lhs, rhs, *pressureVector);
    // The solved vector is scaled by theta2 and added to the previous pressure.
    pressureVector->times(this->theta2);
    pressureVector->add(* prevPressureVector);

    /* STEP 3 - velocity correction step */
    rhs.resize(momneq);
    rhs.zero();
    // Depends on p:
    this->assembleVectorFromElements( rhs, tStep, CorrectionRhsAssembler(), VM_Total,
                                     vnum, this->giveDomain(1) );
    if ( consistentMassFlag ) {
        rhs.times(deltaT);
        //this->assembleVectorFromElements(rhs, tStep, PrescribedRhsAssembler(), VM_Incremental, vnum, this->giveDomain(1));
        nMethod->solve(*mss, rhs, *velocityVector);
        velocityVector->add(deltaAuxVelocity);
        velocityVector->add(* prevVelocityVector);
    } else {
        // New velocity = previous velocity + auxiliary increment + correction.
        for ( int i = 1; i <= momneq; i++ ) {
            velocityVector->at(i) = prevVelocityVector->at(i) + deltaAuxVelocity.at(i) + deltaT *rhs.at(i) / mm.at(i);
        }
    }

    // update solution state counter
    tStep->incrementStateCounter();

    //<RESTRICTED_SECTION>
    if ( materialInterface ) {
#ifdef TIME_REPORT
        Timer timer;
        timer.startTimer();
#endif
        materialInterface->updatePosition( this->giveCurrentStep() );
#ifdef TIME_REPORT
        timer.stopTimer();
        OOFEM_LOG_INFO( "CBS info: user time consumed by updating interfaces: %.2fs\n", timer.getUtime() );
#endif
    }

    //</RESTRICTED_SECTION>
}