void run_test_cases( BOOST_EXPLICIT_TEMPLATE_TYPE(Block) )
{
  // Drives every bitset_test<> check below against dynamic_bitset<Block>.
  // Each scenario lives in its own scope so that the bitsets it builds are
  // destroyed before the next scenario starts.
  typedef boost::dynamic_bitset<Block> bitset_type;
  typedef bitset_test<bitset_type> Tests;
  // typedef typename bitset_type::size_type size_type; // unusable with Borland 5.5.1

  // Inputs shared by many scenarios.
  const std::string zero_str("0");
  const std::string one_str("1");
  const std::string long_bits = get_long_string();
  const std::size_t mid = long_bits.size() / 2;   // index flipped to make two equal bitsets differ
  const std::size_t ul_width = std::numeric_limits<unsigned long>::digits;

  //=====================================================================
  // Test b.empty()
  {
    bitset_type b;
    Tests::empty(b);
  }
  {
    bitset_type b(1, 1ul);
    Tests::empty(b);
  }
  {
    bitset_type b(bitset_type::bits_per_block
                  + bitset_type::bits_per_block/2, 15ul);
    Tests::empty(b);
  }

  //=====================================================================
  // Test b.to_ulong()
  {
    bitset_type b;
    Tests::to_ulong(b);
  }
  {
    bitset_type b(one_str);
    Tests::to_ulong(b);
  }
  {
    bitset_type b(bitset_type::bits_per_block,
                  static_cast<unsigned long>(-1));
    Tests::to_ulong(b);
  }
  {
    // one bit narrower than unsigned long
    std::string almost_full(ul_width - 1, '1');
    bitset_type b(almost_full);
    Tests::to_ulong(b);
  }
  {
    // exactly as wide as unsigned long
    std::string full(ul_width, '1');
    bitset_type b(full);
    Tests::to_ulong(b);
  }
  {
    // case overflow
    bitset_type b(long_bits);
    Tests::to_ulong(b);
  }

  //=====================================================================
  // Test to_string(b, str)
  {
    bitset_type b;
    Tests::to_string(b);
  }
  {
    bitset_type b(zero_str);
    Tests::to_string(b);
  }
  {
    bitset_type b(long_bits);
    Tests::to_string(b);
  }

  //=====================================================================
  // Test b.count()
  {
    bitset_type b;
    Tests::count(b);
  }
  {
    bitset_type b(zero_str);
    Tests::count(b);
  }
  {
    bitset_type b(one_str);
    Tests::count(b);
  }
  {
    bitset_type b(8, 255ul);
    Tests::count(b);
  }
  {
    bitset_type b(long_bits);
    Tests::count(b);
  }

  //=====================================================================
  // Test b.size()
  {
    bitset_type b;
    Tests::size(b);
  }
  {
    bitset_type b(zero_str);
    Tests::size(b);
  }
  {
    bitset_type b(long_bits);
    Tests::size(b);
  }

  //=====================================================================
  // Test b.any()
  {
    bitset_type b;
    Tests::any(b);
  }
  {
    bitset_type b(zero_str);
    Tests::any(b);
  }
  {
    bitset_type b(long_bits);
    Tests::any(b);
  }

  //=====================================================================
  // Test b.none()
  {
    bitset_type b;
    Tests::none(b);
  }
  {
    bitset_type b(zero_str);
    Tests::none(b);
  }
  {
    bitset_type b(long_bits);
    Tests::none(b);
  }

  //=====================================================================
  // Test a.is_subset_of(b)
  {
    bitset_type a;
    bitset_type b;
    Tests::subset(a, b);
  }
  {
    bitset_type a(zero_str);
    bitset_type b(zero_str);
    Tests::subset(a, b);
  }
  {
    bitset_type a(one_str);
    bitset_type b(one_str);
    Tests::subset(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    Tests::subset(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    a[mid].flip();
    Tests::subset(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    b[mid].flip();
    Tests::subset(a, b);
  }

  //=====================================================================
  // Test a.is_proper_subset_of(b)
  {
    bitset_type a;
    bitset_type b;
    Tests::proper_subset(a, b);
  }
  {
    bitset_type a(zero_str);
    bitset_type b(zero_str);
    Tests::proper_subset(a, b);
  }
  {
    bitset_type a(one_str);
    bitset_type b(one_str);
    Tests::proper_subset(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    Tests::proper_subset(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    a[mid].flip();
    Tests::proper_subset(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    b[mid].flip();
    Tests::proper_subset(a, b);
  }

  //=====================================================================
  // Test intersects
  {
    // both operands empty
    bitset_type a;
    bitset_type b;
    Tests::intersects(a, b);
  }
  {
    bitset_type a;
    bitset_type b(5, 8ul);
    Tests::intersects(a, b);
  }
  {
    bitset_type a(8, 0ul);
    bitset_type b(15, 0ul);
    b[9] = 1;
    Tests::intersects(a, b);
  }
  {
    bitset_type a(15, 0ul);
    bitset_type b(22, 0ul);
    a[14] = b[14] = 1;
    Tests::intersects(a, b);
  }

  //=====================================================================
  // Test find_first
  {
    // empty bitset
    bitset_type b;
    Tests::find_first(b);
  }
  {
    // bitset of size 1
    bitset_type b(1, 1ul);
    Tests::find_first(b);
  }
  {
    // all-0s bitset
    bitset_type b(4 * bitset_type::bits_per_block, 0ul);
    Tests::find_first(b);
  }
  {
    // first bit on
    bitset_type b(1, 1ul);
    Tests::find_first(b);
  }
  {
    // last bit on
    bitset_type b(4 * bitset_type::bits_per_block - 1, 0ul);
    b.set(b.size() - 1);
    Tests::find_first(b);
  }

  //=====================================================================
  // Test find_next
  {
    // empty bitset
    bitset_type b;

    // check
    Tests::find_next(b, 0);
    Tests::find_next(b, 1);
    Tests::find_next(b, 200);
    Tests::find_next(b, b.npos);
  }
  {
    // bitset of size 1 (find_next can never find)
    bitset_type b(1, 1ul);

    // check
    Tests::find_next(b, 0);
    Tests::find_next(b, 1);
    Tests::find_next(b, 200);
    Tests::find_next(b, b.npos);
  }
  {
    // all-1s bitset
    bitset_type b(16 * bitset_type::bits_per_block);
    b.set();

    // check: probe every position, including a few past the end
    const typename bitset_type::size_type past_end = 5 + b.size();
    for (typename bitset_type::size_type pos = 0; pos <= past_end; ++pos)
    {
      Tests::find_next(b, pos);
    }
    Tests::find_next(b, b.npos);
  }
  {
    // a bitset with 1s at block boundary only
    const int num_blocks = 32;
    const int block_width = bitset_type::bits_per_block;

    bitset_type b(num_blocks * block_width);
    typename bitset_type::size_type pos = block_width - 1;
    while (pos < b.size())
    {
      // set the last bit of the block, then the first bit of the same block
      b.set(pos);
      typename bitset_type::size_type block_start = pos - (block_width - 1);
      b.set(block_start);
      pos += block_width;
    }

    // check
    const typename bitset_type::size_type past_end = 5 + b.size();
    for (pos = 0; pos <= past_end; ++pos)
    {
      Tests::find_next(b, pos);
    }
    Tests::find_next(b, b.npos);
  }
  {
    // bitset with alternate 1s and 0s
    const typename bitset_type::size_type sz = 1000;
    bitset_type b(sz);

    typename bitset_type::size_type pos = 0;
    while (pos < sz)
    {
      b[pos] = (pos%2 == 0);
      ++pos;
    }

    // check
    const typename bitset_type::size_type past_end = 5 + b.size();
    for (pos = 0; pos <= past_end; ++pos)
    {
      Tests::find_next(b, pos);
    }
    Tests::find_next(b, b.npos);
  }

  //=====================================================================
  // Test operator==
  {
    bitset_type a;
    bitset_type b;
    Tests::operator_equal(a, b);
  }
  {
    bitset_type a(zero_str);
    bitset_type b(zero_str);
    Tests::operator_equal(a, b);
  }
  {
    bitset_type a(one_str);
    bitset_type b(one_str);
    Tests::operator_equal(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    Tests::operator_equal(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    a[mid].flip();
    Tests::operator_equal(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    b[mid].flip();
    Tests::operator_equal(a, b);
  }

  //=====================================================================
  // Test operator!=
  {
    bitset_type a;
    bitset_type b;
    Tests::operator_not_equal(a, b);
  }
  {
    bitset_type a(zero_str);
    bitset_type b(zero_str);
    Tests::operator_not_equal(a, b);
  }
  {
    bitset_type a(one_str);
    bitset_type b(one_str);
    Tests::operator_not_equal(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    Tests::operator_not_equal(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    a[mid].flip();
    Tests::operator_not_equal(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    b[mid].flip();
    Tests::operator_not_equal(a, b);
  }

  //=====================================================================
  // Test operator<
  {
    bitset_type a;
    bitset_type b;
    Tests::operator_less_than(a, b);
  }
  {
    bitset_type a(zero_str);
    bitset_type b(zero_str);
    Tests::operator_less_than(a, b);
  }
  {
    bitset_type a(one_str);
    bitset_type b(one_str);
    Tests::operator_less_than(a, b);
  }
  {
    bitset_type a(std::string("10"));
    bitset_type b(std::string("11"));
    Tests::operator_less_than(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    Tests::operator_less_than(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    a[mid].flip();
    Tests::operator_less_than(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    b[mid].flip();
    Tests::operator_less_than(a, b);
  }
  // check for consistency with ulong behaviour
  {
    bitset_type a(3, 4ul);
    bitset_type b(3, 5ul);
    assert(a < b);
  }
  {
    bitset_type a(3, 4ul);
    bitset_type b(3, 4ul);
    assert(!(a < b));
  }
  {
    bitset_type a(3, 5ul);
    bitset_type b(3, 4ul);
    assert(!(a < b));
  }

  //=====================================================================
  // Test operator<=
  {
    bitset_type a;
    bitset_type b;
    Tests::operator_less_than_eq(a, b);
  }
  {
    bitset_type a(zero_str);
    bitset_type b(zero_str);
    Tests::operator_less_than_eq(a, b);
  }
  {
    bitset_type a(one_str);
    bitset_type b(one_str);
    Tests::operator_less_than_eq(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    Tests::operator_less_than_eq(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    a[mid].flip();
    Tests::operator_less_than_eq(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    b[mid].flip();
    Tests::operator_less_than_eq(a, b);
  }
  // check for consistency with ulong behaviour
  {
    bitset_type a(3, 4ul);
    bitset_type b(3, 5ul);
    assert(a <= b);
  }
  {
    bitset_type a(3, 4ul);
    bitset_type b(3, 4ul);
    assert(a <= b);
  }
  {
    bitset_type a(3, 5ul);
    bitset_type b(3, 4ul);
    assert(!(a <= b));
  }

  //=====================================================================
  // Test operator>
  {
    bitset_type a;
    bitset_type b;
    Tests::operator_greater_than(a, b);
  }
  {
    bitset_type a(zero_str);
    bitset_type b(zero_str);
    Tests::operator_greater_than(a, b);
  }
  {
    bitset_type a(one_str);
    bitset_type b(one_str);
    Tests::operator_greater_than(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    Tests::operator_greater_than(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    a[mid].flip();
    Tests::operator_greater_than(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    b[mid].flip();
    Tests::operator_greater_than(a, b);
  }
  // check for consistency with ulong behaviour
  {
    bitset_type a(3, 4ul);
    bitset_type b(3, 5ul);
    assert(!(a > b));
  }
  {
    bitset_type a(3, 4ul);
    bitset_type b(3, 4ul);
    assert(!(a > b));
  }
  {
    bitset_type a(3, 5ul);
    bitset_type b(3, 4ul);
    assert(a > b);
  }

  //=====================================================================
  // Test operator>=
  {
    bitset_type a;
    bitset_type b;
    Tests::operator_greater_than_eq(a, b);
  }
  {
    bitset_type a(zero_str);
    bitset_type b(zero_str);
    Tests::operator_greater_than_eq(a, b);
  }
  {
    bitset_type a(one_str);
    bitset_type b(one_str);
    Tests::operator_greater_than_eq(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    Tests::operator_greater_than_eq(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    a[mid].flip();
    Tests::operator_greater_than_eq(a, b);
  }
  {
    bitset_type a(long_bits);
    bitset_type b(long_bits);
    b[mid].flip();
    Tests::operator_greater_than_eq(a, b);
  }
  // check for consistency with ulong behaviour
  {
    bitset_type a(3, 4ul);
    bitset_type b(3, 5ul);
    assert(!(a >= b));
  }
  {
    bitset_type a(3, 4ul);
    bitset_type b(3, 4ul);
    assert(a >= b);
  }
  {
    bitset_type a(3, 5ul);
    bitset_type b(3, 4ul);
    assert(a >= b);
  }

  //=====================================================================
  // Test b.test(pos)
  {
    // case pos >= b.size()
    bitset_type b;
    Tests::test_bit(b, 0);
  }
  {
    // case pos < b.size()
    bitset_type b(zero_str);
    Tests::test_bit(b, 0);
  }
  {
    // case pos == b.size() / 2
    bitset_type b(long_bits);
    Tests::test_bit(b, long_bits.size()/2);
  }

  //=====================================================================
  // Test b << pos
  {
    // case pos == 0
    std::size_t pos = 0;
    bitset_type b(std::string("1010"));
    Tests::operator_shift_left(b, pos);
  }
  {
    // case pos == size()/2
    std::size_t pos = long_bits.size() / 2;
    bitset_type b(long_bits);
    Tests::operator_shift_left(b, pos);
  }
  {
    // case pos >= n
    std::size_t pos = long_bits.size();
    bitset_type b(long_bits);
    Tests::operator_shift_left(b, pos);
  }

  //=====================================================================
  // Test b >> pos
  {
    // case pos == 0
    std::size_t pos = 0;
    bitset_type b(std::string("1010"));
    Tests::operator_shift_right(b, pos);
  }
  {
    // case pos == size()/2
    std::size_t pos = long_bits.size() / 2;
    bitset_type b(long_bits);
    Tests::operator_shift_right(b, pos);
  }
  {
    // case pos >= n
    std::size_t pos = long_bits.size();
    bitset_type b(long_bits);
    Tests::operator_shift_right(b, pos);
  }

  //=====================================================================
  // Test a & b
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_and(lhs, rhs);
  }
  {
    bitset_type lhs(one_str);
    bitset_type rhs(zero_str);
    Tests::operator_and(lhs, rhs);
  }
  {
    bitset_type lhs(long_bits.size(), 0);
    bitset_type rhs(long_bits);
    Tests::operator_and(lhs, rhs);
  }
  {
    bitset_type lhs(long_bits.size(), 1);
    bitset_type rhs(long_bits);
    Tests::operator_and(lhs, rhs);
  }

  //=====================================================================
  // Test a | b
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_or(lhs, rhs);
  }
  {
    bitset_type lhs(one_str);
    bitset_type rhs(zero_str);
    Tests::operator_or(lhs, rhs);
  }
  {
    bitset_type lhs(long_bits.size(), 0);
    bitset_type rhs(long_bits);
    Tests::operator_or(lhs, rhs);
  }
  {
    bitset_type lhs(long_bits.size(), 1);
    bitset_type rhs(long_bits);
    Tests::operator_or(lhs, rhs);
  }

  //=====================================================================
  // Test a^b
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_xor(lhs, rhs);
  }
  {
    bitset_type lhs(one_str);
    bitset_type rhs(zero_str);
    Tests::operator_xor(lhs, rhs);
  }
  {
    bitset_type lhs(long_bits.size(), 0);
    bitset_type rhs(long_bits);
    Tests::operator_xor(lhs, rhs);
  }
  {
    bitset_type lhs(long_bits.size(), 1);
    bitset_type rhs(long_bits);
    Tests::operator_xor(lhs, rhs);
  }

  //=====================================================================
  // Test a-b
  {
    bitset_type lhs;
    bitset_type rhs;
    Tests::operator_sub(lhs, rhs);
  }
  {
    bitset_type lhs(one_str);
    bitset_type rhs(zero_str);
    Tests::operator_sub(lhs, rhs);
  }
  {
    bitset_type lhs(long_bits.size(), 0);
    bitset_type rhs(long_bits);
    Tests::operator_sub(lhs, rhs);
  }
  {
    bitset_type lhs(long_bits.size(), 1);
    bitset_type rhs(long_bits);
    Tests::operator_sub(lhs, rhs);
  }
}
int main(int argc, char **argv){ double tend = 1E2, speed = 1.; // double tend = 1E-1, speed = 1.; char *init_type="mixed2"; double *roots, *weights, *ll, *dl, xmin, xmax, deltax, jac, xr, xl, cfl, dt, rtime, min_dx; int ii, jj, kk, ee, idx, eres; long nstep; double *dx, *mesh; double *smat, *xx, *qq, *qtemp, *k1, *k2, *k3, *k4, *minv_vec, *mmat, *dv, *mf, *ib, *df, *fstar; // initialize // fortran index structure array[ii,jj,ee] where size(array) = (np, np, ne) // c 1d index structure array = [ee*np*np + jj*np + ii] roots = (double*)malloc(np* sizeof(double)); weights = (double*)malloc(np* sizeof(double)); ll = (double*)malloc(np* sizeof(double)); dl = (double*)malloc(np* sizeof(double)); dx = (double*)malloc(ne* sizeof(double)); mesh = (double*)malloc((ne+1)*sizeof(double)); smat = (double*)malloc(np*np*sizeof(double)); // [jj np, ii np] xx = (double*)malloc(ne*np*sizeof(double)); // [ee ne, ii np] qq = (double*)malloc(ne*np*sizeof(double)); // [ee ne, ii np] qtemp = (double*)malloc(ne*np*sizeof(double)); // [ee ne, ii np] k1 = (double*)malloc(ne*np*sizeof(double)); // [ee ne, ii np] k2 = (double*)malloc(ne*np*sizeof(double)); // [ee ne, ii np] k3 = (double*)malloc(ne*np*sizeof(double)); // [ee ne, ii np] k4 = (double*)malloc(ne*np*sizeof(double)); // [ee ne, ii np] minv_vec= (double*)malloc(ne*np*sizeof(double)); // [ee ne, ii np] mmat = (double*)malloc(ne*np*np*sizeof(double)); // [ee ne, jj np, ii np] dv = (double*)malloc(ne*np*np*sizeof(double)); // [ee ne, jj np, ii np] mf = (double*)malloc(2*np*sizeof(double)); // [jj 2, ii np] ib = (double*)malloc(2*np*sizeof(double)); // [jj 2, ii np] fstar = (double*)malloc(2*ne*sizeof(double)); // [jj 2, ii ne] df = (double*)malloc(ne*2*np*sizeof(double)); // [ee ne, jj 2, ii np] for (ii=0; ii<np; ++ii){ roots[ii] = 0; weights[ii] = 0; ll[ii] = 0; dl[ii] = 0; } for (ii=0; ii<ne; ++ii){ dx[ii] = 0; mesh[ii] = 0; } mesh[ne] = 0; for (ii=0; ii<np*np; ++ii){ smat[ii] = 0; } for (ii=0; ii<ne*np; ++ii){ xx[ii] = 0; qq[ii] = 
0; k1[ii] = 0; k2[ii] = 0; k3[ii] = 0; k4[ii] = 0; minv_vec[ii] = 0; } for (ii=0; ii<ne*np*np; ++ii){ mmat[ii] = 0; dv[ii] = 0; } for (ii=0; ii<np*2; ++ii){ mf[ii] = 0; ib[ii] = 0; } for (ii=0; ii<ne*2; ++ii){ fstar[ii] = 0; } for (ii=0; ii<ne*2*np; ++ii){ df[ii] = 0; } // mesh setup xmin = 0.; xmax = 10.; deltax = (xmax-xmin)/(double)ne; mesh[ne] = xmax; for(ee=0;ee<ne;++ee) { mesh[ee] = xmin+ee*deltax; } // gauss lobatto quadrature point, weight setup gausslobatto_quadrature(np, roots, weights); // coordinates and element size min_dx = xmax - xmin; // initial guess for(ee=0;ee<ne;ee++){ xl = mesh[ee]; xr = mesh[ee+1]; dx[ee] = xr-xl; // size of each element if(dx[ee] < min_dx){ min_dx = dx[ee]; // finding minimum dx } for(ii=0;ii<np;ii++){ idx = ee*np+ii; xx[idx] = xl + 0.5*(1+roots[ii])*dx[ee]; } } // mass matrix for(ii=0;ii<ne*np*np;ii++){ mmat[ii] = 0; } for(ee=0;ee<ne;ee++){ jac = fabs(dx[ee])/2; for(kk=0;kk<np;kk++){ lagrange(roots[kk], ll, roots); for(jj=0;jj<np;jj++){ for(ii=0;ii<np;ii++){ idx = ee*np*np+jj*np+ii; // mass matrix mmat[ne][np][np] in 1d index representation mmat[idx] += jac*weights[kk]*ll[ii]*ll[jj]; } } } } // stiffness matrix for(ii=0;ii<np*np;ii++){ smat[ii] = 0; } for(kk=0;kk<np;kk++){ lagrange(roots[kk], ll, roots); lagrange_deriv(roots[kk], dl, roots); for(jj=0;jj<np;jj++){ for(ii=0;ii<np;ii++){ idx = jj*np+ii; // stiffness matrix smat[np][np] in 1d index representation smat[idx] += weights[kk]*ll[jj]*dl[ii]; } } } // face integration for(ii=0;ii<np*2;ii++){ mf[ii] = 0; } lagrange(-1,mf, roots); // mf[ii] for(ii=0, ii<np,ii++) represents element left face integration lagrange( 1,mf+np,roots); // mf[ii] for ii=np, ii<2*np, ii++) reresents element right face integration // boundary interpolation for(ii=0;ii<np*2;ii++){ ib[ii] = 0; } lagrange(-1,ib, roots); // element left edge interpolation lagrange( 1,ib+np,roots); // element right edge interpolation // divergence operators for(ii=0;ii<ne*np*np;ii++){ dv[ii] = 0; } 
for(ii=0;ii<ne*np*2;ii++){ dv[ii] = 0; } for(ee=0;ee<ne;ee++){ for(jj=0;jj<np;jj++){ // it turn out that mmat is diagonal. i.e., ii != jj, mmat[ee][jj][ii] = 0 // the inverse of mmat is just the inverse of the diagonal components // here, we are extracting the inverse diagonal components only minv_vec[ee*np+jj] = 1./mmat[ee*np*np+jj*np+jj]; } for(jj=0;jj<np;jj++){ for(ii=0;ii<np;ii++){ dv[ee*np*np+jj*np+ii] = minv_vec[ee*np+ii]*smat[jj*np+ii]; } } for(jj=0;jj<2;jj++){ for(ii=0;ii<np;ii++){ df[ee*np*2+jj*np+ii] = minv_vec[ee*np+ii]*mf[jj*np+ii]; } } } // initialize qq field initialize(qq, xx, xmax, xmin, init_type); cfl = 1./(np*np); dt = cfl * min_dx / fabs(speed); rtime = 0.; nstep = 0; printf("Start Time Integration\n"); // Runge-Kutta 4th order Time integration loop t_sta = clock(); while(rtime < tend){ dt = fmin(dt, tend-rtime); rhs(qq, k1, dv, df, ib, speed); for(ii=0;ii<ne*np;ii++) qtemp[ii] = qq[ii]+0.5*dt*k1[ii]; rhs(qtemp, k2, dv, df, ib, speed); for(ii=0;ii<ne*np;ii++) qtemp[ii] = qq[ii]+0.5*dt*k2[ii]; rhs(qtemp, k3, dv, df, ib, speed); for(ii=0;ii<ne*np;ii++) qtemp[ii] = qq[ii]+dt*k3[ii]; rhs(qtemp, k4, dv, df, ib, speed); for(ii=0;ii<ne*np;ii++) qq[ii] += 1./6.*dt*(k1[ii]+2*k2[ii]+2*k3[ii]+k4[ii]); rtime += dt; nstep += 1; if(nstep%10000 == 0) printf("nstep = %10ld, %5.1f%% complete\n", nstep, rtime/tend*100); } // timeloop ends here; printf("Integration complete\n"); if(ne > 200){ eres = 2; } else if (ne > 60){ eres = 3; } else if (ne > 30){ eres = 6; } else { eres = 10; } // final report printf("-----------------------------------------------\n"); printf("code type : c serial\n"); printf("Final time : %13.5e\n", rtime); printf("CFL : %13.5e\n", cfl); printf("DOF : %13d\n", ne*np); printf("No. 
of Elem : %13d\n", ne); printf("Order : %13d\n", np); printf("eres : %13d\n", eres); printf("time steps : %13ld\n", nstep); printf("-----------------------------------------------\n"); save_field(xx, qq, ne, roots, eres); t_end = clock(); printf("Motion time = %f msec\n", (double)(t_end - t_sta)/1000.0); free(roots); free(weights); free(ll); free(dl); free(dx); free(mesh); free(smat); free(xx); free(qq); free(qtemp); free(k1); free(k2); free(k3); free(k4); free(minv_vec); free(mmat); free(dv); free(mf); free(ib); free(fstar); free(df); return 0; }
void PolynomialFit4<Real>::DoLeastSquaresFit ( int numSamples,
        Real* trgSamples[4] )
{
    // Builds and solves the linear system whose solution is the coefficient
    // vector of the fitted polynomial w = sum_k c_k * x^a_k * y^b_k * z^c_k,
    // where (a_k, b_k, c_k) = mPowers[k].
    //
    // numSamples  number of samples
    // trgSamples  four arrays of numSamples values each: x, y, z, and the
    //             observed w, in that order
    //
    // On return mCoefficients holds the solution and mSolved is true; if the
    // solver fails, mCoefficients is zero-filled and mSolved is false.
    //
    // NOTE(review): the power tables are extended from index 1 upward, so
    // index 0 of mXPowers/mYPowers/mZPowers is assumed to already hold 1 --
    // it is not written here; confirm against the constructor.

    // The matrix and vector for a linear system that determines the
    // coefficients of the fitted polynomial.
    GMatrix<Real> mat( mNumPowers, mNumPowers ); // initially zero
    GVector<Real> rhs( mNumPowers ); // initially zero
    mCoefficients = new1<Real>( mNumPowers );

    int row, col;
    for ( int i = 0; i < numSamples; ++i )
    {
        // Compute relevant powers of x, y and z.  Products of two basis
        // terms need exponents up to twice the maximum power.
        Real x = trgSamples[0][i];
        Real y = trgSamples[1][i];
        Real z = trgSamples[2][i];
        Real w = trgSamples[3][i];
        int j;
        for ( j = 1; j <= 2 * mMaxXPower; ++j )
        {
            mXPowers[j] = mXPowers[j - 1] * x;
        }
        for ( j = 1; j <= 2 * mMaxYPower; ++j )
        {
            mYPowers[j] = mYPowers[j - 1] * y;
        }
        for ( j = 1; j <= 2 * mMaxZPower; ++j )
        {
            mZPowers[j] = mZPowers[j - 1] * z;
        }

        for ( row = 0; row < mNumPowers; ++row )
        {
            // Update the upper-triangular portion of the symmetric matrix.
            Real xp, yp, zp;
            for ( col = row; col < mNumPowers; ++col )
            {
                xp = mXPowers[mPowers[row][0] + mPowers[col][0]];
                yp = mYPowers[mPowers[row][1] + mPowers[col][1]];
                // The z exponent must index the z power table (this used to
                // read mYPowers, which dropped the z dependence entirely).
                zp = mZPowers[mPowers[row][2] + mPowers[col][2]];
                mat[row][col] += xp * yp * zp;
            }

            // Update the right-hand side of the system.
            xp = mXPowers[mPowers[row][0]];
            yp = mYPowers[mPowers[row][1]];
            zp = mZPowers[mPowers[row][2]];
            rhs[row] += xp * yp * zp * w;
        }
    }

    // Copy the upper-triangular portion of the symmetric matrix to the
    // lower-triangular portion.
    for ( row = 0; row < mNumPowers; ++row )
    {
        for ( col = 0; col < row; ++col )
        {
            mat[row][col] = mat[col][row];
        }
    }

    // Precondition by normalizing the sums.
    Real invNumSamples = ( ( Real )1 ) / ( Real )numSamples;
    for ( row = 0; row < mNumPowers; ++row )
    {
        for ( col = 0; col < mNumPowers; ++col )
        {
            mat[row][col] *= invNumSamples;
        }
        rhs[row] *= invNumSamples;
    }

    if ( LinearSystem<Real>().Solve( mat, rhs, mCoefficients ) )
    {
        mSolved = true;
    }
    else
    {
        // Leave a well-defined (all-zero) polynomial behind on failure.
        memset( mCoefficients, 0, mNumPowers * sizeof( Real ) );
        mSolved = false;
    }
}
void NaturalSpline1<Real>::CreatePeriodicSpline ()
{
    // Allocates mB, mC and mD (one entry per segment) and fills them by
    // solving one dense linear system with four unknowns per segment.
    // Segment i owns columns 4*i .. 4*i+3; the first unknown of each group is
    // pinned to mA[i] and only the other three are stored.
    // The active implementation is the "#if 1" branch; the "#if 0" branch is
    // an alternative kept for reference and is not compiled.
    mB = new1<Real>( mNumSegments );
    mC = new1<Real>( mNumSegments );
    mD = new1<Real>( mNumSegments );

#if 1
    // Solving the system using a standard linear solver appears to be
    // numerically stable.
    const int size = 4 * mNumSegments;
    GMatrix<Real> mat( size, size );
    GVector<Real> rhs( size );
    int i, j, k;
    Real delta, delta2, delta3;

    // All segments except the last: the 4x4 block at (j, j) holds segment i's
    // own terms and the 4x4 block at (j, k) couples it to segment i + 1.
    for ( i = 0, j = 0; i < mNumSegments - 1; ++i, j += 4 )
    {
        delta = mTimes[i + 1] - mTimes[i];
        delta2 = delta * delta;
        delta3 = delta * delta2;

        // Row j+0: selects the first unknown (right-hand side is mA[i]).
        mat[j + 0][j + 0] = ( Real )1;
        mat[j + 0][j + 1] = ( Real )0;
        mat[j + 0][j + 2] = ( Real )0;
        mat[j + 0][j + 3] = ( Real )0;
        // Row j+1: 1, delta, delta^2, delta^3.
        mat[j + 1][j + 0] = ( Real )1;
        mat[j + 1][j + 1] = delta;
        mat[j + 1][j + 2] = delta2;
        mat[j + 1][j + 3] = delta3;
        // Row j+2: 0, 1, 2*delta, 3*delta^2.
        mat[j + 2][j + 0] = ( Real )0;
        mat[j + 2][j + 1] = ( Real )1;
        mat[j + 2][j + 2] = ( ( Real )2 ) * delta;
        mat[j + 2][j + 3] = ( ( Real )3 ) * delta2;
        // Row j+3: 0, 0, 1, 3*delta.
        mat[j + 3][j + 0] = ( Real )0;
        mat[j + 3][j + 1] = ( Real )0;
        mat[j + 3][j + 2] = ( Real )1;
        mat[j + 3][j + 3] = ( ( Real )3 ) * delta;

        // Coupling block: rows j+1..j+3 subtract the first three unknowns of
        // the next segment, so each of those rows equates to zero.
        k = j + 4;
        mat[j + 0][k + 0] = ( Real )0;
        mat[j + 0][k + 1] = ( Real )0;
        mat[j + 0][k + 2] = ( Real )0;
        mat[j + 0][k + 3] = ( Real )0;
        mat[j + 1][k + 0] = ( Real ) - 1;
        mat[j + 1][k + 1] = ( Real )0;
        mat[j + 1][k + 2] = ( Real )0;
        mat[j + 1][k + 3] = ( Real )0;
        mat[j + 2][k + 0] = ( Real )0;
        mat[j + 2][k + 1] = ( Real ) - 1;
        mat[j + 2][k + 2] = ( Real )0;
        mat[j + 2][k + 3] = ( Real )0;
        mat[j + 3][k + 0] = ( Real )0;
        mat[j + 3][k + 1] = ( Real )0;
        mat[j + 3][k + 2] = ( Real ) - 1;
        mat[j + 3][k + 3] = ( Real )0;
    }

    // Last segment (i == mNumSegments - 1 after the loop): same diagonal
    // block, but the coupling block wraps around to segment 0 (k = 0).
    delta = mTimes[i + 1] - mTimes[i];
    delta2 = delta * delta;
    delta3 = delta * delta2;
    mat[j + 0][j + 0] = ( Real )1;
    mat[j + 0][j + 1] = ( Real )0;
    mat[j + 0][j + 2] = ( Real )0;
    mat[j + 0][j + 3] = ( Real )0;
    mat[j + 1][j + 0] = ( Real )1;
    mat[j + 1][j + 1] = delta;
    mat[j + 1][j + 2] = delta2;
    mat[j + 1][j + 3] = delta3;
    mat[j + 2][j + 0] = ( Real )0;
    mat[j + 2][j + 1] = ( Real )1;
    mat[j + 2][j + 2] = ( ( Real )2 ) * delta;
    mat[j + 2][j + 3] = ( ( Real )3 ) * delta2;
    mat[j + 3][j + 0] = ( Real )0;
    mat[j + 3][j + 1] = ( Real )0;
    mat[j + 3][j + 2] = ( Real )1;
    mat[j + 3][j + 3] = ( ( Real )3 ) * delta;

    // NOTE(review): when mNumSegments is 1, j is also 0 here, so the writes
    // below overwrite the diagonal block that was just filled -- confirm
    // that a single-segment periodic spline is never requested.
    k = 0;
    mat[j + 0][k + 0] = ( Real )0;
    mat[j + 0][k + 1] = ( Real )0;
    mat[j + 0][k + 2] = ( Real )0;
    mat[j + 0][k + 3] = ( Real )0;
    mat[j + 1][k + 0] = ( Real ) - 1;
    mat[j + 1][k + 1] = ( Real )0;
    mat[j + 1][k + 2] = ( Real )0;
    mat[j + 1][k + 3] = ( Real )0;
    mat[j + 2][k + 0] = ( Real )0;
    mat[j + 2][k + 1] = ( Real ) - 1;
    mat[j + 2][k + 2] = ( Real )0;
    mat[j + 2][k + 3] = ( Real )0;
    mat[j + 3][k + 0] = ( Real )0;
    mat[j + 3][k + 1] = ( Real )0;
    mat[j + 3][k + 2] = ( Real ) - 1;
    mat[j + 3][k + 3] = ( Real )0;

    // Right-hand side: mA[i] for the first row of each group, zero for the
    // three coupling rows.
    for ( i = 0, j = 0; i < mNumSegments; ++i, j += 4 )
    {
        rhs[j + 0] = mA[i];
        rhs[j + 1] = ( Real )0;
        rhs[j + 2] = ( Real )0;
        rhs[j + 3] = ( Real )0;
    }

    GVector<Real> coeff( size );
    bool solved = LinearSystem<Real>().Solve( mat, rhs, coeff );
    assertion( solved, "Failed to solve linear system\n" );
    WM5_UNUSED( solved );

    // Unpack the solution: skip the first unknown of each group (it equals
    // mA[i] by construction) and store the remaining three.
    for ( i = 0, j = 0; i < mNumSegments; ++i )
    {
        j++;
        mB[i] = coeff[j++];
        mC[i] = coeff[j++];
        mD[i] = coeff[j++];
    }
#endif

#if 0
    // Disabled alternative implementation, kept for reference only.
    // Solving the system using the equations derived in the PDF
    // "Fitting a Natural Spline to Samples of the Form (t,f(t))"
    // is ill-conditioned.  TODO:  Find a way to row-reduce the matrix of the
    // PDF in a numerically stable manner yet retaining the O(n) asymptotic
    // behavior.

    // Compute the inverses M[i]^{-1}.
    const int numSegmentsM1 = mNumSegments - 1;
    Matrix4<Real>* invM = new1<Matrix4<Real> >( numSegmentsM1 );

    Real delta;
    int i;
    for ( i = 0; i < numSegmentsM1; i++ )
    {
        delta = mTimes[i + 1] - mTimes[i];
        Real invDelta1 = ( ( Real )1 ) / delta;
        Real invDelta2 = invDelta1 / delta;
        Real invDelta3 = invDelta2 / delta;

        Matrix4<Real>& invMi = invM[i];
        invMi[0][0] = ( Real )1;
        invMi[0][1] = ( Real )0;
        invMi[0][2] = ( Real )0;
        invMi[0][3] = ( Real )0;
        invMi[1][0] = ( ( Real )( -3 ) ) * invDelta1;
        invMi[1][1] = ( ( Real )3 ) * invDelta1;
        invMi[1][2] = ( Real )( -2 );
        invMi[1][3] = delta;
        invMi[2][0] = ( ( Real )3 ) * invDelta2;
        invMi[2][1] = ( ( Real )( -3 ) ) * invDelta2;
        invMi[2][2] = ( ( Real )3 ) * invDelta1;
        invMi[2][3] = ( Real )( -2 );
        invMi[3][0] = -invDelta3;
        invMi[3][1] = invDelta3;
        invMi[3][2] = -invDelta2;
        invMi[3][3] = invDelta1;
    }

    // Matrix M[n-1].
    delta = mTimes[i + 1] - mTimes[i];
    Real delta2 = delta * delta;
    Real delta3 = delta2 * delta;
    Matrix4<Real> lastM
    (
        ( Real )1, ( Real )0, ( Real )0, ( Real )0,
        ( Real )1, delta, delta2, delta3,
        ( Real )0, ( Real )1, ( ( Real )2 )*delta, ( ( Real )3 )*delta2,
        ( Real )0, ( Real )0, ( Real )1, ( ( Real )3 )*delta
    );

    // Matrix L.
    Matrix4<Real> LMat
    (
        ( Real )0, ( Real )0, ( Real )0, ( Real )0,
        ( Real )1, ( Real )0, ( Real )0, ( Real )0,
        ( Real )0, ( Real )1, ( Real )0, ( Real )0,
        ( Real )0, ( Real )0, ( Real )1, ( Real )0
    );

    // Vector U.
    Vector<4, Real> U( ( Real )1, ( Real )0, ( Real )0, ( Real )0 );

    // Initialize P = L and Q = f[n-2]*U.
    Matrix4<Real> P = LMat;

    const int numSegmentsM2 = mNumSegments - 2;
    Vector<4, Real> Q = mA[numSegmentsM2] * U;

    // Compute P and Q.
    for ( i = numSegmentsM2; i >= 0; --i )
    {
        // Matrix L*M[i]^{-1}.
        Matrix4<Real> LMInv = LMat * invM[i];

        // Update P.
        P = LMInv * P;

        // Update Q.
        if ( i > 0 )
        {
            Q = mA[i - 1] * U + LMInv * Q;
        }
        else
        {
            Q = mA[numSegmentsM1] * U + LMInv * Q;
        }
    }

    // Final update of P.
    P = lastM - P;

    // Compute P^{-1}.
    Matrix4<Real> invP = P.Inverse();

    // Compute K[n-1].
    Vector<4, Real> coeff = invP * Q;
    mB[numSegmentsM1] = coeff[1];
    mC[numSegmentsM1] = coeff[2];
    mD[numSegmentsM1] = coeff[3];

    // Back substitution for the other K[i].
    for ( i = numSegmentsM2; i >= 0; i-- )
    {
        coeff = invM[i] * ( mA[i] * U + LMat * coeff );
        mB[i] = coeff[1];
        mC[i] = coeff[2];
        mD[i] = coeff[3];
    }

    delete1( invM );
#endif
}
vector<Grid> Planner::dStarLite(Map &map)
{
    // D* Lite style search over the grid in `map`.
    //
    // Seeds the open list U with the goal cell (rhs = 0) and repeatedly pops
    // the front entry, updating the per-cell tables g and rhs through
    // updateVertex(), until the start cell is consistent (g == rhs) and the
    // front key no longer satisfies the loop condition -- or until the open
    // list runs dry.
    //
    // Returns wayPoint.  NOTE(review): nothing in this function appends to
    // wayPoint, so the returned vector is always empty -- confirm whether
    // path extraction is meant to happen here or in the caller.
    vector<Grid> wayPoint;

    // Initialization: every cell starts at rows*cols+1, which acts as the
    // "not reached yet" cost.
    vector<vector<int> > g(map._rows,vector<int>(map._cols,map._rows*map._cols+1));
    vector<vector<int> > rhs(map._rows,vector<int>(map._cols,map._rows*map._cols+1));

    km =0;
    rhs[map.goal.y][map.goal.x] =0;
    Key goalKey = calculateKey(map.goal,g,rhs,map.start,0);
    U.push_back({map.goal,goalKey});

    // Compute current shortest path.
    // Keep processing while the front entry can still improve things or the
    // start cell is inconsistent.  The emptiness check comes first: calling
    // front() on an empty container is undefined behaviour, and the list can
    // drain completely (e.g. when the start cannot be reached).
    // NOTE(review): the key compared against is computed for map.goal with
    // km = 0; the published algorithm compares against the start vertex's
    // key -- confirm this is intentional.
    while(!U.empty() &&
          (U.front().key<calculateKey(map.goal,g,rhs,map.start,0)
           ||(rhs[map.start.y][map.start.x]!=g[map.start.y][map.start.x])))
    {
        // take the key value and position of the first element in the priority queue
        Key kold = U.front().key;
        Grid u = U.front().point;
        U.pop_front();

        Key knew = calculateKey(u,g,rhs,map.start,km); // calculate the new key value

        if (kold<knew)
        {
            // The stored key is stale (the cell's key has grown), so the
            // cell goes back into the queue with its fresh key instead of
            // being expanded now.
            // NOTE(review): this inserts before the first entry whose key is
            // *smaller* than knew; verify this matches the ordering that
            // updateVertex() maintains for U.
            auto pos = find_if(U.begin(),U.end(),[knew](Uelem &entry){return entry.key<knew;});
            U.insert(pos,{u,knew});
        }
        else if (g[u.y][u.x]>rhs[u.y][u.x])
        {
            // Overconsistent cell: a shorter path was found, so accept it
            // and let every neighbour re-evaluate itself.
            g[u.y][u.x] = rhs[u.y][u.x];
            vector<Grid> neighbours = findNeighbour(map, u, 8);
            for(auto &cell:neighbours)
            {
                updateVertex(U,cell,g,rhs,map,km);
            }
        }
        else
        {
            // Underconsistent cell: reset its cost, then re-evaluate the
            // neighbours and the cell itself.
            // NOTE(review): the reset value is cols*rows, one less than the
            // rows*cols+1 used at initialization -- confirm which value
            // updateVertex() treats as "unreached".
            g[u.y][u.x] = map._cols*map._rows;
            vector<Grid> neighbours = findNeighbour(map, u, 8);
            for(auto &cell:neighbours)
            {
                updateVertex(U,cell,g,rhs,map,km);
            }
            updateVertex(U,u,g,rhs,map,km);
        }
    }

    return wayPoint;
}
void check_params( struct user_parameters* params, int matrix_size, int block_size, double dx, double dy, double *f_, int niter, double *u_, double *unew_) { double x, y; int i, j; double *udiff_ =(double*)malloc(matrix_size * matrix_size * sizeof(double)); double (*udiff)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])udiff_; double (*unew)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])unew_; double (*u)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])u_; double (*f)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])f_; // Check for convergence. for (j = 0; j < matrix_size; j++) { y = (double) (j) / (double) (matrix_size - 1); for (i = 0; i < matrix_size; i++) { x = (double) (i) / (double) (matrix_size - 1); (*udiff)[i][j] = (*unew)[i][j] - u_exact(x, y); if( (*udiff)[i][j] > 1.0E-6 ) { printf("error: %d, %d: %f\n", i, j, (*udiff)[i][j]); } } } double error = r8mat_rms(matrix_size, matrix_size, udiff_); double error1; // Set the right hand side array F. rhs(matrix_size, matrix_size, f_, block_size); for (j = 0; j < matrix_size; j++) { for (i = 0; i < matrix_size; i++) { if (i == 0 || i == matrix_size - 1 || j == 0 || j == matrix_size - 1) { (*unew)[i][j] = (*f)[i][j]; (*u)[i][j] = (*f)[i][j]; } else { (*unew)[i][j] = 0.0; (*u)[i][j] = 0.0; } } } sweep_seq(matrix_size, matrix_size, dx, dy, f_, 0, niter, u_, unew_); // Check for convergence. for (j = 0; j < matrix_size; j++) { y = (double) (j) / (double) (matrix_size - 1); for (i = 0; i < matrix_size; i++) { x = (double) (i) / (double) (matrix_size - 1); (*udiff)[i][j] = (*unew)[i][j] - u_exact(x, y); if( (*udiff)[i][j] > 1.0E-6 ) { printf("error: %d, %d: %f\n", i, j, (*udiff)[i][j]); } } } error1 = r8mat_rms(matrix_size, matrix_size, udiff_); params->succeed = fabs(error - error1) < 1.0E-6; if(!params->succeed) { printf("error = %f, error1 = %f\n", error, error1); } free(udiff_); }
/*
 * aug: build and solve the augmented system for A.
 *
 * Forms S = [-I A' ; A anorm*I] (by one of two equivalent concatenation
 * orders, chosen by the parity of nrow), factorizes its symmetric-upper copy
 * with a simplicial method, solves S*X = [0 ; B], and checks two residuals:
 * the residual of the augmented solve (via resid) and the norm of
 * (anorm*I + A*A')*x - b for the lower part of X.
 *
 * Returns the maximum error seen (as accumulated by MAXERR), 1 if A is NULL,
 * or 0 if A is not real.
 *
 * Most CHOLMOD results used below may be NULL; that case is tolerated or
 * explicitly checked where the result is dereferenced.
 */
double aug (cholmod_sparse *A)
{
    double r, maxerr = 0, bnorm, anorm ;
    cholmod_sparse *S, *Im, *In, *At, *A1, *A2, *Sup ;
    cholmod_dense *Alpha, *B, *Baug, *X, *W1, *W2, *R, *X2, X2mat ;
    cholmod_factor *L ;
    double *b, *baug, *rx, *w, *x ;
    Int nrow, ncol, nrhs, i, j, d, d2, save, save2 ;
    /* cm->metis_memory is assigned 2.0 below, i.e. it is a floating-point
     * setting; keep the saved copy in a double so that restoring it cannot
     * truncate a fractional value. */
    double save3 ;

    if (A == NULL)
    {
        ERROR (CHOLMOD_INVALID, "cm: no A for aug") ;
        return (1) ;
    }

    if (A->xtype != CHOLMOD_REAL)
    {
        return (0) ;
    }

    /* ---------------------------------------------------------------------- */
    /* A is m-by-n, B must be m-by-nrhs */
    /* ---------------------------------------------------------------------- */

    nrow = A->nrow ;
    ncol = A->ncol ;
    B = rhs (A, 5, A->nrow + 7) ;

    /* ---------------------------------------------------------------------- */
    /* create scalars */
    /* ---------------------------------------------------------------------- */

    bnorm = CHOLMOD(norm_dense) (B, 0, cm) ;
    anorm = CHOLMOD(norm_sparse) (A, 1, cm) ;

    Alpha = CHOLMOD(eye) (1, 1, CHOLMOD_REAL, cm) ;
    if (Alpha != NULL)
    {
        ((double *) (Alpha->x)) [0] = anorm ;
    }

    CHOLMOD(print_dense) (M1, "MinusOne", cm) ;
    CHOLMOD(print_dense) (Alpha, "Alpha = norm(A)", cm) ;

    /* ---------------------------------------------------------------------- */
    /* create augmented system, S = [-I A' ; A anorm*I] */
    /* ---------------------------------------------------------------------- */

    Im = CHOLMOD(speye) (nrow, nrow, CHOLMOD_REAL, cm) ;
    In = CHOLMOD(speye) (ncol, ncol, CHOLMOD_REAL, cm) ;
    CHOLMOD(scale) (Alpha, CHOLMOD_SCALAR, Im, cm) ;
    CHOLMOD(scale) (M1, CHOLMOD_SCALAR, In, cm) ;
    At = CHOLMOD(transpose) (A, 2, cm) ;

    /* use one of two equivalent methods */
    if (nrow % 2)
    {
        /* S = [[-In A'] ; [A alpha*Im]] */
        A1 = CHOLMOD(horzcat) (In, At, TRUE, cm) ;
        A2 = CHOLMOD(horzcat) (A, Im, TRUE, cm) ;
        S = CHOLMOD(vertcat) (A1, A2, TRUE, cm) ;
    }
    else
    {
        /* S = [[-In ; A] [A' ; alpha*Im]] */
        A1 = CHOLMOD(vertcat) (In, A, TRUE, cm) ;
        A2 = CHOLMOD(vertcat) (At, Im, TRUE, cm) ;
        S = CHOLMOD(horzcat) (A1, A2, TRUE, cm) ;
    }

    CHOLMOD(free_sparse) (&Im, cm) ;
    CHOLMOD(free_sparse) (&In, cm) ;

    CHOLMOD(print_sparse) (S, "S, augmented system", cm) ;

    /* make a symmetric (upper) copy of S */
    Sup = CHOLMOD(copy) (S, 1, 1, cm) ;

    CHOLMOD(print_sparse) (S, "S, augmented system (upper)", cm) ;
    CHOLMOD(print_sparse) (Sup, "Sup", cm) ;

    /* ---------------------------------------------------------------------- */
    /* create augmented right-hand-side, Baug = [ zeros(ncol,nrhs) ; B ] */
    /* ---------------------------------------------------------------------- */

    b = NULL ;
    d = 0 ;
    nrhs = 0 ;
    d2 = 0 ;
    if (B != NULL)
    {
        nrhs = B->ncol ;
        d = B->d ;
        b = B->x ;
        Baug = CHOLMOD(zeros) (nrow+ncol, nrhs, CHOLMOD_REAL, cm) ;
        if (Baug != NULL)
        {
            d2 = Baug->d ;
            baug = Baug->x ;
            /* copy B into rows ncol..ncol+nrow-1 of Baug */
            for (j = 0 ; j < nrhs ; j++)
            {
                for (i = 0 ; i < nrow ; i++)
                {
                    baug [(i+ncol)+j*d2] = b [i+j*d] ;
                }
            }
        }
    }
    else
    {
        Baug = NULL ;
    }

    /* ---------------------------------------------------------------------- */
    /* solve Sx=baug */
    /* ---------------------------------------------------------------------- */

    /* S is symmetric indefinite, so do not use a supernodal LL' */
    save = cm->supernodal ;
    save2 = cm->final_asis ;
    cm->supernodal = CHOLMOD_SIMPLICIAL ;
    cm->final_asis = TRUE ;
    save3 = cm->metis_memory ;
    cm->metis_memory = 2.0 ;
    L = CHOLMOD(analyze) (Sup, cm) ;
    CHOLMOD(factorize) (Sup, L, cm) ;
    X = CHOLMOD(solve) (CHOLMOD_A, L, Baug, cm) ;
    /* restore the settings that were overridden for this solve */
    cm->supernodal = save ;
    cm->final_asis = save2 ;
    cm->metis_memory = save3 ;

    /* ---------------------------------------------------------------------- */
    /* compute the residual */
    /* ---------------------------------------------------------------------- */

    r = resid (Sup, X, Baug) ;
    MAXERR (maxerr, r, 1) ;

    /* ---------------------------------------------------------------------- */
    /* create a shallow submatrix of X, X2 = X (ncol:end, :) */
    /* ---------------------------------------------------------------------- */

    if (X == NULL)
    {
        X2 = NULL ;
    }
    else
    {
        /* X2 is a header on the stack that aliases X's storage; it is never
         * freed on its own */
        X2 = &X2mat ;
        X2->nrow = nrow ;
        X2->ncol = nrhs ;
        X2->nzmax = X->nzmax ;
        X2->d = X->d ;
        X2->x = ((double *) X->x) + ncol ;
        X2->z = NULL ;
        X2->xtype = X->xtype ;
        X2->dtype = X->dtype ;
    }

    CHOLMOD(print_dense) (X, "X", cm) ;
    CHOLMOD(print_dense) (X2, "X2 = X (ncol:end,:)", cm) ;

    /* ---------------------------------------------------------------------- */
    /* compute norm ((alpha*I + A*A')*x-b) */
    /* ---------------------------------------------------------------------- */

    /* W1 = A'*X2 */
    W1 = CHOLMOD(zeros) (ncol, nrhs, CHOLMOD_REAL, cm) ;
    CHOLMOD(sdmult) (A, TRUE, one, zero, X2, W1, cm) ;

    /* W2 = A*W1 */
    W2 = CHOLMOD(zeros) (nrow, nrhs, CHOLMOD_REAL, cm) ;
    CHOLMOD(sdmult) (A, FALSE, one, zero, W1, W2, cm) ;

    /* R = alpha*x + w2 - b */
    R = CHOLMOD(zeros) (nrow, nrhs, CHOLMOD_REAL, cm) ;

    /* X != NULL implies X2 is valid (set just above) */
    if (R != NULL && W2 != NULL && X != NULL)
    {
        w = W2->x ;
        rx = R->x ;
        x = X2->x ;
        for (j = 0 ; j < nrhs ; j++)
        {
            for (i = 0 ; i < nrow ; i++)
            {
                rx [i+j*nrow] = anorm * x [i+j*d2] + w [i+j*nrow] - b [i+j*d] ;
            }
        }
    }

    r = CHOLMOD(norm_dense) (R, 1, cm) ;
    MAXERR (maxerr, r, bnorm) ;

    /* ---------------------------------------------------------------------- */
    /* free everything */
    /* ---------------------------------------------------------------------- */

    CHOLMOD(free_sparse) (&At, cm) ;
    CHOLMOD(free_sparse) (&A1, cm) ;
    CHOLMOD(free_sparse) (&A2, cm) ;
    CHOLMOD(free_sparse) (&S, cm) ;
    CHOLMOD(free_sparse) (&Sup, cm) ;
    CHOLMOD(free_factor) (&L, cm) ;
    CHOLMOD(free_dense) (&R, cm) ;
    CHOLMOD(free_dense) (&W1, cm) ;
    CHOLMOD(free_dense) (&W2, cm) ;
    CHOLMOD(free_dense) (&B, cm) ;
    CHOLMOD(free_dense) (&Baug, cm) ;
    CHOLMOD(free_dense) (&X, cm) ;
    CHOLMOD(free_dense) (&Alpha, cm) ;

    progress (0, '.') ;
    return (maxerr) ;
}
// This routine simply glues together many of the routines that are already // written in the Poisson solver library // // phi( 1:SubNumPhysNodes ) is a scalar quantity. // // E1 ( 1:NumElems, 1:kmax2d ) is a vector quantity. // E2 ( 1:NumElems, 1:kmax2d ) is a vector quantity. // // See also: ConvertEfieldOntoDGbasis void ComputeElectricField( const double t, const mesh& Mesh, const dTensorBC5& q, dTensor2& E1, dTensor2& E2) { // const int mx = q.getsize(1); assert_eq(mx,dogParamsCart2.get_mx()); const int my = q.getsize(2); assert_eq(my,dogParamsCart2.get_my()); const int NumElems = q.getsize(3); const int meqn = q.getsize(4); const int kmax = q.getsize(5); const int space_order = dogParams.get_space_order(); // unstructured parameters: const int kmax2d = E2.getsize(2); const int NumBndNodes = Mesh.get_NumBndNodes(); const int NumPhysNodes = Mesh.get_NumPhysNodes(); // Quick error check if( !Mesh.get_is_submesh() ) { printf("ERROR: mesh needs to have subfactor set to %d\n", space_order); printf("Go to Unstructured mesh and remesh the problem\n"); exit(-1); } const int SubFactor = Mesh.get_SubFactor(); assert_eq( NumElems, Mesh.get_NumElems() ); // -- Step 1: Compute rho -- // dTensor3 rho(NumElems, 1, kmax2d ); void ComputeDensity( const mesh& Mesh, const dTensorBC5& q, dTensor3& rho ); ComputeDensity( Mesh, q, rho ); // -- Step 2: Figure out how large phi needs to be int SubNumPhysNodes = 0; int SubNumBndNodes = 0; switch( dogParams.get_space_order() ) { case 1: SubNumPhysNodes = NumPhysNodes; SubNumBndNodes = NumBndNodes; break; case 2: SubNumPhysNodes = Mesh.get_SubNumPhysNodes(); SubNumBndNodes = Mesh.get_SubNumBndNodes(); if(SubFactor!=2) { printf("\n"); printf(" Error: for space_order = %i, need SubFactor = %i\n",space_order,2); printf(" SubFactor = %i\n",SubFactor); printf("\n"); exit(1); } break; case 3: SubNumPhysNodes = Mesh.get_SubNumPhysNodes(); SubNumBndNodes = Mesh.get_SubNumBndNodes(); if(SubFactor!=3) { printf("\n"); printf(" Error: for 
space_order = %i, need SubFactor = %i\n",space_order,3); printf(" SubFactor = %i\n",SubFactor); printf("\n"); exit(1); } break; default: printf("\n"); printf(" ERROR in RunDogpack_unst.cpp: space_order value not supported.\n"); printf(" space_order = %i\n",space_order); printf("\n"); exit(1); } // local storage: dTensor1 rhs(SubNumPhysNodes); dTensor1 phi(SubNumPhysNodes); // Get Cholesky factorization matrix R // // TODO - this should be saved earlier in the code rather than reading // from file every time we with to run a Poisson solve! // SparseCholesky R(SubNumPhysNodes); string outputdir = dogParams.get_outputdir(); R.init(outputdir); R.read(outputdir); // Create right-hand side vector void Rhs2D_unst(const int space_order, const mesh& Mesh, const dTensor3& rhs_dg, dTensor1& rhs); Rhs2D_unst(space_order, Mesh, rho, rhs); // Call Poisson solver void PoissonSolver2D_unst(const int space_order, const mesh& Mesh, const SparseCholesky& R, const dTensor1& rhs, dTensor1& phi, dTensor2& E1, dTensor2& E2); PoissonSolver2D_unst(space_order, Mesh, R, rhs, phi, E1, E2); // Compare errors with the exact Electric field: // void L2Project_Unst( const double time, const dTensor2* vel_vec, const int istart, const int iend, const int QuadOrder, const int BasisOrder_qin, const int BasisOrder_auxin, const int BasisOrder_fout, const mesh& Mesh, const dTensor3* qin, const dTensor3* auxin, dTensor3* fout, void (*Func)(const double t, const dTensor2* vel_vec, const dTensor2&,const dTensor2&, const dTensor2&,dTensor2&)); const int sorder = dogParams.get_space_order(); dTensor3 qtmp (NumElems, 2, kmax2d ); qtmp.setall(0.); dTensor3 auxtmp (NumElems, 0, kmax2d ); dTensor3 ExactE (NumElems, 2, kmax2d ); L2Project_Unst( t, NULL, 1, NumElems, sorder, sorder, sorder, sorder, Mesh, &qtmp, &auxtmp, &ExactE, &ExactElectricField ); // Compute errors on these two: // double err = 0.; for( int n=1; n <= NumElems; n++ ) for( int k=1; k <= kmax2d; k++ ) { err += Mesh.get_area_prim(n)*pow( 
ExactE.get(n,1,k) - E1.get(n,k), 2 ); err += Mesh.get_area_prim(n)*pow( ExactE.get(n,2,k) - E2.get(n,k), 2 ); } printf("error = %2.15e\n", err ); }
/* Cholesky update/downdate */ cs_long_t demo3 (problem *Prob) { cs_cl *A, *C, *W = NULL, *WW, *WT, *E = NULL, *W2 ; cs_long_t n, k, *Li, *Lp, *Wi, *Wp, p1, p2, *p = NULL, ok ; cs_complex_t *b, *x, *resid, *y = NULL, *Lx, *Wx, s ; double t, t1 ; cs_cls *S = NULL ; cs_cln *N = NULL ; if (!Prob || !Prob->sym || Prob->A->n == 0) return (0) ; A = Prob->A ; C = Prob->C ; b = Prob->b ; x = Prob->x ; resid = Prob->resid; n = A->n ; if (!Prob->sym || n == 0) return (1) ; rhs (x, b, n) ; /* compute right-hand side */ printf ("\nchol then update/downdate ") ; print_order (1) ; y = cs_cl_malloc (n, sizeof (cs_complex_t)) ; t = tic () ; S = cs_cl_schol (1, C) ; /* symbolic Chol, amd(A+A') */ printf ("\nsymbolic chol time %8.2f\n", toc (t)) ; t = tic () ; N = cs_cl_chol (C, S) ; /* numeric Cholesky */ printf ("numeric chol time %8.2f\n", toc (t)) ; if (!S || !N || !y) return (done3 (0, S, N, y, W, E, p)) ; t = tic () ; cs_cl_ipvec (S->pinv, b, y, n) ; /* y = P*b */ cs_cl_lsolve (N->L, y) ; /* y = L\y */ cs_cl_ltsolve (N->L, y) ; /* y = L'\y */ cs_cl_pvec (S->pinv, y, x, n) ; /* x = P'*y */ printf ("solve chol time %8.2f\n", toc (t)) ; printf ("original: ") ; print_resid (1, C, x, b, resid) ; /* print residual */ k = n/2 ; /* construct W */ W = cs_cl_spalloc (n, 1, n, 1, 0) ; if (!W) return (done3 (0, S, N, y, W, E, p)) ; Lp = N->L->p ; Li = N->L->i ; Lx = N->L->x ; Wp = W->p ; Wi = W->i ; Wx = W->x ; Wp [0] = 0 ; p1 = Lp [k] ; Wp [1] = Lp [k+1] - p1 ; s = Lx [p1] ; srand (1) ; for ( ; p1 < Lp [k+1] ; p1++) { p2 = p1 - Lp [k] ; Wi [p2] = Li [p1] ; Wx [p2] = s * rand () / ((double) RAND_MAX) ; } t = tic () ; ok = cs_cl_updown (N->L, +1, W, S->parent) ; /* update: L*L'+W*W' */ t1 = toc (t) ; printf ("update: time: %8.2f\n", t1) ; if (!ok) return (done3 (0, S, N, y, W, E, p)) ; t = tic () ; cs_cl_ipvec (S->pinv, b, y, n) ; /* y = P*b */ cs_cl_lsolve (N->L, y) ; /* y = L\y */ cs_cl_ltsolve (N->L, y) ; /* y = L'\y */ cs_cl_pvec (S->pinv, y, x, n) ; /* x = P'*y */ t = toc (t) ; p = 
cs_cl_pinv (S->pinv, n) ; W2 = cs_cl_permute (W, p, NULL, 1) ; /* E = C + (P'W)*(P'W)' */ WT = cs_cl_transpose (W2,1) ; WW = cs_cl_multiply (W2, WT) ; cs_cl_spfree (WT) ; cs_cl_spfree (W2) ; E = cs_cl_add (C, WW, 1, 1) ; cs_cl_spfree (WW) ; if (!E || !p) return (done3 (0, S, N, y, W, E, p)) ; printf ("update: time: %8.2f (incl solve) ", t1+t) ; print_resid (1, E, x, b, resid) ; /* print residual */ cs_cl_nfree (N) ; /* clear N */ t = tic () ; N = cs_cl_chol (E, S) ; /* numeric Cholesky */ if (!N) return (done3 (0, S, N, y, W, E, p)) ; cs_cl_ipvec (S->pinv, b, y, n) ; /* y = P*b */ cs_cl_lsolve (N->L, y) ; /* y = L\y */ cs_cl_ltsolve (N->L, y) ; /* y = L'\y */ cs_cl_pvec (S->pinv, y, x, n) ; /* x = P'*y */ t = toc (t) ; printf ("rechol: time: %8.2f (incl solve) ", t) ; print_resid (1, E, x, b, resid) ; /* print residual */ t = tic () ; ok = cs_cl_updown (N->L, -1, W, S->parent) ; /* downdate: L*L'-W*W' */ t1 = toc (t) ; if (!ok) return (done3 (0, S, N, y, W, E, p)) ; printf ("downdate: time: %8.2f\n", t1) ; t = tic () ; cs_cl_ipvec (S->pinv, b, y, n) ; /* y = P*b */ cs_cl_lsolve (N->L, y) ; /* y = L\y */ cs_cl_ltsolve (N->L, y) ; /* y = L'\y */ cs_cl_pvec (S->pinv, y, x, n) ; /* x = P'*y */ t = toc (t) ; printf ("downdate: time: %8.2f (incl solve) ", t1+t) ; print_resid (1, C, x, b, resid) ; /* print residual */ return (done3 (1, S, N, y, W, E, p)) ; }
// With syntax-based comparison, two URIs that differ only in the
// capitalisation of the scheme are expected to compare as equal (result 0).
TEST(uri_comparison_test, equality_test_capitalized_scheme_with_case_normalization) {
  const network::uri lower_scheme("http://www.example.com/");
  const network::uri upper_scheme("HTTP://www.example.com/");

  const auto result = lower_scheme.compare(
      upper_scheme, network::uri_comparison_level::syntax_based);

  ASSERT_EQ(result, 0);
}
// A default-constructed URI is expected to differ from a populated one.
TEST(uri_comparison_test, equality_empty_lhs) {
  network::uri empty_uri;
  network::uri populated_uri("http://www.example.com/");

  ASSERT_NE(empty_uri, populated_uri);
}
// With plain string comparison, a difference in scheme capitalisation is
// expected to make the URIs compare as different (non-zero result).
TEST(uri_comparison_test, equality_test_capitalized_scheme) {
  const network::uri lower_scheme("http://www.example.com/");
  const network::uri upper_scheme("HTTP://www.example.com/");

  const auto result = lower_scheme.compare(
      upper_scheme, network::uri_comparison_level::string_comparison);

  ASSERT_NE(result, 0);
}
// ".com" sorts before ".org", so the first URI is lexicographically smaller
// and is expected to order before the second.
TEST(uri_comparison_test, less_than_test) {
  const network::uri smaller("http://www.example.com/");
  const network::uri larger("http://www.example.org/");

  ASSERT_LT(smaller, larger);
}
// operator!= is expected to yield false for two URIs built from the same
// string.
TEST(uri_comparison_test, inequality_test) {
  const network::uri first("http://www.example.com/");
  const network::uri second("http://www.example.com/");

  const bool differ = (first != second);

  ASSERT_FALSE(differ);
}
void DisparityProc::interpolate_natural_cubic_spline ( // input std::vector<double> const & x, std::vector<double> const & y, int l_clamped, int r_clamped, std::vector<double> const & x_vis, // output std::vector<double> & y_vis ) { // Assemble the system; note that it is a tridiagonal one, so we store it as 3 vectors int N = x.size(); std::vector<double> lower(N-1, 0); std::vector<double> diag(N, 0); std::vector<double> upper(N-1, 0); std::vector<double> rhs(N); // Compute difference between data points std::vector<double> delta(N-1, 0); for (unsigned int i = 0; i < delta.size(); ++i) delta[i] = x[i+1] - x[i]; // Initialize the system if (l_clamped) { upper[0] = (delta[0])/6; diag[0] = (delta[0])/3; rhs[0] = (y[1] - y[0])/(delta[0]); } else diag[0] = 1; for (int i = 1; i < N - 1; ++i) { upper[i] = (delta[i])/6; lower[i-1] = (delta[i-1])/6; diag[i] = (x[i+1] - x[i-1])/3; rhs[i] = ((y[i+1] - y[i])/delta[i]) - ((y[i] - y[i-1])/delta[i-1]); } if (r_clamped) { diag[N-1] = -(delta[N-2])/3; lower[N-2] = -(delta[N-2])/6; rhs[N-1] = (y[N-1] - y[N-2])/(delta[N-2]); } else diag[N-1] = 1; // Solve the system and store the result in d std::vector<double> d(N,0); solve_thomas(lower,diag,upper,rhs,d); // Evaluate the interploated curve at x_vis and store data in y_vis // We assume a sorted data list -> maybe implement later // The counter keeps track of which cubic polynomial the current x_vis values are int counter = 0; for (unsigned int i = 0; i < x_vis.size(); ++i) { while (x_vis[i] >= x[counter]) { counter++; if (counter == N) { counter--; break; } } y_vis[i] = d[counter-1] * ((pow(x[counter] - x_vis[i], 3))/(6*delta[counter-1])) + d[counter]*((pow(x_vis[i] - x[counter-1], 3))/(6*delta[counter-1])) + ((y[counter] - y[counter-1])/delta[counter-1] - (d[counter] - d[counter-1])*(delta[counter-1])/6)*(x_vis[i] - x[counter-1]) + (y[counter-1] - d[counter-1]*(delta[counter-1]*delta[counter-1]/6)); } }
/**
 Recursively builds a regression (sub)tree from the given training data.

 A leaf is created when no features are left, when there are fewer than
 minNumSamplesPerNode samples, when maxDepth has been reached, or when the
 error of the best split is not above minRMSErrorPerNode. Otherwise the data
 is partitioned with the node's own predict() and both children are built
 recursively.

 @param trainingData: the samples that reach this node
 @param parent: the parent node, or NULL for the root
 @param features: indices of the features that may still be split on (taken by value, so removals only affect this subtree)
 @param nodeID: ID stored in the new node. NOTE(review): the value is forwarded unchanged to both children, so every node built from one call shares the same ID - confirm whether it was meant to be incremented
 @return the new node (owned by the caller), or NULL if there is no training data or no split could be computed
*/
RegressionTreeNode* RegressionTree::buildTree(const RegressionData &trainingData,RegressionTreeNode *parent,Vector< UINT > features,UINT nodeID){

    const UINT M = trainingData.getNumSamples();
    const UINT N = trainingData.getNumInputDimensions();
    const UINT T = trainingData.getNumTargetDimensions();
    VectorFloat regressionData(T);

    //Get the depth
    UINT depth = 0;
    if( parent != NULL )
        depth = parent->getDepth() + 1;

    //If there are no training data then return NULL
    if( trainingData.getNumSamples() == 0 )
        return NULL;

    //Create the new node
    RegressionTreeNode *node = new RegressionTreeNode;
    if( node == NULL )
        return NULL;

    //Set the parent
    node->initNode( parent, depth, nodeID );

    //If there are no features left then create a leaf node and return
    if( features.size() == 0 || M < minNumSamplesPerNode || depth >= maxDepth ){

        //Flag that this is a leaf node
        node->setIsLeafNode( true );

        //Compute the regression data that will be stored at this node
        computeNodeRegressionData( trainingData, regressionData );

        //Set the node
        node->set( trainingData.getNumSamples(), 0, 0, regressionData );

        Regressifier::trainingLog << "Reached leaf node. Depth: " << depth << " NumSamples: " << trainingData.getNumSamples() << std::endl;

        return node;
    }

    //Compute the best spilt point
    UINT featureIndex = 0;
    Float threshold = 0;
    Float minError = 0;
    if( !computeBestSpilt( trainingData, features, featureIndex, threshold, minError ) ){
        delete node;
        return NULL;
    }

    Regressifier::trainingLog << "Depth: " << depth << " FeatureIndex: " << featureIndex << " Threshold: " << threshold << " MinError: " << minError << std::endl;

    //If the minError is below the minRMSError then create a leaf node and return
    if( minError <= minRMSErrorPerNode ){

        //Flag that this is a leaf node, exactly as the other leaf branch does;
        //without the flag this node is logged as a leaf but not marked as one
        node->setIsLeafNode( true );

        //Compute the regression data that will be stored at this node
        computeNodeRegressionData( trainingData, regressionData );

        //Set the node
        node->set( trainingData.getNumSamples(), featureIndex, threshold, regressionData );

        Regressifier::trainingLog << "Reached leaf node. Depth: " << depth << " NumSamples: " << M << std::endl;

        return node;
    }

    //Set the node (internal node: regressionData still holds its initial values)
    node->set( trainingData.getNumSamples(), featureIndex, threshold, regressionData );

    //Remove the selected feature so we will not use it again
    if( removeFeaturesAtEachSpilt ){
        for(UINT i=0; i<features.getSize(); i++){
            if( features[i] == featureIndex ){
                features.erase( features.begin()+i );
                break;
            }
        }
    }

    //Split the data: samples for which the node predicts true go to the right
    RegressionData lhs(N,T);
    RegressionData rhs(N,T);

    for(UINT i=0; i<M; i++){
        if( node->predict( trainingData[i].getInputVector() ) ){
            rhs.addSample(trainingData[i].getInputVector(), trainingData[i].getTargetVector());
        }else lhs.addSample(trainingData[i].getInputVector(), trainingData[i].getTargetVector());
    }

    //Run the recursive tree building on the children
    node->setLeftChild( buildTree( lhs, node, features, nodeID ) );
    node->setRightChild( buildTree( rhs, node, features, nodeID ) );

    return node;
}
double run(struct user_parameters* params) { int matrix_size = params->matrix_size; if (matrix_size <= 0) { matrix_size = 512; params->matrix_size = matrix_size; } int block_size = params->blocksize; if (block_size <= 0) { block_size = 128; params->blocksize = block_size; } if ( (matrix_size % block_size) || (matrix_size % block_size) ) { params->succeed = 0; params->string2display = "*****ERROR: blocsize must divide NX and NY"; return 0; } int niter = params->titer; if (niter <= 0) { niter = 4; params->titer = niter; } int ii,i,jj,j; double *f_ = (double*)malloc(matrix_size * matrix_size * sizeof(double)); double (*f)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])f_; double *u_ = (double*)malloc(matrix_size * matrix_size * sizeof(double)); double *unew_ = (double*)malloc(matrix_size * matrix_size * sizeof(double)); double (*unew)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])unew_; double (*u)[matrix_size][matrix_size] = (double (*)[matrix_size][matrix_size])u_; double dx = 1.0 / (double) (matrix_size - 1); double dy = 1.0 / (double) (matrix_size - 1); rhs(matrix_size, matrix_size, f_, block_size); //Set the initial solution estimate UNEW. //We are "allowed" to pick up the boundary conditions exactly. 
#pragma omp parallel #pragma omp master for (j = 0; j < matrix_size; j+= block_size) for (i = 0; i < matrix_size; i+= block_size) #pragma omp task firstprivate(i,j) private(ii,jj) for (jj=j; jj<j+block_size; ++jj) for (ii=i; ii<i+block_size; ++ii) { if (ii == 0 || ii == matrix_size - 1 || jj == 0 || jj == matrix_size - 1) { (*unew)[ii][jj] = (*f)[ii][jj]; (*u)[ii][jj] = (*f)[ii][jj]; } else { (*unew)[ii][jj] = 0.0; (*u)[ii][jj] = 0.0; } } /// KERNEL INTENSIVE COMPUTATION START_TIMER; #ifndef _OPENMP sweep_seq(matrix_size, matrix_size, dx, dy, f_, 0, niter, u_, unew_); #else sweep(matrix_size, matrix_size, dx, dy, f_, 0, niter, u_, unew_, block_size); #endif END_TIMER; #ifdef _OPENMP if(params->check) { check_params(params, matrix_size, block_size, dx, dy, f_, niter, u_, unew_) ; } #else params->succeed = 1; #endif free(f_); free(u_); free(unew_); return TIMER; }
void run_test_cases() { typedef bitset_test< boost::dynamic_bitset<Block> > Tests; std::string long_string(101, '0'); for (std::size_t i = 0; i < long_string.size(); ++i) long_string[i] = '0' + (i % 2); std::size_t ul_size = CHAR_BIT * sizeof(unsigned long); //===================================================================== // Test b.to_long() { boost::dynamic_bitset<Block> b; Tests::to_ulong(b); } { std::string ul_str(ul_size, '1'); boost::dynamic_bitset<Block> b(ul_str); Tests::to_ulong(b); } { // case overflow boost::dynamic_bitset<Block> b(long_string); Tests::to_ulong(b); } //===================================================================== // Test to_string(b, str) { boost::dynamic_bitset<Block> b; Tests::to_string(b); } { boost::dynamic_bitset<Block> b(std::string("0")); Tests::to_string(b); } { boost::dynamic_bitset<Block> b(long_string); Tests::to_string(b); } //===================================================================== // Test b.count() { boost::dynamic_bitset<Block> b; Tests::count(b); } { boost::dynamic_bitset<Block> b(std::string("0")); Tests::count(b); } { boost::dynamic_bitset<Block> b(long_string); Tests::count(b); } //===================================================================== // Test b.size() { boost::dynamic_bitset<Block> b; Tests::size(b); } { boost::dynamic_bitset<Block> b(std::string("0")); Tests::size(b); } { boost::dynamic_bitset<Block> b(long_string); Tests::size(b); } //===================================================================== // Test b.any() { boost::dynamic_bitset<Block> b; Tests::any(b); } { boost::dynamic_bitset<Block> b(std::string("0")); Tests::any(b); } { boost::dynamic_bitset<Block> b(long_string); Tests::any(b); } //===================================================================== // Test b.none() { boost::dynamic_bitset<Block> b; Tests::none(b); } { boost::dynamic_bitset<Block> b(std::string("0")); Tests::none(b); } { boost::dynamic_bitset<Block> b(long_string); Tests::none(b); } 
//===================================================================== // Test a.is_subset_of(b) { boost::dynamic_bitset<Block> a, b; Tests::subset(a, b); } { boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0")); Tests::subset(a, b); } { boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1")); Tests::subset(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); Tests::subset(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); a[long_string.size()/2].flip(); Tests::subset(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); b[long_string.size()/2].flip(); Tests::subset(a, b); } //===================================================================== // Test a.is_proper_subset_of(b) { boost::dynamic_bitset<Block> a, b; Tests::proper_subset(a, b); } { boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0")); Tests::proper_subset(a, b); } { boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1")); Tests::proper_subset(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); Tests::proper_subset(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); a[long_string.size()/2].flip(); Tests::proper_subset(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); b[long_string.size()/2].flip(); Tests::proper_subset(a, b); } //===================================================================== // Test operator== { boost::dynamic_bitset<Block> a, b; Tests::operator_equal(a, b); } { boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0")); Tests::operator_equal(a, b); } { boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1")); Tests::operator_equal(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); Tests::operator_equal(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); a[long_string.size()/2].flip(); Tests::operator_equal(a, b); } { 
boost::dynamic_bitset<Block> a(long_string), b(long_string); b[long_string.size()/2].flip(); Tests::operator_equal(a, b); } //===================================================================== // Test operator!= { boost::dynamic_bitset<Block> a, b; Tests::operator_not_equal(a, b); } { boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0")); Tests::operator_not_equal(a, b); } { boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1")); Tests::operator_not_equal(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); Tests::operator_not_equal(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); a[long_string.size()/2].flip(); Tests::operator_not_equal(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); b[long_string.size()/2].flip(); Tests::operator_not_equal(a, b); } //===================================================================== // Test operator< { boost::dynamic_bitset<Block> a, b; Tests::operator_less_than(a, b); } { boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0")); Tests::operator_less_than(a, b); } { boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1")); Tests::operator_less_than(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); Tests::operator_less_than(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); a[long_string.size()/2].flip(); Tests::operator_less_than(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); b[long_string.size()/2].flip(); Tests::operator_less_than(a, b); } // check for consistency with ulong behaviour { boost::dynamic_bitset<Block> a(3, 4ul), b(3, 5ul); assert(a < b); } { boost::dynamic_bitset<Block> a(3, 4ul), b(3, 4ul); assert(!(a < b)); } { boost::dynamic_bitset<Block> a(3, 5ul), b(3, 4ul); assert(!(a < b)); } //===================================================================== // Test operator<= { boost::dynamic_bitset<Block> a, b; 
Tests::operator_less_than_eq(a, b); } { boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0")); Tests::operator_less_than_eq(a, b); } { boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1")); Tests::operator_less_than_eq(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); Tests::operator_less_than_eq(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); a[long_string.size()/2].flip(); Tests::operator_less_than_eq(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); b[long_string.size()/2].flip(); Tests::operator_less_than_eq(a, b); } // check for consistency with ulong behaviour { boost::dynamic_bitset<Block> a(3, 4ul), b(3, 5ul); assert(a <= b); } { boost::dynamic_bitset<Block> a(3, 4ul), b(3, 4ul); assert(a <= b); } { boost::dynamic_bitset<Block> a(3, 5ul), b(3, 4ul); assert(!(a <= b)); } //===================================================================== // Test operator> { boost::dynamic_bitset<Block> a, b; Tests::operator_greater_than(a, b); } { boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0")); Tests::operator_greater_than(a, b); } { boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1")); Tests::operator_greater_than(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); Tests::operator_greater_than(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); a[long_string.size()/2].flip(); Tests::operator_greater_than(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); b[long_string.size()/2].flip(); Tests::operator_greater_than(a, b); } // check for consistency with ulong behaviour { boost::dynamic_bitset<Block> a(3, 4ul), b(3, 5ul); assert(!(a > b)); } { boost::dynamic_bitset<Block> a(3, 4ul), b(3, 4ul); assert(!(a > b)); } { boost::dynamic_bitset<Block> a(3, 5ul), b(3, 4ul); assert(a > b); } //===================================================================== // Test operator<= { 
boost::dynamic_bitset<Block> a, b; Tests::operator_greater_than_eq(a, b); } { boost::dynamic_bitset<Block> a(std::string("0")), b(std::string("0")); Tests::operator_greater_than_eq(a, b); } { boost::dynamic_bitset<Block> a(std::string("1")), b(std::string("1")); Tests::operator_greater_than_eq(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); Tests::operator_greater_than_eq(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); a[long_string.size()/2].flip(); Tests::operator_greater_than_eq(a, b); } { boost::dynamic_bitset<Block> a(long_string), b(long_string); b[long_string.size()/2].flip(); Tests::operator_greater_than_eq(a, b); } // check for consistency with ulong behaviour { boost::dynamic_bitset<Block> a(3, 4ul), b(3, 5ul); assert(!(a >= b)); } { boost::dynamic_bitset<Block> a(3, 4ul), b(3, 4ul); assert(a >= b); } { boost::dynamic_bitset<Block> a(3, 5ul), b(3, 4ul); assert(a >= b); } //===================================================================== // Test b.test(pos) { // case pos >= b.size() boost::dynamic_bitset<Block> b; Tests::test_bit(b, 0); } { // case pos < b.size() boost::dynamic_bitset<Block> b(std::string("0")); Tests::test_bit(b, 0); } { // case pos == b.size() / 2 boost::dynamic_bitset<Block> b(long_string); Tests::test_bit(b, long_string.size()/2); } //===================================================================== // Test b << pos { // case pos == 0 std::size_t pos = 0; boost::dynamic_bitset<Block> b(std::string("1010")); Tests::operator_shift_left(b, pos); } { // case pos == size()/2 std::size_t pos = long_string.size() / 2; boost::dynamic_bitset<Block> b(long_string); Tests::operator_shift_left(b, pos); } { // case pos >= n std::size_t pos = long_string.size(); boost::dynamic_bitset<Block> b(long_string); Tests::operator_shift_left(b, pos); } //===================================================================== // Test b >> pos { // case pos == 0 std::size_t pos = 0; 
boost::dynamic_bitset<Block> b(std::string("1010")); Tests::operator_shift_right(b, pos); } { // case pos == size()/2 std::size_t pos = long_string.size() / 2; boost::dynamic_bitset<Block> b(long_string); Tests::operator_shift_right(b, pos); } { // case pos >= n std::size_t pos = long_string.size(); boost::dynamic_bitset<Block> b(long_string); Tests::operator_shift_right(b, pos); } //===================================================================== // Test a & b { boost::dynamic_bitset<Block> lhs, rhs; Tests::operator_and(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(std::string("1")), rhs(std::string("0")); Tests::operator_and(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(long_string.size(), 0), rhs(long_string); Tests::operator_and(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(long_string.size(), 1), rhs(long_string); Tests::operator_and(lhs, rhs); } //===================================================================== // Test a | b { boost::dynamic_bitset<Block> lhs, rhs; Tests::operator_or(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(std::string("1")), rhs(std::string("0")); Tests::operator_or(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(long_string.size(), 0), rhs(long_string); Tests::operator_or(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(long_string.size(), 1), rhs(long_string); Tests::operator_or(lhs, rhs); } //===================================================================== // Test a^b { boost::dynamic_bitset<Block> lhs, rhs; Tests::operator_xor(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(std::string("1")), rhs(std::string("0")); Tests::operator_xor(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(long_string.size(), 0), rhs(long_string); Tests::operator_xor(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(long_string.size(), 1), rhs(long_string); Tests::operator_xor(lhs, rhs); } //===================================================================== // Test a-b { boost::dynamic_bitset<Block> lhs, rhs; Tests::operator_sub(lhs, 
rhs); } { boost::dynamic_bitset<Block> lhs(std::string("1")), rhs(std::string("0")); Tests::operator_sub(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(long_string.size(), 0), rhs(long_string); Tests::operator_sub(lhs, rhs); } { boost::dynamic_bitset<Block> lhs(long_string.size(), 1), rhs(long_string); Tests::operator_sub(lhs, rhs); } //===================================================================== // Test stream operator<< and operator>> { boost::dynamic_bitset<Block> b; boost::dynamic_bitset<Block> x(b.size()); Tests::stream_read_write(b, x); } { boost::dynamic_bitset<Block> b(std::string("0")); boost::dynamic_bitset<Block> x(b.size()); Tests::stream_read_write(b, x); } { boost::dynamic_bitset<Block> b(long_string); boost::dynamic_bitset<Block> x(b.size()); Tests::stream_read_write(b, x); } }
void ContinuousArithmeticAsianVecerEngine::calculate() const { Real expectedAverage; QL_REQUIRE(arguments_.averageType == Average::Arithmetic, "not an Arithmetic average option"); QL_REQUIRE(arguments_.exercise->type() == Exercise::European, "not an European Option"); DayCounter rfdc = process_->riskFreeRate()->dayCounter(); DayCounter divdc = process_->dividendYield()->dayCounter(); DayCounter voldc = process_->blackVolatility()->dayCounter(); Real S_0 = process_->stateVariable()->value(); // payoff ext::shared_ptr<StrikedTypePayoff> payoff = ext::dynamic_pointer_cast<StrikedTypePayoff>(arguments_.payoff); QL_REQUIRE(payoff, "non-plain payoff given"); // original time to maturity Date maturity = arguments_.exercise->lastDate(); Real X = payoff->strike(); QL_REQUIRE(z_min_<=0 && z_max_>=0, "strike (0 for vecer fixed strike asian) not on Grid"); Volatility sigma = process_->blackVolatility()->blackVol(maturity, X); Rate r = process_->riskFreeRate()-> zeroRate(maturity, rfdc, Continuous, NoFrequency); Rate q = process_->dividendYield()-> zeroRate(maturity, divdc, Continuous, NoFrequency); Date today(Settings::instance().evaluationDate()); QL_REQUIRE(startDate_>=today, "Seasoned Asian not yet implemented"); // Expiry in Years Time T = rfdc.yearFraction(today, arguments_.exercise->lastDate()); Time T1 = rfdc.yearFraction(today, startDate_ ); // Average Begin Time T2 = T; // Average End (In this version only Maturity...) if ((T2 - T1) < 0.001) { // its a vanilla option. 
Use vanilla engine VanillaOption europeanOption(payoff, arguments_.exercise); europeanOption.setPricingEngine( ext::make_shared<AnalyticEuropeanEngine>(process_)); results_.value = europeanOption.NPV(); } else { Real Theta = 0.5; // Mixed Scheme: 0.5 = Crank Nicolson Real Z_0 = cont_strategy(0,T1,T2,q,r) - std::exp(-r*T) * X /S_0; QL_REQUIRE(Z_0>=z_min_ && Z_0<=z_max_, "spot not on grid"); Real h = (z_max_ - z_min_) / assetSteps_; // Space step size Real k = T / timeSteps_; // Time Step size Real sigma2 = sigma * sigma, vecerTerm; Array SVec(assetSteps_+1),u_initial(assetSteps_+1), u(assetSteps_+1),rhs(assetSteps_+1); for (Natural i= 0; i<= SVec.size()-1;i++) { SVec[i] = z_min_ + i * h; // Value of Underlying on the grid } // Begin gamma construction TridiagonalOperator gammaOp = DPlusDMinus(assetSteps_+1,h); Array upperD = gammaOp.upperDiagonal(); Array lowerD = gammaOp.lowerDiagonal(); Array Dia = gammaOp.diagonal(); // Construct Vecer operator TridiagonalOperator explicit_part(gammaOp.size()); TridiagonalOperator implicit_part(gammaOp.size()); for (Natural i= 0; i<= SVec.size()-1;i++) { u_initial[i] = std::max<Real>(SVec[i] , 0.0); // Call Payoff } u = u_initial; // Start Time Loop for (Natural j = 1; j<=timeSteps_;j++) { if (Theta != 1.0) { // Explicit Part for (Natural i = 1; i<= SVec.size()-2;i++) { vecerTerm = SVec[i] - std::exp(-q * (T-(j-1)*k)) * cont_strategy(T-(j-1)*k,T1,T2,q,r); gammaOp.setMidRow(i, 0.5 * sigma2 * vecerTerm * vecerTerm * lowerD[i-1], 0.5 * sigma2 * vecerTerm * vecerTerm * Dia[i], 0.5 * sigma2 * vecerTerm * vecerTerm * upperD[i]); } explicit_part = gammaOp.identity(gammaOp.size()) + (1 - Theta) * k * gammaOp; explicit_part.setFirstRow(1.0,0.0); // Apply before applying explicit_part.setLastRow(-1.0,1.0); // Neumann BC u = explicit_part.applyTo(u); // Apply after applying (Neumann BC) u[assetSteps_] = u[assetSteps_-1] + h; u[0] = 0; } // End Explicit Part if (Theta != 0.0) { // Implicit Part for (Natural i = 1; i<= SVec.size()-2;i++) { 
vecerTerm = SVec[i] - std::exp(-q * (T-j*k)) * cont_strategy(T-j*k,T1,T2,q,r); gammaOp.setMidRow(i, 0.5 * sigma2 * vecerTerm * vecerTerm * lowerD[i-1], 0.5 * sigma2 * vecerTerm * vecerTerm * Dia[i], 0.5 * sigma2 * vecerTerm * vecerTerm * upperD[i]); } implicit_part = gammaOp.identity(gammaOp.size()) - Theta * k * gammaOp; // Apply before solving implicit_part.setFirstRow(1.0,0.0); implicit_part.setLastRow(-1.0,1.0); rhs = u; rhs[0] = 0; // Lower BC rhs[assetSteps_] = h; // Upper BC (Neumann) Delta=1 u = implicit_part.solveFor(rhs); } // End implicit Part } // End Time Loop DownRounding Rounding(0); Integer lowerI = Integer(Rounding( (Z_0-z_min_)/h)); // Interpolate solution Real pv; pv = u[lowerI] + (u[lowerI+1] - u[lowerI]) * (Z_0 - SVec[lowerI])/h; results_.value = S_0 * pv; if (payoff->optionType()==Option::Put) { // Apply Call Put Parity for Asians if (r == q) { expectedAverage = S_0; } else { expectedAverage = S_0 * (std::exp( (r-q) * T2) - std::exp( (r-q) * T1)) / ((r-q) * (T2-T1)); } Real asianForward = std::exp(-r * T2) * (expectedAverage - X); results_.value = results_.value - asianForward; } } }
//-------------------------------------------------------------------------- //-------- execute --------------------------------------------------------- //-------------------------------------------------------------------------- void AssembleScalarEdgeSolverAlgorithm::execute() { stk::mesh::BulkData & bulk_data = realm_.bulk_data(); stk::mesh::MetaData & meta_data = realm_.meta_data(); const int nDim = meta_data.spatial_dimension(); const double small = 1.0e-16; // extract user advection options (allow to potentially change over time) const std::string dofName = scalarQ_->name(); const double hybridFactor = realm_.get_hybrid_factor(dofName); const double alpha = realm_.get_alpha_factor(dofName); const double alphaUpw = realm_.get_alpha_upw_factor(dofName); const double hoUpwind = realm_.get_upw_factor(dofName); const bool useLimiter = realm_.primitive_uses_limiter(dofName); // one minus flavor const double om_alpha = 1.0-alpha; const double om_alphaUpw = 1.0-alphaUpw; // space for LHS/RHS; always edge connectivity const int nodesPerEdge = 2; const int lhsSize = nodesPerEdge*nodesPerEdge; const int rhsSize = nodesPerEdge; std::vector<double> lhs(lhsSize); std::vector<double> rhs(rhsSize); std::vector<stk::mesh::Entity> connected_nodes(2); // area vector; gather into std::vector<double> areaVec(nDim); // pointer for fast access double *p_lhs = &lhs[0]; double *p_rhs = &rhs[0]; double *p_areaVec = &areaVec[0]; // deal with state ScalarFieldType &scalarQNp1 = scalarQ_->field_of_state(stk::mesh::StateNP1); ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1); // define some common selectors stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part() & stk::mesh::selectUnion(partVec_) & !(realm_.get_inactive_selector()); stk::mesh::BucketVector const& edge_buckets = realm_.get_buckets( stk::topology::EDGE_RANK, s_locally_owned_union ); for ( stk::mesh::BucketVector::const_iterator ib = edge_buckets.begin(); ib != edge_buckets.end() 
; ++ib ) { stk::mesh::Bucket & b = **ib ; const stk::mesh::Bucket::size_type length = b.size(); // pointer to edge area vector and mdot const double * av = stk::mesh::field_data(*edgeAreaVec_, b); const double * mdot = stk::mesh::field_data(*massFlowRate_, b); for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) { // zeroing of lhs/rhs for ( int i = 0; i < lhsSize; ++i ) { p_lhs[i] = 0.0; } for ( int i = 0; i < rhsSize; ++i ) { p_rhs[i] = 0.0; } // get edge stk::mesh::Entity edge = b[k]; stk::mesh::Entity const * edge_node_rels = bulk_data.begin_nodes(edge); // sanity check on number or nodes ThrowAssert( bulk_data.num_nodes(edge) == 2 ); // pointer to edge area vector for ( int j = 0; j < nDim; ++j ) p_areaVec[j] = av[k*nDim+j]; const double tmdot = mdot[k]; // left and right nodes stk::mesh::Entity nodeL = edge_node_rels[0]; stk::mesh::Entity nodeR = edge_node_rels[1]; connected_nodes[0] = nodeL; connected_nodes[1] = nodeR; // extract nodal fields const double * coordL = stk::mesh::field_data(*coordinates_, nodeL); const double * coordR = stk::mesh::field_data(*coordinates_, nodeR); const double * dqdxL = stk::mesh::field_data(*dqdx_, nodeL); const double * dqdxR = stk::mesh::field_data(*dqdx_, nodeR); const double * vrtmL = stk::mesh::field_data(*velocityRTM_, nodeL); const double * vrtmR = stk::mesh::field_data(*velocityRTM_, nodeR); const double qNp1L = *stk::mesh::field_data(scalarQNp1, nodeL); const double qNp1R = *stk::mesh::field_data(scalarQNp1, nodeR); const double densityL = *stk::mesh::field_data(densityNp1, nodeL); const double densityR = *stk::mesh::field_data(densityNp1, nodeR); const double diffFluxCoeffL = *stk::mesh::field_data(*diffFluxCoeff_, nodeL); const double diffFluxCoeffR = *stk::mesh::field_data(*diffFluxCoeff_, nodeR); // compute geometry double axdx = 0.0; double asq = 0.0; double udotx = 0.0; for ( int j = 0; j < nDim; ++j ) { const double axj = p_areaVec[j]; const double dxj = coordR[j] - coordL[j]; asq += axj*axj; axdx += 
axj*dxj; udotx += 0.5*dxj*(vrtmL[j] + vrtmR[j]); } const double inv_axdx = 1.0/axdx; // ip props const double viscIp = 0.5*(diffFluxCoeffL + diffFluxCoeffR); const double diffIp = 0.5*(diffFluxCoeffL/densityL + diffFluxCoeffR/densityR); // Peclet factor double pecfac = hybridFactor*udotx/(diffIp+small); pecfac = pecfac*pecfac/(5.0 + pecfac*pecfac); const double om_pecfac = 1.0-pecfac; // left and right extrapolation; add in diffusion calc double dqL = 0.0; double dqR = 0.0; double nonOrth = 0.0; for ( int j = 0; j < nDim; ++j ) { const double dxj = coordR[j] - coordL[j]; dqL += 0.5*dxj*dqdxL[j]; dqR += 0.5*dxj*dqdxR[j]; // now non-orth (over-relaxed procedure of Jasek) const double axj = p_areaVec[j]; const double kxj = axj - asq*inv_axdx*dxj; const double GjIp = 0.5*(dqdxL[j] + dqdxR[j]); nonOrth += -viscIp*kxj*GjIp; } // add limiter if appropriate double limitL = 1.0; double limitR = 1.0; const double dq = qNp1R - qNp1L; if ( useLimiter ) { const double dqMl = 2.0*2.0*dqL - dq; const double dqMr = 2.0*2.0*dqR - dq; limitL = van_leer(dqMl, dq, small); limitR = van_leer(dqMr, dq, small); } // extrapolated; for now limit const double qIpL = qNp1L + dqL*hoUpwind*limitL; const double qIpR = qNp1R - dqR*hoUpwind*limitR; //==================================== // diffusive flux //==================================== double lhsfac = -viscIp*asq*inv_axdx; double diffFlux = lhsfac*(qNp1R - qNp1L) + nonOrth; // first left p_lhs[0] = -lhsfac; p_lhs[1] = +lhsfac; p_rhs[0] = -diffFlux; // now right p_lhs[2] = +lhsfac; p_lhs[3] = -lhsfac; p_rhs[1] = diffFlux; //==================================== // advective flux //==================================== // 2nd order central const double qIp = 0.5*( qNp1L + qNp1R ); // upwind const double qUpwind = (tmdot > 0) ? 
alphaUpw*qIpL + om_alphaUpw*qIp : alphaUpw*qIpR + om_alphaUpw*qIp; // generalized central (2nd and 4th order) const double qHatL = alpha*qIpL + om_alpha*qIp; const double qHatR = alpha*qIpR + om_alpha*qIp; const double qCds = 0.5*(qHatL + qHatR); // total advection const double aflux = tmdot*(pecfac*qUpwind + om_pecfac*qCds); // upwind advection (includes 4th); left node double alhsfac = 0.5*(tmdot+std::abs(tmdot))*pecfac*alphaUpw + 0.5*alpha*om_pecfac*tmdot; p_lhs[0] += alhsfac; p_lhs[2] -= alhsfac; // upwind advection; right node alhsfac = 0.5*(tmdot-std::abs(tmdot))*pecfac*alphaUpw + 0.5*alpha*om_pecfac*tmdot; p_lhs[3] -= alhsfac; p_lhs[1] += alhsfac; // central; left; collect terms on alpha and alphaUpw alhsfac = 0.5*tmdot*(pecfac*om_alphaUpw + om_pecfac*om_alpha); p_lhs[0] += alhsfac; p_lhs[1] += alhsfac; // central; right; collect terms on alpha and alphaUpw p_lhs[2] -= alhsfac; p_lhs[3] -= alhsfac; // total flux left p_rhs[0] -= aflux; // total flux right p_rhs[1] += aflux; apply_coeff(connected_nodes, rhs, lhs, __FILE__); } } }
/// Imposes the velocity Dirichlet boundary values on the Stokes system.
///
/// For every velocity degree of freedom (x components first, then y
/// components) that carries a constrained boundary mark, the prescribed
/// value is written into \p x where applicable, the corresponding row and
/// column of `matrix` are cleared except for the diagonal, and `rhs` is
/// adjusted so that the solution keeps that value.
///
/// \param x  solution vector; boundary entries are overwritten in place.
void ISOP2P1::boundaryValueStokes(Vector<double> &x)
{
    /// Number of degrees of freedom in each space.
    const unsigned int n_dof_v = fem_space_v.n_dof();
    const unsigned int n_dof_p = fem_space_p.n_dof();
    const unsigned int n_total_dof_v = 2 * n_dof_v;

    /// Row-start offsets and column indices of the Stokes sparsity pattern.
    const std::size_t * rowstart = sp_stokes.get_rowstart_indices();
    const unsigned int * colnum = sp_stokes.get_column_numbers();

    std::cout << "n_dof_v: " << n_dof_v << ", n_dof_p: " << n_dof_p << std::endl;
    std::cout << "n_A: " << sp_stokes.n_rows() << ", m_A: " << sp_stokes.n_cols() << std::endl;

    /// Visit the velocity nodes of every component.
    for (unsigned int i = 0; i < n_total_dof_v; ++i)
    {
        /// Indices below n_dof_v are x components, the rest are y
        /// components; both refer to the same velocity node for the mark.
        const bool is_x_component = (i < n_dof_v);
        const unsigned int node = is_x_component ? i : i - n_dof_v;

        /// Boundary mark; 0 means nothing to impose here.
        const int bm = fem_space_v.dofInfo(node).boundary_mark;
        if (bm == 0)
            continue;

        /// Assign the Dirichlet values boundary by boundary, keeping the
        /// x and y components apart.

        // /// Boundary conditions for flow past an obstacle.
        // if (bm < 8 && bm > 0 && bm != 6)
        //     x(i) = 0.0;
        // else if (bm == 8 || bm == 9)
        //     if (i < n_dof_v)
        //     {
        //         PoiseuilleVx poiseuille_vx(0.0, 2.0);
        //         x(i) = poiseuille_vx.value(fem_space_v.dofInfo(i).interp_point);
        //     }
        //     else
        //     {
        //         PoiseuilleVy poiseuille_vy;
        //         x(i) = poiseuille_vy.value(fem_space_v.dofInfo(i - n_dof_v).interp_point);
        //     }

        /// Boundary conditions for cavity flow: marks 1..4 take the value
        /// of DiVx / DiVy evaluated at time t + dt.
        if (bm >= 1 && bm <= 4)
        {
            if (is_x_component)
            {
                DiVx real_vx(viscosity, t + dt);
                x(i) = real_vx.value(fem_space_v.dofInfo(node).interp_point);
            }
            else
            {
                DiVy real_vy(viscosity, t + dt);
                x(i) = real_vy.value(fem_space_v.dofInfo(node).interp_point);
            }
        }
        // else if (bm == 2 || bm == 3)
        //     if (i < n_dof_v)
        //     {
        //         PoiseuilleVx poiseuille_vx(0.0, 1.0);
        //         x(i) = poiseuille_vx.value(fem_space_v.dofInfo(i).interp_point);
        //     }
        //     else
        //     {
        //         PoiseuilleVy poiseuille_vy;
        //         x(i) = poiseuille_vy.value(fem_space_v.dofInfo(i - n_dof_v).interp_point);
        //     }
        // else if (bm == 11)
        //     if (i < n_dof_v)
        //     {
        //         RealVx real_vx;
        //         x(i) = real_vx.value(fem_space_v.dofInfo(i).interp_point);
        //     }
        //     else
        //     {
        //         RealVy real_vy;
        //         x(i) = real_vy.value(fem_space_v.dofInfo(i - n_dof_v).interp_point);
        //     }

        /// Only marks 1..5 and 11 are eliminated from the system (cavity
        /// flow setting).  Marks 5 and 11 keep whatever value x(i) already
        /// holds.
        // /// Setting for flow past an obstacle:
        // if (bm < 10 && bm > 0 && bm != 6)
        const bool constrained = (bm >= 1 && bm <= 5) || bm == 11;
        if (!constrained)
            continue;

        /// With the rest of row i and column i cleared, this right-hand
        /// side makes the solve reproduce the boundary value.
        rhs(i) = matrix.diag_element(i) * x(i);

        /// Walk row i.  The "+ 1" skips the first stored entry of the row,
        /// which is the diagonal.
        const std::size_t row_i_end = rowstart[i + 1];
        for (unsigned int j = rowstart[i] + 1; j < row_i_end; ++j)
        {
            /// Zero the j-th stored entry (a storage position, not column j).
            matrix.global_entry(j) -= matrix.global_entry(j);

            /// The j-th stored entry lives in column k.
            const unsigned int k = colnum[j];

            /// Look for column i among the off-diagonal entries of row k.
            const unsigned int *row_k_end = &colnum[rowstart[k + 1]];
            const unsigned int *hit = std::find(&colnum[rowstart[k] + 1], row_k_end, i);
            if (hit == row_k_end)
                continue;

            /// A(k, i) is stored: unknown i is now known, so move its
            /// contribution to the right-hand side,
            /// r(k) = r(k) - x(i) * A(k, i), and then clear the entry.
            const unsigned int l = hit - &colnum[rowstart[0]];
            rhs(k) -= matrix.global_entry(l) * x(i);
            matrix.global_entry(l) -= matrix.global_entry(l);
        }
    }
    std::cout << "boundary values for Stokes OK!" << std::endl;
}
void FGAccelerations::ResolveFrictionForces(double dt) { const double invMass = 1.0 / in.Mass; const FGMatrix33& Jinv = in.Jinv; FGColumnVector3 vdot, wdot; vector<LagrangeMultiplier*>& multipliers = *in.MultipliersList; size_t n = multipliers.size(); vFrictionForces.InitMatrix(); vFrictionMoments.InitMatrix(); // If no gears are in contact with the ground then return if (!n) return; vector<double> a(n*n); // Will contain Jac*M^-1*Jac^T vector<double> rhs(n); // Assemble the linear system of equations for (unsigned int i=0; i < n; i++) { FGColumnVector3 v1 = invMass * multipliers[i]->ForceJacobian; FGColumnVector3 v2 = Jinv * multipliers[i]->MomentJacobian; // Should be J^-T but J is symmetric and so is J^-1 for (unsigned int j=0; j < i; j++) a[i*n+j] = a[j*n+i]; // Takes advantage of the symmetry of Jac^T*M^-1*Jac for (unsigned int j=i; j < n; j++) a[i*n+j] = DotProduct(v1, multipliers[j]->ForceJacobian) + DotProduct(v2, multipliers[j]->MomentJacobian); } // Assemble the RHS member // Translation vdot = vUVWdot; if (dt > 0.) // Zeroes out the relative movement between the aircraft and the ground vdot += (in.vUVW - in.Tec2b * in.TerrainVelocity) / dt; // Rotation wdot = vPQRdot; if (dt > 0.) // Zeroes out the relative movement between the aircraft and the ground wdot += (in.vPQR - in.Tec2b * in.TerrainAngularVel) / dt; // Prepare the linear system for the Gauss-Seidel algorithm : // 1. Compute the right hand side member 'rhs' // 2. Divide every line of 'a' and 'rhs' by a[i,i]. This is in order to save // a division computation at each iteration of Gauss-Seidel. 
for (unsigned int i=0; i < n; i++) { double d = 1.0 / a[i*n+i]; rhs[i] = -(DotProduct(multipliers[i]->ForceJacobian, vdot) +DotProduct(multipliers[i]->MomentJacobian, wdot))*d; for (unsigned int j=0; j < n; j++) a[i*n+j] *= d; } // Resolve the Lagrange multipliers with the projected Gauss-Seidel method for (int iter=0; iter < 50; iter++) { double norm = 0.; for (unsigned int i=0; i < n; i++) { double lambda0 = multipliers[i]->value; double dlambda = rhs[i]; for (unsigned int j=0; j < n; j++) dlambda -= a[i*n+j]*multipliers[j]->value; multipliers[i]->value = Constrain(multipliers[i]->Min, lambda0+dlambda, multipliers[i]->Max); dlambda = multipliers[i]->value - lambda0; norm += fabs(dlambda); } if (norm < 1E-5) break; } // Calculate the total friction forces and moments for (unsigned int i=0; i< n; i++) { double lambda = multipliers[i]->value; vFrictionForces += lambda * multipliers[i]->ForceJacobian; vFrictionMoments += lambda * multipliers[i]->MomentJacobian; } FGColumnVector3 accel = invMass * vFrictionForces; FGColumnVector3 omegadot = Jinv * vFrictionMoments; vBodyAccel += accel; vUVWdot += accel; vUVWidot += in.Tb2i * accel; vPQRdot += omegadot; vPQRidot += omegadot; }
// ** Temporary version int ClpPdco::pdco( ClpPdcoBase * stuff, Options &options, Info &info, Outfo &outfo) { // D1, D2 are positive-definite diagonal matrices defined from d1, d2. // In particular, d2 indicates the accuracy required for // satisfying each row of Ax = b. // // D1 and D2 (via d1 and d2) provide primal and dual regularization // respectively. They ensure that the primal and dual solutions // (x,r) and (y,z) are unique and bounded. // // A scalar d1 is equivalent to d1 = ones(n,1), D1 = diag(d1). // A scalar d2 is equivalent to d2 = ones(m,1), D2 = diag(d2). // Typically, d1 = d2 = 1e-4. // These values perturb phi(x) only slightly (by about 1e-8) and request // that A*x = b be satisfied quite accurately (to about 1e-8). // Set d1 = 1e-4, d2 = 1 for least-squares problems with bound constraints. // The problem is then // // minimize phi(x) + 1/2 norm(d1*x)^2 + 1/2 norm(A*x - b)^2 // subject to bl <= x <= bu. // // More generally, d1 and d2 may be n and m vectors containing any positive // values (preferably not too small, and typically no larger than 1). // Bigger elements of d1 and d2 improve the stability of the solver. // // At an optimal solution, if x(j) is on its lower or upper bound, // the corresponding z(j) is positive or negative respectively. // If x(j) is between its bounds, z(j) = 0. // If bl(j) = bu(j), x(j) is fixed at that value and z(j) may have // either sign. // // Also, r and y satisfy r = D2 y, so that Ax + D2^2 y = b. // Thus if d2(i) = 1e-4, the i-th row of Ax = b will be satisfied to // approximately 1e-8. This determines how large d2(i) can safely be. 
// // // EXTERNAL FUNCTIONS: // options = pdcoSet; provided with pdco.m // [obj,grad,hess] = pdObj( x ); provided by user // y = pdMat( name,mode,m,n,x ); provided by user if pdMat // is a string, not a matrix // // INPUT ARGUMENTS: // pdObj is a string containing the name of a function pdObj.m // or a function_handle for such a function // such that [obj,grad,hess] = pdObj(x) defines // obj = phi(x) : a scalar, // grad = gradient of phi(x) : an n-vector, // hess = diag(Hessian of phi): an n-vector. // Examples: // If phi(x) is the linear function c"x, pdObj should return // [obj,grad,hess] = [c"*x, c, zeros(n,1)]. // If phi(x) is the entropy function E(x) = sum x(j) log x(j), // [obj,grad,hess] = [E(x), log(x)+1, 1./x]. // pdMat may be an ifexplicit m x n matrix A (preferably sparse!), // or a string containing the name of a function pdMat.m // or a function_handle for such a function // such that y = pdMat( name,mode,m,n,x ) // returns y = A*x (mode=1) or y = A"*x (mode=2). // The input parameter "name" will be the string pdMat. // b is an m-vector. // bl is an n-vector of lower bounds. Non-existent bounds // may be represented by bl(j) = -Inf or bl(j) <= -1e+20. // bu is an n-vector of upper bounds. Non-existent bounds // may be represented by bu(j) = Inf or bu(j) >= 1e+20. // d1, d2 may be positive scalars or positive vectors (see above). // options is a structure that may be set and altered by pdcoSet // (type help pdcoSet). // x0, y0, z0 provide an initial solution. // xsize, zsize are estimates of the biggest x and z at the solution. // They are used to scale (x,y,z). Good estimates // should improve the performance of the barrier method. // // // OUTPUT ARGUMENTS: // x is the primal solution. // y is the dual solution associated with Ax + D2 r = b. // z is the dual solution associated with bl <= x <= bu. // inform = 0 if a solution is found; // = 1 if too many iterations were required; // = 2 if the linesearch failed too often. 
// PDitns is the number of Primal-Dual Barrier iterations required. // CGitns is the number of Conjugate-Gradient iterations required // if an iterative solver is used (LSQR). // time is the cpu time used. //---------------------------------------------------------------------- // PRIVATE FUNCTIONS: // pdxxxbounds // pdxxxdistrib // pdxxxlsqr // pdxxxlsqrmat // pdxxxmat // pdxxxmerit // pdxxxresid1 // pdxxxresid2 // pdxxxstep // // GLOBAL VARIABLES: // global pdDDD1 pdDDD2 pdDDD3 // // // NOTES: // The matrix A should be reasonably well scaled: norm(A,inf) =~ 1. // The vector b and objective phi(x) may be of any size, but ensure that // xsize and zsize are reasonably close to norm(x,inf) and norm(z,inf) // at the solution. // // The files defining pdObj and pdMat // must not be called Fname.m or Aname.m!! // // // AUTHOR: // Michael Saunders, Systems Optimization Laboratory (SOL), // Stanford University, Stanford, California, USA. // [email protected] // // CONTRIBUTORS: // Byunggyoo Kim, SOL, Stanford University. // [email protected] // // DEVELOPMENT: // 20 Jun 1997: Original version of pdsco.m derived from pdlp0.m. // 29 Sep 2002: Original version of pdco.m derived from pdsco.m. // Introduced D1, D2 in place of gamma*I, delta*I // and allowed for general bounds bl <= x <= bu. // 06 Oct 2002: Allowed for fixed variabes: bl(j) = bu(j) for any j. // 15 Oct 2002: Eliminated some work vectors (since m, n might be LARGE). // Modularized residuals, linesearch // 16 Oct 2002: pdxxx..., pdDDD... names rationalized. // pdAAA eliminated (global copy of A). // Aname is now used directly as an ifexplicit A or a function. // NOTE: If Aname is a function, it now has an extra parameter. // 23 Oct 2002: Fname and Aname can now be function handles. // 01 Nov 2002: Bug fixed in feval in pdxxxmat. 
//----------------------------------------------------------------------- // global pdDDD1 pdDDD2 pdDDD3 double inf = 1.0e30; double eps = 1.0e-15; double atolold = -1.0, r3ratio = -1.0, Pinf, Dinf, Cinf, Cinf0; printf("\n --------------------------------------------------------"); printf("\n pdco.m Version of 01 Nov 2002"); printf("\n Primal-dual barrier method to minimize a convex function"); printf("\n subject to linear constraints Ax + r = b, bl <= x <= bu"); printf("\n --------------------------------------------------------\n"); int m = numberRows_; int n = numberColumns_; bool ifexplicit = true; CoinDenseVector<double> b(m, rhs_); CoinDenseVector<double> x(n, x_); CoinDenseVector<double> y(m, y_); CoinDenseVector<double> z(n, dj_); //delete old arrays delete [] rhs_; delete [] x_; delete [] y_; delete [] dj_; rhs_ = NULL; x_ = NULL; y_ = NULL; dj_ = NULL; // Save stuff so available elsewhere pdcoStuff_ = stuff; double normb = b.infNorm(); double normx0 = x.infNorm(); double normy0 = y.infNorm(); double normz0 = z.infNorm(); printf("\nmax |b | = %8g max |x0| = %8g", normb , normx0); printf( " xsize = %8g", xsize_); printf("\nmax |y0| = %8g max |z0| = %8g", normy0, normz0); printf( " zsize = %8g", zsize_); //--------------------------------------------------------------------- // Initialize. 
//--------------------------------------------------------------------- //true = 1; //false = 0; //zn = zeros(n,1); //int nb = n + m; int CGitns = 0; int inform = 0; //--------------------------------------------------------------------- // Only allow scalar d1, d2 for now //--------------------------------------------------------------------- /* if (d1_->size()==1) d1_->resize(n, d1_->getElements()[0]); // Allow scalar d1, d2 if (d2_->size()==1) d2->resize(m, d2->getElements()[0]); // to mean dk * unit vector */ assert (stuff->sizeD1() == 1); double d1 = stuff->getD1(); double d2 = stuff->getD2(); //--------------------------------------------------------------------- // Grab input options. //--------------------------------------------------------------------- int maxitn = options.MaxIter; double featol = options.FeaTol; double opttol = options.OptTol; double steptol = options.StepTol; int stepSame = 1; /* options.StepSame; // 1 means stepx == stepz */ double x0min = options.x0min; double z0min = options.z0min; double mu0 = options.mu0; int LSproblem = options.LSproblem; // See below int LSmethod = options.LSmethod; // 1=Cholesky 2=QR 3=LSQR int itnlim = options.LSQRMaxIter * CoinMin(m, n); double atol1 = options.LSQRatol1; // Initial atol double atol2 = options.LSQRatol2; // Smallest atol,unless atol1 is smaller double conlim = options.LSQRconlim; //int wait = options.wait; // LSproblem: // 1 = dy 2 = dy shifted, DLS // 11 = s 12 = s shifted, DLS (dx = Ds) // 21 = dx // 31 = 3x3 system, symmetrized by Z^{1/2} // 32 = 2x2 system, symmetrized by X^{1/2} //--------------------------------------------------------------------- // Set other parameters. 
//--------------------------------------------------------------------- int kminor = 0; // 1 stops after each iteration double eta = 1e-4; // Linesearch tolerance for "sufficient descent" double maxf = 10; // Linesearch backtrack limit (function evaluations) double maxfail = 1; // Linesearch failure limit (consecutive iterations) double bigcenter = 1e+3; // mu is reduced if center < bigcenter. // Parameters for LSQR. double atolmin = eps; // Smallest atol if linesearch back-tracks double btol = 0; // Should be small (zero is ok) double show = false; // Controls lsqr iteration log /* double gamma = d1->infNorm(); double delta = d2->infNorm(); */ double gamma = d1; double delta = d2; printf("\n\nx0min = %8g featol = %8.1e", x0min, featol); printf( " d1max = %8.1e", gamma); printf( "\nz0min = %8g opttol = %8.1e", z0min, opttol); printf( " d2max = %8.1e", delta); printf( "\nmu0 = %8.1e steptol = %8g", mu0 , steptol); printf( " bigcenter= %8g" , bigcenter); printf("\n\nLSQR:"); printf("\natol1 = %8.1e atol2 = %8.1e", atol1 , atol2 ); printf( " btol = %8.1e", btol ); printf("\nconlim = %8.1e itnlim = %8d" , conlim, itnlim); printf( " show = %8g" , show ); // LSmethod = 3; ////// Hardwire LSQR // LSproblem = 1; ////// and LS problem defining "dy". /* if wait printf("\n\nReview parameters... then type "return"\n") keyboard end */ if (eta < 0) printf("\n\nLinesearch disabled by eta < 0"); //--------------------------------------------------------------------- // All parameters have now been set. //--------------------------------------------------------------------- double time = CoinCpuTime(); //bool useChol = (LSmethod == 1); //bool useQR = (LSmethod == 2); bool direct = (LSmethod <= 2 && ifexplicit); char solver[6]; strcpy(solver, " LSQR"); //--------------------------------------------------------------------- // Categorize bounds and allow for fixed variables by modifying b. 
//--------------------------------------------------------------------- int nlow, nupp, nfix; int *bptrs[3] = {0}; getBoundTypes(&nlow, &nupp, &nfix, bptrs ); int *low = bptrs[0]; int *upp = bptrs[1]; int *fix = bptrs[2]; int nU = n; if (nupp == 0) nU = 1; //Make dummy vectors if no Upper bounds //--------------------------------------------------------------------- // Get pointers to local copy of model bounds //--------------------------------------------------------------------- CoinDenseVector<double> bl(n, columnLower_); double *bl_elts = bl.getElements(); CoinDenseVector<double> bu(nU, columnUpper_); // this is dummy if no UB double *bu_elts = bu.getElements(); CoinDenseVector<double> r1(m, 0.0); double *r1_elts = r1.getElements(); CoinDenseVector<double> x1(n, 0.0); double *x1_elts = x1.getElements(); if (nfix > 0) { for (int k = 0; k < nfix; k++) x1_elts[fix[k]] = bl[fix[k]]; matVecMult(1, r1, x1); b = b - r1; // At some stage, might want to look at normfix = norm(r1,inf); } //--------------------------------------------------------------------- // Scale the input data. // The scaled variables are // xbar = x/beta, // ybar = y/zeta, // zbar = z/zeta. // Define // theta = beta*zeta; // The scaled function is // phibar = ( 1 /theta) fbar(beta*xbar), // gradient = (beta /theta) grad, // Hessian = (beta2/theta) hess. //--------------------------------------------------------------------- double beta = xsize_; if (beta == 0) beta = 1; // beta scales b, x. double zeta = zsize_; if (zeta == 0) zeta = 1; // zeta scales y, z. double theta = beta * zeta; // theta scales obj. // (theta could be anything, but theta = beta*zeta makes // scaled grad = grad/zeta = 1 approximately if zeta is chosen right.) 
for (int k = 0; k < nlow; k++) bl_elts[low[k]] = bl_elts[low[k]] / beta; for (int k = 0; k < nupp; k++) bu_elts[upp[k]] = bu_elts[upp[k]] / beta; d1 = d1 * ( beta / sqrt(theta) ); d2 = d2 * ( sqrt(theta) / beta ); double beta2 = beta * beta; b.scale( (1.0 / beta) ); y.scale( (1.0 / zeta) ); x.scale( (1.0 / beta) ); z.scale( (1.0 / zeta) ); //--------------------------------------------------------------------- // Initialize vectors that are not fully used if bounds are missing. //--------------------------------------------------------------------- CoinDenseVector<double> rL(n, 0.0); CoinDenseVector<double> cL(n, 0.0); CoinDenseVector<double> z1(n, 0.0); CoinDenseVector<double> dx1(n, 0.0); CoinDenseVector<double> dz1(n, 0.0); CoinDenseVector<double> r2(n, 0.0); // Assign upper bd regions (dummy if no UBs) CoinDenseVector<double> rU(nU, 0.0); CoinDenseVector<double> cU(nU, 0.0); CoinDenseVector<double> x2(nU, 0.0); CoinDenseVector<double> z2(nU, 0.0); CoinDenseVector<double> dx2(nU, 0.0); CoinDenseVector<double> dz2(nU, 0.0); //--------------------------------------------------------------------- // Initialize x, y, z, objective, etc. 
//--------------------------------------------------------------------- CoinDenseVector<double> dx(n, 0.0); CoinDenseVector<double> dy(m, 0.0); CoinDenseVector<double> Pr(m); CoinDenseVector<double> D(n); double *D_elts = D.getElements(); CoinDenseVector<double> w(n); double *w_elts = w.getElements(); CoinDenseVector<double> rhs(m + n); //--------------------------------------------------------------------- // Pull out the element array pointers for efficiency //--------------------------------------------------------------------- double *x_elts = x.getElements(); double *x2_elts = x2.getElements(); double *z_elts = z.getElements(); double *z1_elts = z1.getElements(); double *z2_elts = z2.getElements(); for (int k = 0; k < nlow; k++) { x_elts[low[k]] = CoinMax( x_elts[low[k]], bl[low[k]]); x1_elts[low[k]] = CoinMax( x_elts[low[k]] - bl[low[k]], x0min ); z1_elts[low[k]] = CoinMax( z_elts[low[k]], z0min ); } for (int k = 0; k < nupp; k++) { x_elts[upp[k]] = CoinMin( x_elts[upp[k]], bu[upp[k]]); x2_elts[upp[k]] = CoinMax(bu[upp[k]] - x_elts[upp[k]], x0min ); z2_elts[upp[k]] = CoinMax(-z_elts[upp[k]], z0min ); } //////////////////// Assume hessian is diagonal. ////////////////////// // [obj,grad,hess] = feval( Fname, (x*beta) ); x.scale(beta); double obj = getObj(x); CoinDenseVector<double> grad(n); getGrad(x, grad); CoinDenseVector<double> H(n); getHessian(x , H); x.scale((1.0 / beta)); //double * g_elts = grad.getElements(); double * H_elts = H.getElements(); obj /= theta; // Scaled obj. grad = grad * (beta / theta) + (d1 * d1) * x; // grad includes x regularization. H = H * (beta2 / theta) + (d1 * d1) ; // H includes x regularization. /*--------------------------------------------------------------------- // Compute primal and dual residuals: // r1 = b - Aprod(x) - d2*d2*y; // r2 = grad - Atprod(y) + z2 - z1; // rL = bl - x + x1; // rU = x + x2 - bu; */ //--------------------------------------------------------------------- // [r1,r2,rL,rU,Pinf,Dinf] = ... 
// pdxxxresid1( Aname,fix,low,upp, ... // b,bl,bu,d1,d2,grad,rL,rU,x,x1,x2,y,z1,z2 ); pdxxxresid1( this, nlow, nupp, nfix, low, upp, fix, b, bl_elts, bu_elts, d1, d2, grad, rL, rU, x, x1, x2, y, z1, z2, r1, r2, &Pinf, &Dinf); //--------------------------------------------------------------------- // Initialize mu and complementarity residuals: // cL = mu*e - X1*z1. // cU = mu*e - X2*z2. // // 25 Jan 2001: Now that b and obj are scaled (and hence x,y,z), // we should be able to use mufirst = mu0 (absolute value). // 0.1 worked poorly on StarTest1 with x0min = z0min = 0.1. // 29 Jan 2001: We might as well use mu0 = x0min * z0min; // so that most variables are centered after a warm start. // 29 Sep 2002: Use mufirst = mu0*(x0min * z0min), // regarding mu0 as a scaling of the initial center. //--------------------------------------------------------------------- // double mufirst = mu0*(x0min * z0min); double mufirst = mu0; // revert to absolute value double mulast = 0.1 * opttol; mulast = CoinMin( mulast, mufirst ); double mu = mufirst; double center, fmerit; pdxxxresid2( mu, nlow, nupp, low, upp, cL, cU, x1, x2, z1, z2, ¢er, &Cinf, &Cinf0 ); fmerit = pdxxxmerit(nlow, nupp, low, upp, r1, r2, rL, rU, cL, cU ); // Initialize other things. bool precon = true; double PDitns = 0; //bool converged = false; double atol = atol1; atol2 = CoinMax( atol2, atolmin ); atolmin = atol2; // pdDDD2 = d2; // Global vector for diagonal matrix D2 // Iteration log. 
int nf = 0; int itncg = 0; int nfail = 0; printf("\n\nItn mu stepx stepz Pinf Dinf"); printf(" Cinf Objective nf center"); if (direct) { printf("\n"); } else { printf(" atol solver Inexact\n"); } double regx = (d1 * x).twoNorm(); double regy = (d2 * y).twoNorm(); // regterm = twoNorm(d1.*x)^2 + norm(d2.*y)^2; double regterm = regx * regx + regy * regy; double objreg = obj + 0.5 * regterm; double objtrue = objreg * theta; printf("\n%3g ", PDitns ); printf("%6.1f%6.1f" , log10(Pinf ), log10(Dinf)); printf("%6.1f%15.7e", log10(Cinf0), objtrue ); printf(" %8.1f\n" , center ); /* if kminor printf("\n\nStart of first minor itn...\n"); keyboard end */ //--------------------------------------------------------------------- // Main loop. //--------------------------------------------------------------------- // Lsqr ClpLsqr thisLsqr(this); // while (converged) { while(PDitns < maxitn) { PDitns = PDitns + 1; // 31 Jan 2001: Set atol according to progress, a la Inexact Newton. // 07 Feb 2001: 0.1 not small enough for Satellite problem. Try 0.01. // 25 Apr 2001: 0.01 seems wasteful for Star problem. // Now that starting conditions are better, go back to 0.1. double r3norm = CoinMax(Pinf, CoinMax(Dinf, Cinf)); atol = CoinMin(atol, r3norm * 0.1); atol = CoinMax(atol, atolmin ); info.r3norm = r3norm; //------------------------------------------------------------------- // Define a damped Newton iteration for solving f = 0, // keeping x1, x2, z1, z2 > 0. We eliminate dx1, dx2, dz1, dz2 // to obtain the system // // [-H2 A" ] [ dx ] = [ w ], H2 = H + D1^2 + X1inv Z1 + X2inv Z2, // [ A D2^2] [ dy ] = [ r1] w = r2 - X1inv(cL + Z1 rL) // + X2inv(cU + Z2 rU), // // which is equivalent to the least-squares problem // // min || [ D A"]dy - [ D w ] ||, D = H2^{-1/2}. 
(*) // || [ D2 ] [D2inv r1] || //------------------------------------------------------------------- for (int k = 0; k < nlow; k++) H_elts[low[k]] = H_elts[low[k]] + z1[low[k]] / x1[low[k]]; for (int k = 0; k < nupp; k++) H[upp[k]] = H[upp[k]] + z2[upp[k]] / x2[upp[k]]; w = r2; for (int k = 0; k < nlow; k++) w[low[k]] = w[low[k]] - (cL[low[k]] + z1[low[k]] * rL[low[k]]) / x1[low[k]]; for (int k = 0; k < nupp; k++) w[upp[k]] = w[upp[k]] + (cU[upp[k]] + z2[upp[k]] * rU[upp[k]]) / x2[upp[k]]; if (LSproblem == 1) { //----------------------------------------------------------------- // Solve (*) for dy. //----------------------------------------------------------------- H = 1.0 / H; // H is now Hinv (NOTE!) for (int k = 0; k < nfix; k++) H[fix[k]] = 0; for (int k = 0; k < n; k++) D_elts[k] = sqrt(H_elts[k]); thisLsqr.borrowDiag1(D_elts); thisLsqr.diag2_ = d2; if (direct) { // Omit direct option for now } else {// Iterative solve using LSQR. //rhs = [ D.*w; r1./d2 ]; for (int k = 0; k < n; k++) rhs[k] = D_elts[k] * w_elts[k]; for (int k = 0; k < m; k++) rhs[n+k] = r1_elts[k] * (1.0 / d2); double damp = 0; if (precon) { // Construct diagonal preconditioner for LSQR matPrecon(d2, Pr, D); } /* rw(7) = precon; info.atolmin = atolmin; info.r3norm = fmerit; // Must be the 2-norm here. [ dy, istop, itncg, outfo ] = ... pdxxxlsqr( nb,m,"pdxxxlsqrmat",Aname,rw,rhs,damp, ... atol,btol,conlim,itnlim,show,info ); thisLsqr.input->rhs_vec = &rhs; thisLsqr.input->sol_vec = &dy; thisLsqr.input->rel_mat_err = atol; thisLsqr.do_lsqr(this); */ // New version of lsqr int istop; dy.clear(); show = false; info.atolmin = atolmin; info.r3norm = fmerit; // Must be the 2-norm here. 
thisLsqr.do_lsqr( rhs, damp, atol, btol, conlim, itnlim, show, info, dy , &istop, &itncg, &outfo, precon, Pr); if (precon) dy = dy * Pr; if (!precon && itncg > 999999) precon = true; if (istop == 3 || istop == 7 ) // conlim or itnlim printf("\n LSQR stopped early: istop = //%d", istop); atolold = outfo.atolold; atol = outfo.atolnew; r3ratio = outfo.r3ratio; }// LSproblem 1 // grad = pdxxxmat( Aname,2,m,n,dy ); // grad = A"dy grad.clear(); matVecMult(2, grad, dy); for (int k = 0; k < nfix; k++) grad[fix[k]] = 0; // grad is a work vector dx = H * (grad - w); } else { perror( "This LSproblem not yet implemented\n" ); } //------------------------------------------------------------------- CGitns += itncg; //------------------------------------------------------------------- // dx and dy are now known. Get dx1, dx2, dz1, dz2. //------------------------------------------------------------------- for (int k = 0; k < nlow; k++) { dx1[low[k]] = - rL[low[k]] + dx[low[k]]; dz1[low[k]] = (cL[low[k]] - z1[low[k]] * dx1[low[k]]) / x1[low[k]]; } for (int k = 0; k < nupp; k++) { dx2[upp[k]] = - rU[upp[k]] - dx[upp[k]]; dz2[upp[k]] = (cU[upp[k]] - z2[upp[k]] * dx2[upp[k]]) / x2[upp[k]]; } //------------------------------------------------------------------- // Find the maximum step. //-------------------------------------------------------------------- double stepx1 = pdxxxstep(nlow, low, x1, dx1 ); double stepx2 = inf; if (nupp > 0) stepx2 = pdxxxstep(nupp, upp, x2, dx2 ); double stepz1 = pdxxxstep( z1 , dz1 ); double stepz2 = inf; if (nupp > 0) stepz2 = pdxxxstep( z2 , dz2 ); double stepx = CoinMin( stepx1, stepx2 ); double stepz = CoinMin( stepz1, stepz2 ); stepx = CoinMin( steptol * stepx, 1.0 ); stepz = CoinMin( steptol * stepz, 1.0 ); if (stepSame) { // For NLPs, force same step stepx = CoinMin( stepx, stepz ); // (true Newton method) stepz = stepx; } //------------------------------------------------------------------- // Backtracking linesearch. 
//------------------------------------------------------------------- bool fail = true; nf = 0; while (nf < maxf) { nf = nf + 1; x = x + stepx * dx; y = y + stepz * dy; for (int k = 0; k < nlow; k++) { x1[low[k]] = x1[low[k]] + stepx * dx1[low[k]]; z1[low[k]] = z1[low[k]] + stepz * dz1[low[k]]; } for (int k = 0; k < nupp; k++) { x2[upp[k]] = x2[upp[k]] + stepx * dx2[upp[k]]; z2[upp[k]] = z2[upp[k]] + stepz * dz2[upp[k]]; } // [obj,grad,hess] = feval( Fname, (x*beta) ); x.scale(beta); obj = getObj(x); getGrad(x, grad); getHessian(x, H); x.scale((1.0 / beta)); obj /= theta; grad = grad * (beta / theta) + d1 * d1 * x; H = H * (beta2 / theta) + d1 * d1; // [r1,r2,rL,rU,Pinf,Dinf] = ... pdxxxresid1( this, nlow, nupp, nfix, low, upp, fix, b, bl_elts, bu_elts, d1, d2, grad, rL, rU, x, x1, x2, y, z1, z2, r1, r2, &Pinf, &Dinf ); //double center, Cinf, Cinf0; // [cL,cU,center,Cinf,Cinf0] = ... pdxxxresid2( mu, nlow, nupp, low, upp, cL, cU, x1, x2, z1, z2, ¢er, &Cinf, &Cinf0); double fmeritnew = pdxxxmerit(nlow, nupp, low, upp, r1, r2, rL, rU, cL, cU ); double step = CoinMin( stepx, stepz ); if (fmeritnew <= (1 - eta * step)*fmerit) { fail = false; break; } // Merit function didn"t decrease. // Restore variables to previous values. // (This introduces a little error, but save lots of space.) x = x - stepx * dx; y = y - stepz * dy; for (int k = 0; k < nlow; k++) { x1[low[k]] = x1[low[k]] - stepx * dx1[low[k]]; z1[low[k]] = z1[low[k]] - stepz * dz1[low[k]]; } for (int k = 0; k < nupp; k++) { x2[upp[k]] = x2[upp[k]] - stepx * dx2[upp[k]]; z2[upp[k]] = z2[upp[k]] - stepz * dz2[upp[k]]; } // Back-track. // If it"s the first time, // make stepx and stepz the same. if (nf == 1 && stepx != stepz) { stepx = step; } else if (nf < maxf) { stepx = stepx / 2; } stepz = stepx; } if (fail) { printf("\n Linesearch failed (nf too big)"); nfail += 1; } else { nfail = 0; } //------------------------------------------------------------------- // Set convergence measures. 
//-------------------------------------------------------------------- regx = (d1 * x).twoNorm(); regy = (d2 * y).twoNorm(); regterm = regx * regx + regy * regy; objreg = obj + 0.5 * regterm; objtrue = objreg * theta; bool primalfeas = Pinf <= featol; bool dualfeas = Dinf <= featol; bool complementary = Cinf0 <= opttol; bool enough = PDitns >= 4; // Prevent premature termination. bool converged = primalfeas & dualfeas & complementary & enough; //------------------------------------------------------------------- // Iteration log. //------------------------------------------------------------------- char str1[100], str2[100], str3[100], str4[100], str5[100]; sprintf(str1, "\n%3g%5.1f" , PDitns , log10(mu) ); sprintf(str2, "%8.5f%8.5f" , stepx , stepz ); if (stepx < 0.0001 || stepz < 0.0001) { sprintf(str2, " %6.1e %6.1e" , stepx , stepz ); } sprintf(str3, "%6.1f%6.1f" , log10(Pinf) , log10(Dinf)); sprintf(str4, "%6.1f%15.7e", log10(Cinf0), objtrue ); sprintf(str5, "%3d%8.1f" , nf , center ); if (center > 99999) { sprintf(str5, "%3d%8.1e" , nf , center ); } printf("%s%s%s%s%s", str1, str2, str3, str4, str5); if (direct) { // relax } else { printf(" %5.1f%7d%7.3f", log10(atolold), itncg, r3ratio); } //------------------------------------------------------------------- // Test for termination. //------------------------------------------------------------------- if (kminor) { printf( "\nStart of next minor itn...\n"); // keyboard; } if (converged) { printf("\n Converged"); break; } else if (PDitns >= maxitn) { printf("\n Too many iterations"); inform = 1; break; } else if (nfail >= maxfail) { printf("\n Too many linesearch failures"); inform = 2; break; } else { // Reduce mu, and reset certain residuals. 
double stepmu = CoinMin( stepx , stepz ); stepmu = CoinMin( stepmu, steptol ); double muold = mu; mu = mu - stepmu * mu; if (center >= bigcenter) mu = muold; // mutrad = mu0*(sum(Xz)/n); // 24 May 1998: Traditional value, but // mu = CoinMin(mu,mutrad ); // it seemed to decrease mu too much. mu = CoinMax(mu, mulast); // 13 Jun 1998: No need for smaller mu. // [cL,cU,center,Cinf,Cinf0] = ... pdxxxresid2( mu, nlow, nupp, low, upp, cL, cU, x1, x2, z1, z2, ¢er, &Cinf, &Cinf0 ); fmerit = pdxxxmerit( nlow, nupp, low, upp, r1, r2, rL, rU, cL, cU ); // Reduce atol for LSQR (and SYMMLQ). // NOW DONE AT TOP OF LOOP. atolold = atol; // if atol > atol2 // atolfac = (mu/mufirst)^0.25; // atol = CoinMax( atol*atolfac, atol2 ); // end // atol = CoinMin( atol, mu ); // 22 Jan 2001: a la Inexact Newton. // atol = CoinMin( atol, 0.5*mu ); // 30 Jan 2001: A bit tighter // If the linesearch took more than one function (nf > 1), // we assume the search direction needed more accuracy // (though this may be true only for LPs). // 12 Jun 1998: Ask for more accuracy if nf > 2. // 24 Nov 2000: Also if the steps are small. // 30 Jan 2001: Small steps might be ok with warm start. // 06 Feb 2001: Not necessarily. Reinstated tests in next line. if (nf > 2 || CoinMin( stepx, stepz ) <= 0.01) atol = atolold * 0.1; } //--------------------------------------------------------------------- // End of main loop. //--------------------------------------------------------------------- } for (int k = 0; k < nfix; k++) x[fix[k]] = bl[fix[k]]; z = z1; if (nupp > 0) z = z - z2; printf("\n\nmax |x| =%10.3f", x.infNorm() ); printf(" max |y| =%10.3f", y.infNorm() ); printf(" max |z| =%10.3f", z.infNorm() ); printf(" scaled"); x.scale(beta); y.scale(zeta); z.scale(zeta); // Unscale x, y, z. 
printf( "\nmax |x| =%10.3f", x.infNorm() ); printf(" max |y| =%10.3f", y.infNorm() ); printf(" max |z| =%10.3f", z.infNorm() ); printf(" unscaled\n"); time = CoinCpuTime() - time; char str1[100], str2[100]; sprintf(str1, "\nPDitns =%10g", PDitns ); sprintf(str2, "itns =%10d", CGitns ); // printf( [str1 " " solver str2] ); printf(" time =%10.1f\n", time); /* pdxxxdistrib( abs(x),abs(z) ); // Private function if (wait) keyboard; */ //----------------------------------------------------------------------- // End function pdco.m //----------------------------------------------------------------------- /* printf("Solution x values:\n\n"); for (int k=0; k<n; k++) printf(" %d %e\n", k, x[k]); */ // Print distribution double thresh[9] = { 0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.00001}; int counts[9] = {0}; for (int ij = 0; ij < n; ij++) { for (int j = 0; j < 9; j++) { if(x[ij] < thresh[j]) { counts[j] += 1; break; } } } printf ("Distribution of Solution Values\n"); for (int j = 8; j > 1; j--) printf(" %g to %g %d\n", thresh[j-1], thresh[j], counts[j]); printf(" Less than %g %d\n", thresh[2], counts[0]); return inform; }
// Declares a __block-qualified int, assigns it the result of rhs(), and then
// adds the result of a second rhs() call with a compound assignment.
// NOTE(review): __block is the storage qualifier of the Clang "Blocks"
// extension; this looks like a minimal compiler test case, so the exact
// statement shapes are presumably intentional -- confirm before restructuring.
void foo() { __block int i; i = rhs(); i += rhs(); }
int main (int argc, char* argv[]) { if(argc != 2){ cerr << "USAGE: " << argv[0] << " Jmax (= max. level of sparse grid)" << endl; exit(1); } //===============================================================// //========= PROBLEM SETUP =======================// //===============================================================// int d = 2; int d_ = 2; int j0 = 2; size_t Jmax = atoi(argv[1]); //getchar(); /// Basis initialization TrialBasis_Time basis_per(d,d_,j0); TestBasis_Time basis_int(d,d_,j0); Basis_Space basis_intbc(d,0); basis_intbc.enforceBoundaryCondition<DirichletBC>(); Basis2D_Trial basis2d_trial(basis_per,basis_intbc); Basis2D_Test basis2d_test(basis_int,basis_intbc); /// Initialization of operator // Bilinear Forms Convection1D_Time ConvectionBil_t(basis_per, basis_int); Identity1D_Time IdentityBil_t(basis_per, basis_int); Identity1D_Space IdentityBil_x(basis_intbc, basis_intbc); Laplace1D_Space LaplaceBil_x(basis_intbc, basis_intbc); RefConvection1D_Time RefConvectionBil_t(basis_per.refinementbasis, basis_int.refinementbasis); RefIdentity1D_Time RefIdentityBil_t(basis_per.refinementbasis, basis_int.refinementbasis); RefIdentity1D_Space RefIdentityBil_x(basis_intbc.refinementbasis, basis_intbc.refinementbasis); RefLaplace1D_Space RefLaplaceBil_x(basis_intbc.refinementbasis, basis_intbc.refinementbasis); // Transposed Bilinear Forms TranspConvection1D_Time TranspConvectionBil_t(basis_per, basis_int); TranspIdentity1D_Time TranspIdentityBil_t(basis_per, basis_int); TranspIdentity1D_Space TranspIdentityBil_x(basis_intbc, basis_intbc); TranspLaplace1D_Space TranspLaplaceBil_x(basis_intbc, basis_intbc); RefTranspConvection1D_Time RefTranspConvectionBil_t(basis_per.refinementbasis, basis_int.refinementbasis); RefTranspIdentity1D_Time RefTranspIdentityBil_t(basis_per.refinementbasis, basis_int.refinementbasis); RefTranspIdentity1D_Space RefTranspIdentityBil_x(basis_intbc.refinementbasis, basis_intbc.refinementbasis); RefTranspLaplace1D_Space 
RefTranspLaplaceBil_x(basis_intbc.refinementbasis, basis_intbc.refinementbasis); /// Initialization of local operator LOp_Conv1D_Time lOp_Conv1D_t(basis_int, basis_per, RefConvectionBil_t, ConvectionBil_t); LOp_Id1D_Time lOp_Id1D_t (basis_int, basis_per, RefIdentityBil_t, IdentityBil_t); LOp_Id1D_Space lOp_Id1D_x (basis_intbc, basis_intbc, RefIdentityBil_x, IdentityBil_x); LOp_Lapl1D_Space lOp_Lapl1D_x(basis_intbc, basis_intbc, RefLaplaceBil_x, LaplaceBil_x); LOpT_Conv1D_Time lOpT_Conv1D_t(basis_per, basis_int, RefTranspConvectionBil_t, TranspConvectionBil_t); LOpT_Id1D_Time lOpT_Id1D_t (basis_per, basis_int, RefTranspIdentityBil_t, TranspIdentityBil_t); LOpT_Id1D_Space lOpT_Id1D_x (basis_intbc, basis_intbc, RefTranspIdentityBil_x, TranspIdentityBil_x); LOpT_Lapl1D_Space lOpT_Lapl1D_x(basis_intbc, basis_intbc, RefTranspLaplaceBil_x, TranspLaplaceBil_x); LOp_Conv_Id_2D localConvectionIdentityOp2D(lOp_Conv1D_t, lOp_Id1D_x); LOp_Id_Lapl_2D localIdentityLaplaceOp2D(lOp_Id1D_t, lOp_Lapl1D_x); LOpT_Conv_Id_2D transpLocalConvectionIdentityOp2D(lOpT_Conv1D_t, lOpT_Id1D_x); LOpT_Id_Lapl_2D transpLocalIdentityLaplaceOp2D(lOpT_Id1D_t, lOpT_Lapl1D_x); localConvectionIdentityOp2D.setJ(9); localIdentityLaplaceOp2D.setJ(9); transpLocalConvectionIdentityOp2D.setJ(9); transpLocalIdentityLaplaceOp2D.setJ(9); // Use CompoundLocalOperator2D COp_Heat localOperator2D(localConvectionIdentityOp2D,localIdentityLaplaceOp2D); COpT_Heat transpLocalOperator2D(transpLocalConvectionIdentityOp2D,transpLocalIdentityLaplaceOp2D); // Use FlexibleCompoundLocalOperator2D // vector<AbstractLocalOperator2D<T>* > localOperatorVec, transpLocalOperatorVec; // localOperatorVec.push_back(&localConvectionIdentityOp2D); // localOperatorVec.push_back(&localIdentityLaplaceOp2D); // transpLocalOperatorVec.push_back(&transpLocalConvectionIdentityOp2D); // transpLocalOperatorVec.push_back(&transpLocalIdentityLaplaceOp2D); // FlexibleCompoundLocalOperator2D localOperator2D(localOperatorVec); // 
FlexibleCompoundLocalOperator2D transpLocalOperator2D(transpLocalOperatorVec); /// Initialization of preconditioner LeftPrec2D leftPrec(basis2d_test); RightPrec2D rightPrec(basis2d_trial); NoPrec2D noPrec; /// Initialization of rhs /// Right Hand Side: /// No Singular Supports in both dimensions DenseVectorT sing_support_x; DenseVectorT sing_support_t(n+1); for(size_t i = 0; i <= n; ++i){ sing_support_t(i+1) = i*l; } /// Forcing Functions SeparableFunction2D<T> F_fct(f_t, sing_support_t, f_x, sing_support_x); /// Peaks: points and corresponding coefficients /// (heights of jumps in derivatives) FullColMatrixT nodeltas; SeparableRhsIntegral2D rhs(basis2d_test, F_fct, nodeltas, nodeltas, 20); SeparableRhs F(rhs,noPrec); //===============================================================// //=============== AWGM =========================================// //===============================================================// /* AWGM PG Parameters Default Values double tol = 5e-03; double alpha = 0.7; size_t max_its = 100; size_t max_basissize = 400000; bool reset_res = false; bool print_info = true; bool verbose = true; bool plot_solution = false; bool verbose_extra = false; //(print added wavelet indizes) size_t hashmapsize_trial = 10; size_t hashmapsize_test = 10; std::string info_filename = "awgm_cgls_conv_info.txt"; std::string plot_filename = "awgm_cgls_u_plot"; bool write_intermediary_solutions = false; std::string intermediary_solutions_filename = "awgm_cgls_u"; */ /* IS Parameters Default Values bool adaptive_tol = true; size_t max_its = 100; double init_tol = 0.001; double res_reduction = 0.01; double absolute_tol = 1e-8; bool verbose = true; */ // MultitreeAWGM with default values //MT_AWGM multitree_awgm(basis2d_trial, basis2d_test, localOperator2D, transLocalOperator2D, // F, rightPrec, leftPrec); // If you want other parameters AWGM_PG_Parameters awgm_parameters; IS_Parameters cgls_parameters; // .... 
set them here: awgm_parameters.max_its = 0; awgm_parameters.tol = 1e-04; awgm_parameters.plot_solution = false; awgm_parameters.verbose_extra = false; awgm_parameters.info_filename = "awgm_ExSaw_SG_mv_conv_info.txt"; awgm_parameters.plot_filename = "awgm_ExSaw_SG_mv_u_plot"; awgm_parameters.write_intermediary_solutions = true; awgm_parameters.max_basissize = 1000000; cgls_parameters.adaptive_tol = true; cgls_parameters.init_tol = 1e-4; cgls_parameters.res_reduction = 0.01; cgls_parameters.max_its = 700; MT_AWGM multitree_awgm(basis2d_trial, basis2d_test, localOperator2D, transpLocalOperator2D, F, rightPrec, leftPrec, awgm_parameters, cgls_parameters); multitree_awgm.awgm_params.print(); multitree_awgm.is_params.print(); multitree_awgm.set_sol(dummy); for(size_t J = 2; J < Jmax; ++J){ stringstream filename; filename << "awgm_ExSaw_SG_mv_u_J_" << J; multitree_awgm.awgm_params.intermediary_solutions_filename = filename.str(); /// Initialization of solution vector and initial index sets Coefficients<Lexicographical,T,Index2D> u; T gamma = 0.2; IndexSet<Index2D> LambdaTrial, LambdaTest; getSparseGridIndexSet(basis2d_trial,LambdaTrial,J,0,gamma); getSparseGridIndexSet(basis2d_test ,LambdaTest ,J,1,gamma); Timer time; time.start(); multitree_awgm.solve(u, LambdaTrial, LambdaTest); time.stop(); cout << "Solution took " << time.elapsed() << " seconds" << endl; } return 0; }
/*
 * Estimates a pose from image points using an assignment matrix that is
 * refined in a deterministic annealing loop, starting from initialTransform.
 *
 * Parameters:
 *   numImagePoints    number of entries in imagePoints
 *   imagePoints       image points; overwritten in place with their
 *                     imgTransform-transformed values
 *   initialTransform  starting pose; its rotation and translation seed the
 *                     pose vectors
 *
 * Returns the transformation built from the final pose vectors, or
 * Transform::identity if the left-hand side of the pose alignment system is
 * rank deficient (diagnostics are then written to std::cerr).
 *
 * Side effects: resets and updates the per-model-point homogeneous weights
 * (mpws) and unconditionally dumps the final assignment matrix to std::cerr.
 */
ModelTracker::Transform ModelTracker::softPosit(unsigned int numImagePoints,ModelTracker::ImgPoint imagePoints[],const Transform& initialTransform)
{
    typedef Transform::Vector Vector;

    /* Pre-transform the image points by the image transformation: */
    for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
        imagePoints[ipi]=imgTransform.transform(imagePoints[ipi]);

    /* Assign initial homogeneous weights to the model points: */
    for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
        mpws[mpi]=1.0;

    /* Create the assignment matrix (one extra "slack" row and column): */
    Math::Matrix m(numImagePoints+1,numModelPoints+1);

    /* Initialize the "slack" rows and columns: */
    double gamma=1.0/double(Math::max(numImagePoints,numModelPoints)+1);
    for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
        m(ipi,numModelPoints)=gamma;
    for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
        m(numImagePoints,mpi)=gamma;
    m(numImagePoints,numModelPoints)=gamma;

    /* Initialize the pose vectors: */
    Transform::Rotation inverseOrientation=Geometry::invert(initialTransform.getRotation());
    Vector r1=inverseOrientation.getDirection(0);
    Vector r2=inverseOrientation.getDirection(1);
    Vector t=initialTransform.getTranslation();
    double s=-f/t[2];

    /* Perform the deterministic annealing loop: */
    for(double beta=0.005;beta<=0.5;beta*=1.025)
    {
        /* Create the initial assignment matrix based on squared distances between projected object points and image points: */
        for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
            for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
            {
                double d2=Math::sqr((r1*modelPoints[mpi]+t[0])*s-mpws[mpi]*imagePoints[ipi][0])
                          +Math::sqr((r2*modelPoints[mpi]+t[1])*s-mpws[mpi]*imagePoints[ipi][1]);
                m(ipi,mpi)=Math::exp(-beta*(d2-maxMatchDist2));

                // DEBUGGING
                // std::cout<<' '<<d2;
            }
        // DEBUGGING
        // std::cout<<std::endl;

        /* Normalize the assignment matrix using Sinkhorn's method: */
        double rowMaxDelta,colMaxDelta;
        do
        {
            /* Normalize image point rows (the slack row is not normalized): */
            rowMaxDelta=0.0;
            for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
            {
                /* Calculate the row sum, including the slack column: */
                double rowSum=0.0;
                for(unsigned int mpi=0;mpi<numModelPoints+1;++mpi)
                    rowSum+=m(ipi,mpi);

                /* Normalize the row: */
                for(unsigned int mpi=0;mpi<numModelPoints+1;++mpi)
                {
                    double oldM=m(ipi,mpi);
                    m(ipi,mpi)/=rowSum;
                    rowMaxDelta=Math::max(rowMaxDelta,Math::abs(m(ipi,mpi)-oldM));
                }
            }

            /* Normalize model point columns (the slack column is not normalized): */
            colMaxDelta=0.0;
            for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
            {
                /* Calculate the column sum, including the slack row: */
                double colSum=0.0;
                for(unsigned int ipi=0;ipi<numImagePoints+1;++ipi)
                    colSum+=m(ipi,mpi);

                /* Normalize the column: */
                for(unsigned int ipi=0;ipi<numImagePoints+1;++ipi)
                {
                    double oldM=m(ipi,mpi);
                    m(ipi,mpi)/=colSum;
                    colMaxDelta=Math::max(colMaxDelta,Math::abs(m(ipi,mpi)-oldM));
                }
            }
        }
        while(rowMaxDelta+colMaxDelta>1.0e-4);

        /* Compute the left-hand side of the pose alignment linear system: */
        Math::Matrix lhs(4,4,0.0);
        for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
        {
            const Point& mp=modelPoints[mpi];

            /* Calculate the linear equation weight for the model point: */
            double mpWeight=0.0;
            for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
                mpWeight+=m(ipi,mpi);

            /* Enter the model point into the pose alignment linear system: */
            for(int i=0;i<3;++i)
            {
                for(int j=0;j<3;++j)
                    lhs(i,j)+=mp[i]*mp[j]*mpWeight;
                lhs(i,3)+=mp[i]*mpWeight;
            }
            for(int j=0;j<3;++j)
                lhs(3,j)+=mp[j]*mpWeight;
            lhs(3,3)+=mpWeight;
        }

        /* Invert the left-hand side matrix: */
        Math::Matrix lhsInv;
        try
        {
            lhsInv=lhs.inverseFullPivot();
        }
        catch(const Math::Matrix::RankDeficientError&) // by const reference: no copy, no slicing
        {
            /* Report the offending matrices and give up with the identity transformation: */
            std::cerr<<"Left-hand side matrix is rank deficient"<<std::endl;
            for(int i=0;i<4;++i)
            {
                for(int j=0;j<4;++j)
                    std::cerr<<" "<<lhs(i,j);
                std::cerr<<std::endl;
            }

            std::cerr<<"Assignment matrix:"<<std::endl;
            for(unsigned int i=0;i<=numImagePoints;++i)
            {
                for(unsigned int j=0;j<=numModelPoints;++j)
                    std::cerr<<" "<<m(i,j);
                std::cerr<<std::endl;
            }

            return Transform::identity;
        }

        /* Perform a fixed number of iterations of POSIT: */
        for(unsigned int iteration=0;iteration<2U;++iteration)
        {
            /* Compute the right-hand side of the pose alignment linear system: */
            Math::Matrix rhs(4,2,0.0);
            for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
            {
                const Point& mp=modelPoints[mpi];

                /* Enter the model point into the pose alignment linear system: */
                double sumX=0.0;
                double sumY=0.0;
                for(unsigned int ipi=0;ipi<numImagePoints;++ipi)
                {
                    sumX+=m(ipi,mpi)*imagePoints[ipi][0];
                    sumY+=m(ipi,mpi)*imagePoints[ipi][1];
                }
                sumX*=mpws[mpi];
                sumY*=mpws[mpi];

                for(int i=0;i<3;++i)
                {
                    rhs(i,0)+=sumX*mp[i];
                    rhs(i,1)+=sumY*mp[i];
                }
                rhs(3,0)+=sumX;
                rhs(3,1)+=sumY;
            }

            /* Solve the pose alignment system: */
            Math::Matrix pose=lhsInv*rhs;
            for(int i=0;i<3;++i)
            {
                r1[i]=pose(i,0);
                r2[i]=pose(i,1);
            }

            /* Orthonormalize the pose vectors: */
            double s1=r1.mag();
            double s2=r2.mag();
            Vector r3=Geometry::normalize(r1^r2);
            Vector mid=r1/s1+r2/s2;
            mid/=mid.mag()*Math::sqrt(2.0);
            Vector mid2=r3^mid;
            r1=mid-mid2;
            r2=mid+mid2;
            s=Math::sqrt(s1*s2);
            t[0]=pose(3,0)/s;
            t[1]=pose(3,1)/s;
            t[2]=-f/s;

            /* Update the object points' homogeneous weights: */
            for(unsigned int mpi=0;mpi<numModelPoints;++mpi)
                mpws[mpi]=(r3*modelPoints[mpi])/t[2]+1.0;
        }

        // DEBUGGING
        // std::cout<<"Intermediate: "<<Transform(t,Geometry::invert(Transform::Rotation::fromBaseVectors(r1,r2)))<<std::endl;
    }

    // DEBUGGING (live output: the final assignment matrix is always printed)
    std::cerr<<"Final assignment matrix:"<<std::endl;
    for(unsigned int i=0;i<=numImagePoints;++i)
    {
        for(unsigned int j=0;j<=numModelPoints;++j)
            std::cerr<<" "<<m(i,j);
        std::cerr<<std::endl;
    }

    /* Return the result transformation: */
    return Transform(t,Geometry::invert(Transform::Rotation::fromBaseVectors(r1,r2)));
}
int CVODEModel::evaluateRHSFunction( double time, SundialsAbstractVector* y, SundialsAbstractVector* y_dot) { /* * Convert Sundials vectors to SAMRAI vectors */ std::shared_ptr<SAMRAIVectorReal<double> > y_samvect( Sundials_SAMRAIVector::getSAMRAIVector(y)); std::shared_ptr<SAMRAIVectorReal<double> > y_dot_samvect( Sundials_SAMRAIVector::getSAMRAIVector(y_dot)); std::shared_ptr<PatchHierarchy> hierarchy(y_samvect->getPatchHierarchy()); /* * Compute max norm of solution vector. */ //std::shared_ptr<HierarchyDataOpsReal<double> > hierops( // new HierarchyCellDataOpsReal<double>(hierarchy)); //double max_norm = hierops->maxNorm(y_samvect-> // getComponentDescriptorIndex(0)); if (d_print_solver_info) { pout << "\t\tEval RHS: " << "\n \t\t\ttime = " << time << "\n \t\t\ty_maxnorm = " << y_samvect->maxNorm() << endl; } /* * Allocate scratch space and fill ghost cells in the solution vector * 1) Create a refine algorithm * 2) Register with the algorithm the current & scratch space, along * with a refine operator. * 3) Use the refine algorithm to construct a refine schedule * 4) Use the refine schedule to fill data on fine level. */ std::shared_ptr<RefineAlgorithm> bdry_fill_alg( new RefineAlgorithm()); std::shared_ptr<RefineOperator> refine_op(d_grid_geometry-> lookupRefineOperator(d_soln_var, "CONSERVATIVE_LINEAR_REFINE")); bdry_fill_alg->registerRefine(d_soln_scr_id, // dest y_samvect-> getComponentDescriptorIndex(0), // src d_soln_scr_id, // scratch refine_op); for (int ln = hierarchy->getFinestLevelNumber(); ln >= 0; --ln) { std::shared_ptr<PatchLevel> level(hierarchy->getPatchLevel(ln)); if (!level->checkAllocated(d_soln_scr_id)) { level->allocatePatchData(d_soln_scr_id); } // Note: a pointer to "this" tells the refine schedule to invoke // the setPhysicalBCs defined in this class. 
std::shared_ptr<RefineSchedule> bdry_fill_alg_schedule( bdry_fill_alg->createSchedule(level, ln - 1, hierarchy, this)); bdry_fill_alg_schedule->fillData(time); } /* * Step through the levels and compute rhs */ for (int ln = hierarchy->getFinestLevelNumber(); ln >= 0; --ln) { std::shared_ptr<PatchLevel> level(hierarchy->getPatchLevel(ln)); for (PatchLevel::iterator ip(level->begin()); ip != level->end(); ++ip) { const std::shared_ptr<Patch>& patch = *ip; std::shared_ptr<CellData<double> > y( SAMRAI_SHARED_PTR_CAST<CellData<double>, PatchData>( patch->getPatchData(d_soln_scr_id))); std::shared_ptr<SideData<double> > diff( SAMRAI_SHARED_PTR_CAST<SideData<double>, PatchData>( patch->getPatchData(d_diff_id))); std::shared_ptr<CellData<double> > rhs( SAMRAI_SHARED_PTR_CAST<CellData<double>, PatchData>( patch->getPatchData(y_dot_samvect->getComponentDescriptorIndex(0)))); TBOX_ASSERT(y); TBOX_ASSERT(diff); TBOX_ASSERT(rhs); const Index ifirst(patch->getBox().lower()); const Index ilast(patch->getBox().upper()); const std::shared_ptr<CartesianPatchGeometry> patch_geom( SAMRAI_SHARED_PTR_CAST<CartesianPatchGeometry, PatchGeometry>( patch->getPatchGeometry())); TBOX_ASSERT(patch_geom); const double* dx = patch_geom->getDx(); IntVector ghost_cells(y->getGhostCellWidth()); /* * 1 eqn radiation diffusion */ if (d_dim == Dimension(2)) { SAMRAI_F77_FUNC(comprhs2d, COMPRHS2D) ( ifirst(0), ilast(0), ifirst(1), ilast(1), ghost_cells(0), ghost_cells(1), dx, y->getPointer(), diff->getPointer(0), diff->getPointer(1), rhs->getPointer()); } else if (d_dim == Dimension(3)) { SAMRAI_F77_FUNC(comprhs3d, COMPRHS3D) ( ifirst(0), ilast(0), ifirst(1), ilast(1), ifirst(2), ilast(2), ghost_cells(0), ghost_cells(1), ghost_cells(2), dx, y->getPointer(), diff->getPointer(0), diff->getPointer(1), diff->getPointer(2), rhs->getPointer()); } } // loop over patches } // loop over levels /* * Deallocate scratch space. 
*/ for (int ln = hierarchy->getFinestLevelNumber(); ln >= 0; --ln) { hierarchy->getPatchLevel(ln)->deallocatePatchData(d_soln_scr_id); } /* * record current time and increment counter for number of RHS * evaluations. */ d_current_soln_time = time; ++d_number_rhs_eval; return 0; }
//-------------------------------------------------------------------------- //-------- execute --------------------------------------------------------- //-------------------------------------------------------------------------- void AssembleContinuityEdgeSolverAlgorithm::execute() { stk::mesh::MetaData & meta_data = realm_.meta_data(); const int nDim = meta_data.spatial_dimension(); // extract noc const std::string dofName = "pressure"; const double nocFac = (realm_.get_noc_usage(dofName) == true) ? 1.0 : 0.0; // time step const double dt = realm_.get_time_step(); const double gamma1 = realm_.get_gamma1(); const double projTimeScale = dt/gamma1; // deal with interpolation procedure const double interpTogether = realm_.get_mdot_interp(); const double om_interpTogether = 1.0-interpTogether; // space for LHS/RHS; always nodesPerEdge*nodesPerEdge and nodesPerEdge std::vector<double> lhs(4); std::vector<double> rhs(2); std::vector<stk::mesh::Entity> connected_nodes(2); // area vector; gather into std::vector<double> areaVec(nDim); // pointers for fast access double *p_lhs = &lhs[0]; double *p_rhs = &rhs[0]; double *p_areaVec = &areaVec[0]; // mesh motion std::vector<double> vrtmL(nDim); std::vector<double> vrtmR(nDim); double * p_vrtmL = &vrtmL[0]; double * p_vrtmR = &vrtmR[0]; // deal with state VectorFieldType &velocityNp1 = velocity_->field_of_state(stk::mesh::StateNP1); ScalarFieldType &densityNp1 = density_->field_of_state(stk::mesh::StateNP1); // define some common selectors stk::mesh::Selector s_locally_owned_union = meta_data.locally_owned_part() &stk::mesh::selectUnion(partVec_); stk::mesh::BucketVector const& edge_buckets = realm_.get_buckets( stk::topology::EDGE_RANK, s_locally_owned_union ); for ( stk::mesh::BucketVector::const_iterator ib = edge_buckets.begin(); ib != edge_buckets.end() ; ++ib ) { stk::mesh::Bucket & b = **ib ; const stk::mesh::Bucket::size_type length = b.size(); // pointer to edge area vector const double * av = 
stk::mesh::field_data(*edgeAreaVec_, b); for ( stk::mesh::Bucket::size_type k = 0 ; k < length ; ++k ) { // sanity check on number or nodes ThrowAssert( b.num_nodes(k) == 2 ); stk::mesh::Entity const * edge_node_rels = b.begin_nodes(k); // pointer to edge area vector for ( int j = 0; j < nDim; ++j ) p_areaVec[j] = av[k*nDim+j]; // left and right nodes stk::mesh::Entity nodeL = edge_node_rels[0]; stk::mesh::Entity nodeR = edge_node_rels[1]; connected_nodes[0] = nodeL; connected_nodes[1] = nodeR; // extract nodal fields const double * coordL = stk::mesh::field_data(*coordinates_, nodeL); const double * coordR = stk::mesh::field_data(*coordinates_, nodeR); const double * GpdxL = stk::mesh::field_data(*Gpdx_, nodeL); const double * GpdxR = stk::mesh::field_data(*Gpdx_, nodeR); const double * velocityNp1L = stk::mesh::field_data(velocityNp1, nodeL); const double * velocityNp1R = stk::mesh::field_data(velocityNp1, nodeR); const double pressureL = *stk::mesh::field_data(*pressure_, nodeL); const double pressureR = *stk::mesh::field_data(*pressure_, nodeR); const double densityL = *stk::mesh::field_data(densityNp1, nodeL); const double densityR = *stk::mesh::field_data(densityNp1, nodeR); // copy to velcoity relative to mesh for ( int j = 0; j < nDim; ++j ) { p_vrtmL[j] = velocityNp1L[j]; p_vrtmR[j] = velocityNp1R[j]; } // deal with mesh motion if ( meshMotion_ ) { const double * meshVelocityL = stk::mesh::field_data(*meshVelocity_, nodeL ); const double * meshVelocityR = stk::mesh::field_data(*meshVelocity_, nodeR ); for (int j = 0; j < nDim; ++j ) { p_vrtmL[j] -= meshVelocityL[j]; p_vrtmR[j] -= meshVelocityR[j]; } } // compute geometry double axdx = 0.0; double asq = 0.0; for ( int j = 0; j < nDim; ++j ) { const double axj = p_areaVec[j]; const double dxj = coordR[j] - coordL[j]; asq += axj*axj; axdx += axj*dxj; } const double inv_axdx = 1.0/axdx; const double rhoIp = 0.5*(densityR + densityL); // mdot double tmdot = -projTimeScale*(pressureR - pressureL)*asq*inv_axdx; 
for ( int j = 0; j < nDim; ++j ) { const double axj = p_areaVec[j]; const double dxj = coordR[j] - coordL[j]; const double kxj = axj - asq*inv_axdx*dxj; // NOC const double rhoUjIp = 0.5*(densityR*p_vrtmR[j] + densityL*p_vrtmL[j]); const double ujIp = 0.5*(p_vrtmR[j] + p_vrtmL[j]); const double GjIp = 0.5*(GpdxR[j] + GpdxL[j]); tmdot += (interpTogether*rhoUjIp + om_interpTogether*rhoIp*ujIp + projTimeScale*GjIp)*axj - projTimeScale*kxj*GjIp*nocFac; } const double lhsfac = -asq*inv_axdx; /* lhs[0] = IL,IL; lhs[1] = IL,IR; IR,IL; IR,IR */ // first left p_lhs[0] = -lhsfac; p_lhs[1] = +lhsfac; p_rhs[0] = -tmdot/projTimeScale; // now right p_lhs[2] = +lhsfac; p_lhs[3] = -lhsfac; p_rhs[1] = tmdot/projTimeScale; apply_coeff(connected_nodes, rhs, lhs, __FILE__); } } }
int main(int argc, char* argv[]) { BoxLib::Initialize(argc,argv); BL_PROFILE_VAR("main()", pmain); std::cout << std::setprecision(15); solver_type = BoxLib_C; bc_type = Periodic; Real a = 0.0; Real b = 1.0; // ---- First use the number of processors to decide how many grids you have. // ---- We arbitrarily decide to have one grid per MPI process in a uniform // ---- cubic domain, so we require that the number of processors be N^3. // ---- This requirement is somewhat arbitrary, but convenient for now. int nprocs = ParallelDescriptor::NProcs(); // N is the cube root of the number of processors int N(0); for(int i(1); i*i*i <= nprocs; ++i) { if(i*i*i == nprocs) { N = i; } } if(N == 0) { // not a cube if(ParallelDescriptor::IOProcessor()) { std::cerr << "**** Error: nprocs = " << nprocs << " is not currently supported." << std::endl; } BoxLib::Error("We require that the number of processors be a perfect cube"); } if(ParallelDescriptor::IOProcessor()) { std::cout << "N = " << N << std::endl; } // ---- make a box, then a boxarray with maxSize int domain_hi = (N*maxGrid) - 1; Box domain(IntVect(0,0,0), IntVect(domain_hi,domain_hi,domain_hi)); BoxArray bs(domain); bs.maxSize(maxGrid); // This defines the physical size of the box. Right now the box is [0,1] in each direction. 
RealBox real_box; for (int n = 0; n < BL_SPACEDIM; n++) { real_box.setLo(n, 0.0); real_box.setHi(n, 1.0); } // This says we are using Cartesian coordinates int coord = 0; // This sets the boundary conditions to be periodic or not int is_per[BL_SPACEDIM]; if (bc_type == Dirichlet || bc_type == Neumann) { for (int n = 0; n < BL_SPACEDIM; n++) is_per[n] = 0; } else { for (int n = 0; n < BL_SPACEDIM; n++) is_per[n] = 1; } // This defines a Geometry object which is useful for writing the plotfiles Geometry geom(domain,&real_box,coord,is_per); for ( int n=0; n<BL_SPACEDIM; n++ ) { dx[n] = ( geom.ProbHi(n) - geom.ProbLo(n) )/domain.length(n); } if (ParallelDescriptor::IOProcessor()) { std::cout << "Domain size : " << N << std::endl; std::cout << "Max_grid_size : " << maxGrid << std::endl; std::cout << "Number of grids : " << bs.size() << std::endl; } // Allocate and define the right hand side. MultiFab rhs(bs, Ncomp, 0, Fab_allocate); setup_rhs(rhs, geom, a, b); MultiFab alpha(bs, Ncomp, 0, Fab_allocate); MultiFab beta[BL_SPACEDIM]; for ( int n=0; n<BL_SPACEDIM; ++n ) { BoxArray bx(bs); beta[n].define(bx.surroundingNodes(n), Ncomp, 1, Fab_allocate); } setup_coeffs(bs, alpha, beta, geom); MultiFab anaSoln; if (comp_norm) { anaSoln.define(bs, Ncomp, 0, Fab_allocate); compute_analyticSolution(anaSoln); } // Allocate the solution array // Set the number of ghost cells in the solution array. MultiFab soln(bs, Ncomp, 1, Fab_allocate); solve(soln, anaSoln, a, b, alpha, beta, rhs, bs, geom, BoxLib_C); BL_PROFILE_VAR_STOP(pmain); BoxLib::Finalize(); }
/**
 * Solves the problem for the given time step.
 *
 * On the first call (initFlag set) the pressure left-hand side and the mass
 * operator are created and assembled: a sparse matrix when consistentMassFlag
 * is set, a lumped vector otherwise. On later calls they are re-assembled only
 * when a material interface is present. The step itself then runs the three
 * stages marked below: auxiliary velocities, pressure (implicit solve) and
 * velocity correction.
 *
 * @param tStep Time step to solve. Its state counter is incremented after the
 *              velocity correction.
 */
void
CBS :: solveYourselfAt(TimeStep *tStep)
{
    int momneq = this->giveNumberOfDomainEquations(1, vnum);
    int presneq = this->giveNumberOfDomainEquations(1, pnum);
    int presneq_prescribed = this->giveNumberOfDomainEquations(1, pnumPrescribed);
    double deltaT = tStep->giveTimeIncrement();

    FloatArray rhs(momneq);

    if ( initFlag ) {
        deltaAuxVelocity.resize(momneq);

        // divisor applied to the prescribed traction pressure in STEP 2
        nodalPrescribedTractionPressureConnectivity.resize(presneq_prescribed);
        nodalPrescribedTractionPressureConnectivity.zero();
        this->assembleVectorFromElements( nodalPrescribedTractionPressureConnectivity, tStep,
                                          NumberOfNodalPrescribedTractionPressureAssembler(), VM_Total,
                                          pnumPrescribed, this->giveDomain(1) );

        // pressure left-hand side, scaled by deltaT * theta1 * theta2
        lhs.reset( classFactory.createSparseMtrx(sparseMtrxType) );
        if ( !lhs ) {
            OOFEM_ERROR("sparse matrix creation failed");
        }

        lhs->buildInternalStructure(this, 1, pnum);

        this->assemble( *lhs, stepWhenIcApply.get(), PressureLhsAssembler(),
                        pnum, this->giveDomain(1) );
        lhs->times(deltaT * theta1 * theta2);

        // mass operator: sparse matrix (consistent) or vector (lumped)
        if ( consistentMassFlag ) {
            mss.reset( classFactory.createSparseMtrx(sparseMtrxType) );
            if ( !mss ) {
                OOFEM_ERROR("sparse matrix creation failed");
            }

            mss->buildInternalStructure(this, 1, vnum);
            this->assemble( *mss, stepWhenIcApply.get(), MassMatrixAssembler(),
                            vnum, this->giveDomain(1) );
        } else {
            mm.resize(momneq);
            mm.zero();
            this->assembleVectorFromElements( mm, tStep, LumpedMassVectorAssembler(), VM_Total,
                                              vnum, this->giveDomain(1) );
        }

        //<RESTRICTED_SECTION>
        // init material interface
        if ( materialInterface ) {
            materialInterface->initialize();
        }

        //</RESTRICTED_SECTION>
        initFlag = 0;
    }
    //<RESTRICTED_SECTION>
    else if ( materialInterface ) {
        // with a material interface the operators are rebuilt every step
        lhs->zero();
        this->assemble( *lhs, stepWhenIcApply.get(), PressureLhsAssembler(),
                        pnum, this->giveDomain(1) );
        lhs->times(deltaT * theta1 * theta2);

        if ( consistentMassFlag ) {
            mss->zero();
            this->assemble( *mss, stepWhenIcApply.get(), MassMatrixAssembler(),
                            vnum, this->giveDomain(1) );
        } else {
            mm.zero();
            this->assembleVectorFromElements( mm, tStep, LumpedMassVectorAssembler(), VM_Total,
                                              vnum, this->giveDomain(1) );
        }
    }

    //</RESTRICTED_SECTION>

    if ( tStep->isTheFirstStep() ) {
        // named so that it does not hide the stepWhenIcApply member used above
        TimeStep *icStep = tStep->givePreviousStep();
        this->applyIC(icStep);
    }

    VelocityField.advanceSolution(tStep);
    PressureField.advanceSolution(tStep);

    // solution vectors of the primary fields at the current and previous step
    FloatArray *velocityVector = VelocityField.giveSolutionVector(tStep);
    FloatArray *prevVelocityVector = VelocityField.giveSolutionVector( tStep->givePreviousStep() );
    FloatArray *pressureVector = PressureField.giveSolutionVector(tStep);
    FloatArray *prevPressureVector = PressureField.giveSolutionVector( tStep->givePreviousStep() );
    velocityVector->resize(momneq);
    pressureVector->resize(presneq);

    // STEP 1 already dereferences nMethod in its consistent-mass branch, so
    // this call has to come before it rather than only before the pressure
    // solve in STEP 2.
    // NOTE(review): assumes giveNumericalMethod() is what sets up nMethod - confirm.
    this->giveNumericalMethod( this->giveCurrentMetaStep() );

    /* STEP 1 - calculates auxiliary velocities*/
    rhs.zero();
    // Depends on old v:
    this->assembleVectorFromElements( rhs, tStep, IntermediateConvectionDiffusionAssembler(), VM_Total,
                                      vnum, this->giveDomain(1) );
    //this->assembleVectorFromElements(mm, tStep, LumpedMassVectorAssembler(), VM_Total, this->giveDomain(1));

    if ( consistentMassFlag ) {
        rhs.times(deltaT);
        // Depends on prescribed v
        this->assembleVectorFromElements( rhs, tStep, PrescribedVelocityRhsAssembler(), VM_Total,
                                          vnum, this->giveDomain(1) );
        nMethod->solve(*mss, rhs, deltaAuxVelocity);
    } else {
        // lumped mass: entry-wise division replaces the linear solve
        for ( int i = 1; i <= momneq; i++ ) {
            deltaAuxVelocity.at(i) = deltaT * rhs.at(i) / mm.at(i);
        }
    }

    /* STEP 2 - calculates pressure (implicit solver) */
    this->prescribedTractionPressure.resize(presneq_prescribed);
    this->prescribedTractionPressure.zero();
    this->assembleVectorFromElements( prescribedTractionPressure, tStep,
                                      DensityPrescribedTractionPressureAssembler(), VM_Total,
                                      pnumPrescribed, this->giveDomain(1) );
    // divide each assembled entry by the connectivity assembled at initialization
    for ( int i = 1; i <= presneq_prescribed; i++ ) {
        prescribedTractionPressure.at(i) /= nodalPrescribedTractionPressureConnectivity.at(i);
    }

    // DensityRhsVelocityTerms needs this: Current velocity without correction;
    * velocityVector = * prevVelocityVector;
    velocityVector->add(this->theta1, deltaAuxVelocity);

    // Depends on old V + deltaAuxV * theta1 and p:
    rhs.resize(presneq);
    rhs.zero();
    this->assembleVectorFromElements( rhs, tStep, DensityRhsAssembler(), VM_Total,
                                      pnum, this->giveDomain(1) );
    nMethod->solve(*lhs, rhs, *pressureVector);
    // the solve result is scaled by theta2 and added to the previous pressure
    pressureVector->times(this->theta2);
    pressureVector->add(* prevPressureVector);

    /* STEP 3 - velocity correction step */
    rhs.resize(momneq);
    rhs.zero();
    // Depends on p:
    this->assembleVectorFromElements( rhs, tStep, CorrectionRhsAssembler(), VM_Total,
                                      vnum, this->giveDomain(1) );
    if ( consistentMassFlag ) {
        rhs.times(deltaT);
        //this->assembleVectorFromElements(rhs, tStep, PrescribedRhsAssembler(), VM_Incremental, vnum, this->giveDomain(1));
        nMethod->solve(*mss, rhs, *velocityVector);
        velocityVector->add(deltaAuxVelocity);
        velocityVector->add(* prevVelocityVector);
    } else {
        for ( int i = 1; i <= momneq; i++ ) {
            velocityVector->at(i) = prevVelocityVector->at(i) + deltaAuxVelocity.at(i) + deltaT *rhs.at(i) / mm.at(i);
        }
    }

    // update solution state counter
    tStep->incrementStateCounter();

    //<RESTRICTED_SECTION>
    if ( materialInterface ) {
#ifdef TIME_REPORT
        Timer timer;
        timer.startTimer();
#endif
        materialInterface->updatePosition( this->giveCurrentStep() );
#ifdef TIME_REPORT
        timer.stopTimer();
        OOFEM_LOG_INFO( "CBS info: user time consumed by updating interfaces: %.2fs\n", timer.getUtime() );
#endif
    }

    //</RESTRICTED_SECTION>
}