/*
 * Collect RB_ITER samples returned by __c() into c_meas[], then print the
 * mean squared deviation of those samples around `avg` and the "one way"
 * average.
 *
 * NOTE(review): `avg` is computed from `sum`, which is not declared in this
 * function, while the locally accumulated `c_sum` is never read - confirm
 * that `sum` is really the intended source.
 * NOTE(review): `sum / RB_ITER` is printed with "%d" - confirm this matches
 * the type of `sum`.
 */
void consumer(void)
{
	int k;
	unsigned long long start, end, c_sum = 0, avg, dev = 0;

	/* Sampling phase, bracketed by two rdtscll() readings. */
	rdtscll(start);
	for (k = 0 ; k < RB_ITER ; k++) {
		c_meas[k] = __c();
		c_sum += c_meas[k];
	}
	rdtscll(end);

	avg = sum/RB_ITER;

	/* Accumulate the squared absolute distance of every sample from avg. */
	for (k = 0 ; k < RB_ITER ; k++) {
		unsigned long long delta;

		if (c_meas[k] > avg)
			delta = c_meas[k] - avg;
		else
			delta = avg - c_meas[k];

		dev += (delta*delta);
		/* printf("%llu, diff %llu\n", c_meas[k], delta); */
	}
	dev /= RB_ITER;

	printf("one way trip deviation^2 = %llu\n", dev);
	printf("one way: %d\n", sum / RB_ITER);
	/* printf("RPC pipe: Consumer: %lld\n", avg); */
}
// Evaluates the finite series
//
//   sum_{a=0}^{k} C(2k+1, 2a+1) * F_a * (a + g + 1/2)^{-(a + 1/2)} * exp(a + g + 1/2)
//
// where C is __c() and F_a = sqrt(2/pi) * prod_{j=1}^{a} (2j - 1)/2.
//
// __k  index of the coefficient (upper bound of the sum).
// __g  shift parameter of the series.
//
// NOTE(review): presumably the p_k(g) coefficients of a Lanczos-type gamma
// approximation, judging by how lanczos() uses it - confirm.
_Tp
__p(unsigned int __k, _Tp __g)
{
  const auto __pi = _Tp{3.1415926535897932384626433832795029L};

  // Running factor F_a; starts at F_0 = sqrt(2/pi).
  auto __scale = std::sqrt(_Tp{2} / __pi);

  // a = 0 term of the series.
  auto __total = __c(2 * __k + 1, 1) * __scale
	       * std::exp(__g + 0.5) / std::sqrt(__g + 0.5);

  // Remaining terms a = 1 .. k.
  for (int __idx = 1; __idx <= __k; ++__idx)
    {
      __scale *= _Tp(2 * __idx - 1) / 2;

      const auto __arg = __idx + __g + 0.5L;
      __total += __c(2 * __k + 1, 2 * __idx + 1) * __scale
	       * std::pow(__arg, -_Tp(__idx + 0.5L))
	       * std::exp(__arg);
    }

  return __total;
}
/*
 * Convert a scalar sample into a terrain colour packed with SDL_MapRGBA().
 *
 * The sample is first remapped with (value + 1) / 2 and then classified into
 * bands (water, beach, to-grass, grass, to-mountain, mountain, snow) by fixed
 * thresholds. Inside a band the channels are produced by __i(), fed with the
 * relative position of the sample within that band.
 *
 * fmt    pixel format handed to SDL_MapRGBA().
 * value  input sample (presumably in [-1, 1], so that the remapped level
 *        lies in [0, 1] - TODO confirm against callers).
 *
 * Returns the pixel value produced by SDL_MapRGBA() with alpha 255.
 */
Uint32 to_pixel(SDL_PixelFormat *fmt, double value)
{
	const double level = (value + (double) 1) / (double) 2;
	const int alpha = 255;
	double red, green, blue;

	if(level <= 0.5) {
		/* water */
		const double t = level / 0.5;

		red = 0;
		green = 0;
		blue = __i(t, 0.5, 1);
	} else if(level <= 0.53) {
		/* beach */
		const double t = sqrt((level - 0.5) / 0.03);

		red = __i(t, 0, 1);
		green = __i(t, 0, 1);
		blue = __i(t, 1, 0.5);
	} else if(level <= 0.56) {
		/* to-grass */
		const double t = __c((level - 0.53) / 0.03);

		red = __i(t, 1, 0.5);
		green = 1;
		blue = 0.5;
	} else if(level <= 0.7) {
		/* grass */
		red = 0.5;
		green = 1;
		blue = 0.5;
	} else if(level <= 0.71) {
		/* to-mountain */
		const double t = (level - 0.7) / 0.01;

		red = 0.5;
		green = __i(t, 1, 0.5);
		blue = 0.5;
	} else if(level <= 0.80) {
		/* mountain */
		const double t = (level - 0.71) / 0.09;

		red = __i(t, 0.5, 1);
		green = __i(t, 0.5, 1);
		blue = __i(t, 0.5, 1);
	} else {
		/* snow */
		red = 1;
		green = 1;
		blue = 1;
	}

	return SDL_MapRGBA(fmt,
			   (int)(red * (double) 255),
			   (int)(green * (double) 255),
			   (int)(blue * (double) 255),
			   alpha
			  );
}
// Chebyshev coefficient matrix.
//
// Entry (__n, __k) of the 1-based lower-triangular matrix defined by
//
//   C(1,1) = 1,  C(2,1) = 0,  C(2,2) = 1,
//   C(n,1) = -C(n-2,1),
//   C(n,n) = 2 C(n-1,n-1),
//   C(n,k) = 2 C(n-1,k-1) - C(n-2,k)   for 1 < k < n.
//
// With this recurrence row n holds the coefficients of the Chebyshev
// polynomial T_{n-1}, lowest power first (e.g. row 5 is 1, 0, -8, 0, 8).
//
// __n  row index (1-based).
// __k  column index (1-based).
//
// Returns 0 for entries above the diagonal (__k > __n) and for row 0.
// For row 2 any column other than 2 yields 0; this used to fall off the end
// of the function for __k == 0, which is undefined behaviour. Row 0 used to
// recurse through unsigned wrap-around.
int
__c(unsigned int __n, unsigned int __k)
{
  if (__k > __n)
    return 0;
  if (__n == 0)
    return 0; // Only reachable with __k == 0: there is no row 0.
  if (__n == 1)
    return 1;
  if (__n == 2)
    return __k == 2 ? 1 : 0;

  if (__k == 1)
    return -__c(__n - 2, 1);
  if (__k == __n)
    return 2 * __c(__n - 1, __k - 1);
  return 2 * __c(__n - 1, __k - 1) - __c(__n - 2, __k);
}
/**
 * Evaluate M_expr on every face of M_range at the face points of M_pset.
 *
 * For each face the geometric mapping context is updated on element 0 of the
 * face, then the real coordinates of the points and the values of the
 * expression are stored.
 *
 * \return a tuple (values, nodes): \c values holds, face after face and point
 * after point, the shape::M components of the expression; \c nodes holds one
 * column of real coordinates per (face, point) pair. Both are zero-sized
 * products of the range length when the range is empty.
 *
 * The GEOMAP_O1 strategy uses the gm1 context; every other strategy value
 * uses the gm context.
 */
typename Evaluator<iDim, Iterator, Pset, ExprT>::eval_element_type
Evaluator<iDim, Iterator, Pset, ExprT>::operator()( mpl::size_t<MESH_FACES> ) const
{
    boost::timer __timer;

    VLOG(2) << "evaluator(MESH_FACES) " << "\n";

    // mesh entities
    using geoelement_type = typename mesh_element_type::entity_type;
    using face_type = mesh_element_type;

    // geometric mappings and their contexts
    using gm_type = typename geoelement_type::gm_type;
    using gm_ptrtype = boost::shared_ptr<gm_type>;
    using gm1_type = typename geoelement_type::gm1_type;
    using gm1_ptrtype = boost::shared_ptr<gm1_type>;
    using gmc_type = typename gm_type::template Context<context, geoelement_type>;
    using gmc_ptrtype = boost::shared_ptr<gmc_type>;
    using map_gmc_type = fusion::map<fusion::pair<vf::detail::gmc<0>, gmc_ptrtype> >;
    using gmc1_type = typename gm1_type::template Context<context, geoelement_type>;
    using gmc1_ptrtype = boost::shared_ptr<gmc1_type>;
    using map_gmc1_type = fusion::map<fusion::pair<vf::detail::gmc<0>, gmc1_ptrtype> >;

    // expression evaluators
    using the_expression_type = expression_type;
    using iso_expression_type = typename boost::remove_reference<typename boost::remove_const<the_expression_type>::type >::type;
    using t_expr_type = typename iso_expression_type::template tensor<map_gmc_type>;
    using t_expr1_type = typename iso_expression_type::template tensor<map_gmc1_type>;
    using shape = typename t_expr_type::shape;

    // range of faces to visit
    iterator_type __face_it, __face_en;
    boost::tie( boost::tuples::ignore, __face_it, __face_en ) = M_range;

    // output storage, sized from the points of face 0 / permutation 1
    const int npoints = M_pset.fpoints(0,1).size2();
    element_type values( M_pset.fpoints(0,1).size2()*std::distance( __face_it, __face_en )*shape::M );
    node_type nodes( mesh_element_type::nRealDim, M_pset.fpoints(0,1).size2()*std::distance( __face_it, __face_en ) );
    values.setZero();
    nodes.setZero();
    VLOG(2) << "pset: " << M_pset.fpoints(0,1);
    VLOG(2) << "Checking trivial result...";

    // nothing to evaluate
    if ( __face_it == __face_en )
        return boost::make_tuple( values, nodes );

    gm_ptrtype gm( new gm_type );
    gm1_ptrtype gm1( new gm1_type );

    // Precompute, for every face and every permutation, the geometric
    // mapping data at the reference face points (for both mappings).
    using permutation_type = typename geoelement_type::permutation_type;
    using geopc_ptrtype = typename gm_type::precompute_ptrtype;
    using geopc_type = typename gm_type::precompute_type;
    using geopc1_ptrtype = typename gm1_type::precompute_ptrtype;
    using geopc1_type = typename gm1_type::precompute_type;

    std::vector<std::map<permutation_type, geopc_ptrtype> > __geopc( M_pset.nFaces() );
    std::vector<std::map<permutation_type, geopc1_ptrtype> > __geopc1( M_pset.nFaces() );
    VLOG(2) << "computing geopc...";

    for ( uint16_type __f = 0; __f < M_pset.nFaces(); ++__f )
    {
        for ( permutation_type __p( permutation_type::IDENTITY );
              __p < permutation_type( permutation_type::N_PERMUTATIONS );
              ++__p )
        {
            __geopc[__f][__p] = geopc_ptrtype( new geopc_type( gm, M_pset.fpoints(__f, __p.value() ) ) );
            __geopc1[__f][__p] = geopc1_ptrtype( new geopc1_type( gm1, M_pset.fpoints(__f, __p.value() ) ) );
            DVLOG(2) << "pset " << __f << " : " << M_pset.fpoints(__f, __p.value() );
            CHECK( __geopc[__f][__p]->nPoints() ) << "invalid number of points for geopc";
            CHECK( __geopc1[__f][__p]->nPoints() ) << "invalid number of points for geopc1";
        }
    }

    // Contexts and expression evaluators are built once on the first face
    // and updated in place afterwards.
    uint16_type first_face_pos = __face_it->pos_first();
    gmc_ptrtype ctx( new gmc_type( gm, __face_it->element( 0 ), __geopc, first_face_pos ) );
    gmc1_ptrtype ctx1( new gmc1_type( gm1, __face_it->element( 0 ), __geopc1, first_face_pos ) );

    map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( ctx ) );
    t_expr_type tensor_gm( M_expr, mapgmc );
    map_gmc1_type mapgmc1( fusion::make_pair<vf::detail::gmc<0> >( ctx1 ) );
    t_expr1_type tensor_gm1( M_expr, mapgmc1 );

    size_type nbFaceDof = invalid_size_type_value;

    for ( int e = 0; __face_it != __face_en; ++__face_it, ++e )
    {
        FEELPP_ASSERT( __face_it->isOnBoundary() && !__face_it->isConnectedTo1() )
        ( __face_it->marker() )
        ( __face_it->isOnBoundary() )
        ( __face_it->ad_first() )
        ( __face_it->pos_first() )
        ( __face_it->ad_second() )
        ( __face_it->pos_second() )
        ( __face_it->id() ).warn( "inconsistent data face" );
        DVLOG(2) << "[evaluator] FACE_ID = " << __face_it->id()
                 << " element id= " << __face_it->ad_first()
                 << " pos in elt= " << __face_it->pos_first()
                 << " marker: " << __face_it->marker() << "\n";
        DVLOG(2) << "[evaluator] FACE_ID = " << __face_it->id() << " real pts=" << __face_it->G() << "\n";

        uint16_type face_pos = __face_it->pos_first();

        if ( M_geomap_strategy == GeomapStrategyType::GEOMAP_O1 )
        {
            ctx1->update( __face_it->element( 0 ), face_pos );
            DVLOG(2) << "[evaluator::GEOMAP_O1] FACE_ID = " << __face_it->id() << " ref pts=" << ctx1->xRefs() << "\n";
            DVLOG(2) << "[evaluator::GEOMAP_O1] FACE_ID = " << __face_it->id() << " real pts=" << ctx1->xReal() << "\n";

            map_gmc1_type face_map( fusion::make_pair<vf::detail::gmc<0> >( ctx1 ) );
            tensor_gm1.update( face_map );

            for ( uint16_type q = 0; q < npoints; ++q )
            {
                for ( uint16_type d = 0; d < mesh_element_type::nRealDim; ++d )
                {
                    nodes(d, e*npoints+q) = ctx1->xReal(q)[d];
                }

                for ( uint16_type comp = 0; comp < shape::M; ++comp )
                {
                    values( e*npoints*shape::M+shape::M*q+comp) = tensor_gm1.evalq( comp, 0, q );
                }
            }
        }
        else
        {
            // GEOMAP_OPT, GEOMAP_HO and any other value
            ctx->update( __face_it->element( 0 ), face_pos );
            DVLOG(2) << "[evaluator::GEOMAP_HO|GEOMAP_OPT] FACE_ID = " << __face_it->id() << " ref pts=" << ctx->xRefs() << "\n";
            DVLOG(2) << "[evaluator::GEOMAP_HO|GEOMAP_OPT] FACE_ID = " << __face_it->id() << " real pts=" << ctx->xReal() << "\n";

            map_gmc_type face_map( fusion::make_pair<vf::detail::gmc<0> >( ctx ) );
            tensor_gm.update( face_map );

            for ( uint16_type q = 0; q < npoints; ++q )
            {
                for ( uint16_type d = 0; d < mesh_element_type::nRealDim; ++d )
                {
                    nodes(d, e*npoints+q) = ctx->xReal(q)[d];
                }

                for ( uint16_type comp = 0; comp < shape::M; ++comp )
                {
                    values( e*npoints*shape::M+shape::M*q+comp) = tensor_gm.evalq( comp, 0, q );
                }
            }
        }
    } // face_it

    return boost::make_tuple( values, nodes );
}
/**
 * Evaluate M_expr on every element of M_range at the points of M_pset.
 *
 * \return a tuple (values, nodes): \c values holds, element after element and
 * point after point, the shape::M components of the expression; \c nodes
 * holds one column of real coordinates per (element, point) pair.
 *
 * Geometric mapping selection per element:
 *  - GEOMAP_HO : gm context,
 *  - GEOMAP_O1 : gm1 context,
 *  - GEOMAP_OPT: gm context if the element is on the boundary, gm1 otherwise.
 * For any other strategy value the element is skipped and its entries stay
 * zero.
 */
typename Evaluator<iDim, Iterator, Pset, ExprT>::eval_element_type
Evaluator<iDim, Iterator, Pset, ExprT>::operator()( mpl::size_t<MESH_ELEMENTS> ) const
{
    boost::timer __timer;

    // geometric mappings and their contexts
    using gm_type = typename mesh_element_type::gm_type;
    using gm_context_type = typename gm_type::template Context<context, mesh_element_type>;
    using gm1_type = typename mesh_element_type::gm1_type;
    using gm1_context_type = typename gm1_type::template Context<context, mesh_element_type>;
    using gm_context_ptrtype = boost::shared_ptr<gm_context_type>;
    using gm1_context_ptrtype = boost::shared_ptr<gm1_context_type>;
    using map_gmc_type = fusion::map<fusion::pair<vf::detail::gmc<0>, gm_context_ptrtype> >;
    using map_gmc1_type = fusion::map<fusion::pair<vf::detail::gmc<0>, gm1_context_ptrtype> >;

    // expression evaluators
    using the_expression_type = expression_type;
    using iso_expression_type = typename boost::remove_reference<typename boost::remove_const<the_expression_type>::type >::type;
    using t_expr_type = typename iso_expression_type::template tensor<map_gmc_type>;
    using t_expr1_type = typename iso_expression_type::template tensor<map_gmc1_type>;
    using value_type = typename t_expr_type::value_type;
    using shape = typename t_expr_type::shape;

    // range of elements to visit
    iterator_type cur, last;
    boost::tie( boost::tuples::ignore, cur, last ) = M_range;

    // output storage
    const int npoints = M_pset.points().size2();
    element_type values( M_pset.points().size2()*std::distance( cur, last )*shape::M );
    node_type nodes( mesh_element_type::nDim, M_pset.points().size2()*std::distance( cur, last ) );
    values.setZero();
    nodes.setZero();

    // return if no elements
    if ( cur == last )
        return boost::make_tuple( values, nodes );

    // Precompute the geometric mapping data at the reference points, then
    // build contexts and evaluators once on the first element; they are
    // updated in place for the following elements.
    typename gm_type::precompute_ptrtype geopc( new typename gm_type::precompute_type( cur->gm(), M_pset.points() ) );
    typename gm1_type::precompute_ptrtype geopc1( new typename gm1_type::precompute_type( cur->gm1(), M_pset.points() ) );

    gm_context_ptrtype ctx( new gm_context_type( cur->gm(),*cur,geopc ) );
    gm1_context_ptrtype ctx1( new gm1_context_type( cur->gm1(),*cur,geopc1 ) );

    map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( ctx ) );
    t_expr_type tensor_gm( M_expr, mapgmc );
    map_gmc1_type mapgmc1( fusion::make_pair<vf::detail::gmc<0> >( ctx1 ) );
    t_expr1_type tensor_gm1( M_expr, mapgmc1 );

    for ( int e = 0; cur != last; ++cur, ++e )
    {
        // Pick the mapping for this element.
        bool use_gm = false;

        switch ( M_geomap_strategy )
        {
        case GeomapStrategyType::GEOMAP_HO:
            use_gm = true;
            break;

        case GeomapStrategyType::GEOMAP_O1:
            use_gm = false;
            break;

        case GeomapStrategyType::GEOMAP_OPT:
            // HO if on boundary
            use_gm = cur->isOnBoundary();
            break;

        default:
            // no evaluation for other strategies
            continue;
        }

        if ( use_gm )
        {
            ctx->update( *cur );
            map_gmc_type elt_map( fusion::make_pair<vf::detail::gmc<0> >( ctx ) );
            tensor_gm.update( elt_map );

            for ( uint16_type q = 0; q < npoints; ++q )
            {
                for ( uint16_type d = 0; d < mesh_element_type::nDim; ++d )
                {
                    nodes(d, e*npoints+q) = ctx->xReal(q)[d];
                }

                for ( uint16_type comp = 0; comp < shape::M; ++comp )
                {
                    values( e*npoints*shape::M+shape::M*q+comp) = tensor_gm.evalq( comp, 0, q );
                }
            }
        }
        else
        {
            ctx1->update( *cur );
            map_gmc1_type elt_map( fusion::make_pair<vf::detail::gmc<0> >( ctx1 ) );
            tensor_gm1.update( elt_map );

            for ( uint16_type q = 0; q < npoints; ++q )
            {
                for ( uint16_type d = 0; d < mesh_element_type::nDim; ++d )
                {
                    nodes(d, e*npoints+q) = ctx1->xReal(q)[d];
                }

                for ( uint16_type comp = 0; comp < shape::M; ++comp )
                {
                    values( e*npoints*shape::M+shape::M*q+comp) = tensor_gm1.evalq( comp, 0, q );
                }
            }
        }
    }

    return boost::make_tuple( values, nodes );
}
void modifVec(std::list<ElementRange> const& __r, eltType const& u,vectorType & UnVec,ExprType const& expr, size_type rowstart, int ComponentShiftFactor, mpl::int_<MESH_FACES> /**/ ) { //using namespace Feel::vf; typedef typename eltType::functionspace_type::mesh_type mesh_type; typedef typename mesh_type::face_iterator face_iterator; typedef typename mesh_type::face_const_iterator face_const_iterator; typedef typename mesh_type::element_type geoelement_type; typedef typename geoelement_type::face_type face_type; // basis typedef typename eltType::functionspace_type::fe_type fe_type; typedef typename eltType::functionspace_type::dof_type dof_type; const size_type context = ExprType::context|vm::POINT; // geometric mapping context typedef typename mesh_type::gm_type gm_type; typedef boost::shared_ptr<gm_type> gm_ptrtype; typedef typename gm_type::template Context<context, geoelement_type> gmc_type; typedef boost::shared_ptr<gmc_type> gmc_ptrtype; typedef fusion::map<fusion::pair<vf::detail::gmc<0>, gmc_ptrtype> > map_gmc_type; typedef typename ExprType::template tensor<map_gmc_type> t_expr_type; if ( __r.size() == 0 ) return; auto __face_it = __r.begin()->template get<1>(); auto __face_en = __r.begin()->template get<2>(); //if ( __face_it == __face_en ) return; bool findAFace = false; for( auto lit = __r.begin(), len = __r.end(); lit != len; ++lit ) { __face_it = lit->template get<1>(); __face_en = lit->template get<2>(); if ( __face_it != __face_en ) { findAFace=true; break; } } if ( !findAFace ) return; // get the first face properly connected bool findAFaceToInit=false; for( auto lit = __r.begin(), len = __r.end(); lit != len; ++lit ) { __face_it = lit->template get<1>(); __face_en = lit->template get<2>(); for( ; __face_it != __face_en; ++__face_it ) { if ( boost::unwrap_ref(*__face_it).isConnectedTo0() ) { findAFaceToInit=true; break; } } if ( findAFaceToInit ) break; } CHECK( findAFaceToInit ) << "not find a face to init\n"; size_type nbFaceDof = 
invalid_size_type_value; if ( !fe_type::is_modal ) nbFaceDof = ( face_type::numVertices * fe_type::nDofPerVertex + face_type::numEdges * fe_type::nDofPerEdge + face_type::numFaces * fe_type::nDofPerFace ); else nbFaceDof = face_type::numVertices * fe_type::nDofPerVertex; //dof_type const* __dof = u.functionSpace()->dof().get(); fe_type const* __fe = u.functionSpace()->fe().get(); gm_ptrtype __gm( new gm_type ); // // Precompute some data in the reference element for // geometric mapping and reference finite element // typedef typename geoelement_type::permutation_type permutation_type; typedef typename gm_type::precompute_ptrtype geopc_ptrtype; typedef typename gm_type::precompute_type geopc_type; std::vector<std::map<permutation_type, geopc_ptrtype> > __geopc( geoelement_type::numTopologicalFaces ); for ( uint16_type __f = 0; __f < geoelement_type::numTopologicalFaces; ++__f ) { permutation_type __p( permutation_type::IDENTITY ); __geopc[__f][__p] = geopc_ptrtype( new geopc_type( __gm, __fe->points( __f ) ) ); } uint16_type __face_id = __face_it->pos_first(); gmc_ptrtype __c( new gmc_type( __gm, __face_it->element(0), __geopc, __face_id ) ); map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) ); t_expr_type LExpr( expr, mapgmc ); std::vector<bool> dofdone( u.functionSpace()->dof()->nLocalDofWithGhost(), false ); //face_const_iterator __face_it, __face_en; for( auto lit = __r.begin(), len = __r.end(); lit != len; ++lit ) { __face_it = lit->template get<1>(); __face_en = lit->template get<2>(); for ( ; __face_it != __face_en; ++__face_it ) { uint16_type __face_id = __face_it->pos_first(); __c->update( __face_it->element(0), __face_id ); map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) ); LExpr.update( mapgmc ); for (uint c1=0;c1<eltType::nComponents1;c1++) for (uint c2=0;c2<eltType::nComponents2;c2++) { for ( uint16_type l = 0; l < nbFaceDof; ++l ) { size_type index = boost::get<0>(u.functionSpace()->dof()->faceLocalToGlobal( 
__face_it->id(), l, c1 )); if ( dofdone[index] ) continue; size_type thedof = u.start() + ComponentShiftFactor*index; double __value=LExpr.evalq( c1, c2, l ); //u( thedof ) = __value; UnVec->set(rowstart+thedof,__value); dofdone[index] = true; } } } } //UnVec->close(); } // modifVec
void lanczos() { std::cout.precision(std::numeric_limits<_Tp>::digits10); // From Pugh.. int __n_old = 0; int __n = -2 - 0.3 * std::log(std::numeric_limits<_Tp>::epsilon()); std::cout << "n = " << __n << '\n'; auto __g = __n - _Tp{0.5L}; std::cout << "g = " << __g << '\n'; while (__n != __n_old) { std::cout << '\n'; std::vector<_Tp> __a; for (unsigned int k = 1; k <= __n; ++k) { for (unsigned int j = 1; j <= k; ++j) std::cout << " C(" << std::setw(2) << k << ", " << std::setw(2) << j << ") = " << std::setw(4) << __c(k, j); std::cout << '\n'; } std::cout << '\n'; auto __prev = std::numeric_limits<_Tp>::max(); for (unsigned int __k = 0; __k <= __n + 5; ++__k) { auto __curr = __p(__k, __g); if (std::abs(__curr) > std::abs(__prev)) { __n_old = __n; __n = __k; __g = __n - _Tp{0.5L}; break; } __prev = __curr; std::cout << " p(" << __k << ", " << __g << ") = " << __curr << '\n'; } std::cout << "n = " << __n << '\n'; } constexpr auto _S_log_sqrt_2pi = 9.189385332046727417803297364056176398620e-1L; auto __log_gamma_lanczos = [=](_Tp __z) -> _Tp { auto __fact = _Tp{1}; auto __sum = _Tp{0.5L} * __p(0, __g); for (unsigned int __k = 1; __k < __n; ++__k) { __fact *= (__z - __k + 1) / (__z + __k); __sum += __fact * __p(__k, __g); } return _S_log_sqrt_2pi + std::log(__sum) + (__z + 0.5L) * std::log(__z + __g + 0.5L) - (__z + __g + 0.5L) - std::log(__z); }; std::cout << '\n'; for (int i = 0; i <= 500; ++i) { auto z = _Tp{0.01Q} * i; std::cout << ' ' << z << ' ' << __log_gamma_lanczos(z) << ' ' << std::lgamma(z) << ' ' << __log_gamma_lanczos(z) - std::lgamma(z) << '\n'; } }
void computeCPUOMP(int threadId, expression_type * expr, im_type * im, element_iterator * elt_it, std::vector<std::pair<element_iterator, element_iterator> > * elts) { char * a; int cid; std::ostringstream oss; #if 0 hwloc_cpuset_t set = nullptr; /* get a cpuset object */ set = hwloc_bitmap_alloc(); /* Get the cpu thread affinity info of the current process/thread */ hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ")|"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; /* Get the latest core location of the current process/thread */ hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ");"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; #endif #if defined(FEELPP_HAS_HARTS) perf_mng.init("cpu") ; perf_mng.start("cpu") ; perf_mng.init("1.1") ; perf_mng.init("1.2") ; perf_mng.init("2.1") ; perf_mng.init("2.2") ; perf_mng.init("3") ; #endif //M_gm((*elt_it)->gm()); gm_ptrtype gm = (*elt_it)->gm(); //M_geopc(new typename eval::gmpc_type( M_gm, im->points() )); typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) ); //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc )); gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) ); //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ); eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) ); for (int i = 0; i < elts->size(); i++) { /* std::cout << Environment::worldComm().rank() << " nbItems: " << elts->size() << " nbElts " << std::distance(elts->at(i), 
elts->at(i+1)) << " 1st id " << elts->at(i)->id() << std::endl; */ //std::cout << Environment::worldComm().rank() << "|" << theadId << " fid=" elts.at(i).first.id() << std::endl; for ( auto _elt = elts->at(i).first; _elt != elts->at(i).second; ++_elt ) { //perf_mng.start("1.1") ; __c->update( *_elt ); //perf_mng.stop("1.1") ; //perf_mng.start("1.2") ; map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) ); //perf_mng.stop("1.2") ; //perf_mng.start("2.1") ; __expr.update( mapgmc ); //perf_mng.stop("2.1") ; //perf_mng.start("2.2") ; im->update( *__c ); //perf_mng.stop("2.2") ; //perf_mng.start("3") ; for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 ) { for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 ) { M_ret( c1,c2 ) += (*im)( __expr, c1, c2 ); } } //perf_mng.stop("3") ; } } #if defined(FEELPP_HAS_HARTS) perf_mng.stop("cpu") ; M_cpuTime = perf_mng.getValueInSeconds("cpu"); #endif }
void computeCPU(DataArgsType& args) { char * a; int cid; hwloc_cpuset_t set = nullptr; std::ostringstream oss; /* This initialization takes some time */ /* When using hartsi, the object instanciation is done when creating tasks */ /* and this is not a parallel section, thus we lose time in initialization */ /* doing it the computation step allows to incorporate this init time in the parallel section */ /* M_threadId( threadId ), M_gm( new gm_type( *_elt.gm() ) ), M_geopc( new gmpc_type( M_gm, _im.points() ) ), M_c( new gmc_type( M_gm, _elt, M_geopc ) ), M_expr( _expr, map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ), M_im( _im ), M_ret( eval::matrix_type::Zero() ), M_cpuTime( 0.0 ) */ #if 0 /* get a cpuset object */ set = hwloc_bitmap_alloc(); /* Get the cpu thread affinity info of the current process/thread */ hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ")|"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; /* Get the latest core location of the current process/thread */ hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ");"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; #endif perf_mng.init("1.1") ; perf_mng.init("1.1") ; perf_mng.init("2.1") ; perf_mng.init("2.2") ; perf_mng.init("3") ; /* free memory */ if(set != nullptr) { hwloc_bitmap_free(set); } //perf_mng.init("data") ; //perf_mng.start("data") ; // DEFINE the range to be iterated on std::vector<std::pair<element_iterator, element_iterator> > * elts = 
args.get("elements")->get<std::vector<std::pair<element_iterator, element_iterator> > >(); int * threadId = args.get("threadId")->get<int>(); expression_type * expr = args.get("expr")->get<expression_type>(); im_type * im = args.get("im")->get<im_type>(); element_iterator * elt_it = args.get("elt")->get<element_iterator>(); //M_gm((*elt_it)->gm()); gm_ptrtype gm = (*elt_it)->gm(); //M_geopc(new typename eval::gmpc_type( M_gm, im->points() )); typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) ); //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc )); gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) ); //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ); eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) ); //perf_mng.stop("data"); perf_mng.init("cpu") ; perf_mng.start("cpu") ; for (int i = 0; i < elts->size(); i++) { //std::cout << Environment::worldComm().rank() << " nbItems: " << elts->size() << " nbElts " << std::distance(elts->at(i), elts->at(i+1)) << std::endl; for ( auto _elt = elts->at(i).first; _elt != elts->at(i).second; ++_elt ) { //perf_mng.start("1.1") ; //M_c->update( *_elt ); __c->update( *_elt ); //perf_mng.stop("1.1") ; //perf_mng.start("1.2") ; map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) ); //perf_mng.stop("1.2") ; //perf_mng.start("2.1") ; __expr.update( mapgmc ); //perf_mng.stop("2.1") ; //perf_mng.start("2.2") ; im->update( *__c ); //perf_mng.stop("2.2") ; //perf_mng.start("3") ; for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 ) { for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 ) { M_ret( c1,c2 ) += (*im)( __expr, c1, c2 ); } } //perf_mng.stop("3") ; } } perf_mng.stop("cpu") ; M_cpuTime = perf_mng.getValueInSeconds("cpu"); }