/**
 * Writes the indices of the bits set in @p bitmap to @p s as a brace-delimited,
 * space-separated list, e.g. "{ 0 3 7 }" (an empty bitmap prints "{ }").
 *
 * The walk ends when hwloc_bitmap_next() reports -1, so it only terminates for
 * bitmaps that have a last set bit.
 *
 * @param s       Output stream receiving the text.
 * @param bitmap  Bitmap whose set bits are listed in increasing index order.
 */
inline void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap )
{
  s << "{" ;

  int bit = hwloc_bitmap_first( bitmap ) ;
  while ( bit != -1 ) {
    s << " " << bit ;
    bit = hwloc_bitmap_next( bitmap , bit ) ;
  }

  s << " }" ;
}
/******************* FUNCTION *********************/
/**
 * Returns the index of the lowest bit set in @p bitmap.
 *
 * hwloc_bitmap_first() already yields a bit that is set, so no further
 * scanning towards the last bit is required.
 *
 * @param bitmap  Bitmap to inspect; expected to contain at least one set bit
 *                (checked with assert()).
 * @return Index of the first set bit, or -1 if the bitmap is empty and
 *         assertions are compiled out.
 */
int TopoHwloc::getFirstBitInBitmap(hwloc_bitmap_t bitmap) const
{
	const int first = hwloc_bitmap_first(bitmap);
	assert(first != -1);
	return first;
}
/* NTH: this is no longer used but may be used if we can determine the binding policy*/ static int mca_sbgp_map_to_logical_socket_id(int *socket) { int ret = OMPI_SUCCESS; hwloc_obj_t obj; hwloc_obj_t first_pu_object; hwloc_bitmap_t good; int pu_os_index = -1, my_logical_socket_id = -1; int this_pus_logical_socket_id = -1; *socket = my_logical_socket_id; /* bozo check */ if (NULL == opal_hwloc_topology) { return OPAL_ERR_NOT_INITIALIZED; } good = hwloc_bitmap_alloc(); if (NULL == good) { return OPAL_ERR_OUT_OF_RESOURCE; } /* get this process' CPU binding */ if( 0 != hwloc_get_cpubind(opal_hwloc_topology,good, 0)){ /* report some error */ BASESMSOCKET_VERBOSE(10, "The global variable opal_hwloc_topology appears not to have been initialized\n"); hwloc_bitmap_free(good); return OMPI_ERROR; } /* find the first logical PU object in the hwloc tree */ first_pu_object = hwloc_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU, 0); /* get the next bit in the bitmap (note: if pu_os_index == -1, then the * first bit is returned */ /* traverse the hwloc tree */ while( -1 != (pu_os_index = hwloc_bitmap_next(good, pu_os_index) ) ) { /* Traverse all PUs in the machine in logical order, in the simple case * there should only be a single PU that this process is bound to, right? * */ for( obj = first_pu_object; obj != NULL; obj = obj->next_cousin ) {/* WTF is a "next_cousin" ? */ /* is this PU the same as the bit I pulled off the mask? 
*/ if( obj->os_index == (unsigned int) pu_os_index) { /* Then I found it, break out of for loop */ break; } } if( NULL != obj) { /* if we found the PU, then go upward in the tree * looking for the enclosing socket */ while( (NULL != obj) && ( HWLOC_OBJ_SOCKET != obj->type) ){ obj = obj->parent; } if( NULL == obj ) { /* then we couldn't find an enclosing socket, report this */ } else { /* We found the enclosing socket */ if( -1 == my_logical_socket_id ){ /* this is the first PU that I'm bound to */ this_pus_logical_socket_id = obj->logical_index; my_logical_socket_id = this_pus_logical_socket_id; } else { /* this is not the first PU that I'm bound to. * Seems I'm bound to more than a single PU. Question * is, am I bound to the same socket?? */ /* in order to get rid of the compiler warning, I had to cast * "this_pus_logical_socket_id", at a glance this seems ok, * but if subgrouping problems arise, maybe look here. I shall * tag this line with the "mark of the beast" for grepability * 666 */ if( (unsigned int) this_pus_logical_socket_id != obj->logical_index ){ /* 666 */ /* Then we're bound to more than one socket...fail */ this_pus_logical_socket_id = -1; my_logical_socket_id = -1; break; } } } } /* end while */ } *socket = my_logical_socket_id; hwloc_bitmap_free(good); return ret; }
/* Asserts that the low unsigned long of the bitmap, as well as its words
 * number 0, 1 and 23, all equal `expected`. */
static void check_sampled_ulongs(hwloc_bitmap_t bm, unsigned long expected)
{
  assert(hwloc_bitmap_to_ulong(bm) == expected);
  assert(hwloc_bitmap_to_ith_ulong(bm, 0) == expected);
  assert(hwloc_bitmap_to_ith_ulong(bm, 1) == expected);
  assert(hwloc_bitmap_to_ith_ulong(bm, 23) == expected);
}

/* Exercises the unsigned-long accessors and the range set/clear operations of
 * hwloc bitmaps; every expectation is checked with assert(). */
int main(void)
{
  hwloc_bitmap_t bm;

  /* freshly allocated bitmap: every sampled word is zero */
  bm = hwloc_bitmap_alloc();
  check_sampled_ulongs(bm, 0UL);

  /* only word 4 populated */
  hwloc_bitmap_from_ith_ulong(bm, 4, 0xff);
  assert(hwloc_bitmap_to_ith_ulong(bm, 4) == 0xff);
  check_sampled_ulongs(bm, 0UL);

  /* zeroed again: word 4 is cleared as well */
  hwloc_bitmap_zero(bm);
  check_sampled_ulongs(bm, 0UL);
  assert(hwloc_bitmap_to_ith_ulong(bm, 4) == 0UL);

  hwloc_bitmap_free(bm);

  /* full bitmap: every sampled word has all bits set */
  bm = hwloc_bitmap_alloc_full();
  check_sampled_ulongs(bm, ~0UL);

  /* almost full: word 4 overwritten, the rest untouched */
  hwloc_bitmap_set_ith_ulong(bm, 4, 0xff);
  assert(hwloc_bitmap_to_ith_ulong(bm, 4) == 0xff);
  check_sampled_ulongs(bm, ~0UL);

  /* almost empty: from_ith_ulong leaves only word 4 populated */
  hwloc_bitmap_from_ith_ulong(bm, 4, 0xff);
  assert(hwloc_bitmap_to_ith_ulong(bm, 4) == 0xff);
  check_sampled_ulongs(bm, 0UL);

  hwloc_bitmap_free(bm);

  /* range operations, starting from an empty bitmap */
  bm = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_weight(bm) == 0);

  /* contents: 23-45 */
  hwloc_bitmap_set_range(bm, 23, 45);
  assert(hwloc_bitmap_weight(bm) == 23);

  /* contents: 23-45,78- (an end of -1 leaves the range open-ended) */
  hwloc_bitmap_set_range(bm, 78, -1);
  assert(hwloc_bitmap_weight(bm) == -1);

  /* contents: 23- */
  hwloc_bitmap_set_range(bm, 44, 79);
  assert(hwloc_bitmap_weight(bm) == -1);
  assert(hwloc_bitmap_first(bm) == 23);
  assert(!hwloc_bitmap_isfull(bm));

  /* contents: 0- */
  hwloc_bitmap_set_range(bm, 0, 22);
  assert(hwloc_bitmap_weight(bm) == -1);
  assert(hwloc_bitmap_isfull(bm));

  /* contents: 0-34,57- */
  hwloc_bitmap_clr_range(bm, 35, 56);
  assert(hwloc_bitmap_weight(bm) == -1);
  assert(!hwloc_bitmap_isfull(bm));

  /* contents: 0-34,57 */
  hwloc_bitmap_clr_range(bm, 58, -1);
  assert(hwloc_bitmap_weight(bm) == 36);
  assert(hwloc_bitmap_last(bm) == 57);
  assert(hwloc_bitmap_next(bm, 34) == 57);

  /* contents: 0-34 */
  hwloc_bitmap_clr(bm, 57);
  assert(hwloc_bitmap_weight(bm) == 35);
  assert(hwloc_bitmap_last(bm) == 34);

  /* contents: empty */
  hwloc_bitmap_clr_range(bm, 0, 34);
  assert(hwloc_bitmap_weight(bm) == 0);
  assert(hwloc_bitmap_first(bm) == -1);

  hwloc_bitmap_free(bm);

  return 0;
}
int main(void) { hwloc_bitmap_t set; int i, cpu, expected_cpu = 0; /* empty set */ set = hwloc_bitmap_alloc(); assert(hwloc_bitmap_first(set) == -1); assert(hwloc_bitmap_last(set) == -1); assert(hwloc_bitmap_next(set, 0) == -1); assert(hwloc_bitmap_next(set, -1) == -1); assert(hwloc_bitmap_weight(set) == 0); /* full set */ hwloc_bitmap_fill(set); assert(hwloc_bitmap_first(set) == 0); assert(hwloc_bitmap_last(set) == -1); assert(hwloc_bitmap_next(set, -1) == 0); assert(hwloc_bitmap_next(set, 0) == 1); assert(hwloc_bitmap_next(set, 1) == 2); assert(hwloc_bitmap_next(set, 2) == 3); assert(hwloc_bitmap_next(set, 30) == 31); assert(hwloc_bitmap_next(set, 31) == 32); assert(hwloc_bitmap_next(set, 32) == 33); assert(hwloc_bitmap_next(set, 62) == 63); assert(hwloc_bitmap_next(set, 63) == 64); assert(hwloc_bitmap_next(set, 64) == 65); assert(hwloc_bitmap_next(set, 12345) == 12346); assert(hwloc_bitmap_weight(set) == -1); /* custom sets */ hwloc_bitmap_zero(set); hwloc_bitmap_set_range(set, 36, 59); assert(hwloc_bitmap_first(set) == 36); assert(hwloc_bitmap_last(set) == 59); assert(hwloc_bitmap_next(set, -1) == 36); assert(hwloc_bitmap_next(set, 0) == 36); assert(hwloc_bitmap_next(set, 36) == 37); assert(hwloc_bitmap_next(set, 59) == -1); assert(hwloc_bitmap_weight(set) == 24); hwloc_bitmap_set_range(set, 136, 259); assert(hwloc_bitmap_first(set) == 36); assert(hwloc_bitmap_last(set) == 259); assert(hwloc_bitmap_next(set, 59) == 136); assert(hwloc_bitmap_next(set, 259) == -1); assert(hwloc_bitmap_weight(set) == 148); hwloc_bitmap_clr(set, 199); assert(hwloc_bitmap_first(set) == 36); assert(hwloc_bitmap_last(set) == 259); assert(hwloc_bitmap_next(set, 198) == 200); assert(hwloc_bitmap_next(set, 199) == 200); assert(hwloc_bitmap_weight(set) == 147); i = 0; hwloc_bitmap_foreach_begin(cpu, set) { if (0 <= i && i < 24) expected_cpu = i + 36; else if (24 <= i && i < 87) expected_cpu = i + 112; else if (87 <= i && i < 147) expected_cpu = i + 113; assert(expected_cpu == cpu); i++; } 
hwloc_bitmap_foreach_end(); hwloc_bitmap_free(set); return 0; }
void computeCPUOMP(int threadId, expression_type * expr, im_type * im, element_iterator * elt_it, std::vector<std::pair<element_iterator, element_iterator> > * elts) { char * a; int cid; std::ostringstream oss; #if 0 hwloc_cpuset_t set = nullptr; /* get a cpuset object */ set = hwloc_bitmap_alloc(); /* Get the cpu thread affinity info of the current process/thread */ hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ")|"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; /* Get the latest core location of the current process/thread */ hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ");"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; #endif #if defined(FEELPP_HAS_HARTS) perf_mng.init("cpu") ; perf_mng.start("cpu") ; perf_mng.init("1.1") ; perf_mng.init("1.2") ; perf_mng.init("2.1") ; perf_mng.init("2.2") ; perf_mng.init("3") ; #endif //M_gm((*elt_it)->gm()); gm_ptrtype gm = (*elt_it)->gm(); //M_geopc(new typename eval::gmpc_type( M_gm, im->points() )); typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) ); //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc )); gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) ); //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ); eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) ); for (int i = 0; i < elts->size(); i++) { /* std::cout << Environment::worldComm().rank() << " nbItems: " << elts->size() << " nbElts " << std::distance(elts->at(i), 
elts->at(i+1)) << " 1st id " << elts->at(i)->id() << std::endl; */ //std::cout << Environment::worldComm().rank() << "|" << theadId << " fid=" elts.at(i).first.id() << std::endl; for ( auto _elt = elts->at(i).first; _elt != elts->at(i).second; ++_elt ) { //perf_mng.start("1.1") ; __c->update( *_elt ); //perf_mng.stop("1.1") ; //perf_mng.start("1.2") ; map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) ); //perf_mng.stop("1.2") ; //perf_mng.start("2.1") ; __expr.update( mapgmc ); //perf_mng.stop("2.1") ; //perf_mng.start("2.2") ; im->update( *__c ); //perf_mng.stop("2.2") ; //perf_mng.start("3") ; for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 ) { for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 ) { M_ret( c1,c2 ) += (*im)( __expr, c1, c2 ); } } //perf_mng.stop("3") ; } } #if defined(FEELPP_HAS_HARTS) perf_mng.stop("cpu") ; M_cpuTime = perf_mng.getValueInSeconds("cpu"); #endif }
void computeCPU(DataArgsType& args) { char * a; int cid; hwloc_cpuset_t set = nullptr; std::ostringstream oss; /* This initialization takes some time */ /* When using hartsi, the object instanciation is done when creating tasks */ /* and this is not a parallel section, thus we lose time in initialization */ /* doing it the computation step allows to incorporate this init time in the parallel section */ /* M_threadId( threadId ), M_gm( new gm_type( *_elt.gm() ) ), M_geopc( new gmpc_type( M_gm, _im.points() ) ), M_c( new gmc_type( M_gm, _elt, M_geopc ) ), M_expr( _expr, map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ), M_im( _im ), M_ret( eval::matrix_type::Zero() ), M_cpuTime( 0.0 ) */ #if 0 /* get a cpuset object */ set = hwloc_bitmap_alloc(); /* Get the cpu thread affinity info of the current process/thread */ hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ")|"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; /* Get the latest core location of the current process/thread */ hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ");"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; #endif perf_mng.init("1.1") ; perf_mng.init("1.1") ; perf_mng.init("2.1") ; perf_mng.init("2.2") ; perf_mng.init("3") ; /* free memory */ if(set != nullptr) { hwloc_bitmap_free(set); } //perf_mng.init("data") ; //perf_mng.start("data") ; // DEFINE the range to be iterated on std::vector<std::pair<element_iterator, element_iterator> > * elts = 
args.get("elements")->get<std::vector<std::pair<element_iterator, element_iterator> > >(); int * threadId = args.get("threadId")->get<int>(); expression_type * expr = args.get("expr")->get<expression_type>(); im_type * im = args.get("im")->get<im_type>(); element_iterator * elt_it = args.get("elt")->get<element_iterator>(); //M_gm((*elt_it)->gm()); gm_ptrtype gm = (*elt_it)->gm(); //M_geopc(new typename eval::gmpc_type( M_gm, im->points() )); typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) ); //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc )); gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) ); //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ); eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) ); //perf_mng.stop("data"); perf_mng.init("cpu") ; perf_mng.start("cpu") ; for (int i = 0; i < elts->size(); i++) { //std::cout << Environment::worldComm().rank() << " nbItems: " << elts->size() << " nbElts " << std::distance(elts->at(i), elts->at(i+1)) << std::endl; for ( auto _elt = elts->at(i).first; _elt != elts->at(i).second; ++_elt ) { //perf_mng.start("1.1") ; //M_c->update( *_elt ); __c->update( *_elt ); //perf_mng.stop("1.1") ; //perf_mng.start("1.2") ; map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) ); //perf_mng.stop("1.2") ; //perf_mng.start("2.1") ; __expr.update( mapgmc ); //perf_mng.stop("2.1") ; //perf_mng.start("2.2") ; im->update( *__c ); //perf_mng.stop("2.2") ; //perf_mng.start("3") ; for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 ) { for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 ) { M_ret( c1,c2 ) += (*im)( __expr, c1, c2 ); } } //perf_mng.stop("3") ; } } perf_mng.stop("cpu") ; M_cpuTime = perf_mng.getValueInSeconds("cpu"); }