Esempio n. 1
0
/// Write the indices of all bits set in @p bitmap to @p s, formatted as
/// "{ i j k }" (an empty bitmap prints "{ }").
///
/// @param s       Output stream that receives the text.
/// @param bitmap  hwloc bitmap whose set indices are listed in ascending order.
///
/// NOTE(review): the walk stops only when hwloc reports -1, so this presumably
/// does not terminate for an infinitely-set bitmap -- confirm callers never
/// pass one.
inline
void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap )
{
  s << "{" ;

  int bit = hwloc_bitmap_first( bitmap ) ;
  while ( bit != -1 ) {
    s << " " << bit ;
    bit = hwloc_bitmap_next( bitmap , bit ) ;
  }

  s << " }" ;
}
Esempio n. 2
0
/*******************  FUNCTION  *********************/
/**
 * Return the index of the lowest bit set in a bitmap.
 *
 * hwloc_bitmap_first() already yields an index that is set, so no further
 * scanning with hwloc_bitmap_isset()/hwloc_bitmap_next() is required: the
 * previous search loop always stopped on its very first candidate.
 *
 * @param bitmap Bitmap to inspect. It is expected to be non-empty; this is
 *               checked with an assertion.
 * @return Index of the first set bit, or -1 if the bitmap is empty and
 *         assertions are compiled out.
 */
int TopoHwloc::getFirstBitInBitmap(hwloc_bitmap_t bitmap) const
{
	const int first = hwloc_bitmap_first(bitmap);
	assert(first != -1);
	return first;
}
/* NTH: this is no longer used but may be used if we can determine the binding policy*/
/**
 * Determine the logical index of the socket this process is bound to.
 *
 * Every PU in the process' CPU binding is looked up in the hwloc tree and
 * its enclosing HWLOC_OBJ_SOCKET ancestor is located.  A socket index is
 * reported only when all PUs for which a socket was found agree on it.
 *
 * @param[out] socket  Receives the logical socket index.  It is set to -1
 *                     on every error return, when no enclosing socket was
 *                     found, and when the binding spans more than one socket.
 *
 * @return OMPI_SUCCESS once the binding has been examined (even if *socket
 *         ends up as -1), OPAL_ERR_NOT_INITIALIZED if opal_hwloc_topology
 *         is NULL, OPAL_ERR_OUT_OF_RESOURCE if the bitmap cannot be
 *         allocated, OMPI_ERROR if hwloc_get_cpubind() fails.
 */
static int mca_sbgp_map_to_logical_socket_id(int *socket)
{
    hwloc_obj_t obj;
    hwloc_obj_t first_pu_object;
    hwloc_bitmap_t good;
    int pu_os_index = -1;
    int my_logical_socket_id = -1;

    /* Pessimistic default so that every early return leaves a defined value. */
    *socket = my_logical_socket_id;

    /* bozo check */
    if (NULL == opal_hwloc_topology) {
        return OPAL_ERR_NOT_INITIALIZED;
    }

    good = hwloc_bitmap_alloc();
    if (NULL == good) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* get this process' CPU binding */
    if (0 != hwloc_get_cpubind(opal_hwloc_topology, good, 0)) {
        /* The topology pointer was validated above, so the failure is in
         * the binding query itself; say so instead of blaming the topology. */
        BASESMSOCKET_VERBOSE(10, "hwloc_get_cpubind failed: unable to determine this process' CPU binding\n");
        hwloc_bitmap_free(good);
        return OMPI_ERROR;
    }

    /* first logical PU object in the hwloc tree; start of every PU scan below */
    first_pu_object = hwloc_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU, 0);

    /* Visit each bit of the binding.  hwloc_bitmap_next() is first called
     * with -1 so that the walk starts at the first set bit. */
    while (-1 != (pu_os_index = hwloc_bitmap_next(good, pu_os_index))) {

        /* Scan the PUs (chained through next_cousin) for the one whose OS
         * index matches the bit just pulled off the mask. */
        for (obj = first_pu_object; NULL != obj; obj = obj->next_cousin) {
            if (obj->os_index == (unsigned int) pu_os_index) {
                break;
            }
        }

        /* Climb towards the root until the enclosing socket is reached.
         * If the PU was not found, obj is already NULL and nothing happens. */
        while ((NULL != obj) && (HWLOC_OBJ_SOCKET != obj->type)) {
            obj = obj->parent;
        }

        if (NULL == obj) {
            /* PU unknown to the topology, or no enclosing socket: skip it. */
            continue;
        }

        if (-1 == my_logical_socket_id) {
            /* first PU with a socket: remember that socket */
            my_logical_socket_id = obj->logical_index;
        } else if ((unsigned int) my_logical_socket_id != obj->logical_index) {
            /* 666: the cast only silences the signed/unsigned comparison
             * warning; if subgrouping problems arise, look here. */
            /* bound to more than one socket...fail */
            my_logical_socket_id = -1;
            break;
        }
    }

    *socket = my_logical_socket_id;
    hwloc_bitmap_free(good);

    return OMPI_SUCCESS;
}
Esempio n. 4
0
/*
 * Self-checking test of hwloc bitmap <-> unsigned long conversions and of
 * range set/clear operations.  Every expectation is an assert(); the
 * statements mutate one shared bitmap step by step, so their order matters.
 * Returns 0 when all assertions hold.
 */
int main(void)
{
    hwloc_bitmap_t set;

    /* check an empty bitmap: every ulong slice must read as zero */
    set = hwloc_bitmap_alloc();
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    /* check a non-empty bitmap: only the 4th ulong carries bits */
    hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    /* check a zeroed bitmap: the 4th ulong must be cleared as well */
    hwloc_bitmap_zero(set);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    hwloc_bitmap_free(set);

    /* check a full bitmap: every ulong slice must read as all-ones */
    set = hwloc_bitmap_alloc_full();
    assert(hwloc_bitmap_to_ulong(set) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);
    /* check an almost full bitmap: set_ith_ulong replaces only the 4th ulong */
    hwloc_bitmap_set_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);
    /* check an almost empty bitmap: from_ith_ulong leaves every other ulong zero */
    hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    hwloc_bitmap_free(set);

    /* check ranges; the comment before each step gives the expected content
     * of the set after that step (a trailing '-' means "up to infinity", in
     * which case the weight is asserted to be -1) */
    set = hwloc_bitmap_alloc();
    assert(hwloc_bitmap_weight(set) == 0);
    /* 23-45 */
    hwloc_bitmap_set_range(set, 23, 45);
    assert(hwloc_bitmap_weight(set) == 23);
    /* 23-45,78- */
    hwloc_bitmap_set_range(set, 78, -1);
    assert(hwloc_bitmap_weight(set) == -1);
    /* 23- */
    hwloc_bitmap_set_range(set, 44, 79);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(hwloc_bitmap_first(set) == 23);
    assert(!hwloc_bitmap_isfull(set));
    /* 0- */
    hwloc_bitmap_set_range(set, 0, 22);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(hwloc_bitmap_isfull(set));
    /* 0-34,57- */
    hwloc_bitmap_clr_range(set, 35, 56);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(!hwloc_bitmap_isfull(set));
    /* 0-34,57 */
    hwloc_bitmap_clr_range(set, 58, -1);
    assert(hwloc_bitmap_weight(set) == 36);
    assert(hwloc_bitmap_last(set) == 57);
    assert(hwloc_bitmap_next(set, 34) == 57);
    /* 0-34 */
    hwloc_bitmap_clr(set, 57);
    assert(hwloc_bitmap_weight(set) == 35);
    assert(hwloc_bitmap_last(set) == 34);
    /* empty */
    hwloc_bitmap_clr_range(set, 0, 34);
    assert(hwloc_bitmap_weight(set) == 0);
    assert(hwloc_bitmap_first(set) == -1);
    hwloc_bitmap_free(set);

    return 0;
}
/*
 * Self-checking test of hwloc_bitmap_first/last/next/weight and of the
 * hwloc_bitmap_foreach_begin/end iteration macros on an empty set, a full
 * (infinite) set and hand-built sets.  Every expectation is an assert().
 * Returns 0 when all assertions hold.
 */
int main(void)
{
  hwloc_bitmap_t set;
  int i, cpu, expected_cpu = 0;

  /* empty set */
  set = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_first(set) == -1);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, 0) == -1);
  assert(hwloc_bitmap_next(set, -1) == -1);
  assert(hwloc_bitmap_weight(set) == 0);

  /* full set: last and weight are asserted to be -1 because it is unbounded */
  hwloc_bitmap_fill(set);
  assert(hwloc_bitmap_first(set) == 0);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, -1) == 0);
  assert(hwloc_bitmap_next(set, 0) == 1);
  assert(hwloc_bitmap_next(set, 1) == 2);
  assert(hwloc_bitmap_next(set, 2) == 3);
  assert(hwloc_bitmap_next(set, 30) == 31);
  assert(hwloc_bitmap_next(set, 31) == 32);
  assert(hwloc_bitmap_next(set, 32) == 33);
  assert(hwloc_bitmap_next(set, 62) == 63);
  assert(hwloc_bitmap_next(set, 63) == 64);
  assert(hwloc_bitmap_next(set, 64) == 65);
  assert(hwloc_bitmap_next(set, 12345) == 12346);
  assert(hwloc_bitmap_weight(set) == -1);

  /* custom sets */
  /* 36-59 */
  hwloc_bitmap_zero(set);
  hwloc_bitmap_set_range(set, 36, 59);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 59);
  assert(hwloc_bitmap_next(set, -1) == 36);
  assert(hwloc_bitmap_next(set, 0) == 36);
  assert(hwloc_bitmap_next(set, 36) == 37);
  assert(hwloc_bitmap_next(set, 59) == -1);
  assert(hwloc_bitmap_weight(set) == 24);
  /* 36-59,136-259 */
  hwloc_bitmap_set_range(set, 136, 259);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 59) == 136);
  assert(hwloc_bitmap_next(set, 259) == -1);
  assert(hwloc_bitmap_weight(set) == 148);
  /* 36-59,136-198,200-259 */
  hwloc_bitmap_clr(set, 199);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 198) == 200);
  assert(hwloc_bitmap_next(set, 199) == 200);
  assert(hwloc_bitmap_weight(set) == 147);

  /* Iterate over the set and map the running position i to the index that
   * must be visited at that position:
   *   i in [0,24)   -> 36..59
   *   i in [24,87)  -> 136..198
   *   i in [87,147) -> 200..259 (199 was cleared above)
   */
  i = 0;
  hwloc_bitmap_foreach_begin(cpu, set) {
    if (0 <= i && i < 24)
      expected_cpu = i + 36;
    else if (24 <= i && i < 87)
      expected_cpu = i + 112;
    else if (87 <= i && i < 147)
      expected_cpu = i + 113;

    assert(expected_cpu == cpu);

    i++;
  } hwloc_bitmap_foreach_end();

  /* The loop must have visited every index exactly once; without this check
   * an iteration that stops early would go unnoticed. */
  assert(i == 147);

  hwloc_bitmap_free(set);

  return 0;
}
Esempio n. 6
0
        void computeCPUOMP(int threadId, expression_type * expr, im_type * im, element_iterator * elt_it, std::vector<std::pair<element_iterator, element_iterator> > * elts)
        {
            char * a;
            int cid;
            std::ostringstream oss;

#if 0
            hwloc_cpuset_t set = nullptr;

            /* get a cpuset object */
            set = hwloc_bitmap_alloc();

            /* Get the cpu thread affinity info of the current process/thread */
            hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a); 
            
            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ")|";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;

            /* Get the latest core location of the current process/thread */
            hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a);

            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ");";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;
#endif

#if defined(FEELPP_HAS_HARTS)
            perf_mng.init("cpu") ;
            perf_mng.start("cpu") ;
            perf_mng.init("1.1") ;
            perf_mng.init("1.2") ;
            perf_mng.init("2.1") ;
            perf_mng.init("2.2") ;
            perf_mng.init("3") ;
#endif
            
            //M_gm((*elt_it)->gm());
            gm_ptrtype gm = (*elt_it)->gm();
            //M_geopc(new typename eval::gmpc_type( M_gm, im->points() ));
            typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) );
            //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc ));
            gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) );
            //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) );
            eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) );


            for (int i = 0; i < elts->size(); i++)
            {
                /*
                std::cout << Environment::worldComm().rank() <<  " nbItems: " << elts->size() 
                          << " nbElts " << std::distance(elts->at(i), elts->at(i+1))
                          << " 1st id " << elts->at(i)->id() << std::endl;
                */

                //std::cout << Environment::worldComm().rank() << "|" << theadId << " fid=" elts.at(i).first.id() << std::endl;
                for ( auto _elt = elts->at(i).first; _elt != elts->at(i).second; ++_elt )
                {
                    //perf_mng.start("1.1") ;
                    __c->update( *_elt );
                    //perf_mng.stop("1.1") ;
                    //perf_mng.start("1.2") ;
                    map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) );
                    //perf_mng.stop("1.2") ;

                    //perf_mng.start("2.1") ;
                    __expr.update( mapgmc );
                    //perf_mng.stop("2.1") ;
                    //perf_mng.start("2.2") ;
                    im->update( *__c );
                    //perf_mng.stop("2.2") ;

                    //perf_mng.start("3") ;
                    for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 )
                    {
                        for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 )
                        {
                            M_ret( c1,c2 ) += (*im)( __expr, c1, c2 );
                        }
                    }
                    //perf_mng.stop("3") ;
                }
            }

#if defined(FEELPP_HAS_HARTS)
            perf_mng.stop("cpu") ;
            M_cpuTime = perf_mng.getValueInSeconds("cpu");
#endif
        }
Esempio n. 7
0
        /**
         * Integrate the expression over a list of element ranges and
         * accumulate the result into M_ret, timing the loop with the "cpu"
         * performance counter (stored in M_cpuTime).
         *
         * The geometric context and the expression evaluator are built here
         * rather than at object construction so that, as the original note
         * explains, their setup cost is paid inside the parallel section.
         *
         * @param args  Argument bundle; the entries "elements", "threadId",
         *              "expr", "im" and "elt" are read from it.
         */
        void computeCPU(DataArgsType& args)
        {
            /* This initialization takes some time */
            /* When using hartsi, the object instanciation is done when creating tasks */
            /* and this is not a parallel section, thus we lose time in initialization */
            /* doing it the computation step allows to incorporate this init time in the parallel section */
            /*
            M_threadId( threadId ),
            M_gm( new gm_type( *_elt.gm() ) ),
            M_geopc( new gmpc_type( M_gm, _im.points() ) ),
            M_c( new gmc_type( M_gm, _elt, M_geopc ) ),
            M_expr( _expr, map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ),
            M_im( _im ),
            M_ret( eval::matrix_type::Zero() ),
            M_cpuTime( 0.0 )
            */

#if 0
            /* Debug only: print the CPU binding and the last CPU location of
             * the calling thread. The locals (and the cpuset release) live
             * here so that the enabled build carries no dead state. */
            char * a;
            int cid;
            hwloc_cpuset_t set = nullptr;
            std::ostringstream oss;

            /* get a cpuset object */
            set = hwloc_bitmap_alloc();

            /* Get the cpu thread affinity info of the current process/thread */
            hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a); 
            
            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ")|";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;

            /* Get the latest core location of the current process/thread */
            hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a);

            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ");";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;

            /* free memory */
            if(set != nullptr)
            {
                hwloc_bitmap_free(set);
            }
#endif

            /* Register the fine-grained counters. "1.2" used to be missing
             * because "1.1" was registered twice. */
            perf_mng.init("1.1") ;
            perf_mng.init("1.2") ;
            perf_mng.init("2.1") ;
            perf_mng.init("2.2") ;
            perf_mng.init("3") ;

            //perf_mng.init("data") ;
            //perf_mng.start("data") ;

            // DEFINE the range to be iterated on
            std::vector<std::pair<element_iterator, element_iterator> > * elts =
                args.get("elements")->get<std::vector<std::pair<element_iterator, element_iterator> > >();

            /* NOTE(review): threadId is not used below; the lookup is kept in
             * case it validates the presence of the entry -- confirm. */
            int * threadId = args.get("threadId")->get<int>();
            expression_type * expr = args.get("expr")->get<expression_type>();
            im_type * im = args.get("im")->get<im_type>();
            element_iterator * elt_it = args.get("elt")->get<element_iterator>();
            
            /* Build the evaluation context locally; the commented M_* lines
             * show the member-based construction each statement replaces. */
            //M_gm((*elt_it)->gm());
            gm_ptrtype gm = (*elt_it)->gm();
            //M_geopc(new typename eval::gmpc_type( M_gm, im->points() ));
            typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) );
            //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc ));
            gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) );
            //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) );
            eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) );

            //perf_mng.stop("data");

            perf_mng.init("cpu") ;
            perf_mng.start("cpu") ;

            /* Range-for avoids the signed/unsigned index comparison and the
             * repeated bounds-checked at() lookups. */
            for ( auto & range : *elts )
            {
                for ( auto _elt = range.first; _elt != range.second; ++_elt )
                {
                    //perf_mng.start("1.1") ;
                    //M_c->update( *_elt );
                    __c->update( *_elt );
                    //perf_mng.stop("1.1") ;
                    //perf_mng.start("1.2") ;
                    map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) );
                    //perf_mng.stop("1.2") ;

                    //perf_mng.start("2.1") ;
                    __expr.update( mapgmc );
                    //perf_mng.stop("2.1") ;
                    //perf_mng.start("2.2") ;
                    im->update( *__c );
                    //perf_mng.stop("2.2") ;

                    //perf_mng.start("3") ;
                    /* accumulate the contribution of this element, component by component */
                    for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 )
                    {
                        for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 )
                        {
                            M_ret( c1,c2 ) += (*im)( __expr, c1, c2 );
                        }
                    }
                    //perf_mng.stop("3") ;
                }
            }

            perf_mng.stop("cpu") ;
            M_cpuTime = perf_mng.getValueInSeconds("cpu");
        }