static int _get_cpuinfo(uint32_t *nsockets, uint32_t *ncores,
			uint32_t *nthreads, uint32_t *npus)
{
	hwloc_topology_t topology;

	if (hwloc_topology_init(&topology)) {
	/* error initializing the hwloc library */
		error("%s: hwloc_topology_init() failed", __func__);
		return -1;
	}
	/* parse full system info */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);
	/* ignore cache and misc objects */
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC);
	/* load topology */
	if (hwloc_topology_load(topology)) {
		error("%s: hwloc_topology_load() failed", __func__);
		hwloc_topology_destroy(topology);
		return -1;
	}

	*nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
							HWLOC_OBJ_SOCKET);
	*ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						      HWLOC_OBJ_CORE);
	*nthreads = (uint32_t) hwloc_get_nbobjs_by_type(topology,
							HWLOC_OBJ_PU);
	*npus = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						    HWLOC_OBJ_PU);
	hwloc_topology_destroy(topology);
	return 0;
}
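A minimal sketch of how such a helper might be called, assuming _get_cpuinfo() above is visible in the same translation unit (the print_cpu_counts() wrapper below is hypothetical and only illustrates checking the return value before trusting the counts):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical caller: print the detected counts if detection succeeded. */
static void print_cpu_counts(void)
{
	uint32_t nsockets = 0, ncores = 0, nthreads = 0, npus = 0;

	if (_get_cpuinfo(&nsockets, &ncores, &nthreads, &npus) != 0) {
		fprintf(stderr, "CPU detection failed\n");
		return;
	}
	printf("Sockets:%u Cores:%u Threads:%u PUs:%u\n",
	       nsockets, ncores, nthreads, npus);
}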
Example #2
int
main (int argc, char *argv[])
{
  int err;
  hwloc_topology_t topology;
  const char *filename = NULL;
  unsigned long flags = HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES | HWLOC_TOPOLOGY_FLAG_ICACHES;
  unsigned long restrict_flags = 0;
  int merge = 0;
  int ignorecache = 0;
  char * callname;
  char * input = NULL;
  enum hwloc_utils_input_format input_format = HWLOC_UTILS_INPUT_DEFAULT;
  enum output_format output_format = LSTOPO_OUTPUT_DEFAULT;
  char *restrictstring = NULL;
  struct lstopo_output loutput;
  int opt;
  unsigned i;

  loutput.overwrite = 0;
  loutput.logical = -1;
  loutput.legend = 1;
  loutput.verbose_mode = LSTOPO_VERBOSE_MODE_DEFAULT;

  for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
    force_orient[i] = LSTOPO_ORIENT_NONE;
  force_orient[HWLOC_OBJ_PU] = LSTOPO_ORIENT_HORIZ;
  force_orient[HWLOC_OBJ_CACHE] = LSTOPO_ORIENT_HORIZ;
  force_orient[HWLOC_OBJ_NUMANODE] = LSTOPO_ORIENT_HORIZ;

  /* enable verbose backends */
  putenv("HWLOC_XML_VERBOSE=1");
  putenv("HWLOC_SYNTHETIC_VERBOSE=1");

#ifdef HAVE_SETLOCALE
  setlocale(LC_ALL, "");
#endif

  callname = strrchr(argv[0], '/');
  if (!callname)
    callname = argv[0];
  else
    callname++;
  /* skip argv[0], handle options */
  argc--;
  argv++;

  err = hwloc_topology_init (&topology);
  if (err)
    return EXIT_FAILURE;

  while (argc >= 1)
    {
      opt = 0;
      if (!strcmp (argv[0], "-v") || !strcmp (argv[0], "--verbose")) {
	loutput.verbose_mode++;
      } else if (!strcmp (argv[0], "-s") || !strcmp (argv[0], "--silent")) {
	loutput.verbose_mode--;
      } else if (!strcmp (argv[0], "-h") || !strcmp (argv[0], "--help")) {
	usage(callname, stdout);
        exit(EXIT_SUCCESS);
      } else if (!strcmp (argv[0], "-f") || !strcmp (argv[0], "--force"))
	loutput.overwrite = 1;
      else if (!strcmp (argv[0], "-l") || !strcmp (argv[0], "--logical"))
	loutput.logical = 1;
      else if (!strcmp (argv[0], "-p") || !strcmp (argv[0], "--physical"))
	loutput.logical = 0;
      else if (!strcmp (argv[0], "-c") || !strcmp (argv[0], "--cpuset"))
	lstopo_show_cpuset = 1;
      else if (!strcmp (argv[0], "-C") || !strcmp (argv[0], "--cpuset-only"))
	lstopo_show_cpuset = 2;
      else if (!strcmp (argv[0], "--taskset")) {
	lstopo_show_taskset = 1;
	if (!lstopo_show_cpuset)
	  lstopo_show_cpuset = 1;
      } else if (!strcmp (argv[0], "--only")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
        if (hwloc_obj_type_sscanf(argv[1], &lstopo_show_only, NULL, NULL, 0) < 0)
	  fprintf(stderr, "Unsupported type `%s' passed to --only, ignoring.\n", argv[1]);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--ignore")) {
	hwloc_obj_type_t type;
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	if (hwloc_obj_type_sscanf(argv[1], &type, NULL, NULL, 0) < 0)
	  fprintf(stderr, "Unsupported type `%s' passed to --ignore, ignoring.\n", argv[1]);
	else if (type == HWLOC_OBJ_PU)
	  lstopo_ignore_pus = 1;
	else
	  hwloc_topology_ignore_type(topology, type);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--no-caches"))
	ignorecache = 2;
      else if (!strcmp (argv[0], "--no-useless-caches"))
	ignorecache = 1;
      else if (!strcmp (argv[0], "--no-icaches"))
	flags &= ~HWLOC_TOPOLOGY_FLAG_ICACHES;
      else if (!strcmp (argv[0], "--whole-system"))
	flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
      else if (!strcmp (argv[0], "--no-io"))
	flags &= ~(HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
      else if (!strcmp (argv[0], "--no-bridges"))
	flags &= ~(HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
      else if (!strcmp (argv[0], "--whole-io"))
	flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_IO;
      else if (!strcmp (argv[0], "--merge"))
	merge = 1;
      else if (!strcmp (argv[0], "--no-collapse"))
	lstopo_collapse = 0;
      else if (!strcmp (argv[0], "--thissystem"))
	flags |= HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM;
      else if (!strcmp (argv[0], "--restrict")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	restrictstring = strdup(argv[1]);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--restrict-flags")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	restrict_flags = (unsigned long) strtoull(argv[1], NULL, 0);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--export-synthetic-flags")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	lstopo_export_synthetic_flags = (unsigned long) strtoull(argv[1], NULL, 0);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--horiz"))
	for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
	  force_orient[i] = LSTOPO_ORIENT_HORIZ;
      else if (!strcmp (argv[0], "--vert"))
	for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
	  force_orient[i] = LSTOPO_ORIENT_VERT;
      else if (!strcmp (argv[0], "--rect"))
	for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
	  force_orient[i] = LSTOPO_ORIENT_RECT;
      else if (!strncmp (argv[0], "--horiz=", 8)
	       || !strncmp (argv[0], "--vert=", 7)
	       || !strncmp (argv[0], "--rect=", 7)) {
	enum lstopo_orient_e orient = (argv[0][2] == 'h') ? LSTOPO_ORIENT_HORIZ : (argv[0][2] == 'v') ? LSTOPO_ORIENT_VERT : LSTOPO_ORIENT_RECT;
	char *tmp = argv[0] + ((argv[0][2] == 'h') ? 8 : 7);
	while (tmp) {
	  char *end = strchr(tmp, ',');
	  hwloc_obj_type_t type;
	  if (end)
	    *end = '\0';
	  if (hwloc_obj_type_sscanf(tmp, &type, NULL, NULL, 0) < 0)
	    fprintf(stderr, "Unsupported type `%s' passed to %s, ignoring.\n", tmp, argv[0]);
	  else
	    force_orient[type] = orient;
	  if (!end)
	    break;
	  tmp = end+1;
        }
      }

      else if (!strcmp (argv[0], "--fontsize")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	fontsize = atoi(argv[1]);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--gridsize")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	gridsize = atoi(argv[1]);
	opt = 1;
      }
      else if (!strcmp (argv[0], "--no-legend")) {
	loutput.legend = 0;
      }
      else if (!strcmp (argv[0], "--append-legend")) {
	char **tmp;
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	tmp = realloc(lstopo_append_legends, (lstopo_append_legends_nr+1) * sizeof(*lstopo_append_legends));
	if (!tmp) {
	  fprintf(stderr, "Failed to realloc legend append array, legend ignored.\n");
	} else {
	  lstopo_append_legends = tmp;
	  lstopo_append_legends[lstopo_append_legends_nr] = strdup(argv[1]);
	  lstopo_append_legends_nr++;
	}
	opt = 1;
      }

      else if (hwloc_utils_lookup_input_option(argv, argc, &opt,
					       &input, &input_format,
					       callname)) {
	/* nothing to do anymore */

      } else if (!strcmp (argv[0], "--pid")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
	lstopo_pid_number = atoi(argv[1]); opt = 1;
      } else if (!strcmp (argv[0], "--ps") || !strcmp (argv[0], "--top"))
        top = 1;
      else if (!strcmp (argv[0], "--version")) {
          printf("%s %s\n", callname, HWLOC_VERSION);
          exit(EXIT_SUCCESS);
      } else if (!strcmp (argv[0], "--output-format") || !strcmp (argv[0], "--of")) {
	if (argc < 2) {
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	}
        output_format = parse_output_format(argv[1], callname);
        opt = 1;
      } else {
	if (filename) {
	  fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
	  usage (callname, stderr);
	  exit(EXIT_FAILURE);
	} else
	  filename = argv[0];
      }
      argc -= opt+1;
      argv += opt+1;
    }

  if (lstopo_show_only != (hwloc_obj_type_t)-1)
    merge = 0;

  hwloc_topology_set_flags(topology, flags);

  if (ignorecache > 1) {
    hwloc_topology_ignore_type(topology, HWLOC_OBJ_CACHE);
  } else if (ignorecache) {
    hwloc_topology_ignore_type_keep_structure(topology, HWLOC_OBJ_CACHE);
  }
  if (merge)
    hwloc_topology_ignore_all_keep_structure(topology);

  if (input) {
    err = hwloc_utils_enable_input_format(topology, input, &input_format, loutput.verbose_mode > 1, callname);
    if (err)
      return err;
  }

  if (lstopo_pid_number > 0) {
    lstopo_pid = hwloc_pid_from_number(lstopo_pid_number, 0);
    if (hwloc_topology_set_pid(topology, lstopo_pid)) {
      perror("Setting target pid");
      return EXIT_FAILURE;
    }
  }

  /* if the output format wasn't enforced, look at the filename */
  if (filename && output_format == LSTOPO_OUTPUT_DEFAULT) {
    if (!strcmp(filename, "-")
	|| !strcmp(filename, "/dev/stdout")) {
      output_format = LSTOPO_OUTPUT_CONSOLE;
    } else {
      char *dot = strrchr(filename, '.');
      if (dot)
        output_format = parse_output_format(dot+1, callname);
      else {
	fprintf(stderr, "Cannot infer output type for file `%s' without any extension, using default output.\n", filename);
	filename = NULL;
      }
    }
  }

  /* if the output format wasn't enforced, think a bit about what the user probably wants */
  if (output_format == LSTOPO_OUTPUT_DEFAULT) {
    if (lstopo_show_cpuset
        || lstopo_show_only != (hwloc_obj_type_t)-1
        || loutput.verbose_mode != LSTOPO_VERBOSE_MODE_DEFAULT)
      output_format = LSTOPO_OUTPUT_CONSOLE;
  }

  if (input_format == HWLOC_UTILS_INPUT_XML
      && output_format == LSTOPO_OUTPUT_XML) {
    /* must be after parsing output format and before loading the topology */
    putenv("HWLOC_XML_USERDATA_NOT_DECODED=1");
    hwloc_topology_set_userdata_import_callback(topology, hwloc_utils_userdata_import_cb);
    hwloc_topology_set_userdata_export_callback(topology, hwloc_utils_userdata_export_cb);
  }

  err = hwloc_topology_load (topology);
  if (err) {
    fprintf(stderr, "hwloc_topology_load() failed (%s).\n", strerror(errno));
    return EXIT_FAILURE;
  }

  if (top)
    add_process_objects(topology);

  if (restrictstring) {
    hwloc_bitmap_t restrictset = hwloc_bitmap_alloc();
    if (!strcmp (restrictstring, "binding")) {
      if (lstopo_pid_number > 0)
	hwloc_get_proc_cpubind(topology, lstopo_pid, restrictset, HWLOC_CPUBIND_PROCESS);
      else
	hwloc_get_cpubind(topology, restrictset, HWLOC_CPUBIND_PROCESS);
    } else {
      hwloc_bitmap_sscanf(restrictset, restrictstring);
    }
    err = hwloc_topology_restrict (topology, restrictset, restrict_flags);
    if (err) {
      perror("Restricting the topology");
      /* fallthrough */
    }
    hwloc_bitmap_free(restrictset);
    free(restrictstring);
  }

  if (loutput.logical == -1) {
    if (output_format == LSTOPO_OUTPUT_CONSOLE)
      loutput.logical = 1;
    else if (output_format != LSTOPO_OUTPUT_DEFAULT)
      loutput.logical = 0;
  }

  loutput.topology = topology;
  loutput.file = NULL;

  lstopo_populate_userdata(hwloc_get_root_obj(topology));

  if (output_format != LSTOPO_OUTPUT_XML && lstopo_collapse)
    lstopo_add_collapse_attributes(topology);

  switch (output_format) {
    case LSTOPO_OUTPUT_DEFAULT:
#ifdef LSTOPO_HAVE_GRAPHICS
#if CAIRO_HAS_XLIB_SURFACE && defined HWLOC_HAVE_X11_KEYSYM
      if (getenv("DISPLAY")) {
        if (loutput.logical == -1)
          loutput.logical = 0;
        output_x11(&loutput, NULL);
      } else
#endif /* CAIRO_HAS_XLIB_SURFACE */
#ifdef HWLOC_WIN_SYS
      {
        if (loutput.logical == -1)
          loutput.logical = 0;
        output_windows(&loutput, NULL);
      }
#endif
#endif /* !LSTOPO_HAVE_GRAPHICS */
#if !defined HWLOC_WIN_SYS || !defined LSTOPO_HAVE_GRAPHICS
      {
        if (loutput.logical == -1)
          loutput.logical = 1;
        output_console(&loutput, NULL);
      }
#endif
      break;

    case LSTOPO_OUTPUT_CONSOLE:
      output_console(&loutput, filename);
      break;
    case LSTOPO_OUTPUT_SYNTHETIC:
      output_synthetic(&loutput, filename);
      break;
    case LSTOPO_OUTPUT_ASCII:
      output_ascii(&loutput, filename);
      break;
    case LSTOPO_OUTPUT_FIG:
      output_fig(&loutput, filename);
      break;
#ifdef LSTOPO_HAVE_GRAPHICS
# if CAIRO_HAS_PNG_FUNCTIONS
    case LSTOPO_OUTPUT_PNG:
      output_png(&loutput, filename);
      break;
# endif /* CAIRO_HAS_PNG_FUNCTIONS */
# if CAIRO_HAS_PDF_SURFACE
    case LSTOPO_OUTPUT_PDF:
      output_pdf(&loutput, filename);
      break;
# endif /* CAIRO_HAS_PDF_SURFACE */
# if CAIRO_HAS_PS_SURFACE
    case LSTOPO_OUTPUT_PS:
      output_ps(&loutput, filename);
      break;
#endif /* CAIRO_HAS_PS_SURFACE */
#if CAIRO_HAS_SVG_SURFACE
    case LSTOPO_OUTPUT_SVG:
      output_svg(&loutput, filename);
      break;
#endif /* CAIRO_HAS_SVG_SURFACE */
#endif /* LSTOPO_HAVE_GRAPHICS */
    case LSTOPO_OUTPUT_XML:
      output_xml(&loutput, filename);
      break;
    default:
      fprintf(stderr, "file format not supported\n");
      usage(callname, stderr);
      exit(EXIT_FAILURE);
  }

  lstopo_destroy_userdata(hwloc_get_root_obj(topology));
  hwloc_utils_userdata_free_recursive(hwloc_get_root_obj(topology));
  hwloc_topology_destroy (topology);

  for(i=0; i<lstopo_append_legends_nr; i++)
    free(lstopo_append_legends[i]);
  free(lstopo_append_legends);

  return EXIT_SUCCESS;
}
Example #3
extern int
get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
	    uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
	    uint16_t *p_block_map_size,
	    uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	int actual_cpus;
	int macid;
	int absid;
	int actual_boards = 1, depth;
	int i;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error initializing the hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse the full system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignore cache and misc objects */
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC);

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error loading the hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}

	/* Some processors (e.g. AMD Opteron 6000 series) contain multiple
	 * NUMA nodes per socket. This is a configuration which does not map
	 * into the hardware entities that Slurm optimizes resource allocation
	 * for (PU/thread, core, socket, baseboard, node and network switch).
	 * In order to optimize resource allocations on such hardware, Slurm
	 * will consider each NUMA node within the socket as a separate socket.
	 * You can disable this by configuring "SchedulerParameters=Ignore_NUMA",
	 * in which case Slurm will report the correct socket count on the node,
	 * but not be able to optimize resource allocations on the NUMA nodes.
	 */
	objtype[SOCKET] = HWLOC_OBJ_SOCKET;
	objtype[CORE]   = HWLOC_OBJ_CORE;
	objtype[PU]     = HWLOC_OBJ_PU;
	if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	    hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) {
		char *sched_params = slurm_get_sched_params();
		if (sched_params &&
		    strcasestr(sched_params, "Ignore_NUMA")) {
			info("Ignoring NUMA nodes within a socket");
		} else {
			info("Considering each NUMA node as a socket");
			objtype[SOCKET] = HWLOC_OBJ_NODE;
		}
		xfree(sched_params);
	}

	/* number of objects */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth),
				    1);
	}
	nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]);
	nobj[CORE]   = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);
	/*
	 * Workaround for hwloc
	 * hwloc_get_nbobjs_by_type() returns 0 on some architectures.
	 */
	if ( nobj[SOCKET] == 0 ) {
		debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1");
		nobj[SOCKET] = 1;
	}
	if ( nobj[CORE] == 0 ) {
		debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1");
		nobj[CORE] = 1;
	}
	if ( nobj[SOCKET] == -1 )
		fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1");
	if ( nobj[CORE] == -1 )
		fatal("get_cpuinfo() can not handle nobj[CORE] = -1");
	actual_cpus  = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
#if 0
	/* Used to find workaround above */
	info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d",
	     CORE, SOCKET, actual_cpus, nobj[CORE]);
#endif
	nobj[PU]     = actual_cpus/nobj[CORE];  /* threads per core */
	nobj[CORE]  /= nobj[SOCKET];            /* cores per socket */

	debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}
		/* create map with hwloc */
		for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) {
			for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) {
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
					            topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = idx[SOCKET]*nobj[CORE]*nobj[PU]
					      + idx[CORE]*nobj[PU]
					      + idx[PU];

					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID is
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d", absid, macid);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			 }
		}
	}

	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if DEBUG_DETAIL
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
				i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return 0;

}
Example #4
int
pocl_topology_detect_device_info(cl_device_id device)
{
  hwloc_topology_t pocl_topology;
  int ret = 0;

#ifdef HWLOC_API_2
  if (hwloc_get_api_version () < 0x20000)
    POCL_MSG_ERR ("pocl was compiled against libhwloc 2.x but is"
                  "actually running against libhwloc 1.x \n");
#else
  if (hwloc_get_api_version () >= 0x20000)
    POCL_MSG_ERR ("pocl was compiled against libhwloc 1.x but is"
                  "actually running against libhwloc 2.x \n");
#endif

  /*
   * hwloc's OpenCL backend causes problems at the initialization stage
   * because it reloads libpocl.so via the ICD loader.
   *
   * See: https://github.com/pocl/pocl/issues/261
   *
   * The only trick to stop hwloc from initializing the OpenCL plugin
   * I could find is to point the plugin search path to a place where there
   * are no plugins to be found.
   */
  setenv ("HWLOC_PLUGINS_PATH", "/dev/null", 1);

  ret = hwloc_topology_init (&pocl_topology);
  if (ret == -1)
  {
    POCL_MSG_ERR ("Cannot initialize the topology.\n");
    return ret;
  }

#ifdef HWLOC_API_2
  hwloc_topology_set_io_types_filter(pocl_topology, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_SYSTEM, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_GROUP, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_BRIDGE, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_MISC, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE);
#else
  /* note: HWLOC_TOPOLOGY_FLAG_WHOLE_IO is a topology flag, not an object
   * type, so this call is not meaningful; I/O objects are not discovered
   * by default on hwloc 1.x anyway */
  hwloc_topology_ignore_type (pocl_topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_SYSTEM);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_GROUP);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_BRIDGE);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_MISC);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_PCI_DEVICE);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_OS_DEVICE);
#endif

  ret = hwloc_topology_load (pocl_topology);
  if (ret == -1)
  {
    POCL_MSG_ERR ("Cannot load the topology.\n");
    goto exit_destroy;
  }

#ifdef HWLOC_API_2
  device->global_mem_size =
      hwloc_get_root_obj(pocl_topology)->total_memory;
#else
  device->global_mem_size =
      hwloc_get_root_obj(pocl_topology)->memory.total_memory;
#endif

  // Try to get the number of CPU cores from topology
  int depth = hwloc_get_type_depth(pocl_topology, HWLOC_OBJ_PU);
  if(depth != HWLOC_TYPE_DEPTH_UNKNOWN)
    device->max_compute_units = hwloc_get_nbobjs_by_depth(pocl_topology, depth);

  /* Find information about global memory cache by looking at the first
   * cache covering the first PU */
  do {
      size_t cache_size = 0, cacheline_size = 0;

      hwloc_obj_t core
          = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_CORE, NULL);
      if (core)
        {
          hwloc_obj_t cache
              = hwloc_get_shared_cache_covering_obj (pocl_topology, core);
          if ((cache) && (cache->attr))
            {
              cacheline_size = cache->attr->cache.linesize;
              cache_size = cache->attr->cache.size;
            }
          else
            core = NULL; /* fallback to L1 cache size */
        }

      hwloc_obj_t pu
          = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_PU, NULL);
      if (!core && pu)
        {
          hwloc_obj_t cache
              = hwloc_get_shared_cache_covering_obj (pocl_topology, pu);
          if ((cache) && (cache->attr))
            {
              cacheline_size = cache->attr->cache.linesize;
              cache_size = cache->attr->cache.size;
            }
        }

      if (!cache_size || !cacheline_size)
        break;

      device->global_mem_cache_type
          = 0x2; // CL_READ_WRITE_CACHE, without including all of CL/cl.h
      device->global_mem_cacheline_size = cacheline_size;
      device->global_mem_cache_size = cache_size;
  } while (0);

  // Destroy topology object and return
exit_destroy:
  hwloc_topology_destroy (pocl_topology);
  return ret;

}
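The 1.x/2.x divergence handled by the #ifdef blocks above could also be hidden behind a small compatibility wrapper so the call sites stay identical; a minimal sketch under that assumption (the pocl_topo_drop_type() name is made up for illustration; HWLOC_API_VERSION is hwloc's own version macro):

#include <hwloc.h>

/* Drop one object type from a topology: type filters on hwloc >= 2.0,
 * hwloc_topology_ignore_type() on hwloc 1.x. Call it between
 * hwloc_topology_init() and hwloc_topology_load(). */
static inline void
pocl_topo_drop_type (hwloc_topology_t topo, hwloc_obj_type_t type)
{
#if HWLOC_API_VERSION >= 0x00020000
  hwloc_topology_set_type_filter (topo, type, HWLOC_TYPE_FILTER_KEEP_NONE);
#else
  hwloc_topology_ignore_type (topo, type); /* may fail for I/O types; harmless */
#endif
}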
Example #5
extern int
get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
	    uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
	    uint16_t *p_block_map_size,
	    uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	int actual_cpus;
	int macid;
	int absid;
	int actual_boards = 1, depth;
	int i;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error initializing the hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse the full system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignore cache and misc objects */
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC);

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error loading the hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}

	/* At least on a temporary basis, one could map AMD Bulldozer entities
	 * onto the entities that Slurm does optimize placement for today (e.g.
	 * map each Bulldozer core to a thread and each Bulldozer module to a
	 * Slurm core, alternately map the Bulldozer module to a Slurm socket
	 * and the Bulldozer socket to a Slurm board). Perhaps not ideal, but
	 * it would achieve the desired locality. */

	if ( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	     hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) ) {
		/* Some sockets contain multiple NUMA nodes
		 * (e.g. the AMD Opteron 6000 series).
		 * In such cases, use the NUMA node instead of the socket. */
		objtype[SOCKET] = HWLOC_OBJ_NODE;
		objtype[CORE]   = HWLOC_OBJ_CORE;
		objtype[PU]     = HWLOC_OBJ_PU;
	} else {
		objtype[SOCKET] = HWLOC_OBJ_SOCKET;
		objtype[CORE]   = HWLOC_OBJ_CORE;
		objtype[PU]     = HWLOC_OBJ_PU;
	}

	/* number of objects */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth),
				    1);
	}
	nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]);
	nobj[CORE]   = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);
	actual_cpus  = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
	nobj[PU]     = actual_cpus/nobj[CORE];  /* threads per core */
	nobj[CORE]  /= nobj[SOCKET];            /* cores per socket */

	debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}
		
		/* create map with hwloc */
		for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) {
			for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) {
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
					            topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = idx[SOCKET]*nobj[CORE]*nobj[PU]
					      + idx[CORE]*nobj[PU]
					      + idx[PU];

					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID is
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d", absid, macid);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			 }
		}
	}

	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if DEBUG_DETAIL
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
				i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return 0;

}
Example #6
int main(void)
{
  hwloc_topology_t topology;
  hwloc_obj_t obj;

  hwloc_topology_init(&topology);
  hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO);
  assert(-1 == hwloc_topology_ignore_type(topology, HWLOC_OBJ_PCI_DEVICE));
  assert(-1 == hwloc_topology_ignore_type_keep_structure(topology, HWLOC_OBJ_BRIDGE));
  assert(-1 == hwloc_topology_ignore_type(topology, HWLOC_OBJ_OS_DEVICE));
  hwloc_topology_load(topology);

  printf("Found %d bridges\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_BRIDGE));
  obj = NULL;
  while ((obj = hwloc_get_next_bridge(topology, obj)) != NULL) {
    assert(obj->type == HWLOC_OBJ_BRIDGE);
    /* only host->pci and pci->pci bridge supported so far */
    if (obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) {
      assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI);
      printf(" Found host->PCI bridge for domain %04x bus %02x-%02x\n",
	     obj->attr->bridge.downstream.pci.domain,
	     obj->attr->bridge.downstream.pci.secondary_bus,
	     obj->attr->bridge.downstream.pci.subordinate_bus);
    } else {
      assert(obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI);
      assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI);
      printf(" Found PCI->PCI bridge [%04x:%04x] for domain %04x bus %02x-%02x\n",
	     obj->attr->bridge.upstream.pci.vendor_id,
	     obj->attr->bridge.upstream.pci.device_id,
	     obj->attr->bridge.downstream.pci.domain,
	     obj->attr->bridge.downstream.pci.secondary_bus,
	     obj->attr->bridge.downstream.pci.subordinate_bus);
    }
  }

  printf("Found %d PCI devices\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PCI_DEVICE));
  obj = NULL;
  while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) {
    assert(obj->type == HWLOC_OBJ_PCI_DEVICE);
    printf(" Found PCI device class %04x vendor %04x model %04x\n",
	   obj->attr->pcidev.class_id, obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id);
  }

  printf("Found %d OS devices\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_OS_DEVICE));
  obj = NULL;
  while ((obj = hwloc_get_next_osdev(topology, obj)) != NULL) {
    assert(obj->type == HWLOC_OBJ_OS_DEVICE);
    printf(" Found OS device %s subtype %d\n", obj->name, obj->attr->osdev.type);
  }

  assert(HWLOC_TYPE_DEPTH_BRIDGE == hwloc_get_type_depth(topology, HWLOC_OBJ_BRIDGE));
  assert(HWLOC_TYPE_DEPTH_PCI_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_PCI_DEVICE));
  assert(HWLOC_TYPE_DEPTH_OS_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_OS_DEVICE));
  assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_PCI_DEVICE) < 0);
  assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_OS_DEVICE) < 0);
  assert(hwloc_compare_types(HWLOC_OBJ_PCI_DEVICE, HWLOC_OBJ_OS_DEVICE) < 0);

  /* check that hwloc_get_hostbridge_by_pcibus() and hwloc_get_non_io_ancestor_obj work fine */
  obj = NULL;
  while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) {
    assert(hwloc_get_hostbridge_by_pcibus(topology,
					  obj->attr->pcidev.domain,
					  obj->attr->pcidev.bus)->parent
	   == hwloc_get_non_io_ancestor_obj(topology, obj));
  }

  hwloc_topology_destroy(topology);

  return 0;
}
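Note that HWLOC_TOPOLOGY_FLAG_WHOLE_IO, as used above, only exists in hwloc 1.x; on hwloc 2.x the rough equivalent, to the best of my understanding, is to keep all I/O object types via a filter before loading. A minimal sketch (the load_topology_with_io() helper is made up for illustration):

#include <hwloc.h>

/* hwloc >= 2.0 sketch: request all I/O objects (bridges, PCI and OS
 * devices) via type filters instead of the removed WHOLE_IO flag. */
static int load_topology_with_io (hwloc_topology_t *topo)
{
  if (hwloc_topology_init (topo))
    return -1;
  hwloc_topology_set_io_types_filter (*topo, HWLOC_TYPE_FILTER_KEEP_ALL);
  if (hwloc_topology_load (*topo)) {
    hwloc_topology_destroy (*topo);
    return -1;
  }
  return 0;
}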
Example #7
extern int
get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
	    uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
	    uint16_t *p_block_map_size,
	    uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	bitstr_t *used_socket = NULL;
	int *cores_per_socket;
	int actual_cpus;
	int macid;
	int absid;
	int actual_boards = 1, depth, sock_cnt, tot_socks = 0;
	int i, used_core_idx, used_sock_idx;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error initializing the hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse the full system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignore cache and misc objects */
#if HWLOC_API_VERSION < 0x00020000
	hwloc_topology_ignore_type(topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type(topology, HWLOC_OBJ_MISC);
#else
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L2CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L3CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L4CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L5CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MISC,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
#endif

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error loading the hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}
#if _DEBUG
	_hwloc_children(topology, hwloc_get_root_obj(topology), 0);
#endif
	/*
	 * Some processors (e.g. AMD Opteron 6000 series) contain multiple
	 * NUMA nodes per socket. This is a configuration which does not map
	 * into the hardware entities that Slurm optimizes resource allocation
	 * for (PU/thread, core, socket, baseboard, node and network switch).
	 * In order to optimize resource allocations on such hardware, Slurm
	 * will consider each NUMA node within the socket as a separate socket.
	 * You can disable this by configuring "SchedulerParameters=Ignore_NUMA",
	 * in which case Slurm will report the correct socket count on the node,
	 * but not be able to optimize resource allocations on the NUMA nodes.
	 */
	objtype[SOCKET] = HWLOC_OBJ_SOCKET;
	objtype[CORE]   = HWLOC_OBJ_CORE;
	objtype[PU]     = HWLOC_OBJ_PU;
	if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	    hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) {
		char *sched_params = slurm_get_sched_params();
		if (sched_params &&
		    strcasestr(sched_params, "Ignore_NUMA")) {
			info("Ignoring NUMA nodes within a socket");
		} else {
			info("Considering each NUMA node as a socket");
			objtype[SOCKET] = HWLOC_OBJ_NODE;
		}
		xfree(sched_params);
	}

	/* number of objects */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth),
				    1);
	}

	/*
	 * Count sockets/NUMA containing any cores.
	 * KNL NUMA with no cores are NOT counted.
	 */
	nobj[SOCKET] = 0;
	depth = hwloc_get_type_depth(topology, objtype[SOCKET]);
	used_socket = bit_alloc(_MAX_SOCKET_INX);
	cores_per_socket = xmalloc(sizeof(int) * _MAX_SOCKET_INX);
	sock_cnt = hwloc_get_nbobjs_by_depth(topology, depth);
	for (i = 0; i < sock_cnt; i++) {
		obj = hwloc_get_obj_by_depth(topology, depth, i);
		if (obj->type == objtype[SOCKET]) {
			cores_per_socket[i] = _core_child_count(topology, obj);
			if (cores_per_socket[i] > 0) {
				nobj[SOCKET]++;
				bit_set(used_socket, tot_socks);
			}
			if (++tot_socks >= _MAX_SOCKET_INX) {	/* Bitmap size */
				fatal("Socket count exceeds %d, expand data structure size",
				      _MAX_SOCKET_INX);
				break;
			}
		}
	}

	nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);

	/*
	 * Workaround for an hwloc bug: in some cases the topology "children" array
	 * does not get populated, so _core_child_count() always returns 0
	 */
	if (nobj[SOCKET] == 0) {
		nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology,
							objtype[SOCKET]);
		if (nobj[SOCKET] == 0) {
			debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1");
			nobj[SOCKET] = 1;
		}
		if (nobj[SOCKET] >= _MAX_SOCKET_INX) {	/* Bitmap size */
			fatal("Socket count exceeds %d, expand data structure size",
			      _MAX_SOCKET_INX);
		}
		bit_nset(used_socket, 0, nobj[SOCKET] - 1);
	}

	/*
	 * Workaround for hwloc
	 * hwloc_get_nbobjs_by_type() returns 0 on some architectures.
	 */
	if ( nobj[CORE] == 0 ) {
		debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1");
		nobj[CORE] = 1;
	}
	if ( nobj[SOCKET] == -1 )
		fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1");
	if ( nobj[CORE] == -1 )
		fatal("get_cpuinfo() can not handle nobj[CORE] = -1");
	actual_cpus  = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
#if 0
	/* Used to find workaround above */
	info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d",
	     CORE, SOCKET, actual_cpus, nobj[CORE]);
#endif
	if ((actual_cpus % nobj[CORE]) != 0) {
		error("Thread count (%d) not multiple of core count (%d)",
		      actual_cpus, nobj[CORE]);
	}
	nobj[PU] = actual_cpus / nobj[CORE];	/* threads per core */

	if ((nobj[CORE] % nobj[SOCKET]) != 0) {
		error("Core count (%d) not multiple of socket count (%d)",
		      nobj[CORE], nobj[SOCKET]);
	}
	nobj[CORE] /= nobj[SOCKET];		/* cores per socket */

	debug("CPUs:%d Boards:%d Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	if (p_block_map_size)
		*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}
		/* create map with hwloc */
		used_sock_idx = -1;
		used_core_idx = -1;
		for (idx[SOCKET] = 0; (used_sock_idx + 1) < nobj[SOCKET];
		     idx[SOCKET]++) {
			if (!bit_test(used_socket, idx[SOCKET]))
				continue;
			used_sock_idx++;
			for (idx[CORE] = 0;
			     idx[CORE] < cores_per_socket[idx[SOCKET]];
			     idx[CORE]++) {
				used_core_idx++;
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
					            topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = used_core_idx * nobj[PU] + idx[PU];

					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID is
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d S:C:T %d:%d:%d", absid, macid,
					       used_sock_idx, idx[CORE], idx[PU]);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			}
		}
	}
	FREE_NULL_BITMAP(used_socket);
	xfree(cores_per_socket);
	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if _DEBUG
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
				i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return SLURM_SUCCESS;

}