예제 #1
0
static void
hwloc_info_show_obj(hwloc_obj_t obj, const char *type, const char *prefix, int verbose)
{
  char s[128];
  unsigned i;
  if (verbose < 0)
    return;
  printf("%s type = %s\n", prefix, hwloc_obj_type_string(obj->type));
  printf("%s full type = %s\n", prefix, type);
  printf("%s logical index = %u\n", prefix, obj->logical_index);
  if (obj->os_index != (unsigned) -1)
    printf("%s os index = %u\n", prefix, obj->os_index);
  if (obj->name)
    printf("%s name = %s\n", prefix, obj->name);
  if (obj->depth != (unsigned) -1)
    printf("%s depth = %u\n", prefix, obj->depth);
  printf("%s sibling rank = %u\n", prefix, obj->sibling_rank);
  printf("%s children = %u\n", prefix, obj->arity);
  if (obj->memory.local_memory)
    printf("%s local memory = %llu\n", prefix, (unsigned long long) obj->memory.local_memory);
  if (obj->memory.total_memory)
    printf("%s total memory = %llu\n", prefix, (unsigned long long) obj->memory.total_memory);

  if (obj->cpuset) {
    hwloc_bitmap_snprintf(s, sizeof(s), obj->cpuset);
    printf("%s cpuset = %s\n", prefix, s);
  }
  if (obj->complete_cpuset) {
    hwloc_bitmap_snprintf(s, sizeof(s), obj->complete_cpuset);
    printf("%s complete cpuset = %s\n", prefix, s);
  }
  if (obj->online_cpuset) {
    hwloc_bitmap_snprintf(s, sizeof(s), obj->online_cpuset);
    printf("%s online cpuset = %s\n", prefix, s);
  }
  if (obj->allowed_cpuset) {
    hwloc_bitmap_snprintf(s, sizeof(s), obj->allowed_cpuset);
    printf("%s allowed cpuset = %s\n", prefix, s);
  }

  if (obj->nodeset) {
    hwloc_bitmap_snprintf(s, sizeof(s), obj->nodeset);
    printf("%s nodeset = %s\n", prefix, s);
  }
  if (obj->complete_nodeset) {
    hwloc_bitmap_snprintf(s, sizeof(s), obj->complete_nodeset);
    printf("%s complete nodeset = %s\n", prefix, s);
  }
  if (obj->allowed_nodeset) {
    hwloc_bitmap_snprintf(s, sizeof(s), obj->allowed_nodeset);
    printf("%s allowed nodeset = %s\n", prefix, s);
  }

  switch (obj->type) {
  case HWLOC_OBJ_CACHE:
    printf("%s attr cache depth = %u\n", prefix, obj->attr->cache.depth);
    switch (obj->attr->cache.type) {
    case HWLOC_OBJ_CACHE_UNIFIED: printf("%s attr cache type = Unified\n", prefix); break;
    case HWLOC_OBJ_CACHE_DATA: printf("%s attr cache type = Data\n", prefix); break;
    case HWLOC_OBJ_CACHE_INSTRUCTION: printf("%s attr cache type = Instruction\n", prefix); break;
    }
    printf("%s attr cache size = %llu\n", prefix, (unsigned long long) obj->attr->cache.size);
    printf("%s attr cache line size = %u\n", prefix, obj->attr->cache.linesize);
    if (obj->attr->cache.associativity == -1)
      printf("%s attr cache ways = Fully-associative\n", prefix);
    else if (obj->attr->cache.associativity != 0)
      printf("%s attr cache ways = %d\n", prefix, obj->attr->cache.associativity);
    break;
  case HWLOC_OBJ_GROUP:
    printf("%s attr group depth = %u\n", prefix, obj->attr->group.depth);
    break;
  case HWLOC_OBJ_BRIDGE:
    switch (obj->attr->bridge.upstream_type) {
    case HWLOC_OBJ_BRIDGE_HOST:
      printf("%s attr bridge upstream type = Host\n", prefix);
      break;
    case HWLOC_OBJ_BRIDGE_PCI:
      printf("%s attr bridge upstream type = PCI\n", prefix);
      printf("%s attr PCI bus id = %04x:%02x:%02x.%01x\n",
	     prefix, obj->attr->pcidev.domain, obj->attr->pcidev.bus, obj->attr->pcidev.dev, obj->attr->pcidev.func);
      printf("%s attr PCI class = %04x\n",
	     prefix, obj->attr->pcidev.class_id);
      printf("%s attr PCI id = %04x:%04x\n",
	     prefix, obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id);
      if (obj->attr->pcidev.linkspeed)
	printf("%s attr PCI linkspeed = %f GB/s\n", prefix, obj->attr->pcidev.linkspeed);
      break;
    }
    switch (obj->attr->bridge.downstream_type) {
    case HWLOC_OBJ_BRIDGE_HOST:
      assert(0);
    case HWLOC_OBJ_BRIDGE_PCI:
      printf("%s attr bridge downstream type = PCI\n", prefix);
      printf("%s attr PCI secondary bus = %02x\n",
	     prefix, obj->attr->bridge.downstream.pci.secondary_bus);
      printf("%s attr PCI subordinate bus = %02x\n",
	     prefix, obj->attr->bridge.downstream.pci.subordinate_bus);
      break;
    }
    break;
  case HWLOC_OBJ_PCI_DEVICE:
    printf("%s attr PCI bus id = %04x:%02x:%02x.%01x\n",
	   prefix, obj->attr->pcidev.domain, obj->attr->pcidev.bus, obj->attr->pcidev.dev, obj->attr->pcidev.func);
    printf("%s attr PCI class = %04x\n",
	   prefix, obj->attr->pcidev.class_id);
    printf("%s attr PCI id = %04x:%04x\n",
	   prefix, obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id);
    if (obj->attr->pcidev.linkspeed)
      printf("%s attr PCI linkspeed = %f GB/s\n", prefix, obj->attr->pcidev.linkspeed);
    break;
  case HWLOC_OBJ_OS_DEVICE:
    printf("%s attr osdev type = %s\n", prefix, type);
    break;
  default:
    /* nothing to show */
    break;
  }
  for(i=0; i<obj->infos_count; i++) {
    printf("%s info %s = %s\n", prefix, obj->infos[i].name, obj->infos[i].value);
  }
}
예제 #2
0
static int _task_cgroup_cpuset_dist_block(
	hwloc_topology_t topology, hwloc_obj_type_t hwtype,
	hwloc_obj_type_t req_hwtype, uint32_t nobj,
	stepd_step_rec_t *job, int bind_verbose, hwloc_bitmap_t cpuset)
{
	hwloc_obj_t obj;
	uint32_t core_loop, ntskip, npdist;
	uint32_t i, j, pfirst, plast;
	uint32_t taskid = job->envtp->localid;
	int hwdepth;
	uint32_t npus, ncores, nsockets;
	int spec_thread_cnt = 0;
	bitstr_t *spec_threads = NULL;
	bool hwloc_success = true;

	uint32_t core_idx;
	bool core_fcyclic, core_block;

	nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						       HWLOC_OBJ_SOCKET);
	ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						     HWLOC_OBJ_CORE);
	npus = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						   HWLOC_OBJ_PU);

	core_block = (job->task_dist & SLURM_DIST_COREMASK) ==
		SLURM_DIST_COREBLOCK ? true : false;
	core_fcyclic = (job->task_dist & SLURM_DIST_COREMASK) ==
		SLURM_DIST_CORECFULL ? true : false;

	if (bind_verbose) {
		info("task/cgroup: task[%u] using block distribution, "
		     "task_dist 0x%x", taskid, job->task_dist);
	}

	if ((hwloc_compare_types(hwtype, HWLOC_OBJ_PU) == 0) && !core_block) {
		uint32_t *thread_idx = xmalloc(ncores * sizeof(uint32_t));
		ntskip = taskid;
		npdist = job->cpus_per_task;

		i = 0; j = 0;
		core_idx = 0;
		core_loop = 0;
		while (i < ntskip + 1 && core_loop < npdist + 1) {
			while ((core_idx < ncores) && (j < npdist)) {
				obj = hwloc_get_obj_below_by_type(
					topology, HWLOC_OBJ_CORE, core_idx,
					hwtype, thread_idx[core_idx]);
				if ((obj == NULL) && (core_idx == 0) &&
				    (thread_idx[core_idx] == 0))
					hwloc_success = false;
				if (obj != NULL) {
					thread_idx[core_idx]++;
					j++;
					if (i == ntskip)
						_add_hwloc_cpuset(hwtype,
							req_hwtype, obj, taskid,
							bind_verbose, cpuset);
					if ((j < npdist) && core_fcyclic)
						core_idx++;
				} else {
					core_idx++;
				}
			}
			if (j == npdist) {
				i++; j = 0;
				core_idx++; // no validity check, handled by the while
				core_loop = 0;
			} else {
				core_loop++;
				core_idx = 0;
			}
		}
		xfree(thread_idx);

		/* should never happen in normal scenario */
		if ((core_loop > npdist) && !hwloc_success) {
			/* hwloc_get_obj_below_by_type() fails if no CPU set
			 * configured, see hwloc documentation for details */
			error("task/cgroup: hwloc_get_obj_below_by_type() "
			      "failing, task/affinity plugin may be required"
			      "to address bug fixed in HWLOC version 1.11.5");
			return XCGROUP_ERROR;
		} else if (core_loop > npdist) {
			char buf[128] = "";
			hwloc_bitmap_snprintf(buf, sizeof(buf), cpuset);
			error("task/cgroup: task[%u] infinite loop broken while "
			      "trying to provision compute elements using %s (bitmap:%s)",
			      taskid, format_task_dist_states(job->task_dist),
			      buf);
			return XCGROUP_ERROR;
		} else
			return XCGROUP_SUCCESS;
	}

	if (hwloc_compare_types(hwtype, HWLOC_OBJ_CORE) >= 0) {
		/* cores or threads granularity */
		pfirst = taskid * job->cpus_per_task ;
		plast = pfirst + job->cpus_per_task - 1;
	} else {
		/* sockets or ldoms granularity */
		pfirst = taskid;
		plast = pfirst;
	}

	hwdepth = hwloc_get_type_depth(topology, hwtype);
	if ((job->job_core_spec != NO_VAL16) &&
	    (job->job_core_spec &  CORE_SPEC_THREAD)  &&
	    (job->job_core_spec != CORE_SPEC_THREAD)  &&
	    (nsockets != 0)) {
		/* Skip specialized threads as needed */
		int i, t, c, s;
		int cores = MAX(1, (ncores / nsockets));
		int threads = npus / cores;
		spec_thread_cnt = job->job_core_spec & (~CORE_SPEC_THREAD);
		spec_threads = bit_alloc(npus);
		for (t = threads - 1;
		     ((t >= 0) && (spec_thread_cnt > 0)); t--) {
			for (c = cores - 1;
			     ((c >= 0) && (spec_thread_cnt > 0)); c--) {
				for (s = nsockets - 1;
				     ((s >= 0) && (spec_thread_cnt > 0)); s--) {
					i = s * cores + c;
					i = (i * threads) + t;
					bit_set(spec_threads, i);
					spec_thread_cnt--;
				}
			}
		}
		if (hwtype == HWLOC_OBJ_PU) {
			for (i = 0; i <= pfirst && i < npus; i++) {
				if (bit_test(spec_threads, i))
					pfirst++;
			};
		}
	}

	for (i = pfirst; i <= plast && i < nobj ; i++) {
		obj = hwloc_get_obj_by_depth(topology, hwdepth, (int)i);
		_add_hwloc_cpuset(hwtype, req_hwtype, obj, taskid,
			    bind_verbose, cpuset);
	}

	if (spec_threads) {
		for (i = 0; i < npus; i++) {
			if (bit_test(spec_threads, i)) {
				hwloc_bitmap_clr(cpuset, i);
			}
		};
		FREE_NULL_BITMAP(spec_threads);
	}

	return XCGROUP_SUCCESS;
}
예제 #3
0
static void print_hwloc_obj(char **output, char *prefix,
                            hwloc_topology_t topo, hwloc_obj_t obj)
{
    hwloc_obj_t obj2;
    char string[1024], *tmp, *tmp2, *pfx;
    unsigned i;
    struct hwloc_topology_support *support;

    /* print the object type */
    hwloc_obj_type_snprintf(string, 1024, obj, 1);
    asprintf(&pfx, "\n%s\t", (NULL == prefix) ? "" : prefix);
    asprintf(&tmp, "%sType: %s Number of child objects: %u%sName=%s",
             (NULL == prefix) ? "" : prefix, string, obj->arity,
             pfx, (NULL == obj->name) ? "NULL" : obj->name);
    if (0 < hwloc_obj_attr_snprintf(string, 1024, obj, pfx, 1)) {
        /* print the attributes */
        asprintf(&tmp2, "%s%s%s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    /* print the cpusets - apparently, some new HWLOC types don't
     * have cpusets, so protect ourselves here
     */
    if (NULL != obj->cpuset) {
        hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->cpuset);
        asprintf(&tmp2, "%s%sCpuset:  %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (NULL != obj->online_cpuset) {
        hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->online_cpuset);
        asprintf(&tmp2, "%s%sOnline:  %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (NULL != obj->allowed_cpuset) {
        hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->allowed_cpuset);
        asprintf(&tmp2, "%s%sAllowed: %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (HWLOC_OBJ_MACHINE == obj->type) {
        /* root level object - add support values */
        support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
        asprintf(&tmp2, "%s%sBind CPU proc:   %s%sBind CPU thread: %s", tmp, pfx,
                 (support->cpubind->set_thisproc_cpubind) ? "TRUE" : "FALSE", pfx,
                 (support->cpubind->set_thisthread_cpubind) ? "TRUE" : "FALSE");
        free(tmp);
        tmp = tmp2;
        asprintf(&tmp2, "%s%sBind MEM proc:   %s%sBind MEM thread: %s", tmp, pfx,
                 (support->membind->set_thisproc_membind) ? "TRUE" : "FALSE", pfx,
                 (support->membind->set_thisthread_membind) ? "TRUE" : "FALSE");
        free(tmp);
        tmp = tmp2;
    }
    asprintf(&tmp2, "%s%s\n", (NULL == *output) ? "" : *output, tmp);
    free(tmp);
    free(pfx);
    asprintf(&pfx, "%s\t", (NULL == prefix) ? "" : prefix);
    for (i=0; i < obj->arity; i++) {
        obj2 = obj->children[i];
        /* print the object */
        print_hwloc_obj(&tmp2, pfx, topo, obj2);
    }
    free(pfx);
    if (NULL != *output) {
        free(*output);
    }
    *output = tmp2;
}
예제 #4
0
static int _task_cgroup_cpuset_dist_cyclic(
	hwloc_topology_t topology, hwloc_obj_type_t hwtype,
	hwloc_obj_type_t req_hwtype, stepd_step_rec_t *job, int bind_verbose,
	hwloc_bitmap_t cpuset)
{
	hwloc_obj_t obj;
	uint32_t  s_ix;		/* socket index */
	uint32_t *c_ixc;	/* core index by socket (current taskid) */
	uint32_t *c_ixn;	/* core index by socket (next taskid) */
	uint32_t *t_ix;		/* thread index by core by socket */
	uint16_t npus = 0, nboards = 0, nthreads = 0, ncores = 0, nsockets = 0;
	uint32_t taskid = job->envtp->localid;
	int spec_thread_cnt = 0;
	bitstr_t *spec_threads = NULL;
	uint32_t obj_idxs[3], cps, tpc, i, j, sock_loop, ntskip, npdist;
	bool core_cyclic, core_fcyclic, sock_fcyclic;
	bool hwloc_success = true;

	/*
	 * We can't trust the slurmd_conf_t *conf here as we need actual
	 * hardware instead of whatever is possibly configured.  So we need to
	 * look it up again.
	 */
	if (get_cpuinfo(&npus, &nboards, &nsockets, &ncores, &nthreads,
			NULL, NULL, NULL) != SLURM_SUCCESS) {
		/*
		 * Fall back to use allocated resources, but this may result
		 * in incorrect layout due to a uneven task distribution
		 * (e.g. 4 cores on socket 0 and 3 cores on socket 1)
		 */
		nsockets = (uint16_t) hwloc_get_nbobjs_by_type(topology,
							HWLOC_OBJ_SOCKET);
		ncores = (uint16_t) hwloc_get_nbobjs_by_type(topology,
							HWLOC_OBJ_CORE);
		nthreads = (uint16_t) hwloc_get_nbobjs_by_type(topology,
							HWLOC_OBJ_PU);
		npus = (uint16_t) hwloc_get_nbobjs_by_type(topology,
							   HWLOC_OBJ_PU);
	} else {
		/* Translate cores-per-socket to total core count, etc. */
		nsockets *= nboards;
		ncores *= nsockets;
		nthreads *= ncores;
	}

	if ((nsockets == 0) || (ncores == 0))
		return XCGROUP_ERROR;
	cps = (ncores + nsockets - 1) / nsockets;
	tpc = (nthreads + ncores - 1) / ncores;

	sock_fcyclic = (job->task_dist & SLURM_DIST_SOCKMASK) ==
		SLURM_DIST_SOCKCFULL ? true : false;
	core_cyclic = (job->task_dist & SLURM_DIST_COREMASK) ==
		SLURM_DIST_CORECYCLIC ? true : false;
	core_fcyclic = (job->task_dist & SLURM_DIST_COREMASK) ==
		SLURM_DIST_CORECFULL ? true : false;

	if (bind_verbose) {
		info("task/cgroup: task[%u] using %s distribution "
		     "(task_dist=0x%x)", taskid,
		     format_task_dist_states(job->task_dist), job->task_dist);
	}

	t_ix = xmalloc(ncores * sizeof(uint32_t));
	c_ixc = xmalloc(nsockets * sizeof(uint32_t));
	c_ixn = xmalloc(nsockets * sizeof(uint32_t));

	if (hwloc_compare_types(hwtype, HWLOC_OBJ_CORE) >= 0) {
		/* cores or threads granularity */
		ntskip = taskid;
		npdist = job->cpus_per_task;
	} else {
		/* sockets or ldoms granularity */
		ntskip = taskid;
		npdist = 1;
	}
	if ((job->job_core_spec != NO_VAL16) &&
	    (job->job_core_spec &  CORE_SPEC_THREAD)  &&
	    (job->job_core_spec != CORE_SPEC_THREAD)) {
		/* Skip specialized threads as needed */
		int i, t, c, s;
		int cores = (ncores + nsockets - 1) / nsockets;
		int threads = (npus + cores - 1) / cores;
		spec_thread_cnt = job->job_core_spec & (~CORE_SPEC_THREAD);
		spec_threads = bit_alloc(npus);
		for (t = threads - 1;
		     ((t >= 0) && (spec_thread_cnt > 0)); t--) {
			for (c = cores - 1;
			     ((c >= 0) && (spec_thread_cnt > 0)); c--) {
				for (s = nsockets - 1;
				     ((s >= 0) && (spec_thread_cnt > 0)); s--) {
					i = s * cores + c;
					i = (i * threads) + t;
					bit_set(spec_threads, i);
					spec_thread_cnt--;
				}
			}
		}
		if (hwtype == HWLOC_OBJ_PU) {
			for (i = 0; i <= ntskip && i < npus; i++) {
				if (bit_test(spec_threads, i))
					ntskip++;
			};
		}
	}

	/* skip objs for lower taskids, then add them to the
	   current task cpuset. To prevent infinite loop, check
	   that we do not loop more than npdist times around the available
	   sockets, which is the worst scenario we should afford here. */
	i = j = s_ix = sock_loop = 0;
	while (i < ntskip + 1 && (sock_loop/tpc) < npdist + 1) {
		/* fill one or multiple sockets using block mode, unless
		   otherwise stated in the job->task_dist field */
		while ((s_ix < nsockets) && (j < npdist)) {
			obj = hwloc_get_obj_below_by_type(
				topology, HWLOC_OBJ_SOCKET, s_ix,
				hwtype, c_ixc[s_ix]);
			if ((obj == NULL) && (s_ix == 0) && (c_ixc[s_ix] == 0))
				hwloc_success = false;	/* Complete failure */
			if ((obj != NULL) &&
			    (hwloc_bitmap_first(obj->allowed_cpuset) != -1)) {
				if (hwloc_compare_types(hwtype, HWLOC_OBJ_PU)
									>= 0) {
					/* granularity is thread */
					obj_idxs[0]=s_ix;
					obj_idxs[1]=c_ixc[s_ix];
					obj_idxs[2]=t_ix[(s_ix*cps)+c_ixc[s_ix]];
					obj = hwloc_get_obj_below_array_by_type(
						topology, 3, obj_types, obj_idxs);
					if ((obj != NULL) &&
					    (hwloc_bitmap_first(
					     obj->allowed_cpuset) != -1)) {
						t_ix[(s_ix*cps)+c_ixc[s_ix]]++;
						j++;
						if (i == ntskip)
							_add_hwloc_cpuset(hwtype,
							req_hwtype, obj, taskid,
							bind_verbose, cpuset);
						if (j < npdist) {
							if (core_cyclic) {
								c_ixn[s_ix] =
								c_ixc[s_ix] + 1;
							} else if (core_fcyclic){
								c_ixc[s_ix]++;
								c_ixn[s_ix] =
								c_ixc[s_ix];
							}
							if (sock_fcyclic)
								s_ix++;
						}
					} else {
						c_ixc[s_ix]++;
						if (c_ixc[s_ix] == cps)
							s_ix++;
					}
				} else {
					/* granularity is core or larger */
					c_ixc[s_ix]++;
					j++;
					if (i == ntskip)
						_add_hwloc_cpuset(hwtype,
							req_hwtype, obj, taskid,
						  	bind_verbose, cpuset);
					if ((j < npdist) && (sock_fcyclic))
						s_ix++;
				}
			} else
				s_ix++;
		}
		/* if it succeeds, switch to the next task, starting
		 * with the next available socket, otherwise, loop back
		 * from the first socket trying to find available slots. */
		if (j == npdist) {
			i++;
			j = 0;
			s_ix++; // no validity check, handled by the while
			sock_loop = 0;
		} else {
			sock_loop++;
			s_ix = 0;
		}
	}
	xfree(t_ix);
	xfree(c_ixc);
	xfree(c_ixn);

	if (spec_threads) {
		for (i = 0; i < npus; i++) {
			if (bit_test(spec_threads, i)) {
				hwloc_bitmap_clr(cpuset, i);
			}
		};
		FREE_NULL_BITMAP(spec_threads);
	}

	/* should never happen in normal scenario */
	if ((sock_loop > npdist) && !hwloc_success) {
		/* hwloc_get_obj_below_by_type() fails if no CPU set
		 * configured, see hwloc documentation for details */
		error("task/cgroup: hwloc_get_obj_below_by_type() failing, "
		      "task/affinity plugin may be required to address bug "
		      "fixed in HWLOC version 1.11.5");
		return XCGROUP_ERROR;
	} else if (sock_loop > npdist) {
		char buf[128] = "";
		hwloc_bitmap_snprintf(buf, sizeof(buf), cpuset);
		error("task/cgroup: task[%u] infinite loop broken while trying "
		      "to provision compute elements using %s (bitmap:%s)",
		      taskid, format_task_dist_states(job->task_dist), buf);
		return XCGROUP_ERROR;
	} else
		return XCGROUP_SUCCESS;
}