예제 #1
0
void output_synthetic(hwloc_topology_t topology, const char *filename, int overwrite, int logical __hwloc_attribute_unused, int legend __hwloc_attribute_unused, int verbose_mode __hwloc_attribute_unused)
{
  FILE *output;
  hwloc_obj_t obj = hwloc_get_root_obj(topology);
  int arity;

  if (!obj->symmetric_subtree) {
    fprintf(stderr, "Cannot output assymetric topology in synthetic format.\n");
    fprintf(stderr, "Adding --no-io may help making the topology symmetric.\n");
    return;
  }

  output = open_output(filename, overwrite);
  if (!output) {
    fprintf(stderr, "Failed to open %s for writing (%s)\n", filename, strerror(errno));
    return;
  }

  arity = obj->arity;
  while (arity) {
    char types[64];
    obj = obj->first_child;
    hwloc_obj_type_snprintf(types, sizeof(types), obj, 1);
    fprintf(output, "%s:%u ", types, arity);
    arity = obj->arity;
  }
  fprintf(output, "\n");

  if (output != stdout)
    fclose(output);
}
예제 #2
0
static void
output_console_obj (hwloc_topology_t topology, hwloc_obj_t l, FILE *output, int logical, int verbose_mode)
{
  char type[32], *attr, phys[32] = "";
  unsigned idx = logical ? l->logical_index : l->os_index;
  const char *indexprefix = logical ? " L#" :  " P#";
  if (lstopo_show_cpuset < 2) {
    int len;
    if (l->type == HWLOC_OBJ_MISC && l->name)
      fprintf(output, "%s", l->name);
    else {
      hwloc_obj_type_snprintf (type, sizeof(type), l, verbose_mode-1);
      fprintf(output, "%s", type);
    }
    if (l->depth != 0 && idx != (unsigned)-1
        && l->type != HWLOC_OBJ_PCI_DEVICE
        && (l->type != HWLOC_OBJ_BRIDGE || l->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST))
      fprintf(output, "%s%u", indexprefix, idx);
    if (logical && l->os_index != (unsigned) -1 &&
	(verbose_mode >= 2 || l->type == HWLOC_OBJ_PU || l->type == HWLOC_OBJ_NODE))
      snprintf(phys, sizeof(phys), "P#%u", l->os_index);
    len = hwloc_obj_attr_snprintf (NULL, 0, l, " ", verbose_mode-1);
    attr = malloc(len+1);
    *attr = '\0';
    hwloc_obj_attr_snprintf (attr, len+1, l, " ", verbose_mode-1);
    if (*phys || *attr) {
      const char *separator = *phys != '\0' && *attr!= '\0' ? " " : "";
      fprintf(output, " (%s%s%s)",
	      phys, separator, attr);
    }
    free(attr);
    if ((l->type == HWLOC_OBJ_OS_DEVICE || verbose_mode >= 2) && l->name && l->type != HWLOC_OBJ_MISC)
      fprintf(output, " \"%s\"", l->name);
  }
  if (!l->cpuset)
    return;
  if (lstopo_show_cpuset == 1)
    fprintf(output, " cpuset=");
  if (lstopo_show_cpuset) {
    char *cpusetstr;
    if (lstopo_show_taskset)
      hwloc_bitmap_taskset_asprintf(&cpusetstr, l->cpuset);
    else
      hwloc_bitmap_asprintf(&cpusetstr, l->cpuset);
    fprintf(output, "%s", cpusetstr);
    free(cpusetstr);
  }

  /* annotate if the PU is forbidden/offline/running */
  if (l->type == HWLOC_OBJ_PU && verbose_mode >= 2) {
    if (lstopo_pu_offline(l))
      printf(" (offline)");
    else if (lstopo_pu_forbidden(l))
      printf(" (forbidden)");
    else if (lstopo_pu_running(topology, l))
      printf(" (running)");
  }
}
예제 #3
0
static int
lstopo_obj_snprintf(struct lstopo_output *loutput, char *text, size_t textlen, hwloc_obj_t obj)
{
  int logical = loutput->logical;
  unsigned idx = logical ? obj->logical_index : obj->os_index;
  const char *indexprefix = logical ? " L#" : " P#";
  char typestr[32];
  char indexstr[32]= "";
  char attrstr[256];
  char totmemstr[64] = "";
  int attrlen;

  /* For OSDev, Misc and Group, name replaces type+index+attrs */
  if (obj->name && (obj->type == HWLOC_OBJ_OS_DEVICE || obj->type == HWLOC_OBJ_MISC || obj->type == HWLOC_OBJ_GROUP)) {
    return snprintf(text, textlen, "%s", obj->name);
  }

  /* subtype replaces the basic type name */
  if (obj->subtype) {
    snprintf(typestr, sizeof(typestr), "%s", obj->subtype);
  } else {
    hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0);
  }

  if (loutput->show_indexes[obj->type]
      && idx != (unsigned)-1 && obj->depth != 0
      && obj->type != HWLOC_OBJ_PCI_DEVICE
      && (obj->type != HWLOC_OBJ_BRIDGE || obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST))
    snprintf(indexstr, sizeof(indexstr), "%s%u", indexprefix, idx);

  if (loutput->show_attrs[obj->type]) {
    attrlen = hwloc_obj_attr_snprintf(attrstr, sizeof(attrstr), obj, " ", 0);
    /* display the root total_memory (cannot be local_memory since root cannot be a NUMA node) */
    if (!obj->parent && obj->total_memory)
      snprintf(totmemstr, sizeof(totmemstr), " (%lu%s total)",
	       (unsigned long) hwloc_memory_size_printf_value(obj->total_memory, 0),
	       hwloc_memory_size_printf_unit(obj->total_memory, 0));
  } else
    attrlen = 0;

  if (attrlen > 0)
    return snprintf(text, textlen, "%s%s (%s)%s", typestr, indexstr, attrstr, totmemstr);
  else
    return snprintf(text, textlen, "%s%s%s", typestr, indexstr, totmemstr);
}
예제 #4
0
static void print_children(hwloc_topology_t topology, hwloc_obj_t obj, 
                           int depth)
{
    char type[32], attr[1024];
    unsigned i;

    hwloc_obj_type_snprintf(type, sizeof(type), obj, 0);
    printf("%*s%s", 2*depth, "", type);
    if (obj->os_index != (unsigned) -1)
      printf("#%u", obj->os_index);
    hwloc_obj_attr_snprintf(attr, sizeof(attr), obj, " ", 0);
    if (*attr)
      printf("(%s)", attr);
    printf("\n");
    for (i = 0; i < obj->arity; i++) {
        print_children(topology, obj->children[i], depth + 1);
    }
}
예제 #5
0
파일: ma_hwloc.c 프로젝트: 12182007/cp2k
/*
*Prints the memory hierachy of the machine
*Recursive function that goes throught the machine topology object
*an group them into hierarchical groups 
* topology: the HWLOC object
* obj: the current object of the machine
*/
void print_children_mem(hwloc_topology_t topology, hwloc_obj_t obj, int depth)
{
    char string[128], out_string[128];
    unsigned i;

    if(obj->type == HWLOC_OBJ_MACHINE || obj->type == HWLOC_OBJ_NODE ||
       ( obj->type == HWLOC_OBJ_CACHE &&  obj->arity<=1)){
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
       sprintf(out_string,"%*s%s\n", depth, "", string);
       strcat(console_output,out_string);    
    }
    else if (obj->type == HWLOC_OBJ_CACHE && obj->arity>1){
       hwloc_obj_type_snprintf(string, sizeof(string), obj, 0);
       sprintf(out_string,"%*s%s", depth, "", string);
       strcat(console_output,out_string);
       sprintf(out_string," (%dMB)\n", obj->attr->cache.size/(1024*1024));
       strcat(console_output,out_string);
    }
    for (i = 0; i < obj->arity; i++) {
        print_children_mem(topology, obj->children[i], depth + 1);
    }
}
예제 #6
0
static int
lstopo_obj_snprintf(char *text, size_t textlen, hwloc_obj_t obj, int logical)
{
  unsigned idx = logical ? obj->logical_index : obj->os_index;
  const char *indexprefix = logical ? " L#" : " P#";
  const char *value;
  char typestr[32];
  char indexstr[32]= "";
  char attrstr[256];
  char totmemstr[64] = "";
  int attrlen;

  /* For OSDev, Misc and Group, name replaces type+index+attrs */
  if (obj->name && (obj->type == HWLOC_OBJ_OS_DEVICE || obj->type == HWLOC_OBJ_MISC || obj->type == HWLOC_OBJ_GROUP)) {
    return snprintf(text, textlen, "%s", obj->name);
  }

  /* Type replaces the basic type name */
  if ((value = hwloc_obj_get_info_by_name(obj, "Type")) != NULL) {
    snprintf(typestr, sizeof(typestr), "%s", value);
  } else {
    hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0);
  }

  if (idx != (unsigned)-1 && obj->depth != 0
      && obj->type != HWLOC_OBJ_PCI_DEVICE
      && (obj->type != HWLOC_OBJ_BRIDGE || obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST))
    snprintf(indexstr, sizeof(indexstr), "%s%u", indexprefix, idx);
  attrlen = hwloc_obj_attr_snprintf(attrstr, sizeof(attrstr), obj, " ", 0);
  /* display the root total_memory if different from the local_memory (already shown) */
  if (!obj->parent && obj->memory.total_memory > obj->memory.local_memory)
    snprintf(totmemstr, sizeof(totmemstr), " (%lu%s total)",
             (unsigned long) hwloc_memory_size_printf_value(obj->memory.total_memory, 0),
             hwloc_memory_size_printf_unit(obj->memory.total_memory, 0));
  if (attrlen > 0)
    return snprintf(text, textlen, "%s%s (%s)%s", typestr, indexstr, attrstr, totmemstr);
  else
    return snprintf(text, textlen, "%s%s%s", typestr, indexstr, totmemstr);
}
예제 #7
0
static void print_task(hwloc_topology_t topology,
                       long pid_number, const char *name, hwloc_bitmap_t cpuset,
                       char *pidoutput,
                       int thread)
{
    printf("%s%ld\t", thread ? " " : "", pid_number);

    if (show_cpuset) {
        char *cpuset_str = NULL;
        hwloc_bitmap_asprintf(&cpuset_str, cpuset);
        printf("%s", cpuset_str);
        free(cpuset_str);
    } else {
        hwloc_bitmap_t remaining = hwloc_bitmap_dup(cpuset);
        int first = 1;
        while (!hwloc_bitmap_iszero(remaining)) {
            char type[64];
            unsigned idx;
            hwloc_obj_t obj = hwloc_get_first_largest_obj_inside_cpuset(topology, remaining);
            /* don't show a cache if there's something equivalent and nicer */
            while (hwloc_obj_type_is_cache(obj->type) && obj->arity == 1)
                obj = obj->first_child;
            hwloc_obj_type_snprintf(type, sizeof(type), obj, 1);
            idx = logical ? obj->logical_index : obj->os_index;
            if (idx == (unsigned) -1)
                printf("%s%s", first ? "" : " ", type);
            else
                printf("%s%s:%u", first ? "" : " ", type, idx);
            hwloc_bitmap_andnot(remaining, remaining, obj->cpuset);
            first = 0;
        }
        hwloc_bitmap_free(remaining);
    }

    printf("\t\t%s%s%s\n", name, pidoutput ? "\t" : "", pidoutput ? pidoutput : "");
}
예제 #8
0
static void print_hwloc_obj(char **output, char *prefix,
                            hwloc_topology_t topo, hwloc_obj_t obj)
{
    hwloc_obj_t obj2;
    char string[1024], *tmp, *tmp2, *pfx;
    unsigned i;
    struct hwloc_topology_support *support;

    /* print the object type */
    hwloc_obj_type_snprintf(string, 1024, obj, 1);
    asprintf(&pfx, "\n%s\t", (NULL == prefix) ? "" : prefix);
    asprintf(&tmp, "%sType: %s Number of child objects: %u%sName=%s",
             (NULL == prefix) ? "" : prefix, string, obj->arity,
             pfx, (NULL == obj->name) ? "NULL" : obj->name);
    if (0 < hwloc_obj_attr_snprintf(string, 1024, obj, pfx, 1)) {
        /* print the attributes */
        asprintf(&tmp2, "%s%s%s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    /* print the cpusets - apparently, some new HWLOC types don't
     * have cpusets, so protect ourselves here
     */
    if (NULL != obj->cpuset) {
        hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->cpuset);
        asprintf(&tmp2, "%s%sCpuset:  %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (NULL != obj->online_cpuset) {
        hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->online_cpuset);
        asprintf(&tmp2, "%s%sOnline:  %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (NULL != obj->allowed_cpuset) {
        hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->allowed_cpuset);
        asprintf(&tmp2, "%s%sAllowed: %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (HWLOC_OBJ_MACHINE == obj->type) {
        /* root level object - add support values */
        support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
        asprintf(&tmp2, "%s%sBind CPU proc:   %s%sBind CPU thread: %s", tmp, pfx,
                 (support->cpubind->set_thisproc_cpubind) ? "TRUE" : "FALSE", pfx,
                 (support->cpubind->set_thisthread_cpubind) ? "TRUE" : "FALSE");
        free(tmp);
        tmp = tmp2;
        asprintf(&tmp2, "%s%sBind MEM proc:   %s%sBind MEM thread: %s", tmp, pfx,
                 (support->membind->set_thisproc_membind) ? "TRUE" : "FALSE", pfx,
                 (support->membind->set_thisthread_membind) ? "TRUE" : "FALSE");
        free(tmp);
        tmp = tmp2;
    }
    asprintf(&tmp2, "%s%s\n", (NULL == *output) ? "" : *output, tmp);
    free(tmp);
    free(pfx);
    asprintf(&pfx, "%s\t", (NULL == prefix) ? "" : prefix);
    for (i=0; i < obj->arity; i++) {
        obj2 = obj->children[i];
        /* print the object */
        print_hwloc_obj(&tmp2, pfx, topo, obj2);
    }
    free(pfx);
    if (NULL != *output) {
        free(*output);
    }
    *output = tmp2;
}
예제 #9
0
static void
output_console_obj (hwloc_topology_t topology, hwloc_obj_t l, FILE *output, int logical, int verbose_mode, int collapsed)
{
  unsigned idx = logical ? l->logical_index : l->os_index;
  const char *value;
  char pidxstr[16] = "P#[collapsed]";
  char lidxstr[16] = "L#[collapsed]";

  if (!collapsed || l->type != HWLOC_OBJ_PCI_DEVICE) {
    snprintf(pidxstr, sizeof(pidxstr), "P#%u", l->os_index);
    snprintf(lidxstr, sizeof(lidxstr), "L#%u", l->logical_index);
  }

  if (lstopo_show_cpuset < 2) {
    char type[64], *attr, phys[32] = "";
    int len;
    value = hwloc_obj_get_info_by_name(l, "Type");
    hwloc_obj_type_snprintf (type, sizeof(type), l, verbose_mode-1);
    if (value)
      fprintf(output, "%s(%s)", type, value);
    else
      fprintf(output, "%s", type);
    if (l->depth != 0 && idx != (unsigned)-1
        && l->type != HWLOC_OBJ_MISC
        && l->type != HWLOC_OBJ_PCI_DEVICE
        && (l->type != HWLOC_OBJ_BRIDGE || l->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST))
      fprintf(output, " %s", logical ? lidxstr : pidxstr);
    if (l->name && (l->type == HWLOC_OBJ_MISC || l->type == HWLOC_OBJ_GROUP))
      fprintf(output, " %s", l->name);
    if (logical && l->os_index != (unsigned) -1 &&
	(verbose_mode >= 2 || l->type == HWLOC_OBJ_PU || l->type == HWLOC_OBJ_NUMANODE))
      snprintf(phys, sizeof(phys), "%s", pidxstr);
    /* display attributes */
    len = hwloc_obj_attr_snprintf (NULL, 0, l, " ", verbose_mode-1);
    attr = malloc(len+1);
    *attr = '\0';
    hwloc_obj_attr_snprintf (attr, len+1, l, " ", verbose_mode-1);
    if (*phys || *attr) {
      const char *separator = *phys != '\0' && *attr!= '\0' ? " " : "";
      fprintf(output, " (%s%s%s)",
	      phys, separator, attr);
    }
    free(attr);
    /* display the root total_memory if not verbose (already shown)
     * and different from the local_memory (already shown) */
    if (verbose_mode == 1 && !l->parent && l->memory.total_memory > l->memory.local_memory)
      fprintf(output, " (%lu%s total)",
	      (unsigned long) hwloc_memory_size_printf_value(l->memory.total_memory, 0),
	      hwloc_memory_size_printf_unit(l->memory.total_memory, 0));
    /* append the name */
    if (l->name && (l->type == HWLOC_OBJ_OS_DEVICE || verbose_mode >= 2)
	&& l->type != HWLOC_OBJ_MISC && l->type != HWLOC_OBJ_GROUP)
      fprintf(output, " \"%s\"", l->name);
  }
  if (!l->cpuset)
    return;
  if (lstopo_show_cpuset == 1)
    fprintf(output, " cpuset=");
  if (lstopo_show_cpuset) {
    char *cpusetstr;
    if (lstopo_show_taskset)
      hwloc_bitmap_taskset_asprintf(&cpusetstr, l->cpuset);
    else
      hwloc_bitmap_asprintf(&cpusetstr, l->cpuset);
    fprintf(output, "%s", cpusetstr);
    free(cpusetstr);
  }

  /* annotate if the PU is forbidden/running */
  if (l->type == HWLOC_OBJ_PU && verbose_mode >= 2) {
    if (lstopo_pu_forbidden(l))
      fprintf(output, " (forbidden)");
    else if (lstopo_pu_running(topology, l))
      fprintf(output, " (running)");
  }
}
예제 #10
0
/* recursively climb the topology, pruning procs beyond that allowed
 * by the given ppr
 */
static void prune(orte_jobid_t jobid,
                  orte_app_idx_t app_idx,
                  orte_node_t *node,
                  opal_hwloc_level_t *level,
                  orte_vpid_t *nmapped)
{
    hwloc_obj_t obj, top;
    unsigned int i, nobjs;
    hwloc_obj_type_t lvl;
    unsigned cache_level = 0, k;
    int nprocs;
    hwloc_cpuset_t avail, cpus, childcpus;
    int n, limit, nmax, nunder, idx, idxmax = 0;
    orte_proc_t *proc, *pptr, *procmax;
    opal_hwloc_level_t ll;
    char dang[64];
    hwloc_obj_t locale;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:ppr: pruning level %d",
                        *level);

    /* convenience */
    ll = *level;

    /* convenience */
    lvl = opal_hwloc_levels[ll];
    limit = ppr[ll];

    if (0 == limit) {
        /* no limit at this level, so move up if necessary */
        if (0 == ll) {
            /* done */
            return;
        }
        --(*level);
        prune(jobid, app_idx, node, level, nmapped);
        return;
    }

    /* handle the darn cache thing again */
    if (OPAL_HWLOC_L3CACHE_LEVEL == ll) {
        cache_level = 3;
    } else if (OPAL_HWLOC_L2CACHE_LEVEL == ll) {
        cache_level = 2;
    } else if (OPAL_HWLOC_L1CACHE_LEVEL == ll) {
        cache_level = 1;
    }

    /* get the number of resources at this level on this node */
    nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology,
                                               lvl, cache_level,
                                               OPAL_HWLOC_AVAILABLE);

    /* for each resource, compute the number of procs sitting
     * underneath it and check against the limit
     */
    for (i=0; i < nobjs; i++) {
        obj = opal_hwloc_base_get_obj_by_type(node->topology,
                                              lvl, cache_level,
                                              i, OPAL_HWLOC_AVAILABLE);
        /* get the available cpuset */
        avail = opal_hwloc_base_get_available_cpus(node->topology, obj);

        /* look at the intersection of this object's cpuset and that
         * of each proc in the job/app - if they intersect, then count this proc
         * against the limit
         */
        nprocs = 0;
        for (n=0; n < node->procs->size; n++) {
            if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, n))) {
                continue;
            }
            if (proc->name.jobid != jobid ||
                proc->app_idx != app_idx) {
                continue;
            }
            locale = NULL;
            if (orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                return;
            }
            cpus = opal_hwloc_base_get_available_cpus(node->topology, locale);
            if (hwloc_bitmap_intersects(avail, cpus)) {
                nprocs++;
            }
        }
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: found %d procs limit %d",
                            nprocs, limit);

        /* check against the limit */
        while (limit < nprocs) {
            /* need to remove procs - do this in a semi-intelligent
             * manner to provide a little load balancing by cycling
             * across the objects beneath this one, removing procs
             * in a round-robin fashion until the limit is satisfied
             *
             * NOTE: I'm sure someone more knowledgeable with hwloc
             * will come up with a more efficient way to do this, so
             * consider this is a starting point
             */

            /* find the first level that has more than
             * one child beneath it - if all levels
             * have only one child, then return this
             * object
             */
            top = find_split(node->topology, obj);
            hwloc_obj_type_snprintf(dang, 64, top, 1);
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:ppr: SPLIT AT LEVEL %s", dang);

            /* cycle across the children of this object */
            nmax = 0;
            procmax = NULL;
            idx = 0;
            /* find the child with the most procs underneath it */
            for (k=0; k < top->arity && limit < nprocs; k++) {
                /* get this object's available cpuset */
                childcpus = opal_hwloc_base_get_available_cpus(node->topology, top->children[k]);
                nunder = 0;
                pptr = NULL;
                for (n=0; n < node->procs->size; n++) {
                    if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, n))) {
                        continue;
                    }
                    if (proc->name.jobid != jobid ||
                        proc->app_idx != app_idx) {
                        continue;
                    }
                    locale = NULL;
                    if (orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
                        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                        return;
                    }
                    cpus = opal_hwloc_base_get_available_cpus(node->topology, locale);
                    if (hwloc_bitmap_intersects(childcpus, cpus)) {
                        nunder++;
                        if (NULL == pptr) {
                            /* save the location of the first proc under this object */
                            pptr = proc;
                            idx = n;
                        }
                    }
                }
                if (nmax < nunder) {
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "mca:rmaps:ppr: PROCS UNDER CHILD %d %d MAX %d",
                                        k, nunder, nmax);
                    nmax = nunder;
                    procmax = pptr;
                    idxmax = idx;
                }
            }
            if (NULL == procmax) {
                /* can't find anything to remove - error out */
                goto error;
            }
            /* remove it */
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:ppr: removing proc at posn %d",
                                idxmax);
            opal_pointer_array_set_item(node->procs, idxmax, NULL);
            node->num_procs--;
            node->slots_inuse--;
            if (node->slots_inuse < 0) {
                node->slots_inuse = 0;
            }
            nprocs--;
            *nmapped -= 1;
            OBJ_RELEASE(procmax);
        }
    }
    /* finished with this level - move up if necessary */
    if (0 == ll) {
        return;
    }
    --(*level);
    prune(jobid, app_idx, node, level, nmapped);
    return;

 error:
    opal_output(0, "INFINITE LOOP");
}
예제 #11
0
파일: gpu.c 프로젝트: DawidvC/chapel
int main(void)
{
    hwloc_topology_t topology;
    hwloc_obj_t obj;
    unsigned n, i;
    int devid, platformid;
    const char *dev;

    /* Allocate, initialize and load topology object. */
    hwloc_topology_init(&topology);
    hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
    hwloc_topology_load(topology);

    /* Find CUDA devices through the corresponding OS devices */
    n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_OS_DEVICE);

    for (i = 0; i < n ; i++) {
      const char *s;
      obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_OS_DEVICE, i);
      printf("%s:\n", obj->name);

      s = hwloc_obj_get_info_by_name(obj, "Backend");

      if (s && !strcmp(s, "CUDA")) {
        /* This is a CUDA device */
        assert(!strncmp(obj->name, "cuda", 4));
        devid = atoi(obj->name + 4);
        printf("CUDA device %d\n", devid);

        s = hwloc_obj_get_info_by_name(obj, "GPUModel");
        if (s)
          printf("Model: %s\n", s);

        s = hwloc_obj_get_info_by_name(obj, "CUDAGlobalMemorySize");
        if (s)
          printf("Memory: %s\n", s);

        s = hwloc_obj_get_info_by_name(obj, "CUDAMultiProcessors");
        if (s)
        {
          int mp = atoi(s);
          s = hwloc_obj_get_info_by_name(obj, "CUDACoresPerMP");
          if (s) {
            int mp_cores = atoi(s);
            printf("Cores: %d\n", mp * mp_cores);
          }
        }
      }

      if (s && !strcmp(s, "OpenCL")) {
        /* This is an OpenCL device */
        assert(!strncmp(obj->name, "opencl", 6));
        platformid = atoi(obj->name + 6);
        printf("OpenCL platform %d\n", platformid);
        dev = strchr(obj->name + 6, 'd');
        devid = atoi(dev + 1);
        printf("OpenCL device %d\n", devid);

        s = hwloc_obj_get_info_by_name(obj, "GPUModel");
        if (s)
          printf("Model: %s\n", s);

        s = hwloc_obj_get_info_by_name(obj, "OpenCLGlobalMemorySize");
        if (s)
          printf("Memory: %s\n", s);
      }

      /* One can also use helpers from hwloc/cuda.h, hwloc/cudart.h,
       * hwloc/opencl.h */


      /* Find out cpuset this is connected to */
      while (obj && (!obj->cpuset || hwloc_bitmap_iszero(obj->cpuset)))
        obj = obj->parent;

      if (obj) {
        char *cpuset_string;
        char name[16];
        hwloc_obj_type_snprintf(name, sizeof(name), obj, 0);
        hwloc_bitmap_asprintf(&cpuset_string, obj->cpuset);
        printf("Location: %s P#%d\n", name, obj->os_index);
        printf("Cpuset: %s\n", cpuset_string);
      }
      printf("\n");
    }

    /* Destroy topology object. */
    hwloc_topology_destroy(topology);

    return 0;
}
예제 #12
0
파일: ma_hwloc.c 프로젝트: 12182007/cp2k
/*
*Prints one branch of the machine topology
*Recursive function that goes throught the machine topology object
*an group them into hierarchical groups
* topology: the HWLOC object that represents the machine
* obj: the current object of a level
*/
void print_machine_branch(hwloc_topology_t topology, hwloc_obj_t obj, int depth, int obj_type)
{
    char string[128], out_string[128];
    unsigned i,arity;
    int *devIds,devId,countDev;

    if (obj->type != HWLOC_OBJ_MACHINE ){
     if(obj->type == HWLOC_OBJ_NODE){
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
       sprintf(out_string,"%*s%s\n", depth, "", string); 
       strcat(console_output,out_string);      
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       if ((local_topo->ngpus > 0) && (local_topo->ngpus < local_topo->ncores)){
                ma_get_nDevcu(obj->logical_index, &countDev);
                devIds = malloc (countDev*sizeof(int));
                ma_get_cu(obj->logical_index, devIds);
                strcat(console_output," Shared GPUS: ");
                for (i = 0; i<countDev; i++){
                 devId = devIds[i];
                 sprintf(out_string,"#%d ", devId);
                 strcat(console_output,out_string);}
                strcat(console_output,"\n");
       }
#endif
     } 
     else if (obj->type == HWLOC_OBJ_SOCKET){ 
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
       sprintf(out_string,"%*s%s\n", depth, "", string); 
       strcat(console_output,out_string);}
     else {
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
      if(obj->type == HWLOC_OBJ_PU)  {
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       sprintf(out_string,"%*s%s\t", depth, "", string);
       strcat(console_output,out_string);
       if (local_topo->ngpus > 0 && local_topo->ngpus == local_topo->ncores){
                ma_get_core_cu(obj->logical_index, &devId);
                strcat(console_output," GPU: ");
                sprintf(out_string,"%d ", devId);
                strcat(console_output,out_string);}
      strcat(console_output,"\n");
#else
       sprintf(out_string,"%*s%s\n", depth, "", string);
       strcat(console_output,out_string);
#endif      
      }
      else if (obj->type == HWLOC_OBJ_CACHE && obj->arity>1){
       hwloc_obj_type_snprintf(string, sizeof(string), obj, 0);
       sprintf(out_string,"%*s%s", depth, "", string);
       strcat(console_output,out_string);
       sprintf(out_string," (%dMB)\n", obj->attr->cache.size/(1024*1024));
       strcat(console_output,out_string);
      }
      else {
       sprintf(out_string,"%*s%s\t", depth, "", string); 
       strcat(console_output,out_string);
      }
     }                 
    }  
    if (obj->type != HWLOC_OBJ_PU) {//it is not a PU
      if((obj->first_child && obj->first_child->type == HWLOC_OBJ_PU) ||
          obj->first_child->type == obj_type)
       arity = 1; //number of children
      else
       arity = obj->arity;

    for (i = 0; i < arity; i++) 
        print_machine_branch(topology, obj->children[i],depth+1,obj_type);
   }
}
예제 #13
0
파일: ma_hwloc.c 프로젝트: 12182007/cp2k
/*
*Prints the machine hierarchy 
*Recursive function that goes throught the machine topology object
*an group them into hierarchical groups
* topology: the HWLOC object
* obj: the current object in the topology
* depth: the horizontal level in the machine topology 
*/
void print_machine(hwloc_topology_t topo, hwloc_obj_t obj, int depth)
{
    char string[256], out_string[256];
    unsigned i,arity;
    int *devIds,devId,countDev;

    if(obj->type == HWLOC_OBJ_SOCKET || obj->type == HWLOC_OBJ_MACHINE ){ 
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
       sprintf(out_string,"%*s%s\n", depth, "", string);
       strcat(console_output,out_string);
     }
     else if (obj->type == HWLOC_OBJ_NODE){
      hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
      sprintf(out_string,"%*s%s\n", depth, "", string);
      strcat(console_output,out_string); 
//if the machine has shared GPUs
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       if ((local_topo->ngpus > 0) && (local_topo->ngpus < local_topo->ncores)){
                ma_get_nDevcu(obj->logical_index, &countDev);
                devIds = malloc (countDev*sizeof(int));
                ma_get_cu(obj->logical_index, devIds);
		strcat(console_output," Shared GPUS: ");
		for (i = 0; i<countDev; i++){
		 devId = devIds[i];
                 sprintf(out_string,"#%d ", devId); 
       		 strcat(console_output,out_string);}
       		strcat(console_output,"\n");
       }
#endif
     }
     else {
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
      if(obj->type == HWLOC_OBJ_PU )
      {
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       sprintf(out_string,"%*s%s\t", depth, "", string);
       strcat(console_output,out_string);
       if (local_topo->ngpus > 0 && local_topo->ngpus == local_topo->ncores){
                ma_get_core_cu(obj->logical_index, &devId);
                strcat(console_output," GPU: ");
                sprintf(out_string,"%d ", devId);
                strcat(console_output,out_string);}
      strcat(console_output,"\n");
#else
       sprintf(out_string,"%*s%s\n", depth, "", string);
       strcat(console_output,out_string);	
#endif
      }
      else if (obj->type == HWLOC_OBJ_CACHE && obj->arity>1 ){
           hwloc_obj_type_snprintf(string, sizeof(string), obj, 0);
           sprintf(out_string,"%*s%s", depth, "", string);
           strcat(console_output,out_string);
           sprintf(out_string," (%dMB)\n", obj->attr->cache.size/(1024*1024));
           strcat(console_output,out_string);       
         }
      else if (obj->type == HWLOC_OBJ_OS_DEVICE ||
               obj->type == HWLOC_OBJ_PCI_DEVICE ||
               obj->type == HWLOC_OBJ_BRIDGE){
               if(obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK ){
                sprintf(out_string,"%*s%s\n", depth, "--", "Network Card");
                  strcat(console_output,out_string);}
       }
      else if (obj->type == HWLOC_OBJ_CORE)
       {
          char number[33];
          strcpy(string,"Core#");
          sprintf(number,"%d",obj->logical_index);
          strcat(string,number);
          sprintf(out_string,"%*s%s\t", depth, "", string);
          strcat(console_output,out_string);
       }
      else {
           sprintf(out_string,"%*s%s\t", depth, "", string);
           strcat(console_output,out_string);
        }
     }  
    if (obj->type != HWLOC_OBJ_PU) {//it is not a PU
      if((obj->first_child && obj->first_child->type == HWLOC_OBJ_PU))
       arity = 1; //number of children
      else
       arity = obj->arity;

    for (i = 0; i < arity; i++) 
        print_machine(topo, obj->children[i],depth+1);
   }
}
예제 #14
0
int main(void)
{
    int depth;
    unsigned i, n;
    unsigned long size;
    int levels;
    char string[128];
    int topodepth;
    void *m;
    hwloc_topology_t topology;
    hwloc_cpuset_t cpuset;
    hwloc_obj_t obj;

    /* Allocate and initialize topology object. */
    hwloc_topology_init(&topology);

    /* ... Optionally, put detection configuration here to ignore
       some objects types, define a synthetic topology, etc....  

       The default is to detect all the objects of the machine that
       the caller is allowed to access.  See Configure Topology
       Detection. */

    /* Perform the topology detection. */
    hwloc_topology_load(topology);

    /* Optionally, get some additional topology information
       in case we need the topology depth later. */
    topodepth = hwloc_topology_get_depth(topology);

    /*****************************************************************
     * First example:
     * Walk the topology with an array style, from level 0 (always
     * the system level) to the lowest level (always the proc level).
     *****************************************************************/
    for (depth = 0; depth < topodepth; depth++) {
        printf("*** Objects at level %d\n", depth);
        for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); 
             i++) {
            hwloc_obj_type_snprintf(string, sizeof(string),
				    hwloc_get_obj_by_depth(topology, depth, i), 0);
            printf("Index %u: %s\n", i, string);
        }
    }

    /*****************************************************************
     * Second example:
     * Walk the topology with a tree style.
     *****************************************************************/
    printf("*** Printing overall tree\n");
    print_children(topology, hwloc_get_root_obj(topology), 0);

    /*****************************************************************
     * Third example:
     * Print the number of packages.
     *****************************************************************/
    depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
    if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
        printf("*** The number of packages is unknown\n");
    } else {
        printf("*** %u package(s)\n",
               hwloc_get_nbobjs_by_depth(topology, depth));
    }

    /*****************************************************************
     * Fourth example:
     * Compute the amount of cache that the first logical processor
     * has above it.
     *****************************************************************/
    levels = 0;
    size = 0;
    for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
         obj;
         obj = obj->parent)
      if (obj->type == HWLOC_OBJ_CACHE) {
        levels++;
        size += obj->attr->cache.size;
      }
    printf("*** Logical processor 0 has %d caches totaling %luKB\n", 
           levels, size / 1024);

    /*****************************************************************
     * Fifth example:
     * Bind to only one thread of the last core of the machine.
     *
     * First find out where cores are, or else smaller sets of CPUs if
     * the OS doesn't have the notion of a "core".
     *****************************************************************/
    depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE);

    /* Get last core. */
    obj = hwloc_get_obj_by_depth(topology, depth,
                   hwloc_get_nbobjs_by_depth(topology, depth) - 1);
    if (obj) {
        /* Get a copy of its cpuset that we may modify. */
        cpuset = hwloc_bitmap_dup(obj->cpuset);

        /* Get only one logical processor (in case the core is
           SMT/hyper-threaded). */
        hwloc_bitmap_singlify(cpuset);

        /* And try to bind ourself there. */
        if (hwloc_set_cpubind(topology, cpuset, 0)) {
            char *str;
            int error = errno;
            hwloc_bitmap_asprintf(&str, obj->cpuset);
            printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error));
            free(str);
        }

        /* Free our cpuset copy */
        hwloc_bitmap_free(cpuset);
    }

    /*****************************************************************
     * Sixth example:
     * Allocate some memory on the last NUMA node, bind some existing
     * memory to the last NUMA node.
     *****************************************************************/
    /* Get last node. There's always at least one. */
    n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1);

    size = 1024*1024;
    m = hwloc_alloc_membind_nodeset(topology, size, obj->nodeset,
                                    HWLOC_MEMBIND_BIND, 0);
    hwloc_free(topology, m, size);

    m = malloc(size);
    hwloc_set_area_membind_nodeset(topology, m, size, obj->nodeset,
                                   HWLOC_MEMBIND_BIND, 0);
    free(m);

    /* Destroy topology object. */
    hwloc_topology_destroy(topology);

    return 0;
}
예제 #15
0
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t set, set2;
  hwloc_const_bitmap_t cset_available, cset_all;
  hwloc_obj_t obj;
  char *buffer;
  char type[64];
  unsigned i;
  int err;

  /* create a topology */
  err = hwloc_topology_init(&topology);
  if (err < 0) {
    fprintf(stderr, "failed to initialize the topology\n");
    return EXIT_FAILURE;
  }
  err = hwloc_topology_load(topology);
  if (err < 0) {
    fprintf(stderr, "failed to load the topology\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* retrieve the entire set of available PUs */
  cset_available = hwloc_topology_get_topology_cpuset(topology);

  /* retrieve the CPU binding of the current entire process */
  set = hwloc_bitmap_alloc();
  if (!set) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_PROCESS);
  if (err < 0) {
    fprintf(stderr, "failed to get cpu binding\n");
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
  }

  /* display the processing units that cannot be used by this process */
  if (hwloc_bitmap_isequal(set, cset_available)) {
    printf("this process can use all available processing units in the system\n");
  } else {
    /* compute the set where we currently cannot run.
     * we can't modify cset_available because it's a system read-only one,
     * so we do   set = available &~ set
     */
    hwloc_bitmap_andnot(set, cset_available, set);
    hwloc_bitmap_asprintf(&buffer, set);
    printf("process cannot use %d process units (%s) among %u in the system\n",
	   hwloc_bitmap_weight(set), buffer, hwloc_bitmap_weight(cset_available));
    free(buffer);
    /* restore set where it was before the &~ operation above */
    hwloc_bitmap_andnot(set, cset_available, set);
  }
  /* print the smallest object covering the current process binding */
  obj = hwloc_get_obj_covering_cpuset(topology, set);
  hwloc_obj_type_snprintf(type, sizeof(type), obj, 0);
  printf("process is bound within object %s logical index %u\n", type, obj->logical_index);

  /* retrieve the single PU where the current thread actually runs within this process binding */
  set2 = hwloc_bitmap_alloc();
  if (!set2) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to get last cpu location\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
  }
  /* sanity checks that are not actually needed but help the reader */
  /* this thread runs within the process binding */
  assert(hwloc_bitmap_isincluded(set2, set));
  /* this thread runs on a single PU at a time */
  assert(hwloc_bitmap_weight(set2) == 1);

  /* print the logical number of the PU where that thread runs */
  /* extract the PU OS index from the bitmap */
  i = hwloc_bitmap_first(set2);
  obj = hwloc_get_pu_obj_by_os_index(topology, i);
  printf("thread is now running on PU logical index %u (OS/physical index %u)\n",
	 obj->logical_index, i);

  /* migrate this single thread to where other PUs within the current binding */
  hwloc_bitmap_andnot(set2, set, set2);
  err = hwloc_set_cpubind(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to set thread binding\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
  }
  /* reprint the PU where that thread runs */
  err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to get last cpu location\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
  }
  /* print the logical number of the PU where that thread runs */
  /* extract the PU OS index from the bitmap */
  i = hwloc_bitmap_first(set2);
  obj = hwloc_get_pu_obj_by_os_index(topology, i);
  printf("thread is running on PU logical index %u (OS/physical index %u)\n",
	 obj->logical_index, i);

  hwloc_bitmap_free(set);
  hwloc_bitmap_free(set2);

  /* retrieve the entire set of all PUs */
  cset_all = hwloc_topology_get_complete_cpuset(topology);
  if (hwloc_bitmap_isequal(cset_all, cset_available)) {
    printf("all hardware PUs are available\n");
  } else {
    printf("only %d hardware PUs are available in the machine among %d\n",
	   hwloc_bitmap_weight(cset_available), hwloc_bitmap_weight(cset_all));
  }

  hwloc_topology_destroy(topology);
  return EXIT_SUCCESS;
}