Example #1
void* mmap_1g(void* addr /* = nullptr */, int node /* = -1 */) {
#ifdef __linux__
  if (s_num1GPages >= kMaxNum1GPages) return nullptr;
  if (get_huge1g_info(node).free_hugepages <= 0) return nullptr;
  if (node >= 0 && !numa_node_allowed(node)) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
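    // Save the current membind and interleave masks so they can be restored after the node-local mapping.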
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_1g_impl(addr);
  if (ret != nullptr) {
    s_1GPages[s_num1GPages++] = ret;
  }
#ifdef HAVE_NUMA
  if (memMask) {
    assert(interleaveMask);
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else
  return nullptr;
#endif
}
Example #2
void bind_mem2node_id(int node_id)
{
	struct bitmask *bmp = numa_allocate_nodemask();
	numa_bitmask_setbit(bmp, node_id);
	numa_set_membind(bmp);
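	/* the kernel copies the mask in numa_set_membind(), so it is safe to free it immediately */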
	numa_free_nodemask(bmp);
}
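A minimal, hypothetical caller of the helper above might look like this (it assumes a NUMA-capable kernel and linking with -lnuma; the node id and buffer size are arbitrary):

#include <numa.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	if (numa_available() < 0) {
		fprintf(stderr, "NUMA is not available on this system\n");
		return 1;
	}
	bind_mem2node_id(0);		/* all further allocations must come from node 0 */
	char *buf = malloc(1 << 20);
	if (buf) {
		memset(buf, 0, 1 << 20);	/* first touch faults the pages in on node 0 */
		free(buf);
	}
	return 0;
}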
Example #3
  void ConfigureTableThread() {
    int32_t node_id = GlobalContext::get_numa_index();

    struct bitmask *mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node_id);

    // use the non-strict "preferred" policy for the NUMA binding
    numa_set_bind_policy(0);
    numa_set_membind(mask);
    numa_free_nodemask(mask);
  }
Example #4
  void ConfigureTableThread() {
    int32_t idx = ThreadContext::get_id() - GlobalContext::get_head_table_thread_id();
    int32_t node_id = idx % num_mem_nodes_;
    CHECK_EQ(numa_run_on_node(node_id), 0);

    struct bitmask *mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node_id);

    // use the non-strict "preferred" policy for the NUMA binding
    numa_set_bind_policy(0);
    numa_set_membind(mask);
    numa_free_nodemask(mask);
  }
Example #5
void* mmap_2m(void* addr, int prot, int node /* = -1 */,
              bool map_shared /* = false */, bool map_fixed /* = false */) {
#ifdef __linux__
  if (get_huge2m_info(node).free_hugepages <= 0) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
    assert(numa_node_set != 0);
    if ((numa_node_set & (1u << node)) == 0) {
      // Numa policy forbids allocation on the node.
      return nullptr;
    }
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_2m_impl(addr, prot, map_shared, map_fixed);
  s_num2MPages += !!ret;
#ifdef HAVE_NUMA
  if (memMask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else  // not linux
  return nullptr;
#endif
}
Example #6
int bind_cpu(int cpu)
{
	cpu_set_t *cmask;
	struct bitmask *bmask;
	size_t ncpu, setsize;
	int ret;

	ncpu = get_num_cpus();

	if (cpu < 0 || cpu >= (int)ncpu) {
		errno = EINVAL;
		return -1;
	}

	cmask = CPU_ALLOC(ncpu);
	if (cmask == NULL)
		return -1;

	setsize = CPU_ALLOC_SIZE(ncpu);
	CPU_ZERO_S(setsize, cmask);
	CPU_SET_S(cpu, setsize, cmask);

	ret = sched_setaffinity(0, setsize, cmask);

	CPU_FREE(cmask);

	/* skip NUMA stuff for UMA systems */
	if (numa_max_node() == 0)
		return ret;

	bmask = numa_bitmask_alloc(16);
	assert(bmask);

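	/* heuristic: assumes even-numbered CPUs sit on node 0 and odd-numbered CPUs on node 1 */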
	numa_bitmask_setbit(bmask, cpu % 2);
	numa_set_membind(bmask);
	numa_bitmask_free(bmask);

	return ret;
}
Example #7
int fire_worker_init(fire_worker_context_t *context)
{
	buffer[context->queue_id] = (char *)malloc(buf_size);

	/* init worker struct */
	fire_worker_t *cc = &(workers[context->queue_id]); 
	cc->total_packets = 0;
	cc->total_bytes = 0;

	/* nids init */
	nids_init(context->core_id);

#if !defined(AFFINITY_NO)
	/* set schedule affinity */
	unsigned long mask = 1UL << context->core_id;
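	/* casting a raw word to cpu_set_t * only works for core ids below 64 */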
	if (sched_setaffinity(0, sizeof(unsigned long), (cpu_set_t *)&mask) < 0) {
		assert(0);
	}

	/* set schedule policy */
	struct sched_param param;
	param.sched_priority = 99;
	pthread_setschedparam(pthread_self(), SCHED_FIFO, &param);
#endif

	if (numa_max_node() == 0)
		return 0;
	
	struct bitmask *bmask;

	bmask = numa_bitmask_alloc(16);
	assert(bmask);
	numa_bitmask_setbit(bmask, context->core_id % 2);
	numa_set_membind(bmask);
	numa_bitmask_free(bmask);

	return 0;
}
Example #8
int
virNumaSetupMemoryPolicy(virDomainNumatuneMemMode mode,
                         virBitmapPtr nodeset)
{
    nodemask_t mask;
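    /* nodemask_t: the libnuma v1-compat interface, where numa_set_membind()
     * takes a nodemask_t * rather than a struct bitmask * */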
    int node = -1;
    int ret = -1;
    int bit = 0;
    size_t i;
    int maxnode = 0;

    if (!nodeset)
        return 0;

    if (!virNumaNodesetIsAvailable(nodeset))
        return -1;

    maxnode = numa_max_node();
    maxnode = maxnode < NUMA_NUM_NODES ? maxnode : NUMA_NUM_NODES;

    /* Convert nodemask to NUMA bitmask. */
    nodemask_zero(&mask);
    bit = -1;
    while ((bit = virBitmapNextSetBit(nodeset, bit)) >= 0) {
        if (bit > maxnode) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("NUMA node %d is out of range"), bit);
            return -1;
        }
        nodemask_set(&mask, bit);
    }

    switch (mode) {
    case VIR_DOMAIN_NUMATUNE_MEM_STRICT:
        numa_set_bind_policy(1);
        numa_set_membind(&mask);
        numa_set_bind_policy(0);
        break;

    case VIR_DOMAIN_NUMATUNE_MEM_PREFERRED:
    {
        int nnodes = 0;
        for (i = 0; i < NUMA_NUM_NODES; i++) {
            if (nodemask_isset(&mask, i)) {
                node = i;
                nnodes++;
            }
        }

        if (nnodes != 1) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("NUMA memory tuning in 'preferred' mode "
                                   "only supports single node"));
            goto cleanup;
        }

        numa_set_bind_policy(0);
        numa_set_preferred(node);
    }
    break;

    case VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE:
        numa_set_interleave_mask(&mask);
        break;

    case VIR_DOMAIN_NUMATUNE_MEM_LAST:
        break;
    }
    ret = 0;

cleanup:
    return ret;
}
Example #9
/*
 * task_pre_launch() is called prior to exec of application task.
 *	It is followed by TaskProlog program (from slurm.conf) and
 *	--task-prolog (from srun command line).
 */
extern int task_pre_launch (slurmd_job_t *job)
{
	char base[PATH_MAX], path[PATH_MAX];
	int rc = SLURM_SUCCESS;

	debug("affinity task_pre_launch:%u.%u, task:%u bind:%u",
	      job->jobid, job->stepid, job->envtp->procid,
	      job->cpu_bind_type);

	if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
		info("Using cpuset affinity for tasks");
#ifdef MULTIPLE_SLURMD
		if (snprintf(base, PATH_MAX, "%s/slurm_%s_%u",
			     CPUSET_DIR,
			     (conf->node_name != NULL)?conf->node_name:"",
			     job->jobid) >= PATH_MAX) {
			error("cpuset path too long");
			return SLURM_ERROR;
		}
#else
		if (snprintf(base, PATH_MAX, "%s/slurm%u",
				CPUSET_DIR, job->jobid) >= PATH_MAX) {
			error("cpuset path too long");
			return SLURM_ERROR;
		}
#endif
		if (snprintf(path, PATH_MAX, "%s/slurm%u.%u_%d",
				base, job->jobid, job->stepid,
				job->envtp->localid) >= PATH_MAX) {
			error("cpuset path too long");
			return SLURM_ERROR;
		}
	} else
		info("Using sched_affinity for tasks");

	/*** CPU binding support ***/
	if (job->cpu_bind_type) {
		cpu_set_t new_mask, cur_mask;
		pid_t mypid  = job->envtp->task_pid;

		slurm_getaffinity(mypid, sizeof(cur_mask), &cur_mask);

		if (get_cpuset(&new_mask, job) &&
		    (!(job->cpu_bind_type & CPU_BIND_NONE))) {
			if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
				rc = slurm_set_cpuset(base, path, mypid,
						sizeof(new_mask),
						&new_mask);
				slurm_get_cpuset(path, mypid,
						 sizeof(cur_mask),
						 &cur_mask);
			} else {
				rc = slurm_setaffinity(mypid,
						       sizeof(new_mask),
						       &new_mask);
				slurm_getaffinity(mypid,
						  sizeof(cur_mask),
						  &cur_mask);
			}
		}
		slurm_chkaffinity(rc ? &cur_mask : &new_mask,
				  job, rc);
	} else if (job->mem_bind_type &&
		   (conf->task_plugin_param & CPU_BIND_CPUSETS)) {
		cpu_set_t cur_mask;
		pid_t mypid  = job->envtp->task_pid;

		/* Establish cpuset just for the memory binding */
		slurm_getaffinity(mypid, sizeof(cur_mask), &cur_mask);
		rc = slurm_set_cpuset(base, path,
				      (pid_t) job->envtp->task_pid,
				      sizeof(cur_mask), &cur_mask);
	}

#ifdef HAVE_NUMA
	if ((conf->task_plugin_param & CPU_BIND_CPUSETS) &&
	    (slurm_memset_available() >= 0)) {
		nodemask_t new_mask, cur_mask;

		cur_mask = numa_get_membind();
		if (get_memset(&new_mask, job) &&
		    (!(job->mem_bind_type & MEM_BIND_NONE))) {
			slurm_set_memset(path, &new_mask);
			if (numa_available() >= 0)
				numa_set_membind(&new_mask);
			cur_mask = new_mask;
		}
		slurm_chk_memset(&cur_mask, job);
	} else if (job->mem_bind_type && (numa_available() >= 0)) {
		nodemask_t new_mask, cur_mask;

		cur_mask = numa_get_membind();
		if (get_memset(&new_mask, job)
		&&  (!(job->mem_bind_type & MEM_BIND_NONE))) {
			numa_set_membind(&new_mask);
			cur_mask = new_mask;
		}
		slurm_chk_memset(&cur_mask, job);
	}
#endif
	return rc;
}
Example #10
int
virNumaSetupMemoryPolicy(virNumaTuneDef numatune,
                         virBitmapPtr nodemask)
{
    nodemask_t mask;
    int mode = -1;
    int node = -1;
    int ret = -1;
    int i = 0;
    int maxnode = 0;
    virBitmapPtr tmp_nodemask = NULL;

    if (numatune.memory.placement_mode ==
        VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) {
        if (!numatune.memory.nodemask)
            return 0;
        VIR_DEBUG("Set NUMA memory policy with specified nodeset");
        tmp_nodemask = numatune.memory.nodemask;
    } else if (numatune.memory.placement_mode ==
               VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) {
        VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad");
        tmp_nodemask = nodemask;
    } else {
        return 0;
    }

    if (numa_available() < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("Host kernel is not aware of NUMA."));
        return -1;
    }

    maxnode = numa_max_node() + 1;

    /* Convert nodemask to NUMA bitmask. */
    nodemask_zero(&mask);
    i = -1;
    while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) {
        if (i >= maxnode || i >= NUMA_NUM_NODES) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Nodeset is out of range, host cannot support "
                             "NUMA node bigger than %d"), i);
            return -1;
        }
        nodemask_set(&mask, i);
    }

    mode = numatune.memory.mode;

    if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
        numa_set_bind_policy(1);
        numa_set_membind(&mask);
        numa_set_bind_policy(0);
    } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) {
        int nnodes = 0;
        for (i = 0; i < NUMA_NUM_NODES; i++) {
            if (nodemask_isset(&mask, i)) {
                node = i;
                nnodes++;
            }
        }

        if (nnodes != 1) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("NUMA memory tuning in 'preferred' mode "
                                   "only supports single node"));
            goto cleanup;
        }

        numa_set_bind_policy(0);
        numa_set_preferred(node);
    } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) {
        numa_set_interleave_mask(&mask);
    } else {
        /* XXX: Shouldn't go here, as we already do checking when
         * parsing domain XML.
         */
        virReportError(VIR_ERR_XML_ERROR,
                       "%s", _("Invalid mode for memory NUMA tuning."));
        goto cleanup;
    }

    ret = 0;

cleanup:
    return ret;
}
Example #11
/*----------------------------------------------------------------------------*/
int 
mtcp_core_affinitize(int cpu)
{
#ifndef DISABLE_NUMA
	struct bitmask *bmask;
#endif /* DISABLE_NUMA */
	cpu_set_t cpus;
	FILE *fp;
	char sysfname[MAX_FILE_NAME];
	int phy_id;
	size_t n;
	int ret;

	n = GetNumCPUs();

	if (cpu < 0 || cpu >= (int) n) {
		errno = EINVAL;
		return -1;
	}

	CPU_ZERO(&cpus);
	CPU_SET((unsigned)cpu, &cpus);

	ret = sched_setaffinity(Gettid(), sizeof(cpus), &cpus);

#ifndef DISABLE_NUMA
	if (numa_max_node() == 0)
		return ret;

	bmask = numa_bitmask_alloc(n);
	assert(bmask);
#endif /* DISABLE_NUMA */

	/* read physical id of the core from sys information */
	snprintf(sysfname, MAX_FILE_NAME - 1, 
			"/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
	fp = fopen(sysfname, "r");
	if (!fp) {
		perror(sysfname);
		errno = EFAULT;
		return -1;
	}

	ret = fscanf(fp, "%d", &phy_id);
	if (ret != 1) {
		perror("Failed to read physical package id");
		fclose(fp);
		errno = EFAULT;
		return -1;
	}

#ifndef DISABLE_NUMA
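	/* assumes the physical package id matches the NUMA node id, as on typical multi-socket x86 systems */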
	numa_bitmask_setbit(bmask, phy_id);
	numa_set_membind(bmask);
	numa_bitmask_free(bmask);
#endif /* DISABLE_NUMA */

	fclose(fp);

	return ret;
}
Example #12
size_t remap_interleaved_2m_pages(void* addr, size_t pages, int prot,
                                  bool shared /* = false */) {
#ifdef __linux__
  assert(reinterpret_cast<uintptr_t>(addr) % size2m == 0);
  assert(addr != nullptr);

  if (pages == 0) return 0;

#ifdef HAVE_NUMA
  const int maxNode = numa_max_node();
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  bitmask* mask = nullptr;
  if (maxNode > 0) {
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    mask = numa_allocate_nodemask();
  }
#else
  constexpr int maxNode = 0;
#endif
  int node = -1;
  int failed = 0;                       // consecutive failure count
  int mapped_count = 0;
  do {
#ifdef HAVE_NUMA
    if (maxNode > 0) {
      if (++node > maxNode) node = 0;
      if (!numa_node_allowed(node)) {
        // Numa policy forbids allocation on node
        if (++failed > maxNode) break;
        continue;
      }
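      // Bind to just this node for the next mmap attempt; the bit is cleared
      // again right after so the mask stays empty for the next node.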
      numa_bitmask_setbit(mask, node);
      numa_set_membind(mask);
      numa_bitmask_clearbit(mask, node);
    }
#endif
    // Fail early if we don't have huge pages reserved.
    if (get_huge2m_info(node).free_hugepages > 0 &&
        mmap_2m_impl(addr, prot, shared, true /* MAP_FIXED */)) {
      addr = (char*)addr + size2m;
      ++mapped_count;
      failed = 0;
      continue;
    }
    // We failed on node, give up if we have failed on all nodes
    if (++failed > maxNode) break;
  } while (mapped_count < pages);

#ifdef HAVE_NUMA
  if (mask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(mask);
    numa_bitmask_free(interleaveMask);
    numa_bitmask_free(memMask);
  }
#endif
  return mapped_count;
#else  // not linux
  return 0;
#endif
}
Example #13
int main(int ac, char **av)
{
	int c, i, nnodes=0;
	long node=-1;
	char *end;
	char shortopts[array_len(opts)*2 + 1];
	struct bitmask *mask = NULL;

	get_short_opts(opts,shortopts);
	while ((c = getopt_long(ac, av, shortopts, opts, NULL)) != -1) {
		switch (c) {
		case 's': /* --show */
			show();
			exit(0);
		case 'H': /* --hardware */
			nopolicy();
			hardware();
			exit(0);
		case 'i': /* --interleave */
			checknuma();
			mask = numactl_parse_nodestring(optarg);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}

			errno = 0;
			setpolicy(MPOL_INTERLEAVE);
			if (shmfd >= 0)
				numa_interleave_memory(shmptr, shmlen, mask);
			else
				numa_set_interleave_mask(mask);
			checkerror("setting interleave mask");
			break;
		case 'N': /* --cpunodebind */
		case 'c': /* --cpubind */
			dontshm("-c/--cpubind/--cpunodebind");
			checknuma();
			mask = numactl_parse_nodestring(optarg);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			errno = 0;
			check_cpubind(do_shm);
			did_cpubind = 1;
			numa_run_on_node_mask(mask);
			checkerror("sched_setaffinity");
			break;
		case 'C': /* --physcpubind */
		{
			struct bitmask *cpubuf;
			dontshm("-C/--physcpubind");
			cpubuf = numa_parse_cpustring(optarg);
			if (!cpubuf) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			errno = 0;
			check_cpubind(do_shm);
			did_cpubind = 1;
			numa_sched_setaffinity(0, cpubuf);
			checkerror("sched_setaffinity");
			free(cpubuf);
			break;
		}
		case 'm': /* --membind */
			checknuma();
			setpolicy(MPOL_BIND);
			mask = numactl_parse_nodestring(optarg);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			errno = 0;
			numa_set_bind_policy(1);
			if (shmfd >= 0) {
				numa_tonodemask_memory(shmptr, shmlen, mask);
			} else {
				numa_set_membind(mask);
			}
			numa_set_bind_policy(0);
			checkerror("setting membind");
			break;
		case 'p': /* --preferred */
			checknuma();
			setpolicy(MPOL_PREFERRED);
			mask = numactl_parse_nodestring(optarg);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			for (i=0; i<mask->size; i++) {
				if (numa_bitmask_isbitset(mask, i)) {
					node = i;
					nnodes++;
				}
			}
			if (nnodes != 1)
				usage();
			numa_bitmask_free(mask);
			errno = 0;
			numa_set_bind_policy(0);
			if (shmfd >= 0)
				numa_tonode_memory(shmptr, shmlen, node);
			else
				numa_set_preferred(node);
			checkerror("setting preferred node");
			break;
		case 'l': /* --local */
			checknuma();
			setpolicy(MPOL_DEFAULT);
			errno = 0;
			if (shmfd >= 0)
				numa_setlocal_memory(shmptr, shmlen);
			else
				numa_set_localalloc();
			checkerror("local allocation");
			break;
		case 'S': /* --shm */
			check_cpubind(did_cpubind);
			nopolicy();
			attach_sysvshm(optarg, "--shm");
			shmattached = 1;
			break;
		case 'f': /* --file */
			check_cpubind(did_cpubind);
			nopolicy();
			attach_shared(optarg, "--file");
			shmattached = 1;
			break;
		case 'L': /* --length */
			noshm("--length");
			shmlen = memsize(optarg);
			break;
		case 'M': /* --shmmode */
			noshm("--shmmode");
			shmmode = strtoul(optarg, &end, 8);
			if (end == optarg || *end)
				usage();
			break;
		case 'd': /* --dump */
			if (shmfd < 0)
				complain(
				"Cannot do --dump without shared memory.\n");
			dump_shm();
			do_dump = 1;
			break;
		case 'D': /* --dump-nodes */
			if (shmfd < 0)
				complain(
			    "Cannot do --dump-nodes without shared memory.\n");
			dump_shm_nodes();
			do_dump = 1;
			break;
		case 't': /* --strict */
			did_strict = 1;
			numa_set_strict(1);
			break;
		case 'I': /* --shmid */
			shmid = strtoul(optarg, &end, 0);
			if (end == optarg || *end)
				usage();
			break;

		case 'u': /* --huge */
			noshm("--huge");
			shmflags |= SHM_HUGETLB;
			break;

		case 'o':  /* --offset */
			noshm("--offset");
			shmoffset = memsize(optarg);
			break;			

		case 'T': /* --touch */
			needshm("--touch");
			check_shmbeyond("--touch");
			numa_police_memory(shmptr, shmlen);
			break;

		case 'V': /* --verify */
			needshm("--verify");
			if (set_policy < 0)
				complain("Need a policy first to verify");
			check_shmbeyond("--verify");
			numa_police_memory(shmptr, shmlen);
			if (!mask)
				complain("Need a mask to verify");
			else
				verify_shm(set_policy, mask);
			break;

		default:
			usage();
		}
	}

	av += optind;
	ac -= optind;

	if (shmfd >= 0) {
		if (*av)
			usage();
		exit(exitcode);
	}

	if (did_strict)
		fprintf(stderr,
			"numactl: warning. Strict flag for process ignored.\n");

	if (do_dump)
		usage_msg("cannot do --dump|--dump-shm for process");

	if (shmoption)
		usage_msg("shm related option %s for process", shmoption);
	
	if (*av == NULL)
		usage();
	execvp(*av, av);
	complain("execution of `%s': %s\n", av[0], strerror(errno));
	return 0; /* not reached */
}
Example #14
static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl)
{
    nodemask_t mask;
    int mode = -1;
    int node = -1;
    int ret = -1;
    int i = 0;
    int maxnode = 0;
    bool warned = false;

    if (!ctrl->def->numatune.memory.nodemask)
        return 0;

    VIR_DEBUG("Setting NUMA memory policy");

    if (numa_available() < 0) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       "%s", _("Host kernel is not aware of NUMA."));
        return -1;
    }

    maxnode = numa_max_node() + 1;

    /* Convert nodemask to NUMA bitmask. */
    nodemask_zero(&mask);
    i = -1;
    while ((i = virBitmapNextSetBit(ctrl->def->numatune.memory.nodemask, i)) >= 0) {
        if (i >= NUMA_NUM_NODES) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Host cannot support NUMA node %d"), i);
            return -1;
        }
        if (i >= maxnode && !warned) {
            VIR_WARN("nodeset is out of range, there is only %d NUMA "
                     "nodes on host", maxnode);
            warned = true;
        }
        nodemask_set(&mask, i);
    }

    mode = ctrl->def->numatune.memory.mode;

    if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
        numa_set_bind_policy(1);
        numa_set_membind(&mask);
        numa_set_bind_policy(0);
    } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) {
        int nnodes = 0;
        for (i = 0; i < NUMA_NUM_NODES; i++) {
            if (nodemask_isset(&mask, i)) {
                node = i;
                nnodes++;
            }
        }

        if (nnodes != 1) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           "%s", _("NUMA memory tuning in 'preferred' mode "
                                   "only supports single node"));
            goto cleanup;
        }

        numa_set_bind_policy(0);
        numa_set_preferred(node);
    } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) {
        numa_set_interleave_mask(&mask);
    } else {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Unable to set NUMA policy %s"),
                       virDomainNumatuneMemModeTypeToString(mode));
        goto cleanup;
    }

    ret = 0;

cleanup:
    return ret;
}
Example #15
/**
 * @brief Do memory binding.
 *
 * This handles the binding types map_mem, mask_mem and rank.
 * The types local (default) and none are handled directly by the daemon.
 *
 * When using libnuma with API v1, this is a no-op that just emits a warning.
 *
 * @param step  Step structure
 * @param task  Task structure
 *
 * @return No return value.
 */
void doMemBind(Step_t *step, PStask_t *task)
{

# ifndef HAVE_NUMA_ALLOCATE_NODEMASK
    mlog("%s: psslurm does not support memory binding types map_mem, mask_mem"
	    " and rank with libnuma v1\n", __func__);
    fprintf(stderr, "Memory binding type not supported with the libnuma"
	    " version in use\n");
    return;
# else

    const char delimiters[] = ",";
    uint32_t lTID;
    char *next, *saveptr, *ents, *myent, *endptr;
    char **entarray;
    unsigned int numents;
    uint16_t mynode;

    struct bitmask *nodemask = NULL;

    if (!(step->memBindType & MEM_BIND_MAP)
	    && !(step->memBindType & MEM_BIND_MASK)
	    && !(step->memBindType & MEM_BIND_RANK)) {
	/* things are handled elsewhere */
	return;
    }

    if (!PSIDnodes_bindMem(PSC_getMyID()) || getenv("__PSI_NO_MEMBIND")) {
	    // info messages already printed in doClamps()
	return;
    }

    if (numa_available()==-1) {
	fprintf(stderr, "NUMA not available:");
	return;
    }

    nodemask = numa_allocate_nodemask();
    if (!nodemask) {
	fprintf(stderr, "Allocation of nodemask failed:");
	return;
    }

    lTID = getLocalRankID(task->rank, step, step->localNodeId);

    if (step->memBindType & MEM_BIND_RANK) {
	if (lTID > (unsigned int)numa_max_node()) {
	    mlog("%s: memory binding to ranks not possible for rank %d."
		    " (local rank %d > #numa_nodes %d)\n", __func__,
		    task->rank, lTID, numa_max_node());
	    fprintf(stderr, "Memory binding to ranks not possible for rank %d,"
		    " local rank %u larger than max numa node %d.",
		    task->rank, lTID, numa_max_node());
	    if (nodemask) numa_free_nodemask(nodemask);
	    return;
	}
	if (numa_bitmask_isbitset(numa_get_mems_allowed(), lTID)) {
	    numa_bitmask_setbit(nodemask, lTID);
	} else {
	    mlog("%s: setting bit %d in memory mask not allowed in rank"
		    " %d\n", __func__, lTID, task->rank);
	    fprintf(stderr, "Not allowed to set bit %u in memory mask"
		    " of rank %d\n", lTID, task->rank);
	}
	numa_set_membind(nodemask);
	if (nodemask) numa_free_nodemask(nodemask);
	return;
    }

    ents = ustrdup(step->memBind);
    entarray = umalloc(step->tasksToLaunch[step->localNodeId] * sizeof(char*));
    numents = 0;
    myent = NULL;
    entarray[0] = NULL;

    next = strtok_r(ents, delimiters, &saveptr);
    while (next && (numents < step->tasksToLaunch[step->localNodeId])) {
	entarray[numents++] = next;
	if (numents == lTID+1) {
	    myent = next;
	    break;
	}
	next = strtok_r(NULL, delimiters, &saveptr);
    }

    if (!myent && numents) {
	myent = entarray[lTID % numents];
    }

    if (!myent) {
	numa_set_membind(numa_all_nodes_ptr);
	if (step->memBindType & MEM_BIND_MASK) {
	    mlog("%s: invalid mem mask string '%s'\n", __func__, ents);
	}
	else if (step->memBindType & MEM_BIND_MAP) {
	    mlog("%s: invalid mem map string '%s'\n", __func__, ents);
	}
	goto cleanup;
    }

    if (step->memBindType & MEM_BIND_MAP) {

	if (strncmp(myent, "0x", 2) == 0) {
	    mynode = strtoul (myent+2, &endptr, 16);
	} else {
	    mynode = strtoul (myent, &endptr, 10);
	}

	if (*endptr == '\0' && mynode <= numa_max_node()) {
	    if (numa_bitmask_isbitset(numa_get_mems_allowed(), mynode)) {
		numa_bitmask_setbit(nodemask, mynode);
	    } else {
		mlog("%s: setting bit %d in memory mask not allowed in rank"
			" %d\n", __func__, mynode, task->rank);
		fprintf(stderr, "Not allowed to set bit %d in memory mask"
			" of rank %d\n", mynode, task->rank);
	    }
	} else {
	    mlog("%s: invalid memory map entry '%s' (%d) for rank %d\n",
		    __func__, myent, mynode, task->rank);
	    fprintf(stderr, "Invalid memory map entry '%s' for rank %d\n",
		    myent, task->rank);
	    numa_set_membind(numa_all_nodes_ptr);
	    goto cleanup;
	}
	mdbg(PSSLURM_LOG_PART, "%s: (bind_map) node %i local task %i"
	     " memstr '%s'\n", __func__, step->localNodeId, lTID, myent);

    } else if (step->memBindType & MEM_BIND_MASK) {
	parseNUMAmask(nodemask, myent, task->rank);
    }

    numa_set_membind(nodemask);

    cleanup:

    ufree(ents);
    ufree(entarray);
    if (nodemask) numa_free_nodemask(nodemask);
# endif

    return;
}