Example #1
void* mmap_1g(void* addr /* = nullptr */, int node /* = -1 */) {
#ifdef __linux__
  if (s_num1GPages >= kMaxNum1GPages) return nullptr;
  if (get_huge1g_info(node).free_hugepages <= 0) return nullptr;
  if (node >= 0 && !numa_node_allowed(node)) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
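    // Save the current binding and interleave masks so the previous policy
    // can be restored once the page has been mapped.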
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_1g_impl(addr);
  if (ret != nullptr) {
    s_1GPages[s_num1GPages++] = ret;
  }
#ifdef HAVE_NUMA
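  // Restore the memory policy that was in effect before the node binding.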
  if (memMask) {
    assert(interleaveMask);
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else
  return nullptr;
#endif
}
Example #2
void* mmap_2m(void* addr, int prot, int node /* = -1 */,
              bool map_shared /* = false */, bool map_fixed /* = false */) {
#ifdef __linux__
  if (get_huge2m_info(node).free_hugepages <= 0) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
    assert(numa_node_set != 0);
    if ((numa_node_set & (1u << node)) == 0) {
      // Numa policy forbids allocation on the node.
      return nullptr;
    }
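    // Temporarily bind allocations to the requested node; the saved masks
    // are restored after the mapping attempt below.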
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_2m_impl(addr, prot, map_shared, map_fixed);
  s_num2MPages += !!ret;
#ifdef HAVE_NUMA
  if (memMask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else  // not linux
  return nullptr;
#endif
}
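Both functions above use the same bracket pattern: mmap() takes no NUMA node, so they save the current membind mask, bind to the single target node, fault the pages in, and then restore the saved masks (the interleave mask is restored as well, because numa_set_membind() replaces the task-wide memory policy). A minimal sketch of that pattern for an ordinary anonymous mapping, without the huge-page bookkeeping; mmap_on_node is a hypothetical name, and the program must be linked with -lnuma:

#include <string.h>
#include <numa.h>
#include <sys/mman.h>

void* mmap_on_node(size_t len, int node) {
  bitmask* oldBind = numa_get_membind();
  bitmask* mask = numa_allocate_nodemask();
  numa_bitmask_setbit(mask, node);
  numa_set_membind(mask);
  numa_bitmask_free(mask);

  void* p = mmap(nullptr, len, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p != MAP_FAILED) {
    memset(p, 0, len);  // touch the pages while the binding is active
  }

  numa_set_membind(oldBind);
  numa_bitmask_free(oldBind);
  return p == MAP_FAILED ? nullptr : p;
}

The memset matters: the membind policy is applied when pages are faulted in, not when the address range is reserved.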
Example #3
int
virNumaSetupMemoryPolicy(virDomainNumatuneMemMode mode,
                         virBitmapPtr nodeset)
{
    nodemask_t mask;
    int node = -1;
    int ret = -1;
    int bit = 0;
    size_t i;
    int maxnode = 0;

    if (!nodeset)
        return 0;

    if (!virNumaNodesetIsAvailable(nodeset))
        return -1;

    maxnode = numa_max_node();
    maxnode = maxnode < NUMA_NUM_NODES ? maxnode : NUMA_NUM_NODES;

    /* Convert nodemask to NUMA bitmask. */
    nodemask_zero(&mask);
    bit = -1;
    while ((bit = virBitmapNextSetBit(nodeset, bit)) >= 0) {
        if (bit > maxnode) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("NUMA node %d is out of range"), bit);
            return -1;
        }
        nodemask_set(&mask, bit);
    }

    switch (mode) {
    case VIR_DOMAIN_NUMATUNE_MEM_STRICT:
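        /* Make the membind below strict (fail rather than fall back to
         * other nodes), then reset the flag. */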
        numa_set_bind_policy(1);
        numa_set_membind(&mask);
        numa_set_bind_policy(0);
        break;

    case VIR_DOMAIN_NUMATUNE_MEM_PREFERRED:
    {
        int nnodes = 0;
        for (i = 0; i < NUMA_NUM_NODES; i++) {
            if (nodemask_isset(&mask, i)) {
                node = i;
                nnodes++;
            }
        }

        if (nnodes != 1) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("NUMA memory tuning in 'preferred' mode "
                                   "only supports single node"));
            goto cleanup;
        }

        numa_set_bind_policy(0);
        numa_set_preferred(node);
    }
    break;

    case VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE:
        numa_set_interleave_mask(&mask);
        break;

    case VIR_DOMAIN_NUMATUNE_MEM_LAST:
        break;
    }
    ret = 0;

cleanup:
    return ret;
}
Example #4
int
virNumaSetupMemoryPolicy(virNumaTuneDef numatune,
                         virBitmapPtr nodemask)
{
    nodemask_t mask;
    int mode = -1;
    int node = -1;
    int ret = -1;
    int i = 0;
    int maxnode = 0;
    virBitmapPtr tmp_nodemask = NULL;

    if (numatune.memory.placement_mode ==
        VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) {
        if (!numatune.memory.nodemask)
            return 0;
        VIR_DEBUG("Set NUMA memory policy with specified nodeset");
        tmp_nodemask = numatune.memory.nodemask;
    } else if (numatune.memory.placement_mode ==
               VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) {
        VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad");
        tmp_nodemask = nodemask;
    } else {
        return 0;
    }

    if (numa_available() < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("Host kernel is not aware of NUMA."));
        return -1;
    }

    maxnode = numa_max_node() + 1;

    /* Convert nodemask to NUMA bitmask. */
    nodemask_zero(&mask);
    i = -1;
    while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) {
        if (i >= maxnode || i >= NUMA_NUM_NODES) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Nodeset is out of range, host cannot support "
                             "NUMA node bigger than %d"), i);
            return -1;
        }
        nodemask_set(&mask, i);
    }

    mode = numatune.memory.mode;

    if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
        numa_set_bind_policy(1);
        numa_set_membind(&mask);
        numa_set_bind_policy(0);
    } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) {
        int nnodes = 0;
        for (i = 0; i < NUMA_NUM_NODES; i++) {
            if (nodemask_isset(&mask, i)) {
                node = i;
                nnodes++;
            }
        }

        if (nnodes != 1) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("NUMA memory tuning in 'preferred' mode "
                                   "only supports single node"));
            goto cleanup;
        }

        numa_set_bind_policy(0);
        numa_set_preferred(node);
    } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) {
        numa_set_interleave_mask(&mask);
    } else {
        /* XXX: Shouldn't go here, as we already do checking when
         * parsing domain XML.
         */
        virReportError(VIR_ERR_XML_ERROR,
                       "%s", _("Invalid mode for memory NUMA tuning."));
        goto cleanup;
    }

    ret = 0;

cleanup:
    return ret;
}
Example #5
size_t remap_interleaved_2m_pages(void* addr, size_t pages, int prot,
                                  bool shared /* = false */) {
#ifdef __linux__
  assert(reinterpret_cast<uintptr_t>(addr) % size2m == 0);
  assert(addr != nullptr);

  if (pages == 0) return 0;

#ifdef HAVE_NUMA
  const int maxNode = numa_max_node();
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  bitmask* mask = nullptr;
  if (maxNode > 0) {
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    mask = numa_allocate_nodemask();
  }
#else
  constexpr int maxNode = 0;
#endif
  int node = -1;
  int failed = 0;                       // consecutive failure count
  size_t mapped_count = 0;              // matches the size_t return type
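  // Walk the nodes round-robin, binding allocations to one node at a time
  // and mapping a single 2M page there per iteration; give up after failing
  // on every node in a row.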
  do {
#ifdef HAVE_NUMA
    if (maxNode > 0) {
      if (++node > maxNode) node = 0;
      if (!numa_node_allowed(node)) {
        // Numa policy forbids allocation on node
        if (++failed > maxNode) break;
        continue;
      }
      numa_bitmask_setbit(mask, node);
      numa_set_membind(mask);
      numa_bitmask_clearbit(mask, node);
    }
#endif
    // Fail early if we don't have huge pages reserved.
    if (get_huge2m_info(node).free_hugepages > 0 &&
        mmap_2m_impl(addr, prot, shared, true /* MAP_FIXED */)) {
      addr = (char*)addr + size2m;
      ++mapped_count;
      failed = 0;
      continue;
    }
    // We failed on node, give up if we have failed on all nodes
    if (++failed > maxNode) break;
  } while (mapped_count < pages);

#ifdef HAVE_NUMA
  if (mask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(mask);
    numa_bitmask_free(interleaveMask);
    numa_bitmask_free(memMask);
  }
#endif
  return mapped_count;
#else  // not linux
  return 0;
#endif
}
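remap_interleaved_2m_pages() interleaves by hand, rotating numa_set_membind() across the nodes so it can check the per-node huge-page reservation (get_huge2m_info) as it goes. For ordinary pages, numa_set_interleave_mask() gives the same page-by-page spreading in one call. A minimal sketch, assuming all nodes are allowed; interleaved_alloc is a hypothetical name, link with -lnuma:

#include <stdlib.h>
#include <string.h>
#include <numa.h>

void* interleaved_alloc(size_t len) {
  if (numa_available() < 0) return nullptr;

  // Save the current interleave mask so it can be restored afterwards.
  bitmask* oldMask = numa_get_interleave_mask();

  // Spread subsequent allocations page-by-page over all allowed nodes.
  numa_set_interleave_mask(numa_all_nodes_ptr);

  void* p = malloc(len);
  if (p) memset(p, 0, len);  // pages land on alternating nodes at fault time

  numa_set_interleave_mask(oldMask);
  numa_bitmask_free(oldMask);
  return p;
}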
Example #6
int main(int ac, char **av)
{
	int c, i, nnodes=0;
	long node=-1;
	char *end;
	char shortopts[array_len(opts)*2 + 1];
	struct bitmask *mask = NULL;

	get_short_opts(opts,shortopts);
	while ((c = getopt_long(ac, av, shortopts, opts, NULL)) != -1) {
		switch (c) {
		case 's': /* --show */
			show();
			exit(0);
		case 'H': /* --hardware */
			nopolicy();
			hardware();
			exit(0);
		case 'i': /* --interleave */
			checknuma();
			mask = numactl_parse_nodestring(optarg);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}

			errno = 0;
			setpolicy(MPOL_INTERLEAVE);
			if (shmfd >= 0)
				numa_interleave_memory(shmptr, shmlen, mask);
			else
				numa_set_interleave_mask(mask);
			checkerror("setting interleave mask");
			break;
		case 'N': /* --cpunodebind */
		case 'c': /* --cpubind */
			dontshm("-c/--cpubind/--cpunodebind");
			checknuma();
			mask = numactl_parse_nodestring(optarg);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			errno = 0;
			check_cpubind(do_shm);
			did_cpubind = 1;
			numa_run_on_node_mask(mask);
			checkerror("sched_setaffinity");
			break;
		case 'C': /* --physcpubind */
		{
			struct bitmask *cpubuf;
			dontshm("-C/--physcpubind");
			cpubuf = numa_parse_cpustring(optarg);
			if (!cpubuf) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			errno = 0;
			check_cpubind(do_shm);
			did_cpubind = 1;
			numa_sched_setaffinity(0, cpubuf);
			checkerror("sched_setaffinity");
			free(cpubuf);
			break;
		}
		case 'm': /* --membind */
			checknuma();
			setpolicy(MPOL_BIND);
			mask = numactl_parse_nodestring(optarg);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			errno = 0;
			numa_set_bind_policy(1);
			if (shmfd >= 0) {
				numa_tonodemask_memory(shmptr, shmlen, mask);
			} else {
				numa_set_membind(mask);
			}
			numa_set_bind_policy(0);
			checkerror("setting membind");
			break;
		case 'p': /* --preferred */
			checknuma();
			setpolicy(MPOL_PREFERRED);
			mask = numactl_parse_nodestring(optarg);
			if (!mask) {
				printf ("<%s> is invalid\n", optarg);
				usage();
			}
			for (i=0; i<mask->size; i++) {
				if (numa_bitmask_isbitset(mask, i)) {
					node = i;
					nnodes++;
				}
			}
			if (nnodes != 1)
				usage();
			numa_bitmask_free(mask);
			errno = 0;
			numa_set_bind_policy(0);
			if (shmfd >= 0)
				numa_tonode_memory(shmptr, shmlen, node);
			else
				numa_set_preferred(node);
			checkerror("setting preferred node");
			break;
		case 'l': /* --local */
			checknuma();
			setpolicy(MPOL_DEFAULT);
			errno = 0;
			if (shmfd >= 0)
				numa_setlocal_memory(shmptr, shmlen);
			else
				numa_set_localalloc();
			checkerror("local allocation");
			break;
		case 'S': /* --shm */
			check_cpubind(did_cpubind);
			nopolicy();
			attach_sysvshm(optarg, "--shm");
			shmattached = 1;
			break;
		case 'f': /* --file */
			check_cpubind(did_cpubind);
			nopolicy();
			attach_shared(optarg, "--file");
			shmattached = 1;
			break;
		case 'L': /* --length */
			noshm("--length");
			shmlen = memsize(optarg);
			break;
		case 'M': /* --shmmode */
			noshm("--shmmode");
			shmmode = strtoul(optarg, &end, 8);
			if (end == optarg || *end)
				usage();
			break;
		case 'd': /* --dump */
			if (shmfd < 0)
				complain(
				"Cannot do --dump without shared memory.\n");
			dump_shm();
			do_dump = 1;
			break;
		case 'D': /* --dump-nodes */
			if (shmfd < 0)
				complain(
			    "Cannot do --dump-nodes without shared memory.\n");
			dump_shm_nodes();
			do_dump = 1;
			break;
		case 't': /* --strict */
			did_strict = 1;
			numa_set_strict(1);
			break;
		case 'I': /* --shmid */
			shmid = strtoul(optarg, &end, 0);
			if (end == optarg || *end)
				usage();
			break;

		case 'u': /* --huge */
			noshm("--huge");
			shmflags |= SHM_HUGETLB;
			break;

		case 'o':  /* --offset */
			noshm("--offset");
			shmoffset = memsize(optarg);
			break;			

		case 'T': /* --touch */
			needshm("--touch");
			check_shmbeyond("--touch");
			numa_police_memory(shmptr, shmlen);
			break;

		case 'V': /* --verify */
			needshm("--verify");
			if (set_policy < 0)
				complain("Need a policy first to verify");
			check_shmbeyond("--verify");
			numa_police_memory(shmptr, shmlen);
			if (!mask)
				complain("Need a mask to verify");
			else
				verify_shm(set_policy, mask);
			break;

		default:
			usage();
		}
	}

	av += optind;
	ac -= optind;

	if (shmfd >= 0) {
		if (*av)
			usage();
		exit(exitcode);
	}

	if (did_strict)
		fprintf(stderr,
			"numactl: warning. Strict flag for process ignored.\n");

	if (do_dump)
		usage_msg("cannot do --dump|--dump-shm for process");

	if (shmoption)
		usage_msg("shm related option %s for process", shmoption);
	
	if (*av == NULL)
		usage();
	execvp(*av, av);
	complain("execution of `%s': %s\n", av[0], strerror(errno));
	return 0; /* not reached */
}
Example #7
int INTERNAL qt_affinity_gendists(qthread_shepherd_t   *sheps,
                                  qthread_shepherd_id_t nshepherds)
{                                      /*{{{ */
    const size_t num_extant_nodes = numa_max_node() + 1;
    nodemask_t   bmask;

    qthread_debug(AFFINITY_FUNCTIONS, "sheps(%p), nshepherds(%u), num_extant_nodes:%u\n", sheps, nshepherds, (unsigned)num_extant_nodes);
    if (numa_available() == -1) {
        return QTHREAD_THIRD_PARTY_ERROR;
    }
    nodemask_zero(&bmask);
    /* assign nodes */
    qthread_debug(AFFINITY_DETAILS, "assign nodes...\n");
    for (size_t i = 0; i < nshepherds; ++i) {
        sheps[i].node = i % num_extant_nodes;
	qthread_debug(AFFINITY_DETAILS, "set bit %u in bmask\n", i % num_extant_nodes);
        nodemask_set(&bmask, i % num_extant_nodes);
    }
    qthread_debug(AFFINITY_DETAILS, "numa_set_interleave_mask\n");
    numa_set_interleave_mask(&bmask);
    qthread_debug(AFFINITY_DETAILS, "querying distances...\n");
    /* truly ancient versions of libnuma (in the changelog, this is
     * considered "pre-history") do not have numa_distance() */
    for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) {
	qthread_debug(AFFINITY_DETAILS, "i = %u < %u...\n", i, nshepherds);
        const unsigned int node_i = sheps[i].node;
        size_t             j, k;
        sheps[i].shep_dists      = calloc(nshepherds, sizeof(unsigned int));
        sheps[i].sorted_sheplist = calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t));
	qthread_debug(AFFINITY_DETAILS, "allocs %p %p\n", sheps[i].shep_dists, sheps[i].sorted_sheplist);
        assert(sheps[i].shep_dists);
        assert(sheps[i].sorted_sheplist);
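        /* Record the NUMA distance from shepherd i to every other shepherd. */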
        for (j = 0; j < nshepherds; j++) {
            const unsigned int node_j = sheps[j].node;

#if QTHREAD_NUMA_DISTANCE_WORKING
            if ((node_i != QTHREAD_NO_NODE) && (node_j != QTHREAD_NO_NODE) && (node_i != node_j)) {
                sheps[i].shep_dists[j] = numa_distance(node_i, node_j);
            } else {
#endif
                /* XXX too arbitrary */
                if (i == j) {
                    sheps[i].shep_dists[j] = 0;
                } else {
                    sheps[i].shep_dists[j] = 20;
                }
#if QTHREAD_NUMA_DISTANCE_WORKING
            }
#endif
	    qthread_debug(AFFINITY_DETAILS, "shep %u to shep %u distance: %u\n", i, j, sheps[i].shep_dists[j]);
        }
        k = 0;
        for (j = 0; j < nshepherds; j++) {
            if (j != i) {
                sheps[i].sorted_sheplist[k++] = j;
            }
        }
        if (nshepherds > 1) {
            sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds);
        }
    }
    return QTHREAD_SUCCESS;
}                                      /*}}} */
Example #8
static int virLXCControllerSetupNUMAPolicy(virLXCControllerPtr ctrl)
{
    nodemask_t mask;
    int mode = -1;
    int node = -1;
    int ret = -1;
    int i = 0;
    int maxnode = 0;
    bool warned = false;

    if (!ctrl->def->numatune.memory.nodemask)
        return 0;

    VIR_DEBUG("Setting NUMA memory policy");

    if (numa_available() < 0) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       "%s", _("Host kernel is not aware of NUMA."));
        return -1;
    }

    maxnode = numa_max_node() + 1;

    /* Convert nodemask to NUMA bitmask. */
    nodemask_zero(&mask);
    i = -1;
    while ((i = virBitmapNextSetBit(ctrl->def->numatune.memory.nodemask, i)) >= 0) {
        if (i >= NUMA_NUM_NODES) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Host cannot support NUMA node %d"), i);
            return -1;
        }
        if (i > maxnode && !warned) {
            VIR_WARN("nodeset is out of range, there is only %d NUMA "
                     "nodes on host", maxnode);
            warned = true;
        }
        nodemask_set(&mask, i);
    }

    mode = ctrl->def->numatune.memory.mode;

    if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
        numa_set_bind_policy(1);
        numa_set_membind(&mask);
        numa_set_bind_policy(0);
    } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) {
        int nnodes = 0;
        for (i = 0; i < NUMA_NUM_NODES; i++) {
            if (nodemask_isset(&mask, i)) {
                node = i;
                nnodes++;
            }
        }

        if (nnodes != 1) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           "%s", _("NUMA memory tuning in 'preferred' mode "
                                   "only supports single node"));
            goto cleanup;
        }

        numa_set_bind_policy(0);
        numa_set_preferred(node);
    } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) {
        numa_set_interleave_mask(&mask);
    } else {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Unable to set NUMA policy %s"),
                       virDomainNumatuneMemModeTypeToString(mode));
        goto cleanup;
    }

    ret = 0;

cleanup:
    return ret;
}