int main(void) { int max = numa_max_node(); int maxmask = numa_num_possible_nodes(); struct bitmask *nodes, *mask; int pagesize = getpagesize(); int i; int pol; int node; int err = 0; nodes = numa_bitmask_alloc(maxmask); mask = numa_bitmask_alloc(maxmask); for (i = max; i >= 0; --i) { char *mem = mmap(NULL, pagesize*(max+1), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); char *adr = mem; if (mem == (char *)-1) err("mmap"); printf("%d offset %lx\n", i, (long)(adr - mem)); numa_bitmask_clearall(nodes); numa_bitmask_clearall(mask); numa_bitmask_setbit(nodes, i); if (mbind(adr, pagesize, MPOL_PREFERRED, nodes->maskp, nodes->size, 0) < 0) err("mbind"); ++*adr; if (get_mempolicy(&pol, mask->maskp, mask->size, adr, MPOL_F_ADDR) < 0) err("get_mempolicy"); assert(pol == MPOL_PREFERRED); assert(numa_bitmask_isbitset(mask, i)); node = 0x123; if (get_mempolicy(&node, NULL, 0, adr, MPOL_F_ADDR|MPOL_F_NODE) < 0) err("get_mempolicy2"); printf("got node %d expected %d\n", node, i); if (node != i) err = 1; } return err; }
/*
 * Print the CPU affinity mask of the calling task under the label
 * "physcpubind".
 *
 * The mask size starts at numa_num_configured_cpus() and is doubled (up to
 * 1024*1024 bits) for as long as numa_sched_getaffinity() fails with EINVAL.
 * Any other failure is handed to err(), which is defined elsewhere in this
 * file.
 */
void show_physcpubind(void)
{
	int ncpus = numa_num_configured_cpus();

	for (;;) {
		struct bitmask *cpubuf = numa_bitmask_alloc(ncpus);

		if (numa_sched_getaffinity(0, cpubuf) < 0) {
			if (errno == EINVAL && ncpus < 1024*1024) {
				/* Mask too small: drop it and retry with twice the size. */
				numa_bitmask_free(cpubuf);
				ncpus *= 2;
				continue;
			}
			err("sched_get_affinity");
		}
		printcpumask("physcpubind", cpubuf);
		numa_bitmask_free(cpubuf);
		break;
	}
}
/*
 * Pin the calling thread to a single CPU and, on multi-node systems, bind
 * its memory to one NUMA node.
 *
 * cpu: index of the CPU to run on; must be in [0, get_num_cpus()).
 *
 * Returns -1 with errno set to EINVAL for an out-of-range cpu, -1 if the
 * CPU set cannot be allocated, otherwise the result of sched_setaffinity().
 */
int bind_cpu(int cpu)
{
	cpu_set_t *cmask;
	struct bitmask *bmask;
	size_t ncpu, setsize;
	int ret;

	ncpu = get_num_cpus();
	if (cpu < 0 || cpu >= (int)ncpu) {
		/* errno values are positive */
		errno = EINVAL;
		return -1;
	}

	cmask = CPU_ALLOC(ncpu);
	if (cmask == NULL)
		return -1;

	setsize = CPU_ALLOC_SIZE(ncpu);
	CPU_ZERO_S(setsize, cmask);
	CPU_SET_S(cpu, setsize, cmask);

	/* sched_setaffinity() wants the size of the set in bytes, not a CPU count. */
	ret = sched_setaffinity(0, setsize, cmask);

	CPU_FREE(cmask);

	/* skip NUMA stuff for UMA systems */
	if (numa_max_node() == 0)
		return ret;

	bmask = numa_bitmask_alloc(16);
	assert(bmask);

	/* NOTE(review): assumes even/odd CPUs sit on nodes 0/1 -- confirm. */
	numa_bitmask_setbit(bmask, cpu % 2);
	numa_set_membind(bmask);
	numa_bitmask_free(bmask);

	return ret;
}
/*
 * Per-worker initialisation.
 *
 * Allocates the worker's buffer, resets its packet/byte counters, calls
 * nids_init() for the worker's core and, unless AFFINITY_NO is defined,
 * pins the calling thread to context->core_id and requests SCHED_FIFO at
 * priority 99.  On systems with more than one NUMA node the memory binding
 * is set to node (core_id % 2).
 *
 * Returns 0 on success, -1 if the buffer cannot be allocated.
 */
int fire_worker_init(fire_worker_context_t *context)
{
	buffer[context->queue_id] = malloc(buf_size);
	if (buffer[context->queue_id] == NULL)
		return -1;

	/* init worker struct */
	fire_worker_t *cc = &(workers[context->queue_id]);
	cc->total_packets = 0;
	cc->total_bytes = 0;

	/* nids init */
	nids_init(context->core_id);

#if !defined(AFFINITY_NO)
	/* set schedule affinity */
	/* 1UL: do the shift in unsigned long so core ids >= 31 do not overflow int. */
	unsigned long mask = 1UL << context->core_id;
	if (sched_setaffinity(0, sizeof(unsigned long), (cpu_set_t *)&mask) < 0) {
		assert(0);
	}

	/* set schedule policy */
	struct sched_param param;
	param.sched_priority = 99;
	pthread_setschedparam(pthread_self(), SCHED_FIFO, &param);
#endif

	/* nothing to bind on a single-node system */
	if (numa_max_node() == 0)
		return 0;

	struct bitmask *bmask;

	bmask = numa_bitmask_alloc(16);
	assert(bmask);
	/* NOTE(review): assumes even/odd cores sit on nodes 0/1 -- confirm. */
	numa_bitmask_setbit(bmask, context->core_id % 2);
	numa_set_membind(bmask);
	numa_bitmask_free(bmask);

	return 0;
}
void myhbwmalloc_init(void) { /* set to NULL before trying to initialize. if we return before * successful creation of the mspace, then it will still be NULL, * and we can use that in subsequent library calls to determine * that the library failed to initialize. */ myhbwmalloc_mspace = NULL; /* verbose printout? */ myhbwmalloc_verbose = 0; { char * env_char = getenv("HBWMALLOC_VERBOSE"); if (env_char != NULL) { myhbwmalloc_verbose = 1; printf("hbwmalloc: HBWMALLOC_VERBOSE set\n"); } } /* fail hard or soft? */ myhbwmalloc_hardfail = 1; { char * env_char = getenv("HBWMALLOC_SOFTFAIL"); if (env_char != NULL) { myhbwmalloc_hardfail = 0; printf("hbwmalloc: HBWMALLOC_SOFTFAIL set\n"); } } /* set the atexit handler that will destroy the mspace and free the numa allocation */ atexit(myhbwmalloc_final); /* detect and configure use of NUMA memory nodes */ { int max_possible_node = numa_max_possible_node(); int num_possible_nodes = numa_num_possible_nodes(); int max_numa_nodes = numa_max_node(); int num_configured_nodes = numa_num_configured_nodes(); int num_configured_cpus = numa_num_configured_cpus(); if (myhbwmalloc_verbose) { printf("hbwmalloc: numa_max_possible_node() = %d\n", max_possible_node); printf("hbwmalloc: numa_num_possible_nodes() = %d\n", num_possible_nodes); printf("hbwmalloc: numa_max_node() = %d\n", max_numa_nodes); printf("hbwmalloc: numa_num_configured_nodes() = %d\n", num_configured_nodes); printf("hbwmalloc: numa_num_configured_cpus() = %d\n", num_configured_cpus); } /* FIXME this is a hack. assumes HBW is only numa node 1. 
*/ if (num_configured_nodes <= 2) { myhbwmalloc_numa_node = num_configured_nodes-1; } else { fprintf(stderr,"hbwmalloc: we support only 2 numa nodes, not %d\n", num_configured_nodes); } if (myhbwmalloc_verbose) { for (int i=0; i<num_configured_nodes; i++) { unsigned max_numa_cpus = numa_num_configured_cpus(); struct bitmask * mask = numa_bitmask_alloc( max_numa_cpus ); int rc = numa_node_to_cpus(i, mask); if (rc != 0) { fprintf(stderr, "hbwmalloc: numa_node_to_cpus failed\n"); } else { printf("hbwmalloc: numa node %d cpu mask:", i); for (unsigned j=0; j<max_numa_cpus; j++) { int bit = numa_bitmask_isbitset(mask,j); printf(" %d", bit); } printf("\n"); } numa_bitmask_free(mask); } fflush(stdout); } } #if 0 /* unused */ /* see if the user specifies a slab size */ size_t slab_size_requested = 0; { char * env_char = getenv("HBWMALLOC_BYTES"); if (env_char!=NULL) { long units = 1L; if ( NULL != strstr(env_char,"G") ) units = 1000000000L; else if ( NULL != strstr(env_char,"M") ) units = 1000000L; else if ( NULL != strstr(env_char,"K") ) units = 1000L; else units = 1L; int num_count = strspn(env_char, "0123456789"); memset( &env_char[num_count], ' ', strlen(env_char)-num_count); slab_size_requested = units * atol(env_char); } if (myhbwmalloc_verbose) { printf("hbwmalloc: requested slab_size_requested = %zu\n", slab_size_requested); } } #endif /* see what libnuma says is available */ size_t myhbwmalloc_slab_size; { int node = myhbwmalloc_numa_node; long long freemem; long long maxmem = numa_node_size64(node, &freemem); if (myhbwmalloc_verbose) { printf("hbwmalloc: numa_node_size64 says maxmem=%lld freemem=%lld for numa node %d\n", maxmem, freemem, node); } myhbwmalloc_slab_size = freemem; } /* assume threads, disable if MPI knows otherwise, then allow user to override. 
*/ int multithreaded = 1; #ifdef HAVE_MPI int nprocs; { int is_init, is_final; MPI_Initialized(&is_init); MPI_Finalized(&is_final); if (is_init && !is_final) { MPI_Comm_size(MPI_COMM_WORLD, &nprocs); } /* give equal portion to every MPI process */ myhbwmalloc_slab_size /= nprocs; /* if the user initializes MPI with MPI_Init or * MPI_Init_thread(MPI_THREAD_SINGLE), they assert there * are no threads at all, which means we can skip the * malloc mspace lock. * * if the user lies to MPI, they deserve any bad thing * that comes of it. */ int provided; MPI_Query_thread(&provided); if (provided==MPI_THREAD_SINGLE) { multithreaded = 0; } else { multithreaded = 1; } if (myhbwmalloc_verbose) { printf("hbwmalloc: MPI processes = %d (threaded = %d)\n", nprocs, multithreaded); printf("hbwmalloc: myhbwmalloc_slab_size = %d\n", myhbwmalloc_slab_size); } } #endif /* user can assert that hbwmalloc and friends need not be thread-safe */ { char * env_char = getenv("HBWMALLOC_LOCKLESS"); if (env_char != NULL) { multithreaded = 0; if (myhbwmalloc_verbose) { printf("hbwmalloc: user has disabled locking in mspaces by setting HBWMALLOC_LOCKLESS\n"); } } } myhbwmalloc_slab = numa_alloc_onnode( myhbwmalloc_slab_size, myhbwmalloc_numa_node); if (myhbwmalloc_slab==NULL) { fprintf(stderr, "hbwmalloc: numa_alloc_onnode returned NULL for size = %zu\n", myhbwmalloc_slab_size); return; } else { if (myhbwmalloc_verbose) { printf("hbwmalloc: numa_alloc_onnode succeeded for size %zu\n", myhbwmalloc_slab_size); } /* part (less than 128*sizeof(size_t) bytes) of this space is used for bookkeeping, * so the capacity must be at least this large */ if (myhbwmalloc_slab_size < 128*sizeof(size_t)) { fprintf(stderr, "hbwmalloc: not enough space for mspace bookkeeping\n"); return; } /* see above regarding if the user lies to MPI. 
*/ int locked = multithreaded; myhbwmalloc_mspace = create_mspace_with_base( myhbwmalloc_slab, myhbwmalloc_slab_size, locked); if (myhbwmalloc_mspace == NULL) { fprintf(stderr, "hbwmalloc: create_mspace_with_base returned NULL\n"); return; } else if (myhbwmalloc_verbose) { printf("hbwmalloc: create_mspace_with_base succeeded for size %zu\n", myhbwmalloc_slab_size); } } }
/*
 * Parse a CPU list such as "0, 2-5,7" into a freshly allocated bitmask.
 *
 * s: comma-separated decimal CPU numbers and inclusive "lo-hi" ranges;
 *    whitespace around numbers, '-' and ',' is ignored.
 *
 * Returns a bitmask with one bit set per listed CPU, sized to the largest
 * CPU number plus one, or NULL after printing a diagnostic to stderr when
 * the string is malformed.  Allocation failures go through panic().
 * A range whose low end exceeds its high end contributes no CPUs.
 *
 * NOTE(review): values are parsed with strtol() and narrowed to int with no
 * range check; absurdly large numbers are not rejected.
 */
CPU_Set_t *
CPU_ParseSet(const char *s)
{
    // XXX numa_parse_cpustring will only accept CPU's that are
    // less than the highest CPU we're affinitized to and it
    // always masks it to the CPU's the program started with.
    //    return numa_parse_cpustring((char*)s);

    struct bitmask *bm = NULL;
    int *cpus = NULL;
    int curCpu = 0, maxCpus = 0, maxCpuVal = 0;

    while (*s) {
        /* <ctype.h> functions need an unsigned char value. */
        while (isspace((unsigned char)*s))
            ++s;
        if (!*s)
            break;
        if (!isdigit((unsigned char)*s)) {
            fprintf(stderr, "CPU set expected number: %s", s);
            goto fail;
        }

        char *end;
        int lo = strtol(s, &end, 10);
        int hi = lo;
        s = end;
        while (isspace((unsigned char)*s))
            ++s;

        /* Optional "-hi" turns the single number into a range. */
        if (*s == '-') {
            s++;
            while (isspace((unsigned char)*s))
                ++s;
            if (!isdigit((unsigned char)*s)) {
                fprintf(stderr, "CPU set expected number: %s", s);
                goto fail;
            }
            hi = strtol(s, &end, 10);
            s = end;
            while (isspace((unsigned char)*s))
                ++s;
        }

        for (int cpu = lo; cpu <= hi; ++cpu) {
            if (curCpu == maxCpus) {
                /* Grow geometrically; keep the old pointer until realloc succeeds. */
                maxCpus = maxCpus ? 2*maxCpus : 16;
                int *grown = realloc(cpus, maxCpus * sizeof *cpus);
                if (!grown)
                    panic("Failed to allocate CPU list");
                cpus = grown;
            }
            cpus[curCpu++] = cpu;
            if (cpu > maxCpuVal)
                maxCpuVal = cpu;
        }

        if (*s == ',') {
            s++;
            continue;
        }
        if (*s) {
            fprintf(stderr, "CPU set expected ',': %s", s);
            goto fail;
        }
    }

    bm = numa_bitmask_alloc(maxCpuVal + 1);
    if (!bm)
        panic("Failed to allocate CPU bitmask");
    for (int i = 0; i < curCpu; ++i)
        numa_bitmask_setbit(bm, cpus[i]);

fail:
    /* Reached on success too: the scratch list is always released. */
    free(cpus);
    return bm;
}
/*----------------------------------------------------------------------------*/ int mtcp_core_affinitize(int cpu) { #ifndef DISABLE_NUMA struct bitmask *bmask; #endif /* DISABLE_NUMA */ cpu_set_t cpus; FILE *fp; char sysfname[MAX_FILE_NAME]; int phy_id; size_t n; int ret; n = GetNumCPUs(); if (cpu < 0 || cpu >= (int) n) { errno = -EINVAL; return -1; } CPU_ZERO(&cpus); CPU_SET((unsigned)cpu, &cpus); ret = sched_setaffinity(Gettid(), sizeof(cpus), &cpus); #ifndef DISABLE_NUMA if (numa_max_node() == 0) return ret; bmask = numa_bitmask_alloc(n); assert(bmask); #endif /* DISABLE_NUMA */ /* read physical id of the core from sys information */ snprintf(sysfname, MAX_FILE_NAME - 1, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); fp = fopen(sysfname, "r"); if (!fp) { perror(sysfname); errno = EFAULT; return -1; } ret = fscanf(fp, "%d", &phy_id); if (ret != 1) { perror("Fail to read core id"); errno = EFAULT; return -1; } #ifndef DISABLE_NUMA numa_bitmask_setbit(bmask, phy_id); numa_set_membind(bmask); numa_bitmask_free(bmask); #endif /* DISABLE_NUMA */ fclose(fp); return ret; }
int main(int argc, char *argv[]) { int i; int nr = 2; int ret; char c; char *p; int mapflag = MAP_ANONYMOUS; int protflag = PROT_READ|PROT_WRITE; struct bitmask *all_nodes; struct bitmask *old_nodes; struct bitmask *new_nodes; unsigned long nr_nodes = numa_max_node() + 1; while ((c = getopt(argc, argv, "vp:m:n:h:")) != -1) { switch(c) { case 'v': verbose = 1; break; case 'p': testpipe = optarg; { struct stat stat; lstat(testpipe, &stat); if (!S_ISFIFO(stat.st_mode)) errmsg("Given file is not fifo.\n"); } break; case 'm': if (!strcmp(optarg, "private")) mapflag |= MAP_PRIVATE; else if (!strcmp(optarg, "shared")) mapflag |= MAP_SHARED; else errmsg("invalid optarg for -m\n"); break; case 'n': nr = strtoul(optarg, NULL, 10); break; case 'h': mapflag |= MAP_HUGETLB; HPS = strtoul(optarg, NULL, 10) * 1024; /* todo: arch independent */ if (HPS != 2097152 && HPS != 1073741824) errmsg("Invalid hugepage size\n"); break; default: errmsg("invalid option\n"); break; } } if (nr_nodes < 2) errmsg("A minimum of 2 nodes is required for this test.\n"); all_nodes = numa_bitmask_alloc(nr_nodes); old_nodes = numa_bitmask_alloc(nr_nodes); new_nodes = numa_bitmask_alloc(nr_nodes); numa_bitmask_setbit(all_nodes, 0); numa_bitmask_setbit(all_nodes, 1); numa_bitmask_setbit(old_nodes, 0); numa_bitmask_setbit(new_nodes, 1); numa_sched_setaffinity(0, old_nodes); signal(SIGUSR1, sig_handle); p = mmap((void *)ADDR_INPUT, nr * HPS, protflag, mapflag, -1, 0); if (p == MAP_FAILED) err("mmap"); /* fault in */ memset(p, 'a', nr * HPS); pprintf("before memory_hotremove\n"); pause(); numa_sched_setaffinity(0, all_nodes); signal(SIGUSR1, sig_handle_flag); memset(p, 'a', nr * HPS); pprintf("entering busy loop\n"); while (flag) { memset(p, 'a', nr * HPS); /* important to control race b/w migration and fault */ sleep(1); } pprintf("exited busy loop\n"); pause(); return 0; }
/*
 * NUMA bandwidth micro-benchmark driven by OpenMP.
 *
 * Prints the CPU mask and size of every NUMA node, then allocates one
 * array on thread 0's local node and measures access bandwidth to it in
 * three phases: each thread alone in turn, all threads at once, and
 * thread 0 together with one other thread at a time.
 *
 * One OpenMP thread per CPU reported by numa_num_task_cpus() is required
 * (asserted inside the parallel region).  pin_to_core(), measure_access()
 * and print_bitmask() are defined elsewhere in this file.
 *
 * Returns 0 on completion, 1 if libnuma reports NUMA as unavailable.
 */
int main(int argc, const char **argv)
{
  /* numa_available() has to succeed before any other libnuma call is made */
  int available = numa_available();
  if (available < 0)
  {
    fprintf(stderr, "numa available: %d\n", available);
    return 1;
  }

  int num_cpus = numa_num_task_cpus();
  printf("num cpus: %d\n", num_cpus);

  printf("numa available: %d\n", available);
  numa_set_localalloc();

  struct bitmask *bm = numa_bitmask_alloc(num_cpus);
  for (int i=0; i<=numa_max_node(); ++i)
  {
    numa_node_to_cpus(i, bm);
    printf("numa node %d ", i);
    print_bitmask(bm);
    printf(" - %g GiB\n", numa_node_size(i, 0) / (1024.*1024*1024.));
  }
  numa_bitmask_free(bm);

  puts("");

  char *x = NULL;
  const size_t cache_line_size = 64;
  const size_t array_size = 100*1000*1000;
  size_t ntrips = 2;

#pragma omp parallel
  {
    assert(omp_get_num_threads() == num_cpus);
    int tid = omp_get_thread_num();

    pin_to_core(tid);
    /* thread 0 allocates; the barrier below publishes x to everyone */
    if(tid == 0)
      x = (char *) numa_alloc_local(array_size);

    // {{{ single access
#pragma omp barrier
    /* int index: it is compared with tid and printed with %d */
    for (int i = 0; i<num_cpus; ++i)
    {
      if (tid == i)
      {
        double t = measure_access(x, array_size, ntrips);
        printf("sequential core %d -> core 0 : BW %g MB/s\n",
            i, array_size*ntrips*cache_line_size / t / 1e6);
      }
#pragma omp barrier
    }
    // }}}

    // {{{ everybody contends for one

    {
      if (tid == 0) puts("");

#pragma omp barrier
      double t = measure_access(x, array_size, ntrips);
#pragma omp barrier
      /* print one thread at a time so the lines come out in order */
      for (int i = 0; i<num_cpus; ++i)
      {
        if (tid == i)
          printf("all-contention core %d -> core 0 : BW %g MB/s\n",
              tid, array_size*ntrips*cache_line_size / t / 1e6);
#pragma omp barrier
      }
    }

    // }}}

    // {{{ zero and someone else contending

    if (tid == 0) puts("");

#pragma omp barrier
    for (int i = 1; i<num_cpus; ++i)
    {
      /* only read by the two threads that measured in this round */
      double t = 0.0;
      if (tid == i || tid == 0)
        t = measure_access(x, array_size, ntrips);

#pragma omp barrier
      if (tid == 0)
      {
        printf("two-contention core %d -> core 0 : BW %g MB/s\n",
            tid, array_size*ntrips*cache_line_size / t / 1e6);
      }
#pragma omp barrier
      if (tid == i)
      {
        printf("two-contention core %d -> core 0 : BW %g MB/s\n\n",
            tid, array_size*ntrips*cache_line_size / t / 1e6);
      }
#pragma omp barrier
    }
  }
  numa_free(x, array_size);

  return 0;
}
int main(int argc, char *argv[]) { int i; int ret; int nr = 2; char c; char *p; int mapflag = MAP_ANONYMOUS; int protflag = PROT_READ|PROT_WRITE; unsigned long nr_nodes = numa_max_node() + 1; struct bitmask *new_nodes; unsigned long nodemask; int do_unpoison = 0; int loop = 3; while ((c = getopt(argc, argv, "vp:m:n:ul:h:")) != -1) { switch(c) { case 'v': verbose = 1; break; case 'p': testpipe = optarg; { struct stat stat; lstat(testpipe, &stat); if (!S_ISFIFO(stat.st_mode)) errmsg("Given file is not fifo.\n"); } break; case 'm': if (!strcmp(optarg, "private")) mapflag |= MAP_PRIVATE; else if (!strcmp(optarg, "shared")) mapflag |= MAP_SHARED; else errmsg("invalid optarg for -m\n"); break; case 'n': nr = strtoul(optarg, NULL, 10); break; case 'u': do_unpoison = 1; break; case 'l': loop = strtoul(optarg, NULL, 10); break; case 'h': HPS = strtoul(optarg, NULL, 10) * 1024; mapflag |= MAP_HUGETLB; /* todo: arch independent */ if (HPS != 2097152 && HPS != 1073741824) errmsg("Invalid hugepage size\n"); break; default: errmsg("invalid option\n"); break; } } if (nr_nodes < 2) errmsg("A minimum of 2 nodes is required for this test.\n"); new_nodes = numa_bitmask_alloc(nr_nodes); numa_bitmask_setbit(new_nodes, 1); nodemask = 1; /* only node 0 allowed */ if (set_mempolicy(MPOL_BIND, &nodemask, nr_nodes) == -1) err("set_mempolicy"); signal(SIGUSR2, sig_handle); pprintf("start background migration\n"); pause(); signal(SIGUSR1, sig_handle_flag); pprintf("hugepages prepared\n"); while (flag) { p = checked_mmap((void *)ADDR_INPUT, nr * HPS, protflag, mapflag, -1, 0); /* fault in */ memset(p, 'a', nr * HPS); for (i = 0; i < nr; i++) { ret = madvise(p + i * HPS, 4096, MADV_HWPOISON); if (ret) { perror("madvise"); pprintf("madvise returned %d\n", ret); } } if (do_unpoison) { pprintf("need unpoison\n"); pause(); } checked_munmap(p, nr * HPS); if (loop-- <= 0) break; } pprintf("exit\n"); pause(); return 0; }