void system_get_stack(pthread_t thread, void **base, size_t *size) {
  dbg_assert(base && size);

  pthread_attr_t attr;
  dbg_check( pthread_getattr_np(thread, &attr) );
  dbg_check( pthread_attr_getstack(&attr, base, size) );
  // release the storage associated with the attribute object
  dbg_check( pthread_attr_destroy(&attr) );
}
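// A hedged usage sketch (not in the original source): log the calling
// thread's stack extent using the helper above. pthread_self() is the
// standard way to name the current thread; the wrapper name is hypothetical.
static void _log_my_stack(void) {
  void *base = NULL;
  size_t size = 0;
  system_get_stack(pthread_self(), &base, &size);
  log_dflt("stack base %p, size %zu\n", base, size);
}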
/// A simple mmap wrapper that guarantees alignment.
///
/// This calls munmap at least once, and will allocate more data than is
/// strictly necessary, so it should only be used on a fallback path. This
/// implementation simply over-allocates space so that we are guaranteed that
/// there is a properly aligned region inside of the mapping. It then munmaps
/// the parts of the allocation that aren't part of this region.
///
/// @param         addr A "suggested" address. This is most likely ignored.
/// @param            n The number of bytes to map, must be a power of 2.
/// @param         prot The protection flags.
/// @param        flags Additional flags for the mapping.
/// @param           fd A file descriptor to map from.
/// @param          off The file offset to map at.
/// @param        align The alignment, must be a power of 2.
///
/// @returns The properly-aligned mapped region, or NULL if there was an error.
static void *_mmap_aligned(void *addr, size_t n, int prot, int flags, int fd,
                           int off, size_t align) {
  char *buffer = mmap(addr, n + align, prot, flags, fd, off);
  if (buffer == MAP_FAILED) {
    dbg_error("could not map %zu bytes with %zu alignment\n", n, align);
  }

  // find the range in the allocation that matches what we want
  uintptr_t bits = (uintptr_t)buffer;
  uintptr_t mask = (align - 1);
  uintptr_t suffix = bits & mask;
  uintptr_t prefix = align - suffix;

  // return the over-allocated pages back to the OS; munmap here is fine
  // because we know our sizes are okay even for huge allocations
  if (prefix) {
    dbg_check( munmap(buffer, prefix) );
  }
  if (suffix) {
    dbg_check( munmap(buffer + prefix + n, suffix) );
  }

  // and return the correctly aligned range
  return buffer + prefix;
}
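// Worked instance of the trimming arithmetic above (addresses illustrative):
// with align = 0x10000 (64 KiB) and a raw mapping at bits = 0x7f3210003000,
//   suffix = bits & (align - 1) = 0x3000
//   prefix = align - suffix     = 0xd000
// so the front 0xd000 bytes and the back 0x3000 bytes are unmapped, the
// surviving region starts at the 64 KiB-aligned 0x7f3210010000, and
// prefix + n + suffix accounts for exactly the n + align bytes mapped.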
/// This action can be used by a thread to wait on an LCO through suspension.
///
/// @param        reset Flag saying if this is just a wait, or a wait + reset.
/// @param       parcel The address to be forwarded back to the caller.
///
/// @returns HPX_SUCCESS
static int _isir_lco_wait_handler(int reset, void *parcel) {
  if (reset) {
    dbg_check( hpx_lco_wait_reset(self->current->target) );
  }
  else {
    dbg_check( hpx_lco_wait(self->current->target) );
  }
  return hpx_thread_continue(parcel);
}
hpx_addr_t hpx_lco_user_new(size_t size, hpx_action_t id, hpx_action_t op,
                            hpx_action_t predicate, void *init,
                            size_t init_size) {
  _user_lco_t *u = NULL;
  hpx_addr_t gva = lco_alloc_local(1, sizeof(*u) + size + init_size, 0);

  if (!hpx_gas_try_pin(gva, (void**)&u)) {
    size_t args_size = sizeof(_user_lco_init_args_t) + init_size;
    _user_lco_init_args_t *args = calloc(1, args_size);
    args->size = size;
    args->id = id;
    args->op = op;
    args->predicate = predicate;
    args->init_size = init_size;
    memcpy(args->data, init, init_size);

    int e = hpx_call_sync(gva, _user_lco_init_action, NULL, 0, args, args_size);
    dbg_check(e, "could not initialize a user LCO at %"PRIu64"\n", gva);
    free(args);
  }
  else {
    LCO_LOG_NEW(gva, u);
    // _user_lco_init copies the init data itself, so no extra memcpy is
    // needed on the local path
    _user_lco_init(u, size, id, op, predicate, init, init_size);
    hpx_gas_unpin(gva);
  }

  return gva;
}
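// A hedged usage sketch (not in the original source): _my_id, _my_op, and
// _my_pred stand in for user-registered handler actions, and the int payload
// is illustrative. Only hpx_lco_user_new, hpx_lco_set, hpx_lco_wait, and
// hpx_lco_delete are assumed from the public API.
static void _user_lco_example(void) {
  int init = 0;
  hpx_addr_t lco = hpx_lco_user_new(sizeof(int), _my_id, _my_op, _my_pred,
                                    &init, sizeof(init));
  hpx_lco_set(lco, sizeof(init), &init, HPX_NULL, HPX_NULL);
  dbg_check( hpx_lco_wait(lco) );
  hpx_lco_delete(lco, HPX_NULL);
}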
int lib_getLevel() {
  float result = INVALID_VALUE;
  readRoutine(NUM_REGS, &result);
  dbg_check(result != INVALID_VALUE, "problem with the reference");
  // on success, return the level that readRoutine() produced
  return (int)result;

 error:
  return INVALID_VALUE;
}
/// Initialize a block of an array of user LCOs.
static int _block_init_handler(_user_lco_t *lco, _user_lco_init_args_t *args) {
  int n = args->n;
  int lco_bytes = sizeof(_user_lco_t) + args->size + args->init_size;

  for (int i = 0; i < n; i++) {
    void *addr = (void *)((uintptr_t)lco + (i * lco_bytes));
    int e = _user_lco_init_handler(addr, args);
    dbg_check(e, "_block_init_handler failed\n");
  }
  return HPX_SUCCESS;
}
/// Allocate a global array.
static hpx_addr_t _smp_gas_alloc_cyclic(size_t n, uint32_t bsize,
                                        uint32_t boundary, uint32_t attr) {
  void *p = NULL;
  if (boundary) {
    dbg_check(posix_memalign(&p, boundary, n * bsize));
  }
  else {
    p = malloc(n * bsize);
  }
  return _smp_lva_to_gva(p);
}
/// Allocate a bunch of global memory.
static hpx_addr_t _smp_gas_alloc_local(size_t n, uint32_t bsize,
                                       uint32_t boundary, uint32_t attr) {
  size_t bytes = n * bsize;
  void *p = NULL;
  if (boundary) {
    dbg_check(posix_memalign(&p, boundary, bytes));
  }
  else {
    p = malloc(bytes);
  }
  return _smp_lva_to_gva(p);
}
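// The two SMP allocators above differ only in name, since in SMP the cyclic
// and local cases coincide. A minimal refactoring sketch (the helper name is
// hypothetical, not part of the original source) could share the body:
static hpx_addr_t _smp_gas_alloc(size_t bytes, uint32_t boundary) {
  void *p = NULL;
  if (boundary) {
    dbg_check(posix_memalign(&p, boundary, bytes));
  }
  else {
    p = malloc(bytes);
  }
  return _smp_lva_to_gva(p);
}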
void as_join(int id) {
  if (as_flags[id] != 0) {
    log_gas("address space %d already joined\n", id);
    return;
  }

  const chunk_hooks_t *hooks = _hooks[id];

  // If there aren't any custom hooks set for this space, then the basic local
  // allocator is fine, which means that we don't need any special flags for
  // this address space.
  if (!hooks) {
    log_gas("no custom allocator for %d, using local\n", id);
    return;
  }

  // Create an arena that uses the right hooks.
  unsigned arena;
  size_t sz = sizeof(arena);
  dbg_check( je_mallctl("arenas.extend", &arena, &sz, NULL, 0) );

  char path[128];
  snprintf(path, sizeof(path), "arena.%u.chunk_hooks", arena);
  dbg_check( je_mallctl(path, NULL, NULL, (void*)hooks, sizeof(*hooks)) );

  // // Disable dirty page purging for this arena
  // snprintf(path, sizeof(path), "arena.%u.lg_dirty_mult", arena);
  // ssize_t i = -1;
  // dbg_check( je_mallctl(path, NULL, NULL, (void*)&i, sizeof(i)) );

  // Create a cache.
  unsigned cache;
  sz = sizeof(cache);
  dbg_check( je_mallctl("tcache.create", &cache, &sz, NULL, 0) );

  // And set the flags.
  as_flags[id] = MALLOCX_ARENA(arena) | MALLOCX_TCACHE(cache);
}
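// A hedged sketch of how the flags computed in as_join() are meant to be
// consumed: jemalloc's mallocx() accepts the arena and thread-cache selectors
// directly. The wrapper name as_malloc is hypothetical here.
void *as_malloc(int id, size_t bytes) {
  return je_mallocx(bytes, as_flags[id]);
}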
static hpx_addr_t _pgas_gas_calloc_cyclic(size_t n, uint32_t bsize,
                                          uint32_t boundary, uint32_t attr) {
  hpx_addr_t addr;
  // cyclic allocation is served from rank 0, so forward the request there if
  // we're anywhere else
  if (here->rank == 0) {
    addr = pgas_calloc_cyclic_sync(n, bsize);
  }
  else {
    int e = hpx_call_sync(HPX_THERE(0), pgas_calloc_cyclic, &addr,
                          sizeof(addr), &n, &bsize);
    dbg_check(e, "Failed to call pgas_calloc_cyclic_handler.\n");
  }
  dbg_assert_str(addr != HPX_NULL, "HPX_NULL is not a valid allocation\n");
  return addr;
}
/// This mmap wrapper tries once to mmap, and then forwards to _mmap_aligned().
///
/// @param         addr A "suggested" address. This is most likely ignored.
/// @param            n The number of bytes to map, must be a power of 2.
/// @param         prot The protection flags.
/// @param        flags Additional flags for the mapping.
/// @param           fd A file descriptor to map from.
/// @param          off The file offset to map at.
/// @param        align The alignment, must be a power of 2.
///
/// @returns The properly-aligned mapped region, or NULL if there was an error.
static void *_mmap_lucky(void *addr, size_t n, int prot, int flags, int fd,
                         int off, size_t align) {
  void *buffer = mmap(addr, n, prot, flags, fd, off);
  if (buffer == MAP_FAILED) {
    dbg_error("could not mmap %zu bytes from file %d\n", n, fd);
  }

  // if we got lucky and the mapping is already aligned, we're done
  uintptr_t bits = (uintptr_t)buffer;
  uintptr_t mask = align - 1;
  uintptr_t modulo = bits & mask;
  if (!modulo) {
    return buffer;
  }

  // otherwise unmap and retry with the over-allocating fallback
  dbg_check( munmap(buffer, n) );
  return _mmap_aligned(addr, n, prot, flags, fd, off, align);
}
/// Called by the application to terminate the scheduler and network.
void hpx_exit(int code) {
  dbg_assert_str(here->ranks,
                 "hpx_exit can only be called when the system is running.\n");

  uint64_t c = (uint32_t)code;

  // Make sure we flush our local network when we stop, but don't send our own
  // stop command here because it can "arrive" locally very quickly, before
  // we've even come close to sending the rest of the stop commands. This can
  // cause problems with flushing.
  for (int i = 0, n = here->ranks; i < n; ++i) {
    if (i != here->rank) {
      int e = action_call_lsync(locality_stop, HPX_THERE(i), 0, 0, 1, &c);
      dbg_check(e);
    }
  }

  // Call our own stop through the call-with-continuation, which orders it
  // locally after the effects of the loop above.
  int e = hpx_call_cc(HPX_HERE, locality_stop, &c);
  hpx_thread_exit(e);
}
/// Free a global address.
///
/// This global address must either be the base of a cyclic allocation, or a
/// block allocated by _pgas_gas_alloc_local. At this time, we do not attempt
/// to recycle cyclic allocations, as they are served by a simple csbrk
/// allocator.
static void _pgas_gas_free(void *gas, hpx_addr_t gpa, hpx_addr_t sync) {
  if (gpa == HPX_NULL) {
    return;
  }

  uint64_t offset = gpa_to_offset(gpa);

  void *lva = heap_offset_to_lva(global_heap, offset);
  dbg_assert_str(heap_contains_lva(global_heap, lva),
                 "attempt to free out of bounds offset %"PRIu64"", offset);

  if (heap_offset_is_cyclic(global_heap, offset)) {
    heap_free_cyclic(global_heap, offset);
    hpx_lco_set(sync, 0, NULL, HPX_NULL, HPX_NULL);
  }
  else if (gpa_to_rank(gpa) == here->rank) {
    global_free(lva);
    hpx_lco_set(sync, 0, NULL, HPX_NULL, HPX_NULL);
  }
  else {
    dbg_check(hpx_call(gpa, pgas_free, sync), "free failed on %"PRIu64"", gpa);
  }
}
/// Called to run HPX.
int _hpx_run(hpx_action_t *act, int n, ...) {
  if (here->rank == 0) {
    va_list args;
    va_start(args, n);
    hpx_parcel_t *p = action_new_parcel_va(*act, HPX_HERE, 0, 0, n, &args);
    va_end(args);
    dbg_check(hpx_parcel_send(p, HPX_NULL), "failed to spawn initial action\n");
  }

  log_dflt("hpx started running %"PRIu64"\n", here->epoch);
  int status = scheduler_restart(here->sched);
  log_dflt("hpx stopped running %"PRIu64"\n", here->epoch);

  // We need to flush the network here, because it might have messages that are
  // required for progress.
  self->network->flush(self->network);

  // Bump our epoch, and enforce the "collective" nature of run with a boot
  // barrier.
  here->epoch++;
  boot_barrier(here->boot);
  return status;
}
/// Allocate an array of user LCOs local to the calling locality.
///
/// @param            n The (total) number of LCOs to allocate.
/// @param         size The size of the LCO buffer.
/// @param           id An initialization function for the data; this is
///                     used to initialize the data in every epoch.
/// @param           op The commutative-associative operation we're
///                     performing.
/// @param    predicate Predicate to guard the LCO.
/// @param         init The initialization data address.
/// @param    init_size The size of the initialization data.
///
/// @returns the global address of the allocated array of LCOs.
hpx_addr_t hpx_lco_user_local_array_new(int n, size_t size, hpx_action_t id,
                                        hpx_action_t op,
                                        hpx_action_t predicate, void *init,
                                        size_t init_size) {
  uint32_t lco_bytes = sizeof(_user_lco_t) + size + init_size;
  dbg_assert((uint64_t)n * lco_bytes < UINT32_MAX);
  hpx_addr_t base = lco_alloc_local(n, lco_bytes, 0);

  size_t args_size = sizeof(_user_lco_init_args_t) + init_size;
  _user_lco_init_args_t *args = calloc(1, args_size);
  args->n = n;
  args->size = size;
  args->id = id;
  args->op = op;
  args->predicate = predicate;
  args->init_size = init_size;
  memcpy(args->data, init, init_size);

  int e = hpx_call_sync(base, _block_init, NULL, 0, args, args_size);
  dbg_check(e, "call of _block_init_action failed\n");
  free(args);

  // return the base address of the allocation
  return base;
}
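// A hedged usage sketch (not in the original source): elements of the array
// are addressed by byte offset with hpx_addr_add, using the same per-LCO
// footprint the allocator computed above. The helper name is hypothetical,
// and size/init_size must match the values passed at allocation time.
static hpx_addr_t _user_lco_array_element(hpx_addr_t base, int i, size_t size,
                                          size_t init_size) {
  uint32_t lco_bytes = sizeof(_user_lco_t) + size + init_size;
  return hpx_addr_add(base, (hpx_gas_ptrdiff_t)i * lco_bytes, lco_bytes);
}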
static void _isir_lco_wait_continuation(hpx_parcel_t *p, void *env) {
  _isir_lco_wait_env_t *e = env;
  hpx_action_t op = _isir_lco_wait;
  hpx_action_t rop = _isir_lco_launch_parcel;
  dbg_check( action_call_lsync(op, e->lco, HPX_HERE, rop, 2, &e->reset, &p) );
}
int hpx_init(int *argc, char ***argv) {
  int status = HPX_SUCCESS;

  // Start the internal clock.
  libhpx_time_start();

  here = malloc(sizeof(*here));
  if (!here) {
    status = log_error("failed to allocate a locality.\n");
    goto unwind0;
  }

  here->rank = -1;
  here->ranks = 0;
  here->epoch = 0;

  // Save the current signal mask.
  sigset_t set;
  sigemptyset(&set);
  dbg_check(pthread_sigmask(SIG_BLOCK, &set, &here->mask));

  here->config = config_new(argc, argv);
  if (!here->config) {
    status = log_error("failed to create a configuration.\n");
    goto unwind1;
  }

  // Check to see if everyone is waiting.
  if (config_dbg_waitat_isset(here->config, HPX_LOCALITY_ALL)) {
    dbg_wait();
  }

  // Bootstrap.
  here->boot = boot_new(here->config->boot);
  if (!here->boot) {
    status = log_error("failed to bootstrap.\n");
    goto unwind1;
  }
  here->rank = boot_rank(here->boot);
  here->ranks = boot_n_ranks(here->boot);

  // Initialize the debugging system.
  // @todo We would like to do this earlier but MPI_init() for the bootstrap
  //       network overwrites our segv handler.
  if (LIBHPX_OK != dbg_init(here->config)) {
    status = log_error("failed to initialize debugging.\n");
    goto unwind1;
  }

  // Now that we know our rank, we can be more specific about waiting.
  if (config_dbg_waitat_isset(here->config, here->rank)) {
    // Don't wait twice.
    if (!config_dbg_waitat_isset(here->config, HPX_LOCALITY_ALL)) {
      dbg_wait();
    }
  }

  // See if we're supposed to output the configuration; only do this at rank 0.
  if (config_log_level_isset(here->config, HPX_LOG_CONFIG)) {
    if (here->rank == 0) {
      config_print(here->config, stdout);
    }
  }

  // Topology discovery and initialization.
  here->topology = topology_new(here->config);
  if (!here->topology) {
    status = log_error("failed to discover topology.\n");
    goto unwind1;
  }

  // Initialize our instrumentation.
  if (inst_init(here->config)) {
    log_dflt("error detected while initializing instrumentation\n");
  }

  // Allocate the global heap.
  here->gas = gas_new(here->config, here->boot);
  if (!here->gas) {
    status = log_error("failed to create the global address space.\n");
    goto unwind1;
  }
  HPX_HERE = HPX_THERE(here->rank);

  here->percolation = percolation_new();
  if (!here->percolation) {
    status = log_error("failed to activate percolation.\n");
    goto unwind1;
  }

  int cores = system_get_available_cores();
  dbg_assert(cores > 0);

  if (!here->config->threads) {
    here->config->threads = cores;
  }
  log_dflt("HPX running %d worker threads on %d cores\n",
           here->config->threads, cores);

  here->net = network_new(here->config, here->boot, here->gas);
  if (!here->net) {
    status = log_error("failed to create network.\n");
    goto unwind1;
  }

  // Thread scheduler.
  here->sched = scheduler_new(here->config);
  if (!here->sched) {
    status = log_error("failed to create scheduler.\n");
    goto unwind1;
  }

#ifdef HAVE_APEX
  // Initialize APEX, and give this main thread a name.
  apex_init("HPX WORKER THREAD");
  apex_set_node_id(here->rank);
#endif

  action_registration_finalize();
  inst_start();

  // Start the scheduler; this will return after scheduler_shutdown().
  if (scheduler_startup(here->sched, here->config) != LIBHPX_OK) {
    status = log_error("scheduler shut down with error.\n");
    goto unwind1;
  }

  if ((here->ranks > 1 && here->config->gas != HPX_GAS_AGAS) ||
      !here->config->opt_smp) {
    status = hpx_run(&_hpx_143_fix);
  }

  return status;

 unwind1:
  _stop(here);
  _cleanup(here);
 unwind0:
  return status;
}
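// A hedged end-to-end sketch (not in the original source): the typical
// lifecycle is hpx_init -> hpx_run -> hpx_finalize. The _hello action and its
// registration follow the usual HPX-5 pattern, but the names are illustrative
// and the HPX_ACTION arity is an assumption.
static int _hello_handler(void) {
  log_dflt("hello from rank %d\n", hpx_get_my_rank());
  hpx_exit(HPX_SUCCESS);               // does not return
}
static HPX_ACTION(HPX_DEFAULT, 0, _hello, _hello_handler);

int main(int argc, char *argv[]) {
  if (hpx_init(&argc, &argv) != HPX_SUCCESS) {
    return -1;
  }
  int e = hpx_run(&_hello);
  hpx_finalize();
  return e;
}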