Example #1
void system_get_stack(pthread_t thread, void **base, size_t *size) {
  dbg_assert(base && size);

  pthread_attr_t attr;
  dbg_check( pthread_getattr_np(thread, &attr) );
  dbg_check( pthread_attr_getstack(&attr, base, size) );
  // pthread_getattr_np allocates attribute storage; release it when done.
  dbg_check( pthread_attr_destroy(&attr) );
}
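For context, a minimal usage sketch (hypothetical, not from the source): querying the calling thread's own stack, assuming system_get_stack above is declared and linked in, and that the build defines _GNU_SOURCE so glibc exposes pthread_getattr_np.

#include <pthread.h>
#include <stdio.h>

int main(void) {
  void *base = NULL;
  size_t size = 0;
  // Query the calling thread's own stack extent.
  system_get_stack(pthread_self(), &base, &size);
  printf("stack base=%p size=%zu\n", base, size);
  return 0;
}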
Example #2
/// A simple mmap wrapper that guarantees alignment.
///
/// This calls munmap at least once, and allocates more address space than is
/// strictly necessary, so it should only be used on a fallback path. The
/// implementation simply over-allocates so that the mapping is guaranteed to
/// contain a properly aligned region, and then munmaps the parts of the
/// allocation that fall outside that region.
///
/// @param         addr A "suggested" address. This is most likely ignored.
/// @param            n The number of bytes to map; must be a power of two.
/// @param         prot The protection flags.
/// @param        flags Additional flags for the mapping.
/// @param           fd A file descriptor to map from.
/// @param          off The file offset to map at.
/// @param        align The alignment; must be a power of two.
///
/// @returns The properly-aligned mapped region, or NULL if there was an error.
static void *_mmap_aligned(void *addr, size_t n, int prot, int flags, int fd,
                           int off, size_t align) {
  char *buffer = mmap(addr, n + align, prot, flags, fd, off);
  if (buffer == MAP_FAILED) {
    dbg_error("could not map %zu bytes with %zu alignment\n", n, align);
  }

  // find the range in the allocation that matches what we want
  uintptr_t   bits = (uintptr_t)buffer;
  uintptr_t   mask = (align - 1);
  uintptr_t suffix = bits & mask;
  uintptr_t prefix = align - suffix;

  // return the overallocated pages back to the OS, system_munmap here is fine
  // because we know our sizes are okay even for huge allocations
  if (prefix) {
    dbg_check( munmap(buffer, prefix) );
  }
  if (suffix) {
    dbg_check( munmap(buffer + prefix + n, suffix) );
  }

  // and return the correctly aligned range
  return buffer + prefix;
}
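For illustration, here is a self-contained sketch of the same over-allocate-and-trim technique on an anonymous mapping; the prefix/suffix arithmetic mirrors the function above (the sizes are arbitrary choices for the example).

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

int main(void) {
  size_t n = 1 << 20;                               // 1 MB region
  size_t align = 1 << 21;                           // 2 MB alignment
  char *buf = mmap(NULL, n + align, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (buf == MAP_FAILED) {
    return 1;
  }
  uintptr_t suffix = (uintptr_t)buf & (align - 1);  // misalignment of the mapping
  uintptr_t prefix = align - suffix;                // bytes to trim from the front
  if (prefix) {
    munmap(buf, prefix);                            // return the leading slack
  }
  if (suffix) {
    munmap(buf + prefix + n, suffix);               // return the trailing slack
  }
  printf("aligned region at %p\n", (void *)(buf + prefix));
  return munmap(buf + prefix, n);
}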
Example #3
/// This action can be used by a thread to wait on an LCO through suspension.
///
/// @param        reset Flag saying if this is just a wait, or a wait + reset.
/// @param       parcel The address to be forwarded back to the caller.
///
/// @returns            HPX_SUCCESS
static int _isir_lco_wait_handler(int reset, void *parcel) {
  if (reset) {
    dbg_check( hpx_lco_wait_reset(self->current->target) );
  }
  else {
    dbg_check( hpx_lco_wait(self->current->target) );
  }

  return hpx_thread_continue(parcel);
}
Example #4
hpx_addr_t hpx_lco_user_new(size_t size, hpx_action_t id, hpx_action_t op,
                            hpx_action_t predicate, void *init,
                            size_t init_size) {
  _user_lco_t *u = NULL;
  hpx_addr_t gva = lco_alloc_local(1, sizeof(*u) + size + init_size, 0);

  if (!hpx_gas_try_pin(gva, (void**)&u)) {
    size_t args_size = sizeof(_user_lco_init_args_t) + init_size;
    _user_lco_init_args_t *args = calloc(1, args_size);
    args->size = size;
    args->id = id;
    args->op = op;
    args->predicate = predicate;
    args->init_size = init_size;
    memcpy(args->data, init, init_size);

    int e = hpx_call_sync(gva, _user_lco_init_action, NULL, 0, args, args_size);
    dbg_check(e, "could not initialize an allreduce at %"PRIu64"\n", gva);
    free(args);
  } else {
    LCO_LOG_NEW(gva, u);
    memcpy(u->data, init, init_size);
    _user_lco_init(u, size, id, op, predicate, init, init_size);
    hpx_gas_unpin(gva);
  }

  return gva;
}
Example #5
int lib_getLevel() {
	float result = INVALID_VALUE;
	readRoutine(NUM_REGS, &result);
	dbg_check(result != INVALID_VALUE, "problem with the reference");
	// On success, return the level that was read (truncated to int).
	return result;
error:
	return INVALID_VALUE;
}
Example #6
/// Initialize a block of an array of LCOs.
static int
_block_init_handler(_user_lco_t *lco, _user_lco_init_args_t *args) {
  int n = args->n;
  int lco_bytes = sizeof(_user_lco_t) + args->size + args->init_size;
  for (int i = 0; i < n; i++) {
    void *addr = (void *)((uintptr_t)lco + (i * lco_bytes));
    int e = _user_lco_init_handler(addr, args);
    dbg_check(e, "_block_init_handler failed\n");
  }
  return HPX_SUCCESS;
}
Example #7
/// Allocate a global array.
static hpx_addr_t
_smp_gas_alloc_cyclic(size_t n, uint32_t bsize, uint32_t boundary,
                      uint32_t attr) {
  void *p = NULL;
  if (boundary) {
    dbg_check(posix_memalign(&p, boundary, n * bsize));
  }
  else {
    p = malloc(n * bsize);
  }
  return _smp_lva_to_gva(p);
}
Example #8
/// Allocate a block of global memory.
static hpx_addr_t
_smp_gas_alloc_local(size_t n, uint32_t bsize, uint32_t boundary,
                     uint32_t attr) {
  size_t bytes = n * bsize;
  void *p = NULL;
  if (boundary) {
    dbg_check(posix_memalign(&p, boundary, bytes));
  } else {
    p = malloc(bytes);
  }
  return _smp_lva_to_gva(p);
}
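Note that posix_memalign requires the boundary to be a power of two and a multiple of sizeof(void *). A standalone sketch of the alignment pattern used by both allocators above:

#include <stdio.h>
#include <stdlib.h>

int main(void) {
  void *p = NULL;
  // 64 is a power of two and a multiple of sizeof(void *), as required.
  if (posix_memalign(&p, 64, 4096) != 0) {
    return 1;
  }
  printf("%p is 64-byte aligned\n", p);
  free(p);
  return 0;
}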
Example #9
void as_join(int id) {
  if (as_flags[id] != 0) {
    log_gas("address space %d already joined\n", id);
    return;
  }

  const chunk_hooks_t *hooks = _hooks[id];

  // If there aren't any custom hooks set for this space, then the basic local
  // allocator is fine, which means that we don't need any special flags for
  // this address space.
  if (!hooks) {
    log_gas("no custom allocator for %d, using local\n", id);
    return;
  }

  // Create an arena that uses the right hooks.
  unsigned arena;
  size_t sz = sizeof(arena);
  dbg_check( je_mallctl("arenas.extend", &arena, &sz, NULL, 0) );

  char path[128];
  snprintf(path, sizeof(path), "arena.%u.chunk_hooks", arena);
  dbg_check( je_mallctl(path, NULL, NULL, (void*)hooks, sizeof(*hooks)) );

  // // Disable dirty page purging for this arena
  // snprintf(path, 124, "arena.%u.lg_dirty_mult", arena);
  // ssize_t i = -1;
  // dbg_check( je_mallctl(path, NULL, NULL, (void*)&i, sizeof(i)) );

  // Create a cache.
  unsigned cache;
  sz = sizeof(cache);
  dbg_check( je_mallctl("tcache.create", &cache, &sz, NULL, 0) );

  // And set the flags.
  as_flags[id] = MALLOCX_ARENA(arena) | MALLOCX_TCACHE(cache);
}
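The flags stored in as_flags are presumably consumed later through jemalloc's extended allocation API; a hypothetical sketch (as_alloc is an illustrative name, not from the source):

void *as_alloc(int id, size_t bytes) {
  // MALLOCX_ARENA and MALLOCX_TCACHE route this request through the arena
  // and thread cache that as_join() configured for address space `id`.
  return je_mallocx(bytes, as_flags[id]);
}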
Example #10
static hpx_addr_t _pgas_gas_calloc_cyclic(size_t n, uint32_t bsize,
                                          uint32_t boundary, uint32_t attr) {
  hpx_addr_t addr;
  if (here->rank == 0) {
    addr = pgas_calloc_cyclic_sync(n, bsize);
  }
  else {
    int e = hpx_call_sync(HPX_THERE(0), pgas_calloc_cyclic, &addr, sizeof(addr),
                          &n, &bsize);
    dbg_check(e, "Failed to call pgas_calloc_cyclic_handler.\n");
  }
  dbg_assert_str(addr != HPX_NULL, "HPX_NULL is not a valid allocation\n");
  return addr;
}
Example #11
/// This mmap wrapper tries once to mmap, and then forwards to _mmap_aligned().
///
/// @param         addr A "suggested" address. This is most likely ignored.
/// @param            n The number of bytes to map; must be a power of two.
/// @param         prot The protection flags.
/// @param        flags Additional flags for the mapping.
/// @param           fd A file descriptor to map from.
/// @param          off The file offset to map at.
/// @param        align The alignment; must be a power of two.
///
/// @returns The properly-aligned mapped region, or NULL if there was an error.
static void *_mmap_lucky(void *addr, size_t n, int prot, int flags, int fd,
                         int off, size_t align) {
  void *buffer = mmap(addr, n, prot, flags, fd, off);
  if (buffer == MAP_FAILED) {
    dbg_error("could not mmap %zu bytes from file %d\n", n, fd);
  }

  uintptr_t   bits = (uintptr_t)buffer;
  uintptr_t   mask = align - 1;
  uintptr_t modulo = bits & mask;
  if (!modulo) {
    return buffer;
  }

  dbg_check(munmap(buffer, n));
  return _mmap_aligned(addr, n, prot, flags, fd, off, align);
}
Example #12
/// Called by the application to terminate the scheduler and network.
void hpx_exit(int code) {
  dbg_assert_str(here->ranks,
                 "hpx_exit can only be called when the system is running.\n");

  uint64_t c = (uint32_t)code;

  // Make sure we flush our local network when we stop, but don't send our own
  // shutdown here because it can "arrive" locally very quickly, before we've
  // even come close to sending the rest of the stop commands. This can cause
  // problems with flushing.
  for (int i = 0, e = here->ranks; i < e; ++i) {
    if (i != here->rank) {
      int e = action_call_lsync(locality_stop, HPX_THERE(i), 0, 0, 1, &c);
      dbg_check(e);
    }
  }

  // Call our own shutdown through cc, which orders it locally after the effects
  // from the loop above.
  int e = hpx_call_cc(HPX_HERE, locality_stop, &c);
  hpx_thread_exit(e);
}
Example #13
/// Free a global address.
///
/// This global address must either be the base of a cyclic allocation, or a
/// block allocated by _pgas_gas_alloc_local. At this time, we do not attempt
/// to deal with the cyclic allocations, as they are using a simple csbrk
/// allocator.
static void _pgas_gas_free(void *gas, hpx_addr_t gpa, hpx_addr_t sync) {
  if (gpa == HPX_NULL) {
    return;
  }

  uint64_t offset = gpa_to_offset(gpa);
  void *lva = heap_offset_to_lva(global_heap, offset);
  dbg_assert_str(heap_contains_lva(global_heap, lva),
                 "attempt to free out of bounds offset %"PRIu64"", offset);
  (void)lva;

  if (heap_offset_is_cyclic(global_heap, offset)) {
    heap_free_cyclic(global_heap, offset);
    hpx_lco_set(sync, 0, NULL, HPX_NULL, HPX_NULL);
  }
  else if (gpa_to_rank(gpa) == here->rank) {
    global_free(pgas_gpa_to_lva(offset));
    hpx_lco_set(sync, 0, NULL, HPX_NULL, HPX_NULL);
  }
  else {
    dbg_check(hpx_call(gpa, pgas_free, sync), "free failed on %"PRIu64"", gpa);
  }
}
Example #14
/// Called to run HPX.
int _hpx_run(hpx_action_t *act, int n, ...) {
  if (here->rank == 0) {
    va_list args;
    va_start(args, n);
    hpx_parcel_t *p = action_new_parcel_va(*act, HPX_HERE, 0, 0, n, &args);
    va_end(args);
    dbg_check(hpx_parcel_send(p, HPX_NULL), "failed to spawn initial action\n");
  }
  log_dflt("hpx started running %"PRIu64"\n", here->epoch);
  int status = scheduler_restart(here->sched);
  log_dflt("hpx stopped running %"PRIu64"\n", here->epoch);

  // We need to flush the network here, because it might have messages that are
  // required for progress.
  self->network->flush(self->network);

  // Bump our epoch, and enforce the "collective" nature of run with a boot
  // barrier.
  here->epoch++;
  boot_barrier(here->boot);

  return status;
}
Example #15
/// Allocate an array of user LCOs local to the calling locality.
/// @param          n The (total) number of LCOs to allocate
/// @param       size The size of the LCO buffer
/// @param         id An initialization function for the data, this is
///                   used to initialize the data in every epoch.
/// @param         op The commutative-associative operation we're
///                   performing.
/// @param  predicate Predicate to guard the LCO.
/// @param       init The initialization data address.
/// @param  init_size The size of the initialization data.
///
/// @returns The global address of the allocated LCO array.
hpx_addr_t hpx_lco_user_local_array_new(int n, size_t size, hpx_action_t id,
                                        hpx_action_t op, hpx_action_t predicate,
                                        void *init, size_t init_size) {
  uint32_t lco_bytes = sizeof(_user_lco_t) + size + init_size;
  // Widen before multiplying so the overflow check itself cannot overflow.
  dbg_assert((uint64_t)n * lco_bytes < UINT32_MAX);
  hpx_addr_t base = lco_alloc_local(n, lco_bytes, 0);

  size_t args_size = sizeof(_user_lco_init_args_t) + init_size;
  _user_lco_init_args_t *args = calloc(1, args_size);
  args->n = n;
  args->size = size;
  args->id = id;
  args->op = op;
  args->predicate = predicate;
  args->init_size = init_size;
  memcpy(args->data, init, init_size);

  int e = hpx_call_sync(base, _block_init, NULL, 0, args, args_size);
  dbg_check(e, "call of _block_init_action failed\n");

  free(args);
  // return the base address of the allocation
  return base;
}
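Individual elements of the array can then be addressed by byte offset; a hedged sketch using hpx_addr_add, where base is the address returned above and size/init_size match the arguments passed to the allocator:

// lco_bytes matches the block size used by the allocation above.
uint32_t lco_bytes = sizeof(_user_lco_t) + size + init_size;
hpx_addr_t third = hpx_addr_add(base, 2 * lco_bytes, lco_bytes);
hpx_lco_wait(third);   // wait on element [2] of the array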
Example #16
static void _isir_lco_wait_continuation(hpx_parcel_t *p, void *env) {
  _isir_lco_wait_env_t *e = env;
  hpx_action_t op = _isir_lco_wait;
  hpx_action_t rop = _isir_lco_launch_parcel;
  dbg_check( action_call_lsync(op, e->lco, HPX_HERE, rop, 2, &e->reset, &p) );
}
Example #17
int hpx_init(int *argc, char ***argv) {
  int status = HPX_SUCCESS;

  // Start the internal clock
  libhpx_time_start();

  here = malloc(sizeof(*here));
  if (!here) {
    status = log_error("failed to allocate a locality.\n");
    goto unwind0;
  }

  here->rank = -1;
  here->ranks = 0;
  here->epoch = 0;

  sigset_t set;
  sigemptyset(&set);
  dbg_check(pthread_sigmask(SIG_BLOCK, &set, &here->mask));

  here->config = config_new(argc, argv);
  if (!here->config) {
    status = log_error("failed to create a configuration.\n");
    goto unwind1;
  }

  // check to see if everyone is waiting
  if (config_dbg_waitat_isset(here->config, HPX_LOCALITY_ALL)) {
    dbg_wait();
  }

  // bootstrap
  here->boot = boot_new(here->config->boot);
  if (!here->boot) {
    status = log_error("failed to bootstrap.\n");
    goto unwind1;
  }
  here->rank = boot_rank(here->boot);
  here->ranks = boot_n_ranks(here->boot);

  // initialize the debugging system
  // @todo We would like to do this earlier but MPI_init() for the bootstrap
  //       network overwrites our segv handler.
  if (LIBHPX_OK != dbg_init(here->config)) {
    status = log_error("failed to initialize the debugging system.\n");
    goto unwind1;
  }

  // Now that we know our rank, we can be more specific about waiting.
  if (config_dbg_waitat_isset(here->config, here->rank)) {
    // Don't wait twice.
    if (!config_dbg_waitat_isset(here->config, HPX_LOCALITY_ALL)) {
      dbg_wait();
    }
  }

  // See if we're supposed to output the configuration; only do this at rank 0.
  if (config_log_level_isset(here->config, HPX_LOG_CONFIG)) {
    if (here->rank == 0) {
      config_print(here->config, stdout);
    }
  }

  // topology discovery and initialization
  here->topology = topology_new(here->config);
  if (!here->topology) {
    status = log_error("failed to discover topology.\n");
    goto unwind1;
  }

  // Initialize our instrumentation.
  if (inst_init(here->config)) {
    log_dflt("error detected while initializing instrumentation\n");
  }

  // Allocate the global heap.
  here->gas = gas_new(here->config, here->boot);
  if (!here->gas) {
    status = log_error("failed to create the global address space.\n");
    goto unwind1;
  }
  HPX_HERE = HPX_THERE(here->rank);

  here->percolation = percolation_new();
  if (!here->percolation) {
    status = log_error("failed to activate percolation.\n");
    goto unwind1;
  }

  int cores = system_get_available_cores();
  dbg_assert(cores > 0);

  if (!here->config->threads) {
    here->config->threads = cores;
  }
  log_dflt("HPX running %d worker threads on %d cores\n", here->config->threads,
           cores);

  here->net = network_new(here->config, here->boot, here->gas);
  if (!here->net) {
    status = log_error("failed to create network.\n");
    goto unwind1;
  }

  // thread scheduler
  here->sched = scheduler_new(here->config);
  if (!here->sched) {
    status = log_error("failed to create scheduler.\n");
    goto unwind1;
  }

#ifdef HAVE_APEX
  // initialize APEX, give this main thread a name
  apex_init("HPX WORKER THREAD");
  apex_set_node_id(here->rank);
#endif

  action_registration_finalize();
  inst_start();

  // Start the scheduler; this returns after scheduler_shutdown().
  if (scheduler_startup(here->sched, here->config) != LIBHPX_OK) {
    status = log_error("scheduler shut down with error.\n");
    goto unwind1;
  }

  if ((here->ranks > 1 && here->config->gas != HPX_GAS_AGAS) ||
      !here->config->opt_smp) {
    status = hpx_run(&_hpx_143_fix);
  }

  return status;
 unwind1:
  _stop(here);
  _cleanup(here);
 unwind0:
  return status;
}
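For context, a rough sketch of the runtime lifecycle this function anchors. The action registration for _my_action is elided and the name is hypothetical; hpx_finalize is assumed to tear down what hpx_init built.

int main(int argc, char *argv[]) {
  if (hpx_init(&argc, &argv) != HPX_SUCCESS) {
    return -1;
  }
  // _my_action is a hypothetical, previously registered action; hpx_run
  // spawns it on rank 0 and returns when the scheduler stops, e.g. after
  // some thread calls hpx_exit().
  int e = hpx_run(&_my_action);
  hpx_finalize();
  return e;
}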