Exemple #1
0
static void comm_task_wrapper(void* arg) {
  thread_private_data_t* tp;

  tp = (thread_private_data_t*) chpl_mem_alloc(sizeof(thread_private_data_t),
                                               CHPL_RT_MD_THREAD_PRV_DATA,
                                               0, 0);

  tp->ptask = (task_pool_p) chpl_mem_alloc(sizeof(task_pool_t),
                                           CHPL_RT_MD_TASK_POOL_DESC,
                                           0, 0);
  tp->ptask->id           = get_next_task_id();
  tp->ptask->fun          = comm_task_fn;
  tp->ptask->arg          = arg;
  tp->ptask->is_executeOn = false;
  tp->ptask->filename     = CHPL_FILE_IDX_COMM_TASK;
  tp->ptask->lineno       = 0;
  tp->ptask->p_list_head  = NULL;
  tp->ptask->next         = NULL;

  //
  // The comm (polling) task shouldn't really need this information.
  //
  tp->ptask->chpl_data.prvdata.serial_state = true;

  tp->lockRprt = NULL;

  chpl_thread_setPrivateData(tp);

  (*comm_task_fn)(arg);
}
Exemple #2
0
static void setup_main_thread_private_data(void)
{
  thread_private_data_t* tp;

  tp = (thread_private_data_t*) chpl_mem_alloc(sizeof(thread_private_data_t),
                                               CHPL_RT_MD_THREAD_PRV_DATA,
                                               0, 0);

  tp->ptask = (task_pool_p) chpl_mem_alloc(sizeof(task_pool_t),
                                           CHPL_RT_MD_TASK_POOL_DESC,
                                           0, 0);
  tp->lockRprt            = NULL;

  tp->ptask->p_list_head  = NULL;
  tp->ptask->list_next    = NULL;
  tp->ptask->list_prev    = NULL;
  tp->ptask->next         = NULL;
  tp->ptask->prev         = NULL;

  // serial_state starts out true; it is set to false in chpl_std_module_init().
  tp->ptask->bundle.serial_state    = true;
  tp->ptask->bundle.countRunning    = false;
  tp->ptask->bundle.is_executeOn    = false;
  tp->ptask->bundle.lineno          = 0;
  tp->ptask->bundle.filename        = CHPL_FILE_IDX_MAIN_PROGRAM;
  tp->ptask->bundle.requestedSubloc = c_sublocid_any_val;
  tp->ptask->bundle.requested_fid   = FID_NONE;
  tp->ptask->bundle.requested_fn    = NULL;
  tp->ptask->bundle.id              = get_next_task_id();


  chpl_thread_setPrivateData(tp);
}
Exemple #3
0
static void comm_task_wrapper(void* arg) {
  thread_private_data_t* tp;

  tp = (thread_private_data_t*) chpl_mem_alloc(sizeof(thread_private_data_t),
                                               CHPL_RT_MD_THREAD_PRV_DATA,
                                               0, 0);

  tp->ptask = (task_pool_p) chpl_mem_alloc(sizeof(task_pool_t),
                                           CHPL_RT_MD_TASK_POOL_DESC,
                                           0, 0);
  tp->lockRprt            = NULL;

  tp->ptask->p_list_head  = NULL;
  tp->ptask->list_next    = NULL;
  tp->ptask->list_prev    = NULL;
  tp->ptask->next         = NULL;
  tp->ptask->prev         = NULL;

  tp->ptask->bundle.serial_state    = false;
  tp->ptask->bundle.countRunning    = false;
  tp->ptask->bundle.is_executeOn    = false;
  tp->ptask->bundle.lineno          = 0;
  tp->ptask->bundle.filename        = CHPL_FILE_IDX_COMM_TASK;
  tp->ptask->bundle.requestedSubloc = c_sublocid_any_val;
  tp->ptask->bundle.requested_fid   = FID_NONE;
  tp->ptask->bundle.requested_fn    = NULL;
  tp->ptask->bundle.id              = get_next_task_id();

  chpl_thread_setPrivateData(tp);

  (*comm_task_fn)(arg);
}
Exemple #4
0
static void setup_main_thread_private_data(void)
{
  thread_private_data_t* tp;

  tp = (thread_private_data_t*) chpl_mem_alloc(sizeof(thread_private_data_t),
                                               CHPL_RT_MD_THREAD_PRV_DATA,
                                               0, 0);

  tp->ptask = (task_pool_p) chpl_mem_alloc(sizeof(task_pool_t),
                                           CHPL_RT_MD_TASK_POOL_DESC,
                                           0, 0);
  tp->ptask->id           = get_next_task_id();
  tp->ptask->fun          = NULL;
  tp->ptask->arg          = NULL;
  tp->ptask->is_executeOn = false;
  tp->ptask->filename     = CHPL_FILE_IDX_MAIN_PROGRAM;
  tp->ptask->lineno       = 0;
  tp->ptask->p_list_head  = NULL;
  tp->ptask->next         = NULL;
  tp->lockRprt            = NULL;

  // Set up task-private data for locale (architectural) support.
  tp->ptask->chpl_data.prvdata.serial_state = true;     // Set to false in chpl_task_callMain().

  chpl_thread_setPrivateData(tp);
}
Exemple #5
0
chpl_thread_mutex_p chpl_thread_mutexNew(void) {
  chpl_thread_mutex_p m;
  m = (chpl_thread_mutex_p) chpl_mem_alloc(sizeof(chpl_thread_mutex_t),
                                           CHPL_RT_MD_MUTEX, 0, 0);
  chpl_thread_mutexInit(m);
  return m;
}
Exemple #6
0
//
// Queues a task that was moved to this locale.
//
//   fp, a:         function to run and its argument
//   subloc:        must be 0 or c_sublocid_any (asserted)
//   id:            must be chpl_nullTaskID (asserted)
//   serial_state:  serial state recorded in the task's private data
//
// The function, argument, and private data are packed into a
// heap-allocated wrapper descriptor, which is handed to
// add_to_task_pool() together with movedTaskWrapper while
// threading_lock is held.
//
void chpl_task_startMovedTask(chpl_fn_p fp,
                              void* a,
                              c_sublocid_t subloc,
                              chpl_taskID_t id,
                              chpl_bool serial_state) {
  chpl_task_prvDataImpl_t task_prv = {
    .prvdata = { .serial_state = serial_state } };
  movedTaskWrapperDesc_t* desc;

  assert(subloc == 0 || subloc == c_sublocid_any);
  assert(id == chpl_nullTaskID);

  desc = (movedTaskWrapperDesc_t*) chpl_mem_alloc(sizeof(*desc),
                                                  CHPL_RT_MD_THREAD_PRV_DATA,
                                                  0, 0);
  *desc = (movedTaskWrapperDesc_t) { fp, a, canCountRunningTasks, task_prv };

  // add_to_task_pool() assumes threading_lock is held
  chpl_thread_mutexLock(&threading_lock);
  (void) add_to_task_pool(movedTaskWrapper, desc, true, desc->chpl_data,
                          NULL, false, 0, CHPL_FILE_IDX_UNKNOWN);
  chpl_thread_mutexUnlock(&threading_lock);
}
Exemple #7
0
//
// Builds the per-node libfabric address tables (ofi.fi_addrs and
// ofi.rx_addrs).
//
//   rx_ctx_cnt:   number of receive contexts to compute an address for,
//                 per node
//   rx_ctx_bits:  passed through to fi_rx_addr()
//
// Steps:
//   1. Query the local endpoint name and wrap it, with our node ID, in
//      a gather_info record.
//   2. All-gather those records over the out-of-band channel.
//   3. Reorder the raw addresses into an array indexed by node ID.
//   4. Insert them into the address vector and derive the per-context
//      receive addresses.
//
// The three temporary buffers are freed before returning; ofi.fi_addrs
// and ofi.rx_addrs remain allocated.
//
static void libfabric_init_addrvec(int rx_ctx_cnt, int rx_ctx_bits) {
  struct gather_info* my_addr_info;
  void* addr_infos;
  char* addrs;
  char* tai;
  size_t my_addr_len;
  size_t addr_info_len;
  int i, j;

  // Assumes my_addr_len is the same on all nodes
  //
  // The first fi_getname() call passes a NULL buffer and zero length and
  // is expected to return -FI_ETOOSMALL, leaving the needed length in
  // my_addr_len.
  my_addr_len = 0;
  OFICHKRET(fi_getname(&ofi.ep->fid, NULL, &my_addr_len), -FI_ETOOSMALL);
  addr_info_len = sizeof(struct gather_info) + my_addr_len;
  my_addr_info = chpl_mem_alloc(addr_info_len,
                                CHPL_RT_MD_COMM_UTIL,
                                0, 0);
  my_addr_info->node = chpl_nodeID;
  OFICHKERR(fi_getname(&ofi.ep->fid, &my_addr_info->info, &my_addr_len));

  addr_infos = chpl_mem_allocMany(chpl_numNodes, addr_info_len,
                                  CHPL_RT_MD_COMM_PER_LOC_INFO,
                                  0, 0);

  chpl_comm_ofi_oob_allgather(my_addr_info, addr_infos, addr_info_len);

  addrs = chpl_mem_allocMany(chpl_numNodes, my_addr_len,
                             CHPL_RT_MD_COMM_PER_LOC_INFO,
                             0, 0);

  // The gathered records are walked in storage order, but each address
  // is stored at the slot named by the record's own node ID.
  for (tai = addr_infos, i = 0; i < chpl_numNodes; i++) {
    struct gather_info* ai = (struct gather_info*) tai;
    // ai->node (not the loop counter) indexes addrs below, so that is
    // the value that has to be within bounds.
    assert(ai->node >= 0);
    assert(ai->node < chpl_numNodes);
    memcpy(addrs + ai->node * my_addr_len, ai->info, my_addr_len);
    tai += addr_info_len;
  }

  ofi.fi_addrs = chpl_mem_allocMany(chpl_numNodes, sizeof(ofi.fi_addrs[0]),
                                    CHPL_RT_MD_COMM_PER_LOC_INFO,
                                    0, 0);
  // fi_av_insert() is expected to report that all chpl_numNodes
  // addresses were inserted.
  OFICHKRET(fi_av_insert(ofi.av, addrs, chpl_numNodes,
                         ofi.fi_addrs, 0, NULL), chpl_numNodes);

  ofi.rx_addrs = chpl_mem_allocMany(chpl_numNodes, sizeof(ofi.rx_addrs[0]),
                                    CHPL_RT_MD_COMM_PER_LOC_INFO,
                                    0, 0);
  for (i = 0; i < chpl_numNodes; i++) {
    ofi.rx_addrs[i] = chpl_mem_allocMany(rx_ctx_cnt,
                                         sizeof(ofi.rx_addrs[i][0]),
                                         CHPL_RT_MD_COMM_PER_LOC_INFO,
                                         0, 0);
    for (j = 0; j < rx_ctx_cnt; j++) {
      ofi.rx_addrs[i][j] = fi_rx_addr(ofi.fi_addrs[i], j, rx_ctx_bits);
    }
  }

  chpl_mem_free(my_addr_info, 0, 0);
  chpl_mem_free(addr_infos, 0, 0);
  chpl_mem_free(addrs, 0, 0);
}
Exemple #8
0
/* Returns a local copy of a string that lives on another node.
 *
 *     src_locale: node id the string is fetched from
 *     src_addr:   string address on that node; NULL yields NULL
 *     src_len:    length; src_len+1 one-byte elements are fetched
 *                 (presumably the extra byte is the terminating NUL --
 *                 confirm against callers)
 *     lineno, filename: passed through to the allocator and to the get
 *
 * The result is allocated with chpl_mem_alloc().
 */
c_string_copy remoteStringCopy(c_nodeid_t src_locale,
                               c_string src_addr, int64_t src_len,
                               int32_t lineno, c_string filename) {
  char* local_buf;

  if (src_addr == NULL) {
    return NULL;
  }

  local_buf = chpl_mem_alloc(src_len+1, CHPL_RT_MD_STRING_COPY_REMOTE,
                             lineno, filename);
  chpl_gen_comm_get((void*)local_buf, src_locale, (void*)src_addr,
                    sizeof(char), CHPL_TYPE_uint8_t, src_len+1,
                    lineno, filename);

  return (c_string)local_buf;
}
Exemple #9
0
//
// Builds the launcher's node-list option string.
//
// Returns NULL when the CHPL_LAUNCHER_NODELIST environment variable is
// not set.  Otherwise returns a buffer allocated with chpl_mem_alloc()
// holding the string from getNodeListStr() immediately followed by the
// variable's value.
//
// getNodeListStr() is evaluated exactly once so that the size used for
// the allocation and the bytes copied into it cannot disagree.
//
char *getNodeListOpt(void) {
  const char *nodeList = getenv("CHPL_LAUNCHER_NODELIST");
  const char *prefix;
  size_t prefixLen;
  size_t nodeListLen;
  char *nodeListOpt;

  if (nodeList == NULL) {
    return NULL;
  }

  prefix = getNodeListStr();
  prefixLen = strlen(prefix);
  nodeListLen = strlen(nodeList);

  nodeListOpt = chpl_mem_alloc(prefixLen + nodeListLen + 1,
                               CHPL_RT_MD_COMMAND_BUFFER, -1, 0);

  memcpy(nodeListOpt, prefix, prefixLen);
  // +1 copies nodeList's terminating NUL as well
  memcpy(nodeListOpt + prefixLen, nodeList, nodeListLen + 1);

  return nodeListOpt;
}
Exemple #10
0
//
// Creates a task descriptor for the given function pointer and
// argument, and enqueues it via enqueue_task().
//
// The caller must already hold threading_lock.
//
//   fp, a:             function to run and its argument
//   is_executeOn:      recorded in the descriptor and passed to callbacks
//   chpl_data:         task-private data copied into the descriptor
//   p_task_list_head:  passed to enqueue_task(); may be NULL
//   is_begin_stmt:     forces the "maybe add a thread" check to apply
//   lineno, filename:  source location recorded for the task
//
// Returns the new descriptor.
//
static inline
task_pool_p add_to_task_pool(chpl_fn_p fp,
                             void* a,
                             chpl_bool is_executeOn,
                             chpl_task_prvDataImpl_t chpl_data,
                             task_pool_p* p_task_list_head,
                             chpl_bool is_begin_stmt,
                             int lineno, int32_t filename) {
  task_pool_p new_task;

  new_task = (task_pool_p) chpl_mem_alloc(sizeof(task_pool_t),
                                          CHPL_RT_MD_TASK_POOL_DESC,
                                          0, 0);

  new_task->id           = get_next_task_id();
  new_task->fun          = fp;
  new_task->arg          = a;
  new_task->is_executeOn = is_executeOn;
  new_task->chpl_data    = chpl_data;
  new_task->lineno       = lineno;
  new_task->filename     = filename;
  new_task->next         = NULL;
  new_task->p_list_head  = NULL;

  enqueue_task(new_task, p_task_list_head);

  chpl_task_do_callbacks(chpl_task_cb_event_kind_create,
                         new_task->filename,
                         new_task->lineno,
                         new_task->id,
                         new_task->is_executeOn);

  if (do_taskReport) {
    // the task table is protected by its own lock
    chpl_thread_mutexLock(&taskTable_lock);
    chpldev_taskTable_add(new_task->id,
                          new_task->lineno, new_task->filename,
                          (uint64_t) (intptr_t) new_task);
    chpl_thread_mutexUnlock(&taskTable_lock);
  }

  //
  // If we now have more tasks than threads to run them on (taking
  // into account that the current parent of a structured parallel
  // construct can run at least one of that construct's children),
  // try to start another thread.
  //
  if (queued_task_cnt > idle_thread_cnt) {
    if (p_task_list_head == NULL ||
        new_task->list_next != NULL ||
        is_begin_stmt) {
      maybe_add_thread();
    }
  }

  return new_task;
}
Exemple #11
0
//
// Stores in *ret a newly allocated, NUL-terminated copy of 'str'.
//
//   ret:     receives the copy, or NULL when 'str' is NULL
//   str:     source string
//   haslen:  nonzero if 'len' is valid; otherwise strlen(str) is used
//   len:     number of characters to copy
//   lineno, filename: passed through to chpl_mem_alloc()
//
void string_from_c_string(chpl_string *ret, c_string str, int haslen, int64_t len, int32_t lineno, chpl_string filename)
{
  char* copy;

  if (str == NULL) {
    *ret = NULL;
    return;
  }

  if (!haslen) {
    len = strlen(str);
  }

  copy = (char*)chpl_mem_alloc(len+1, CHPL_RT_MD_STRING_COPY_DATA,
                               lineno, filename);
  chpl_memcpy(copy, str, len);
  copy[len] = '\0';

  *ret = copy;
}
Exemple #12
0
static void* pthread_func(void* arg) {
  chpl_thread_id_t my_thread_id;
  thread_list_p          tlp;

  // disable cancellation immediately
  // enable only while waiting for new work
  pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); 

  // add us to the list of threads
  tlp = (thread_list_p) chpl_mem_alloc(sizeof(struct thread_list),
                                       CHPL_RT_MD_THREAD_LIST_DESC, 0, 0);

  tlp->thread = pthread_self();
  tlp->next   = NULL;

  pthread_mutex_lock(&thread_info_lock);

  if (exiting) {
    pthread_mutex_unlock(&thread_info_lock);
    chpl_mem_free(tlp, 0, 0);
    return NULL;
  }

  my_thread_id = --curr_thread_id;

  if (thread_list_head == NULL)
    thread_list_head = tlp;
  else
    thread_list_tail->next = tlp;
  thread_list_tail = tlp;

  pthread_mutex_unlock(&thread_info_lock);

  CHPL_TLS_SET(chpl_thread_id, (intptr_t) my_thread_id);

  if (saved_threadEndFn == NULL)
    (*saved_threadBeginFn)(arg);
  else {
    pthread_cleanup_push((void (*)(void*)) saved_threadEndFn, NULL);
    (*saved_threadBeginFn)(arg);
    pthread_cleanup_pop(1); // Shouldn't we run the thread
                            // end function even if not cancelled?
  }

  return NULL;
}
Exemple #13
0
//
// Fills in the wide string *ret with a newly allocated, NUL-terminated
// copy of 'str'.
//
//   ret:     its locale is always set from chpl_gen_getLocaleID();
//            addr/size become NULL/0 when 'str' is NULL
//   str:     source string
//   haslen:  nonzero if 'len' is valid; otherwise strlen(str) is used
//   len:     number of characters to copy
//   lineno, filename: passed through to chpl_mem_alloc()
//
void wide_string_from_c_string(chpl____wide_chpl_string *ret, c_string str, int haslen, int64_t len, int32_t lineno, chpl_string filename)
{
  char* copy;

  ret->locale = chpl_gen_getLocaleID();

  if (str == NULL) {
    ret->addr = NULL;
    ret->size = 0;
    return;
  }

  if (!haslen) {
    len = strlen(str);
  }

  copy = chpl_mem_alloc(len+1, CHPL_RT_MD_STRING_COPY_DATA, lineno, filename);
  chpl_memcpy(copy, str, len);
  copy[len] = '\0';

  ret->addr = copy;
  ret->size = len + 1; // this size includes the terminating NUL
}
Exemple #14
0
//
// Creates a lock report entry for the calling thread and appends it to
// the global lock report list.
//
// This function should be called exactly once per thread (not task!),
// including the main thread, and before the first task the thread was
// created to run is started.
//
// Our handling of lock report list entries could be improved.  One is
// allocated each time this function is called, and this is called
// just before each task wrapper is called.  Entries are never removed
// from the list or deallocated.  If the list is traversed while
// reporting a deadlock, the leaked ones are simply skipped, because
// they don't say "blocked".
//
static void initializeLockReportForThread(void) {
  lockReport_t* report;

  report = (lockReport_t*) chpl_mem_alloc(sizeof(lockReport_t),
                                          CHPL_RT_MD_LOCK_REPORT_DATA,
                                          0, 0);
  report->next = NULL;
  report->maybeLocked = false;

  get_thread_private_data()->lockRprt = report;

  // Begin critical section
  chpl_thread_mutexLock(&block_report_lock);

  // Link after the current tail, or start the list if it is empty;
  // either way the new entry becomes the tail.
  if (lockReportHead) {
    lockReportTail->next = report;
  } else {
    lockReportHead = report;
  }
  lockReportTail = report;

  // End critical section
  chpl_thread_mutexUnlock(&block_report_lock);
}
Exemple #15
0
//
// Copies at most 'len' characters of 'src' into a destination buffer
// and returns that buffer; returns NULL when 'src' is NULL.
//
// The destination is 'dest' itself when it is non-NULL and non-empty;
// otherwise a new buffer of len+1 bytes is allocated.
//
// Even if allocation is done here, the returned string is already owned
// elsewhere.
// NOTE(review): the original comment says this is why a c_string rather
// than a c_string_copy is returned, yet the declared return type is
// c_string_copy -- confirm which is intended.
// NOTE(review): when 'dest' is reused, nothing here checks that it can
// hold len+1 bytes -- presumably the caller guarantees this; verify.
//
c_string_copy stringMove(c_string_copy dest, c_string src, int64_t len,
                         int32_t lineno, c_string filename) {
  char *target;

  if (src == NULL) {
    return NULL;
  }

  // TODO: Want to deprecate indicating an empty string by a string of zero
  // length.  This works OK if the string is unallocated (such as a string
  // literal), but does not work well with an allocated string.  An
  // allocated string of zero length still occupies memory (one byte for
  // the NUL, at least), so that leaves us with a dilemma.  Which is it?
  if (dest != NULL && strlen(dest) != 0) {
    // Reuse the buffer.  The cast is necessary so we can write into it
    // (it is declared to be const).
    target = (char *) dest;
  } else {
    target = chpl_mem_alloc(len+1, CHPL_RT_MD_STRING_MOVE_DATA,
                            lineno, filename);
  }

  snprintf(target, len+1, "%s", src);

  return (c_string) target;
}
Exemple #16
0
static inline
void taskCallBody(chpl_fn_p fp, void* arg, void* arg_copy,
                  c_sublocid_t subloc, chpl_bool serial_state,
                  int lineno, int32_t filename) {
  taskCallWrapperDesc_t* ptcwd;
  chpl_task_prvDataImpl_t private = {
    .prvdata = { .serial_state = serial_state } };

  ptcwd = (taskCallWrapperDesc_t*)
          chpl_mem_alloc(sizeof(*ptcwd),
                         CHPL_RT_MD_THREAD_PRV_DATA,
                         0, 0);
  *ptcwd = (taskCallWrapperDesc_t)
    { fp, arg, arg_copy, canCountRunningTasks, private };

  // begin critical section
  chpl_thread_mutexLock(&threading_lock);

  (void) add_to_task_pool(taskCallWrapper, ptcwd, true, ptcwd->chpl_data,
                          NULL, false, lineno, filename);

  // end critical section
  chpl_thread_mutexUnlock(&threading_lock);
}
Exemple #17
0
//
// Allocation hook for GMP: returns 'sz' bytes obtained from
// chpl_mem_alloc(), tagged CHPL_RT_MD_GMP.
//
static void* chpl_gmp_alloc(size_t sz) {
  void* block = chpl_mem_alloc(sz, CHPL_RT_MD_GMP, 0, 0);

  return block;
}
Exemple #18
0
//
// When we create a thread it runs this wrapper function, which just
// executes tasks out of the pool as they become available.
//
// The thread first sets up and registers its private data, then loops
// forever: wait (by yielding) until the task pool looks non-empty, take
// threading_lock and re-check, dequeue the head task, run it with the
// begin/end callbacks around it, free its descriptor, and update the
// running/idle counters.
//
// ptask_void is not used.
//
static void
thread_begin(void* ptask_void) {
  task_pool_p ptask;
  thread_private_data_t *tp;

  tp = (thread_private_data_t*) chpl_mem_alloc(sizeof(thread_private_data_t),
                                               CHPL_RT_MD_THREAD_PRV_DATA,
                                               0, 0);

  //
  // Initialize every field before registering the private data, so
  // that nothing can observe uninitialized pointers through it.  The
  // task pointer is NULL whenever this thread is not running a task.
  //
  tp->ptask    = NULL;
  tp->lockRprt = NULL;
  chpl_thread_setPrivateData(tp);

  if (blockreport)
    initializeLockReportForThread();

  while (true) {
    //
    // wait for a task to be present in the task pool
    //

    // In revision 22137, we investigated whether it was beneficial to
    // implement this while loop in a hybrid style, where depending on
    // the number of tasks available, idle threads would either yield or
    // wait on a condition variable to waken them.  Through analysis, we
    // realized this could potentially create a case where a thread would
    // become stranded, waiting for a condition signal that would never
    // come.  A potential solution to this was to keep a count of threads
    // that were waiting on the signal, but since there was a performance
    // impact from keeping it as a hybrid as opposed to merely yielding,
    // it was decided that we would return to the simple yield case.
    while (!task_pool_head) {
      if (set_block_loc(0, CHPL_FILE_IDX_IDLE_TASK)) {
        // all other tasks appear to be blocked
        //
        // Yield for up to one second waiting for work to show up, and
        // only then check for deadlock.
        struct timeval deadline, now;
        gettimeofday(&deadline, NULL);
        deadline.tv_sec += 1;
        do {
          chpl_thread_yield();
          //
          // Refresh 'now' unconditionally.  task_pool_head is read
          // without a lock, so it can be non-NULL here and NULL again
          // by the time the loop condition runs; sampling the time
          // only when the pool looked empty could leave 'now'
          // uninitialized when it is compared below.
          //
          gettimeofday(&now, NULL);
        } while (!task_pool_head
                 && (now.tv_sec < deadline.tv_sec
                     || (now.tv_sec == deadline.tv_sec
                         && now.tv_usec < deadline.tv_usec)));
        if (!task_pool_head) {
          check_for_deadlock();
        }
      }
      else {
        do {
          chpl_thread_yield();
        } while (!task_pool_head);
      }

      unset_block_loc();
    }

    //
    // Just now the pool had at least one task in it.  Lock and see if
    // there's something still there.
    //
    chpl_thread_mutexLock(&threading_lock);
    if (!task_pool_head) {
      // another thread got there first; go back to waiting
      chpl_thread_mutexUnlock(&threading_lock);
      continue;
    }

    //
    // We've found a task to run.
    //

    if (blockreport)
      progress_cnt++;

    //
    // start new task; increment running count and remove task from pool
    // also add to task to task-table (structure in ChapelRuntime that keeps
    // track of currently running tasks for task-reports on deadlock or
    // Ctrl+C).
    //
    ptask = task_pool_head;
    idle_thread_cnt--;
    running_task_cnt++;

    dequeue_task(ptask);

    // end critical section
    chpl_thread_mutexUnlock(&threading_lock);

    tp->ptask = ptask;

    if (do_taskReport) {
      chpl_thread_mutexLock(&taskTable_lock);
      chpldev_taskTable_set_active(ptask->id);
      chpl_thread_mutexUnlock(&taskTable_lock);
    }

    chpl_task_do_callbacks(chpl_task_cb_event_kind_begin,
                           ptask->filename,
                           ptask->lineno,
                           ptask->id,
                           ptask->is_executeOn);

    // run the task body
    (*ptask->fun)(ptask->arg);

    chpl_task_do_callbacks(chpl_task_cb_event_kind_end,
                           ptask->filename,
                           ptask->lineno,
                           ptask->id,
                           ptask->is_executeOn);

    if (do_taskReport) {
      chpl_thread_mutexLock(&taskTable_lock);
      chpldev_taskTable_remove(ptask->id);
      chpl_thread_mutexUnlock(&taskTable_lock);
    }

    // this thread owns the descriptor once it has been dequeued
    tp->ptask = NULL;
    chpl_mem_free(ptask, 0, 0);

    // begin critical section
    chpl_thread_mutexLock(&threading_lock);

    //
    // finished task; decrement running count and increment idle count
    //
    assert(running_task_cnt > 0);
    running_task_cnt--;
    idle_thread_cnt++;

    // end critical section
    chpl_thread_mutexUnlock(&threading_lock);
  }
}
Exemple #19
0
void chpl_task_init(void) {
  chpl_thread_mutexInit(&threading_lock);
  chpl_thread_mutexInit(&extra_task_lock);
  chpl_thread_mutexInit(&task_id_lock);
  chpl_thread_mutexInit(&task_list_lock);
  queued_task_cnt = 0;
  running_task_cnt = 1;                     // only main task running
  blocked_thread_cnt = 0;
  idle_thread_cnt = 0;
  extra_task_cnt = 0;
  task_pool_head = task_pool_tail = NULL;

  chpl_thread_init(thread_begin, thread_end);

  //
  // Set main thread private data, so that things that require access
  // to it, like chpl_task_getID() and chpl_task_setSerial(), can be
  // called early (notably during standard module initialization).
  //
  // This needs to be done after the threading layer initialization,
  // because it's based on thread layer capabilities, but before we
  // install the signal handlers, because when those are invoked they
  // may use the thread private data.
  //
  {
    thread_private_data_t* tp;

    tp = (thread_private_data_t*) chpl_mem_alloc(sizeof(thread_private_data_t),
                                                 CHPL_RT_MD_THREAD_PRV_DATA,
                                                 0, 0);

    tp->ptask = (task_pool_p) chpl_mem_alloc(sizeof(task_pool_t),
                                             CHPL_RT_MD_TASK_POOL_DESC,
                                             0, 0);
    tp->ptask->id           = get_next_task_id();
    tp->ptask->fun          = NULL;
    tp->ptask->arg          = NULL;
    tp->ptask->is_executeOn = false;
    tp->ptask->filename     = CHPL_FILE_IDX_MAIN_PROGRAM;
    tp->ptask->lineno       = 0;
    tp->ptask->p_list_head  = NULL;
    tp->ptask->next         = NULL;
    tp->lockRprt            = NULL;

    // Set up task-private data for locale (architectural) support.
    tp->ptask->chpl_data.prvdata.serial_state = true;     // Set to false in chpl_task_callMain().

    chpl_thread_setPrivateData(tp);
  }

  if (blockreport) {
    progress_cnt = 0;
    chpl_thread_mutexInit(&block_report_lock);
    initializeLockReportForThread();
  }

  if (blockreport || taskreport) {
    signal(SIGINT, SIGINT_handler);
  }

  initialized = true;
}
Exemple #20
0
//
// Creates a task descriptor from the given function and argument
// bundle, and enqueues it via enqueue_task().
//
// The caller must already hold threading_lock.
//
//   fid, fp:            requested function ID and pointer, recorded in
//                       the bundle
//   a, a_size:          argument bundle and its total size in bytes;
//                       a_size must be at least sizeof(chpl_task_bundle_t)
//   serial_state, countRunningTasks, is_executeOn:
//                       recorded in the bundle
//   p_task_list_head:   passed to enqueue_task(); may be NULL
//   is_begin_stmt:      forces the "maybe add a thread" check to apply
//   lineno, filename:   source location recorded for the task
//
// The descriptor is allocated with room for whatever part of the bundle
// extends past chpl_task_bundle_t, and the caller's bundle is copied
// into it before the header fields are overwritten.
//
// Returns the new descriptor.
//
static inline
task_pool_p add_to_task_pool(chpl_fn_int_t fid, chpl_fn_p fp,
                             chpl_task_bundle_t* a, size_t a_size,
                             chpl_bool serial_state,
                             chpl_bool countRunningTasks,
                             chpl_bool is_executeOn,
                             task_pool_p* p_task_list_head,
                             chpl_bool is_begin_stmt,
                             int lineno, int32_t filename) {
  task_pool_p new_task;
  size_t      extra_bytes;

  assert(a_size >= sizeof(chpl_task_bundle_t));

  // bytes of the caller's bundle beyond the fixed bundle header
  extra_bytes = a_size - sizeof(chpl_task_bundle_t);

  new_task = (task_pool_p) chpl_mem_alloc(sizeof(task_pool_t) + extra_bytes,
                                          CHPL_RT_MD_TASK_ARG_AND_POOL_DESC,
                                          lineno, filename);

  // Copy the whole bundle first; the header fields are then overwritten.
  memcpy(&new_task->bundle, a, a_size);

  // The descriptor starts out unlinked, with zeroed task-private data.
  new_task->p_list_head = NULL;
  new_task->list_next   = NULL;
  new_task->list_prev   = NULL;
  new_task->next        = NULL;
  new_task->prev        = NULL;
  memset(&new_task->chpl_data, 0, sizeof(new_task->chpl_data));

  new_task->bundle.serial_state    = serial_state;
  new_task->bundle.countRunning    = countRunningTasks;
  new_task->bundle.is_executeOn    = is_executeOn;
  new_task->bundle.lineno          = lineno;
  new_task->bundle.filename        = filename;
  new_task->bundle.requestedSubloc = c_sublocid_any_val;
  new_task->bundle.requested_fid   = fid;
  new_task->bundle.requested_fn    = fp;
  new_task->bundle.id              = get_next_task_id();

  enqueue_task(new_task, p_task_list_head);

  chpl_task_do_callbacks(chpl_task_cb_event_kind_create,
                         new_task->bundle.requested_fid,
                         new_task->bundle.filename,
                         new_task->bundle.lineno,
                         new_task->bundle.id,
                         new_task->bundle.is_executeOn);

  if (do_taskReport) {
    // the task table is protected by its own lock
    chpl_thread_mutexLock(&taskTable_lock);
    chpldev_taskTable_add(new_task->bundle.id,
                          new_task->bundle.lineno, new_task->bundle.filename,
                          (uint64_t) (intptr_t) new_task);
    chpl_thread_mutexUnlock(&taskTable_lock);
  }

  //
  // If we now have more tasks than threads to run them on (taking
  // into account that the current parent of a structured parallel
  // construct can run at least one of that construct's children),
  // try to start another thread.
  //
  if (queued_task_cnt > idle_thread_cnt) {
    if (p_task_list_head == NULL ||
        new_task->list_next != NULL ||
        is_begin_stmt) {
      maybe_add_thread();
    }
  }

  return new_task;
}