예제 #1
0
//
// Initialize the Qthreads-based tasking layer.
//
// Qthreads is configured through environment variables, so this
// function works in three steps:
//   1) Decide the hardware parallelism and publish it via QT_HWPAR
//      (unless the user already set the Qthreads variables directly).
//   2) Publish the task call stack size via QT_STACK_SIZE.
//   3) Start the `initializer` pthread and busy-wait until
//      chpl_qthread_done_initializing becomes nonzero.
// Finally, if blockreport or taskreport is set, a SIGINT handler is
// installed.
//
void chpl_task_init(void)
{
    int32_t   numCoresPerLocale;
    int32_t   numThreadsPerLocale;
    size_t    callStackSize;
    pthread_t initer;
    int       rc;
    char      newenv_sheps[100] = { 0 };
    char      newenv_stack[100] = { 0 };

    // Set up available hardware parallelism.  An explicit Chapel
    // setting wins; if there is none (0), fall back to whatever the
    // comm layer reports as its maximum.
    numThreadsPerLocale = chpl_task_getenvNumThreadsPerLocale();
    numCoresPerLocale = chpl_numCoresOnThisLocale();
    if (numThreadsPerLocale == 0)
        numThreadsPerLocale = chpl_comm_getMaxThreads();
    if (0 < numThreadsPerLocale) {
        // We are assuming the user wants to constrain the hardware
        // resources used during this run of the application.

        // Unset relevant Qthreads environment variables
        qt_internal_unset_envstr("HWPAR");
        qt_internal_unset_envstr("NUM_SHEPHERDS");
        qt_internal_unset_envstr("NUM_WORKERS_PER_SHEPHERD");

        // Only clamp when the core count is usable.  A non-positive
        // core count must not replace a valid requested thread count
        // with zero or a negative value.
        if (0 < numCoresPerLocale &&
            numCoresPerLocale < numThreadsPerLocale) {
            if (2 == verbosity) {
                printf("QTHREADS: Ignored --numThreadsPerLocale=%d to prevent oversubsription of the system.\n", numThreadsPerLocale);
            }

            // Do not oversubscribe the system, use all available resources.
            numThreadsPerLocale = numCoresPerLocale;
        }

        // Set environment variable for Qthreads
        snprintf(newenv_sheps, sizeof(newenv_sheps), "%i",
                 (int)numThreadsPerLocale);
        setenv("QT_HWPAR", newenv_sheps, 1);
    } else if (qt_internal_get_env_str("HWPAR", NULL) ||
               qt_internal_get_env_str("NUM_SHEPHERDS", NULL) ||
               qt_internal_get_env_str("NUM_WORKERS_PER_SHEPHERD", NULL)) {
        // Assume the user has manually set the desired Qthreads
        // environment variables; leave them alone.
    } else {
        // Default to using all hardware resources.
        // NOTE(review): if the core count could ever be non-positive
        // here, QT_HWPAR is still set to that value -- confirm that
        // Qthreads treats it as "unset".
        numThreadsPerLocale = numCoresPerLocale;
        snprintf(newenv_sheps, sizeof(newenv_sheps), "%i",
                 (int)numThreadsPerLocale);
        setenv("QT_HWPAR", newenv_sheps, 1);
    }

    // Stack size: use the Chapel minimum call stack size.  Should that
    // ever be 0, fall back to 1024 * 1024 * sizeof(size_t) bytes as a
    // backstop.  Either way QT_STACK_SIZE is overwritten, so any value
    // the user put there is not consulted.
    callStackSize = chpl_task_getMinCallStackSize();
    if (callStackSize == 0)
        callStackSize = 1024 * 1024 * sizeof(size_t);
    snprintf(newenv_stack, sizeof(newenv_stack), "%zu", callStackSize);
    setenv("QT_STACK_SIZE", newenv_stack, 1);

    // Turn on informative Qthreads setting messages with Chapel's verbose flag
    if (verbosity == 2) {
        setenv("QT_INFO", "1", 1);
    }

    // Start the initializer thread.  If it cannot be created nothing
    // will ever run `initializer`, so the wait below would spin
    // forever; fail loudly instead.
    // NOTE(review): switch to the runtime's own fatal-error routine if
    // one is in scope in this file.
    rc = pthread_create(&initer, NULL, initializer, NULL);
    if (rc != 0) {
        fprintf(stderr,
                "QTHREADS: pthread_create() failed (error %d); "
                "cannot initialize the tasking layer\n", rc);
        abort();
    }
    while (chpl_qthread_done_initializing == 0) SPINLOCK_BODY();

    if (blockreport || taskreport) {
        if (signal(SIGINT, SIGINT_handler) == SIG_ERR) {
            perror("Could not register SIGINT handler");
        }
    }
}
예제 #2
0
//
// Initialize the pthreads-based threading layer.
//
// threadBeginFn and threadEndFn are saved in saved_threadBeginFn and
// saved_threadEndFn for later use by this layer.
//
// Besides saving the callbacks, this sets the thread limit, chooses
// the call stack size and applies it to both the process stack
// rlimit and the shared pthread attributes, sets up the thread-local
// storage and locks, and warms up pthread create/cancel/join.
//
void chpl_thread_init(void(*threadBeginFn)(void*),
                      void(*threadEndFn)(void)) {
  //
  // This threading layer does not have any inherent limit on the number
  // of threads.  A nonzero user setting is used if present; otherwise
  // a nonzero comm layer limit is used; otherwise maxThreads is left
  // untouched.
  //
  {
    uint32_t lim;

    if ((lim = chpl_task_getenvNumThreadsPerLocale()) > 0)
      maxThreads = lim;
    else if ((lim = chpl_comm_getMaxThreads()) > 0)
      maxThreads = lim;
  }

  //
  // Count the main thread on locale 0 as already existing, since it
  // is (or soon will be) running the main program.
  //
  if (chpl_nodeID == 0)
    numThreads = 1;

  //
  // Initialize the pthread attributes object that receives the stack
  // size chosen below.
  //
  if (pthread_attr_init(&thread_attributes) != 0)
    chpl_internal_error("pthread_attr_init() failed");

  //
  // Pick the call stack size (the environment setting if there is
  // one, the default otherwise), round it up to a whole number of
  // pages, and use it to set the system and pthread stack limits.
  // This will in turn limit the stack for any task hosted by either
  // the main process or a pthread.
  //
  {
    size_t        css;
    long          rawPagesize = sysconf(_SC_PAGESIZE);
    size_t        pagesize;
    struct rlimit rlim;

    //
    // sysconf() reports failure as -1.  Casting that to size_t would
    // turn the rounding mask below into garbage, so stop here.
    //
    if (rawPagesize <= 0)
      chpl_internal_error("sysconf(_SC_PAGESIZE) failed");
    pagesize = (size_t) rawPagesize;

    if ((css = chpl_task_getEnvCallStackSize()) == 0)
      css = chpl_task_getDefaultCallStackSize();
    assert(css > 0);

    // Round up to a page multiple; the mask form relies on pagesize
    // being a power of two.
    css = (css + pagesize - 1) & ~(pagesize - 1);

    if (getrlimit(RLIMIT_STACK, &rlim) != 0)
      chpl_internal_error("getrlimit() failed");

    // The soft limit cannot exceed the hard limit, so cap the request
    // and tell the user.
    if (rlim.rlim_max != RLIM_INFINITY && css > rlim.rlim_max) {
      char warning[128];
      snprintf(warning, sizeof(warning), "call stack size capped at %lu\n",
               (unsigned long)rlim.rlim_max);
      chpl_warning(warning, 0, 0);

      css = rlim.rlim_max;
    }

    rlim.rlim_cur = css;

#ifndef __CYGWIN__
    //
    // Cygwin can't do setrlimit(RLIMIT_STACK).
    //
    if (setrlimit(RLIMIT_STACK, &rlim) != 0)
      chpl_internal_error("setrlimit() failed");
#endif

    if (pthread_attr_setstacksize(&thread_attributes, css) != 0)
      chpl_internal_error("pthread_attr_setstacksize() failed");
  }

  // Read back the stack size actually recorded in the attributes.
  if (pthread_attr_getstacksize(&thread_attributes, &threadCallStackSize) != 0)
      chpl_internal_error("pthread_attr_getstacksize() failed");

  saved_threadBeginFn = threadBeginFn;
  saved_threadEndFn   = threadEndFn;

  // Set up thread-local storage and give the calling thread its id,
  // taken by pre-decrementing curr_thread_id.
  CHPL_TLS_INIT(chpl_thread_id);
  CHPL_TLS_SET(chpl_thread_id, (intptr_t) --curr_thread_id);
  CHPL_TLS_INIT(chpl_thread_data);

  // Check mutex creation the same way as the other pthread calls in
  // this function, rather than continuing with an unusable lock.
  if (pthread_mutex_init(&thread_info_lock, NULL) != 0)
    chpl_internal_error("pthread_mutex_init() failed");
  if (pthread_mutex_init(&numThreadsLock, NULL) != 0)
    chpl_internal_error("pthread_mutex_init() failed");

  //
  // This is something of a hack, but it makes us a bit more resilient
  // if we're out of memory or near to it at shutdown time.  Launch,
  // cancel, and join with an initial pthread, forcing initialization
  // needed by any of those activities.  (In particular we have found
  // that cancellation needs to dlopen(3) a shared object, which fails
  // if we are out of memory.  Doing it now means that shared object is
  // already available when we need it later.)  This is best-effort:
  // a failure to create the thread is deliberately ignored.
  //
  {
    pthread_t initial_pthread;

    if (!pthread_create(&initial_pthread, NULL, initial_pthread_func, NULL)) {
      (void) pthread_cancel(initial_pthread);
      (void) pthread_join(initial_pthread, NULL);
    }
  }
}
예제 #3
0
//
// Initialize the Qthreads-based tasking layer.
//
// Qthreads is configured through environment variables, so this
// function:
//   1) Defaults QT_WORKER_UNIT to "core" unless the user chose one.
//   2) Determines the thread count (hwpar) and publishes it through
//      either QT_HWPAR or QT_NUM_SHEPHERDS/QT_NUM_WORKERS_PER_SHEPHERD.
//   3) Publishes the task call stack size via QT_STACK_SIZE.
//   4) Starts the `initializer` pthread and busy-waits until
//      chpl_qthread_done_initializing becomes nonzero.
//   5) Checks the resulting worker count against the comm layer
//      limit, optionally installs a SIGINT handler, and optionally
//      disables work stealing.
//
void chpl_task_init(void)
{
    chpl_bool we_set_worker_unit = false;
    int32_t   numThreadsPerLocale;
    int32_t   commMaxThreads;
    int32_t   hwpar;
    size_t    callStackSize;
    pthread_t initer;
    int       rc;
    char      newenv_stack[100] = { 0 };
    char *noWorkSteal;


    // Set up available hardware parallelism.

    // Experience has shown that we hardly ever win by using more than
    // one PU per core, so default to that.  If this was explicitly
    // set by the user we won't override it, however.  Remember whether
    // the setting is ours so it can be undone below.
    if (getenv("QTHREAD_WORKER_UNIT") == NULL) {
        we_set_worker_unit = (getenv("QT_WORKER_UNIT") == NULL);
        (void) setenv("QT_WORKER_UNIT", "core", 0);
    }

    // Determine the thread count.  CHPL_RT_NUM_THREADS_PER_LOCALE has
    // the highest precedence but we limit it to the number of PUs.
    // QTHREAD_HWPAR has the next precedence.  We don't impose the
    // same limit on it, so it can be used to overload the hardware.
    // In either case the number of threads can be no greater than any
    // maximum imposed by the comm layer.  This limit is imposed
    // silently.
    numThreadsPerLocale = chpl_task_getenvNumThreadsPerLocale();
    commMaxThreads = chpl_comm_getMaxThreads();
    hwpar = 0;
    if (numThreadsPerLocale != 0) {
        int32_t numPUsPerLocale;

        hwpar = numThreadsPerLocale;

        numPUsPerLocale = chpl_numCoresOnThisLocale();
        if (0 < numPUsPerLocale && numPUsPerLocale < hwpar) {
            if (2 == verbosity) {
                printf("QTHREADS: Reduced numThreadsPerLocale=%d to %d "
                       "to prevent oversubscription of the system.\n",
                       hwpar, numPUsPerLocale);
            }

            // Do not oversubscribe the system, use all available resources.
            hwpar = numPUsPerLocale;
        }

        if (0 < commMaxThreads && commMaxThreads < hwpar) {
            hwpar = commMaxThreads;
        }
    } else {
        // No Chapel-level setting.  The Qthreads HWPAR value only
        // needs to be read when there is a comm layer limit to cap it
        // by; otherwise hwpar stays 0 and Qthreads sees the user's
        // environment unchanged.
        if (0 < commMaxThreads) {
            hwpar = qt_internal_get_env_num("HWPAR", 0, 0);
            if (commMaxThreads < hwpar) {
                hwpar = commMaxThreads;
            }
        }
    }

    if (hwpar > 0) {
        char newenv[100];
        char *sched;

        // Unset relevant Qthreads environment variables.  Currently
        // QTHREAD_HWPAR has precedence over the QTHREAD_NUM_* ones,
        // but that isn't documented and may not be true forever, so
        // we unset them all.
        qt_internal_unset_envstr("HWPAR");
        qt_internal_unset_envstr("NUM_SHEPHERDS");
        qt_internal_unset_envstr("NUM_WORKERS_PER_SHEPHERD");

        // Both scheduler branches publish the same number.
        snprintf(newenv, sizeof(newenv), "%i", (int)hwpar);

        // The current check for scheduler and setting HWPAR or
        // NUM_SHEPHERDS/WORKERS_PER_SHEPHERD is just to experiment with
        // the performance of different schedulers. This is not production code
        // and if it's around after July 2014, yell at Elliot.
        sched = getenv("CHPL_QTHREAD_SCHEDULER");
        if (sched != NULL && strncmp(sched, "nemesis", 7) == 0) {
            // One worker per shepherd, hwpar shepherds.
            setenv("QT_NUM_SHEPHERDS", newenv, 1);
            setenv("QT_NUM_WORKERS_PER_SHEPHERD", "1", 1);
            // Unset QT_WORKER_UNIT iff we set it.
            if (we_set_worker_unit) {
              (void) unsetenv("QT_WORKER_UNIT");
            }
        } else {
            setenv("QT_HWPAR", newenv, 1);
        }
    }

    // Stack size: use the Chapel minimum call stack size.  Should that
    // ever be 0, fall back to 1024 * 1024 * sizeof(size_t) bytes as a
    // backstop.  Either way QT_STACK_SIZE is overwritten, so any value
    // the user put there is not consulted.
    callStackSize = chpl_task_getMinCallStackSize();
    if (callStackSize == 0)
        callStackSize = 1024 * 1024 * sizeof(size_t);
    snprintf(newenv_stack, sizeof(newenv_stack), "%zu", callStackSize);
    setenv("QT_STACK_SIZE", newenv_stack, 1);

    // Turn on informative Qthreads setting messages with Chapel's verbose flag
    if (verbosity == 2) {
        setenv("QT_INFO", "1", 1);
    }

    // Start the initializer thread.  If it cannot be created nothing
    // will ever run `initializer`, so the wait below would spin
    // forever; fail loudly instead.
    // NOTE(review): switch to the runtime's own fatal-error routine if
    // one is in scope in this file.
    rc = pthread_create(&initer, NULL, initializer, NULL);
    if (rc != 0) {
        fprintf(stderr,
                "QTHREADS: pthread_create() failed (error %d); "
                "cannot initialize the tasking layer\n", rc);
        abort();
    }
    while (chpl_qthread_done_initializing == 0) SPINLOCK_BODY();

    // Now that Qthreads is up and running, make sure that the number
    // of workers is no greater than any comm layer limit.  This is
    // mainly checking that the default thread count without
    // QTHREAD_HWPAR being set is within any comm layer limit, because
    // we can't determine that default ahead of time.  Secondarily,
    // it's a sanity check on the thread count versus comm limit logic
    // above.  That logic caps hwpar AT commMaxThreads, so equality is
    // a legal outcome and must pass here.
    assert(0 == commMaxThreads || qthread_num_workers() <= commMaxThreads);

    if (blockreport || taskreport) {
        if (signal(SIGINT, SIGINT_handler) == SIG_ERR) {
            perror("Could not register SIGINT handler");
        }
    }

    // Turn off work stealing if it was configured to be off
    noWorkSteal = getenv("CHPL_QTHREAD_NO_WORK_STEALING");
    if (noWorkSteal != NULL && strncmp(noWorkSteal, "yes", 3) == 0) {
      qthread_steal_disable();
    }
}