Ejemplo n.º 1
0
/* Return the calling thread's global id, as reported by
 * __pmp_get_current_global_id(). The value is logged on the CALLS debug
 * channel and the call is recorded with __pmp_sample before returning. */
int __ompc_get_thread_num (void)
{
  const int thread_num = __pmp_get_current_global_id();

  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_get_thread_num returns %d\n",
              thread_num);
  __pmp_sample(PMP_PROFILE_OMPC_GET_THREAD_NUM);

  return thread_num;
}
Ejemplo n.º 2
0
/* Increment the calling thread's serialized_parallel counter.
 * __ompc_in_parallel treats a positive counter as "inside a parallel
 * region". */
void __ompc_serialized_parallel (void)
{
  pmp_thread_t *self = __pmp_get_current_thread();

  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_serialized_parallel\n");
  __pmp_sample(PMP_PROFILE_OMPC_SERIALIZED_PARALLEL);

  self->serialized_parallel += 1;
}
Ejemplo n.º 3
0
/* Wake the given thread, the counterpart of __pmp_thread_wait.
 *
 * The thread's sync word is moved from PMP_SYNC_IDLE to PMP_SYNC_UNBLOCKED
 * with a compare-and-exchange. If the word was found to be PMP_SYNC_BLOCKED
 * instead (the state __pmp_thread_wait installs before it sleeps), the word
 * is reset to PMP_SYNC_IDLE and SIGPMP is delivered to the thread using
 * whichever mechanism was selected at build time.
 *
 * thread: the thread to wake. */
static inline void __pmp_thread_wake (pmp_thread_t *thread)
{
  /* NOTE(review): presumably the cmpxchg returns the value that was in
   * thread->sync before the operation - confirm against
   * __pmp_atomic_cmpxchg32. */
  int32_t sync = __pmp_atomic_cmpxchg32(&thread->sync, PMP_SYNC_IDLE,
                                        PMP_SYNC_UNBLOCKED);
  /* A thread must not be woken while a previous wake is still pending. */
  assert(sync != PMP_SYNC_UNBLOCKED);

  if (sync == PMP_SYNC_BLOCKED) {
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d is being signaled\n",
                thread->global_id);

    /* Reset the sync word before signaling; __pmp_thread_wait does not
     * touch it again after returning from its signal wait. */
    thread->sync = PMP_SYNC_IDLE;

    assert(thread->tid != -1);

    __pmp_sample(PMP_PROFILE_THREAD_RESCHEDULE);
    /* Deliver SIGPMP: pthread_kill, kill or tkill depending on the build
     * configuration. Any failure to signal is fatal. */
#if (defined PMP_USE_PTHREAD_SIGNALS)
    if (pthread_kill(thread->pthread_id, SIGPMP) != 0) {
      __pmp_fatal("unable to wake thread using pthread_kill\n");
    }
#elif (defined PMP_NO_NPTL)
    if (kill(thread->tid, SIGPMP) != 0) {
      __pmp_fatal("unable to wake thread using kill\n");
    }
#else
    if (tkill(thread->tid, SIGPMP) != 0) {
      __pmp_fatal("unable to wake thread using tkill\n");
    }
#endif
  }
  else {
    /* The sync word was not BLOCKED, so no signal is sent. */
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d is woken\n",
                thread->global_id);
  }
}
Ejemplo n.º 4
0
/* Copy thread-private data for the calling thread.
 *
 * n:   number of variadic arguments that follow. They are consumed in
 *      groups of three: (void *dst, void *src, int size).
 * ...: the (dst, src, size) triples.
 *
 * For every triple whose dst and src differ, size bytes are copied from
 * src to dst. Nothing is copied when the runtime parameter "disabled" is
 * set. A trailing incomplete group (n not a multiple of three) is ignored,
 * since fetching arguments that were never passed is undefined behaviour,
 * and a non-positive size copies nothing. */
void __ompc_copyin_thdprv (int n, ...)
{
  pmp_global_id_t global_id;
  va_list ap;

  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_copyin_thdprv: n=%d\n", n);
  __pmp_sample(PMP_PROFILE_OMPC_COPYIN_THDPRV);

  if (__pmp_get_param()->disabled) {
    return;
  }

  global_id = __pmp_get_current_global_id();

  va_start(ap, n);
  /* Only fetch a triple when all three of its arguments are present. */
  while (n >= 3) {
    void *dst = va_arg(ap, void*);
    void *src = va_arg(ap, void*);
    int size = va_arg(ap, int);
    if (dst != src) {
      __pmp_debug(PMP_DEBUG_THREAD, "__ompc_copyin_thdprv: global_id=%d "
                  "dst: %p, src: %p, size: %d\n", global_id, dst, src, size);
      /* A negative size would be converted to a huge size_t by memcpy. */
      if (size > 0) {
        memcpy(dst, src, (size_t) size);
      }
    }
    n -= 3;
  }
  va_end(ap);
}
Ejemplo n.º 5
0
/* 32-bit front end to __pmp_static_init.
 *
 * global_id:    id passed through to __pmp_static_init.
 * sched:        schedule kind passed through to __pmp_static_init.
 * lower, upper: in/out loop bounds; widened to 64 bits for the call and
 *               narrowed back to int afterwards.
 * stride:       out (see below); receives the narrowed 64-bit stride.
 * inc, chunk:   passed through to __pmp_static_init.
 *
 * The 64-bit stride temporary is seeded from *stride, in the same way the
 * bounds are seeded from *lower and *upper, so that a path through
 * __pmp_static_init that leaves the stride unwritten cannot cause an
 * uninitialized local to be stored into *stride. */
void __ompc_static_init_4 (int global_id, int sched, int *lower,
                           int *upper, int *stride, int inc, int chunk)
{
  int64_t l = (int64_t) *lower;
  int64_t u = (int64_t) *upper;
  int64_t s = (int64_t) *stride;

#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Call_Debug) {
    __pmp_debug("CALLS_DEBUG", "__ompc_static_init_4 global_id=%d with "
                "sched=%d, lower=%d, upper=%d, inc=%d, chunk=%d\n",
                global_id, sched, *lower, *upper, inc, chunk);
  }
#endif

  __pmp_sample(PMP_PROFILE_OMPC_STATIC_INIT_4);

  __pmp_static_init(global_id, sched, &l, &u, &s, inc, chunk);

  /* Narrow the 64-bit results back into the caller's int variables. */
  *lower = (int) l;
  *upper = (int) u;
  *stride = (int) s;

#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Call_Debug) {
    __pmp_debug("CALLS_DEBUG", "__ompc_static_init_4 global_id=%d returns "
                "lower=%d, upper=%d, stride=%d\n",
                global_id, *lower, *upper, *stride);
  }
#endif
}
Ejemplo n.º 6
0
/* 32-bit front end to __pmp_schedule_next.
 *
 * global_id:         id passed through to __pmp_schedule_next.
 * lower, upper, inc: receive the narrowed 64-bit values produced by
 *                    __pmp_schedule_next.
 *
 * Returns the result of __pmp_schedule_next unchanged.
 *
 * The 64-bit temporaries are seeded from the caller's current values, so
 * anything __pmp_schedule_next does not write (for example when it
 * reports result 0) round-trips unchanged instead of being replaced by
 * the contents of uninitialized locals. This matches
 * __ompc_schedule_next_8, which hands the caller's pointers straight to
 * the helper. */
int __ompc_schedule_next_4 (int global_id, int *lower, int *upper,
                            int *inc)
{
  int result;
  int64_t l = (int64_t) *lower;
  int64_t u = (int64_t) *upper;
  int64_t i = (int64_t) *inc;

  __pmp_sample(PMP_PROFILE_OMPC_SCHEDULE_NEXT_4);

  result = __pmp_schedule_next(global_id, &l, &u, &i);

  *lower = (int) l;
  *upper = (int) u;
  *inc = (int) i;

#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Call_Debug) {
    if (result == 0) {
      __pmp_debug("CALLS_DEBUG", "__ompc_schedule_next_4 global_id=%d returns "
                  "result=0\n", global_id);
    }
    else {
      __pmp_debug("CALLS_DEBUG", "__ompc_schedule_next_4 global_id=%d returns "
                  "lower=%d, upper=%d, inc=%d, result=%d\n",
                  global_id, *lower, *upper, *inc, result);
    }
  }
#endif

  return result;
}
Ejemplo n.º 7
0
/* Barrier entry point: issues a memory fence and then has the calling
 * thread enter __pmp_thread_barrier. */
void __ompc_barrier (void)
{
  pmp_thread_t *self = __pmp_get_current_thread();

  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_barrier\n");
  __pmp_sample(PMP_PROFILE_OMPC_BARRIER);

  /* Fence first, then join the barrier. */
  __pmp_memory_fence();
  __pmp_thread_barrier(self);
}
Ejemplo n.º 8
0
int __ompc_master (int global_id)
{
  int master = (__pmp_get_thread(global_id)->local_id == 0);
  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_master global_id=%d returns %d\n",
              global_id, master);
  __pmp_sample(PMP_PROFILE_OMPC_MASTER);
  return master;
}
Ejemplo n.º 9
0
int __ompc_single (int global_id)
{
  int result = __pmp_thread_single(__pmp_get_thread(global_id));
  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_single global_id=%d returns %d\n", 
              global_id, result);
  __pmp_sample(PMP_PROFILE_OMPC_SINGLE);
  return result;
}
Ejemplo n.º 10
0
/* Return the calling thread's local id, as reported by
 * __pmp_get_current_local_id(). The id is asserted not to be -1. */
int __ompc_get_local_thread_num (void)
{
  int local_id;

  local_id = __pmp_get_current_local_id();
  assert(local_id != -1);

  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_get_local_thread_num returns %d\n",
              local_id);
  __pmp_sample(PMP_PROFILE_OMPC_GET_LOCAL_THREAD_NUM);

  return local_id;
}
Ejemplo n.º 11
0
/* End-of-static-loop hook. It only logs (in SUPER_DEBUG builds) and
 * records a profile sample; there is nothing else to do. */
void __ompc_static_fini (void)
{
#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Call_Debug) {
    __pmp_debug("CALLS_DEBUG", "__ompc_static_fini\n");
  }
#endif

  __pmp_sample(PMP_PROFILE_OMPC_STATIC_FINI);

  /* Intentionally empty. NOTE: the compiler does not appear to call this
   * function anyway. */
}
Ejemplo n.º 12
0
/* Report whether the calling thread is inside a parallel region: it
 * either belongs to a team or has a positive serialized_parallel count.
 *
 * Returns 1 if so, 0 otherwise. */
int __ompc_in_parallel (void)
{
  pmp_thread_t *self = __pmp_get_current_thread();
  int in_parallel;

  if (self->team != NULL) {
    in_parallel = 1;
  }
  else {
    /* No team: only a serialized parallel region can still count. */
    in_parallel = (self->serialized_parallel > 0);
  }

  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_in_parallel returns %d\n", in_parallel);
  __pmp_sample(PMP_PROFILE_OMPC_IN_PARALLEL);

  return in_parallel;
}
Ejemplo n.º 13
0
/* Wait until the given thread has been woken via __pmp_thread_wake.
 *
 * The wait proceeds in three stages:
 *   1. If thread->sync is already PMP_SYNC_UNBLOCKED, consume the wake-up
 *      (reset to PMP_SYNC_IDLE) and return immediately.
 *   2. Poll thread->sync for up to "thread_spin" iterations (a runtime
 *      parameter), calling __pmp_yield() between polls.
 *   3. Try to move thread->sync from PMP_SYNC_IDLE to PMP_SYNC_BLOCKED and,
 *      if that succeeds, sleep until SIGPMP arrives.
 *
 * thread: the thread that is waiting. */
static inline void __pmp_thread_wait (pmp_thread_t *thread)
{
  int32_t sync;
  int thread_spin;
  int i;

  /* Stage 1: a wake-up is already pending. */
  if (thread->sync == PMP_SYNC_UNBLOCKED) {
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d does not block (1)\n",
                thread->global_id);
    thread->sync = PMP_SYNC_IDLE;
    return;
  }

  thread_spin = __pmp_get_param()->thread_spin;

  /* Stage 2: bounded spin in user space before resorting to a signal wait. */
  for (i = 0; i < thread_spin; i++) {
    /* USER LEVEL SPIN LOOP */
    if (thread->sync == PMP_SYNC_UNBLOCKED) {
      __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d does not block (2)\n",
                  thread->global_id);
      thread->sync = PMP_SYNC_IDLE;
      return;
    }
    __pmp_yield();
  }

  /* Stage 3: announce that this thread is about to block.
   * NOTE(review): presumably the cmpxchg returns the value that was in
   * thread->sync before the operation - confirm against
   * __pmp_atomic_cmpxchg32. */
  sync = __pmp_atomic_cmpxchg32(&thread->sync, PMP_SYNC_IDLE,
                                PMP_SYNC_BLOCKED);

  if (sync == PMP_SYNC_IDLE) {
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d is waiting\n",
                thread->global_id);

    __pmp_sample(PMP_PROFILE_THREAD_DESCHEDULE);

#ifdef PMP_USE_PTHREAD_SIGNALS
    {
      /* Keep waiting until the signal received is SIGPMP.
       * NOTE(review): the return value of sigwait is not checked, so "sig"
       * may be read without having been set if sigwait fails - confirm
       * whether that can happen here. */
      int sig;
      do {
        sigwait(&__pmp_manager.mask_block_sigpmp, &sig);
      } while (sig != SIGPMP);
    }
#else
    sigsuspend(&__pmp_manager.mask_unblock_sigpmp);
    /* NOTE: it is unfortunate that sigsuspend does not tell us which
     *       signal has been raised. */
#endif
    /* thread->sync is not reset here; __pmp_thread_wake sets it back to
     * PMP_SYNC_IDLE before it sends the signal. */
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d is awake\n",
                thread->global_id);
  }
  else {
    /* The sync word was no longer IDLE when we tried to block, so do not
     * sleep; just reset it to IDLE. */
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d does not block (3)\n",
                thread->global_id);
    thread->sync = PMP_SYNC_IDLE;
  }
}
Ejemplo n.º 14
0
/* Decide whether a fork may be performed.
 *
 * A fork is allowed when the runtime is not disabled and at least one of
 * the following holds: the new team size exceeds 1, the main thread's
 * nesting depth is positive, or the serial_outline parameter is set.
 *
 * Returns 1 if forking is allowed, 0 otherwise. */
int __ompc_can_fork (void)
{
  int team_size = __pmp_get_new_team_size();
  int has_forked = (__pmp_get_main_thread()->nesting_depth > 0);
  pmp_param_t *param = __pmp_get_param();
  int serial_outline = param->serial_outline;
  int disabled = param->disabled;
  int can_fork = 0;

  if (!disabled) {
    can_fork = (team_size > 1) || has_forked || serial_outline;
  }

  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_can_fork returns %d\n", can_fork);
  __pmp_sample(PMP_PROFILE_OMPC_CAN_FORK);

  return can_fork;
}
Ejemplo n.º 15
0
/* Fork entry point: hands the request to __pmp_thread_fork on behalf of
 * the calling thread, with a memory fence before and after the call.
 *
 * nthreads: passed through to __pmp_thread_fork.
 * work:     work function passed through to __pmp_thread_fork.
 * fp:       opaque pointer passed through to __pmp_thread_fork. */
void __ompc_fork (int nthreads, workfunc_t work, void *fp)
{
  pmp_thread_t *self = __pmp_get_current_thread();

  __pmp_debug(PMP_DEBUG_CALLS,
              "__ompc_fork nthreads=%d, work=%p, fp=%p "
              " (nesting depth = %d)\n",
              nthreads, work, fp, self->nesting_depth);
  __pmp_sample(PMP_PROFILE_OMPC_FORK);

  /* Bracket the fork with fences. */
  __pmp_memory_fence();
  __pmp_thread_fork(self, nthreads, work, fp);
  __pmp_memory_fence();

  __pmp_debug(PMP_DEBUG_CALLS,
              "__ompc_fork completed"
              " (nesting depth = %d)\n",
              self->nesting_depth);
}
Ejemplo n.º 16
0
/* 64-bit scheduler initialisation: logs the request (SUPER_DEBUG builds
 * only), records a profile sample and forwards every argument unchanged
 * to __pmp_scheduler_init. */
void __ompc_scheduler_init_8 (int global_id, int sched, int64_t lower,
                              int64_t upper, int64_t inc, int64_t chunk)
{
#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Call_Debug) {
    __pmp_debug("CALLS_DEBUG",
                "__ompc_scheduler_init_8 global_id=%d with "
                "sched=%d, lower=%" PRId64 ", upper=%" PRId64
                ", inc=%" PRId64 ", chunk=%" PRId64 "\n",
                global_id, sched, lower, upper, inc, chunk);
  }
#endif

  __pmp_sample(PMP_PROFILE_OMPC_SCHEDULER_INIT_8);
  __pmp_scheduler_init(global_id, sched, lower, upper, inc, chunk);
}
Ejemplo n.º 17
0
/* 32-bit scheduler initialisation: logs the request (SUPER_DEBUG builds
 * only), records a profile sample and forwards to __pmp_scheduler_init
 * with the loop parameters widened to 64 bits. */
void __ompc_scheduler_init_4 (int global_id, int sched, int lower, int upper,
                              int inc, int chunk)
{
  /* Widen the int loop parameters for the 64-bit helper. */
  const int64_t lower64 = (int64_t) lower;
  const int64_t upper64 = (int64_t) upper;
  const int64_t inc64 = (int64_t) inc;
  const int64_t chunk64 = (int64_t) chunk;

#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Call_Debug) {
    __pmp_debug("CALLS_DEBUG", "__ompc_scheduler_init_4 global_id=%d with "
                "sched=%d, lower=%d, upper=%d, inc=%d, chunk=%d\n",
                global_id, sched, lower, upper, inc, chunk);
  }
#endif

  __pmp_sample(PMP_PROFILE_OMPC_SCHEDULER_INIT_4);
  __pmp_scheduler_init(global_id, sched, lower64, upper64, inc64, chunk64);
}
Ejemplo n.º 18
0
/* Entry to an ordered section for the thread identified by global_id.
 *
 * When the thread's team has more than one member, the thread spins
 * (yielding) until the loop's now_serving value equals its own ticket
 * number. A warning is issued and the function returns early if the
 * thread has no loop or the loop's schedule is not above
 * PMP_SCHED_ORDERED_OFFSET. Except on that early return, a memory fence
 * is issued before returning. */
void __ompc_ordered (int global_id)
{
  pmp_thread_t *self = __pmp_get_thread(global_id);

  if (__pmp_get_team_size(self->team) > 1) {
    pmp_loop_t *loop = self->loop;
    int64_t my_ticket = self->ticket_number;
    int64_t serving;

#ifdef SUPER_DEBUG
    if (Enabled_Libomp_Call_Debug) {
      __pmp_debug("CALLS_DEBUG", "__ompc_ordered: global_id=%d\n", global_id);
    }
#endif
    __pmp_sample(PMP_PROFILE_OMPC_ORDERED);

    if (loop == NULL || loop->sched <= PMP_SCHED_ORDERED_OFFSET) {
      __pmp_warning("ordered directives must be used inside ordered "
                    "OpenMP loops\n");
      return;
    }

    assert(loop != NULL);

    serving = loop->now_serving;

    if (serving != my_ticket) {
      /* The counter having already moved past our ticket (in the
       * direction of the loop increment) is reported as a possible
       * deadlock. */
      int overshot;

      if (loop->inc >= 0) {
        overshot = (serving > my_ticket);
      }
      else {
        overshot = (serving < my_ticket);
      }

      if (overshot) {
        __pmp_warning("ordered OpenMP loop may result in program deadlock\n");
        __pmp_warning("maybe due to multiple ordered directives "
                      "in a loop iteration\n");
      }

      while (loop->now_serving != my_ticket) {
        /* USER LEVEL SPIN LOOP */
        __pmp_yield();
      }
    }

#ifdef SUPER_DEBUG
    if (Enabled_Libomp_Loop_Debug) {
      __pmp_debug("LOOPS_DEBUG", "__ompc_ordered: now serving global_id=%d "
                  " ticket_number=%" PRId64 "\n", global_id, my_ticket);
    }
#endif
  }

  __pmp_memory_fence();
}
Ejemplo n.º 19
0
/* Record a profile sample for the given schedule kind. Compiled to an
 * empty function unless PMP_PROFILE is defined. An unrecognised schedule
 * kind is reported through __pmp_fatal. */
static inline void __pmp_scheduler_sample (int sched)
{
#ifdef PMP_PROFILE
  switch (sched) {
    case PMP_SCHED_STATIC:
      __pmp_sample(PMP_PROFILE_SCHED_STATIC);
      return;

    case PMP_SCHED_STATIC_EVEN:
      __pmp_sample(PMP_PROFILE_SCHED_STATIC_EVEN);
      return;

    case PMP_SCHED_DYNAMIC:
      __pmp_sample(PMP_PROFILE_SCHED_DYNAMIC);
      return;

    case PMP_SCHED_GUIDED:
      __pmp_sample(PMP_PROFILE_SCHED_GUIDED);
      return;

    case PMP_SCHED_ORDERED_STATIC:
      __pmp_sample(PMP_PROFILE_SCHED_ORDERED_STATIC);
      return;

    case PMP_SCHED_ORDERED_STATIC_EVEN:
      __pmp_sample(PMP_PROFILE_SCHED_ORDERED_STATIC_EVEN);
      return;

    case PMP_SCHED_ORDERED_DYNAMIC:
      __pmp_sample(PMP_PROFILE_SCHED_ORDERED_DYNAMIC);
      return;

    case PMP_SCHED_ORDERED_GUIDED:
      __pmp_sample(PMP_PROFILE_SCHED_ORDERED_GUIDED);
      return;

    default:
      __pmp_fatal("unknown dynamic scheduling type %d\n", sched);
      return;
  }
#endif
}
Ejemplo n.º 20
0
/* 64-bit static schedule initialisation: the caller's lower, upper and
 * stride pointers are handed directly to __pmp_static_init together with
 * the remaining arguments. Entry and exit values are logged in
 * SUPER_DEBUG builds. */
void __ompc_static_init_8 (int global_id, int sched, int64_t *lower,
                           int64_t *upper, int64_t *stride, int64_t inc,
                           int64_t chunk)
{
#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Call_Debug) {
    __pmp_debug("CALLS_DEBUG",
                "__ompc_static_init_8 global_id=%d with "
                "sched=%d, lower=%" PRId64 ", upper=%" PRId64
                ", inc=%" PRId64 ", chunk=%" PRId64 "\n",
                global_id, sched, *lower, *upper, inc, chunk);
  }
#endif

  __pmp_sample(PMP_PROFILE_OMPC_STATIC_INIT_8);

  __pmp_static_init(global_id, sched, lower, upper, stride, inc, chunk);

#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Call_Debug) {
    __pmp_debug("CALLS_DEBUG",
                "__ompc_static_init_8 global_id=%d returns "
                "lower=%" PRId64 ", upper=%" PRId64
                ", stride=%" PRId64 "\n",
                global_id, *lower, *upper, *stride);
  }
#endif
}
Ejemplo n.º 21
0
/* Copyprivate support, performed between two team barriers.
 *
 * lock: non-zero on the thread that publishes its data; that thread
 *       stores src in team->copyprivate_src before the first barrier.
 * src:  on the publishing thread, the data to publish; on every other
 *       thread, the second argument handed to copy.
 * copy: invoked on the non-publishing threads, after the first barrier,
 *       as copy(team->copyprivate_src, src).
 *
 * Nothing is done when the calling thread has no team or the team has at
 * most one member. */
void __ompc_copyprivate (int lock, void *src, void (*copy)(void*, void*))
{
  pmp_thread_t *self = __pmp_get_current_thread();
  pmp_team_t *team = self->team;

  __pmp_debug(PMP_DEBUG_CALLS,"__ompc_copyprivate: lock=%d, src=%p, copy=%p\n",
              lock, src, copy);
  __pmp_sample(PMP_PROFILE_OMPC_COPYPRIVATE);

  if (team == NULL || team->team_size <= 1) {
    return;
  }

  /* TODO: consider a different implementation without double barriers? */
  if (lock) {
    team->copyprivate_src = src;
  }

  /* First barrier: the published pointer is in place from here on. */
  __pmp_thread_barrier(self);

  if (!lock) {
    copy(team->copyprivate_src, src);
  }

  /* Second barrier: all copies have completed. */
  __pmp_thread_barrier(self);
}
Ejemplo n.º 22
0
/* Exit from an ordered section for the thread identified by global_id.
 *
 * A memory fence is issued first. When the thread's team has more than
 * one member, the loop's now_serving value and the thread's ticket number
 * are both advanced by the loop increment. If the thread has no loop, or
 * the loop's schedule is not above PMP_SCHED_ORDERED_OFFSET, the function
 * returns without advancing anything (global id 0 additionally prints a
 * warning). */
void __ompc_end_ordered (int global_id)
{
  pmp_thread_t *self = __pmp_get_thread(global_id);

  __pmp_memory_fence();

  if (__pmp_get_team_size(self->team) <= 1) {
    return;
  }

  pmp_loop_t *loop = self->loop;
  int64_t ticket_number = self->ticket_number;

#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Call_Debug) {
    __pmp_debug("CALLS_DEBUG", "__ompc_end_ordered: global_id=%d\n",
                global_id);
  }
#endif
  __pmp_sample(PMP_PROFILE_OMPC_END_ORDERED);

  if (loop == NULL || loop->sched <= PMP_SCHED_ORDERED_OFFSET) {
    if (self->global_id == 0) {
      __pmp_warning("ordered directives must be used inside ordered "
                    "OpenMP loops\n");
    }
    return;
  }

  assert(loop != NULL);
  assert(loop->now_serving == ticket_number);

#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Loop_Debug) {
    __pmp_debug("LOOPS_DEBUG", "__ompc_ordered: stop serving global_id=%d "
                " ticket_number=%" PRId64 "\n", global_id, ticket_number);
  }
#endif

  /* Advance the shared counter first, then this thread's own ticket. */
  loop->now_serving += loop->inc;
  self->ticket_number = ticket_number + loop->inc;
}
Ejemplo n.º 23
0
/* 64-bit "next chunk" query: hands the caller's lower, upper and inc
 * pointers directly to __pmp_schedule_next and returns its result. The
 * outcome is logged in SUPER_DEBUG builds. */
int __ompc_schedule_next_8 (int global_id, int64_t *lower,
                            int64_t *upper, int64_t *inc)
{
  int result;

  __pmp_sample(PMP_PROFILE_OMPC_SCHEDULE_NEXT_8);
  result = __pmp_schedule_next(global_id, lower, upper, inc);

#ifdef SUPER_DEBUG
  if (Enabled_Libomp_Call_Debug) {
    if (result != 0) {
      __pmp_debug("CALLS_DEBUG",
                  "__ompc_schedule_next_8 global_id=%d returns "
                  "lower=%" PRId64 ", upper=%" PRId64
                  ", inc=%" PRId64 ", result=%d\n",
                  global_id, *lower, *upper, *inc, result);
    }
    else {
      /* With result 0 the bounds are not printed. */
      __pmp_debug("CALLS_DEBUG",
                  "__ompc_schedule_next_8 global_id=%d returns "
                  "result=0\n", global_id);
    }
  }
#endif

  return result;
}
Ejemplo n.º 24
0
/* End-of-single hook. It only logs the call and records a profile
 * sample. */
void __ompc_end_single (int global_id)
{
  __pmp_debug(PMP_DEBUG_CALLS,
              "__ompc_end_single global_id=%d\n",
              global_id);
  __pmp_sample(PMP_PROFILE_OMPC_END_SINGLE);

  /* Nothing further to do. Note that, unlike some of the other empty
   * hooks, the compiler does call this function. */
}
Ejemplo n.º 25
0
/* Obtain (creating on first use) the thread-private copy of a data block
 * for the thread identified by global_id.
 *
 * thdprv:    address of the pointer to the per-thread table. When the
 *            runtime is enabled the table has PMP_MAX_THREADS slots
 *            indexed by global_id and is installed on first use with a
 *            compare-and-exchange, so that racing threads end up sharing
 *            one table.
 * size:      size in bytes of the data block.
 * data:      the statically initialized block that each private copy is
 *            initialized from.
 * global_id: table index of the calling thread; must lie in
 *            [0, PMP_MAX_THREADS).
 *
 * When the runtime parameter "disabled" is set, a fresh one-slot table
 * pointing directly at data is installed instead and no copy is made.
 * Allocation failures are reported through __pmp_fatal. */
void __ompc_get_thdprv (void ***thdprv, int64_t size,
                        void *data, int global_id)
{
  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_get_thdprv: thdprv=%p, size=%ld, "
              "data=%p, global_id=%d\n", (void *) thdprv, (long) size, data,
              global_id);
  __pmp_sample(PMP_PROFILE_OMPC_GET_THDPRV);

  if (__pmp_get_param()->disabled) {
    void **t = (void **) calloc (1, sizeof(void *));
    if (t == NULL) {
      __pmp_fatal("failed to allocate thread private data\n");
    }
    t[0] = data;
    *thdprv = t;
  }
  else {
    void **t = *thdprv;

    /* The table has exactly PMP_MAX_THREADS slots; an id outside that
     * range would index out of bounds below. */
    assert(global_id >= 0 && global_id < PMP_MAX_THREADS);

    if (t == NULL) {
      /* TODO: can I reduce the size of this array? Note that it is indexed
       * by global_id and global_id's can be arbitrarily assigned to threads
       * in general, so this may be difficult. */
      void *t_new;
      void *t_cur;
      t = (void **) calloc(PMP_MAX_THREADS, sizeof(void *));
      if (t == NULL) {
        __pmp_fatal("failed to allocate thread private data\n");
      }
      t_new = (void *) t;
      t_cur = __pmp_atomic_cmpxchgptr((volatile voidptr_t *) thdprv,
                                      NULL, t_new);
      if (t_cur != NULL) {
        /* This thread lost the race and another thread has already
         * installed a thdprv array. Simply back out this allocation
         * and use *thdprv. */
        free(t);
        t = (void **) t_cur;
      }
    }
    if (t[global_id] == NULL) {
      /* The OpenMP 2.5 standard says:
       *
       * "Each copy of a threadprivate object is initialized once, in the manner
       * specified by the program, but at an unspecified point in the program
       * prior to the first reference to that copy."
       *
       * Since the initial values live in the statically allocated block of
       * memory passed to our "data" argument, the master thread needs to use
       * a dynamically allocated block, just as the additional threads do, so
       * that if it changes its copies of the variables before the program
       * enters the first parallel region, those changes have no effect on the
       * copies in the additional threads. Observation shows that the code
       * generator calls __ompc_get_thdprv from the serial portion of the
       * program, for the master thread, before it changes any values.
       *
       * Note the copying is done without synchronization, which is safe only
       * because we're copying statically initialized and subsequently
       * unchanged values: copying from the main thread would require a
       * barrier.
       */
      t[global_id] = (void *) malloc(size);
      if (t[global_id] == NULL) {
        __pmp_fatal("failed to allocate thread private data\n");
      }
      memcpy(t[global_id], data, size);
    }
  }
}
Ejemplo n.º 26
0
/* End-of-master hook. It only logs the call and records a profile
 * sample. */
void __ompc_end_master (int global_id)
{
  __pmp_debug(PMP_DEBUG_CALLS,
              "__ompc_end_master global_id=%d\n",
              global_id);
  __pmp_sample(PMP_PROFILE_OMPC_END_MASTER);

  /* Intentionally empty. NOTE: the compiler does not appear to call this
   * function anyway. */
}
Ejemplo n.º 27
0
/* Flush hook. It only logs the call and records a profile sample. */
void __ompc_flush (void)
{
  __pmp_debug(PMP_DEBUG_CALLS, "__ompc_flush\n");
  __pmp_sample(PMP_PROFILE_OMPC_FLUSH);

  /* Nothing to do: the architecture provides hardware SMP cache
   * coherency. */
}