static void __pmp_shared_catch_segv (pmp_thread_t *thread)
{
  static int32_t installing_segv = 0;
  static int32_t installed_segv = 0;

  /* For Linuxthreads this only needs to be done once, since sigactions are
   * shared across all of the pthreads. I arrange for it to be set up by the
   * first worker thread that is woken up. This transfers SEGV catching
   * responsibility from the serial code in libfoobar to libopenmp as
   * soon as parallelism is employed. */

  if (__pmp_atomic_cmpxchg32(&installing_segv, 0, 1) == 0) {
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d "
                "is installing the SEGV handler\n", thread->global_id);
    __pmp_catch_segv();
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d "
                "has installed the SEGV handler\n", thread->global_id);
    installed_segv = 1;
  }

  while (installed_segv == 0) {
    /* USER LEVEL SPIN LOCK */
    __pmp_yield();
  }
}
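/* For reference, a minimal sketch of the compare-and-swap primitive the
 * once-only installation above relies on, assuming GCC-style __sync
 * builtins; the name __pmp_atomic_cmpxchg32_sketch is hypothetical and the
 * real __pmp_atomic_cmpxchg32 may be hand-written assembly. It atomically
 * stores newval to *ptr if *ptr equals oldval, and returns the value it
 * observed in *ptr, so a return equal to oldval means the caller won the
 * race (here: exactly one thread installs the handler, the rest spin on
 * installed_segv). */
static inline int32_t __pmp_atomic_cmpxchg32_sketch (volatile int32_t *ptr,
                                                     int32_t oldval,
                                                     int32_t newval)
{
  return __sync_val_compare_and_swap(ptr, oldval, newval);
}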
static inline void __pmp_thread_master_join (pmp_thread_t *master)
{
  pmp_team_t *team = master->team;
  int32_t count;
  int thread_spin = __pmp_get_param()->thread_spin;
  int i;

  /* NOTE: insert a small spin loop here to try to arrange for the master
   * to arrive just after the last worker thread. If this happens
   * then we avoid a much more expensive thread synchronization. */
  for (i = 0; i < thread_spin; i++) {
    /* USER LEVEL SPIN LOOP */
    if (team->working_threads == 1) {
      team->working_threads = 0;
      return;
    }
    __pmp_yield();
  }

  count = __pmp_atomic_xadd32(&team->working_threads, -1);
  __pmp_debug(PMP_DEBUG_THREAD, "master thread joins with count of %d\n",
              (int) count);
  assert(count >= 1);
  if (count > 1) {
    __pmp_thread_wait(master);
  }
}
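/* A hedged sketch of the fetch-and-add primitive used by the join path,
 * again assuming GCC builtins (the actual __pmp_atomic_xadd32 is likely
 * platform assembly). It atomically adds delta to *ptr and returns the
 * value *ptr held beforehand, which is why the master sees count == 1 when
 * it is the last thread to arrive and can skip the blocking wait. */
static inline int32_t __pmp_atomic_xadd32_sketch (volatile int32_t *ptr,
                                                  int32_t delta)
{
  return __sync_fetch_and_add(ptr, delta);
}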
static inline void __pmp_thread_wait (pmp_thread_t *thread)
{
  int32_t sync;
  int thread_spin;
  int i;

  if (thread->sync == PMP_SYNC_UNBLOCKED) {
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d does not block (1)\n",
                thread->global_id);
    thread->sync = PMP_SYNC_IDLE;
    return;
  }

  thread_spin = __pmp_get_param()->thread_spin;
  for (i = 0; i < thread_spin; i++) {
    /* USER LEVEL SPIN LOOP */
    if (thread->sync == PMP_SYNC_UNBLOCKED) {
      __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d does not block (2)\n",
                  thread->global_id);
      thread->sync = PMP_SYNC_IDLE;
      return;
    }
    __pmp_yield();
  }

  sync = __pmp_atomic_cmpxchg32(&thread->sync, PMP_SYNC_IDLE,
                                PMP_SYNC_BLOCKED);
  if (sync == PMP_SYNC_IDLE) {
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d is waiting\n",
                thread->global_id);
    __pmp_sample(PMP_PROFILE_THREAD_DESCHEDULE);
#ifdef PMP_USE_PTHREAD_SIGNALS
    {
      int sig;
      do {
        sigwait(&__pmp_manager.mask_block_sigpmp, &sig);
      } while (sig != SIGPMP);
    }
#else
    sigsuspend(&__pmp_manager.mask_unblock_sigpmp);
    /* NOTE: it is unfortunate that sigsuspend does not tell us which
     * signal has been raised. */
#endif
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d is awake\n",
                thread->global_id);
  } else {
    __pmp_debug(PMP_DEBUG_THREAD, "thread global_id %d does not block (3)\n",
                thread->global_id);
    thread->sync = PMP_SYNC_IDLE;
  }
}
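/* A possible initialization of the two signal masks referenced above,
 * using only standard POSIX calls (requires <signal.h>). This is a sketch
 * under the assumption that SIGPMP is an otherwise unused signal reserved
 * for wakeups; the function name is hypothetical and the real setup lives
 * in the manager initialization code. mask_block_sigpmp contains only
 * SIGPMP (the set sigwait waits on), while mask_unblock_sigpmp is a full
 * mask with SIGPMP removed (the temporary mask sigsuspend installs, so
 * only SIGPMP can interrupt the wait). */
static void __pmp_init_sigpmp_masks_sketch (void)
{
  sigemptyset(&__pmp_manager.mask_block_sigpmp);
  sigaddset(&__pmp_manager.mask_block_sigpmp, SIGPMP);

  sigfillset(&__pmp_manager.mask_unblock_sigpmp);
  sigdelset(&__pmp_manager.mask_unblock_sigpmp, SIGPMP);
}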
void __ompc_ordered (int global_id)
{
  pmp_thread_t *thread = __pmp_get_thread(global_id);

  if (__pmp_get_team_size(thread->team) > 1) {
    pmp_loop_t *loop = thread->loop;
    int64_t ticket_number = thread->ticket_number;
    int64_t now_serving;

#ifdef SUPER_DEBUG
    if (Enabled_Libomp_Call_Debug)
      __pmp_debug("CALLS_DEBUG", "__ompc_ordered: global_id=%d\n", global_id);
#endif

    __pmp_sample(PMP_PROFILE_OMPC_ORDERED);

    if (loop == NULL || loop->sched <= PMP_SCHED_ORDERED_OFFSET) {
      __pmp_warning("ordered directives must be used inside ordered "
                    "OpenMP loops\n");
      return;
    }

    now_serving = loop->now_serving;
    if (now_serving != ticket_number) {
      if ((loop->inc >= 0) ? (now_serving > ticket_number)
                           : (now_serving < ticket_number)) {
        __pmp_warning("ordered OpenMP loop may result in program deadlock\n");
        __pmp_warning("maybe due to multiple ordered directives "
                      "in a loop iteration\n");
      }
      while (loop->now_serving != ticket_number) {
        /* USER LEVEL SPIN LOOP */
        __pmp_yield();
      }
    }

#ifdef SUPER_DEBUG
    if (Enabled_Libomp_Loop_Debug)
      __pmp_debug("LOOPS_DEBUG", "__ompc_ordered: now serving global_id=%d "
                  "ticket_number=%" PRId64 "\n", global_id, ticket_number);
#endif
  }

  __pmp_memory_fence();
}
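/* For context, a hedged sketch of the release side of the ticket protocol:
 * on leaving the ordered region the owning thread advances now_serving,
 * which releases the spin in __ompc_ordered above. The name
 * __pmp_ordered_release_sketch is hypothetical, and the assumption that
 * the successor ticket is ticket_number + inc may not hold for all
 * schedules; the real advance happens in the ordered-end entry point. */
static inline void __pmp_ordered_release_sketch (pmp_thread_t *thread)
{
  pmp_loop_t *loop = thread->loop;

  __pmp_memory_fence();  /* publish writes made inside the ordered region */
  loop->now_serving = thread->ticket_number + loop->inc;  /* next ticket */
}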
static void __pmp_thread_bind (pmp_thread_t *thread)
{
  /* TODO: use dynamic information to bind threads appropriately */
  pmp_param_t *param = __pmp_get_param();

  if (param->enable_affinity) {
    int cpu;
    int index = param->global_affinity ? thread->global_id
                                       : thread->local_id;
    assert(index < PMP_MAX_THREADS);
    cpu = param->thread_to_cpu_map[index];
    assert(cpu < param->machine_num_cpus);

    if (thread->cpu != cpu) {
      static bool __pmp_enable_affinity_warning = true;
      int e;

      if (__pmp_manager.params != NULL) {
        thread->param = &__pmp_manager.params[cpu];
      } else {
        thread->param = &__pmp_param;
      }

      e = __pmp_set_affinity(cpu);
      __pmp_debug(PMP_DEBUG_THREAD, "__pmp_thread_bind: global_id=%d, "
                  "local_id=%d, CPU=%d, param=%p\n", thread->global_id,
                  thread->local_id, cpu, thread->param);

      if (e != 0 && __pmp_enable_affinity_warning) {
        __pmp_warning("failed to set affinity\n");
        __pmp_warning("maybe the kernel does not support "
                      "affinity system calls\n");
        __pmp_enable_affinity_warning = false;
      }

      thread->cpu = cpu;
    }

    /* TODO: give the thread an opportunity to move to its bound CPU
     * before continuing? Currently just do a __pmp_yield(). It is not
     * clear if this is necessary or sufficient. */
    __pmp_yield();
  }
}
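/* A minimal sketch of what __pmp_set_affinity might do on Linux, using the
 * standard sched_setaffinity call (requires _GNU_SOURCE and <sched.h>);
 * the real implementation may issue the raw syscall directly to cope with
 * kernels lacking the libc wrapper. Returns 0 on success and non-zero on
 * failure, matching the e != 0 check in __pmp_thread_bind above. */
static int __pmp_set_affinity_sketch (int cpu)
{
  cpu_set_t set;

  CPU_ZERO(&set);
  CPU_SET(cpu, &set);

  /* pid 0 means the calling thread; returns -1 with errno set on failure,
   * e.g. when the kernel does not support affinity system calls */
  return sched_setaffinity(0, sizeof(set), &set);
}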