void GOMP_doacross_wait (long first, ...)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_doacross_work_share *doacross = ws->doacross;
  va_list ap;
  unsigned long ent;
  unsigned int i;

  if (__builtin_expect (doacross == NULL, 0))
    {
      __sync_synchronize ();
      return;
    }

  if (__builtin_expect (ws->sched == GFS_STATIC, 1))
    {
      if (ws->chunk_size == 0)
        {
          if (first < doacross->boundary)
            ent = first / (doacross->q + 1);
          else
            ent = (first - doacross->boundary) / doacross->q + doacross->t;
        }
      else
        ent = first / ws->chunk_size % thr->ts.team->nthreads;
    }
  else if (ws->sched == GFS_GUIDED)
    ent = first;
  else
    ent = first / doacross->chunk_size;
  unsigned long *array = (unsigned long *) (doacross->array
                                            + ent * doacross->elt_sz);

  if (__builtin_expect (doacross->flattened, 1))
    {
      unsigned long flattened
        = (unsigned long) first << doacross->shift_counts[0];
      unsigned long cur;

      va_start (ap, first);
      for (i = 1; i < doacross->ncounts; i++)
        flattened |= (unsigned long) va_arg (ap, long)
                     << doacross->shift_counts[i];
      cur = __atomic_load_n (array, MEMMODEL_ACQUIRE);
      if (flattened < cur)
        {
          __atomic_thread_fence (MEMMODEL_RELEASE);
          va_end (ap);
          return;
        }
      doacross_spin (array, flattened, cur);
      __atomic_thread_fence (MEMMODEL_RELEASE);
      va_end (ap);
      return;
    }
  /* Fallback for the non-flattened representation omitted in this excerpt.  */
}
void gomp_ordered_sync (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;

  /* Work share constructs can be orphaned.  But this clearly means that
     we are the only thread, and so we automatically own the section.  */
  if (team == NULL || team->nthreads == 1)
    return;

  /* ??? I believe it to be safe to access this data without taking the
     ws->lock.  The only presumed race condition is with the previous
     thread on the queue incrementing ordered_cur such that it points
     to us, concurrently with our check below.  But our team_id is
     already present in the queue, and the other thread will always
     post to our release semaphore.  So the two cases are that we will
     either win the race and momentarily block on the semaphore, or
     lose the race and find the semaphore already unlocked and so not
     block.  Either way we get correct results.
     However, there is an implicit flush on entry to an ordered region,
     so we do need to have a barrier here.  If we were taking a lock
     this could be MEMMODEL_RELEASE since the acquire would be covered
     by the lock.  */
  __atomic_thread_fence (MEMMODEL_ACQ_REL);
  if (ws->ordered_owner != thr->ts.team_id)
    {
      gomp_sem_wait (team->ordered_release[thr->ts.team_id]);
      ws->ordered_owner = thr->ts.team_id;
    }
}
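/* Hedged illustration (not part of the runtime code above): an OpenMP loop
   with an ordered region of the kind that typically reaches gomp_ordered_sync
   via GOMP_ordered_start when compiled with GCC.  The function name and array
   are made up for the example.  */
void ordered_example (int *a, int n)
{
  int i;
  #pragma omp parallel for ordered schedule(static)
  for (i = 1; i < n; i++)
    {
      int t = a[i] * 2;     /* unordered part of the iteration */
      #pragma omp ordered   /* entry implies a flush; body runs in iteration order */
      a[i] = a[i - 1] + t;
    }
}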
void GOMP_critical_start (void)
{
  /* There is an implicit flush on entry to a critical region.  */
  __atomic_thread_fence (MEMMODEL_RELEASE);
  gomp_mutex_lock (&default_lock);
}
void host_acquire_fence()
{
#if defined(__GNUC__)
    // pairs with host_release_fence(): make sure the reference count written
    // before the output was set is visible to this thread
    __atomic_thread_fence( __ATOMIC_ACQUIRE );
#endif
}
void host_release_fence()
{
#if defined(__GNUC__)
    // make sure the other threads see the reference count before the output is set
    __atomic_thread_fence( __ATOMIC_RELEASE );
#endif
}
void GOMP_doacross_post (long *counts)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_doacross_work_share *doacross = ws->doacross;
  unsigned long ent;
  unsigned int i;

  if (__builtin_expect (doacross == NULL, 0))
    {
      __sync_synchronize ();
      return;
    }

  if (__builtin_expect (ws->sched == GFS_STATIC, 1))
    ent = thr->ts.team_id;
  else if (ws->sched == GFS_GUIDED)
    ent = counts[0];
  else
    ent = counts[0] / doacross->chunk_size;
  unsigned long *array = (unsigned long *) (doacross->array
                                            + ent * doacross->elt_sz);

  if (__builtin_expect (doacross->flattened, 1))
    {
      unsigned long flattened
        = (unsigned long) counts[0] << doacross->shift_counts[0];

      for (i = 1; i < doacross->ncounts; i++)
        flattened |= (unsigned long) counts[i]
                     << doacross->shift_counts[i];
      flattened++;
      if (flattened == __atomic_load_n (array, MEMMODEL_ACQUIRE))
        __atomic_thread_fence (MEMMODEL_RELEASE);
      else
        __atomic_store_n (array, flattened, MEMMODEL_RELEASE);
      return;
    }

  __atomic_thread_fence (MEMMODEL_ACQUIRE);
  for (i = doacross->ncounts; i-- > 0; )
    {
      if (counts[i] + 1UL != __atomic_load_n (&array[i], MEMMODEL_RELAXED))
        __atomic_store_n (&array[i], counts[i] + 1UL, MEMMODEL_RELEASE);
    }
}
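/* Hedged illustration (not part of the runtime code above): an OpenMP 4.5
   doacross loop of the kind GCC typically lowers to GOMP_doacross_wait and
   GOMP_doacross_post.  The function name and array are made up for the
   example.  */
void doacross_example (int *a, int n)
{
  int i;
  #pragma omp parallel for ordered(1)
  for (i = 1; i < n; i++)
    {
      #pragma omp ordered depend(sink: i - 1)   /* wait for iteration i-1 */
      a[i] = a[i - 1] + 1;
      #pragma omp ordered depend(source)        /* publish completion of iteration i */
    }
}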
static void barrier_wait(uint32_t *barrier)
{
    // Announce our arrival by decrementing the shared counter.
    uint32_t val = __atomic_sub_fetch(barrier, 1, __ATOMIC_RELAXED);

    // Spin until every thread has arrived and the counter reaches zero.
    while (val != 0)
        val = __atomic_load_n(barrier, __ATOMIC_RELAXED);

    // Keep operations after the barrier from being reordered before the spin.
    __atomic_thread_fence(__ATOMIC_SEQ_CST);
}
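// Hedged usage sketch for barrier_wait(): the counter must be initialized to
// the number of participating threads, and this simple barrier is single-use
// (the counter is never reset).  NUM_WORKERS, start_barrier and worker() are
// made up for the example.
#include <stdint.h>
#include <stddef.h>

#define NUM_WORKERS 4
static uint32_t start_barrier = NUM_WORKERS;

static void *worker(void *arg)
{
    // ... per-thread setup ...
    barrier_wait(&start_barrier);   // no thread continues until all have arrived
    // ... work that requires every thread to be initialized ...
    return NULL;
}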
void lock(int i)
{
    int j;

    // Announce that we are choosing a ticket number.
    choosing[i] = 1;
    __atomic_thread_fence(__ATOMIC_RELEASE);

    // Take a ticket one larger than any currently outstanding number.
    number[i] = max(number, NUM_THREADS) + 1;
    __atomic_thread_fence(__ATOMIC_SEQ_CST);
    choosing[i] = 0;

    for (j = 0; j < NUM_THREADS; j++) {
        // Wait until thread j has finished picking its number.
        while (choosing[j])
            ;
        // Wait while thread j holds a smaller ticket (ties broken by id).
        while (number[j] > 0 &&
               (number[j] < number[i] ||
                (number[j] == number[i] && j < i)))
            ;
    }
}
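// Hedged sketch of the shared state and max() helper the bakery lock() above
// relies on; only the names NUM_THREADS, choosing, number and max come from
// the snippet, the exact declarations are assumptions.
#define NUM_THREADS 4

static volatile int choosing[NUM_THREADS];   // 1 while thread i picks a ticket
static volatile int number[NUM_THREADS];     // 0 = not waiting, else ticket value

static int max(volatile const int *v, int n)
{
    int i, m = 0;
    for (i = 0; i < n; i++)
        if (v[i] > m)
            m = v[i];
    return m;
}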
static inline void maybe_acquire_fence (int model)
{
  switch (model)
    {
    case __ATOMIC_ACQUIRE:
      __atomic_thread_fence (__ATOMIC_ACQUIRE);
      break;
    case __ATOMIC_ACQ_REL:
      __atomic_thread_fence (__ATOMIC_ACQ_REL);
      break;
    case __ATOMIC_SEQ_CST:
      __atomic_thread_fence (__ATOMIC_SEQ_CST);
      break;
    default:
      break;
    }
}
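/* Hedged usage sketch: one way a helper like maybe_acquire_fence is used,
   placing the fence after a relaxed access so the whole operation honors the
   caller's requested memory model.  emulated_load is a made-up name.  */
static inline int emulated_load (int *ptr, int model)
{
  int val = __atomic_load_n (ptr, __ATOMIC_RELAXED);
  maybe_acquire_fence (model);  /* upgrade to acquire/acq_rel/seq_cst if requested */
  return val;
}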
static void post_atomic_barrier (int model)
{
  switch ((enum memmodel) model)
    {
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}
void unlock(struct mcs_spinlock *node)
{
    struct mcs_spinlock *last = node;

    if (!node->next) {
        // I'm the last in the queue
        if (__atomic_compare_exchange_n(&tail, &last, NULL, 0,
                                        __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
            return;
        } else {
            // Another process executed exchange but
            // didn't assign our next yet, so wait
            while (!node->next)
                ;
        }
    } else {
        // We force a memory barrier to ensure the critical section
        // was executed before the next
        __atomic_thread_fence(__ATOMIC_RELEASE);
    }
    node->next->locked = 0;
}
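// Hedged sketch of the queue node and the acquire side this MCS unlock()
// pairs with; the struct layout and lock() body follow the textbook MCS
// algorithm and are assumptions, not code from the same source.
struct mcs_spinlock {
    struct mcs_spinlock *next;
    int locked;
};

static struct mcs_spinlock *tail;

void lock(struct mcs_spinlock *node)
{
    struct mcs_spinlock *prev;

    node->next = NULL;
    node->locked = 1;
    // Append our node to the queue and find our predecessor, if any.
    prev = __atomic_exchange_n(&tail, node, __ATOMIC_ACQ_REL);
    if (prev == NULL)
        return;                 // queue was empty: lock acquired immediately
    prev->next = node;          // link ourselves behind the predecessor
    while (node->locked)        // spin until the predecessor hands the lock over
        ;
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
}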
void hs_device_unref(hs_device *dev)
{
    if (dev) {
#ifdef _MSC_VER
        if (InterlockedDecrement(&dev->refcount))
            return;
#else
        // Release on the decrement publishes our writes; if we dropped the
        // last reference, acquire before freeing so we see everyone else's.
        if (__atomic_fetch_sub(&dev->refcount, 1, __ATOMIC_RELEASE) > 1)
            return;
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
#endif

        free(dev->key);
        free(dev->location);
        free(dev->path);
        free(dev->manufacturer);
        free(dev->product);
        free(dev->serial);
    }

    free(dev);
}
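// Hedged sketch of the increment that pairs with the decrement above
// (illustrative only, not the library's code): taking an extra reference
// needs no ordering by itself, so a relaxed RMW is enough.
static void device_ref(hs_device *dev)
{
    __atomic_fetch_add(&dev->refcount, 1, __ATOMIC_RELAXED);
}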
void unlock()
{
    // Order the critical section's writes before the store that releases the flag.
    __atomic_thread_fence(__ATOMIC_RELEASE);
    mutex = 0;
}
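// Hedged sketch of the acquire side this unlock() pairs with; mutex is
// assumed to be a shared int flag, and the exchange/spin loop is an
// assumption, not code from the same source.
void lock()
{
    while (__atomic_exchange_n(&mutex, 1, __ATOMIC_RELAXED))
        ;                                  // spin until we set the flag ourselves
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
}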
void unlock(int i)
{
    // Order everything done inside the critical section before the ticket is released.
    __atomic_thread_fence(__ATOMIC_SEQ_CST);
    number[i] = 0;
}