bool ponyint_messageq_push(messageq_t* q, pony_msg_t* first, pony_msg_t* last)
{
  atomic_store_explicit(&last->next, NULL, memory_order_relaxed);

  // Without that fence, the store to last->next above could be reordered after
  // the exchange on the head and after the store to prev->next done by the
  // next push, which would result in the pop incorrectly seeing the queue as
  // empty.
  // Also synchronise with the pop on prev->next.
  atomic_thread_fence(memory_order_release);

  pony_msg_t* prev = atomic_exchange_explicit(&q->head, last,
    memory_order_relaxed);

  bool was_empty = ((uintptr_t)prev & 1) != 0;
  prev = (pony_msg_t*)((uintptr_t)prev & ~(uintptr_t)1);

#ifdef USE_VALGRIND
  // Double fence with Valgrind since we need to have prev in scope for the
  // synchronisation annotation.
  ANNOTATE_HAPPENS_BEFORE(&prev->next);
  atomic_thread_fence(memory_order_release);
#endif

  atomic_store_explicit(&prev->next, first, memory_order_relaxed);

  return was_empty;
}
/*
 * qsbr_checkpoint: indicate a quiescent state of the current thread.
 */
void
qsbr_checkpoint(qsbr_t *qs)
{
    qsbr_tls_t *t;

    t = pthread_getspecific(qs->tls_key);
    ASSERT(t != NULL);

    /* Observe the current epoch. */
    atomic_thread_fence(memory_order_release);
    t->local_epoch = qs->global_epoch;
    atomic_thread_fence(memory_order_acquire);
}
void ccsynch_close_delegate_buffer(void * buffer,
                                   void (*funPtr)(unsigned int, void *))
{
    CCSynchLockNode *tmpNode;
    void (*tmpFunPtr)(unsigned int, void *);
    CCSynchLockNode *tmpNodeNext;
    int counter = 0;
    CCSynchLockNode *curNode = ccsynchNextLocalNode;
    curNode->buffer = buffer;
    curNode->requestFunction = funPtr;
    atomic_thread_fence(memory_order_release);
    while (atomic_load_explicit(&curNode->wait, memory_order_acquire) == 1) {
        thread_yield();
    }
    if (curNode->completed == true) {
        return;
    } else {
        funPtr(curNode->messageSize, buffer);
    }
    tmpNode = (CCSynchLockNode *)atomic_load_explicit(&curNode->next,
                                                      memory_order_acquire);
    while ((tmpNodeNext = (CCSynchLockNode *)atomic_load_explicit(
                &tmpNode->next, memory_order_acquire)) != NULL &&
           counter < CCSYNCH_HAND_OFF_LIMIT) {
        counter = counter + 1;
        tmpFunPtr = tmpNode->requestFunction;
        if (tmpFunPtr == NULL) {
            break;
        }
        tmpFunPtr(tmpNode->messageSize, tmpNode->buffer);
        tmpNode->completed = true;
        atomic_store_explicit(&tmpNode->wait, 0, memory_order_release);
        tmpNode = tmpNodeNext;
    }
    atomic_store_explicit(&tmpNode->wait, 0, memory_order_release);
}
void SystemProperties::ReadCallback(const prop_info* pi,
                                    void (*callback)(void* cookie, const char* name,
                                                     const char* value, uint32_t serial),
                                    void* cookie) {
  // Read only properties don't need to copy the value to a temporary buffer, since it can never
  // change.
  if (is_read_only(pi->name)) {
    uint32_t serial = Serial(pi);
    if (pi->is_long()) {
      callback(cookie, pi->name, pi->long_value(), serial);
    } else {
      callback(cookie, pi->name, pi->value, serial);
    }
    return;
  }

  while (true) {
    uint32_t serial = Serial(pi);  // acquire semantics
    size_t len = SERIAL_VALUE_LEN(serial);
    char value_buf[len + 1];

    memcpy(value_buf, pi->value, len);
    value_buf[len] = '\0';

    // TODO: see todo in Read function
    atomic_thread_fence(memory_order_acquire);
    if (serial == load_const_atomic(&(pi->serial), memory_order_relaxed)) {
      callback(cookie, pi->name, value_buf, serial);
      return;
    }
  }
}
int SystemProperties::Update(prop_info* pi, const char* value, unsigned int len) {
  if (len >= PROP_VALUE_MAX) {
    return -1;
  }

  if (!initialized_) {
    return -1;
  }

  prop_area* pa = contexts_->GetSerialPropArea();
  if (!pa) {
    return -1;
  }

  uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_relaxed);
  serial |= 1;
  atomic_store_explicit(&pi->serial, serial, memory_order_relaxed);
  // The strlcpy call here also races. Again pretend it
  // used memory_order_relaxed atomics, and use the analogous
  // counterintuitive fence.
  atomic_thread_fence(memory_order_release);
  strlcpy(pi->value, value, len + 1);

  atomic_store_explicit(&pi->serial, (len << 24) | ((serial + 1) & 0xffffff),
                        memory_order_release);
  __futex_wake(&pi->serial, INT32_MAX);

  atomic_store_explicit(pa->serial(),
                        atomic_load_explicit(pa->serial(), memory_order_relaxed) + 1,
                        memory_order_release);
  __futex_wake(pa->serial(), INT32_MAX);

  return 0;
}
void mb__system_property_read_callback(const prop_info* pi,
                                       void (*callback)(void* cookie, const char* name,
                                                        const char* value, uint32_t serial),
                                       void* cookie) {
#if MB_ENABLE_COMPAT_PROPERTIES
  // TODO (dimitry): do we need compat mode for this function?
  if (__predict_false(compat_mode)) {
    uint32_t serial = mb__system_property_serial_compat(pi);
    char name_buf[PROP_NAME_MAX];
    char value_buf[PROP_VALUE_MAX];
    mb__system_property_read_compat(pi, name_buf, value_buf);
    callback(cookie, name_buf, value_buf, serial);
    return;
  }
#endif

  while (true) {
    uint32_t serial = mb__system_property_serial(pi);  // acquire semantics
    size_t len = SERIAL_VALUE_LEN(serial);
    char value_buf[len + 1];

    memcpy(value_buf, pi->value, len);
    value_buf[len] = '\0';

    // TODO: see todo in __system_property_read function
    atomic_thread_fence(memory_order_acquire);
    if (serial == load_const_atomic(&(pi->serial), memory_order_relaxed)) {
      callback(cookie, pi->name, value_buf, serial);
      return;
    }
  }
}
int mb__system_property_read(const prop_info* pi, char* name, char* value) {
#if MB_ENABLE_COMPAT_PROPERTIES
  if (__predict_false(compat_mode)) {
    return mb__system_property_read_compat(pi, name, value);
  }
#endif

  while (true) {
    uint32_t serial = mb__system_property_serial(pi);  // acquire semantics
    size_t len = SERIAL_VALUE_LEN(serial);
    memcpy(value, pi->value, len + 1);
    // TODO: Fix the synchronization scheme here.
    // There is no fully supported way to implement this kind
    // of synchronization in C++11, since the memcpy races with
    // updates to pi, and the data being accessed is not atomic.
    // The following fence is unintuitive, but would be the
    // correct one if memcpy used memory_order_relaxed atomic accesses.
    // In practice it seems unlikely that the generated code would
    // be any different, so this should be OK.
    atomic_thread_fence(memory_order_acquire);
    if (serial == load_const_atomic(&(pi->serial), memory_order_relaxed)) {
      if (name != nullptr) {
        size_t namelen = strlcpy(name, pi->name, PROP_NAME_MAX);
        if (namelen >= PROP_NAME_MAX) {
          LOGE("The property name length for \"%s\" is >= %d;"
               " please use __system_property_read_callback"
               " to read this property. (the name is truncated to \"%s\")",
               pi->name, PROP_NAME_MAX - 1, name);
        }
      }
      return len;
    }
  }
}
// Increase the counts of all requested pages by 1.
void fs_inode_map_region(struct inode *node, size_t offset, size_t length)
{
    mutex_acquire(&node->mappings_lock);
    __init_physicals(node);
    ASSERT(!(offset & ~PAGE_MASK));

    int page_number = offset / PAGE_SIZE;
    int npages = ((length - 1) / PAGE_SIZE) + 1;

    for(int i = page_number; i < (page_number + npages); i++) {
        struct physical_page *entry;
        if((entry = hash_lookup(&node->physicals, &i, sizeof(i))) == NULL) {
            // Create the entry, and add it.
            entry = __create_entry();
            entry->pn = i;
            hash_insert(&node->physicals, &entry->pn, sizeof(entry->pn),
                        &entry->hash_elem, entry);
            atomic_fetch_add_explicit(&node->mapped_entries_count, 1,
                                      memory_order_relaxed);
        }
        mutex_acquire(&entry->lock);
        // Bump the count...
        atomic_fetch_add_explicit(&entry->count, 1, memory_order_relaxed);
        mutex_release(&entry->lock);
        /* NOTE: We're not actually allocating or mapping anything here, really. All we're doing
         * is indicating our intent to map a certain section, so we don't free pages. */
    }
    atomic_thread_fence(memory_order_acq_rel);
    mutex_release(&node->mappings_lock);
}
int __system_property_read(const prop_info *pi, char *name, char *value)
{
    if (__predict_false(compat_mode)) {
        return __system_property_read_compat(pi, name, value);
    }

    while (true) {
        uint32_t serial = __system_property_serial(pi);  // acquire semantics
        size_t len = SERIAL_VALUE_LEN(serial);
        memcpy(value, pi->value, len + 1);
        // TODO: Fix the synchronization scheme here.
        // There is no fully supported way to implement this kind
        // of synchronization in C++11, since the memcpy races with
        // updates to pi, and the data being accessed is not atomic.
        // The following fence is unintuitive, but would be the
        // correct one if memcpy used memory_order_relaxed atomic accesses.
        // In practice it seems unlikely that the generated code would
        // be any different, so this should be OK.
        atomic_thread_fence(memory_order_acquire);
        if (serial == load_const_atomic(&(pi->serial), memory_order_relaxed)) {
            if (name != 0) {
                strcpy(name, pi->name);
            }
            return len;
        }
    }
}
int __system_property_update(prop_info *pi, const char *value, unsigned int len)
{
    prop_area *pa = __system_property_area__;

    if (len >= PROP_VALUE_MAX)
        return -1;

    uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_relaxed);
    serial |= 1;
    atomic_store_explicit(&pi->serial, serial, memory_order_relaxed);
    // The memcpy call here also races. Again pretend it
    // used memory_order_relaxed atomics, and use the analogous
    // counterintuitive fence.
    atomic_thread_fence(memory_order_release);
    memcpy(pi->value, value, len + 1);
    atomic_store_explicit(&pi->serial,
                          (len << 24) | ((serial + 1) & 0xffffff),
                          memory_order_release);
    __futex_wake(&pi->serial, INT32_MAX);

    atomic_store_explicit(&pa->serial,
                          atomic_load_explicit(&pa->serial, memory_order_relaxed) + 1,
                          memory_order_release);
    __futex_wake(&pa->serial, INT32_MAX);

    return 0;
}
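The reader/writer pair above (and the SystemProperties/mb_ variants earlier) implement a seqlock: the writer makes the serial odd, fences, copies the payload, then publishes a new even serial; the reader snapshots the serial, copies, fences, and re-checks that the serial has not changed. The following is a minimal sketch of that pattern distilled from the code above, not the actual Android implementation: seq_cell_t, seq_write, and seq_read are hypothetical names, the length packing is omitted, and the memcpy carries the same formal data-race caveat the original TODO describes.

#include <stdatomic.h>
#include <stdbool.h>
#include <string.h>

#define PAYLOAD_LEN 64

typedef struct {
    atomic_uint serial;        /* odd while an update is in flight */
    char value[PAYLOAD_LEN];   /* racy payload, treated as-if relaxed atomics */
} seq_cell_t;

void seq_write(seq_cell_t *c, const char *src, size_t len) {
    unsigned s = atomic_load_explicit(&c->serial, memory_order_relaxed);
    atomic_store_explicit(&c->serial, s | 1, memory_order_relaxed);
    // Counterintuitive release fence *before* the writes: makes the odd
    // serial visible before the payload bytes, as in __system_property_update.
    atomic_thread_fence(memory_order_release);
    memcpy(c->value, src, len);
    atomic_store_explicit(&c->serial, (s | 1) + 1, memory_order_release);
}

bool seq_read(seq_cell_t *c, char *dst) {
    unsigned s = atomic_load_explicit(&c->serial, memory_order_acquire);
    if (s & 1)
        return false;          /* writer in progress */
    memcpy(dst, c->value, PAYLOAD_LEN);
    // Counterintuitive acquire fence *after* the reads: orders the payload
    // reads before the serial re-check, as in __system_property_read.
    atomic_thread_fence(memory_order_acquire);
    return s == atomic_load_explicit(&c->serial, memory_order_relaxed);
}

A caller would retry seq_read in a loop until it returns true, exactly as __system_property_read loops on the serial re-check.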
int take(Deque *q)
{
    std::string str1("pop_back");       //ANNOTATION
    function_call(str1, INVOCATION);    //ANNOTATION

    size_t b = atomic_load_explicit(&q->bottom, memory_order_seq_cst) - 1;
    Array *a = (Array *) atomic_load_explicit(&q->array, memory_order_seq_cst);
    atomic_store_explicit(&q->bottom, b, memory_order_seq_cst); //relaxed
    atomic_thread_fence(memory_order_seq_cst);
    size_t t = atomic_load_explicit(&q->top, memory_order_seq_cst);
    int x;
    if (t <= b) {
        /* Non-empty queue. */
        x = atomic_load_explicit(&a->buffer[b % atomic_load_explicit(&a->size, memory_order_seq_cst)],
                                 memory_order_seq_cst);
        if (t == b) {
            /* Single last element in queue. */
            if (!atomic_compare_exchange_strong_explicit(&q->top, &t, t + 1,
                                                         memory_order_seq_cst,
                                                         memory_order_seq_cst))
                /* Failed race. */
                x = EMPTY;
            atomic_store_explicit(&q->bottom, b + 1, memory_order_seq_cst); //relaxed
        }
    } else { /* Empty queue. */
        x = EMPTY;
        atomic_store_explicit(&q->bottom, b + 1, memory_order_seq_cst); //relaxed
    }

    //if(x == EMPTY)
    //    function_call(str1, RESPONSE, (uint64_t) NULL); //ANNOTATION
    //else
    function_call(str1, RESPONSE, (uint64_t) x);          //ANNOTATION
    return x;
}
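The seq_cst fence in take() sits between the store to q->bottom and the load of q->top (the //relaxed comments mark where the original paper's weaker orderings go). Its job is the classic store-buffering guarantee: the owner's store to bottom and a stealer's store to top must each become visible before the other thread's subsequent load, so the owner and a thief cannot both conclude the contested last element is still theirs. A standalone litmus sketch of that guarantee, assuming C11 <threads.h> is available; the names X, Y, t1, and t2 are mine:

#include <stdatomic.h>
#include <stdio.h>
#include <threads.h>

// Store-buffering litmus test. Without the seq_cst fences, both threads
// may read 0 (each load can complete before the other thread's store is
// visible). With the fences, at least one thread must observe 1 -- the
// same property take() relies on between its bottom store and top load.
static atomic_int X, Y;
static int r1, r2;

static int t1(void *arg) {
    (void)arg;
    atomic_store_explicit(&X, 1, memory_order_relaxed);
    atomic_thread_fence(memory_order_seq_cst);
    r1 = atomic_load_explicit(&Y, memory_order_relaxed);
    return 0;
}

static int t2(void *arg) {
    (void)arg;
    atomic_store_explicit(&Y, 1, memory_order_relaxed);
    atomic_thread_fence(memory_order_seq_cst);
    r2 = atomic_load_explicit(&X, memory_order_relaxed);
    return 0;
}

int main(void) {
    thrd_t a, b;
    thrd_create(&a, t1, NULL);
    thrd_create(&b, t2, NULL);
    thrd_join(a, NULL);
    thrd_join(b, NULL);
    printf("r1=%d r2=%d\n", r1, r2);  // r1 == 0 && r2 == 0 is forbidden
    return 0;
}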
void* ponyint_mpmcq_pop(mpmcq_t* q)
{
  size_t my_ticket = atomic_fetch_add_explicit(&q->ticket, 1,
    memory_order_relaxed);

  while(my_ticket != atomic_load_explicit(&q->waiting_for,
    memory_order_relaxed))
    ponyint_cpu_relax();

  atomic_thread_fence(memory_order_acquire);

  mpmcq_node_t* tail = atomic_load_explicit(&q->tail, memory_order_relaxed);

  // Get the next node rather than the tail. The tail is either a stub or has
  // already been consumed.
  mpmcq_node_t* next = atomic_load_explicit(&tail->next, memory_order_relaxed);

  // Bailout if we have no next node.
  if(next == NULL)
  {
    atomic_store_explicit(&q->waiting_for, my_ticket + 1,
      memory_order_relaxed);
    return NULL;
  }

  atomic_store_explicit(&q->tail, next, memory_order_relaxed);
  atomic_store_explicit(&q->waiting_for, my_ticket + 1, memory_order_release);

  // Synchronise-with the push.
  atomic_thread_fence(memory_order_acquire);

  // We'll return the data pointer from the next node.
  void* data = atomic_load_explicit(&next->data, memory_order_relaxed);

  // Since we will be freeing the old tail, we need to be sure no other
  // consumer is still reading the old tail. To do this, we set the data
  // pointer of our new tail to NULL, and we wait until the data pointer of
  // the old tail is NULL.
  atomic_store_explicit(&next->data, NULL, memory_order_release);

  while(atomic_load_explicit(&tail->data, memory_order_relaxed) != NULL)
    ponyint_cpu_relax();

  atomic_thread_fence(memory_order_acquire);

  // Free the old tail. The new tail is the next node.
  POOL_FREE(mpmcq_node_t, tail);
  return data;
}
friend void intrusive_ptr_release( unbounded_channel_base * p)
{
    if ( p->use_count_.fetch_sub( 1, memory_order_release) == 1)
    {
        atomic_thread_fence( memory_order_acquire);
        delete p;
    }
}
friend void intrusive_ptr_release( node * p)
{
    if ( p->use_count.fetch_sub( 1, memory_order_release) == 1)
    {
        atomic_thread_fence( memory_order_acquire);
        delete p;
    }
}
void dec_ref() const {
    int new_ref_count = ref_count_.fetch_sub(1, MEMORY_ORDER_RELEASE);
    assert(new_ref_count >= 1);

    if (new_ref_count == 1) {
        atomic_thread_fence(MEMORY_ORDER_ACQUIRE);
        delete static_cast<const T*>(this);
    }
}
friend inline void intrusive_ptr_release( pool_base * p)
{
    if ( 1 == p->use_count_.fetch_sub( 1, memory_order_release) )
    {
        atomic_thread_fence( memory_order_acquire);
        delete p;
    }
}
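The four decrement routines above all use the same reference-counting idiom: fetch_sub with release ordering, plus an acquire fence taken only on the path that drops the last reference. The release makes each thread's prior use of the object visible before its decrement; the acquire fence lets the deleting thread observe all of those uses before running the destructor, without paying acquire on every decrement. A minimal C rendering of the same idiom; ref_obj_t and ref_obj_release are hypothetical names:

#include <stdatomic.h>
#include <stdlib.h>

typedef struct {
    atomic_int refs;
    /* ... payload ... */
} ref_obj_t;

void ref_obj_release(ref_obj_t *p) {
    // release: every access this thread made to *p is ordered before the
    // decrement, so it is visible to whichever thread ends up freeing p.
    if (atomic_fetch_sub_explicit(&p->refs, 1, memory_order_release) == 1) {
        // acquire fence: pairs with the other threads' release decrements,
        // so free() cannot be hoisted above their last accesses to *p.
        atomic_thread_fence(memory_order_acquire);
        free(p);
    }
}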
static void memory_fence() {
  // Internally, libuv has a "pending" flag check whose load can be reordered
  // before the store of the data into the queue, causing the data in the
  // queue not to be consumed. This fence ensures that the load happens after
  // the data has been stored in the queue.
#if defined(CASS_USE_BOOST_ATOMIC) || defined(CASS_USE_STD_ATOMIC)
  atomic_thread_fence(MEMORY_ORDER_SEQ_CST);
#endif
}
void func1()
{
    for(int i = 0; i < 1000000; ++i)
    {
        a = i;
        // Ensure that changes to a to this point are visible to other threads
        atomic_thread_fence(std::memory_order_release);
    }
}
void func2()
{
    for(int i = 0; i < 1000000; ++i)
    {
        // Ensure that this thread's view of a is up to date
        atomic_thread_fence(std::memory_order_acquire);
        std::cout << a;
    }
}
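Taken alone, func1/func2 do not establish synchronization under the C++11 model: fences pair only through an atomic object, with the release fence sequenced before an atomic store and the acquire fence sequenced after an atomic load that reads that store (and if a is a plain variable, the accesses race outright). A sketch of the pairing the comments seem to intend, with the fences moved to the correct sides of the atomic accesses; payload, ready, producer, and consumer are hypothetical names:

#include <stdatomic.h>
#include <stdbool.h>

static int payload;        /* plain data being handed off */
static atomic_bool ready;

void producer(void) {
    payload = 123;
    // Release fence *before* the relaxed store: pairs with the consumer's
    // acquire fence once the consumer reads ready == true.
    atomic_thread_fence(memory_order_release);
    atomic_store_explicit(&ready, true, memory_order_relaxed);
}

int consumer(void) {
    while (!atomic_load_explicit(&ready, memory_order_relaxed))
        ;  /* spin */
    // Acquire fence *after* the relaxed load: everything the producer did
    // before its release fence, including the write to payload, is visible.
    atomic_thread_fence(memory_order_acquire);
    return payload;  /* guaranteed to read 123 */
}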
void GlobalRcu::exit(size_t epoch)
{
    // Ensures that all reads are terminated before we decrement the epoch
    // counter. Unfortunately there's no equivalent of the release semantic for
    // reads so we need to use a full barrier instead. Sucky but it's life.
    atomic_thread_fence(memory_order_seq_cst);
    getTls()[epoch & 1].count--;
}
int main(int argc, char **argv)
{
    (void)argc;
    int num_producers = NUM_THREADS - 1;
    pthread_t producers[num_producers];
    pthread_t consumer;
    struct timespec start, end;

    for(int i = 0; i < NUM_THREADS; i++) {
        for(int j = 0; j < NUM_ITEMS; j++) {
            items[i][j].sent = 0;
            items[i][j].recv = 0;
        }
    }

    int cap = atoi(argv[1]);
    queue = mpscq_create(NULL, cap);

    pthread_create(&consumer, NULL, consumer_main, NULL);

    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
    for(long i = 0; i < num_producers; i++) {
        pthread_create(&producers[i], NULL, producer_main, (void *)i);
    }
    for(int i = 0; i < num_producers; i++) {
        pthread_join(producers[i], NULL);
    }
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);

    done = true;
    pthread_join(consumer, NULL);

    atomic_thread_fence(memory_order_seq_cst);
    for(int i = 0; i < num_producers; i++) {
        for(int j = 0; j < NUM_ITEMS; j++) {
            if(items[i][j].sent != 2) {
                printf(":(%d %d): %d %d, %d %d\n", i, j,
                       items[i][j].sent, items[i][j].recv,
                       amount_produced, amount_consumed);
            }
            assert(items[i][j].sent == 2);
            assert(items[i][j].recv == 1);
        }
    }

    long ms = (end.tv_sec - start.tv_sec) * 1000;
    ms += (end.tv_nsec - start.tv_nsec) / 1000000;
    fprintf(stdout, "\t%d\t%ld\t%ld\n", retries, ms,
            (long)(total / amount_produced));

    assert(amount_produced == amount_consumed);
    exit(amount_produced != amount_consumed);
}
static void b(void *obj)
{
    int r1, r2;

    r1 = atomic_load_explicit(&x, memory_order_relaxed);
    atomic_thread_fence(memory_order_acquire);
    r2 = atomic_load_explicit(&x, memory_order_relaxed);

    printf("FENCES: r1 = %d, r2 = %d\n", r1, r2);
    if (r1 == 2)
        MODEL_ASSERT(r2 != 1);
}
/* ChapelBase.chpl:831 */
static void _waitEndCount(chpl___EndCount e, int64_t _ln, c_string _fn) {
  memory_order local_memory_order_acquire;
  memory_order local_memory_order_relaxed;
  chpl_task_list_p ret;
  _ref_atomic_int64 call_tmp = NULL;
  chpl_bool T;
  _ref_atomic_int_least64_t call_tmp2 = NULL;
  int64_t call_tmp3;
  chpl_bool call_tmp4;
  _ref_atomic_int_least64_t call_tmp5 = NULL;
  int64_t call_tmp6;
  chpl_bool call_tmp7;
  int64_t ret2;
  locale call_tmp8 = NULL;
  int32_t call_tmp9;
  chpl_localeID_t call_tmp10;
  _ref_chpl_localeID_t ret_to_arg_ref_tmp_ = NULL;
  chpl_localeID_t call_tmp11;
  locale call_tmp12 = NULL;
  _ref_atomic_int64 call_tmp13 = NULL;
  _ref_atomic_int_least64_t call_tmp14 = NULL;
  chpl_task_list_p ret3;
  local_memory_order_acquire = memory_order_acquire;
  local_memory_order_relaxed = memory_order_relaxed;
  ret = (e)->taskList;
  chpl_taskListExecute(ret, _ln, _fn);
  call_tmp = &((e)->i);
  call_tmp2 = &((call_tmp)->_v);
  call_tmp3 = atomic_load_explicit_int_least64_t(call_tmp2, local_memory_order_relaxed);
  call_tmp4 = (call_tmp3 != INT64(0));
  T = call_tmp4;
  while (T) {
    chpl_task_yield();
    call_tmp5 = &((call_tmp)->_v);
    call_tmp6 = atomic_load_explicit_int_least64_t(call_tmp5, local_memory_order_relaxed);
    call_tmp7 = (call_tmp6 != INT64(0));
    T = call_tmp7;
  }
  atomic_thread_fence(local_memory_order_acquire);
  ret2 = (e)->taskCnt;
  call_tmp9 = chpl_task_getRequestedSubloc();
  ret_to_arg_ref_tmp_ = &call_tmp10;
  chpl_buildLocaleID(chpl_nodeID, call_tmp9, ret_to_arg_ref_tmp_, _ln, _fn);
  call_tmp11 = chpl__initCopy_chpl_rt_localeID_t(call_tmp10);
  call_tmp12 = chpl_localeID_to_locale(&call_tmp11, _ln, _fn);
  call_tmp8 = call_tmp12;
  call_tmp13 = &((call_tmp8)->runningTaskCounter);
  call_tmp14 = &((call_tmp13)->_v);
  atomic_fetch_sub_explicit_int_least64_t(call_tmp14, ret2, local_memory_order_relaxed);
  ret3 = (e)->taskList;
  chpl_taskListFree(ret3, _ln, _fn);
  return;
}
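The generated code above spins on relaxed loads and issues a single acquire fence after the loop, rather than paying an acquire load on every iteration: only the final, successful observation needs to synchronize with the releasing decrement on the signalling side. Distilled, and with count and wait_until_zero as hypothetical names, the idiom looks like:

#include <stdatomic.h>

static atomic_long count;

// Spin with cheap relaxed loads; one acquire fence after the loop is
// enough, because only the load that finally saw zero needs to pair with
// the release decrement performed by the signalling task.
void wait_until_zero(void) {
    while (atomic_load_explicit(&count, memory_order_relaxed) != 0)
        ;  /* spin (or yield, as chpl_task_yield does above) */
    atomic_thread_fence(memory_order_acquire);
}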
TEST(stdatomic, atomic_thread_fence) {
  atomic_thread_fence(memory_order_relaxed);
  atomic_thread_fence(memory_order_consume);
  atomic_thread_fence(memory_order_acquire);
  atomic_thread_fence(memory_order_release);
  atomic_thread_fence(memory_order_acq_rel);
  atomic_thread_fence(memory_order_seq_cst);
}
caerModuleData caerModuleInitialize(uint16_t moduleID, const char *moduleShortName, sshsNode mainloopNode) {
  // Generate short module name with ID, reused in all error messages and later code.
  size_t nameLength = (size_t) snprintf(NULL, 0, "%" PRIu16 "-%s", moduleID, moduleShortName);
  char nameString[nameLength + 1];
  snprintf(nameString, nameLength + 1, "%" PRIu16 "-%s", moduleID, moduleShortName);

  // Allocate memory for the module.
  caerModuleData moduleData = calloc(1, sizeof(struct caer_module_data));
  if (moduleData == NULL) {
    caerLog(CAER_LOG_ALERT, nameString, "Failed to allocate memory for module. Error: %d.", errno);
    thrd_exit(EXIT_FAILURE);
  }

  // Set module ID for later identification (hash-table key).
  moduleData->moduleID = moduleID;

  // Put module into startup state.
  moduleData->moduleStatus = STOPPED;
  atomic_store_explicit(&moduleData->running, true, memory_order_relaxed);

  // Determine SSHS module node. Use short name for better human recognition.
  char sshsString[nameLength + 2];
  strncpy(sshsString, nameString, nameLength);
  sshsString[nameLength] = '/';
  sshsString[nameLength + 1] = '\0';

  // Initialize configuration, shutdown hooks.
  moduleData->moduleNode = sshsGetRelativeNode(mainloopNode, sshsString);
  if (moduleData->moduleNode == NULL) {
    caerLog(CAER_LOG_ALERT, nameString, "Failed to allocate configuration node for module.");
    thrd_exit(EXIT_FAILURE);
  }

  sshsNodePutBool(moduleData->moduleNode, "shutdown", false); // Always reset to false.
  sshsNodeAddAttributeListener(moduleData->moduleNode, moduleData, &caerModuleShutdownListener);

  // Setup default full log string name.
  moduleData->moduleSubSystemString = malloc(nameLength + 1);
  if (moduleData->moduleSubSystemString == NULL) {
    caerLog(CAER_LOG_ALERT, nameString, "Failed to allocate subsystem string for module.");
    thrd_exit(EXIT_FAILURE);
  }

  strncpy(moduleData->moduleSubSystemString, nameString, nameLength);
  moduleData->moduleSubSystemString[nameLength] = '\0';

  atomic_thread_fence(memory_order_release);

  return (moduleData);
}
static int __pthread_rwlock_timedwrlock(pthread_rwlock_internal_t* rwlock,
                                        const timespec* abs_timeout_or_null) {
  if (__predict_false(__get_thread()->tid ==
      atomic_load_explicit(&rwlock->writer_thread_id, memory_order_relaxed))) {
    return EDEADLK;
  }

  while (true) {
    int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
    if (__predict_true(old_state == 0)) {
      if (atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, -1,
                                                memory_order_acquire, memory_order_relaxed)) {
        // writer_thread_id is protected by rwlock and can only be modified in the rwlock write
        // owner thread. Other threads may read it for EDEADLK error checking; an atomic
        // operation is safe enough for that.
        atomic_store_explicit(&rwlock->writer_thread_id, __get_thread()->tid,
                              memory_order_relaxed);
        return 0;
      }
    } else {
      timespec ts;
      timespec* rel_timeout = NULL;

      if (abs_timeout_or_null != NULL) {
        rel_timeout = &ts;
        if (!timespec_from_absolute_timespec(*rel_timeout, *abs_timeout_or_null, CLOCK_REALTIME)) {
          return ETIMEDOUT;
        }
      }

      // To avoid losing wake ups, the pending_writers increment should be observed before
      // futex_wait by all threads. A seq_cst fence instead of a seq_cst operation is used here,
      // because only a seq_cst fence can ensure sequential consistency for the non-atomic
      // operations in futex_wait.
      atomic_fetch_add_explicit(&rwlock->pending_writers, 1, memory_order_relaxed);

      atomic_thread_fence(memory_order_seq_cst);

      int ret = __futex_wait_ex(&rwlock->state, rwlock->process_shared(), old_state,
                                rel_timeout);

      atomic_fetch_sub_explicit(&rwlock->pending_writers, 1, memory_order_relaxed);

      if (ret == -ETIMEDOUT) {
        return ETIMEDOUT;
      }
    }
  }
}
int pthread_rwlock_unlock(pthread_rwlock_t* rwlock_interface) {
  pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface);

  int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed);
  if (__predict_false(old_state == 0)) {
    return EPERM;
  } else if (old_state == -1) {
    if (atomic_load_explicit(&rwlock->writer_thread_id, memory_order_relaxed) !=
        __get_thread()->tid) {
      return EPERM;
    }
    // We're no longer the owner.
    atomic_store_explicit(&rwlock->writer_thread_id, 0, memory_order_relaxed);
    // Change state from -1 to 0.
    atomic_store_explicit(&rwlock->state, 0, memory_order_release);
  } else { // old_state > 0
    // Reduce state by 1.
    while (old_state > 0 && !atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state,
                              old_state - 1, memory_order_release, memory_order_relaxed)) {
    }

    if (old_state <= 0) {
      return EPERM;
    } else if (old_state > 1) {
      return 0;
    }
    // old_state = 1, which means the last reader calling unlock. It has to wake up waiters.
  }

  // If there are waiters, wake them up.
  // To avoid losing wake ups, the update of state should be observed before reading
  // pending_readers/pending_writers by all threads. Use read locking as an example:
  //     read locking thread                        unlocking thread
  //        pending_readers++;                         state = 0;
  //        seq_cst fence                              seq_cst fence
  //        read state for futex_wait                  read pending_readers for futex_wake
  //
  // So when locking and unlocking threads are running in parallel, we will not get
  // into a situation where the locking thread reads state as negative and needs to wait,
  // while the unlocking thread reads pending_readers as zero and doesn't need to wake up waiters.
  atomic_thread_fence(memory_order_seq_cst);
  if (__predict_false(atomic_load_explicit(&rwlock->pending_readers, memory_order_relaxed) > 0 ||
                      atomic_load_explicit(&rwlock->pending_writers, memory_order_relaxed) > 0)) {
    __futex_wake_ex(&rwlock->state, rwlock->process_shared(), INT_MAX);
  }
  return 0;
}
void push(Deque *q, int x)
{
    std::string str1("push_back");                  //ANNOTATION
    function_call(str1, INVOCATION, (uint64_t) x);  //ANNOTATION

    size_t b = atomic_load_explicit(&q->bottom, memory_order_seq_cst);
    size_t t = atomic_load_explicit(&q->top, memory_order_seq_cst);
    Array *a = (Array *) atomic_load_explicit(&q->array, memory_order_seq_cst);
    if (b - t > atomic_load_explicit(&a->size, memory_order_seq_cst) - 1) {
        /* Full queue. */
        resize(q);
        // Bug in paper...should have next line...
        a = (Array *) atomic_load_explicit(&q->array, memory_order_seq_cst);
    }
    atomic_store_explicit(&a->buffer[b % atomic_load_explicit(&a->size, memory_order_seq_cst)],
                          x, memory_order_seq_cst);
    atomic_thread_fence(memory_order_seq_cst);
    atomic_store_explicit(&q->bottom, b + 1, memory_order_seq_cst); //relaxed

    function_call(str1, RESPONSE);                  //ANNOTATION
}
pony_msg_t* ponyint_messageq_pop(messageq_t* q)
{
  pony_msg_t* tail = q->tail;
  pony_msg_t* next = atomic_load_explicit(&tail->next, memory_order_relaxed);

  if(next != NULL)
  {
    q->tail = next;
    atomic_thread_fence(memory_order_acquire);
#ifdef USE_VALGRIND
    ANNOTATE_HAPPENS_AFTER(&tail->next);
    ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(tail);
#endif
    ponyint_pool_free(tail->index, tail);
  }

  return next;
}
// Called from pthread_exit() to remove all pthread keys. This must call the destructor of
// all keys that have a non-NULL data value and a non-NULL destructor.
__LIBC_HIDDEN__ void pthread_key_clean_all() {
  // Because destructors can do funky things like deleting/creating other keys,
  // we need to implement this in a loop.
  pthread_key_data_t* key_data = get_thread_key_data();
  for (size_t rounds = PTHREAD_DESTRUCTOR_ITERATIONS; rounds > 0; --rounds) {
    size_t called_destructor_count = 0;
    for (size_t i = 0; i < BIONIC_PTHREAD_KEY_COUNT; ++i) {
      uintptr_t seq = atomic_load_explicit(&key_map[i].seq, memory_order_relaxed);
      if (SeqOfKeyInUse(seq) && seq == key_data[i].seq && key_data[i].data != nullptr) {
        // Other threads may be calling pthread_key_delete/pthread_key_create while current thread
        // is exiting. So we need to ensure we read the right key_destructor.
        // We can rely on a user-established happens-before relationship between the creation and
        // use of pthread key to ensure that we're not getting an earlier key_destructor.
        // To avoid using the key_destructor of the newly created key in the same slot, we need to
        // recheck the sequence number after reading key_destructor. As a result, we either see the
        // right key_destructor, or the sequence number must have changed when we reread it below.
        key_destructor_t key_destructor = reinterpret_cast<key_destructor_t>(
            atomic_load_explicit(&key_map[i].key_destructor, memory_order_relaxed));
        if (key_destructor == nullptr) {
          continue;
        }
        atomic_thread_fence(memory_order_acquire);
        if (atomic_load_explicit(&key_map[i].seq, memory_order_relaxed) != seq) {
          continue;
        }

        // We need to clear the key data now, this will prevent the destructor (or a later one)
        // from seeing the old value if it calls pthread_getspecific().
        // We don't do this if 'key_destructor == NULL' just in case another destructor
        // function is responsible for manually releasing the corresponding data.
        void* data = key_data[i].data;
        key_data[i].data = nullptr;

        (*key_destructor)(data);
        ++called_destructor_count;
      }
    }

    // If we didn't call any destructors, there is no need to check the pthread keys again.
    if (called_destructor_count == 0) {
      break;
    }
  }
}