bool ponyint_messageq_push(messageq_t* q, pony_msg_t* first, pony_msg_t* last)
{
  atomic_store_explicit(&last->next, NULL, memory_order_relaxed);

  // Without that fence, the store to last->next above could be reordered after
  // the exchange on the head and after the store to prev->next done by the
  // next push, which would result in the pop incorrectly seeing the queue as
  // empty.
  // Also synchronise with the pop on prev->next.
  atomic_thread_fence(memory_order_release);

  pony_msg_t* prev = atomic_exchange_explicit(&q->head, last,
    memory_order_relaxed);

  bool was_empty = ((uintptr_t)prev & 1) != 0;
  prev = (pony_msg_t*)((uintptr_t)prev & ~(uintptr_t)1);

#ifdef USE_VALGRIND
  // Double fence with Valgrind since we need to have prev in scope for the
  // synchronisation annotation.
  ANNOTATE_HAPPENS_BEFORE(&prev->next);
  atomic_thread_fence(memory_order_release);
#endif
  atomic_store_explicit(&prev->next, first, memory_order_relaxed);

  return was_empty;
}
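/*
 * A minimal consumer-side sketch (not part of the snippet above, and not
 * claimed to be the runtime's actual pop): it shows what the release
 * fence/store in the push pairs with. The single consumer reads tail->next
 * with a relaxed load and then issues an acquire fence before the caller
 * touches the message payload. The name messageq_pop_sketch is illustrative.
 */
static pony_msg_t* messageq_pop_sketch(messageq_t* q)
{
  pony_msg_t* tail = q->tail;
  pony_msg_t* next = atomic_load_explicit(&tail->next, memory_order_relaxed);

  if(next != NULL)
  {
    // Advance past the consumed stub/message.
    q->tail = next;

    // Synchronise with the producer's release so the message contents written
    // before the push are visible to the caller.
    atomic_thread_fence(memory_order_acquire);
  }

  return next;
}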
const prop_info *prop_area::find_property(prop_bt *const trie, const char *name,
    uint8_t namelen, const char *value, uint8_t valuelen, bool alloc_if_needed)
{
  if (!trie)
    return NULL;

  const char *remaining_name = name;
  prop_bt* current = trie;
  while (true) {
    const char *sep = strchr(remaining_name, '.');
    const bool want_subtree = (sep != NULL);
    const uint8_t substr_size = (want_subtree) ?
      sep - remaining_name : strlen(remaining_name);

    if (!substr_size) {
      return NULL;
    }

    prop_bt* root = NULL;
    uint_least32_t children_offset = atomic_load_explicit(&current->children,
                                                          memory_order_relaxed);
    if (children_offset != 0) {
      root = to_prop_bt(&current->children);
    } else if (alloc_if_needed) {
      uint_least32_t new_offset;
      root = new_prop_bt(remaining_name, substr_size, &new_offset);
      if (root) {
        atomic_store_explicit(&current->children, new_offset,
                              memory_order_release);
      }
    }

    if (!root) {
      return NULL;
    }

    current = find_prop_bt(root, remaining_name, substr_size, alloc_if_needed);
    if (!current) {
      return NULL;
    }

    if (!want_subtree)
      break;

    remaining_name = sep + 1;
  }

  uint_least32_t prop_offset = atomic_load_explicit(&current->prop,
                                                    memory_order_relaxed);
  if (prop_offset != 0) {
    return to_prop_info(&current->prop);
  } else if (alloc_if_needed) {
    uint_least32_t new_offset;
    prop_info* new_info = new_prop_info(name, namelen, value, valuelen,
                                        &new_offset);
    if (new_info) {
      atomic_store_explicit(&current->prop, new_offset, memory_order_release);
    }

    return new_info;
  } else {
    return NULL;
  }
}
int SystemProperties::Update(prop_info* pi, const char* value, unsigned int len) {
  if (len >= PROP_VALUE_MAX) {
    return -1;
  }

  if (!initialized_) {
    return -1;
  }

  prop_area* pa = contexts_->GetSerialPropArea();
  if (!pa) {
    return -1;
  }

  uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_relaxed);
  serial |= 1;
  atomic_store_explicit(&pi->serial, serial, memory_order_relaxed);
  // The strlcpy call here also races. Again pretend it
  // used memory_order_relaxed atomics, and use the analogous
  // counterintuitive fence.
  atomic_thread_fence(memory_order_release);
  strlcpy(pi->value, value, len + 1);
  atomic_store_explicit(&pi->serial, (len << 24) | ((serial + 1) & 0xffffff),
                        memory_order_release);
  __futex_wake(&pi->serial, INT32_MAX);

  atomic_store_explicit(pa->serial(),
                        atomic_load_explicit(pa->serial(), memory_order_relaxed) + 1,
                        memory_order_release);
  __futex_wake(pa->serial(), INT32_MAX);

  return 0;
}
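/*
 * Hedged reader-side sketch (assumed, not taken from the snippet above): the
 * per-property serial behaves like a seqlock. The writer sets the low bit
 * before touching the value and then publishes an even serial, with the
 * length packed into the top byte, using a release store; a reader therefore
 * copies the value and accepts it only if it saw the same even serial before
 * and after the copy. The helper name read_property_value_sketch is
 * illustrative.
 */
static size_t read_property_value_sketch(prop_info* pi, char* out) {
  while (true) {
    uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_acquire);
    if (serial & 1) {
      continue;  // low bit set: a writer is mid-update, retry
    }
    size_t len = serial >> 24;  // length packed into the top bits by the writer
    memcpy(out, pi->value, len);
    out[len] = '\0';
    // Keep the copy ordered before the re-check, then accept only an
    // unchanged serial; the acquire pairs with the writer's release store.
    atomic_thread_fence(memory_order_acquire);
    if (atomic_load_explicit(&pi->serial, memory_order_relaxed) == serial) {
      return len;
    }
  }
}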
static void b(void *obj)
{
  int r2 = atomic_load_explicit(&y, memory_order_relaxed);
  atomic_store_explicit(&x, r2, memory_order_relaxed);
  atomic_store_explicit(&x, r2 + 1, memory_order_relaxed);
  printf("r2=%d\n", r2);
}
void * ccsynch_delegate_or_lock(void* lock, unsigned int messageSize) {
    CCSynchLock *l = (CCSynchLock*)lock;
    CCSynchLockNode *nextNode;
    CCSynchLockNode *curNode;
    ccsynchlock_initLocalIfNeeded();
    nextNode = ccsynchNextLocalNode;
    atomic_store_explicit(&nextNode->next, (uintptr_t)NULL, memory_order_relaxed);
    atomic_store_explicit(&nextNode->wait, 1, memory_order_relaxed);
    nextNode->completed = false;
    curNode = (CCSynchLockNode *)atomic_exchange_explicit(&l->tailPtr.value,
                                                          (uintptr_t)nextNode,
                                                          memory_order_release);
    curNode->messageSize = messageSize;
    curNode->requestFunction = NULL; // Forces helper to stop if it sees this
    atomic_store_explicit(&curNode->next, (uintptr_t)nextNode, memory_order_release);
    ccsynchNextLocalNode = curNode;
    if (atomic_load_explicit(&curNode->wait, memory_order_acquire) == 1) {
        // Someone else has the lock: delegate
        return curNode->tempBuffer;
    } else {
        // We got the lock
        return NULL;
    }
}
int main ()
{
  v = 0;
  count = 0;

  atomic_init (&v, count + 1);
  if (v != ++count)
    abort ();

  atomic_store_explicit (&v, count + 1, memory_order_relaxed);
  if (v != ++count)
    abort ();

  atomic_store_explicit (&v, count + 1, memory_order_release);
  if (v != ++count)
    abort ();

  atomic_store_explicit (&v, count + 1, memory_order_seq_cst);
  if (v != ++count)
    abort ();

  count++;

  atomic_store (&v, count);
  if (v != count)
    abort ();

  return 0;
}
void store (atomic_int *i)
{
  atomic_store_explicit (i, 0, memory_order_consume); /* { dg-warning "invalid memory model" } */
  atomic_store_explicit (i, 0, memory_order_acquire); /* { dg-warning "invalid memory model" } */
  atomic_store_explicit (i, 0, memory_order_acq_rel); /* { dg-warning "invalid memory model" } */
}
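/*
 * For contrast, a companion case (illustrative, not part of the testsuite
 * file above): C11 only permits memory_order_relaxed, memory_order_release
 * and memory_order_seq_cst for atomic_store_explicit, so these three calls
 * are the ones that compile without the "invalid memory model" diagnostic.
 */
void store_valid (atomic_int *i)
{
  atomic_store_explicit (i, 0, memory_order_relaxed);
  atomic_store_explicit (i, 0, memory_order_release);
  atomic_store_explicit (i, 0, memory_order_seq_cst);
}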
static void main_task(void *param)
{
  unsigned int val;
  int pid = *((int *)param);

  idx2 = pop(stack);
  if (idx2 != 0) {
    b = atomic_load_explicit(&x[idx2], relaxed);
    printf("b: %d\n", b);
  }

  if (pid % 4 == 0) {
    atomic_store_explicit(&x[1], 17, relaxed);
    push(stack, 1);
  } else if (pid % 4 == 1) {
    atomic_store_explicit(&x[2], 37, relaxed);
    push(stack, 2);
  } else if (pid % 4 == 2) {
    /*
    idx1 = pop(stack);
    if (idx1 != 0) {
      a = atomic_load_explicit(&x[idx1], relaxed);
      printf("a: %d\n", a);
    }
    */
  } else {
    /*
    idx2 = pop(stack);
    if (idx2 != 0) {
      b = atomic_load_explicit(&x[idx2], relaxed);
      printf("b: %d\n", b);
    }
    */
  }
}
int take(Deque *q)
{
  std::string str1("pop_back"); //ANNOTATION
  function_call(str1, INVOCATION); //ANNOTATION

  size_t b = atomic_load_explicit(&q->bottom, memory_order_seq_cst) - 1;
  Array *a = (Array *) atomic_load_explicit(&q->array, memory_order_seq_cst);
  atomic_store_explicit(&q->bottom, b, memory_order_seq_cst); // relaxed
  atomic_thread_fence(memory_order_seq_cst);
  size_t t = atomic_load_explicit(&q->top, memory_order_seq_cst);
  int x;
  if (t <= b) {
    /* Non-empty queue. */
    x = atomic_load_explicit(&a->buffer[b % atomic_load_explicit(&a->size, memory_order_seq_cst)],
                             memory_order_seq_cst);
    if (t == b) {
      /* Single last element in queue. */
      if (!atomic_compare_exchange_strong_explicit(&q->top, &t, t + 1,
                                                   memory_order_seq_cst,
                                                   memory_order_seq_cst))
        /* Failed race. */
        x = EMPTY;
      atomic_store_explicit(&q->bottom, b + 1, memory_order_seq_cst); // relaxed
    }
  } else {
    /* Empty queue. */
    x = EMPTY;
    atomic_store_explicit(&q->bottom, b + 1, memory_order_seq_cst); // relaxed
  }

  //if (x == EMPTY)
  //  function_call(str1, RESPONSE, (uint64_t) NULL); //ANNOTATION
  //else
  function_call(str1, RESPONSE, (uint64_t) x); //ANNOTATION
  return x;
}
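/*
 * Owner-side push, sketched here as a companion to take()/resize() above in
 * the same all-seq_cst style. This is the textbook Chase-Lev work-stealing
 * push, not code taken from the same file; push_sketch is an illustrative
 * name.
 */
void push_sketch(Deque *q, int x)
{
  size_t b = atomic_load_explicit(&q->bottom, memory_order_seq_cst);
  size_t t = atomic_load_explicit(&q->top, memory_order_seq_cst);
  Array *a = (Array *) atomic_load_explicit(&q->array, memory_order_seq_cst);
  if (b - t > atomic_load_explicit(&a->size, memory_order_seq_cst) - 1) {
    /* Full: grow the buffer, then reload the array pointer. */
    resize(q);
    a = (Array *) atomic_load_explicit(&q->array, memory_order_seq_cst);
  }
  atomic_store_explicit(&a->buffer[b % atomic_load_explicit(&a->size, memory_order_seq_cst)],
                        x, memory_order_seq_cst);
  atomic_store_explicit(&q->bottom, b + 1, memory_order_seq_cst);
}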
void ccsynch_close_delegate_buffer(void * buffer,
                                   void (*funPtr)(unsigned int, void *))
{
    CCSynchLockNode *tmpNode;
    void (*tmpFunPtr)(unsigned int, void *);
    CCSynchLockNode *tmpNodeNext;
    int counter = 0;
    CCSynchLockNode *curNode = ccsynchNextLocalNode;
    curNode->buffer = buffer;
    curNode->requestFunction = funPtr;
    atomic_thread_fence(memory_order_release);
    while (atomic_load_explicit(&curNode->wait, memory_order_acquire) == 1) {
        thread_yield();
    }
    if (curNode->completed == true) {
        return;
    } else {
        funPtr(curNode->messageSize, buffer);
    }
    tmpNode = (CCSynchLockNode *)atomic_load_explicit(&curNode->next, memory_order_acquire);
    while ((tmpNodeNext = (CCSynchLockNode *)atomic_load_explicit(&tmpNode->next, memory_order_acquire)) != NULL
           && counter < CCSYNCH_HAND_OFF_LIMIT) {
        counter = counter + 1;
        tmpFunPtr = tmpNode->requestFunction;
        if (tmpFunPtr == NULL) {
            break;
        }
        tmpFunPtr(tmpNode->messageSize, tmpNode->buffer);
        tmpNode->completed = true;
        atomic_store_explicit(&tmpNode->wait, 0, memory_order_release);
        tmpNode = tmpNodeNext;
    }
    atomic_store_explicit(&tmpNode->wait, 0, memory_order_release);
}
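/*
 * Hypothetical caller-side sketch showing how the two halves above fit
 * together; none of this is taken from the library itself. A non-NULL buffer
 * from ccsynch_delegate_or_lock() means another thread holds the lock: the
 * caller serialises its request into the buffer and hands it off with
 * ccsynch_close_delegate_buffer(), which may also execute requests for other
 * waiters. A NULL return means this thread got the lock and runs the
 * operation directly; the release call named ccsynch_unlock() here is an
 * assumption, use whatever unlock function the library actually provides.
 */
static void counter_add_one(unsigned int messageSize, void *message) {
    (void)messageSize;
    atomic_int *c;
    memcpy(&c, message, sizeof(c));              /* the message holds a pointer */
    atomic_fetch_add_explicit(c, 1, memory_order_relaxed);
}

static void delegated_increment_sketch(void *lock, atomic_int *counter) {
    void *buffer = ccsynch_delegate_or_lock(lock, sizeof(atomic_int *));
    if (buffer != NULL) {
        memcpy(buffer, &counter, sizeof(atomic_int *));   /* serialise request */
        ccsynch_close_delegate_buffer(buffer, counter_add_one);
    } else {
        counter_add_one(sizeof(atomic_int *), &counter);  /* we hold the lock */
        ccsynch_unlock(lock);                             /* assumed release call */
    }
}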
int __system_property_update(prop_info *pi, const char *value, unsigned int len)
{
    prop_area *pa = __system_property_area__;

    if (len >= PROP_VALUE_MAX)
        return -1;

    uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_relaxed);
    serial |= 1;
    atomic_store_explicit(&pi->serial, serial, memory_order_relaxed);
    // The memcpy call here also races. Again pretend it
    // used memory_order_relaxed atomics, and use the analogous
    // counterintuitive fence.
    atomic_thread_fence(memory_order_release);
    memcpy(pi->value, value, len + 1);
    atomic_store_explicit(
        &pi->serial,
        (len << 24) | ((serial + 1) & 0xffffff),
        memory_order_release);
    __futex_wake(&pi->serial, INT32_MAX);

    atomic_store_explicit(
        &pa->serial,
        atomic_load_explicit(&pa->serial, memory_order_relaxed) + 1,
        memory_order_release);
    __futex_wake(&pa->serial, INT32_MAX);

    return 0;
}
void* p0(void *)
{
  atomic_store_explicit(&m1, 1, memory_order_relaxed);
  atomic_store_explicit(&m2, 1, memory_order_relaxed);
  // atomic_thread_fence(memory_order_release);
  atomic_store_explicit(&s1, 1, memory_order_relaxed);
  atomic_store_explicit(&s2, 1, memory_order_relaxed);
  return NULL;
}
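/*
 * Hypothetical observer thread for the fragment above (not part of the
 * original test): with every store relaxed and the release fence in p0
 * commented out, this thread may see s1/s2 as 1 while still reading 0 from
 * m1/m2. Re-enabling the fence in p0 makes the acquire fence below forbid
 * that outcome for whichever flag was observed as 1.
 */
void* p1_sketch(void *arg)
{
  (void)arg;
  int f1 = atomic_load_explicit(&s1, memory_order_relaxed);
  int f2 = atomic_load_explicit(&s2, memory_order_relaxed);
  atomic_thread_fence(memory_order_acquire);
  int d1 = atomic_load_explicit(&m1, memory_order_relaxed);
  int d2 = atomic_load_explicit(&m2, memory_order_relaxed);
  printf("s1=%d s2=%d m1=%d m2=%d\n", f1, f2, d1, d2);
  return NULL;
}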
void ponyint_mpmcq_destroy(mpmcq_t* q)
{
  mpmcq_node_t* tail = atomic_load_explicit(&q->tail, memory_order_relaxed);

  POOL_FREE(mpmcq_node_t, tail);
  atomic_store_explicit(&q->head, NULL, memory_order_relaxed);
  atomic_store_explicit(&q->tail, NULL, memory_order_relaxed);
}
void ponyint_mpmcq_push(mpmcq_t* q, void* data)
{
  mpmcq_node_t* node = POOL_ALLOC(mpmcq_node_t);
  atomic_store_explicit(&node->data, data, memory_order_relaxed);
  atomic_store_explicit(&node->next, NULL, memory_order_relaxed);

  mpmcq_node_t* prev = atomic_exchange_explicit(&q->head, node,
    memory_order_relaxed);

  atomic_store_explicit(&prev->next, node, memory_order_release);
}
int main(void)
{
  if (signal(SIGTERM, SIGTERM_handler) == SIG_ERR)
    perror("signal"), exit(1);

  // ...
  atomic_store_explicit(&data, 100, memory_order_relaxed);
  atomic_signal_fence(memory_order_release);
  atomic_store_explicit(&guide, 1, memory_order_relaxed);
  // ...
  return 0;
}
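/*
 * Hypothetical handler-side sketch (the original SIGTERM_handler is not shown
 * in the snippet above): atomic_signal_fence only orders accesses against a
 * signal handler running on the same thread, so after observing guide == 1
 * and issuing an acquire signal fence, the handler can rely on data == 100
 * being visible. The handler body and local names are illustrative.
 */
void SIGTERM_handler_sketch(int sig)
{
  (void)sig;
  if (atomic_load_explicit(&guide, memory_order_relaxed) == 1) {
    atomic_signal_fence(memory_order_acquire);
    // The relaxed store of 100 to data in main is ordered before the store to
    // guide by the release signal fence, so it is visible here.
    int observed = atomic_load_explicit(&data, memory_order_relaxed);
    (void)observed;
  }
}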
void ponyint_mpmcq_push_single(mpmcq_t* q, void* data)
{
  mpmcq_node_t* node = POOL_ALLOC(mpmcq_node_t);
  atomic_store_explicit(&node->data, data, memory_order_relaxed);
  atomic_store_explicit(&node->next, NULL, memory_order_relaxed);

  // If we have a single producer, the swap of the head need not be atomic RMW.
  mpmcq_node_t* prev = atomic_load_explicit(&q->head, memory_order_relaxed);
  atomic_store_explicit(&q->head, node, memory_order_relaxed);

  atomic_store_explicit(&prev->next, node, memory_order_release);
}
void ponyint_messageq_init(messageq_t* q)
{
  pony_msg_t* stub = POOL_ALLOC(pony_msg_t);
  stub->index = POOL_INDEX(sizeof(pony_msg_t));
  atomic_store_explicit(&stub->next, NULL, memory_order_relaxed);

  atomic_store_explicit(&q->head, (pony_msg_t*)((uintptr_t)stub | 1),
    memory_order_relaxed);
  q->tail = stub;

#ifndef NDEBUG
  messageq_size_debug(q);
#endif
}
bool ponyint_messageq_push(messageq_t* q, pony_msg_t* m)
{
  atomic_store_explicit(&m->next, NULL, memory_order_relaxed);

  pony_msg_t* prev = atomic_exchange_explicit(&q->head, m,
    memory_order_relaxed);

  bool was_empty = ((uintptr_t)prev & 1) != 0;
  prev = (pony_msg_t*)((uintptr_t)prev & ~(uintptr_t)1);

  atomic_store_explicit(&prev->next, m, memory_order_release);

  return was_empty;
}
prop_bt *prop_area::find_prop_bt(prop_bt *const bt, const char *name,
                                 uint8_t namelen, bool alloc_if_needed)
{
  prop_bt* current = bt;
  while (true) {
    if (!current) {
      return NULL;
    }

    const int ret = cmp_prop_name(name, namelen, current->name, current->namelen);
    if (ret == 0) {
      return current;
    }

    if (ret < 0) {
      uint_least32_t left_offset = atomic_load_explicit(&current->left,
                                                        memory_order_relaxed);
      if (left_offset != 0) {
        current = to_prop_bt(&current->left);
      } else {
        if (!alloc_if_needed) {
          return NULL;
        }

        uint_least32_t new_offset;
        prop_bt* new_bt = new_prop_bt(name, namelen, &new_offset);
        if (new_bt) {
          atomic_store_explicit(&current->left, new_offset, memory_order_release);
        }
        return new_bt;
      }
    } else {
      uint_least32_t right_offset = atomic_load_explicit(&current->right,
                                                         memory_order_relaxed);
      if (right_offset != 0) {
        current = to_prop_bt(&current->right);
      } else {
        if (!alloc_if_needed) {
          return NULL;
        }

        uint_least32_t new_offset;
        prop_bt* new_bt = new_prop_bt(name, namelen, &new_offset);
        if (new_bt) {
          atomic_store_explicit(&current->right, new_offset, memory_order_release);
        }
        return new_bt;
      }
    }
  }
}
static void *Worker( void *arg ) {
    TYPE id = (size_t)arg;
    uint64_t entry;
#ifdef FAST
    unsigned int cnt = 0, oid = id;
#endif // FAST

    for ( int r = 0; r < RUNS; r += 1 ) {
        entry = 0;
        while ( atomic_load(&stop) == 0 ) {
            atomic_store(&states[id*PADRATIO], LOCKED);
            while (1) {
                int lturn = atomic_load(&turn);
                if (!validate_left(id, lturn)) {
                    atomic_store(&states[id*PADRATIO], WAITING);
                    while (1) {
                        if (validate_left(id, lturn) &&
                            lturn == atomic_load_explicit(&turn, memory_order_acquire))
                            break;
                        Pause();
                        lturn = atomic_load_explicit(&turn, memory_order_acquire);
                    }
                    atomic_store(&states[id*PADRATIO], LOCKED);
                    continue;
                }
                while (lturn == atomic_load_explicit(&turn, memory_order_acquire)) {
                    if (validate_right(id, lturn)) break;
                    Pause();
                }
                if (lturn == atomic_load_explicit(&turn, memory_order_acquire)) break;
            }
            CriticalSection( id );                          // critical section
            int lturn = (atomic_load_explicit(&turn, memory_order_relaxed) + 1) % N;
            atomic_store_explicit(&turn, lturn, memory_order_relaxed);
            atomic_store_explicit(&states[id*PADRATIO], UNLOCKED,
                                  memory_order_release);    // exit protocol
#ifdef FAST
            id = startpoint( cnt );                         // different starting point each experiment
            cnt = cycleUp( cnt, NoStartPoints );
#endif // FAST
            entry += 1;
        } // while
#ifdef FAST
        id = oid;
#endif // FAST
        entries[r][id] = entry;
        atomic_fetch_add( &Arrived, 1 );
        while ( atomic_load(&stop) != 0 ) Pause();
        atomic_fetch_add( &Arrived, -1 );
    } // for
    return NULL;
} // Worker
void resize(Deque *q)
{
  Array *a = (Array *) atomic_load_explicit(&q->array, memory_order_seq_cst);
  size_t size = atomic_load_explicit(&a->size, memory_order_seq_cst);
  size_t new_size = size << 1;
  Array *new_a = (Array *) calloc(1, new_size * sizeof(atomic_int) + sizeof(Array));
  size_t top = atomic_load_explicit(&q->top, memory_order_seq_cst);
  size_t bottom = atomic_load_explicit(&q->bottom, memory_order_seq_cst);
  atomic_store_explicit(&new_a->size, new_size, memory_order_seq_cst);
  size_t i;
  for (i = top; i < bottom; i++) {
    atomic_store_explicit(&new_a->buffer[i % new_size],
                          atomic_load_explicit(&a->buffer[i % size], memory_order_seq_cst),
                          memory_order_seq_cst);
  }
  atomic_store_explicit(&q->array, (long unsigned int) new_a, memory_order_seq_cst);
  printf("resize\n");
}
/**
 * Process upstream p2p bandwidth limits.
 *
 * @param[in] sess Session.
 * @param[in] packet_len Packet length.
 * @param[in] iph IP header.
 * @param[in] l4 L4 data (ports).
 * @param[in] flow_dir Flow direction.
 * @return Zero on pass.
 */
static int packet_process_p2p_ipv4(struct zsession *sess, size_t packet_len,
                                   struct ip *iph, struct l4_data *l4,
                                   enum flow_dir flow_dir)
{
    if (PROTO_MAX == l4->proto) {
        return 0;
    }

    uint16_t port = ntohs((DIR_UP == flow_dir) ? *l4->dst_port : *l4->src_port);

    pthread_rwlock_rdlock(&sess->lock_client);

    // p2p policing enabled, port greater than 1024 and not whitelisted
    if (sess->client->p2p_policy && (port >= 1024) &&
        !utarray_find(&zcfg()->p2p_ports_whitelist, &port, uint16_cmp)) {
        uint64_t speed = spdm_calc(&sess->client->speed[flow_dir]);
        // 1/4 of bw limit
        uint64_t throttle_speed = token_bucket_get_max(&sess->client->band[flow_dir]) / 4;

        uint64_t diff = zclock(false) - sess->client->last_p2p_throttle;
        if ((speed > throttle_speed) || (diff < P2P_THROTTLE_TIME)) {
            unsigned upstream_id = IPTOS_DSCP(iph->ip_tos) >> 2;
            struct token_bucket *bucket = &zinst()->upstreams[upstream_id].band[flow_dir];
            if (0 != token_bucket_update(bucket, packet_len)) {
                pthread_rwlock_unlock(&sess->lock_client);
                return -1;
            }

            struct speed_meter *spd = &zinst()->upstreams[upstream_id].speed[flow_dir];
            spdm_update(spd, packet_len);

            diff = zclock(false) - atomic_load_explicit(&sess->client->last_p2p_throttle,
                                                        memory_order_acquire);
            if (diff > P2P_THROTTLE_TIME) {
                atomic_store_explicit(&sess->client->last_p2p_throttle, zclock(false),
                                      memory_order_release);
            }
        }
    }

    // NOTE: the original excerpt was truncated here; releasing the read lock
    // and returning zero (pass) is the assumed tail of the function.
    pthread_rwlock_unlock(&sess->lock_client);
    return 0;
}
int __system_property_add(const char *name, unsigned int namelen,
                          const char *value, unsigned int valuelen)
{
    prop_area *pa = __system_property_area__;
    const prop_info *pi;

    if (namelen >= PROP_NAME_MAX)
        return -1;
    if (valuelen >= PROP_VALUE_MAX)
        return -1;
    if (namelen < 1)
        return -1;

    pi = find_property(root_node(), name, namelen, value, valuelen, true);
    if (!pi)
        return -1;

    // There is only a single mutator, but we want to make sure that
    // updates are visible to a reader waiting for the update.
    atomic_store_explicit(
        &pa->serial,
        atomic_load_explicit(&pa->serial, memory_order_relaxed) + 1,
        memory_order_release);
    __futex_wake(&pa->serial, INT32_MAX);
    return 0;
}
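/*
 * Hedged reader-side sketch of the global serial the release store above
 * publishes: callers that cache property lookups can poll this serial with an
 * acquire load and rescan only when it changes. A real reader would
 * additionally sleep on the futex that __futex_wake signals rather than poll;
 * the helper name properties_changed_sketch is illustrative.
 */
static bool properties_changed_sketch(prop_area *pa, uint32_t *cached_serial)
{
    uint32_t serial = atomic_load_explicit(&pa->serial, memory_order_acquire);
    if (serial == *cached_serial)
        return false;
    *cached_serial = serial;
    return true;
}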
/* Test for consistency on sizes 1, 2, 4, 8, 16 and 32.  */
int main ()
{
  test_struct c;

  atomic_store_explicit (&a, zero, memory_order_relaxed);
  if (memcmp (&a, &zero, size))
    abort ();

  c = atomic_exchange_explicit (&a, ones, memory_order_seq_cst);
  if (memcmp (&c, &zero, size))
    abort ();
  if (memcmp (&a, &ones, size))
    abort ();

  b = atomic_load_explicit (&a, memory_order_relaxed);
  if (memcmp (&b, &ones, size))
    abort ();

  if (!atomic_compare_exchange_strong_explicit (&a, &b, zero,
                                                memory_order_seq_cst,
                                                memory_order_acquire))
    abort ();
  if (memcmp (&a, &zero, size))
    abort ();

  if (atomic_compare_exchange_weak_explicit (&a, &b, ones,
                                             memory_order_seq_cst,
                                             memory_order_acquire))
    abort ();
  if (memcmp (&b, &zero, size))
    abort ();

  return 0;
}
bool ponyint_sched_start(bool library)
{
  this_scheduler = NULL;

  if(!ponyint_asio_start())
    return false;

  atomic_store_explicit(&detect_quiescence, !library, memory_order_relaxed);

  uint32_t start = 0;

  if(library)
  {
    pony_register_thread();
  }

  for(uint32_t i = start; i < scheduler_count; i++)
  {
    if(!pony_thread_create(&scheduler[i].tid, run_thread, scheduler[i].cpu,
      &scheduler[i]))
      return false;
  }

  if(!library)
  {
    ponyint_sched_shutdown();
  }

  return true;
}
TEST(stdatomic, atomic_store) {
  atomic_int i;
  atomic_store(&i, 123);
  ASSERT_EQ(123, atomic_load(&i));
  atomic_store_explicit(&i, 123, memory_order_relaxed);
  ASSERT_EQ(123, atomic_load_explicit(&i, memory_order_relaxed));
}
static void _compile_task(void *data) {
    struct compile_task_data *tdata = data;

    log_info("resource_compiler.task",
             "Compile resource \"%s\" to \"" "%" SDL_PRIX64 "%" SDL_PRIX64 "\"",
             tdata->source_filename, tdata->type.id, tdata->name.id);

    if (tdata->compilator(tdata->source_filename, tdata->source, tdata->build,
                          &_compilator_api)) {
        builddb_set_file(tdata->source_filename, tdata->mtime);
        builddb_set_file_depend(tdata->source_filename, tdata->source_filename);

        log_info("resource_compiler.task",
                 "Resource \"%s\" compiled", tdata->source_filename);
    } else {
        log_error("resource_compiler.task",
                  "Resource \"%s\" compilation failed", tdata->source_filename);
    }

    CEL_DEALLOCATE(memsys_main_scratch_allocator(), tdata->source_filename);

    cel_vio_close(tdata->source);
    cel_vio_close(tdata->build);

    atomic_store_explicit(&tdata->completed, 1, memory_order_release);
}
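/*
 * Sketch of the waiting side (assumed, not taken from the same file): the
 * submitter polls the completed flag with acquire loads, which pair with the
 * release store at the end of _compile_task so that everything the task wrote
 * (build files, logs, database entries) is visible once the flag reads 1.
 */
static void wait_for_compile_task_sketch(struct compile_task_data *tdata) {
    while (atomic_load_explicit(&tdata->completed, memory_order_acquire) == 0) {
        // Yield or do other work; a real scheduler would not spin blindly.
    }
}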
void _queueNode(stpcProxy* proxy, stpcNode* newNode) {
    sequencedPtr oldTail;
    sequencedPtr newTail;
    stpcNode *tailNode, *next;
    bool rc, rc2;
    long attempts = 0;

    newNode->count = GUARD_BIT + 2 * REFERENCE;

    /*
     * monkey through the trees queuing trick
     */
    newTail.ptr = newNode;
    newTail.sequence = 0;
    oldTail.ival = atomic_load_explicit(&proxy->tail.ival, memory_order_consume);
    do {
        attempts++;
    } while (!atomic_compare_exchange_strong_explicit(&proxy->tail.ival,
                                                      &oldTail.ival, newTail.ival,
                                                      memory_order_acq_rel,
                                                      memory_order_acquire));
    atomic_store_explicit(&oldTail.ptr->next, newNode, memory_order_relaxed);

    // update old node's reference count by number of acquired references,
    // clear guard bit, and drop ref acquired from tail pointer
    _dropProxyNodeReference(proxy, oldTail.ptr, (oldTail.sequence - GUARD_BIT));

    stats_t *stats = stpcGetLocalStats(proxy);
    stats->tries++;              // _addNode invocations
    stats->attempts += attempts; // tail enqueue attempts
}
int mb__system_property_add(const char *name, unsigned int namelen,
                            const char *value, unsigned int valuelen)
{
    if (namelen >= PROP_NAME_MAX)
        return -1;
    if (valuelen >= PROP_VALUE_MAX)
        return -1;
    if (namelen < 1)
        return -1;

    if (!mb__system_property_area__) {
        return -1;
    }

    prop_area* pa = get_prop_area_for_name(name);
    if (!pa) {
        LOGE("Access denied adding property \"%s\"", name);
        return -1;
    }

    bool ret = pa->add(name, namelen, value, valuelen);
    if (!ret)
        return -1;

    // There is only a single mutator, but we want to make sure that
    // updates are visible to a reader waiting for the update.
    atomic_store_explicit(
        mb__system_property_area__->serial(),
        atomic_load_explicit(mb__system_property_area__->serial(),
                             memory_order_relaxed) + 1,
        memory_order_release);
    __futex_wake(mb__system_property_area__->serial(), INT32_MAX);
    return 0;
}
void* ponyint_mpmcq_pop(mpmcq_t* q)
{
  size_t my_ticket = atomic_fetch_add_explicit(&q->ticket, 1,
    memory_order_relaxed);

  while(my_ticket != atomic_load_explicit(&q->waiting_for,
    memory_order_relaxed))
    ponyint_cpu_relax();

  atomic_thread_fence(memory_order_acquire);

  mpmcq_node_t* tail = atomic_load_explicit(&q->tail, memory_order_relaxed);

  // Get the next node rather than the tail. The tail is either a stub or has
  // already been consumed.
  mpmcq_node_t* next = atomic_load_explicit(&tail->next, memory_order_relaxed);

  // Bailout if we have no next node.
  if(next == NULL)
  {
    atomic_store_explicit(&q->waiting_for, my_ticket + 1, memory_order_relaxed);
    return NULL;
  }

  atomic_store_explicit(&q->tail, next, memory_order_relaxed);
  atomic_store_explicit(&q->waiting_for, my_ticket + 1, memory_order_release);

  // Synchronise-with the push.
  atomic_thread_fence(memory_order_acquire);

  // We'll return the data pointer from the next node.
  void* data = atomic_load_explicit(&next->data, memory_order_relaxed);

  // Since we will be freeing the old tail, we need to be sure no other
  // consumer is still reading the old tail. To do this, we set the data
  // pointer of our new tail to NULL, and we wait until the data pointer of
  // the old tail is NULL.
  atomic_store_explicit(&next->data, NULL, memory_order_release);

  while(atomic_load_explicit(&tail->data, memory_order_relaxed) != NULL)
    ponyint_cpu_relax();

  atomic_thread_fence(memory_order_acquire);

  // Free the old tail. The new tail is the next node.
  POOL_FREE(mpmcq_node_t, tail);

  return data;
}