// Issues a memory fence of the requested strength.
//
// Non-specialized arch, use heavier memory barriers everywhere just in case :-(
// Each non-relaxed ordering brackets a hardware sync (AeLiteSync, or AeFullSync
// for seq_cst) with the matching compiler barrier intrinsic on both sides.
// Any ordering that is not handled below trips the assert.
AE_FORCEINLINE void fence(memory_order order)
{
    if (order == memory_order_relaxed) {
        // Nothing to do for relaxed ordering.
        return;
    }
    if (order == memory_order_acquire) {
        _ReadBarrier();
        AeLiteSync();
        _ReadBarrier();
        return;
    }
    if (order == memory_order_release) {
        _WriteBarrier();
        AeLiteSync();
        _WriteBarrier();
        return;
    }
    if (order == memory_order_acq_rel) {
        _ReadWriteBarrier();
        AeLiteSync();
        _ReadWriteBarrier();
        return;
    }
    if (order == memory_order_seq_cst) {
        _ReadWriteBarrier();
        AeFullSync();
        _ReadWriteBarrier();
        return;
    }
    assert(false);
}
/*
 * Runs `callback` once for a given `control` word.
 *
 * On ACE_WINDOWS the control word is used as a small state machine:
 *   0 - callback not started, 2 - callback claimed / running, 1 - finished.
 * The caller that swaps 0 -> 2 runs the callback and then stores 1; every
 * other caller spins until it observes 1.  On other platforms the call is
 * forwarded to pthread_once().
 *
 * Returns 0 on the Windows path, or whatever pthread_once() returns.
 */
int thread_once(thread_control_t *control, void (*callback)(void))
{
#ifdef ACE_WINDOWS
    int seen = (int)(*control);
    _ReadWriteBarrier();

    while (seen != 1) {
        if (seen == 0) {
            /* Try to claim the right to run the callback. */
            if (_InterlockedCompareExchange(control, 2, 0) == 0) {
                callback();
                *control = 1;
                return 0;
            }
        }

        /* Someone else owns the initialisation: back off and re-read. */
        YieldProcessor();
        _ReadWriteBarrier();
        seen = (int)(*control);
    }

    return 0;
#else
    return pthread_once(control, callback);
#endif
}
/*
 * Runs `func` once for a given `once` word.
 *
 * The word holds 0 before anyone has started, 2 while the winning caller is
 * inside `func`, and 1 once `func` has returned.  Callers that lose the
 * 0 -> 2 exchange spin (yielding the processor) until they read 1.
 *
 * Always returns 0.
 */
int pthread_once(pthread_once_t *once, void (*func)(void))
{
    long seen = *once;
    _ReadWriteBarrier();

    while (seen != 1) {
        /* Only attempt the exchange when the word still looked untouched. */
        if (seen == 0 && InterlockedCompareExchange(once, 2, 0) == 0) {
            func();
            *once = 1;
            return 0;
        }

        YieldProcessor();
        _ReadWriteBarrier();
        seen = *once;
    }

    return 0;
}
/*
 * Runs `func` once for a given `o` word.
 *
 * State values stored in *o: 0 = not started, 2 = claimed by the caller that
 * is currently running `func`, 1 = done.  Losers of the 0 -> 2 exchange spin
 * until the word reads 1.
 *
 * Always returns 0.
 */
int _pthread_once_raw(pthread_once_t *o, void (*func)(void))
{
    long seen = *o;
    _ReadWriteBarrier();

    while (seen != 1) {
        if (seen == 0 && _InterlockedCompareExchange(o, 2, 0) == 0) {
            /* We won the exchange: run the initialiser ... */
            func();
            /* ... and publish completion. */
            *o = 1;
            return 0;
        }

        YieldProcessor();
        _ReadWriteBarrier();
        seen = *o;
    }

    /* Already initialised by someone else. */
    return 0;
}
/// Reads the stored value and returns it converted to T.
///
/// The read of value_ is bracketed by _ReadWriteBarrier() on both sides.
/// @param order  Requested ordering; RELEASE, CONSUME and ACQ_REL are
///               rejected by assertion. The value is otherwise unused here.
/// @return The current contents of value_, cast to T.
inline T load(MemoryOrder order = MEMORY_ORDER_SEQ_CST) const volatile
{
    assert(order != MEMORY_ORDER_RELEASE);
    assert(order != MEMORY_ORDER_CONSUME);
    assert(order != MEMORY_ORDER_ACQ_REL);

    _ReadWriteBarrier();
    T snapshot = static_cast<T>(value_);
    _ReadWriteBarrier();

    return snapshot;
}
/**
  Writes a 32-bit value to an I/O port without checking the port's alignment.

  Port is truncated to 16 bits before being handed to the _outpd intrinsic.
  The port write is bracketed by _ReadWriteBarrier() so the compiler does not
  move other memory accesses across it.

  @param[in]  Port   I/O port address.
  @param[in]  Value  32-bit word to write.

  @return The value written to the I/O port (Value, unchanged).

**/
UINT32
UnalignedIoWrite32 (
  IN      UINTN   Port,
  IN      UINT32  Value
  )
{
  _ReadWriteBarrier ();
  _outpd ((UINT16)Port, Value);
  _ReadWriteBarrier ();

  return Value;
}
/*
 * Sends up to buf_len bytes from buf over the stream as a sequence of
 * STREAM_MSG_DATA messages.
 *
 * Each round asks the channel how much producer space is available, prepares
 * a message of at most that size, and copies the next slice of buf into it.
 * When v2v_nc2_prep_message() reports STATUS_RETRY the incoming messages are
 * drained into the local queue, anything already prepared is pushed out, and
 * the function blocks on the send / control / receive events before trying
 * again.  Any other preparation failure ends the send with that status.
 *
 * *_bytes_sent receives the number of bytes copied into messages, including
 * on failure.  Returns the last status: STATUS_VIRTUAL_CIRCUIT_CLOSED if the
 * control event fired and the remote has disconnected.
 *
 * NOTE(review): the per-message size is narrowed to unsigned short; if the
 * channel can ever report 64 KiB or more of free space the narrowing wraps.
 * Confirm the ring size bounds this.
 */
NTSTATUS
v2v_stream_send(struct v2v_stream *stream, const void *buf,
                size_t buf_len, size_t *_bytes_sent)
{
    NTSTATUS status = STATUS_SUCCESS;
    unsigned total_sent = 0;
    unsigned short chunk;
    unsigned room;
    volatile void *slot;
    HANDLE wait_objects[3];

    wait_objects[0] = stream->send_event;
    wait_objects[1] = stream->control_event;
    wait_objects[2] = stream->receive_event;

    while (total_sent < buf_len) {
        room = v2v_nc2_producer_bytes_available(stream->channel);
        if (v2v_nc2_remote_requested_fast_wakeup(stream->channel))
            room = MIN(MAX_INLINE_BYTES, room);
        chunk = (unsigned short)MIN(room, buf_len - total_sent);

        status = v2v_nc2_prep_message(stream->channel, chunk,
                                      STREAM_MSG_DATA, 0, &slot);
        if (NT_SUCCESS(status)) {
            /* Fill the prepared message with the next slice of the buffer. */
            _ReadWriteBarrier();
            RtlCopyMemory((void *)slot,
                          (const void *)((ULONG_PTR)buf + total_sent),
                          chunk);
            _ReadWriteBarrier();
            total_sent += chunk;
            continue;
        }

        if (status != STATUS_RETRY)
            break; /* end send with error status */

        /* Ring is full: unblock the remote, flush what we have, and wait. */
        v2v_pull_incoming_messages(stream);
        if (total_sent != 0)
            v2v_nc2_send_messages(stream->channel);

        status = KeWaitForMultipleObjects(3, wait_objects, WaitAny, Executive,
                                          KernelMode, FALSE, NULL, NULL);
        if (status == STATUS_WAIT_0 + 1 && v2v_remote_disconnect(stream)) {
            status = STATUS_VIRTUAL_CIRCUIT_CLOSED;
            break;
        }
        /* Nothing was sent this round; go around and try again. */
    }

    if (total_sent != 0)
        v2v_nc2_send_messages(stream->channel);

    *_bytes_sent = total_sent;
    return status;
}
// Emits only the compiler barrier intrinsic matching the requested ordering:
// nothing for relaxed, _ReadBarrier for acquire, _WriteBarrier for release,
// and _ReadWriteBarrier for both acq_rel and seq_cst.
// Any other ordering trips the assert.
AE_FORCEINLINE void compiler_fence(memory_order order)
{
    switch (order) {
    case memory_order_relaxed:
        break;

    case memory_order_acquire:
        _ReadBarrier();
        break;

    case memory_order_release:
        _WriteBarrier();
        break;

    // Both orderings need the same full compiler barrier.
    case memory_order_acq_rel:
    case memory_order_seq_cst:
        _ReadWriteBarrier();
        break;

    default:
        assert(false);
    }
}
/**
  Reads a 32-bit value from an I/O port without checking the port's alignment.

  Port is truncated to 16 bits before being handed to the _inpd intrinsic.
  The port read is bracketed by _ReadWriteBarrier() so the compiler does not
  move other memory accesses across it.

  @param[in]  Port  The I/O port to read.

  @return The value read.

**/
UINT32
UnalignedIoRead32 (
  IN      UINTN  Port
  )
{
  UINT32  Data;

  _ReadWriteBarrier ();
  Data = _inpd ((UINT16)Port);
  _ReadWriteBarrier ();

  return Data;
}
/// Writes `value` into value_.
///
/// For MEMORY_ORDER_SEQ_CST the write goes through Impl::exchange(); for
/// every other accepted ordering it is a plain assignment bracketed by
/// _ReadWriteBarrier().
/// @param value  New value, converted to ImplType before being stored.
/// @param order  Requested ordering; ACQUIRE, CONSUME and ACQ_REL are
///               rejected by assertion.
inline void store(T value, MemoryOrder order = MEMORY_ORDER_SEQ_CST) volatile
{
    assert(order != MEMORY_ORDER_ACQUIRE);
    assert(order != MEMORY_ORDER_CONSUME);
    assert(order != MEMORY_ORDER_ACQ_REL);

    if (order == MEMORY_ORDER_SEQ_CST) {
        Impl::exchange(value_, static_cast<ImplType>(value));
        return;
    }

    _ReadWriteBarrier();
    value_ = static_cast<ImplType>(value);
    _ReadWriteBarrier();
}
/**
  Writes an 8-bit value to an I/O port.

  Port is truncated to 16 bits before being handed to the _outp intrinsic; the
  intrinsic's own return value is discarded.  The port write is bracketed by
  _ReadWriteBarrier() so the compiler does not move other memory accesses
  across it.

  @param  Port   The I/O port to write.
  @param  Value  The value to write to the I/O port.

  @return The value written to the I/O port (Value, unchanged).

**/
UINT8
EFIAPI
IoWrite8 (
  IN      UINTN  Port,
  IN      UINT8  Value
  )
{
  _ReadWriteBarrier ();
  _outp ((UINT16)Port, Value);
  _ReadWriteBarrier ();

  return Value;
}
/*
 * Sends up to buf_len bytes from buf over the stream as a sequence of
 * STREAM_MSG_DATA messages.
 *
 * Each round asks the channel how much producer space is available, prepares
 * a message of at most that size, and copies the next slice of buf into it.
 * When v2v_nc2_prep_message() fails with ERROR_RETRY the incoming messages
 * are drained into the local queue, anything already prepared is pushed out,
 * and the function blocks on the send / control / receive events before
 * trying again.  Any other preparation failure ends the send.
 *
 * *_bytes_sent receives the number of bytes copied into messages, including
 * on failure.  Returns TRUE on success and FALSE on failure; when the control
 * event fired and the remote has disconnected, the last error is set to
 * ERROR_VC_DISCONNECTED.
 *
 * NOTE(review): the per-message size is narrowed to unsigned short; if the
 * channel can ever report 64 KiB or more of free space the narrowing wraps.
 * Confirm the ring size bounds this.
 */
BOOL
v2v_stream_send(struct v2v_stream *stream, const void *buf,
                size_t buf_len, size_t *_bytes_sent)
{
    unsigned total_sent = 0;
    unsigned short chunk;
    unsigned room;
    volatile void *slot;
    BOOL ok = TRUE;
    HANDLE wait_objects[3];
    DWORD wait_result;

    wait_objects[0] = stream->send_event;
    wait_objects[1] = stream->control_event;
    wait_objects[2] = stream->receive_event;

    while (total_sent < buf_len) {
        room = v2v_nc2_producer_bytes_available(stream->channel);
        if (v2v_nc2_remote_requested_fast_wakeup(stream->channel))
            room = MIN(MAX_INLINE_BYTES, room);
        chunk = (unsigned short)MIN(room, buf_len - total_sent);

        if (v2v_nc2_prep_message(stream->channel, chunk,
                                 STREAM_MSG_DATA, 0, &slot)) {
            /* Fill the prepared message with the next slice of the buffer. */
            _ReadWriteBarrier();
            memcpy((void *)slot,
                   (const void *)((ULONG_PTR)buf + total_sent),
                   chunk);
            _ReadWriteBarrier();
            total_sent += chunk;
            continue;
        }

        if (GetLastError() != ERROR_RETRY) {
            ok = FALSE;
            break;
        }

        /* Ring is full: unblock the remote, flush what we have, and wait. */
        pull_incoming_messages(stream);
        if (total_sent != 0)
            v2v_nc2_send_messages(stream->channel);

        wait_result = WaitForMultipleObjects(3, wait_objects, FALSE, INFINITE);
        if (wait_result == WAIT_OBJECT_0 + 1 && remote_disconnect(stream)) {
            SetLastError(ERROR_VC_DISCONNECTED);
            ok = FALSE;
            break;
        }
        /* Nothing was sent this round; go around and try again. */
    }

    if (total_sent != 0)
        v2v_nc2_send_messages(stream->channel);

    *_bytes_sent = total_sent;
    return ok;
}
/**
  Reads an 8-bit value from an I/O port.

  Port is truncated to 16 bits before being handed to the _inp intrinsic, and
  the intrinsic's result is narrowed to UINT8.  The port read is bracketed by
  _ReadWriteBarrier() so the compiler does not move other memory accesses
  across it.

  @param  Port  The I/O port to read.

  @return The value read.

**/
UINT8
EFIAPI
IoRead8 (
  IN      UINTN  Port
  )
{
  UINT8  Data;

  _ReadWriteBarrier ();
  Data = (UINT8)_inp ((UINT16)Port);
  _ReadWriteBarrier ();

  return Data;
}
/**
  Writes a 16-bit value to an I/O port.

  Asserts that Port is aligned on a 16-bit boundary.  Port is then truncated
  to 16 bits before being handed to the _outpw intrinsic.  The port write is
  bracketed by _ReadWriteBarrier() so the compiler does not move other memory
  accesses across it.

  @param  Port   The I/O port to write.
  @param  Value  The value to write to the I/O port.

  @return The value written to the I/O port (Value, unchanged).

**/
UINT16
EFIAPI
IoWrite16 (
  IN      UINTN   Port,
  IN      UINT16  Value
  )
{
  ASSERT ((Port & 1) == 0);

  _ReadWriteBarrier ();
  _outpw ((UINT16)Port, Value);
  _ReadWriteBarrier ();

  return Value;
}
/*
  Puts the spin lock into the released state and hands the same pointer back.

  Asserts that SpinLock is not NULL.  The store of SPIN_LOCK_RELEASED is
  bracketed by _ReadWriteBarrier() so the compiler does not move other memory
  accesses across it.

  NOTE(review): the return type is declared before this point and is not
  visible here; the function returns the SpinLock argument itself.
*/
EFIAPI
InitializeSpinLock (
  OUT SPIN_LOCK *SpinLock
  )
{
  ASSERT (SpinLock != NULL);

  _ReadWriteBarrier();
  *SpinLock = SPIN_LOCK_RELEASED;
  _ReadWriteBarrier();

  return SpinLock;
}
/**
  Writes a 32-bit value to an I/O port.

  Asserts that Port is aligned on a 32-bit boundary.  Port is then truncated
  to 16 bits before being handed to the _outpd intrinsic.  The port write is
  bracketed by _ReadWriteBarrier() so the compiler does not move other memory
  accesses across it.

  @param  Port   The I/O port to write.
  @param  Value  The value to write to the I/O port.

  @return The value written to the I/O port (Value, unchanged).

**/
UINT32
EFIAPI
IoWrite32 (
  IN      UINTN   Port,
  IN      UINT32  Value
  )
{
  ASSERT ((Port & 3) == 0);

  _ReadWriteBarrier ();
  _outpd ((UINT16)Port, Value);
  _ReadWriteBarrier ();

  return Value;
}
/* This is a bit skanky. If we're transmitting, and we need to block because the ring's full, we first pull all of the *incoming* messages off of the ring into local buffers. This unblocks the remote, which helps to avoid deadlocks. */ static void v2v_pull_incoming_messages(struct v2v_stream *stream) { struct queued_message *qm; const volatile void *payload; size_t size; unsigned type; unsigned flags; /* If we're processing an in-ring message, copy it out of the ring and into the local queue. */ if (stream->recv_state.current_message == &stream->recv_state.in_ring_message) { qm = ExAllocatePoolWithTag((stream->nonpaged ? NonPagedPool : PagedPool), sizeof(*qm) + stream->recv_state.in_ring_message.size, V2V_TAG); *qm = stream->recv_state.in_ring_message; qm->payload = qm + 1; _ReadWriteBarrier(); RtlCopyMemory(qm->payload, stream->recv_state.in_ring_message.payload, qm->size); _ReadWriteBarrier(); stream->recv_state.current_message = qm; v2v_nc2_finish_message(stream->channel); stream->recv_state.current_message = qm; } /* Pull all of the messages out of the ring and into the local queue. */ while (v2v_nc2_get_message(stream->channel, &payload, &size, &type, &flags) == STATUS_SUCCESS) { qm = ExAllocatePoolWithTag((stream->nonpaged ? NonPagedPool : PagedPool), sizeof(*qm) + size, V2V_TAG); qm->next = NULL; qm->size = size; qm->type = type; qm->flags = flags; qm->bytes_already_used = 0; qm->payload = qm + 1; _ReadWriteBarrier(); RtlCopyMemory(qm->payload, (const void *)payload, size); _ReadWriteBarrier(); v2v_nc2_finish_message(stream->channel); *stream->recv_state.queue.tail = qm; stream->recv_state.queue.tail = &qm->next; } }
/**
  Reads a 32-bit value from an I/O port.

  Asserts that Port is aligned on a 32-bit boundary.  Port is then truncated
  to 16 bits before being handed to the _inpd intrinsic.  The port read is
  bracketed by _ReadWriteBarrier() so the compiler does not move other memory
  accesses across it.

  @param  Port  The I/O port to read.

  @return The value read.

**/
UINT32
EFIAPI
IoRead32 (
  IN      UINTN  Port
  )
{
  UINT32  Data;

  ASSERT ((Port & 3) == 0);

  _ReadWriteBarrier ();
  Data = _inpd ((UINT16)Port);
  _ReadWriteBarrier ();

  return Data;
}
/**
  Reads a 16-bit value from an I/O port.

  Asserts that Port is aligned on a 16-bit boundary.  Port is then truncated
  to 16 bits before being handed to the _inpw intrinsic.  The port read is
  bracketed by _ReadWriteBarrier() so the compiler does not move other memory
  accesses across it.

  @param  Port  The I/O port to read.

  @return The value read.

**/
UINT16
EFIAPI
IoRead16 (
  IN      UINTN  Port
  )
{
  UINT16  Data;

  ASSERT ((Port & 1) == 0);

  _ReadWriteBarrier ();
  Data = _inpw ((UINT16)Port);
  _ReadWriteBarrier ();

  return Data;
}
/* This is a bit skanky. If we're transmitting, and we need to block because the ring's full, we first pull all of the *incoming* messages off of the ring into local buffers. This unblocks the remote, which helps to avoid deadlocks. */ static void pull_incoming_messages(struct v2v_stream *stream) { struct queued_message *qm; const volatile void *payload; size_t size; unsigned type; unsigned flags; /* If we're processing an in-ring message, copy it out of the ring and into the local queue. */ if (stream->recv_state.current_message == &stream->recv_state.in_ring_message) { qm = HeapAlloc(GetProcessHeap(), 0, sizeof(*qm) + stream->recv_state.in_ring_message.size); *qm = stream->recv_state.in_ring_message; qm->payload = qm + 1; _ReadWriteBarrier(); memcpy(qm->payload, stream->recv_state.in_ring_message.payload, qm->size); _ReadWriteBarrier(); stream->recv_state.current_message = qm; v2v_nc2_finish_message(stream->channel); stream->recv_state.current_message = qm; } /* Pull all of the messages out of the ring and into the local queue. */ while (v2v_nc2_get_message(stream->channel, &payload, &size, &type, &flags)) { qm = HeapAlloc(GetProcessHeap(), 0, sizeof(*qm) + size); qm->next = NULL; qm->size = size; qm->type = type; qm->flags = flags; qm->bytes_already_used = 0; qm->payload = qm + 1; _ReadWriteBarrier(); memcpy(qm->payload, (const void *)payload, size); _ReadWriteBarrier(); v2v_nc2_finish_message(stream->channel); *stream->recv_state.queue.tail = qm; stream->recv_state.queue.tail = &qm->next; } }
///@todo Combine this with QueueDraw void QueueDispatch(SWR_CONTEXT *pContext) { _ReadWriteBarrier(); pContext->DrawEnqueued++; if (KNOB_SINGLE_THREADED) { // flush denormals to 0 uint32_t mxcsr = _mm_getcsr(); _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON); WorkOnCompute(pContext, 0, pContext->WorkerBE[0]); // restore csr _mm_setcsr(mxcsr); } else { RDTSC_START(APIDrawWakeAllThreads); WakeAllThreads(pContext); RDTSC_STOP(APIDrawWakeAllThreads, 1, 0); } // Set current draw context to NULL so that next state call forces a new draw context to be created and populated. pContext->pPrevDrawContext = pContext->pCurDrawContext; pContext->pCurDrawContext = nullptr; }
//! Atomically read an boost::uint32_t from memory inline boost::uint32_t atomic_read32(volatile boost::uint32_t *mem) { //Patched for Safir SDK Core const boost::uint32_t val = *mem; _ReadWriteBarrier(); return val; }
////////////////////////////////////////////////////////////////////////// /// @brief Called when FE work is complete for this DC. INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) { _ReadWriteBarrier(); if (pContext->pfnUpdateStatsFE && GetApiState(pDC).enableStats) { pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &pDC->dynState.statsFE); } if (pContext->pfnUpdateSoWriteOffset) { for (uint32_t i = 0; i < MAX_SO_BUFFERS; ++i) { if ((pDC->dynState.SoWriteOffsetDirty[i]) && (pDC->pState->state.soBuffer[i].soWriteEnable)) { pContext->pfnUpdateSoWriteOffset(GetPrivateState(pDC), i, pDC->dynState.SoWriteOffset[i]); } } } pDC->doneFE = true; InterlockedDecrement((volatile LONG*)&pContext->drawsOutstandingFE); }
//////////////////////////////////////////////////////////////////////////
/// @brief Decrements the DC's threadsDone counter. The caller that brings
///        it to zero resets the DC's arenas and tile manager and dequeues
///        the DC from the context's ring.
/// @param pContext - SWR context owning the DC ring.
/// @param pDC - draw context being completed.
/// @return The counter value after the decrement.
INLINE int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
{
    int64_t remaining = InterlockedDecrement64(&pDC->threadsDone);
    SWR_ASSERT(remaining >= 0);

    if (remaining != 0)
    {
        // Other threads are still working on this DC.
        return remaining;
    }

    // Cleanup memory allocations
    pDC->pArena->Reset(true);
    if (!pDC->isCompute)
    {
        pDC->pTileMgr->initialize();
    }
    if (pDC->cleanupState)
    {
        pDC->pState->pArena->Reset(true);
    }

    _ReadWriteBarrier();

    pContext->dcRing.Dequeue(); // Remove from tail

    return remaining;
}
/*
 * Acquires the lite spin lock, spinning until it is free.
 *
 * The lock word is taken by atomically exchanging EBUSY into it; a zero
 * previous value means the lock was free and is now ours.  While it is held
 * by someone else the inner loop only reads the word and yields, and retries
 * the exchange once the word reads zero.
 *
 * The CHECK_SPINLOCK_LITE and _spin_lite_lock_* macros are defined elsewhere;
 * presumably validation and statistics hooks -- confirm against their
 * definitions.
 *
 * Always returns 0 from the code visible here.
 */
int _spin_lite_lock(spin_t *l)
{
    CHECK_SPINLOCK_LITE(l);
    int lscnt = 0;
    _vol_spinlock v;
    /* View the lock word through the _vol_spinlock wrapper. */
    v.l = (LONG *)&l->l;
    _spin_lite_lock_inc(bscnt);
    /* Exchange returns the previous value: non-zero means already held. */
    while (InterlockedExchange(v.lv, EBUSY))
    {
        _spin_lite_lock_cnt(lscnt);
        /* Don't lock the bus whilst waiting */
        while (*v.lv)
        {
            _spin_lite_lock_cnt(lscnt);
            YieldProcessor();
            /* Compiler barrier.  Prevent caching of *l */
            _ReadWriteBarrier();
        }
    }
    _spin_lite_lock_dec(bscnt);
    _spin_lite_lock_stat(lscnt);
    return 0;
}
// inlined-only version INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC) { int32_t result = static_cast<int32_t>(InterlockedDecrement(&pDC->threadsDone)); SWR_ASSERT(result >= 0); AR_FLUSH(pDC->drawId); if (result == 0) { ExecuteCallbacks(pContext, workerId, pDC); // Cleanup memory allocations pDC->pArena->Reset(true); if (!pDC->isCompute) { pDC->pTileMgr->initialize(); } if (pDC->cleanupState) { pDC->pState->pArena->Reset(true); } _ReadWriteBarrier(); pContext->dcRing.Dequeue(); // Remove from tail } return result; }
/*
 * Registers the caller on a held critical section and makes sure the section
 * has a LockSemaphore event to wait on.
 *
 * Returns EAGAIN when the section no longer looks locked or when LockCount
 * changed before LockDelta could be added to it (the caller should redo its
 * trylock sequence), ENOMEM when the event cannot be created (the LockDelta
 * addition is undone first), and 0 otherwise.
 */
static inline int _InitWaitCriticalSection(RTL_CRITICAL_SECTION *prc)
{
    HANDLE evt;
    LONG LockCount = prc->LockCount;

    if (!prc->OwningThread || !prc->RecursionCount || (LockCount & 1)) {
        /* not locked (anymore), caller should redo trylock sequence: */
        return EAGAIN;
    }

    _ReadWriteBarrier();
    if (InterlockedCompareExchange(&prc->LockCount, LockCount + LockDelta, LockCount) != LockCount) {
        /* LockCount moved underneath us; recheck here too: */
        return EAGAIN;
    }

    if (prc->LockSemaphore) {
        /* An event is already installed; nothing more to set up. */
        return 0;
    }

    evt = CreateEvent(NULL, FALSE, FALSE, NULL);
    if (!evt) {
        InterlockedExchangeAdd(&prc->LockCount, -LockDelta);
        return ENOMEM;
    }

    if (InterlockedCompareExchangePointer(&prc->LockSemaphore, evt, NULL)) {
        /* someone sneaked in between, keep the original: */
        CloseHandle(evt);
    }

    return 0;
}
unsigned int pthread_create_wrapper(void *args) { struct _pthread_v *tv = (struct _pthread_v*)args; _pthread_once_raw(&_pthread_tls_once, pthread_tls_init); TlsSetValue(_pthread_tls, tv); if (!setjmp(tv->jb)) { /* Call function and save return value */ tv->ret_arg = tv->func(tv->ret_arg); /* Clean up destructors */ _pthread_cleanup_dest(tv); } /* If we exit too early, then we can race with create */ while (tv->h == (HANDLE) -1) { YieldProcessor(); _ReadWriteBarrier(); } /* Make sure we free ourselves if we are detached */ if (!tv->h) { if(tv->keyval) free(tv->keyval); free(tv); } return 0; }
/*
 * Starts the OS thread described by `thread`.
 *
 * On ACE_WINDOWS the handle is first set to the (HANDLE)-1 placeholder and
 * then replaced by the result of _beginthreadex().  Elsewhere a pthread is
 * created with the attributes in thread->thread_attr, made joinable when
 * ACE_THREAD_JOINABLE is set.
 *
 * Failure to create the thread terminates the process with exit(ENOMEM) on
 * both platforms.  If ACE_THREAD_DETACHED is set the new thread is detached
 * before returning.
 */
void thread_create(thread_t *thread)
{
#ifdef ACE_WINDOWS
    /* Placeholder value that is visible until the real handle is stored. */
    thread->thread = (HANDLE)-1;
    _ReadWriteBarrier();
    /* NOTE(review): the thread argument is NULL here while the pthread branch
       passes `thread` -- confirm the win32 wrapper does not need it. */
    thread->thread = (HANDLE)_beginthreadex(NULL, 0, &internal_win32_callback_wrapper, NULL, 0, &thread->thread_id);
    if (!thread->thread) {
        /* _beginthreadex returns 0 on failure; mirror the pthread branch
           instead of carrying on with a null handle. */
        exit(ENOMEM);
    }
#else
    pthread_attr_init(&thread->thread_attr);
    if (thread->flags & ACE_THREAD_JOINABLE) {
        pthread_attr_setdetachstate(&thread->thread_attr, PTHREAD_CREATE_JOINABLE);
    }
    if (pthread_create(&thread->thread, &thread->thread_attr, &internal_linux_callback_wrapper, (void *)thread)) {
        exit(ENOMEM);
    }
    pthread_attr_destroy(&thread->thread_attr);
#endif

    if (thread->flags & ACE_THREAD_DETACHED) {
        thread_detach(thread);
    }
}
// Releases the lock by clearing v_.
// With BOOST_WINDOWS: a compiler barrier followed by a volatile store of 0.
// Otherwise: the __sync_lock_release builtin.
void unlock()
{
#if BOOST_WINDOWS
    _ReadWriteBarrier();
    long volatile * flag = const_cast< long volatile* >( &v_ );
    *flag = 0;
#else
    __sync_lock_release( &v_ );
#endif
}