/*
 * Wake up the oldest process sleeping on the CV, if there is any.
 *
 * Note: it's difficult to tell whether this has any real effect: we know
 * whether we took an entry off the list, but the entry might only be a
 * sentinel.  Hence, think twice before proposing that this should return
 * a flag telling whether it woke somebody.
 */
void
ConditionVariableSignal(ConditionVariable *cv)
{
    PGPROC     *proc = NULL;

    /* Remove the first process from the wakeup queue (if any). */
    SpinLockAcquire(&cv->mutex);
    if (!proclist_is_empty(&cv->wakeup))
        proc = proclist_pop_head_node(&cv->wakeup, cvWaitLink);
    SpinLockRelease(&cv->mutex);

    /* If we found someone sleeping, set their latch to wake them up. */
    if (proc != NULL)
        SetLatch(&proc->procLatch);
}
void
SubscribeTerminate(void)
{
    LIST_ITEM *pTrapReferenceListItem;

    _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, SubscribeTerminate);

    SpinLockAcquire(&SubscribedTrapsListLock);
    SubscribeInitialized = 0;
    SpinLockRelease(&SubscribedTrapsListLock);

    iba_sd_deregister(SdClientHandle);
    /* no more SD callbacks will occur now, so OPQUEUED is not important */

    SpinLockAcquire(&SubscribedTrapsListLock);
    while (NULL != (pTrapReferenceListItem = QListRemoveHead(&SubscribedTrapsList)))
    {
        TRAP_REFERENCE *pTrapReference = (TRAP_REFERENCE *) QListObj(pTrapReferenceListItem);

        ASSERT(pTrapReference != NULL);

        TimerStop(&pTrapReference->Timer);
        // release lock so TimerDestroy can wait for callback
        SpinLockRelease(&SubscribedTrapsListLock);
        TimerDestroy(&pTrapReference->Timer);
        MemoryDeallocate(pTrapReference);
        SpinLockAcquire(&SubscribedTrapsListLock);
    }
    SpinLockRelease(&SubscribedTrapsListLock);

    QListDestroy(&SubscribedTrapsList);
    SpinLockDestroy(&SubscribedTrapsListLock);

    _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE);
}
/*
 * Set the identity of the process that will receive from a shared message
 * queue.
 */
void
shm_mq_set_receiver(shm_mq *mq, PGPROC *proc)
{
    volatile shm_mq *vmq = mq;
    PGPROC     *sender;

    SpinLockAcquire(&mq->mq_mutex);
    Assert(vmq->mq_receiver == NULL);
    vmq->mq_receiver = proc;
    sender = vmq->mq_sender;
    SpinLockRelease(&mq->mq_mutex);

    if (sender != NULL)
        SetLatch(&sender->procLatch);
}
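For context, a minimal setup sketch showing where shm_mq_set_receiver fits: the queue creator assigns both endpoints before the backends attach. The DSM segment "seg", the worker's PGPROC "workerProc", and the 16kB queue size are assumptions for illustration, not part of the function above.

    /* Hypothetical setup: "seg" and "workerProc" are assumed to exist. */
    shm_mq     *mq = shm_mq_create(dsm_segment_address(seg), 16384);
    shm_mq_handle *mqh;

    shm_mq_set_receiver(mq, MyProc);        /* this backend will read */
    shm_mq_set_sender(mq, workerProc);      /* the worker will write */
    mqh = shm_mq_attach(mq, seg, NULL);     /* attach as the receiver */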
/* Set state for current walsender (only called in walsender) */
void
WalSndSetState(WalSndState state)
{
    /* use volatile pointer to prevent code rearrangement */
    volatile WalSnd *walsnd = MyWalSnd;

    Assert(am_walsender);

    if (walsnd->state == state)
        return;

    SpinLockAcquire(&walsnd->mutex);
    walsnd->state = state;
    SpinLockRelease(&walsnd->mutex);
}
/*
 * Returns the last+1 byte position that walreceiver has written.
 *
 * Optionally, returns the previous chunk start, that is the first byte
 * written in the most recent walreceiver flush cycle.  Callers not
 * interested in that value may pass NULL for latestChunkStart.
 */
XLogRecPtr
GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart)
{
    /* use volatile pointer to prevent code rearrangement */
    volatile WalRcvData *walrcv = WalRcv;
    XLogRecPtr  recptr;

    SpinLockAcquire(&walrcv->mutex);
    recptr = walrcv->receivedUpto;
    if (latestChunkStart)
        *latestChunkStart = walrcv->latestChunkStart;
    SpinLockRelease(&walrcv->mutex);

    return recptr;
}
/*
 * Convert a slot that's marked as RS_EPHEMERAL to a RS_PERSISTENT slot,
 * guaranteeing it will be there after an eventual crash.
 */
void
ReplicationSlotPersist(void)
{
    ReplicationSlot *slot = MyReplicationSlot;

    Assert(slot != NULL);
    Assert(slot->data.persistency != RS_PERSISTENT);

    SpinLockAcquire(&slot->mutex);
    slot->data.persistency = RS_PERSISTENT;
    SpinLockRelease(&slot->mutex);

    ReplicationSlotMarkDirty();
    ReplicationSlotSave();
}
/*
 * Cancel any pending sleep operation.
 *
 * We just need to remove ourselves from the wait queue of any condition
 * variable for which we have previously prepared a sleep.
 *
 * Do nothing if nothing is pending; this allows this function to be called
 * during transaction abort to clean up any unfinished CV sleep.
 */
void
ConditionVariableCancelSleep(void)
{
    ConditionVariable *cv = cv_sleep_target;

    if (cv == NULL)
        return;

    SpinLockAcquire(&cv->mutex);
    if (proclist_contains(&cv->wakeup, MyProc->pgprocno, cvWaitLink))
        proclist_delete(&cv->wakeup, MyProc->pgprocno, cvWaitLink);
    SpinLockRelease(&cv->mutex);

    cv_sleep_target = NULL;
}
/*
 * LWLockWaitCancel - cancel the wait currently in progress on an LWLock
 *
 * Used to clean up before immediate exit in certain very special situations,
 * like a shutdown request to the Filerep Resync Manager or Workers.  Although
 * this is not best practice, it is necessary to avoid starvation during
 * filerep transitions (Resync mode -> Changetracking mode).
 *
 * Note: this function should not be used in normal situations.  It is
 * strictly written for very special situations.  If you need to use this,
 * you may want to re-think your design.
 */
void
LWLockWaitCancel(void)
{
    volatile PGPROC *proc = MyProc;
    volatile LWLock *lwWaitingLock = NULL;
    PGPROC     *currProc;

    /* We had better have a PGPROC structure */
    Assert(proc != NULL);

    /* If we're not waiting on any LWLock, there is nothing to do here */
    if (!proc->lwWaiting)
        return;

    lwWaitingLock = &(LWLockArray[lwWaitingLockId].lock);

    /* Protect against other modifiers */
    SpinLockAcquire(&lwWaitingLock->mutex);

    currProc = lwWaitingLock->head;

    /* Search for our PGPROC in the waiters list and remove it */
    if (proc == lwWaitingLock->head)
    {
        lwWaitingLock->head = currProc = proc->lwWaitLink;
        proc->lwWaitLink = NULL;
    }
    else
    {
        while (currProc != NULL)
        {
            if (currProc->lwWaitLink == proc)
            {
                currProc->lwWaitLink = proc->lwWaitLink;
                proc->lwWaitLink = NULL;
                break;
            }
            currProc = currProc->lwWaitLink;
        }
    }

    /* If we were the tail, the new tail is our predecessor (or new head) */
    if (lwWaitingLock->tail == proc)
        lwWaitingLock->tail = currProc;

    /* Done with modification */
    SpinLockRelease(&lwWaitingLock->mutex);
}
/*
 * ShmemAlloc -- allocate max-aligned chunk from shared memory
 *
 * Assumes ShmemLock and ShmemSegHdr are initialized.
 *
 * Returns: real pointer to memory or NULL if we are out
 *      of space.  Has to return a real pointer in order
 *      to be compatible with malloc().
 */
void *
ShmemAlloc(Size size)
{
    Size        newStart;
    Size        newFree;
    void       *newSpace;

    /*
     * Ensure all space is adequately aligned.  We used to only MAXALIGN this
     * space but experience has proved that on modern systems that is not
     * good enough.  Many parts of the system are very sensitive to critical
     * data structures getting split across cache line boundaries.  To avoid
     * that, attempt to align the beginning of the allocation to a cache line
     * boundary.  The calling code will still need to be careful about how it
     * uses the allocated space - e.g. by padding each element in an array of
     * structures out to a power-of-two size - but without this, even that
     * won't be sufficient.
     */
    size = CACHELINEALIGN(size);

    Assert(ShmemSegHdr != NULL);

    SpinLockAcquire(ShmemLock);

    newStart = ShmemSegHdr->freeoffset;

    /* extra alignment for large requests, since they are probably buffers */
    if (size >= BLCKSZ)
        newStart = BUFFERALIGN(newStart);

    newFree = newStart + size;
    if (newFree <= ShmemSegHdr->totalsize)
    {
        newSpace = (void *) ((char *) ShmemBase + newStart);
        ShmemSegHdr->freeoffset = newFree;
    }
    else
        newSpace = NULL;

    SpinLockRelease(ShmemLock);

    if (!newSpace)
        ereport(WARNING,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of shared memory")));

    return newSpace;
}
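A hedged usage sketch for this variant, which reports only a WARNING and returns NULL on exhaustion, so callers must check the result themselves. "MyShared" is a hypothetical structure, not something defined above.

    typedef struct MyShared          /* hypothetical shared structure */
    {
        slock_t     mutex;
        int         counter;
    } MyShared;

    MyShared   *shared = (MyShared *) ShmemAlloc(sizeof(MyShared));

    if (shared == NULL)              /* this variant only WARNs, so escalate */
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of shared memory")));
    SpinLockInit(&shared->mutex);
    shared->counter = 0;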
/*
 * Compute the oldest xmin across all slots and store it in the ProcArray.
 */
void
ReplicationSlotsComputeRequiredXmin(bool already_locked)
{
    int         i;
    TransactionId agg_xmin = InvalidTransactionId;
    TransactionId agg_catalog_xmin = InvalidTransactionId;

    Assert(ReplicationSlotCtl != NULL);

    if (!already_locked)
        LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);

    for (i = 0; i < max_replication_slots; i++)
    {
        ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
        TransactionId effective_xmin;
        TransactionId effective_catalog_xmin;

        if (!s->in_use)
            continue;

        {
            volatile ReplicationSlot *vslot = s;

            SpinLockAcquire(&s->mutex);
            effective_xmin = vslot->effective_xmin;
            effective_catalog_xmin = vslot->effective_catalog_xmin;
            SpinLockRelease(&s->mutex);
        }

        /* check the data xmin */
        if (TransactionIdIsValid(effective_xmin) &&
            (!TransactionIdIsValid(agg_xmin) ||
             TransactionIdPrecedes(effective_xmin, agg_xmin)))
            agg_xmin = effective_xmin;

        /* check the catalog xmin */
        if (TransactionIdIsValid(effective_catalog_xmin) &&
            (!TransactionIdIsValid(agg_catalog_xmin) ||
             TransactionIdPrecedes(effective_catalog_xmin, agg_catalog_xmin)))
            agg_catalog_xmin = effective_catalog_xmin;
    }

    if (!already_locked)
        LWLockRelease(ReplicationSlotControlLock);

    ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
}
/*
 * Release a replication slot, so that this or another backend can re-acquire
 * it later.  Resources this slot requires will be preserved.
 */
void
ReplicationSlotRelease(void)
{
    ReplicationSlot *slot = MyReplicationSlot;

    Assert(slot != NULL && slot->active);

    /* Mark slot inactive.  We're not freeing it, just disconnecting. */
    {
        volatile ReplicationSlot *vslot = slot;

        SpinLockAcquire(&slot->mutex);
        vslot->active = false;
        SpinLockRelease(&slot->mutex);
        MyReplicationSlot = NULL;
    }
}
/*
 * Returns the last+1 byte position that walreceiver has written.
 *
 * Optionally, returns the previous chunk start, that is the first byte
 * written in the most recent walreceiver flush cycle.  Callers not
 * interested in that value may pass NULL for latestChunkStart.  Same for
 * receiveTLI.
 */
XLogRecPtr
GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
{
    WalRcvData *walrcv = WalRcv;
    XLogRecPtr  recptr;

    SpinLockAcquire(&walrcv->mutex);
    recptr = walrcv->receivedUpto;
    if (latestChunkStart)
        *latestChunkStart = walrcv->latestChunkStart;
    if (receiveTLI)
        *receiveTLI = walrcv->receivedTLI;
    SpinLockRelease(&walrcv->mutex);

    return recptr;
}
// callback when SA query completes or fails
static void
ProcessTrapSubscribeResponse(void *Context, FABRIC_OPERATION_DATA *pFabOp,
                             FSTATUS Status, uint32 MadStatus)
{
    TRAP_REFERENCE *pTrapReference = (TRAP_REFERENCE *) Context;

    _DBG_ENTER_LVL(_DBG_LVL_FUNC_TRACE, ProcessTrapSubscribeResponse);

    SpinLockAcquire(&SubscribedTrapsListLock);
    if (SubscribeInitialized == 0)
        goto unlock;    // we are shutting down, don't start anything new

    pTrapReference->Flags = (TRAP_REFERENCE_FLAGS)
        (pTrapReference->Flags & ~TRAP_REF_FLAGS_OPQUEUED);

    if (Status != FSUCCESS)
    {
        // our attempt failed
        if (pFabOp->Value.InformInfo.Subscribe == 0)
        {
            _DBG_WARN(("Trap Unsubscribe Failure. Status = %s, MadStatus = 0x%x: %s.\n",
                       _DBG_PTR(iba_fstatus_msg(Status)), MadStatus,
                       _DBG_PTR(iba_sd_mad_status_msg(MadStatus))));
        }
        else
        {
            _DBG_WARN(("Trap Subscribe Failure. Status = %s, MadStatus = 0x%x: %s.\n",
                       _DBG_PTR(iba_fstatus_msg(Status)), MadStatus,
                       _DBG_PTR(iba_sd_mad_status_msg(MadStatus))));
        }

        // delay any retry or adjustment so we don't beat on the SA too hard
        if (MadStatus == MAD_STATUS_BUSY)
        {
            ProcessTrapStateChange(pTrapReference,
                                   (SystemGetRandom() % PORT_DOWN_DELAY));
            _DBG_PRINT(_DBG_LVL_INFO,
                       ("SM Returned Busy. Delaying Subscription By %ld MS.\n",
                        (SystemGetRandom() % PORT_DOWN_DELAY)));
        }
        else
        {
            ProcessTrapStateChange(pTrapReference, RETRY_DELAY);
        }
    }
    else
    {
        if (pFabOp->Value.InformInfo.Subscribe == 1)
        {
            /* We are now subscribed with the SA */
            pTrapReference->Flags = (TRAP_REFERENCE_FLAGS)
                (pTrapReference->Flags | TRAP_REF_FLAGS_SUBSCRIBED);
        }
        else
        {
            /* We are now completely unsubscribed from the SA; the flag was
             * cleared when we issued the unsubscribe request */
        }
        ProcessTrapStateChange(pTrapReference, 0);
    }

unlock:
    SpinLockRelease(&SubscribedTrapsListLock);
    _DBG_LEAVE_LVL(_DBG_LVL_FUNC_TRACE);
}
/*
 * Prepare to wait on a given condition variable.
 *
 * This can optionally be called before entering a test/sleep loop.
 * Doing so is more efficient if we'll need to sleep at least once.
 * However, if the first test of the exit condition is likely to succeed,
 * it's more efficient to omit the ConditionVariablePrepareToSleep call.
 * See comments in ConditionVariableSleep for more detail.
 *
 * Caution: "before entering the loop" means you *must* test the exit
 * condition between calling ConditionVariablePrepareToSleep and calling
 * ConditionVariableSleep.  If that is inconvenient, omit calling
 * ConditionVariablePrepareToSleep.
 */
void
ConditionVariablePrepareToSleep(ConditionVariable *cv)
{
    int         pgprocno = MyProc->pgprocno;

    /*
     * If first time through in this process, create a WaitEventSet, which
     * we'll reuse for all condition variable sleeps.
     */
    if (cv_wait_event_set == NULL)
    {
        WaitEventSet *new_event_set;

        new_event_set = CreateWaitEventSet(TopMemoryContext, 2);
        AddWaitEventToSet(new_event_set, WL_LATCH_SET, PGINVALID_SOCKET,
                          MyLatch, NULL);
        AddWaitEventToSet(new_event_set, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
                          NULL, NULL);
        /* Don't set cv_wait_event_set until we have a correct WES. */
        cv_wait_event_set = new_event_set;
    }

    /*
     * If some other sleep is already prepared, cancel it; this is necessary
     * because we have just one static variable tracking the prepared sleep,
     * and also only one cvWaitLink in our PGPROC.  It's okay to do this
     * because whenever control does return to the other test-and-sleep loop,
     * its ConditionVariableSleep call will just re-establish that sleep as
     * the prepared one.
     */
    if (cv_sleep_target != NULL)
        ConditionVariableCancelSleep();

    /* Record the condition variable on which we will sleep. */
    cv_sleep_target = cv;

    /*
     * Reset my latch before adding myself to the queue, to ensure that we
     * don't miss a wakeup that occurs immediately.
     */
    ResetLatch(MyLatch);

    /* Add myself to the wait queue. */
    SpinLockAcquire(&cv->mutex);
    proclist_push_tail(&cv->wakeup, pgprocno, cvWaitLink);
    SpinLockRelease(&cv->mutex);
}
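The comments above describe the intended test-and-sleep protocol; a minimal sketch follows, assuming a hypothetical shared flag "shared->work_done" and wait-event id WAIT_EVENT_EXAMPLE (both illustrative, not defined above).

    /* Hypothetical exit condition: shared->work_done. */
    ConditionVariablePrepareToSleep(&shared->cv);
    while (!shared->work_done)          /* must re-test after preparing */
        ConditionVariableSleep(&shared->cv, WAIT_EVENT_EXAMPLE);
    ConditionVariableCancelSleep();     /* leave the wakeup queue */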
/*
 * StrategySyncStart -- tell BufferSync where to start syncing
 *
 * The result is the buffer index of the best buffer to sync first.
 * BufferSync() will proceed circularly around the buffer array from there.
 *
 * In addition, we return the completed-pass count (which is effectively
 * the higher-order bits of nextVictimBuffer) and the count of recent buffer
 * allocs if non-NULL pointers are passed.  The alloc count is reset after
 * being read.
 */
int
StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
{
    int         result;

    SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
    result = StrategyControl->nextVictimBuffer;
    if (complete_passes)
        *complete_passes = StrategyControl->completePasses;
    if (num_buf_alloc)
    {
        *num_buf_alloc = StrategyControl->numBufferAllocs;
        StrategyControl->numBufferAllocs = 0;
    }
    SpinLockRelease(&StrategyControl->buffer_strategy_lock);

    return result;
}
/*
 * UnSetDistributedTransactionId simply acquires the mutex and resets the
 * backend's distributed transaction data in shared memory to the initial
 * values.
 */
void
UnSetDistributedTransactionId(void)
{
    /* backend does not exist if the extension is not created */
    if (MyBackendData)
    {
        SpinLockAcquire(&MyBackendData->mutex);

        MyBackendData->databaseId = 0;
        MyBackendData->transactionId.initiatorNodeIdentifier = 0;
        MyBackendData->transactionId.transactionOriginator = false;
        MyBackendData->transactionId.transactionNumber = 0;
        MyBackendData->transactionId.timestamp = 0;

        SpinLockRelease(&MyBackendData->mutex);
    }
}
/*
 * Mark us as STOPPED in shared memory at exit.
 */
static void
WalRcvDie(int code, Datum arg)
{
    /* use volatile pointer to prevent code rearrangement */
    volatile WalRcvData *walrcv = WalRcv;

    SpinLockAcquire(&walrcv->mutex);
    Assert(walrcv->walRcvState == WALRCV_RUNNING ||
           walrcv->walRcvState == WALRCV_STOPPING);
    walrcv->walRcvState = WALRCV_STOPPED;
    walrcv->pid = 0;
    SpinLockRelease(&walrcv->mutex);

    /* Terminate the connection gracefully. */
    if (walrcv_disconnect != NULL)
        walrcv_disconnect();
}
/*
 * allocate some new elements and link them into the free list
 */
static bool
element_alloc(HTAB *hashp, int nelem)
{
    /* use volatile pointer to prevent code rearrangement */
    volatile HASHHDR *hctlv = hashp->hctl;
    Size        elementSize;
    HASHELEMENT *firstElement;
    HASHELEMENT *tmpElement;
    HASHELEMENT *prevElement;
    int         i;

    if (hashp->isfixed)
        return false;

    /* Each element has a HASHELEMENT header plus user data. */
    elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctlv->entrysize);

    CurrentDynaHashCxt = hashp->hcxt;
    firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);

    if (!firstElement)
        return false;

    /* prepare to link all the new entries into the freelist */
    prevElement = NULL;
    tmpElement = firstElement;
    for (i = 0; i < nelem; i++)
    {
        tmpElement->link = prevElement;
        prevElement = tmpElement;
        tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
    }

    /* if partitioned, must lock to touch freeList */
    if (IS_PARTITIONED(hctlv))
        SpinLockAcquire(&hctlv->mutex);

    /* freelist could be nonempty if two backends did this concurrently */
    firstElement->link = hctlv->freeList;
    hctlv->freeList = prevElement;

    if (IS_PARTITIONED(hctlv))
        SpinLockRelease(&hctlv->mutex);

    return true;
}
/*
 * Find a previously created slot and mark it as used by this backend.
 */
void
ReplicationSlotAcquire(const char *name)
{
    ReplicationSlot *slot = NULL;
    int         i;
    int         active_pid = 0;

    Assert(MyReplicationSlot == NULL);

    ReplicationSlotValidateName(name, ERROR);

    /* Search for the named slot and mark it active if we find it. */
    LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
    for (i = 0; i < max_replication_slots; i++)
    {
        ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];

        if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
        {
            volatile ReplicationSlot *vslot = s;

            SpinLockAcquire(&s->mutex);
            active_pid = vslot->active_pid;
            if (active_pid == 0)
                vslot->active_pid = MyProcPid;
            SpinLockRelease(&s->mutex);
            slot = s;
            break;
        }
    }
    LWLockRelease(ReplicationSlotControlLock);

    /* If we did not find the slot or it was already active, error out. */
    if (slot == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_UNDEFINED_OBJECT),
                 errmsg("replication slot \"%s\" does not exist", name)));
    if (active_pid != 0)
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_IN_USE),
                 errmsg("replication slot \"%s\" is already active for pid %d",
                        name, active_pid)));

    /* We made this slot active, so it's ours now. */
    MyReplicationSlot = slot;
}
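A minimal lifecycle sketch pairing this function with ReplicationSlotRelease above; the slot name "my_slot" is illustrative only.

    ReplicationSlotAcquire("my_slot");  /* errors if missing or already active */
    /* ... stream changes via MyReplicationSlot ... */
    ReplicationSlotRelease();           /* slot becomes available to others */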
/*
 * Set the receiver's latch, unless the queue is detached.
 */
static shm_mq_result
shm_mq_notify_receiver(volatile shm_mq *mq)
{
    PGPROC     *receiver;
    bool        detached;

    SpinLockAcquire(&mq->mq_mutex);
    detached = mq->mq_detached;
    receiver = mq->mq_receiver;
    SpinLockRelease(&mq->mq_mutex);

    if (detached)
        return SHM_MQ_DETACHED;
    if (receiver)
        SetLatch(&receiver->procLatch);

    return SHM_MQ_SUCCESS;
}
/*
 * Flush the log to disk.
 *
 * If we're in the midst of dying, it's unwise to do anything that might
 * throw an error, so we skip sending a reply in that case.
 */
static void
XLogWalRcvFlush(bool dying)
{
    if (XLByteLT(LogstreamResult.Flush, LogstreamResult.Write))
    {
        /* use volatile pointer to prevent code rearrangement */
        volatile WalRcvData *walrcv = WalRcv;

        issue_xlog_fsync(recvFile, recvId, recvSeg);

        LogstreamResult.Flush = LogstreamResult.Write;

        /* Update shared-memory status */
        SpinLockAcquire(&walrcv->mutex);
        if (XLByteLT(walrcv->receivedUpto, LogstreamResult.Flush))
        {
            walrcv->latestChunkStart = walrcv->receivedUpto;
            walrcv->receivedUpto = LogstreamResult.Flush;
        }
        SpinLockRelease(&walrcv->mutex);

        /* Signal the startup process and walsender that new WAL has arrived */
        WakeupRecovery();
        if (AllowCascadeReplication())
            WalSndWakeup();

        /* Report XLOG streaming progress in PS display */
        if (update_process_title)
        {
            char        activitymsg[50];

            snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",
                     LogstreamResult.Write.xlogid,
                     LogstreamResult.Write.xrecoff);
            set_ps_display(activitymsg, false);
        }

        /* Also let the master know that we made some progress */
        if (!dying)
        {
            XLogWalRcvSendReply();
            XLogWalRcvSendHSFeedback();
        }
    }
}
/*
 * Request walsenders to reload the currently-open WAL file
 */
void
WalSndRqstFileReload(void)
{
    int         i;

    for (i = 0; i < max_wal_senders; i++)
    {
        /* use volatile pointer to prevent code rearrangement */
        volatile WalSnd *walsnd = &WalSndCtl->walsnds[i];

        if (walsnd->pid == 0)
            continue;

        SpinLockAcquire(&walsnd->mutex);
        walsnd->needreload = true;
        SpinLockRelease(&walsnd->mutex);
    }
}
/*
 * FirstCallSinceLastCheckpoint allows a process to take an action once
 * per checkpoint cycle by asynchronously checking for checkpoint completion.
 */
bool
FirstCallSinceLastCheckpoint(void)
{
    static int  ckpt_done = 0;
    int         new_done;
    bool        FirstCall = false;

    SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
    new_done = CheckpointerShmem->ckpt_done;
    SpinLockRelease(&CheckpointerShmem->ckpt_lck);

    if (new_done != ckpt_done)
        FirstCall = true;

    ckpt_done = new_done;

    return FirstCall;
}
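A usage sketch assuming a hypothetical background-worker loop and helper DoOncePerCheckpointWork(); the WaitLatch signature shown (with a trailing wait_event_info) is assumed per recent PostgreSQL releases.

    for (;;)
    {
        if (FirstCallSinceLastCheckpoint())
            DoOncePerCheckpointWork();  /* hypothetical once-per-cycle action */

        /* sleep up to a second, or until the latch is set */
        WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT, 1000L, 0);
        ResetLatch(MyLatch);
    }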
/*
 * Request postmaster to start walreceiver.
 *
 * recptr indicates the position where streaming should begin, and conninfo
 * is a libpq connection string to use.
 */
void
RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo)
{
    /* use volatile pointer to prevent code rearrangement */
    volatile WalRcvData *walrcv = WalRcv;
    pg_time_t   now = (pg_time_t) time(NULL);

    /*
     * We always start at the beginning of the segment.  That prevents a
     * broken segment (i.e., with no records in the first half of a segment)
     * from being created by XLOG streaming, which might cause trouble later
     * on if the segment is e.g. archived.
     */
    if (recptr.xrecoff % XLogSegSize != 0)
        recptr.xrecoff -= recptr.xrecoff % XLogSegSize;

    SpinLockAcquire(&walrcv->mutex);

    /* It had better be stopped before we try to restart it */
    Assert(walrcv->walRcvState == WALRCV_STOPPED);

    if (conninfo != NULL)
        strlcpy((char *) walrcv->conninfo, conninfo, MAXCONNINFO);
    else
        walrcv->conninfo[0] = '\0';
    walrcv->walRcvState = WALRCV_STARTING;
    walrcv->startTime = now;

    /*
     * If this is the first startup of walreceiver, we initialize
     * receivedUpto and latestChunkStart to receiveStart.
     */
    if (walrcv->receiveStart.xlogid == 0 &&
        walrcv->receiveStart.xrecoff == 0)
    {
        walrcv->receivedUpto = recptr;
        walrcv->latestChunkStart = recptr;
    }
    walrcv->receiveStart = recptr;

    SpinLockRelease(&walrcv->mutex);

    SendPostmasterSignal(PMSIGNAL_START_WALRECEIVER);
}
/*
 * Return the number of bytes that can still be allocated.
 */
extern Size
shm_toc_freespace(shm_toc *toc)
{
    volatile shm_toc *vtoc = toc;
    Size        total_bytes;
    Size        allocated_bytes;
    Size        nentry;
    Size        toc_bytes;

    SpinLockAcquire(&toc->toc_mutex);
    total_bytes = vtoc->toc_total_bytes;
    allocated_bytes = vtoc->toc_allocated_bytes;
    nentry = vtoc->toc_nentry;
    SpinLockRelease(&toc->toc_mutex);

    toc_bytes = offsetof(shm_toc, toc_entry) + nentry * sizeof(shm_toc_entry);
    Assert(allocated_bytes + BUFFERALIGN(toc_bytes) <= total_bytes);
    return total_bytes - (allocated_bytes + BUFFERALIGN(toc_bytes));
}
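A minimal sketch of the surrounding table-of-contents API, assuming a DSM segment "seg" and an illustrative magic number; error handling is elided.

    #define MY_TOC_MAGIC  0x74E57A11UL  /* hypothetical module magic */

    shm_toc    *toc = shm_toc_create(MY_TOC_MAGIC,
                                     dsm_segment_address(seg),
                                     dsm_segment_map_length(seg));
    void       *space = shm_toc_allocate(toc, 1024);

    shm_toc_insert(toc, 0 /* key */, space);
    elog(LOG, "toc freespace: %zu bytes", shm_toc_freespace(toc));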
/*
 * ShmemAlloc -- allocate max-aligned chunk from shared memory
 *
 * Assumes ShmemLock and ShmemSegHdr are initialized.
 *
 * Returns: real pointer to memory or NULL if we are out
 *      of space.  Has to return a real pointer in order
 *      to be compatible with malloc().
 */
void *
ShmemAlloc(Size size)
{
    Size        newStart;
    Size        newFree;
    void       *newSpace;

    /* use volatile pointer to prevent code rearrangement */
    volatile PGShmemHeader *shmemseghdr = ShmemSegHdr;

    /*
     * ensure all space is adequately aligned.
     */
    size = MAXALIGN(size);

    Assert(shmemseghdr != NULL);

    SpinLockAcquire(ShmemLock);

    newStart = shmemseghdr->freeoffset;

    /* extra alignment for large requests, since they are probably buffers */
    if (size >= BLCKSZ)
        newStart = BUFFERALIGN(newStart);

    newFree = newStart + size;
    if (newFree <= shmemseghdr->totalsize)
    {
        newSpace = (void *) ((char *) ShmemBase + newStart);
        shmemseghdr->freeoffset = newFree;
    }
    else
        newSpace = NULL;

    SpinLockRelease(ShmemLock);

    if (!newSpace)
        ereport(WARNING,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of shared memory")));

    return newSpace;
}
/*
 * StrategyFreeBuffer: put a buffer on the freelist
 */
void
StrategyFreeBuffer(volatile BufferDesc *buf)
{
    SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

    /*
     * It is possible that we are told to put something in the freelist that
     * is already in it; don't screw up the list if so.
     */
    if (buf->freeNext == FREENEXT_NOT_IN_LIST)
    {
        buf->freeNext = StrategyControl->firstFreeBuffer;
        if (buf->freeNext < 0)
            StrategyControl->lastFreeBuffer = buf->buf_id;
        StrategyControl->firstFreeBuffer = buf->buf_id;
    }

    SpinLockRelease(&StrategyControl->buffer_strategy_lock);
}
/*
 * GetBackendDataForProc writes the backend data for the given process to
 * result.  If the process is part of a lock group (parallel query), it
 * returns the leader's data instead.
 */
void
GetBackendDataForProc(PGPROC *proc, BackendData *result)
{
    BackendData *backendData = NULL;
    int         pgprocno = proc->pgprocno;

    if (proc->lockGroupLeader != NULL)
    {
        pgprocno = proc->lockGroupLeader->pgprocno;
    }

    backendData = &backendManagementShmemData->backends[pgprocno];

    SpinLockAcquire(&backendData->mutex);

    memcpy(result, backendData, sizeof(BackendData));

    SpinLockRelease(&backendData->mutex);
}
/*
 * Link an entry back into the cache freelist
 *
 * The entry must already be marked as free by the caller.
 */
void
Cache_AddToFreelist(Cache *cache, CacheEntry *entry)
{
    Assert(NULL != cache);
    Assert(NULL != entry);
    CACHE_ASSERT_WIPED(entry);
    Assert(entry->state == CACHE_ENTRY_FREE);

    CacheHdr   *cacheHdr = cache->cacheHdr;

    /* Must lock to touch freeList */
    SpinLockAcquire(&cacheHdr->spinlock);

    entry->nextEntry = cacheHdr->freeList;
    cacheHdr->freeList = entry;

    Cache_UpdatePerfCounter(&cacheHdr->cacheStats.noFreeEntries, 1 /* delta */);

    SpinLockRelease(&cacheHdr->spinlock);
}
/*
 * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
 *
 * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
 */
void *
ShmemAllocNoError(Size size)
{
    Size        newStart;
    Size        newFree;
    void       *newSpace;

    /*
     * Ensure all space is adequately aligned.  We used to only MAXALIGN this
     * space but experience has proved that on modern systems that is not
     * good enough.  Many parts of the system are very sensitive to critical
     * data structures getting split across cache line boundaries.  To avoid
     * that, attempt to align the beginning of the allocation to a cache line
     * boundary.  The calling code will still need to be careful about how it
     * uses the allocated space - e.g. by padding each element in an array of
     * structures out to a power-of-two size - but without this, even that
     * won't be sufficient.
     */
    size = CACHELINEALIGN(size);

    Assert(ShmemSegHdr != NULL);

    SpinLockAcquire(ShmemLock);

    newStart = ShmemSegHdr->freeoffset;

    newFree = newStart + size;
    if (newFree <= ShmemSegHdr->totalsize)
    {
        newSpace = (void *) ((char *) ShmemBase + newStart);
        ShmemSegHdr->freeoffset = newFree;
    }
    else
        newSpace = NULL;

    SpinLockRelease(ShmemLock);

    /* note this assert is okay with newSpace == NULL */
    Assert(newSpace == (void *) CACHELINEALIGN(newSpace));

    return newSpace;
}