void LogicalTxnLog::rollbackTxn(SharedLogicalTxn pTxn)
{
    if (!pTxn->checkpointed) {
        // we never stored a checkpoint record for this txn, so during recovery
        // it can be ignored entirely
        StrictMutexGuard mutexGuard(mutex);
        removeTxn(pTxn);
        return;
    }

    // otherwise, write an EVENT_ROLLBACK so that the txn's fate is known
    // during recovery (eliminating the need for multiple passes over the log)
    LogicalTxnEventMemento memento;
    memento.event = LogicalTxnEventMemento::EVENT_ROLLBACK;
    memento.txnId = pTxn->txnId;
    memento.cbActionLast = 0;
    memento.nParticipants = 0;
    CompoundId::setPageId(memento.logPosition.segByteId, NULL_PAGE_ID);
    CompoundId::setByteOffset(memento.logPosition.segByteId, 0);
    memento.logPosition.cbOffset = 0;
    memento.longLog = true;

    StrictMutexGuard mutexGuard(mutex);
    pOutputStream->writeValue(memento);
    // no need for group commit since caller doesn't need to wait for
    // commit confirmation
    removeTxn(pTxn);
}
void ParallelExecStreamScheduler::executeManager()
{
    // TODO jvs 16-Aug-2008: RAII
    try {
        tryExecuteManager();
    } catch (...) {
        StrictMutexGuard mutexGuard(mutex);
        mgrState = MGR_STOPPED;
        sentinelCondition.notify_all();
        throw;
    }
    StrictMutexGuard mutexGuard(mutex);
    mgrState = MGR_STOPPED;
    sentinelCondition.notify_all();
}
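// (Editorial sketch for the RAII TODO above; not part of the Fennel sources.)
// A shared_ptr with a custom deleter can serve as the guard, borrowing the
// idiom used in SourceObject::HandleAboutToFinish below.  The stop
// notification then runs on both the normal and the exceptional exit, so the
// duplicated catch/rethrow block becomes unnecessary.  Assumes C++11 and
// <memory>; member names are taken from the function above.
void ParallelExecStreamScheduler::executeManager()
{
    std::shared_ptr<void> stopGuard(
        nullptr,
        [this](void *) {
            // runs when stopGuard leaves scope, even during stack unwinding
            StrictMutexGuard mutexGuard(mutex);
            mgrState = MGR_STOPPED;
            sentinelCondition.notify_all();
        });
    tryExecuteManager();
}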
void ParallelExecStreamScheduler::stop()
{
    FENNEL_TRACE(TRACE_FINE, "stop");

    StrictMutexGuard mutexGuard(mutex);
    if (mgrState != MGR_STOPPED) {
        mgrState = MGR_STOPPING;
        condition.notify_one();
        while (mgrState != MGR_STOPPED) {
            sentinelCondition.wait(mutexGuard);
        }
    }
    mutexGuard.unlock();

    threadPool.stop();

    // NOTE jvs 10-Aug-2008: This is how we keep the cloned excn
    // from becoming a memory leak.  It assumes that the caller
    // doesn't invoke pScheduler->stop() until *after* the exception
    // has been completely handled and is no longer referenced.
    pPendingExcn.reset();

    completedQueue.clear();
    inhibitedQueue.clear();
}
void ExternalSortExecStreamImpl::unreserveRunLoader(
    ExternalSortRunLoader &runLoader)
{
    StrictMutexGuard mutexGuard(runLoaderMutex);
    runLoader.runningParallelTask = false;
    runLoaderAvailable.notify_all();
}
bool SimpleExecStreamGovernor::setResourceAvailability(
    ExecStreamResourceQuantity const &available,
    ExecStreamResourceType resourceType)
{
    StrictMutexGuard mutexGuard(mutex);

    switch (resourceType) {
    case EXEC_RESOURCE_CACHE_PAGES:
        {
            uint pagesAvailable =
                available.nCachePages
                * (100 - knobSettings.cacheReservePercentage) / 100;
            if (pagesAvailable < resourcesAssigned.nCachePages) {
                return false;
            }
            resourcesAvailable.nCachePages =
                (pagesAvailable - resourcesAssigned.nCachePages);
            perGraphAllocation = computePerGraphAllocation();
            FENNEL_TRACE(
                TRACE_FINE,
                resourcesAvailable.nCachePages
                << " cache pages now available for assignment. "
                << "Per graph allocation is now " << perGraphAllocation
                << " cache pages.");
            break;
        }

    case EXEC_RESOURCE_THREADS:
        resourcesAvailable.nThreads = available.nThreads;
        break;
    }
    return true;
}
void VersionedSegment::delegatedCheckpoint(
    Segment &delegatingSegment, CheckpointType checkpointType)
{
    if (checkpointType != CHECKPOINT_DISCARD) {
        // TODO: for a fuzzy checkpoint, only need to force the log pages for
        // data pages that are going to be flushed
        logSegment->checkpoint(checkpointType);
        assert(pWALSegment->getMinDirtyPageId() == NULL_PAGE_ID);
    }

    if (checkpointType == CHECKPOINT_FLUSH_FUZZY) {
        MappedPageListenerPredicate pagePredicate(delegatingSegment);
        fuzzyCheckpointSet.setDelegatePagePredicate(pagePredicate);
        pCache->checkpointPages(fuzzyCheckpointSet, checkpointType);
        fuzzyCheckpointSet.finishCheckpoint();
        if (lastCheckpointLogPageId != NULL_PAGE_ID) {
            oldestLogPageId = logSegment->getPageSuccessor(
                lastCheckpointLogPageId);
        } else {
            oldestLogPageId = NULL_PAGE_ID;
        }
    } else {
        DelegatingSegment::delegatedCheckpoint(
            delegatingSegment, checkpointType);
        fuzzyCheckpointSet.clear();
        oldestLogPageId = NULL_PAGE_ID;
    }

    if (checkpointType == CHECKPOINT_DISCARD) {
        logSegment->checkpoint(checkpointType);
    }

    StrictMutexGuard mutexGuard(mutex);
    ++versionNumber;
    dataToLogMap.clear();
}
void SourceObject::HandleAboutToFinish ()
{
    qDebug () << Q_FUNC_INFO;
    auto timeoutIndicator = std::make_shared<std::atomic_bool> (false);

    NextSrcMutex_.lock ();
    if (NextSource_.IsEmpty ())
    {
        emit aboutToFinish (timeoutIndicator);
        NextSrcWC_.wait (&NextSrcMutex_, 500);
    }
    qDebug () << "wait finished; next source:" << NextSource_.ToUrl ()
            << "; current source:" << CurrentSource_.ToUrl ();

    std::shared_ptr<void> mutexGuard (nullptr,
            [this] (void*) { NextSrcMutex_.unlock (); });

    if (NextSource_.IsEmpty ())
    {
        *timeoutIndicator = true;
        qDebug () << Q_FUNC_INFO << "no next source set, will stop playing";
        return;
    }

    SetCurrentSource (NextSource_);
}
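// (Editorial note, not part of the original sources.)  The shared_ptr with a
// custom deleter above is a generic scope-guard idiom: the deleter runs
// exactly once when the guard leaves scope, on every return path, which keeps
// NextSrcMutex_ from staying locked after the early return.  A self-contained
// illustration with std::mutex; all names here are hypothetical:
#include <memory>
#include <mutex>

void guardedSection(std::mutex &m)
{
    m.lock();
    std::shared_ptr<void> unlockGuard(nullptr, [&m](void *) { m.unlock(); });
    // ... any early return or exception from here on still unlocks m ...
}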
void ExternalSortExecStreamImpl::deleteStoredRunInfo(uint iFirstRun, uint nRuns)
{
    StrictMutexGuard mutexGuard(storedRunMutex);
    storedRuns.erase(
        storedRuns.begin() + iFirstRun,
        storedRuns.begin() + iFirstRun + nRuns);
}
void SnapshotRandomAllocationSegment::deallocatePageRange(
    PageId startPageId, PageId endPageId)
{
    assert(!readOnlyCommittedData);
    permAssert(startPageId != NULL_PAGE_ID);
    permAssert(startPageId == endPageId);

    SXMutexExclusiveGuard mapGuard(modPageMapMutex);
    StrictMutexGuard mutexGuard(snapshotPageMapMutex);

    // Mark the pages in the page chain as deallocation-deferred.  The actual
    // deallocation of these pages will be done by an ALTER SYSTEM DEALLOCATE
    // OLD.
    //
    // Note that we cannot discard snapshot pages from cache because they
    // really haven't been freed yet and still may be referenced by other
    // threads.  The pages will be removed from the cache when they are
    // actually freed.
    PageId chainPageId = startPageId;
    VersionedPageEntry pageEntry;
    do {
        pVersionedRandomSegment->getLatestPageEntryCopy(chainPageId, pageEntry);
        DelegatingSegment::deallocatePageRange(chainPageId, chainPageId);
        incrPageUpdateCount(
            chainPageId,
            ANON_PAGE_OWNER_ID,
            ModifiedPageEntry::DEALLOCATED);
        snapshotPageMap.erase(chainPageId);

        chainPageId = pageEntry.versionChainPageId;
    } while (chainPageId != startPageId);
}
void WALSegment::notifyAfterPageFlush(CachePage &page)
{
    DelegatingSegment::notifyAfterPageFlush(page);
    PageId logPageId = translateBlockId(page.getBlockId());
    StrictMutexGuard mutexGuard(mutex);
    dirtyPageSet.erase(logPageId);
}
void LogicalTxnLog::checkpoint(
    LogicalTxnLogCheckpointMemento &memento,
    CheckpointType checkpointType)
{
    StrictMutexGuard mutexGuard(mutex);
    if (checkpointType == CHECKPOINT_DISCARD) {
        uncommittedTxns.clear();
        committedLongLogSegments.clear();
        return;
    }
    pOutputStream->getSegPos(memento.logPosition);
    memento.nUncommittedTxns = uncommittedTxns.size();
    memento.nextTxnId = nextTxnId;
    std::for_each(
        uncommittedTxns.begin(),
        uncommittedTxns.end(),
        boost::bind(&LogicalTxnLog::checkpointTxn, this, _1));
    pOutputStream->hardPageBreak();
    logSegmentAccessor.pSegment->checkpoint(checkpointType);
    if (checkpointType == CHECKPOINT_FLUSH_FUZZY) {
        // memento gets lastCheckpointMemento, and lastCheckpointMemento gets
        // new memento just created above
        std::swap(memento, lastCheckpointMemento);
    }
}
void ParallelExecStreamScheduler::tryExecuteManager()
{
    FENNEL_TRACE(TRACE_FINE, "manager task starting");
    for (;;) {
        StrictMutexGuard mutexGuard(mutex);
        while (completedQueue.empty() && (mgrState == MGR_RUNNING)
               && !pPendingExcn)
        {
            condition.wait(mutexGuard);
        }
        if (pPendingExcn) {
            return;
        }
        if (mgrState != MGR_RUNNING) {
            return;
        }
        while (!completedQueue.empty()) {
            ParallelExecResult result = completedQueue.front();
            completedQueue.pop_front();
            // don't hold lock while doing expensive state maintenance
            mutexGuard.unlock();
            processCompletedTask(result);
            if (pPendingExcn) {
                return;
            }
            mutexGuard.lock();
        }
    }
}
void WALSegment::notifyPageDirty(CachePage &page, bool bDataValid)
{
    DelegatingSegment::notifyPageDirty(page, bDataValid);
    PageId logPageId = translateBlockId(page.getBlockId());
    StrictMutexGuard mutexGuard(mutex);
    dirtyPageSet.insert(dirtyPageSet.end(), logPageId);
}
void ParallelExecStreamScheduler::signalSentinel(ExecStreamId sentinelId)
{
    alterNeighborInhibition(sentinelId, + 1);

    StrictMutexGuard mutexGuard(mutex);
    streamStateMap[sentinelId].state = SS_RUNNING;
    sentinelCondition.notify_all();
}
void DynamicParamManager::deleteParam(DynamicParamId dynamicParamId)
{
    StrictMutexGuard mutexGuard(mutex);
    assert(paramMap.find(dynamicParamId) != paramMap.end());
    paramMap.erase(dynamicParamId);
    assert(paramMap.find(dynamicParamId) == paramMap.end());
}
void LogicalTxnLog::commitTxn(SharedLogicalTxn pTxn)
{
    LogicalTxnEventMemento memento;
    memento.event = LogicalTxnEventMemento::EVENT_COMMIT;
    memento.txnId = pTxn->txnId;
    memento.cbActionLast = pTxn->svpt.cbActionPrev;
    memento.nParticipants = pTxn->participants.size();
    SharedSegment pSegment = pTxn->pOutputStream->getSegment();
    if (pSegment) {
        assert(pTxn->pOutputStream.unique());
        pTxn->pOutputStream->hardPageBreak();
        pTxn->pOutputStream->getSegOutputStream()->getSegPos(
            memento.logPosition);
        pTxn->pOutputStream.reset();
        pSegment->checkpoint(CHECKPOINT_FLUSH_AND_UNMAP);
        StrictMutexGuard mutexGuard(mutex);
        committedLongLogSegments.push_back(pSegment);
    } else {
        if (!pTxn->svpt.cbLogged) {
            // NOTE jvs 27-Feb-2006: "empty commit" is an important
            // optimization for queries in autocommit mode, where JDBC
            // specifies a commit whenever a cursor is closed.
            StrictMutexGuard mutexGuard(mutex);
            removeTxn(pTxn);
            return;
        }
        CompoundId::setPageId(memento.logPosition.segByteId, NULL_PAGE_ID);
        CompoundId::setByteOffset(
            memento.logPosition.segByteId, pTxn->svpt.cbLogged);
        memento.logPosition.cbOffset = pTxn->svpt.cbLogged;
    }
    memento.longLog = pSegment ? true : false;

    StrictMutexGuard mutexGuard(mutex);
    pOutputStream->writeValue(memento);
    if (!pSegment) {
        SharedByteInputStream pInputStream =
            pTxn->pOutputStream->getInputStream();
        uint cbActual;
        PConstBuffer pBuffer = pInputStream->getReadPointer(1, &cbActual);
        pOutputStream->writeBytes(pBuffer, cbActual);
    }
    commitTxnWithGroup(mutexGuard);
    removeTxn(pTxn);
}
PageId WALSegment::getMinDirtyPageId() const
{
    StrictMutexGuard mutexGuard(mutex);
    if (dirtyPageSet.empty()) {
        return NULL_PAGE_ID;
    }
    PageId minDirtyPageId = *(dirtyPageSet.begin());
    return minDirtyPageId;
}
void DynamicParamManager::decrementCounterParam(DynamicParamId dynamicParamId)
{
    StrictMutexGuard mutexGuard(mutex);
    DynamicParam &param = getParamInternal(dynamicParamId);
    assert(param.isCounter);
    int64_t *pCounter = reinterpret_cast<int64_t *>(param.pBuffer.get());
    (*pCounter)--;
}
void ParallelExecStreamScheduler::abort(ExecStreamGraph &graph)
{
    StrictMutexGuard mutexGuard(mutex);
    FENNEL_TRACE(TRACE_FINE, "abort requested");
    if (!pPendingExcn) {
        pPendingExcn.reset(new AbortExcn());
    }
    condition.notify_one();
}
void ParallelExecStreamScheduler::tryExecuteTask(ExecStream &stream)
{
    ExecStreamQuantum quantum;
    ExecStreamResult rc = executeStream(stream, quantum);

    ParallelExecResult result(stream.getStreamId(), rc);

    StrictMutexGuard mutexGuard(mutex);
    completedQueue.push_back(result);
    condition.notify_one();
}
void ParallelExecStreamScheduler::executeTask(ExecStream &stream)
{
    try {
        tryExecuteTask(stream);
    } catch (std::exception &ex) {
        StrictMutexGuard mutexGuard(mutex);
        if (!pPendingExcn) {
            pPendingExcn.reset(threadTracker.cloneExcn(ex));
        }
        condition.notify_one();
    } catch (...) {
        // REVIEW jvs 22-Jul-2008: panic instead?
        StrictMutexGuard mutexGuard(mutex);
        if (!pPendingExcn) {
            pPendingExcn.reset(new FennelExcn("Unknown error"));
        }
        condition.notify_one();
    }
}
void DynamicParamManager::createParam(
    DynamicParamId dynamicParamId,
    const TupleAttributeDescriptor &attrDesc,
    bool failIfExists)
{
    StrictMutexGuard mutexGuard(mutex);
    SharedDynamicParam param(new DynamicParam(attrDesc));
    createParam(dynamicParamId, param, failIfExists);
}
void VersionedSegment::prepareOnlineRecovery()
{
    // For simplicity, force entire log out to disk first, but don't discard
    // it, since we're about to read it during recovery.
    logSegment->checkpoint(CHECKPOINT_FLUSH_ALL);

    StrictMutexGuard mutexGuard(mutex);
    dataToLogMap.clear();
    oldestLogPageId = NULL_PAGE_ID;
}
void DynamicParamManager::writeParam(
    DynamicParamId dynamicParamId, const TupleDatum &src)
{
    StrictMutexGuard mutexGuard(mutex);
    DynamicParam &param = getParamInternal(dynamicParamId);
    if (src.pData) {
        assert(src.cbData <= param.getDesc().cbStorage);
    }
    param.datum.pData = param.pBuffer.get();
    param.datum.memCopyFrom(src);
}
void ExternalSortExecStreamImpl::storeRun(ExternalSortSubStream &subStream)
{
    FENNEL_TRACE(
        TRACE_FINE,
        "storing run " << storedRuns.size());

    boost::scoped_ptr<ExternalSortRunAccessor> pRunAccessor;
    pRunAccessor.reset(new ExternalSortRunAccessor(sortInfo));
    pRunAccessor->storeRun(subStream);

    StrictMutexGuard mutexGuard(storedRunMutex);
    storedRuns.push_back(pRunAccessor->getStoredRun());
}
ExternalSortRunLoader &ExternalSortExecStreamImpl::reserveRunLoader()
{
    StrictMutexGuard mutexGuard(runLoaderMutex);
    for (;;) {
        for (uint i = 0; i < nParallel; ++i) {
            ExternalSortRunLoader &runLoader = *(runLoaders[i]);
            if (!runLoader.runningParallelTask) {
                runLoader.runningParallelTask = true;
                return runLoader;
            }
        }
        runLoaderAvailable.wait(mutexGuard);
    }
}
PageId SnapshotRandomAllocationSegment::getSnapshotId(PageId pageId)
{
    StrictMutexGuard mutexGuard(snapshotPageMapMutex);

    // If possible, use the mapping we've previously cached
    PageMapConstIter pSnapshotPageId = snapshotPageMap.find(pageId);
    if (pSnapshotPageId != snapshotPageMap.end()) {
        return pSnapshotPageId->second;
    }

    VersionedPageEntry pageEntry;
    pVersionedRandomSegment->getLatestPageEntryCopy(pageId, pageEntry);

    // Handle the special case where there's no chain
    if (pageEntry.versionChainPageId == pageId) {
        assert(snapshotCsn >= pageEntry.allocationCsn);
        snapshotPageMap[pageId] = pageId;
        return pageId;
    }

    // If we have to walk through the page chain, then we need to be starting
    // from the anchor.  Note that there's no need to acquire the deallocation
    // mutex while walking through the page chain looking for the appropriate
    // snapshot page because we always start at the anchor and walk from
    // newer pages to older pages.  Therefore, we should never try reading
    // the pageEntry for an older page that's going to be deallocated.
    assert(pageId == getAnchorPageId(pageId));
    PageId chainPageId = pageEntry.versionChainPageId;
    do {
        pVersionedRandomSegment->getLatestPageEntryCopy(chainPageId, pageEntry);
        if (snapshotCsn >= pageEntry.allocationCsn) {
            // only consider uncommitted pageEntry's if they correspond to
            // the current txn
            if ((!readOnlyCommittedData
                    && pageEntry.ownerId == UNCOMMITTED_PAGE_OWNER_ID
                    && snapshotCsn == pageEntry.allocationCsn)
                || pageEntry.ownerId != UNCOMMITTED_PAGE_OWNER_ID)
            {
                snapshotPageMap[pageId] = chainPageId;
                return chainPageId;
            }
        }
        // permAssert to prevent an infinite loop
        permAssert(chainPageId != pageId);
        chainPageId = pageEntry.versionChainPageId;
    } while (true);
}
bool SimpleExecStreamGovernor::setResourceKnob(
    ExecStreamResourceKnobs const &knob, ExecStreamResourceKnobType knobType)
{
    StrictMutexGuard mutexGuard(mutex);

    switch (knobType) {
    case EXEC_KNOB_EXPECTED_CONCURRENT_STATEMENTS:
        knobSettings.expectedConcurrentStatements =
            knob.expectedConcurrentStatements;
        perGraphAllocation = computePerGraphAllocation();
        FENNEL_TRACE(
            TRACE_FINE,
            "Expected concurrent statements set to "
            << knobSettings.expectedConcurrentStatements
            << ". Per graph allocation is now " << perGraphAllocation
            << " cache pages.");
        break;

    case EXEC_KNOB_CACHE_RESERVE_PERCENTAGE:
        {
            // make sure we have enough unassigned pages to set aside the new
            // reserve amount
            double percent =
                (100 - knobSettings.cacheReservePercentage) / 100.0;
            uint totalPagesAvailable =
                (uint) ((resourcesAvailable.nCachePages
                    + resourcesAssigned.nCachePages) / percent);
            uint numReserve =
                totalPagesAvailable * knob.cacheReservePercentage / 100;
            if (totalPagesAvailable - numReserve
                < resourcesAssigned.nCachePages)
            {
                return false;
            }
            knobSettings.cacheReservePercentage = knob.cacheReservePercentage;
            resourcesAvailable.nCachePages =
                totalPagesAvailable - numReserve
                - resourcesAssigned.nCachePages;
            perGraphAllocation = computePerGraphAllocation();
            FENNEL_TRACE(
                TRACE_FINE,
                "Cache reserve percentage set to "
                << knobSettings.cacheReservePercentage
                << ". Per graph allocation is now " << perGraphAllocation
                << " cache pages.");
            break;
        }
    }
    return true;
}
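// Worked example for the reserve computation above (numbers assumed purely
// for illustration): with resourcesAvailable.nCachePages = 950,
// resourcesAssigned.nCachePages = 50, the old reserve at 5%, and the new knob
// at 10%:
//   percent             = (100 - 5) / 100.0   = 0.95
//   totalPagesAvailable = (950 + 50) / 0.95   = 1052  (uint truncation)
//   numReserve          = 1052 * 10 / 100     = 105
//   new nCachePages     = 1052 - 105 - 50     = 897
// The knob change is rejected only when totalPagesAvailable - numReserve
// would drop below the pages already assigned.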
ExecStreamBufAccessor &ParallelExecStreamScheduler::readStream(
    ExecStream &stream)
{
    FENNEL_TRACE(
        TRACE_FINE,
        "entering readStream " << stream.getName());

    ExecStreamId current = stream.getStreamId();
    ExecStreamGraphImpl &graphImpl =
        dynamic_cast<ExecStreamGraphImpl&>(*pGraph);
    ExecStreamGraphImpl::GraphRep const &graphRep = graphImpl.getGraphRep();

    // assert that we're reading from a designated output stream
    assert(boost::out_degree(current, graphRep) == 1);

    ExecStreamGraphImpl::Edge edge =
        *(boost::out_edges(current, graphRep).first);
    ExecStreamBufAccessor &bufAccessor = graphImpl.getBufAccessorFromEdge(edge);
    current = boost::target(edge, graphRep);
    assert(!graphImpl.getStreamFromVertex(current));

    if (bufAccessor.getState() == EXECBUF_EMPTY) {
        bufAccessor.requestProduction();
    } else if (bufAccessor.getState() != EXECBUF_UNDERFLOW) {
        // data or EOS already available
        return bufAccessor;
    }

    // please sir, I'd like some more
    ParallelExecResult result(current, EXECRC_BUF_UNDERFLOW);
    StrictMutexGuard mutexGuard(mutex);
    streamStateMap[current].state = SS_SLEEPING;
    completedQueue.push_back(result);
    condition.notify_one();
    while ((streamStateMap[current].state == SS_SLEEPING) && !pPendingExcn) {
        sentinelCondition.wait(mutexGuard);
    }
    if (pPendingExcn) {
        pPendingExcn->throwSelf();
    }

    return bufAccessor;
}
SharedLogicalTxn LogicalTxnLog::newLogicalTxn(
    SharedCacheAccessor pCacheAccessor)
{
    StrictMutexGuard mutexGuard(mutex);
    // Set up cache accessor so that all page locks will be taken out
    // with the new TxnId.  Just for sanity-checking, set up a quota to make
    // sure logging never locks more than two pages at a time.
    pCacheAccessor = SharedCacheAccessor(
        new QuotaCacheAccessor(
            SharedQuotaCacheAccessor(),
            pCacheAccessor,
            2));
    pCacheAccessor->setTxnId(nextTxnId);
    SharedLogicalTxn pTxn(
        new LogicalTxn(nextTxnId, shared_from_this(), pCacheAccessor));
    uncommittedTxns.push_back(pTxn);
    ++nextTxnId;
    return pTxn;
}