예제 #1
0
/**
 * Overwrites the record stored at 'loc' with the 'len' bytes starting at 'data'.
 *
 * The record must already exist: a failed lookup trips invariantWTOK. After the write, the
 * tracked data size is adjusted by the difference between the new and the old length and
 * cappedDeleteAsNeeded() is invoked. 'enforceQuota' and 'notifier' are not consulted here.
 *
 * Always returns 'loc'.
 */
StatusWith<RecordId> WiredTigerRecordStore::updateRecord(OperationContext* txn,
                                                         const RecordId& loc,
                                                         const char* data,
                                                         int len,
                                                         bool enforceQuota,
                                                         UpdateNotifier* notifier) {
    WiredTigerCursor cursorHolder(_uri, _tableId, true, txn);
    cursorHolder.assertInActiveTxn();
    WT_CURSOR* c = cursorHolder.get();
    invariant(c);

    // Position on the existing record so its current length can be read.
    c->set_key(c, _makeKey(loc));
    int ret = WT_OP_CHECK(c->search(c));
    invariantWTOK(ret);

    WT_ITEM previous;
    ret = c->get_value(c, &previous);
    invariantWTOK(ret);
    const int previousLength = previous.size;

    // Write the replacement value under the same key.
    WiredTigerItem replacement(data, len);
    c->set_key(c, _makeKey(loc));
    c->set_value(c, replacement.Get());
    ret = WT_OP_CHECK(c->insert(c));
    invariantWTOK(ret);

    _increaseDataSize(txn, len - previousLength);
    cappedDeleteAsNeeded(txn, loc);

    return StatusWith<RecordId>(loc);
}
예제 #2
0
/**
 * Finds the record at which a scan targeting 'startingPosition' should begin.
 *
 * Returns boost::none when the oplog hack is not enabled for this store, a null RecordId when
 * no record at or before 'startingPosition' is found, and otherwise the id of the record at
 * the cursor's final position.
 */
boost::optional<RecordId> WiredTigerRecordStore::oplogStartHack(
    OperationContext* txn, const RecordId& startingPosition) const {
    if (!_useOplogHack) {
        return boost::none;
    }

    _oplogSetStartHack(WiredTigerRecoveryUnit::get(txn));

    WiredTigerCursor cursor(_uri, _tableId, true, txn);
    WT_CURSOR* c = cursor.get();

    c->set_key(c, _makeKey(startingPosition));
    int cmp;
    int ret = WT_OP_CHECK(c->search_near(c, &cmp));

    // A positive 'cmp' means the cursor sits one past the requested position: step back.
    const bool landedAfterStart = (ret == 0 && cmp > 0);
    if (landedAfterStart) {
        ret = c->prev(c);
    }

    // Nothing is stored at or before 'startingPosition'.
    if (ret == WT_NOTFOUND) {
        return RecordId();
    }
    invariantWTOK(ret);

    int64_t key;
    ret = c->get_key(c, &key);
    invariantWTOK(ret);
    return _fromKey(key);
}
예제 #3
0
    /**
     * Returns the next record in the scan direction, or an empty optional at end of scan.
     *
     * Once _eof is set every call returns empty. _eof is latched when the WT cursor reports
     * WT_NOTFOUND or when the record reached fails the isVisible() check. On success
     * _lastReturnedId is set to the returned id. The returned Record points at the value
     * buffer handed back by the WT cursor; no copy is made here.
     */
    boost::optional<Record> next() final {
        if (_eof)
            return {};

        WT_CURSOR* c = _cursor->get();

        // Cleared below only when the initial seek already left the cursor on a record that
        // can be returned without stepping.
        bool mustAdvance = true;
        if (_lastReturnedId.isNull() && !_forward && _rs._isCapped) {
            // Nothing has been returned yet on a reverse scan of a capped store.
            // In this case we need to seek to the highest visible record.
            const RecordId reverseCappedInitialSeekPoint =
                _readUntilForOplog.isNull() ? _rs.lowestCappedHiddenRecord() : _readUntilForOplog;

            if (!reverseCappedInitialSeekPoint.isNull()) {
                c->set_key(c, _makeKey(reverseCappedInitialSeekPoint));
                int cmp;
                int seekRet = WT_OP_CHECK(c->search_near(c, &cmp));
                if (seekRet == WT_NOTFOUND) {
                    _eof = true;
                    return {};
                }
                invariantWTOK(seekRet);

                // If we landed at or past the lowest hidden record, we must advance to be in
                // the visible range.
                mustAdvance = _rs.isCappedHidden(reverseCappedInitialSeekPoint)
                    ? (cmp >= 0)
                    : (cmp > 0);  // No longer hidden.
            }
        }

        if (mustAdvance) {
            // Nothing after the next line can throw WCEs.
            // Note that an unpositioned (or eof) WT_CURSOR returns the first/last entry in the
            // table when you call next/prev.
            int advanceRet = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
            if (advanceRet == WT_NOTFOUND) {
                _eof = true;
                return {};
            }
            invariantWTOK(advanceRet);
        }

        int64_t key;
        invariantWTOK(c->get_key(c, &key));
        const RecordId id = _fromKey(key);

        // A record that is not visible ends the scan; it is not skipped.
        if (!isVisible(id)) {
            _eof = true;
            return {};
        }

        WT_ITEM value;
        invariantWTOK(c->get_value(c, &value));

        _lastReturnedId = id;
        return {{id, {static_cast<const char*>(value.data), static_cast<int>(value.size)}}};
    }
예제 #4
0
/**
 * Ends the active WT transaction and resets per-transaction state.
 *
 * When 'commit' is true the transaction is committed (after applying _commitTimestamp if one
 * is set); otherwise it is rolled back. Afterwards _lastTimestampSet, _prepareTimestamp,
 * _isOplogReader and _orderedCommit are reset and a new snapshot id is assigned.
 */
void WiredTigerRecoveryUnit::_txnClose(bool commit) {
    invariant(_isActive(), toString(_state));
    WT_SESSION* s = _session->getSession();
    if (_timer) {
        const int transactionTime = _timer->millis();
        // `serverGlobalParams.slowMS` can be set to values <= 0. In those cases, give logging a
        // break by never using a threshold below 1ms.
        if (transactionTime >= std::max(1, serverGlobalParams.slowMS)) {
            LOG(kSlowTransactionSeverity) << "Slow WT transaction. Lifetime of SnapshotId "
                                          << _mySnapshotId << " was " << transactionTime << "ms";
        }
    }

    int wtRet;
    if (commit) {
        if (!_commitTimestamp.isNull()) {
            // Attach the commit timestamp to the transaction before committing it.
            const std::string conf = "commit_timestamp=" + integerToHex(_commitTimestamp.asULL());
            invariantWTOK(s->timestamp_transaction(s, conf.c_str()));
            _isTimestamped = true;
        }

        // The commit result is held in wtRet and only checked further down.
        wtRet = s->commit_transaction(s, nullptr);
        LOG(3) << "WT commit_transaction for snapshot id " << _mySnapshotId;
    } else {
        wtRet = s->rollback_transaction(s, nullptr);
        invariant(!wtRet);
        LOG(3) << "WT rollback_transaction for snapshot id " << _mySnapshotId;
    }

    if (_isTimestamped) {
        if (!_orderedCommit) {
            // We only need to update oplog visibility where commits can be out-of-order with
            // respect to their assigned optime and such commits might otherwise be visible.
            // This should happen only on primary nodes.
            _oplogManager->triggerJournalFlush();
        }
        _isTimestamped = false;
    }
    // Checked only after the journal-flush trigger above has had a chance to run.
    invariantWTOK(wtRet);

    // NOTE(review): the message expression dereferences _lastTimestampSet; this relies on
    // invariant() building its message only when the condition fails — confirm.
    invariant(!_lastTimestampSet || _commitTimestamp.isNull(),
              str::stream() << "Cannot have both a _lastTimestampSet and a "
                               "_commitTimestamp. _lastTimestampSet: "
                            << _lastTimestampSet->toString()
                            << ". _commitTimestamp: "
                            << _commitTimestamp.toString());

    // We reset the _lastTimestampSet between transactions, since it is legal for one
    // transaction on a RecoveryUnit to call setTimestamp() and another to call
    // setCommitTimestamp().
    _lastTimestampSet = boost::none;

    _prepareTimestamp = Timestamp();
    _mySnapshotId = nextSnapshotId.fetchAndAdd(1);
    _isOplogReader = false;
    _orderedCommit = true;  // Default value is true; we assume all writes are ordered.
}
/**
 * Makes prior writes durable, by a journal flush when the engine is durable or by a
 * checkpoint otherwise, and notifies the journal listener with the token captured just
 * before the flush.
 *
 * With 'forceCheckpoint' set on a durable engine, a checkpoint is always taken and the call
 * does not coordinate with other waiters. Throws ShutdownInProgress (via uassert) when the
 * shutdown bit is set in _shuttingDown.
 */
void WiredTigerSessionCache::waitUntilDurable(bool forceCheckpoint) {
    // Increment _shuttingDown for the duration of this call (undone on scope exit). The value
    // read back is tested against kShuttingDownMask below.
    const int shuttingDown = _shuttingDown.fetchAndAdd(1);
    ON_BLOCK_EXIT([this] { _shuttingDown.fetchAndSubtract(1); });

    uassert(ErrorCodes::ShutdownInProgress,
            "Cannot wait for durability because a shutdown is in progress",
            !(shuttingDown & kShuttingDownMask));

    // When forcing a checkpoint with journaling enabled, don't synchronize with other
    // waiters, as a log flush is much cheaper than a full checkpoint.
    if (forceCheckpoint && _engine->isDurable()) {
        UniqueWiredTigerSession session = getSession();
        WT_SESSION* s = session->getSession();
        {
            // The token is taken before the checkpoint and reported after it, all under the
            // journal-listener mutex.
            stdx::unique_lock<stdx::mutex> lk(_journalListenerMutex);
            JournalListener::Token token = _journalListener->getToken();
            invariantWTOK(s->checkpoint(s, NULL));
            _journalListener->onDurable(token);
        }
        LOG(4) << "created checkpoint (forced)";
        return;
    }

    // Sample the sync counter before taking the mutex so a concurrent sync can be detected.
    uint32_t start = _lastSyncTime.load();
    // Do the remainder in a critical section that ensures only a single thread at a time
    // will attempt to synchronize.
    stdx::unique_lock<stdx::mutex> lk(_lastSyncMutex);
    uint32_t current = _lastSyncTime.loadRelaxed();  // synchronized with writes through mutex
    if (current != start) {
        // Someone else synced already since we read lastSyncTime, so we're done!
        return;
    }
    _lastSyncTime.store(current + 1);

    // Nobody has synched yet, so we have to sync ourselves.
    auto session = getSession();
    WT_SESSION* s = session->getSession();

    // This gets the token (OpTime) from the last write, before flushing (either the journal, or a
    // checkpoint), and then reports that token (OpTime) as a durable write.
    stdx::unique_lock<stdx::mutex> jlk(_journalListenerMutex);
    JournalListener::Token token = _journalListener->getToken();

    // Use the journal when available, or a checkpoint otherwise.
    if (_engine->isDurable()) {
        invariantWTOK(s->log_flush(s, "sync=on"));
        LOG(4) << "flushed journal";
    } else {
        invariantWTOK(s->checkpoint(s, NULL));
        LOG(4) << "created checkpoint";
    }
    _journalListener->onDurable(token);
}
예제 #6
0
/**
 * Finishes the active WT transaction, committing when 'commit' is true and rolling back
 * otherwise, then marks the unit inactive and assigns it a fresh snapshot id.
 */
void WiredTigerRecoveryUnit::_txnClose(bool commit) {
    invariant(_active);
    WT_SESSION* s = _session->getSession();

    if (!commit) {
        invariantWTOK(s->rollback_transaction(s, NULL));
        LOG(3) << "WT rollback_transaction for snapshot id " << _mySnapshotId;
    } else {
        invariantWTOK(s->commit_transaction(s, NULL));
        LOG(3) << "WT commit_transaction for snapshot id " << _mySnapshotId;
    }

    _mySnapshotId = nextSnapshotId.fetchAndAdd(1);
    _active = false;
}
예제 #7
0
    /**
     * Re-establishes the cursor's position at _lastReturnedId.
     *
     * Returns true when the scan can continue (including when it is already at EOF or has not
     * returned anything yet). For a capped store, returns false and latches _eof when the
     * last-returned record can no longer be found exactly. For a non-capped store a missing
     * record is tolerated: the cursor is left so that the following next() call yields the
     * neighbouring record in scan order.
     */
    bool restore() final {
        if (!_cursor)
            _cursor.emplace(_rs.getURI(), _rs.tableId(), true, _txn);

        // This will ensure an active session exists, so any restored cursors will bind to it
        invariant(WiredTigerRecoveryUnit::get(_txn)->getSession(_txn) == _cursor->getSession());

        // If we've hit EOF, then this iterator is done and need not be restored.
        if (_eof)
            return true;

        // Nothing has been returned yet, so there is no position to go back to.
        if (_lastReturnedId.isNull())
            return true;

        WT_CURSOR* c = _cursor->get();
        c->set_key(c, _makeKey(_lastReturnedId));

        int cmp;
        int ret = WT_OP_CHECK(c->search_near(c, &cmp));
        if (ret == WT_NOTFOUND) {
            // Nothing was found at all: the scan is over, and for a capped store that is
            // reported as a failed restore.
            _eof = true;
            return !_rs._isCapped;
        }
        invariantWTOK(ret);

        if (cmp == 0)
            return true;  // Landed right where we left off.

        if (_rs._isCapped) {
            // Doc was deleted either by cappedDeleteAsNeeded() or cappedTruncateAfter().
            // It is important that we error out in this case so that consumers don't
            // silently get 'holes' when scanning capped collections. We don't make
            // this guarantee for normal collections so it is ok to skip ahead in that case.
            _eof = true;
            return false;
        }

        if (_forward && cmp > 0) {
            // We landed after where we were. Move back one so that next() will return this
            // document.
            ret = WT_OP_CHECK(c->prev(c));
        } else if (!_forward && cmp < 0) {
            // Do the opposite for reverse cursors.
            ret = WT_OP_CHECK(c->next(c));
        }
        // WT_NOTFOUND from the repositioning step is tolerated; any other failure is fatal.
        if (ret != WT_NOTFOUND)
            invariantWTOK(ret);

        return true;
    }
예제 #8
0
/**
 * Finishes the active WT transaction (commit when 'commit' is true, rollback otherwise),
 * marks the unit inactive, bumps the transaction counter and drops the held ticket.
 */
void WiredTigerRecoveryUnit::_txnClose(bool commit) {
    invariant(_active);
    WT_SESSION* s = _session->getSession();

    if (!commit) {
        invariantWTOK(s->rollback_transaction(s, NULL));
        LOG(2) << "WT rollback_transaction";
    } else {
        invariantWTOK(s->commit_transaction(s, NULL));
        LOG(2) << "WT commit_transaction";
    }

    _myTransactionCount++;
    _active = false;
    _ticket.reset(NULL);
}
예제 #9
0
    /**
     * Advances the underlying WT cursor and returns the record it lands on, or an empty
     * optional once the cursor reports WT_NOTFOUND. The returned Record points at the value
     * buffer handed back by the cursor; no copy is made here.
     */
    boost::optional<Record> next() final {
        const int advanceRet = WT_OP_CHECK(_cursor->next(_cursor));
        if (advanceRet == WT_NOTFOUND) {
            return {};
        }
        invariantWTOK(advanceRet);

        int64_t key;
        invariantWTOK(_cursor->get_key(_cursor, &key));
        const RecordId id = _fromKey(key);

        WT_ITEM value;
        invariantWTOK(_cursor->get_value(_cursor, &value));

        const char* const bytes = static_cast<const char*>(value.data);
        const int numBytes = static_cast<int>(value.size);
        return {{id, {bytes, numBytes}}};
    }
예제 #10
0
    void WiredTigerKVEngine::syncSizeInfo() const {
        if ( !_sizeStorer )
            return;

        try {
            WiredTigerSession session( _conn, -1 );
            WT_SESSION* s = session.getSession();
            invariantWTOK( s->begin_transaction( s, "sync=true" ) );
            _sizeStorer->storeInto( &session, _sizeStorerUri );
            invariantWTOK( s->commit_transaction( s, NULL ) );
        }
        catch ( const WriteConflictException& de ) {
            // ignore, it means someone else is doing it
        }
    }
예제 #11
0
 // Finishes the active WT transaction and marks the unit inactive. On commit, when this unit
 // was syncing, awaitCommitData is told that a sync happened.
 void WiredTigerRecoveryUnit::_txnClose( bool commit ) {
     invariant( _active );
     WT_SESSION *s = _session->getSession();
     if ( !commit ) {
         invariantWTOK( s->rollback_transaction(s, NULL) );
         LOG(2) << "WT rollback_transaction";
     }
     else {
         invariantWTOK( s->commit_transaction(s, NULL) );
         LOG(2) << "WT commit_transaction";
         if ( _syncing ) {
             awaitCommitData.syncHappend();
         }
     }
     _active = false;
 }
예제 #12
0
/**
 * Loads every statistic exposed by the WT statistics cursor at 'uri' into _stats.
 *
 * 'session' must be non-null. 'config' is passed as the cursor configuration; an empty string
 * is passed as a null configuration. Throws CursorNotFound (via uassert) if the cursor cannot
 * be opened. The cursor is reset after reading and closed on scope exit.
 */
void WiredTigerOperationStats::fetchStats(WT_SESSION* session,
                                          const std::string& uri,
                                          const std::string& config) {
    invariant(session);

    WT_CURSOR* c = nullptr;
    const char* cursorConfig = config.empty() ? nullptr : config.c_str();
    int ret = session->open_cursor(session, uri.c_str(), nullptr, cursorConfig, &c);
    uassert(ErrorCodes::CursorNotFound, "Unable to open statistics cursor", ret == 0);

    invariant(c);
    ON_BLOCK_EXIT([&] { c->close(c); });

    const char* desc;
    uint64_t value;
    // Statistics cursors use a 32-bit integer key. Reading it into a variable of exactly that
    // width means get_key() writes every byte of 'key' on both little- and big-endian
    // targets, so no uninitialized half is left behind and no per-architecture shift is
    // needed.
    int32_t key;
    while (c->next(c) == 0 && c->get_key(c, &key) == 0) {
        fassert(51035, c->get_value(c, &desc, nullptr, &value) == 0);
        _stats[key] = WiredTigerUtil::castStatisticsValue<long long>(value);
    }

    // Reset the statistics so that the next fetch gives the recent values.
    invariantWTOK(c->reset(c));
}
예제 #13
0
    bool WiredTigerKVEngine::_drop( const StringData& ident ) {
        string uri = _uri( ident );

        WiredTigerSession session( _conn, -1 );

        int ret = session.getSession()->drop( session.getSession(), uri.c_str(), "force" );
        LOG(1) << "WT drop of  " << uri << " res " << ret;

        if ( ret == 0 ) {
            // yay, it worked
            return true;
        }

        if ( ret == EBUSY ) {
            // this is expected, queue it up
            {
                boost::mutex::scoped_lock lk( _identToDropMutex );
                _identToDrop.insert( uri );
                _epoch++;
            }
            _sessionCache->closeAll();
            return false;
        }

        invariantWTOK( ret );
        return false;
    }
/**
 * Closes the manager's WT session, if one is open, and clears the pointer. Runs under _mutex.
 */
void WiredTigerSnapshotManager::shutdown() {
    stdx::lock_guard<stdx::mutex> lock(_mutex);
    if (_session) {
        invariantWTOK(_session->close(_session, NULL));
        _session = nullptr;
    }
}
예제 #15
0
/**
 * Removes every record from the store and zeroes the tracked record count and data size.
 * An empty store is left untouched. Always returns Status::OK().
 */
Status WiredTigerRecordStore::truncate(OperationContext* txn) {
    WiredTigerCursor firstRecord(_uri, _tableId, true, txn);
    WT_CURSOR* start = firstRecord.get();
    int ret = WT_OP_CHECK(start->next(start));

    // WT_NOTFOUND means the collection is empty, so there is nothing to truncate.
    if (ret != WT_NOTFOUND) {
        invariantWTOK(ret);

        // Truncate from the first record through the end of the table.
        WT_SESSION* session = WiredTigerRecoveryUnit::get(txn)->getSession(txn)->getSession();
        invariantWTOK(WT_OP_CHECK(session->truncate(session, NULL, start, NULL, NULL)));
        _changeNumRecords(txn, -numRecords(txn));
        _increaseDataSize(txn, -dataSize(txn));
    }

    return Status::OK();
}
예제 #16
0
 // Begins a WT transaction on this unit's session and marks the unit active. The transaction
 // is opened with "sync=true" when this unit is already syncing or when any thread is
 // currently waiting for a sync.
 void WiredTigerRecoveryUnit::_txnOpen() {
     invariant( !_active );
     WT_SESSION *s = _session->getSession();
     if ( !_syncing ) {
         _syncing = awaitCommitData.numWaitingForSync.load() > 0;
     }
     invariantWTOK( s->begin_transaction(s, _syncing ? "sync=true" : NULL) );
     LOG(2) << "WT begin_transaction";
     _timer.reset();
     _active = true;
 }
예제 #17
0
    /**
     * Positions the cursor exactly on 'id' and returns that record.
     *
     * When no such record exists, _eof is set and an empty optional is returned. On success
     * _eof is cleared and _lastReturnedId becomes 'id'. The returned Record points at the
     * value buffer handed back by the WT cursor; no copy is made here.
     */
    boost::optional<Record> seekExact(const RecordId& id) final {
        WT_CURSOR* c = _cursor->get();
        c->set_key(c, _makeKey(id));

        // Nothing after the next line can throw WCEs.
        const int seekRet = WT_OP_CHECK(c->search(c));
        if (seekRet == WT_NOTFOUND) {
            _eof = true;
            return {};
        }
        invariantWTOK(seekRet);

        WT_ITEM value;
        invariantWTOK(c->get_value(c, &value));

        _eof = false;
        _lastReturnedId = id;

        const char* const bytes = static_cast<const char*>(value.data);
        const int numBytes = static_cast<int>(value.size);
        return {{id, {bytes, numBytes}}};
    }
예제 #18
0
// Copies the value at the cursor's current position into a newly allocated buffer and returns
// it as a RecordData. The cursor must already be positioned.
RecordData WiredTigerRecordStore::_getData(const WiredTigerCursor& cursor) const {
    WT_ITEM value;
    int ret = cursor->get_value(cursor.get(), &value);
    invariantWTOK(ret);

    const size_t numBytes = value.size;
    SharedBuffer owned = SharedBuffer::allocate(numBytes);
    memcpy(owned.get(), value.data, numBytes);
    return RecordData(owned, numBytes);
}
/**
 * Opens a WT session on 'conn' configured with "isolation=snapshot" and records the owning
 * 'cache' and the 'epoch' it was created in. All cursor bookkeeping counters start at zero.
 * Failure to open the session trips invariantWTOK.
 */
WiredTigerSession::WiredTigerSession(WT_CONNECTION* conn, WiredTigerSessionCache* cache, int epoch)
    : _epoch(epoch),
      _cache(cache),
      _session(NULL),
      _cursorGen(0),
      _cursorsCached(0),
      _cursorsOut(0) {
    // _session is filled in by open_session.
    invariantWTOK(conn->open_session(conn, NULL, "isolation=snapshot", &_session));
}
예제 #20
0
    // Syncs the size info and then takes a WT checkpoint on a dedicated session.
    // 'sync' is not consulted. Always returns 1.
    int WiredTigerKVEngine::flushAllFiles( bool sync ) {
        LOG(1) << "WiredTigerKVEngine::flushAllFiles";
        syncSizeInfo();

        {
            WiredTigerSession session( _conn, -1 );
            WT_SESSION* s = session.getSession();
            invariantWTOK( s->checkpoint(s, NULL ) );
        }

        return 1;
    }
/**
 * Returns a checked-out cursor to this session's cache.
 *
 * The cursor is reset and pushed to the front of the cache stamped with the current
 * generation. Cached cursors at the back whose generation lags the current one by more than
 * the configured cache size are then closed.
 */
void WiredTigerSession::releaseCursor(uint64_t id, WT_CURSOR* cursor) {
    invariant(_session);
    invariant(cursor);
    _cursorsOut--;

    invariantWTOK(cursor->reset(cursor));

    // Cursors are pushed to the front of the list and removed from the back
    _cursors.push_front(WiredTigerCachedCursor(id, _cursorGen++, cursor));

    // A negative value for wiredTigercursorCacheSize means to use hybrid caching.
    const std::uint32_t cacheLimit = abs(kWiredTigerCursorCacheSize.load());

    for (;;) {
        if (_cursors.empty()) {
            break;
        }
        if (_cursorGen - _cursors.back()._gen <= cacheLimit) {
            break;
        }
        cursor = _cursors.back()._cursor;
        _cursors.pop_back();
        invariantWTOK(cursor->close(cursor));
    }
}
/**
 * Asks WT to drop every named snapshot before the committed snapshot. Does nothing when no
 * committed snapshot is set. Runs under _mutex.
 */
void WiredTigerSnapshotManager::cleanupUnneededSnapshots() {
    stdx::lock_guard<stdx::mutex> lock(_mutex);

    if (_committedSnapshot) {
        const std::string config = str::stream() << "drop=(before="
                                                 << _committedSnapshot->asU64() << ')';
        invariantWTOK(_session->snapshot(_session, config.c_str()));
    }
}
/**
 * Closes every non-null cursor held in this session's cache and then empties the cache.
 */
void WiredTigerSession::closeAllCursors() {
    invariant(_session);
    for (auto& cached : _cursors) {
        WT_CURSOR* cursor = cached._cursor;
        if (!cursor) {
            continue;
        }
        invariantWTOK(cursor->close(cursor));
    }
    _cursors.clear();
}
예제 #24
0
    // Acquires a ticket for 'opCtx', begins a WT transaction on this unit's session and marks
    // the unit active. The transaction is opened with "sync=true" when this unit is already
    // syncing or when any thread is currently waiting for a sync.
    void WiredTigerRecoveryUnit::_txnOpen(OperationContext* opCtx) {
        invariant( !_active );
        _getTicket(opCtx);

        WT_SESSION *s = _session->getSession();
        if ( !_syncing ) {
            _syncing = waitUntilDurableData.numWaitingForSync.load() > 0;
        }
        invariantWTOK( s->begin_transaction(s, _syncing ? "sync=true" : NULL) );
        LOG(2) << "WT begin_transaction";
        _timer.reset();
        _active = true;
    }
/**
 * Begins a transaction on 'session' that reads as of 'pointInTime', by passing a
 * "read_timestamp=<hex>" configuration string to begin_transaction.
 */
void WiredTigerSnapshotManager::beginTransactionAtTimestamp(SnapshotName pointInTime,
                                                            WT_SESSION* session) const {
    // Room for "read_timestamp=" (15 characters), up to 16 hexadecimal digits for a 64-bit
    // value (8 bytes, 2 digits each) and the trailing null.
    char readTSConfigString[15 + (8 * 2) + 1];

    const unsigned long long timestamp = static_cast<unsigned long long>(pointInTime.asU64());
    auto size = std::snprintf(
        readTSConfigString, sizeof(readTSConfigString), "read_timestamp=%llx", timestamp);

    // Guards against truncation; a negative snprintf result also fails this after the cast.
    invariant(static_cast<std::size_t>(size) < sizeof(readTSConfigString));
    invariantWTOK(session->begin_transaction(session, readTSConfigString));
}
예제 #26
0
    /**
     * Ensures the "next_random" cursor exists, opening it on the operation's session when
     * needed. Always returns true.
     */
    bool restore() final {
        // We can't use the CursorCache since this cursor needs a special config string.
        // The session is fetched even when the cursor already exists.
        WT_SESSION* session = WiredTigerRecoveryUnit::get(_txn)->getSession(_txn)->getSession();

        if (_cursor) {
            return true;
        }

        invariantWTOK(
            session->open_cursor(session, _rs->_uri.c_str(), NULL, "next_random", &_cursor));
        invariant(_cursor);
        return true;
    }
예제 #27
0
/**
 * Returns the data of the record stored at 'loc'.
 *
 * Raises assertion 28556 (massert) when no such record exists; any other lookup failure
 * trips invariantWTOK.
 */
RecordData WiredTigerRecordStore::dataFor(OperationContext* txn, const RecordId& loc) const {
    WiredTigerCursor cursorHolder(_uri, _tableId, true, txn);
    WT_CURSOR* c = cursorHolder.get();
    invariant(c);

    c->set_key(c, _makeKey(loc));
    int ret = WT_OP_CHECK(c->search(c));

    const bool found = (ret != WT_NOTFOUND);
    massert(28556, "Didn't find RecordId in WiredTigerRecordStore", found);
    invariantWTOK(ret);

    return _getData(cursorHolder);
}
예제 #28
0
/**
 * Removes the record stored at 'loc' and updates the tracked record count and data size.
 * The record must exist: a failed lookup trips invariantWTOK.
 */
void WiredTigerRecordStore::deleteRecord(OperationContext* txn, const RecordId& loc) {
    WiredTigerCursor cursorHolder(_uri, _tableId, true, txn);
    cursorHolder.assertInActiveTxn();
    WT_CURSOR* c = cursorHolder.get();

    c->set_key(c, _makeKey(loc));
    int ret = WT_OP_CHECK(c->search(c));
    invariantWTOK(ret);

    // Capture the record's length before removing it, for the size accounting below.
    WT_ITEM doomed;
    ret = c->get_value(c, &doomed);
    invariantWTOK(ret);
    const int removedBytes = doomed.size;

    ret = WT_OP_CHECK(c->remove(c));
    invariantWTOK(ret);

    _changeNumRecords(txn, -1);
    _increaseDataSize(txn, -removedBytes);
}
예제 #29
0
/**
 * Runs a WT compact of this store's table with "timeout=0", on a session borrowed from the
 * session cache and released afterwards. 'adaptor', 'options' and 'stats' are not consulted.
 * A compact failure trips invariantWTOK; otherwise Status::OK() is returned.
 */
Status WiredTigerRecordStore::compact(OperationContext* txn,
                                      RecordStoreCompactAdaptor* adaptor,
                                      const CompactOptions* options,
                                      CompactStats* stats) {
    WiredTigerSessionCache* sessionCache = WiredTigerRecoveryUnit::get(txn)->getSessionCache();
    WiredTigerSession* borrowed = sessionCache->getSession();
    WT_SESSION* wtSession = borrowed->getSession();

    int ret = wtSession->compact(wtSession, getURI().c_str(), "timeout=0");
    invariantWTOK(ret);

    sessionCache->releaseSession(borrowed);
    return Status::OK();
}
예제 #30
0
void WiredTigerRecoveryUnit::prepareUnitOfWork() {
    invariant(_inUnitOfWork(), toString(_state));
    invariant(!_prepareTimestamp.isNull());

    auto session = getSession();
    WT_SESSION* s = session->getSession();

    LOG(1) << "preparing transaction at time: " << _prepareTimestamp;

    const std::string conf = "prepare_timestamp=" + integerToHex(_prepareTimestamp.asULL());
    // Prepare the transaction.
    invariantWTOK(s->prepare_transaction(s, conf.c_str()));
}