Пример #1
0
PlanStage::StageState MultiIteratorStage::work(WorkingSetID* out) {
    if (_collection == NULL)
        return PlanStage::DEAD;

    // The RecordId we're about to look at it might not be in memory. In this case
    // we request a yield while we fetch the document.
    if (!_iterators.empty()) {
        RecordId curr = _iterators.back()->curr();
        if (!curr.isNull()) {
            std::auto_ptr<RecordFetcher> fetcher(_collection->documentNeedsFetch(_txn, curr));
            if (NULL != fetcher.get()) {
                WorkingSetMember* member = _ws->get(_wsidForFetch);
                member->loc = curr;
                // Pass the RecordFetcher off to the WSM on which we're performing the fetch.
                member->setFetcher(fetcher.release());
                *out = _wsidForFetch;
                return NEED_FETCH;
            }
        }
    }

    RecordId next = _advance();
    if (next.isNull())
        return PlanStage::IS_EOF;

    *out = _ws->allocate();
    WorkingSetMember* member = _ws->get(*out);
    member->loc = next;
    member->obj = _collection->docFor(_txn, next);
    member->state = WorkingSetMember::LOC_AND_OBJ;
    return PlanStage::ADVANCED;
}
Пример #2
0
    PlanStage::StageState OplogStart::workExtentHopping(WorkingSetID* out) {
        if (_done || _subIterators.empty()) {
            return PlanStage::IS_EOF;
        }

        // we work from the back to the front since the back has the newest data.
        const RecordId loc = _subIterators.back()->curr();
        if (loc.isNull()) return PlanStage::NEED_TIME;

        // TODO: should we ever try and return NEED_FETCH here?
        const BSONObj obj = _subIterators.back()->dataFor(loc).releaseToBson();
        if (!_filter->matchesBSON(obj)) {
            _done = true;
            WorkingSetID id = _workingSet->allocate();
            WorkingSetMember* member = _workingSet->get(id);
            member->loc = loc;
            member->obj = Snapshotted<BSONObj>(_txn->recoveryUnit()->getSnapshotId(), obj);
            member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
            *out = id;
            return PlanStage::ADVANCED;
        }

        _subIterators.popAndDeleteBack();
        return PlanStage::NEED_TIME;
    }
Пример #3
0
bool Helpers::findById(OperationContext* opCtx,
                       Database* database,
                       StringData ns,
                       BSONObj query,
                       BSONObj& result,
                       bool* nsFound,
                       bool* indexFound) {
    invariant(database);

    Collection* collection = database->getCollection(opCtx, ns);
    if (!collection) {
        return false;
    }

    if (nsFound)
        *nsFound = true;

    IndexCatalog* catalog = collection->getIndexCatalog();
    const IndexDescriptor* desc = catalog->findIdIndex(opCtx);

    if (!desc)
        return false;

    if (indexFound)
        *indexFound = 1;

    RecordId loc = catalog->getIndex(desc)->findSingle(opCtx, query["_id"].wrap());
    if (loc.isNull())
        return false;
    result = collection->docFor(opCtx, loc).value();
    return true;
}
Пример #4
0
    boost::optional<Record> next() final {
        if (_eof)
            return {};

        WT_CURSOR* c = _cursor->get();

        bool mustAdvance = true;
        if (_lastReturnedId.isNull() && !_forward && _rs._isCapped) {
            // In this case we need to seek to the highest visible record.
            const RecordId reverseCappedInitialSeekPoint =
                _readUntilForOplog.isNull() ? _rs.lowestCappedHiddenRecord() : _readUntilForOplog;

            if (!reverseCappedInitialSeekPoint.isNull()) {
                c->set_key(c, _makeKey(reverseCappedInitialSeekPoint));
                int cmp;
                int seekRet = WT_OP_CHECK(c->search_near(c, &cmp));
                if (seekRet == WT_NOTFOUND) {
                    _eof = true;
                    return {};
                }
                invariantWTOK(seekRet);

                // If we landed at or past the lowest hidden record, we must advance to be in
                // the visible range.
                mustAdvance = _rs.isCappedHidden(reverseCappedInitialSeekPoint)
                    ? (cmp >= 0)
                    : (cmp > 0);  // No longer hidden.
            }
        }

        if (mustAdvance) {
            // Nothing after the next line can throw WCEs.
            // Note that an unpositioned (or eof) WT_CURSOR returns the first/last entry in the
            // table when you call next/prev.
            int advanceRet = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
            if (advanceRet == WT_NOTFOUND) {
                _eof = true;
                return {};
            }
            invariantWTOK(advanceRet);
        }

        int64_t key;
        invariantWTOK(c->get_key(c, &key));
        const RecordId id = _fromKey(key);

        if (!isVisible(id)) {
            _eof = true;
            return {};
        }

        WT_ITEM value;
        invariantWTOK(c->get_value(c, &value));

        _lastReturnedId = id;
        return {{id, {static_cast<const char*>(value.data), static_cast<int>(value.size)}}};
    }
Пример #5
0
 int nExtents() const {
     int count = 0;
     for ( RecordId extLoc = nsd()->firstExtent();
             !extLoc.isNull();
             extLoc = extentManager()->getExtent(extLoc)->xnext ) {
         ++count;
     }
     return count;
 }
Пример #6
0
RecordId MultiIteratorStage::_advance() {
    while (!_iterators.empty()) {
        RecordId out = _iterators.back()->getNext();
        if (!out.isNull())
            return out;

        _iterators.popAndDeleteBack();
    }

    return RecordId();
}
Пример #7
0
 /* fetch a single object from collection ns that matches query
    set your db SavedContext first
 */
 bool Helpers::findOne(OperationContext* txn,
                       Collection* collection, 
                       const BSONObj &query, 
                       BSONObj& result, 
                       bool requireIndex) {
     RecordId loc = findOne( txn, collection, query, requireIndex );
     if ( loc.isNull() )
         return false;
     result = collection->docFor(txn, loc).value();
     return true;
 }
Пример #8
0
void ReplicationRecoveryImpl::_truncateOplogTo(OperationContext* opCtx,
                                               Timestamp truncateTimestamp) {
    Timer timer;
    const NamespaceString oplogNss(NamespaceString::kRsOplogNamespace);
    AutoGetDb autoDb(opCtx, oplogNss.db(), MODE_IX);
    Lock::CollectionLock oplogCollectionLoc(opCtx->lockState(), oplogNss.ns(), MODE_X);
    Collection* oplogCollection = autoDb.getDb()->getCollection(opCtx, oplogNss);
    if (!oplogCollection) {
        fassertFailedWithStatusNoTrace(
            34418,
            Status(ErrorCodes::NamespaceNotFound,
                   str::stream() << "Can't find " << NamespaceString::kRsOplogNamespace.ns()));
    }

    // Scan through oplog in reverse, from latest entry to first, to find the truncateTimestamp.
    RecordId oldestIDToDelete;  // Non-null if there is something to delete.
    auto oplogRs = oplogCollection->getRecordStore();
    auto oplogReverseCursor = oplogRs->getCursor(opCtx, /*forward=*/false);
    size_t count = 0;
    while (auto next = oplogReverseCursor->next()) {
        const BSONObj entry = next->data.releaseToBson();
        const RecordId id = next->id;
        count++;

        const auto tsElem = entry["ts"];
        if (count == 1) {
            if (tsElem.eoo())
                LOG(2) << "Oplog tail entry: " << redact(entry);
            else
                LOG(2) << "Oplog tail entry ts field: " << tsElem;
        }

        if (tsElem.timestamp() < truncateTimestamp) {
            // If count == 1, that means that we have nothing to delete because everything in the
            // oplog is < truncateTimestamp.
            if (count != 1) {
                invariant(!oldestIDToDelete.isNull());
                oplogCollection->cappedTruncateAfter(opCtx, oldestIDToDelete, /*inclusive=*/true);
            }
            log() << "Replication recovery oplog truncation finished in: " << timer.millis()
                  << "ms";
            return;
        }

        oldestIDToDelete = id;
    }

    severe() << "Reached end of oplog looking for oplog entry before " << truncateTimestamp.toBSON()
             << " but couldn't find any after looking through " << count << " entries.";
    fassertFailedNoTrace(40296);
}
Пример #9
0
PlanStage::StageState IDHackStage::doWork(WorkingSetID* out) {
    if (_done) {
        return PlanStage::IS_EOF;
    }

    WorkingSetID id = WorkingSet::INVALID_ID;
    try {
        // Look up the key by going directly to the index.
        RecordId recordId = indexAccessMethod()->findSingle(getOpCtx(), _key);

        // Key not found.
        if (recordId.isNull()) {
            _done = true;
            return PlanStage::IS_EOF;
        }

        ++_specificStats.keysExamined;
        ++_specificStats.docsExamined;

        // Create a new WSM for the result document.
        id = _workingSet->allocate();
        WorkingSetMember* member = _workingSet->get(id);
        member->recordId = recordId;
        _workingSet->transitionToRecordIdAndIdx(id);

        if (!_recordCursor)
            _recordCursor = collection()->getCursor(getOpCtx());

        // Find the document associated with 'id' in the collection's record store.
        if (!WorkingSetCommon::fetch(getOpCtx(), _workingSet, id, _recordCursor)) {
            // We didn't find a document with RecordId 'id'.
            _workingSet->free(id);
            _commonStats.isEOF = true;
            _done = true;
            return IS_EOF;
        }

        return advance(id, member, out);
    } catch (const WriteConflictException&) {
        // Restart at the beginning on retry.
        _recordCursor.reset();
        if (id != WorkingSet::INVALID_ID)
            _workingSet->free(id);

        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }
}
bool TerarkDbRecordStore::findRecord(OperationContext* txn,
								   const RecordId& id,
								   RecordData* out) const {
	if (id.isNull())
		return false;
    llong recIdx = id.repr() - 1;
	CompositeTable* tab = m_table->m_tab.get();
    auto& td = m_table->getMyThreadData();
    tab->getValue(recIdx, &td.m_buf, &*td.m_dbCtx);
    SharedBuffer bson = td.m_coder.decode(&tab->rowSchema(), td.m_buf);

//  size_t bufsize = sizeof(SharedBuffer::Holder) + bson.objsize();
    int bufsize = ConstDataView(bson.get()).read<LittleEndian<int>>();
    *out = RecordData(bson, bufsize);
    return true;
}
Пример #11
0
 int nRecords() const {
     int count = 0;
     const Extent* ext;
     for ( RecordId extLoc = nsd()->firstExtent();
             !extLoc.isNull();
             extLoc = ext->xnext) {
         ext = extentManager()->getExtent(extLoc);
         int fileNo = ext->firstRecord.a();
         if ( fileNo == -1 )
             continue;
         for ( int recOfs = ext->firstRecord.getOfs(); recOfs != RecordId::NullOfs;
               recOfs = recordStore()->recordFor(RecordId(fileNo, recOfs))->nextOfs() ) {
             ++count;
         }
     }
     ASSERT_EQUALS( count, nsd()->numRecords() );
     return count;
 }
Пример #12
0
 InMemoryRecordIterator::InMemoryRecordIterator(OperationContext* txn,
                                                const InMemoryRecordStore::Records& records,
                                                const InMemoryRecordStore& rs,
                                                RecordId start,
                                                bool tailable)
         : _txn(txn),
           _tailable(tailable),
           _lastLoc(RecordId::min()),
           _killedByInvalidate(false),
           _records(records),
           _rs(rs) {
     if (start.isNull()) {
         _it = _records.begin();
     }
     else {
         _it = _records.find(start);
         invariant(_it != _records.end());
     }
 }
CollectionOptions MMAPV1DatabaseCatalogEntry::getCollectionOptions(OperationContext* txn,
                                                                   RecordId rid) const {
    CollectionOptions options;

    if (rid.isNull()) {
        return options;
    }

    RecordStoreV1Base* rs = _getNamespaceRecordStore();
    invariant(rs);

    RecordData data;
    invariant(rs->findRecord(txn, rid, &data));

    if (data.releaseToBson()["options"].isABSONObj()) {
        Status status = options.parse(data.releaseToBson()["options"].Obj());
        fassert(18523, status);
    }
    return options;
}
Пример #14
0
    InMemoryRecordReverseIterator::InMemoryRecordReverseIterator(
            OperationContext* txn,
            const InMemoryRecordStore::Records& records,
            const InMemoryRecordStore& rs,
            RecordId start) : _txn(txn),
                             _killedByInvalidate(false),
                             _records(records),
                             _rs(rs) {

        if (start.isNull()) {
            _it = _records.rbegin();
        }
        else {
            // The reverse iterator will point to the preceding element, so we
            // increment the base iterator to make it point past the found element
            InMemoryRecordStore::Records::const_iterator baseIt(++_records.find(start));
            _it = InMemoryRecordStore::Records::const_reverse_iterator(baseIt);
            invariant(_it != _records.rend());
        }
    }
Пример #15
0
            void run() {
                create();
                nsd()->deletedListEntry( 2 ) = nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted().drec()->nextDeleted();
                nsd()->cappedListOfAllDeletedRecords().drec()->nextDeleted().drec()->nextDeleted().writing() = RecordId();
                nsd()->cappedLastDelRecLastExtent().Null();
                NamespaceDetails *d = nsd();

                zero( &d->capExtent() );
                zero( &d->capFirstNewRecord() );

                // this has a side effect of called NamespaceDetails::cappedCheckMigrate
                db()->namespaceIndex().details( ns() );

                ASSERT( nsd()->firstExtent() == nsd()->capExtent() );
                ASSERT( nsd()->capExtent().getOfs() != 0 );
                ASSERT( !nsd()->capFirstNewRecord().isValid() );
                int nDeleted = 0;
                for ( RecordId i = nsd()->cappedListOfAllDeletedRecords(); !i.isNull(); i = i.drec()->nextDeleted(), ++nDeleted );
                ASSERT_EQUALS( 10, nDeleted );
                ASSERT( nsd()->cappedLastDelRecLastExtent().isNull() );
            }
Пример #16
0
    PlanStage::StageState OplogStart::workExtentHopping(WorkingSetID* out) {
        if (_done || _subIterators.empty()) {
            return PlanStage::IS_EOF;
        }

        // we work from the back to the front since the back has the newest data.
        const RecordId loc = _subIterators.back()->getNext();
        _subIterators.popAndDeleteBack();

        // TODO: should we ever try and return NEED_FETCH here?
        if (!loc.isNull() && !_filter->matchesBSON(_collection->docFor(_txn, loc))) {
            _done = true;
            WorkingSetID id = _workingSet->allocate();
            WorkingSetMember* member = _workingSet->get(id);
            member->loc = loc;
            member->obj = _collection->docFor(_txn, member->loc);
            member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
            *out = id;
            return PlanStage::ADVANCED;
        }

        return PlanStage::NEED_TIME;
    }
Пример #17
0
    bool Helpers::findById(OperationContext* txn,
                           Database* database,
                           const char *ns,
                           BSONObj query,
                           BSONObj& result,
                           bool* nsFound,
                           bool* indexFound) {

        invariant(database);

        Collection* collection = database->getCollection( ns );
        if ( !collection ) {
            return false;
        }

        if ( nsFound )
            *nsFound = true;

        IndexCatalog* catalog = collection->getIndexCatalog();
        const IndexDescriptor* desc = catalog->findIdIndex( txn );

        if ( !desc )
            return false;

        if ( indexFound )
            *indexFound = 1;

        // See SERVER-12397.  This may not always be true.
        BtreeBasedAccessMethod* accessMethod =
            static_cast<BtreeBasedAccessMethod*>(catalog->getIndex( desc ));

        RecordId loc = accessMethod->findSingle( txn, query["_id"].wrap() );
        if ( loc.isNull() )
            return false;
        result = collection->docFor(txn, loc).value();
        return true;
    }
Пример #18
0
PlanStage::StageState IDHackStage::doWork(WorkingSetID* out) {
    if (_done) {
        return PlanStage::IS_EOF;
    }

    if (WorkingSet::INVALID_ID != _idBeingPagedIn) {
        invariant(_recordCursor);
        WorkingSetID id = _idBeingPagedIn;
        _idBeingPagedIn = WorkingSet::INVALID_ID;

        invariant(WorkingSetCommon::fetchIfUnfetched(getOpCtx(), _workingSet, id, _recordCursor));

        WorkingSetMember* member = _workingSet->get(id);
        return advance(id, member, out);
    }

    WorkingSetID id = WorkingSet::INVALID_ID;
    try {
        // Look up the key by going directly to the index.
        RecordId recordId = _accessMethod->findSingle(getOpCtx(), _key);

        // Key not found.
        if (recordId.isNull()) {
            _done = true;
            return PlanStage::IS_EOF;
        }

        ++_specificStats.keysExamined;
        ++_specificStats.docsExamined;

        // Create a new WSM for the result document.
        id = _workingSet->allocate();
        WorkingSetMember* member = _workingSet->get(id);
        member->recordId = recordId;
        _workingSet->transitionToRecordIdAndIdx(id);

        if (!_recordCursor)
            _recordCursor = _collection->getCursor(getOpCtx());

        // We may need to request a yield while we fetch the document.
        if (auto fetcher = _recordCursor->fetcherForId(recordId)) {
            // There's something to fetch. Hand the fetcher off to the WSM, and pass up a
            // fetch request.
            _idBeingPagedIn = id;
            member->setFetcher(fetcher.release());
            *out = id;
            return NEED_YIELD;
        }

        // The doc was already in memory, so we go ahead and return it.
        if (!WorkingSetCommon::fetch(getOpCtx(), _workingSet, id, _recordCursor)) {
            // _id is immutable so the index would return the only record that could
            // possibly match the query.
            _workingSet->free(id);
            _commonStats.isEOF = true;
            _done = true;
            return IS_EOF;
        }

        return advance(id, member, out);
    } catch (const WriteConflictException& wce) {
        // Restart at the beginning on retry.
        _recordCursor.reset();
        if (id != WorkingSet::INVALID_ID)
            _workingSet->free(id);

        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }
}
Пример #19
0
    void syncFixUp(OperationContext* txn,
                   FixUpInfo& fixUpInfo,
                   OplogReader* oplogreader,
                   ReplicationCoordinator* replCoord) {
        DBClientConnection* them = oplogreader->conn();

        // fetch all first so we needn't handle interruption in a fancy way

        unsigned long long totalSize = 0;

        list< pair<DocID, BSONObj> > goodVersions;

        BSONObj newMinValid;

        // fetch all the goodVersions of each document from current primary
        DocID doc;
        unsigned long long numFetched = 0;
        try {
            for (set<DocID>::iterator it = fixUpInfo.toRefetch.begin();
                    it != fixUpInfo.toRefetch.end();
                    it++) {
                doc = *it;

                verify(!doc._id.eoo());

                {
                    // TODO : slow.  lots of round trips.
                    numFetched++;
                    BSONObj good = them->findOne(doc.ns, doc._id.wrap(),
                                                     NULL, QueryOption_SlaveOk).getOwned();
                    totalSize += good.objsize();
                    uassert(13410, "replSet too much data to roll back",
                            totalSize < 300 * 1024 * 1024);

                    // note good might be eoo, indicating we should delete it
                    goodVersions.push_back(pair<DocID, BSONObj>(doc,good));
                }
            }
            newMinValid = oplogreader->getLastOp(rsOplogName);
            if (newMinValid.isEmpty()) {
                error() << "rollback error newMinValid empty?";
                return;
            }
        }
        catch (DBException& e) {
            LOG(1) << "rollback re-get objects: " << e.toString();
            error() << "rollback couldn't re-get ns:" << doc.ns << " _id:" << doc._id << ' '
                    << numFetched << '/' << fixUpInfo.toRefetch.size();
            throw e;
        }

        log() << "rollback 3.5";
        if (fixUpInfo.rbid != getRBID(oplogreader->conn())) {
            // our source rolled back itself.  so the data we received isn't necessarily consistent.
            warning() << "rollback rbid on source changed during rollback, cancelling this attempt";
            return;
        }

        // update them
        log() << "rollback 4 n:" << goodVersions.size();

        bool warn = false;

        invariant(!fixUpInfo.commonPointOurDiskloc.isNull());
        invariant(txn->lockState()->isW());

        // we have items we are writing that aren't from a point-in-time.  thus best not to come
        // online until we get to that point in freshness.
        Timestamp minValid = newMinValid["ts"].timestamp();
        log() << "minvalid=" << minValid.toStringLong();
        setMinValid(txn, minValid);

        // any full collection resyncs required?
        if (!fixUpInfo.collectionsToResyncData.empty()
                || !fixUpInfo.collectionsToResyncMetadata.empty()) {

            for (const string& ns : fixUpInfo.collectionsToResyncData) {
                log() << "rollback 4.1.1 coll resync " << ns;

                fixUpInfo.collectionsToResyncMetadata.erase(ns);

                const NamespaceString nss(ns);

                Database* db = dbHolder().openDb(txn, nss.db().toString());
                invariant(db);

                {
                    WriteUnitOfWork wunit(txn);
                    db->dropCollection(txn, ns);
                    wunit.commit();
                }

                {
                    string errmsg;

                    // This comes as a GlobalWrite lock, so there is no DB to be acquired after
                    // resume, so we can skip the DB stability checks. Also 
                    // copyCollectionFromRemote will acquire its own database pointer, under the
                    // appropriate locks, so just releasing and acquiring the lock is safe.
                    invariant(txn->lockState()->isW());
                    Lock::TempRelease release(txn->lockState());

                    bool ok = copyCollectionFromRemote(txn, them->getServerAddress(), ns, errmsg);
                    uassert(15909, str::stream() << "replSet rollback error resyncing collection "
                                                 << ns << ' ' << errmsg, ok);
                }
            }

            for (const string& ns : fixUpInfo.collectionsToResyncMetadata) {
                log() << "rollback 4.1.2 coll metadata resync " << ns;

                const NamespaceString nss(ns);
                auto db = dbHolder().openDb(txn, nss.db().toString());
                invariant(db);
                auto collection = db->getCollection(ns);
                invariant(collection);
                auto cce = collection->getCatalogEntry();

                const std::list<BSONObj> info =
                    them->getCollectionInfos(nss.db().toString(), BSON("name" << nss.coll()));

                if (info.empty()) {
                    // Collection dropped by "them" so we should drop it too.
                    log() << ns << " not found on remote host, dropping";
                    fixUpInfo.toDrop.insert(ns);
                    continue;
                }

                invariant(info.size() == 1);

                CollectionOptions options;
                auto status = options.parse(info.front());
                if (!status.isOK()) {
                    throw RSFatalException(str::stream() << "Failed to parse options "
                                                         << info.front() << ": "
                                                         << status.toString());
                }

                WriteUnitOfWork wuow(txn);
                if (options.flagsSet || cce->getCollectionOptions(txn).flagsSet) {
                    cce->updateFlags(txn, options.flags);
                }

                status = collection->setValidator(txn, options.validator);
                if (!status.isOK()) {
                    throw RSFatalException(str::stream() << "Failed to set validator: "
                                                         << status.toString());
                }
                wuow.commit();
            }

            // we did more reading from primary, so check it again for a rollback (which would mess
            // us up), and make minValid newer.
            log() << "rollback 4.2";

            string err;
            try {
                newMinValid = oplogreader->getLastOp(rsOplogName);
                if (newMinValid.isEmpty()) {
                    err = "can't get minvalid from sync source";
                }
                else {
                    Timestamp minValid = newMinValid["ts"].timestamp();
                    log() << "minvalid=" << minValid.toStringLong();
                    setMinValid(txn, minValid);
                }
            }
            catch (DBException& e) {
                err = "can't get/set minvalid: ";
                err += e.what();
            }
            if (fixUpInfo.rbid != getRBID(oplogreader->conn())) {
                // our source rolled back itself.  so the data we received isn't necessarily
                // consistent. however, we've now done writes.  thus we have a problem.
                err += "rbid at primary changed during resync/rollback";
            }
            if (!err.empty()) {
                severe() << "rolling back : " << err
                        << ". A full resync will be necessary.";
                // TODO: reset minvalid so that we are permanently in fatal state
                // TODO: don't be fatal, but rather, get all the data first.
                throw RSFatalException();
            }
            log() << "rollback 4.3";
        }

        map<string,shared_ptr<Helpers::RemoveSaver> > removeSavers;

        log() << "rollback 4.6";
        // drop collections to drop before doing individual fixups - that might make things faster
        // below actually if there were subsequent inserts to rollback
        for (set<string>::iterator it = fixUpInfo.toDrop.begin();
                it != fixUpInfo.toDrop.end();
                it++) {
            log() << "rollback drop: " << *it;

            Database* db = dbHolder().get(txn, nsToDatabaseSubstring(*it));
            if (db) {
                WriteUnitOfWork wunit(txn);

                shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[*it];
                if (!removeSaver)
                    removeSaver.reset(new Helpers::RemoveSaver("rollback", "", *it));

                // perform a collection scan and write all documents in the collection to disk
                boost::scoped_ptr<PlanExecutor> exec(
                        InternalPlanner::collectionScan(txn,
                                                        *it,
                                                        db->getCollection(*it)));
                BSONObj curObj;
                PlanExecutor::ExecState execState;
                while (PlanExecutor::ADVANCED == (execState = exec->getNext(&curObj, NULL))) {
                    removeSaver->goingToDelete(curObj);
                }
                if (execState != PlanExecutor::IS_EOF) {
                    if (execState == PlanExecutor::FAILURE &&
                            WorkingSetCommon::isValidStatusMemberObject(curObj)) {
                        Status errorStatus = WorkingSetCommon::getMemberObjectStatus(curObj);
                        severe() << "rolling back createCollection on " << *it
                                 << " failed with " << errorStatus
                                 << ". A full resync is necessary.";
                    }
                    else {
                        severe() << "rolling back createCollection on " << *it
                                 << " failed. A full resync is necessary.";
                    }
                            
                    throw RSFatalException();
                }

                db->dropCollection(txn, *it);
                wunit.commit();
            }
        }

        log() << "rollback 4.7";
        OldClientContext ctx(txn, rsOplogName);
        Collection* oplogCollection = ctx.db()->getCollection(rsOplogName);
        uassert(13423,
                str::stream() << "replSet error in rollback can't find " << rsOplogName,
                oplogCollection);

        unsigned deletes = 0, updates = 0;
        time_t lastProgressUpdate = time(0);
        time_t progressUpdateGap = 10;
        for (list<pair<DocID, BSONObj> >::iterator it = goodVersions.begin();
                it != goodVersions.end();
                it++) {
            time_t now = time(0);
            if (now - lastProgressUpdate > progressUpdateGap) {
                log() << deletes << " delete and "
                      << updates << " update operations processed out of "
                      << goodVersions.size() << " total operations";
                lastProgressUpdate = now;
            }
            const DocID& doc = it->first;
            BSONObj pattern = doc._id.wrap(); // { _id : ... }
            try {
                verify(doc.ns && *doc.ns);
                if (fixUpInfo.collectionsToResyncData.count(doc.ns)) {
                    // we just synced this entire collection
                    continue;
                }

                // keep an archive of items rolled back
                shared_ptr<Helpers::RemoveSaver>& removeSaver = removeSavers[doc.ns];
                if (!removeSaver)
                    removeSaver.reset(new Helpers::RemoveSaver("rollback", "", doc.ns));

                // todo: lots of overhead in context, this can be faster
                OldClientContext ctx(txn, doc.ns);

                // Add the doc to our rollback file
                BSONObj obj;
                Collection* collection = ctx.db()->getCollection(doc.ns);

                // Do not log an error when undoing an insert on a no longer existent collection.
                // It is likely that the collection was dropped as part of rolling back a
                // createCollection command and regardless, the document no longer exists.
                if (collection) {
                    bool found = Helpers::findOne(txn, collection, pattern, obj, false);
                    if (found) {
                        removeSaver->goingToDelete(obj);
                    }
                    else {
                        error() << "rollback cannot find object: " << pattern
                                << " in namespace " << doc.ns;
                    }
                }

                if (it->second.isEmpty()) {
                    // wasn't on the primary; delete.
                    // TODO 1.6 : can't delete from a capped collection.  need to handle that here.
                    deletes++;

                    if (collection) {
                        if (collection->isCapped()) {
                            // can't delete from a capped collection - so we truncate instead. if
                            // this item must go, so must all successors!!!
                            try {
                                // TODO: IIRC cappedTruncateAfter does not handle completely empty.
                                // this will crazy slow if no _id index.
                                long long start = Listener::getElapsedTimeMillis();
                                RecordId loc = Helpers::findOne(txn, collection, pattern, false);
                                if (Listener::getElapsedTimeMillis() - start > 200)
                                    warning() << "roll back slow no _id index for "
                                          << doc.ns << " perhaps?";
                                // would be faster but requires index:
                                // RecordId loc = Helpers::findById(nsd, pattern);
                                if (!loc.isNull()) {
                                    try {
                                        collection->temp_cappedTruncateAfter(txn, loc, true);
                                    }
                                    catch (DBException& e) {
                                        if (e.getCode() == 13415) {
                                            // hack: need to just make cappedTruncate do this...
                                            MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                                                WriteUnitOfWork wunit(txn);
                                                uassertStatusOK(collection->truncate(txn));
                                                wunit.commit();
                                            } MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
                                                                            txn,
                                                                            "truncate",
                                                                            collection->ns().ns());
                                        }
                                        else {
                                            throw e;
                                        }
                                    }
                                }
                            }
                            catch (DBException& e) {
                                error() << "rolling back capped collection rec "
                                      << doc.ns << ' ' << e.toString();
                            }
                        }
                        else {
Пример #20
0
PlanStage::StageState IDHackStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (_done) {
        return PlanStage::IS_EOF;
    }

    if (WorkingSet::INVALID_ID != _idBeingPagedIn) {
        invariant(_recordCursor);
        WorkingSetID id = _idBeingPagedIn;
        _idBeingPagedIn = WorkingSet::INVALID_ID;
        WorkingSetMember* member = _workingSet->get(id);

        invariant(WorkingSetCommon::fetchIfUnfetched(_txn, member, _recordCursor));

        return advance(id, member, out);
    }

    WorkingSetID id = WorkingSet::INVALID_ID;
    try {
        // Use the index catalog to get the id index.
        const IndexCatalog* catalog = _collection->getIndexCatalog();

        // Find the index we use.
        IndexDescriptor* idDesc = catalog->findIdIndex(_txn);
        if (NULL == idDesc) {
            _done = true;
            return PlanStage::IS_EOF;
        }

        // Look up the key by going directly to the index.
        RecordId loc = catalog->getIndex(idDesc)->findSingle(_txn, _key);

        // Key not found.
        if (loc.isNull()) {
            _done = true;
            return PlanStage::IS_EOF;
        }

        ++_specificStats.keysExamined;
        ++_specificStats.docsExamined;

        // Create a new WSM for the result document.
        id = _workingSet->allocate();
        WorkingSetMember* member = _workingSet->get(id);
        member->state = WorkingSetMember::LOC_AND_IDX;
        member->loc = loc;

        if (!_recordCursor)
            _recordCursor = _collection->getCursor(_txn);

        // We may need to request a yield while we fetch the document.
        if (auto fetcher = _recordCursor->fetcherForId(loc)) {
            // There's something to fetch. Hand the fetcher off to the WSM, and pass up a
            // fetch request.
            _idBeingPagedIn = id;
            member->setFetcher(fetcher.release());
            *out = id;
            _commonStats.needYield++;
            return NEED_YIELD;
        }

        // The doc was already in memory, so we go ahead and return it.
        if (!WorkingSetCommon::fetch(_txn, member, _recordCursor)) {
            // _id is immutable so the index would return the only record that could
            // possibly match the query.
            _workingSet->free(id);
            _commonStats.isEOF = true;
            _done = true;
            return IS_EOF;
        }

        return advance(id, member, out);
    } catch (const WriteConflictException& wce) {
        // Restart at the beginning on retry.
        _recordCursor.reset();
        if (id != WorkingSet::INVALID_ID)
            _workingSet->free(id);

        *out = WorkingSet::INVALID_ID;
        _commonStats.needYield++;
        return NEED_YIELD;
    }
}
Пример #21
0
    PlanStage::StageState IDHackStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (_done) { return PlanStage::IS_EOF; }

        if (WorkingSet::INVALID_ID != _idBeingPagedIn) {
            WorkingSetID id = _idBeingPagedIn;
            _idBeingPagedIn = WorkingSet::INVALID_ID;
            WorkingSetMember* member = _workingSet->get(id);

            WorkingSetCommon::completeFetch(_txn, member, _collection);

            return advance(id, member, out);
        }

        // Use the index catalog to get the id index.
        const IndexCatalog* catalog = _collection->getIndexCatalog();

        // Find the index we use.
        IndexDescriptor* idDesc = catalog->findIdIndex(_txn);
        if (NULL == idDesc) {
            _done = true;
            return PlanStage::IS_EOF;
        }

        // This may not be valid always.  See SERVER-12397.
        const BtreeBasedAccessMethod* accessMethod =
            static_cast<const BtreeBasedAccessMethod*>(catalog->getIndex(idDesc));

        // Look up the key by going directly to the Btree.
        RecordId loc = accessMethod->findSingle(_txn, _key);

        // Key not found.
        if (loc.isNull()) {
            _done = true;
            return PlanStage::IS_EOF;
        }

        ++_specificStats.keysExamined;
        ++_specificStats.docsExamined;

        // Create a new WSM for the result document.
        WorkingSetID id = _workingSet->allocate();
        WorkingSetMember* member = _workingSet->get(id);
        member->loc = loc;
        member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;

        // We may need to request a yield while we fetch the document.
        std::auto_ptr<RecordFetcher> fetcher(_collection->documentNeedsFetch(_txn, loc));
        if (NULL != fetcher.get()) {
            // There's something to fetch. Hand the fetcher off to the WSM, and pass up a
            // fetch request.
            _idBeingPagedIn = id;
            member->setFetcher(fetcher.release());
            *out = id;
            _commonStats.needFetch++;
            return NEED_FETCH;
        }

        // The doc was already in memory, so we go ahead and return it.
        member->obj = _collection->docFor(_txn, member->loc);
        return advance(id, member, out);
    }
Status MMAPV1DatabaseCatalogEntry::_renameSingleNamespace(OperationContext* txn,
                                                          StringData fromNS,
                                                          StringData toNS,
                                                          bool stayTemp) {
    // some sanity checking
    NamespaceDetails* fromDetails = _namespaceIndex.details(fromNS);
    if (!fromDetails)
        return Status(ErrorCodes::BadValue, "from namespace doesn't exist");

    if (_namespaceIndex.details(toNS))
        return Status(ErrorCodes::BadValue, "to namespace already exists");

    // at this point, we haven't done anything destructive yet

    // ----
    // actually start moving
    // ----

    // this could throw, but if it does we're ok
    _namespaceIndex.add_ns(txn, toNS, fromDetails);
    NamespaceDetails* toDetails = _namespaceIndex.details(toNS);

    try {
        toDetails->copyingFrom(txn, toNS, _namespaceIndex, fromDetails);  // fixes extraOffset
    } catch (DBException&) {
        // could end up here if .ns is full - if so try to clean up / roll back a little
        _namespaceIndex.kill_ns(txn, toNS);
        throw;
    }

    // at this point, code .ns stuff moved

    _namespaceIndex.kill_ns(txn, fromNS);
    fromDetails = NULL;

    // fix system.namespaces
    BSONObj newSpec;
    RecordId oldSpecLocation = getCollectionCatalogEntry(fromNS)->getNamespacesRecordId();
    invariant(!oldSpecLocation.isNull());
    {
        BSONObj oldSpec = _getNamespaceRecordStore()->dataFor(txn, oldSpecLocation).releaseToBson();
        invariant(!oldSpec.isEmpty());

        BSONObjBuilder b;
        BSONObjIterator i(oldSpec.getObjectField("options"));
        while (i.more()) {
            BSONElement e = i.next();
            if (strcmp(e.fieldName(), "create") != 0) {
                if (stayTemp || (strcmp(e.fieldName(), "temp") != 0))
                    b.append(e);
            } else {
                b << "create" << toNS;
            }
        }
        newSpec = b.obj();
    }

    RecordId rid = _addNamespaceToNamespaceCollection(txn, toNS, newSpec.isEmpty() ? 0 : &newSpec);

    _getNamespaceRecordStore()->deleteRecord(txn, oldSpecLocation);

    Entry*& entry = _collections[toNS.toString()];
    invariant(entry == NULL);
    txn->recoveryUnit()->registerChange(new EntryInsertion(toNS, this));
    entry = new Entry();
    _removeFromCache(txn->recoveryUnit(), fromNS);
    _insertInCache(txn, toNS, rid, entry);

    return Status::OK();
}
Пример #23
0
            void pass(int p) {
                OperationContextImpl txn;
                create();
                ASSERT_EQUALS( 2, nExtents() );

                BSONObj b = bigObj();

                int N = MinExtentSize / b.objsize() * nExtents() + 5;
                int T = N - 4;

                RecordId truncAt;
                //RecordId l[ 8 ];
                for ( int i = 0; i < N; ++i ) {
                    BSONObj bb = bigObj();
                    StatusWith<RecordId> status = collection()->insertDocument( &txn, bb, true );
                    ASSERT( status.isOK() );
                    RecordId a = status.getValue();
                    if( T == i )
                        truncAt = a;
                    ASSERT( !a.isNull() );
                    /*ASSERT_EQUALS( i < 2 ? i + 1 : 3 + i % 2, nRecords() );
                    if ( i > 3 )
                        ASSERT( l[ i ] == l[ i - 4 ] );*/
                }
                ASSERT( nRecords() < N );

                RecordId last, first;
                {
                    auto_ptr<Runner> runner(InternalPlanner::collectionScan(&txn,
                                                                            ns(),
                                                                            collection(),
                                                                            InternalPlanner::BACKWARD));
                    runner->getNext(NULL, &last);
                    ASSERT( !last.isNull() );
                }
                {
                    auto_ptr<Runner> runner(InternalPlanner::collectionScan(&txn,
                                                                            ns(),
                                                                            collection(),
                                                                            InternalPlanner::FORWARD));
                    runner->getNext(NULL, &first);
                    ASSERT( !first.isNull() );
                    ASSERT( first != last ) ;
                }

                collection()->temp_cappedTruncateAfter(&txn, truncAt, false);
                ASSERT_EQUALS( collection()->numRecords() , 28u );

                {
                    RecordId loc;
                    auto_ptr<Runner> runner(InternalPlanner::collectionScan(&txn,
                                                                            ns(),
                                                                            collection(),
                                                                            InternalPlanner::FORWARD));
                    runner->getNext(NULL, &loc);
                    ASSERT( first == loc);
                }
                {
                    auto_ptr<Runner> runner(InternalPlanner::collectionScan(&txn,
                                                                            ns(),
                                                                            collection(),
                                                                            InternalPlanner::BACKWARD));
                    RecordId loc;
                    runner->getNext(NULL, &loc);
                    ASSERT( last != loc );
                    ASSERT( !last.isNull() );
                }

                // Too big
                BSONObjBuilder bob;
                bob.appendOID("_id", 0, true);
                bob.append( "a", string( MinExtentSize + 300, 'a' ) );
                BSONObj bigger = bob.done();
                StatusWith<RecordId> status = collection()->insertDocument( &txn, bigger, true );
                ASSERT( !status.isOK() );
                ASSERT_EQUALS( 0, nRecords() );
            }
Пример #24
0
    PlanStage::StageState CollectionScan::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (_isDead) { return PlanStage::DEAD; }

        // Do some init if we haven't already.
        if (NULL == _iter) {
            if ( _params.collection == NULL ) {
                _isDead = true;
                return PlanStage::DEAD;
            }

            try {
                if (_lastSeenLoc.isNull()) {
                    _iter.reset( _params.collection->getIterator( _txn,
                                                                  _params.start,
                                                                  _params.direction ) );
                }
                else {
                    invariant(_params.tailable);

                    _iter.reset( _params.collection->getIterator( _txn,
                                                                  _lastSeenLoc,
                                                                  _params.direction ) );

                    // Advance _iter past where we were last time. If it returns something else,
                    // mark us as dead since we want to signal an error rather than silently
                    // dropping data from the stream. This is related to the _lastSeenLock handling
                    // in invalidate.
                    if (_iter->getNext() != _lastSeenLoc) {
                        _isDead = true;
                        return PlanStage::DEAD;
                    }
                }
            }
            catch (const WriteConflictException& wce) {
                // Leave us in a state to try again next time.
                _iter.reset();
                *out = WorkingSet::INVALID_ID;
                return PlanStage::NEED_YIELD;
            }

            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }

        // Should we try getNext() on the underlying _iter?
        if (isEOF())
            return PlanStage::IS_EOF;

        const RecordId curr = _iter->curr();
        if (curr.isNull()) {
            // We just hit EOF
            if (_params.tailable)
                _iter.reset(); // pick up where we left off on the next call to work()
            return PlanStage::IS_EOF;
        }

        _lastSeenLoc = curr;

        // See if the record we're about to access is in memory. If not, pass a fetch request up.
        // Note that curr() does not touch the record. This way, we are able to yield before
        // fetching the record.
        {
            std::auto_ptr<RecordFetcher> fetcher(
                _params.collection->documentNeedsFetch(_txn, curr));
            if (NULL != fetcher.get()) {
                WorkingSetMember* member = _workingSet->get(_wsidForFetch);
                member->loc = curr;
                // Pass the RecordFetcher off to the WSM.
                member->setFetcher(fetcher.release());
                *out = _wsidForFetch;
                _commonStats.needYield++;
                return NEED_YIELD;
            }
        }

        // Do this before advancing because it is more efficient while the iterator is still on this
        // document.
        const Snapshotted<BSONObj> obj = Snapshotted<BSONObj>(_txn->recoveryUnit()->getSnapshotId(),
                                                              _iter->dataFor(curr).releaseToBson());

        // Advance the iterator.
        try {
            invariant(_iter->getNext() == curr);
        }
        catch (const WriteConflictException& wce) {
            // If getNext thows, it leaves us on the original document.
            invariant(_iter->curr() == curr);
            *out = WorkingSet::INVALID_ID;
            return PlanStage::NEED_YIELD;
        }

        WorkingSetID id = _workingSet->allocate();
        WorkingSetMember* member = _workingSet->get(id);
        member->loc = curr;
        member->obj = obj;
        member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
	
        return returnIfMatches(member, id, out);
    }