Example #1
    virtual Status explain(OperationContext* txn,
                           const std::string& dbname,
                           const BSONObj& cmdObj,
                           ExplainCommon::Verbosity verbosity,
                           const rpc::ServerSelectionMetadata&,
                           BSONObjBuilder* out) const {
        const string ns = parseNs(dbname, cmdObj);
        AutoGetCollectionForRead ctx(txn, ns);

        Collection* collection = ctx.getCollection();

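        // Build a plan executor for the command and surface any failure Status to the caller.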
        StatusWith<unique_ptr<PlanExecutor>> executor =
            getPlanExecutor(txn, collection, ns, cmdObj, true);
        if (!executor.isOK()) {
            return executor.getStatus();
        }

        Explain::explainStages(executor.getValue().get(), verbosity, out);
        return Status::OK();
    }
Example #2
    StatusWith<DiskLoc> RecordStoreV1Base::updateRecord( OperationContext* txn,
                                                         const DiskLoc& oldLocation,
                                                         const char* data,
                                                         int dataSize,
                                                         bool enforceQuota,
                                                         UpdateMoveNotifier* notifier ) {
        Record* oldRecord = recordFor( oldLocation );
        if ( oldRecord->netLength() >= dataSize ) {
            // we fit
            _paddingFits( txn );
            memcpy( txn->recoveryUnit()->writingPtr( oldRecord->data(), dataSize ), data, dataSize );
            return StatusWith<DiskLoc>( oldLocation );
        }

        if ( isCapped() )
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "failing update: objects in a capped ns cannot grow",
                                        10003 );

        // we have to move

        _paddingTooSmall( txn );

        StatusWith<DiskLoc> newLocation = _insertRecord( txn, data, dataSize, enforceQuota );
        if ( !newLocation.isOK() )
            return newLocation;

        // insert worked, so we delete old record
        if ( notifier ) {
            Status moveStatus = notifier->recordStoreGoingToMove( txn,
                                                                  oldLocation,
                                                                  oldRecord->data(),
                                                                  oldRecord->netLength() );
            if ( !moveStatus.isOK() )
                return StatusWith<DiskLoc>( moveStatus );
        }

        deleteRecord( txn, oldLocation );

        return newLocation;
    }
Example #3
StatusWith<std::string> KVCatalog::newOrphanedIdent(OperationContext* opCtx, std::string ident) {
    // The collection will be named local.orphan.xxxxx.
    std::string identNs = ident;
    std::replace(identNs.begin(), identNs.end(), '-', '_');
    std::string ns = NamespaceString(NamespaceString::kOrphanCollectionDb,
                                     NamespaceString::kOrphanCollectionPrefix + identNs)
                         .ns();

    stdx::lock_guard<stdx::mutex> lk(_identsLock);
    Entry& old = _idents[ns];
    if (!old.ident.empty()) {
        return Status(ErrorCodes::NamespaceExists,
                      str::stream() << ns << " already exists in the catalog");
    }
    opCtx->recoveryUnit()->registerChange(new AddIdentChange(this, ns));

    // Generate a new UUID for the orphaned collection.
    CollectionOptions optionsWithUUID;
    optionsWithUUID.uuid.emplace(CollectionUUID::gen());
    BSONObj obj;
    {
        BSONObjBuilder b;
        b.append("ns", ns);
        b.append("ident", ident);
        BSONCollectionCatalogEntry::MetaData md;
        md.ns = ns;
        // Default options with newly generated UUID.
        md.options = optionsWithUUID;
        // Not Prefixed.
        md.prefix = KVPrefix::kNotPrefixed;
        b.append("md", md.toBSON());
        obj = b.obj();
    }
    StatusWith<RecordId> res = _rs->insertRecord(opCtx, obj.objdata(), obj.objsize(), Timestamp());
    if (!res.isOK())
        return res.getStatus();

    old = Entry(ident, res.getValue());
    LOG(1) << "stored meta data for orphaned collection " << ns << " @ " << res.getValue();
    return StatusWith<std::string>(std::move(ns));
}
Example #4
Status KVCatalog::newCollection(OperationContext* opCtx,
                                StringData ns,
                                const CollectionOptions& options) {
    invariant(opCtx->lockState() == NULL ||
              opCtx->lockState()->isDbLockedForMode(nsToDatabaseSubstring(ns), MODE_X));

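    // If the underlying record store is not thread-safe, serialize catalog metadata writes
    // with an explicit resource lock.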
    std::unique_ptr<Lock::ResourceLock> rLk;
    if (!_isRsThreadSafe && opCtx->lockState()) {
        rLk.reset(new Lock::ResourceLock(opCtx->lockState(), resourceIdCatalogMetadata, MODE_X));
    }

    const string ident = _newUniqueIdent(ns, "collection");

    stdx::lock_guard<stdx::mutex> lk(_identsLock);
    Entry& old = _idents[ns.toString()];
    if (!old.ident.empty()) {
        return Status(ErrorCodes::NamespaceExists, "collection already exists");
    }

    opCtx->recoveryUnit()->registerChange(new AddIdentChange(this, ns));

    BSONObj obj;
    {
        BSONObjBuilder b;
        b.append("ns", ns);
        b.append("ident", ident);
        BSONCollectionCatalogEntry::MetaData md;
        md.ns = ns.toString();
        md.options = options;
        b.append("md", md.toBSON());
        obj = b.obj();
    }

    StatusWith<RecordId> res = _rs->insertRecord(opCtx, obj.objdata(), obj.objsize(), false);
    if (!res.isOK())
        return res.getStatus();

    old = Entry(ident, res.getValue());
    LOG(1) << "stored meta data for " << ns << " @ " << res.getValue();
    return Status::OK();
}
Example #5
void ReplicationCoordinatorImpl::_handleHeartbeatResponseAction(
    const HeartbeatResponseAction& action,
    const StatusWith<ReplSetHeartbeatResponse>& responseStatus) {
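    // Dispatch on the action the topology coordinator derived from the heartbeat response.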
    switch (action.getAction()) {
        case HeartbeatResponseAction::NoAction:
            // Update the cached member state if different than the current topology member state
            if (_memberState != _topCoord->getMemberState()) {
                stdx::unique_lock<stdx::mutex> lk(_mutex);
                const PostMemberStateUpdateAction postUpdateAction =
                    _updateMemberStateFromTopologyCoordinator_inlock();
                lk.unlock();
                _performPostMemberStateUpdateAction(postUpdateAction);
            }
            break;
        case HeartbeatResponseAction::Reconfig:
            invariant(responseStatus.isOK());
            _scheduleHeartbeatReconfig(responseStatus.getValue().getConfig());
            break;
        case HeartbeatResponseAction::StartElection:
            if (isV1ElectionProtocol()) {
                _startElectSelfV1();
            } else {
                _startElectSelf();
            }
            break;
        case HeartbeatResponseAction::StepDownSelf:
            invariant(action.getPrimaryConfigIndex() == _selfIndex);
            log() << "Stepping down from primary in response to heartbeat";
            _stepDownStart();
            break;
        case HeartbeatResponseAction::StepDownRemotePrimary: {
            invariant(action.getPrimaryConfigIndex() != _selfIndex);
            _requestRemotePrimaryStepdown(
                _rsConfig.getMemberAt(action.getPrimaryConfigIndex()).getHostAndPort());
            break;
        }
        default:
            severe() << "Illegal heartbeat response action code " << int(action.getAction());
            invariant(false);
    }
}
Example #6
    // Iterates over the request and preprocesses normalized versions of all of its inserts
    static void normalizeInserts( const BatchedCommandRequest& request,
                                  vector<StatusWith<BSONObj> >* normalizedInserts,
                                  vector<PregeneratedKeys>* pregen ) {

        normalizedInserts->reserve(request.sizeWriteOps());
        for ( size_t i = 0; i < request.sizeWriteOps(); ++i ) {
            BSONObj insertDoc = request.getInsertRequest()->getDocumentsAt( i );
            StatusWith<BSONObj> normalInsert = fixDocumentForInsert( insertDoc );
            normalizedInserts->push_back( normalInsert );
            if ( request.getOrdered() && !normalInsert.isOK() )
                break;

            if ( !normalInsert.getValue().isEmpty() )
                insertDoc = normalInsert.getValue();

            pregen->push_back( PregeneratedKeys() );
            GeneratorHolder::getInstance()->prepare( request.getTargetingNS(),
                                                     insertDoc,
                                                     &pregen->back() );
        }
    }
Example #7
BtreeLogicTestHelper<OnDiskFormat>::BtreeLogicTestHelper(const BSONObj& order)
    : recordStore("TestRecordStore"),
      btree(&headManager,
            &recordStore,
            &cursorRegistry,
            Ordering::make(order),
            "TestIndex",
            /*isUnique*/ false) {
    static const string randomData("RandomStuff");

    // Generate a valid record location for a "fake" record, which we will repeatedly use
    // throughout the tests.
    OperationContextNoop opCtx;
    StatusWith<RecordId> s =
        recordStore.insertRecord(&opCtx, randomData.c_str(), randomData.length(), false);

    ASSERT_TRUE(s.isOK());
    ASSERT_EQUALS(1, recordStore.numRecords(NULL));

    dummyDiskLoc = DiskLoc::fromRecordId(s.getValue());
}
Example #8
    StatusWith<DiskLoc> Collection::insertDocument( OperationContext* txn,
                                                    const BSONObj& doc,
                                                    MultiIndexBlock& indexBlock ) {
        StatusWith<DiskLoc> loc = _recordStore->insertRecord( txn,
                                                              doc.objdata(),
                                                              doc.objsize(),
                                                              0 );

        if ( !loc.isOK() )
            return loc;

        InsertDeleteOptions indexOptions;
        indexOptions.logIfError = false;
        indexOptions.dupsAllowed = true; // in repair we should be doing no checking

        Status status = indexBlock.insert( doc, loc.getValue(), indexOptions );
        if ( !status.isOK() )
            return StatusWith<DiskLoc>( status );

        return loc;
    }
Example #9
    /**
     * Performs a single insert into a collection. Requires that the insert has been
     * preprocessed and that the collection has already been created.
     *
     * Might fault or error; otherwise populates the result.
     */
    static void singleInsert( OperationContext* txn,
                              const BSONObj& docToInsert,
                              Collection* collection,
                              WriteOpResult* result ) {

        const string& insertNS = collection->ns().ns();

        txn->lockState()->assertWriteLocked( insertNS );

        WriteUnitOfWork wunit(txn);
        StatusWith<DiskLoc> status = collection->insertDocument( txn, docToInsert, true );

        if ( !status.isOK() ) {
            result->setError(toWriteError(status.getStatus()));
        }
        else {
            repl::logOp( txn, "i", insertNS.c_str(), docToInsert );
            result->getStats().n = 1;
            wunit.commit();
        }
    }
Example #10
    StatusWith<ReplicationExecutor::CallbackHandle>
    ReplicationExecutor::scheduleWorkWithGlobalExclusiveLock(
            const CallbackFn& work) {

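        // Enqueue the work item and, if that succeeds, schedule a worker thread to run it
        // while holding the global exclusive lock.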
        boost::lock_guard<boost::mutex> lk(_mutex);
        StatusWith<CallbackHandle> handle = enqueueWork_inlock(&_exclusiveLockInProgressQueue,
                                                               work);
        if (handle.isOK()) {
            const stdx::function<void (OperationContext*)> doOp = stdx::bind(
                    &ReplicationExecutor::doOperationWithGlobalExclusiveLock,
                    this,
                    stdx::placeholders::_1,
                    handle.getValue());
            _dblockWorkers.schedule(
                    makeNoExcept(stdx::bind(
                                         &NetworkInterface::runCallbackWithGlobalExclusiveLock,
                                         _networkInterface.get(),
                                         doOp)));
        }
        return handle;
    }
Example #11
Status ReplicationCoordinatorExternalStateImpl::storeLocalLastVoteDocument(
    OperationContext* opCtx, const LastVote& lastVote) {
    BSONObj lastVoteObj = lastVote.toBSON();
    try {
        Status status =
            writeConflictRetry(opCtx, "save replica set lastVote", lastVoteCollectionName, [&] {
                Lock::DBLock dbWriteLock(opCtx, lastVoteDatabaseName, MODE_X);

                // If there is no last vote document, we want to store one. Otherwise, we only want
                // to replace it if the new last vote document would have a higher term. We both
                // check the term of the current last vote document and insert the new document
                // under the DBLock to synchronize the two operations.
                BSONObj result;
                bool exists = Helpers::getSingleton(opCtx, lastVoteCollectionName, result);
                if (!exists) {
                    Helpers::putSingleton(opCtx, lastVoteCollectionName, lastVoteObj);
                } else {
                    StatusWith<LastVote> oldLastVoteDoc = LastVote::readFromLastVote(result);
                    if (!oldLastVoteDoc.isOK()) {
                        return oldLastVoteDoc.getStatus();
                    }
                    if (lastVote.getTerm() > oldLastVoteDoc.getValue().getTerm()) {
                        Helpers::putSingleton(opCtx, lastVoteCollectionName, lastVoteObj);
                    }
                }

                return Status::OK();
            });

        if (!status.isOK()) {
            return status;
        }

        opCtx->recoveryUnit()->waitUntilDurable();

        return Status::OK();
    } catch (const DBException& ex) {
        return ex.toStatus();
    }
}
Example #12
StatusWith<SettingsType> CatalogManagerReplicaSet::getGlobalSettings(OperationContext* txn,
                                                                     const string& key) {
    const auto configShard = grid.shardRegistry()->getShard(txn, "config");
    const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHost.isOK()) {
        return readHost.getStatus();
    }

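    // Fetch at most one settings document with the given key from the config server.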
    auto findStatus = _exhaustiveFindOnConfig(readHost.getValue(),
                                              NamespaceString(SettingsType::ConfigNS),
                                              BSON(SettingsType::key(key)),
                                              BSONObj(),
                                              1);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    const auto& docs = findStatus.getValue().value;
    if (docs.empty()) {
        return {ErrorCodes::NoMatchingDocument,
                str::stream() << "can't find settings document with key: " << key};
    }

    BSONObj settingsDoc = docs.front();
    StatusWith<SettingsType> settingsResult = SettingsType::fromBSON(settingsDoc);
    if (!settingsResult.isOK()) {
        return {ErrorCodes::FailedToParse,
                str::stream() << "error while parsing settings document: " << settingsDoc << " : "
                              << settingsResult.getStatus().toString()};
    }

    const SettingsType& settings = settingsResult.getValue();

    Status validationStatus = settings.validate();
    if (!validationStatus.isOK()) {
        return validationStatus;
    }

    return settingsResult;
}
Example #13
Status EphemeralForTestRecordStore::insertRecordsWithDocWriter(OperationContext* opCtx,
                                                               const DocWriter* const* docs,
                                                               const Timestamp*,
                                                               size_t nDocs,
                                                               RecordId* idsOut) {
    stdx::lock_guard<stdx::recursive_mutex> lock(_data->recordsMutex);

    for (size_t i = 0; i < nDocs; i++) {
        const int len = docs[i]->documentSize();
        if (_isCapped && len > _cappedMaxSize) {
            // We use dataSize for capped rollover and we don't want to delete everything if we
            // know this won't fit.
            return Status(ErrorCodes::BadValue, "object to insert exceeds cappedMaxSize");
        }

        EphemeralForTestRecord rec(len);
        docs[i]->writeDocument(rec.data.get());

        RecordId loc;
        if (_data->isOplog) {
            StatusWith<RecordId> status = extractAndCheckLocForOplog(rec.data.get(), len);
            if (!status.isOK())
                return status.getStatus();
            loc = status.getValue();
        } else {
            loc = allocateLoc();
        }

        opCtx->recoveryUnit()->registerChange(new InsertChange(opCtx, _data, loc));
        _data->dataSize += len;
        _data->records[loc] = rec;

        cappedDeleteAsNeeded_inlock(opCtx);

        if (idsOut)
            idsOut[i] = loc;
    }

    return Status::OK();
}
Example #14
    bool WriteCmd::run(OperationContext* txn,
                       const string& dbName,
                       BSONObj& cmdObj,
                       int options,
                       string& errMsg,
                       BSONObjBuilder& result) {
        // Can't be run on secondaries.
        dassert(txn->writesAreReplicated());
        BatchedCommandRequest request( _writeType );
        BatchedCommandResponse response;

        if ( !request.parseBSON( cmdObj, &errMsg ) || !request.isValid( &errMsg ) ) {
            return appendCommandStatus( result, Status( ErrorCodes::FailedToParse, errMsg ) );
        }

        // Note that this is a runCommand, and therefore, the database and the collection name
        // are in different parts of the grammar for the command. But it's more convenient to
        // work with a NamespaceString. We build it here and replace it in the parsed command.
        // Internally, everything works with the namespace string as opposed to just the
        // collection name.
        NamespaceString nss(dbName, request.getNS());
        request.setNSS(nss);

        StatusWith<WriteConcernOptions> wcStatus = extractWriteConcern(cmdObj);

        if (!wcStatus.isOK()) {
            return appendCommandStatus(result, wcStatus.getStatus());
        }
        txn->setWriteConcern(wcStatus.getValue());

        WriteBatchExecutor writeBatchExecutor(txn,
                                              &globalOpCounters,
                                              &LastError::get(txn->getClient()));

        writeBatchExecutor.executeBatch( request, &response );

        result.appendElements( response.toBSON() );
        return response.getOk();
    }
Example #15
StatusWith<RecordId> Collection::insertDocument(OperationContext* txn,
                                                const BSONObj& docToInsert,
                                                bool enforceQuota,
                                                bool fromMigrate) {
    {
        auto status = checkValidation(txn, docToInsert);
        if (!status.isOK())
            return status;
    }

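    // Capture the snapshot id so we can verify below that the snapshot did not change
    // across the insert.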
    const SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    if (_indexCatalog.findIdIndex(txn)) {
        if (docToInsert["_id"].eoo()) {
            return StatusWith<RecordId>(ErrorCodes::InternalError,
                                        str::stream()
                                            << "Collection::insertDocument got "
                                               "document without _id for ns:" << _ns.ns());
        }
    }

    if (_mustTakeCappedLockOnInsert)
        synchronizeOnCappedInFlightResource(txn->lockState());

    StatusWith<RecordId> res = _insertDocument(txn, docToInsert, enforceQuota);
    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    if (res.isOK()) {
        getGlobalServiceContext()->getOpObserver()->onInsert(txn, ns(), docToInsert, fromMigrate);

        // If there is a notifier object and another thread is waiting on it, then we notify
        // waiters of this document insert. Waiters keep a shared_ptr to '_cappedNotifier', so
        // there are waiters if this Collection's shared_ptr is not unique.
        if (_cappedNotifier && !_cappedNotifier.unique()) {
            _cappedNotifier->notifyOfInsert();
        }
    }

    return res;
}
Example #16
    StatusWith<DiskLoc> Collection::insertDocument( const BSONObj& docToInsert, bool enforceQuota ) {
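        // Documents must carry an _id whenever the collection has an _id index.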
        if ( _indexCatalog.findIdIndex() ) {
            if ( docToInsert["_id"].eoo() ) {
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "Collection::insertDocument got document without _id" );
            }
        }

        if ( _details->isCapped() ) {
            // TODO: old god not done
            Status ret = _indexCatalog.checkNoIndexConflicts( docToInsert );
            if ( !ret.isOK() )
                return StatusWith<DiskLoc>( ret );
        }

        StatusWith<DiskLoc> status = _insertDocument( docToInsert, enforceQuota );
        if ( status.isOK() ) {
            _details->paddingFits();
        }

        return status;
    }
Example #17
    void WiredTigerUtil::fetchTypeAndSourceURI(OperationContext* opCtx,
                                               const std::string& tableUri,
                                               std::string* type,
                                               std::string* source) {
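        // Derive the matching colgroup URI from the table URI and fetch its metadata.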
        std::string colgroupUri = "colgroup";
        const size_t colon = tableUri.find(':');
        invariant(colon != string::npos);
        colgroupUri += tableUri.substr(colon);
        StatusWith<std::string> colgroupResult = getMetadata(opCtx, colgroupUri);
        invariant(colgroupResult.isOK());
        WiredTigerConfigParser parser(colgroupResult.getValue());

        WT_CONFIG_ITEM typeItem;
        invariant(parser.get("type", &typeItem) == 0);
        invariant(typeItem.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_ID);
        *type = std::string(typeItem.str, typeItem.len);

        WT_CONFIG_ITEM sourceItem;
        invariant(parser.get("source", &sourceItem) == 0);
        invariant(sourceItem.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRING);
        *source = std::string(sourceItem.str, sourceItem.len);
    }
Example #18
Status KVCatalog::newCollection(OperationContext* opCtx,
                                StringData ns,
                                const CollectionOptions& options,
                                KVPrefix prefix) {
    invariant(opCtx->lockState()->isDbLockedForMode(nsToDatabaseSubstring(ns), MODE_X));

    const string ident = _newUniqueIdent(ns, "collection");

    stdx::lock_guard<stdx::mutex> lk(_identsLock);
    Entry& old = _idents[ns.toString()];
    if (!old.ident.empty()) {
        return Status(ErrorCodes::NamespaceExists, "collection already exists");
    }

    opCtx->recoveryUnit()->registerChange(new AddIdentChange(this, ns));

    BSONObj obj;
    {
        BSONObjBuilder b;
        b.append("ns", ns);
        b.append("ident", ident);
        BSONCollectionCatalogEntry::MetaData md;
        md.ns = ns.toString();
        md.options = options;
        md.prefix = prefix;
        b.append("md", md.toBSON());
        obj = b.obj();
    }
    const bool enforceQuota = false;
    // TODO SERVER-30638: using timestamp 0 for these inserts.
    StatusWith<RecordId> res =
        _rs->insertRecord(opCtx, obj.objdata(), obj.objsize(), Timestamp(), enforceQuota);
    if (!res.isOK())
        return res.getStatus();

    old = Entry(ident, res.getValue());
    LOG(1) << "stored meta data for " << ns << " @ " << res.getValue();
    return Status::OK();
}
Example #19
Status ReplSetConfig::checkIfWriteConcernCanBeSatisfied(
    const WriteConcernOptions& writeConcern) const {
    if (!writeConcern.wMode.empty() && writeConcern.wMode != WriteConcernOptions::kMajority &&
        writeConcern.wMode != WriteConcernOptions::kInternalMajorityNoSnapshot) {
        StatusWith<ReplSetTagPattern> tagPatternStatus = findCustomWriteMode(writeConcern.wMode);
        if (!tagPatternStatus.isOK()) {
            return tagPatternStatus.getStatus();
        }

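        // Walk every member's tags; the matcher reports success once enough tagged
        // members have been seen to satisfy the pattern.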
        ReplSetTagMatch matcher(tagPatternStatus.getValue());
        for (size_t j = 0; j < _members.size(); ++j) {
            const MemberConfig& memberConfig = _members[j];
            for (MemberConfig::TagIterator it = memberConfig.tagsBegin();
                 it != memberConfig.tagsEnd();
                 ++it) {
                if (matcher.update(*it)) {
                    return Status::OK();
                }
            }
        }
        // Even if all the nodes in the set had a given write it still would not satisfy this
        // write concern mode.
        return Status(ErrorCodes::UnsatisfiableWriteConcern,
                      str::stream() << "Not enough nodes match write concern mode \""
                                    << writeConcern.wMode
                                    << "\"");
    } else {
        int nodesRemaining = writeConcern.wNumNodes;
        for (size_t j = 0; j < _members.size(); ++j) {
            if (!_members[j].isArbiter()) {  // Only count data-bearing nodes
                --nodesRemaining;
                if (nodesRemaining <= 0) {
                    return Status::OK();
                }
            }
        }
        return Status(ErrorCodes::UnsatisfiableWriteConcern, "Not enough data-bearing nodes");
    }
}
Example #20
    bool Sync::shouldRetry(OperationContext* txn, const BSONObj& o) {
        const NamespaceString nss(o.getStringField("ns"));

        // Take an X lock on the database in order to preclude other modifications. Also, the
        // database might not exist yet, so create it.
        AutoGetOrCreateDb autoDb(txn, nss.db(), MODE_X);
        Database* const db = autoDb.getDb();

        // We don't have the object yet, which is possible on initial sync; fetch it.
        log() << "adding missing object" << endl; // rare enough we can log

        BSONObj missingObj = getMissingDoc(txn, db, o);

        if( missingObj.isEmpty() ) {
            log() << "missing object not found on source. presumably deleted later in oplog" << endl;
            log() << "o2: " << o.getObjectField("o2").toString() << endl;
            log() << "o firstfield: " << o.getObjectField("o").firstElementFieldName() << endl;

            return false;
        }
        else {
            WriteUnitOfWork wunit(txn);

            Collection* const collection = db->getOrCreateCollection(txn, nss.toString());
            invariant(collection);

            StatusWith<RecordId> result = collection->insertDocument(txn, missingObj, true);
            uassert(15917,
                    str::stream() << "failed to insert missing doc: "
                                  << result.getStatus().toString(),
                    result.isOK() );

            LOG(1) << "inserted missing doc: " << missingObj.toString() << endl;

            wunit.commit();
            return true;
        }
    }
Example #21
    StatusWith<DiskLoc> RecordStoreV1Base::insertRecord( TransactionExperiment* txn,
                                                         const DocWriter* doc,
                                                         int quotaMax ) {
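        // Compute the on-disk record length, including the header and any requested padding.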
        int lenWHdr = doc->documentSize() + Record::HeaderSize;
        if ( doc->addPadding() )
            lenWHdr = getRecordAllocationSize( lenWHdr );

        StatusWith<DiskLoc> loc = allocRecord( txn, lenWHdr, quotaMax );
        if ( !loc.isOK() )
            return loc;

        Record *r = recordFor( loc.getValue() );
        fassert( 17319, r->lengthWithHeaders() >= lenWHdr );

        r = reinterpret_cast<Record*>( txn->writingPtr(r, lenWHdr) );
        doc->writeDocument( r->data() );

        _addRecordToRecListInExtent(txn, r, loc.getValue());

        _details->incrementStats( txn, r->netLength(), 1 );

        return loc;
    }
Example #22
StatusWith<RecordId> Collection::_insertDocument(OperationContext* txn,
                                                 const BSONObj& docToInsert,
                                                 bool enforceQuota) {
    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));

    // TODO: For now, capped logic lives inside NamespaceDetails, which is hidden
    //       under the RecordStore. This feels broken, since that should probably be
    //       a collection access method.

    StatusWith<RecordId> loc = _recordStore->insertRecord(
        txn, docToInsert.objdata(), docToInsert.objsize(), _enforceQuota(enforceQuota));
    if (!loc.isOK())
        return loc;

    invariant(RecordId::min() < loc.getValue());
    invariant(loc.getValue() < RecordId::max());

    Status s = _indexCatalog.indexRecord(txn, docToInsert, loc.getValue());
    if (!s.isOK())
        return StatusWith<RecordId>(s);

    return loc;
}
Example #23
/**
 * Performs a single insert into a collection. Requires that the insert has been
 * preprocessed and that the collection has already been created.
 *
 * Might fault or error; otherwise populates the result.
 */
static void singleInsert( const BatchItemRef& insertItem,
                          const BSONObj& normalInsert,
                          Collection* collection,
                          WriteOpResult* result ) {

    const string& insertNS = insertItem.getRequest()->getNS();

    Lock::assertWriteLocked( insertNS );

    try {

        // XXX - are we 100% sure that all !OK statuses do not write a document?
        StatusWith<DiskLoc> status = collection->insertDocument( normalInsert, true );

        if ( !status.isOK() ) {
            result->error = toWriteError( status.getStatus() );
        }
        else {
            logOp( "i", insertNS.c_str(), normalInsert );
            getDur().commitIfNeeded();
            result->stats.n = 1;
        }
    }
    catch ( const PageFaultException& ex ) {
        // TODO: An actual data structure that's not an exception for this
        result->fault = new PageFaultException( ex );
    }
    catch ( const DBException& ex ) {
        Status status(ex.toStatus());
        if (ErrorCodes::isInterruption(status.code())) {
            throw;
        }
        result->error = toWriteError(status);
    }

}
Example #24
    StatusWith<DiskLoc> RecordStoreV1Base::insertRecord( TransactionExperiment* txn,
                                                         const char* data,
                                                         int len,
                                                         int quotaMax ) {
        int lenWHdr = getRecordAllocationSize( len + Record::HeaderSize );
        fassert( 17208, lenWHdr >= ( len + Record::HeaderSize ) );

        StatusWith<DiskLoc> loc = allocRecord( txn, lenWHdr, quotaMax );
        if ( !loc.isOK() )
            return loc;

        Record *r = recordFor( loc.getValue() );
        fassert( 17210, r->lengthWithHeaders() >= lenWHdr );

        // copy the data
        r = reinterpret_cast<Record*>( txn->writingPtr(r, lenWHdr) );
        memcpy( r->data(), data, len );

        _addRecordToRecListInExtent(txn, r, loc.getValue());

        _details->incrementStats( txn, r->netLength(), 1 );

        return loc;
    }
Example #25
StatusWith<RecordId> Collection::insertDocument(OperationContext* txn,
                                                const BSONObj& doc,
                                                MultiIndexBlock* indexBlock,
                                                bool enforceQuota) {
    {
        auto status = checkValidation(txn, doc);
        if (!status.isOK())
            return status;
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));

    if (_mustTakeCappedLockOnInsert)
        synchronizeOnCappedInFlightResource(txn->lockState());

    StatusWith<RecordId> loc =
        _recordStore->insertRecord(txn, doc.objdata(), doc.objsize(), _enforceQuota(enforceQuota));

    if (!loc.isOK())
        return loc;

    Status status = indexBlock->insert(doc, loc.getValue());
    if (!status.isOK())
        return StatusWith<RecordId>(status);

    getGlobalServiceContext()->getOpObserver()->onInsert(txn, ns(), doc);

    // If there is a notifier object and another thread is waiting on it, then we notify waiters
    // of this document insert. Waiters keep a shared_ptr to '_cappedNotifier', so there are
    // waiters if this Collection's shared_ptr is not unique.
    if (_cappedNotifier && !_cappedNotifier.unique()) {
        _cappedNotifier->notifyOfInsert();
    }

    return loc;
}
Example #26
    StatusWith<DiskLoc> Collection::_insertDocument( OperationContext* txn,
                                                     const BSONObj& docToInsert,
                                                     bool enforceQuota ) {

        // TODO: For now, capped logic lives inside NamespaceDetails, which is hidden
        //       under the RecordStore. This feels broken, since that should probably be
        //       a collection access method.

        StatusWith<DiskLoc> loc = _recordStore->insertRecord( txn,
                                                              docToInsert.objdata(),
                                                              docToInsert.objsize(),
                                                              _enforceQuota( enforceQuota ) );
        if ( !loc.isOK() )
            return loc;

        _infoCache.notifyOfWriteOp();

        try {
            _indexCatalog.indexRecord(txn, docToInsert, loc.getValue());
        }
        catch ( AssertionException& e ) {
            if ( isCapped() ) {
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            str::stream() << "unexpected index insertion failure on"
                                            << " capped collection: " << e.toString()
                                            << " - collection and its index will not match" );
            }

            // indexRecord takes care of rolling back indexes
            // so we just have to delete the main storage
            _recordStore->deleteRecord( txn, loc.getValue() );
            return StatusWith<DiskLoc>( e.toStatus( "insertDocument" ) );
        }

        return loc;
    }
Example #27
Status CatalogManagerReplicaSet::shardCollection(OperationContext* txn,
                                                 const string& ns,
                                                 const ShardKeyPattern& fieldsAndOrder,
                                                 bool unique,
                                                 const vector<BSONObj>& initPoints,
                                                 const set<ShardId>& initShardIds) {
    // Lock the collection globally so that no other mongos can try to shard or drop the collection
    // at the same time.
    auto scopedDistLock = getDistLockManager()->lock(ns, "shardCollection");
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    StatusWith<DatabaseType> status = getDatabase(nsToDatabase(ns));
    if (!status.isOK()) {
        return status.getStatus();
    }

    DatabaseType dbt = status.getValue();
    ShardId dbPrimaryShardId = dbt.getPrimary();
    const auto primaryShard = grid.shardRegistry()->getShard(dbPrimaryShardId);

    {
        // In 3.0 and prior we include this extra safety check that the collection is not getting
        // sharded concurrently by two different mongos instances. It is not foolproof, but it
        // reduces the chance that two invocations of shard collection will step on each other's
        // toes. Now we take the distributed lock, so going forward this check won't be necessary,
        // but we leave it around for compatibility with other mongoses from 3.0.
        // TODO(spencer): Remove this after 3.2 ships.
        const auto configShard = grid.shardRegistry()->getShard("config");
        const auto readHost = configShard->getTargeter()->findHost(kConfigReadSelector);
        if (!readHost.isOK()) {
            return readHost.getStatus();
        }

        auto countStatus = _runCountCommand(
            readHost.getValue(), NamespaceString(ChunkType::ConfigNS), BSON(ChunkType::ns(ns)));
        if (!countStatus.isOK()) {
            return countStatus.getStatus();
        }
        if (countStatus.getValue() > 0) {
            return Status(ErrorCodes::AlreadyInitialized,
                          str::stream() << "collection " << ns << " already sharded with "
                                        << countStatus.getValue() << " chunks.");
        }
    }

    // Record start in changelog
    {
        BSONObjBuilder collectionDetail;
        collectionDetail.append("shardKey", fieldsAndOrder.toBSON());
        collectionDetail.append("collection", ns);
        collectionDetail.append("primary", primaryShard->toString());

        {
            BSONArrayBuilder initialShards(collectionDetail.subarrayStart("initShards"));
            for (const ShardId& shardId : initShardIds) {
                initialShards.append(shardId);
            }
        }

        collectionDetail.append("numChunks", static_cast<int>(initPoints.size() + 1));

        logChange(txn->getClient()->clientAddress(true),
                  "shardCollection.start",
                  ns,
                  collectionDetail.obj());
    }

    ChunkManagerPtr manager(new ChunkManager(ns, fieldsAndOrder, unique));
    manager->createFirstChunks(dbPrimaryShardId, &initPoints, &initShardIds);
    manager->loadExistingRanges(nullptr);

    CollectionInfo collInfo;
    collInfo.useChunkManager(manager);
    collInfo.save(ns);
    manager->reload(true);

    // TODO(spencer) SERVER-19319: Send setShardVersion to primary shard so it knows to start
    // rejecting unversioned writes.

    BSONObj finishDetail = BSON("version"
                                << "");  // TODO(spencer) SERVER-19319 Report actual version used

    logChange(txn->getClient()->clientAddress(true), "shardCollection", ns, finishDetail);

    return Status::OK();
}
Example #28
void BackgroundSync::_fetcherCallback(const StatusWith<Fetcher::QueryResponse>& result,
                                      BSONObjBuilder* bob,
                                      const HostAndPort& source,
                                      OpTime lastOpTimeFetched,
                                      long long lastFetchedHash,
                                      Status* remoteOplogStartStatus) {
    // If the target cut connections between connecting and querying (for
    // example, because it stepped down), we might not have a cursor.
    if (!result.isOK()) {
        return;
    }

    if (inShutdown()) {
        return;
    }

    // Check if we have been paused.
    if (isPaused()) {
        return;
    }

    const auto& queryResponse = result.getValue();
    const auto& documents = queryResponse.documents;
    auto documentBegin = documents.cbegin();
    auto documentEnd = documents.cend();

    // Check start of remote oplog and, if necessary, stop fetcher to execute rollback.
    if (queryResponse.first) {
        auto getNextOperation = [&documentBegin, documentEnd]() -> StatusWith<BSONObj> {
            if (documentBegin == documentEnd) {
                return Status(ErrorCodes::OplogStartMissing, "remote oplog start missing");
            }
            return *(documentBegin++);
        };

        *remoteOplogStartStatus =
            checkRemoteOplogStart(getNextOperation, lastOpTimeFetched, lastFetchedHash);
        if (!remoteOplogStartStatus->isOK()) {
            // Stop fetcher and execute rollback.
            return;
        }

        // If this is the first batch and no rollback is needed, we should have advanced
        // the document iterator.
        invariant(documentBegin != documents.cbegin());
    }

    // process documents
    int currentBatchMessageSize = 0;
    for (auto documentIter = documentBegin; documentIter != documentEnd; ++documentIter) {
        if (inShutdown()) {
            return;
        }

        // If we are transitioning to primary state, we need to leave
        // this loop in order to go into bgsync-pause mode.
        if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary()) {
            LOG(1) << "waiting for draining or we are primary, not adding more ops to buffer";
            return;
        }

        // At this point, we are guaranteed to have at least one thing to read out
        // of the fetcher.
        const BSONObj& o = *documentIter;
        currentBatchMessageSize += o.objsize();
        opsReadStats.increment();

        if (MONGO_FAIL_POINT(stepDownWhileDrainingFailPoint)) {
            sleepsecs(20);
        }

        {
            stdx::unique_lock<stdx::mutex> lock(_mutex);
            _appliedBuffer = false;
        }

        OCCASIONALLY {
            LOG(2) << "bgsync buffer has " << _buffer.size() << " bytes";
        }

        bufferCountGauge.increment();
        bufferSizeGauge.increment(getSize(o));
        _buffer.push(o);

        {
            stdx::unique_lock<stdx::mutex> lock(_mutex);
            _lastFetchedHash = o["h"].numberLong();
            _lastOpTimeFetched = extractOpTime(o);
            LOG(3) << "lastOpTimeFetched: " << _lastOpTimeFetched;
        }
    }

    // record time for each batch
    getmoreReplStats.recordMillis(queryResponse.elapsedMillis.count());

    networkByteStats.increment(currentBatchMessageSize);

    // Check some things periodically
    // (whenever we run out of items in the
    // current cursor batch)
    if (currentBatchMessageSize > 0 && currentBatchMessageSize < BatchIsSmallish) {
        // On a very low-latency network, if we don't wait a little, we'll be
        // getting ops to write almost one at a time. This is expensive both for
        // the upstream server and because it can defeat our parallel application
        // of batches on the secondary.
        //
        // The inference here is basically that if the batch is really small, we
        // are "caught up".
        //
        sleepmillis(SleepToAllowBatchingMillis);
    }

    // If we are transitioning to primary state, we need to leave
    // this loop in order to go into bgsync-pause mode.
    if (_replCoord->isWaitingForApplierToDrain() || _replCoord->getMemberState().primary()) {
        return;
    }

    // re-evaluate quality of sync target
    if (_shouldChangeSyncSource(source)) {
        return;
    }

    // Check if we have been paused.
    if (isPaused()) {
        return;
    }

    // We fill in 'bob' to signal the fetcher to process with another getMore.
    invariant(bob);
    bob->append("getMore", queryResponse.cursorId);
    bob->append("collection", queryResponse.nss.coll());
    bob->append("maxTimeMS", int(fetcherMaxTimeMS.count()));
}
Example #29
    Status getOplogStartHack(OperationContext* txn,
                             Collection* collection,
                             CanonicalQuery* cq,
                             PlanExecutor** execOut) {
        invariant(cq);
        auto_ptr<CanonicalQuery> autoCq(cq);

        if ( collection == NULL )
            return Status(ErrorCodes::InternalError,
                          "getOplogStartHack called with a NULL collection" );

        // A query can only do oplog start finding if it has a top-level $gt or $gte predicate over
        // the "ts" field (the operation's timestamp). Find that predicate and pass it to
        // the OplogStart stage.
        MatchExpression* tsExpr = NULL;
        if (MatchExpression::AND == cq->root()->matchType()) {
            // The query has an AND at the top-level. See if any of the children
            // of the AND are $gt or $gte predicates over 'ts'.
            for (size_t i = 0; i < cq->root()->numChildren(); ++i) {
                MatchExpression* me = cq->root()->getChild(i);
                if (isOplogTsPred(me)) {
                    tsExpr = me;
                    break;
                }
            }
        }
        else if (isOplogTsPred(cq->root())) {
            // The root of the tree is a $gt or $gte predicate over 'ts'.
            tsExpr = cq->root();
        }

        if (NULL == tsExpr) {
            return Status(ErrorCodes::OplogOperationUnsupported,
                          "OplogReplay query does not contain top-level "
                          "$gt or $gte over the 'ts' field.");
        }

        DiskLoc startLoc = DiskLoc().setInvalid();

        // See if the RecordStore supports the oplogStartHack
        const BSONElement tsElem = extractOplogTsOptime(tsExpr);
        if (tsElem.type() == Timestamp) {
            StatusWith<DiskLoc> goal = oploghack::keyForOptime(tsElem._opTime());
            if (goal.isOK()) {
                startLoc = collection->getRecordStore()->oplogStartHack(txn, goal.getValue());
            }
        }

        if (startLoc.isValid()) {
            LOG(3) << "Using direct oplog seek";
        }
        else {
            LOG(3) << "Using OplogStart stage";

            // Fallback to trying the OplogStart stage.
            WorkingSet* oplogws = new WorkingSet();
            OplogStart* stage = new OplogStart(txn, collection, tsExpr, oplogws);
            PlanExecutor* rawExec;

            // Takes ownership of oplogws and stage.
            Status execStatus = PlanExecutor::make(txn, oplogws, stage, collection,
                                                   PlanExecutor::YIELD_AUTO, &rawExec);
            invariant(execStatus.isOK());
            scoped_ptr<PlanExecutor> exec(rawExec);

            // The stage returns a DiskLoc of where to start.
            PlanExecutor::ExecState state = exec->getNext(NULL, &startLoc);

            // This is normal.  The start of the oplog is the beginning of the collection.
            if (PlanExecutor::IS_EOF == state) {
                return getExecutor(txn, collection, autoCq.release(), PlanExecutor::YIELD_AUTO,
                                   execOut);
            }

            // This is not normal.  An error was encountered.
            if (PlanExecutor::ADVANCED != state) {
                return Status(ErrorCodes::InternalError,
                              "quick oplog start location had error...?");
            }
        }

        // Build our collection scan...
        CollectionScanParams params;
        params.collection = collection;
        params.start = startLoc;
        params.direction = CollectionScanParams::FORWARD;
        params.tailable = cq->getParsed().getOptions().tailable;

        WorkingSet* ws = new WorkingSet();
        CollectionScan* cs = new CollectionScan(txn, params, ws, cq->root());
        // Takes ownership of 'ws', 'cs', and 'cq'.
        return PlanExecutor::make(txn, ws, cs, autoCq.release(), collection,
                                  PlanExecutor::YIELD_AUTO, execOut);
    }
Example #30
StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetDelete(
    OperationContext* opCtx, const write_ops::DeleteOpEntry& deleteDoc) const {
    BSONObj shardKey;

    if (_routingInfo->cm()) {
        //
        // Sharded collections have the following further requirements for targeting:
        //
        // Limit-1 deletes must be targeted exactly by shard key *or* exact _id
        //

        // Get the shard key
        StatusWith<BSONObj> status =
            _routingInfo->cm()->getShardKeyPattern().extractShardKeyFromQuery(opCtx,
                                                                              deleteDoc.getQ());

        // Bad query
        if (!status.isOK())
            return status.getStatus();

        shardKey = status.getValue();
    }

    const auto collation = write_ops::collationOf(deleteDoc);

    // Target the shard key or delete query
    if (!shardKey.isEmpty()) {
        try {
            return std::vector<ShardEndpoint>{_targetShardKey(shardKey, collation, 0)};
        } catch (const DBException&) {
            // This delete is potentially not constrained to a single shard
        }
    }

    // We failed to target a single shard.

    // Parse delete query.
    auto qr = stdx::make_unique<QueryRequest>(getNS());
    qr->setFilter(deleteDoc.getQ());
    if (!collation.isEmpty()) {
        qr->setCollation(collation);
    }
    const boost::intrusive_ptr<ExpressionContext> expCtx;
    auto cq = CanonicalQuery::canonicalize(opCtx,
                                           std::move(qr),
                                           expCtx,
                                           ExtensionsCallbackNoop(),
                                           MatchExpressionParser::kAllowAllSpecialFeatures);
    if (!cq.isOK()) {
        return cq.getStatus().withContext(str::stream() << "Could not parse delete query "
                                                        << deleteDoc.getQ());
    }

    // Single deletes must target a single shard or be exact-ID.
    if (_routingInfo->cm() && !deleteDoc.getMulti() &&
        !isExactIdQuery(opCtx, *cq.getValue(), _routingInfo->cm().get())) {
        return Status(ErrorCodes::ShardKeyNotFound,
                      str::stream()
                          << "A single delete on a sharded collection must contain an exact "
                             "match on _id (and have the collection default collation) or "
                             "contain the shard key (and have the simple collation). Delete "
                             "request: "
                          << deleteDoc.toBSON()
                          << ", shard key pattern: "
                          << _routingInfo->cm()->getShardKeyPattern().toString());
    }

    return _targetQuery(opCtx, deleteDoc.getQ(), collation);
}