Ejemplo n.º 1
0
StatusWith<RecordData> Collection::updateDocumentWithDamages(
    OperationContext* txn,
    const RecordId& loc,
    const Snapshotted<RecordData>& oldRec,
    const char* damageSource,
    const mutablebson::DamageVector& damages,
    oplogUpdateEntryArgs& args) {
    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldRec.snapshotId() == txn->recoveryUnit()->getSnapshotId());
    invariant(updateWithDamagesSupported());

    // Broadcast the mutation so that query results stay correct.
    _cursorManager.invalidateDocument(txn, loc, INVALIDATION_MUTATION);

    auto newRecStatus =
        _recordStore->updateWithDamages(txn, loc, oldRec.value(), damageSource, damages);

    if (newRecStatus.isOK()) {
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);
    }
    return newRecStatus;
}
Ejemplo n.º 2
0
Status MultiIndexBlock::insertAllDocumentsInCollection(std::set<RecordId>* dupsOut) {
    const char* curopMessage = _buildInBackground ? "Index Build (background)" : "Index Build";
    const auto numRecords = _collection->numRecords(_txn);
    stdx::unique_lock<Client> lk(*_txn->getClient());
    ProgressMeterHolder progress(*_txn->setMessage_inlock(curopMessage, curopMessage, numRecords));
    lk.unlock();

    Timer t;

    unsigned long long n = 0;

    unique_ptr<PlanExecutor> exec(InternalPlanner::collectionScan(
        _txn, _collection->ns().ns(), _collection, PlanExecutor::YIELD_MANUAL));
    if (_buildInBackground) {
        invariant(_allowInterruption);
        exec->setYieldPolicy(PlanExecutor::YIELD_AUTO);
    } else {
        exec->setYieldPolicy(PlanExecutor::WRITE_CONFLICT_RETRY_ONLY);
    }

    Snapshotted<BSONObj> objToIndex;
    RecordId loc;
    PlanExecutor::ExecState state;
    int retries = 0;  // non-zero when retrying our last document.
    while (retries ||
           (PlanExecutor::ADVANCED == (state = exec->getNextSnapshotted(&objToIndex, &loc)))) {
        try {
            if (_allowInterruption)
                _txn->checkForInterrupt();

            // Make sure we are working with the latest version of the document.
            if (objToIndex.snapshotId() != _txn->recoveryUnit()->getSnapshotId() &&
                !_collection->findDoc(_txn, loc, &objToIndex)) {
                // doc was deleted so don't index it.
                retries = 0;
                continue;
            }

            // Done before insert so we can retry document if it WCEs.
            progress->setTotalWhileRunning(_collection->numRecords(_txn));

            WriteUnitOfWork wunit(_txn);
            Status ret = insert(objToIndex.value(), loc);
            if (_buildInBackground)
                exec->saveState();
            if (ret.isOK()) {
                wunit.commit();
            } else if (dupsOut && ret.code() == ErrorCodes::DuplicateKey) {
                // If dupsOut is non-null, we should only fail the specific insert that
                // led to a DuplicateKey rather than the whole index build.
                dupsOut->insert(loc);
            } else {
                // Fail the index build hard.
                return ret;
            }
            if (_buildInBackground)
                exec->restoreState();  // Handles any WCEs internally.

            // Go to the next document
            progress->hit();
            n++;
            retries = 0;
        } catch (const WriteConflictException& wce) {
            CurOp::get(_txn)->debug().writeConflicts++;
            retries++;  // logAndBackoff expects this to be 1 on first call.
            wce.logAndBackoff(retries, "index creation", _collection->ns().ns());

            // Can't use WRITE_CONFLICT_RETRY_LOOP macros since we need to save/restore exec
            // around call to abandonSnapshot.
            exec->saveState();
            _txn->recoveryUnit()->abandonSnapshot();
            exec->restoreState();  // Handles any WCEs internally.
        }
    }

    uassert(28550,
            "Unable to complete index build due to collection scan failure: " +
                WorkingSetCommon::toStatusString(objToIndex.value()),
            state == PlanExecutor::IS_EOF);

    progress->finished();

    Status ret = doneInserting(dupsOut);
    if (!ret.isOK())
        return ret;

    log() << "build index done.  scanned " << n << " total records. " << t.seconds() << " secs"
          << endl;

    return Status::OK();
}
Ejemplo n.º 3
0
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // moderate means we have to check the old doc
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    if (_needCappedLock) {
        // X-lock the metadata resource for this capped collection until the end of the WUOW. This
        // prevents the primary from executing with more concurrency than secondaries.
        // See SERVER-21646.
        Lock::ResourceLock{txn->lockState(), ResourceId(RESOURCE_METADATA, _ns.ns()), MODE_X};
    }

    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // The MMAPv1 storage engine implements capped collections in a way that does not allow records
    // to grow beyond their original size. If MMAPv1 part of a replicaset with storage engines that
    // do not have this limitation, replication could result in errors, so it is necessary to set a
    // uniform rule here. Similarly, it is not sufficient to disallow growing records, because this
    // happens when secondaries roll back an update shrunk a record. Exactly replicating legacy
    // MMAPv1 behavior would require padding shrunk documents on all storage engines. Instead forbid
    // all size changes.
    const auto oldSize = oldDoc.value().objsize();
    if (_recordStore->isCapped() && oldSize != newDoc.objsize())
        return {ErrorCodes::CannotGrowDocumentInCappedNamespace,
                str::stream() << "Cannot change the size of a document in a capped collection: "
                              << oldSize << " != " << newDoc.objsize()};

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        std::vector<BsonRecord> bsonRecords;
        BsonRecord bsonRecord = {newLocation.getValue(), &newDoc};
        bsonRecords.push_back(bsonRecord);
        Status s = _indexCatalog.indexRecords(txn, bsonRecords);
        if (!s.isOK())
            return StatusWith<RecordId>(s);
        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move.  We update each index with each respective UpdateTicket.

    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
Ejemplo n.º 4
0
Status cloneCollectionAsCapped(OperationContext* txn,
                               Database* db,
                               const std::string& shortFrom,
                               const std::string& shortTo,
                               double size,
                               bool temp) {
    std::string fromNs = db->name() + "." + shortFrom;
    std::string toNs = db->name() + "." + shortTo;

    Collection* fromCollection = db->getCollection(fromNs);
    if (!fromCollection)
        return Status(ErrorCodes::NamespaceNotFound,
                      str::stream() << "source collection " << fromNs << " does not exist");

    if (db->getCollection(toNs))
        return Status(ErrorCodes::NamespaceExists, "to collection already exists");

    // create new collection
    MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
        const auto fromOptions =
            fromCollection->getCatalogEntry()->getCollectionOptions(txn).toBSON();
        OldClientContext ctx(txn, toNs);
        BSONObjBuilder spec;
        spec.appendBool("capped", true);
        spec.append("size", size);
        if (temp)
            spec.appendBool("temp", true);
        spec.appendElementsUnique(fromOptions);

        WriteUnitOfWork wunit(txn);
        Status status = userCreateNS(txn, ctx.db(), toNs, spec.done());
        if (!status.isOK())
            return status;
        wunit.commit();
    }
    MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "cloneCollectionAsCapped", fromNs);

    Collection* toCollection = db->getCollection(toNs);
    invariant(toCollection);  // we created above

    // how much data to ignore because it won't fit anyway
    // datasize and extentSize can't be compared exactly, so add some padding to 'size'

    long long allocatedSpaceGuess =
        std::max(static_cast<long long>(size * 2),
                 static_cast<long long>(toCollection->getRecordStore()->storageSize(txn) * 2));

    long long excessSize = fromCollection->dataSize(txn) - allocatedSpaceGuess;

    std::unique_ptr<PlanExecutor> exec(
        InternalPlanner::collectionScan(txn, fromNs, fromCollection, InternalPlanner::FORWARD));

    exec->setYieldPolicy(PlanExecutor::WRITE_CONFLICT_RETRY_ONLY);

    Snapshotted<BSONObj> objToClone;
    RecordId loc;
    PlanExecutor::ExecState state = PlanExecutor::FAILURE;  // suppress uninitialized warnings

    DisableDocumentValidation validationDisabler(txn);

    int retries = 0;  // non-zero when retrying our last document.
    while (true) {
        if (!retries) {
            state = exec->getNextSnapshotted(&objToClone, &loc);
        }

        switch (state) {
            case PlanExecutor::IS_EOF:
                return Status::OK();
            case PlanExecutor::ADVANCED: {
                if (excessSize > 0) {
                    // 4x is for padding, power of 2, etc...
                    excessSize -= (4 * objToClone.value().objsize());
                    continue;
                }
                break;
            }
            default:
                // Unreachable as:
                // 1) We require a read lock (at a minimum) on the "from" collection
                //    and won't yield, preventing collection drop and PlanExecutor::DEAD
                // 2) PlanExecutor::FAILURE is only returned on PlanStage::FAILURE. The
                //    CollectionScan PlanStage does not have a FAILURE scenario.
                // 3) All other PlanExecutor states are handled above
                invariant(false);
        }

        try {
            // Make sure we are working with the latest version of the document.
            if (objToClone.snapshotId() != txn->recoveryUnit()->getSnapshotId() &&
                !fromCollection->findDoc(txn, loc, &objToClone)) {
                // doc was deleted so don't clone it.
                retries = 0;
                continue;
            }

            WriteUnitOfWork wunit(txn);
            toCollection->insertDocument(txn, objToClone.value(), true, txn->writesAreReplicated());
            wunit.commit();

            // Go to the next document
            retries = 0;
        } catch (const WriteConflictException& wce) {
            CurOp::get(txn)->debug().writeConflicts++;
            retries++;  // logAndBackoff expects this to be 1 on first call.
            wce.logAndBackoff(retries, "cloneCollectionAsCapped", fromNs);

            // Can't use WRITE_CONFLICT_RETRY_LOOP macros since we need to save/restore exec
            // around call to abandonSnapshot.
            exec->saveState();
            txn->recoveryUnit()->abandonSnapshot();
            exec->restoreState(txn);  // Handles any WCEs internally.
        }
    }

    invariant(false);  // unreachable
}
Ejemplo n.º 5
0
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // moderate means we have to check the old doc
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        Status s = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue());
        if (!s.isOK())
            return StatusWith<RecordId>(s);
        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move.  We update each index with each respective UpdateTicket.

    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
Ejemplo n.º 6
0
BSONObj UpdateStage::transformAndUpdate(const Snapshotted<BSONObj>& oldObj, RecordId& recordId) {
    const UpdateRequest* request = _params.request;
    UpdateDriver* driver = _params.driver;
    CanonicalQuery* cq = _params.canonicalQuery;
    UpdateLifecycle* lifecycle = request->getLifecycle();

    // If asked to return new doc, default to the oldObj, in case nothing changes.
    BSONObj newObj = oldObj.value();

    // Ask the driver to apply the mods. It may be that the driver can apply those "in
    // place", that is, some values of the old document just get adjusted without any
    // change to the binary layout on the bson layer. It may be that a whole new document
    // is needed to accomodate the new bson layout of the resulting document. In any event,
    // only enable in-place mutations if the underlying storage engine offers support for
    // writing damage events.
    _doc.reset(oldObj.value(),
               (_collection->updateWithDamagesSupported()
                    ? mutablebson::Document::kInPlaceEnabled
                    : mutablebson::Document::kInPlaceDisabled));

    BSONObj logObj;

    bool docWasModified = false;

    Status status = Status::OK();
    const bool validateForStorage = getOpCtx()->writesAreReplicated() && _enforceOkForStorage;
    FieldRefSet immutablePaths;
    if (getOpCtx()->writesAreReplicated() && !request->isFromMigration()) {
        if (lifecycle) {
            auto immutablePathsVector =
                getImmutableFields(getOpCtx(), request->getNamespaceString());
            if (immutablePathsVector) {
                immutablePaths.fillFrom(
                    transitional_tools_do_not_use::unspool_vector(*immutablePathsVector));
            }
        }
        immutablePaths.keepShortest(&idFieldRef);
    }
    if (!driver->needMatchDetails()) {
        // If we don't need match details, avoid doing the rematch
        status = driver->update(
            StringData(), &_doc, validateForStorage, immutablePaths, &logObj, &docWasModified);
    } else {
        // If there was a matched field, obtain it.
        MatchDetails matchDetails;
        matchDetails.requestElemMatchKey();

        dassert(cq);
        verify(cq->root()->matchesBSON(oldObj.value(), &matchDetails));

        string matchedField;
        if (matchDetails.hasElemMatchKey())
            matchedField = matchDetails.elemMatchKey();

        status = driver->update(
            matchedField, &_doc, validateForStorage, immutablePaths, &logObj, &docWasModified);
    }

    if (!status.isOK()) {
        uasserted(16837, status.reason());
    }

    // Skip adding _id field if the collection is capped (since capped collection documents can
    // neither grow nor shrink).
    const auto createIdField = !_collection->isCapped();

    // Ensure if _id exists it is first
    status = ensureIdFieldIsFirst(&_doc);
    if (status.code() == ErrorCodes::InvalidIdField) {
        // Create ObjectId _id field if we are doing that
        if (createIdField) {
            addObjectIDIdField(&_doc);
        }
    } else {
        uassertStatusOK(status);
    }

    // See if the changes were applied in place
    const char* source = NULL;
    const bool inPlace = _doc.getInPlaceUpdates(&_damages, &source);

    if (inPlace && _damages.empty()) {
        // An interesting edge case. A modifier didn't notice that it was really a no-op
        // during its 'prepare' phase. That represents a missed optimization, but we still
        // shouldn't do any real work. Toggle 'docWasModified' to 'false'.
        //
        // Currently, an example of this is '{ $push : { x : {$each: [], $sort: 1} } }' when the 'x'
        // array exists and is already sorted.
        docWasModified = false;
    }

    if (docWasModified) {

        // Prepare to write back the modified document
        WriteUnitOfWork wunit(getOpCtx());

        RecordId newRecordId;
        OplogUpdateEntryArgs args;
        if (!request->isExplain()) {
            invariant(_collection);
            auto* css = CollectionShardingState::get(getOpCtx(), _collection->ns());
            args.nss = _collection->ns();
            args.uuid = _collection->uuid();
            args.stmtId = request->getStmtId();
            args.update = logObj;
            args.criteria = css->getMetadata().extractDocumentKey(newObj);
            uassert(16980,
                    "Multi-update operations require all documents to have an '_id' field",
                    !request->isMulti() || args.criteria.hasField("_id"_sd));
            args.fromMigrate = request->isFromMigration();
            args.storeDocOption = getStoreDocMode(*request);
            if (args.storeDocOption == OplogUpdateEntryArgs::StoreDocOption::PreImage) {
                args.preImageDoc = oldObj.value().getOwned();
            }
        }

        if (inPlace) {
            if (!request->isExplain()) {
                newObj = oldObj.value();
                const RecordData oldRec(oldObj.value().objdata(), oldObj.value().objsize());

                Snapshotted<RecordData> snap(oldObj.snapshotId(), oldRec);

                StatusWith<RecordData> newRecStatus = _collection->updateDocumentWithDamages(
                    getOpCtx(), recordId, std::move(snap), source, _damages, &args);

                newObj = uassertStatusOK(std::move(newRecStatus)).releaseToBson();
            }

            newRecordId = recordId;
        } else {
            // The updates were not in place. Apply them through the file manager.

            newObj = _doc.getObject();
            uassert(17419,
                    str::stream() << "Resulting document after update is larger than "
                                  << BSONObjMaxUserSize,
                    newObj.objsize() <= BSONObjMaxUserSize);

            if (!request->isExplain()) {
                newRecordId = _collection->updateDocument(getOpCtx(),
                                                          recordId,
                                                          oldObj,
                                                          newObj,
                                                          true,
                                                          driver->modsAffectIndices(),
                                                          _params.opDebug,
                                                          &args);
            }
        }

        invariant(oldObj.snapshotId() == getOpCtx()->recoveryUnit()->getSnapshotId());
        wunit.commit();

        // If the document moved, we might see it again in a collection scan (maybe it's
        // a document after our current document).
        //
        // If the document is indexed and the mod changes an indexed value, we might see
        // it again.  For an example, see the comment above near declaration of
        // updatedRecordIds.
        //
        // This must be done after the wunit commits so we are sure we won't be rolling back.
        if (_updatedRecordIds && (newRecordId != recordId || driver->modsAffectIndices())) {
            _updatedRecordIds->insert(newRecordId);
        }
    }

    // Only record doc modifications if they wrote (exclude no-ops). Explains get
    // recorded as if they wrote.
    if (docWasModified || request->isExplain()) {
        _specificStats.nModified++;
    }

    return newObj;
}
Ejemplo n.º 7
0
mongo::Status mongo::cloneCollectionAsCapped(OperationContext* opCtx,
                                             Database* db,
                                             const std::string& shortFrom,
                                             const std::string& shortTo,
                                             long long size,
                                             bool temp) {
    NamespaceString fromNss(db->name(), shortFrom);
    NamespaceString toNss(db->name(), shortTo);

    Collection* fromCollection = db->getCollection(opCtx, fromNss);
    if (!fromCollection) {
        if (db->getViewCatalog()->lookup(opCtx, fromNss.ns())) {
            return Status(ErrorCodes::CommandNotSupportedOnView,
                          str::stream() << "cloneCollectionAsCapped not supported for views: "
                                        << fromNss.ns());
        }
        return Status(ErrorCodes::NamespaceNotFound,
                      str::stream() << "source collection " << fromNss.ns() << " does not exist");
    }

    if (fromNss.isDropPendingNamespace()) {
        return Status(ErrorCodes::NamespaceNotFound,
                      str::stream() << "source collection " << fromNss.ns()
                                    << " is currently in a drop-pending state.");
    }

    if (db->getCollection(opCtx, toNss)) {
        return Status(ErrorCodes::NamespaceExists,
                      str::stream() << "cloneCollectionAsCapped failed - destination collection "
                                    << toNss.ns()
                                    << " already exists. source collection: "
                                    << fromNss.ns());
    }

    // create new collection
    {
        auto options = fromCollection->getCatalogEntry()->getCollectionOptions(opCtx);
        // The capped collection will get its own new unique id, as the conversion isn't reversible,
        // so it can't be rolled back.
        options.uuid.reset();
        options.capped = true;
        options.cappedSize = size;
        if (temp)
            options.temp = true;

        BSONObjBuilder cmd;
        cmd.append("create", toNss.coll());
        cmd.appendElements(options.toBSON());
        Status status = createCollection(opCtx, toNss.db().toString(), cmd.done());
        if (!status.isOK())
            return status;
    }

    Collection* toCollection = db->getCollection(opCtx, toNss);
    invariant(toCollection);  // we created above

    // how much data to ignore because it won't fit anyway
    // datasize and extentSize can't be compared exactly, so add some padding to 'size'

    long long allocatedSpaceGuess =
        std::max(static_cast<long long>(size * 2),
                 static_cast<long long>(toCollection->getRecordStore()->storageSize(opCtx) * 2));

    long long excessSize = fromCollection->dataSize(opCtx) - allocatedSpaceGuess;

    auto exec = InternalPlanner::collectionScan(opCtx,
                                                fromNss.ns(),
                                                fromCollection,
                                                PlanExecutor::WRITE_CONFLICT_RETRY_ONLY,
                                                InternalPlanner::FORWARD);

    Snapshotted<BSONObj> objToClone;
    RecordId loc;
    PlanExecutor::ExecState state = PlanExecutor::FAILURE;  // suppress uninitialized warnings

    DisableDocumentValidation validationDisabler(opCtx);

    int retries = 0;  // non-zero when retrying our last document.
    while (true) {
        if (!retries) {
            state = exec->getNextSnapshotted(&objToClone, &loc);
        }

        switch (state) {
            case PlanExecutor::IS_EOF:
                return Status::OK();
            case PlanExecutor::ADVANCED: {
                if (excessSize > 0) {
                    // 4x is for padding, power of 2, etc...
                    excessSize -= (4 * objToClone.value().objsize());
                    continue;
                }
                break;
            }
            default:
                // Unreachable as:
                // 1) We require a read lock (at a minimum) on the "from" collection
                //    and won't yield, preventing collection drop and PlanExecutor::DEAD
                // 2) PlanExecutor::FAILURE is only returned on PlanStage::FAILURE. The
                //    CollectionScan PlanStage does not have a FAILURE scenario.
                // 3) All other PlanExecutor states are handled above
                MONGO_UNREACHABLE;
        }

        try {
            // Make sure we are working with the latest version of the document.
            if (objToClone.snapshotId() != opCtx->recoveryUnit()->getSnapshotId() &&
                !fromCollection->findDoc(opCtx, loc, &objToClone)) {
                // doc was deleted so don't clone it.
                retries = 0;
                continue;
            }

            WriteUnitOfWork wunit(opCtx);
            OpDebug* const nullOpDebug = nullptr;
            uassertStatusOK(toCollection->insertDocument(
                opCtx, InsertStatement(objToClone.value()), nullOpDebug, true));
            wunit.commit();

            // Go to the next document
            retries = 0;
        } catch (const WriteConflictException&) {
            CurOp::get(opCtx)->debug().additiveMetrics.incrementWriteConflicts(1);
            retries++;  // logAndBackoff expects this to be 1 on first call.
            WriteConflictException::logAndBackoff(retries, "cloneCollectionAsCapped", fromNss.ns());

            // Can't use writeConflictRetry since we need to save/restore exec around call to
            // abandonSnapshot.
            exec->saveState();
            opCtx->recoveryUnit()->abandonSnapshot();
            auto restoreStatus = exec->restoreState();  // Handles any WCEs internally.
            if (!restoreStatus.isOK()) {
                return restoreStatus;
            }
        }
    }

    MONGO_UNREACHABLE;
}
Ejemplo n.º 8
0
    void run() {
        OldClientWriteContext ctx(&_txn, ns());
        Database* db = ctx.db();
        Collection* coll = db->getCollection(ns());
        if (!coll) {
            WriteUnitOfWork wuow(&_txn);
            coll = db->createCollection(&_txn, ns());
            wuow.commit();
        }

        {
            WriteUnitOfWork wuow(&_txn);
            fillData();
            wuow.commit();
        }

        // The data we're going to later invalidate.
        set<RecordId> locs;
        getLocs(&locs, coll);

        std::unique_ptr<PlanExecutor> exec(makePlanExecutorWithSortStage(coll));
        SortStage* ss = static_cast<SortStage*>(exec->getRootStage());
        QueuedDataStage* ms = static_cast<QueuedDataStage*>(ss->getChildren()[0]);

        // Have sort read in data from the queued data stage.
        const int firstRead = 5;
        for (int i = 0; i < firstRead; ++i) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState status = ss->work(&id);
            ASSERT_NOT_EQUALS(PlanStage::ADVANCED, status);
        }

        // We should have read in the first 'firstRead' locs.  Invalidate the first one.
        // Since it's in the WorkingSet, the updates should not be reflected in the output.
        exec->saveState();
        set<RecordId>::iterator it = locs.begin();
        Snapshotted<BSONObj> oldDoc = coll->docFor(&_txn, *it);

        OID updatedId = oldDoc.value().getField("_id").OID();
        SnapshotId idBeforeUpdate = oldDoc.snapshotId();
        // We purposefully update the document to have a 'foo' value greater than limit().
        // This allows us to check that we don't return the new copy of a doc by asserting
        // foo < limit().
        BSONObj newDoc = BSON("_id" << updatedId << "foo" << limit() + 10);
        oplogUpdateEntryArgs args;
        {
            WriteUnitOfWork wuow(&_txn);
            coll->updateDocument(&_txn, *it, oldDoc, newDoc, false, false, NULL, args);
            wuow.commit();
        }
        exec->restoreState(&_txn);

        // Read the rest of the data from the queued data stage.
        while (!ms->isEOF()) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            ss->work(&id);
        }

        // Let's just invalidate everything now. Already read into ss, so original values
        // should be fetched.
        exec->saveState();
        while (it != locs.end()) {
            oldDoc = coll->docFor(&_txn, *it);
            {
                WriteUnitOfWork wuow(&_txn);
                coll->updateDocument(&_txn, *it++, oldDoc, newDoc, false, false, NULL, args);
                wuow.commit();
            }
        }
        exec->restoreState(&_txn);

        // Verify that it's sorted, the right number of documents are returned, and they're all
        // in the expected range.
        int count = 0;
        int lastVal = 0;
        int thisVal;
        while (!ss->isEOF()) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState status = ss->work(&id);
            if (PlanStage::ADVANCED != status) {
                ASSERT_NE(status, PlanStage::FAILURE);
                ASSERT_NE(status, PlanStage::DEAD);
                continue;
            }
            WorkingSetMember* member = exec->getWorkingSet()->get(id);
            ASSERT(member->hasObj());
            if (member->obj.value().getField("_id").OID() == updatedId) {
                ASSERT(idBeforeUpdate == member->obj.snapshotId());
            }
            thisVal = member->obj.value().getField("foo").Int();
            ASSERT_LTE(lastVal, thisVal);
            // Expect docs in range [0, limit)
            ASSERT_LTE(0, thisVal);
            ASSERT_LT(thisVal, limit());
            lastVal = thisVal;
            ++count;
        }
        // Returns all docs.
        ASSERT_EQUALS(limit(), count);
    }
Ejemplo n.º 9
0
        static bool runImpl(OperationContext* txn,
                            const string& dbname,
                            const string& ns,
                            const BSONObj& query,
                            const BSONObj& fields,
                            const BSONObj& update,
                            const BSONObj& sort,
                            bool upsert,
                            bool returnNew,
                            bool remove ,
                            BSONObjBuilder& result,
                            string& errmsg) {

            AutoGetOrCreateDb autoDb(txn, dbname, MODE_IX);
            Lock::CollectionLock collLock(txn->lockState(), ns, MODE_IX);
            Client::Context ctx(txn, ns, autoDb.getDb(), autoDb.justCreated());

            if (!repl::getGlobalReplicationCoordinator()->canAcceptWritesForDatabase(dbname)) {
                return appendCommandStatus(result, Status(ErrorCodes::NotMaster, str::stream()
                    << "Not primary while running findAndModify in " << ns));
            }

            Collection* collection = ctx.db()->getCollection(ns);

            const WhereCallbackReal whereCallback(txn, StringData(ns));

            if ( !collection ) {
                if ( !upsert ) {
                    // no collectio and no upsert, so can't possible do anything
                    _appendHelper( result, BSONObj(), false, fields, whereCallback );
                    return true;
                }
                // no collection, but upsert, so we want to create it
                // problem is we only have IX on db and collection :(
                // so we tell our caller who can do it
                errmsg = "no-collection";
                return false;
            }

            Snapshotted<BSONObj> snapshotDoc;
            RecordId loc;
            bool found = false;
            {
                CanonicalQuery* cq;
                const BSONObj projection;
                const long long skip = 0;
                const long long limit = -1; // 1 document requested; negative indicates hard limit.
                uassertStatusOK(CanonicalQuery::canonicalize(ns,
                                                             query,
                                                             sort,
                                                             projection,
                                                             skip,
                                                             limit,
                                                             &cq,
                                                             whereCallback));

                PlanExecutor* rawExec;
                uassertStatusOK(getExecutor(txn,
                                            collection,
                                            cq,
                                            PlanExecutor::YIELD_AUTO,
                                            &rawExec,
                                            QueryPlannerParams::DEFAULT));

                scoped_ptr<PlanExecutor> exec(rawExec);

                PlanExecutor::ExecState state = exec->getNextSnapshotted(&snapshotDoc, &loc);
                if (PlanExecutor::ADVANCED == state) {
                    found = true;
                }
                else if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
                    if (PlanExecutor::FAILURE == state &&
                        WorkingSetCommon::isValidStatusMemberObject(snapshotDoc.value())) {
                        const Status errorStatus =
                            WorkingSetCommon::getMemberObjectStatus(snapshotDoc.value());
                        invariant(!errorStatus.isOK());
                        uasserted(errorStatus.code(), errorStatus.reason());
                    }
                    uasserted(ErrorCodes::OperationFailed,
                              str::stream() << "executor returned " << PlanExecutor::statestr(state)
                                            << " while finding document to update");
                }
                else {
                    invariant(PlanExecutor::IS_EOF == state);
                }
            }

            WriteUnitOfWork wuow(txn);
            if (found) {
                // We found a doc, but it might not be associated with the active snapshot.
                // If the doc has changed or is no longer in the collection, we will throw a
                // write conflict exception and start again from the beginning.
                if (txn->recoveryUnit()->getSnapshotId() != snapshotDoc.snapshotId()) {
                    BSONObj oldObj = snapshotDoc.value();
                    if (!collection->findDoc(txn, loc, &snapshotDoc)) {
                        // Got deleted in the new snapshot.
                        throw WriteConflictException();
                    }

                    if (!oldObj.binaryEqual(snapshotDoc.value())) {
                        // Got updated in the new snapshot.
                        throw WriteConflictException();
                    }
                }

                // If we get here without throwing, then we should have the copy of the doc from
                // the latest snapshot.
                invariant(txn->recoveryUnit()->getSnapshotId() == snapshotDoc.snapshotId());
            }

            BSONObj doc = snapshotDoc.value();
            BSONObj queryModified = query;
            if (found && !doc["_id"].eoo() && !CanonicalQuery::isSimpleIdQuery(query)) {
                // we're going to re-write the query to be more efficient
                // we have to be a little careful because of positional operators
                // maybe we can pass this all through eventually, but right now isn't an easy way
                
                bool hasPositionalUpdate = false;
                {
                    // if the update has a positional piece ($)
                    // then we need to pull all query parts in
                    // so here we check for $
                    // a little hacky
                    BSONObjIterator i( update );
                    while ( i.more() ) {
                        const BSONElement& elem = i.next();
                        
                        if ( elem.fieldName()[0] != '$' || elem.type() != Object )
                            continue;

                        BSONObjIterator j( elem.Obj() );
                        while ( j.more() ) {
                            if ( str::contains( j.next().fieldName(), ".$" ) ) {
                                hasPositionalUpdate = true;
                                break;
                            }
                        }
                    }
                }

                BSONObjBuilder b(query.objsize() + 10);
                b.append( doc["_id"] );
                
                bool addedAtomic = false;

                BSONObjIterator i(query);
                while ( i.more() ) {
                    const BSONElement& elem = i.next();

                    if ( str::equals( "_id" , elem.fieldName() ) ) {
                        // we already do _id
                        continue;
                    }
                    
                    if ( ! hasPositionalUpdate ) {
                        // if there is a dotted field, accept we may need more query parts
                        continue;
                    }
                    
                    if ( ! addedAtomic ) {
                        b.appendBool( "$atomic" , true );
                        addedAtomic = true;
                    }

                    b.append( elem );
                }

                queryModified = b.obj();
            }

            if ( remove ) {
                _appendHelper(result, doc, found, fields, whereCallback);
                if ( found ) {
                    deleteObjects(txn, ctx.db(), ns, queryModified, PlanExecutor::YIELD_MANUAL,
                                  true, true);
                    BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) );
                    le.appendNumber( "n" , 1 );
                    le.done();
                }
            }
            else {
                // update
                if ( ! found && ! upsert ) {
                    // didn't have it, and am not upserting
                    _appendHelper(result, doc, found, fields, whereCallback);
                }
                else {
                    // we found it or we're updating
                    
                    if ( ! returnNew ) {
                        _appendHelper(result, doc, found, fields, whereCallback);
                    }
                    
                    const NamespaceString requestNs(ns);
                    UpdateRequest request(requestNs);

                    request.setQuery(queryModified);
                    request.setUpdates(update);
                    request.setUpsert(upsert);
                    request.setUpdateOpLog();
                    request.setStoreResultDoc(returnNew);

                    request.setYieldPolicy(PlanExecutor::YIELD_MANUAL);

                    // TODO(greg) We need to send if we are ignoring
                    // the shard version below, but for now no
                    UpdateLifecycleImpl updateLifecycle(false, requestNs);
                    request.setLifecycle(&updateLifecycle);
                    UpdateResult res = mongo::update(txn,
                                                     ctx.db(),
                                                     request,
                                                     &txn->getCurOp()->debug());

                    if (!found && res.existing) {
                        // No match was found during the read part of this find and modify, which
                        // means that we're here doing an upsert. But the update also told us that
                        // we modified an *already existing* document. This probably means that
                        // the query reported EOF based on an out-of-date snapshot. This should be
                        // a rare event, so we handle it by throwing a write conflict.
                        throw WriteConflictException();
                    }

                    if ( !collection ) {
                        // collection created by an upsert
                        collection = ctx.db()->getCollection(ns);
                    }

                    LOG(3) << "update result: "  << res ;
                    if (returnNew) {
                        dassert(!res.newObj.isEmpty());
                        _appendHelper(result, res.newObj, true, fields, whereCallback);
                    }

                    BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) );
                    le.appendBool( "updatedExisting" , res.existing );
                    le.appendNumber( "n" , res.numMatched );
                    if ( !res.upserted.isEmpty() ) {
                        le.append( res.upserted[kUpsertedFieldName] );
                    }
                    le.done();
                }
            }

            // Committing the WUOW can close the current snapshot. Until this happens, the
            // snapshot id should not have changed.
            if (found) {
                invariant(txn->recoveryUnit()->getSnapshotId() == snapshotDoc.snapshotId());
            }
            wuow.commit();

            return true;
        }
Ejemplo n.º 10
0
Status MultiIndexBlockImpl::insertAllDocumentsInCollection(std::set<RecordId>* dupsOut) {
    const char* curopMessage = _buildInBackground ? "Index Build (background)" : "Index Build";
    const auto numRecords = _collection->numRecords(_opCtx);
    stdx::unique_lock<Client> lk(*_opCtx->getClient());
    ProgressMeterHolder progress(
        CurOp::get(_opCtx)->setMessage_inlock(curopMessage, curopMessage, numRecords));
    lk.unlock();

    Timer t;

    unsigned long long n = 0;

    PlanExecutor::YieldPolicy yieldPolicy;
    if (_buildInBackground) {
        invariant(_allowInterruption);
        yieldPolicy = PlanExecutor::YIELD_AUTO;
    } else {
        yieldPolicy = PlanExecutor::WRITE_CONFLICT_RETRY_ONLY;
    }
    auto exec =
        InternalPlanner::collectionScan(_opCtx, _collection->ns().ns(), _collection, yieldPolicy);

    Snapshotted<BSONObj> objToIndex;
    RecordId loc;
    PlanExecutor::ExecState state;
    int retries = 0;  // non-zero when retrying our last document.
    while (retries ||
           (PlanExecutor::ADVANCED == (state = exec->getNextSnapshotted(&objToIndex, &loc))) ||
           MONGO_FAIL_POINT(hangAfterStartingIndexBuild)) {
        try {
            if (_allowInterruption)
                _opCtx->checkForInterrupt();

            if (!(retries || (PlanExecutor::ADVANCED == state))) {
                // The only reason we are still in the loop is hangAfterStartingIndexBuild.
                log() << "Hanging index build due to 'hangAfterStartingIndexBuild' failpoint";
                invariant(_allowInterruption);
                sleepmillis(1000);
                continue;
            }

            // Make sure we are working with the latest version of the document.
            if (objToIndex.snapshotId() != _opCtx->recoveryUnit()->getSnapshotId() &&
                !_collection->findDoc(_opCtx, loc, &objToIndex)) {
                // doc was deleted so don't index it.
                retries = 0;
                continue;
            }

            // Done before insert so we can retry document if it WCEs.
            progress->setTotalWhileRunning(_collection->numRecords(_opCtx));

            WriteUnitOfWork wunit(_opCtx);
            Status ret = insert(objToIndex.value(), loc);
            if (_buildInBackground)
                exec->saveState();
            if (ret.isOK()) {
                wunit.commit();
            } else if (dupsOut && ret.code() == ErrorCodes::DuplicateKey) {
                // If dupsOut is non-null, we should only fail the specific insert that
                // led to a DuplicateKey rather than the whole index build.
                dupsOut->insert(loc);
            } else {
                // Fail the index build hard.
                return ret;
            }
            if (_buildInBackground) {
                auto restoreStatus = exec->restoreState();  // Handles any WCEs internally.
                if (!restoreStatus.isOK()) {
                    return restoreStatus;
                }
            }

            // Go to the next document
            progress->hit();
            n++;
            retries = 0;
        } catch (const WriteConflictException&) {
            CurOp::get(_opCtx)->debug().writeConflicts++;
            retries++;  // logAndBackoff expects this to be 1 on first call.
            WriteConflictException::logAndBackoff(
                retries, "index creation", _collection->ns().ns());

            // Can't use writeConflictRetry since we need to save/restore exec around call to
            // abandonSnapshot.
            exec->saveState();
            _opCtx->recoveryUnit()->abandonSnapshot();
            auto restoreStatus = exec->restoreState();  // Handles any WCEs internally.
            if (!restoreStatus.isOK()) {
                return restoreStatus;
            }
        }
    }

    uassert(28550,
            "Unable to complete index build due to collection scan failure: " +
                WorkingSetCommon::toStatusString(objToIndex.value()),
            state == PlanExecutor::IS_EOF);

    if (MONGO_FAIL_POINT(hangAfterStartingIndexBuildUnlocked)) {
        // Unlock before hanging so replication recognizes we've completed.
        Locker::LockSnapshot lockInfo;
        _opCtx->lockState()->saveLockStateAndUnlock(&lockInfo);
        while (MONGO_FAIL_POINT(hangAfterStartingIndexBuildUnlocked)) {
            log() << "Hanging index build with no locks due to "
                     "'hangAfterStartingIndexBuildUnlocked' failpoint";
            sleepmillis(1000);
        }
        // If we want to support this, we'd need to regrab the lock and be sure that all callers are
        // ok with us yielding. They should be for BG indexes, but not for foreground.
        invariant(!"the hangAfterStartingIndexBuildUnlocked failpoint can't be turned off");
    }

    progress->finished();

    Status ret = doneInserting(dupsOut);
    if (!ret.isOK())
        return ret;

    log() << "build index done.  scanned " << n << " total records. " << t.seconds() << " secs";

    return Status::OK();
}