Esempio n. 1
0
IndexAccessMethod* KVDatabaseCatalogEntry::getIndex(OperationContext* opCtx,
                                                    const CollectionCatalogEntry* collection,
                                                    IndexCatalogEntry* index) {
    IndexDescriptor* desc = index->descriptor();

    const std::string& type = desc->getAccessMethodName();

    std::string ident =
        _engine->getCatalog()->getIndexIdent(opCtx, collection->ns().ns(), desc->indexName());

    SortedDataInterface* sdi =
        _engine->getEngine()->getGroupedSortedDataInterface(opCtx, ident, desc, index->getPrefix());

    if ("" == type)
        return new BtreeAccessMethod(index, sdi);

    if (IndexNames::HASHED == type)
        return new HashAccessMethod(index, sdi);

    if (IndexNames::GEO_2DSPHERE == type)
        return new S2AccessMethod(index, sdi);

    if (IndexNames::TEXT == type)
        return new FTSAccessMethod(index, sdi);

    if (IndexNames::GEO_HAYSTACK == type)
        return new HaystackAccessMethod(index, sdi);

    if (IndexNames::GEO_2D == type)
        return new TwoDAccessMethod(index, sdi);

    log() << "Can't find index for keyPattern " << desc->keyPattern();
    invariant(false);
}
    void CollectionInfoCache::computeIndexKeys() {
        DEV Lock::assertWriteLocked( _collection->ns().ns() );

        _indexedPaths.clear();

        IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(true);
        while (i.more()) {
            IndexDescriptor* descriptor = i.next();

            if (descriptor->getAccessMethodName() != IndexNames::TEXT) {
                BSONObj key = descriptor->keyPattern();
                BSONObjIterator j(key);
                while (j.more()) {
                    BSONElement e = j.next();
                    _indexedPaths.addPath(e.fieldName());
                }
            }
            else {
                fts::FTSSpec ftsSpec(descriptor->infoObj());

                if (ftsSpec.wildcard()) {
                    _indexedPaths.allPathsIndexed();
                }
                else {
                    for (size_t i = 0; i < ftsSpec.numExtraBefore(); ++i) {
                        _indexedPaths.addPath(ftsSpec.extraBefore(i));
                    }
                    for (fts::Weights::const_iterator it = ftsSpec.weights().begin();
                         it != ftsSpec.weights().end();
                         ++it) {
                        _indexedPaths.addPath(it->first);
                    }
                    for (size_t i = 0; i < ftsSpec.numExtraAfter(); ++i) {
                        _indexedPaths.addPath(ftsSpec.extraAfter(i));
                    }
                    // Any update to a path containing "language" as a component could change the
                    // language of a subdocument.  Add the override field as a path component.
                    _indexedPaths.addPathComponent(ftsSpec.languageOverrideField());
                }
            }
        }

        _keysComputed = true;

    }
Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* txn) {
    ScopedTransaction scopedXact(txn, MODE_IS);
    AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

    Collection* const collection = autoColl.getCollection();
    if (!collection) {
        return {ErrorCodes::NamespaceNotFound,
                str::stream() << "Collection " << _args.getNss().ns() << " does not exist."};
    }

    // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, any
    // multi-key index prefixed by shard key cannot be multikey over the shard key fields.
    IndexDescriptor* idx =
        collection->getIndexCatalog()->findShardKeyPrefixedIndex(txn,
                                                                 _shardKeyPattern.toBSON(),
                                                                 false);  // requireSingleKey
    if (!idx) {
        return {ErrorCodes::IndexNotFound,
                str::stream() << "can't find index with prefix " << _shardKeyPattern.toBSON()
                              << " in storeCurrentLocs for "
                              << _args.getNss().ns()};
    }

    // Install the stage, which will listen for notifications on the collection
    {
        stdx::lock_guard<stdx::mutex> sl(_mutex);

        invariant(!_deleteNotifyExec);

        // Takes ownership of 'ws' and 'dns'.
        auto statusWithPlanExecutor =
            PlanExecutor::make(txn,
                               stdx::make_unique<WorkingSet>(),
                               stdx::make_unique<DeleteNotificationStage>(this, txn),
                               collection,
                               PlanExecutor::YIELD_MANUAL);
        invariant(statusWithPlanExecutor.isOK());

        _deleteNotifyExec = std::move(statusWithPlanExecutor.getValue());
        _deleteNotifyExec->registerExec(collection);
    }

    // Assume both min and max non-empty, append MinKey's to make them fit chosen index
    const KeyPattern kp(idx->keyPattern());

    BSONObj min = Helpers::toKeyFormat(kp.extendRangeBound(_args.getMinKey(), false));
    BSONObj max = Helpers::toKeyFormat(kp.extendRangeBound(_args.getMaxKey(), false));

    std::unique_ptr<PlanExecutor> exec(InternalPlanner::indexScan(txn,
                                                                  collection,
                                                                  idx,
                                                                  min,
                                                                  max,
                                                                  false,  // endKeyInclusive
                                                                  PlanExecutor::YIELD_MANUAL));

    // We can afford to yield here because any change to the base data that we might miss is already
    // being queued and will migrate in the 'transferMods' stage.
    exec->setYieldPolicy(PlanExecutor::YIELD_AUTO, collection);

    // Use the average object size to estimate how many objects a full chunk would carry do that
    // while traversing the chunk's range using the sharding index, below there's a fair amount of
    // slack before we determine a chunk is too large because object sizes will vary.
    unsigned long long maxRecsWhenFull;
    long long avgRecSize;

    const long long totalRecs = collection->numRecords(txn);
    if (totalRecs > 0) {
        avgRecSize = collection->dataSize(txn) / totalRecs;
        maxRecsWhenFull = _args.getMaxChunkSizeBytes() / avgRecSize;
        maxRecsWhenFull = std::min((unsigned long long)(Chunk::MaxObjectPerChunk + 1),
                                   130 * maxRecsWhenFull / 100 /* slack */);
    } else {
        avgRecSize = 0;
        maxRecsWhenFull = Chunk::MaxObjectPerChunk + 1;
    }

    // Do a full traversal of the chunk and don't stop even if we think it is a large chunk we want
    // the number of records to better report, in that case.
    bool isLargeChunk = false;
    unsigned long long recCount = 0;

    BSONObj obj;
    RecordId recordId;
    PlanExecutor::ExecState state;
    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, &recordId))) {
        if (!isLargeChunk) {
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            _cloneLocs.insert(recordId);
        }

        if (++recCount > maxRecsWhenFull) {
            isLargeChunk = true;
            // Continue on despite knowing that it will fail, just to get the correct value for
            // recCount
        }
    }

    if (PlanExecutor::DEAD == state || PlanExecutor::FAILURE == state) {
        return {ErrorCodes::InternalError,
                str::stream() << "Executor error while scanning for documents belonging to chunk: "
                              << WorkingSetCommon::toStatusString(obj)};
    }

    exec.reset();

    if (isLargeChunk) {
        return {
            ErrorCodes::ChunkTooBig,
            str::stream() << "Cannot move chunk: the maximum number of documents for a chunk is "
                          << maxRecsWhenFull
                          << ", the maximum chunk size is "
                          << _args.getMaxChunkSizeBytes()
                          << ", average document size is "
                          << avgRecSize
                          << ". Found "
                          << recCount
                          << " documents in chunk "
                          << " ns: "
                          << _args.getNss().ns()
                          << " "
                          << _args.getMinKey()
                          << " -> "
                          << _args.getMaxKey()};
    }

    _averageObjectSizeForCloneLocs = static_cast<uint64_t>(collection->averageObjectSize(txn) + 12);

    return Status::OK();
}
Esempio n. 4
0
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // moderate means we have to check the old doc
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    if (_needCappedLock) {
        // X-lock the metadata resource for this capped collection until the end of the WUOW. This
        // prevents the primary from executing with more concurrency than secondaries.
        // See SERVER-21646.
        Lock::ResourceLock{txn->lockState(), ResourceId(RESOURCE_METADATA, _ns.ns()), MODE_X};
    }

    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // The MMAPv1 storage engine implements capped collections in a way that does not allow records
    // to grow beyond their original size. If MMAPv1 part of a replicaset with storage engines that
    // do not have this limitation, replication could result in errors, so it is necessary to set a
    // uniform rule here. Similarly, it is not sufficient to disallow growing records, because this
    // happens when secondaries roll back an update shrunk a record. Exactly replicating legacy
    // MMAPv1 behavior would require padding shrunk documents on all storage engines. Instead forbid
    // all size changes.
    const auto oldSize = oldDoc.value().objsize();
    if (_recordStore->isCapped() && oldSize != newDoc.objsize())
        return {ErrorCodes::CannotGrowDocumentInCappedNamespace,
                str::stream() << "Cannot change the size of a document in a capped collection: "
                              << oldSize << " != " << newDoc.objsize()};

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        std::vector<BsonRecord> bsonRecords;
        BsonRecord bsonRecord = {newLocation.getValue(), &newDoc};
        bsonRecords.push_back(bsonRecord);
        Status s = _indexCatalog.indexRecords(txn, bsonRecords);
        if (!s.isOK())
            return StatusWith<RecordId>(s);
        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move.  We update each index with each respective UpdateTicket.

    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
Esempio n. 5
0
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // moderate means we have to check the old doc
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        Status s = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue());
        if (!s.isOK())
            return StatusWith<RecordId>(s);
        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move.  We update each index with each respective UpdateTicket.

    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
bool MigrationSourceManager::storeCurrentLocs(OperationContext* txn,
                                              long long maxChunkSize,
                                              string& errmsg,
                                              BSONObjBuilder& result) {
    AutoGetCollection autoColl(txn, _getNS(), MODE_IS);

    Collection* collection = autoColl.getCollection();
    if (!collection) {
        errmsg = "ns not found, should be impossible";
        return false;
    }

    // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, any
    // multi-key index prefixed by shard key cannot be multikey over the shard key fields.
    IndexDescriptor* idx =
        collection->getIndexCatalog()->findShardKeyPrefixedIndex(txn,
                                                                 _shardKeyPattern,
                                                                 false);  // requireSingleKey

    if (idx == NULL) {
        errmsg = str::stream() << "can't find index with prefix " << _shardKeyPattern
                               << " in storeCurrentLocs for " << _nss.toString();
        return false;
    }

    // Assume both min and max non-empty, append MinKey's to make them fit chosen index
    BSONObj min;
    BSONObj max;
    KeyPattern kp(idx->keyPattern());

    {
        // It's alright not to lock _mutex all the way through based on the assumption that this is
        // only called by the main thread that drives the migration and only it can start and stop
        // the current migration.
        stdx::lock_guard<stdx::mutex> sl(_mutex);

        invariant(_deleteNotifyExec.get() == NULL);
        unique_ptr<WorkingSet> ws = stdx::make_unique<WorkingSet>();
        unique_ptr<DeleteNotificationStage> dns = stdx::make_unique<DeleteNotificationStage>(this);

        // Takes ownership of 'ws' and 'dns'.
        auto statusWithPlanExecutor = PlanExecutor::make(
            txn, std::move(ws), std::move(dns), collection, PlanExecutor::YIELD_MANUAL);
        invariant(statusWithPlanExecutor.isOK());

        _deleteNotifyExec = std::move(statusWithPlanExecutor.getValue());
        _deleteNotifyExec->registerExec();

        min = Helpers::toKeyFormat(kp.extendRangeBound(_min, false));
        max = Helpers::toKeyFormat(kp.extendRangeBound(_max, false));
    }

    unique_ptr<PlanExecutor> exec(InternalPlanner::indexScan(txn,
                                                             collection,
                                                             idx,
                                                             min,
                                                             max,
                                                             false,  // endKeyInclusive
                                                             PlanExecutor::YIELD_MANUAL));

    // We can afford to yield here because any change to the base data that we might miss is already
    // being queued and will migrate in the 'transferMods' stage.
    exec->setYieldPolicy(PlanExecutor::YIELD_AUTO);

    // Use the average object size to estimate how many objects a full chunk would carry do that
    // while traversing the chunk's range using the sharding index, below there's a fair amount of
    // slack before we determine a chunk is too large because object sizes will vary.
    unsigned long long maxRecsWhenFull;
    long long avgRecSize;

    const long long totalRecs = collection->numRecords(txn);
    if (totalRecs > 0) {
        avgRecSize = collection->dataSize(txn) / totalRecs;
        maxRecsWhenFull = maxChunkSize / avgRecSize;
        maxRecsWhenFull = std::min((unsigned long long)(Chunk::MaxObjectPerChunk + 1),
                                   130 * maxRecsWhenFull / 100 /* slack */);
    } else {
        avgRecSize = 0;
        maxRecsWhenFull = Chunk::MaxObjectPerChunk + 1;
    }

    // Do a full traversal of the chunk and don't stop even if we think it is a large chunk we want
    // the number of records to better report, in that case
    bool isLargeChunk = false;
    unsigned long long recCount = 0;

    RecordId recordId;
    while (PlanExecutor::ADVANCED == exec->getNext(NULL, &recordId)) {
        if (!isLargeChunk) {
            stdx::lock_guard<stdx::mutex> lk(_cloneLocsMutex);
            _cloneLocs.insert(recordId);
        }

        if (++recCount > maxRecsWhenFull) {
            isLargeChunk = true;
            // Continue on despite knowing that it will fail, just to get the correct value for
            // recCount
        }
    }

    exec.reset();

    if (isLargeChunk) {
        stdx::lock_guard<stdx::mutex> sl(_mutex);
        warning() << "cannot move chunk: the maximum number of documents for a chunk is "
                  << maxRecsWhenFull << " , the maximum chunk size is " << maxChunkSize
                  << " , average document size is " << avgRecSize << ". Found " << recCount
                  << " documents in chunk "
                  << " ns: " << _nss << " " << _min << " -> " << _max << migrateLog;

        result.appendBool("chunkTooBig", true);
        result.appendNumber("estimatedChunkSize", (long long)(recCount * avgRecSize));
        errmsg = "chunk too big to move";
        return false;
    }

    log() << "moveChunk number of documents: " << cloneLocsRemaining() << migrateLog;

    txn->recoveryUnit()->abandonSnapshot();
    return true;
}
Esempio n. 7
0
    StatusWith<DiskLoc> Collection::updateDocument( OperationContext* txn,
                                                    const DiskLoc& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).toBson();

        if ( objOld.hasElement( "_id" ) ) {
            BSONElement oldId = objOld["_id"];
            BSONElement newId = objNew["_id"];
            if ( oldId != newId )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
        }

        if ( ns().coll() == "system.users" ) {
            // XXX - andy and spencer think this should go away now
            V2UserDocumentParser parser;
            Status s = parser.checkValidUserDocument(objNew);
            if ( !s.isOK() )
                return StatusWith<DiskLoc>( s );
        }

        /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further
           below.  that is suboptimal, but it's pretty complicated to do it the other way without rollbacks...
        */
        OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
                || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket );
            if ( !ret.isOK() ) {
                return StatusWith<DiskLoc>( ret );
            }
        }

        // this can callback into Collection::recordStoreGoingToMove
        StatusWith<DiskLoc> newLocation = _recordStore->updateRecord( txn,
                                                                      oldLocation,
                                                                      objNew.objdata(),
                                                                      objNew.objsize(),
                                                                      _enforceQuota( enforceQuota ),
                                                                      this );

        if ( !newLocation.isOK() ) {
            return newLocation;
        }

        _infoCache.notifyOfWriteOp();

        if ( newLocation.getValue() != oldLocation ) {

            if ( debug ) {
                if (debug->nmoved == -1) // default of -1 rather than 0
                    debug->nmoved = 1;
                else
                    debug->nmoved += 1;
            }

            _indexCatalog.indexRecord(txn, objNew, newLocation.getValue());

            return newLocation;
        }

        if ( debug )
            debug->keyUpdates = 0;

        ii = _indexCatalog.getIndexIterator( txn, true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if ( !ret.isOK() )
                return StatusWith<DiskLoc>( ret );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        // Broadcast the mutation so that query results stay correct.
        _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION);

        return newLocation;
    }
Esempio n. 8
0
    shared_ptr<Cursor> QueryPlan::newCursor( const DiskLoc& startLoc,
                                             bool requestIntervalCursor ) const {

        if ( _type ) {
            // hopefully safe to use original query in these contexts - don't think we can mix type
            // with $or clause separation yet
            int numWanted = 0;
            if ( _parsedQuery ) {
                // SERVER-5390
                numWanted = _parsedQuery->getSkip() + _parsedQuery->getNumToReturn();
            }

            IndexDescriptor* descriptor = CatalogHack::getDescriptor(_d, _idxNo);
            IndexAccessMethod* iam = CatalogHack::getIndex(descriptor);
            return shared_ptr<Cursor>(EmulatedCursor::make(descriptor, iam, _originalQuery,
                                                           _order, numWanted,
                                                           descriptor->keyPattern()));
        }

        if ( _utility == Impossible ) {
            // Dummy table scan cursor returning no results.  Allowed in --notablescan mode.
            return shared_ptr<Cursor>( new BasicCursor( DiskLoc() ) );
        }

        if ( willScanTable() ) {
            checkTableScanAllowed();
            return findTableScan( _frs.ns(), _order, startLoc );
        }
                
        massert( 10363,
                 "newCursor() with start location not implemented for indexed plans",
                 startLoc.isNull() );

        if ( _startOrEndSpec ) {
            // we are sure to spec _endKeyInclusive
            return shared_ptr<Cursor>( BtreeCursor::make( _d,
                                                          *_index,
                                                          _startKey,
                                                          _endKey,
                                                          _endKeyInclusive,
                                                          _direction >= 0 ? 1 : -1 ) );
        }

        if ( _index->getSpec().getType() ) {
            return shared_ptr<Cursor>( BtreeCursor::make( _d,
                                                          *_index,
                                                          _frv->startKey(),
                                                          _frv->endKey(),
                                                          true,
                                                          _direction >= 0 ? 1 : -1 ) );
        }

        // An IntervalBtreeCursor is returned if explicitly requested AND _frv is exactly
        // represented by a single interval within the btree.
        if ( // If an interval cursor is requested and ...
             requestIntervalCursor &&
             // ... equalities come before ranges (a requirement of Optimal) and ...
             _utility == Optimal &&
             // ... the field range vector exactly represents a single interval ...
             _frv->isSingleInterval() ) {
            // ... and an interval cursor can be created ...
            shared_ptr<Cursor> ret( IntervalBtreeCursor::make( _d,
                                                               *_index,
                                                               _frv->startKey(),
                                                               _frv->startKeyInclusive(),
                                                               _frv->endKey(),
                                                               _frv->endKeyInclusive() ) );
            if ( ret ) {
                // ... then return the interval cursor.
                return ret;
            }
        }

        return shared_ptr<Cursor>( BtreeCursor::make( _d,
                                                      *_index,
                                                      _frv,
                                                      independentRangesSingleIntervalLimit(),
                                                      _direction >= 0 ? 1 : -1 ) );
    }
Esempio n. 9
0
    StatusWith<DiskLoc> Collection::updateDocument( const DiskLoc& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        Record* oldRecord = getExtentManager()->recordFor( oldLocation );
        BSONObj objOld = BSONObj::make( oldRecord );

        if ( objOld.hasElement( "_id" ) ) {
            BSONElement oldId = objOld["_id"];
            BSONElement newId = objNew["_id"];
            if ( oldId != newId )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
        }

        if ( ns().coll() == "system.users" ) {
            // XXX - andy and spencer think this should go away now
            V2UserDocumentParser parser;
            Status s = parser.checkValidUserDocument(objNew);
            if ( !s.isOK() )
                return StatusWith<DiskLoc>( s );
        }

        /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further
           below.  that is suboptimal, but it's pretty complicated to do it the other way without rollbacks...
        */
        OwnedPointerVector<UpdateTicket> updateTickets;
        updateTickets.mutableVector().resize(_indexCatalog.numIndexesTotal());
        for (int i = 0; i < _indexCatalog.numIndexesTotal(); ++i) {
            IndexDescriptor* descriptor = _indexCatalog.getDescriptor( i );
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
                || ignoreUniqueIndex(descriptor);
            updateTickets.mutableVector()[i] = new UpdateTicket();
            Status ret = iam->validateUpdate(objOld, objNew, oldLocation, options,
                                             updateTickets.mutableVector()[i]);
            if ( !ret.isOK() ) {
                return StatusWith<DiskLoc>( ret );
            }
        }

        if ( oldRecord->netLength() < objNew.objsize() ) {
            // doesn't fit, have to move to new location

            if ( _details->isCapped() )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "failing update: objects in a capped ns cannot grow",
                                            10003 );

            moveCounter.increment();
            _details->paddingTooSmall();

            // unindex old record, don't delete
            // this way, if inserting new doc fails, we can re-index this one
            ClientCursor::aboutToDelete(_ns.ns(), _details, oldLocation);
            _indexCatalog.unindexRecord( objOld, oldLocation, true );

            if ( debug ) {
                if (debug->nmoved == -1) // default of -1 rather than 0
                    debug->nmoved = 1;
                else
                    debug->nmoved += 1;
            }

            StatusWith<DiskLoc> loc = insertDocument( objNew, enforceQuota );

            if ( loc.isOK() ) {
                // insert successful, now lets deallocate the old location
                // remember its already unindexed
                _recordStore.deallocRecord( oldLocation, oldRecord );
            }
            else {
                // new doc insert failed, so lets re-index the old document and location
                _indexCatalog.indexRecord( objOld, oldLocation );
            }

            return loc;
        }

        _infoCache.notifyOfWriteOp();
        _details->paddingFits();

        if ( debug )
            debug->keyUpdates = 0;

        for (int i = 0; i < _indexCatalog.numIndexesTotal(); ++i) {
            IndexDescriptor* descriptor = _indexCatalog.getDescriptor( i );
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status ret = iam->update(*updateTickets.vector()[i], &updatedKeys);
            if ( !ret.isOK() )
                return StatusWith<DiskLoc>( ret );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        //  update in place
        int sz = objNew.objsize();
        memcpy(getDur().writingPtr(oldRecord->data(), sz), objNew.objdata(), sz);
        return StatusWith<DiskLoc>( oldLocation );
    }
Esempio n. 10
0
    /**
     * For a given query, get a runner.  The runner could be a SingleSolutionRunner, a
     * CachedQueryRunner, or a MultiPlanRunner, depending on the cache/query solver/etc.
     */
    Status getRunner(CanonicalQuery* rawCanonicalQuery, Runner** out, size_t plannerOptions) {
        verify(rawCanonicalQuery);
        auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);

        // Try to look up a cached solution for the query.
        // TODO: Can the cache have negative data about a solution?
        PlanCache* localCache = PlanCache::get(canonicalQuery->ns());
        if (NULL != localCache) {
            CachedSolution* cs = localCache->get(*canonicalQuery);
            if (NULL != cs) {
                // We have a cached solution.  Hand the canonical query and cached solution off to
                // the cached plan runner, which takes ownership of both.
                WorkingSet* ws;
                PlanStage* root;
                verify(StageBuilder::build(*cs->solution, &root, &ws));
                *out = new CachedPlanRunner(canonicalQuery.release(), cs, root, ws);
                return Status::OK();
            }
        }

        // No entry in cache for the query.  We have to solve the query ourself.

        // Get the indices that we could possibly use.
        Database* db = cc().database();
        verify( db );
        Collection* collection = db->getCollection( canonicalQuery->ns() );

        // This can happen as we're called by internal clients as well.
        if (NULL == collection) {
            const string& ns = canonicalQuery->ns();
            *out = new EOFRunner(canonicalQuery.release(), ns);
            return Status::OK();
        }

        // If we have an _id index we can use the idhack runner.
        if (canUseIDHack(*canonicalQuery) && collection->getIndexCatalog()->findIdIndex()) {
            *out = new IDHackRunner(collection, canonicalQuery.release());
            return Status::OK();
        }

        // If it's not NULL, we may have indices.  Access the catalog and fill out IndexEntry(s)
        QueryPlannerParams plannerParams;
        for (int i = 0; i < collection->getIndexCatalog()->numIndexesReady(); ++i) {
            IndexDescriptor* desc = collection->getIndexCatalog()->getDescriptor( i );
            plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                                       desc->isMultikey(),
                                                       desc->isSparse(),
                                                       desc->indexName()));
        }

        // Tailable: If the query requests tailable the collection must be capped.
        if (canonicalQuery->getParsed().hasOption(QueryOption_CursorTailable)) {
            if (!collection->isCapped()) {
                return Status(ErrorCodes::BadValue,
                              "tailable cursor requested on non capped collection");
            }

            // If a sort is specified it must be equal to expectedSort.
            const BSONObj expectedSort = BSON("$natural" << 1);
            const BSONObj& actualSort = canonicalQuery->getParsed().getSort();
            if (!actualSort.isEmpty() && !(actualSort == expectedSort)) {
                return Status(ErrorCodes::BadValue,
                              "invalid sort specified for tailable cursor: "
                              + actualSort.toString());
            }
        }

        // Process the planning options.
        plannerParams.options = plannerOptions;
        if (storageGlobalParams.noTableScan) {
            const string& ns = canonicalQuery->ns();
            // There are certain cases where we ignore this restriction:
            bool ignore = canonicalQuery->getQueryObj().isEmpty()
                          || (string::npos != ns.find(".system."))
                          || (0 == ns.find("local."));
            if (!ignore) {
                plannerParams.options |= QueryPlannerParams::NO_TABLE_SCAN;
            }
        }

        if (!(plannerParams.options & QueryPlannerParams::NO_TABLE_SCAN)) {
            plannerParams.options |= QueryPlannerParams::INCLUDE_COLLSCAN;
        }

        // If the caller wants a shard filter, make sure we're actually sharded.
        if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
            CollectionMetadataPtr collMetadata = shardingState.getCollectionMetadata(canonicalQuery->ns());
            if (collMetadata) {
                plannerParams.shardKey = collMetadata->getKeyPattern();
            }
            else {
                // If there's no metadata don't bother w/the shard filter since we won't know what
                // the key pattern is anyway...
                plannerParams.options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
            }
        }

        vector<QuerySolution*> solutions;
        QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions);

        /*
        for (size_t i = 0; i < solutions.size(); ++i) {
            QLOG() << "solution " << i << " is " << solutions[i]->toString() << endl;
        }
        */

        // We cannot figure out how to answer the query.  Should this ever happen?
        if (0 == solutions.size()) {
            return Status(ErrorCodes::BadValue, "No query solutions");
        }

        if (1 == solutions.size()) {
            // Only one possible plan.  Run it.  Build the stages from the solution.
            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(*solutions[0], &root, &ws));

            // And, run the plan.
            *out = new SingleSolutionRunner(canonicalQuery.release(), solutions[0], root, ws);
            return Status::OK();
        }
        else {
            // Many solutions.  Let the MultiPlanRunner pick the best, update the cache, and so on.
            auto_ptr<MultiPlanRunner> mpr(new MultiPlanRunner(canonicalQuery.release()));
            for (size_t i = 0; i < solutions.size(); ++i) {
                WorkingSet* ws;
                PlanStage* root;
                verify(StageBuilder::build(*solutions[i], &root, &ws));
                // Takes ownership of all arguments.
                mpr->addPlan(solutions[i], root, ws);
            }
            *out = mpr.release();
            return Status::OK();
        }
    }
Esempio n. 11
0
void CollectionInfoCache::computeIndexKeys(OperationContext* opCtx) {
    _indexedPaths.clear();

    bool hadTTLIndex = _hasTTLIndex;
    _hasTTLIndex = false;

    IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(opCtx, true);
    while (i.more()) {
        IndexDescriptor* descriptor = i.next();

        if (descriptor->getAccessMethodName() != IndexNames::TEXT) {
            BSONObj key = descriptor->keyPattern();
            const BSONObj& infoObj = descriptor->infoObj();
            if (infoObj.hasField("expireAfterSeconds")) {
                _hasTTLIndex = true;
            }
            BSONObjIterator j(key);
            while (j.more()) {
                BSONElement e = j.next();
                _indexedPaths.addPath(e.fieldName());
            }
        } else {
            fts::FTSSpec ftsSpec(descriptor->infoObj());

            if (ftsSpec.wildcard()) {
                _indexedPaths.allPathsIndexed();
            } else {
                for (size_t i = 0; i < ftsSpec.numExtraBefore(); ++i) {
                    _indexedPaths.addPath(ftsSpec.extraBefore(i));
                }
                for (fts::Weights::const_iterator it = ftsSpec.weights().begin();
                     it != ftsSpec.weights().end();
                     ++it) {
                    _indexedPaths.addPath(it->first);
                }
                for (size_t i = 0; i < ftsSpec.numExtraAfter(); ++i) {
                    _indexedPaths.addPath(ftsSpec.extraAfter(i));
                }
                // Any update to a path containing "language" as a component could change the
                // language of a subdocument.  Add the override field as a path component.
                _indexedPaths.addPathComponent(ftsSpec.languageOverrideField());
            }
        }

        // handle partial indexes
        const IndexCatalogEntry* entry = i.catalogEntry(descriptor);
        const MatchExpression* filter = entry->getFilterExpression();
        if (filter) {
            unordered_set<std::string> paths;
            QueryPlannerIXSelect::getFields(filter, "", &paths);
            for (auto it = paths.begin(); it != paths.end(); ++it) {
                _indexedPaths.addPath(*it);
            }
        }
    }

    TTLCollectionCache& ttlCollectionCache = TTLCollectionCache::get(getGlobalServiceContext());

    if (_hasTTLIndex != hadTTLIndex) {
        if (_hasTTLIndex) {
            ttlCollectionCache.registerCollection(_collection->ns());
        } else {
            ttlCollectionCache.unregisterCollection(_collection->ns());
        }
    }

    _keysComputed = true;
}
Esempio n. 12
0
    StatusWith<RecordId> Collection::updateDocument( OperationContext* txn,
                                                    const RecordId& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).releaseToBson();

        if ( objOld.hasElement( "_id" ) ) {
            BSONElement oldId = objOld["_id"];
            BSONElement newId = objNew["_id"];
            if ( oldId != newId )
                return StatusWith<RecordId>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
        }

        /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further
           below.  that is suboptimal, but it's pretty complicated to do it the other way without rollbacks...
        */

        // At the end of this step, we will have a map of UpdateTickets, one per index, which
        // represent the index updates needed to be done, based on the changes between objOld and
        // objNew.
        OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
                || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket );
            if ( !ret.isOK() ) {
                return StatusWith<RecordId>( ret );
            }
        }

        // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
        // object is removed from all indexes.
        StatusWith<RecordId> newLocation = _recordStore->updateRecord( txn,
                                                                      oldLocation,
                                                                      objNew.objdata(),
                                                                      objNew.objsize(),
                                                                      _enforceQuota( enforceQuota ),
                                                                      this );

        if ( !newLocation.isOK() ) {
            return newLocation;
        }

        // At this point, the old object may or may not still be indexed, depending on if it was
        // moved.

        _infoCache.notifyOfWriteOp();

        // If the object did move, we need to add the new location to all indexes.
        if ( newLocation.getValue() != oldLocation ) {

            if ( debug ) {
                if (debug->nmoved == -1) // default of -1 rather than 0
                    debug->nmoved = 1;
                else
                    debug->nmoved += 1;
            }

            Status s = _indexCatalog.indexRecord(txn, objNew, newLocation.getValue());
            if (!s.isOK())
                return StatusWith<RecordId>(s);

            return newLocation;
        }

        // Object did not move.  We update each index with each respective UpdateTicket.

        if ( debug )
            debug->keyUpdates = 0;

        ii = _indexCatalog.getIndexIterator( txn, true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if ( !ret.isOK() )
                return StatusWith<RecordId>( ret );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        // Broadcast the mutation so that query results stay correct.
        _cursorCache.invalidateDocument(txn, oldLocation, INVALIDATION_MUTATION);

        return newLocation;
    }