Beispiel #1
0
    void run() {
        OperationContextImpl txn;
        Client::WriteContext ctx(&txn, _ns);

        int numFinishedIndexesStart = _catalog->numIndexesReady(&txn);

        Helpers::ensureIndex(&txn, _coll, BSON("x" << 1), false, "_x_0");
        Helpers::ensureIndex(&txn, _coll, BSON("y" << 1), false, "_y_0");

        ASSERT_TRUE(_catalog->numIndexesReady(&txn) == numFinishedIndexesStart+2);

        IndexCatalog::IndexIterator ii = _catalog->getIndexIterator(&txn,false);
        int indexesIterated = 0;
        bool foundIndex = false;
        while (ii.more()) {
            IndexDescriptor* indexDesc = ii.next();
            indexesIterated++;
            BSONObjIterator boit(indexDesc->infoObj());
            while (boit.more() && !foundIndex) {
                BSONElement e = boit.next();
                if (str::equals(e.fieldName(), "name") &&
                        str::equals(e.valuestrsafe(), "_y_0")) {
                    foundIndex = true;
                    break;
                }
            }
        }

        ctx.commit();
        ASSERT_TRUE(indexesIterated == _catalog->numIndexesReady(&txn));
        ASSERT_TRUE(foundIndex);
    }
Beispiel #2
0
    long long Database::getIndexSizeForCollection(OperationContext* opCtx,
                                                  Collection* coll,
                                                  BSONObjBuilder* details,
                                                  int scale ) {
        if ( !coll )
            return 0;

        IndexCatalog::IndexIterator ii =
            coll->getIndexCatalog()->getIndexIterator( true /*includeUnfinishedIndexes*/ );

        long long totalSize = 0;

        while ( ii.more() ) {
            IndexDescriptor* d = ii.next();
            string indNS = d->indexNamespace();

            // XXX creating a Collection for an index which isn't a Collection
            Collection* indColl = getCollection( opCtx, indNS );
            if ( ! indColl ) {
                log() << "error: have index descriptor ["  << indNS
                      << "] but no entry in the index collection." << endl;
                continue;
            }
            totalSize += indColl->dataSize();
            if ( details ) {
                long long const indexSize = indColl->dataSize() / scale;
                details->appendNumber( d->indexName() , indexSize );
            }
        }
        return totalSize;
    }
Beispiel #3
0
Status Collection::touch(OperationContext* txn,
                         bool touchData,
                         bool touchIndexes,
                         BSONObjBuilder* output) const {
    if (touchData) {
        BSONObjBuilder b;
        Status status = _recordStore->touch(txn, &b);
        if (!status.isOK())
            return status;
        output->append("data", b.obj());
    }

    if (touchIndexes) {
        Timer t;
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, false);
        while (ii.more()) {
            const IndexDescriptor* desc = ii.next();
            const IndexAccessMethod* iam = _indexCatalog.getIndex(desc);
            Status status = iam->touch(txn);
            if (!status.isOK())
                return status;
        }

        output->append("indexes",
                       BSON("num" << _indexCatalog.numIndexesTotal(txn) << "millis" << t.millis()));
    }

    return Status::OK();
}
Beispiel #4
0
    Status Database::renameCollection( OperationContext* txn,
                                       const StringData& fromNS,
                                       const StringData& toNS,
                                       bool stayTemp ) {

        audit::logRenameCollection( currentClient.get(), fromNS, toNS );

        { // remove anything cached
            Collection* coll = getCollection( txn, fromNS );
            if ( !coll )
                return Status( ErrorCodes::NamespaceNotFound, "collection not found to rename" );
            IndexCatalog::IndexIterator ii = coll->getIndexCatalog()->getIndexIterator( true );
            while ( ii.more() ) {
                IndexDescriptor* desc = ii.next();
                _clearCollectionCache( desc->indexNamespace() );
            }

            {
                scoped_lock lk( _collectionLock );
                _clearCollectionCache_inlock( fromNS );
                _clearCollectionCache_inlock( toNS );
            }

            Top::global.collectionDropped( fromNS.toString() );
        }

        return _dbEntry->renameCollection( txn, fromNS, toNS, stayTemp );
    }
Beispiel #5
0
/**
 * order will be:
 * 1) store index specs
 * 2) drop indexes
 * 3) truncate record store
 * 4) re-write indexes
 */
Status Collection::truncate(OperationContext* txn) {
    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_X));
    BackgroundOperation::assertNoBgOpInProgForNs(ns());
    invariant(_indexCatalog.numIndexesInProgress(txn) == 0);

    // 1) store index specs
    vector<BSONObj> indexSpecs;
    {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, false);
        while (ii.more()) {
            const IndexDescriptor* idx = ii.next();
            indexSpecs.push_back(idx->infoObj().getOwned());
        }
    }

    // 2) drop indexes
    Status status = _indexCatalog.dropAllIndexes(txn, true);
    if (!status.isOK())
        return status;
    _cursorManager.invalidateAll(false, "collection truncated");

    // 3) truncate record store
    status = _recordStore->truncate(txn);
    if (!status.isOK())
        return status;

    // 4) re-create indexes
    for (size_t i = 0; i < indexSpecs.size(); i++) {
        status = _indexCatalog.createIndexOnEmptyCollection(txn, indexSpecs[i]);
        if (!status.isOK())
            return status;
    }

    return Status::OK();
}
void CollectionInfoCache::updatePlanCacheIndexEntries(OperationContext* opCtx) {
    std::vector<IndexEntry> indexEntries;

    // TODO We shouldn't need to include unfinished indexes, but we must here because the index
    // catalog may be in an inconsistent state.  SERVER-18346.
    const bool includeUnfinishedIndexes = true;
    IndexCatalog::IndexIterator ii =
        _collection->getIndexCatalog()->getIndexIterator(opCtx, includeUnfinishedIndexes);
    while (ii.more()) {
        const IndexDescriptor* desc = ii.next();
        const IndexCatalogEntry* ice = ii.catalogEntry(desc);
        indexEntries.emplace_back(desc->keyPattern(),
                                  desc->getAccessMethodName(),
                                  desc->isMultikey(opCtx),
                                  ice->getMultikeyPaths(opCtx),
                                  desc->isSparse(),
                                  desc->unique(),
                                  desc->indexName(),
                                  ice->getFilterExpression(),
                                  desc->infoObj(),
                                  ice->getCollator());
    }

    _planCache->notifyOfIndexEntries(indexEntries);
}
Beispiel #7
0
Status Database::renameCollection(OperationContext* txn,
                                  StringData fromNS,
                                  StringData toNS,
                                  bool stayTemp) {
    audit::logRenameCollection(&cc(), fromNS, toNS);
    invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));
    BackgroundOperation::assertNoBgOpInProgForNs(fromNS);
    BackgroundOperation::assertNoBgOpInProgForNs(toNS);

    {  // remove anything cached
        Collection* coll = getCollection(fromNS);
        if (!coll)
            return Status(ErrorCodes::NamespaceNotFound, "collection not found to rename");

        string clearCacheReason = str::stream() << "renamed collection '" << fromNS << "' to '"
                                                << toNS << "'";
        IndexCatalog::IndexIterator ii = coll->getIndexCatalog()->getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* desc = ii.next();
            _clearCollectionCache(txn, desc->indexNamespace(), clearCacheReason);
        }

        _clearCollectionCache(txn, fromNS, clearCacheReason);
        _clearCollectionCache(txn, toNS, clearCacheReason);

        Top::get(txn->getClient()->getServiceContext()).collectionDropped(fromNS.toString());
    }

    txn->recoveryUnit()->registerChange(new AddCollectionChange(txn, this, toNS));
    Status s = _dbEntry->renameCollection(txn, fromNS, toNS, stayTemp);
    _collections[toNS] = _getOrCreateCollectionInstance(txn, toNS);
    return s;
}
Beispiel #8
0
    /**
     * order will be:
     * 1) store index specs
     * 2) drop indexes
     * 3) truncate record store
     * 4) re-write indexes
     */
    Status Collection::truncate(OperationContext* txn) {
        massert( 17445, "index build in progress", _indexCatalog.numIndexesInProgress( txn ) == 0 );

        // 1) store index specs
        vector<BSONObj> indexSpecs;
        {
            IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, false );
            while ( ii.more() ) {
                const IndexDescriptor* idx = ii.next();
                indexSpecs.push_back( idx->infoObj().getOwned() );
            }
        }

        // 2) drop indexes
        Status status = _indexCatalog.dropAllIndexes(txn, true);
        if ( !status.isOK() )
            return status;
        _cursorCache.invalidateAll( false );
        _infoCache.reset();

        // 3) truncate record store
        status = _recordStore->truncate(txn);
        if ( !status.isOK() )
            return status;

        // 4) re-create indexes
        for ( size_t i = 0; i < indexSpecs.size(); i++ ) {
            status = _indexCatalog.createIndexOnEmptyCollection(txn, indexSpecs[i]);
            if ( !status.isOK() )
                return status;
        }

        return Status::OK();
    }
Beispiel #9
0
    Status Database::renameCollection( OperationContext* txn,
                                       StringData fromNS,
                                       StringData toNS,
                                       bool stayTemp ) {

        audit::logRenameCollection( currentClient.get(), fromNS, toNS );
        invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));

        { // remove anything cached
            Collection* coll = getCollection( fromNS );
            if ( !coll )
                return Status( ErrorCodes::NamespaceNotFound, "collection not found to rename" );
            IndexCatalog::IndexIterator ii = coll->getIndexCatalog()->getIndexIterator( txn, true );
            while ( ii.more() ) {
                IndexDescriptor* desc = ii.next();
                _clearCollectionCache( txn, desc->indexNamespace() );
            }

            _clearCollectionCache( txn, fromNS );
            _clearCollectionCache( txn, toNS );

            Top::global.collectionDropped( fromNS.toString() );
        }

        txn->recoveryUnit()->registerChange( new AddCollectionChange(this, toNS) );
        Status s =  _dbEntry->renameCollection( txn, fromNS, toNS, stayTemp );
        _collections[toNS] =  _getOrCreateCollectionInstance(txn, toNS);
        return s;
    }
Beispiel #10
0
    // page in pages needed for all index lookups on a given object
    void prefetchIndexPages(Collection* collection,
                            const repl::ReplSetImpl::IndexPrefetchConfig& prefetchConfig,
                            const BSONObj& obj) {
        DiskLoc unusedDl; // unused
        BSONObjSet unusedKeys;

        // do we want prefetchConfig to be (1) as-is, (2) for update ops only, or (3) configured per op type?  
        // One might want PREFETCH_NONE for updates, but it's more rare that it is a bad idea for inserts.  
        // #3 (per op), a big issue would be "too many knobs".
        switch (prefetchConfig) {
        case repl::ReplSetImpl::PREFETCH_NONE:
            return;
        case repl::ReplSetImpl::PREFETCH_ID_ONLY:
        {
            TimerHolder timer( &prefetchIndexStats);
            // on the update op case, the call to prefetchRecordPages will touch the _id index.
            // thus perhaps this option isn't very useful?
            try {
                IndexDescriptor* desc = collection->getIndexCatalog()->findIdIndex();
                if ( !desc )
                    return;
                IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex( desc );
                verify( iam );
                iam->touch(obj);
            }
            catch (const DBException& e) {
                LOG(2) << "ignoring exception in prefetchIndexPages(): " << e.what() << endl;
            }
            break;
        }
        case repl::ReplSetImpl::PREFETCH_ALL:
        {
            // indexCount includes all indexes, including ones
            // in the process of being built
            IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator( true );
            while ( ii.more() ) {
                TimerHolder timer( &prefetchIndexStats);
                // This will page in all index pages for the given object.
                try {
                    IndexDescriptor* desc = ii.next();
                    IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex( desc );
                    verify( iam );
                    iam->touch(obj);
                }
                catch (const DBException& e) {
                    LOG(2) << "ignoring exception in prefetchIndexPages(): " << e.what() << endl;
                }
                unusedKeys.clear();
            }
            break;
        }
        default:
            fassertFailed(16427);
        }
    }
Beispiel #11
0
    Status Collection::validate( OperationContext* txn,
                                 bool full, bool scanData,
                                 ValidateResults* results, BSONObjBuilder* output ){

        MyValidateAdaptor adaptor;
        Status status = _recordStore->validate( txn, full, scanData, &adaptor, results, output );
        if ( !status.isOK() )
            return status;

        { // indexes
            output->append("nIndexes", _indexCatalog.numIndexesReady( txn ) );
            int idxn = 0;
            try  {
                // Only applicable when 'full' validation is requested.
                boost::scoped_ptr<BSONObjBuilder> indexDetails(full ? new BSONObjBuilder() : NULL);
                BSONObjBuilder indexes; // not using subObjStart to be exception safe

                IndexCatalog::IndexIterator i = _indexCatalog.getIndexIterator(txn, false);
                while( i.more() ) {
                    const IndexDescriptor* descriptor = i.next();
                    log(LogComponent::kIndex) << "validating index " << descriptor->indexNamespace() << endl;
                    IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );
                    invariant( iam );

                    boost::scoped_ptr<BSONObjBuilder> bob(
                        indexDetails.get() ? new BSONObjBuilder(
                            indexDetails->subobjStart(descriptor->indexNamespace())) :
                        NULL);

                    int64_t keys;
                    iam->validate(txn, full, &keys, bob.get());
                    indexes.appendNumber(descriptor->indexNamespace(),
                                         static_cast<long long>(keys));
                    idxn++;
                }

                output->append("keysPerIndex", indexes.done());
                if (indexDetails.get()) {
                    output->append("indexDetails", indexDetails->done());
                }
            }
            catch ( DBException& exc ) {
                string err = str::stream() <<
                    "exception during index validate idxn "<<
                    BSONObjBuilder::numStr(idxn) <<
                    ": " << exc.toString();
                results->errors.push_back( err );
                results->valid = false;
            }
        }

        return Status::OK();
    }
void CollectionInfoCache::init(OperationContext* opCtx) {
    // Requires exclusive collection lock.
    invariant(opCtx->lockState()->isCollectionLockedForMode(_collection->ns().ns(), MODE_X));

    const bool includeUnfinishedIndexes = false;
    IndexCatalog::IndexIterator ii =
        _collection->getIndexCatalog()->getIndexIterator(opCtx, includeUnfinishedIndexes);
    while (ii.more()) {
        const IndexDescriptor* desc = ii.next();
        _indexUsageTracker.registerIndex(desc->indexName(), desc->keyPattern());
    }

    rebuildIndexData(opCtx);
}
    void CollectionInfoCache::computeIndexKeys() {
        DEV Lock::assertWriteLocked( _collection->ns().ns() );

        _indexedPaths.clear();

        IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(true);
        while (i.more()) {
            IndexDescriptor* descriptor = i.next();

            if (descriptor->getAccessMethodName() != IndexNames::TEXT) {
                BSONObj key = descriptor->keyPattern();
                BSONObjIterator j(key);
                while (j.more()) {
                    BSONElement e = j.next();
                    _indexedPaths.addPath(e.fieldName());
                }
            }
            else {
                fts::FTSSpec ftsSpec(descriptor->infoObj());

                if (ftsSpec.wildcard()) {
                    _indexedPaths.allPathsIndexed();
                }
                else {
                    for (size_t i = 0; i < ftsSpec.numExtraBefore(); ++i) {
                        _indexedPaths.addPath(ftsSpec.extraBefore(i));
                    }
                    for (fts::Weights::const_iterator it = ftsSpec.weights().begin();
                         it != ftsSpec.weights().end();
                         ++it) {
                        _indexedPaths.addPath(it->first);
                    }
                    for (size_t i = 0; i < ftsSpec.numExtraAfter(); ++i) {
                        _indexedPaths.addPath(ftsSpec.extraAfter(i));
                    }
                    // Any update to a path containing "language" as a component could change the
                    // language of a subdocument.  Add the override field as a path component.
                    _indexedPaths.addPathComponent(ftsSpec.languageOverrideField());
                }
            }
        }

        _keysComputed = true;

    }
Beispiel #14
0
uint64_t Collection::getIndexSize(OperationContext* opCtx, BSONObjBuilder* details, int scale) {
    IndexCatalog* idxCatalog = getIndexCatalog();

    IndexCatalog::IndexIterator ii = idxCatalog->getIndexIterator(opCtx, true);

    uint64_t totalSize = 0;

    while (ii.more()) {
        IndexDescriptor* d = ii.next();
        IndexAccessMethod* iam = idxCatalog->getIndex(d);

        long long ds = iam->getSpaceUsedBytes(opCtx);

        totalSize += ds;
        if (details) {
            details->appendNumber(d->indexName(), ds / scale);
        }
    }

    return totalSize;
}
Beispiel #15
0
    Status Collection::validate( bool full, bool scanData,
                                 ValidateResults* results, BSONObjBuilder* output ){

        MyValidateAdaptor adaptor;
        Status status = _recordStore->validate( full, scanData, &adaptor, results, output );
        if ( !status.isOK() )
            return status;

        { // indexes
            output->append("nIndexes", _indexCatalog.numIndexesReady() );
            int idxn = 0;
            try  {
                BSONObjBuilder indexes; // not using subObjStart to be exception safe
                IndexCatalog::IndexIterator i = _indexCatalog.getIndexIterator(false);
                while( i.more() ) {
                    const IndexDescriptor* descriptor = i.next();
                    log() << "validating index " << descriptor->indexNamespace() << endl;
                    IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );
                    invariant( iam );

                    int64_t keys;
                    iam->validate(&keys);
                    indexes.appendNumber(descriptor->indexNamespace(),
                                         static_cast<long long>(keys));
                    idxn++;
                }
                output->append("keysPerIndex", indexes.done());
            }
            catch ( DBException& exc ) {
                string err = str::stream() <<
                    "exception during index validate idxn "<<
                    BSONObjBuilder::numStr(idxn) <<
                    ": " << exc.toString();
                results->errors.push_back( err );
                results->valid = false;
            }
        }

        return Status::OK();
    }
        void run() {
            Client::WriteContext ctx(_ns);
            int numFinishedIndexesStart = _catalog->numIndexesReady();

            BSONObjBuilder b1;
            b1.append("key", BSON("x" << 1));
            b1.append("ns", _ns);
            b1.append("name", "_x_0");
            _catalog->createIndex(b1.obj(), true);

            BSONObjBuilder b2;
            b2.append("key", BSON("y" << 1));
            b2.append("ns", _ns);
            b2.append("name", "_y_0");
            _catalog->createIndex(b2.obj(), true);

            ASSERT_TRUE(_catalog->numIndexesReady() == numFinishedIndexesStart+2);

            IndexCatalog::IndexIterator ii = _catalog->getIndexIterator(false);
            int indexesIterated = 0;
            bool foundIndex = false;
            while (ii.more()) {
                IndexDescriptor* indexDesc = ii.next();
                indexesIterated++;
                BSONObjIterator boit(indexDesc->infoObj());
                while (boit.more() && !foundIndex) {
                    BSONElement e = boit.next();
                    if (str::equals(e.fieldName(), "name") &&
                            str::equals(e.valuestrsafe(), "_y_0")) {
                        foundIndex = true;
                        break;
                    }
                }
            }

            ASSERT_TRUE(indexesIterated == _catalog->numIndexesReady());
            ASSERT_TRUE(foundIndex);
        }
Beispiel #17
0
        virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );

            if ( !NamespaceString::validCollectionComponent(target.c_str()) ) {
                errmsg = "invalid collection name: " + target;
                return false;
            }
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            if (!fromRepl) { // If it got through on the master, need to allow it here too
                Status sourceStatus = userAllowedWriteNS(source);
                if (!sourceStatus.isOK()) {
                    errmsg = "error with source namespace: " + sourceStatus.reason();
                    return false;
                }

                Status targetStatus = userAllowedWriteNS(target);
                if (!targetStatus.isOK()) {
                    errmsg = "error with target namespace: " + targetStatus.reason();
                    return false;
                }
            }

            string sourceDB = nsToDatabase(source);
            string targetDB = nsToDatabase(target);

            bool capped = false;
            long long size = 0;
            std::vector<BSONObj> indexesInProg;

            {
                Client::Context srcCtx( source );
                Collection* sourceColl = srcCtx.db()->getCollection( source );

                if ( !sourceColl ) {
                    errmsg = "source namespace does not exist";
                    return false;
                }

                // Ensure that collection name does not exceed maximum length.
                // Ensure that index names do not push the length over the max.
                // Iterator includes unfinished indexes.
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( true );
                int longestIndexNameLength = 0;
                while ( sourceIndIt.more() ) {
                    int thisLength = sourceIndIt.next()->indexName().length();
                    if ( thisLength > longestIndexNameLength )
                        longestIndexNameLength = thisLength;
                }

                unsigned int longestAllowed =
                    min(int(Namespace::MaxNsColletionLen),
                        int(Namespace::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength);
                if (target.size() > longestAllowed) {
                    StringBuilder sb;
                    sb << "collection name length of " << target.size()
                       << " exceeds maximum length of " << longestAllowed
                       << ", allowing for index names";
                    errmsg = sb.str();
                    return false;
                }

                {

                    indexesInProg = stopIndexBuilds( srcCtx.db(), cmdObj );
                    capped = sourceColl->isCapped();
                    if ( capped ) {
                        size = sourceColl->storageSize();
                    }
                }
            }

            {
                Client::Context ctx( target );

                // Check if the target namespace exists and if dropTarget is true.
                // If target exists and dropTarget is not true, return false.
                if ( ctx.db()->getCollection( target ) ) {
                    if ( !cmdObj["dropTarget"].trueValue() ) {
                        errmsg = "target namespace exists";
                        return false;
                    }

                    Status s = cc().database()->dropCollection( target );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        restoreIndexBuildsOnSource( indexesInProg, source );
                        return false;
                    }
                }

                // If we are renaming in the same database, just
                // rename the namespace and we're done.
                if ( sourceDB == targetDB ) {
                    Status s = ctx.db()->renameCollection( source, target,
                                                           cmdObj["stayTemp"].trueValue() );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        restoreIndexBuildsOnSource( indexesInProg, source );
                        return false;
                    }
                    return true;
                }

                // Otherwise, we are enaming across databases, so we must copy all
                // the data and then remove the source collection.

                // Create the target collection.
                Collection* targetColl = NULL;
                if ( capped ) {
                    CollectionOptions options;
                    options.capped = true;
                    options.cappedSize = size;
                    options.setNoIdIndex();

                    targetColl = ctx.db()->createCollection( target, options );
                }
                else {
                    CollectionOptions options;
                    options.setNoIdIndex();
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    targetColl = ctx.db()->createCollection( target, options );
                }
                if ( !targetColl ) {
                    errmsg = "Failed to create target collection.";
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            // Copy over all the data from source collection to target collection.
            bool insertSuccessful = true;
            boost::scoped_ptr<CollectionIterator> sourceIt;

            {
                Client::Context srcCtx( source );
                Collection* sourceColl = srcCtx.db()->getCollection( source );
                sourceIt.reset( sourceColl->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) );
            }

            Collection* targetColl = NULL;
            while ( !sourceIt->isEOF() ) {
                BSONObj o;
                {
                    Client::Context srcCtx( source );
                    o = sourceIt->getNext().obj();
                }
                // Insert and check return status of insert.
                {
                    Client::Context ctx( target );
                    if ( !targetColl )
                        targetColl = ctx.db()->getCollection( target );
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    Status s = targetColl->insertDocument( o, true ).getStatus();
                    if ( !s.isOK() ) {
                        insertSuccessful = false;
                        errmsg = s.toString();
                        break;
                    }
                }
            }

            // If inserts were unsuccessful, drop the target collection and return false.
            if ( !insertSuccessful ) {
                Client::Context ctx( target );
                Status s = ctx.db()->dropCollection( target );
                if ( !s.isOK() )
                    errmsg = s.toString();
                restoreIndexBuildsOnSource( indexesInProg, source );
                return false;
            }

            // Copy over the indexes to temp storage and then to the target..
            vector<BSONObj> copiedIndexes;
            bool indexSuccessful = true;
            {
                Client::Context srcCtx( source );
                Collection* sourceColl = srcCtx.db()->getCollection( source );
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( true );

                while ( sourceIndIt.more() ) {
                    BSONObj currIndex = sourceIndIt.next()->infoObj();

                    // Process the source index.
                    BSONObjBuilder b;
                    BSONObjIterator i( currIndex );
                    while( i.moreWithEOO() ) {
                        BSONElement e = i.next();
                        if ( e.eoo() )
                            break;
                        else if ( strcmp( e.fieldName(), "ns" ) == 0 )
                            b.append( "ns", target );
                        else
                            b.append( e );
                    }

                    BSONObj newIndex = b.obj();
                    copiedIndexes.push_back( newIndex );
                }
            }

            {
                Client::Context ctx( target );
                if ( !targetColl )
                    targetColl = ctx.db()->getCollection( target );

                for ( vector<BSONObj>::iterator it = copiedIndexes.begin();
                                                it != copiedIndexes.end(); ++it ) {
                    Status s = targetColl->getIndexCatalog()->createIndex( *it, true );
                    if ( !s.isOK() ) {
                        indexSuccessful = false;
                        errmsg = s.toString();
                        break;
                    }
                }

                // If indexes were unsuccessful, drop the target collection and return false.
                if ( !indexSuccessful ) {
                    Status s = ctx.db()->dropCollection( target );
                    if ( !s.isOK() )
                        errmsg = s.toString();
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            // Drop the source collection.
            {
                Client::Context srcCtx( source );
                Status s = srcCtx.db()->dropCollection( source );
                if ( !s.isOK() ) {
                    errmsg = s.toString();
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            return true;
        }
Beispiel #18
0
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // moderate means we have to check the old doc
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    if (_needCappedLock) {
        // X-lock the metadata resource for this capped collection until the end of the WUOW. This
        // prevents the primary from executing with more concurrency than secondaries.
        // See SERVER-21646.
        Lock::ResourceLock{txn->lockState(), ResourceId(RESOURCE_METADATA, _ns.ns()), MODE_X};
    }

    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // The MMAPv1 storage engine implements capped collections in a way that does not allow records
    // to grow beyond their original size. If MMAPv1 part of a replicaset with storage engines that
    // do not have this limitation, replication could result in errors, so it is necessary to set a
    // uniform rule here. Similarly, it is not sufficient to disallow growing records, because this
    // happens when secondaries roll back an update shrunk a record. Exactly replicating legacy
    // MMAPv1 behavior would require padding shrunk documents on all storage engines. Instead forbid
    // all size changes.
    const auto oldSize = oldDoc.value().objsize();
    if (_recordStore->isCapped() && oldSize != newDoc.objsize())
        return {ErrorCodes::CannotGrowDocumentInCappedNamespace,
                str::stream() << "Cannot change the size of a document in a capped collection: "
                              << oldSize << " != " << newDoc.objsize()};

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        std::vector<BsonRecord> bsonRecords;
        BsonRecord bsonRecord = {newLocation.getValue(), &newDoc};
        bsonRecords.push_back(bsonRecord);
        Status s = _indexCatalog.indexRecords(txn, bsonRecords);
        if (!s.isOK())
            return StatusWith<RecordId>(s);
        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move.  We update each index with each respective UpdateTicket.

    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
Beispiel #19
0
    StatusWith<DiskLoc> Collection::updateDocument( OperationContext* txn,
                                                    const DiskLoc& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).toBson();

        if ( objOld.hasElement( "_id" ) ) {
            BSONElement oldId = objOld["_id"];
            BSONElement newId = objNew["_id"];
            if ( oldId != newId )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
        }

        if ( ns().coll() == "system.users" ) {
            // XXX - andy and spencer think this should go away now
            V2UserDocumentParser parser;
            Status s = parser.checkValidUserDocument(objNew);
            if ( !s.isOK() )
                return StatusWith<DiskLoc>( s );
        }

        /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further
           below.  that is suboptimal, but it's pretty complicated to do it the other way without rollbacks...
        */
        OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
                || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket );
            if ( !ret.isOK() ) {
                return StatusWith<DiskLoc>( ret );
            }
        }

        // this can callback into Collection::recordStoreGoingToMove
        StatusWith<DiskLoc> newLocation = _recordStore->updateRecord( txn,
                                                                      oldLocation,
                                                                      objNew.objdata(),
                                                                      objNew.objsize(),
                                                                      _enforceQuota( enforceQuota ),
                                                                      this );

        if ( !newLocation.isOK() ) {
            return newLocation;
        }

        _infoCache.notifyOfWriteOp();

        if ( newLocation.getValue() != oldLocation ) {

            if ( debug ) {
                if (debug->nmoved == -1) // default of -1 rather than 0
                    debug->nmoved = 1;
                else
                    debug->nmoved += 1;
            }

            _indexCatalog.indexRecord(txn, objNew, newLocation.getValue());

            return newLocation;
        }

        if ( debug )
            debug->keyUpdates = 0;

        ii = _indexCatalog.getIndexIterator( txn, true );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if ( !ret.isOK() )
                return StatusWith<DiskLoc>( ret );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        // Broadcast the mutation so that query results stay correct.
        _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION);

        return newLocation;
    }
Status RecordStoreValidateAdaptor::validate(const RecordId& recordId,
                                            const RecordData& record,
                                            size_t* dataSize) {
    BSONObj recordBson = record.toBson();

    const Status status = validateBSON(
        recordBson.objdata(), recordBson.objsize(), Validator<BSONObj>::enabledBSONVersion());
    if (status.isOK()) {
        *dataSize = recordBson.objsize();
    } else {
        return status;
    }

    if (!_indexCatalog->haveAnyIndexes()) {
        return status;
    }

    IndexCatalog::IndexIterator i = _indexCatalog->getIndexIterator(_opCtx, false);

    while (i.more()) {
        const IndexDescriptor* descriptor = i.next();
        const std::string indexNs = descriptor->indexNamespace();
        int indexNumber = _indexConsistency->getIndexNumber(indexNs);
        ValidateResults curRecordResults;

        const IndexAccessMethod* iam = _indexCatalog->getIndex(descriptor);

        if (descriptor->isPartial()) {
            const IndexCatalogEntry* ice = _indexCatalog->getEntry(descriptor);
            if (!ice->getFilterExpression()->matchesBSON(recordBson)) {
                (*_indexNsResultsMap)[indexNs] = curRecordResults;
                continue;
            }
        }

        BSONObjSet documentKeySet = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        BSONObjSet multikeyMetadataKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        MultikeyPaths multikeyPaths;
        iam->getKeys(recordBson,
                     IndexAccessMethod::GetKeysMode::kEnforceConstraints,
                     &documentKeySet,
                     &multikeyMetadataKeys,
                     &multikeyPaths);

        if (!descriptor->isMultikey(_opCtx) &&
            iam->shouldMarkIndexAsMultikey(documentKeySet, multikeyMetadataKeys, multikeyPaths)) {
            std::string msg = str::stream() << "Index " << descriptor->indexName()
                                            << " is not multi-key, but a multikey path "
                                            << " is present in document " << recordId;
            curRecordResults.errors.push_back(msg);
            curRecordResults.valid = false;
        }

        for (const auto& key : multikeyMetadataKeys) {
            _indexConsistency->addMultikeyMetadataPath(makeWildCardMultikeyMetadataKeyString(key),
                                                       indexNumber);
        }

        const auto& pattern = descriptor->keyPattern();
        const Ordering ord = Ordering::make(pattern);
        bool largeKeyDisallowed = isLargeKeyDisallowed();

        for (const auto& key : documentKeySet) {
            if (largeKeyDisallowed &&
                key.objsize() >= static_cast<int64_t>(KeyString::TypeBits::kMaxKeyBytes)) {
                // Index keys >= 1024 bytes are not indexed.
                _indexConsistency->addLongIndexKey(indexNumber);
                continue;
            }

            // We want to use the latest version of KeyString here.
            KeyString ks(KeyString::kLatestVersion, key, ord, recordId);
            _indexConsistency->addDocKey(ks, indexNumber);
        }
        (*_indexNsResultsMap)[indexNs] = curRecordResults;
    }
    return status;
}
    Status MMAPV1Engine::repairDatabase( OperationContext* txn,
                                         const std::string& dbName,
                                         bool preserveClonedFilesOnFailure,
                                         bool backupOriginalFiles ) {
        // We must hold some form of lock here
        invariant(txn->lockState()->threadState());
        invariant( dbName.find( '.' ) == string::npos );

        scoped_ptr<RepairFileDeleter> repairFileDeleter;

        log() << "repairDatabase " << dbName << endl;

        BackgroundOperation::assertNoBgOpInProgForDb(dbName);

        txn->recoveryUnit()->syncDataAndTruncateJournal(); // Must be done before and after repair

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        txn->checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( txn,
                                                            dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase =
                            dbHolder().get(txn, dbName);
            if (originalDatabase == NULL) {
                return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
            }

            scoped_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
            scoped_ptr<Database> tempDatabase;
            {
                dbEntry.reset( new MMAPV1DatabaseCatalogEntry( txn,
                                                               dbName,
                                                               reservedPathString,
                                                               storageGlobalParams.directoryperdb,
                                                               true ) );
                invariant( !dbEntry->exists() );
                tempDatabase.reset( new Database( txn,
                                                  dbName,
                                                  dbEntry.get() ) );

            }

            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                Client::Context ctx(txn,  ns );
                Collection* coll = originalDatabase->getCollection( txn, ns );
                if ( coll ) {
                    scoped_ptr<RecordIterator> it( coll->getIterator( txn,
                                                                      DiskLoc(),
                                                                      false,
                                                                      CollectionScanParams::FORWARD ) );
                    while ( !it->isEOF() ) {
                        DiskLoc loc = it->getNext();
                        BSONObj obj = coll->docFor( loc );

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    Client::Context tempContext(txn, ns, tempDatabase );
                    WriteUnitOfWork wunit(txn);
                    tempCollection = tempDatabase->createCollection(txn, ns, options, true, false);
                    wunit.commit();
                }

                Client::Context readContext(txn, ns, originalDatabase);
                Collection* originalCollection = originalDatabase->getCollection( txn, ns );
                invariant( originalCollection );

                // data

                // TODO SERVER-14812 add a mode that drops duplicates rather than failing
                MultiIndexBlock indexer(txn, tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Client::Context tempContext(txn, ns, tempDatabase);
                    Status status = indexer.init( indexes );
                    if ( !status.isOK() )
                        return status;
                }

                scoped_ptr<RecordIterator> iterator(
                    originalCollection->getIterator( txn, DiskLoc(), false,
                                                     CollectionScanParams::FORWARD ));
                while ( !iterator->isEOF() ) {
                    DiskLoc loc = iterator->getNext();
                    invariant( !loc.isNull() );

                    BSONObj doc = originalCollection->docFor( loc );

                    Client::Context tempContext(txn, ns, tempDatabase);
                    
                    WriteUnitOfWork wunit(txn);
                    StatusWith<DiskLoc> result = tempCollection->insertDocument(txn,
                                                                                doc,
                                                                                &indexer,
                                                                                false);
                    if ( !result.isOK() )
                        return result.getStatus();

                    wunit.commit();
                    txn->checkForInterrupt(false);
                }
                
                Status status = indexer.doneInserting();
                if (!status.isOK())
                    return status;

                {
                    Client::Context tempContext(txn, ns, tempDatabase);
                    WriteUnitOfWork wunit(txn);
                    indexer.commit();
                    wunit.commit();
                }

            }

            txn->recoveryUnit()->syncDataAndTruncateJournal();
            globalStorageEngine->flushAllFiles(true); // need both in case journaling is disabled

            txn->checkForInterrupt(false);
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        dbHolder().close( txn, dbName );

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if ( !backupOriginalFiles )
            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );

        return Status::OK();
    }
Beispiel #22
0
    Status MMAPV1Engine::repairDatabase( OperationContext* txn,
                                         const std::string& dbName,
                                         bool preserveClonedFilesOnFailure,
                                         bool backupOriginalFiles ) {
        unique_ptr<RepairFileDeleter> repairFileDeleter;

        // Must be done before and after repair
        getDur().syncDataAndTruncateJournal(txn);

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        txn->checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        bool created = false;
        MONGO_ASSERT_ON_EXCEPTION( created = boost::filesystem::create_directory( reservedPath ) );
        invariant( created );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( txn,
                                                            dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase = dbHolder().openDb(txn, dbName);
            if (originalDatabase == NULL) {
                return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
            }

            unique_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
            unique_ptr<Database> tempDatabase;

            // Must call this before MMAPV1DatabaseCatalogEntry's destructor closes the DB files
            ON_BLOCK_EXIT(&dur::DurableInterface::syncDataAndTruncateJournal, &getDur(), txn);

            {
                dbEntry.reset(new MMAPV1DatabaseCatalogEntry(txn,
                                                             dbName,
                                                             reservedPathString,
                                                             storageGlobalParams.directoryperdb,
                                                             true));
                tempDatabase.reset( new Database(txn, dbName, dbEntry.get()));
            }

            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                OldClientContext ctx(txn,  ns );
                Collection* coll = originalDatabase->getCollection( ns );
                if ( coll ) {
                    auto cursor = coll->getCursor(txn);
                    while (auto record = cursor->next()) {
                        BSONObj obj = record->data.releaseToBson();

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    WriteUnitOfWork wunit(txn);
                    tempCollection = tempDatabase->createCollection(txn, ns, options, false);
                    wunit.commit();
                }

                OldClientContext readContext(txn, ns, originalDatabase);
                Collection* originalCollection = originalDatabase->getCollection( ns );
                invariant( originalCollection );

                // data

                // TODO SERVER-14812 add a mode that drops duplicates rather than failing
                MultiIndexBlock indexer(txn, tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( txn, false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Status status = indexer.init( indexes );
                    if (!status.isOK()) {
                        return status;
                    }
                }

                auto cursor = originalCollection->getCursor(txn);
                while (auto record = cursor->next()) {
                    BSONObj doc = record->data.releaseToBson();

                    WriteUnitOfWork wunit(txn);
                    StatusWith<RecordId> result = tempCollection->insertDocument(txn,
                                                                                 doc,
                                                                                 &indexer,
                                                                                 false);
                    if ( !result.isOK() )
                        return result.getStatus();

                    wunit.commit();
                    txn->checkForInterrupt();
                }
                
                Status status = indexer.doneInserting();
                if (!status.isOK())
                    return status;

                {
                    WriteUnitOfWork wunit(txn);
                    indexer.commit();
                    wunit.commit();
                }

            }

            getDur().syncDataAndTruncateJournal(txn);

            // need both in case journaling is disabled
            MongoFile::flushAll(true);

            txn->checkForInterrupt();
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        // Close the database so we can rename/delete the original data files
        dbHolder().close(txn, dbName);

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if (!backupOriginalFiles) {
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(reservedPath));
        }

        // Reopen the database so it's discoverable
        dbHolder().openDb(txn, dbName);

        return Status::OK();
    }
Beispiel #23
0
Status appendCollectionStorageStats(OperationContext* opCtx,
                                    const NamespaceString& nss,
                                    const BSONObj& param,
                                    BSONObjBuilder* result) {
    int scale = 1;
    if (param["scale"].isNumber()) {
        scale = param["scale"].numberInt();
        if (scale < 1) {
            return {ErrorCodes::BadValue, "scale has to be >= 1"};
        }
    } else if (param["scale"].trueValue()) {
        return {ErrorCodes::BadValue, "scale has to be a number >= 1"};
    }

    bool verbose = param["verbose"].trueValue();

    AutoGetCollectionForReadCommand ctx(opCtx, nss);
    Collection* collection = ctx.getCollection();  // Will be set if present
    if (!ctx.getDb() || !collection) {
        result->appendNumber("size", 0);
        result->appendNumber("count", 0);
        result->appendNumber("storageSize", 0);
        result->append("nindexes", 0);
        result->appendNumber("totalIndexSize", 0);
        result->append("indexDetails", BSONObj());
        result->append("indexSizes", BSONObj());
        std::string errmsg = !(ctx.getDb()) ? "Database [" + nss.db().toString() + "] not found."
                                            : "Collection [" + nss.toString() + "] not found.";
        return {ErrorCodes::NamespaceNotFound, errmsg};
    }

    long long size = collection->dataSize(opCtx) / scale;
    result->appendNumber("size", size);
    long long numRecords = collection->numRecords(opCtx);
    result->appendNumber("count", numRecords);

    if (numRecords)
        result->append("avgObjSize", collection->averageObjectSize(opCtx));

    RecordStore* recordStore = collection->getRecordStore();
    result->appendNumber(
        "storageSize",
        static_cast<long long>(recordStore->storageSize(opCtx, result, verbose ? 1 : 0)) / scale);

    recordStore->appendCustomStats(opCtx, result, scale);

    IndexCatalog* indexCatalog = collection->getIndexCatalog();
    result->append("nindexes", indexCatalog->numIndexesReady(opCtx));

    BSONObjBuilder indexDetails;

    IndexCatalog::IndexIterator i = indexCatalog->getIndexIterator(opCtx, false);
    while (i.more()) {
        const IndexDescriptor* descriptor = i.next();
        IndexAccessMethod* iam = indexCatalog->getIndex(descriptor);
        invariant(iam);

        BSONObjBuilder bob;
        if (iam->appendCustomStats(opCtx, &bob, scale)) {
            indexDetails.append(descriptor->indexName(), bob.obj());
        }
    }

    result->append("indexDetails", indexDetails.obj());

    BSONObjBuilder indexSizes;
    long long indexSize = collection->getIndexSize(opCtx, &indexSizes, scale);

    result->appendNumber("totalIndexSize", indexSize / scale);
    result->append("indexSizes", indexSizes.obj());

    return Status::OK();
}
Beispiel #24
0
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // moderate means we have to check the old doc
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        Status s = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue());
        if (!s.isOK())
            return StatusWith<RecordId>(s);
        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move.  We update each index with each respective UpdateTicket.

    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
Beispiel #25
0
    void fillOutPlannerParams(Collection* collection,
                              CanonicalQuery* canonicalQuery,
                              QueryPlannerParams* plannerParams) {
        // If it's not NULL, we may have indices.  Access the catalog and fill out IndexEntry(s)
        IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
        while (ii.more()) {
            const IndexDescriptor* desc = ii.next();
            plannerParams->indices.push_back(IndexEntry(desc->keyPattern(),
                                                        desc->getAccessMethodName(),
                                                        desc->isMultikey(),
                                                        desc->isSparse(),
                                                        desc->indexName(),
                                                        desc->infoObj()));
        }

        // If query supports index filters, filter params.indices by indices in query settings.
        QuerySettings* querySettings = collection->infoCache()->getQuerySettings();
        AllowedIndices* allowedIndicesRaw;

        // Filter index catalog if index filters are specified for query.
        // Also, signal to planner that application hint should be ignored.
        if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) {
            boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw);
            filterAllowedIndexEntries(*allowedIndices, &plannerParams->indices);
            plannerParams->indexFiltersApplied = true;
        }

        // We will not output collection scans unless there are no indexed solutions. NO_TABLE_SCAN
        // overrides this behavior by not outputting a collscan even if there are no indexed
        // solutions.
        if (storageGlobalParams.noTableScan) {
            const string& ns = canonicalQuery->ns();
            // There are certain cases where we ignore this restriction:
            bool ignore = canonicalQuery->getQueryObj().isEmpty()
                          || (string::npos != ns.find(".system."))
                          || (0 == ns.find("local."));
            if (!ignore) {
                plannerParams->options |= QueryPlannerParams::NO_TABLE_SCAN;
            }
        }

        // If the caller wants a shard filter, make sure we're actually sharded.
        if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
            CollectionMetadataPtr collMetadata =
                shardingState.getCollectionMetadata(canonicalQuery->ns());

            if (collMetadata) {
                plannerParams->shardKey = collMetadata->getKeyPattern();
            }
            else {
                // If there's no metadata don't bother w/the shard filter since we won't know what
                // the key pattern is anyway...
                plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
            }
        }

        if (internalQueryPlannerEnableIndexIntersection) {
            plannerParams->options |= QueryPlannerParams::INDEX_INTERSECTION;
        }

        plannerParams->options |= QueryPlannerParams::KEEP_MUTATIONS;
        plannerParams->options |= QueryPlannerParams::SPLIT_LIMITED_SORT;
    }
Beispiel #26
0
    /**
     * For a given query, get a runner.  The runner could be a SingleSolutionRunner, a
     * CachedQueryRunner, or a MultiPlanRunner, depending on the cache/query solver/etc.
     */
    Status getRunner(Collection* collection,
                     CanonicalQuery* rawCanonicalQuery,
                     Runner** out,
                     size_t plannerOptions) {

        verify(rawCanonicalQuery);
        auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);

        // This can happen as we're called by internal clients as well.
        if (NULL == collection) {
            const string& ns = canonicalQuery->ns();
            *out = new EOFRunner(canonicalQuery.release(), ns);
            return Status::OK();
        }

        // If we have an _id index we can use the idhack runner.
        if (canUseIDHack(*canonicalQuery) && collection->getIndexCatalog()->findIdIndex()) {
            *out = new IDHackRunner(collection, canonicalQuery.release());
            return Status::OK();
        }

        // If it's not NULL, we may have indices.  Access the catalog and fill out IndexEntry(s)
        QueryPlannerParams plannerParams;

        IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
        while (ii.more()) {
            const IndexDescriptor* desc = ii.next();
            plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                                       desc->isMultikey(),
                                                       desc->isSparse(),
                                                       desc->indexName(),
                                                       desc->infoObj()));
        }

        // If query supports index filters, filter params.indices by indices in query settings.
        QuerySettings* querySettings = collection->infoCache()->getQuerySettings();
        AllowedIndices* allowedIndicesRaw;

        // Filter index catalog if index filters are specified for query.
        // Also, signal to planner that application hint should be ignored.
        if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) {
            boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw);
            filterAllowedIndexEntries(*allowedIndices, &plannerParams.indices);
            plannerParams.indexFiltersApplied = true;
        }

        // Tailable: If the query requests tailable the collection must be capped.
        if (canonicalQuery->getParsed().hasOption(QueryOption_CursorTailable)) {
            if (!collection->isCapped()) {
                return Status(ErrorCodes::BadValue,
                              "error processing query: " + canonicalQuery->toString() +
                              " tailable cursor requested on non capped collection");
            }

            // If a sort is specified it must be equal to expectedSort.
            const BSONObj expectedSort = BSON("$natural" << 1);
            const BSONObj& actualSort = canonicalQuery->getParsed().getSort();
            if (!actualSort.isEmpty() && !(actualSort == expectedSort)) {
                return Status(ErrorCodes::BadValue,
                              "error processing query: " + canonicalQuery->toString() +
                              " invalid sort specified for tailable cursor: "
                              + actualSort.toString());
            }
        }

        // Process the planning options.
        plannerParams.options = plannerOptions;
        if (storageGlobalParams.noTableScan) {
            const string& ns = canonicalQuery->ns();
            // There are certain cases where we ignore this restriction:
            bool ignore = canonicalQuery->getQueryObj().isEmpty()
                          || (string::npos != ns.find(".system."))
                          || (0 == ns.find("local."));
            if (!ignore) {
                plannerParams.options |= QueryPlannerParams::NO_TABLE_SCAN;
            }
        }

        if (!(plannerParams.options & QueryPlannerParams::NO_TABLE_SCAN)) {
            plannerParams.options |= QueryPlannerParams::INCLUDE_COLLSCAN;
        }

        // If the caller wants a shard filter, make sure we're actually sharded.
        if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
            CollectionMetadataPtr collMetadata =
                shardingState.getCollectionMetadata(canonicalQuery->ns());

            if (collMetadata) {
                plannerParams.shardKey = collMetadata->getKeyPattern();
            }
            else {
                // If there's no metadata don't bother w/the shard filter since we won't know what
                // the key pattern is anyway...
                plannerParams.options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
            }
        }

        // Try to look up a cached solution for the query.
        //
        // Skip cache look up for non-cacheable queries.
        // See PlanCache::shouldCacheQuery()
        //
        // TODO: Can the cache have negative data about a solution?
        CachedSolution* rawCS;
        if (PlanCache::shouldCacheQuery(*canonicalQuery) &&
            collection->infoCache()->getPlanCache()->get(*canonicalQuery, &rawCS).isOK()) {
            // We have a CachedSolution.  Have the planner turn it into a QuerySolution.
            boost::scoped_ptr<CachedSolution> cs(rawCS);
            QuerySolution *qs, *backupQs;
            Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs,
                                                        &qs, &backupQs);

            // See SERVER-12438. Unfortunately we have to defer to the backup solution
            // if both a batch size is set and a sort is requested.
            //
            // TODO: it would be really nice to delete this block in the future.
            if (status.isOK() && NULL != backupQs &&
                0 < canonicalQuery->getParsed().getNumToReturn() &&
                !canonicalQuery->getParsed().getSort().isEmpty()) {
                delete qs;

                WorkingSet* ws;
                PlanStage* root;
                verify(StageBuilder::build(*backupQs, &root, &ws));

                // And, run the plan.
                *out = new SingleSolutionRunner(collection,
                                                canonicalQuery.release(),
                                                backupQs, root, ws);
                return Status::OK();
            }

            if (status.isOK()) {
                if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) {
                    if (turnIxscanIntoCount(qs)) {
                        WorkingSet* ws;
                        PlanStage* root;
                        verify(StageBuilder::build(*qs, &root, &ws));
                        *out = new SingleSolutionRunner(collection,
                                                        canonicalQuery.release(), qs, root, ws);
                        if (NULL != backupQs) {
                            delete backupQs;
                        }
                        return Status::OK();
                    }
                }

                WorkingSet* ws;
                PlanStage* root;
                verify(StageBuilder::build(*qs, &root, &ws));
                CachedPlanRunner* cpr = new CachedPlanRunner(collection,
                                                             canonicalQuery.release(), qs,
                                                             root, ws);

                if (NULL != backupQs) {
                    WorkingSet* backupWs;
                    PlanStage* backupRoot;
                    verify(StageBuilder::build(*backupQs, &backupRoot, &backupWs));
                    cpr->setBackupPlan(backupQs, backupRoot, backupWs);
                }

                *out = cpr;
                return Status::OK();
            }
        }

        if (enableIndexIntersection) {
            plannerParams.options |= QueryPlannerParams::INDEX_INTERSECTION;
        }

        plannerParams.options |= QueryPlannerParams::KEEP_MUTATIONS;

        vector<QuerySolution*> solutions;
        Status status = QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions);
        if (!status.isOK()) {
            return Status(ErrorCodes::BadValue,
                          "error processing query: " + canonicalQuery->toString() +
                          " planner returned error: " + status.reason());
        }

        // We cannot figure out how to answer the query.  Perhaps it requires an index
        // we do not have?
        if (0 == solutions.size()) {
            return Status(ErrorCodes::BadValue,
                          str::stream()
                          << "error processing query: "
                          << canonicalQuery->toString()
                          << " No query solutions");
        }

        // See if one of our solutions is a fast count hack in disguise.
        if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) {
            for (size_t i = 0; i < solutions.size(); ++i) {
                if (turnIxscanIntoCount(solutions[i])) {
                    // Great, we can use solutions[i].  Clean up the other QuerySolution(s).
                    for (size_t j = 0; j < solutions.size(); ++j) {
                        if (j != i) {
                            delete solutions[j];
                        }
                    }

                    // We're not going to cache anything that's fast count.
                    WorkingSet* ws;
                    PlanStage* root;
                    verify(StageBuilder::build(*solutions[i], &root, &ws));
                    *out = new SingleSolutionRunner(collection,
                                                    canonicalQuery.release(),
                                                    solutions[i],
                                                    root,
                                                    ws);
                    return Status::OK();
                }
            }
        }

        if (1 == solutions.size()) {
            // Only one possible plan.  Run it.  Build the stages from the solution.
            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(*solutions[0], &root, &ws));

            // And, run the plan.
            *out = new SingleSolutionRunner(collection,
                                            canonicalQuery.release(),solutions[0], root, ws);
            return Status::OK();
        }
        else {
            // See SERVER-12438. In an ideal world we should not arbitrarily prefer certain
            // solutions over others. But unfortunately for historical reasons we are forced
            // to prefer a solution where the index provides the sort, if the batch size
            // is set and a sort is requested. Read SERVER-12438 for details, if you dare.
            //
            // TODO: it would be really nice to delete this entire block in the future.
            if (0 < canonicalQuery->getParsed().getNumToReturn()
                && !canonicalQuery->getParsed().getSort().isEmpty()) {
                // Look for a solution without a blocking sort stage.
                for (size_t i = 0; i < solutions.size(); ++i) {
                    if (!solutions[i]->hasSortStage) {
                        WorkingSet* ws;
                        PlanStage* root;
                        verify(StageBuilder::build(*solutions[i], &root, &ws));

                        // Free unused solutions.
                        for (size_t j = 0; j < solutions.size(); ++j) {
                            if (j != i) {
                                delete solutions[j];
                            }
                        }

                        // And, run the plan.
                        *out = new SingleSolutionRunner(collection,
                                                       canonicalQuery.release(),
                                                       solutions[i], root, ws);
                        return Status::OK();
                    }
                }
            }

            // Many solutions.  Let the MultiPlanRunner pick the best, update the cache, and so on.
            auto_ptr<MultiPlanRunner> mpr(new MultiPlanRunner(collection,canonicalQuery.release()));

            for (size_t i = 0; i < solutions.size(); ++i) {
                WorkingSet* ws;
                PlanStage* root;
                if (solutions[i]->cacheData.get()) {
                    solutions[i]->cacheData->indexFilterApplied = plannerParams.indexFiltersApplied;
                }
                verify(StageBuilder::build(*solutions[i], &root, &ws));
                // Takes ownership of all arguments.
                mpr->addPlan(solutions[i], root, ws);
            }
            *out = mpr.release();
            return Status::OK();
        }
    }
Beispiel #27
0
    Status getRunnerDistinct(Collection* collection,
                             const BSONObj& query,
                             const string& field,
                             Runner** out) {
        // This should'a been checked by the distinct command.
        verify(collection);

        // TODO: check for idhack here?

        // When can we do a fast distinct hack?
        // 1. There is a plan with just one leaf and that leaf is an ixscan.
        // 2. The ixscan indexes the field we're interested in.
        // 2a: We are correct if the index contains the field but for now we look for prefix.
        // 3. The query is covered/no fetch.
        //
        // We go through normal planning (with limited parameters) to see if we can produce
        // a soln with the above properties.

        QueryPlannerParams plannerParams;
        plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN;

        IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
        while (ii.more()) {
            const IndexDescriptor* desc = ii.next();
            // The distinct hack can work if any field is in the index but it's not always clear
            // if it's a win unless it's the first field.
            if (desc->keyPattern().firstElement().fieldName() == field) {
                plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                                           desc->isMultikey(),
                                                           desc->isSparse(),
                                                           desc->indexName(),
                                                           desc->infoObj()));
            }
        }

        // We only care about the field that we're projecting over.  Have to drop the _id field
        // explicitly because those are .find() semantics.
        //
        // Applying a projection allows the planner to try to give us covered plans.
        BSONObj projection;
        if ("_id" == field) {
            projection = BSON("_id" << 1);
        }
        else {
            projection = BSON("_id" << 0 << field << 1);
        }

        // Apply a projection of the key.  Empty BSONObj() is for the sort.
        CanonicalQuery* cq;
        Status status = CanonicalQuery::canonicalize(collection->ns().ns(), query, BSONObj(), projection, &cq);
        if (!status.isOK()) {
            return status;
        }

        // No index has the field we're looking for.  Punt to normal planning.
        if (plannerParams.indices.empty()) {
            // Takes ownership of cq.
            return getRunner(cq, out);
        }

        // If we're here, we have an index prefixed by the field we're distinct-ing over.

        // If there's no query, we can just distinct-scan one of the indices.
        if (query.isEmpty()) {
            DistinctNode* dn = new DistinctNode();
            dn->indexKeyPattern = plannerParams.indices[0].keyPattern;
            dn->direction = 1;
            IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds);
            dn->fieldNo = 0;

            QueryPlannerParams params;

            // Takes ownership of 'dn'.
            QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn);
            verify(soln);

            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(*soln, &root, &ws));
            *out = new SingleSolutionRunner(collection, cq, soln, root, ws);
            return Status::OK();
        }

        // See if we can answer the query in a fast-distinct compatible fashion.
        vector<QuerySolution*> solutions;
        status = QueryPlanner::plan(*cq, plannerParams, &solutions);
        if (!status.isOK()) {
            return getRunner(cq, out);
        }

        // XXX: why do we need to do this?  planner should prob do this internally
        cq->root()->resetTag();

        // We look for a solution that has an ixscan we can turn into a distinctixscan
        for (size_t i = 0; i < solutions.size(); ++i) {
            if (turnIxscanIntoDistinctIxscan(solutions[i], field)) {
                // Great, we can use solutions[i].  Clean up the other QuerySolution(s).
                for (size_t j = 0; j < solutions.size(); ++j) {
                    if (j != i) {
                        delete solutions[j];
                    }
                }

                // Build and return the SSR over solutions[i].
                WorkingSet* ws;
                PlanStage* root;
                verify(StageBuilder::build(*solutions[i], &root, &ws));
                *out = new SingleSolutionRunner(collection, cq, solutions[i], root, ws);
                return Status::OK();
            }
        }

        // If we're here, the planner made a soln with the restricted index set but we couldn't
        // translate any of them into a distinct-compatible soln.  So, delete the solutions and just
        // go through normal planning.
        for (size_t i = 0; i < solutions.size(); ++i) {
            delete solutions[i];
        }

        return getRunner(cq, out);
    }
    Status repairDatabase( string dbName,
                           bool preserveClonedFilesOnFailure,
                           bool backupOriginalFiles ) {
        scoped_ptr<RepairFileDeleter> repairFileDeleter;
        doingRepair dr;
        dbName = nsToDatabase( dbName );

        log() << "repairDatabase " << dbName << endl;

        invariant( cc().database()->name() == dbName );
        invariant( cc().database()->path() == storageGlobalParams.dbpath );

        BackgroundOperation::assertNoBgOpInProgForDb(dbName);

        getDur().syncDataAndTruncateJournal(); // Must be done before and after repair

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        killCurrentOp.checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath );
            if ( originalDatabase == NULL )
                return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" );

            Database* tempDatabase = NULL;
            {
                bool justCreated = false;
                tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated );
                invariant( justCreated );
            }

            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                Client::Context ctx( ns );
                Collection* coll = originalDatabase->getCollection( ns );
                if ( coll ) {
                    scoped_ptr<CollectionIterator> it( coll->getIterator( DiskLoc(),
                                                                          false,
                                                                          CollectionScanParams::FORWARD ) );
                    while ( !it->isEOF() ) {
                        DiskLoc loc = it->getNext();
                        BSONObj obj = coll->docFor( loc );

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    Client::Context tempContext( ns, tempDatabase );
                    tempCollection = tempDatabase->createCollection( ns, options, true, false );
                }

                Client::Context readContext( ns, originalDatabase );
                Collection* originalCollection = originalDatabase->getCollection( ns );
                invariant( originalCollection );

                // data

                MultiIndexBlock indexBlock( tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.init( indexes );
                    if ( !status.isOK() )
                        return status;

                }

                scoped_ptr<CollectionIterator> iterator( originalCollection->getIterator( DiskLoc(),
                                                                                          false,
                                                                                          CollectionScanParams::FORWARD ) );
                while ( !iterator->isEOF() ) {
                    DiskLoc loc = iterator->getNext();
                    invariant( !loc.isNull() );

                    BSONObj doc = originalCollection->docFor( loc );

                    Client::Context tempContext( ns, tempDatabase );
                    StatusWith<DiskLoc> result = tempCollection->insertDocument( doc, indexBlock );
                    if ( !result.isOK() )
                        return result.getStatus();

                    getDur().commitIfNeeded();
                    killCurrentOp.checkForInterrupt(false);
                }

                {
                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.commit();
                    if ( !status.isOK() )
                        return status;
                }

            }

            getDur().syncDataAndTruncateJournal();
            MongoFile::flushAll(true); // need both in case journaling is disabled

            killCurrentOp.checkForInterrupt(false);

            Client::Context tempContext( dbName, reservedPathString );
            Database::closeDatabase( dbName, reservedPathString );
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        Client::Context ctx( dbName );
        Database::closeDatabase(dbName, storageGlobalParams.dbpath);

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if ( !backupOriginalFiles )
            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );

        return Status::OK();
    }
Beispiel #29
0
        virtual bool run(OperationContext* txn,
                         const string& dbname,
                         BSONObj& cmdObj,
                         int,
                         string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {
            Lock::GlobalWrite globalWriteLock(txn->lockState());
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );

            // We stay in source context the whole time. This is mostly to set the CurOp namespace.
            Client::Context ctx(txn, source);

            if ( !NamespaceString::validCollectionComponent(target.c_str()) ) {
                errmsg = "invalid collection name: " + target;
                return false;
            }
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            if (!fromRepl) { // If it got through on the master, need to allow it here too
                Status sourceStatus = userAllowedWriteNS(source);
                if (!sourceStatus.isOK()) {
                    errmsg = "error with source namespace: " + sourceStatus.reason();
                    return false;
                }

                Status targetStatus = userAllowedWriteNS(target);
                if (!targetStatus.isOK()) {
                    errmsg = "error with target namespace: " + targetStatus.reason();
                    return false;
                }
            }

            if (NamespaceString(source).coll() == "system.indexes"
                || NamespaceString(target).coll() == "system.indexes") {
                errmsg = "renaming system.indexes is not allowed";
                return false;
            }

            Database* const sourceDB = dbHolder().get(txn, nsToDatabase(source));
            Collection* const sourceColl = sourceDB ? sourceDB->getCollection(txn, source)
                                                    : NULL;
            if (!sourceColl) {
                errmsg = "source namespace does not exist";
                return false;
            }

            {
                // Ensure that collection name does not exceed maximum length.
                // Ensure that index names do not push the length over the max.
                // Iterator includes unfinished indexes.
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( txn, true );
                int longestIndexNameLength = 0;
                while ( sourceIndIt.more() ) {
                    int thisLength = sourceIndIt.next()->indexName().length();
                    if ( thisLength > longestIndexNameLength )
                        longestIndexNameLength = thisLength;
                }

                unsigned int longestAllowed =
                    min(int(NamespaceString::MaxNsCollectionLen),
                        int(NamespaceString::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength);
                if (target.size() > longestAllowed) {
                    StringBuilder sb;
                    sb << "collection name length of " << target.size()
                       << " exceeds maximum length of " << longestAllowed
                       << ", allowing for index names";
                    errmsg = sb.str();
                    return false;
                }
            }

            const std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, sourceDB, cmdObj);
            // Dismissed on success
            ScopeGuard indexBuildRestorer = MakeGuard(IndexBuilder::restoreIndexes, indexesInProg);

            Database* const targetDB = dbHolder().openDb(txn, nsToDatabase(target));

            {
                WriteUnitOfWork wunit(txn);

                // Check if the target namespace exists and if dropTarget is true.
                // If target exists and dropTarget is not true, return false.
                if (targetDB->getCollection(txn, target)) {
                    if (!cmdObj["dropTarget"].trueValue()) {
                        errmsg = "target namespace exists";
                        return false;
                    }

                    Status s = targetDB->dropCollection(txn, target);
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        return false;
                    }
                }

                // If we are renaming in the same database, just
                // rename the namespace and we're done.
                if (sourceDB == targetDB) {
                    Status s = targetDB->renameCollection(txn,
                                                          source,
                                                          target,
                                                          cmdObj["stayTemp"].trueValue() );
                    if (!s.isOK()) {
                        return appendCommandStatus(result, s);
                    }

                    if (!fromRepl) {
                        repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj);
                    }

                    wunit.commit();
                    indexBuildRestorer.Dismiss();
                    return true;
                }

                wunit.commit();
            }

            // If we get here, we are renaming across databases, so we must copy all the data and
            // indexes, then remove the source collection.

            // Create the target collection. It will be removed if we fail to copy the collection.
            // TODO use a temp collection and unset the temp flag on success.
            Collection* targetColl = NULL;
            {
                CollectionOptions options;
                options.setNoIdIndex();

                if (sourceColl->isCapped()) {
                    const CollectionOptions sourceOpts =
                        sourceColl->getCatalogEntry()->getCollectionOptions(txn);

                    options.capped = true;
                    options.cappedSize = sourceOpts.cappedSize;
                    options.cappedMaxDocs = sourceOpts.cappedMaxDocs;
                }

                WriteUnitOfWork wunit(txn);

                // No logOp necessary because the entire renameCollection command is one logOp.
                targetColl = targetDB->createCollection(txn, target, options);
                if (!targetColl) {
                    errmsg = "Failed to create target collection.";
                    return false;
                }

                wunit.commit();
            }

            // Dismissed on success
            ScopeGuard targetCollectionDropper = MakeGuard(dropCollection, txn, targetDB, target);

            MultiIndexBlock indexer(txn, targetColl);
            indexer.allowInterruption();

            // Copy the index descriptions from the source collection, adjusting the ns field.
            {
                std::vector<BSONObj> indexesToCopy;
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( txn, true );
                while (sourceIndIt.more()) {
                    const BSONObj currIndex = sourceIndIt.next()->infoObj();

                    // Process the source index.
                    BSONObjBuilder newIndex;
                    newIndex.append("ns", target);
                    newIndex.appendElementsUnique(currIndex);
                    indexesToCopy.push_back(newIndex.obj());
                }
                indexer.init(indexesToCopy);
            }

            {
                // Copy over all the data from source collection to target collection.
                boost::scoped_ptr<RecordIterator> sourceIt(sourceColl->getIterator(txn));
                while (!sourceIt->isEOF()) {
                    txn->checkForInterrupt(false);

                    const BSONObj obj = sourceColl->docFor(txn, sourceIt->getNext());

                    WriteUnitOfWork wunit(txn);
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    Status status = targetColl->insertDocument(txn, obj, &indexer, true).getStatus();
                    if (!status.isOK())
                        return appendCommandStatus(result, status);
                    wunit.commit();
                }
            }

            Status status = indexer.doneInserting();
            if (!status.isOK())
                return appendCommandStatus(result, status);

            {
                // Getting here means we successfully built the target copy. We now remove the
                // source collection and finalize the rename.
                WriteUnitOfWork wunit(txn);

                Status status = sourceDB->dropCollection(txn, source);
                if (!status.isOK())
                    return appendCommandStatus(result, status);

                indexer.commit();

                if (!fromRepl) {
                    repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj);
                }

                wunit.commit();
            }

            indexBuildRestorer.Dismiss();
            targetCollectionDropper.Dismiss();
            return true;
        }
Beispiel #30
0
    Status getRunnerDistinct(Collection* collection,
                             const BSONObj& query,
                             const string& field,
                             Runner** out) {
        // This should'a been checked by the distinct command.
        verify(collection);

        // TODO: check for idhack here?

        // When can we do a fast distinct hack?
        // 1. There is a plan with just one leaf and that leaf is an ixscan.
        // 2. The ixscan indexes the field we're interested in.
        // 2a: We are correct if the index contains the field but for now we look for prefix.
        // 3. The query is covered/no fetch.
        //
        // We go through normal planning (with limited parameters) to see if we can produce
        // a soln with the above properties.

        QueryPlannerParams plannerParams;
        plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN;

        IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
        while (ii.more()) {
            const IndexDescriptor* desc = ii.next();
            // The distinct hack can work if any field is in the index but it's not always clear
            // if it's a win unless it's the first field.
            if (desc->keyPattern().firstElement().fieldName() == field) {
                plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                                           desc->getAccessMethodName(),
                                                           desc->isMultikey(),
                                                           desc->isSparse(),
                                                           desc->indexName(),
                                                           desc->infoObj()));
            }
        }

        // If there are no suitable indices for the distinct hack bail out now into regular planning
        // with no projection.
        if (plannerParams.indices.empty()) {
            CanonicalQuery* cq;
            Status status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                                         query,
                                                         BSONObj(),
                                                         BSONObj(),
                                                         &cq);
            if (!status.isOK()) {
                return status;
            }

            // Takes ownership of cq.
            return getRunner(collection, cq, out);
        }

        //
        // If we're here, we have an index prefixed by the field we're distinct-ing over.
        //

        // Applying a projection allows the planner to try to give us covered plans that we can turn
        // into the projection hack.  getDistinctProjection deals with .find() projection semantics
        // (ie _id:1 being implied by default).
        BSONObj projection = getDistinctProjection(field);

        // Apply a projection of the key.  Empty BSONObj() is for the sort.
        CanonicalQuery* cq;
        Status status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                                     query,
                                                     BSONObj(),
                                                     projection,
                                                     &cq);
        if (!status.isOK()) {
            return status;
        }

        // If there's no query, we can just distinct-scan one of the indices.
        // Not every index in plannerParams.indices may be suitable. Refer to
        // getDistinctNodeIndex().
        size_t distinctNodeIndex = 0;
        if (query.isEmpty() &&
            getDistinctNodeIndex(plannerParams.indices, field, &distinctNodeIndex)) {
            DistinctNode* dn = new DistinctNode();
            dn->indexKeyPattern = plannerParams.indices[distinctNodeIndex].keyPattern;
            dn->direction = 1;
            IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds);
            dn->fieldNo = 0;

            QueryPlannerParams params;

            // Takes ownership of 'dn'.
            QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn);
            verify(soln);

            LOG(2) << "Using fast distinct: " << cq->toStringShort()
                   << ", planSummary: " << getPlanSummary(*soln);

            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(collection, *soln, &root, &ws));
            *out = new SingleSolutionRunner(collection, cq, soln, root, ws);
            return Status::OK();
        }

        // See if we can answer the query in a fast-distinct compatible fashion.
        vector<QuerySolution*> solutions;
        status = QueryPlanner::plan(*cq, plannerParams, &solutions);
        if (!status.isOK()) {
            return getRunner(collection, cq, out);
        }

        // We look for a solution that has an ixscan we can turn into a distinctixscan
        for (size_t i = 0; i < solutions.size(); ++i) {
            if (turnIxscanIntoDistinctIxscan(solutions[i], field)) {
                // Great, we can use solutions[i].  Clean up the other QuerySolution(s).
                for (size_t j = 0; j < solutions.size(); ++j) {
                    if (j != i) {
                        delete solutions[j];
                    }
                }

                LOG(2) << "Using fast distinct: " << cq->toStringShort()
                       << ", planSummary: " << getPlanSummary(*solutions[i]);

                // Build and return the SSR over solutions[i].
                WorkingSet* ws;
                PlanStage* root;
                verify(StageBuilder::build(collection, *solutions[i], &root, &ws));
                *out = new SingleSolutionRunner(collection, cq, solutions[i], root, ws);
                return Status::OK();
            }
        }

        // If we're here, the planner made a soln with the restricted index set but we couldn't
        // translate any of them into a distinct-compatible soln.  So, delete the solutions and just
        // go through normal planning.
        for (size_t i = 0; i < solutions.size(); ++i) {
            delete solutions[i];
        }

        // We drop the projection from the 'cq'.  Unfortunately this is not trivial.
        delete cq;
        status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                              query,
                                              BSONObj(),
                                              BSONObj(),
                                              &cq);
        if (!status.isOK()) {
            return status;
        }

        // Takes ownership of cq.
        return getRunner(collection, cq, out);
    }