Exemple #1
0
    /**
     * Renames collection 'fromNS' to 'toNS'.
     *
     * The rename is audit-logged first. Cached entries for each of the source collection's
     * index namespaces, for 'fromNS' and for 'toNS' are then cleared, Top is told that
     * 'fromNS' was dropped, and the rename itself is delegated to the database catalog entry.
     *
     * @return NamespaceNotFound when 'fromNS' does not exist, otherwise whatever the catalog
     *         entry's renameCollection() returns.
     */
    Status Database::renameCollection( OperationContext* txn,
                                       const StringData& fromNS,
                                       const StringData& toNS,
                                       bool stayTemp ) {

        audit::logRenameCollection( currentClient.get(), fromNS, toNS );

        {
            // Nothing cached under the old name, its indexes or the new name may survive.
            Collection* source = getCollection( txn, fromNS );
            if ( !source ) {
                return Status( ErrorCodes::NamespaceNotFound, "collection not found to rename" );
            }

            // 'true' asks the iterator to include unfinished indexes as well.
            IndexCatalog::IndexIterator indexes =
                source->getIndexCatalog()->getIndexIterator( true );
            while ( indexes.more() ) {
                _clearCollectionCache( indexes.next()->indexNamespace() );
            }

            {
                // The *_inlock helpers are invoked while _collectionLock is held.
                scoped_lock lk( _collectionLock );
                _clearCollectionCache_inlock( fromNS );
                _clearCollectionCache_inlock( toNS );
            }

            Top::global.collectionDropped( fromNS.toString() );
        }

        return _dbEntry->renameCollection( txn, fromNS, toNS, stayTemp );
    }
Exemple #2
0
    /**
     * Adds up the data size of every index of 'coll' (unfinished indexes included).
     *
     * Each index is looked up as a collection under its index namespace; an index with no
     * such collection is logged and skipped.
     *
     * @param coll     collection whose indexes are measured; a null pointer yields 0.
     * @param details  optional builder; when non-null it receives one number per index,
     *                 keyed by index name, holding that index's size divided by 'scale'.
     * @param scale    divisor applied only to the per-index numbers written to 'details'.
     * @return the unscaled sum of all index data sizes.
     */
    long long Database::getIndexSizeForCollection(OperationContext* opCtx,
                                                  Collection* coll,
                                                  BSONObjBuilder* details,
                                                  int scale ) {
        long long total = 0;

        if ( coll ) {
            IndexCatalog::IndexIterator indexes =
                coll->getIndexCatalog()->getIndexIterator( true /*includeUnfinishedIndexes*/ );

            while ( indexes.more() ) {
                IndexDescriptor* descriptor = indexes.next();
                string indexNs = descriptor->indexNamespace();

                // XXX creating a Collection for an index which isn't a Collection
                Collection* backing = getCollection( opCtx, indexNs );
                if ( backing ) {
                    total += backing->dataSize();
                    if ( details ) {
                        long long const scaledSize = backing->dataSize() / scale;
                        details->appendNumber( descriptor->indexName() , scaledSize );
                    }
                }
                else {
                    log() << "error: have index descriptor ["  << indexNs
                          << "] but no entry in the index collection." << endl;
                }
            }
        }

        return total;
    }
/**
 * Creates the access method object for 'index'.
 *
 * The index ident is resolved through the engine's catalog, a grouped sorted-data interface
 * is obtained for it from the storage engine, and a newly allocated access method matching
 * the descriptor's access method name is returned (an empty name selects the btree access
 * method). An unrecognised name is logged and trips an invariant.
 */
IndexAccessMethod* KVDatabaseCatalogEntry::getIndex(OperationContext* opCtx,
                                                    const CollectionCatalogEntry* collection,
                                                    IndexCatalogEntry* index) {
    IndexDescriptor* descriptor = index->descriptor();
    const std::string& accessMethod = descriptor->getAccessMethodName();

    std::string ident = _engine->getCatalog()->getIndexIdent(
        opCtx, collection->ns().ns(), descriptor->indexName());

    SortedDataInterface* sortedData = _engine->getEngine()->getGroupedSortedDataInterface(
        opCtx, ident, descriptor, index->getPrefix());

    if (accessMethod.empty()) {
        return new BtreeAccessMethod(index, sortedData);
    } else if (accessMethod == IndexNames::HASHED) {
        return new HashAccessMethod(index, sortedData);
    } else if (accessMethod == IndexNames::GEO_2DSPHERE) {
        return new S2AccessMethod(index, sortedData);
    } else if (accessMethod == IndexNames::TEXT) {
        return new FTSAccessMethod(index, sortedData);
    } else if (accessMethod == IndexNames::GEO_HAYSTACK) {
        return new HaystackAccessMethod(index, sortedData);
    } else if (accessMethod == IndexNames::GEO_2D) {
        return new TwoDAccessMethod(index, sortedData);
    }

    // No known access method matched the descriptor.
    log() << "Can't find index for keyPattern " << descriptor->keyPattern();
    invariant(false);
}
Exemple #4
0
/**
 * Renames collection 'fromNS' to 'toNS'.
 *
 * Requires the database to be locked in MODE_X (checked by invariant) and asserts that no
 * background operation is in progress on either namespace. Cache entries for the source
 * collection's index namespaces, for 'fromNS' and for 'toNS' are cleared with a reason
 * string describing the rename, and Top is told that 'fromNS' was dropped. A recovery-unit
 * change is registered for 'toNS', the rename is delegated to the database catalog entry,
 * and the in-memory collection map entry for 'toNS' is refreshed.
 *
 * @return NamespaceNotFound when 'fromNS' does not exist, otherwise the status returned by
 *         the catalog entry's renameCollection().
 */
Status Database::renameCollection(OperationContext* txn,
                                  StringData fromNS,
                                  StringData toNS,
                                  bool stayTemp) {
    audit::logRenameCollection(&cc(), fromNS, toNS);
    invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));
    BackgroundOperation::assertNoBgOpInProgForNs(fromNS);
    BackgroundOperation::assertNoBgOpInProgForNs(toNS);

    {
        // Nothing cached under the old name, its indexes or the new name may survive.
        Collection* source = getCollection(fromNS);
        if (!source) {
            return Status(ErrorCodes::NamespaceNotFound, "collection not found to rename");
        }

        string reason = str::stream() << "renamed collection '" << fromNS << "' to '" << toNS
                                      << "'";

        IndexCatalog::IndexIterator indexes =
            source->getIndexCatalog()->getIndexIterator(txn, true);
        while (indexes.more()) {
            _clearCollectionCache(txn, indexes.next()->indexNamespace(), reason);
        }

        _clearCollectionCache(txn, fromNS, reason);
        _clearCollectionCache(txn, toNS, reason);

        Top::get(txn->getClient()->getServiceContext()).collectionDropped(fromNS.toString());
    }

    // The change is registered before the catalog rename is attempted.
    txn->recoveryUnit()->registerChange(new AddCollectionChange(txn, this, toNS));

    Status renameResult = _dbEntry->renameCollection(txn, fromNS, toNS, stayTemp);
    _collections[toNS] = _getOrCreateCollectionInstance(txn, toNS);
    return renameResult;
}
Exemple #5
0
    void run() {
        OperationContextImpl txn;
        Client::WriteContext ctx(&txn, _ns);

        int numFinishedIndexesStart = _catalog->numIndexesReady(&txn);

        Helpers::ensureIndex(&txn, _coll, BSON("x" << 1), false, "_x_0");
        Helpers::ensureIndex(&txn, _coll, BSON("y" << 1), false, "_y_0");

        ASSERT_TRUE(_catalog->numIndexesReady(&txn) == numFinishedIndexesStart+2);

        IndexCatalog::IndexIterator ii = _catalog->getIndexIterator(&txn,false);
        int indexesIterated = 0;
        bool foundIndex = false;
        while (ii.more()) {
            IndexDescriptor* indexDesc = ii.next();
            indexesIterated++;
            BSONObjIterator boit(indexDesc->infoObj());
            while (boit.more() && !foundIndex) {
                BSONElement e = boit.next();
                if (str::equals(e.fieldName(), "name") &&
                        str::equals(e.valuestrsafe(), "_y_0")) {
                    foundIndex = true;
                    break;
                }
            }
        }

        ctx.commit();
        ASSERT_TRUE(indexesIterated == _catalog->numIndexesReady(&txn));
        ASSERT_TRUE(foundIndex);
    }
Exemple #6
0
    /**
     * Renames collection 'fromNS' to 'toNS'.
     *
     * Requires the database to be locked in MODE_X (checked by invariant). Cache entries
     * for the source collection's index namespaces, for 'fromNS' and for 'toNS' are
     * cleared and Top is told that 'fromNS' was dropped. A recovery-unit change is
     * registered for 'toNS', the rename is delegated to the database catalog entry, and
     * the in-memory collection map entry for 'toNS' is refreshed.
     *
     * @return NamespaceNotFound when 'fromNS' does not exist, otherwise the status returned
     *         by the catalog entry's renameCollection().
     */
    Status Database::renameCollection( OperationContext* txn,
                                       StringData fromNS,
                                       StringData toNS,
                                       bool stayTemp ) {

        audit::logRenameCollection( currentClient.get(), fromNS, toNS );
        invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X));

        {
            // Nothing cached under the old name, its indexes or the new name may survive.
            Collection* source = getCollection( fromNS );
            if ( !source ) {
                return Status( ErrorCodes::NamespaceNotFound, "collection not found to rename" );
            }

            IndexCatalog::IndexIterator indexes =
                source->getIndexCatalog()->getIndexIterator( txn, true );
            while ( indexes.more() ) {
                _clearCollectionCache( txn, indexes.next()->indexNamespace() );
            }

            _clearCollectionCache( txn, fromNS );
            _clearCollectionCache( txn, toNS );

            Top::global.collectionDropped( fromNS.toString() );
        }

        // The change is registered before the catalog rename is attempted.
        txn->recoveryUnit()->registerChange( new AddCollectionChange(this, toNS) );

        Status renameResult = _dbEntry->renameCollection( txn, fromNS, toNS, stayTemp );
        _collections[toNS] = _getOrCreateCollectionInstance(txn, toNS);
        return renameResult;
    }
 /**
  * Feeds 130 single-field keys ({a: 0} .. {a: 129}) through an external sorter, runs
  * buildBottomUpPhases2And3<V1>() on the sorted result, and then verifies with a forward
  * BtreeCursor that the finished index contains exactly the keys 0..129 in order.
  * The index head is asserted to be null before the build and non-null after it.
  */
 void run() {
     IndexDescriptor* id = addIndexWithInfo();
     // Create a SortPhaseOne.
     SortPhaseOne phaseOne;
     phaseOne.sorter.reset( new BSONObjExternalSorter(_aFirstSort));
     // Add index keys to the phaseOne.
     int32_t nKeys = 130;
     for( int32_t i = 0; i < nKeys; ++i ) {
         phaseOne.sorter->add( BSON( "a" << i ), /* dummy disk loc */ DiskLoc(), false );
     }
     // Record the key count in both counters of the phase-one state.
     phaseOne.nkeys = phaseOne.n = nKeys;
     phaseOne.sorter->sort( false );
     // Set up remaining arguments.
     set<DiskLoc> dups;
     CurOp* op = cc().curop();
     // The progress meter is created with nKeys passed for both numeric arguments and is
     // marked finished before the build starts.
     ProgressMeterHolder pm (op->setMessage("BuildBottomUp",
                                            "BuildBottomUp Progress",
                                            nKeys,
                                            nKeys));
     pm.finished();
     Timer timer;
     // The index's root has not yet been set.
     ASSERT( id->getHead().isNull() );
     // Finish building the index.
     buildBottomUpPhases2And3<V1>( true,
                                   id,
                                   *phaseOne.sorter,
                                   false,
                                   dups,
                                   op,
                                   &phaseOne,
                                   pm,
                                   timer,
                                   true );
     // The index's root is set after the build is complete.
     ASSERT( !id->getHead().isNull() );
     // Create a cursor over the index.
     scoped_ptr<BtreeCursor> cursor(
             BtreeCursor::make( nsdetails( _ns ),
                                id->getOnDisk(),
                                BSON( "" << -1 ),    // startKey below minimum key.
                                BSON( "" << nKeys ), // endKey above maximum key.
                                true,                // endKeyInclusive true.
                                1                    // direction forward.
                                ) );
     // Check that the keys in the index are the expected ones.
     // expectedKey advances in lock-step with the cursor, so after the loop it equals the
     // number of keys the cursor returned.
     int32_t expectedKey = 0;
     for( ; cursor->ok(); cursor->advance(), ++expectedKey ) {
         ASSERT_EQUALS( expectedKey, cursor->currKey().firstElement().number() );
     }
     ASSERT_EQUALS( nKeys, expectedKey );
 }
    void CollectionInfoCache::computeIndexKeys() {
        DEV Lock::assertWriteLocked( _collection->ns().ns() );

        _indexedPaths.clear();

        IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(true);
        while (i.more()) {
            IndexDescriptor* descriptor = i.next();

            if (descriptor->getAccessMethodName() != IndexNames::TEXT) {
                BSONObj key = descriptor->keyPattern();
                BSONObjIterator j(key);
                while (j.more()) {
                    BSONElement e = j.next();
                    _indexedPaths.addPath(e.fieldName());
                }
            }
            else {
                fts::FTSSpec ftsSpec(descriptor->infoObj());

                if (ftsSpec.wildcard()) {
                    _indexedPaths.allPathsIndexed();
                }
                else {
                    for (size_t i = 0; i < ftsSpec.numExtraBefore(); ++i) {
                        _indexedPaths.addPath(ftsSpec.extraBefore(i));
                    }
                    for (fts::Weights::const_iterator it = ftsSpec.weights().begin();
                         it != ftsSpec.weights().end();
                         ++it) {
                        _indexedPaths.addPath(it->first);
                    }
                    for (size_t i = 0; i < ftsSpec.numExtraAfter(); ++i) {
                        _indexedPaths.addPath(ftsSpec.extraAfter(i));
                    }
                    // Any update to a path containing "language" as a component could change the
                    // language of a subdocument.  Add the override field as a path component.
                    _indexedPaths.addPathComponent(ftsSpec.languageOverrideField());
                }
            }
        }

        _keysComputed = true;

    }
Exemple #9
0
/**
 * Sums the space used, in bytes, by every index of this collection (the iterator is asked
 * to include unfinished indexes).
 *
 * @param details  optional builder; when non-null it receives one number per index, keyed
 *                 by index name, holding that index's space divided by 'scale'.
 * @param scale    divisor applied only to the per-index numbers written to 'details'.
 * @return the unscaled total across all indexes.
 */
uint64_t Collection::getIndexSize(OperationContext* opCtx, BSONObjBuilder* details, int scale) {
    IndexCatalog* catalog = getIndexCatalog();
    uint64_t total = 0;

    for (IndexCatalog::IndexIterator indexes = catalog->getIndexIterator(opCtx, true);
         indexes.more();) {
        IndexDescriptor* descriptor = indexes.next();
        IndexAccessMethod* accessMethod = catalog->getIndex(descriptor);

        long long usedBytes = accessMethod->getSpaceUsedBytes(opCtx);
        total += usedBytes;

        if (details) {
            details->appendNumber(descriptor->indexName(), usedBytes / scale);
        }
    }

    return total;
}
        void run() {
            Client::WriteContext ctx(_ns);
            int numFinishedIndexesStart = _catalog->numIndexesReady();

            BSONObjBuilder b1;
            b1.append("key", BSON("x" << 1));
            b1.append("ns", _ns);
            b1.append("name", "_x_0");
            _catalog->createIndex(b1.obj(), true);

            BSONObjBuilder b2;
            b2.append("key", BSON("y" << 1));
            b2.append("ns", _ns);
            b2.append("name", "_y_0");
            _catalog->createIndex(b2.obj(), true);

            ASSERT_TRUE(_catalog->numIndexesReady() == numFinishedIndexesStart+2);

            IndexCatalog::IndexIterator ii = _catalog->getIndexIterator(false);
            int indexesIterated = 0;
            bool foundIndex = false;
            while (ii.more()) {
                IndexDescriptor* indexDesc = ii.next();
                indexesIterated++;
                BSONObjIterator boit(indexDesc->infoObj());
                while (boit.more() && !foundIndex) {
                    BSONElement e = boit.next();
                    if (str::equals(e.fieldName(), "name") &&
                            str::equals(e.valuestrsafe(), "_y_0")) {
                        foundIndex = true;
                        break;
                    }
                }
            }

            ASSERT_TRUE(indexesIterated == _catalog->numIndexesReady());
            ASSERT_TRUE(foundIndex);
        }
Exemple #11
0
    /**
     * Compacts the collection's record store and rebuilds its indexes.
     *
     * Steps, in order: refuse if the record store does not support compaction or if any
     * index build is in progress; reset the info cache; collect the adjusted spec of every
     * finished index (failing if a key pattern does not validate); drop all indexes; check
     * for interrupt; initialise a MultiIndexBlock with the collected specs; run the record
     * store's compact() through an adaptor bound to that block; commit the index block.
     *
     * @return the CompactStats filled in by the record store on success, otherwise the
     *         first error status encountered.
     */
    StatusWith<CompactStats> Collection::compact( OperationContext* txn,
                                                  const CompactOptions* compactOptions ) {
        if ( !_recordStore->compactSupported() ) {
            return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                             str::stream() <<
                                             "cannot compact collection with record store: " <<
                                             _recordStore->name() );
        }

        if ( _indexCatalog.numIndexesInProgress() ) {
            return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                             "cannot compact when indexes in progress" );
        }

        // same data, but might perform a little different after compact?
        _infoCache.reset();

        // Remember how to rebuild each finished index before the indexes are dropped.
        vector<BSONObj> specsToRebuild;
        {
            IndexCatalog::IndexIterator indexes = _indexCatalog.getIndexIterator( false );
            while ( indexes.more() ) {
                IndexDescriptor* descriptor = indexes.next();

                const BSONObj spec = _compactAdjustIndexSpec(descriptor->infoObj());
                const BSONObj keyPattern = spec.getObjectField("key");
                const Status keyStatus = validateKeyPattern(keyPattern);
                if (keyStatus.isOK()) {
                    specsToRebuild.push_back(spec);
                }
                else {
                    return StatusWith<CompactStats>(
                        ErrorCodes::CannotCreateIndex,
                        str::stream() << "Cannot compact collection due to invalid index "
                                      << spec << ": " << keyStatus.reason() << " For more info see"
                                      << " http://dochub.mongodb.org/core/index-validation");
                }
            }
        }

        // note that the drop indexes call also invalidates all clientcursors for the namespace,
        // which is important and wanted here
        log() << "compact dropping indexes" << endl;
        Status status = _indexCatalog.dropAllIndexes(txn, true);
        if ( !status.isOK() ) {
            return StatusWith<CompactStats>( status );
        }

        txn->checkForInterrupt();

        CompactStats stats;

        MultiIndexBlock indexer(txn, this);
        status = indexer.init( specsToRebuild );
        if ( !status.isOK() ) {
            return StatusWith<CompactStats>( status );
        }

        MyCompactAdaptor adaptor(this, &indexer);

        _recordStore->compact( txn, &adaptor, compactOptions, &stats );

        log() << "starting index commits";
        status = indexer.commit();
        if ( !status.isOK() ) {
            return StatusWith<CompactStats>( status );
        }

        return StatusWith<CompactStats>( stats );
    }
/**
 * Records the RecordId of every document currently inside the chunk range being migrated.
 *
 * The collection is opened in MODE_IS, an index prefixed by the shard key is located, a
 * delete-notification plan executor is installed on the collection, and the chunk's range
 * is scanned through that index. Record ids are inserted into _cloneLocs until the number
 * of scanned documents exceeds the estimated capacity of a full chunk; the scan still runs
 * to completion so the final count can be reported.
 *
 * @return OK on success;
 *         NamespaceNotFound if the collection does not exist;
 *         IndexNotFound if no index is prefixed by the shard key;
 *         InternalError if the index scan executor dies or fails;
 *         ChunkTooBig if the chunk holds more documents than the computed maximum.
 */
Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* txn) {
    ScopedTransaction scopedXact(txn, MODE_IS);
    AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

    Collection* const collection = autoColl.getCollection();
    if (!collection) {
        return {ErrorCodes::NamespaceNotFound,
                str::stream() << "Collection " << _args.getNss().ns() << " does not exist."};
    }

    // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, any
    // multi-key index prefixed by shard key cannot be multikey over the shard key fields.
    IndexDescriptor* idx =
        collection->getIndexCatalog()->findShardKeyPrefixedIndex(txn,
                                                                 _shardKeyPattern.toBSON(),
                                                                 false);  // requireSingleKey
    if (!idx) {
        return {ErrorCodes::IndexNotFound,
                str::stream() << "can't find index with prefix " << _shardKeyPattern.toBSON()
                              << " in storeCurrentLocs for "
                              << _args.getNss().ns()};
    }

    // Install the stage, which will listen for notifications on the collection
    {
        stdx::lock_guard<stdx::mutex> sl(_mutex);

        invariant(!_deleteNotifyExec);

        // The executor takes ownership of the working set and the notification stage.
        auto statusWithPlanExecutor =
            PlanExecutor::make(txn,
                               stdx::make_unique<WorkingSet>(),
                               stdx::make_unique<DeleteNotificationStage>(this, txn),
                               collection,
                               PlanExecutor::YIELD_MANUAL);
        invariant(statusWithPlanExecutor.isOK());

        _deleteNotifyExec = std::move(statusWithPlanExecutor.getValue());
        _deleteNotifyExec->registerExec(collection);
    }

    // Assume both min and max non-empty, append MinKey's to make them fit chosen index
    const KeyPattern kp(idx->keyPattern());

    BSONObj min = Helpers::toKeyFormat(kp.extendRangeBound(_args.getMinKey(), false));
    BSONObj max = Helpers::toKeyFormat(kp.extendRangeBound(_args.getMaxKey(), false));

    std::unique_ptr<PlanExecutor> exec(InternalPlanner::indexScan(txn,
                                                                  collection,
                                                                  idx,
                                                                  min,
                                                                  max,
                                                                  false,  // endKeyInclusive
                                                                  PlanExecutor::YIELD_MANUAL));

    // We can afford to yield here because any change to the base data that we might miss is already
    // being queued and will migrate in the 'transferMods' stage.
    exec->setYieldPolicy(PlanExecutor::YIELD_AUTO, collection);

    // Use the average object size to estimate how many objects a full chunk would carry, so that
    // while traversing the chunk's range using the sharding index below there is a fair amount of
    // slack before we determine a chunk is too large, because object sizes will vary.
    unsigned long long maxRecsWhenFull;
    long long avgRecSize;

    // Smallest possible BSON document: a 4-byte length prefix plus the terminating 0x00 byte.
    const long long kMinBsonDocumentSize = 5;

    const long long totalRecs = collection->numRecords(txn);
    if (totalRecs > 0) {
        avgRecSize = collection->dataSize(txn) / totalRecs;
        // The record count and the data size come from two separate calls. If the reported data
        // size is smaller than the record count, the integer division above yields zero and the
        // division below would be a division by zero; clamp to the smallest valid document size.
        if (avgRecSize <= 0) {
            avgRecSize = kMinBsonDocumentSize;
        }
        maxRecsWhenFull = _args.getMaxChunkSizeBytes() / avgRecSize;
        maxRecsWhenFull =
            std::min(static_cast<unsigned long long>(Chunk::MaxObjectPerChunk + 1),
                     130 * maxRecsWhenFull / 100 /* slack */);
    } else {
        avgRecSize = 0;
        maxRecsWhenFull = Chunk::MaxObjectPerChunk + 1;
    }

    // Do a full traversal of the chunk and don't stop even if we think it is a large chunk: in
    // that case we still want the number of records so it can be reported.
    bool isLargeChunk = false;
    unsigned long long recCount = 0;

    BSONObj obj;
    RecordId recordId;
    PlanExecutor::ExecState state;
    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, &recordId))) {
        if (!isLargeChunk) {
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            _cloneLocs.insert(recordId);
        }

        if (++recCount > maxRecsWhenFull) {
            isLargeChunk = true;
            // Continue on despite knowing that it will fail, just to get the correct value for
            // recCount
        }
    }

    if (PlanExecutor::DEAD == state || PlanExecutor::FAILURE == state) {
        return {ErrorCodes::InternalError,
                str::stream() << "Executor error while scanning for documents belonging to chunk: "
                              << WorkingSetCommon::toStatusString(obj)};
    }

    // Release the index scan executor before returning.
    exec.reset();

    if (isLargeChunk) {
        return {
            ErrorCodes::ChunkTooBig,
            str::stream() << "Cannot move chunk: the maximum number of documents for a chunk is "
                          << maxRecsWhenFull
                          << ", the maximum chunk size is "
                          << _args.getMaxChunkSizeBytes()
                          << ", average document size is "
                          << avgRecSize
                          << ". Found "
                          << recCount
                          << " documents in chunk "
                          << " ns: "
                          << _args.getNss().ns()
                          << " "
                          << _args.getMinKey()
                          << " -> "
                          << _args.getMaxKey()};
    }

    _averageObjectSizeForCloneLocs = static_cast<uint64_t>(collection->averageObjectSize(txn) + 12);

    return Status::OK();
}
Exemple #13
0
/**
 * Replaces the document stored at 'oldLocation' with 'newDoc' and keeps the indexes in sync.
 *
 * Sequence visible in this function:
 *  1. 'newDoc' is validated. On failure the error is returned when the validation level is
 *     STRICT_V, or when the old document itself passes validation; an invalid document may
 *     only replace an already-invalid one.
 *  2. The _id of the old document (if it has one) must equal the _id of 'newDoc'.
 *  3. On a capped record store the document size must not change.
 *  4. If 'indexesAffected', every index (unfinished ones included) validates the update and
 *     produces an UpdateTicket.
 *  5. The record store performs the update. If the record moved, the new location is indexed
 *     via the index catalog; otherwise each index applies its UpdateTicket.
 *  6. The op observer is notified through onUpdate() with args.ns set to this namespace.
 *
 * @param oldDoc           snapshotted old document; its snapshot id must match the recovery
 *                         unit's current snapshot id (invariant).
 * @param enforceQuota     forwarded to the record store through _enforceQuota().
 * @param indexesAffected  when false, no index validation or ticket application is done.
 * @param debug            optional; receives nmoved / keyUpdates counters when non-null.
 * @param args             oplog update arguments; 'ns' is overwritten before onUpdate().
 * @return the (possibly new) RecordId of the document, or the first error encountered.
 */
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // moderate means we have to check the old doc
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    if (_needCappedLock) {
        // X-lock the metadata resource for this capped collection until the end of the WUOW. This
        // prevents the primary from executing with more concurrency than secondaries.
        // See SERVER-21646.
        // NOTE(review): this statement builds a temporary ResourceLock that is destroyed at the
        // end of the statement. The comment above relies on the lock outliving the temporary,
        // presumably because the locker defers releases inside a write unit of work; confirm.
        Lock::ResourceLock{txn->lockState(), ResourceId(RESOURCE_METADATA, _ns.ns()), MODE_X};
    }

    // Remember the snapshot id so it can be re-checked after the storage-level update.
    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    // The _id field must not change; a missing _id in the old document skips the check.
    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // The MMAPv1 storage engine implements capped collections in a way that does not allow records
    // to grow beyond their original size. If MMAPv1 is part of a replica set together with storage
    // engines that do not have this limitation, replication could result in errors, so it is
    // necessary to set a uniform rule here. Similarly, it is not sufficient to disallow growing
    // records, because growth also happens when secondaries roll back an update that shrunk a
    // record. Exactly replicating legacy MMAPv1 behavior would require padding shrunk documents on
    // all storage engines. Instead, forbid all size changes.
    const auto oldSize = oldDoc.value().objsize();
    if (_recordStore->isCapped() && oldSize != newDoc.objsize())
        return {ErrorCodes::CannotGrowDocumentInCappedNamespace,
                str::stream() << "Cannot change the size of a document in a capped collection: "
                              << oldSize << " != " << newDoc.objsize()};

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            // Duplicates are allowed unless the index is the _id index or is unique, or when
            // the replication coordinator says the uniqueness constraint should be ignored.
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            // The ticket is stored in the owning map before validation runs, so it is not
            // left as a bare pointer on the early-return path below.
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        // Index the document at its new location as a single-element batch.
        std::vector<BsonRecord> bsonRecords;
        BsonRecord bsonRecord = {newLocation.getValue(), &newDoc};
        bsonRecords.push_back(bsonRecord);
        Status s = _indexCatalog.indexRecords(txn, bsonRecords);
        if (!s.isOK())
            return StatusWith<RecordId>(s);
        // The snapshot must not have changed while the update was applied.
        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move.  We update each index with each respective UpdateTicket.

    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            // Written by iam->update(); only read after that call has succeeded.
            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    // The snapshot must not have changed while the update was applied.
    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
    Status MMAPV1Engine::repairDatabase( OperationContext* txn,
                                         const std::string& dbName,
                                         bool preserveClonedFilesOnFailure,
                                         bool backupOriginalFiles ) {
        // We must hold some form of lock here
        invariant(txn->lockState()->threadState());
        invariant( dbName.find( '.' ) == string::npos );

        scoped_ptr<RepairFileDeleter> repairFileDeleter;

        log() << "repairDatabase " << dbName << endl;

        BackgroundOperation::assertNoBgOpInProgForDb(dbName);

        txn->recoveryUnit()->syncDataAndTruncateJournal(); // Must be done before and after repair

        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        txn->checkForInterrupt();

        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
        string reservedPathString = reservedPath.string();

        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( txn,
                                                            dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase =
                            dbHolder().get(txn, dbName);
            if (originalDatabase == NULL) {
                return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
            }

            scoped_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
            scoped_ptr<Database> tempDatabase;
            {
                dbEntry.reset( new MMAPV1DatabaseCatalogEntry( txn,
                                                               dbName,
                                                               reservedPathString,
                                                               storageGlobalParams.directoryperdb,
                                                               true ) );
                invariant( !dbEntry->exists() );
                tempDatabase.reset( new Database( txn,
                                                  dbName,
                                                  dbEntry.get() ) );

            }

            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                Client::Context ctx(txn,  ns );
                Collection* coll = originalDatabase->getCollection( txn, ns );
                if ( coll ) {
                    scoped_ptr<RecordIterator> it( coll->getIterator( txn,
                                                                      DiskLoc(),
                                                                      false,
                                                                      CollectionScanParams::FORWARD ) );
                    while ( !it->isEOF() ) {
                        DiskLoc loc = it->getNext();
                        BSONObj obj = coll->docFor( loc );

                        string ns = obj["name"].String();

                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    Client::Context tempContext(txn, ns, tempDatabase );
                    WriteUnitOfWork wunit(txn);
                    tempCollection = tempDatabase->createCollection(txn, ns, options, true, false);
                    wunit.commit();
                }

                Client::Context readContext(txn, ns, originalDatabase);
                Collection* originalCollection = originalDatabase->getCollection( txn, ns );
                invariant( originalCollection );

                // data

                // TODO SERVER-14812 add a mode that drops duplicates rather than failing
                MultiIndexBlock indexer(txn, tempCollection );
                {
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Client::Context tempContext(txn, ns, tempDatabase);
                    Status status = indexer.init( indexes );
                    if ( !status.isOK() )
                        return status;
                }

                scoped_ptr<RecordIterator> iterator(
                    originalCollection->getIterator( txn, DiskLoc(), false,
                                                     CollectionScanParams::FORWARD ));
                while ( !iterator->isEOF() ) {
                    DiskLoc loc = iterator->getNext();
                    invariant( !loc.isNull() );

                    BSONObj doc = originalCollection->docFor( loc );

                    Client::Context tempContext(txn, ns, tempDatabase);
                    
                    WriteUnitOfWork wunit(txn);
                    StatusWith<DiskLoc> result = tempCollection->insertDocument(txn,
                                                                                doc,
                                                                                &indexer,
                                                                                false);
                    if ( !result.isOK() )
                        return result.getStatus();

                    wunit.commit();
                    txn->checkForInterrupt(false);
                }
                
                Status status = indexer.doneInserting();
                if (!status.isOK())
                    return status;

                {
                    Client::Context tempContext(txn, ns, tempDatabase);
                    WriteUnitOfWork wunit(txn);
                    indexer.commit();
                    wunit.commit();
                }

            }

            txn->recoveryUnit()->syncDataAndTruncateJournal();
            globalStorageEngine->flushAllFiles(true); // need both in case journaling is disabled

            txn->checkForInterrupt(false);
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        dbHolder().close( txn, dbName );

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        if ( !backupOriginalFiles )
            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );

        return Status::OK();
    }
Exemple #15
0
 void run() {
     IndexDescriptor* id = addIndexWithInfo();
     // Create a SortPhaseOne.
     SortPhaseOne phaseOne;
     phaseOne.sorter.reset(new BSONObjExternalSorter(_aFirstSort));
     // It's necessary to index sufficient keys that a RARELY condition will be triggered,
     // but few enough keys that the btree builder will not create an internal node and check
     // for an interrupt internally (which would cause this test to pass spuriously).
     int32_t nKeys = 130;
     // Add index keys to the phaseOne.
     for( int32_t i = 0; i < nKeys; ++i ) {
         phaseOne.sorter->add( BSON( "a" << i ), /* dummy disk loc */ DiskLoc(), false );
     }
     phaseOne.nkeys = phaseOne.n = nKeys;
     phaseOne.sorter->sort( false );
     // Set up remaining arguments.
     set<DiskLoc> dups;
     CurOp* op = cc().curop();
     ProgressMeterHolder pm (op->setMessage("InterruptBuildBottomUp",
                                            "InterruptBuildBottomUp Progress",
                                            nKeys,
                                            nKeys));
     pm.finished();
     Timer timer;
     // The index's root has not yet been set.
     ASSERT( id->getHead().isNull() );
     // Register a request to kill the current operation.
     cc().curop()->kill();
     if ( _mayInterrupt ) {
         // The build is aborted due to the kill request.
         ASSERT_THROWS
                 ( buildBottomUpPhases2And3<V1>( true,
                                                 id,
                                                 *phaseOne.sorter,
                                                 false,
                                                 dups,
                                                 op,
                                                 &phaseOne,
                                                 pm,
                                                 timer,
                                                 _mayInterrupt ),
                   UserException );
         // The root of the index is not set because the build did not complete.
         ASSERT( id->getHead().isNull() );
     }
     else {
         // The build is aborted despite the kill request because mayInterrupt == false.
         buildBottomUpPhases2And3<V1>( true,
                                       id,
                                       *phaseOne.sorter,
                                       false,
                                       dups,
                                       op,
                                       &phaseOne,
                                       pm,
                                       timer,
                                       _mayInterrupt );
         // The index's root is set after the build is complete.
         ASSERT( !id->getHead().isNull() );
     }
 }
/**
 * Recomputes _indexedPaths and _hasTTLIndex by walking every index of the collection
 * (the iterator is requested with its second argument set to true), then registers or
 * unregisters the collection with the TTLCollectionCache if the TTL flag changed.
 * Sets _keysComputed when done.
 */
void CollectionInfoCache::computeIndexKeys(OperationContext* opCtx) {
    const bool hadTTLIndex = _hasTTLIndex;
    _hasTTLIndex = false;
    _indexedPaths.clear();

    IndexCatalog::IndexIterator indexIt =
        _collection->getIndexCatalog()->getIndexIterator(opCtx, true);
    while (indexIt.more()) {
        IndexDescriptor* descriptor = indexIt.next();

        if (descriptor->getAccessMethodName() != IndexNames::TEXT) {
            // An index spec carrying "expireAfterSeconds" marks the collection as having a
            // TTL index.
            if (descriptor->infoObj().hasField("expireAfterSeconds")) {
                _hasTTLIndex = true;
            }

            // Every field of the key pattern is an indexed path.
            BSONObj keyPattern = descriptor->keyPattern();
            for (BSONObjIterator keyIt(keyPattern); keyIt.more();) {
                BSONElement keyField = keyIt.next();
                _indexedPaths.addPath(keyField.fieldName());
            }
        } else {
            fts::FTSSpec ftsSpec(descriptor->infoObj());

            if (ftsSpec.wildcard()) {
                _indexedPaths.allPathsIndexed();
            } else {
                for (size_t pos = 0; pos < ftsSpec.numExtraBefore(); ++pos) {
                    _indexedPaths.addPath(ftsSpec.extraBefore(pos));
                }
                for (const auto& weightedField : ftsSpec.weights()) {
                    _indexedPaths.addPath(weightedField.first);
                }
                for (size_t pos = 0; pos < ftsSpec.numExtraAfter(); ++pos) {
                    _indexedPaths.addPath(ftsSpec.extraAfter(pos));
                }
                // An update to any path that has the language override field as a component
                // could change the language of a subdocument, so track it as a path component.
                _indexedPaths.addPathComponent(ftsSpec.languageOverrideField());
            }
        }

        // Partial indexes: the fields referenced by the filter expression are indexed paths too.
        const IndexCatalogEntry* entry = indexIt.catalogEntry(descriptor);
        const MatchExpression* filter = entry->getFilterExpression();
        if (!filter) {
            continue;
        }
        unordered_set<std::string> filterPaths;
        QueryPlannerIXSelect::getFields(filter, "", &filterPaths);
        for (const auto& path : filterPaths) {
            _indexedPaths.addPath(path);
        }
    }

    TTLCollectionCache& ttlCollectionCache = TTLCollectionCache::get(getGlobalServiceContext());

    // Only touch the TTL cache when the collection's TTL status actually flipped.
    if (_hasTTLIndex && !hadTTLIndex) {
        ttlCollectionCache.registerCollection(_collection->ns());
    } else if (!_hasTTLIndex && hadTTLIndex) {
        ttlCollectionCache.unregisterCollection(_collection->ns());
    }

    _keysComputed = true;
}
Exemple #17
0
        bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) {
            BSONElement e = jsobj.firstElement();
            const string toDeleteNs = dbname + '.' + e.valuestr();
            if (!serverGlobalParams.quiet) {
                MONGO_TLOG(0) << "CMD: dropIndexes " << toDeleteNs << endl;
            }

            Lock::DBWrite dbXLock(dbname);
            Client::Context ctx(toDeleteNs);

            Collection* collection = cc().database()->getCollection( toDeleteNs );
            if ( ! collection ) {
                errmsg = "ns not found";
                return false;
            }

            stopIndexBuilds(cc().database(), jsobj);

            IndexCatalog* indexCatalog = collection->getIndexCatalog();
            anObjBuilder.appendNumber("nIndexesWas", indexCatalog->numIndexesTotal() );


            BSONElement f = jsobj.getField("index");
            if ( f.type() == String ) {

                string indexToDelete = f.valuestr();

                if ( indexToDelete == "*" ) {
                    Status s = indexCatalog->dropAllIndexes( false );
                    if ( !s.isOK() ) {
                        appendCommandStatus( anObjBuilder, s );
                        return false;
                    }
                    anObjBuilder.append("msg", "non-_id indexes dropped for collection");
                    return true;
                }

                IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByName( indexToDelete );
                if ( desc == NULL ) {
                    errmsg = str::stream() << "index not found with name [" << indexToDelete << "]";
                    return false;
                }

                if ( desc->isIdIndex() ) {
                    errmsg = "cannot drop _id index";
                    return false;
                }

                Status s = indexCatalog->dropIndex( desc );
                if ( !s.isOK() ) {
                    appendCommandStatus( anObjBuilder, s );
                    return false;
                }

                return true;
            }

            if ( f.type() == Object ) {
                IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByKeyPattern( f.embeddedObject() );
                if ( desc == NULL ) {
                    errmsg = "can't find index with key:";
                    errmsg += f.embeddedObject().toString();
                    return false;
                }

                if ( desc->isIdIndex() ) {
                    errmsg = "cannot drop _id index";
                    return false;
                }

                Status s = indexCatalog->dropIndex( desc );
                if ( !s.isOK() ) {
                    appendCommandStatus( anObjBuilder, s );
                    return false;
                }

                return true;
            }

            errmsg = "invalid index name spec";
            return false;
        }
/**
 * Scans the chunk range [_min, _max) through a shard-key-prefixed index and records the
 * RecordId of every document found into _cloneLocs.  Also creates and registers the
 * delete-notification executor (_deleteNotifyExec).
 *
 * @param txn           operation context used for locking, planning and reads
 * @param maxChunkSize  maximum chunk size; combined with the average record size to estimate
 *                      how many records a full chunk may hold
 * @param errmsg        set to a description of the failure when false is returned
 * @param result        receives "chunkTooBig" and "estimatedChunkSize" when the chunk holds
 *                      more records than the computed limit
 * @return true on success; false if the collection or a suitable index cannot be found, or if
 *         the chunk is considered too big to move
 */
bool MigrationSourceManager::storeCurrentLocs(OperationContext* txn,
                                              long long maxChunkSize,
                                              string& errmsg,
                                              BSONObjBuilder& result) {
    AutoGetCollection autoColl(txn, _getNS(), MODE_IS);

    Collection* collection = autoColl.getCollection();
    if (!collection) {
        errmsg = "ns not found, should be impossible";
        return false;
    }

    // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, any
    // multi-key index prefixed by shard key cannot be multikey over the shard key fields.
    IndexDescriptor* idx =
        collection->getIndexCatalog()->findShardKeyPrefixedIndex(txn,
                                                                 _shardKeyPattern,
                                                                 false);  // requireSingleKey

    if (idx == NULL) {
        errmsg = str::stream() << "can't find index with prefix " << _shardKeyPattern
                               << " in storeCurrentLocs for " << _nss.toString();
        return false;
    }

    // Assume both min and max non-empty, append MinKey's to make them fit chosen index
    BSONObj min;
    BSONObj max;
    KeyPattern kp(idx->keyPattern());

    {
        // It's alright not to lock _mutex all the way through based on the assumption that this is
        // only called by the main thread that drives the migration and only it can start and stop
        // the current migration.
        stdx::lock_guard<stdx::mutex> sl(_mutex);

        invariant(_deleteNotifyExec.get() == NULL);
        unique_ptr<WorkingSet> ws = stdx::make_unique<WorkingSet>();
        unique_ptr<DeleteNotificationStage> dns = stdx::make_unique<DeleteNotificationStage>(this);

        // Takes ownership of 'ws' and 'dns'.
        auto statusWithPlanExecutor = PlanExecutor::make(
            txn, std::move(ws), std::move(dns), collection, PlanExecutor::YIELD_MANUAL);
        invariant(statusWithPlanExecutor.isOK());

        _deleteNotifyExec = std::move(statusWithPlanExecutor.getValue());
        _deleteNotifyExec->registerExec();

        min = Helpers::toKeyFormat(kp.extendRangeBound(_min, false));
        max = Helpers::toKeyFormat(kp.extendRangeBound(_max, false));
    }

    unique_ptr<PlanExecutor> exec(InternalPlanner::indexScan(txn,
                                                             collection,
                                                             idx,
                                                             min,
                                                             max,
                                                             false,  // endKeyInclusive
                                                             PlanExecutor::YIELD_MANUAL));

    // We can afford to yield here because any change to the base data that we might miss is already
    // being queued and will migrate in the 'transferMods' stage.
    exec->setYieldPolicy(PlanExecutor::YIELD_AUTO);

    // Use the average object size to estimate how many objects a full chunk would carry, so that
    // while traversing the chunk's range using the sharding index below there's a fair amount of
    // slack before we determine a chunk is too large, because object sizes will vary.
    unsigned long long maxRecsWhenFull;
    long long avgRecSize = 0;

    const long long totalRecs = collection->numRecords(txn);
    if (totalRecs > 0) {
        avgRecSize = collection->dataSize(txn) / totalRecs;
    }

    if (avgRecSize > 0) {
        maxRecsWhenFull = maxChunkSize / avgRecSize;
        maxRecsWhenFull = std::min((unsigned long long)(Chunk::MaxObjectPerChunk + 1),
                                   130 * maxRecsWhenFull / 100 /* slack */);
    } else {
        // Either the collection reports no records, or the integer division above truncated to
        // zero (data size smaller than the record count).  Dividing maxChunkSize by a zero
        // average would be undefined behavior, so fall back to the object-count cap alone.
        avgRecSize = 0;
        maxRecsWhenFull = Chunk::MaxObjectPerChunk + 1;
    }

    // Do a full traversal of the chunk and don't stop even if we think it is a large chunk; we
    // want the number of records to better report, in that case.
    bool isLargeChunk = false;
    unsigned long long recCount = 0;

    RecordId recordId;
    while (PlanExecutor::ADVANCED == exec->getNext(NULL, &recordId)) {
        if (!isLargeChunk) {
            stdx::lock_guard<stdx::mutex> lk(_cloneLocsMutex);
            _cloneLocs.insert(recordId);
        }

        if (++recCount > maxRecsWhenFull) {
            isLargeChunk = true;
            // Continue on despite knowing that it will fail, just to get the correct value for
            // recCount
        }
    }

    exec.reset();

    if (isLargeChunk) {
        stdx::lock_guard<stdx::mutex> sl(_mutex);
        warning() << "cannot move chunk: the maximum number of documents for a chunk is "
                  << maxRecsWhenFull << " , the maximum chunk size is " << maxChunkSize
                  << " , average document size is " << avgRecSize << ". Found " << recCount
                  << " documents in chunk "
                  << " ns: " << _nss << " " << _min << " -> " << _max << migrateLog;

        result.appendBool("chunkTooBig", true);
        result.appendNumber("estimatedChunkSize", (long long)(recCount * avgRecSize));
        errmsg = "chunk too big to move";
        return false;
    }

    log() << "moveChunk number of documents: " << cloneLocsRemaining() << migrateLog;

    txn->recoveryUnit()->abandonSnapshot();
    return true;
}
Exemple #19
0
/**
 * Replaces the document stored at 'oldLocation' with 'newDoc'.
 *
 * Steps, in order: document validation, an _id-unchanged check, per-index update validation
 * (only when 'indexesAffected'), the record store update, index maintenance, and finally an
 * onUpdate notification to the op observer with args.ns filled in.
 *
 * @return the (possibly new) location of the document, or the first error encountered.
 *         An _id mismatch yields ErrorCodes::InternalError with code 13596.
 */
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto newDocStatus = checkValidation(txn, newDoc);
        if (!newDocStatus.isOK()) {
            // Under the strict level an invalid new document is always rejected.  Otherwise
            // (moderate) the old document is checked too: good -> bad is rejected, while
            // bad -> bad is let through.
            if (_validationLevel == STRICT_V || checkValidation(txn, oldDoc.value()).isOK()) {
                return newDocStatus;
            }
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    SnapshotId snapshotAtStart = txn->recoveryUnit()->getSnapshotId();

    // The _id of an existing document must not change.
    BSONElement previousId = oldDoc.value()["_id"];
    const bool idChanged = !previousId.eoo() && (previousId != newDoc["_id"]);
    if (idChanged) {
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);
    }

    // Validation pass: build one UpdateTicket per index describing the index changes implied
    // by going from oldDoc to newDoc.  The tickets are applied further below.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator(txn, true);
        while (indexIt.more()) {
            IndexDescriptor* descriptor = indexIt.next();
            IndexCatalogEntry* entry = indexIt.catalogEntry(descriptor);
            IndexAccessMethod* accessMethod = indexIt.accessMethod(descriptor);

            const bool enforcesUniqueness =
                KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique();

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed = !enforcesUniqueness ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);

            UpdateTicket* ticket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = ticket;

            Status validateStatus = accessMethod->validateUpdate(txn,
                                                                 oldDoc.value(),
                                                                 newDoc,
                                                                 oldLocation,
                                                                 options,
                                                                 ticket,
                                                                 entry->getFilterExpression());
            if (!validateStatus.isOK()) {
                return StatusWith<RecordId>(validateStatus);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove.  If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // The old object may or may not still be indexed at this point, depending on whether the
    // record moved.
    const bool recordMoved = newLocation.getValue() != oldLocation;
    if (recordMoved) {
        if (debug) {
            // nmoved defaults to -1 rather than 0
            debug->nmoved = (debug->nmoved == -1) ? 1 : debug->nmoved + 1;
        }

        // The record moved, so the new location has to be added to all indexes.
        Status indexStatus = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue());
        if (!indexStatus.isOK())
            return StatusWith<RecordId>(indexStatus);
    } else {
        // The record stayed in place: apply each index's UpdateTicket.
        if (debug)
            debug->keyUpdates = 0;

        if (indexesAffected) {
            IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator(txn, true);
            while (indexIt.more()) {
                IndexDescriptor* descriptor = indexIt.next();
                IndexAccessMethod* accessMethod = indexIt.accessMethod(descriptor);

                int64_t updatedKeys;
                Status updateStatus = accessMethod->update(
                    txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
                if (!updateStatus.isOK())
                    return StatusWith<RecordId>(updateStatus);
                if (debug)
                    debug->keyUpdates += updatedKeys;
            }
        }
    }

    // Both paths finish the same way: the snapshot must be unchanged, then observers are told.
    invariant(snapshotAtStart == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
Exemple #20
0
    /**
     * Replaces the document stored at 'oldLocation' with 'objNew'.
     *
     * Rejects a change of _id (ErrorCodes::InternalError, code 13596), validates user
     * documents for the "system.users" collection, pre-validates the update against every
     * index, and then updates the record store.  If the record moved, the new location is
     * indexed; otherwise the per-index update tickets are applied and a mutation
     * invalidation is broadcast for 'oldLocation'.
     *
     * @return the (possibly new) location of the document, or the first error encountered.
     */
    StatusWith<DiskLoc> Collection::updateDocument( OperationContext* txn,
                                                    const DiskLoc& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).toBson();

        // An existing _id must be carried over unchanged.
        if ( objOld.hasElement( "_id" ) && objOld["_id"] != objNew["_id"] ) {
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "in Collection::updateDocument _id mismatch",
                                        13596 );
        }

        if ( ns().coll() == "system.users" ) {
            // XXX - andy and spencer think this should go away now
            V2UserDocumentParser parser;
            Status userDocStatus = parser.checkValidUserDocument(objNew);
            if ( !userDocStatus.isOK() )
                return StatusWith<DiskLoc>( userDocStatus );
        }

        /* Duplicate key check.  The btree is descended twice - once here, and once for the
           actual index updates further below.  That is suboptimal, but doing it the other way
           without rollbacks is pretty complicated...
        */
        OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets;
        IndexCatalog::IndexIterator indexIt = _indexCatalog.getIndexIterator( txn, true );
        while ( indexIt.more() ) {
            IndexDescriptor* descriptor = indexIt.next();
            IndexAccessMethod* accessMethod = _indexCatalog.getIndex( descriptor );

            const bool enforcesUniqueness =
                KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique();

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed = !enforcesUniqueness
                || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);

            UpdateTicket* ticket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = ticket;

            Status validateStatus = accessMethod->validateUpdate(txn,
                                                                 objOld,
                                                                 objNew,
                                                                 oldLocation,
                                                                 options,
                                                                 ticket );
            if ( !validateStatus.isOK() ) {
                return StatusWith<DiskLoc>( validateStatus );
            }
        }

        // this can callback into Collection::recordStoreGoingToMove
        StatusWith<DiskLoc> newLocation = _recordStore->updateRecord( txn,
                                                                      oldLocation,
                                                                      objNew.objdata(),
                                                                      objNew.objsize(),
                                                                      _enforceQuota( enforceQuota ),
                                                                      this );

        if ( !newLocation.isOK() ) {
            return newLocation;
        }

        _infoCache.notifyOfWriteOp();

        const bool recordMoved = newLocation.getValue() != oldLocation;
        if ( recordMoved ) {

            if ( debug ) {
                // nmoved defaults to -1 rather than 0
                debug->nmoved = ( debug->nmoved == -1 ) ? 1 : debug->nmoved + 1;
            }

            // Index the document at its new location.
            _indexCatalog.indexRecord(txn, objNew, newLocation.getValue());

            return newLocation;
        }

        // The record stayed in place: apply the ticket prepared for each index.
        if ( debug )
            debug->keyUpdates = 0;

        indexIt = _indexCatalog.getIndexIterator( txn, true );
        while ( indexIt.more() ) {
            IndexDescriptor* descriptor = indexIt.next();
            IndexAccessMethod* accessMethod = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status updateStatus = accessMethod->update(txn,
                                                       *updateTickets.mutableMap()[descriptor],
                                                       &updatedKeys);
            if ( !updateStatus.isOK() )
                return StatusWith<DiskLoc>( updateStatus );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        // Broadcast the mutation so that query results stay correct.
        _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION);

        return newLocation;
    }
    StatusWith<CompactStats> Collection::compact( const CompactOptions* compactOptions ) {

        if ( isCapped() )
            return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                             "cannot compact capped collection" );

        if ( _indexCatalog.numIndexesInProgress() )
            return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                             "cannot compact when indexes in progress" );

        NamespaceDetails* d = details();

        // this is a big job, so might as well make things tidy before we start just to be nice.
        getDur().commitIfNeeded();

        list<DiskLoc> extents;
        for( DiskLoc L = d->firstExtent(); !L.isNull(); L = L.ext()->xnext )
            extents.push_back(L);
        log() << "compact " << extents.size() << " extents" << endl;

        // same data, but might perform a little different after compact?
        _infoCache.reset();

        vector<BSONObj> indexSpecs;
        {
            IndexCatalog::IndexIterator ii( _indexCatalog.getIndexIterator( false ) );
            while ( ii.more() ) {
                IndexDescriptor* descriptor = ii.next();
                indexSpecs.push_back( _compactAdjustIndexSpec( descriptor->infoObj() ) );
            }
        }

        log() << "compact orphan deleted lists" << endl;
        d->orphanDeletedList();

        // Start over from scratch with our extent sizing and growth
        d->setLastExtentSize( 0 );

        // before dropping indexes, at least make sure we can allocate one extent!
        if ( allocateSpaceForANewRecord( _ns.ns().c_str(),
                                         d,
                                         Record::HeaderSize+1,
                                         false).isNull() ) {
            return StatusWith<CompactStats>( ErrorCodes::InternalError,
                                             "compact error no space available to allocate" );
        }

        // note that the drop indexes call also invalidates all clientcursors for the namespace,
        // which is important and wanted here
        log() << "compact dropping indexes" << endl;
        Status status = _indexCatalog.dropAllIndexes( true );
        if ( !status.isOK() ) {
            return StatusWith<CompactStats>( status );
        }

        getDur().commitIfNeeded();

        CompactStats stats;

        OwnedPointerVector<IndexCatalog::IndexBuildBlock> indexBuildBlocks;
        vector<IndexAccessMethod*> indexesToInsertTo;
        vector< std::pair<IndexAccessMethod*,IndexAccessMethod*> > bulkToCommit;
        for ( size_t i = 0; i < indexSpecs.size(); i++ ) {
            killCurrentOp.checkForInterrupt(false);
            BSONObj info = indexSpecs[i];
            info = _compactAdjustIndexSpec( info );
            info = _indexCatalog.fixIndexSpec( info );
            auto_ptr<IndexCatalog::IndexBuildBlock> block( new IndexCatalog::IndexBuildBlock( this,info ) );
            Status status = block->init();
            if ( !status.isOK() )
                return StatusWith<CompactStats>(status);

            IndexAccessMethod* accessMethod = block->getEntry()->accessMethod();
            status = accessMethod->initializeAsEmpty();
            if ( !status.isOK() )
                return StatusWith<CompactStats>(status);

            IndexAccessMethod* bulk = accessMethod->initiateBulk();
            if ( bulk ) {
                indexesToInsertTo.push_back( bulk );
                bulkToCommit.push_back( std::pair<IndexAccessMethod*,IndexAccessMethod*>( accessMethod, bulk ) );
            }
            else {
                indexesToInsertTo.push_back( accessMethod );
            }

            indexBuildBlocks.mutableVector().push_back( block.release() );
        }

        // reset data size and record counts to 0 for this namespace
        // as we're about to tally them up again for each new extent
        d->setStats( 0, 0 );

        ProgressMeterHolder pm(cc().curop()->setMessage("compact extent",
                                                        "Extent Compacting Progress",
                                                        extents.size()));

        int extentNumber = 0;
        for( list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++ ) {
            _compactExtent(*i, extentNumber++, indexesToInsertTo, compactOptions, &stats );
            pm.hit();
        }

        verify( d->firstExtent().ext()->xprev.isNull() );

        // indexes will do their own progress meter?
        pm.finished();

        log() << "starting index commits";

        for ( size_t i = 0; i < bulkToCommit.size(); i++ ) {
            bulkToCommit[i].first->commitBulk( bulkToCommit[i].second, false, NULL );
        }

        for ( size_t i = 0; i < indexBuildBlocks.size(); i++ ) {
            IndexCatalog::IndexBuildBlock* block = indexBuildBlocks.mutableVector()[i];
            block->success();
        }

        return StatusWith<CompactStats>( stats );
    }
Exemple #22
0
    /**
     * Repairs database `dbName` by copying every normal collection (documents plus the
     * specs of its finished indexes) into a fresh database created under a reserved
     * directory, then closing the original and calling _replaceWithRecovered() to install
     * the copy (presumably by moving the files into dbpath -- confirm against that helper).
     *
     * @param txn                          operation context; used for interrupt checks and
     *                                     passed to the storage calls below.
     * @param dbName                       name of the database to repair.
     * @param preserveClonedFilesOnFailure when false, a RepairFileDeleter is created for the
     *                                     reserved directory and only told success() once the
     *                                     copy has finished.
     * @param backupOriginalFiles          when true, the originals go through
     *                                     _renameForBackup() instead of _deleteDataFiles(),
     *                                     and the reserved directory is not removed at the end.
     * @return Status::OK() on success; OutOfDiskSpace when the reported free space at
     *         repairpath is smaller than dbSize(dbName); NamespaceNotFound when openDb()
     *         returns NULL; otherwise the first non-OK status from collection-option
     *         parsing, indexer.init(), insertDocument() or indexer.doneInserting().
     */
    Status MMAPV1Engine::repairDatabase( OperationContext* txn,
                                         const std::string& dbName,
                                         bool preserveClonedFilesOnFailure,
                                         bool backupOriginalFiles ) {
        unique_ptr<RepairFileDeleter> repairFileDeleter;

        // Must be done before and after repair
        getDur().syncDataAndTruncateJournal(txn);

        // Refuse to start when free space is known (> -1) and smaller than the database.
        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        txn->checkForInterrupt();

        // The reserved directory is requested with the "backup" prefix when its contents
        // may be kept (either flag set), and with "_tmp" otherwise.
        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        bool created = false;
        MONGO_ASSERT_ON_EXCEPTION( created = boost::filesystem::create_directory( reservedPath ) );
        invariant( created );
        string reservedPathString = reservedPath.string();

        // NOTE(review): presumably RepairFileDeleter removes the reserved files if this
        // function exits before success() is called -- confirm against its definition.
        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( txn,
                                                            dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase = dbHolder().openDb(txn, dbName);
            if (originalDatabase == NULL) {
                return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair");
            }

            // Declared before the ON_BLOCK_EXIT guard on purpose: locals are destroyed in
            // reverse declaration order, so the guard fires before these two are released.
            unique_ptr<MMAPV1DatabaseCatalogEntry> dbEntry;
            unique_ptr<Database> tempDatabase;

            // Must call this before MMAPV1DatabaseCatalogEntry's destructor closes the DB files
            ON_BLOCK_EXIT(&dur::DurableInterface::syncDataAndTruncateJournal, &getDur(), txn);

            {
                // Catalog entry and Database object for the copy, rooted at the reserved path.
                dbEntry.reset(new MMAPV1DatabaseCatalogEntry(txn,
                                                             dbName,
                                                             reservedPathString,
                                                             storageGlobalParams.directoryperdb,
                                                             true));
                tempDatabase.reset( new Database(txn, dbName, dbEntry.get()));
            }

            // Pass 1: read "<db>.system.namespaces" and remember each collection to copy
            // together with its parsed options.
            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                OldClientContext ctx(txn,  ns );
                Collection* coll = originalDatabase->getCollection( ns );
                if ( coll ) {
                    auto cursor = coll->getCursor(txn);
                    while (auto record = cursor->next()) {
                        BSONObj obj = record->data.releaseToBson();

                        // Shadows the outer `ns`: this one is the namespace named by the record.
                        string ns = obj["name"].String();

                        // Skip system.indexes and system.namespaces themselves; other system
                        // collections fall through to the isNormal() check below.
                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            // Pass 2: recreate each collection in the temp database and copy its contents.
            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                // Collection creation is committed in its own unit of work.
                Collection* tempCollection = NULL;
                {
                    WriteUnitOfWork wunit(txn);
                    tempCollection = tempDatabase->createCollection(txn, ns, options, false);
                    wunit.commit();
                }

                OldClientContext readContext(txn, ns, originalDatabase);
                Collection* originalCollection = originalDatabase->getCollection( ns );
                invariant( originalCollection );

                // data

                // TODO SERVER-14812 add a mode that drops duplicates rather than failing
                MultiIndexBlock indexer(txn, tempCollection );
                {
                    // Seed the indexer with the specs of the original's indexes; `false`
                    // excludes unfinished index builds.
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( txn, false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Status status = indexer.init( indexes );
                    if (!status.isOK()) {
                        return status;
                    }
                }

                // Copy documents one at a time; each insert is committed in its own unit of
                // work and is followed by an interrupt check.
                auto cursor = originalCollection->getCursor(txn);
                while (auto record = cursor->next()) {
                    BSONObj doc = record->data.releaseToBson();

                    WriteUnitOfWork wunit(txn);
                    StatusWith<RecordId> result = tempCollection->insertDocument(txn,
                                                                                 doc,
                                                                                 &indexer,
                                                                                 false);
                    if ( !result.isOK() )
                        return result.getStatus();

                    wunit.commit();
                    txn->checkForInterrupt();
                }
                
                Status status = indexer.doneInserting();
                if (!status.isOK())
                    return status;

                // Committing the index build gets its own unit of work as well.
                {
                    WriteUnitOfWork wunit(txn);
                    indexer.commit();
                    wunit.commit();
                }

            }

            getDur().syncDataAndTruncateJournal(txn);

            // need both in case journaling is disabled
            MongoFile::flushAll(true);

            txn->checkForInterrupt();
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        // Close the database so we can rename/delete the original data files
        dbHolder().close(txn, dbName);

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        // The reserved directory is only kept when it holds the backup of the originals.
        if (!backupOriginalFiles) {
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(reservedPath));
        }

        // Reopen the database so it's discoverable
        dbHolder().openDb(txn, dbName);

        return Status::OK();
    }
    /**
     * Compacts this collection extent by extent and rebuilds all of its finished indexes.
     *
     * The steps run in a fixed order: record the current extent chain and the index specs,
     * orphan the deleted lists, make sure storage can grow, drop every index, compact each
     * recorded extent while feeding a MultiIndexBlock, and finally commit the index builds.
     *
     * @param compactOptions forwarded unchanged to _compactExtent() for every extent.
     * @return the accumulated CompactStats on success; BadValue for a capped collection or
     *         when index builds are in progress; CannotCreateIndex when an existing index
     *         has a key pattern that fails validateKeyPattern(); otherwise the failing
     *         status from dropping, initialising or committing the indexes.
     */
    StatusWith<CompactStats> Collection::compact( const CompactOptions* compactOptions ) {

        if ( isCapped() ) {
            return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                             "cannot compact capped collection" );
        }

        if ( _indexCatalog.numIndexesInProgress() ) {
            return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                             "cannot compact when indexes in progress" );
        }

        NamespaceDetails* nsd = details();

        // Long-running operation ahead: tidy up pending work before starting.
        getDur().commitIfNeeded();

        // Walk the extent chain once and remember every extent location up front.
        list<DiskLoc> extentLocs;
        {
            DiskLoc walker = nsd->firstExtent();
            while ( !walker.isNull() ) {
                extentLocs.push_back( walker );
                walker = walker.ext()->xnext;
            }
        }
        log() << "compact " << extentLocs.size() << " extents" << endl;

        // The data is unchanged, but cached info may no longer fit after compaction.
        _infoCache.reset();

        // Gather (and validate) the specs of all finished indexes so they can be rebuilt.
        vector<BSONObj> rebuildSpecs;
        {
            IndexCatalog::IndexIterator indexIt( _indexCatalog.getIndexIterator( false ) );
            while ( indexIt.more() ) {
                IndexDescriptor* desc = indexIt.next();

                const BSONObj spec = _compactAdjustIndexSpec( desc->infoObj() );
                const Status keyStatus = validateKeyPattern( spec.getObjectField( "key" ) );
                if ( keyStatus.isOK() ) {
                    rebuildSpecs.push_back( spec );
                    continue;
                }

                // An index that could not be recreated must stop us before anything is dropped.
                return StatusWith<CompactStats>(
                    ErrorCodes::CannotCreateIndex,
                    str::stream() << "Cannot rebuild index " << spec << ": "
                                  << keyStatus.reason()
                                  << " For more info see"
                                  << " http://dochub.mongodb.org/core/index-validation" );
            }
        }

        log() << "compact orphan deleted lists" << endl;
        nsd->orphanDeletedList();

        // Extent sizing and growth restart from zero.
        nsd->setLastExtentSize( 0 );

        // Before any index is dropped, prove that at least one extent can be allocated:
        // this allocates an extent and adds it to the free list, throwing if it cannot.
        increaseStorageSize( _details->lastExtentSize(), true );

        // Dropping the indexes also invalidates every client cursor on the namespace,
        // which is exactly what is wanted here.
        log() << "compact dropping indexes" << endl;
        Status status = _indexCatalog.dropAllIndexes( true );
        if ( !status.isOK() ) {
            return StatusWith<CompactStats>( status );
        }

        getDur().commitIfNeeded();
        killCurrentOp.checkForInterrupt();

        CompactStats stats;

        MultiIndexBlock multiIndexBlock( this );
        status = multiIndexBlock.init( rebuildSpecs );
        if ( !status.isOK() ) {
            return StatusWith<CompactStats>( status );
        }

        // Data size and record count are re-tallied per extent, so zero them first.
        nsd->setStats( 0, 0 );

        ProgressMeterHolder pm( cc().curop()->setMessage( "compact extent",
                                                          "Extent Compacting Progress",
                                                          extentLocs.size() ) );

        int extentNumber = 0;
        for ( list<DiskLoc>::iterator it = extentLocs.begin();
              it != extentLocs.end();
              ++it, ++extentNumber ) {
            _compactExtent( *it, extentNumber, multiIndexBlock, compactOptions, &stats );
            pm.hit();
        }

        verify( nsd->firstExtent().ext()->xprev.isNull() );

        // Index builds are expected to report their own progress.
        pm.finished();

        log() << "starting index commits";

        status = multiIndexBlock.commit();
        if ( !status.isOK() ) {
            return StatusWith<CompactStats>( status );
        }

        return StatusWith<CompactStats>( stats );
    }
/**
 * Compacts `collection` through its record store.
 *
 * Two paths exist. If the record store compacts in place, the store and then every
 * finished index are compacted directly. Otherwise all finished indexes are dropped, the
 * record store is compacted, and the indexes are rebuilt from their saved specs.
 *
 * @param opCtx          operation context; the collection is expected to be locked in
 *                       MODE_X (checked by a dassert).
 * @param collection     the collection to compact.
 * @param compactOptions not referenced by this implementation.
 * @return a CompactStats value on success (this function never writes into it, so it is
 *         returned as constructed); CommandNotSupported when the record store cannot
 *         compact; BadValue when index builds are in progress; CannotCreateIndex when an
 *         existing index has an invalid key pattern; otherwise the first failing status.
 */
StatusWith<CompactStats> compactCollection(OperationContext* opCtx,
                                           Collection* collection,
                                           const CompactOptions* compactOptions) {
    dassert(opCtx->lockState()->isCollectionLockedForMode(collection->ns().toString(), MODE_X));

    DisableDocumentValidation validationDisabler(opCtx);

    auto store = collection->getRecordStore();
    auto catalog = collection->getIndexCatalog();

    if (!store->compactSupported()) {
        return StatusWith<CompactStats>(ErrorCodes::CommandNotSupported,
                                        str::stream()
                                            << "cannot compact collection with record store: "
                                            << store->name());
    }

    // In-place path: nothing is dropped, the store and each index compact themselves.
    if (store->compactsInPlace()) {
        CompactStats inPlaceStats;
        Status storeStatus = store->compact(opCtx);
        if (!storeStatus.isOK()) {
            return StatusWith<CompactStats>(storeStatus);
        }

        // Only finished indexes are visited (unfinished ones are excluded).
        std::unique_ptr<IndexCatalog::IndexIterator> readyIndexes(
            catalog->getIndexIterator(opCtx, false));
        while (readyIndexes->more()) {
            IndexCatalogEntry* entry = readyIndexes->next();
            IndexDescriptor* desc = entry->descriptor();
            IndexAccessMethod* accessMethod = entry->accessMethod();

            LOG(1) << "compacting index: " << desc->toString();
            Status indexStatus = accessMethod->compact(opCtx);
            if (indexStatus.isOK()) {
                continue;
            }

            error() << "failed to compact index: " << desc->toString();
            return indexStatus;
        }

        return StatusWith<CompactStats>(inPlaceStats);
    }

    // Rebuild path from here on.
    if (catalog->numIndexesInProgress(opCtx)) {
        return StatusWith<CompactStats>(ErrorCodes::BadValue,
                                        "cannot compact when indexes in progress");
    }

    // Save the spec of every finished index, refusing to continue if one of them could
    // not be recreated after the drop.
    std::vector<BSONObj> rebuildSpecs;
    {
        std::unique_ptr<IndexCatalog::IndexIterator> readyIndexes(
            catalog->getIndexIterator(opCtx, false));
        while (readyIndexes->more()) {
            IndexDescriptor* desc = readyIndexes->next()->descriptor();

            // The rebuild is always a foreground build, so the background flag is stripped.
            const BSONObj spec = desc->infoObj().removeField(IndexDescriptor::kBackgroundFieldName);
            const Status keyStatus = index_key_validate::validateKeyPattern(
                spec.getObjectField("key"), desc->version());
            if (keyStatus.isOK()) {
                rebuildSpecs.push_back(spec);
                continue;
            }

            return StatusWith<CompactStats>(
                ErrorCodes::CannotCreateIndex,
                str::stream() << "Cannot compact collection due to invalid index " << spec
                              << ": "
                              << keyStatus.reason()
                              << " For more info see"
                              << " http://dochub.mongodb.org/core/index-validation");
        }
    }

    // Last chance to be interrupted *before* the indexes are gone.
    opCtx->checkForInterrupt();

    {
        // Dropping the indexes also invalidates all client cursors for the namespace,
        // which is important and wanted here.
        WriteUnitOfWork dropUnit(opCtx);
        log() << "compact dropping indexes";
        catalog->dropAllIndexes(opCtx, true);
        dropUnit.commit();
    }

    CompactStats stats;

    MultiIndexBlockImpl indexer(opCtx, collection);
    indexer.allowInterruption();
    indexer.ignoreUniqueConstraint();  // compact performs no uniqueness checking

    Status status = indexer.init(rebuildSpecs).getStatus();
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    status = store->compact(opCtx);
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    log() << "starting index commits";
    status = indexer.dumpInsertsFromBulk();
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    {
        // The commit is only made durable if indexer.commit() itself succeeded.
        WriteUnitOfWork commitUnit(opCtx);
        status = indexer.commit();
        if (!status.isOK()) {
            return StatusWith<CompactStats>(status);
        }
        commitUnit.commit();
    }

    return StatusWith<CompactStats>(stats);
}
    /**
     * Repairs the database named by `dbName` by copying every normal collection (documents
     * plus the specs of its finished indexes) into a temporary database created under a
     * reserved directory, closing both databases, and then calling _replaceWithRecovered()
     * to install the copy (presumably by moving the files into dbpath -- confirm against
     * that helper).
     *
     * The current client context must already be on this database at the configured
     * dbpath; both conditions are enforced with invariant().
     *
     * @param dbName                       database name or namespace; reduced to the
     *                                     database part with nsToDatabase().
     * @param preserveClonedFilesOnFailure when false, a RepairFileDeleter is created for the
     *                                     reserved directory and only told success() once the
     *                                     copy has finished.
     * @param backupOriginalFiles          when true, the originals go through
     *                                     _renameForBackup() instead of _deleteDataFiles(),
     *                                     and the reserved directory is not removed at the end.
     * @return Status::OK() on success; OutOfDiskSpace when the reported free space at
     *         repairpath is smaller than dbSize(dbName); NamespaceNotFound when the database
     *         is not found in dbHolder(); otherwise the first non-OK status from
     *         collection-option parsing, indexBlock.init(), insertDocument() or
     *         indexBlock.commit().
     */
    Status repairDatabase( string dbName,
                           bool preserveClonedFilesOnFailure,
                           bool backupOriginalFiles ) {
        scoped_ptr<RepairFileDeleter> repairFileDeleter;
        // NOTE(review): `dr` is never referenced again; presumably a scoped marker that a
        // repair is running -- confirm against the doingRepair definition.
        doingRepair dr;
        dbName = nsToDatabase( dbName );

        log() << "repairDatabase " << dbName << endl;

        invariant( cc().database()->name() == dbName );
        invariant( cc().database()->path() == storageGlobalParams.dbpath );

        BackgroundOperation::assertNoBgOpInProgForDb(dbName);

        getDur().syncDataAndTruncateJournal(); // Must be done before and after repair

        // Refuse to start when free space is known (> -1) and smaller than the database.
        intmax_t totalSize = dbSize( dbName );
        intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath);

        if ( freeSize > -1 && freeSize < totalSize ) {
            return Status( ErrorCodes::OutOfDiskSpace,
                           str::stream() << "Cannot repair database " << dbName
                           << " having size: " << totalSize
                           << " (bytes) because free disk space is: " << freeSize << " (bytes)" );
        }

        killCurrentOp.checkForInterrupt();

        // The reserved directory is requested with the "backup" prefix when its contents
        // may be kept (either flag set), and with "_tmp" otherwise.
        Path reservedPath =
            uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
                                "backup" : "_tmp" );
        MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
        string reservedPathString = reservedPath.string();

        // NOTE(review): presumably RepairFileDeleter removes the reserved files if this
        // function exits before success() is called -- confirm against its definition.
        if ( !preserveClonedFilesOnFailure )
            repairFileDeleter.reset( new RepairFileDeleter( dbName,
                                                            reservedPathString,
                                                            reservedPath ) );

        {
            Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath );
            if ( originalDatabase == NULL )
                return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" );

            // The temporary database shares the name but lives at the reserved path; it
            // must not have existed before this call.
            Database* tempDatabase = NULL;
            {
                bool justCreated = false;
                tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated );
                invariant( justCreated );
            }

            // Pass 1: read "<db>.system.namespaces" and remember each collection to copy
            // together with its parsed options.
            map<string,CollectionOptions> namespacesToCopy;
            {
                string ns = dbName + ".system.namespaces";
                Client::Context ctx( ns );
                Collection* coll = originalDatabase->getCollection( ns );
                if ( coll ) {
                    scoped_ptr<CollectionIterator> it( coll->getIterator( DiskLoc(),
                                                                          false,
                                                                          CollectionScanParams::FORWARD ) );
                    while ( !it->isEOF() ) {
                        DiskLoc loc = it->getNext();
                        BSONObj obj = coll->docFor( loc );

                        // Shadows the outer `ns`: this one is the namespace named by the record.
                        string ns = obj["name"].String();

                        // Skip system.indexes and system.namespaces themselves; other system
                        // collections fall through to the isNormal() check below.
                        NamespaceString nss( ns );
                        if ( nss.isSystem() ) {
                            if ( nss.isSystemDotIndexes() )
                                continue;
                            if ( nss.coll() == "system.namespaces" )
                                continue;
                        }

                        if ( !nss.isNormal() )
                            continue;

                        CollectionOptions options;
                        if ( obj["options"].isABSONObj() ) {
                            Status status = options.parse( obj["options"].Obj() );
                            if ( !status.isOK() )
                                return status;
                        }
                        namespacesToCopy[ns] = options;
                    }
                }
            }

            // Pass 2: recreate each collection in the temp database and copy its contents.
            // Every operation on the temp database is wrapped in a short-lived
            // Client::Context on tempDatabase; reads run under readContext on the original.
            for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin();
                  i != namespacesToCopy.end();
                  ++i ) {
                string ns = i->first;
                CollectionOptions options = i->second;

                Collection* tempCollection = NULL;
                {
                    Client::Context tempContext( ns, tempDatabase );
                    tempCollection = tempDatabase->createCollection( ns, options, true, false );
                }

                Client::Context readContext( ns, originalDatabase );
                Collection* originalCollection = originalDatabase->getCollection( ns );
                invariant( originalCollection );

                // data

                MultiIndexBlock indexBlock( tempCollection );
                {
                    // Seed the index block with the specs of the original's indexes;
                    // `false` excludes unfinished index builds.
                    vector<BSONObj> indexes;
                    IndexCatalog::IndexIterator ii =
                        originalCollection->getIndexCatalog()->getIndexIterator( false );
                    while ( ii.more() ) {
                        IndexDescriptor* desc = ii.next();
                        indexes.push_back( desc->infoObj() );
                    }

                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.init( indexes );
                    if ( !status.isOK() )
                        return status;

                }

                // Copy documents one at a time in forward order; after each insert the
                // journal gets a chance to commit and the operation may be interrupted.
                scoped_ptr<CollectionIterator> iterator( originalCollection->getIterator( DiskLoc(),
                                                                                          false,
                                                                                          CollectionScanParams::FORWARD ) );
                while ( !iterator->isEOF() ) {
                    DiskLoc loc = iterator->getNext();
                    invariant( !loc.isNull() );

                    BSONObj doc = originalCollection->docFor( loc );

                    Client::Context tempContext( ns, tempDatabase );
                    StatusWith<DiskLoc> result = tempCollection->insertDocument( doc, indexBlock );
                    if ( !result.isOK() )
                        return result.getStatus();

                    getDur().commitIfNeeded();
                    killCurrentOp.checkForInterrupt(false);
                }

                {
                    Client::Context tempContext( ns, tempDatabase );
                    Status status = indexBlock.commit();
                    if ( !status.isOK() )
                        return status;
                }

            }

            getDur().syncDataAndTruncateJournal();
            MongoFile::flushAll(true); // need both in case journaling is disabled

            killCurrentOp.checkForInterrupt(false);

            // Close the temporary database (the one at the reserved path).
            Client::Context tempContext( dbName, reservedPathString );
            Database::closeDatabase( dbName, reservedPathString );
        }

        // at this point if we abort, we don't want to delete new files
        // as they might be the only copies

        if ( repairFileDeleter.get() )
            repairFileDeleter->success();

        // Close the original database before its files are renamed or deleted below.
        Client::Context ctx( dbName );
        Database::closeDatabase(dbName, storageGlobalParams.dbpath);

        if ( backupOriginalFiles ) {
            _renameForBackup( dbName, reservedPath );
        }
        else {
            // first make new directory before deleting data
            Path newDir = Path(storageGlobalParams.dbpath) / dbName;
            MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));

            // this deletes old files
            _deleteDataFiles( dbName );

            if ( !boost::filesystem::exists(newDir) ) {
                // we deleted because of directoryperdb
                // re-create
                MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir));
            }
        }

        _replaceWithRecovered( dbName, reservedPathString.c_str() );

        // The reserved directory is only kept when it holds the backup of the originals.
        if ( !backupOriginalFiles )
            MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) );

        return Status::OK();
    }
Exemple #26
0
/**
 * Compacts this collection through its record store.
 *
 * Two paths exist. If the record store compacts in place, the store and then every
 * finished index are compacted directly. Otherwise all finished indexes are dropped, the
 * record store is compacted with an adaptor that feeds a MultiIndexBlock, and the
 * indexes are committed once the store is done.
 *
 * @param txn            operation context; the collection is expected to be locked in
 *                       MODE_X (checked by a dassert).
 * @param compactOptions forwarded unchanged to the record store's compact().
 * @return the CompactStats filled in by the record store on success; CommandNotSupported
 *         when the record store cannot compact; BadValue when index builds are in
 *         progress; CannotCreateIndex when an existing index has an invalid key pattern;
 *         otherwise the first failing status.
 */
StatusWith<CompactStats> Collection::compact(OperationContext* txn,
                                             const CompactOptions* compactOptions) {
    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_X));

    DisableDocumentValidation validationDisabler(txn);

    if (!_recordStore->compactSupported()) {
        return StatusWith<CompactStats>(ErrorCodes::CommandNotSupported,
                                        str::stream()
                                            << "cannot compact collection with record store: "
                                            << _recordStore->name());
    }

    // In-place path: nothing is dropped, the store and each index compact themselves.
    if (_recordStore->compactsInPlace()) {
        CompactStats inPlaceStats;
        Status storeStatus = _recordStore->compact(txn, NULL, compactOptions, &inPlaceStats);
        if (!storeStatus.isOK()) {
            return StatusWith<CompactStats>(storeStatus);
        }

        // Only finished indexes are visited (unfinished ones are excluded).
        IndexCatalog::IndexIterator readyIndexes(_indexCatalog.getIndexIterator(txn, false));
        while (readyIndexes.more()) {
            IndexDescriptor* desc = readyIndexes.next();
            IndexAccessMethod* accessMethod = _indexCatalog.getIndex(desc);

            LOG(1) << "compacting index: " << desc->toString();
            Status indexStatus = accessMethod->compact(txn);
            if (indexStatus.isOK()) {
                continue;
            }

            error() << "failed to compact index: " << desc->toString();
            return indexStatus;
        }

        return StatusWith<CompactStats>(inPlaceStats);
    }

    // Rebuild path from here on.
    if (_indexCatalog.numIndexesInProgress(txn)) {
        return StatusWith<CompactStats>(ErrorCodes::BadValue,
                                        "cannot compact when indexes in progress");
    }

    // Save the spec of every finished index, refusing to continue if one of them could
    // not be recreated after the drop.
    vector<BSONObj> rebuildSpecs;
    {
        IndexCatalog::IndexIterator readyIndexes(_indexCatalog.getIndexIterator(txn, false));
        while (readyIndexes.more()) {
            IndexDescriptor* desc = readyIndexes.next();

            const BSONObj spec = _compactAdjustIndexSpec(desc->infoObj());
            const Status keyStatus = validateKeyPattern(spec.getObjectField("key"));
            if (keyStatus.isOK()) {
                rebuildSpecs.push_back(spec);
                continue;
            }

            return StatusWith<CompactStats>(
                ErrorCodes::CannotCreateIndex,
                str::stream() << "Cannot compact collection due to invalid index " << spec
                              << ": "
                              << keyStatus.reason()
                              << " For more info see"
                              << " http://dochub.mongodb.org/core/index-validation");
        }
    }

    // Last chance to be interrupted *before* the indexes are gone.
    txn->checkForInterrupt();

    {
        // Dropping the indexes also invalidates all client cursors for the namespace,
        // which is important and wanted here. The drop is only committed when it succeeded.
        WriteUnitOfWork dropUnit(txn);
        log() << "compact dropping indexes";
        Status dropStatus = _indexCatalog.dropAllIndexes(txn, true);
        if (!dropStatus.isOK()) {
            return StatusWith<CompactStats>(dropStatus);
        }
        dropUnit.commit();
    }

    CompactStats stats;

    MultiIndexBlock indexer(txn, this);
    indexer.allowInterruption();
    indexer.ignoreUniqueConstraint();  // compact performs no uniqueness checking

    Status status = indexer.init(rebuildSpecs);
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    // The adaptor is handed both this collection and the indexer for use by the store.
    MyCompactAdaptor adaptor(this, &indexer);

    status = _recordStore->compact(txn, &adaptor, compactOptions, &stats);
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    log() << "starting index commits";
    status = indexer.doneInserting();
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    {
        WriteUnitOfWork commitUnit(txn);
        indexer.commit();
        commitUnit.commit();
    }

    return StatusWith<CompactStats>(stats);
}
Exemple #27
0
    /**
     * Builds the runner that will execute 'rawCanonicalQuery' and returns it through 'out'.
     *
     * The runner handed back is one of:
     *  - CachedPlanRunner      when the plan cache holds a solution for the query,
     *  - EOFRunner             when the namespace has no collection,
     *  - IDHackRunner          when canUseIDHack() accepts the query and an _id index exists,
     *  - SingleSolutionRunner  when the planner yields exactly one solution,
     *  - MultiPlanRunner       when the planner yields several solutions.
     *
     * Ownership of 'rawCanonicalQuery' is taken immediately: it is passed on to the runner on
     * success and destroyed when an error Status is returned.
     *
     * Returns BadValue for a tailable request on a non-capped collection, for a tailable
     * request with a sort other than {$natural: 1}, and when the planner finds no solution.
     */
    Status getRunner(CanonicalQuery* rawCanonicalQuery, Runner** out, size_t plannerOptions) {
        verify(rawCanonicalQuery);
        auto_ptr<CanonicalQuery> query(rawCanonicalQuery);

        // First choice: reuse a solution from the plan cache, if there is one.
        // TODO: Can the cache have negative data about a solution?
        PlanCache* planCache = PlanCache::get(query->ns());
        CachedSolution* cached = (planCache != NULL) ? planCache->get(*query) : NULL;
        if (cached != NULL) {
            // The cached plan runner takes ownership of both the canonical query and the
            // cached solution.
            WorkingSet* workingSet;
            PlanStage* rootStage;
            verify(StageBuilder::build(*cached->solution, &rootStage, &workingSet));
            *out = new CachedPlanRunner(query.release(), cached, rootStage, workingSet);
            return Status::OK();
        }

        // Nothing cached, so the query has to be planned from scratch.
        Database* database = cc().database();
        verify( database );
        Collection* collection = database->getCollection( query->ns() );

        // Internal clients may ask about a collection that does not exist.
        if (collection == NULL) {
            // Grab the namespace before release() empties the auto_ptr.
            const string& ns = query->ns();
            *out = new EOFRunner(query.release(), ns);
            return Status::OK();
        }

        // A query that qualifies for the idhack runner needs the _id index to be present.
        if (canUseIDHack(*query) && collection->getIndexCatalog()->findIdIndex()) {
            *out = new IDHackRunner(collection, query.release());
            return Status::OK();
        }

        // Describe every ready index to the planner.
        QueryPlannerParams plannerParams;
        for (int idx = 0; idx < collection->getIndexCatalog()->numIndexesReady(); ++idx) {
            IndexDescriptor* descriptor = collection->getIndexCatalog()->getDescriptor( idx );
            IndexEntry entry(descriptor->keyPattern(),
                             descriptor->isMultikey(),
                             descriptor->isSparse(),
                             descriptor->indexName());
            plannerParams.indices.push_back(entry);
        }

        // Tailable cursors are only valid on capped collections, and only in natural order.
        const bool tailableRequested =
            query->getParsed().hasOption(QueryOption_CursorTailable);
        if (tailableRequested) {
            if (!collection->isCapped()) {
                return Status(ErrorCodes::BadValue,
                              "tailable cursor requested on non capped collection");
            }

            // An empty sort is fine; anything else has to match the natural-order sort.
            const BSONObj naturalSort = BSON("$natural" << 1);
            const BSONObj& requestedSort = query->getParsed().getSort();
            if (!(requestedSort.isEmpty() || requestedSort == naturalSort)) {
                return Status(ErrorCodes::BadValue,
                              "invalid sort specified for tailable cursor: "
                              + requestedSort.toString());
            }
        }

        // Start from the caller's planner options and layer the server-wide settings on top.
        plannerParams.options = plannerOptions;
        if (storageGlobalParams.noTableScan) {
            const string& ns = query->ns();
            // The restriction is not applied to empty queries, ".system." namespaces, or
            // namespaces starting with "local.".
            const bool exempt = query->getQueryObj().isEmpty()
                                || (ns.find(".system.") != string::npos)
                                || (ns.find("local.") == 0);
            if (!exempt) {
                plannerParams.options |= QueryPlannerParams::NO_TABLE_SCAN;
            }
        }

        // Unless table scans are forbidden, let the planner consider a collection scan.
        if (0 == (plannerParams.options & QueryPlannerParams::NO_TABLE_SCAN)) {
            plannerParams.options |= QueryPlannerParams::INCLUDE_COLLSCAN;
        }

        // A shard filter only makes sense when sharding metadata exists for the namespace.
        if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
            CollectionMetadataPtr metadata = shardingState.getCollectionMetadata(query->ns());
            if (!metadata) {
                // Without metadata the key pattern is unknown, so drop the filter request.
                plannerParams.options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
            }
            else {
                plannerParams.shardKey = metadata->getKeyPattern();
            }
        }

        vector<QuerySolution*> candidates;
        QueryPlanner::plan(*query, plannerParams, &candidates);

        /*
        for (size_t i = 0; i < candidates.size(); ++i) {
            QLOG() << "solution " << i << " is " << candidates[i]->toString() << endl;
        }
        */

        // The planner could not produce any way to answer the query.  Should this ever happen?
        if (candidates.empty()) {
            return Status(ErrorCodes::BadValue, "No query solutions");
        }

        // Exactly one plan: build its stages and run it directly.
        if (candidates.size() == 1) {
            WorkingSet* workingSet;
            PlanStage* rootStage;
            verify(StageBuilder::build(*candidates[0], &rootStage, &workingSet));

            *out = new SingleSolutionRunner(query.release(), candidates[0], rootStage,
                                            workingSet);
            return Status::OK();
        }

        // Several plans: the MultiPlanRunner picks the best, updates the cache, and so on.
        auto_ptr<MultiPlanRunner> multiRunner(new MultiPlanRunner(query.release()));
        for (size_t i = 0; i < candidates.size(); ++i) {
            WorkingSet* workingSet;
            PlanStage* rootStage;
            verify(StageBuilder::build(*candidates[i], &rootStage, &workingSet));
            // addPlan() takes ownership of all of its arguments.
            multiRunner->addPlan(candidates[i], rootStage, workingSet);
        }
        *out = multiRunner.release();
        return Status::OK();
    }
Exemple #28
0
    /**
     * Replaces the document stored at 'oldLocation' with 'objNew'.
     *
     * The update is done in place when the existing record is large enough; otherwise the
     * document is moved: the old record is unindexed, 'objNew' is inserted through
     * insertDocument(), and the old record is deallocated.
     *
     * @param oldLocation   location of the record being replaced
     * @param objNew        the new document; if the old document has an _id, this must carry
     *                      an identical _id element
     * @param enforceQuota  forwarded to insertDocument(); only used when the document moves
     * @param debug         optional; when non-NULL, 'nmoved' is bumped on a move and
     *                      'keyUpdates' receives the number of index keys updated in place
     *
     * @return the location of the document after the update ('oldLocation' for an in-place
     *         update, the result of insertDocument() for a move), or an error Status:
     *         InternalError (13596) on _id mismatch, InternalError (10003) when a document in
     *         a capped collection would grow, or whatever user-document validation, index
     *         validation, index update, or insertDocument() reports.
     */
    StatusWith<DiskLoc> Collection::updateDocument( const DiskLoc& oldLocation,
                                                    const BSONObj& objNew,
                                                    bool enforceQuota,
                                                    OpDebug* debug ) {

        // Load the currently stored document so it can be compared against the new one.
        Record* oldRecord = getExtentManager()->recordFor( oldLocation );
        BSONObj objOld = BSONObj::make( oldRecord );

        // An update is not allowed to change the _id of an existing document.
        if ( objOld.hasElement( "_id" ) ) {
            BSONElement oldId = objOld["_id"];
            BSONElement newId = objNew["_id"];
            if ( oldId != newId )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "in Collection::updateDocument _id mismatch",
                                            13596 );
        }

        // Documents written to system.users must pass user-document validation.
        if ( ns().coll() == "system.users" ) {
            // XXX - andy and spencer think this should go away now
            V2UserDocumentParser parser;
            Status s = parser.checkValidUserDocument(objNew);
            if ( !s.isOK() )
                return StatusWith<DiskLoc>( s );
        }

        /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further
           below.  that is suboptimal, but it's pretty complicated to do it the other way without rollbacks...
        */
        // One UpdateTicket per index; the vector owns the tickets, so early returns below do
        // not need to free them by hand.
        OwnedPointerVector<UpdateTicket> updateTickets;
        updateTickets.mutableVector().resize(_indexCatalog.numIndexesTotal());
        for (int i = 0; i < _indexCatalog.numIndexesTotal(); ++i) {
            IndexDescriptor* descriptor = _indexCatalog.getDescriptor( i );
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            InsertDeleteOptions options;
            options.logIfError = false;
            // Duplicates are rejected only for the _id index and unique indexes, and even
            // then only when ignoreUniqueIndex() does not exempt the index.
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique())
                || ignoreUniqueIndex(descriptor);
            updateTickets.mutableVector()[i] = new UpdateTicket();
            Status ret = iam->validateUpdate(objOld, objNew, oldLocation, options,
                                             updateTickets.mutableVector()[i]);
            if ( !ret.isOK() ) {
                return StatusWith<DiskLoc>( ret );
            }
        }

        if ( oldRecord->netLength() < objNew.objsize() ) {
            // doesn't fit, have to move to new location

            // Capped collections cannot relocate documents, so a growing update fails.
            if ( _details->isCapped() )
                return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                            "failing update: objects in a capped ns cannot grow",
                                            10003 );

            moveCounter.increment();
            _details->paddingTooSmall();

            // unindex old record, don't delete
            // this way, if inserting new doc fails, we can re-index this one
            ClientCursor::aboutToDelete(_ns.ns(), _details, oldLocation);
            _indexCatalog.unindexRecord( objOld, oldLocation, true );

            if ( debug ) {
                if (debug->nmoved == -1) // default of -1 rather than 0
                    debug->nmoved = 1;
                else
                    debug->nmoved += 1;
            }

            // The tickets gathered above are not applied on this path; the new copy is
            // written through insertDocument() instead.
            StatusWith<DiskLoc> loc = insertDocument( objNew, enforceQuota );

            if ( loc.isOK() ) {
                // insert successful, now lets deallocate the old location
                // remember its already unindexed
                _recordStore.deallocRecord( oldLocation, oldRecord );
            }
            else {
                // new doc insert failed, so lets re-index the old document and location
                _indexCatalog.indexRecord( objOld, oldLocation );
            }

            return loc;
        }

        // The new document fits in the existing record: update in place.
        _infoCache.notifyOfWriteOp();
        _details->paddingFits();

        if ( debug )
            debug->keyUpdates = 0;

        // Apply the index changes validated above, one index at a time.
        // NOTE(review): a failure here returns immediately; indexes already updated by
        // earlier iterations do not appear to be rolled back in this function - confirm.
        for (int i = 0; i < _indexCatalog.numIndexesTotal(); ++i) {
            IndexDescriptor* descriptor = _indexCatalog.getDescriptor( i );
            IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor );

            int64_t updatedKeys;
            Status ret = iam->update(*updateTickets.vector()[i], &updatedKeys);
            if ( !ret.isOK() )
                return StatusWith<DiskLoc>( ret );
            if ( debug )
                debug->keyUpdates += updatedKeys;
        }

        //  update in place
        // The destination pointer is obtained from getDur().writingPtr() before the new bytes
        // are copied over the old record's data.
        int sz = objNew.objsize();
        memcpy(getDur().writingPtr(oldRecord->data(), sz), objNew.objdata(), sz);
        return StatusWith<DiskLoc>( oldLocation );
    }
Exemple #29
0
        /**
         * Body of the dropIndexes command.
         *
         * The first element of 'jsobj' names the collection; its "index" field selects what
         * to drop:
         *  - the string "*" drops indexes via dropAllIndexes(txn, false),
         *  - any other string is looked up as an index name,
         *  - an object is looked up as an index key pattern.
         * The _id index can never be dropped individually.
         *
         * "nIndexesWas" is appended to 'anObjBuilder' before anything is dropped.  Returns
         * true on success; on failure returns false after either setting 'errmsg' or
         * appending the failing Status to 'anObjBuilder'.
         */
        bool wrappedRun(OperationContext* txn,
                        const string& dbname,
                        BSONObj& jsobj,
                        string& errmsg,
                        BSONObjBuilder& anObjBuilder) {
            const std::string collName = jsobj.firstElement().valuestrsafe();
            if (collName.empty()) {
                errmsg = "no collection name specified";
                return false;
            }

            const std::string targetNs = dbname + '.' + collName;
            if (!serverGlobalParams.quiet) {
                LOG(0) << "CMD: dropIndexes " << targetNs << endl;
            }

            Client::Context ctx(txn, targetNs);
            Database* db = ctx.db();

            Collection* collection = db->getCollection( txn, targetNs );
            if ( collection == NULL ) {
                errmsg = "ns not found";
                return false;
            }

            // In-progress index builds are stopped before the catalog is modified.
            stopIndexBuilds(txn, db, jsobj);

            IndexCatalog* indexCatalog = collection->getIndexCatalog();
            anObjBuilder.appendNumber("nIndexesWas", indexCatalog->numIndexesTotal(txn) );

            // Resolve the "index" field to a single descriptor; the "*" wildcard is handled
            // on the spot and never reaches the shared drop logic below.
            BSONElement indexSpec = jsobj.getField("index");
            IndexDescriptor* target = NULL;

            if ( indexSpec.type() == String ) {
                const string indexName = indexSpec.valuestr();

                if ( indexName == "*" ) {
                    Status dropAllStatus = indexCatalog->dropAllIndexes(txn, false);
                    if ( !dropAllStatus.isOK() ) {
                        appendCommandStatus( anObjBuilder, dropAllStatus );
                        return false;
                    }
                    anObjBuilder.append("msg", "non-_id indexes dropped for collection");
                    return true;
                }

                target = indexCatalog->findIndexByName( txn, indexName );
                if ( target == NULL ) {
                    errmsg = str::stream() << "index not found with name [" << indexName << "]";
                    return false;
                }
            }
            else if ( indexSpec.type() == Object ) {
                target = indexCatalog->findIndexByKeyPattern( txn, indexSpec.embeddedObject() );
                if ( target == NULL ) {
                    errmsg = "can't find index with key:";
                    errmsg += indexSpec.embeddedObject().toString();
                    return false;
                }
            }
            else {
                errmsg = "invalid index name spec";
                return false;
            }

            // Shared tail for the by-name and by-key-pattern cases.
            if ( target->isIdIndex() ) {
                errmsg = "cannot drop _id index";
                return false;
            }

            Status dropStatus = indexCatalog->dropIndex(txn, target);
            if ( !dropStatus.isOK() ) {
                appendCommandStatus( anObjBuilder, dropStatus );
                return false;
            }

            return true;
        }
Exemple #30
0
    /**
     * Creates the cursor that carries out this plan.
     *
     * The kind of cursor depends on the plan, checked in this order:
     *  - a plan with a special index type gets an EmulatedCursor,
     *  - an Impossible plan gets an empty BasicCursor,
     *  - a table scan plan gets the cursor from findTableScan(), starting at 'startLoc',
     *  - everything else gets a btree cursor over the plan's index.
     *
     * @param startLoc               start position; only table scans accept a non-null
     *                               value (massert 10363 otherwise)
     * @param requestIntervalCursor  if true, an IntervalBtreeCursor is tried first for an
     *                               Optimal plan whose field range vector is a single interval
     */
    shared_ptr<Cursor> QueryPlan::newCursor( const DiskLoc& startLoc,
                                             bool requestIntervalCursor ) const {

        if ( _type ) {
            // hopefully safe to use original query in these contexts - don't think we can mix
            // type with $or clause separation yet

            // SERVER-5390: the number of wanted results is skip + limit when a parsed query
            // is available, otherwise 0.
            const int numWanted =
                _parsedQuery ? _parsedQuery->getSkip() + _parsedQuery->getNumToReturn() : 0;

            IndexDescriptor* descriptor = CatalogHack::getDescriptor(_d, _idxNo);
            IndexAccessMethod* accessMethod = CatalogHack::getIndex(descriptor);
            return shared_ptr<Cursor>(EmulatedCursor::make(descriptor,
                                                           accessMethod,
                                                           _originalQuery,
                                                           _order,
                                                           numWanted,
                                                           descriptor->keyPattern()));
        }

        if ( _utility == Impossible ) {
            // Placeholder cursor that yields nothing; permitted even in --notablescan mode.
            return shared_ptr<Cursor>( new BasicCursor( DiskLoc() ) );
        }

        if ( willScanTable() ) {
            checkTableScanAllowed();
            return findTableScan( _frs.ns(), _order, startLoc );
        }

        // From here on the plan is index based, and a start location is not supported.
        massert( 10363,
                 "newCursor() with start location not implemented for indexed plans",
                 startLoc.isNull() );

        // Every btree cursor below wants the direction normalized to +1 / -1.
        const int direction = ( _direction >= 0 ) ? 1 : -1;

        if ( _startOrEndSpec ) {
            // Explicit start/end keys were supplied, so _endKeyInclusive is specified too.
            return shared_ptr<Cursor>( BtreeCursor::make( _d,
                                                          *_index,
                                                          _startKey,
                                                          _endKey,
                                                          _endKeyInclusive,
                                                          direction ) );
        }

        if ( _index->getSpec().getType() ) {
            // The index spec has a type: scan between the field range vector's start and
            // end keys, end key inclusive.
            return shared_ptr<Cursor>( BtreeCursor::make( _d,
                                                          *_index,
                                                          _frv->startKey(),
                                                          _frv->endKey(),
                                                          true,
                                                          direction ) );
        }

        // An IntervalBtreeCursor is used only when the caller asked for one, equalities come
        // before ranges (a requirement of Optimal), and the field range vector is exactly one
        // interval within the btree.
        const bool intervalCursorApplies = requestIntervalCursor
                                           && _utility == Optimal
                                           && _frv->isSingleInterval();
        if ( intervalCursorApplies ) {
            shared_ptr<Cursor> intervalCursor(
                IntervalBtreeCursor::make( _d,
                                           *_index,
                                           _frv->startKey(),
                                           _frv->startKeyInclusive(),
                                           _frv->endKey(),
                                           _frv->endKeyInclusive() ) );
            // make() may decline to build one; if so, fall through to the general cursor.
            if ( intervalCursor ) {
                return intervalCursor;
            }
        }

        // General case: a btree cursor driven by the full field range vector.
        return shared_ptr<Cursor>( BtreeCursor::make( _d,
                                                      *_index,
                                                      _frv,
                                                      independentRangesSingleIntervalLimit(),
                                                      direction ) );
    }