void run() { OperationContextImpl txn; Client::WriteContext ctx(&txn, _ns); int numFinishedIndexesStart = _catalog->numIndexesReady(&txn); Helpers::ensureIndex(&txn, _coll, BSON("x" << 1), false, "_x_0"); Helpers::ensureIndex(&txn, _coll, BSON("y" << 1), false, "_y_0"); ASSERT_TRUE(_catalog->numIndexesReady(&txn) == numFinishedIndexesStart+2); IndexCatalog::IndexIterator ii = _catalog->getIndexIterator(&txn,false); int indexesIterated = 0; bool foundIndex = false; while (ii.more()) { IndexDescriptor* indexDesc = ii.next(); indexesIterated++; BSONObjIterator boit(indexDesc->infoObj()); while (boit.more() && !foundIndex) { BSONElement e = boit.next(); if (str::equals(e.fieldName(), "name") && str::equals(e.valuestrsafe(), "_y_0")) { foundIndex = true; break; } } } ctx.commit(); ASSERT_TRUE(indexesIterated == _catalog->numIndexesReady(&txn)); ASSERT_TRUE(foundIndex); }
long long Database::getIndexSizeForCollection(OperationContext* opCtx, Collection* coll, BSONObjBuilder* details, int scale ) { if ( !coll ) return 0; IndexCatalog::IndexIterator ii = coll->getIndexCatalog()->getIndexIterator( true /*includeUnfinishedIndexes*/ ); long long totalSize = 0; while ( ii.more() ) { IndexDescriptor* d = ii.next(); string indNS = d->indexNamespace(); // XXX creating a Collection for an index which isn't a Collection Collection* indColl = getCollection( opCtx, indNS ); if ( ! indColl ) { log() << "error: have index descriptor [" << indNS << "] but no entry in the index collection." << endl; continue; } totalSize += indColl->dataSize(); if ( details ) { long long const indexSize = indColl->dataSize() / scale; details->appendNumber( d->indexName() , indexSize ); } } return totalSize; }
Status Collection::touch(OperationContext* txn, bool touchData, bool touchIndexes, BSONObjBuilder* output) const { if (touchData) { BSONObjBuilder b; Status status = _recordStore->touch(txn, &b); if (!status.isOK()) return status; output->append("data", b.obj()); } if (touchIndexes) { Timer t; IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, false); while (ii.more()) { const IndexDescriptor* desc = ii.next(); const IndexAccessMethod* iam = _indexCatalog.getIndex(desc); Status status = iam->touch(txn); if (!status.isOK()) return status; } output->append("indexes", BSON("num" << _indexCatalog.numIndexesTotal(txn) << "millis" << t.millis())); } return Status::OK(); }
Status Database::renameCollection( OperationContext* txn, const StringData& fromNS, const StringData& toNS, bool stayTemp ) { audit::logRenameCollection( currentClient.get(), fromNS, toNS ); { // remove anything cached Collection* coll = getCollection( txn, fromNS ); if ( !coll ) return Status( ErrorCodes::NamespaceNotFound, "collection not found to rename" ); IndexCatalog::IndexIterator ii = coll->getIndexCatalog()->getIndexIterator( true ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); _clearCollectionCache( desc->indexNamespace() ); } { scoped_lock lk( _collectionLock ); _clearCollectionCache_inlock( fromNS ); _clearCollectionCache_inlock( toNS ); } Top::global.collectionDropped( fromNS.toString() ); } return _dbEntry->renameCollection( txn, fromNS, toNS, stayTemp ); }
/** * order will be: * 1) store index specs * 2) drop indexes * 3) truncate record store * 4) re-write indexes */ Status Collection::truncate(OperationContext* txn) { dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_X)); BackgroundOperation::assertNoBgOpInProgForNs(ns()); invariant(_indexCatalog.numIndexesInProgress(txn) == 0); // 1) store index specs vector<BSONObj> indexSpecs; { IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, false); while (ii.more()) { const IndexDescriptor* idx = ii.next(); indexSpecs.push_back(idx->infoObj().getOwned()); } } // 2) drop indexes Status status = _indexCatalog.dropAllIndexes(txn, true); if (!status.isOK()) return status; _cursorManager.invalidateAll(false, "collection truncated"); // 3) truncate record store status = _recordStore->truncate(txn); if (!status.isOK()) return status; // 4) re-create indexes for (size_t i = 0; i < indexSpecs.size(); i++) { status = _indexCatalog.createIndexOnEmptyCollection(txn, indexSpecs[i]); if (!status.isOK()) return status; } return Status::OK(); }
void CollectionInfoCache::updatePlanCacheIndexEntries(OperationContext* opCtx) { std::vector<IndexEntry> indexEntries; // TODO We shouldn't need to include unfinished indexes, but we must here because the index // catalog may be in an inconsistent state. SERVER-18346. const bool includeUnfinishedIndexes = true; IndexCatalog::IndexIterator ii = _collection->getIndexCatalog()->getIndexIterator(opCtx, includeUnfinishedIndexes); while (ii.more()) { const IndexDescriptor* desc = ii.next(); const IndexCatalogEntry* ice = ii.catalogEntry(desc); indexEntries.emplace_back(desc->keyPattern(), desc->getAccessMethodName(), desc->isMultikey(opCtx), ice->getMultikeyPaths(opCtx), desc->isSparse(), desc->unique(), desc->indexName(), ice->getFilterExpression(), desc->infoObj(), ice->getCollator()); } _planCache->notifyOfIndexEntries(indexEntries); }
Status Database::renameCollection(OperationContext* txn, StringData fromNS, StringData toNS, bool stayTemp) { audit::logRenameCollection(&cc(), fromNS, toNS); invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X)); BackgroundOperation::assertNoBgOpInProgForNs(fromNS); BackgroundOperation::assertNoBgOpInProgForNs(toNS); { // remove anything cached Collection* coll = getCollection(fromNS); if (!coll) return Status(ErrorCodes::NamespaceNotFound, "collection not found to rename"); string clearCacheReason = str::stream() << "renamed collection '" << fromNS << "' to '" << toNS << "'"; IndexCatalog::IndexIterator ii = coll->getIndexCatalog()->getIndexIterator(txn, true); while (ii.more()) { IndexDescriptor* desc = ii.next(); _clearCollectionCache(txn, desc->indexNamespace(), clearCacheReason); } _clearCollectionCache(txn, fromNS, clearCacheReason); _clearCollectionCache(txn, toNS, clearCacheReason); Top::get(txn->getClient()->getServiceContext()).collectionDropped(fromNS.toString()); } txn->recoveryUnit()->registerChange(new AddCollectionChange(txn, this, toNS)); Status s = _dbEntry->renameCollection(txn, fromNS, toNS, stayTemp); _collections[toNS] = _getOrCreateCollectionInstance(txn, toNS); return s; }
/** * order will be: * 1) store index specs * 2) drop indexes * 3) truncate record store * 4) re-write indexes */ Status Collection::truncate(OperationContext* txn) { massert( 17445, "index build in progress", _indexCatalog.numIndexesInProgress( txn ) == 0 ); // 1) store index specs vector<BSONObj> indexSpecs; { IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, false ); while ( ii.more() ) { const IndexDescriptor* idx = ii.next(); indexSpecs.push_back( idx->infoObj().getOwned() ); } } // 2) drop indexes Status status = _indexCatalog.dropAllIndexes(txn, true); if ( !status.isOK() ) return status; _cursorCache.invalidateAll( false ); _infoCache.reset(); // 3) truncate record store status = _recordStore->truncate(txn); if ( !status.isOK() ) return status; // 4) re-create indexes for ( size_t i = 0; i < indexSpecs.size(); i++ ) { status = _indexCatalog.createIndexOnEmptyCollection(txn, indexSpecs[i]); if ( !status.isOK() ) return status; } return Status::OK(); }
Status Database::renameCollection( OperationContext* txn, StringData fromNS, StringData toNS, bool stayTemp ) { audit::logRenameCollection( currentClient.get(), fromNS, toNS ); invariant(txn->lockState()->isDbLockedForMode(name(), MODE_X)); { // remove anything cached Collection* coll = getCollection( fromNS ); if ( !coll ) return Status( ErrorCodes::NamespaceNotFound, "collection not found to rename" ); IndexCatalog::IndexIterator ii = coll->getIndexCatalog()->getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); _clearCollectionCache( txn, desc->indexNamespace() ); } _clearCollectionCache( txn, fromNS ); _clearCollectionCache( txn, toNS ); Top::global.collectionDropped( fromNS.toString() ); } txn->recoveryUnit()->registerChange( new AddCollectionChange(this, toNS) ); Status s = _dbEntry->renameCollection( txn, fromNS, toNS, stayTemp ); _collections[toNS] = _getOrCreateCollectionInstance(txn, toNS); return s; }
// page in pages needed for all index lookups on a given object void prefetchIndexPages(Collection* collection, const repl::ReplSetImpl::IndexPrefetchConfig& prefetchConfig, const BSONObj& obj) { DiskLoc unusedDl; // unused BSONObjSet unusedKeys; // do we want prefetchConfig to be (1) as-is, (2) for update ops only, or (3) configured per op type? // One might want PREFETCH_NONE for updates, but it's more rare that it is a bad idea for inserts. // #3 (per op), a big issue would be "too many knobs". switch (prefetchConfig) { case repl::ReplSetImpl::PREFETCH_NONE: return; case repl::ReplSetImpl::PREFETCH_ID_ONLY: { TimerHolder timer( &prefetchIndexStats); // on the update op case, the call to prefetchRecordPages will touch the _id index. // thus perhaps this option isn't very useful? try { IndexDescriptor* desc = collection->getIndexCatalog()->findIdIndex(); if ( !desc ) return; IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex( desc ); verify( iam ); iam->touch(obj); } catch (const DBException& e) { LOG(2) << "ignoring exception in prefetchIndexPages(): " << e.what() << endl; } break; } case repl::ReplSetImpl::PREFETCH_ALL: { // indexCount includes all indexes, including ones // in the process of being built IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator( true ); while ( ii.more() ) { TimerHolder timer( &prefetchIndexStats); // This will page in all index pages for the given object. try { IndexDescriptor* desc = ii.next(); IndexAccessMethod* iam = collection->getIndexCatalog()->getIndex( desc ); verify( iam ); iam->touch(obj); } catch (const DBException& e) { LOG(2) << "ignoring exception in prefetchIndexPages(): " << e.what() << endl; } unusedKeys.clear(); } break; } default: fassertFailed(16427); } }
Status Collection::validate( OperationContext* txn, bool full, bool scanData, ValidateResults* results, BSONObjBuilder* output ){ MyValidateAdaptor adaptor; Status status = _recordStore->validate( txn, full, scanData, &adaptor, results, output ); if ( !status.isOK() ) return status; { // indexes output->append("nIndexes", _indexCatalog.numIndexesReady( txn ) ); int idxn = 0; try { // Only applicable when 'full' validation is requested. boost::scoped_ptr<BSONObjBuilder> indexDetails(full ? new BSONObjBuilder() : NULL); BSONObjBuilder indexes; // not using subObjStart to be exception safe IndexCatalog::IndexIterator i = _indexCatalog.getIndexIterator(txn, false); while( i.more() ) { const IndexDescriptor* descriptor = i.next(); log(LogComponent::kIndex) << "validating index " << descriptor->indexNamespace() << endl; IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); invariant( iam ); boost::scoped_ptr<BSONObjBuilder> bob( indexDetails.get() ? new BSONObjBuilder( indexDetails->subobjStart(descriptor->indexNamespace())) : NULL); int64_t keys; iam->validate(txn, full, &keys, bob.get()); indexes.appendNumber(descriptor->indexNamespace(), static_cast<long long>(keys)); idxn++; } output->append("keysPerIndex", indexes.done()); if (indexDetails.get()) { output->append("indexDetails", indexDetails->done()); } } catch ( DBException& exc ) { string err = str::stream() << "exception during index validate idxn "<< BSONObjBuilder::numStr(idxn) << ": " << exc.toString(); results->errors.push_back( err ); results->valid = false; } } return Status::OK(); }
void CollectionInfoCache::init(OperationContext* opCtx) { // Requires exclusive collection lock. invariant(opCtx->lockState()->isCollectionLockedForMode(_collection->ns().ns(), MODE_X)); const bool includeUnfinishedIndexes = false; IndexCatalog::IndexIterator ii = _collection->getIndexCatalog()->getIndexIterator(opCtx, includeUnfinishedIndexes); while (ii.more()) { const IndexDescriptor* desc = ii.next(); _indexUsageTracker.registerIndex(desc->indexName(), desc->keyPattern()); } rebuildIndexData(opCtx); }
void CollectionInfoCache::computeIndexKeys() { DEV Lock::assertWriteLocked( _collection->ns().ns() ); _indexedPaths.clear(); IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(true); while (i.more()) { IndexDescriptor* descriptor = i.next(); if (descriptor->getAccessMethodName() != IndexNames::TEXT) { BSONObj key = descriptor->keyPattern(); BSONObjIterator j(key); while (j.more()) { BSONElement e = j.next(); _indexedPaths.addPath(e.fieldName()); } } else { fts::FTSSpec ftsSpec(descriptor->infoObj()); if (ftsSpec.wildcard()) { _indexedPaths.allPathsIndexed(); } else { for (size_t i = 0; i < ftsSpec.numExtraBefore(); ++i) { _indexedPaths.addPath(ftsSpec.extraBefore(i)); } for (fts::Weights::const_iterator it = ftsSpec.weights().begin(); it != ftsSpec.weights().end(); ++it) { _indexedPaths.addPath(it->first); } for (size_t i = 0; i < ftsSpec.numExtraAfter(); ++i) { _indexedPaths.addPath(ftsSpec.extraAfter(i)); } // Any update to a path containing "language" as a component could change the // language of a subdocument. Add the override field as a path component. _indexedPaths.addPathComponent(ftsSpec.languageOverrideField()); } } } _keysComputed = true; }
uint64_t Collection::getIndexSize(OperationContext* opCtx, BSONObjBuilder* details, int scale) { IndexCatalog* idxCatalog = getIndexCatalog(); IndexCatalog::IndexIterator ii = idxCatalog->getIndexIterator(opCtx, true); uint64_t totalSize = 0; while (ii.more()) { IndexDescriptor* d = ii.next(); IndexAccessMethod* iam = idxCatalog->getIndex(d); long long ds = iam->getSpaceUsedBytes(opCtx); totalSize += ds; if (details) { details->appendNumber(d->indexName(), ds / scale); } } return totalSize; }
Status Collection::validate( bool full, bool scanData, ValidateResults* results, BSONObjBuilder* output ){ MyValidateAdaptor adaptor; Status status = _recordStore->validate( full, scanData, &adaptor, results, output ); if ( !status.isOK() ) return status; { // indexes output->append("nIndexes", _indexCatalog.numIndexesReady() ); int idxn = 0; try { BSONObjBuilder indexes; // not using subObjStart to be exception safe IndexCatalog::IndexIterator i = _indexCatalog.getIndexIterator(false); while( i.more() ) { const IndexDescriptor* descriptor = i.next(); log() << "validating index " << descriptor->indexNamespace() << endl; IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); invariant( iam ); int64_t keys; iam->validate(&keys); indexes.appendNumber(descriptor->indexNamespace(), static_cast<long long>(keys)); idxn++; } output->append("keysPerIndex", indexes.done()); } catch ( DBException& exc ) { string err = str::stream() << "exception during index validate idxn "<< BSONObjBuilder::numStr(idxn) << ": " << exc.toString(); results->errors.push_back( err ); results->valid = false; } } return Status::OK(); }
void run() { Client::WriteContext ctx(_ns); int numFinishedIndexesStart = _catalog->numIndexesReady(); BSONObjBuilder b1; b1.append("key", BSON("x" << 1)); b1.append("ns", _ns); b1.append("name", "_x_0"); _catalog->createIndex(b1.obj(), true); BSONObjBuilder b2; b2.append("key", BSON("y" << 1)); b2.append("ns", _ns); b2.append("name", "_y_0"); _catalog->createIndex(b2.obj(), true); ASSERT_TRUE(_catalog->numIndexesReady() == numFinishedIndexesStart+2); IndexCatalog::IndexIterator ii = _catalog->getIndexIterator(false); int indexesIterated = 0; bool foundIndex = false; while (ii.more()) { IndexDescriptor* indexDesc = ii.next(); indexesIterated++; BSONObjIterator boit(indexDesc->infoObj()); while (boit.more() && !foundIndex) { BSONElement e = boit.next(); if (str::equals(e.fieldName(), "name") && str::equals(e.valuestrsafe(), "_y_0")) { foundIndex = true; break; } } } ASSERT_TRUE(indexesIterated == _catalog->numIndexesReady()); ASSERT_TRUE(foundIndex); }
virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string source = cmdObj.getStringField( name.c_str() ); string target = cmdObj.getStringField( "to" ); if ( !NamespaceString::validCollectionComponent(target.c_str()) ) { errmsg = "invalid collection name: " + target; return false; } if ( source.empty() || target.empty() ) { errmsg = "invalid command syntax"; return false; } if (!fromRepl) { // If it got through on the master, need to allow it here too Status sourceStatus = userAllowedWriteNS(source); if (!sourceStatus.isOK()) { errmsg = "error with source namespace: " + sourceStatus.reason(); return false; } Status targetStatus = userAllowedWriteNS(target); if (!targetStatus.isOK()) { errmsg = "error with target namespace: " + targetStatus.reason(); return false; } } string sourceDB = nsToDatabase(source); string targetDB = nsToDatabase(target); bool capped = false; long long size = 0; std::vector<BSONObj> indexesInProg; { Client::Context srcCtx( source ); Collection* sourceColl = srcCtx.db()->getCollection( source ); if ( !sourceColl ) { errmsg = "source namespace does not exist"; return false; } // Ensure that collection name does not exceed maximum length. // Ensure that index names do not push the length over the max. // Iterator includes unfinished indexes. IndexCatalog::IndexIterator sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator( true ); int longestIndexNameLength = 0; while ( sourceIndIt.more() ) { int thisLength = sourceIndIt.next()->indexName().length(); if ( thisLength > longestIndexNameLength ) longestIndexNameLength = thisLength; } unsigned int longestAllowed = min(int(Namespace::MaxNsColletionLen), int(Namespace::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength); if (target.size() > longestAllowed) { StringBuilder sb; sb << "collection name length of " << target.size() << " exceeds maximum length of " << longestAllowed << ", allowing for index names"; errmsg = sb.str(); return false; } { indexesInProg = stopIndexBuilds( srcCtx.db(), cmdObj ); capped = sourceColl->isCapped(); if ( capped ) { size = sourceColl->storageSize(); } } } { Client::Context ctx( target ); // Check if the target namespace exists and if dropTarget is true. // If target exists and dropTarget is not true, return false. if ( ctx.db()->getCollection( target ) ) { if ( !cmdObj["dropTarget"].trueValue() ) { errmsg = "target namespace exists"; return false; } Status s = cc().database()->dropCollection( target ); if ( !s.isOK() ) { errmsg = s.toString(); restoreIndexBuildsOnSource( indexesInProg, source ); return false; } } // If we are renaming in the same database, just // rename the namespace and we're done. if ( sourceDB == targetDB ) { Status s = ctx.db()->renameCollection( source, target, cmdObj["stayTemp"].trueValue() ); if ( !s.isOK() ) { errmsg = s.toString(); restoreIndexBuildsOnSource( indexesInProg, source ); return false; } return true; } // Otherwise, we are enaming across databases, so we must copy all // the data and then remove the source collection. // Create the target collection. Collection* targetColl = NULL; if ( capped ) { CollectionOptions options; options.capped = true; options.cappedSize = size; options.setNoIdIndex(); targetColl = ctx.db()->createCollection( target, options ); } else { CollectionOptions options; options.setNoIdIndex(); // No logOp necessary because the entire renameCollection command is one logOp. 
targetColl = ctx.db()->createCollection( target, options ); } if ( !targetColl ) { errmsg = "Failed to create target collection."; restoreIndexBuildsOnSource( indexesInProg, source ); return false; } } // Copy over all the data from source collection to target collection. bool insertSuccessful = true; boost::scoped_ptr<CollectionIterator> sourceIt; { Client::Context srcCtx( source ); Collection* sourceColl = srcCtx.db()->getCollection( source ); sourceIt.reset( sourceColl->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) ); } Collection* targetColl = NULL; while ( !sourceIt->isEOF() ) { BSONObj o; { Client::Context srcCtx( source ); o = sourceIt->getNext().obj(); } // Insert and check return status of insert. { Client::Context ctx( target ); if ( !targetColl ) targetColl = ctx.db()->getCollection( target ); // No logOp necessary because the entire renameCollection command is one logOp. Status s = targetColl->insertDocument( o, true ).getStatus(); if ( !s.isOK() ) { insertSuccessful = false; errmsg = s.toString(); break; } } } // If inserts were unsuccessful, drop the target collection and return false. if ( !insertSuccessful ) { Client::Context ctx( target ); Status s = ctx.db()->dropCollection( target ); if ( !s.isOK() ) errmsg = s.toString(); restoreIndexBuildsOnSource( indexesInProg, source ); return false; } // Copy over the indexes to temp storage and then to the target.. vector<BSONObj> copiedIndexes; bool indexSuccessful = true; { Client::Context srcCtx( source ); Collection* sourceColl = srcCtx.db()->getCollection( source ); IndexCatalog::IndexIterator sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator( true ); while ( sourceIndIt.more() ) { BSONObj currIndex = sourceIndIt.next()->infoObj(); // Process the source index. BSONObjBuilder b; BSONObjIterator i( currIndex ); while( i.moreWithEOO() ) { BSONElement e = i.next(); if ( e.eoo() ) break; else if ( strcmp( e.fieldName(), "ns" ) == 0 ) b.append( "ns", target ); else b.append( e ); } BSONObj newIndex = b.obj(); copiedIndexes.push_back( newIndex ); } } { Client::Context ctx( target ); if ( !targetColl ) targetColl = ctx.db()->getCollection( target ); for ( vector<BSONObj>::iterator it = copiedIndexes.begin(); it != copiedIndexes.end(); ++it ) { Status s = targetColl->getIndexCatalog()->createIndex( *it, true ); if ( !s.isOK() ) { indexSuccessful = false; errmsg = s.toString(); break; } } // If indexes were unsuccessful, drop the target collection and return false. if ( !indexSuccessful ) { Status s = ctx.db()->dropCollection( target ); if ( !s.isOK() ) errmsg = s.toString(); restoreIndexBuildsOnSource( indexesInProg, source ); return false; } } // Drop the source collection. { Client::Context srcCtx( source ); Status s = srcCtx.db()->dropCollection( source ); if ( !s.isOK() ) { errmsg = s.toString(); restoreIndexBuildsOnSource( indexesInProg, source ); return false; } } return true; }
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn, const RecordId& oldLocation, const Snapshotted<BSONObj>& oldDoc, const BSONObj& newDoc, bool enforceQuota, bool indexesAffected, OpDebug* debug, oplogUpdateEntryArgs& args) { { auto status = checkValidation(txn, newDoc); if (!status.isOK()) { if (_validationLevel == STRICT_V) { return status; } // moderate means we have to check the old doc auto oldDocStatus = checkValidation(txn, oldDoc.value()); if (oldDocStatus.isOK()) { // transitioning from good -> bad is not ok return status; } // bad -> bad is ok in moderate mode } } dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX)); invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId()); if (_needCappedLock) { // X-lock the metadata resource for this capped collection until the end of the WUOW. This // prevents the primary from executing with more concurrency than secondaries. // See SERVER-21646. Lock::ResourceLock{txn->lockState(), ResourceId(RESOURCE_METADATA, _ns.ns()), MODE_X}; } SnapshotId sid = txn->recoveryUnit()->getSnapshotId(); BSONElement oldId = oldDoc.value()["_id"]; if (!oldId.eoo() && (oldId != newDoc["_id"])) return StatusWith<RecordId>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596); // The MMAPv1 storage engine implements capped collections in a way that does not allow records // to grow beyond their original size. If MMAPv1 part of a replicaset with storage engines that // do not have this limitation, replication could result in errors, so it is necessary to set a // uniform rule here. Similarly, it is not sufficient to disallow growing records, because this // happens when secondaries roll back an update shrunk a record. Exactly replicating legacy // MMAPv1 behavior would require padding shrunk documents on all storage engines. Instead forbid // all size changes. const auto oldSize = oldDoc.value().objsize(); if (_recordStore->isCapped() && oldSize != newDoc.objsize()) return {ErrorCodes::CannotGrowDocumentInCappedNamespace, str::stream() << "Cannot change the size of a document in a capped collection: " << oldSize << " != " << newDoc.objsize()}; // At the end of this step, we will have a map of UpdateTickets, one per index, which // represent the index updates needed to be done, based on the changes between oldDoc and // newDoc. OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets; if (indexesAffected) { IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true); while (ii.more()) { IndexDescriptor* descriptor = ii.next(); IndexCatalogEntry* entry = ii.catalogEntry(descriptor); IndexAccessMethod* iam = ii.accessMethod(descriptor); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(txn, oldDoc.value(), newDoc, oldLocation, options, updateTicket, entry->getFilterExpression()); if (!ret.isOK()) { return StatusWith<RecordId>(ret); } } } // This can call back into Collection::recordStoreGoingToMove. If that happens, the old // object is removed from all indexes. 
StatusWith<RecordId> newLocation = _recordStore->updateRecord( txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this); if (!newLocation.isOK()) { return newLocation; } // At this point, the old object may or may not still be indexed, depending on if it was // moved. If the object did move, we need to add the new location to all indexes. if (newLocation.getValue() != oldLocation) { if (debug) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } std::vector<BsonRecord> bsonRecords; BsonRecord bsonRecord = {newLocation.getValue(), &newDoc}; bsonRecords.push_back(bsonRecord); Status s = _indexCatalog.indexRecords(txn, bsonRecords); if (!s.isOK()) return StatusWith<RecordId>(s); invariant(sid == txn->recoveryUnit()->getSnapshotId()); args.ns = ns().ns(); getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args); return newLocation; } // Object did not move. We update each index with each respective UpdateTicket. if (debug) debug->keyUpdates = 0; if (indexesAffected) { IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true); while (ii.more()) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = ii.accessMethod(descriptor); int64_t updatedKeys; Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys); if (!ret.isOK()) return StatusWith<RecordId>(ret); if (debug) debug->keyUpdates += updatedKeys; } } invariant(sid == txn->recoveryUnit()->getSnapshotId()); args.ns = ns().ns(); getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args); return newLocation; }
StatusWith<DiskLoc> Collection::updateDocument( OperationContext* txn, const DiskLoc& oldLocation, const BSONObj& objNew, bool enforceQuota, OpDebug* debug ) { BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).toBson(); if ( objOld.hasElement( "_id" ) ) { BSONElement oldId = objOld["_id"]; BSONElement newId = objNew["_id"]; if ( oldId != newId ) return StatusWith<DiskLoc>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596 ); } if ( ns().coll() == "system.users" ) { // XXX - andy and spencer think this should go away now V2UserDocumentParser parser; Status s = parser.checkValidUserDocument(objNew); if ( !s.isOK() ) return StatusWith<DiskLoc>( s ); } /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further below. that is suboptimal, but it's pretty complicated to do it the other way without rollbacks... */ OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets; IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket ); if ( !ret.isOK() ) { return StatusWith<DiskLoc>( ret ); } } // this can callback into Collection::recordStoreGoingToMove StatusWith<DiskLoc> newLocation = _recordStore->updateRecord( txn, oldLocation, objNew.objdata(), objNew.objsize(), _enforceQuota( enforceQuota ), this ); if ( !newLocation.isOK() ) { return newLocation; } _infoCache.notifyOfWriteOp(); if ( newLocation.getValue() != oldLocation ) { if ( debug ) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } _indexCatalog.indexRecord(txn, objNew, newLocation.getValue()); return newLocation; } if ( debug ) debug->keyUpdates = 0; ii = _indexCatalog.getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); int64_t updatedKeys; Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys); if ( !ret.isOK() ) return StatusWith<DiskLoc>( ret ); if ( debug ) debug->keyUpdates += updatedKeys; } // Broadcast the mutation so that query results stay correct. _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION); return newLocation; }
Status RecordStoreValidateAdaptor::validate(const RecordId& recordId, const RecordData& record, size_t* dataSize) { BSONObj recordBson = record.toBson(); const Status status = validateBSON( recordBson.objdata(), recordBson.objsize(), Validator<BSONObj>::enabledBSONVersion()); if (status.isOK()) { *dataSize = recordBson.objsize(); } else { return status; } if (!_indexCatalog->haveAnyIndexes()) { return status; } IndexCatalog::IndexIterator i = _indexCatalog->getIndexIterator(_opCtx, false); while (i.more()) { const IndexDescriptor* descriptor = i.next(); const std::string indexNs = descriptor->indexNamespace(); int indexNumber = _indexConsistency->getIndexNumber(indexNs); ValidateResults curRecordResults; const IndexAccessMethod* iam = _indexCatalog->getIndex(descriptor); if (descriptor->isPartial()) { const IndexCatalogEntry* ice = _indexCatalog->getEntry(descriptor); if (!ice->getFilterExpression()->matchesBSON(recordBson)) { (*_indexNsResultsMap)[indexNs] = curRecordResults; continue; } } BSONObjSet documentKeySet = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); BSONObjSet multikeyMetadataKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); MultikeyPaths multikeyPaths; iam->getKeys(recordBson, IndexAccessMethod::GetKeysMode::kEnforceConstraints, &documentKeySet, &multikeyMetadataKeys, &multikeyPaths); if (!descriptor->isMultikey(_opCtx) && iam->shouldMarkIndexAsMultikey(documentKeySet, multikeyMetadataKeys, multikeyPaths)) { std::string msg = str::stream() << "Index " << descriptor->indexName() << " is not multi-key, but a multikey path " << " is present in document " << recordId; curRecordResults.errors.push_back(msg); curRecordResults.valid = false; } for (const auto& key : multikeyMetadataKeys) { _indexConsistency->addMultikeyMetadataPath(makeWildCardMultikeyMetadataKeyString(key), indexNumber); } const auto& pattern = descriptor->keyPattern(); const Ordering ord = Ordering::make(pattern); bool largeKeyDisallowed = isLargeKeyDisallowed(); for (const auto& key : documentKeySet) { if (largeKeyDisallowed && key.objsize() >= static_cast<int64_t>(KeyString::TypeBits::kMaxKeyBytes)) { // Index keys >= 1024 bytes are not indexed. _indexConsistency->addLongIndexKey(indexNumber); continue; } // We want to use the latest version of KeyString here. KeyString ks(KeyString::kLatestVersion, key, ord, recordId); _indexConsistency->addDocKey(ks, indexNumber); } (*_indexNsResultsMap)[indexNs] = curRecordResults; } return status; }
Status MMAPV1Engine::repairDatabase( OperationContext* txn, const std::string& dbName, bool preserveClonedFilesOnFailure, bool backupOriginalFiles ) { // We must hold some form of lock here invariant(txn->lockState()->threadState()); invariant( dbName.find( '.' ) == string::npos ); scoped_ptr<RepairFileDeleter> repairFileDeleter; log() << "repairDatabase " << dbName << endl; BackgroundOperation::assertNoBgOpInProgForDb(dbName); txn->recoveryUnit()->syncDataAndTruncateJournal(); // Must be done before and after repair intmax_t totalSize = dbSize( dbName ); intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath); if ( freeSize > -1 && freeSize < totalSize ) { return Status( ErrorCodes::OutOfDiskSpace, str::stream() << "Cannot repair database " << dbName << " having size: " << totalSize << " (bytes) because free disk space is: " << freeSize << " (bytes)" ); } txn->checkForInterrupt(); Path reservedPath = uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ? "backup" : "_tmp" ); MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) ); string reservedPathString = reservedPath.string(); if ( !preserveClonedFilesOnFailure ) repairFileDeleter.reset( new RepairFileDeleter( txn, dbName, reservedPathString, reservedPath ) ); { Database* originalDatabase = dbHolder().get(txn, dbName); if (originalDatabase == NULL) { return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair"); } scoped_ptr<MMAPV1DatabaseCatalogEntry> dbEntry; scoped_ptr<Database> tempDatabase; { dbEntry.reset( new MMAPV1DatabaseCatalogEntry( txn, dbName, reservedPathString, storageGlobalParams.directoryperdb, true ) ); invariant( !dbEntry->exists() ); tempDatabase.reset( new Database( txn, dbName, dbEntry.get() ) ); } map<string,CollectionOptions> namespacesToCopy; { string ns = dbName + ".system.namespaces"; Client::Context ctx(txn, ns ); Collection* coll = originalDatabase->getCollection( txn, ns ); if ( coll ) { scoped_ptr<RecordIterator> it( coll->getIterator( txn, DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !it->isEOF() ) { DiskLoc loc = it->getNext(); BSONObj obj = coll->docFor( loc ); string ns = obj["name"].String(); NamespaceString nss( ns ); if ( nss.isSystem() ) { if ( nss.isSystemDotIndexes() ) continue; if ( nss.coll() == "system.namespaces" ) continue; } if ( !nss.isNormal() ) continue; CollectionOptions options; if ( obj["options"].isABSONObj() ) { Status status = options.parse( obj["options"].Obj() ); if ( !status.isOK() ) return status; } namespacesToCopy[ns] = options; } } } for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin(); i != namespacesToCopy.end(); ++i ) { string ns = i->first; CollectionOptions options = i->second; Collection* tempCollection = NULL; { Client::Context tempContext(txn, ns, tempDatabase ); WriteUnitOfWork wunit(txn); tempCollection = tempDatabase->createCollection(txn, ns, options, true, false); wunit.commit(); } Client::Context readContext(txn, ns, originalDatabase); Collection* originalCollection = originalDatabase->getCollection( txn, ns ); invariant( originalCollection ); // data // TODO SERVER-14812 add a mode that drops duplicates rather than failing MultiIndexBlock indexer(txn, tempCollection ); { vector<BSONObj> indexes; IndexCatalog::IndexIterator ii = originalCollection->getIndexCatalog()->getIndexIterator( false ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); indexes.push_back( desc->infoObj() ); } Client::Context tempContext(txn, ns, 
tempDatabase); Status status = indexer.init( indexes ); if ( !status.isOK() ) return status; } scoped_ptr<RecordIterator> iterator( originalCollection->getIterator( txn, DiskLoc(), false, CollectionScanParams::FORWARD )); while ( !iterator->isEOF() ) { DiskLoc loc = iterator->getNext(); invariant( !loc.isNull() ); BSONObj doc = originalCollection->docFor( loc ); Client::Context tempContext(txn, ns, tempDatabase); WriteUnitOfWork wunit(txn); StatusWith<DiskLoc> result = tempCollection->insertDocument(txn, doc, &indexer, false); if ( !result.isOK() ) return result.getStatus(); wunit.commit(); txn->checkForInterrupt(false); } Status status = indexer.doneInserting(); if (!status.isOK()) return status; { Client::Context tempContext(txn, ns, tempDatabase); WriteUnitOfWork wunit(txn); indexer.commit(); wunit.commit(); } } txn->recoveryUnit()->syncDataAndTruncateJournal(); globalStorageEngine->flushAllFiles(true); // need both in case journaling is disabled txn->checkForInterrupt(false); } // at this point if we abort, we don't want to delete new files // as they might be the only copies if ( repairFileDeleter.get() ) repairFileDeleter->success(); dbHolder().close( txn, dbName ); if ( backupOriginalFiles ) { _renameForBackup( dbName, reservedPath ); } else { // first make new directory before deleting data Path newDir = Path(storageGlobalParams.dbpath) / dbName; MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); // this deletes old files _deleteDataFiles( dbName ); if ( !boost::filesystem::exists(newDir) ) { // we deleted because of directoryperdb // re-create MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); } } _replaceWithRecovered( dbName, reservedPathString.c_str() ); if ( !backupOriginalFiles ) MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) ); return Status::OK(); }
Status MMAPV1Engine::repairDatabase( OperationContext* txn, const std::string& dbName, bool preserveClonedFilesOnFailure, bool backupOriginalFiles ) { unique_ptr<RepairFileDeleter> repairFileDeleter; // Must be done before and after repair getDur().syncDataAndTruncateJournal(txn); intmax_t totalSize = dbSize( dbName ); intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath); if ( freeSize > -1 && freeSize < totalSize ) { return Status( ErrorCodes::OutOfDiskSpace, str::stream() << "Cannot repair database " << dbName << " having size: " << totalSize << " (bytes) because free disk space is: " << freeSize << " (bytes)" ); } txn->checkForInterrupt(); Path reservedPath = uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ? "backup" : "_tmp" ); bool created = false; MONGO_ASSERT_ON_EXCEPTION( created = boost::filesystem::create_directory( reservedPath ) ); invariant( created ); string reservedPathString = reservedPath.string(); if ( !preserveClonedFilesOnFailure ) repairFileDeleter.reset( new RepairFileDeleter( txn, dbName, reservedPathString, reservedPath ) ); { Database* originalDatabase = dbHolder().openDb(txn, dbName); if (originalDatabase == NULL) { return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair"); } unique_ptr<MMAPV1DatabaseCatalogEntry> dbEntry; unique_ptr<Database> tempDatabase; // Must call this before MMAPV1DatabaseCatalogEntry's destructor closes the DB files ON_BLOCK_EXIT(&dur::DurableInterface::syncDataAndTruncateJournal, &getDur(), txn); { dbEntry.reset(new MMAPV1DatabaseCatalogEntry(txn, dbName, reservedPathString, storageGlobalParams.directoryperdb, true)); tempDatabase.reset( new Database(txn, dbName, dbEntry.get())); } map<string,CollectionOptions> namespacesToCopy; { string ns = dbName + ".system.namespaces"; OldClientContext ctx(txn, ns ); Collection* coll = originalDatabase->getCollection( ns ); if ( coll ) { auto cursor = coll->getCursor(txn); while (auto record = cursor->next()) { BSONObj obj = record->data.releaseToBson(); string ns = obj["name"].String(); NamespaceString nss( ns ); if ( nss.isSystem() ) { if ( nss.isSystemDotIndexes() ) continue; if ( nss.coll() == "system.namespaces" ) continue; } if ( !nss.isNormal() ) continue; CollectionOptions options; if ( obj["options"].isABSONObj() ) { Status status = options.parse( obj["options"].Obj() ); if ( !status.isOK() ) return status; } namespacesToCopy[ns] = options; } } } for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin(); i != namespacesToCopy.end(); ++i ) { string ns = i->first; CollectionOptions options = i->second; Collection* tempCollection = NULL; { WriteUnitOfWork wunit(txn); tempCollection = tempDatabase->createCollection(txn, ns, options, false); wunit.commit(); } OldClientContext readContext(txn, ns, originalDatabase); Collection* originalCollection = originalDatabase->getCollection( ns ); invariant( originalCollection ); // data // TODO SERVER-14812 add a mode that drops duplicates rather than failing MultiIndexBlock indexer(txn, tempCollection ); { vector<BSONObj> indexes; IndexCatalog::IndexIterator ii = originalCollection->getIndexCatalog()->getIndexIterator( txn, false ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); indexes.push_back( desc->infoObj() ); } Status status = indexer.init( indexes ); if (!status.isOK()) { return status; } } auto cursor = originalCollection->getCursor(txn); while (auto record = cursor->next()) { BSONObj doc = record->data.releaseToBson(); WriteUnitOfWork wunit(txn); 
StatusWith<RecordId> result = tempCollection->insertDocument(txn, doc, &indexer, false); if ( !result.isOK() ) return result.getStatus(); wunit.commit(); txn->checkForInterrupt(); } Status status = indexer.doneInserting(); if (!status.isOK()) return status; { WriteUnitOfWork wunit(txn); indexer.commit(); wunit.commit(); } } getDur().syncDataAndTruncateJournal(txn); // need both in case journaling is disabled MongoFile::flushAll(true); txn->checkForInterrupt(); } // at this point if we abort, we don't want to delete new files // as they might be the only copies if ( repairFileDeleter.get() ) repairFileDeleter->success(); // Close the database so we can rename/delete the original data files dbHolder().close(txn, dbName); if ( backupOriginalFiles ) { _renameForBackup( dbName, reservedPath ); } else { // first make new directory before deleting data Path newDir = Path(storageGlobalParams.dbpath) / dbName; MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); // this deletes old files _deleteDataFiles( dbName ); if ( !boost::filesystem::exists(newDir) ) { // we deleted because of directoryperdb // re-create MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); } } _replaceWithRecovered( dbName, reservedPathString.c_str() ); if (!backupOriginalFiles) { MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(reservedPath)); } // Reopen the database so it's discoverable dbHolder().openDb(txn, dbName); return Status::OK(); }
Status appendCollectionStorageStats(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& param, BSONObjBuilder* result) { int scale = 1; if (param["scale"].isNumber()) { scale = param["scale"].numberInt(); if (scale < 1) { return {ErrorCodes::BadValue, "scale has to be >= 1"}; } } else if (param["scale"].trueValue()) { return {ErrorCodes::BadValue, "scale has to be a number >= 1"}; } bool verbose = param["verbose"].trueValue(); AutoGetCollectionForReadCommand ctx(opCtx, nss); Collection* collection = ctx.getCollection(); // Will be set if present if (!ctx.getDb() || !collection) { result->appendNumber("size", 0); result->appendNumber("count", 0); result->appendNumber("storageSize", 0); result->append("nindexes", 0); result->appendNumber("totalIndexSize", 0); result->append("indexDetails", BSONObj()); result->append("indexSizes", BSONObj()); std::string errmsg = !(ctx.getDb()) ? "Database [" + nss.db().toString() + "] not found." : "Collection [" + nss.toString() + "] not found."; return {ErrorCodes::NamespaceNotFound, errmsg}; } long long size = collection->dataSize(opCtx) / scale; result->appendNumber("size", size); long long numRecords = collection->numRecords(opCtx); result->appendNumber("count", numRecords); if (numRecords) result->append("avgObjSize", collection->averageObjectSize(opCtx)); RecordStore* recordStore = collection->getRecordStore(); result->appendNumber( "storageSize", static_cast<long long>(recordStore->storageSize(opCtx, result, verbose ? 1 : 0)) / scale); recordStore->appendCustomStats(opCtx, result, scale); IndexCatalog* indexCatalog = collection->getIndexCatalog(); result->append("nindexes", indexCatalog->numIndexesReady(opCtx)); BSONObjBuilder indexDetails; IndexCatalog::IndexIterator i = indexCatalog->getIndexIterator(opCtx, false); while (i.more()) { const IndexDescriptor* descriptor = i.next(); IndexAccessMethod* iam = indexCatalog->getIndex(descriptor); invariant(iam); BSONObjBuilder bob; if (iam->appendCustomStats(opCtx, &bob, scale)) { indexDetails.append(descriptor->indexName(), bob.obj()); } } result->append("indexDetails", indexDetails.obj()); BSONObjBuilder indexSizes; long long indexSize = collection->getIndexSize(opCtx, &indexSizes, scale); result->appendNumber("totalIndexSize", indexSize / scale); result->append("indexSizes", indexSizes.obj()); return Status::OK(); }
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn, const RecordId& oldLocation, const Snapshotted<BSONObj>& oldDoc, const BSONObj& newDoc, bool enforceQuota, bool indexesAffected, OpDebug* debug, oplogUpdateEntryArgs& args) { { auto status = checkValidation(txn, newDoc); if (!status.isOK()) { if (_validationLevel == STRICT_V) { return status; } // moderate means we have to check the old doc auto oldDocStatus = checkValidation(txn, oldDoc.value()); if (oldDocStatus.isOK()) { // transitioning from good -> bad is not ok return status; } // bad -> bad is ok in moderate mode } } dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX)); invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId()); SnapshotId sid = txn->recoveryUnit()->getSnapshotId(); BSONElement oldId = oldDoc.value()["_id"]; if (!oldId.eoo() && (oldId != newDoc["_id"])) return StatusWith<RecordId>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596); // At the end of this step, we will have a map of UpdateTickets, one per index, which // represent the index updates needed to be done, based on the changes between oldDoc and // newDoc. OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets; if (indexesAffected) { IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true); while (ii.more()) { IndexDescriptor* descriptor = ii.next(); IndexCatalogEntry* entry = ii.catalogEntry(descriptor); IndexAccessMethod* iam = ii.accessMethod(descriptor); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(txn, oldDoc.value(), newDoc, oldLocation, options, updateTicket, entry->getFilterExpression()); if (!ret.isOK()) { return StatusWith<RecordId>(ret); } } } // This can call back into Collection::recordStoreGoingToMove. If that happens, the old // object is removed from all indexes. StatusWith<RecordId> newLocation = _recordStore->updateRecord( txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this); if (!newLocation.isOK()) { return newLocation; } // At this point, the old object may or may not still be indexed, depending on if it was // moved. If the object did move, we need to add the new location to all indexes. if (newLocation.getValue() != oldLocation) { if (debug) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } Status s = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue()); if (!s.isOK()) return StatusWith<RecordId>(s); invariant(sid == txn->recoveryUnit()->getSnapshotId()); args.ns = ns().ns(); getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args); return newLocation; } // Object did not move. We update each index with each respective UpdateTicket. 
if (debug) debug->keyUpdates = 0; if (indexesAffected) { IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true); while (ii.more()) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = ii.accessMethod(descriptor); int64_t updatedKeys; Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys); if (!ret.isOK()) return StatusWith<RecordId>(ret); if (debug) debug->keyUpdates += updatedKeys; } } invariant(sid == txn->recoveryUnit()->getSnapshotId()); args.ns = ns().ns(); getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args); return newLocation; }
void fillOutPlannerParams(Collection* collection, CanonicalQuery* canonicalQuery, QueryPlannerParams* plannerParams) { // If it's not NULL, we may have indices. Access the catalog and fill out IndexEntry(s) IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false); while (ii.more()) { const IndexDescriptor* desc = ii.next(); plannerParams->indices.push_back(IndexEntry(desc->keyPattern(), desc->getAccessMethodName(), desc->isMultikey(), desc->isSparse(), desc->indexName(), desc->infoObj())); } // If query supports index filters, filter params.indices by indices in query settings. QuerySettings* querySettings = collection->infoCache()->getQuerySettings(); AllowedIndices* allowedIndicesRaw; // Filter index catalog if index filters are specified for query. // Also, signal to planner that application hint should be ignored. if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) { boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw); filterAllowedIndexEntries(*allowedIndices, &plannerParams->indices); plannerParams->indexFiltersApplied = true; } // We will not output collection scans unless there are no indexed solutions. NO_TABLE_SCAN // overrides this behavior by not outputting a collscan even if there are no indexed // solutions. if (storageGlobalParams.noTableScan) { const string& ns = canonicalQuery->ns(); // There are certain cases where we ignore this restriction: bool ignore = canonicalQuery->getQueryObj().isEmpty() || (string::npos != ns.find(".system.")) || (0 == ns.find("local.")); if (!ignore) { plannerParams->options |= QueryPlannerParams::NO_TABLE_SCAN; } } // If the caller wants a shard filter, make sure we're actually sharded. if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { CollectionMetadataPtr collMetadata = shardingState.getCollectionMetadata(canonicalQuery->ns()); if (collMetadata) { plannerParams->shardKey = collMetadata->getKeyPattern(); } else { // If there's no metadata don't bother w/the shard filter since we won't know what // the key pattern is anyway... plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER; } } if (internalQueryPlannerEnableIndexIntersection) { plannerParams->options |= QueryPlannerParams::INDEX_INTERSECTION; } plannerParams->options |= QueryPlannerParams::KEEP_MUTATIONS; plannerParams->options |= QueryPlannerParams::SPLIT_LIMITED_SORT; }
/** * For a given query, get a runner. The runner could be a SingleSolutionRunner, a * CachedQueryRunner, or a MultiPlanRunner, depending on the cache/query solver/etc. */ Status getRunner(Collection* collection, CanonicalQuery* rawCanonicalQuery, Runner** out, size_t plannerOptions) { verify(rawCanonicalQuery); auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery); // This can happen as we're called by internal clients as well. if (NULL == collection) { const string& ns = canonicalQuery->ns(); *out = new EOFRunner(canonicalQuery.release(), ns); return Status::OK(); } // If we have an _id index we can use the idhack runner. if (canUseIDHack(*canonicalQuery) && collection->getIndexCatalog()->findIdIndex()) { *out = new IDHackRunner(collection, canonicalQuery.release()); return Status::OK(); } // If it's not NULL, we may have indices. Access the catalog and fill out IndexEntry(s) QueryPlannerParams plannerParams; IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false); while (ii.more()) { const IndexDescriptor* desc = ii.next(); plannerParams.indices.push_back(IndexEntry(desc->keyPattern(), desc->isMultikey(), desc->isSparse(), desc->indexName(), desc->infoObj())); } // If query supports index filters, filter params.indices by indices in query settings. QuerySettings* querySettings = collection->infoCache()->getQuerySettings(); AllowedIndices* allowedIndicesRaw; // Filter index catalog if index filters are specified for query. // Also, signal to planner that application hint should be ignored. if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) { boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw); filterAllowedIndexEntries(*allowedIndices, &plannerParams.indices); plannerParams.indexFiltersApplied = true; } // Tailable: If the query requests tailable the collection must be capped. if (canonicalQuery->getParsed().hasOption(QueryOption_CursorTailable)) { if (!collection->isCapped()) { return Status(ErrorCodes::BadValue, "error processing query: " + canonicalQuery->toString() + " tailable cursor requested on non capped collection"); } // If a sort is specified it must be equal to expectedSort. const BSONObj expectedSort = BSON("$natural" << 1); const BSONObj& actualSort = canonicalQuery->getParsed().getSort(); if (!actualSort.isEmpty() && !(actualSort == expectedSort)) { return Status(ErrorCodes::BadValue, "error processing query: " + canonicalQuery->toString() + " invalid sort specified for tailable cursor: " + actualSort.toString()); } } // Process the planning options. plannerParams.options = plannerOptions; if (storageGlobalParams.noTableScan) { const string& ns = canonicalQuery->ns(); // There are certain cases where we ignore this restriction: bool ignore = canonicalQuery->getQueryObj().isEmpty() || (string::npos != ns.find(".system.")) || (0 == ns.find("local.")); if (!ignore) { plannerParams.options |= QueryPlannerParams::NO_TABLE_SCAN; } } if (!(plannerParams.options & QueryPlannerParams::NO_TABLE_SCAN)) { plannerParams.options |= QueryPlannerParams::INCLUDE_COLLSCAN; } // If the caller wants a shard filter, make sure we're actually sharded. if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { CollectionMetadataPtr collMetadata = shardingState.getCollectionMetadata(canonicalQuery->ns()); if (collMetadata) { plannerParams.shardKey = collMetadata->getKeyPattern(); } else { // If there's no metadata don't bother w/the shard filter since we won't know what // the key pattern is anyway... 
plannerParams.options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER; } } // Try to look up a cached solution for the query. // // Skip cache look up for non-cacheable queries. // See PlanCache::shouldCacheQuery() // // TODO: Can the cache have negative data about a solution? CachedSolution* rawCS; if (PlanCache::shouldCacheQuery(*canonicalQuery) && collection->infoCache()->getPlanCache()->get(*canonicalQuery, &rawCS).isOK()) { // We have a CachedSolution. Have the planner turn it into a QuerySolution. boost::scoped_ptr<CachedSolution> cs(rawCS); QuerySolution *qs, *backupQs; Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs, &qs, &backupQs); // See SERVER-12438. Unfortunately we have to defer to the backup solution // if both a batch size is set and a sort is requested. // // TODO: it would be really nice to delete this block in the future. if (status.isOK() && NULL != backupQs && 0 < canonicalQuery->getParsed().getNumToReturn() && !canonicalQuery->getParsed().getSort().isEmpty()) { delete qs; WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*backupQs, &root, &ws)); // And, run the plan. *out = new SingleSolutionRunner(collection, canonicalQuery.release(), backupQs, root, ws); return Status::OK(); } if (status.isOK()) { if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) { if (turnIxscanIntoCount(qs)) { WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*qs, &root, &ws)); *out = new SingleSolutionRunner(collection, canonicalQuery.release(), qs, root, ws); if (NULL != backupQs) { delete backupQs; } return Status::OK(); } } WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*qs, &root, &ws)); CachedPlanRunner* cpr = new CachedPlanRunner(collection, canonicalQuery.release(), qs, root, ws); if (NULL != backupQs) { WorkingSet* backupWs; PlanStage* backupRoot; verify(StageBuilder::build(*backupQs, &backupRoot, &backupWs)); cpr->setBackupPlan(backupQs, backupRoot, backupWs); } *out = cpr; return Status::OK(); } } if (enableIndexIntersection) { plannerParams.options |= QueryPlannerParams::INDEX_INTERSECTION; } plannerParams.options |= QueryPlannerParams::KEEP_MUTATIONS; vector<QuerySolution*> solutions; Status status = QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions); if (!status.isOK()) { return Status(ErrorCodes::BadValue, "error processing query: " + canonicalQuery->toString() + " planner returned error: " + status.reason()); } // We cannot figure out how to answer the query. Perhaps it requires an index // we do not have? if (0 == solutions.size()) { return Status(ErrorCodes::BadValue, str::stream() << "error processing query: " << canonicalQuery->toString() << " No query solutions"); } // See if one of our solutions is a fast count hack in disguise. if (plannerParams.options & QueryPlannerParams::PRIVATE_IS_COUNT) { for (size_t i = 0; i < solutions.size(); ++i) { if (turnIxscanIntoCount(solutions[i])) { // Great, we can use solutions[i]. Clean up the other QuerySolution(s). for (size_t j = 0; j < solutions.size(); ++j) { if (j != i) { delete solutions[j]; } } // We're not going to cache anything that's fast count. WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*solutions[i], &root, &ws)); *out = new SingleSolutionRunner(collection, canonicalQuery.release(), solutions[i], root, ws); return Status::OK(); } } } if (1 == solutions.size()) { // Only one possible plan. Run it. Build the stages from the solution. 
WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*solutions[0], &root, &ws)); // And, run the plan. *out = new SingleSolutionRunner(collection, canonicalQuery.release(),solutions[0], root, ws); return Status::OK(); } else { // See SERVER-12438. In an ideal world we should not arbitrarily prefer certain // solutions over others. But unfortunately for historical reasons we are forced // to prefer a solution where the index provides the sort, if the batch size // is set and a sort is requested. Read SERVER-12438 for details, if you dare. // // TODO: it would be really nice to delete this entire block in the future. if (0 < canonicalQuery->getParsed().getNumToReturn() && !canonicalQuery->getParsed().getSort().isEmpty()) { // Look for a solution without a blocking sort stage. for (size_t i = 0; i < solutions.size(); ++i) { if (!solutions[i]->hasSortStage) { WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*solutions[i], &root, &ws)); // Free unused solutions. for (size_t j = 0; j < solutions.size(); ++j) { if (j != i) { delete solutions[j]; } } // And, run the plan. *out = new SingleSolutionRunner(collection, canonicalQuery.release(), solutions[i], root, ws); return Status::OK(); } } } // Many solutions. Let the MultiPlanRunner pick the best, update the cache, and so on. auto_ptr<MultiPlanRunner> mpr(new MultiPlanRunner(collection,canonicalQuery.release())); for (size_t i = 0; i < solutions.size(); ++i) { WorkingSet* ws; PlanStage* root; if (solutions[i]->cacheData.get()) { solutions[i]->cacheData->indexFilterApplied = plannerParams.indexFiltersApplied; } verify(StageBuilder::build(*solutions[i], &root, &ws)); // Takes ownership of all arguments. mpr->addPlan(solutions[i], root, ws); } *out = mpr.release(); return Status::OK(); } }
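getRunner() above is long, but its control flow reduces to a short decision ladder: no collection yields an EOF runner, an _id point query takes the idhack path, a plan-cache hit replays the cached solution, zero planner solutions is an error, exactly one solution runs directly, and anything else goes to the multi-plan runner. The stand-alone sketch below models only that ladder; FakeQuery, RunnerChoice, and pickRunner are invented stand-ins for illustration, not MongoDB types, and conditions such as "cache hit" are collapsed into booleans.

// Minimal sketch of the runner-selection ladder, assuming the stand-in types below.
#include <cstddef>
#include <iostream>

enum class RunnerChoice { EOFRunner, IDHack, CachedPlan, SingleSolution, MultiPlan, Error };

struct FakeQuery {
    bool collectionExists;   // internal callers may pass a NULL collection
    bool idPointQuery;       // stands in for canUseIDHack() plus a present _id index
    bool hasCachedSolution;  // stands in for a PlanCache hit
    std::size_t numSolutions; // stands in for the size of QueryPlanner::plan() output
};

RunnerChoice pickRunner(const FakeQuery& q) {
    if (!q.collectionExists)  return RunnerChoice::EOFRunner;
    if (q.idPointQuery)       return RunnerChoice::IDHack;         // bypass the planner entirely
    if (q.hasCachedSolution)  return RunnerChoice::CachedPlan;     // replay a previously winning plan
    if (q.numSolutions == 0)  return RunnerChoice::Error;          // "No query solutions"
    if (q.numSolutions == 1)  return RunnerChoice::SingleSolution; // only one candidate: just run it
    return RunnerChoice::MultiPlan;                                // race candidates, cache the winner
}

int main() {
    FakeQuery q{true, false, false, 3};
    std::cout << static_cast<int>(pickRunner(q)) << "\n"; // prints 4 (MultiPlan)
    return 0;
}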
Status getRunnerDistinct(Collection* collection, const BSONObj& query, const string& field, Runner** out) { // This should'a been checked by the distinct command. verify(collection); // TODO: check for idhack here? // When can we do a fast distinct hack? // 1. There is a plan with just one leaf and that leaf is an ixscan. // 2. The ixscan indexes the field we're interested in. // 2a: We are correct if the index contains the field but for now we look for prefix. // 3. The query is covered/no fetch. // // We go through normal planning (with limited parameters) to see if we can produce // a soln with the above properties. QueryPlannerParams plannerParams; plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN; IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false); while (ii.more()) { const IndexDescriptor* desc = ii.next(); // The distinct hack can work if any field is in the index but it's not always clear // if it's a win unless it's the first field. if (desc->keyPattern().firstElement().fieldName() == field) { plannerParams.indices.push_back(IndexEntry(desc->keyPattern(), desc->isMultikey(), desc->isSparse(), desc->indexName(), desc->infoObj())); } } // We only care about the field that we're projecting over. Have to drop the _id field // explicitly because those are .find() semantics. // // Applying a projection allows the planner to try to give us covered plans. BSONObj projection; if ("_id" == field) { projection = BSON("_id" << 1); } else { projection = BSON("_id" << 0 << field << 1); } // Apply a projection of the key. Empty BSONObj() is for the sort. CanonicalQuery* cq; Status status = CanonicalQuery::canonicalize(collection->ns().ns(), query, BSONObj(), projection, &cq); if (!status.isOK()) { return status; } // No index has the field we're looking for. Punt to normal planning. if (plannerParams.indices.empty()) { // Takes ownership of cq. return getRunner(cq, out); } // If we're here, we have an index prefixed by the field we're distinct-ing over. // If there's no query, we can just distinct-scan one of the indices. if (query.isEmpty()) { DistinctNode* dn = new DistinctNode(); dn->indexKeyPattern = plannerParams.indices[0].keyPattern; dn->direction = 1; IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds); dn->fieldNo = 0; QueryPlannerParams params; // Takes ownership of 'dn'. QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn); verify(soln); WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*soln, &root, &ws)); *out = new SingleSolutionRunner(collection, cq, soln, root, ws); return Status::OK(); } // See if we can answer the query in a fast-distinct compatible fashion. vector<QuerySolution*> solutions; status = QueryPlanner::plan(*cq, plannerParams, &solutions); if (!status.isOK()) { return getRunner(cq, out); } // XXX: why do we need to do this? planner should prob do this internally cq->root()->resetTag(); // We look for a solution that has an ixscan we can turn into a distinctixscan for (size_t i = 0; i < solutions.size(); ++i) { if (turnIxscanIntoDistinctIxscan(solutions[i], field)) { // Great, we can use solutions[i]. Clean up the other QuerySolution(s). for (size_t j = 0; j < solutions.size(); ++j) { if (j != i) { delete solutions[j]; } } // Build and return the SSR over solutions[i]. 
WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*solutions[i], &root, &ws)); *out = new SingleSolutionRunner(collection, cq, solutions[i], root, ws); return Status::OK(); } } // If we're here, the planner made a soln with the restricted index set but we couldn't // translate any of them into a distinct-compatible soln. So, delete the solutions and just // go through normal planning. for (size_t i = 0; i < solutions.size(); ++i) { delete solutions[i]; } return getRunner(cq, out); }
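The projection built near the top of getRunnerDistinct() is what gives the planner a chance to produce covered plans, which the distinct hack depends on: keep only the field being distinct-ed, and suppress _id explicitly (find() semantics would otherwise include it) unless _id is itself the field. A minimal sketch of that rule, using plain strings instead of BSONObjBuilder so it stands alone:

// Sketch of the distinct-projection rule, with a string in place of a BSONObj.
#include <iostream>
#include <string>

std::string distinctProjection(const std::string& field) {
    if (field == "_id")
        return "{ _id: 1 }";                    // the field we want is _id itself
    return "{ _id: 0, " + field + ": 1 }";      // drop _id, keep only the distinct field
}

int main() {
    std::cout << distinctProjection("x") << "\n";   // { _id: 0, x: 1 }
    std::cout << distinctProjection("_id") << "\n"; // { _id: 1 }
    return 0;
}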
Status repairDatabase( string dbName, bool preserveClonedFilesOnFailure, bool backupOriginalFiles ) { scoped_ptr<RepairFileDeleter> repairFileDeleter; doingRepair dr; dbName = nsToDatabase( dbName ); log() << "repairDatabase " << dbName << endl; invariant( cc().database()->name() == dbName ); invariant( cc().database()->path() == storageGlobalParams.dbpath ); BackgroundOperation::assertNoBgOpInProgForDb(dbName); getDur().syncDataAndTruncateJournal(); // Must be done before and after repair intmax_t totalSize = dbSize( dbName ); intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath); if ( freeSize > -1 && freeSize < totalSize ) { return Status( ErrorCodes::OutOfDiskSpace, str::stream() << "Cannot repair database " << dbName << " having size: " << totalSize << " (bytes) because free disk space is: " << freeSize << " (bytes)" ); } killCurrentOp.checkForInterrupt(); Path reservedPath = uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ? "backup" : "_tmp" ); MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) ); string reservedPathString = reservedPath.string(); if ( !preserveClonedFilesOnFailure ) repairFileDeleter.reset( new RepairFileDeleter( dbName, reservedPathString, reservedPath ) ); { Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath ); if ( originalDatabase == NULL ) return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" ); Database* tempDatabase = NULL; { bool justCreated = false; tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated ); invariant( justCreated ); } map<string,CollectionOptions> namespacesToCopy; { string ns = dbName + ".system.namespaces"; Client::Context ctx( ns ); Collection* coll = originalDatabase->getCollection( ns ); if ( coll ) { scoped_ptr<CollectionIterator> it( coll->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !it->isEOF() ) { DiskLoc loc = it->getNext(); BSONObj obj = coll->docFor( loc ); string ns = obj["name"].String(); NamespaceString nss( ns ); if ( nss.isSystem() ) { if ( nss.isSystemDotIndexes() ) continue; if ( nss.coll() == "system.namespaces" ) continue; } if ( !nss.isNormal() ) continue; CollectionOptions options; if ( obj["options"].isABSONObj() ) { Status status = options.parse( obj["options"].Obj() ); if ( !status.isOK() ) return status; } namespacesToCopy[ns] = options; } } } for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin(); i != namespacesToCopy.end(); ++i ) { string ns = i->first; CollectionOptions options = i->second; Collection* tempCollection = NULL; { Client::Context tempContext( ns, tempDatabase ); tempCollection = tempDatabase->createCollection( ns, options, true, false ); } Client::Context readContext( ns, originalDatabase ); Collection* originalCollection = originalDatabase->getCollection( ns ); invariant( originalCollection ); // data MultiIndexBlock indexBlock( tempCollection ); { vector<BSONObj> indexes; IndexCatalog::IndexIterator ii = originalCollection->getIndexCatalog()->getIndexIterator( false ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); indexes.push_back( desc->infoObj() ); } Client::Context tempContext( ns, tempDatabase ); Status status = indexBlock.init( indexes ); if ( !status.isOK() ) return status; } scoped_ptr<CollectionIterator> iterator( originalCollection->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !iterator->isEOF() ) { DiskLoc loc = iterator->getNext(); 
invariant( !loc.isNull() ); BSONObj doc = originalCollection->docFor( loc ); Client::Context tempContext( ns, tempDatabase ); StatusWith<DiskLoc> result = tempCollection->insertDocument( doc, indexBlock ); if ( !result.isOK() ) return result.getStatus(); getDur().commitIfNeeded(); killCurrentOp.checkForInterrupt(false); } { Client::Context tempContext( ns, tempDatabase ); Status status = indexBlock.commit(); if ( !status.isOK() ) return status; } } getDur().syncDataAndTruncateJournal(); MongoFile::flushAll(true); // need both in case journaling is disabled killCurrentOp.checkForInterrupt(false); Client::Context tempContext( dbName, reservedPathString ); Database::closeDatabase( dbName, reservedPathString ); } // at this point if we abort, we don't want to delete new files // as they might be the only copies if ( repairFileDeleter.get() ) repairFileDeleter->success(); Client::Context ctx( dbName ); Database::closeDatabase(dbName, storageGlobalParams.dbpath); if ( backupOriginalFiles ) { _renameForBackup( dbName, reservedPath ); } else { // first make new directory before deleting data Path newDir = Path(storageGlobalParams.dbpath) / dbName; MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); // this deletes old files _deleteDataFiles( dbName ); if ( !boost::filesystem::exists(newDir) ) { // we deleted because of directoryperdb // re-create MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); } } _replaceWithRecovered( dbName, reservedPathString.c_str() ); if ( !backupOriginalFiles ) MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) ); return Status::OK(); }
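Before any of the copying above begins, repairDatabase() refuses to start when the repair path cannot hold another copy of the database: it compares the database's on-disk size with the free space at the repair path and bails out early rather than failing halfway through. A small stand-alone sketch of that guard is below; FakeStatus and the byte counts are illustrative stand-ins, not MongoDB types, and a negative free-space value is treated as "unknown" and skips the check, mirroring the code above.

// Minimal sketch of the up-front disk-space guard, assuming the stand-in FakeStatus type.
#include <cstdint>
#include <iostream>
#include <string>

struct FakeStatus {
    bool ok;
    std::string reason;
};

FakeStatus checkRepairSpace(std::int64_t dbSizeBytes, std::int64_t freeBytes) {
    // freeBytes < 0 means the platform could not report free space; skip the check then.
    if (freeBytes > -1 && freeBytes < dbSizeBytes) {
        return {false, "cannot repair: database occupies " + std::to_string(dbSizeBytes) +
                       " bytes but only " + std::to_string(freeBytes) + " bytes are free"};
    }
    return {true, ""};
}

int main() {
    std::cout << checkRepairSpace(10000000, 2000000).reason << "\n";
    return 0;
}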
virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { Lock::GlobalWrite globalWriteLock(txn->lockState()); string source = cmdObj.getStringField( name.c_str() ); string target = cmdObj.getStringField( "to" ); // We stay in source context the whole time. This is mostly to set the CurOp namespace. Client::Context ctx(txn, source); if ( !NamespaceString::validCollectionComponent(target.c_str()) ) { errmsg = "invalid collection name: " + target; return false; } if ( source.empty() || target.empty() ) { errmsg = "invalid command syntax"; return false; } if (!fromRepl) { // If it got through on the master, need to allow it here too Status sourceStatus = userAllowedWriteNS(source); if (!sourceStatus.isOK()) { errmsg = "error with source namespace: " + sourceStatus.reason(); return false; } Status targetStatus = userAllowedWriteNS(target); if (!targetStatus.isOK()) { errmsg = "error with target namespace: " + targetStatus.reason(); return false; } } if (NamespaceString(source).coll() == "system.indexes" || NamespaceString(target).coll() == "system.indexes") { errmsg = "renaming system.indexes is not allowed"; return false; } Database* const sourceDB = dbHolder().get(txn, nsToDatabase(source)); Collection* const sourceColl = sourceDB ? sourceDB->getCollection(txn, source) : NULL; if (!sourceColl) { errmsg = "source namespace does not exist"; return false; } { // Ensure that collection name does not exceed maximum length. // Ensure that index names do not push the length over the max. // Iterator includes unfinished indexes. IndexCatalog::IndexIterator sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator( txn, true ); int longestIndexNameLength = 0; while ( sourceIndIt.more() ) { int thisLength = sourceIndIt.next()->indexName().length(); if ( thisLength > longestIndexNameLength ) longestIndexNameLength = thisLength; } unsigned int longestAllowed = min(int(NamespaceString::MaxNsCollectionLen), int(NamespaceString::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength); if (target.size() > longestAllowed) { StringBuilder sb; sb << "collection name length of " << target.size() << " exceeds maximum length of " << longestAllowed << ", allowing for index names"; errmsg = sb.str(); return false; } } const std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, sourceDB, cmdObj); // Dismissed on success ScopeGuard indexBuildRestorer = MakeGuard(IndexBuilder::restoreIndexes, indexesInProg); Database* const targetDB = dbHolder().openDb(txn, nsToDatabase(target)); { WriteUnitOfWork wunit(txn); // Check if the target namespace exists and if dropTarget is true. // If target exists and dropTarget is not true, return false. if (targetDB->getCollection(txn, target)) { if (!cmdObj["dropTarget"].trueValue()) { errmsg = "target namespace exists"; return false; } Status s = targetDB->dropCollection(txn, target); if ( !s.isOK() ) { errmsg = s.toString(); return false; } } // If we are renaming in the same database, just // rename the namespace and we're done. if (sourceDB == targetDB) { Status s = targetDB->renameCollection(txn, source, target, cmdObj["stayTemp"].trueValue() ); if (!s.isOK()) { return appendCommandStatus(result, s); } if (!fromRepl) { repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj); } wunit.commit(); indexBuildRestorer.Dismiss(); return true; } wunit.commit(); } // If we get here, we are renaming across databases, so we must copy all the data and // indexes, then remove the source collection. 
// Create the target collection. It will be removed if we fail to copy the collection. // TODO use a temp collection and unset the temp flag on success. Collection* targetColl = NULL; { CollectionOptions options; options.setNoIdIndex(); if (sourceColl->isCapped()) { const CollectionOptions sourceOpts = sourceColl->getCatalogEntry()->getCollectionOptions(txn); options.capped = true; options.cappedSize = sourceOpts.cappedSize; options.cappedMaxDocs = sourceOpts.cappedMaxDocs; } WriteUnitOfWork wunit(txn); // No logOp necessary because the entire renameCollection command is one logOp. targetColl = targetDB->createCollection(txn, target, options); if (!targetColl) { errmsg = "Failed to create target collection."; return false; } wunit.commit(); } // Dismissed on success ScopeGuard targetCollectionDropper = MakeGuard(dropCollection, txn, targetDB, target); MultiIndexBlock indexer(txn, targetColl); indexer.allowInterruption(); // Copy the index descriptions from the source collection, adjusting the ns field. { std::vector<BSONObj> indexesToCopy; IndexCatalog::IndexIterator sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator( txn, true ); while (sourceIndIt.more()) { const BSONObj currIndex = sourceIndIt.next()->infoObj(); // Process the source index. BSONObjBuilder newIndex; newIndex.append("ns", target); newIndex.appendElementsUnique(currIndex); indexesToCopy.push_back(newIndex.obj()); } indexer.init(indexesToCopy); } { // Copy over all the data from source collection to target collection. boost::scoped_ptr<RecordIterator> sourceIt(sourceColl->getIterator(txn)); while (!sourceIt->isEOF()) { txn->checkForInterrupt(false); const BSONObj obj = sourceColl->docFor(txn, sourceIt->getNext()); WriteUnitOfWork wunit(txn); // No logOp necessary because the entire renameCollection command is one logOp. Status status = targetColl->insertDocument(txn, obj, &indexer, true).getStatus(); if (!status.isOK()) return appendCommandStatus(result, status); wunit.commit(); } } Status status = indexer.doneInserting(); if (!status.isOK()) return appendCommandStatus(result, status); { // Getting here means we successfully built the target copy. We now remove the // source collection and finalize the rename. WriteUnitOfWork wunit(txn); Status status = sourceDB->dropCollection(txn, source); if (!status.isOK()) return appendCommandStatus(result, status); indexer.commit(); if (!fromRepl) { repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj); } wunit.commit(); } indexBuildRestorer.Dismiss(); targetCollectionDropper.Dismiss(); return true; }
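The length check near the top of the rename command is easy to miss: the target namespace must still fit in a fixed-size namespace entry once the longest index name, plus the ".$" separator, is appended to it. The self-contained sketch below reproduces that arithmetic; the two limit constants are stand-in values chosen for illustration, not the real NamespaceString::MaxNsCollectionLen and NamespaceString::MaxNsLen.

// Sketch of the rename pre-check, assuming stand-in values for the namespace length limits.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

bool renameTargetFits(const std::string& targetNs,
                      const std::vector<std::string>& indexNames,
                      int maxNsCollectionLen = 120,  // stand-in limit, not the real constant
                      int maxNsLen = 128) {          // stand-in limit, not the real constant
    int longestIndexName = 0;
    for (const std::string& n : indexNames)
        longestIndexName = std::max(longestIndexName, static_cast<int>(n.size()));

    // The 2 accounts for the ".$" separating the collection namespace from the index name.
    int longestAllowed = std::min(maxNsCollectionLen, maxNsLen - 2 - longestIndexName);
    return static_cast<int>(targetNs.size()) <= longestAllowed;
}

int main() {
    std::cout << renameTargetFits("test.shortName", {"_id_", "x_1"}) << "\n";                 // 1
    std::cout << renameTargetFits(std::string(125, 'a'), {"averyverylongindexname"}) << "\n"; // 0
    return 0;
}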
Status getRunnerDistinct(Collection* collection, const BSONObj& query, const string& field, Runner** out) { // This should'a been checked by the distinct command. verify(collection); // TODO: check for idhack here? // When can we do a fast distinct hack? // 1. There is a plan with just one leaf and that leaf is an ixscan. // 2. The ixscan indexes the field we're interested in. // 2a: We are correct if the index contains the field but for now we look for prefix. // 3. The query is covered/no fetch. // // We go through normal planning (with limited parameters) to see if we can produce // a soln with the above properties. QueryPlannerParams plannerParams; plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN; IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false); while (ii.more()) { const IndexDescriptor* desc = ii.next(); // The distinct hack can work if any field is in the index but it's not always clear // if it's a win unless it's the first field. if (desc->keyPattern().firstElement().fieldName() == field) { plannerParams.indices.push_back(IndexEntry(desc->keyPattern(), desc->getAccessMethodName(), desc->isMultikey(), desc->isSparse(), desc->indexName(), desc->infoObj())); } } // If there are no suitable indices for the distinct hack bail out now into regular planning // with no projection. if (plannerParams.indices.empty()) { CanonicalQuery* cq; Status status = CanonicalQuery::canonicalize(collection->ns().ns(), query, BSONObj(), BSONObj(), &cq); if (!status.isOK()) { return status; } // Takes ownership of cq. return getRunner(collection, cq, out); } // // If we're here, we have an index prefixed by the field we're distinct-ing over. // // Applying a projection allows the planner to try to give us covered plans that we can turn // into the projection hack. getDistinctProjection deals with .find() projection semantics // (ie _id:1 being implied by default). BSONObj projection = getDistinctProjection(field); // Apply a projection of the key. Empty BSONObj() is for the sort. CanonicalQuery* cq; Status status = CanonicalQuery::canonicalize(collection->ns().ns(), query, BSONObj(), projection, &cq); if (!status.isOK()) { return status; } // If there's no query, we can just distinct-scan one of the indices. // Not every index in plannerParams.indices may be suitable. Refer to // getDistinctNodeIndex(). size_t distinctNodeIndex = 0; if (query.isEmpty() && getDistinctNodeIndex(plannerParams.indices, field, &distinctNodeIndex)) { DistinctNode* dn = new DistinctNode(); dn->indexKeyPattern = plannerParams.indices[distinctNodeIndex].keyPattern; dn->direction = 1; IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds); dn->fieldNo = 0; QueryPlannerParams params; // Takes ownership of 'dn'. QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn); verify(soln); LOG(2) << "Using fast distinct: " << cq->toStringShort() << ", planSummary: " << getPlanSummary(*soln); WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(collection, *soln, &root, &ws)); *out = new SingleSolutionRunner(collection, cq, soln, root, ws); return Status::OK(); } // See if we can answer the query in a fast-distinct compatible fashion. 
vector<QuerySolution*> solutions; status = QueryPlanner::plan(*cq, plannerParams, &solutions); if (!status.isOK()) { return getRunner(collection, cq, out); } // We look for a solution that has an ixscan we can turn into a distinctixscan for (size_t i = 0; i < solutions.size(); ++i) { if (turnIxscanIntoDistinctIxscan(solutions[i], field)) { // Great, we can use solutions[i]. Clean up the other QuerySolution(s). for (size_t j = 0; j < solutions.size(); ++j) { if (j != i) { delete solutions[j]; } } LOG(2) << "Using fast distinct: " << cq->toStringShort() << ", planSummary: " << getPlanSummary(*solutions[i]); // Build and return the SSR over solutions[i]. WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(collection, *solutions[i], &root, &ws)); *out = new SingleSolutionRunner(collection, cq, solutions[i], root, ws); return Status::OK(); } } // If we're here, the planner made a soln with the restricted index set but we couldn't // translate any of them into a distinct-compatible soln. So, delete the solutions and just // go through normal planning. for (size_t i = 0; i < solutions.size(); ++i) { delete solutions[i]; } // We drop the projection from the 'cq'. Unfortunately this is not trivial. delete cq; status = CanonicalQuery::canonicalize(collection->ns().ns(), query, BSONObj(), BSONObj(), &cq); if (!status.isOK()) { return status; } // Takes ownership of cq. return getRunner(collection, cq, out); }
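This newer getRunnerDistinct() has a characteristic shape worth calling out: attempt a specialised plan under deliberately restricted parameters (NO_TABLE_SCAN, only indexes prefixed by the field, a covering projection), and if nothing can be turned into a distinct scan, rebuild the query without the projection and defer to the general getRunner() path. The stand-alone sketch below models only that fallback structure; FakePlan, tryDistinctScan, and planDistinct are invented stand-ins, not the real planner interfaces.

// Sketch of the "try the fast distinct plan, else fall back to general planning" shape.
#include <functional>
#include <iostream>
#include <optional>
#include <string>

struct FakePlan { std::string summary; };

std::optional<FakePlan> tryDistinctScan(bool anIndexStartsWithField, bool solutionCovered) {
    if (anIndexStartsWithField && solutionCovered)
        return FakePlan{"DISTINCT_SCAN"};
    return std::nullopt;  // caller must fall back to normal planning
}

FakePlan planDistinct(bool anIndexStartsWithField, bool solutionCovered,
                      const std::function<FakePlan()>& generalPlanner) {
    if (auto fast = tryDistinctScan(anIndexStartsWithField, solutionCovered))
        return *fast;
    return generalPlanner();  // analogous to the final getRunner(collection, cq, out) call
}

int main() {
    auto general = [] { return FakePlan{"COLLSCAN + group"}; };
    std::cout << planDistinct(true, true, general).summary << "\n";  // DISTINCT_SCAN
    std::cout << planDistinct(true, false, general).summary << "\n"; // COLLSCAN + group
    return 0;
}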