Status Collection::validate(OperationContext* txn,
                            bool full,
                            bool scanData,
                            ValidateResults* results,
                            BSONObjBuilder* output) {
    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IS));

    MyValidateAdaptor adaptor;
    Status status = _recordStore->validate(txn, full, scanData, &adaptor, results, output);
    if (!status.isOK())
        return status;

    {  // indexes
        output->append("nIndexes", _indexCatalog.numIndexesReady(txn));
        int idxn = 0;
        try {
            // Only applicable when 'full' validation is requested.
            std::unique_ptr<BSONObjBuilder> indexDetails(full ? new BSONObjBuilder() : NULL);
            BSONObjBuilder indexes;  // not using subObjStart to be exception safe

            IndexCatalog::IndexIterator i = _indexCatalog.getIndexIterator(txn, false);
            while (i.more()) {
                const IndexDescriptor* descriptor = i.next();
                log(LogComponent::kIndex) << "validating index " << descriptor->indexNamespace()
                                          << endl;
                IndexAccessMethod* iam = _indexCatalog.getIndex(descriptor);
                invariant(iam);

                std::unique_ptr<BSONObjBuilder> bob(
                    indexDetails.get()
                        ? new BSONObjBuilder(indexDetails->subobjStart(descriptor->indexNamespace()))
                        : NULL);

                int64_t keys;
                iam->validate(txn, full, &keys, bob.get());
                indexes.appendNumber(descriptor->indexNamespace(), static_cast<long long>(keys));

                if (bob) {
                    BSONObj obj = bob->done();
                    BSONElement valid = obj["valid"];
                    if (valid.ok() && !valid.trueValue()) {
                        results->valid = false;
                    }
                }
                idxn++;
            }

            output->append("keysPerIndex", indexes.done());
            if (indexDetails.get()) {
                output->append("indexDetails", indexDetails->done());
            }
        } catch (DBException& exc) {
            string err = str::stream() << "exception during index validate idxn "
                                       << BSONObjBuilder::numStr(idxn) << ": " << exc.toString();
            results->errors.push_back(err);
            results->valid = false;
        }
    }

    return Status::OK();
}
virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { Lock::GlobalWrite globalWriteLock(txn->lockState()); string source = cmdObj.getStringField( name.c_str() ); string target = cmdObj.getStringField( "to" ); // We stay in source context the whole time. This is mostly to set the CurOp namespace. Client::Context ctx(txn, source); if ( !NamespaceString::validCollectionComponent(target.c_str()) ) { errmsg = "invalid collection name: " + target; return false; } if ( source.empty() || target.empty() ) { errmsg = "invalid command syntax"; return false; } if (!fromRepl) { // If it got through on the master, need to allow it here too Status sourceStatus = userAllowedWriteNS(source); if (!sourceStatus.isOK()) { errmsg = "error with source namespace: " + sourceStatus.reason(); return false; } Status targetStatus = userAllowedWriteNS(target); if (!targetStatus.isOK()) { errmsg = "error with target namespace: " + targetStatus.reason(); return false; } } if (NamespaceString(source).coll() == "system.indexes" || NamespaceString(target).coll() == "system.indexes") { errmsg = "renaming system.indexes is not allowed"; return false; } Database* const sourceDB = dbHolder().get(txn, nsToDatabase(source)); Collection* const sourceColl = sourceDB ? sourceDB->getCollection(txn, source) : NULL; if (!sourceColl) { errmsg = "source namespace does not exist"; return false; } { // Ensure that collection name does not exceed maximum length. // Ensure that index names do not push the length over the max. // Iterator includes unfinished indexes. IndexCatalog::IndexIterator sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator( txn, true ); int longestIndexNameLength = 0; while ( sourceIndIt.more() ) { int thisLength = sourceIndIt.next()->indexName().length(); if ( thisLength > longestIndexNameLength ) longestIndexNameLength = thisLength; } unsigned int longestAllowed = min(int(NamespaceString::MaxNsCollectionLen), int(NamespaceString::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength); if (target.size() > longestAllowed) { StringBuilder sb; sb << "collection name length of " << target.size() << " exceeds maximum length of " << longestAllowed << ", allowing for index names"; errmsg = sb.str(); return false; } } const std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, sourceDB, cmdObj); // Dismissed on success ScopeGuard indexBuildRestorer = MakeGuard(IndexBuilder::restoreIndexes, indexesInProg); Database* const targetDB = dbHolder().openDb(txn, nsToDatabase(target)); { WriteUnitOfWork wunit(txn); // Check if the target namespace exists and if dropTarget is true. // If target exists and dropTarget is not true, return false. if (targetDB->getCollection(txn, target)) { if (!cmdObj["dropTarget"].trueValue()) { errmsg = "target namespace exists"; return false; } Status s = targetDB->dropCollection(txn, target); if ( !s.isOK() ) { errmsg = s.toString(); return false; } } // If we are renaming in the same database, just // rename the namespace and we're done. if (sourceDB == targetDB) { Status s = targetDB->renameCollection(txn, source, target, cmdObj["stayTemp"].trueValue() ); if (!s.isOK()) { return appendCommandStatus(result, s); } if (!fromRepl) { repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj); } wunit.commit(); indexBuildRestorer.Dismiss(); return true; } wunit.commit(); } // If we get here, we are renaming across databases, so we must copy all the data and // indexes, then remove the source collection. 
// Create the target collection. It will be removed if we fail to copy the collection. // TODO use a temp collection and unset the temp flag on success. Collection* targetColl = NULL; { CollectionOptions options; options.setNoIdIndex(); if (sourceColl->isCapped()) { const CollectionOptions sourceOpts = sourceColl->getCatalogEntry()->getCollectionOptions(txn); options.capped = true; options.cappedSize = sourceOpts.cappedSize; options.cappedMaxDocs = sourceOpts.cappedMaxDocs; } WriteUnitOfWork wunit(txn); // No logOp necessary because the entire renameCollection command is one logOp. targetColl = targetDB->createCollection(txn, target, options); if (!targetColl) { errmsg = "Failed to create target collection."; return false; } wunit.commit(); } // Dismissed on success ScopeGuard targetCollectionDropper = MakeGuard(dropCollection, txn, targetDB, target); MultiIndexBlock indexer(txn, targetColl); indexer.allowInterruption(); // Copy the index descriptions from the source collection, adjusting the ns field. { std::vector<BSONObj> indexesToCopy; IndexCatalog::IndexIterator sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator( txn, true ); while (sourceIndIt.more()) { const BSONObj currIndex = sourceIndIt.next()->infoObj(); // Process the source index. BSONObjBuilder newIndex; newIndex.append("ns", target); newIndex.appendElementsUnique(currIndex); indexesToCopy.push_back(newIndex.obj()); } indexer.init(indexesToCopy); } { // Copy over all the data from source collection to target collection. boost::scoped_ptr<RecordIterator> sourceIt(sourceColl->getIterator(txn)); while (!sourceIt->isEOF()) { txn->checkForInterrupt(false); const BSONObj obj = sourceColl->docFor(txn, sourceIt->getNext()); WriteUnitOfWork wunit(txn); // No logOp necessary because the entire renameCollection command is one logOp. Status status = targetColl->insertDocument(txn, obj, &indexer, true).getStatus(); if (!status.isOK()) return appendCommandStatus(result, status); wunit.commit(); } } Status status = indexer.doneInserting(); if (!status.isOK()) return appendCommandStatus(result, status); { // Getting here means we successfully built the target copy. We now remove the // source collection and finalize the rename. WriteUnitOfWork wunit(txn); Status status = sourceDB->dropCollection(txn, source); if (!status.isOK()) return appendCommandStatus(result, status); indexer.commit(); if (!fromRepl) { repl::logOp(txn, "c", (dbname + ".$cmd").c_str(), cmdObj); } wunit.commit(); } indexBuildRestorer.Dismiss(); targetCollectionDropper.Dismiss(); return true; }
StatusWith<DiskLoc> Collection::updateDocument( OperationContext* txn, const DiskLoc& oldLocation, const BSONObj& objNew, bool enforceQuota, OpDebug* debug ) { BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).toBson(); if ( objOld.hasElement( "_id" ) ) { BSONElement oldId = objOld["_id"]; BSONElement newId = objNew["_id"]; if ( oldId != newId ) return StatusWith<DiskLoc>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596 ); } if ( ns().coll() == "system.users" ) { // XXX - andy and spencer think this should go away now V2UserDocumentParser parser; Status s = parser.checkValidUserDocument(objNew); if ( !s.isOK() ) return StatusWith<DiskLoc>( s ); } /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further below. that is suboptimal, but it's pretty complicated to do it the other way without rollbacks... */ OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets; IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket ); if ( !ret.isOK() ) { return StatusWith<DiskLoc>( ret ); } } // this can callback into Collection::recordStoreGoingToMove StatusWith<DiskLoc> newLocation = _recordStore->updateRecord( txn, oldLocation, objNew.objdata(), objNew.objsize(), _enforceQuota( enforceQuota ), this ); if ( !newLocation.isOK() ) { return newLocation; } _infoCache.notifyOfWriteOp(); if ( newLocation.getValue() != oldLocation ) { if ( debug ) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } _indexCatalog.indexRecord(txn, objNew, newLocation.getValue()); return newLocation; } if ( debug ) debug->keyUpdates = 0; ii = _indexCatalog.getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); int64_t updatedKeys; Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys); if ( !ret.isOK() ) return StatusWith<DiskLoc>( ret ); if ( debug ) debug->keyUpdates += updatedKeys; } // Broadcast the mutation so that query results stay correct. _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION); return newLocation; }
StatusWith<RecordId> Collection::updateDocument(OperationContext* txn,
                                                const RecordId& oldLocation,
                                                const Snapshotted<BSONObj>& oldDoc,
                                                const BSONObj& newDoc,
                                                bool enforceQuota,
                                                bool indexesAffected,
                                                OpDebug* debug,
                                                oplogUpdateEntryArgs& args) {
    {
        auto status = checkValidation(txn, newDoc);
        if (!status.isOK()) {
            if (_validationLevel == STRICT_V) {
                return status;
            }
            // moderate means we have to check the old doc
            auto oldDocStatus = checkValidation(txn, oldDoc.value());
            if (oldDocStatus.isOK()) {
                // transitioning from good -> bad is not ok
                return status;
            }
            // bad -> bad is ok in moderate mode
        }
    }

    dassert(txn->lockState()->isCollectionLockedForMode(ns().toString(), MODE_IX));
    invariant(oldDoc.snapshotId() == txn->recoveryUnit()->getSnapshotId());

    SnapshotId sid = txn->recoveryUnit()->getSnapshotId();

    BSONElement oldId = oldDoc.value()["_id"];
    if (!oldId.eoo() && (oldId != newDoc["_id"]))
        return StatusWith<RecordId>(
            ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596);

    // At the end of this step, we will have a map of UpdateTickets, one per index, which
    // represent the index updates needed to be done, based on the changes between oldDoc and
    // newDoc.
    OwnedPointerMap<IndexDescriptor*, UpdateTicket> updateTickets;
    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexCatalogEntry* entry = ii.catalogEntry(descriptor);
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            InsertDeleteOptions options;
            options.logIfError = false;
            options.dupsAllowed =
                !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) ||
                repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor);
            UpdateTicket* updateTicket = new UpdateTicket();
            updateTickets.mutableMap()[descriptor] = updateTicket;
            Status ret = iam->validateUpdate(txn,
                                             oldDoc.value(),
                                             newDoc,
                                             oldLocation,
                                             options,
                                             updateTicket,
                                             entry->getFilterExpression());
            if (!ret.isOK()) {
                return StatusWith<RecordId>(ret);
            }
        }
    }

    // This can call back into Collection::recordStoreGoingToMove. If that happens, the old
    // object is removed from all indexes.
    StatusWith<RecordId> newLocation = _recordStore->updateRecord(
        txn, oldLocation, newDoc.objdata(), newDoc.objsize(), _enforceQuota(enforceQuota), this);

    if (!newLocation.isOK()) {
        return newLocation;
    }

    // At this point, the old object may or may not still be indexed, depending on if it was
    // moved. If the object did move, we need to add the new location to all indexes.
    if (newLocation.getValue() != oldLocation) {
        if (debug) {
            if (debug->nmoved == -1)  // default of -1 rather than 0
                debug->nmoved = 1;
            else
                debug->nmoved += 1;
        }

        Status s = _indexCatalog.indexRecord(txn, newDoc, newLocation.getValue());
        if (!s.isOK())
            return StatusWith<RecordId>(s);
        invariant(sid == txn->recoveryUnit()->getSnapshotId());
        args.ns = ns().ns();
        getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

        return newLocation;
    }

    // Object did not move. We update each index with each respective UpdateTicket.
    if (debug)
        debug->keyUpdates = 0;

    if (indexesAffected) {
        IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator(txn, true);
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            IndexAccessMethod* iam = ii.accessMethod(descriptor);

            int64_t updatedKeys;
            Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys);
            if (!ret.isOK())
                return StatusWith<RecordId>(ret);
            if (debug)
                debug->keyUpdates += updatedKeys;
        }
    }

    invariant(sid == txn->recoveryUnit()->getSnapshotId());
    args.ns = ns().ns();
    getGlobalServiceContext()->getOpObserver()->onUpdate(txn, args);

    return newLocation;
}
void fillOutPlannerParams(Collection* collection,
                          CanonicalQuery* canonicalQuery,
                          QueryPlannerParams* plannerParams) {
    // If it's not NULL, we may have indices. Access the catalog and fill out IndexEntry(s)
    IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false);
    while (ii.more()) {
        const IndexDescriptor* desc = ii.next();
        plannerParams->indices.push_back(IndexEntry(desc->keyPattern(),
                                                    desc->getAccessMethodName(),
                                                    desc->isMultikey(),
                                                    desc->isSparse(),
                                                    desc->indexName(),
                                                    desc->infoObj()));
    }

    // If query supports index filters, filter params.indices by indices in query settings.
    QuerySettings* querySettings = collection->infoCache()->getQuerySettings();
    AllowedIndices* allowedIndicesRaw;

    // Filter index catalog if index filters are specified for query.
    // Also, signal to planner that application hint should be ignored.
    if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) {
        boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw);
        filterAllowedIndexEntries(*allowedIndices, &plannerParams->indices);
        plannerParams->indexFiltersApplied = true;
    }

    // We will not output collection scans unless there are no indexed solutions. NO_TABLE_SCAN
    // overrides this behavior by not outputting a collscan even if there are no indexed
    // solutions.
    if (storageGlobalParams.noTableScan) {
        const string& ns = canonicalQuery->ns();
        // There are certain cases where we ignore this restriction:
        bool ignore = canonicalQuery->getQueryObj().isEmpty() ||
            (string::npos != ns.find(".system.")) || (0 == ns.find("local."));
        if (!ignore) {
            plannerParams->options |= QueryPlannerParams::NO_TABLE_SCAN;
        }
    }

    // If the caller wants a shard filter, make sure we're actually sharded.
    if (plannerParams->options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
        CollectionMetadataPtr collMetadata =
            shardingState.getCollectionMetadata(canonicalQuery->ns());
        if (collMetadata) {
            plannerParams->shardKey = collMetadata->getKeyPattern();
        } else {
            // If there's no metadata don't bother w/the shard filter since we won't know what
            // the key pattern is anyway...
            plannerParams->options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER;
        }
    }

    if (internalQueryPlannerEnableIndexIntersection) {
        plannerParams->options |= QueryPlannerParams::INDEX_INTERSECTION;
    }

    plannerParams->options |= QueryPlannerParams::KEEP_MUTATIONS;
    plannerParams->options |= QueryPlannerParams::SPLIT_LIMITED_SORT;
}
Status getRunnerDistinct(Collection* collection, const BSONObj& query, const string& field, Runner** out) { // This should'a been checked by the distinct command. verify(collection); // TODO: check for idhack here? // When can we do a fast distinct hack? // 1. There is a plan with just one leaf and that leaf is an ixscan. // 2. The ixscan indexes the field we're interested in. // 2a: We are correct if the index contains the field but for now we look for prefix. // 3. The query is covered/no fetch. // // We go through normal planning (with limited parameters) to see if we can produce // a soln with the above properties. QueryPlannerParams plannerParams; plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN; IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false); while (ii.more()) { const IndexDescriptor* desc = ii.next(); // The distinct hack can work if any field is in the index but it's not always clear // if it's a win unless it's the first field. if (desc->keyPattern().firstElement().fieldName() == field) { plannerParams.indices.push_back(IndexEntry(desc->keyPattern(), desc->getAccessMethodName(), desc->isMultikey(), desc->isSparse(), desc->indexName(), desc->infoObj())); } } // If there are no suitable indices for the distinct hack bail out now into regular planning // with no projection. if (plannerParams.indices.empty()) { CanonicalQuery* cq; Status status = CanonicalQuery::canonicalize(collection->ns().ns(), query, BSONObj(), BSONObj(), &cq); if (!status.isOK()) { return status; } // Takes ownership of cq. return getRunner(collection, cq, out); } // // If we're here, we have an index prefixed by the field we're distinct-ing over. // // Applying a projection allows the planner to try to give us covered plans that we can turn // into the projection hack. getDistinctProjection deals with .find() projection semantics // (ie _id:1 being implied by default). BSONObj projection = getDistinctProjection(field); // Apply a projection of the key. Empty BSONObj() is for the sort. CanonicalQuery* cq; Status status = CanonicalQuery::canonicalize(collection->ns().ns(), query, BSONObj(), projection, &cq); if (!status.isOK()) { return status; } // If there's no query, we can just distinct-scan one of the indices. // Not every index in plannerParams.indices may be suitable. Refer to // getDistinctNodeIndex(). size_t distinctNodeIndex = 0; if (query.isEmpty() && getDistinctNodeIndex(plannerParams.indices, field, &distinctNodeIndex)) { DistinctNode* dn = new DistinctNode(); dn->indexKeyPattern = plannerParams.indices[distinctNodeIndex].keyPattern; dn->direction = 1; IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds); dn->fieldNo = 0; QueryPlannerParams params; // Takes ownership of 'dn'. QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn); verify(soln); LOG(2) << "Using fast distinct: " << cq->toStringShort() << ", planSummary: " << getPlanSummary(*soln); WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(collection, *soln, &root, &ws)); *out = new SingleSolutionRunner(collection, cq, soln, root, ws); return Status::OK(); } // See if we can answer the query in a fast-distinct compatible fashion. 
vector<QuerySolution*> solutions; status = QueryPlanner::plan(*cq, plannerParams, &solutions); if (!status.isOK()) { return getRunner(collection, cq, out); } // We look for a solution that has an ixscan we can turn into a distinctixscan for (size_t i = 0; i < solutions.size(); ++i) { if (turnIxscanIntoDistinctIxscan(solutions[i], field)) { // Great, we can use solutions[i]. Clean up the other QuerySolution(s). for (size_t j = 0; j < solutions.size(); ++j) { if (j != i) { delete solutions[j]; } } LOG(2) << "Using fast distinct: " << cq->toStringShort() << ", planSummary: " << getPlanSummary(*solutions[i]); // Build and return the SSR over solutions[i]. WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(collection, *solutions[i], &root, &ws)); *out = new SingleSolutionRunner(collection, cq, solutions[i], root, ws); return Status::OK(); } } // If we're here, the planner made a soln with the restricted index set but we couldn't // translate any of them into a distinct-compatible soln. So, delete the solutions and just // go through normal planning. for (size_t i = 0; i < solutions.size(); ++i) { delete solutions[i]; } // We drop the projection from the 'cq'. Unfortunately this is not trivial. delete cq; status = CanonicalQuery::canonicalize(collection->ns().ns(), query, BSONObj(), BSONObj(), &cq); if (!status.isOK()) { return status; } // Takes ownership of cq. return getRunner(collection, cq, out); }
Status MMAPV1Engine::repairDatabase( OperationContext* txn, const std::string& dbName, bool preserveClonedFilesOnFailure, bool backupOriginalFiles ) { // We must hold some form of lock here invariant(txn->lockState()->threadState()); invariant( dbName.find( '.' ) == string::npos ); scoped_ptr<RepairFileDeleter> repairFileDeleter; log() << "repairDatabase " << dbName << endl; BackgroundOperation::assertNoBgOpInProgForDb(dbName); txn->recoveryUnit()->syncDataAndTruncateJournal(); // Must be done before and after repair intmax_t totalSize = dbSize( dbName ); intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath); if ( freeSize > -1 && freeSize < totalSize ) { return Status( ErrorCodes::OutOfDiskSpace, str::stream() << "Cannot repair database " << dbName << " having size: " << totalSize << " (bytes) because free disk space is: " << freeSize << " (bytes)" ); } txn->checkForInterrupt(); Path reservedPath = uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ? "backup" : "_tmp" ); MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) ); string reservedPathString = reservedPath.string(); if ( !preserveClonedFilesOnFailure ) repairFileDeleter.reset( new RepairFileDeleter( txn, dbName, reservedPathString, reservedPath ) ); { Database* originalDatabase = dbHolder().get(txn, dbName); if (originalDatabase == NULL) { return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair"); } scoped_ptr<MMAPV1DatabaseCatalogEntry> dbEntry; scoped_ptr<Database> tempDatabase; { dbEntry.reset( new MMAPV1DatabaseCatalogEntry( txn, dbName, reservedPathString, storageGlobalParams.directoryperdb, true ) ); invariant( !dbEntry->exists() ); tempDatabase.reset( new Database( txn, dbName, dbEntry.get() ) ); } map<string,CollectionOptions> namespacesToCopy; { string ns = dbName + ".system.namespaces"; Client::Context ctx(txn, ns ); Collection* coll = originalDatabase->getCollection( txn, ns ); if ( coll ) { scoped_ptr<RecordIterator> it( coll->getIterator( txn, DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !it->isEOF() ) { DiskLoc loc = it->getNext(); BSONObj obj = coll->docFor( loc ); string ns = obj["name"].String(); NamespaceString nss( ns ); if ( nss.isSystem() ) { if ( nss.isSystemDotIndexes() ) continue; if ( nss.coll() == "system.namespaces" ) continue; } if ( !nss.isNormal() ) continue; CollectionOptions options; if ( obj["options"].isABSONObj() ) { Status status = options.parse( obj["options"].Obj() ); if ( !status.isOK() ) return status; } namespacesToCopy[ns] = options; } } } for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin(); i != namespacesToCopy.end(); ++i ) { string ns = i->first; CollectionOptions options = i->second; Collection* tempCollection = NULL; { Client::Context tempContext(txn, ns, tempDatabase ); WriteUnitOfWork wunit(txn); tempCollection = tempDatabase->createCollection(txn, ns, options, true, false); wunit.commit(); } Client::Context readContext(txn, ns, originalDatabase); Collection* originalCollection = originalDatabase->getCollection( txn, ns ); invariant( originalCollection ); // data // TODO SERVER-14812 add a mode that drops duplicates rather than failing MultiIndexBlock indexer(txn, tempCollection ); { vector<BSONObj> indexes; IndexCatalog::IndexIterator ii = originalCollection->getIndexCatalog()->getIndexIterator( false ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); indexes.push_back( desc->infoObj() ); } Client::Context tempContext(txn, ns, 
tempDatabase); Status status = indexer.init( indexes ); if ( !status.isOK() ) return status; } scoped_ptr<RecordIterator> iterator( originalCollection->getIterator( txn, DiskLoc(), false, CollectionScanParams::FORWARD )); while ( !iterator->isEOF() ) { DiskLoc loc = iterator->getNext(); invariant( !loc.isNull() ); BSONObj doc = originalCollection->docFor( loc ); Client::Context tempContext(txn, ns, tempDatabase); WriteUnitOfWork wunit(txn); StatusWith<DiskLoc> result = tempCollection->insertDocument(txn, doc, &indexer, false); if ( !result.isOK() ) return result.getStatus(); wunit.commit(); txn->checkForInterrupt(false); } Status status = indexer.doneInserting(); if (!status.isOK()) return status; { Client::Context tempContext(txn, ns, tempDatabase); WriteUnitOfWork wunit(txn); indexer.commit(); wunit.commit(); } } txn->recoveryUnit()->syncDataAndTruncateJournal(); globalStorageEngine->flushAllFiles(true); // need both in case journaling is disabled txn->checkForInterrupt(false); } // at this point if we abort, we don't want to delete new files // as they might be the only copies if ( repairFileDeleter.get() ) repairFileDeleter->success(); dbHolder().close( txn, dbName ); if ( backupOriginalFiles ) { _renameForBackup( dbName, reservedPath ); } else { // first make new directory before deleting data Path newDir = Path(storageGlobalParams.dbpath) / dbName; MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); // this deletes old files _deleteDataFiles( dbName ); if ( !boost::filesystem::exists(newDir) ) { // we deleted because of directoryperdb // re-create MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); } } _replaceWithRecovered( dbName, reservedPathString.c_str() ); if ( !backupOriginalFiles ) MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) ); return Status::OK(); }
Status renameCollection(OperationContext* txn, const NamespaceString& source, const NamespaceString& target, bool dropTarget, bool stayTemp) { DisableDocumentValidation validationDisabler(txn); ScopedTransaction transaction(txn, MODE_X); Lock::GlobalWrite globalWriteLock(txn->lockState()); // We stay in source context the whole time. This is mostly to set the CurOp namespace. OldClientContext ctx(txn, source.ns()); bool userInitiatedWritesAndNotPrimary = txn->writesAreReplicated() && !repl::getGlobalReplicationCoordinator()->canAcceptWritesFor(source); if (userInitiatedWritesAndNotPrimary) { return Status(ErrorCodes::NotMaster, str::stream() << "Not primary while renaming collection " << source.ns() << " to " << target.ns()); } Database* const sourceDB = dbHolder().get(txn, source.db()); Collection* const sourceColl = sourceDB ? sourceDB->getCollection(source.ns()) : nullptr; if (!sourceColl) { return Status(ErrorCodes::NamespaceNotFound, "source namespace does not exist"); } { // Ensure that collection name does not exceed maximum length. // Ensure that index names do not push the length over the max. // Iterator includes unfinished indexes. IndexCatalog::IndexIterator sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator(txn, true); int longestIndexNameLength = 0; while (sourceIndIt.more()) { int thisLength = sourceIndIt.next()->indexName().length(); if (thisLength > longestIndexNameLength) longestIndexNameLength = thisLength; } unsigned int longestAllowed = std::min(int(NamespaceString::MaxNsCollectionLen), int(NamespaceString::MaxNsLen) - 2 /*strlen(".$")*/ - longestIndexNameLength); if (target.size() > longestAllowed) { StringBuilder sb; sb << "collection name length of " << target.size() << " exceeds maximum length of " << longestAllowed << ", allowing for index names"; return Status(ErrorCodes::InvalidLength, sb.str()); } } BackgroundOperation::assertNoBgOpInProgForNs(source.ns()); Database* const targetDB = dbHolder().openDb(txn, target.db()); { WriteUnitOfWork wunit(txn); // Check if the target namespace exists and if dropTarget is true. // If target exists and dropTarget is not true, return false. if (targetDB->getCollection(target)) { if (!dropTarget) { return Status(ErrorCodes::NamespaceExists, "target namespace exists"); } Status s = targetDB->dropCollection(txn, target.ns()); if (!s.isOK()) { return s; } } // If we are renaming in the same database, just // rename the namespace and we're done. if (sourceDB == targetDB) { Status s = targetDB->renameCollection(txn, source.ns(), target.ns(), stayTemp); if (!s.isOK()) { return s; } getGlobalServiceContext()->getOpObserver()->onRenameCollection( txn, NamespaceString(source), NamespaceString(target), dropTarget, stayTemp); wunit.commit(); return Status::OK(); } wunit.commit(); } // If we get here, we are renaming across databases, so we must copy all the data and // indexes, then remove the source collection. // Create the target collection. It will be removed if we fail to copy the collection. // TODO use a temp collection and unset the temp flag on success. Collection* targetColl = nullptr; { CollectionOptions options = sourceColl->getCatalogEntry()->getCollectionOptions(txn); WriteUnitOfWork wunit(txn); // No logOp necessary because the entire renameCollection command is one logOp. bool shouldReplicateWrites = txn->writesAreReplicated(); txn->setReplicatedWrites(false); targetColl = targetDB->createCollection(txn, target.ns(), options, false); // _id index build with others later. 
txn->setReplicatedWrites(shouldReplicateWrites); if (!targetColl) { return Status(ErrorCodes::OutOfDiskSpace, "Failed to create target collection."); } wunit.commit(); } // Dismissed on success ScopeGuard targetCollectionDropper = MakeGuard(dropCollection, txn, targetDB, target.ns()); MultiIndexBlock indexer(txn, targetColl); indexer.allowInterruption(); // Copy the index descriptions from the source collection, adjusting the ns field. { std::vector<BSONObj> indexesToCopy; IndexCatalog::IndexIterator sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator(txn, true); while (sourceIndIt.more()) { const BSONObj currIndex = sourceIndIt.next()->infoObj(); // Process the source index. BSONObjBuilder newIndex; newIndex.append("ns", target.ns()); newIndex.appendElementsUnique(currIndex); indexesToCopy.push_back(newIndex.obj()); } indexer.init(indexesToCopy); } { // Copy over all the data from source collection to target collection. auto cursor = sourceColl->getCursor(txn); while (auto record = cursor->next()) { txn->checkForInterrupt(); const auto obj = record->data.releaseToBson(); WriteUnitOfWork wunit(txn); // No logOp necessary because the entire renameCollection command is one logOp. bool shouldReplicateWrites = txn->writesAreReplicated(); txn->setReplicatedWrites(false); Status status = targetColl->insertDocument(txn, obj, &indexer, true); txn->setReplicatedWrites(shouldReplicateWrites); if (!status.isOK()) return status; wunit.commit(); } } Status status = indexer.doneInserting(); if (!status.isOK()) return status; { // Getting here means we successfully built the target copy. We now remove the // source collection and finalize the rename. WriteUnitOfWork wunit(txn); bool shouldReplicateWrites = txn->writesAreReplicated(); txn->setReplicatedWrites(false); Status status = sourceDB->dropCollection(txn, source.ns()); txn->setReplicatedWrites(shouldReplicateWrites); if (!status.isOK()) return status; indexer.commit(); getGlobalServiceContext()->getOpObserver()->onRenameCollection( txn, NamespaceString(source), NamespaceString(target), dropTarget, stayTemp); wunit.commit(); } targetCollectionDropper.Dismiss(); return Status::OK(); }
Status getRunnerDistinct(Collection* collection, const BSONObj& query, const string& field, Runner** out) { Database* db = cc().database(); verify(db); // This should'a been checked by the distinct command. verify(collection); // TODO: check for idhack here? // When can we do a fast distinct hack? // 1. There is a plan with just one leaf and that leaf is an ixscan. // 2. The ixscan indexes the field we're interested in. // 2a: We are correct if the index contains the field but for now we look for prefix. // 3. The query is covered/no fetch. // // We go through normal planning (with limited parameters) to see if we can produce // a soln with the above properties. QueryPlannerParams plannerParams; plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN; IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false); while (ii.more()) { const IndexDescriptor* desc = ii.next(); // The distinct hack can work if any field is in the index but it's not always clear // if it's a win unless it's the first field. if (desc->keyPattern().firstElement().fieldName() == field) { plannerParams.indices.push_back(IndexEntry(desc->keyPattern(), desc->isMultikey(), desc->isSparse(), desc->indexName(), desc->infoObj())); } } // We only care about the field that we're projecting over. Have to drop the _id field // explicitly because those are .find() semantics. // // Applying a projection allows the planner to try to give us covered plans. BSONObj projection; if ("_id" == field) { projection = BSON("_id" << 1); } else { projection = BSON("_id" << 0 << field << 1); } // Apply a projection of the key. Empty BSONObj() is for the sort. CanonicalQuery* cq; Status status = CanonicalQuery::canonicalize(collection->ns().ns(), query, BSONObj(), projection, &cq); if (!status.isOK()) { return status; } // No index has the field we're looking for. Punt to normal planning. if (plannerParams.indices.empty()) { // Takes ownership of cq. return getRunner(cq, out); } // If we're here, we have an index prefixed by the field we're distinct-ing over. // If there's no query, we can just distinct-scan one of the indices. if (query.isEmpty()) { DistinctNode* dn = new DistinctNode(); dn->indexKeyPattern = plannerParams.indices[0].keyPattern; dn->direction = 1; IndexBoundsBuilder::allValuesBounds(dn->indexKeyPattern, &dn->bounds); dn->fieldNo = 0; QueryPlannerParams params; // Takes ownership of 'dn'. QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, dn); verify(soln); WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*soln, &root, &ws)); *out = new SingleSolutionRunner(cq, soln, root, ws); return Status::OK(); } // See if we can answer the query in a fast-distinct compatible fashion. vector<QuerySolution*> solutions; status = QueryPlanner::plan(*cq, plannerParams, &solutions); if (!status.isOK()) { return getRunner(cq, out); } // XXX: why do we need to do this? planner should prob do this internally cq->root()->resetTag(); // We look for a solution that has an ixscan we can turn into a distinctixscan for (size_t i = 0; i < solutions.size(); ++i) { if (turnIxscanIntoDistinctIxscan(solutions[i], field)) { // Great, we can use solutions[i]. Clean up the other QuerySolution(s). for (size_t j = 0; j < solutions.size(); ++j) { if (j != i) { delete solutions[j]; } } // Build and return the SSR over solutions[i]. 
            WorkingSet* ws;
            PlanStage* root;
            verify(StageBuilder::build(*solutions[i], &root, &ws));
            *out = new SingleSolutionRunner(cq, solutions[i], root, ws);
            return Status::OK();
        }
    }

    // If we're here, the planner made a soln with the restricted index set but we couldn't
    // translate any of them into a distinct-compatible soln. So, delete the solutions and just
    // go through normal planning.
    for (size_t i = 0; i < solutions.size(); ++i) {
        delete solutions[i];
    }

    return getRunner(cq, out);
}
Status repairDatabase( string dbName, bool preserveClonedFilesOnFailure, bool backupOriginalFiles ) { scoped_ptr<RepairFileDeleter> repairFileDeleter; doingRepair dr; dbName = nsToDatabase( dbName ); log() << "repairDatabase " << dbName << endl; invariant( cc().database()->name() == dbName ); invariant( cc().database()->path() == storageGlobalParams.dbpath ); BackgroundOperation::assertNoBgOpInProgForDb(dbName); getDur().syncDataAndTruncateJournal(); // Must be done before and after repair intmax_t totalSize = dbSize( dbName ); intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath); if ( freeSize > -1 && freeSize < totalSize ) { return Status( ErrorCodes::OutOfDiskSpace, str::stream() << "Cannot repair database " << dbName << " having size: " << totalSize << " (bytes) because free disk space is: " << freeSize << " (bytes)" ); } killCurrentOp.checkForInterrupt(); Path reservedPath = uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ? "backup" : "_tmp" ); MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::create_directory( reservedPath ) ); string reservedPathString = reservedPath.string(); if ( !preserveClonedFilesOnFailure ) repairFileDeleter.reset( new RepairFileDeleter( dbName, reservedPathString, reservedPath ) ); { Database* originalDatabase = dbHolder().get( dbName, storageGlobalParams.dbpath ); if ( originalDatabase == NULL ) return Status( ErrorCodes::NamespaceNotFound, "database does not exist to repair" ); Database* tempDatabase = NULL; { bool justCreated = false; tempDatabase = dbHolderW().getOrCreate( dbName, reservedPathString, justCreated ); invariant( justCreated ); } map<string,CollectionOptions> namespacesToCopy; { string ns = dbName + ".system.namespaces"; Client::Context ctx( ns ); Collection* coll = originalDatabase->getCollection( ns ); if ( coll ) { scoped_ptr<CollectionIterator> it( coll->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !it->isEOF() ) { DiskLoc loc = it->getNext(); BSONObj obj = coll->docFor( loc ); string ns = obj["name"].String(); NamespaceString nss( ns ); if ( nss.isSystem() ) { if ( nss.isSystemDotIndexes() ) continue; if ( nss.coll() == "system.namespaces" ) continue; } if ( !nss.isNormal() ) continue; CollectionOptions options; if ( obj["options"].isABSONObj() ) { Status status = options.parse( obj["options"].Obj() ); if ( !status.isOK() ) return status; } namespacesToCopy[ns] = options; } } } for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin(); i != namespacesToCopy.end(); ++i ) { string ns = i->first; CollectionOptions options = i->second; Collection* tempCollection = NULL; { Client::Context tempContext( ns, tempDatabase ); tempCollection = tempDatabase->createCollection( ns, options, true, false ); } Client::Context readContext( ns, originalDatabase ); Collection* originalCollection = originalDatabase->getCollection( ns ); invariant( originalCollection ); // data MultiIndexBlock indexBlock( tempCollection ); { vector<BSONObj> indexes; IndexCatalog::IndexIterator ii = originalCollection->getIndexCatalog()->getIndexIterator( false ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); indexes.push_back( desc->infoObj() ); } Client::Context tempContext( ns, tempDatabase ); Status status = indexBlock.init( indexes ); if ( !status.isOK() ) return status; } scoped_ptr<CollectionIterator> iterator( originalCollection->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) ); while ( !iterator->isEOF() ) { DiskLoc loc = iterator->getNext(); 
invariant( !loc.isNull() ); BSONObj doc = originalCollection->docFor( loc ); Client::Context tempContext( ns, tempDatabase ); StatusWith<DiskLoc> result = tempCollection->insertDocument( doc, indexBlock ); if ( !result.isOK() ) return result.getStatus(); getDur().commitIfNeeded(); killCurrentOp.checkForInterrupt(false); } { Client::Context tempContext( ns, tempDatabase ); Status status = indexBlock.commit(); if ( !status.isOK() ) return status; } } getDur().syncDataAndTruncateJournal(); MongoFile::flushAll(true); // need both in case journaling is disabled killCurrentOp.checkForInterrupt(false); Client::Context tempContext( dbName, reservedPathString ); Database::closeDatabase( dbName, reservedPathString ); } // at this point if we abort, we don't want to delete new files // as they might be the only copies if ( repairFileDeleter.get() ) repairFileDeleter->success(); Client::Context ctx( dbName ); Database::closeDatabase(dbName, storageGlobalParams.dbpath); if ( backupOriginalFiles ) { _renameForBackup( dbName, reservedPath ); } else { // first make new directory before deleting data Path newDir = Path(storageGlobalParams.dbpath) / dbName; MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); // this deletes old files _deleteDataFiles( dbName ); if ( !boost::filesystem::exists(newDir) ) { // we deleted because of directoryperdb // re-create MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); } } _replaceWithRecovered( dbName, reservedPathString.c_str() ); if ( !backupOriginalFiles ) MONGO_ASSERT_ON_EXCEPTION( boost::filesystem::remove_all( reservedPath ) ); return Status::OK(); }
/** * For a given query, get a runner. The runner could be a SingleSolutionRunner, a * CachedQueryRunner, or a MultiPlanRunner, depending on the cache/query solver/etc. */ Status getRunner(Collection* collection, CanonicalQuery* rawCanonicalQuery, Runner** out, size_t plannerOptions) { verify(rawCanonicalQuery); auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery); // This can happen as we're called by internal clients as well. if (NULL == collection) { const string& ns = canonicalQuery->ns(); *out = new EOFRunner(canonicalQuery.release(), ns); return Status::OK(); } // If we have an _id index we can use the idhack runner. if (canUseIDHack(*canonicalQuery) && collection->getIndexCatalog()->findIdIndex()) { *out = new IDHackRunner(collection, canonicalQuery.release()); return Status::OK(); } // If it's not NULL, we may have indices. Access the catalog and fill out IndexEntry(s) QueryPlannerParams plannerParams; IndexCatalog::IndexIterator ii = collection->getIndexCatalog()->getIndexIterator(false); while (ii.more()) { const IndexDescriptor* desc = ii.next(); plannerParams.indices.push_back(IndexEntry(desc->keyPattern(), desc->isMultikey(), desc->isSparse(), desc->indexName(), desc->infoObj())); } // If query supports admin hint, filter params.indices by indexes in query settings. QuerySettings* querySettings = collection->infoCache()->getQuerySettings(); AllowedIndices* allowedIndicesRaw; // Filter index catalog if admin hint is specified for query. // Also, signal to planner that application hint should be ignored. if (querySettings->getAllowedIndices(*canonicalQuery, &allowedIndicesRaw)) { boost::scoped_ptr<AllowedIndices> allowedIndices(allowedIndicesRaw); filterAllowedIndexEntries(*allowedIndices, &plannerParams.indices); plannerParams.adminHintApplied = true; } // Tailable: If the query requests tailable the collection must be capped. if (canonicalQuery->getParsed().hasOption(QueryOption_CursorTailable)) { if (!collection->isCapped()) { return Status(ErrorCodes::BadValue, "error processing query: " + canonicalQuery->toString() + " tailable cursor requested on non capped collection"); } // If a sort is specified it must be equal to expectedSort. const BSONObj expectedSort = BSON("$natural" << 1); const BSONObj& actualSort = canonicalQuery->getParsed().getSort(); if (!actualSort.isEmpty() && !(actualSort == expectedSort)) { return Status(ErrorCodes::BadValue, "error processing query: " + canonicalQuery->toString() + " invalid sort specified for tailable cursor: " + actualSort.toString()); } } // Process the planning options. plannerParams.options = plannerOptions; if (storageGlobalParams.noTableScan) { const string& ns = canonicalQuery->ns(); // There are certain cases where we ignore this restriction: bool ignore = canonicalQuery->getQueryObj().isEmpty() || (string::npos != ns.find(".system.")) || (0 == ns.find("local.")); if (!ignore) { plannerParams.options |= QueryPlannerParams::NO_TABLE_SCAN; } } if (!(plannerParams.options & QueryPlannerParams::NO_TABLE_SCAN)) { plannerParams.options |= QueryPlannerParams::INCLUDE_COLLSCAN; } // If the caller wants a shard filter, make sure we're actually sharded. if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { CollectionMetadataPtr collMetadata = shardingState.getCollectionMetadata(canonicalQuery->ns()); if (collMetadata) { plannerParams.shardKey = collMetadata->getKeyPattern(); } else { // If there's no metadata don't bother w/the shard filter since we won't know what // the key pattern is anyway... 
plannerParams.options &= ~QueryPlannerParams::INCLUDE_SHARD_FILTER; } } // Try to look up a cached solution for the query. // // Skip cache look up for non-cacheable queries. // See PlanCache::shouldCacheQuery() // // TODO: Can the cache have negative data about a solution? CachedSolution* rawCS; if (PlanCache::shouldCacheQuery(*canonicalQuery) && collection->infoCache()->getPlanCache()->get(*canonicalQuery, &rawCS).isOK()) { // We have a CachedSolution. Have the planner turn it into a QuerySolution. boost::scoped_ptr<CachedSolution> cs(rawCS); QuerySolution *qs, *backupQs; Status status = QueryPlanner::planFromCache(*canonicalQuery, plannerParams, *cs, &qs, &backupQs); if (status.isOK()) { WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*qs, &root, &ws)); CachedPlanRunner* cpr = new CachedPlanRunner(canonicalQuery.release(), qs, root, ws); if (NULL != backupQs) { WorkingSet* backupWs; PlanStage* backupRoot; verify(StageBuilder::build(*backupQs, &backupRoot, &backupWs)); cpr->setBackupPlan(backupQs, backupRoot, backupWs); } *out = cpr; return Status::OK(); } } plannerParams.options |= QueryPlannerParams::INDEX_INTERSECTION; plannerParams.options |= QueryPlannerParams::KEEP_MUTATIONS; vector<QuerySolution*> solutions; Status status = QueryPlanner::plan(*canonicalQuery, plannerParams, &solutions); if (!status.isOK()) { return Status(ErrorCodes::BadValue, "error processing query: " + canonicalQuery->toString() + " planner returned error: " + status.reason()); } /* for (size_t i = 0; i < solutions.size(); ++i) { QLOG() << "solution " << i << " is " << solutions[i]->toString() << endl; } */ // We cannot figure out how to answer the query. Should this ever happen? if (0 == solutions.size()) { return Status(ErrorCodes::BadValue, "error processing query: " + canonicalQuery->toString() + " No query solutions"); } if (1 == solutions.size()) { // Only one possible plan. Run it. Build the stages from the solution. WorkingSet* ws; PlanStage* root; verify(StageBuilder::build(*solutions[0], &root, &ws)); // And, run the plan. *out = new SingleSolutionRunner(canonicalQuery.release(), solutions[0], root, ws); return Status::OK(); } else { // Many solutions. Let the MultiPlanRunner pick the best, update the cache, and so on. auto_ptr<MultiPlanRunner> mpr(new MultiPlanRunner(canonicalQuery.release())); for (size_t i = 0; i < solutions.size(); ++i) { WorkingSet* ws; PlanStage* root; if (solutions[i]->cacheData.get()) { solutions[i]->cacheData->adminHintApplied = plannerParams.adminHintApplied; } verify(StageBuilder::build(*solutions[i], &root, &ws)); // Takes ownership of all arguments. mpr->addPlan(solutions[i], root, ws); } *out = mpr.release(); return Status::OK(); } }
void CollectionInfoCache::computeIndexKeys(OperationContext* opCtx) {
    _indexedPaths.clear();

    bool hadTTLIndex = _hasTTLIndex;
    _hasTTLIndex = false;

    IndexCatalog::IndexIterator i = _collection->getIndexCatalog()->getIndexIterator(opCtx, true);
    while (i.more()) {
        IndexDescriptor* descriptor = i.next();

        if (descriptor->getAccessMethodName() != IndexNames::TEXT) {
            BSONObj key = descriptor->keyPattern();
            const BSONObj& infoObj = descriptor->infoObj();
            if (infoObj.hasField("expireAfterSeconds")) {
                _hasTTLIndex = true;
            }
            BSONObjIterator j(key);
            while (j.more()) {
                BSONElement e = j.next();
                _indexedPaths.addPath(e.fieldName());
            }
        } else {
            fts::FTSSpec ftsSpec(descriptor->infoObj());

            if (ftsSpec.wildcard()) {
                _indexedPaths.allPathsIndexed();
            } else {
                for (size_t i = 0; i < ftsSpec.numExtraBefore(); ++i) {
                    _indexedPaths.addPath(ftsSpec.extraBefore(i));
                }

                for (fts::Weights::const_iterator it = ftsSpec.weights().begin();
                     it != ftsSpec.weights().end();
                     ++it) {
                    _indexedPaths.addPath(it->first);
                }

                for (size_t i = 0; i < ftsSpec.numExtraAfter(); ++i) {
                    _indexedPaths.addPath(ftsSpec.extraAfter(i));
                }
                // Any update to a path containing "language" as a component could change the
                // language of a subdocument. Add the override field as a path component.
                _indexedPaths.addPathComponent(ftsSpec.languageOverrideField());
            }
        }

        // handle partial indexes
        const IndexCatalogEntry* entry = i.catalogEntry(descriptor);
        const MatchExpression* filter = entry->getFilterExpression();
        if (filter) {
            unordered_set<std::string> paths;
            QueryPlannerIXSelect::getFields(filter, "", &paths);
            for (auto it = paths.begin(); it != paths.end(); ++it) {
                _indexedPaths.addPath(*it);
            }
        }
    }

    TTLCollectionCache& ttlCollectionCache = TTLCollectionCache::get(getGlobalServiceContext());
    if (_hasTTLIndex != hadTTLIndex) {
        if (_hasTTLIndex) {
            ttlCollectionCache.registerCollection(_collection->ns());
        } else {
            ttlCollectionCache.unregisterCollection(_collection->ns());
        }
    }

    _keysComputed = true;
}
StatusWith<DiskLoc> Collection::updateDocument( const DiskLoc& oldLocation, const BSONObj& objNew, bool enforceQuota, OpDebug* debug ) { Record* oldRecord = _recordStore->recordFor( oldLocation ); BSONObj objOld( oldRecord->accessed()->data() ); if ( objOld.hasElement( "_id" ) ) { BSONElement oldId = objOld["_id"]; BSONElement newId = objNew["_id"]; if ( oldId != newId ) return StatusWith<DiskLoc>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596 ); } if ( ns().coll() == "system.users" ) { // XXX - andy and spencer think this should go away now V2UserDocumentParser parser; Status s = parser.checkValidUserDocument(objNew); if ( !s.isOK() ) return StatusWith<DiskLoc>( s ); } /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further below. that is suboptimal, but it's pretty complicated to do it the other way without rollbacks... */ OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets; IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || ignoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(objOld, objNew, oldLocation, options, updateTicket ); if ( !ret.isOK() ) { return StatusWith<DiskLoc>( ret ); } } if ( oldRecord->netLength() < objNew.objsize() ) { // doesn't fit, have to move to new location if ( _details->isCapped() ) return StatusWith<DiskLoc>( ErrorCodes::InternalError, "failing update: objects in a capped ns cannot grow", 10003 ); moveCounter.increment(); _details->paddingTooSmall(); // unindex old record, don't delete // this way, if inserting new doc fails, we can re-index this one _cursorCache.invalidateDocument(oldLocation, INVALIDATION_DELETION); _indexCatalog.unindexRecord( objOld, oldLocation, true ); if ( debug ) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } StatusWith<DiskLoc> loc = _insertDocument( objNew, enforceQuota ); if ( loc.isOK() ) { // insert successful, now lets deallocate the old location // remember its already unindexed _recordStore->deleteRecord( oldLocation ); } else { // new doc insert failed, so lets re-index the old document and location _indexCatalog.indexRecord( objOld, oldLocation ); } return loc; } _infoCache.notifyOfWriteOp(); _details->paddingFits(); if ( debug ) debug->keyUpdates = 0; ii = _indexCatalog.getIndexIterator( true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); int64_t updatedKeys; Status ret = iam->update(*updateTickets.mutableMap()[descriptor], &updatedKeys); if ( !ret.isOK() ) return StatusWith<DiskLoc>( ret ); if ( debug ) debug->keyUpdates += updatedKeys; } // Broadcast the mutation so that query results stay correct. _cursorCache.invalidateDocument(oldLocation, INVALIDATION_MUTATION); // update in place int sz = objNew.objsize(); memcpy(getDur().writingPtr(oldRecord->data(), sz), objNew.objdata(), sz); return StatusWith<DiskLoc>( oldLocation ); }
virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string source = cmdObj.getStringField( name.c_str() ); string target = cmdObj.getStringField( "to" ); if ( !NamespaceString::validCollectionComponent(target.c_str()) ) { errmsg = "invalid collection name: " + target; return false; } if ( source.empty() || target.empty() ) { errmsg = "invalid command syntax"; return false; } if (!fromRepl) { // If it got through on the master, need to allow it here too Status sourceStatus = userAllowedWriteNS(source); if (!sourceStatus.isOK()) { errmsg = "error with source namespace: " + sourceStatus.reason(); return false; } Status targetStatus = userAllowedWriteNS(target); if (!targetStatus.isOK()) { errmsg = "error with target namespace: " + targetStatus.reason(); return false; } } string sourceDB = nsToDatabase(source); string targetDB = nsToDatabase(target); bool capped = false; long long size = 0; std::vector<BSONObj> indexesInProg; Lock::GlobalWrite globalWriteLock; DurTransaction txn; { Client::Context srcCtx( source ); Collection* sourceColl = srcCtx.db()->getCollection( source ); if ( !sourceColl ) { errmsg = "source namespace does not exist"; return false; } // Ensure that collection name does not exceed maximum length. // Ensure that index names do not push the length over the max. // Iterator includes unfinished indexes. IndexCatalog::IndexIterator sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator( true ); int longestIndexNameLength = 0; while ( sourceIndIt.more() ) { int thisLength = sourceIndIt.next()->indexName().length(); if ( thisLength > longestIndexNameLength ) longestIndexNameLength = thisLength; } unsigned int longestAllowed = min(int(Namespace::MaxNsColletionLen), int(Namespace::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength); if (target.size() > longestAllowed) { StringBuilder sb; sb << "collection name length of " << target.size() << " exceeds maximum length of " << longestAllowed << ", allowing for index names"; errmsg = sb.str(); return false; } { indexesInProg = stopIndexBuilds( srcCtx.db(), cmdObj ); capped = sourceColl->isCapped(); if ( capped ) { size = sourceColl->storageSize(); } } } { Client::Context ctx( target ); // Check if the target namespace exists and if dropTarget is true. // If target exists and dropTarget is not true, return false. if ( ctx.db()->getCollection( target ) ) { if ( !cmdObj["dropTarget"].trueValue() ) { errmsg = "target namespace exists"; return false; } Status s = ctx.db()->dropCollection( &txn, target ); if ( !s.isOK() ) { errmsg = s.toString(); restoreIndexBuildsOnSource( indexesInProg, source ); return false; } } // If we are renaming in the same database, just // rename the namespace and we're done. if ( sourceDB == targetDB ) { Status s = ctx.db()->renameCollection( &txn, source, target, cmdObj["stayTemp"].trueValue() ); if ( !s.isOK() ) { errmsg = s.toString(); restoreIndexBuildsOnSource( indexesInProg, source ); return false; } return true; } // Otherwise, we are enaming across databases, so we must copy all // the data and then remove the source collection. // Create the target collection. Collection* targetColl = NULL; if ( capped ) { CollectionOptions options; options.capped = true; options.cappedSize = size; options.setNoIdIndex(); targetColl = ctx.db()->createCollection( &txn, target, options ); } else { CollectionOptions options; options.setNoIdIndex(); // No logOp necessary because the entire renameCollection command is one logOp. 
targetColl = ctx.db()->createCollection( &txn, target, options ); } if ( !targetColl ) { errmsg = "Failed to create target collection."; restoreIndexBuildsOnSource( indexesInProg, source ); return false; } } // Copy over all the data from source collection to target collection. bool insertSuccessful = true; boost::scoped_ptr<RecordIterator> sourceIt; Collection* sourceColl = NULL; { Client::Context srcCtx( source ); sourceColl = srcCtx.db()->getCollection( source ); sourceIt.reset( sourceColl->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) ); } Collection* targetColl = NULL; while ( !sourceIt->isEOF() ) { BSONObj o; { Client::Context srcCtx( source ); o = sourceColl->docFor(sourceIt->getNext()); } // Insert and check return status of insert. { Client::Context ctx( target ); if ( !targetColl ) targetColl = ctx.db()->getCollection( target ); // No logOp necessary because the entire renameCollection command is one logOp. Status s = targetColl->insertDocument( &txn, o, true ).getStatus(); if ( !s.isOK() ) { insertSuccessful = false; errmsg = s.toString(); break; } } } // If inserts were unsuccessful, drop the target collection and return false. if ( !insertSuccessful ) { Client::Context ctx( target ); Status s = ctx.db()->dropCollection( &txn, target ); if ( !s.isOK() ) errmsg = s.toString(); restoreIndexBuildsOnSource( indexesInProg, source ); return false; } // Copy over the indexes to temp storage and then to the target.. vector<BSONObj> copiedIndexes; bool indexSuccessful = true; { Client::Context srcCtx( source ); IndexCatalog::IndexIterator sourceIndIt = sourceColl->getIndexCatalog()->getIndexIterator( true ); while ( sourceIndIt.more() ) { BSONObj currIndex = sourceIndIt.next()->infoObj(); // Process the source index. BSONObjBuilder b; BSONObjIterator i( currIndex ); while( i.moreWithEOO() ) { BSONElement e = i.next(); if ( e.eoo() ) break; else if ( strcmp( e.fieldName(), "ns" ) == 0 ) b.append( "ns", target ); else b.append( e ); } BSONObj newIndex = b.obj(); copiedIndexes.push_back( newIndex ); } } { Client::Context ctx( target ); if ( !targetColl ) targetColl = ctx.db()->getCollection( target ); for ( vector<BSONObj>::iterator it = copiedIndexes.begin(); it != copiedIndexes.end(); ++it ) { Status s = targetColl->getIndexCatalog()->createIndex( *it, true ); if ( !s.isOK() ) { indexSuccessful = false; errmsg = s.toString(); break; } } // If indexes were unsuccessful, drop the target collection and return false. if ( !indexSuccessful ) { Status s = ctx.db()->dropCollection( &txn, target ); if ( !s.isOK() ) errmsg = s.toString(); restoreIndexBuildsOnSource( indexesInProg, source ); return false; } } // Drop the source collection. { Client::Context srcCtx( source ); Status s = srcCtx.db()->dropCollection( &txn, source ); if ( !s.isOK() ) { errmsg = s.toString(); restoreIndexBuildsOnSource( indexesInProg, source ); return false; } } return true; }
Status MMAPV1Engine::repairDatabase( OperationContext* txn, const std::string& dbName, bool preserveClonedFilesOnFailure, bool backupOriginalFiles ) { unique_ptr<RepairFileDeleter> repairFileDeleter; // Must be done before and after repair getDur().syncDataAndTruncateJournal(txn); intmax_t totalSize = dbSize( dbName ); intmax_t freeSize = File::freeSpace(storageGlobalParams.repairpath); if ( freeSize > -1 && freeSize < totalSize ) { return Status( ErrorCodes::OutOfDiskSpace, str::stream() << "Cannot repair database " << dbName << " having size: " << totalSize << " (bytes) because free disk space is: " << freeSize << " (bytes)" ); } txn->checkForInterrupt(); Path reservedPath = uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ? "backup" : "_tmp" ); bool created = false; MONGO_ASSERT_ON_EXCEPTION( created = boost::filesystem::create_directory( reservedPath ) ); invariant( created ); string reservedPathString = reservedPath.string(); if ( !preserveClonedFilesOnFailure ) repairFileDeleter.reset( new RepairFileDeleter( txn, dbName, reservedPathString, reservedPath ) ); { Database* originalDatabase = dbHolder().openDb(txn, dbName); if (originalDatabase == NULL) { return Status(ErrorCodes::NamespaceNotFound, "database does not exist to repair"); } unique_ptr<MMAPV1DatabaseCatalogEntry> dbEntry; unique_ptr<Database> tempDatabase; // Must call this before MMAPV1DatabaseCatalogEntry's destructor closes the DB files ON_BLOCK_EXIT(&dur::DurableInterface::syncDataAndTruncateJournal, &getDur(), txn); { dbEntry.reset(new MMAPV1DatabaseCatalogEntry(txn, dbName, reservedPathString, storageGlobalParams.directoryperdb, true)); tempDatabase.reset( new Database(txn, dbName, dbEntry.get())); } map<string,CollectionOptions> namespacesToCopy; { string ns = dbName + ".system.namespaces"; OldClientContext ctx(txn, ns ); Collection* coll = originalDatabase->getCollection( ns ); if ( coll ) { auto cursor = coll->getCursor(txn); while (auto record = cursor->next()) { BSONObj obj = record->data.releaseToBson(); string ns = obj["name"].String(); NamespaceString nss( ns ); if ( nss.isSystem() ) { if ( nss.isSystemDotIndexes() ) continue; if ( nss.coll() == "system.namespaces" ) continue; } if ( !nss.isNormal() ) continue; CollectionOptions options; if ( obj["options"].isABSONObj() ) { Status status = options.parse( obj["options"].Obj() ); if ( !status.isOK() ) return status; } namespacesToCopy[ns] = options; } } } for ( map<string,CollectionOptions>::const_iterator i = namespacesToCopy.begin(); i != namespacesToCopy.end(); ++i ) { string ns = i->first; CollectionOptions options = i->second; Collection* tempCollection = NULL; { WriteUnitOfWork wunit(txn); tempCollection = tempDatabase->createCollection(txn, ns, options, false); wunit.commit(); } OldClientContext readContext(txn, ns, originalDatabase); Collection* originalCollection = originalDatabase->getCollection( ns ); invariant( originalCollection ); // data // TODO SERVER-14812 add a mode that drops duplicates rather than failing MultiIndexBlock indexer(txn, tempCollection ); { vector<BSONObj> indexes; IndexCatalog::IndexIterator ii = originalCollection->getIndexCatalog()->getIndexIterator( txn, false ); while ( ii.more() ) { IndexDescriptor* desc = ii.next(); indexes.push_back( desc->infoObj() ); } Status status = indexer.init( indexes ); if (!status.isOK()) { return status; } } auto cursor = originalCollection->getCursor(txn); while (auto record = cursor->next()) { BSONObj doc = record->data.releaseToBson(); WriteUnitOfWork wunit(txn); 
StatusWith<RecordId> result = tempCollection->insertDocument(txn, doc, &indexer, false); if ( !result.isOK() ) return result.getStatus(); wunit.commit(); txn->checkForInterrupt(); } Status status = indexer.doneInserting(); if (!status.isOK()) return status; { WriteUnitOfWork wunit(txn); indexer.commit(); wunit.commit(); } } getDur().syncDataAndTruncateJournal(txn); // need both in case journaling is disabled MongoFile::flushAll(true); txn->checkForInterrupt(); } // at this point if we abort, we don't want to delete new files // as they might be the only copies if ( repairFileDeleter.get() ) repairFileDeleter->success(); // Close the database so we can rename/delete the original data files dbHolder().close(txn, dbName); if ( backupOriginalFiles ) { _renameForBackup( dbName, reservedPath ); } else { // first make new directory before deleting data Path newDir = Path(storageGlobalParams.dbpath) / dbName; MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); // this deletes old files _deleteDataFiles( dbName ); if ( !boost::filesystem::exists(newDir) ) { // we deleted because of directoryperdb // re-create MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::create_directory(newDir)); } } _replaceWithRecovered( dbName, reservedPathString.c_str() ); if (!backupOriginalFiles) { MONGO_ASSERT_ON_EXCEPTION(boost::filesystem::remove_all(reservedPath)); } // Reopen the database so it's discoverable dbHolder().openDb(txn, dbName); return Status::OK(); }
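// ---------------------------------------------------------------------------
// Illustrative sketch (not MongoDB code): repairDatabase above refuses to run
// when the repair path has less free space than the database currently
// occupies. The standalone C++17 program below shows the same precondition
// using std::filesystem::space; hasRoomForRepair and the byte counts are
// hypothetical stand-ins for dbSize()/File::freeSpace().
#include <cstdint>
#include <filesystem>
#include <iostream>

// Returns true when the volume holding 'repairPath' can hold 'dbBytes' more
// bytes; mirrors the "freeSize < totalSize" early return in repairDatabase.
bool hasRoomForRepair(const std::filesystem::path& repairPath, std::uintmax_t dbBytes) {
    std::error_code ec;
    const auto info = std::filesystem::space(repairPath, ec);
    if (ec)
        return true;  // unknown free space: mirror the freeSize == -1 "don't block" case
    return info.available >= dbBytes;
}

int main() {
    const std::uintmax_t dbBytes = 64ull * 1024 * 1024;  // pretend the database is 64 MB
    if (!hasRoomForRepair(".", dbBytes)) {
        std::cerr << "not enough free disk space to repair\n";
        return 1;
    }
    std::cout << "enough space, repair could proceed\n";
    return 0;
}
// ---------------------------------------------------------------------------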
Status appendCollectionStorageStats(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& param, BSONObjBuilder* result) { int scale = 1; if (param["scale"].isNumber()) { scale = param["scale"].numberInt(); if (scale < 1) { return {ErrorCodes::BadValue, "scale has to be >= 1"}; } } else if (param["scale"].trueValue()) { return {ErrorCodes::BadValue, "scale has to be a number >= 1"}; } bool verbose = param["verbose"].trueValue(); AutoGetCollectionForReadCommand ctx(opCtx, nss); Collection* collection = ctx.getCollection(); // Will be set if present if (!ctx.getDb() || !collection) { result->appendNumber("size", 0); result->appendNumber("count", 0); result->appendNumber("storageSize", 0); result->append("nindexes", 0); result->appendNumber("totalIndexSize", 0); result->append("indexDetails", BSONObj()); result->append("indexSizes", BSONObj()); std::string errmsg = !(ctx.getDb()) ? "Database [" + nss.db().toString() + "] not found." : "Collection [" + nss.toString() + "] not found."; return {ErrorCodes::NamespaceNotFound, errmsg}; } long long size = collection->dataSize(opCtx) / scale; result->appendNumber("size", size); long long numRecords = collection->numRecords(opCtx); result->appendNumber("count", numRecords); if (numRecords) result->append("avgObjSize", collection->averageObjectSize(opCtx)); RecordStore* recordStore = collection->getRecordStore(); result->appendNumber( "storageSize", static_cast<long long>(recordStore->storageSize(opCtx, result, verbose ? 1 : 0)) / scale); recordStore->appendCustomStats(opCtx, result, scale); IndexCatalog* indexCatalog = collection->getIndexCatalog(); result->append("nindexes", indexCatalog->numIndexesReady(opCtx)); BSONObjBuilder indexDetails; IndexCatalog::IndexIterator i = indexCatalog->getIndexIterator(opCtx, false); while (i.more()) { const IndexDescriptor* descriptor = i.next(); IndexAccessMethod* iam = indexCatalog->getIndex(descriptor); invariant(iam); BSONObjBuilder bob; if (iam->appendCustomStats(opCtx, &bob, scale)) { indexDetails.append(descriptor->indexName(), bob.obj()); } } result->append("indexDetails", indexDetails.obj()); BSONObjBuilder indexSizes; long long indexSize = collection->getIndexSize(opCtx, &indexSizes, scale); result->appendNumber("totalIndexSize", indexSize / scale); result->append("indexSizes", indexSizes.obj()); return Status::OK(); }
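// ---------------------------------------------------------------------------
// Illustrative sketch (not MongoDB code): appendCollectionStorageStats above
// validates an optional "scale" parameter (it must be a number >= 1) and then
// divides the raw byte counters by it before reporting them. The standalone
// snippet below shows that validate-then-scale pattern; parseScale and the
// byte constants are hypothetical names for this example only.
#include <iostream>
#include <optional>
#include <stdexcept>

// Reject anything below 1 and default to 1 when the caller supplied nothing,
// the same contract the storage-stats command enforces on "scale".
long long parseScale(std::optional<long long> requested) {
    if (!requested)
        return 1;
    if (*requested < 1)
        throw std::invalid_argument("scale has to be >= 1");
    return *requested;
}

int main() {
    const long long dataBytes = 5 * 1024 * 1024;   // pretend collection data size
    const long long indexBytes = 1 * 1024 * 1024;  // pretend total index size
    const long long scale = parseScale(1024);      // report figures in kilobytes
    std::cout << "size: " << dataBytes / scale << "\n"
              << "totalIndexSize: " << indexBytes / scale << "\n";
    return 0;
}
// ---------------------------------------------------------------------------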
StatusWith<RecordId> Collection::updateDocument( OperationContext* txn, const RecordId& oldLocation, const BSONObj& objNew, bool enforceQuota, OpDebug* debug ) { BSONObj objOld = _recordStore->dataFor( txn, oldLocation ).releaseToBson(); if ( objOld.hasElement( "_id" ) ) { BSONElement oldId = objOld["_id"]; BSONElement newId = objNew["_id"]; if ( oldId != newId ) return StatusWith<RecordId>( ErrorCodes::InternalError, "in Collection::updateDocument _id mismatch", 13596 ); } /* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further below. that is suboptimal, but it's pretty complicated to do it the other way without rollbacks... */ // At the end of this step, we will have a map of UpdateTickets, one per index, which // represent the index updates needed to be done, based on the changes between objOld and // objNew. OwnedPointerMap<IndexDescriptor*,UpdateTicket> updateTickets; IndexCatalog::IndexIterator ii = _indexCatalog.getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); InsertDeleteOptions options; options.logIfError = false; options.dupsAllowed = !(KeyPattern::isIdKeyPattern(descriptor->keyPattern()) || descriptor->unique()) || repl::getGlobalReplicationCoordinator()->shouldIgnoreUniqueIndex(descriptor); UpdateTicket* updateTicket = new UpdateTicket(); updateTickets.mutableMap()[descriptor] = updateTicket; Status ret = iam->validateUpdate(txn, objOld, objNew, oldLocation, options, updateTicket ); if ( !ret.isOK() ) { return StatusWith<RecordId>( ret ); } } // This can call back into Collection::recordStoreGoingToMove. If that happens, the old // object is removed from all indexes. StatusWith<RecordId> newLocation = _recordStore->updateRecord( txn, oldLocation, objNew.objdata(), objNew.objsize(), _enforceQuota( enforceQuota ), this ); if ( !newLocation.isOK() ) { return newLocation; } // At this point, the old object may or may not still be indexed, depending on if it was // moved. _infoCache.notifyOfWriteOp(); // If the object did move, we need to add the new location to all indexes. if ( newLocation.getValue() != oldLocation ) { if ( debug ) { if (debug->nmoved == -1) // default of -1 rather than 0 debug->nmoved = 1; else debug->nmoved += 1; } Status s = _indexCatalog.indexRecord(txn, objNew, newLocation.getValue()); if (!s.isOK()) return StatusWith<RecordId>(s); return newLocation; } // Object did not move. We update each index with each respective UpdateTicket. if ( debug ) debug->keyUpdates = 0; ii = _indexCatalog.getIndexIterator( txn, true ); while ( ii.more() ) { IndexDescriptor* descriptor = ii.next(); IndexAccessMethod* iam = _indexCatalog.getIndex( descriptor ); int64_t updatedKeys; Status ret = iam->update(txn, *updateTickets.mutableMap()[descriptor], &updatedKeys); if ( !ret.isOK() ) return StatusWith<RecordId>( ret ); if ( debug ) debug->keyUpdates += updatedKeys; } // Broadcast the mutation so that query results stay correct. _cursorCache.invalidateDocument(txn, oldLocation, INVALIDATION_MUTATION); return newLocation; }
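// ---------------------------------------------------------------------------
// Illustrative sketch (not MongoDB code): updateDocument above first asks every
// index to validate the pending change (collecting one UpdateTicket per index)
// and only consumes those tickets after the record-store write succeeds and the
// document did not move. The standalone program below shows that two-phase
// "validate everything, then apply everything" shape with plain std::function
// tickets; Ticket and the index names are hypothetical, for this example only.
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Ticket {
    std::string index;            // which "index" the deferred work belongs to
    std::function<bool()> apply;  // deferred work, run only after validation
};

int main() {
    std::vector<Ticket> tickets;
    bool validationFailed = false;

    // Phase 1: validate each pending index update and stash the deferred work.
    for (const std::string& index : {std::string("a_1"), std::string("b_1")}) {
        const bool wouldViolateConstraint = false;  // e.g. a duplicate-key check
        if (wouldViolateConstraint) {
            validationFailed = true;
            break;
        }
        tickets.push_back({index, [index] {
            std::cout << "applied update for " << index << "\n";
            return true;
        }});
    }

    // Phase 2: apply only if every validation passed, mirroring how the
    // UpdateTickets are consumed once the record-store update has succeeded.
    if (validationFailed)
        return 1;
    for (const Ticket& t : tickets)
        t.apply();
    return 0;
}
// ---------------------------------------------------------------------------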