/**
 * Stores a new head location for this index.
 *
 * The value is written into the head field of this index's IndexDetails
 * (obtained from the collection's writable NamespaceDetails) through
 * writing(), and the cached _head member is updated to the same value.
 *
 * @param newHead location to record as the index head
 */
void IndexCatalogEntry::setHead( DiskLoc newHead ) {
    NamespaceDetails* writableDetails = _collection->detailsWritable();
    const int slot = _indexNo();

    IndexDetails& entry = writableDetails->idx( slot );
    entry.head.writing() = newHead;

    // Keep the cached copy in step with the catalog value.
    _head = newHead;
}
/**
 * Looks up a single document through the collection's _id index.
 *
 * @param c          client whose current database is searched
 * @param ns         namespace to look in; at least a read lock is asserted
 * @param query      query from which the _id index key is extracted
 * @param result     receives the matching document when true is returned
 * @param nsFound    optional; set to true once the namespace is found
 * @param indexFound optional; set to true once an _id index is found
 * @return true when a document was located, false when the namespace, the
 *         _id index, or the document is missing
 */
bool Helpers::findById(Client& c, const char *ns, BSONObj query, BSONObj& result ,
                       bool * nsFound , bool * indexFound ) {
    Lock::assertAtLeastReadLocked(ns);

    Database *db = c.database();
    verify( db );

    NamespaceDetails *details = db->namespaceIndex.details(ns);
    if ( ! details ) {
        return false;
    }
    if ( nsFound ) {
        *nsFound = true;
    }

    const int idIndexNo = details->findIdIndex();
    if ( idIndexNo < 0 ) {
        return false;
    }
    if ( indexFound ) {
        *indexFound = true;
    }

    IndexDetails& idIndex = details->idx( idIndexNo );
    BSONObj idKey = idIndex.getKeyFromQuery( query );

    DiskLoc where = QueryRunner::fastFindSingle(idIndex, idKey);
    if ( where.isNull() ) {
        return false;
    }

    result = where.obj();
    return true;
}
/**
 * Produces a cursor for the given query and sort order on ns.
 *
 * Three strategies are tried in turn:
 *   1. empty query and empty order: a full scan via theDataFileMgr.findAll();
 *   2. a simple _id query on a collection with an _id index: a BtreeCursor
 *      over the single key extracted from the query;
 *   3. otherwise a MultiPlanScanner; its single cursor is returned when it
 *      has one (a CoveredIndexMatcher is attached if the query is non-empty
 *      and the cursor has no matcher), else a query optimizer cursor.
 */
shared_ptr<Cursor> NamespaceDetailsTransient::getCursor( const char *ns, const BSONObj &query,
                                                         const BSONObj &order ) {
    if ( query.isEmpty() && order.isEmpty() ) {
        // TODO This will not use a covered index currently.
        return theDataFileMgr.findAll( ns );
    }

    if ( isSimpleIdQuery( query ) ) {
        Database *db = cc().database();
        assert( db );
        NamespaceDetails *details = db->namespaceIndex.details(ns);
        // -1 stands for "no usable _id index", including a missing namespace.
        const int idIndexNo = details ? details->findIdIndex() : -1;
        if ( idIndexNo >= 0 ) {
            IndexDetails& idIndex = details->idx( idIndexNo );
            BSONObj idKey = idIndex.getKeyFromQuery( query );
            return shared_ptr<Cursor>(
                BtreeCursor::make( details, idIndexNo, idIndex, idKey, idKey, true, 1 ) );
        }
    }

    auto_ptr<MultiPlanScanner> scanner( new MultiPlanScanner( ns, query, order ) ); // mayYield == false
    shared_ptr<Cursor> only = scanner->singleCursor();
    if ( !only ) {
        // No single plan: hand the scanner over to a query optimizer cursor.
        return newQueryOptimizerCursor( scanner );
    }

    if ( !query.isEmpty() && !only->matcher() ) {
        shared_ptr<CoveredIndexMatcher> matcher(
            new CoveredIndexMatcher( query, only->indexKeyPattern() ) );
        only->setMatcher( matcher );
    }
    return only;
}
/**
 * Runs IndexChanges::dupCheck for every index reported by
 * d.nIndexesBeingBuilt(), pairing v[n] with index n.
 *
 * @param v         per-index change sets; must hold at least
 *                  d.nIndexesBeingBuilt() entries (not checked here)
 * @param d         namespace whose indexes are consulted
 * @param curObjLoc location of the object being checked, forwarded as-is
 */
void dupCheck(vector<IndexChanges>& v, NamespaceDetails& d, DiskLoc curObjLoc) {
    const int indexCount = d.nIndexesBeingBuilt();
    int slot = 0;
    while ( slot < indexCount ) {
        IndexDetails& index = d.idx(slot);
        v[slot].dupCheck(index, curObjLoc);
        ++slot;
    }
}
long long Helpers::removeRange( const string& ns , const BSONObj& min , const BSONObj& max , bool yield , bool maxInclusive , RemoveCallback * callback, bool fromMigrate ) { BSONObj keya , keyb; BSONObj minClean = toKeyFormat( min , keya ); BSONObj maxClean = toKeyFormat( max , keyb ); verify( keya == keyb ); Client::Context ctx(ns); shared_ptr<Cursor> c; auto_ptr<ClientCursor> cc; { NamespaceDetails* nsd = nsdetails( ns.c_str() ); if ( ! nsd ) return 0; int ii = nsd->findIndexByKeyPattern( keya ); verify( ii >= 0 ); IndexDetails& i = nsd->idx( ii ); c.reset( BtreeCursor::make( nsd , ii , i , minClean , maxClean , maxInclusive, 1 ) ); cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); cc->setDoingDeletes( true ); } long long num = 0; while ( cc->ok() ) { if ( yield && ! cc->yieldSometimes( ClientCursor::WillNeed) ) { // cursor got finished by someone else, so we're done cc.release(); // if the collection/db is dropped, cc may be deleted break; } if ( ! cc->ok() ) break; DiskLoc rloc = cc->currLoc(); if ( callback ) callback->goingToDelete( cc->current() ); cc->advance(); c->prepareToTouchEarlierIterate(); logOp( "d" , ns.c_str() , rloc.obj()["_id"].wrap() , 0 , 0 , fromMigrate ); theDataFileMgr.deleteRecord(ns.c_str() , rloc.rec(), rloc); num++; c->recoverFromTouchingEarlierIterate(); getDur().commitIfNeeded(); } return num; }
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." + cmdObj.firstElement().valuestr(); NamespaceDetails *d = nsdetails(ns); if (NULL == d) { errmsg = "can't find ns"; return false; } GeoNearArguments commonArgs(cmdObj); if (commonArgs.numWanted < 0) { errmsg = "numWanted must be >= 0"; return false; } vector<int> idxs; d->findIndexByType("2d", idxs); if (idxs.size() > 1) { errmsg = "more than one 2d index, not sure which to run geoNear on"; return false; } if (1 == idxs.size()) { result.append("ns", ns); return run2DGeoNear(d->idx(idxs[0]), cmdObj, commonArgs, errmsg, result); } d->findIndexByType("2dsphere", idxs); if (idxs.size() > 1) { errmsg = "more than one 2dsphere index, not sure which to run geoNear on"; return false; } if (1 == idxs.size()) { result.append("ns", ns); return run2DSphereGeoNear(d->idx(idxs[0]), cmdObj, commonArgs, errmsg, result); } errmsg = "no geo indices for geoNear"; return false; }
/**
 * Renames a collection together with all of its indexes.
 *
 * The data namespace is renamed first. Then, for every system.indexes entry
 * whose "ns" equals fromNS: a copy of the spec with "ns" set to toNS is
 * inserted, the matching IndexDetails is pointed at the new spec, the index
 * namespace itself is renamed, and the old spec is deleted. The loop ends
 * when no spec for fromNS remains.
 *
 * @param fromNS   current full namespace of the collection
 * @param toNS     new full namespace
 * @param stayTemp forwarded to _renameSingleNamespace for the data namespace
 * @return the first non-OK status from _renameSingleNamespace, else OK
 */
Status Database::renameCollection( const StringData& fromNS,
                                   const StringData& toNS,
                                   bool stayTemp ) {
    // move data namespace
    Status s = _renameSingleNamespace( fromNS, toNS, stayTemp );
    if ( !s.isOK() )
        return s;

    NamespaceDetails* details = _namespaceIndex.details( toNS );
    verify( details );

    // move index namespaces
    string indexName = _name + ".system.indexes";
    BSONObj oldIndexSpec;
    while( Helpers::findOne( indexName, BSON( "ns" << fromNS ), oldIndexSpec ) ) {
        oldIndexSpec = oldIndexSpec.getOwned();

        // Copy the spec field by field, substituting the new namespace.
        BSONObj newIndexSpec;
        {
            BSONObjBuilder b;
            BSONObjIterator i( oldIndexSpec );
            while( i.more() ) {
                BSONElement e = i.next();
                if ( strcmp( e.fieldName(), "ns" ) != 0 )
                    b.append( e );
                else
                    b << "ns" << toNS;
            }
            newIndexSpec = b.obj();
        }

        DiskLoc newIndexSpecLoc = theDataFileMgr.insert( indexName.c_str(),
                                                         newIndexSpec.objdata(),
                                                         newIndexSpec.objsize(),
                                                         false,
                                                         true,
                                                         false );

        int indexI = details->findIndexByName( oldIndexSpec.getStringField( "name" ) );
        // A spec in system.indexes with no matching IndexDetails would make
        // idx() below index with a negative number; fail loudly instead.
        verify( indexI >= 0 );

        IndexDetails &indexDetails = details->idx(indexI);
        // indexNamespace() is read before and after repointing info, giving
        // the old and the new index namespace respectively.
        string oldIndexNs = indexDetails.indexNamespace();
        indexDetails.info = newIndexSpecLoc;
        string newIndexNs = indexDetails.indexNamespace();

        Status indexStatus = _renameSingleNamespace( oldIndexNs, newIndexNs, false );
        if ( !indexStatus.isOK() )
            return indexStatus;

        deleteObjects( indexName.c_str(), oldIndexSpec, true, false, true );
    }

    Top::global.collectionDropped( fromNS.toString() );

    return Status::OK();
}
void QueryPlanSet::addOtherPlans( bool checkFirst ) { const char *ns = _frsp->ns(); NamespaceDetails *d = nsdetails( ns ); if ( !d ) return; // If table scan is optimal or natural order requested or tailable cursor requested if ( !_frsp->matchPossible() || ( _frsp->noNontrivialRanges() && _order.isEmpty() ) || ( !_order.isEmpty() && !strcmp( _order.firstElement().fieldName(), "$natural" ) ) ) { // Table scan plan addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ), checkFirst ); return; } bool normalQuery = _hint.isEmpty() && _min.isEmpty() && _max.isEmpty(); PlanSet plans; QueryPlanPtr optimalPlan; for( int i = 0; i < d->nIndexes; ++i ) { if ( normalQuery ) { if ( !_frsp->matchPossibleForIndex( d, i, d->idx( i ).keyPattern() ) ) { // If no match is possible, only generate a trival plan that won't // scan any documents. QueryPlanPtr p( new QueryPlan( d, i, *_frsp, *_originalFrsp, _originalQuery, _order ) ); addPlan( p, checkFirst ); return; } if ( !QueryUtilIndexed::indexUseful( *_frsp, d, i, _order ) ) { continue; } } QueryPlanPtr p( new QueryPlan( d, i, *_frsp, *_originalFrsp, _originalQuery, _order ) ); if ( p->optimal() ) { if ( !optimalPlan.get() ) { optimalPlan = p; } } else if ( !p->unhelpful() ) { plans.push_back( p ); } } if ( optimalPlan.get() ) { addPlan( optimalPlan, checkFirst ); return; } for( PlanSet::iterator i = plans.begin(); i != plans.end(); ++i ) addPlan( *i, checkFirst ); // Table scan plan addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ), checkFirst ); }
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." + cmdObj.firstElement().valuestr(); NamespaceDetails *nsd = nsdetails(ns); if (NULL == nsd) { errmsg = "can't find ns"; return false; } vector<int> idxs; nsd->findIndexByType(GEOSEARCHNAME, idxs); if (idxs.size() == 0) { errmsg = "no geoSearch index"; return false; } if (idxs.size() > 1) { errmsg = "more than 1 geosearch index"; return false; } BSONElement nearElt = cmdObj["near"]; BSONElement maxDistance = cmdObj["maxDistance"]; BSONElement search = cmdObj["search"]; uassert(13318, "near needs to be an array", nearElt.isABSONObj()); uassert(13319, "maxDistance needs a number", maxDistance.isNumber()); uassert(13320, "search needs to be an object", search.type() == Object); unsigned limit = 50; if (cmdObj["limit"].isNumber()) limit = static_cast<unsigned>(cmdObj["limit"].numberInt()); int idxNum = idxs[0]; IndexDetails& id = nsd->idx(idxNum); if (CatalogHack::testIndexMigration()) { auto_ptr<IndexDescriptor> desc(CatalogHack::getDescriptor(nsd, idxNum)); auto_ptr<HaystackAccessMethod> ham(new HaystackAccessMethod(desc.get())); ham->searchCommand(nearElt.Obj(), maxDistance.numberDouble(), search.Obj(), &result, limit); } else { GeoHaystackSearchIndex *si = static_cast<GeoHaystackSearchIndex*>(id.getSpec().getType()); verify(&id == si->getDetails()); si->searchCommand(nsd, nearElt.Obj(), maxDistance.numberDouble(), search.Obj(), result, limit); } return 1; }
long long Helpers::removeRange( const string& ns , const BSONObj& min , const BSONObj& max , bool yield , bool maxInclusive , RemoveCallback * callback ) { BSONObj keya , keyb; BSONObj minClean = toKeyFormat( min , keya ); BSONObj maxClean = toKeyFormat( max , keyb ); assert( keya == keyb ); Client::Context ctx(ns); NamespaceDetails* nsd = nsdetails( ns.c_str() ); if ( ! nsd ) return 0; int ii = nsd->findIndexByKeyPattern( keya ); assert( ii >= 0 ); long long num = 0; IndexDetails& i = nsd->idx( ii ); shared_ptr<Cursor> c( new BtreeCursor( nsd , ii , i , minClean , maxClean , maxInclusive, 1 ) ); auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) ); cc->setDoingDeletes( true ); while ( c->ok() ) { DiskLoc rloc = c->currLoc(); BSONObj key = c->currKey(); if ( callback ) callback->goingToDelete( c->current() ); c->advance(); c->noteLocation(); logOp( "d" , ns.c_str() , rloc.obj()["_id"].wrap() ); theDataFileMgr.deleteRecord(ns.c_str() , rloc.rec(), rloc); num++; c->checkLocation(); if ( yield && ! cc->yieldSometimes() ) { // cursor got finished by someone else, so we're done cc.release(); // if the collection/db is dropped, cc may be deleted break; } } return num; }
void IndexRebuilder::checkDB(const std::string& dbName, bool* firstTime) { const std::string systemNS = dbName + ".system.namespaces"; DBDirectClient cli; scoped_ptr<DBClientCursor> cursor(cli.query(systemNS, Query())); // This depends on system.namespaces not changing while we iterate while (cursor->more()) { BSONObj nsDoc = cursor->next(); const char* ns = nsDoc["name"].valuestrsafe(); Client::WriteContext ctx(ns); NamespaceDetails* nsd = nsdetails(ns); if (!nsd || !nsd->indexBuildsInProgress) { continue; } log() << "Found interrupted index build on " << ns << endl; if (*firstTime) { log() << "Restart the server with --noIndexBuildRetry to skip index rebuilds" << endl; *firstTime = false; } // If the indexBuildRetry flag isn't set, just clear the inProg flag if (!cmdLine.indexBuildRetry) { // If we crash between unsetting the inProg flag and cleaning up the index, the // index space will be lost. int inProg = nsd->indexBuildsInProgress; getDur().writingInt(nsd->indexBuildsInProgress) = 0; for (int i = 0; i < inProg; i++) { nsd->idx(nsd->nIndexes+i).kill_idx(); } continue; } // We go from right to left building these indexes, so that indexBuildInProgress-- has // the correct effect of "popping" an index off the list. while (nsd->indexBuildsInProgress > 0) { retryIndexBuild(dbName, nsd, nsd->nIndexes+nsd->indexBuildsInProgress-1); } } }
/**
 * Computes, for each index reported by d.nIndexesBeingBuilt(), how the index
 * keys change when oldObj is replaced by newObj.
 *
 * For index i the old and new key sets are stored in v[i].oldkeys and
 * v[i].newkeys, and their differences in v[i].removed and v[i].added. An
 * index whose new object produces more than one key is marked multikey.
 *
 * @param v         resized to the number of indexes and filled in
 * @param d         namespace whose indexes are consulted (and whose multikey
 *                  flags may be set)
 * @param newObj    document after the update
 * @param oldObj    document before the update
 * @param changedId set to true when the _id index both loses and gains a
 *                  key; never reset to false here
 */
void getIndexChanges(vector<IndexChanges>& v, NamespaceDetails& d, BSONObj newObj,
                     BSONObj oldObj, bool &changedId) {
    const int z = d.nIndexesBeingBuilt();
    v.resize(z);
    for( int i = 0; i < z; i++ ) {
        IndexDetails& idx = d.idx(i);
        IndexChanges& ch = v[i];

        idx.getKeysFromObject(oldObj, ch.oldkeys);
        idx.getKeysFromObject(newObj, ch.newkeys);
        if( ch.newkeys.size() > 1 )
            d.setIndexIsMultikey(i);

        setDifference(ch.oldkeys, ch.newkeys, ch.removed);
        setDifference(ch.newkeys, ch.oldkeys, ch.added);

        // A key both removed from and added to the _id index means the
        // document's _id changed.
        if ( ch.removed.size() > 0 && ch.added.size() > 0 && idx.isIdIndex() ) {
            changedId = true;
        }
    }
}
bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." + cmdObj.firstElement().valuestr(); NamespaceDetails * d = nsdetails( ns.c_str() ); if ( ! d ) { errmsg = "can't find ns"; return false; } vector<int> idxs; d->findIndexByType( GEOSEARCHNAME , idxs ); if ( idxs.size() == 0 ) { errmsg = "no geoSearch index"; return false; } if ( idxs.size() > 1 ) { errmsg = "more than 1 geosearch index"; return false; } int idxNum = idxs[0]; IndexDetails& id = d->idx( idxNum ); GeoHaystackSearchIndex * si = (GeoHaystackSearchIndex*)id.getSpec().getType(); verify( &id == si->getDetails() ); BSONElement n = cmdObj["near"]; BSONElement maxDistance = cmdObj["maxDistance"]; BSONElement search = cmdObj["search"]; uassert( 13318 , "near needs to be an array" , n.isABSONObj() ); uassert( 13319 , "maxDistance needs a number" , maxDistance.isNumber() ); uassert( 13320 , "search needs to be an object" , search.type() == Object ); unsigned limit = 50; if ( cmdObj["limit"].isNumber() ) limit = (unsigned)cmdObj["limit"].numberInt(); si->searchCommand( d , idxNum , n.Obj() , maxDistance.numberDouble() , search.Obj() , result , limit ); return 1; }
void CursorGenerator::setArgumentsHint() { if ( useHints && _parsedQuery ) { _argumentsHint = _parsedQuery->getHint(); } if ( snapshot() ) { NamespaceDetails *d = nsdetails( _ns ); if ( d ) { int i = d->findIdIndex(); if( i < 0 ) { if ( _ns.find( ".system." ) == string::npos ) log() << "warning: no _id index on $snapshot query, ns:" << _ns << endl; } else { /* [dm] the name of an _id index tends to vary, so we build the hint the hard way here. probably need a better way to specify "use the _id index" as a hint. if someone is in the query optimizer please fix this then! */ _argumentsHint = BSON( "$hint" << d->idx(i).indexName() ); } } } }
/**
 * Tries to build a cursor without involving the query optimizer.
 *
 * @return a full-collection cursor when the plan policy permits the optimal
 *         natural plan and both query and order are empty; a BtreeCursor on
 *         the _id index when the policy permits the optimal _id plan, the
 *         query is a simple _id query and the collection has an _id index;
 *         an empty shared_ptr in every other case (including when
 *         mayShortcutQueryOptimizer() is false)
 */
shared_ptr<Cursor> CursorGenerator::shortcutCursor() const {
    const shared_ptr<Cursor> noShortcut;

    if ( !mayShortcutQueryOptimizer() ) {
        return noShortcut;
    }

    if ( _planPolicy.permitOptimalNaturalPlan() && _query.isEmpty() && _order.isEmpty() ) {
        return theDataFileMgr.findAll( _ns );
    }

    if ( !( _planPolicy.permitOptimalIdPlan() && isSimpleIdQuery( _query ) ) ) {
        return noShortcut;
    }

    Database *db = cc().database();
    verify( db );

    NamespaceDetails *details = db->namespaceIndex.details( _ns );
    if ( !details ) {
        return noShortcut;
    }

    const int idIndexNo = details->findIdIndex();
    if ( idIndexNo < 0 ) {
        return noShortcut;
    }

    IndexDetails& idIndex = details->idx( idIndexNo );
    BSONObj idKey = idIndex.getKeyFromQuery( _query );
    // Start and end key are the same: the cursor covers exactly one _id.
    return shared_ptr<Cursor>( BtreeCursor::make( details, idIndex, idKey, idKey, true, 1 ) );
}
/**
 * Applies an update to the documents of ns that match patternOrig.
 *
 * Flow, as implemented below:
 *   1. The namespace is fetched (or created) and, for operator updates
 *      (first field name starts with '$'), a ModSet is built.
 *   2. If the plan policy allows it, the update is single-document, the mods
 *      touch no indexed field, an _id index exists and mayUpdateById()
 *      agrees, the update goes through _updateById().
 *   3. Otherwise an optimized cursor is walked and each matching document is
 *      updated with updateUsingMods() (operator update) or updateNoMods()
 *      (replacement).
 *   4. If nothing was modified and upsert is set, a new document is inserted.
 *
 * @param ns          namespace to update
 * @param updateobj   update specification (operators or replacement document)
 * @param patternOrig query selecting the documents to update
 * @param upsert      insert a document when nothing matches
 * @param multi       update every match instead of only the first
 * @param logop       forwarded to getAndMaybeCreateNS, _updateById,
 *                    insertAndLog and the LogOpUpdateDetails
 * @param debug       receives updateobj, nscanned and the idhack / upsert /
 *                    fastmodinsert flags
 * @param fromMigrate forwarded to the logging helpers
 * @param planPolicy  decides whether the _id shortcut may be used and is
 *                    forwarded to getOptimizedCursor
 * @return an UpdateResult describing what happened
 */
UpdateResult _updateObjects( const char* ns,
                             const BSONObj& updateobj,
                             const BSONObj& patternOrig,
                             bool upsert,
                             bool multi,
                             bool logop ,
                             OpDebug& debug,
                             bool fromMigrate,
                             const QueryPlanSelectionPolicy& planPolicy ) {
    TOKULOG(2) << "update: " << ns << " update: " << updateobj << " query: " << patternOrig << " upsert: " << upsert << " multi: " << multi << endl;

    debug.updateobj = updateobj;

    NamespaceDetails *d = getAndMaybeCreateNS(ns, logop);

    auto_ptr<ModSet> mods;
    // An update whose first field name starts with '$' is an operator update.
    const bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$';
    bool modsAreIndexed = false;

    if ( isOperatorUpdate ) {
        if ( d->indexBuildInProgress() ) {
            // Also take the key fields of the in-progress index into account.
            set<string> bgKeys;
            d->inProgIdx().keyPattern().getFieldNames(bgKeys);
            mods.reset( new ModSet(updateobj, d->indexKeys(), &bgKeys) );
        }
        else {
            mods.reset( new ModSet(updateobj, d->indexKeys()) );
        }
        modsAreIndexed = mods->isIndexed();
    }

    // Shortcut: single-document update addressed by _id. findIdIndex() is
    // only evaluated when the preceding conditions hold.
    int idIdxNo = -1;
    if ( planPolicy.permitOptimalIdPlan() && !multi && !modsAreIndexed &&
         (idIdxNo = d->findIdIndex()) >= 0 && mayUpdateById(d, patternOrig) ) {
        debug.idhack = true;
        IndexDetails &idx = d->idx(idIdxNo);
        BSONObj pk = idx.getKeyFromQuery(patternOrig);
        TOKULOG(3) << "_updateObjects using simple _id query, pattern " << patternOrig << ", pk " << pk << endl;
        UpdateResult result = _updateById( pk,
                                           isOperatorUpdate,
                                           mods.get(),
                                           d,
                                           ns,
                                           updateobj,
                                           patternOrig,
                                           logop,
                                           debug,
                                           fromMigrate);
        if ( result.existing || ! upsert ) {
            return result;
        }
        else if ( upsert && ! isOperatorUpdate && ! logop) {
            debug.upsert = true;
            BSONObj objModified = updateobj;
            insertAndLog( ns, d, objModified, logop, fromMigrate );
            return UpdateResult( 0 , 0 , 1 , updateobj );
        }
        // Remaining case (nothing existed, upsert requested, and either an
        // operator update or logop set): fall through to the general path.
    }

    int numModded = 0;
    debug.nscanned = 0;
    shared_ptr<Cursor> c = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );

    if( c->ok() ) {
        set<BSONObj> seenObjects;
        MatchDetails details;
        // NOTE(review): cc is declared but not used anywhere in this function.
        auto_ptr<ClientCursor> cc;
        // Each "continue" below jumps to the loop condition, c->ok().
        do {

            debug.nscanned++;

            if ( mods.get() && mods->hasDynamicArray() ) {
                // The Cursor must have a Matcher to record an elemMatchKey.  But currently
                // a modifier on a dynamic array field may be applied even if there is no
                // elemMatchKey, so a matcher cannot be required.
                //verify( c->matcher() );
                details.requestElemMatchKey();
            }

            if ( !c->currentMatches( &details ) ) {
                c->advance();
                continue;
            }

            BSONObj currPK = c->currPK();
            if ( c->getsetdup( currPK ) ) {
                c->advance();
                continue;
            }

            BSONObj currentObj = c->current();

            // NOTE(review): pattern is assigned here but not read again in
            // this function; only the uassert below has an effect.
            BSONObj pattern = patternOrig;

            if ( logop ) {
                BSONObjBuilder idPattern;
                BSONElement id;
                // NOTE: If the matching object lacks an id, we'll log
                // with the original pattern.  This isn't replay-safe.
                // It might make sense to suppress the log instead
                // if there's no id.
                if ( currentObj.getObjectID( id ) ) {
                    idPattern.append( id );
                    pattern = idPattern.obj();
                }
                else {
                    uassert( 10157 ,  "multi-update requires all modified objects to have an _id" , ! multi );
                }
            }

            /* look for $inc etc.  note as listed here, all fields to inc must be this type, you can't set some
               regular ones at the moment. */
            struct LogOpUpdateDetails loud;
            loud.logop = logop;
            loud.ns = ns;
            loud.fromMigrate = fromMigrate;
            if ( isOperatorUpdate ) {

                if ( multi ) {
                    // Make our own copies of the currPK and currentObj before we invalidate
                    // them by advancing the cursor.
                    currPK = currPK.copy();
                    currentObj = currentObj.copy();

                    // Advance past the document to be modified. This used to be because of SERVER-5198,
                    // but TokuMX does it because we want to avoid needing to do manual deduplication
                    // of this PK on the next iteration if the current update modifies the next
                    // entry in the index. For example, an index scan over a:1 with mod {$inc: {a:1}}
                    // would cause every other key read to be a duplicate if we didn't advance here.
                    while ( c->ok() && currPK == c->currPK() ) {
                        c->advance();
                    }

                    // Multi updates need to do their own deduplication because updates may modify the
                    // keys the cursor is in the process of scanning over.
                    if ( seenObjects.count( currPK ) ) {
                        continue;
                    }
                    else {
                        seenObjects.insert( currPK );
                    }
                }

                ModSet* useMods = mods.get();

                // mymodset takes ownership of the ModSet returned by
                // fixDynamicArray(), when one is created.
                auto_ptr<ModSet> mymodset;
                if ( details.hasElemMatchKey() && mods->hasDynamicArray() ) {
                    useMods = mods->fixDynamicArray( details.elemMatchKey() );
                    mymodset.reset( useMods );
                }

                auto_ptr<ModSetState> mss = useMods->prepare( currentObj );
                updateUsingMods( d, currPK, currentObj, *mss, &loud );

                numModded++;
                if ( ! multi )
                    return UpdateResult( 1 , 1 , numModded , BSONObj() );

                continue;
            } // end if operator is update

            // Replacement-style update: only valid for a single document.
            uassert( 10158 ,  "multi update only works with $ operators" , ! multi );

            updateNoMods( d, currPK, currentObj, updateobj, &loud );

            return UpdateResult( 1 , 0 , 1 , BSONObj() );
        } while ( c->ok() );
    } // endif

    if ( numModded )
        return UpdateResult( 1 , 1 , numModded , BSONObj() );

    if ( upsert ) {
        BSONObj newObj = updateobj;
        if ( updateobj.firstElementFieldName()[0] == '$' ) {
            // upsert of an $operation. build a default object
            // (this newObj shadows the one declared just above)
            BSONObj newObj = mods->createNewFromQuery( patternOrig );
            debug.fastmodinsert = true;
            insertAndLog( ns, d, newObj, logop, fromMigrate );
            return UpdateResult( 0 , 1 , 1 , newObj );
        }
        uassert( 10159 ,  "multi update only works with $ operators" , ! multi );
        debug.upsert = true;
        insertAndLog( ns, d, newObj, logop, fromMigrate );
        return UpdateResult( 0 , 0 , 1 , newObj );
    }

    // Nothing matched and no upsert was requested.
    return UpdateResult( 0 , isOperatorUpdate , 0 , BSONObj() );
}
/**
 * Renames a collection together with all of its indexes.
 *
 * The data namespace is renamed first. Then the system index record store is
 * walked; for every spec whose "ns" equals fromNS a copy with "ns" set to
 * toNS is inserted, the matching IndexDetails::info is pointed at the new
 * record, the index namespace is renamed, and the old spec record is deleted.
 *
 * @param txn      operation context used for all reads and writes
 * @param fromNS   current full namespace of the collection
 * @param toNS     new full namespace
 * @param stayTemp forwarded to _renameSingleNamespace for the data namespace
 * @return the first non-OK status encountered, else OK
 */
Status MMAPV1DatabaseCatalogEntry::renameCollection(OperationContext* txn,
                                                    StringData fromNS,
                                                    StringData toNS,
                                                    bool stayTemp) {
    Status s = _renameSingleNamespace(txn, fromNS, toNS, stayTemp);
    if (!s.isOK())
        return s;

    NamespaceDetails* details = _namespaceIndex.details(toNS);
    invariant(details);

    RecordStoreV1Base* systemIndexRecordStore = _getIndexRecordStore();
    auto cursor = systemIndexRecordStore->getCursor(txn);
    while (auto record = cursor->next()) {
        BSONObj oldIndexSpec = record->data.releaseToBson();
        // Specs belonging to other collections are left untouched.
        if (fromNS != oldIndexSpec["ns"].valuestrsafe())
            continue;

        // Copy the spec field by field, substituting the new namespace.
        BSONObj newIndexSpec;
        {
            BSONObjBuilder b;
            BSONObjIterator i(oldIndexSpec);
            while (i.more()) {
                BSONElement e = i.next();
                if (strcmp(e.fieldName(), "ns") != 0)
                    b.append(e);
                else
                    b << "ns" << toNS;
            }
            newIndexSpec = b.obj();
        }

        StatusWith<RecordId> newIndexSpecLoc = systemIndexRecordStore->insertRecord(
            txn, newIndexSpec.objdata(), newIndexSpec.objsize(), false);
        if (!newIndexSpecLoc.isOK())
            return newIndexSpecLoc.getStatus();

        const std::string& indexName = oldIndexSpec.getStringField("name");

        {
            // Fix the IndexDetails pointer.
            int indexI = getCollectionCatalogEntry(toNS)->_findIndexNumber(txn, indexName);
            // A spec with no matching catalog slot would make idx() below
            // index with a negative number; fail loudly instead.
            invariant(indexI >= 0);

            IndexDetails& indexDetails = details->idx(indexI);
            *txn->recoveryUnit()->writing(&indexDetails.info) =
                DiskLoc::fromRecordId(newIndexSpecLoc.getValue());
        }

        {
            // Move the underlying namespace.
            std::string oldIndexNs = IndexDescriptor::makeIndexNamespace(fromNS, indexName);
            std::string newIndexNs = IndexDescriptor::makeIndexNamespace(toNS, indexName);

            Status indexStatus = _renameSingleNamespace(txn, oldIndexNs, newIndexNs, false);
            if (!indexStatus.isOK())
                return indexStatus;
        }

        systemIndexRecordStore->deleteRecord(txn, record->id);
    }

    return Status::OK();
}
/**
 * Renames a collection together with all of its indexes.
 *
 * The data namespace is renamed first. Then the system index record store is
 * iterated; for every spec whose "ns" equals fromNS a copy with "ns" set to
 * toNS is inserted, the matching IndexDetails::info is pointed at the new
 * record, the index namespace is renamed, and the old spec record is deleted.
 *
 * @param txn      operation context used for the writes
 * @param fromNS   current full namespace of the collection
 * @param toNS     new full namespace
 * @param stayTemp forwarded to _renameSingleNamespace for the data namespace
 * @return the first non-OK status encountered, else OK
 */
Status MMAPV1DatabaseCatalogEntry::renameCollection( OperationContext* txn,
                                                     const StringData& fromNS,
                                                     const StringData& toNS,
                                                     bool stayTemp ) {
    Status s = _renameSingleNamespace( txn, fromNS, toNS, stayTemp );
    if ( !s.isOK() )
        return s;

    NamespaceDetails* details = _namespaceIndex.details( toNS );
    invariant( details );

    RecordStoreV1Base* systemIndexRecordStore = _getIndexRecordStore( txn );
    scoped_ptr<RecordIterator> it( systemIndexRecordStore->getIterator() );

    while ( !it->isEOF() ) {
        DiskLoc loc = it->getNext();
        const Record* rec = it->recordFor( loc );
        BSONObj oldIndexSpec( rec->data() );
        // Specs belonging to other collections are left untouched.
        if ( fromNS != oldIndexSpec["ns"].valuestrsafe() )
            continue;

        // Copy the spec field by field, substituting the new namespace.
        BSONObj newIndexSpec;
        {
            BSONObjBuilder b;
            BSONObjIterator i( oldIndexSpec );
            while( i.more() ) {
                BSONElement e = i.next();
                if ( strcmp( e.fieldName(), "ns" ) != 0 )
                    b.append( e );
                else
                    b << "ns" << toNS;
            }
            newIndexSpec = b.obj();
        }

        StatusWith<DiskLoc> newIndexSpecLoc =
            systemIndexRecordStore->insertRecord( txn,
                                                  newIndexSpec.objdata(),
                                                  newIndexSpec.objsize(),
                                                  -1 );
        if ( !newIndexSpecLoc.isOK() )
            return newIndexSpecLoc.getStatus();

        const string& indexName = oldIndexSpec.getStringField( "name" );

        {
            // fix IndexDetails pointer
            NamespaceDetailsCollectionCatalogEntry ce( toNS, details,
                                                       _getIndexRecordStore( txn ), this );
            int indexI = ce._findIndexNumber( indexName );
            // A spec with no matching catalog slot would make idx() below
            // index with a negative number; fail loudly instead.
            invariant( indexI >= 0 );

            IndexDetails& indexDetails = details->idx(indexI);
            *txn->recoveryUnit()->writing(&indexDetails.info) = newIndexSpecLoc.getValue(); // XXX: dur
        }

        {
            // move underlying namespace
            string oldIndexNs = IndexDescriptor::makeIndexNamespace( fromNS, indexName );
            string newIndexNs = IndexDescriptor::makeIndexNamespace( toNS, indexName );

            Status indexStatus = _renameSingleNamespace( txn, oldIndexNs, newIndexNs, false );
            if ( !indexStatus.isOK() )
                return indexStatus;
        }

        systemIndexRecordStore->deleteRecord( txn, loc );
    }

    return Status::OK();
}
/**
 * Applies an update to the documents of ns that match patternOrig.
 *
 * Flow, as implemented below:
 *   1. For operator updates (first field name starts with '$') a ModSet is
 *      built, taking the keys of in-progress index builds into account.
 *   2. If the plan policy allows it, the update is single-document, the
 *      query is a simple _id query, the namespace exists and the mods touch
 *      no indexed field, the update goes through _updateById().
 *   3. Otherwise a cursor is walked. Between documents the loop may yield
 *      through a ClientCursor (unless the matcher is atomic); after a yield
 *      the namespace pointers and the indexed-ness of the mods are refreshed.
 *      Matching documents are updated in place when possible, otherwise via
 *      theDataFileMgr.updateRecord().
 *   4. If nothing was modified and upsert is set, a new document is inserted.
 *
 * @param su             forwarded to _updateById, insertWithObjMod and the
 *                       replacement-style updateRecord call
 * @param ns             namespace to update
 * @param updateobj      update specification (operators or replacement)
 * @param patternOrig    query selecting the documents to update
 * @param upsert         insert a document when nothing matches
 * @param multi          update every match instead of only the first
 * @param logop          when set, changes are passed to logOp
 * @param debug          receives updateobj, nscanned and the idhack / upsert /
 *                       fastmod / fastmodinsert flags
 * @param rs             optional; told about a document before it is
 *                       rewritten out of place
 * @param fromMigrate    forwarded to logOp
 * @param planPolicy     decides whether the _id shortcut may be used and is
 *                       forwarded to getCursor
 * @param forReplication forwarded to the ModSet constructor
 * @return an UpdateResult describing what happened
 * @throws PageFaultException when the client allows it, no ClientCursor has
 *         been created yet, and the current record is not likely to be in
 *         physical memory
 */
UpdateResult _updateObjects( bool su,
                             const char* ns,
                             const BSONObj& updateobj,
                             const BSONObj& patternOrig,
                             bool upsert,
                             bool multi,
                             bool logop ,
                             OpDebug& debug,
                             RemoveSaver* rs,
                             bool fromMigrate,
                             const QueryPlanSelectionPolicy& planPolicy,
                             bool forReplication ) {
    DEBUGUPDATE( "update: " << ns
                 << " update: " << updateobj
                 << " query: " << patternOrig
                 << " upsert: " << upsert << " multi: " << multi );

    Client& client = cc();
    int profile = client.database()->profile;

    debug.updateobj = updateobj;

    // The idea with these here is to make them loop invariant for
    // multi updates, and thus be a bit faster for that case.  The
    // pointers may be left invalid on a failed or terminal yield
    // recovery.
    NamespaceDetails* d = nsdetails(ns); // can be null if an upsert...
    NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get(ns);

    auto_ptr<ModSet> mods;
    // An update whose first field name starts with '$' is an operator update.
    bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$';
    int modsIsIndexed = false; // really the # of indexes
    if ( isOperatorUpdate ) {
        if( d && d->indexBuildsInProgress ) {
            // Collect the key fields of every index build in progress; those
            // indexes sit after the first nIndexes slots.
            set<string> bgKeys;
            for (int i = 0; i < d->indexBuildsInProgress; i++) {
                d->idx(d->nIndexes+i).keyPattern().getFieldNames(bgKeys);
            }
            mods.reset( new ModSet(updateobj, nsdt->indexKeys(), &bgKeys, forReplication) );
        }
        else {
            mods.reset( new ModSet(updateobj, nsdt->indexKeys(), NULL, forReplication) );
        }
        modsIsIndexed = mods->isIndexed();
    }

    // Shortcut: single-document update addressed by _id.
    if( planPolicy.permitOptimalIdPlan() && !multi && isSimpleIdQuery(patternOrig) && d &&
       !modsIsIndexed ) {
        int idxNo = d->findIdIndex();
        if( idxNo >= 0 ) {
            debug.idhack = true;

            UpdateResult result = _updateById( isOperatorUpdate,
                                               idxNo,
                                               mods.get(),
                                               profile,
                                               d,
                                               nsdt,
                                               su,
                                               ns,
                                               updateobj,
                                               patternOrig,
                                               logop,
                                               debug,
                                               fromMigrate);
            if ( result.existing || ! upsert ) {
                return result;
            }
            else if ( upsert && ! isOperatorUpdate ) {
                // this handles repl inserts
                checkNoMods( updateobj );
                debug.upsert = true;
                BSONObj no = updateobj;
                theDataFileMgr.insertWithObjMod(ns, no, false, su);
                if ( logop )
                    logOp( "i", ns, no, 0, 0, fromMigrate );

                return UpdateResult( 0 , 0 , 1 , no );
            }
            // Remaining case (nothing existed, operator upsert): fall
            // through to the general path below.
        }
    }

    int numModded = 0;
    debug.nscanned = 0;
    shared_ptr<Cursor> c =
        NamespaceDetailsTransient::getCursor( ns, patternOrig, BSONObj(), planPolicy );
    // Both namespace pointers are fetched again after the cursor is created.
    d = nsdetails(ns);
    nsdt = &NamespaceDetailsTransient::get(ns);
    bool autoDedup = c->autoDedup();

    if( c->ok() ) {
        set<DiskLoc> seenObjects;
        MatchDetails details;
        // Created lazily, the first time the loop wants to yield.
        auto_ptr<ClientCursor> cc;
        // Each "continue" below jumps to the loop condition, c->ok().
        do {

            if ( cc.get() == 0 &&
                 client.allowedToThrowPageFaultException() &&
                 ! c->currLoc().isNull() &&
                 ! c->currLoc().rec()->likelyInPhysicalMemory() ) {
                throw PageFaultException( c->currLoc().rec() );
            }

            bool atomic = c->matcher() && c->matcher()->docMatcher().atomic();

            // Yielding is skipped for atomic matchers and before the first
            // document has been scanned.
            if ( ! atomic && debug.nscanned > 0 ) {
                // we need to use a ClientCursor to yield
                if ( cc.get() == 0 ) {
                    shared_ptr< Cursor > cPtr = c;
                    cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) );
                }

                bool didYield;
                if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) {
                    // Give up ownership without deleting before leaving.
                    cc.release();
                    break;
                }
                if ( !c->ok() ) {
                    break;
                }

                if ( didYield ) {
                    // Refresh everything that may have changed during the yield.
                    d = nsdetails(ns);
                    if ( ! d )
                        break;
                    nsdt = &NamespaceDetailsTransient::get(ns);
                    if ( mods.get() && ! mods->isIndexed() ) {
                        set<string> bgKeys;
                        for (int i = 0; i < d->indexBuildsInProgress; i++) {
                            // we need to re-check indexes
                            d->idx(d->nIndexes+i).keyPattern().getFieldNames(bgKeys);
                        }
                        mods->updateIsIndexed( nsdt->indexKeys() , &bgKeys );
                        modsIsIndexed = mods->isIndexed();
                    }
                }

            } // end yielding block

            debug.nscanned++;

            if ( mods.get() && mods->hasDynamicArray() ) {
                // The Cursor must have a Matcher to record an elemMatchKey.  But currently
                // a modifier on a dynamic array field may be applied even if there is no
                // elemMatchKey, so a matcher cannot be required.
                //verify( c->matcher() );
                details.requestElemMatchKey();
            }

            if ( !c->currentMatches( &details ) ) {
                c->advance();
                continue;
            }

            Record* r = c->_current();
            DiskLoc loc = c->currLoc();

            // getsetdup() is always called; its result only causes a skip
            // when the cursor dedups automatically.
            if ( c->getsetdup( loc ) && autoDedup ) {
                c->advance();
                continue;
            }

            BSONObj js = BSONObj::make(r);

            // Pattern passed to logOp: the document's _id when it has one,
            // otherwise the original query.
            BSONObj pattern = patternOrig;

            if ( logop ) {
                BSONObjBuilder idPattern;
                BSONElement id;
                // NOTE: If the matching object lacks an id, we'll log
                // with the original pattern.  This isn't replay-safe.
                // It might make sense to suppress the log instead
                // if there's no id.
                if ( js.getObjectID( id ) ) {
                    idPattern.append( id );
                    pattern = idPattern.obj();
                }
                else {
                    uassert( 10157 ,  "multi-update requires all modified objects to have an _id" , ! multi );
                }
            }

            /* look for $inc etc.  note as listed here, all fields to inc must be this type, you can't set some
               regular ones at the moment. */
            if ( isOperatorUpdate ) {

                if ( multi ) {
                    // go to next record in case this one moves
                    c->advance();

                    // Update operations are deduped for cursors that implement their own
                    // deduplication.  In particular, some geo cursors are excluded.
                    if ( autoDedup ) {

                        if ( seenObjects.count( loc ) ) {
                            continue;
                        }

                        // SERVER-5198 Advance past the document to be modified, provided
                        // deduplication is enabled, but see SERVER-5725.
                        while( c->ok() && loc == c->currLoc() ) {
                            c->advance();
                        }
                    }
                }

                const BSONObj& onDisk = loc.obj();

                ModSet* useMods = mods.get();

                // mymodset takes ownership of the ModSet returned by
                // fixDynamicArray(), when one is created.
                auto_ptr<ModSet> mymodset;
                if ( details.hasElemMatchKey() && mods->hasDynamicArray() ) {
                    useMods = mods->fixDynamicArray( details.elemMatchKey() );
                    mymodset.reset( useMods );
                }

                auto_ptr<ModSetState> mss = useMods->prepare( onDisk );

                // True when the update below will not be applied in place (or
                // touches indexed fields) while the cursor still has
                // documents; the cursor is then prepared beforehand and
                // recovered afterwards.
                bool willAdvanceCursor = multi && c->ok() && ( modsIsIndexed || ! mss->canApplyInPlace() );

                if ( willAdvanceCursor ) {
                    if ( cc.get() ) {
                        cc->setDoingDeletes( true );
                    }
                    c->prepareToTouchEarlierIterate();
                }

                // If we've made it this far, "ns" must contain a valid collection name, and so
                // is of the form "db.collection".  Therefore, the following expression must
                // always be valid.  "system.users" updates must never be done in place, in
                // order to ensure that they are validated inside DataFileMgr::updateRecord(.).
                bool isSystemUsersMod = (NamespaceString(ns).coll == "system.users");

                if ( modsIsIndexed <= 0 && mss->canApplyInPlace() && !isSystemUsersMod ) {
                    mss->applyModsInPlace( true );// const_cast<BSONObj&>(onDisk) );

                    DEBUGUPDATE( "\t\t\t doing in place update" );
                    if ( profile && !multi )
                        debug.fastmod = true;

                    if ( modsIsIndexed ) {
                        seenObjects.insert( loc );
                    }

                    d->paddingFits();
                }
                else {
                    if ( rs )
                        rs->goingToDelete( onDisk );

                    BSONObj newObj = mss->createNewFromMods();
                    checkTooLarge(newObj);
                    DiskLoc newLoc = theDataFileMgr.updateRecord(ns,
                                                                 d,
                                                                 nsdt,
                                                                 r,
                                                                 loc,
                                                                 newObj.objdata(),
                                                                 newObj.objsize(),
                                                                 debug);

                    if ( newLoc != loc || modsIsIndexed ){
                        // log() << "Moved obj " << newLoc.obj()["_id"] << " from " << loc << " to " << newLoc << endl;
                        // object moved, need to make sure we don't get it again
                        seenObjects.insert( newLoc );
                    }

                }

                if ( logop ) {
                    DEV verify( mods->size() );
                    BSONObj logObj = mss->getOpLogRewrite();
                    DEBUGUPDATE( "\t rewrite update: " << logObj );

                    // It is possible that the entire mod set was a no-op over this
                    // document.  We would have an empty log record in that case. If we
                    // call logOp, with an empty record, that would be replicated as "clear
                    // this record", which is not what we want. Therefore, to get a no-op
                    // in the replica, we simply don't log.
                    if ( logObj.nFields() ) {
                        logOp("u", ns, logObj , &pattern, 0, fromMigrate );
                    }
                }
                numModded++;
                if ( ! multi )
                    return UpdateResult( 1 , 1 , numModded , BSONObj() );
                if ( willAdvanceCursor )
                    c->recoverFromTouchingEarlierIterate();

                getDur().commitIfNeeded();

                continue;
            }

            // Replacement-style update: only valid for a single document.
            uassert( 10158 ,  "multi update only works with $ operators" , ! multi );

            BSONElementManipulator::lookForTimestamps( updateobj );
            checkNoMods( updateobj );
            theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, su);
            if ( logop ) {
                DEV wassert( !su ); // super used doesn't get logged, this would be bad.
                logOp("u", ns, updateobj, &pattern, 0, fromMigrate );
            }
            return UpdateResult( 1 , 0 , 1 , BSONObj() );
        } while ( c->ok() );
    } // endif

    if ( numModded )
        return UpdateResult( 1 , 1 , numModded , BSONObj() );

    if ( upsert ) {
        if ( updateobj.firstElementFieldName()[0] == '$' ) {
            // upsert of an $operation. build a default object
            BSONObj newObj = mods->createNewFromQuery( patternOrig );
            checkNoMods( newObj );
            debug.fastmodinsert = true;
            theDataFileMgr.insertWithObjMod(ns, newObj, false, su);
            if ( logop )
                logOp( "i", ns, newObj, 0, 0, fromMigrate );

            return UpdateResult( 0 , 1 , 1 , newObj );
        }
        uassert( 10159 ,  "multi update only works with $ operators" , ! multi );
        checkNoMods( updateobj );
        debug.upsert = true;
        BSONObj no = updateobj;
        theDataFileMgr.insertWithObjMod(ns, no, false, su);
        if ( logop )
            logOp( "i", ns, no, 0, 0, fromMigrate );
        return UpdateResult( 0 , 0 , 1 , no );
    }

    // Nothing matched and no upsert was requested.
    return UpdateResult( 0 , isOperatorUpdate , 0 , BSONObj() );
}
/**
 * Reads this index's head location from the collection's NamespaceDetails
 * (as opposed to the cached _head member).
 *
 * @return the head field of the IndexDetails at this entry's index number
 */
DiskLoc IndexCatalogEntry::_catalogHead() const {
    NamespaceDetails* details = _collection->details();
    const int slot = _indexNo();
    IndexDetails& entry = details->idx( slot );
    return entry.head;
}
long long Helpers::removeRange( const string& ns , const BSONObj& min , const BSONObj& max , const BSONObj& keyPattern , bool maxInclusive , bool secondaryThrottle , RemoveCallback * callback, bool fromMigrate ) { Client& c = cc(); long long numDeleted = 0; PageFaultRetryableSection pgrs; long long millisWaitingForReplication = 0; while ( 1 ) { try { Client::WriteContext ctx(ns); scoped_ptr<Cursor> c; { NamespaceDetails* nsd = nsdetails( ns.c_str() ); if ( ! nsd ) break; int ii = nsd->findIndexByKeyPattern( keyPattern ); verify( ii >= 0 ); IndexDetails& i = nsd->idx( ii ); // Extend min to get (min, MinKey, MinKey, ....) BSONObj newMin = Helpers::modifiedRangeBound( min , keyPattern , -1 ); // If upper bound is included, extend max to get (max, MaxKey, MaxKey, ...) // If not included, extend max to get (max, MinKey, MinKey, ....) int minOrMax = maxInclusive ? 1 : -1; BSONObj newMax = Helpers::modifiedRangeBound( max , keyPattern , minOrMax ); c.reset( BtreeCursor::make( nsd , ii , i , newMin , newMax , maxInclusive , 1 ) ); } if ( ! c->ok() ) { // we're done break; } DiskLoc rloc = c->currLoc(); BSONObj obj = c->current(); // this is so that we don't have to handle this cursor in the delete code c.reset(0); if ( callback ) callback->goingToDelete( obj ); logOp( "d" , ns.c_str() , rloc.obj()["_id"].wrap() , 0 , 0 , fromMigrate ); theDataFileMgr.deleteRecord(ns.c_str() , rloc.rec(), rloc); numDeleted++; } catch( PageFaultException& e ) { e.touch(); continue; } Timer secondaryThrottleTime; if ( secondaryThrottle ) { if ( ! waitForReplication( c.getLastOp(), 2, 60 /* seconds to wait */ ) ) { warning() << "replication to secondaries for removeRange at least 60 seconds behind" << endl; } millisWaitingForReplication += secondaryThrottleTime.millis(); } if ( ! 
Lock::isLocked() ) { int micros = ( 2 * Client::recommendedYieldMicros() ) - secondaryThrottleTime.micros(); if ( micros > 0 ) { LOG(1) << "Helpers::removeRangeUnlocked going to sleep for " << micros << " micros" << endl; sleepmicros( micros ); } } } if ( secondaryThrottle ) log() << "Helpers::removeRangeUnlocked time spent waiting for replication: " << millisWaitingForReplication << "ms" << endl; return numDeleted; }