/**
 * Build a cursor over 'ns' for the given query and sort order.
 *
 * Three strategies are tried in turn:
 *   1. empty query and empty order: a full scan from theDataFileMgr.findAll();
 *   2. simple _id query with an _id index present: a single-key BtreeCursor on that index;
 *   3. otherwise: a MultiPlanScanner, returning its single cursor when it has one
 *      and a query optimizer cursor when it does not.
 */
shared_ptr<Cursor> NamespaceDetailsTransient::getCursor( const char *ns, const BSONObj &query, const BSONObj &order ) {
    const bool unconstrained = query.isEmpty() && order.isEmpty();
    if ( unconstrained ) {
        // TODO This will not use a covered index currently.
        return theDataFileMgr.findAll( ns );
    }

    if ( isSimpleIdQuery( query ) ) {
        Database *db = cc().database();
        assert( db );
        NamespaceDetails *nsd = db->namespaceIndex.details( ns );
        // -1 stands for "no usable _id index", whether the namespace is missing
        // or the namespace simply has no such index.
        const int idIndexNo = nsd ? nsd->findIdIndex() : -1;
        if ( idIndexNo >= 0 ) {
            IndexDetails &idIndex = nsd->idx( idIndexNo );
            BSONObj idKey = idIndex.getKeyFromQuery( query );
            // Start key == end key, end inclusive, forward direction.
            return shared_ptr<Cursor>( BtreeCursor::make( nsd, idIndexNo, idIndex, idKey, idKey, true, 1 ) );
        }
    }

    auto_ptr<MultiPlanScanner> scanner( new MultiPlanScanner( ns, query, order ) ); // mayYield == false
    shared_ptr<Cursor> soleCursor = scanner->singleCursor();
    if ( !soleCursor ) {
        return newQueryOptimizerCursor( scanner );
    }

    // Attach a matcher if the query is non-empty and the cursor does not have one yet.
    if ( !query.isEmpty() && !soleCursor->matcher() ) {
        shared_ptr<CoveredIndexMatcher> queryMatcher(
            new CoveredIndexMatcher( query, soleCursor->indexKeyPattern() ) );
        soleCursor->setMatcher( queryMatcher );
    }
    return soleCursor;
}
/**
 * Decide whether an update may be applied directly by _id.
 *
 * Returns true only when:
 *   - 'patternOrig' is a simple _id query, and
 *   - none of the first d->nIndexesBeingBuilt() indexes has a true
 *     "clustering" field in its info object.
 *
 * NOTE(review): this function does not itself check whether the modifications
 * touch indexed fields; presumably callers establish that separately - confirm.
 */
static bool mayUpdateById(NamespaceDetails *d, const BSONObj &patternOrig) {
    if ( !isSimpleIdQuery(patternOrig) ) {
        return false;
    }

    int indexNo = 0;
    while ( indexNo < d->nIndexesBeingBuilt() ) {
        IndexDetails &candidate = d->idx(indexNo);
        const bool isClustering = candidate.info()["clustering"].trueValue();
        if ( isClustering ) {
            // A clustering secondary key rules out the by-_id path.
            return false;
        }
        indexNo++;
    }

    // Simple _id query and no clustering secondary keys.
    return true;
}
/**
 * Try to produce a cursor without running the query optimizer.
 *
 * Returns an empty shared_ptr when no shortcut applies: shortcuts are not
 * allowed at all, the plan policy forbids the relevant plan, or the namespace
 * or its _id index is missing.
 */
shared_ptr<Cursor> CursorGenerator::shortcutCursor() const {
    if ( !mayShortcutQueryOptimizer() ) {
        return shared_ptr<Cursor>();
    }

    // No query and no ordering: a plain scan of the collection is optimal.
    const bool naturalPlanApplies =
        _planPolicy.permitOptimalNaturalPlan() && _query.isEmpty() && _order.isEmpty();
    if ( naturalPlanApplies ) {
        return theDataFileMgr.findAll( _ns );
    }

    const bool idPlanApplies = _planPolicy.permitOptimalIdPlan() && isSimpleIdQuery( _query );
    if ( !idPlanApplies ) {
        return shared_ptr<Cursor>();
    }

    Database *db = cc().database();
    verify( db );
    NamespaceDetails *nsd = db->namespaceIndex.details( _ns );
    if ( !nsd ) {
        return shared_ptr<Cursor>();
    }

    const int idIndexNo = nsd->findIdIndex();
    if ( idIndexNo < 0 ) {
        return shared_ptr<Cursor>();
    }

    // Single-key range on the _id index: start == end, end inclusive, forward.
    IndexDetails &idIndex = nsd->idx( idIndexNo );
    BSONObj idKey = idIndex.getKeyFromQuery( _query );
    return shared_ptr<Cursor>( BtreeCursor::make( nsd, idIndex, idKey, idKey, true, 1 ) );
}
bool runNoDirectClient( const string& ns , const BSONObj& queryOriginal , const BSONObj& fields , const BSONObj& update , bool upsert , bool returnNew , bool remove , BSONObjBuilder& result , string& errmsg ) { Lock::DBWrite lk( ns ); Client::Context cx( ns ); BSONObj doc; bool found = Helpers::findOne( ns.c_str() , queryOriginal , doc ); BSONObj queryModified = queryOriginal; if ( found && doc["_id"].type() && ! isSimpleIdQuery( queryOriginal ) ) { // we're going to re-write the query to be more efficient // we have to be a little careful because of positional operators // maybe we can pass this all through eventually, but right now isn't an easy way bool hasPositionalUpdate = false; { // if the update has a positional piece ($) // then we need to pull all query parts in // so here we check for $ // a little hacky BSONObjIterator i( update ); while ( i.more() ) { const BSONElement& elem = i.next(); if ( elem.fieldName()[0] != '$' || elem.type() != Object ) continue; BSONObjIterator j( elem.Obj() ); while ( j.more() ) { if ( str::contains( j.next().fieldName(), ".$" ) ) { hasPositionalUpdate = true; break; } } } } BSONObjBuilder b( queryOriginal.objsize() + 10 ); b.append( doc["_id"] ); bool addedAtomic = false; BSONObjIterator i( queryOriginal ); while ( i.more() ) { const BSONElement& elem = i.next(); if ( str::equals( "_id" , elem.fieldName() ) ) { // we already do _id continue; } if ( ! hasPositionalUpdate ) { // if there is a dotted field, accept we may need more query parts continue; } if ( ! addedAtomic ) { b.appendBool( "$atomic" , true ); addedAtomic = true; } b.append( elem ); } queryModified = b.obj(); } if ( remove ) { _appendHelper( result , doc , found , fields ); if ( found ) { deleteObjects( ns.c_str() , queryModified , true , true ); BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) ); le.appendNumber( "n" , 1 ); le.done(); } } else { // update if ( ! found && ! 
upsert ) { // didn't have it, and am not upserting _appendHelper( result , doc , found , fields ); } else { // we found it or we're updating if ( ! returnNew ) { _appendHelper( result , doc , found , fields ); } UpdateResult res = updateObjects( ns.c_str() , update , queryModified , upsert , false , true , cc().curop()->debug() ); if ( returnNew ) { if ( res.upserted.isSet() ) { queryModified = BSON( "_id" << res.upserted ); } else if ( queryModified["_id"].type() ) { // we do this so that if the update changes the fields, it still matches queryModified = queryModified["_id"].wrap(); } if ( ! Helpers::findOne( ns.c_str() , queryModified , doc ) ) { errmsg = str::stream() << "can't find object after modification " << " ns: " << ns << " queryModified: " << queryModified << " queryOriginal: " << queryOriginal; log() << errmsg << endl; return false; } _appendHelper( result , doc , true , fields ); } BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) ); le.appendBool( "updatedExisting" , res.existing ); le.appendNumber( "n" , res.num ); if ( res.upserted.isSet() ) le.append( "upserted" , res.upserted ); le.done(); } } return true; }
/* Delete the documents in 'ns' that match 'pattern'.

   ns:          namespace, e.g. <database>.<collection>
   pattern:     the "where" clause / criteria
   justOneOrig: stop after 1 match
   logop:       when true, each delete of a document that has an _id is passed to
                logOp() as a "d" entry carrying only that _id
   god:         allow access to system namespaces, and don't yield
   rs:          optional; when non-null, goingToDelete() is called with each
                document just before it is removed

   Returns the number of records deleted. Returns 0 when nsdetails( ns ) is null.

   uasserts: 12050 (".system." namespace not accepted by legalClientSystemNS),
             10100 ('$' in the namespace name),
             10101 (capped collection).
             The first two are only checked when 'god' is false.
*/
long long deleteObjects(const char *ns, BSONObj pattern, bool justOneOrig, bool logop, bool god, RemoveSaver * rs ) {
    if( !god ) {
        if ( strstr(ns, ".system.") ) {
            /* note a delete from system.indexes would corrupt the db
               if done here, as there are pointers into those objects in
               NamespaceDetails.
            */
            uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns , true ) );
        }
        if ( strchr( ns , '$' ) ) {
            log() << "cannot delete from collection with reserved $ in name: " << ns << endl;
            // The condition repeats the enclosing test, so this uassert always
            // fires once we are inside this branch.
            uassert( 10100 , "cannot delete from collection with reserved $ in name", strchr(ns, '$') == 0 );
        }
    }

    {
        NamespaceDetails *d = nsdetails( ns );
        if ( ! d )
            return 0;
        uassert( 10101 , "can't remove from a capped collection" , ! d->capped );
    }

    long long nDeleted = 0;

    shared_ptr< Cursor > creal = NamespaceDetailsTransient::getCursor( ns, pattern, BSONObj(), false, 0 );

    // Nothing to iterate over: report zero deletions.
    if( !creal->ok() )
        return nDeleted;

    shared_ptr< Cursor > cPtr = creal;
    auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns) );
    cc->setDoingDeletes( true );

    // Remembered so the sanity check after the loop can look the cursor up again.
    CursorId id = cc->cursorid();

    bool justOne = justOneOrig;
    // Yielding is disabled in god mode and when the matcher reports an atomic query.
    bool canYield = !god && !(creal->matcher() && creal->matcher()->docMatcher().atomic());

    do {
        // TODO: we can generalize this I believe
        //
        bool willNeedRecord = (creal->matcher() && creal->matcher()->needRecord()) || pattern.isEmpty() || isSimpleIdQuery( pattern );
        if ( ! willNeedRecord ) {
            // TODO: this is a total hack right now
            // check if the index full encompasses query

            if ( pattern.nFields() == 1 &&
                    str::equals( pattern.firstElement().fieldName() , creal->indexKeyPattern().firstElement().fieldName() ) )
                willNeedRecord = true;
        }

        if ( canYield && ! cc->yieldSometimes( willNeedRecord ? ClientCursor::WillNeed : ClientCursor::MaybeCovered ) ) {
            cc.release(); // has already been deleted elsewhere
            // TODO should we assert or something?
            break;
        }
        if ( !cc->ok() ) {
            break; // if we yielded, could have hit the end
        }

        // this way we can avoid calling updateLocation() every time (expensive)
        // as well as some other nuances handled
        cc->setDoingDeletes( true );

        // Capture everything about the current position before advancing past it.
        DiskLoc rloc = cc->currLoc();
        BSONObj key = cc->currKey(); // not read again in this function
        bool match = creal->currentMatches();
        bool dup = cc->c()->getsetdup(rloc);

        // If the cursor cannot advance, treat this as the last iteration.
        if ( ! cc->advance() )
            justOne = true;

        // 'continue' in a do/while jumps to the cc->ok() test below.
        if ( ! match )
            continue;

        assert( !dup ); // can't be a dup, we deleted it!

        if ( !justOne ) {
            /* NOTE: this is SLOW.  this is not good, noteLocation() was designed to be called across getMore
                blocks.  here we might call millions of times which would be bad.
                */
            cc->c()->prepareToTouchEarlierIterate();
        }

        if ( logop ) {
            BSONElement e;
            if( BSONObj( rloc.rec() ).getObjectID( e ) ) {
                // Log only the _id of the deleted document.
                BSONObjBuilder b;
                b.append( e );
                bool replJustOne = true;
                logOp( "d", ns, b.done(), 0, &replJustOne );
            }
            else {
                problem() << "deleted object without id, not logging" << endl;
            }
        }

        if ( rs )
            rs->goingToDelete( rloc.obj() /*cc->c->current()*/ );

        theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc);
        nDeleted++;
        if ( justOne ) {
            break;
        }
        // Pairs with prepareToTouchEarlierIterate() above; only reached when !justOne.
        cc->c()->recoverFromTouchingEarlierIterate();

        if( !god )
            getDur().commitIfNeeded();

        if( debug && god && nDeleted == 100 )
            log() << "warning high number of deletes with god=true which could use significant memory" << endl;
    }
    while ( cc->ok() );

    // cc.get() is null here if the cursor was released after a failed yield.
    if ( cc.get() && ClientCursor::find( id , false ) == 0 ) {
        // TODO: remove this and the id declaration above if this doesn't trigger
        //       if it does, then i'm very confused (ERH 06/2011)
        error() << "this should be impossible" << endl;
        printStackTrace();
        cc.release();
    }

    return nDeleted;
}
/**
 * Apply 'updateobj' to the document(s) in 'ns' that match 'patternOrig'.
 *
 * su:             forwarded to _updateById(), insertWithObjMod() and the replacement-style
 *                 updateRecord() call
 * ns:             namespace being updated
 * updateobj:      either an operator ("$...") update or a replacement document; decided by
 *                 whether the first field name begins with '$'
 * patternOrig:    the query selecting documents to update
 * upsert:         when nothing was modified, insert a new document instead
 * multi:          keep going after the first match (operator updates only; see uasserts
 *                 10158 / 10159)
 * logop:          when true, changes are passed to logOp()
 * debug:          receives updateobj, idhack, nscanned, fastmod, upsert and fastmodinsert
 * rs:             not referenced in this function body
 * fromMigrate:    forwarded to logOp() and _updateById()
 * planPolicy:     gates the _id fast path and is passed to getOptimizedCursor()
 * forReplication: forwarded to the ModSet constructor
 *
 * NOTE(review): the meaning of the positional UpdateResult constructor arguments is not
 * visible in this block - confirm against the UpdateResult declaration.
 */
UpdateResult _updateObjects( bool su, const char* ns, const BSONObj& updateobj, const BSONObj& patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug, RemoveSaver* rs, bool fromMigrate, const QueryPlanSelectionPolicy& planPolicy, bool forReplication ) {
    DEBUGUPDATE( "update: " << ns << " update: " << updateobj << " query: " << patternOrig << " upsert: " << upsert << " multi: " << multi );
    Client& client = cc();

    debug.updateobj = updateobj;

    // The idea with these here it to make them loop invariant for
    // multi updates, and thus be a bit faster for that case.  The
    // pointers may be left invalid on a failed or terminal yield
    // recovery.
    NamespaceDetails* d = nsdetails(ns); // can be null if an upsert...
    NamespaceDetailsTransient* nsdt = &NamespaceDetailsTransient::get(ns);

    auto_ptr<ModSet> mods;
    bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$';
    int modsIsIndexed = false; // really the # of indexes
    if ( isOperatorUpdate ) {
        mods.reset( new ModSet(updateobj, nsdt->indexKeys(), forReplication) );
        modsIsIndexed = mods->maxNumIndexUpdated();
    }

    // Fast path: single-document update by simple _id query that touches no indexed field.
    if( planPolicy.permitOptimalIdPlan() && !multi && isSimpleIdQuery(patternOrig) && d &&
            !modsIsIndexed ) {
        int idxNo = d->findIdIndex();
        if( idxNo >= 0 ) {
            debug.idhack = true;

            UpdateResult result = _updateById( isOperatorUpdate, idxNo, mods.get(), d, nsdt, su, ns, updateobj, patternOrig, logop, debug, fromMigrate);
            if ( result.existing || ! upsert ) {
                return result;
            }
            else if ( upsert && ! isOperatorUpdate ) {
                // this handles repl inserts
                checkNoMods( updateobj );
                debug.upsert = true;
                BSONObj no = updateobj;
                theDataFileMgr.insertWithObjMod(ns, no, false, su);
                if ( logop )
                    logOp( "i", ns, no, 0, 0, fromMigrate, &no );

                return UpdateResult( 0 , 0 , 1 , no );
            }
            // An operator upsert that found nothing falls through to the general path.
        }
    }

    int numModded = 0;
    debug.nscanned = 0;
    shared_ptr<Cursor> c = getOptimizedCursor( ns, patternOrig, BSONObj(), planPolicy );

    // Re-read both pointers after obtaining the cursor.
    d = nsdetails(ns);
    nsdt = &NamespaceDetailsTransient::get(ns);
    bool autoDedup = c->autoDedup();

    if( c->ok() ) {
        // Locations already updated; consulted in the multi path to avoid updating them twice.
        set<DiskLoc> seenObjects;
        MatchDetails details;
        // Created lazily, the first time a yield may be needed.
        auto_ptr<ClientCursor> cc;
        do {

            // Only before a ClientCursor exists: surface a likely page fault as an exception.
            if ( cc.get() == 0 &&
                    client.allowedToThrowPageFaultException() &&
                    ! c->currLoc().isNull() &&
                    ! c->currLoc().rec()->likelyInPhysicalMemory() ) {
                throw PageFaultException( c->currLoc().rec() );
            }

            bool atomic = c->matcher() && c->matcher()->docMatcher().atomic();

            // Never yield on the first document, and never for atomic queries.
            if ( ! atomic && debug.nscanned > 0 ) {
                // we need to use a ClientCursor to yield
                if ( cc.get() == 0 ) {
                    shared_ptr< Cursor > cPtr = c;
                    cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) );
                }

                bool didYield;
                if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) {
                    // Give up ownership without deleting, then stop iterating.
                    cc.release();
                    break;
                }
                if ( !c->ok() ) {
                    break;
                }

                if ( didYield ) {
                    // Refresh the cached pointers and index information after a yield.
                    d = nsdetails(ns);
                    if ( ! d )
                        break;
                    nsdt = &NamespaceDetailsTransient::get(ns);
                    if ( mods.get() ) {
                        mods->setIndexedStatus( nsdt->indexKeys() );
                        modsIsIndexed = mods->maxNumIndexUpdated();
                    }

                }

            } // end yielding block

            debug.nscanned++;

            if ( mods.get() && mods->hasDynamicArray() ) {
                details.requestElemMatchKey();
            }

            if ( !c->currentMatches( &details ) ) {
                c->advance();
                continue;
            }

            Record* r = c->_current();
            DiskLoc loc = c->currLoc();

            // getsetdup() is always called; its result only matters when autoDedup is set.
            if ( c->getsetdup( loc ) && autoDedup ) {
                c->advance();
                continue;
            }

            BSONObj js = BSONObj::make(r);

            BSONObj pattern = patternOrig;

            if ( logop ) {
                BSONObjBuilder idPattern;
                BSONElement id;
                // NOTE: If the matching object lacks an id, we'll log
                // with the original pattern.  This isn't replay-safe.
                // It might make sense to suppress the log instead
                // if there's no id.
                if ( js.getObjectID( id ) ) {
                    idPattern.append( id );
                    pattern = idPattern.obj();
                }
                else {
                    uassert( 10157 ,  "multi-update requires all modified objects to have an _id" , ! multi );
                }
            }

            /* look for $inc etc.  note as listed here, all fields to inc must be this type, you can't set some
                regular ones at the moment. */
            if ( isOperatorUpdate ) {

                if ( multi ) {
                    // go to next record in case this one moves
                    c->advance();

                    // Update operations are deduped for cursors that implement their own
                    // deduplication.  In particular, some geo cursors are excluded.
                    if ( autoDedup ) {

                        if ( seenObjects.count( loc ) ) {
                            continue;
                        }

                        // SERVER-5198 Advance past the document to be modified, provided
                        // deduplication is enabled, but see SERVER-5725.
                        while( c->ok() && loc == c->currLoc() ) {
                            c->advance();
                        }
                    }
                }

                const BSONObj& onDisk = loc.obj();

                ModSet* useMods = mods.get();

                // Owns the per-document ModSet returned by fixDynamicArray(), if one is made.
                auto_ptr<ModSet> mymodset;
                if ( details.hasElemMatchKey() && mods->hasDynamicArray() ) {
                    useMods = mods->fixDynamicArray( details.elemMatchKey() );
                    mymodset.reset( useMods );
                }

                auto_ptr<ModSetState> mss = useMods->prepare( onDisk,
                                                              false /* not an insertion */ );

                bool willAdvanceCursor = multi && c->ok() && ( modsIsIndexed || ! mss->canApplyInPlace() );

                if ( willAdvanceCursor ) {
                    if ( cc.get() ) {
                        cc->setDoingDeletes( true );
                    }
                    c->prepareToTouchEarlierIterate();
                }

                // If we've made it this far, "ns" must contain a valid collection name, and so
                // is of the form "db.collection".  Therefore, the following expression must
                // always be valid.  "system.users" updates must never be done in place, in
                // order to ensure that they are validated inside DataFileMgr::updateRecord(.).
                bool isSystemUsersMod = (NamespaceString(ns).coll == "system.users");

                BSONObj newObj;
                if ( !mss->isUpdateIndexed() && mss->canApplyInPlace() && !isSystemUsersMod ) {
                    mss->applyModsInPlace( true );// const_cast<BSONObj&>(onDisk) );

                    DEBUGUPDATE( "\t\t\t doing in place update" );
                    if ( !multi )
                        debug.fastmod = true;

                    if ( modsIsIndexed ) {
                        seenObjects.insert( loc );
                    }
                    newObj = loc.obj();
                    d->paddingFits();
                }
                else {
                    newObj = mss->createNewFromMods();
                    checkTooLarge(newObj);
                    DiskLoc newLoc = theDataFileMgr.updateRecord(ns,
                                                                 d,
                                                                 nsdt,
                                                                 r,
                                                                 loc,
                                                                 newObj.objdata(),
                                                                 newObj.objsize(),
                                                                 debug);

                    if ( newLoc != loc || modsIsIndexed ){
                        // log() << "Moved obj " << newLoc.obj()["_id"] << " from " << loc << " to " << newLoc << endl;
                        // object moved, need to make sure we don' get again
                        seenObjects.insert( newLoc );
                    }

                }

                if ( logop ) {
                    DEV verify( mods->size() );
                    BSONObj logObj = mss->getOpLogRewrite();
                    DEBUGUPDATE( "\t rewrite update: " << logObj );

                    // It is possible that the entire mod set was a no-op over this
                    // document.  We would have an empty log record in that case. If we
                    // call logOp, with an empty record, that would be replicated as "clear
                    // this record", which is not what we want. Therefore, to get a no-op
                    // in the replica, we simply don't log.
                    if ( logObj.nFields() ) {
                        logOp("u", ns, logObj , &pattern, 0, fromMigrate, &newObj );
                    }
                }
                numModded++;
                if ( ! multi )
                    return UpdateResult( 1 , 1 , numModded , BSONObj() );
                // Pairs with prepareToTouchEarlierIterate() above.
                if ( willAdvanceCursor )
                    c->recoverFromTouchingEarlierIterate();

                getDur().commitIfNeeded();

                continue;
            }

            // Replacement-style update: only ever applied to the first matching document.
            uassert( 10158 ,  "multi update only works with $ operators" , ! multi );

            BSONElementManipulator::lookForTimestamps( updateobj );
            checkNoMods( updateobj );
            theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, su);
            if ( logop ) {
                DEV wassert( !su ); // super used doesn't get logged, this would be bad.
                logOp("u", ns, updateobj, &pattern, 0, fromMigrate, &updateobj );
            }
            return UpdateResult( 1 , 0 , 1 , BSONObj() );
        } while ( c->ok() );
    } // endif

    if ( numModded )
        return UpdateResult( 1 , 1 , numModded , BSONObj() );

    // Nothing matched: insert if this is an upsert.
    if ( upsert ) {
        if ( updateobj.firstElementFieldName()[0] == '$' ) {
            // upsert of an $operation. build a default object
            BSONObj newObj = mods->createNewFromQuery( patternOrig );
            checkNoMods( newObj );
            debug.fastmodinsert = true;
            theDataFileMgr.insertWithObjMod(ns, newObj, false, su);
            if ( logop )
                logOp( "i", ns, newObj, 0, 0, fromMigrate, &newObj );

            return UpdateResult( 0 , 1 , 1 , newObj );
        }
        uassert( 10159 ,  "multi update only works with $ operators" , ! multi );
        checkNoMods( updateobj );
        debug.upsert = true;
        BSONObj no = updateobj;
        theDataFileMgr.insertWithObjMod(ns, no, false, su);
        if ( logop )
            logOp( "i", ns, no, 0, 0, fromMigrate, &no );
        return UpdateResult( 0 , 0 , 1 , no );
    }

    return UpdateResult( 0 , isOperatorUpdate , 0 , BSONObj() );
}
void QueryPlanSet::init() { DEBUGQO( "QueryPlanSet::init " << ns << "\t" << _originalQuery ); _plans.clear(); _mayRecordPlan = true; _usingPrerecordedPlan = false; const char *ns = _frsp->ns(); NamespaceDetails *d = nsdetails( ns ); if ( !d || !_frsp->matchPossible() ) { // Table scan plan, when no matches are possible _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) ); return; } BSONElement hint = _hint.firstElement(); if ( !hint.eoo() ) { _mayRecordPlan = false; IndexDetails *id = parseHint( hint, d ); if ( id ) { addHint( *id ); } else { massert( 10366 , "natural order cannot be specified with $min/$max", _min.isEmpty() && _max.isEmpty() ); // Table scan plan _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) ); } return; } if ( !_min.isEmpty() || !_max.isEmpty() ) { string errmsg; BSONObj keyPattern; IndexDetails *idx = indexDetailsForRange( ns, errmsg, _min, _max, keyPattern ); massert( 10367 , errmsg, idx ); _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_frsp, *_originalFrsp, _originalQuery, _order, _min, _max ) ) ); return; } if ( isSimpleIdQuery( _originalQuery ) ) { int idx = d->findIdIndex(); if ( idx >= 0 ) { _usingPrerecordedPlan = true; _mayRecordPlan = false; _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_frsp , *_originalFrsp , _originalQuery, _order ) ) ); return; } } if ( _originalQuery.isEmpty() && _order.isEmpty() ) { _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) ); return; } DEBUGQO( "\t special : " << _frsp->getSpecial() ); if ( _frsp->getSpecial().size() ) { _special = _frsp->getSpecial(); NamespaceDetails::IndexIterator i = d->ii(); while( i.more() ) { int j = i.pos(); IndexDetails& ii = i.next(); const IndexSpec& spec = ii.getSpec(); if ( spec.getTypeName() == _special && spec.suitability( _originalQuery , _order ) ) { _usingPrerecordedPlan = 
true; _mayRecordPlan = false; _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_frsp , *_originalFrsp , _originalQuery, _order , BSONObj() , BSONObj() , _special ) ) ); return; } } uassert( 13038 , (string)"can't find special index: " + _special + " for: " + _originalQuery.toString() , 0 ); } if ( _honorRecordedPlan ) { pair< BSONObj, long long > best = QueryUtilIndexed::bestIndexForPatterns( *_frsp, _order ); BSONObj bestIndex = best.first; long long oldNScanned = best.second; if ( !bestIndex.isEmpty() ) { QueryPlanPtr p; _oldNScanned = oldNScanned; if ( !strcmp( bestIndex.firstElement().fieldName(), "$natural" ) ) { // Table scan plan p.reset( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ); } NamespaceDetails::IndexIterator i = d->ii(); while( i.more() ) { int j = i.pos(); IndexDetails& ii = i.next(); if( ii.keyPattern().woCompare(bestIndex) == 0 ) { p.reset( new QueryPlan( d, j, *_frsp, *_originalFrsp, _originalQuery, _order ) ); } } massert( 10368 , "Unable to locate previously recorded index", p.get() ); if ( !( _bestGuessOnly && p->scanAndOrderRequired() ) ) { _usingPrerecordedPlan = true; _mayRecordPlan = false; _plans.push_back( p ); return; } } } addOtherPlans( false ); }
// static void QueryPlanner::plan(const CanonicalQuery& query, const QueryPlannerParams& params, vector<QuerySolution*>* out) { QLOG() << "=============================\n" << "Beginning planning, options = " << optionString(params.options) << endl << "Canonical query:\n" << query.toString() << endl << "=============================" << endl; // The shortcut formerly known as IDHACK. See if it's a simple _id query. If so we might // just make an ixscan over the _id index and bypass the rest of planning entirely. if (!query.getParsed().isExplain() && !query.getParsed().showDiskLoc() && isSimpleIdQuery(query.getParsed().getFilter()) && !query.getParsed().hasOption(QueryOption_CursorTailable)) { // See if we can find an _id index. for (size_t i = 0; i < params.indices.size(); ++i) { if (isIdIndex(params.indices[i].keyPattern)) { const IndexEntry& index = params.indices[i]; QLOG() << "IDHACK using index " << index.toString() << endl; // If so, we make a simple scan to find the doc. IndexScanNode* isn = new IndexScanNode(); isn->indexKeyPattern = index.keyPattern; isn->indexIsMultiKey = index.multikey; isn->direction = 1; isn->bounds.isSimpleRange = true; BSONObj key = getKeyFromQuery(index.keyPattern, query.getParsed().getFilter()); isn->bounds.startKey = isn->bounds.endKey = key; isn->bounds.endKeyInclusive = true; isn->computeProperties(); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, isn); if (NULL != soln) { out->push_back(soln); QLOG() << "IDHACK solution is:\n" << (*out)[0]->toString() << endl; // And that's it. return; } } } } for (size_t i = 0; i < params.indices.size(); ++i) { QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl; } bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN); // If the query requests a tailable cursor, the only solution is a collscan + filter with // tailable set on the collscan. TODO: This is a policy departure. 
Previously I think you // could ask for a tailable cursor and it just tried to give you one. Now, we fail if we // can't provide one. Is this what we want? if (query.getParsed().hasOption(QueryOption_CursorTailable)) { if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && canTableScan) { QuerySolution* soln = buildCollscanSoln(query, true, params); if (NULL != soln) { out->push_back(soln); } } return; } // The hint can be $natural: 1. If this happens, output a collscan. It's a weird way of // saying "table scan for two, please." if (!query.getParsed().getHint().isEmpty()) { BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural"); if (!natural.eoo()) { QLOG() << "forcing a table scan due to hinted $natural\n"; if (canTableScan) { QuerySolution* soln = buildCollscanSoln(query, false, params); if (NULL != soln) { out->push_back(soln); } } return; } } // NOR and NOT we can't handle well with indices. If we see them here, they weren't // rewritten to remove the negation. Just output a collscan for those. if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOT) || QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOR)) { // If there's a near predicate, we can't handle this. // TODO: Should canonicalized query detect this? if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)) { warning() << "Can't handle NOT/NOR with GEO_NEAR"; return; } QLOG() << "NOT/NOR in plan, just outtping a collscan\n"; if (canTableScan) { QuerySolution* soln = buildCollscanSoln(query, false, params); if (NULL != soln) { out->push_back(soln); } } return; } // Figure out what fields we care about. unordered_set<string> fields; QueryPlannerIXSelect::getFields(query.root(), "", &fields); for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) { QLOG() << "predicate over field " << *it << endl; } // Filter our indices so we only look at indices that are over our predicates. 
vector<IndexEntry> relevantIndices; // Hints require us to only consider the hinted index. BSONObj hintIndex = query.getParsed().getHint(); // Snapshot is a form of a hint. If snapshot is set, try to use _id index to make a real // plan. If that fails, just scan the _id index. if (query.getParsed().isSnapshot()) { // Find the ID index in indexKeyPatterns. It's our hint. for (size_t i = 0; i < params.indices.size(); ++i) { if (isIdIndex(params.indices[i].keyPattern)) { hintIndex = params.indices[i].keyPattern; break; } } } size_t hintIndexNumber = numeric_limits<size_t>::max(); if (!hintIndex.isEmpty()) { // Sigh. If the hint is specified it might be using the index name. BSONElement firstHintElt = hintIndex.firstElement(); if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) { string hintName = firstHintElt.String(); for (size_t i = 0; i < params.indices.size(); ++i) { if (params.indices[i].name == hintName) { QLOG() << "hint by name specified, restricting indices to " << params.indices[i].keyPattern.toString() << endl; relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); hintIndexNumber = i; hintIndex = params.indices[i].keyPattern; break; } } } else { for (size_t i = 0; i < params.indices.size(); ++i) { if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) { relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); QLOG() << "hint specified, restricting indices to " << hintIndex.toString() << endl; hintIndexNumber = i; break; } } } if (hintIndexNumber == numeric_limits<size_t>::max()) { // This is supposed to be an error. 
warning() << "Can't find hint for " << hintIndex.toString(); return; } } else { QLOG() << "Finding relevant indices\n"; QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices); } for (size_t i = 0; i < relevantIndices.size(); ++i) { QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl; } // Figure out how useful each index is to each predicate. // query.root() is now annotated with RelevantTag(s). QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices); QLOG() << "rated tree" << endl; QLOG() << query.root()->toString() << endl; // If there is a GEO_NEAR it must have an index it can use directly. // XXX: move into data access? MatchExpression* gnNode = NULL; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) { // No index for GEO_NEAR? No query. RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return; } GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode); vector<size_t> newFirst; // 2d + GEO_NEAR is annoying. Because 2d's GEO_NEAR isn't streaming we have to embed // the full query tree inside it as a matcher. for (size_t i = 0; i < tag->first.size(); ++i) { // GEO_NEAR has a non-2d index it can use. We can deal w/that in normal planning. if (!is2DIndex(relevantIndices[tag->first[i]].keyPattern)) { newFirst.push_back(i); continue; } // If we're here, GEO_NEAR has a 2d index. We create a 2dgeonear plan with the // entire tree as a filter, if possible. GeoNear2DNode* solnRoot = new GeoNear2DNode(); solnRoot->nq = gnme->getData(); if (MatchExpression::GEO_NEAR != query.root()->matchType()) { // root is an AND, clone and delete the GEO_NEAR child. 
MatchExpression* filterTree = query.root()->shallowClone(); verify(MatchExpression::AND == filterTree->matchType()); bool foundChild = false; for (size_t i = 0; i < filterTree->numChildren(); ++i) { if (MatchExpression::GEO_NEAR == filterTree->getChild(i)->matchType()) { foundChild = true; filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + i); break; } } verify(foundChild); solnRoot->filter.reset(filterTree); } solnRoot->numWanted = query.getParsed().getNumToReturn(); if (0 == solnRoot->numWanted) { solnRoot->numWanted = 100; } solnRoot->indexKeyPattern = relevantIndices[tag->first[i]].keyPattern; // Remove the 2d index. 2d can only be the first field, and we know there is // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll // only be first for gnNode. tag->first.erase(tag->first.begin() + i); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { out->push_back(soln); } } // Continue planning w/non-2d indices tagged for this pred. tag->first.swap(newFirst); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return; } } // Likewise, if there is a TEXT it must have an index it can use directly. MatchExpression* textNode; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) { RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return; } } // If we have any relevant indices, we try to create indexed plans. if (0 < relevantIndices.size()) { // The enumerator spits out trees tagged with IndexTag(s). PlanEnumerator isp(query.root(), &relevantIndices); isp.init(); MatchExpression* rawTree; while (isp.getNext(&rawTree)) { QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString() << endl; // This can fail if enumeration makes a mistake. 
QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices); if (NULL == solnRoot) { continue; } QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { QLOG() << "Planner: adding solution:\n" << soln->toString() << endl; out->push_back(soln); } } } QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n"; // An index was hinted. If there are any solutions, they use the hinted index. If not, we // scan the entire index to provide results and output that as our plan. This is the // desired behavior when an index is hinted that is not relevant to the query. if (!hintIndex.isEmpty() && (0 == out->size())) { QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params); if (NULL != soln) { QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl; out->push_back(soln); } return; } // If a sort order is requested, there may be an index that provides it, even if that // index is not over any predicates in the query. // // XXX XXX: Can we do this even if the index is sparse? Might we miss things? if (!query.getParsed().getSort().isEmpty() && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) { // See if we have a sort provided from an index already. bool usingIndexToSort = false; for (size_t i = 0; i < out->size(); ++i) { QuerySolution* soln = (*out)[i]; if (!soln->hasSortStage) { usingIndexToSort = true; break; } } if (!usingIndexToSort) { for (size_t i = 0; i < params.indices.size(); ++i) { const BSONObj& kp = params.indices[i].keyPattern; if (providesSort(query, kp)) { QLOG() << "Planner: outputting soln that uses index to provide sort." 
<< endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params); if (NULL != soln) { out->push_back(soln); break; } } if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) { QLOG() << "Planner: outputting soln that uses (reverse) index " << "to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1); if (NULL != soln) { out->push_back(soln); break; } } } } } // TODO: Do we always want to offer a collscan solution? // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs. if ( !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan))) { QuerySolution* collscan = buildCollscanSoln(query, false, params); if (NULL != collscan) { out->push_back(collscan); QLOG() << "Planner: outputting a collscan:\n"; QLOG() << collscan->toString() << endl; } } }
bool runNoDirectClient( const string& ns , const BSONObj& queryOriginal , const BSONObj& fields , const BSONObj& update , bool upsert , bool returnNew , bool remove , BSONObjBuilder& result ) { Lock::DBWrite lk( ns ); Client::Context cx( ns ); BSONObj doc; bool found = Helpers::findOne( ns.c_str() , queryOriginal , doc ); BSONObj queryModified = queryOriginal; if ( found && doc["_id"].type() && ! isSimpleIdQuery( queryOriginal ) ) { // we're going to re-write the query to be more efficient // we have to be a little careful because of positional operators // maybe we can pass this all through eventually, but right now isn't an easy way BSONObjBuilder b( queryOriginal.objsize() + 10 ); b.append( doc["_id"] ); bool addedAtomic = false; BSONObjIterator i( queryOriginal ); while ( i.more() ) { const BSONElement& elem = i.next(); if ( str::equals( "_id" , elem.fieldName() ) ) { // we already do _id continue; } if ( ! str::contains( elem.fieldName() , '.' ) ) { // if there is a dotted field, accept we may need more query parts continue; } if ( ! addedAtomic ) { b.appendBool( "$atomic" , true ); addedAtomic = true; } b.append( elem ); } queryModified = b.obj(); } if ( remove ) { _appendHelper( result , doc , found , fields ); if ( found ) { deleteObjects( ns.c_str() , queryModified , true , true ); BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) ); le.appendNumber( "n" , 1 ); le.done(); } } else { // update if ( ! found && ! upsert ) { // didn't have it, and am not upserting _appendHelper( result , doc , found , fields ); } else { // we found it or we're updating if ( ! returnNew ) { _appendHelper( result , doc , found , fields ); } UpdateResult res = updateObjects( ns.c_str() , update , queryModified , upsert , false , true , cc().curop()->debug() ); if ( returnNew ) { if ( ! 
res.existing && res.upserted.isSet() ) { queryModified = BSON( "_id" << res.upserted ); } log() << "queryModified: " << queryModified << endl; verify( Helpers::findOne( ns.c_str() , queryModified , doc ) ); _appendHelper( result , doc , true , fields ); } BSONObjBuilder le( result.subobjStart( "lastErrorObject" ) ); le.appendBool( "updatedExisting" , res.existing ); le.appendNumber( "n" , res.num ); if ( res.upserted.isSet() ) le.append( "upserted" , res.upserted ); le.done(); } } return true; }
/**
 * Returns true when 'query' qualifies for the _id fast path: it is not an explain,
 * does not request showDiskLoc, its filter passes isSimpleIdQuery(), and the
 * QueryOption_CursorTailable option is not set.
 */
static bool canUseIDHack(const CanonicalQuery& query) {
    if (query.getParsed().isExplain()) {
        return false;
    }
    if (query.getParsed().showDiskLoc()) {
        return false;
    }
    if (!isSimpleIdQuery(query.getParsed().getFilter())) {
        return false;
    }
    return !query.getParsed().hasOption(QueryOption_CursorTailable);
}
/** * Run a query -- includes checking for and running a Command. * @return points to ns if exhaust mode. 0=normal mode * @locks the db mutex for reading (and potentially for writing temporarily to create a new db). * @yields the db mutex periodically after acquiring it. * @asserts on scan and order memory exhaustion and other cases. */ const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) { shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) ); ParsedQuery& pq( *pq_shared ); BSONObj jsobj = q.query; int queryOptions = q.queryOptions; const char *ns = q.ns; if( logLevel >= 2 ) log() << "runQuery called " << ns << " " << jsobj << endl; curop.debug().ns = ns; curop.debug().ntoreturn = pq.getNumToReturn(); curop.debug().query = jsobj; curop.setQuery(jsobj); // Run a command. if ( pq.couldBeCommand() ) { BufBuilder bb; bb.skip(sizeof(QueryResult)); BSONObjBuilder cmdResBuf; if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) { curop.debug().iscommand = true; curop.debug().query = jsobj; curop.markCommand(); auto_ptr< QueryResult > qr; qr.reset( (QueryResult *) bb.buf() ); bb.decouple(); qr->setResultFlagsToOk(); qr->len = bb.len(); curop.debug().responseLength = bb.len(); qr->setOperation(opReply); qr->cursorId = 0; qr->startingFrom = 0; qr->nReturned = 1; result.setData( qr.release(), true ); } else { uasserted(13530, "bad or malformed command request?"); } return 0; } bool explain = pq.isExplain(); BSONObj order = pq.getOrder(); BSONObj query = pq.getFilter(); /* The ElemIter will not be happy if this isn't really an object. So throw exception here when that is true. (Which may indicate bad data from client.) 
*/ if ( query.objsize() == 0 ) { out() << "Bad query object?\n jsobj:"; out() << jsobj.toString() << "\n query:"; out() << query.toString() << endl; uassert( 10110 , "bad query object", false); } Client::ReadContext ctx( ns , dbpath ); // read locks const ConfigVersion shardingVersionAtStart = shardingState.getVersion( ns ); replVerifyReadsOk(&pq); if ( pq.hasOption( QueryOption_CursorTailable ) ) { NamespaceDetails *d = nsdetails( ns ); uassert( 13051, "tailable cursor requested on non capped collection", d && d->isCapped() ); const BSONObj nat1 = BSON( "$natural" << 1 ); if ( order.isEmpty() ) { order = nat1; } else { uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 ); } } // Run a simple id query. if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) { int n = 0; bool nsFound = false; bool indexFound = false; BSONObj resObject; Client& c = cc(); bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound ); if ( nsFound == false || indexFound == true ) { if ( shardingState.needShardChunkManager( ns ) ) { ShardChunkManagerPtr m = shardingState.getShardChunkManager( ns ); if ( m && ! m->belongsToMe( resObject ) ) { // I have something this _id // but it doesn't belong to me // so return nothing resObject = BSONObj(); found = false; } } BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32); bb.skip(sizeof(QueryResult)); curop.debug().idhack = true; if ( found ) { n = 1; fillQueryResultFromObj( bb , pq.getFields() , resObject ); } auto_ptr< QueryResult > qr; qr.reset( (QueryResult *) bb.buf() ); bb.decouple(); qr->setResultFlagsToOk(); qr->len = bb.len(); curop.debug().responseLength = bb.len(); qr->setOperation(opReply); qr->cursorId = 0; qr->startingFrom = 0; qr->nReturned = n; result.setData( qr.release(), true ); return NULL; } } // Run a regular query. BSONObj oldPlan; if ( explain && ! 
pq.hasIndexSpecifier() ) { MultiPlanScanner mps( ns, query, order ); if ( mps.usingCachedPlan() ) { oldPlan = mps.oldExplain().firstElement().embeddedObject() .firstElement().embeddedObject().getOwned(); } } // In some cases the query may be retried if there is an in memory sort size assertion. for( int retry = 0; retry < 2; ++retry ) { try { return queryWithQueryOptimizer( m, queryOptions, ns, jsobj, curop, query, order, pq_shared, oldPlan, shardingVersionAtStart, result ); } catch ( const QueryRetryException & ) { verify( retry == 0 ); } } verify( false ); return 0; }