/**
 * Run a query -- includes checking for and running a Command.
 *
 * @param m      the incoming request message (not read by this function).
 * @param q      parsed wire-level query; supplies the namespace, query object and options.
 * @param curop  current-operation record; updated with debug/profiling details.
 * @param result receives the reply data on the command and primary-key fast paths;
 *               also handed to queryWithQueryOptimizer() for regular queries.
 * @return the namespace (q.ns) if curop.debug().exhaust is set after a regular query;
 *         an empty string otherwise (including the command and primary-key fast paths).
 * @locks the db mutex for reading (and potentially for writing temporarily to create a new db).
 * @asserts on an empty or invalid namespace, a malformed command, a bad query object,
 *          an illegal tailable request, scan and order memory exhaustion and other cases.
 */
string runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
    shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
    ParsedQuery& pq( *pq_shared );
    BSONObj jsobj = q.query;
    int queryOptions = q.queryOptions;
    const char *ns = q.ns;
    uassert( 16332 , "can't have an empty ns" , ns[0] );

    if( logLevel >= 2 )
        log() << "runQuery called " << ns << " " << jsobj << endl;

    curop.debug().ns = ns;
    curop.debug().ntoreturn = pq.getNumToReturn();
    curop.debug().query = jsobj;
    curop.setQuery(jsobj);

    uassert( 16256, str::stream() << "Invalid ns [" << ns << "]", NamespaceString::isValid(ns) );

    // Run a command.
    if ( pq.couldBeCommand() ) {
        curop.markCommand();
        BufBuilder bb;
        // Leave room for the reply header; it is filled in below once the body is built.
        bb.skip(sizeof(QueryResult));
        BSONObjBuilder cmdResBuf;
        if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
            curop.debug().iscommand = true;
            curop.debug().query = jsobj;

            // The reply header lives at the front of the builder's buffer.
            // NOTE(review): decouple() presumably detaches the buffer from the builder so
            // that the QueryResult pointer becomes its sole owner -- confirm.
            auto_ptr< QueryResult > qr;
            qr.reset( (QueryResult *) bb.buf() );
            bb.decouple();
            qr->setResultFlagsToOk();
            qr->len = bb.len();
            curop.debug().responseLength = bb.len();
            qr->setOperation(opReply);
            qr->cursorId = 0;
            qr->startingFrom = 0;
            qr->nReturned = 1;
            result.setData( qr.release(), true );
        }
        else {
            uasserted(13530, "bad or malformed command request?");
        }
        return "";
    }

    const bool explain = pq.isExplain();
    const bool tailable = pq.hasOption(QueryOption_CursorTailable);
    BSONObj order = pq.getOrder();
    BSONObj query = pq.getFilter();

    /* The ElemIter will not be happy if this isn't really an object. So throw exception
       here when that is true.
       (Which may indicate bad data from client.)
    */
    if ( query.objsize() == 0 ) {
        out() << "Bad query object?\n jsobj:";
        out() << jsobj.toString() << "\n query:";
        out() << query.toString() << endl;
        uassert( 10110 , "bad query object", false);
    }

    // Tailable cursors need to read newly written entries from the tail
    // of the collection. They manually arbitrate with the collection over
    // what data is readable and when, so we choose read uncommitted isolation.
    OpSettings settings;
    settings.setQueryCursorMode(DEFAULT_LOCK_CURSOR);
    settings.setBulkFetch(true);
    settings.setCappedAppendPK(pq.hasOption(QueryOption_AddHiddenPK));
    cc().setOpSettings(settings);

    // If our caller has a transaction, it's multi-statement.
    const bool inMultiStatementTxn = cc().hasTxn();
    if (tailable) {
        // Because it's easier to disable this. It shouldn't be happening in a normal system.
        uassert(16812, "May not perform a tailable query in a multi-statement transaction.",
                !inMultiStatementTxn);
    }

    // Begin a read-only, snapshot transaction under normal circumstances.
    // If the cursor is tailable, we need to be able to read uncommitted data.
    const int txnFlags = (tailable ? DB_READ_UNCOMMITTED : DB_TXN_SNAPSHOT) | DB_TXN_READ_ONLY;
    LOCK_REASON(lockReason, "query");
    Client::ReadContext ctx(ns, lockReason);
    // Only open our own transaction when the caller does not already have one.
    scoped_ptr<Client::Transaction> transaction(!inMultiStatementTxn ?
                                                new Client::Transaction(txnFlags) : NULL);

    bool hasRetried = false;
    while ( true ) {
        try {
            // Checked once per attempt; this also covers the primary-key fast path below.
            replVerifyReadsOk(&pq);

            // Fast-path for primary key queries.
            if (!explain && !tailable) {
                if (_tryQueryByPKHack(ns, query, pq, curop, result)) {
                    if (transaction) {
                        transaction->commit();
                    }
                    return "";
                }
            }

            // sanity check the query and projection
            if (pq.getFields() != NULL) {
                pq.getFields()->validateQuery( query );
            }

            if (tailable) {
                Collection *cl = getCollection( ns );
                if (cl != NULL && !(cl->isCapped() || str::equals(ns, rsoplog))) {
                    uasserted( 13051, "tailable cursor requested on non-capped, non-oplog collection" );
                }
                const BSONObj nat1 = BSON( "$natural" << 1 );
                if ( order.isEmpty() ) {
                    order = nat1;
                }
                else {
                    uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 );
                }
            }

            // Run a regular query.

            // these may now be stored in a ClientCursor or somewhere else,
            // so make sure we use a real copy
            jsobj = jsobj.getOwned();
            query = query.getOwned();
            order = order.getOwned();
            const ConfigVersion shardingVersionAtStart = shardingState.getVersion( ns );
            // Only the first attempt of an explain without an index specifier asks for this.
            const bool getCachedExplainPlan = ! hasRetried && explain && ! pq.hasIndexSpecifier();
            const bool savedCursor = queryWithQueryOptimizer( queryOptions, ns, jsobj, curop, query,
                                                              order, pq_shared, shardingVersionAtStart,
                                                              getCachedExplainPlan, inMultiStatementTxn,
                                                              result );
            // Did not save the cursor, so we can commit the transaction now if it exists.
            if (transaction && !savedCursor) {
                transaction->commit();
            }
            return curop.debug().exhaust ? ns : "";
        }
        catch ( const QueryRetryException & ) {
            // In some cases the query may be retried if there is an in memory sort size assertion.
            // At most one retry is allowed; a second QueryRetryException trips the verify.
            verify( ! hasRetried );
            hasRetried = true;
        }
    }
}
/** * Run a query -- includes checking for and running a Command. * @return points to ns if exhaust mode. 0=normal mode * @locks the db mutex for reading (and potentially for writing temporarily to create a new db). * @yields the db mutex periodically after acquiring it. * @asserts on scan and order memory exhaustion and other cases. */ const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) { shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) ); ParsedQuery& pq( *pq_shared ); BSONObj jsobj = q.query; int queryOptions = q.queryOptions; const char *ns = q.ns; if( logLevel >= 2 ) log() << "runQuery called " << ns << " " << jsobj << endl; curop.debug().ns = ns; curop.debug().ntoreturn = pq.getNumToReturn(); curop.debug().query = jsobj; curop.setQuery(jsobj); // Run a command. if ( pq.couldBeCommand() ) { BufBuilder bb; bb.skip(sizeof(QueryResult)); BSONObjBuilder cmdResBuf; if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) { curop.debug().iscommand = true; curop.debug().query = jsobj; curop.markCommand(); auto_ptr< QueryResult > qr; qr.reset( (QueryResult *) bb.buf() ); bb.decouple(); qr->setResultFlagsToOk(); qr->len = bb.len(); curop.debug().responseLength = bb.len(); qr->setOperation(opReply); qr->cursorId = 0; qr->startingFrom = 0; qr->nReturned = 1; result.setData( qr.release(), true ); } else { uasserted(13530, "bad or malformed command request?"); } return 0; } bool explain = pq.isExplain(); BSONObj order = pq.getOrder(); BSONObj query = pq.getFilter(); /* The ElemIter will not be happy if this isn't really an object. So throw exception here when that is true. (Which may indicate bad data from client.) 
*/ if ( query.objsize() == 0 ) { out() << "Bad query object?\n jsobj:"; out() << jsobj.toString() << "\n query:"; out() << query.toString() << endl; uassert( 10110 , "bad query object", false); } Client::ReadContext ctx( ns , dbpath ); // read locks const ConfigVersion shardingVersionAtStart = shardingState.getVersion( ns ); replVerifyReadsOk(&pq); if ( pq.hasOption( QueryOption_CursorTailable ) ) { NamespaceDetails *d = nsdetails( ns ); uassert( 13051, "tailable cursor requested on non capped collection", d && d->isCapped() ); const BSONObj nat1 = BSON( "$natural" << 1 ); if ( order.isEmpty() ) { order = nat1; } else { uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 ); } } // Run a simple id query. if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) { int n = 0; bool nsFound = false; bool indexFound = false; BSONObj resObject; Client& c = cc(); bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound ); if ( nsFound == false || indexFound == true ) { if ( shardingState.needShardChunkManager( ns ) ) { ShardChunkManagerPtr m = shardingState.getShardChunkManager( ns ); if ( m && ! m->belongsToMe( resObject ) ) { // I have something this _id // but it doesn't belong to me // so return nothing resObject = BSONObj(); found = false; } } BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32); bb.skip(sizeof(QueryResult)); curop.debug().idhack = true; if ( found ) { n = 1; fillQueryResultFromObj( bb , pq.getFields() , resObject ); } auto_ptr< QueryResult > qr; qr.reset( (QueryResult *) bb.buf() ); bb.decouple(); qr->setResultFlagsToOk(); qr->len = bb.len(); curop.debug().responseLength = bb.len(); qr->setOperation(opReply); qr->cursorId = 0; qr->startingFrom = 0; qr->nReturned = n; result.setData( qr.release(), true ); return NULL; } } // Run a regular query. BSONObj oldPlan; if ( explain && ! 
pq.hasIndexSpecifier() ) { MultiPlanScanner mps( ns, query, order ); if ( mps.usingCachedPlan() ) { oldPlan = mps.oldExplain().firstElement().embeddedObject() .firstElement().embeddedObject().getOwned(); } } // In some cases the query may be retried if there is an in memory sort size assertion. for( int retry = 0; retry < 2; ++retry ) { try { return queryWithQueryOptimizer( m, queryOptions, ns, jsobj, curop, query, order, pq_shared, oldPlan, shardingVersionAtStart, result ); } catch ( const QueryRetryException & ) { verify( retry == 0 ); } } verify( false ); return 0; }