bool _tryQueryByPKHack(const char *ns, const BSONObj &query, const ParsedQuery &pq, CurOp &curop, Message &result) { BSONObj resObject; bool found = false; Collection *cl = getCollection(ns); if (cl == NULL) { return false; // ns doesn't exist, fall through to optimizer for legacy reasons } const BSONObj &pk = cl->getSimplePKFromQuery(query); if (pk.isEmpty()) { return false; // unable to query by PK - resort to using the optimizer } found = queryByPKHack(cl, pk, query, resObject); if ( shardingState.needShardChunkManager( ns ) ) { ShardChunkManagerPtr m = shardingState.getShardChunkManager( ns ); if ( m && ! m->belongsToMe( resObject ) ) { // I have something for this _id // but it doesn't belong to me // so return nothing resObject = BSONObj(); found = false; } } BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32); bb.skip(sizeof(QueryResult)); if ( found ) { fillQueryResultFromObj( bb , pq.getFields() , resObject ); } auto_ptr< QueryResult > qr( (QueryResult *) bb.buf() ); bb.decouple(); qr->setResultFlagsToOk(); qr->len = bb.len(); curop.debug().responseLength = bb.len(); qr->setOperation(opReply); qr->cursorId = 0; qr->startingFrom = 0; qr->nReturned = found ? 1 : 0; result.setData( qr.release(), true ); return true; }
/**
 * Produce the next batch of results for an existing client cursor.
 *
 * @param ns         namespace named in the getMore; must equal the cursor's own namespace.
 * @param ntoreturn  maximum number of documents to return; 0 means no count limit
 *                   (the batch is still bounded by MaxBytesToReturnToClientAtOnce).
 * @param cursorid   id of the cursor to continue.
 * @param curop      current operation; receives the cursor's query for debugging.
 * @param pass       retry counter supplied by the caller; the slave location is
 *                   updated only on pass 0, and await-data returns stop at pass 1000.
 * @param exhaust    out: set from the cursor's QueryOption_Exhaust bit when the
 *                   cursor is still alive afterwards, otherwise false.
 * @return 0 when a tailable cursor with QueryOption_AwaitData has produced
 *         nothing yet and pass < 1000; otherwise the reply buffer, detached
 *         from the local BufBuilder (presumably owned by the caller from then on).
 */
QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) {
    exhaust = false;

    ClientCursor::Pointer cursorHandle(cursorid);
    ClientCursor *cc = cursorHandle.c();

    const int replyCapacity = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;
    BufBuilder reply( replyCapacity );
    reply.skip( sizeof( QueryResult ) );

    int flags = ResultFlag_AwaitCapable;
    int firstPos = 0;
    int numReturned = 0;

    if ( unlikely(!cc) ) {
        LOGSOME << "getMore: cursorid not found " << ns << " " << cursorid << endl;
        cursorid = 0;
        flags = ResultFlag_CursorNotFound;
    }
    else {
        // Guard against a spoofed ns that differs from the one the cursor was opened on.
        uassert(14833, "auth error", str::equals(ns, cc->ns().c_str()));

        if ( pass == 0 ) {
            cc->updateSlaveLocation( curop );
        }

        const int queryOptions = cc->queryOptions();

        curop.debug().query = cc->query();

        firstPos = cc->pos();
        Cursor *c = cc->c();
        c->recoverFromYield();
        DiskLoc last;

        // When keys are unmodified, the index is not multikey and a projection
        // exists, ask the projection for a key-only helper for this index.
        scoped_ptr<Projection::KeyOnly> keyFieldsOnly;
        if ( !cc->modifiedKeys() && !cc->isMultiKey() && cc->fields ) {
            keyFieldsOnly.reset( cc->fields->checkKey( cc->indexKeyPattern() ) );
        }

        // This manager may be stale, but it's the state of chunking when the cursor was created.
        ShardChunkManagerPtr manager = cc->getChunkManager();

        for ( ;; ) {
            if ( !c->ok() ) {
                if ( !c->tailable() ) {
                    // Ordinary cursor is exhausted: drop it and report cursor id 0.
                    cursorHandle.release();
                    bool ok = ClientCursor::erase(cursorid);
                    verify(ok);
                    cursorid = 0;
                    cc = 0;
                    break;
                }

                /* When a tailable cursor hits "EOF", ok() goes false and current()
                   is null. advance() can still be retried as a reactivation
                   attempt; it returns true once there is new data. */
                if ( c->advance() ) {
                    continue;
                }

                if ( numReturned == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
                    return 0;
                }
                break;
            }

            // in some cases (clone collection) there won't be a matcher
            if ( c->currentMatches() ) {
                if ( manager && !manager->belongsToMe( cc ) ) {
                    LOG(2) << "cursor skipping document in un-owned chunk: " << c->current() << endl;
                }
                else if ( !c->getsetdup( c->currLoc() ) ) {
                    // Not a duplicate: append it to the batch.
                    last = c->currLoc();
                    ++numReturned;

                    if ( keyFieldsOnly ) {
                        fillQueryResultFromObj( reply, 0, keyFieldsOnly->hydrate( c->currKey() ) );
                    }
                    else {
                        BSONObj js = c->current();
                        // show disk loc should be part of the main query, not in an $or clause, so this should be ok
                        DiskLoc *shownLoc = ( cc->pq.get() && cc->pq->showDiskLoc() ) ? &last : 0;
                        fillQueryResultFromObj( reply, cc->fields.get(), js, shownLoc );
                    }

                    const bool countReached = ntoreturn && numReturned >= ntoreturn;
                    if ( countReached || reply.len() > MaxBytesToReturnToClientAtOnce ) {
                        // Batch is full: step past the returned document and remember the position.
                        c->advance();
                        cc->incPos( numReturned );
                        break;
                    }
                }
            }

            c->advance();

            if ( !cc->yieldSometimes( ClientCursor::MaybeCovered ) ) {
                // yieldSometimes() reported failure; erase the cursor and stop.
                ClientCursor::erase(cursorid);
                cursorid = 0;
                cc = 0;
                cursorHandle.deleted();
                break;
            }
        }

        if ( cc ) {
            // Cursor survives this batch: save its position for the next getMore.
            if ( c->supportYields() ) {
                ClientCursor::YieldData data;
                verify( cc->prepareToYield( data ) );
            }
            else {
                cc->c()->noteLocation();
            }
            cc->mayUpgradeStorage();
            cc->storeOpForSlave( last );
            exhaust = cc->queryOptions() & QueryOption_Exhaust;
        }
    }

    // Fill in the reply header at the front of the buffer, then detach the buffer.
    QueryResult *qr = (QueryResult *) reply.buf();
    qr->len = reply.len();
    qr->setOperation(opReply);
    qr->_resultFlags() = flags;
    qr->cursorId = cursorid;
    qr->startingFrom = firstPos;
    qr->nReturned = numReturned;
    reply.decouple();

    return qr;
}
/**
 * Produce the next batch of results for an existing client cursor, running
 * under the transaction stack that belongs to that cursor.
 *
 * @param ns         namespace named in the getMore; must equal the cursor's own namespace.
 * @param ntoreturn  maximum number of documents to return; 0 means no count limit
 *                   (the batch is still bounded by MaxBytesToReturnToClientAtOnce).
 * @param cursorid   id of the cursor to continue.
 * @param curop      current operation; receives the cursor's query for debugging.
 * @param pass       retry counter supplied by the caller; the slave location is
 *                   updated only on pass 0, and await-data returns stop at pass 1000.
 * @param exhaust    out: set from the cursor's QueryOption_Exhaust bit when the
 *                   cursor is still alive afterwards, otherwise false.
 * @param isCursorAuthorized
 *                   optional out-flag, set to true once the namespace, oplog-age
 *                   and transaction checks have all passed. May be NULL when the
 *                   caller does not need it; it is never written on the
 *                   cursor-not-found path or when one of those checks throws.
 * @return 0 when a tailable cursor with QueryOption_AwaitData has produced
 *         nothing yet and pass < 1000; otherwise the reply buffer, detached
 *         from the local BufBuilder (presumably owned by the caller from then on).
 */
QueryResult* processGetMore(const char* ns, int ntoreturn, long long cursorid, CurOp& curop, int pass, bool& exhaust, bool* isCursorAuthorized ) {
    exhaust = false;
    ClientCursor::Pin p(cursorid);
    ClientCursor *client_cursor = p.c();

    int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;

    BufBuilder b( bufSize );
    b.skip(sizeof(QueryResult));

    int resultFlags = ResultFlag_AwaitCapable;
    int start = 0;
    int n = 0;

    if ( unlikely(!client_cursor) ) {
        LOGSOME << "getMore: cursorid not found " << ns << " " << cursorid << endl;
        cursorid = 0;
        resultFlags = ResultFlag_CursorNotFound;
    }
    else {
        // check for spoofing of the ns such that it does not match the one originally there for the cursor
        uassert(14833, "auth error", str::equals(ns, client_cursor->ns().c_str()));
        uassert(16784, "oplog cursor reading data that is too old", !client_cursor->lastOpForSlaveTooOld());

        int queryOptions = client_cursor->queryOptions();

        // Install the per-operation settings used while reading this batch.
        OpSettings settings;
        settings.setBulkFetch(true);
        settings.setQueryCursorMode(DEFAULT_LOCK_CURSOR);
        settings.setCappedAppendPK(queryOptions & QueryOption_AddHiddenPK);
        cc().setOpSettings(settings);

        // Check if the cursor is part of a multi-statement transaction. If it is
        // and this is not the right client (meaning the current transaction stack
        // does not match that in the cursor), it will uassert. If the cursor is
        // not part of a multi-statement transaction, then we need to use the stack
        // in the cursor for this scope.
        const bool cursorPartOfMultiStatementTxn = client_cursor->checkMultiStatementTxn();
        scoped_ptr<Client::WithTxnStack> wts;
        if (!cursorPartOfMultiStatementTxn) {
            // For simplicity, prevent multi-statement transactions from
            // reading cursors it didn't create.
            uassert(16813, "Cannot getMore() on a cursor not created by this multi-statement transaction",
                    !cc().hasTxn());
            wts.reset(new Client::WithTxnStack(client_cursor->transactions));
        }

        // All checks passed. The flag is an optional out-parameter, so a NULL
        // pointer is tolerated rather than dereferenced.
        if (isCursorAuthorized != NULL) {
            *isCursorAuthorized = true;
        }

        if (pass == 0) {
            client_cursor->updateSlaveLocation( curop );
        }

        curop.debug().query = client_cursor->query();

        start = client_cursor->pos();
        Cursor *c = client_cursor->c();

        // This manager may be stale, but it's the state of chunking when the cursor was created.
        ShardChunkManagerPtr manager = client_cursor->getChunkManager();

        while ( 1 ) {
            if ( !c->ok() ) {
                if ( c->tailable() ) {
                    /* When a tailable cursor hits "EOF", ok() goes false and current()
                       is null. advance() can still be retried as a reactivation
                       attempt; it returns true once there is new data. */
                    if ( c->advance() ) {
                        continue;
                    }

                    if ( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
                        return 0;
                    }

                    break;
                }
                p.release();
                // Done with this cursor, steal transaction stack back to commit or abort it here.
                bool ok = ClientCursor::erase(cursorid);
                verify(ok);
                cursorid = 0;
                client_cursor = 0;
                break;
            }

            MatchDetails details;
            if ( client_cursor->fields && client_cursor->fields->getArrayOpType() == Projection::ARRAY_OP_POSITIONAL ) {
                // field projection specified, and contains an array operator
                details.requestElemMatchKey();
            }

            // in some cases (clone collection) there won't be a matcher
            if ( !c->currentMatches( &details ) ) {
                // no match: skip this document
            }
            else if ( manager && ! manager->belongsToMe( client_cursor ) ) {
                LOG(2) << "cursor skipping document in un-owned chunk: " << c->current() << endl;
            }
            else if ( c->getsetdup(c->currPK()) ) {
                // duplicate: already returned, skip it
            }
            else {
                // save this so that at the end of the loop,
                // we can update the location for write concern
                // in replication. Note that if this cursor is not
                // doing replication, this is pointless
                if ( client_cursor->queryOptions() & QueryOption_OplogReplay ) {
                    client_cursor->storeOpForSlave( c->current() );
                }
                n++;

                client_cursor->fillQueryResultFromObj( b, &details );

                if ( ( ntoreturn && n >= ntoreturn ) || b.len() > MaxBytesToReturnToClientAtOnce ) {
                    // Batch is full: step past the returned document and remember the position.
                    c->advance();
                    client_cursor->incPos( n );
                    break;
                }
            }
            c->advance();
        }

        if ( client_cursor ) {
            client_cursor->resetIdleAge();
            exhaust = client_cursor->queryOptions() & QueryOption_Exhaust;
        }
        else if (!cursorPartOfMultiStatementTxn) {
            // This cursor is done and it wasn't part of a multi-statement
            // transaction. We can commit the transaction now.
            // (wts is non-null here: it was set above under the same condition.)
            cc().commitTopTxn();
            wts->release();
        }
    }

    // Fill in the reply header at the front of the buffer, then detach the buffer.
    QueryResult *qr = (QueryResult *) b.buf();
    qr->len = b.len();
    qr->setOperation(opReply);
    qr->_resultFlags() = resultFlags;
    qr->cursorId = cursorid;
    qr->startingFrom = start;
    qr->nReturned = n;
    b.decouple();

    return qr;
}
/** * Run a query -- includes checking for and running a Command. * @return points to ns if exhaust mode. 0=normal mode * @locks the db mutex for reading (and potentially for writing temporarily to create a new db). * @yields the db mutex periodically after acquiring it. * @asserts on scan and order memory exhaustion and other cases. */ const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) { shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) ); ParsedQuery& pq( *pq_shared ); BSONObj jsobj = q.query; int queryOptions = q.queryOptions; const char *ns = q.ns; if( logLevel >= 2 ) log() << "runQuery called " << ns << " " << jsobj << endl; curop.debug().ns = ns; curop.debug().ntoreturn = pq.getNumToReturn(); curop.debug().query = jsobj; curop.setQuery(jsobj); // Run a command. if ( pq.couldBeCommand() ) { BufBuilder bb; bb.skip(sizeof(QueryResult)); BSONObjBuilder cmdResBuf; if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) { curop.debug().iscommand = true; curop.debug().query = jsobj; curop.markCommand(); auto_ptr< QueryResult > qr; qr.reset( (QueryResult *) bb.buf() ); bb.decouple(); qr->setResultFlagsToOk(); qr->len = bb.len(); curop.debug().responseLength = bb.len(); qr->setOperation(opReply); qr->cursorId = 0; qr->startingFrom = 0; qr->nReturned = 1; result.setData( qr.release(), true ); } else { uasserted(13530, "bad or malformed command request?"); } return 0; } bool explain = pq.isExplain(); BSONObj order = pq.getOrder(); BSONObj query = pq.getFilter(); /* The ElemIter will not be happy if this isn't really an object. So throw exception here when that is true. (Which may indicate bad data from client.) 
*/ if ( query.objsize() == 0 ) { out() << "Bad query object?\n jsobj:"; out() << jsobj.toString() << "\n query:"; out() << query.toString() << endl; uassert( 10110 , "bad query object", false); } Client::ReadContext ctx( ns , dbpath ); // read locks const ConfigVersion shardingVersionAtStart = shardingState.getVersion( ns ); replVerifyReadsOk(&pq); if ( pq.hasOption( QueryOption_CursorTailable ) ) { NamespaceDetails *d = nsdetails( ns ); uassert( 13051, "tailable cursor requested on non capped collection", d && d->isCapped() ); const BSONObj nat1 = BSON( "$natural" << 1 ); if ( order.isEmpty() ) { order = nat1; } else { uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 ); } } // Run a simple id query. if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) { int n = 0; bool nsFound = false; bool indexFound = false; BSONObj resObject; Client& c = cc(); bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound ); if ( nsFound == false || indexFound == true ) { if ( shardingState.needShardChunkManager( ns ) ) { ShardChunkManagerPtr m = shardingState.getShardChunkManager( ns ); if ( m && ! m->belongsToMe( resObject ) ) { // I have something this _id // but it doesn't belong to me // so return nothing resObject = BSONObj(); found = false; } } BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32); bb.skip(sizeof(QueryResult)); curop.debug().idhack = true; if ( found ) { n = 1; fillQueryResultFromObj( bb , pq.getFields() , resObject ); } auto_ptr< QueryResult > qr; qr.reset( (QueryResult *) bb.buf() ); bb.decouple(); qr->setResultFlagsToOk(); qr->len = bb.len(); curop.debug().responseLength = bb.len(); qr->setOperation(opReply); qr->cursorId = 0; qr->startingFrom = 0; qr->nReturned = n; result.setData( qr.release(), true ); return NULL; } } // Run a regular query. BSONObj oldPlan; if ( explain && ! 
pq.hasIndexSpecifier() ) { MultiPlanScanner mps( ns, query, order ); if ( mps.usingCachedPlan() ) { oldPlan = mps.oldExplain().firstElement().embeddedObject() .firstElement().embeddedObject().getOwned(); } } // In some cases the query may be retried if there is an in memory sort size assertion. for( int retry = 0; retry < 2; ++retry ) { try { return queryWithQueryOptimizer( m, queryOptions, ns, jsobj, curop, query, order, pq_shared, oldPlan, shardingVersionAtStart, result ); } catch ( const QueryRetryException & ) { verify( retry == 0 ); } } verify( false ); return 0; }