/** * Run a query with a cursor provided by the query optimizer, or FindingStartCursor. * @yields the db lock. */ const char *queryWithQueryOptimizer( Message &m, int queryOptions, const char *ns, const BSONObj &jsobj, CurOp& curop, const BSONObj &query, const BSONObj &order, const shared_ptr<ParsedQuery> &pq_shared, const BSONObj &oldPlan, const ConfigVersion &shardingVersionAtStart, Message &result ) { const ParsedQuery &pq( *pq_shared ); shared_ptr<Cursor> cursor; QueryPlanSummary queryPlan; if ( pq.hasOption( QueryOption_OplogReplay ) ) { cursor = FindingStartCursor::getCursor( ns, query, order ); } else { cursor = NamespaceDetailsTransient::getCursor( ns, query, order, QueryPlanSelectionPolicy::any(), 0, pq_shared, &queryPlan ); } verify( cursor ); QueryResponseBuilder queryResponseBuilder( pq, cursor, queryPlan, oldPlan ); bool saveClientCursor = false; const char *exhaust = 0; OpTime slaveReadTill; ClientCursor::CleanupPointer ccPointer; ccPointer.reset( new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) ); for( ; cursor->ok(); cursor->advance() ) { bool yielded = false; if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered, &yielded ) || !cursor->ok() ) { cursor.reset(); queryResponseBuilder.noteYield(); // !!! TODO The queryResponseBuilder still holds cursor. Currently it will not do // anything unsafe with the cursor in handoff(), but this is very fragile. // // We don't fail the query since we're fine with returning partial data if the // collection was dropped. // NOTE see SERVER-2454. // TODO This is wrong. The cursor could be gone if the closeAllDatabases command // just ran. break; } if ( yielded ) { queryResponseBuilder.noteYield(); } if ( pq.getMaxScan() && cursor->nscanned() > pq.getMaxScan() ) { break; } if ( !queryResponseBuilder.addMatch() ) { continue; } // Note slave's position in the oplog. if ( pq.hasOption( QueryOption_OplogReplay ) ) { BSONObj current = cursor->current(); BSONElement e = current["ts"]; if ( e.type() == Date || e.type() == Timestamp ) { slaveReadTill = e._opTime(); } } if ( !cursor->supportGetMore() || pq.isExplain() ) { if ( queryResponseBuilder.enoughTotalResults() ) { break; } } else if ( queryResponseBuilder.enoughForFirstBatch() ) { // if only 1 requested, no cursor saved for efficiency...we assume it is findOne() if ( pq.wantMore() && pq.getNumToReturn() != 1 ) { queryResponseBuilder.finishedFirstBatch(); if ( cursor->advance() ) { saveClientCursor = true; } } break; } } if ( cursor ) { if ( pq.hasOption( QueryOption_CursorTailable ) && pq.getNumToReturn() != 1 ) { cursor->setTailable(); } // If the tailing request succeeded. if ( cursor->tailable() ) { saveClientCursor = true; } } if ( shardingState.getVersion( ns ) != shardingVersionAtStart ) { // if the version changed during the query // we might be missing some data // and its safe to send this as mongos can resend // at this point throw SendStaleConfigException( ns , "version changed during initial query", shardingVersionAtStart, shardingState.getVersion( ns ) ); } int nReturned = queryResponseBuilder.handoff( result ); ccPointer.reset(); long long cursorid = 0; if ( saveClientCursor ) { // Create a new ClientCursor, with a default timeout. ccPointer.reset( new ClientCursor( queryOptions, cursor, ns, jsobj.getOwned() ) ); cursorid = ccPointer->cursorid(); DEV tlog(2) << "query has more, cursorid: " << cursorid << endl; if ( cursor->supportYields() ) { ClientCursor::YieldData data; ccPointer->prepareToYield( data ); } else { ccPointer->c()->noteLocation(); } // !!! Save the original message buffer, so it can be referenced in getMore. ccPointer->originalMessage = m; // Save slave's position in the oplog. if ( pq.hasOption( QueryOption_OplogReplay ) && !slaveReadTill.isNull() ) { ccPointer->slaveReadTill( slaveReadTill ); } if ( !ccPointer->ok() && ccPointer->c()->tailable() ) { DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl; } if( queryOptions & QueryOption_Exhaust ) { exhaust = ns; curop.debug().exhaust = true; } // Set attributes for getMore. ccPointer->setChunkManager( queryResponseBuilder.chunkManager() ); ccPointer->setPos( nReturned ); ccPointer->pq = pq_shared; ccPointer->fields = pq.getFieldPtr(); ccPointer.release(); } QueryResult *qr = (QueryResult *) result.header(); qr->cursorId = cursorid; curop.debug().cursorid = ( cursorid == 0 ? -1 : qr->cursorId ); qr->setResultFlagsToOk(); // qr->len is updated automatically by appendData() curop.debug().responseLength = qr->len; qr->setOperation(opReply); qr->startingFrom = 0; qr->nReturned = nReturned; int duration = curop.elapsedMillis(); bool dbprofile = curop.shouldDBProfile( duration ); if ( dbprofile || duration >= cmdLine.slowMS ) { curop.debug().nscanned = (int)( cursor ? cursor->nscanned() : 0 ); curop.debug().ntoskip = pq.getSkip(); } curop.debug().nreturned = nReturned; return exhaust; }