/** * Run a query with a cursor provided by the query optimizer, or FindingStartCursor. * @yields the db lock. */ string queryWithQueryOptimizer( int queryOptions, const string& ns, const BSONObj &jsobj, CurOp& curop, const BSONObj &query, const BSONObj &order, const shared_ptr<ParsedQuery> &pq_shared, const BSONObj &oldPlan, const ChunkVersion &shardingVersionAtStart, scoped_ptr<PageFaultRetryableSection>& parentPageFaultSection, scoped_ptr<NoPageFaultsAllowed>& noPageFault, Message &result ) { const ParsedQuery &pq( *pq_shared ); shared_ptr<Cursor> cursor; QueryPlanSummary queryPlan; if ( pq.hasOption( QueryOption_OplogReplay ) ) { cursor = FindingStartCursor::getCursor( ns.c_str(), query, order ); } else { cursor = getOptimizedCursor( ns.c_str(), query, order, QueryPlanSelectionPolicy::any(), pq_shared, false, &queryPlan ); } verify( cursor ); scoped_ptr<QueryResponseBuilder> queryResponseBuilder ( QueryResponseBuilder::make( pq, cursor, queryPlan, oldPlan ) ); bool saveClientCursor = false; OpTime slaveReadTill; ClientCursorHolder ccPointer( new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) ); for( ; cursor->ok(); cursor->advance() ) { bool yielded = false; if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered, &yielded ) || !cursor->ok() ) { cursor.reset(); queryResponseBuilder->noteYield(); // !!! TODO The queryResponseBuilder still holds cursor. Currently it will not do // anything unsafe with the cursor in handoff(), but this is very fragile. // // We don't fail the query since we're fine with returning partial data if the // collection was dropped. // NOTE see SERVER-2454. // TODO This is wrong. The cursor could be gone if the closeAllDatabases command // just ran. break; } if ( yielded ) { queryResponseBuilder->noteYield(); } if ( pq.getMaxScan() && cursor->nscanned() > pq.getMaxScan() ) { break; } if ( !queryResponseBuilder->addMatch() ) { continue; } // Note slave's position in the oplog. if ( pq.hasOption( QueryOption_OplogReplay ) ) { BSONObj current = cursor->current(); BSONElement e = current["ts"]; if ( e.type() == Date || e.type() == Timestamp ) { slaveReadTill = e._opTime(); } } if ( !cursor->supportGetMore() || pq.isExplain() ) { if ( queryResponseBuilder->enoughTotalResults() ) { break; } } else if ( queryResponseBuilder->enoughForFirstBatch() ) { // if only 1 requested, no cursor saved for efficiency...we assume it is findOne() if ( pq.wantMore() && pq.getNumToReturn() != 1 ) { queryResponseBuilder->finishedFirstBatch(); if ( cursor->advance() ) { saveClientCursor = true; } } break; } } if ( cursor ) { if ( pq.hasOption( QueryOption_CursorTailable ) && pq.getNumToReturn() != 1 ) { cursor->setTailable(); } // If the tailing request succeeded. if ( cursor->tailable() ) { saveClientCursor = true; } } if ( ! shardingState.getVersion( ns ).isWriteCompatibleWith( shardingVersionAtStart ) ) { // if the version changed during the query // we might be missing some data // and its safe to send this as mongos can resend // at this point throw SendStaleConfigException(ns, "version changed during initial query", shardingVersionAtStart, shardingState.getVersion(ns)); } parentPageFaultSection.reset(0); noPageFault.reset( new NoPageFaultsAllowed() ); int nReturned = queryResponseBuilder->handoff( result ); ccPointer.reset(); long long cursorid = 0; if ( saveClientCursor ) { // Create a new ClientCursor, with a default timeout. ccPointer.reset( new ClientCursor( queryOptions, cursor, ns, jsobj.getOwned() ) ); cursorid = ccPointer->cursorid(); DEV { MONGO_TLOG(2) << "query has more, cursorid: " << cursorid << endl; } if ( cursor->supportYields() ) { ClientCursor::YieldData data; ccPointer->prepareToYield( data ); } else { ccPointer->c()->noteLocation(); } // Save slave's position in the oplog. if ( pq.hasOption( QueryOption_OplogReplay ) && !slaveReadTill.isNull() ) { ccPointer->slaveReadTill( slaveReadTill ); } if ( !ccPointer->ok() && ccPointer->c()->tailable() ) { DEV { MONGO_TLOG(0) << "query has no more but tailable, cursorid: " << cursorid << endl; } } if( queryOptions & QueryOption_Exhaust ) { curop.debug().exhaust = true; } // Set attributes for getMore. ccPointer->setCollMetadata( queryResponseBuilder->collMetadata() ); ccPointer->setPos( nReturned ); ccPointer->pq = pq_shared; ccPointer->fields = pq.getFieldPtr(); // If the query had a time limit, remaining time is "rolled over" to the cursor (for // use by future getmore ops). ccPointer->setLeftoverMaxTimeMicros( curop.getRemainingMaxTimeMicros() ); ccPointer.release(); }
bool group( const std::string& realdbname, const std::string& ns, const BSONObj& query, BSONObj keyPattern, const std::string& keyFunctionCode, const std::string& reduceCode, const char * reduceScope, BSONObj initial, const std::string& finalize, string& errmsg, BSONObjBuilder& result ) { auto_ptr<Scope> s = globalScriptEngine->getPooledScope( realdbname, "group"); if ( reduceScope ) s->init( reduceScope ); s->setObject( "$initial" , initial , true ); s->exec( "$reduce = " + reduceCode , "$group reduce setup" , false , true , true , 100 ); s->exec( "$arr = [];" , "$group reduce setup 2" , false , true , true , 100 ); ScriptingFunction f = s->createFunction( "function(){ " " if ( $arr[n] == null ){ " " next = {}; " " Object.extend( next , $key ); " " Object.extend( next , $initial , true ); " " $arr[n] = next; " " next = null; " " } " " $reduce( obj , $arr[n] ); " "}" ); ScriptingFunction keyFunction = 0; if ( keyFunctionCode.size() ) { keyFunction = s->createFunction( keyFunctionCode.c_str() ); } double keysize = keyPattern.objsize() * 3; double keynum = 1; map<BSONObj,int,BSONObjCmp> map; list<BSONObj> blah; shared_ptr<Cursor> cursor = getOptimizedCursor(ns.c_str() , query); ClientCursor::Holder ccPointer( new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) ); while ( cursor->ok() ) { if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered ) || !cursor->ok() ) { break; } if ( !cursor->currentMatches() || cursor->getsetdup( cursor->currLoc() ) ) { cursor->advance(); continue; } if ( !ccPointer->yieldSometimes( ClientCursor::WillNeed ) || !cursor->ok() ) { break; } BSONObj obj = cursor->current(); cursor->advance(); BSONObj key = getKey( obj , keyPattern , keyFunction , keysize / keynum , s.get() ); keysize += key.objsize(); keynum++; int& n = map[key]; if ( n == 0 ) { n = map.size(); s->setObject( "$key" , key , true ); uassert( 10043 , "group() can't handle more than 20000 unique keys" , n <= 20000 ); } s->setObject( "obj" , obj , true ); s->setNumber( "n" , n - 1 ); if ( s->invoke( f , 0, 0 , 0 , true ) ) { throw UserException( 9010 , (string)"reduce invoke failed: " + s->getError() ); } } ccPointer.reset(); if (!finalize.empty()) { s->exec( "$finalize = " + finalize , "$group finalize define" , false , true , true , 100 ); ScriptingFunction g = s->createFunction( "function(){ " " for(var i=0; i < $arr.length; i++){ " " var ret = $finalize($arr[i]); " " if (ret !== undefined) " " $arr[i] = ret; " " } " "}" ); s->invoke( g , 0, 0 , 0 , true ); } result.appendArray( "retval" , s->getObject( "$arr" ) ); result.append( "count" , keynum - 1 ); result.append( "keys" , (int)(map.size()) ); s->exec( "$arr = [];" , "$group reduce setup 2" , false , true , true , 100 ); s->gc(); return true; }
/** * Run a query with a cursor provided by the query optimizer, or FindingStartCursor. * @returns true if client cursor was saved, false if the query has completed. */ bool queryWithQueryOptimizer( int queryOptions, const string& ns, const BSONObj &jsobj, CurOp& curop, const BSONObj &query, const BSONObj &order, const shared_ptr<ParsedQuery> &pq_shared, const ConfigVersion &shardingVersionAtStart, const bool getCachedExplainPlan, const bool inMultiStatementTxn, Message &result ) { const ParsedQuery &pq( *pq_shared ); shared_ptr<Cursor> cursor; QueryPlanSummary queryPlan; const bool tailable = pq.hasOption( QueryOption_CursorTailable ) && pq.getNumToReturn() != 1; LOG(1) << "query beginning read-only transaction. tailable: " << tailable << endl; BSONObj oldPlan; if (getCachedExplainPlan) { scoped_ptr<MultiPlanScanner> mps( MultiPlanScanner::make( ns.c_str(), query, order ) ); oldPlan = mps->cachedPlanExplainSummary(); } cursor = getOptimizedCursor( ns.c_str(), query, order, QueryPlanSelectionPolicy::any(), pq_shared, false, &queryPlan ); verify( cursor ); // Tailable cursors must be marked as such before any use. This is so that // the implementation knows that uncommitted data cannot be returned. if ( tailable ) { cursor->setTailable(); } scoped_ptr<QueryResponseBuilder> queryResponseBuilder ( QueryResponseBuilder::make( pq, cursor, queryPlan, oldPlan ) ); bool saveClientCursor = false; int options = QueryOption_NoCursorTimeout; if (pq.hasOption( QueryOption_OplogReplay )) { options |= QueryOption_OplogReplay; } // create a client cursor that does not create a cursorID. // The cursor ID will be created if and only if we save // the client cursor further below ClientCursor::Holder ccPointer( new ClientCursor( options, cursor, ns, BSONObj(), false, false ) ); // for oplog cursors, we check if we are reading data that is too old and might // be stale. bool opChecked = false; bool slaveLocationUpdated = false; BSONObj last; bool lastBSONObjSet = false; for ( ; cursor->ok(); cursor->advance() ) { if ( pq.getMaxScan() && cursor->nscanned() > pq.getMaxScan() ) { break; } if ( !queryResponseBuilder->addMatch() ) { continue; } // Note slave's position in the oplog. if ( pq.hasOption( QueryOption_OplogReplay ) ) { BSONObj current = cursor->current(); last = current.copy(); lastBSONObjSet = true; // the first row returned is equal to the last element that // the slave has synced up to, so we might as well update // the slave location if (!slaveLocationUpdated) { ccPointer->updateSlaveLocation(curop); slaveLocationUpdated = true; } // check if data we are about to return may be too stale if (!opChecked) { ccPointer->storeOpForSlave(current); uassert(16785, "oplog cursor reading data that is too old", !ccPointer->lastOpForSlaveTooOld()); opChecked = true; } } if ( pq.isExplain() ) { if ( queryResponseBuilder->enoughTotalResults() ) { break; } } else if ( queryResponseBuilder->enoughForFirstBatch() ) { // if only 1 requested, no cursor saved for efficiency...we assume it is findOne() if ( pq.wantMore() && pq.getNumToReturn() != 1 ) { queryResponseBuilder->finishedFirstBatch(); if ( cursor->advance() ) { saveClientCursor = true; } } break; } } // If the tailing request succeeded if ( cursor->tailable() ) { saveClientCursor = true; } if ( ! shardingState.getVersion( ns ).isWriteCompatibleWith( shardingVersionAtStart ) ) { // if the version changed during the query // we might be missing some data // and its safe to send this as mongos can resend // at this point throw SendStaleConfigException( ns , "version changed during initial query", shardingVersionAtStart, shardingState.getVersion( ns ) ); } int nReturned = queryResponseBuilder->handoff( result ); ccPointer.reset(); long long cursorid = 0; if ( saveClientCursor ) { // Create a new ClientCursor, with a default timeout. ccPointer.reset( new ClientCursor( queryOptions, cursor, ns, jsobj.getOwned(), inMultiStatementTxn ) ); cursorid = ccPointer->cursorid(); DEV tlog(2) << "query has more, cursorid: " << cursorid << endl; if ( !ccPointer->ok() && ccPointer->c()->tailable() ) { DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl; } if( queryOptions & QueryOption_Exhaust ) { curop.debug().exhaust = true; } // Set attributes for getMore. ccPointer->setChunkManager( queryResponseBuilder->chunkManager() ); ccPointer->setPos( nReturned ); ccPointer->pq = pq_shared; ccPointer->fields = pq.getFieldPtr(); if (pq.hasOption( QueryOption_OplogReplay ) && lastBSONObjSet) { ccPointer->storeOpForSlave(last); } if (!inMultiStatementTxn) { // This cursor is not part of a multi-statement transaction, so // we pass off the current client's transaction stack to the // cursor so that it may be live as long as the cursor. cc().swapTransactionStack(ccPointer->transactions); verify(!cc().hasTxn()); } ccPointer.release(); } QueryResult *qr = (QueryResult *) result.header(); qr->cursorId = cursorid; curop.debug().cursorid = ( cursorid == 0 ? -1 : qr->cursorId ); qr->setResultFlagsToOk(); // qr->len is updated automatically by appendData() curop.debug().responseLength = qr->len; qr->setOperation(opReply); qr->startingFrom = 0; qr->nReturned = nReturned; curop.debug().nscanned = ( cursor ? cursor->nscanned() : 0LL ); curop.debug().ntoskip = pq.getSkip(); curop.debug().nreturned = nReturned; return saveClientCursor; }