Beispiel #1
0
    /**
     * Run a query with a cursor provided by the query optimizer, or FindingStartCursor.
     * @yields the db lock.
     */
    string queryWithQueryOptimizer( int queryOptions, const string& ns,
                                    const BSONObj &jsobj, CurOp& curop,
                                    const BSONObj &query, const BSONObj &order,
                                    const shared_ptr<ParsedQuery> &pq_shared,
                                    const BSONObj &oldPlan,
                                    const ChunkVersion &shardingVersionAtStart,
                                    scoped_ptr<PageFaultRetryableSection>& parentPageFaultSection,
                                    scoped_ptr<NoPageFaultsAllowed>& noPageFault,
                                    Message &result ) {

        const ParsedQuery &pq( *pq_shared );
        shared_ptr<Cursor> cursor;
        QueryPlanSummary queryPlan;
        
        if ( pq.hasOption( QueryOption_OplogReplay ) ) {
            cursor = FindingStartCursor::getCursor( ns.c_str(), query, order );
        }
        else {
            cursor = getOptimizedCursor( ns.c_str(),
                                         query,
                                         order,
                                         QueryPlanSelectionPolicy::any(),
                                         pq_shared,
                                         false,
                                         &queryPlan );
        }
        verify( cursor );
        
        scoped_ptr<QueryResponseBuilder> queryResponseBuilder
                ( QueryResponseBuilder::make( pq, cursor, queryPlan, oldPlan ) );
        bool saveClientCursor = false;
        OpTime slaveReadTill;
        ClientCursorHolder ccPointer( new ClientCursor( QueryOption_NoCursorTimeout, cursor,
                                                         ns ) );
        
        for( ; cursor->ok(); cursor->advance() ) {

            bool yielded = false;
            if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered, &yielded ) ||
                !cursor->ok() ) {
                cursor.reset();
                queryResponseBuilder->noteYield();
                // !!! TODO The queryResponseBuilder still holds cursor.  Currently it will not do
                // anything unsafe with the cursor in handoff(), but this is very fragile.
                //
                // We don't fail the query since we're fine with returning partial data if the
                // collection was dropped.
                // NOTE see SERVER-2454.
                // TODO This is wrong.  The cursor could be gone if the closeAllDatabases command
                // just ran.
                break;
            }

            if ( yielded ) {
                queryResponseBuilder->noteYield();
            }
            
            if ( pq.getMaxScan() && cursor->nscanned() > pq.getMaxScan() ) {
                break;
            }
            
            if ( !queryResponseBuilder->addMatch() ) {
                continue;
            }
            
            // Note slave's position in the oplog.
            if ( pq.hasOption( QueryOption_OplogReplay ) ) {
                BSONObj current = cursor->current();
                BSONElement e = current["ts"];
                if ( e.type() == Date || e.type() == Timestamp ) {
                    slaveReadTill = e._opTime();
                }
            }
            
            if ( !cursor->supportGetMore() || pq.isExplain() ) {
                if ( queryResponseBuilder->enoughTotalResults() ) {
                    break;
                }
            }
            else if ( queryResponseBuilder->enoughForFirstBatch() ) {
                // if only 1 requested, no cursor saved for efficiency...we assume it is findOne()
                if ( pq.wantMore() && pq.getNumToReturn() != 1 ) {
                    queryResponseBuilder->finishedFirstBatch();
                    if ( cursor->advance() ) {
                        saveClientCursor = true;
                    }
                }
                break;
            }
        }
        
        if ( cursor ) {
            if ( pq.hasOption( QueryOption_CursorTailable ) && pq.getNumToReturn() != 1 ) {
                cursor->setTailable();
            }
            
            // If the tailing request succeeded.
            if ( cursor->tailable() ) {
                saveClientCursor = true;
            }
        }
        
        if ( ! shardingState.getVersion( ns ).isWriteCompatibleWith( shardingVersionAtStart ) ) {
            // if the version changed during the query
            // we might be missing some data
            // and its safe to send this as mongos can resend
            // at this point
            throw SendStaleConfigException(ns, "version changed during initial query",
                                           shardingVersionAtStart,
                                           shardingState.getVersion(ns));
        }
        
        parentPageFaultSection.reset(0);
        noPageFault.reset( new NoPageFaultsAllowed() );

        int nReturned = queryResponseBuilder->handoff( result );

        ccPointer.reset();
        long long cursorid = 0;
        if ( saveClientCursor ) {
            // Create a new ClientCursor, with a default timeout.
            ccPointer.reset( new ClientCursor( queryOptions, cursor, ns,
                                              jsobj.getOwned() ) );
            cursorid = ccPointer->cursorid();
            DEV { MONGO_TLOG(2) << "query has more, cursorid: " << cursorid << endl; }
            if ( cursor->supportYields() ) {
                ClientCursor::YieldData data;
                ccPointer->prepareToYield( data );
            }
            else {
                ccPointer->c()->noteLocation();
            }
            
            // Save slave's position in the oplog.
            if ( pq.hasOption( QueryOption_OplogReplay ) && !slaveReadTill.isNull() ) {
                ccPointer->slaveReadTill( slaveReadTill );
            }
            
            if ( !ccPointer->ok() && ccPointer->c()->tailable() ) {
                DEV {
                    MONGO_TLOG(0) << "query has no more but tailable, cursorid: " << cursorid <<
                        endl;
                }
            }
            
            if( queryOptions & QueryOption_Exhaust ) {
                curop.debug().exhaust = true;
            }
            
            // Set attributes for getMore.
            ccPointer->setCollMetadata( queryResponseBuilder->collMetadata() );
            ccPointer->setPos( nReturned );
            ccPointer->pq = pq_shared;
            ccPointer->fields = pq.getFieldPtr();

            // If the query had a time limit, remaining time is "rolled over" to the cursor (for
            // use by future getmore ops).
            ccPointer->setLeftoverMaxTimeMicros( curop.getRemainingMaxTimeMicros() );

            ccPointer.release();
        }
Beispiel #2
0
        bool group( const std::string& realdbname,
                    const std::string& ns,
                    const BSONObj& query,
                    BSONObj keyPattern,
                    const std::string& keyFunctionCode,
                    const std::string& reduceCode,
                    const char * reduceScope,
                    BSONObj initial,
                    const std::string& finalize,
                    string& errmsg,
                    BSONObjBuilder& result ) {

            auto_ptr<Scope> s = globalScriptEngine->getPooledScope( realdbname, "group");

            if ( reduceScope )
                s->init( reduceScope );

            s->setObject( "$initial" , initial , true );

            s->exec( "$reduce = " + reduceCode , "$group reduce setup" , false , true , true , 100 );
            s->exec( "$arr = [];" , "$group reduce setup 2" , false , true , true , 100 );
            ScriptingFunction f = s->createFunction(
                                      "function(){ "
                                      "  if ( $arr[n] == null ){ "
                                      "    next = {}; "
                                      "    Object.extend( next , $key ); "
                                      "    Object.extend( next , $initial , true ); "
                                      "    $arr[n] = next; "
                                      "    next = null; "
                                      "  } "
                                      "  $reduce( obj , $arr[n] ); "
                                      "}" );

            ScriptingFunction keyFunction = 0;
            if ( keyFunctionCode.size() ) {
                keyFunction = s->createFunction( keyFunctionCode.c_str() );
            }


            double keysize = keyPattern.objsize() * 3;
            double keynum = 1;

            map<BSONObj,int,BSONObjCmp> map;
            list<BSONObj> blah;

            shared_ptr<Cursor> cursor = getOptimizedCursor(ns.c_str() , query);
            ClientCursor::Holder ccPointer( new ClientCursor( QueryOption_NoCursorTimeout, cursor,
                                                             ns ) );

            while ( cursor->ok() ) {
                
                if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered ) ||
                    !cursor->ok() ) {
                    break;
                }
                
                if ( !cursor->currentMatches() || cursor->getsetdup( cursor->currLoc() ) ) {
                    cursor->advance();
                    continue;
                }

                if ( !ccPointer->yieldSometimes( ClientCursor::WillNeed ) ||
                    !cursor->ok() ) {
                    break;
                }
                
                BSONObj obj = cursor->current();
                cursor->advance();

                BSONObj key = getKey( obj , keyPattern , keyFunction , keysize / keynum , s.get() );
                keysize += key.objsize();
                keynum++;

                int& n = map[key];
                if ( n == 0 ) {
                    n = map.size();
                    s->setObject( "$key" , key , true );

                    uassert( 10043 ,  "group() can't handle more than 20000 unique keys" , n <= 20000 );
                }

                s->setObject( "obj" , obj , true );
                s->setNumber( "n" , n - 1 );
                if ( s->invoke( f , 0, 0 , 0 , true ) ) {
                    throw UserException( 9010 , (string)"reduce invoke failed: " + s->getError() );
                }
            }
            ccPointer.reset();

            if (!finalize.empty()) {
                s->exec( "$finalize = " + finalize , "$group finalize define" , false , true , true , 100 );
                ScriptingFunction g = s->createFunction(
                                          "function(){ "
                                          "  for(var i=0; i < $arr.length; i++){ "
                                          "  var ret = $finalize($arr[i]); "
                                          "  if (ret !== undefined) "
                                          "    $arr[i] = ret; "
                                          "  } "
                                          "}" );
                s->invoke( g , 0, 0 , 0 , true );
            }

            result.appendArray( "retval" , s->getObject( "$arr" ) );
            result.append( "count" , keynum - 1 );
            result.append( "keys" , (int)(map.size()) );
            s->exec( "$arr = [];" , "$group reduce setup 2" , false , true , true , 100 );
            s->gc();

            return true;
        }
Beispiel #3
0
    /**
     * Run a query with a cursor provided by the query optimizer, or FindingStartCursor.
     * @returns true if client cursor was saved, false if the query has completed.
     */
    bool queryWithQueryOptimizer( int queryOptions, const string& ns,
                                  const BSONObj &jsobj, CurOp& curop,
                                  const BSONObj &query, const BSONObj &order,
                                  const shared_ptr<ParsedQuery> &pq_shared,
                                  const ConfigVersion &shardingVersionAtStart,
                                  const bool getCachedExplainPlan,
                                  const bool inMultiStatementTxn,
                                  Message &result ) {

        const ParsedQuery &pq( *pq_shared );
        shared_ptr<Cursor> cursor;
        QueryPlanSummary queryPlan;

        const bool tailable = pq.hasOption( QueryOption_CursorTailable ) && pq.getNumToReturn() != 1;
        
        LOG(1) << "query beginning read-only transaction. tailable: " << tailable << endl;
        
        BSONObj oldPlan;
        if (getCachedExplainPlan) {
            scoped_ptr<MultiPlanScanner> mps( MultiPlanScanner::make( ns.c_str(), query, order ) );
            oldPlan = mps->cachedPlanExplainSummary();
        }
        
        cursor = getOptimizedCursor( ns.c_str(), query, order, QueryPlanSelectionPolicy::any(),
                                     pq_shared, false, &queryPlan );
        verify( cursor );

        // Tailable cursors must be marked as such before any use. This is so that
        // the implementation knows that uncommitted data cannot be returned.
        if ( tailable ) {
            cursor->setTailable();
        }

        scoped_ptr<QueryResponseBuilder> queryResponseBuilder
                ( QueryResponseBuilder::make( pq, cursor, queryPlan, oldPlan ) );
        bool saveClientCursor = false;
        int options = QueryOption_NoCursorTimeout;
        if (pq.hasOption( QueryOption_OplogReplay )) {
            options |= QueryOption_OplogReplay;
        }
        // create a client cursor that does not create a cursorID.
        // The cursor ID will be created if and only if we save
        // the client cursor further below
        ClientCursor::Holder ccPointer(
            new ClientCursor( options, cursor, ns, BSONObj(), false, false )
            );

        // for oplog cursors, we check if we are reading data that is too old and might
        // be stale.
        bool opChecked = false;
        bool slaveLocationUpdated = false;
        BSONObj last;
        bool lastBSONObjSet = false;
        for ( ; cursor->ok(); cursor->advance() ) {

            if ( pq.getMaxScan() && cursor->nscanned() > pq.getMaxScan() ) {
                break;
            }
            
            if ( !queryResponseBuilder->addMatch() ) {
                continue;
            }
            
            // Note slave's position in the oplog.
            if ( pq.hasOption( QueryOption_OplogReplay ) ) {
                BSONObj current = cursor->current();
                last = current.copy();
                lastBSONObjSet = true;
                
                // the first row returned is equal to the last element that
                // the slave has synced up to, so we might as well update
                // the slave location
                if (!slaveLocationUpdated) {
                    ccPointer->updateSlaveLocation(curop);
                    slaveLocationUpdated = true;
                }
                // check if data we are about to return may be too stale
                if (!opChecked) {
                    ccPointer->storeOpForSlave(current);
                    uassert(16785, "oplog cursor reading data that is too old", !ccPointer->lastOpForSlaveTooOld());
                    opChecked = true;
                }
            }
            
            if ( pq.isExplain() ) {
                if ( queryResponseBuilder->enoughTotalResults() ) {
                    break;
                }
            }
            else if ( queryResponseBuilder->enoughForFirstBatch() ) {
                // if only 1 requested, no cursor saved for efficiency...we assume it is findOne()
                if ( pq.wantMore() && pq.getNumToReturn() != 1 ) {
                    queryResponseBuilder->finishedFirstBatch();
                    if ( cursor->advance() ) {
                        saveClientCursor = true;
                    }
                }
                break;
            }
        }

        // If the tailing request succeeded
        if ( cursor->tailable() ) {
            saveClientCursor = true;
        }
        
        if ( ! shardingState.getVersion( ns ).isWriteCompatibleWith( shardingVersionAtStart ) ) {
            // if the version changed during the query
            // we might be missing some data
            // and its safe to send this as mongos can resend
            // at this point
            throw SendStaleConfigException( ns , "version changed during initial query", shardingVersionAtStart, shardingState.getVersion( ns ) );
        }
        
        int nReturned = queryResponseBuilder->handoff( result );

        ccPointer.reset();
        long long cursorid = 0;
        if ( saveClientCursor ) {
            // Create a new ClientCursor, with a default timeout.
            ccPointer.reset( new ClientCursor( queryOptions, cursor, ns,
                                               jsobj.getOwned(), inMultiStatementTxn ) );
            cursorid = ccPointer->cursorid();
            DEV tlog(2) << "query has more, cursorid: " << cursorid << endl;
            
            if ( !ccPointer->ok() && ccPointer->c()->tailable() ) {
                DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl;
            }
            
            if( queryOptions & QueryOption_Exhaust ) {
                curop.debug().exhaust = true;
            }
            
            // Set attributes for getMore.
            ccPointer->setChunkManager( queryResponseBuilder->chunkManager() );
            ccPointer->setPos( nReturned );
            ccPointer->pq = pq_shared;
            ccPointer->fields = pq.getFieldPtr();
            if (pq.hasOption( QueryOption_OplogReplay ) && lastBSONObjSet) {
                ccPointer->storeOpForSlave(last);
            }
            if (!inMultiStatementTxn) {
                // This cursor is not part of a multi-statement transaction, so
                // we pass off the current client's transaction stack to the
                // cursor so that it may be live as long as the cursor.
                cc().swapTransactionStack(ccPointer->transactions);
                verify(!cc().hasTxn());
            }
            ccPointer.release();
        }

        QueryResult *qr = (QueryResult *) result.header();
        qr->cursorId = cursorid;
        curop.debug().cursorid = ( cursorid == 0 ? -1 : qr->cursorId );
        qr->setResultFlagsToOk();
        // qr->len is updated automatically by appendData()
        curop.debug().responseLength = qr->len;
        qr->setOperation(opReply);
        qr->startingFrom = 0;
        qr->nReturned = nReturned;

        curop.debug().nscanned = ( cursor ? cursor->nscanned() : 0LL );
        curop.debug().ntoskip = pq.getSkip();
        curop.debug().nreturned = nReturned;

        return saveClientCursor;
    }