Beispiel #1
0
    BSONObj ParallelSortShardedCursor::next(){
        advance();
            
        BSONObj best = BSONObj();
        int bestFrom = -1;
            
        for ( int i=0; i<_numServers; i++){
            if ( _nexts[i].isEmpty() )
                continue;

            if ( best.isEmpty() ){
                best = _nexts[i];
                bestFrom = i;
                continue;
            }
                
            int comp = best.woSortOrder( _nexts[i] , _sortKey );
            if ( comp < 0 )
                continue;
                
            best = _nexts[i];
            bestFrom = i;
        }
            
        uassert( "no more elements" , ! best.isEmpty() );
        _nexts[bestFrom] = BSONObj();
            
        return best;
    }
        /**
         * A template used by many tests below.
         * Fill out numObj objects, sort them in the order provided by 'direction'.
         * If extAllowed is true, sorting will use use external sorting if available.
         * If limit is not zero, we limit the output of the sort stage to 'limit' results.
         */
        void sortAndCheck(int direction) {
            WorkingSet* ws = new WorkingSet();
            MockStage* ms = new MockStage(ws);

            // Insert a mix of the various types of data.
            insertVarietyOfObjects(ms);

            SortStageParams params;
            params.pattern = BSON("foo" << direction);

            // Must fetch so we can look at the doc as a BSONObj.
            PlanExecutor runner(ws, new FetchStage(ws, new SortStage(params, ws, ms), NULL));

            // Look at pairs of objects to make sure that the sort order is pairwise (and therefore
            // totally) correct.
            BSONObj last;
            ASSERT_EQUALS(Runner::RUNNER_ADVANCED, runner.getNext(&last, NULL));

            // Count 'last'.
            int count = 1;

            BSONObj current;
            while (Runner::RUNNER_ADVANCED == runner.getNext(&current, NULL)) {
                int cmp = sgn(current.woSortOrder(last, params.pattern));
                // The next object should be equal to the previous or oriented according to the sort
                // pattern.
                ASSERT(cmp == 0 || cmp == 1);
                ++count;
                last = current;
            }

            // No limit, should get all objects back.
            ASSERT_EQUALS(numObj(), count);
        }
Beispiel #3
0
    BSONObj ParallelSortClusteredCursor::next(){
        BSONObj best = BSONObj();
        int bestFrom = -1;
            
        for ( int i=0; i<_numServers; i++){
            if ( ! _cursors[i].more() )
                continue;
            
            BSONObj me = _cursors[i].peek();

            if ( best.isEmpty() ){
                best = me;
                bestFrom = i;
                continue;
            }
                
            int comp = best.woSortOrder( me , _sortKey , true );
            if ( comp < 0 )
                continue;
                
            best = me;
            bestFrom = i;
        }
        
        uassert( 10019 ,  "no more elements" , ! best.isEmpty() );
        _cursors[bestFrom].next();
            
        return best;
    }
    /**
     * A template used by many tests below.
     * Fill out numObj objects, sort them in the order provided by 'direction'.
     * If extAllowed is true, sorting will use use external sorting if available.
     * If limit is not zero, we limit the output of the sort stage to 'limit' results.
     */
    void sortAndCheck(int direction, Collection* coll) {
        WorkingSet* ws = new WorkingSet();
        QueuedDataStage* ms = new QueuedDataStage(ws);

        // Insert a mix of the various types of data.
        insertVarietyOfObjects(ms, coll);

        SortStageParams params;
        params.collection = coll;
        params.pattern = BSON("foo" << direction);
        params.limit = limit();

        // Must fetch so we can look at the doc as a BSONObj.
        PlanExecutor* rawExec;
        Status status =
            PlanExecutor::make(&_txn,
                               ws,
                               new FetchStage(&_txn, ws, new SortStage(params, ws, ms), NULL, coll),
                               coll,
                               PlanExecutor::YIELD_MANUAL,
                               &rawExec);
        ASSERT_OK(status);
        boost::scoped_ptr<PlanExecutor> exec(rawExec);

        // Look at pairs of objects to make sure that the sort order is pairwise (and therefore
        // totally) correct.
        BSONObj last;
        ASSERT_EQUALS(PlanExecutor::ADVANCED, exec->getNext(&last, NULL));

        // Count 'last'.
        int count = 1;

        BSONObj current;
        while (PlanExecutor::ADVANCED == exec->getNext(&current, NULL)) {
            int cmp = sgn(current.woSortOrder(last, params.pattern));
            // The next object should be equal to the previous or oriented according to the sort
            // pattern.
            ASSERT(cmp == 0 || cmp == 1);
            ++count;
            last = current;
        }

        checkCount(count);
    }
Beispiel #5
0
            bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){
                string dbname = cc().database()->name; // this has to come before dbtemprelease
                dbtemprelease temprelease; // we don't touch the db directly

                string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe();

                MRSetup mr( dbname , cmdObj.firstElement().embeddedObjectUserCheck() , false );
                
                set<ServerAndQuery> servers;
                
                BSONObjBuilder shardCounts;
                map<string,long long> counts;
                
                BSONObj shards = cmdObj["shards"].embeddedObjectUserCheck();
                vector< auto_ptr<DBClientCursor> > shardCursors;
                BSONObjIterator i( shards );
                while ( i.more() ){
                    BSONElement e = i.next();
                    string shard = e.fieldName();

                    BSONObj res = e.embeddedObjectUserCheck();
                    
                    uassert( 10078 ,  "something bad happened" , shardedOutputCollection == res["result"].valuestrsafe() );
                    servers.insert( shard );
                    shardCounts.appendAs( res["counts"] , shard.c_str() );

                    BSONObjIterator j( res["counts"].embeddedObjectUserCheck() );
                    while ( j.more() ){
                        BSONElement temp = j.next();
                        counts[temp.fieldName()] += temp.numberLong();
                    }

                }

                BSONObj sortKey = BSON( "_id" << 1 );

                ParallelSortClusteredCursor cursor( servers , dbname + "." + shardedOutputCollection ,
                                                    Query().sort( sortKey ) );
                
                
                auto_ptr<Scope> s = globalScriptEngine->getPooledScope( ns );
                ScriptingFunction reduceFunction = s->createFunction( mr.reduceCode.c_str() );
                ScriptingFunction finalizeFunction = 0;
                if ( mr.finalizeCode.size() )
                    finalizeFunction = s->createFunction( mr.finalizeCode.c_str() );

                BSONList values;

                result.append( "result" , mr.finalShort );

                DBDirectClient db;
                
                while ( cursor.more() ){
                    BSONObj t = cursor.next().getOwned();
                                        
                    if ( values.size() == 0 ){
                        values.push_back( t );
                        continue;
                    }
                    
                    if ( t.woSortOrder( *(values.begin()) , sortKey ) == 0 ){
                        values.push_back( t );
                        continue;
                    }
                    

                    db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) );
                    values.clear();
                    values.push_back( t );
                }
                
                if ( values.size() )
                    db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) );
                
                long long finalCount = mr.renameIfNeeded( db );
                log(0) << " mapreducefinishcommand " << mr.finalLong << " " << finalCount << endl;

                for ( set<ServerAndQuery>::iterator i=servers.begin(); i!=servers.end(); i++ ){
                    ScopedDbConnection conn( i->_server );
                    conn->dropCollection( dbname + "." + shardedOutputCollection );
                }
                
                result.append( "shardCounts" , shardCounts.obj() );
                
                {
                    BSONObjBuilder c;
                    for ( map<string,long long>::iterator i=counts.begin(); i!=counts.end(); i++ ){
                        c.append( i->first , i->second );
                    }
                    result.append( "counts" , c.obj() );
                }

                return 1;
            }
Beispiel #6
0
            bool run(const char *dbname, BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ){
                Timer t;
                Client::GodScope cg;
                Client& client = cc();
                CurOp * op = client.curop();

                MRSetup mr( nsToDatabase( dbname ) , cmd );

                log(1) << "mr ns: " << mr.ns << endl;
                
                if ( ! db.exists( mr.ns ) ){
                    errmsg = "ns doesn't exist";
                    return false;
                }
                
                bool shouldHaveData = false;
                
                long long num = 0;
                long long inReduce = 0;
                
                BSONObjBuilder countsBuilder;
                BSONObjBuilder timingBuilder;
                try {
                    
                    MRState state( mr );
                    state.scope->injectNative( "emit" , fast_emit );
                    
                    MRTL * mrtl = new MRTL( state );
                    _tlmr.reset( mrtl );

                    ProgressMeter & pm = op->setMessage( "m/r: (1/3) emit phase" , db.count( mr.ns , mr.filter ) );
                    long long mapTime = 0;
                    {
                        readlock lock( mr.ns );
                        Client::Context ctx( mr.ns );
                        
                        auto_ptr<Cursor> temp = QueryPlanSet(mr.ns.c_str() , mr.filter , BSONObj() ).getBestGuess()->newCursor();
                        auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , mr.ns.c_str() ) );

                        if ( ! mr.filter.isEmpty() )
                            cursor->matcher.reset( new CoveredIndexMatcher( mr.filter , cursor->indexKeyPattern() ) );
                        
                        Timer mt;
                        while ( cursor->ok() ){
                            
                            if ( ! cursor->currentMatches() ){
                                cursor->advance();
                                continue;
                            }
                            
                            BSONObj o = cursor->current(); 
                            cursor->advance();
                            
                            if ( mr.verbose ) mt.reset();
                            
                            state.scope->setThis( &o );
                            if ( state.scope->invoke( state.map , state.setup.mapparams , 0 , true ) )
                                throw UserException( 9014, (string)"map invoke failed: " + state.scope->getError() );
                            
                            if ( mr.verbose ) mapTime += mt.micros();
                            
                            num++;
                            if ( num % 100 == 0 ){
                                ClientCursor::YieldLock yield = cursor->yieldHold();
                                Timer t;
                                mrtl->checkSize();
                                inReduce += t.micros();
                                
                                if ( ! yield.stillOk() ){
                                    cursor.release();
                                    break;
                                }

                                killCurrentOp.checkForInterrupt();
                            }
                            pm.hit();
                            
                            if ( mr.limit && num >= mr.limit )
                                break;
                        }
                    }
                    pm.finished();
                    
                    killCurrentOp.checkForInterrupt();

                    countsBuilder.appendNumber( "input" , num );
                    countsBuilder.appendNumber( "emit" , mrtl->numEmits );
                    if ( mrtl->numEmits )
                        shouldHaveData = true;
                    
                    timingBuilder.append( "mapTime" , mapTime / 1000 );
                    timingBuilder.append( "emitLoop" , t.millis() );
                    
                    // final reduce
                    op->setMessage( "m/r: (2/3) final reduce in memory" );
                    mrtl->reduceInMemory();
                    mrtl->dump();
                    
                    BSONObj sortKey = BSON( "0" << 1 );
                    db.ensureIndex( mr.incLong , sortKey );
                    
                    {
                        writelock lock( mr.tempLong.c_str() );
                        Client::Context ctx( mr.tempLong.c_str() );
                        assert( userCreateNS( mr.tempLong.c_str() , BSONObj() , errmsg , mr.replicate ) );
                    }


                    {
                        readlock rl(mr.incLong.c_str());
                        Client::Context ctx( mr.incLong );
                        
                        BSONObj prev;
                        BSONList all;
                        
                        pm = op->setMessage( "m/r: (3/3) final reduce to collection" , db.count( mr.incLong ) );

                        auto_ptr<Cursor> temp = QueryPlanSet(mr.incLong.c_str() , BSONObj() , sortKey ).getBestGuess()->newCursor();
                        auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , mr.incLong.c_str() ) );
                        
                        while ( cursor->ok() ){
                            BSONObj o = cursor->current().getOwned();
                            cursor->advance();
                            
                            pm.hit();
                            
                            if ( o.woSortOrder( prev , sortKey ) == 0 ){
                                all.push_back( o );
                                if ( pm.hits() % 1000 == 0 ){
                                    if ( ! cursor->yield() ){
                                        cursor.release();
                                        break;
                                    } 
                                    killCurrentOp.checkForInterrupt();
                                }
                                continue;
                            }
                        
                            ClientCursor::YieldLock yield = cursor->yieldHold();
                                
                            state.finalReduce( all );
                            
                            all.clear();
                            prev = o;
                            all.push_back( o );

                            if ( ! yield.stillOk() ){
                                cursor.release();
                                break;
                            }
                            
                            killCurrentOp.checkForInterrupt();
                        }

                        {
                            dbtemprelease tl;
                            state.finalReduce( all );
                        }

                        pm.finished();
                    }

                    _tlmr.reset( 0 );
                }
                catch ( ... ){
                    log() << "mr failed, removing collection" << endl;
                    db.dropCollection( mr.tempLong );
                    db.dropCollection( mr.incLong );
                    throw;
                }
                
                long long finalCount = 0;
                {
                    dblock lock;
                    db.dropCollection( mr.incLong );
                
                    finalCount = mr.renameIfNeeded( db );
                }

                timingBuilder.append( "total" , t.millis() );
                
                result.append( "result" , mr.finalShort );
                result.append( "timeMillis" , t.millis() );
                countsBuilder.appendNumber( "output" , finalCount );
                if ( mr.verbose ) result.append( "timing" , timingBuilder.obj() );
                result.append( "counts" , countsBuilder.obj() );

                if ( finalCount == 0 && shouldHaveData ){
                    result.append( "cmd" , cmd );
                    errmsg = "there were emits but no data!";
                    return false;
                }

                return true;
            }