示例#1
0
        /**
         * Reduces a list of tuple object (key, value) to a single tuple {_id: key, value: val}
         * Also applies a finalizer method if present.
         */
        BSONObj JSReducer::finalReduce( const BSONList& tuples , Finalizer * finalizer ) {

            BSONObj res;
            BSONObj key;

            if (tuples.size() == 1) {
                // 1 obj, just use it
                key = tuples[0];
                BSONObjBuilder b(key.objsize());
                BSONObjIterator it(key);
                b.appendAs( it.next() , "_id" );
                b.appendAs( it.next() , "value" );
                res = b.obj();
            }
            else {
                // need to reduce
                int endSizeEstimate = 16;
                _reduce( tuples , key , endSizeEstimate );
                BSONObjBuilder b(endSizeEstimate);
                b.appendAs( key.firstElement() , "_id" );
                _func.scope()->append( b , "value" , "return" );
                res = b.obj();
            }

            if ( finalizer ) {
                res = finalizer->finalize( res );
            }

            return res;
        }
示例#2
0
        /**
         * Reduces a list of tuple objects (key, value) to a single tuple {"0": key, "1": value}
         */
        BSONObj JSReducer::reduce( const BSONList& tuples ) {
            if (tuples.size() <= 1)
                return tuples[0];
            BSONObj key;
            int endSizeEstimate = 16;
            _reduce( tuples , key , endSizeEstimate );

            BSONObjBuilder b(endSizeEstimate);
            b.appendAs( key.firstElement() , "0" );
            _func.scope()->append( b , "1" , "return" );
            return b.obj();
        }
示例#3
0
        /**
         * actually applies a reduce, to a list of tuples (key, value).
         * After the call, tuples will hold a single tuple {"0": key, "1": value}
         */
        void JSReducer::_reduce( const BSONList& tuples , BSONObj& key , int& endSizeEstimate ) {
            uassert( 10074 ,  "need values" , tuples.size() );

            int sizeEstimate = ( tuples.size() * tuples.begin()->getField( "value" ).size() ) + 128;

            // need to build the reduce args: ( key, [values] )
            BSONObjBuilder reduceArgs( sizeEstimate );
            boost::scoped_ptr<BSONArrayBuilder>  valueBuilder;
            int sizeSoFar = 0;
            unsigned n = 0;
            for ( ; n<tuples.size(); n++ ) {
                BSONObjIterator j(tuples[n]);
                BSONElement keyE = j.next();
                if ( n == 0 ) {
                    reduceArgs.append( keyE );
                    key = keyE.wrap();
                    sizeSoFar = 5 + keyE.size();
                    valueBuilder.reset(new BSONArrayBuilder( reduceArgs.subarrayStart( "tuples" ) ));
                }

                BSONElement ee = j.next();

                uassert( 13070 , "value too large to reduce" , ee.size() < ( BSONObjMaxUserSize / 2 ) );

                if ( sizeSoFar + ee.size() > BSONObjMaxUserSize ) {
                    assert( n > 1 ); // if not, inf. loop
                    break;
                }

                valueBuilder->append( ee );
                sizeSoFar += ee.size();
            }
            assert(valueBuilder);
            valueBuilder->done();
            BSONObj args = reduceArgs.obj();

            Scope * s = _func.scope();

            s->invokeSafe( _func.func() , args );

            if ( s->type( "return" ) == Array ) {
                uasserted( 10075 , "reduce -> multiple not supported yet");
                return;
            }

            endSizeEstimate = key.objsize() + ( args.objsize() / tuples.size() );

            if ( n == tuples.size() )
                return;

            // the input list was too large, add the rest of elmts to new tuples and reduce again
            // note: would be better to use loop instead of recursion to avoid stack overflow
            BSONList x;
            for ( ; n < tuples.size(); n++ ) {
                x.push_back( tuples[n] );
            }
            BSONObjBuilder temp( endSizeEstimate );
            temp.append( key.firstElement() );
            s->append( temp , "1" , "return" );
            x.push_back( temp.obj() );
            _reduce( x , key , endSizeEstimate );
        }
示例#4
0
文件: mr.cpp 项目: jayridge/mongo
            bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){
                string dbname = cc().database()->name; // this has to come before dbtemprelease
                dbtemprelease temprelease; // we don't touch the db directly

                string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe();

                MRSetup mr( dbname , cmdObj.firstElement().embeddedObjectUserCheck() , false );
                
                set<ServerAndQuery> servers;
                
                BSONObjBuilder shardCounts;
                map<string,long long> counts;
                
                BSONObj shards = cmdObj["shards"].embeddedObjectUserCheck();
                vector< auto_ptr<DBClientCursor> > shardCursors;
                BSONObjIterator i( shards );
                while ( i.more() ){
                    BSONElement e = i.next();
                    string shard = e.fieldName();

                    BSONObj res = e.embeddedObjectUserCheck();
                    
                    uassert( 10078 ,  "something bad happened" , shardedOutputCollection == res["result"].valuestrsafe() );
                    servers.insert( shard );
                    shardCounts.appendAs( res["counts"] , shard.c_str() );

                    BSONObjIterator j( res["counts"].embeddedObjectUserCheck() );
                    while ( j.more() ){
                        BSONElement temp = j.next();
                        counts[temp.fieldName()] += temp.numberLong();
                    }

                }

                BSONObj sortKey = BSON( "_id" << 1 );

                ParallelSortClusteredCursor cursor( servers , dbname + "." + shardedOutputCollection ,
                                                    Query().sort( sortKey ) );
                
                
                auto_ptr<Scope> s = globalScriptEngine->getPooledScope( ns );
                ScriptingFunction reduceFunction = s->createFunction( mr.reduceCode.c_str() );
                ScriptingFunction finalizeFunction = 0;
                if ( mr.finalizeCode.size() )
                    finalizeFunction = s->createFunction( mr.finalizeCode.c_str() );

                BSONList values;

                result.append( "result" , mr.finalShort );

                DBDirectClient db;
                
                while ( cursor.more() ){
                    BSONObj t = cursor.next().getOwned();
                                        
                    if ( values.size() == 0 ){
                        values.push_back( t );
                        continue;
                    }
                    
                    if ( t.woSortOrder( *(values.begin()) , sortKey ) == 0 ){
                        values.push_back( t );
                        continue;
                    }
                    

                    db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) );
                    values.clear();
                    values.push_back( t );
                }
                
                if ( values.size() )
                    db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) );
                
                long long finalCount = mr.renameIfNeeded( db );
                log(0) << " mapreducefinishcommand " << mr.finalLong << " " << finalCount << endl;

                for ( set<ServerAndQuery>::iterator i=servers.begin(); i!=servers.end(); i++ ){
                    ScopedDbConnection conn( i->_server );
                    conn->dropCollection( dbname + "." + shardedOutputCollection );
                }
                
                result.append( "shardCounts" , shardCounts.obj() );
                
                {
                    BSONObjBuilder c;
                    for ( map<string,long long>::iterator i=counts.begin(); i!=counts.end(); i++ ){
                        c.append( i->first , i->second );
                    }
                    result.append( "counts" , c.obj() );
                }

                return 1;
            }
示例#5
0
文件: mr.cpp 项目: jayridge/mongo
            bool run(const char *dbname, BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ){
                Timer t;
                Client::GodScope cg;
                Client& client = cc();
                CurOp * op = client.curop();

                MRSetup mr( nsToDatabase( dbname ) , cmd );

                log(1) << "mr ns: " << mr.ns << endl;
                
                if ( ! db.exists( mr.ns ) ){
                    errmsg = "ns doesn't exist";
                    return false;
                }
                
                bool shouldHaveData = false;
                
                long long num = 0;
                long long inReduce = 0;
                
                BSONObjBuilder countsBuilder;
                BSONObjBuilder timingBuilder;
                try {
                    
                    MRState state( mr );
                    state.scope->injectNative( "emit" , fast_emit );
                    
                    MRTL * mrtl = new MRTL( state );
                    _tlmr.reset( mrtl );

                    ProgressMeter & pm = op->setMessage( "m/r: (1/3) emit phase" , db.count( mr.ns , mr.filter ) );
                    long long mapTime = 0;
                    {
                        readlock lock( mr.ns );
                        Client::Context ctx( mr.ns );
                        
                        auto_ptr<Cursor> temp = QueryPlanSet(mr.ns.c_str() , mr.filter , BSONObj() ).getBestGuess()->newCursor();
                        auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , mr.ns.c_str() ) );

                        if ( ! mr.filter.isEmpty() )
                            cursor->matcher.reset( new CoveredIndexMatcher( mr.filter , cursor->indexKeyPattern() ) );
                        
                        Timer mt;
                        while ( cursor->ok() ){
                            
                            if ( ! cursor->currentMatches() ){
                                cursor->advance();
                                continue;
                            }
                            
                            BSONObj o = cursor->current(); 
                            cursor->advance();
                            
                            if ( mr.verbose ) mt.reset();
                            
                            state.scope->setThis( &o );
                            if ( state.scope->invoke( state.map , state.setup.mapparams , 0 , true ) )
                                throw UserException( 9014, (string)"map invoke failed: " + state.scope->getError() );
                            
                            if ( mr.verbose ) mapTime += mt.micros();
                            
                            num++;
                            if ( num % 100 == 0 ){
                                ClientCursor::YieldLock yield = cursor->yieldHold();
                                Timer t;
                                mrtl->checkSize();
                                inReduce += t.micros();
                                
                                if ( ! yield.stillOk() ){
                                    cursor.release();
                                    break;
                                }

                                killCurrentOp.checkForInterrupt();
                            }
                            pm.hit();
                            
                            if ( mr.limit && num >= mr.limit )
                                break;
                        }
                    }
                    pm.finished();
                    
                    killCurrentOp.checkForInterrupt();

                    countsBuilder.appendNumber( "input" , num );
                    countsBuilder.appendNumber( "emit" , mrtl->numEmits );
                    if ( mrtl->numEmits )
                        shouldHaveData = true;
                    
                    timingBuilder.append( "mapTime" , mapTime / 1000 );
                    timingBuilder.append( "emitLoop" , t.millis() );
                    
                    // final reduce
                    op->setMessage( "m/r: (2/3) final reduce in memory" );
                    mrtl->reduceInMemory();
                    mrtl->dump();
                    
                    BSONObj sortKey = BSON( "0" << 1 );
                    db.ensureIndex( mr.incLong , sortKey );
                    
                    {
                        writelock lock( mr.tempLong.c_str() );
                        Client::Context ctx( mr.tempLong.c_str() );
                        assert( userCreateNS( mr.tempLong.c_str() , BSONObj() , errmsg , mr.replicate ) );
                    }


                    {
                        readlock rl(mr.incLong.c_str());
                        Client::Context ctx( mr.incLong );
                        
                        BSONObj prev;
                        BSONList all;
                        
                        pm = op->setMessage( "m/r: (3/3) final reduce to collection" , db.count( mr.incLong ) );

                        auto_ptr<Cursor> temp = QueryPlanSet(mr.incLong.c_str() , BSONObj() , sortKey ).getBestGuess()->newCursor();
                        auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , mr.incLong.c_str() ) );
                        
                        while ( cursor->ok() ){
                            BSONObj o = cursor->current().getOwned();
                            cursor->advance();
                            
                            pm.hit();
                            
                            if ( o.woSortOrder( prev , sortKey ) == 0 ){
                                all.push_back( o );
                                if ( pm.hits() % 1000 == 0 ){
                                    if ( ! cursor->yield() ){
                                        cursor.release();
                                        break;
                                    } 
                                    killCurrentOp.checkForInterrupt();
                                }
                                continue;
                            }
                        
                            ClientCursor::YieldLock yield = cursor->yieldHold();
                                
                            state.finalReduce( all );
                            
                            all.clear();
                            prev = o;
                            all.push_back( o );

                            if ( ! yield.stillOk() ){
                                cursor.release();
                                break;
                            }
                            
                            killCurrentOp.checkForInterrupt();
                        }

                        {
                            dbtemprelease tl;
                            state.finalReduce( all );
                        }

                        pm.finished();
                    }

                    _tlmr.reset( 0 );
                }
                catch ( ... ){
                    log() << "mr failed, removing collection" << endl;
                    db.dropCollection( mr.tempLong );
                    db.dropCollection( mr.incLong );
                    throw;
                }
                
                long long finalCount = 0;
                {
                    dblock lock;
                    db.dropCollection( mr.incLong );
                
                    finalCount = mr.renameIfNeeded( db );
                }

                timingBuilder.append( "total" , t.millis() );
                
                result.append( "result" , mr.finalShort );
                result.append( "timeMillis" , t.millis() );
                countsBuilder.appendNumber( "output" , finalCount );
                if ( mr.verbose ) result.append( "timing" , timingBuilder.obj() );
                result.append( "counts" , countsBuilder.obj() );

                if ( finalCount == 0 && shouldHaveData ){
                    result.append( "cmd" , cmd );
                    errmsg = "there were emits but no data!";
                    return false;
                }

                return true;
            }