Beispiel #1
0
        /**
         * Applies the reduce function held in _func to a list of tuples.
         *
         * Each tuple is read positionally: its first element is the key and its second
         * element is the value. The key is taken from the first tuple only; the key
         * elements of later tuples are skipped. The values are packed into one argument
         * object ( key, [values] ) and handed to the reduce function; the result is read
         * back from the scope under the name "return".
         *
         * If appending a value would push the argument object past BSONObjMaxUserSize,
         * only the values gathered so far are reduced and the function recurses on the
         * remaining tuples plus the intermediate result.
         *
         * @param tuples           list of tuples to reduce; must not be empty
         * @param key              out: the key element of the first tuple, wrapped in an object
         * @param endSizeEstimate  out: rough size estimate derived from the key and argument sizes
         */
        void JSReducer::_reduce( const BSONList& tuples , BSONObj& key , int& endSizeEstimate ) {
            // tuples.begin() is dereferenced right below and tuples.size() is later used as a
            // divisor, so an empty list has to be rejected before anything else happens
            uassert( 10074 ,  "need values" , tuples.size() );

            int sizeEstimate = ( tuples.size() * tuples.begin()->getField( "value" ).size() ) + 128;

            // need to build the reduce args: ( key, [values] )
            BSONObjBuilder reduceArgs( sizeEstimate );
            boost::scoped_ptr<BSONArrayBuilder>  valueBuilder;
            int sizeSoFar = 0;
            unsigned n = 0;
            for ( ; n<tuples.size(); n++ ) {
                BSONObjIterator j(tuples[n]);
                BSONElement keyE = j.next();
                if ( n == 0 ) {
                    // the first tuple supplies the key and opens the array of values
                    reduceArgs.append( keyE );
                    key = keyE.wrap();
                    sizeSoFar = 5 + keyE.size();
                    valueBuilder.reset(new BSONArrayBuilder( reduceArgs.subarrayStart( "tuples" ) ));
                }

                BSONElement ee = j.next();

                uassert( 14837 , "value too large to reduce" , ee.size() < ( BSONObjMaxUserSize / 2 ) );

                if ( sizeSoFar + ee.size() > BSONObjMaxUserSize ) {
                    // stop here and reduce what has been collected; the rest is handled below
                    assert( n > 1 ); // if not, inf. loop
                    break;
                }

                valueBuilder->append( ee );
                sizeSoFar += ee.size();
            }
            assert(valueBuilder);
            valueBuilder->done();
            BSONObj args = reduceArgs.obj();

            Scope * s = _func.scope();

            s->invokeSafe( _func.func() , &args, 0, 0, false, true, true );
            ++numReduces;

            if ( s->type( "return" ) == Array ) {
                uasserted( 14838 , "reduce -> multiple not supported yet");
                return;
            }

            endSizeEstimate = key.objsize() + ( args.objsize() / tuples.size() );

            // every tuple was consumed in this pass, nothing left to do
            if ( n == tuples.size() )
                return;

            // the input list was too large, add the rest of elmts to new tuples and reduce again
            // note: would be better to use loop instead of recursion to avoid stack overflow
            BSONList x;
            for ( ; n < tuples.size(); n++ ) {
                x.push_back( tuples[n] );
            }
            // append the intermediate result as one more tuple: key first, then "return" stored as "1"
            BSONObjBuilder temp( endSizeEstimate );
            temp.append( key.firstElement() );
            s->append( temp , "1" , "return" );
            x.push_back( temp.obj() );
            _reduce( x , key , endSizeEstimate );
        }
Beispiel #2
0
    bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
        string dbname = cc().database()->name; // this has to come before dbtemprelease
        dbtemprelease temprelease; // we don't touch the db directly

        string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe();

        MRSetup mr( dbname , cmdObj.firstElement().embeddedObjectUserCheck() , false );

        set<ServerAndQuery> servers;

        BSONObjBuilder shardCounts;
        map<string,long long> counts;

        BSONObj shards = cmdObj["shards"].embeddedObjectUserCheck();
        vector< auto_ptr<DBClientCursor> > shardCursors;
        BSONObjIterator i( shards );
        while ( i.more() ) {
            BSONElement e = i.next();
            string shard = e.fieldName();

            BSONObj res = e.embeddedObjectUserCheck();

            uassert( 10078 ,  "something bad happened" , shardedOutputCollection == res["result"].valuestrsafe() );
            servers.insert( shard );
            shardCounts.appendAs( res["counts"] , shard.c_str() );

            BSONObjIterator j( res["counts"].embeddedObjectUserCheck() );
            while ( j.more() ) {
                BSONElement temp = j.next();
                counts[temp.fieldName()] += temp.numberLong();
            }

        }

        BSONObj sortKey = BSON( "_id" << 1 );

        ParallelSortClusteredCursor cursor( servers , dbname + "." + shardedOutputCollection ,
                                            Query().sort( sortKey ) );


        auto_ptr<Scope> s = globalScriptEngine->getPooledScope( ns );
        ScriptingFunction reduceFunction = s->createFunction( mr.reduceCode.c_str() );
        ScriptingFunction finalizeFunction = 0;
        if ( mr.finalizeCode.size() )
            finalizeFunction = s->createFunction( mr.finalizeCode.c_str() );

        BSONList values;

        result.append( "result" , mr.finalShort );

        DBDirectClient db;

        while ( cursor.more() ) {
            BSONObj t = cursor.next().getOwned();

            if ( values.size() == 0 ) {
                values.push_back( t );
                continue;
            }

            if ( t.woSortOrder( *(values.begin()) , sortKey ) == 0 ) {
                values.push_back( t );
                continue;
            }


            db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) );
            values.clear();
            values.push_back( t );
        }

        if ( values.size() )
            db.insert( mr.tempLong , reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) );

        long long finalCount = mr.renameIfNeeded( db );
        log(0) << " mapreducefinishcommand " << mr.finalLong << " " << finalCount << endl;

        for ( set<ServerAndQuery>::iterator i=servers.begin(); i!=servers.end(); i++ ) {
            ScopedDbConnection conn( i->_server );
            conn->dropCollection( dbname + "." + shardedOutputCollection );
        }

        result.append( "shardCounts" , shardCounts.obj() );

        {
            BSONObjBuilder c;
            for ( map<string,long long>::iterator i=counts.begin(); i!=counts.end(); i++ ) {
                c.append( i->first , i->second );
            }
            result.append( "counts" , c.obj() );
        }

        return 1;
    }
Beispiel #3
0
Datei: mr.cpp Projekt: pdex/mongo
        /**
         * Applies the reduce function held in _func to a non-empty list of tuples.
         *
         * Each tuple is read positionally: first element is the key, second element is
         * the value. The key comes from the first tuple only. The values are packed into
         * one argument object ( key, [values] ) and passed to the reduce function; the
         * result is read back from the scope under the name "return".
         *
         * If appending a value would push the argument object past BSONObjMaxUserSize,
         * only the values gathered so far are reduced and the function recurses on the
         * remaining tuples plus the intermediate result.
         *
         * @param tuples           list of tuples to reduce; rejected with error 10074 if empty
         * @param key              out: the key element of the first tuple, wrapped in an object
         * @param endSizeEstimate  out: rough size estimate derived from the key and argument sizes
         */
        void JSReducer::_reduce( const BSONList& tuples , BSONObj& key , int& endSizeEstimate ) {
            uassert( 10074 ,  "need values" , tuples.size() );

            int sizeEstimate = ( tuples.size() * tuples.begin()->getField( "value" ).size() ) + 128;

            // build the reduce args: ( key, [values] )
            BSONObjBuilder reduceArgs( sizeEstimate );
            boost::scoped_ptr<BSONArrayBuilder>  valueBuilder;

            int sizeSoFar = 0;
            unsigned n = 0;
            for ( ; n<tuples.size(); n++ ) {
                BSONObjIterator j(tuples[n]);
                BSONElement keyE = j.next();
                if ( n == 0 ) {
                    // the first tuple supplies the key and opens the array of values
                    reduceArgs.append( keyE );
                    key = keyE.wrap();
                    sizeSoFar = 5 + keyE.size();
                    valueBuilder.reset(new BSONArrayBuilder( reduceArgs.subarrayStart( "tuples" ) ));
                }

                BSONElement ee = j.next();

                uassert( 13070 , "value too large to reduce" , ee.size() < ( BSONObjMaxUserSize / 2 ) );

                if ( sizeSoFar + ee.size() > BSONObjMaxUserSize ) {
                    // stop here and reduce what has been collected; the rest is handled below
                    assert( n > 1 ); // if not, inf. loop
                    break;
                }

                valueBuilder->append( ee );
                sizeSoFar += ee.size();
            }
            assert(valueBuilder);
            valueBuilder->done();
            BSONObj args = reduceArgs.obj();

            Scope * s = _func.scope();

            s->invokeSafe( _func.func() , args );

            if ( s->type( "return" ) == Array ) {
                uasserted( 10075 , "reduce -> multiple not supported yet");
                return;
            }

            endSizeEstimate = key.objsize() + ( args.objsize() / tuples.size() );

            // every tuple was consumed in this pass, nothing left to do
            if ( n == tuples.size() )
                return;

            // the input list was too large: reduce the leftover tuples together with
            // the intermediate result in a further pass

            BSONList x;
            for ( ; n < tuples.size(); n++ ) {
                x.push_back( tuples[n] );
            }
            // append the intermediate result as one more tuple: key first, then "return" stored as "1"
            BSONObjBuilder temp( endSizeEstimate );
            temp.append( key.firstElement() );
            s->append( temp , "1" , "return" );
            x.push_back( temp.obj() );
            _reduce( x , key , endSizeEstimate );
        }
Beispiel #4
0
    /**
     * Runs a map/reduce job in three phases, as announced through the current op's
     * progress messages:
     *   (1/3) emit phase            - invoke the map function on every queried document
     *   (2/3) final reduce in memory
     *   (3/3) final reduce to collection - walk mr.incLong sorted by key "0" and
     *                                      reduce each run of equal keys
     *
     * On success the reply gets "result", "timeMillis", "counts" and, when mr.verbose
     * is set, "timing". Any exception raised during the phases causes mr.tempLong and
     * mr.incLong to be dropped before the exception is rethrown.
     *
     * Note: `db` is not declared in this function; it comes from the enclosing scope.
     *
     * @param dbname    not read here; the database name is taken from the client instead
     * @param cmd       the command object describing the job
     * @param errmsg    set when the function returns false
     * @param result    builder that receives the command reply fields
     * @param fromRepl  not read in this function
     * @return false if the source namespace does not exist, or if there were emits
     *         but the final collection is empty; true otherwise
     */
    bool run(const char *dbname, BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
        // t measures the whole command; it feeds "emitLoop", "total" and "timeMillis"
        Timer t;
        Client::GodScope cg;
        Client& client = cc();
        CurOp * op = client.curop();

        MRSetup mr( client.database()->name , cmd );

        log(1) << "mr ns: " << mr.ns << endl;

        if ( ! db.exists( mr.ns ) ) {
            errmsg = "ns doesn't exist";
            return false;
        }

        // becomes true once at least one emit happened; used for the sanity check at the end
        bool shouldHaveData = false;

        long long num = 0;
        // accumulated time spent in checkSize(); NOTE(review): never read afterwards in this function
        long long inReduce = 0;

        BSONObjBuilder countsBuilder;
        BSONObjBuilder timingBuilder;
        try {

            MRState state( mr );
            state.scope->injectNative( "emit" , fast_emit );

            // _tlmr takes over the MRTL pointer via reset(); mrtl stays usable as a plain alias
            MRTL * mrtl = new MRTL( state );
            _tlmr.reset( mrtl );

            ProgressMeter & pm = op->setMessage( "m/r: (1/3) emit phase" , db.count( mr.ns , mr.filter ) );
            auto_ptr<DBClientCursor> cursor = db.query( mr.ns , mr.q );
            long long mapTime = 0;
            Timer mt;
            while ( cursor->more() ) {
                BSONObj o = cursor->next();

                // map timing is only collected in verbose mode
                if ( mr.verbose ) mt.reset();

                state.scope->setThis( &o );
                // a non-zero return from invoke is treated as failure
                if ( state.scope->invoke( state.map , state.setup.mapparams , 0 , true ) )
                    throw UserException( 9014, (string)"map invoke failed: " + state.scope->getError() );

                if ( mr.verbose ) mapTime += mt.micros();

                num++;
                // every 100 documents: check sizes, honour kill requests, and
                // presumably yield the db lock via dbtemprelease - TODO confirm
                if ( num % 100 == 0 ) {
                    Timer t; // shadows the outer t; only times checkSize()
                    mrtl->checkSize();
                    inReduce += t.micros();
                    killCurrentOp.checkForInterrupt();
                    dbtemprelease temprlease;
                }
                pm.hit();

                if ( mr.limit && num >= mr.limit )
                    break;
            }
            pm.finished();

            countsBuilder.appendNumber( "input" , num );
            countsBuilder.appendNumber( "emit" , mrtl->numEmits );
            if ( mrtl->numEmits )
                shouldHaveData = true;

            // mapTime is accumulated in microseconds, reported divided by 1000
            timingBuilder.append( "mapTime" , mapTime / 1000 );
            timingBuilder.append( "emitLoop" , t.millis() );

            // final reduce
            op->setMessage( "m/r: (2/3) final reduce in memory" );
            mrtl->reduceInMemory();
            mrtl->dump();

            BSONObj sortKey = BSON( "0" << 1 );
            db.ensureIndex( mr.incLong , sortKey );

            // prev is the first document of the current key group; all collects the group
            BSONObj prev;
            BSONList all;

            // NOTE(review): the call with side effects sits inside assert(); this is only
            // safe if assert is never compiled out in this build - confirm
            assert( userCreateNS( mr.tempLong.c_str() , BSONObj() , errmsg , mr.replicate ) );

            // NOTE(review): pm is a reference, so this assigns into the referenced meter
            // rather than rebinding pm - confirm that is the intent
            pm = op->setMessage( "m/r: (3/3) final reduce to collection" , db.count( mr.incLong ) );
            cursor = db.query( mr.incLong, Query().sort( sortKey ) );

            while ( cursor->more() ) {
                BSONObj o = cursor->next().getOwned();
                pm.hit();

                // same key as the current group: keep collecting
                if ( o.woSortOrder( prev , sortKey ) == 0 ) {
                    all.push_back( o );
                    if ( pm.hits() % 1000 == 0 ) {
                        dbtemprelease tl;
                    }
                    continue;
                }

                // key changed: reduce the finished group, then start a new one with o.
                // NOTE(review): for the very first document prev is still empty, so this
                // may be reached with an empty list - confirm finalReduce tolerates that
                state.finalReduce( all );

                all.clear();
                prev = o;
                all.push_back( o );
                killCurrentOp.checkForInterrupt();
                dbtemprelease tl;
            }
            // reduce the last group
            state.finalReduce( all );
            pm.finished();
            _tlmr.reset( 0 );
        }
        catch ( ... ) {
            // clean up both working collections, then let the caller see the original exception.
            // NOTE(review): _tlmr is not reset on this path while state has already gone
            // out of scope - verify nothing reads _tlmr afterwards
            log() << "mr failed, removing collection" << endl;
            db.dropCollection( mr.tempLong );
            db.dropCollection( mr.incLong );
            throw;
        }

        db.dropCollection( mr.incLong );

        long long finalCount = mr.renameIfNeeded( db );

        timingBuilder.append( "total" , t.millis() );

        result.append( "result" , mr.finalShort );
        result.append( "timeMillis" , t.millis() );
        countsBuilder.appendNumber( "output" , finalCount );
        if ( mr.verbose ) result.append( "timing" , timingBuilder.obj() );
        result.append( "counts" , countsBuilder.obj() );

        // sanity check: emits happened but nothing ended up in the final collection
        if ( finalCount == 0 && shouldHaveData ) {
            result.append( "cmd" , cmd );
            errmsg = "there were emits but no data!";
            return false;
        }

        return true;
    }