bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string key = cmdObj["key"].valuestrsafe(); BSONObj keyPattern = BSON( key << 1 ); BSONObj query = getQuery( cmdObj ); BSONElementSet values; shared_ptr<Cursor> cursor = bestGuessCursor(ns.c_str() , query , BSONObj() ); scoped_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns)); while ( cursor->ok() ){ if ( !cursor->matcher() || cursor->matcher()->matchesCurrent( cursor.get() ) ){ BSONObj o = cursor->current(); o.getFieldsDotted( key, values ); } cursor->advance(); if (!cc->yieldSometimes()) break; RARELY killCurrentOp.checkForInterrupt(); } BSONArrayBuilder b( result.subarrayStart( "values" ) ); for ( BSONElementSet::iterator i = values.begin() ; i != values.end(); i++ ){ b.append( *i ); } BSONObj arr = b.done(); uassert(10044, "distinct too big, 4mb cap", arr.objsize() < BSONObjMaxUserSize ); return true; }
bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string key = cmdObj["key"].valuestrsafe(); BSONObj keyPattern = BSON( key << 1 ); BSONObj query = getQuery( cmdObj ); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb( bufSize ); char * start = bb.buf(); BSONArrayBuilder arr( bb ); BSONElementSet values; long long nscanned = 0; // locations looked at long long nscannedObjects = 0; // full objects looked at long long n = 0; // matches MatchDetails md; NamespaceDetails * d = nsdetails( ns.c_str() ); if ( ! d ) { result.appendArray( "values" , BSONObj() ); result.append( "stats" , BSON( "n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0 ) ); return true; } shared_ptr<Cursor> cursor; if ( ! query.isEmpty() ) { cursor = NamespaceDetailsTransient::getCursor(ns.c_str() , query , BSONObj() ); } else { // query is empty, so lets see if we can find an index // with the key so we don't have to hit the raw data NamespaceDetails::IndexIterator ii = d->ii(); while ( ii.more() ) { IndexDetails& idx = ii.next(); if ( d->isMultikey( ii.pos() - 1 ) ) continue; if ( idx.inKeyPattern( key ) ) { cursor = bestGuessCursor( ns.c_str() , BSONObj() , idx.keyPattern() ); if( cursor.get() ) break; } } if ( ! cursor.get() ) cursor = NamespaceDetailsTransient::getCursor(ns.c_str() , query , BSONObj() ); } assert( cursor ); string cursorName = cursor->toString(); auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns)); while ( cursor->ok() ) { nscanned++; bool loadedObject = false; if ( ( !cursor->matcher() || cursor->matcher()->matchesCurrent( cursor.get() , &md ) ) && !cursor->getsetdup( cursor->currLoc() ) ) { n++; BSONElementSet temp; loadedObject = ! cc->getFieldsDotted( key , temp ); for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) { BSONElement e = *i; if ( values.count( e ) ) continue; int now = bb.len(); uassert(10044, "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize ); arr.append( e ); BSONElement x( start + now ); values.insert( x ); } } if ( loadedObject || md._loadedObject ) nscannedObjects++; cursor->advance(); if (!cc->yieldSometimes( ClientCursor::MaybeCovered )) { cc.release(); break; } RARELY killCurrentOp.checkForInterrupt(); } assert( start == bb.buf() ); result.appendArray( "values" , arr.done() ); { BSONObjBuilder b; b.appendNumber( "n" , n ); b.appendNumber( "nscanned" , nscanned ); b.appendNumber( "nscannedObjects" , nscannedObjects ); b.appendNumber( "timems" , t.millis() ); b.append( "cursor" , cursorName ); result.append( "stats" , b.obj() ); } return true; }
bool group( string realdbname , const string& ns , const BSONObj& query , BSONObj keyPattern , string keyFunctionCode , string reduceCode , const char * reduceScope , BSONObj initial , string finalize , string& errmsg , BSONObjBuilder& result ) { auto_ptr<Scope> s = globalScriptEngine->getPooledScope( realdbname ); s->localConnect( realdbname.c_str() ); if ( reduceScope ) s->init( reduceScope ); s->setObject( "$initial" , initial , true ); s->exec( "$reduce = " + reduceCode , "reduce setup" , false , true , true , 100 ); s->exec( "$arr = [];" , "reduce setup 2" , false , true , true , 100 ); ScriptingFunction f = s->createFunction( "function(){ " " if ( $arr[n] == null ){ " " next = {}; " " Object.extend( next , $key ); " " Object.extend( next , $initial , true ); " " $arr[n] = next; " " next = null; " " } " " $reduce( obj , $arr[n] ); " "}" ); ScriptingFunction keyFunction = 0; if ( keyFunctionCode.size() ) { keyFunction = s->createFunction( keyFunctionCode.c_str() ); } double keysize = keyPattern.objsize() * 3; double keynum = 1; map<BSONObj,int,BSONObjCmp> map; list<BSONObj> blah; shared_ptr<Cursor> cursor = bestGuessCursor(ns.c_str() , query , BSONObj() ); while ( cursor->ok() ) { if ( cursor->matcher() && ! cursor->matcher()->matchesCurrent( cursor.get() ) ) { cursor->advance(); continue; } BSONObj obj = cursor->current(); cursor->advance(); BSONObj key = getKey( obj , keyPattern , keyFunction , keysize / keynum , s.get() ); keysize += key.objsize(); keynum++; int& n = map[key]; if ( n == 0 ) { n = map.size(); s->setObject( "$key" , key , true ); uassert( 10043 , "group() can't handle more than 20000 unique keys" , n <= 20000 ); } s->setObject( "obj" , obj , true ); s->setNumber( "n" , n - 1 ); if ( s->invoke( f , BSONObj() , 0 , true ) ) { throw UserException( 9010 , (string)"reduce invoke failed: " + s->getError() ); } } if (!finalize.empty()) { s->exec( "$finalize = " + finalize , "finalize define" , false , true , true , 100 ); ScriptingFunction g = s->createFunction( "function(){ " " for(var i=0; i < $arr.length; i++){ " " var ret = $finalize($arr[i]); " " if (ret !== undefined) " " $arr[i] = ret; " " } " "}" ); s->invoke( g , BSONObj() , 0 , true ); } result.appendArray( "retval" , s->getObject( "$arr" ) ); result.append( "count" , keynum - 1 ); result.append( "keys" , (int)(map.size()) ); s->exec( "$arr = [];" , "reduce setup 2" , false , true , true , 100 ); s->gc(); return true; }
bool run(const string& dbname , BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ Timer t; Client::GodScope cg; Client& client = cc(); CurOp * op = client.curop(); MRSetup mr( dbname , cmd ); log(1) << "mr ns: " << mr.ns << endl; if ( ! db.exists( mr.ns ) ){ errmsg = "ns doesn't exist"; return false; } bool shouldHaveData = false; long long num = 0; long long inReduce = 0; BSONObjBuilder countsBuilder; BSONObjBuilder timingBuilder; try { MRState state( mr ); state.scope->injectNative( "emit" , fast_emit ); MRTL * mrtl = new MRTL( state ); _tlmr.reset( mrtl ); ProgressMeterHolder pm( op->setMessage( "m/r: (1/3) emit phase" , db.count( mr.ns , mr.filter ) ) ); long long mapTime = 0; { readlock lock( mr.ns ); Client::Context ctx( mr.ns ); shared_ptr<Cursor> temp = bestGuessCursor( mr.ns.c_str(), mr.filter, mr.sort ); auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , mr.ns.c_str() ) ); Timer mt; while ( cursor->ok() ){ if ( ! cursor->currentMatches() ){ cursor->advance(); continue; } BSONObj o = cursor->current(); cursor->advance(); if ( mr.verbose ) mt.reset(); state.scope->setThis( &o ); if ( state.scope->invoke( state.map , state.setup.mapparams , 0 , true ) ) throw UserException( 9014, (string)"map invoke failed: " + state.scope->getError() ); if ( mr.verbose ) mapTime += mt.micros(); num++; if ( num % 100 == 0 ){ ClientCursor::YieldLock yield (cursor.get()); Timer t; mrtl->checkSize(); inReduce += t.micros(); if ( ! yield.stillOk() ){ cursor.release(); break; } killCurrentOp.checkForInterrupt(); } pm.hit(); if ( mr.limit && num >= mr.limit ) break; } } pm.finished(); killCurrentOp.checkForInterrupt(); countsBuilder.appendNumber( "input" , num ); countsBuilder.appendNumber( "emit" , mrtl->numEmits ); if ( mrtl->numEmits ) shouldHaveData = true; timingBuilder.append( "mapTime" , mapTime / 1000 ); timingBuilder.append( "emitLoop" , t.millis() ); // final reduce op->setMessage( "m/r: (2/3) final reduce in memory" ); mrtl->reduceInMemory(); mrtl->dump(); BSONObj sortKey = BSON( "0" << 1 ); db.ensureIndex( mr.incLong , sortKey ); { writelock lock( mr.tempLong.c_str() ); Client::Context ctx( mr.tempLong.c_str() ); assert( userCreateNS( mr.tempLong.c_str() , BSONObj() , errmsg , mr.replicate ) ); } { readlock rl(mr.incLong.c_str()); Client::Context ctx( mr.incLong ); BSONObj prev; BSONList all; assert( pm == op->setMessage( "m/r: (3/3) final reduce to collection" , db.count( mr.incLong ) ) ); shared_ptr<Cursor> temp = bestGuessCursor( mr.incLong.c_str() , BSONObj() , sortKey ); auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , mr.incLong.c_str() ) ); while ( cursor->ok() ){ BSONObj o = cursor->current().getOwned(); cursor->advance(); pm.hit(); if ( o.woSortOrder( prev , sortKey ) == 0 ){ all.push_back( o ); if ( pm->hits() % 1000 == 0 ){ if ( ! cursor->yield() ){ cursor.release(); break; } killCurrentOp.checkForInterrupt(); } continue; } ClientCursor::YieldLock yield (cursor.get()); state.finalReduce( all ); all.clear(); prev = o; all.push_back( o ); if ( ! yield.stillOk() ){ cursor.release(); break; } killCurrentOp.checkForInterrupt(); } { dbtempreleasecond tl; if ( ! tl.unlocked() ) log( LL_WARNING ) << "map/reduce can't temp release" << endl; state.finalReduce( all ); } pm.finished(); } _tlmr.reset( 0 ); } catch ( ... ){ log() << "mr failed, removing collection" << endl; db.dropCollection( mr.tempLong ); db.dropCollection( mr.incLong ); throw; } long long finalCount = 0; { dblock lock; db.dropCollection( mr.incLong ); finalCount = mr.renameIfNeeded( db ); } timingBuilder.append( "total" , t.millis() ); result.append( "result" , mr.finalShort ); result.append( "timeMillis" , t.millis() ); countsBuilder.appendNumber( "output" , finalCount ); if ( mr.verbose ) result.append( "timing" , timingBuilder.obj() ); result.append( "counts" , countsBuilder.obj() ); if ( finalCount == 0 && shouldHaveData ){ result.append( "cmd" , cmd ); errmsg = "there were emits but no data!"; return false; } return true; }