// Performs one unit of work for the collection scan.
//
// The call that finds no iterator only creates (or re-creates) it and reports NEED_TIME.
// Every later call looks at the iterator's current location and either reports EOF, asks the
// caller to fetch the record (NEED_FETCH), or builds a working set member and hands it to
// returnIfMatches().
//
// 'out' receives the id of the member to fetch on NEED_FETCH; otherwise it is only passed on
// to returnIfMatches().
PlanStage::StageState CollectionScan::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Charge the time spent in this call to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (_isDead) {
        return PlanStage::DEAD;
    }

    // Without an iterator this whole call is spent on positioning.
    if (NULL == _iter) {
        if (_params.collection == NULL) {
            _isDead = true;
            return PlanStage::DEAD;
        }

        if (!_lastSeenLoc.isNull()) {
            // We have returned something before, so we are resuming; only tailable scans
            // are expected to get here.
            invariant(_params.tailable);

            _iter.reset(_params.collection->getIterator(_txn,
                                                        _lastSeenLoc,
                                                        _params.direction));

            // Step over the location we already returned. If the iterator yields anything
            // else, die rather than silently dropping data from the stream. This is related
            // to the _lastSeenLoc handling in invalidate.
            if (_iter->getNext() != _lastSeenLoc) {
                _isDead = true;
                return PlanStage::DEAD;
            }
        }
        else {
            // Fresh scan: begin at the requested start location.
            _iter.reset(_params.collection->getIterator(_txn,
                                                        _params.start,
                                                        _params.direction));
        }

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    const DiskLoc currentLoc = _iter->curr();
    if (currentLoc.isNull()) {
        // Ran off the end. A tailable scan drops its iterator so that the next call to
        // work() re-creates it from _lastSeenLoc.
        if (_params.tailable) {
            _iter.reset();
        }
        return PlanStage::IS_EOF;
    }

    _lastSeenLoc = currentLoc;

    // Ask whether the record has to be fetched before it can be touched. Per the original
    // author's note, curr() does not touch the record (on MMAPv1, the only place NEED_FETCH is
    // used), so yielding is possible here as long as it happens before getNext() is called.
    {
        std::auto_ptr<RecordFetcher> pendingFetch(
            _params.collection->documentNeedsFetch(_txn, currentLoc));

        if (NULL != pendingFetch.get()) {
            WorkingSetMember* fetchMember = _workingSet->get(_wsidForFetch);
            fetchMember->loc = currentLoc;

            // The working set member takes over the RecordFetcher.
            fetchMember->setFetcher(pendingFetch.release());

            *out = _wsidForFetch;
            ++_commonStats.needFetch;
            return PlanStage::NEED_FETCH;
        }
    }

    WorkingSetID resultId = _workingSet->allocate();
    WorkingSetMember* resultMember = _workingSet->get(resultId);
    resultMember->loc = currentLoc;
    resultMember->obj = _iter->dataFor(resultMember->loc).releaseToBson();
    resultMember->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;

    // Move the iterator forward; it must have been sitting on the location we just read.
    invariant(_iter->getNext() == currentLoc);

    return returnIfMatches(resultMember, resultId, out);
}
// Performs one unit of work for the OR stage.
//
// Works the current child once. An ADVANCED result is dropped if deduplication is on and its
// DiskLoc was seen before, or if it fails the matcher; otherwise it is returned through 'out'.
// When the current child reports EOF the stage moves on to the next child. Every other child
// state is passed up unchanged.
//
// 'out' receives the id of the returned member on ADVANCED, and the id supplied by the child
// on NEED_FETCH.
PlanStage::StageState OrStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Size the per-child counters on first use.
    if (0 == _specificStats.matchTested.size()) {
        _specificStats.matchTested = vector<uint64_t>(_children.size(), 0);
    }

    // Start from a known value so that a child which returns without writing its
    // out-parameter never leaves us holding an indeterminate id.
    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState childStatus = _children[_currentChild]->work(&id);

    if (PlanStage::ADVANCED == childStatus) {
        WorkingSetMember* member = _ws->get(id);
        verify(member->hasLoc());

        // If we're deduping...
        if (_dedup) {
            ++_specificStats.dupsTested;

            // ...and we've seen the DiskLoc before
            if (_seen.end() != _seen.find(member->loc)) {
                // ...drop it.
                ++_specificStats.dupsDropped;
                _ws->free(id);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else {
                // Otherwise, note that we've seen it.
                _seen.insert(member->loc);
            }
        }

        if (NULL == _matcher || _matcher->matches(member)) {
            if (NULL != _matcher) {
                ++_specificStats.matchTested[_currentChild];
            }

            // Match! return it.
            *out = id;
            ++_commonStats.advanced;
            return PlanStage::ADVANCED;
        }
        else {
            // Does not match, try again.
            _ws->free(id);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
    }
    else if (PlanStage::IS_EOF == childStatus) {
        // Done with _currentChild, move to the next one.
        ++_currentChild;

        // Maybe we're out of children.
        if (isEOF()) {
            return PlanStage::IS_EOF;
        }
        else {
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
    }
    else {
        if (PlanStage::NEED_FETCH == childStatus) {
            // Forward the id the child handed us so the caller knows which member the
            // fetch request refers to; without this the request is lost at this stage.
            *out = id;
            ++_commonStats.needFetch;
        }
        else if (PlanStage::NEED_TIME == childStatus) {
            ++_commonStats.needTime;
        }

        // Whatever else the child reported is passed up unchanged.
        return childStatus;
    }
}
// Refreshes the EOF flag in the common stats and returns a newly allocated stats object
// for this stage. The caller receives the raw pointer.
PlanStageStats* S2NearStage::getStats() {
    // TODO: must agg stats across child ixscan/fetches.
    // TODO: we can do better than this, need own common stats.
    _commonStats.isEOF = isEOF();

    PlanStageStats* stageStats = new PlanStageStats(_commonStats, STAGE_GEO_NEAR_2DSPHERE);
    return stageStats;
}
// Returns the location the iterator currently points at, or a default-constructed DiskLoc
// once the iterator is exhausted.
DiskLoc HeapRecordReverseIterator::curr() {
    if (!isEOF()) {
        return _it->first;
    }
    return DiskLoc();
}
// Performs one unit of work for the OR stage.
//
// Works the current child once and translates its state:
//   - IS_EOF:     advance to the next child (EOF only when no children remain).
//   - FAILURE:    pass up the child's status member, or allocate one if it supplied none.
//   - ADVANCED:   dedup by DiskLoc (when enabled and available), apply the filter, then
//                 either return the member or free it and report NEED_TIME.
//   - otherwise:  pass the child's state up, forwarding the id for NEED_FETCH.
PlanStage::StageState OrStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Charge the time spent in this call to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Size the per-child counters on first use.
    if (0 == _specificStats.matchTested.size()) {
        _specificStats.matchTested = vector<size_t>(_children.size(), 0);
    }

    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState childStatus = _children[_currentChild]->work(&id);

    if (PlanStage::IS_EOF == childStatus) {
        // This child is exhausted; line up the next one.
        ++_currentChild;

        if (isEOF()) {
            // That was the last child.
            return PlanStage::IS_EOF;
        }

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    if (PlanStage::FAILURE == childStatus) {
        // A failing child may have allocated a status member explaining the failure, in
        // which case 'id' is valid and is passed up. Otherwise we describe the failure
        // ourselves.
        if (WorkingSet::INVALID_ID == id) {
            mongoutils::str::stream ss;
            ss << "OR stage failed to read in results from child " << _currentChild;
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        }
        else {
            *out = id;
        }
        return childStatus;
    }

    if (PlanStage::ADVANCED != childStatus) {
        // Only bookkeeping remains; the state itself is passed up untouched.
        if (PlanStage::NEED_TIME == childStatus) {
            ++_commonStats.needTime;
        }
        else if (PlanStage::NEED_FETCH == childStatus) {
            ++_commonStats.needFetch;
            *out = id;
        }
        return childStatus;
    }

    // The child produced a result.
    WorkingSetMember* member = _ws->get(id);

    // Dedup only when asked to and when the member carries something to dedup by.
    if (_dedup && member->hasLoc()) {
        ++_specificStats.dupsTested;

        const bool alreadySeen = (_seen.end() != _seen.find(member->loc));
        if (alreadySeen) {
            ++_specificStats.dupsDropped;
            _ws->free(id);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }

        // First sighting; remember it.
        _seen.insert(member->loc);
    }

    if (!Filter::passes(member, _filter)) {
        // Rejected by the filter; the caller should try again.
        _ws->free(id);
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    if (NULL != _filter) {
        ++_specificStats.matchTested[_currentChild];
    }

    *out = id;
    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
// Performs the whole group operation in a single call.
//
// Sets up a pooled scripting scope, drains the child stage, runs the reduce function once per
// child result, optionally runs the finalize function over the accumulated groups, and returns
// the resulting "$arr" object in one working set member.
//
// 'out' receives the id of the result member on ADVANCED, or of a status member on FAILURE.
// Returns IS_EOF if isEOF() is already true on entry, DEAD if the child dies, FAILURE on any
// error, and ADVANCED otherwise.
PlanStage::StageState GroupStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Charge the time spent in this call to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Set the completed flag; this stage returns all results in a single call to work.
    // Subsequent calls will return EOF.
    // Note that the flag is set before any work is done, so it stays set on the failure paths
    // below as well.
    _groupCompleted = true;

    // Initialize the Scope object.
    const std::string userToken = ClientBasic::getCurrent()->getAuthorizationSession()
                                                           ->getAuthenticatedUserNamesToken();
    auto_ptr<Scope> s = globalScriptEngine->getPooledScope(_txn, _db->name(), "group" + userToken);

    if (!_request.reduceScope.isEmpty()) {
        s->init(&_request.reduceScope);
    }

    // Publish the request's initial value and reduce function into the scope, and start with
    // an empty result array.
    s->setObject("$initial", _request.initial, true);
    s->exec("$reduce = " + _request.reduceCode, "$group reduce setup", false, true, true, 100);
    s->exec("$arr = [];", "$group reduce setup 2", false, true, true, 100);

    // Per-document driver: on first use of slot n it seeds $arr[n] from $key and $initial,
    // then calls $reduce with the current document and that slot.
    ScriptingFunction f = s->createFunction("function(){ "
                                            " if ( $arr[n] == null ){ "
                                            " next = {}; "
                                            " Object.extend( next , $key ); "
                                            " Object.extend( next , $initial , true ); "
                                            " $arr[n] = next; "
                                            " next = null; "
                                            " } "
                                            " $reduce( obj , $arr[n] ); "
                                            "}");

    // A key function is only compiled when the request supplies code for one; 0 is passed to
    // getKey() otherwise.
    ScriptingFunction keyFunction = 0;
    if (_request.keyFunctionCode.size()) {
        keyFunction = s->createFunction(_request.keyFunctionCode.c_str());
    }

    // Construct the set of groups.
    // Maps each distinct group key to a 1-based group number; 0 (the value operator[] inserts)
    // marks a key that has not been numbered yet.
    map<BSONObj, int, BSONObjCmp> map;
    while (!_child->isEOF()) {
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState status = _child->work(&id);

        if (PlanStage::IS_EOF == status) {
            break;
        }
        else if (PlanStage::NEED_TIME == status) {
            continue;
        }
        else if (PlanStage::FAILURE == status) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it failed, in which
            // case 'id' is valid. If ID is invalid, we create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                const std::string errmsg = "group stage failed to read in results from child";
                *out = WorkingSetCommon::allocateStatusMember(_ws,
                                                              Status(ErrorCodes::InternalError,
                                                                     errmsg));
            }
            return status;
        }
        else if (PlanStage::DEAD == status) {
            return status;
        }

        // NOTE(review): any child state not handled above (for example NEED_FETCH) trips this
        // invariant rather than being passed up -- confirm the child can never report it.
        invariant(PlanStage::ADVANCED == status);
        WorkingSetMember* member = _ws->get(id);

        // Group queries can't have projections. This means that covering analysis will always
        // add a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());
        BSONObj obj = member->obj;
        _ws->free(id);

        BSONObj key;
        Status getKeyStatus = getKey(obj, _request.keyPattern, keyFunction, s.get(), &key);
        if (!getKeyStatus.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, getKeyStatus);
            return PlanStage::FAILURE;
        }

        // operator[] inserts the key with value 0 if it is new, so map.size() below already
        // counts this key and becomes its 1-based group number.
        int& n = map[key];
        if (n == 0) {
            n = map.size();
            s->setObject("$key", key, true);
            if (n > 20000) {
                const std::string errmsg = "group() can't handle more than 20000 unique keys";
                *out = WorkingSetCommon::allocateStatusMember(_ws,
                                                              Status(ErrorCodes::BadValue,
                                                                     errmsg));
                return PlanStage::FAILURE;
            }
        }

        // Hand the document and its zero-based slot index to the driver function.
        s->setObject("obj", obj, true);
        s->setNumber("n", n - 1);
        // A non-zero result from invoke() is treated as a reduce failure.
        if (s->invoke(f, 0, 0, 0, true)) {
            const std::string errmsg = str::stream() << "reduce invoke failed: "
                                                     << s->getError();
            *out = WorkingSetCommon::allocateStatusMember(_ws,
                                                          Status(ErrorCodes::BadValue, errmsg));
            return PlanStage::FAILURE;
        }
    }
    _specificStats.nGroups = map.size();

    // Invoke the finalize function.
    if (!_request.finalize.empty()) {
        s->exec("$finalize = " + _request.finalize, "$group finalize define", false, true, true,
                100);
        // Replaces each entry of $arr with $finalize's return value unless that is undefined.
        ScriptingFunction g = s->createFunction("function(){ "
                                                " for(var i=0; i < $arr.length; i++){ "
                                                " var ret = $finalize($arr[i]); "
                                                " if (ret !== undefined) "
                                                " $arr[i] = ret; "
                                                " } "
                                                "}");
        // NOTE(review): unlike the reduce invoke above, this return value is not checked --
        // confirm whether a failing finalize is meant to be ignored.
        s->invoke(g, 0, 0, 0, true);
    }

    // Return array of results.
    *out = _ws->allocate();
    WorkingSetMember* member = _ws->get(*out);
    member->obj = s->getObject("$arr").getOwned();
    member->state = WorkingSetMember::OWNED_OBJ;

    // Clear the accumulated array in the scope and request a garbage collection.
    s->exec("$arr = [];", "$group reduce setup 2", false, true, true, 100);
    s->gc();

    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
// Performs one unit of work for the 2d near stage.
//
// The first call runs the entire geo search and buffers every hit as a working set member in
// _results (and in _invalidationMap, keyed by DiskLoc). That same call and every later one
// then pop one buffered result until the stage is EOF.
//
// 'out' receives the id of the popped member on ADVANCED.
PlanStage::StageState TwoDNear::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Charge the time spent in this call to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (!_initted) {
        _initted = true;

        // Nothing to search without a collection or without the requested index.
        if (!_params.collection) {
            return PlanStage::IS_EOF;
        }

        IndexCatalog* catalog = _params.collection->getIndexCatalog();
        IndexDescriptor* descriptor = catalog->findIndexByKeyPattern(_params.indexKeyPattern);
        if (NULL == descriptor) {
            return PlanStage::IS_EOF;
        }

        TwoDAccessMethod* accessMethod =
            static_cast<TwoDAccessMethod*>(catalog->getIndex(descriptor));

        auto_ptr<twod_exec::GeoSearch> geoSearch;
        geoSearch.reset(new twod_exec::GeoSearch(_params.collection,
                                                 accessMethod,
                                                 _params.nearQuery.centroid.oldPoint,
                                                 _params.numWanted,
                                                 _params.filter,
                                                 _params.nearQuery.maxDistance,
                                                 _params.nearQuery.isNearSphere
                                                     ? twod_exec::GEO_SPHERE
                                                     : twod_exec::GEO_PLANE));

        // The search does all of its work up front.
        geoSearch->exec();

        _specificStats.objectsLoaded = geoSearch->_objectsLoaded;
        _specificStats.nscanned = geoSearch->_lookedAt;

        // Turn every hit into a buffered working set member.
        twod_exec::GeoHopper::Holder::iterator hit = geoSearch->_points.begin();
        while (hit != geoSearch->_points.end()) {
            WorkingSetID hitId = _workingSet->allocate();
            WorkingSetMember* hitMember = _workingSet->get(hitId);
            hitMember->loc = hit->_loc;
            hitMember->obj = _params.collection->docFor(hitMember->loc);
            hitMember->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;

            if (_params.addDistMeta) {
                hitMember->addComputed(new GeoDistanceComputedData(hit->_distance));
            }
            if (_params.addPointMeta) {
                hitMember->addComputed(new GeoNearPointComputedData(hit->_pt));
            }

            _results.push(Result(hitId, hit->_distance));
            _invalidationMap.insert(pair<DiskLoc, WorkingSetID>(hit->_loc, hitId));

            ++hit;
        }
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Hand out the result at the top of the buffer.
    Result next = _results.top();
    _results.pop();
    *out = next.id;

    // The member is leaving our hands, so stop tracking it for invalidation. It may have been
    // mutated or deleted in the meantime, in which case it no longer has a loc to look up.
    WorkingSetMember* returned = _workingSet->get(*out);
    if (returned->hasLoc()) {
        typedef multimap<DiskLoc, WorkingSetID>::iterator MMIT;
        pair<MMIT, MMIT> sameLoc = _invalidationMap.equal_range(returned->loc);

        MMIT entry = sameLoc.first;
        while (entry != sameLoc.second) {
            if (entry->second == *out) {
                _invalidationMap.erase(entry);
                break;
            }
            ++entry;
        }
    }

    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
// Records the cursor's current key (as an owned copy) and value, then asks the cursor to save
// its position. Does nothing once the scan is at EOF.
void IndexScan::prepareToYield() {
    if (!isEOF()) {
        _savedKey = _indexCursor->getKey().getOwned();
        _savedLoc = _indexCursor->getValue();
        _indexCursor->savePosition();
    }
}
// Performs one unit of work for the delete stage.
//
// Works the child once. For an ADVANCED result that carries a DiskLoc, the document is deleted
// inside a WriteUnitOfWork, optionally logged via repl::logOp, and NEED_TIME is reported (the
// stage itself never returns ADVANCED). A FAILURE is passed up with a status member. Every
// other child state is passed up unchanged, forwarding the child's id for NEED_FETCH.
//
// 'out' receives a status member id on FAILURE and the member to fetch on NEED_FETCH.
PlanStage::StageState DeleteStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection); // If isEOF() returns false, we must have a collection.

    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState status = _child->work(&id);

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);
        if (!member->hasLoc()) {
            _ws->free(id);
            const std::string errmsg = "delete stage failed to read member w/ loc from child";
            *out = WorkingSetCommon::allocateStatusMember(_ws,
                                                          Status(ErrorCodes::InternalError,
                                                                 errmsg));
            return PlanStage::FAILURE;
        }

        // Only the location is needed from here on, so the member can be released.
        DiskLoc rloc = member->loc;
        _ws->free(id);

        BSONObj deletedDoc;

        WriteUnitOfWork wunit(_txn);

        // TODO: Do we want to buffer docs and delete them in a group rather than
        // saving/restoring state repeatedly?
        saveState();
        const bool deleteCappedOK = false;
        const bool deleteNoWarn = false;
        // The deleted document is only captured when it will be needed for logging below.
        _collection->deleteDocument(_txn, rloc, deleteCappedOK, deleteNoWarn,
                                    _params.shouldCallLogOp ? &deletedDoc : NULL);
        restoreState(_txn);

        ++_specificStats.docsDeleted;

        if (_params.shouldCallLogOp) {
            if (deletedDoc.isEmpty()) {
                log() << "Deleted object without id in collection " << _collection->ns()
                      << ", not logging.";
            }
            else {
                bool replJustOne = true;
                repl::logOp(_txn, "d", _collection->ns().ns().c_str(), deletedDoc, 0,
                            &replJustOne, _params.fromMigrate);
            }
        }

        wunit.commit();
        _txn->recoveryUnit()->commitIfNeeded();

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }
    else if (PlanStage::FAILURE == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which case
        // 'id' is valid. If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "delete stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(_ws,
                                                          Status(ErrorCodes::InternalError,
                                                                 errmsg));
            return PlanStage::FAILURE;
        }
        return status;
    }
    else if (PlanStage::NEED_TIME == status) {
        ++_commonStats.needTime;
    }
    else if (PlanStage::NEED_FETCH == status) {
        // Forward the id the child handed us so the caller knows which member the fetch
        // request refers to; previously the request was passed up without it.
        *out = id;
        ++_commonStats.needFetch;
    }

    // Whatever the child reported is passed up unchanged.
    return status;
}
bool parseForNextToken() { size_t pos = getCurrentPosition(); assert( pos < codestr.size() ); // // Consume whitespace // for ( ; pos < codestr.size() && isWhitespace(codestr[pos]); ++pos ) { // do nothing, just consume space. } // // End of the line? // if ( pos == codestr.size() ) { setLastToken( "", TOK_EOF ); return false; } size_t startPos = pos; // // Search for the next token // TokenType type = TOK_UNKNOWN; bool done = false; bool error = false; for ( ; (!done) && pos < codestr.size() && (!isWhitespace(codestr[pos])) && ((pos == startPos) || !isEndOfToken(codestr[pos]) ); ++pos ) { char c = codestr[pos]; switch ( type ) { case TOK_UNKNOWN: if ( c == '(' ) { type = TOK_OPEN; done = true; } else if ( c == ')' ) { type = TOK_CLOSE; done = true; } else if ( c == ',' ) { type = TOK_SEP; done = true; } else if ( isNumeric( c ) ) { type = TOK_NUMERIC; } else if ( isIdent( c ) ) { type = TOK_IDENT; } else { error = true; } break; case TOK_NUMERIC: if ( isNumeric( c ) == false ) { error = true; } break; case TOK_IDENT: if ( isIdent( c ) == false ) { error = true; } break; default: error = true; } // // Was there an error while parsing the token? // if ( error ) { std::cerr << "Error while parsing token. " << "start=" << startPos << ", " << "pos=" << pos << ", " << "type=" << toString(type) << ", " << "value: " << codestr.substr( startPos, pos - startPos+1 ) << std::endl; return false; } } setLastToken( codestr.substr(startPos, pos - startPos), type ); setCurrentPosition( pos ); return !isEOF(); }
// Gets the next reference section from the file & stores it in the // passed in section. It will read until a new section is found. bool GlfFile::getNextRefSection(GlfRefSection& refSection) { if(myIsOpenForRead == false) { // File is not open for read myStatus.setStatus(GlfStatus::FAIL_ORDER, "Cannot read reference section since the file is not open for reading"); throw(GlfException(myStatus)); return(false); } if(myNextSection == HEADER) { // The header has not yet been read. // TODO - maybe just read the header. myStatus.setStatus(GlfStatus::FAIL_ORDER, "Cannot read reference section since the header has not been read."); throw(GlfException(myStatus)); return(false); } // Keep reading until the next section is found. if(myNextSection == RECORD) { GlfRecord record; while(getNextRecord(record)) { // Nothing to do, with the record. } } // Check for end of file. If end of file, return false. if(isEOF()) { return(false); } if(myNextSection != REF_SECTION) { // Failed reading all the records, so throw exception. myStatus.setStatus(GlfStatus::FAIL_IO, "Failed to get to a reference section."); throw(GlfException(myStatus)); return(false); } // Ready to read the section: if(refSection.read(myFilePtr)) { myStatus = GlfStatus::SUCCESS; // Next a record should be read. myNextSection = RECORD; return(true); } // If it is the EOF, just return false. if(isEOF()) { return(false); } myStatus.setStatus(GlfStatus::UNKNOWN, "Failed reading a reference section from the file."); throw(GlfException(myStatus)); return(false); }
// Performs one unit of work for the index scan.
//
// The first call creates and positions the index cursor. Later calls advance it, unless a
// yield already moved it or the scan is at EOF. The entry the cursor then points at is
// deduplicated (when enabled), wrapped in a working set member, and returned if it passes the
// filter.
//
// 'out' receives the id of the returned member on ADVANCED.
PlanStage::StageState IndexScan::work(WorkingSetID* out) {
    ++_commonStats.works;

    if (NULL == _indexCursor.get()) {
        // First call to work(): set the cursor up.
        CursorOptions cursorOptions;

        // The limit is *required* for 2d $near, which is the only index that pays attention
        // to it anyway.
        cursorOptions.numWanted = _params.limit;
        cursorOptions.direction = (1 == _params.direction) ? CursorOptions::INCREASING
                                                           : CursorOptions::DECREASING;

        IndexCursor *cursor;
        _iam->newCursor(&cursor);
        _indexCursor.reset(cursor);
        _indexCursor->setOptions(cursorOptions);

        if (!_params.bounds.isSimpleRange) {
            // "Fast" Btree-specific navigation driven by a bounds checker.
            _btreeCursor = static_cast<BtreeIndexCursor*>(_indexCursor.get());
            _checker.reset(new IndexBoundsChecker(&_params.bounds,
                                                  _descriptor->keyPattern(),
                                                  _params.direction));

            int nFields = _descriptor->keyPattern().nFields();
            vector<const BSONElement*> startKeyElts(nFields);
            vector<bool> startKeyInclusive(nFields);

            _checker->getStartKey(&startKeyElts, &startKeyInclusive);
            _btreeCursor->seek(startKeyElts, startKeyInclusive);

            _keyElts.resize(nFields);
            _keyEltsInc.resize(nFields);
        }
        else {
            // Plain range: start at one key, end at another.
            _indexCursor->seek(_params.bounds.startKey);
        }

        checkEnd();
    }
    else if (_yieldMovedCursor) {
        // The cursor was already moved for us; deliberately no next() here.
        _yieldMovedCursor = false;
    }
    else if (!isEOF()) {
        // work() may legitimately be called at EOF, but the cursor must not be advanced then.
        _indexCursor->next();
        checkEnd();
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    DiskLoc currentLoc = _indexCursor->getValue();

    if (_shouldDedup) {
        ++_specificStats.dupsTested;

        if (_returned.end() != _returned.find(currentLoc)) {
            // Already handed out once; skip it.
            ++_specificStats.dupsDropped;
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }

        _returned.insert(currentLoc);
    }

    WorkingSetID memberId = _workingSet->allocate();
    WorkingSetMember* indexMember = _workingSet->get(memberId);
    indexMember->loc = currentLoc;
    indexMember->keyData.push_back(IndexKeyDatum(_descriptor->keyPattern(),
                                                 _indexCursor->getKey().getOwned()));
    indexMember->state = WorkingSetMember::LOC_AND_IDX;

    if (!Filter::passes(indexMember, _filter)) {
        // Rejected by the filter; give the member back.
        _workingSet->free(memberId);
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    if (NULL != _filter) {
        ++_specificStats.matchTested;
    }

    *out = memberId;
    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
// Refreshes the EOF flag in the common stats and returns a newly allocated stats object that
// also carries this stage's index-scan specific stats. The caller receives the raw pointer.
PlanStageStats* IndexScan::getStats() {
    _commonStats.isEOF = isEOF();

    // Held by auto_ptr until it is released to the caller.
    auto_ptr<PlanStageStats> stageStats(new PlanStageStats(_commonStats));
    stageStats->setSpecific<IndexScanStats>(_specificStats);

    return stageStats.release();
}
// Performs one unit of work for the count stage.
//
// With an empty query and a collection present, the count is taken directly via
// trivialCount() and the stage reports EOF. Otherwise the child is worked once: each ADVANCED
// result is either skipped (while _leftToSkip is positive) or counted, and then discarded.
//
// 'out' is set to INVALID_ID up front; it is only overwritten with a status member on FAILURE
// or with the member to fetch on NEED_FETCH.
PlanStage::StageState CountStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Charge the time spent in this call to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    // This stage never returns a working set member.
    *out = WorkingSet::INVALID_ID;

    // No query plus an existing collection means the collection can simply be asked how many
    // records it has.
    if (_request.query.isEmpty() && NULL != _collection) {
        trivialCount();
        return PlanStage::IS_EOF;
    }

    if (isEOF()) {
        _commonStats.isEOF = true;
        return PlanStage::IS_EOF;
    }

    // A non-trivial count always has a child stage to pull results from.
    invariant(_child.get());

    WorkingSetID childId = WorkingSet::INVALID_ID;
    PlanStage::StageState childState = _child->work(&childId);

    switch (childState) {
    case PlanStage::IS_EOF:
        _commonStats.isEOF = true;
        return PlanStage::IS_EOF;

    case PlanStage::DEAD:
        return childState;

    case PlanStage::FAILURE:
        // A failing child may have allocated a status member explaining the failure, in which
        // case 'childId' is valid and is passed up. Otherwise we describe the failure
        // ourselves.
        if (WorkingSet::INVALID_ID == childId) {
            const std::string errmsg = "count stage failed to read result from child";
            Status status = Status(ErrorCodes::InternalError, errmsg);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        }
        else {
            *out = childId;
        }
        return childState;

    case PlanStage::NEED_FETCH:
        *out = childId;
        _commonStats.needFetch++;
        return PlanStage::NEED_FETCH;

    case PlanStage::ADVANCED:
        // Consume the skip budget first; only after that do results count.
        if (_leftToSkip > 0) {
            _leftToSkip--;
            _specificStats.nSkipped++;
        }
        else {
            _specificStats.nCounted++;
        }

        // The result itself is not needed, so any valid member is given straight back.
        if (WorkingSet::INVALID_ID != childId) {
            _ws->free(childId);
        }
        break;

    default:
        break;
    }

    _commonStats.needTime++;
    return PlanStage::NEED_TIME;
}
// Refreshes the EOF flag in the common stats and returns a newly allocated stats object
// for this stage. The caller receives the raw pointer.
PlanStageStats* TwoD::getStats() {
    _commonStats.isEOF = isEOF();

    PlanStageStats* stageStats = new PlanStageStats(_commonStats, STAGE_GEO_2D);
    return stageStats;
}
// Performs one unit of work for the index scan.
//
// Order of business on each call:
//   1. Pay back one NEED_TIME for every extra key examined by an earlier checkEnd().
//   2. On the first real call, initialize the cursor and report NEED_TIME.
//   3. Otherwise read the entry the cursor points at, advance the cursor, then dedup and
//      filter that entry and return it as a working set member if it survives.
//
// 'out' receives the id of the returned member on ADVANCED.
PlanStage::StageState IndexScan::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Charge the time spent in this call to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    // A prior work cycle examined several keys at once; even that out by reporting NEED_TIME.
    // This is done for plan ranking (see the comment on '_checkEndKeys' in the header).
    if (_checkEndKeys > 0) {
        --_checkEndKeys;
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    if (NULL == _indexCursor.get()) {
        // First call to work(): possibly heavy initialization.
        initIndexScan();
        checkEnd();
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    if (_yieldMovedCursor) {
        // Recovering from the yield already fetched the next entry; no next() here.
        _yieldMovedCursor = false;
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Capture the current (key, value) pair before moving on.
    BSONObj currentKey = _indexCursor->getKey();
    DiskLoc currentLoc = _indexCursor->getValue();

    // Keep the cursor pointing at the *next* entry to return, so that a scan feeding deletes
    // does not keep clobbering the entry it is positioned on.
    _indexCursor->next();
    checkEnd();

    if (_shouldDedup) {
        ++_specificStats.dupsTested;

        if (_returned.end() != _returned.find(currentLoc)) {
            // Already handed out once; skip it.
            ++_specificStats.dupsDropped;
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }

        _returned.insert(currentLoc);
    }

    if (!Filter::passes(currentKey, _keyPattern, _filter)) {
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    if (NULL != _filter) {
        ++_specificStats.matchTested;
    }

    // The on-disk key can change while the query runs, so work from a private copy.
    BSONObj ownedKey = currentKey.getOwned();

    WorkingSetID memberId = _workingSet->allocate();
    WorkingSetMember* indexMember = _workingSet->get(memberId);
    indexMember->loc = currentLoc;
    indexMember->keyData.push_back(IndexKeyDatum(_keyPattern, ownedKey));
    indexMember->state = WorkingSetMember::LOC_AND_IDX;

    if (_params.addKeyMetadata) {
        BSONObjBuilder keyBuilder;
        keyBuilder.appendKeys(_keyPattern, ownedKey);
        indexMember->addComputed(new IndexKeyComputedData(keyBuilder.obj()));
    }

    *out = memberId;
    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
// Refreshes the EOF flag in the common stats and returns a newly allocated stats object that
// carries a copy of this stage's collection-scan specific stats. The caller receives the raw
// pointer.
PlanStageStats* CollectionScan::getStats() {
    _commonStats.isEOF = isEOF();

    // Held by auto_ptr until it is released to the caller.
    auto_ptr<PlanStageStats> stageStats(new PlanStageStats(_commonStats, STAGE_COLLSCAN));
    stageStats->specific.reset(new CollectionScanStats(_specificStats));

    return stageStats.release();
}
void IndexScan::checkEnd() { if (isEOF()) { _commonStats.isEOF = true; return; } if (_params.bounds.isSimpleRange) { // "Normal" start -> end scanning. verify(NULL == _btreeCursor); verify(NULL == _checker.get()); // If there is an empty endKey we will scan until we run out of index to scan over. if (_params.bounds.endKey.isEmpty()) { return; } int cmp = sgn(_params.bounds.endKey.woCompare(_indexCursor->getKey(), _keyPattern)); if ((cmp != 0 && cmp != _params.direction) || (cmp == 0 && !_params.bounds.endKeyInclusive)) { _hitEnd = true; _commonStats.isEOF = true; } if (!isEOF() && _params.bounds.isSimpleRange) { ++_specificStats.keysExamined; } } else { verify(NULL != _btreeCursor); verify(NULL != _checker.get()); // Use _checker to see how things are. for (;;) { //cout << "current index key is " << _indexCursor->getKey().toString() << endl; //cout << "keysExamined is " << _specificStats.keysExamined << endl; IndexBoundsChecker::KeyState keyState; keyState = _checker->checkKey(_indexCursor->getKey(), &_keyEltsToUse, &_movePastKeyElts, &_keyElts, &_keyEltsInc); if (IndexBoundsChecker::DONE == keyState) { _hitEnd = true; break; } // This seems weird but it's the old definition of nscanned. ++_specificStats.keysExamined; if (IndexBoundsChecker::VALID == keyState) { break; } //cout << "skipping...\n"; verify(IndexBoundsChecker::MUST_ADVANCE == keyState); _btreeCursor->skip(_indexCursor->getKey(), _keyEltsToUse, _movePastKeyElts, _keyElts, _keyEltsInc); // Must check underlying cursor EOF after every cursor movement. if (_btreeCursor->isEOF()) { _hitEnd = true; break; } ++_checkEndKeys; } } }
// Refreshes the EOF flag in the common stats and returns a newly allocated stats object
// for this stage. The caller receives the raw pointer.
PlanStageStats* TextStage::getStats() {
    _commonStats.isEOF = isEOF();

    PlanStageStats* stageStats = new PlanStageStats(_commonStats, STAGE_TEXT);
    return stageStats;
}
void IndexScan::initIndexScan() { // Perform the possibly heavy-duty initialization of the underlying index cursor. if (_params.doNotDedup) { _shouldDedup = false; } else { _shouldDedup = _params.descriptor->isMultikey(); } // We can't always access the descriptor in the call to getStats() so we pull // the status-only information we need out here. _specificStats.indexName = _params.descriptor->infoObj()["name"].String(); _specificStats.isMultiKey = _params.descriptor->isMultikey(); // Set up the index cursor. CursorOptions cursorOptions; if (1 == _params.direction) { cursorOptions.direction = CursorOptions::INCREASING; } else { cursorOptions.direction = CursorOptions::DECREASING; } IndexCursor *cursor; Status s = _iam->newCursor(_txn, cursorOptions, &cursor); verify(s.isOK()); _indexCursor.reset(cursor); if (_params.bounds.isSimpleRange) { // Start at one key, end at another. Status status = _indexCursor->seek(_params.bounds.startKey); if (!status.isOK()) { warning() << "IndexCursor seek failed: " << status.toString(); _hitEnd = true; } if (!isEOF()) { _specificStats.keysExamined = 1; } } else { // "Fast" Btree-specific navigation. _btreeCursor = static_cast<BtreeIndexCursor*>(_indexCursor.get()); _checker.reset(new IndexBoundsChecker(&_params.bounds, _keyPattern, _params.direction)); int nFields = _keyPattern.nFields(); vector<const BSONElement*> key; vector<bool> inc; key.resize(nFields); inc.resize(nFields); if (_checker->getStartKey(&key, &inc)) { _btreeCursor->seek(key, inc); _keyElts.resize(nFields); _keyEltsInc.resize(nFields); } else { _hitEnd = true; } } }
/**
 * Performs one unit of work for the update stage.
 *
 * Pulls at most one result from the child and applies the update to it. Once updating is
 * finished, performs the pending insert if one is needed.
 *
 * @param out  only written on FAILURE, where it receives the id of a status working-set
 *             member describing the error (the child's, or one allocated here).
 * @return IS_EOF when the stage is finished; NEED_TIME after processing (or skipping) a
 *         child result and when the child reports EOF; FAILURE on error; otherwise the
 *         child's status is passed through unchanged.
 */
PlanStage::StageState UpdateStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    if (doneUpdating()) {
        // Even if we're done updating, we may have some inserting left to do.
        if (needInsert()) {
            doInsert();
        }

        // At this point either we're done updating and there was no insert to do,
        // or we're done updating and we're done inserting. Either way, we're EOF.
        invariant(isEOF());
        return PlanStage::IS_EOF;
    }

    // If we're here, then we still have to ask for results from the child and apply
    // updates to them. We should only get here if the collection exists.
    invariant(_collection);

    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState status = _child->work(&id);

    if (PlanStage::ADVANCED == status) {
        // Need to get these things from the result returned by the child.
        DiskLoc loc;
        BSONObj oldObj;

        WorkingSetMember* member = _ws->get(id);

        if (!member->hasLoc()) {
            _ws->free(id);
            const std::string errmsg = "update stage failed to read member w/ loc from child";
            *out = WorkingSetCommon::allocateStatusMember(_ws, Status(ErrorCodes::InternalError,
                                                                      errmsg));
            return PlanStage::FAILURE;
        }
        loc = member->loc;

        // Updates can't have projections. This means that covering analysis will always add
        // a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());
        oldObj = member->obj;

        // We now hold both the DiskLoc and the object, so the working-set member is no
        // longer needed.
        _ws->free(id);

        // _updatedLocs (when present) holds the new locs of moved docs so that we don't
        // update the same document twice.
        if (_updatedLocs && _updatedLocs->count(loc) > 0) {
            // Found a loc that we already updated.
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }

        ++_specificStats.nMatched;

        // Do the update and return.
        transformAndUpdate(oldObj, loc);

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }
    else if (PlanStage::IS_EOF == status) {
        // The child is out of results, but we might not be done yet because we still might
        // have to do an insert.
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }
    else if (PlanStage::FAILURE == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which
        // case 'id' is valid. If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            // This message names the update stage so that failures are attributed to the
            // stage that actually reported them.
            const std::string errmsg = "update stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(_ws, Status(ErrorCodes::InternalError,
                                                                      errmsg));
            return PlanStage::FAILURE;
        }
        return status;
    }
    else {
        if (PlanStage::NEED_TIME == status) {
            ++_commonStats.needTime;
        }
        return status;
    }
}
/**
 * Produces a stats snapshot for this collection scan from the common stats only.
 *
 * Refreshes the cached EOF flag and returns a newly allocated PlanStageStats.
 *
 * NOTE(review): unlike the other getStats() definitions in this file, this one passes no
 * stage type and attaches no stage-specific stats. Confirm which CollectionScan::getStats()
 * definition is meant to survive.
 */
PlanStageStats* CollectionScan::getStats() {
    _commonStats.isEOF = isEOF();

    PlanStageStats* stats = new PlanStageStats(_commonStats);
    return stats;
}
/**
 * Performs one unit of work for the 2d geo stage.
 *
 * The first call resolves the 2d index and builds the geo browser matching the query
 * geometry (circle, polygon or box). Each later call emits the document at the browser's
 * current position and advances the browser.
 *
 * Every call is counted in _commonStats.works, and the initialization call is counted in
 * needTime, matching how the other stages in this file keep their common stats.
 *
 * @param out  receives the id of the newly allocated working-set member on ADVANCED.
 * @return IS_EOF when finished, or when the collection or index is missing; NEED_TIME
 *         after initialization; ADVANCED when a result was produced.
 */
PlanStage::StageState TwoD::work(WorkingSetID* out) {
    ++_commonStats.works;

    if (isEOF()) { return PlanStage::IS_EOF; }

    if (!_initted) {
        _initted = true;

        if ( !_params.collection )
            return PlanStage::IS_EOF;

        IndexCatalog* indexCatalog = _params.collection->getIndexCatalog();

        _descriptor = indexCatalog->findIndexByKeyPattern(_params.indexKeyPattern);
        if ( _descriptor == NULL )
            return PlanStage::IS_EOF;

        _am = static_cast<TwoDAccessMethod*>( indexCatalog->getIndex( _descriptor ) );
        verify( _am );

        // Pick the browser that matches whichever geometry the query carries.
        if (NULL != _params.gq.getGeometry()._cap.get()) {
            _browse.reset(new twod_exec::GeoCircleBrowse(_params, _am));
        }
        else if (NULL != _params.gq.getGeometry()._polygon.get()) {
            _browse.reset(new twod_exec::GeoPolygonBrowse(_params, _am));
        }
        else {
            verify(NULL != _params.gq.getGeometry()._box.get());
            _browse.reset(new twod_exec::GeoBoxBrowse(_params, _am));
        }

        // Fill out static portion of plan stats.
        // We will retrieve the geo hashes used by the geo browser
        // when the search is complete.
        _specificStats.type = _browse->_type;
        _specificStats.field = _params.gq.getField();
        _specificStats.converterParams = _browse->_converter->getParams();

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    verify(NULL != _browse.get());

    if (!_browse->ok()) {
        // Grab geo hashes before disposing geo browser.
        _specificStats.expPrefixes.swap(_browse->_expPrefixes);
        _browse.reset();
        return PlanStage::IS_EOF;
    }

    // Emit the document at the browser's current position.
    WorkingSetID id = _workingSet->allocate();
    WorkingSetMember* member = _workingSet->get(id);
    member->loc = _browse->currLoc();
    member->obj = _params.collection->docFor(member->loc);
    member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;

    _browse->advance();

    *out = id;
    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
/**
 * Performs one unit of work for the subplan stage.
 *
 * While in the PLANNING state, first tries to plan via runSubplans(); if that does not
 * succeed (and the stage was not killed meanwhile) it falls back to
 * getExecutorAlwaysPlan() and adopts that executor's stage tree as the child. After
 * planning, every call simply delegates to the child.
 *
 * @param out  on FAILURE, receives a working-set member whose object carries
 *             "ok"/"code"/"errmsg" describing the planning error; otherwise whatever the
 *             child writes.
 * @return DEAD if killed, IS_EOF if finished, FAILURE if fallback planning failed,
 *         otherwise the child's status.
 */
PlanStage::StageState SubplanStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (_killed) { return PlanStage::DEAD; }
    if (isEOF()) { return PlanStage::IS_EOF; }

    if (SubplanStage::PLANNING == _state) {
        const bool plannedAsSubplans = runSubplans();

        if (plannedAsSubplans) {
            // A successful subplan run must have installed a child.
            invariant(_child.get());
        }
        else if (!_killed) {
            // Subplanning was not possible; plan the whole query the ordinary way.
            PlanExecutor* rawExec;
            Status planStatus = getExecutorAlwaysPlan(_collection,
                                                      _query,
                                                      _plannerParams,
                                                      &rawExec);

            if (planStatus.isOK()) {
                // Take the stage tree out of the executor; the executor itself is
                // destroyed when execOwner goes out of scope.
                scoped_ptr<PlanExecutor> execOwner(rawExec);
                _child.reset(rawExec->releaseStages());
            }
            else {
                // Nothing worked. Mark ourselves dead and hand the error back to the
                // caller wrapped in a BSONObj.
                _killed = true;

                WorkingSetID errorId = _ws->allocate();
                WorkingSetMember* errorMember = _ws->get(errorId);
                errorMember->state = WorkingSetMember::OWNED_OBJ;
                errorMember->keyData.clear();
                errorMember->loc = DiskLoc();

                BSONObjBuilder errorBuilder;
                errorBuilder.append("ok", planStatus.isOK() ? 1.0 : 0.0);
                errorBuilder.append("code", planStatus.code());
                errorBuilder.append("errmsg", planStatus.reason());
                errorMember->obj = errorBuilder.obj();

                *out = errorId;
                return PlanStage::FAILURE;
            }
        }

        // Leaving PLANNING is only legal when killed or when a child is in place.
        invariant(_killed || NULL != _child.get());
        _state = SubplanStage::RUNNING;
    }

    // Planning may have changed either of these, so check again.
    if (_killed) { return PlanStage::DEAD; }
    if (isEOF()) { return PlanStage::IS_EOF; }

    // From here on we must be running with a planned child.
    invariant(SubplanStage::RUNNING == _state);
    invariant(_child.get());
    return _child->work(out);
}
/**
 * Performs one unit of work for the keep-mutations stage.
 *
 * Phase 1: stream the child's results through unchanged until the child reports EOF.
 * Phase 2: walk a private copy of the working set's flagged ids, returning those whose
 *          members pass _filter and freeing the rest.
 *
 * @param out  receives the child's result id in phase 1, or a flagged id that passed the
 *             filter in phase 2.
 * @return the child's status while it has results; then ADVANCED / NEED_TIME per flagged
 *         member; IS_EOF once the flagged ids are exhausted.
 */
PlanStage::StageState KeepMutationsStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    // Nothing further to return.
    if (isEOF()) { return PlanStage::IS_EOF; }

    if (!_doneReadingChild) {
        const StageState childStatus = _child->work(out);

        if (PlanStage::IS_EOF != childStatus) {
            // The child is still producing; account for the outcome and pass it through.
            switch (childStatus) {
            case PlanStage::ADVANCED:
                ++_commonStats.advanced;
                break;
            case PlanStage::NEED_TIME:
                ++_commonStats.needTime;
                break;
            case PlanStage::NEED_FETCH:
                ++_commonStats.needFetch;
                break;
            default:
                break;
            }
            return childStatus;
        }

        // The child is exhausted; switch over to the flagged results.
        _doneReadingChild = true;

        // Snapshot the flagged ids. The working set's own flagged set may be modified
        // later if more documents are invalidated during a yield, so we can't iterate it
        // directly.
        std::copy(_workingSet->getFlagged().begin(),
                  _workingSet->getFlagged().end(),
                  std::back_inserter(_flagged));
        _flaggedIterator = _flagged.begin();
    }

    // Phase 2: streaming flagged results.
    invariant(!_doneReturningFlagged);

    if (_flaggedIterator == _flagged.end()) {
        _doneReturningFlagged = true;
        return PlanStage::IS_EOF;
    }

    const WorkingSetID candidateId = *_flaggedIterator;
    ++_flaggedIterator;

    WorkingSetMember* candidate = _workingSet->get(candidateId);
    if (!Filter::passes(candidate, _filter)) {
        // Doesn't match the filter; drop it.
        _workingSet->free(candidateId);
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    *out = candidateId;
    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
/**
 * Performs one unit of work for the merge-sort stage.
 *
 * While some child has no buffered result, works the child at the front of
 * _noResultToMerge and buffers what it produces (optionally dropping DiskLocs that were
 * already seen). Once every remaining child has a buffered result, pops the top of the
 * _merging priority queue and returns it, re-queueing the child it came from.
 *
 * A flagged result is never returned: it is freed and reported as NEED_TIME. In that case
 * it is counted in needTime rather than advanced, and '*out' is left untouched so the
 * caller never sees the id of a freed member.
 *
 * @param out  receives the result id on ADVANCED, or a status member id on FAILURE.
 * @return IS_EOF when finished; NEED_TIME while buffering or when a result is dropped;
 *         ADVANCED when the minimum result is returned; FAILURE on child failure;
 *         otherwise the child's status is passed through.
 */
PlanStage::StageState MergeSortStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) { return PlanStage::IS_EOF; }

    if (!_noResultToMerge.empty()) {
        // We have some child that we don't have a result from. Each child must have a
        // result in order to pick the minimum result among all our children. Work a child.
        PlanStage* child = _noResultToMerge.front();
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState code = child->work(&id);

        if (PlanStage::ADVANCED == code) {
            if (_dedup) {
                WorkingSetMember* member = _ws->get(id);

                // Dedup is only possible when the result carries a DiskLoc; without one
                // we simply use the result.
                if (member->hasLoc()) {
                    ++_specificStats.dupsTested;

                    if (_seen.end() != _seen.find(member->loc)) {
                        // Seen before: drop it and keep the child queued for another try.
                        _ws->free(id);
                        ++_commonStats.needTime;
                        ++_specificStats.dupsDropped;
                        return PlanStage::NEED_TIME;
                    }

                    // First sighting: remember it.
                    _seen.insert(member->loc);
                }
            }

            // We're going to use this result, so the child leaves the queue of children
            // without a result.
            _noResultToMerge.pop();

            // Store the result in our list.
            StageWithValue value;
            value.id = id;
            value.stage = child;
            _mergingData.push_front(value);

            // Insert the result (indirectly) into our priority queue.
            _merging.push(_mergingData.begin());

            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::IS_EOF == code) {
            // There are no more results possible from this child. Don't bother with it
            // anymore.
            _noResultToMerge.pop();
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::FAILURE == code) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid. If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "merge sort stage failed to read in results from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
            }
            return code;
        }
        else {
            if (PlanStage::NEED_TIME == code) {
                ++_commonStats.needTime;
            }
            return code;
        }
    }

    // If we're here, for each non-EOF child, we have a valid WSID.
    verify(!_merging.empty());

    // Get the 'min' WSID. _merging is a priority queue so its top is the smallest.
    MergingRef top = _merging.top();
    _merging.pop();

    // Since we're returning the WSID that came from top->stage, we need to work(...) it
    // again to get a new result.
    _noResultToMerge.push(top->stage);

    // Save the ID that we're returning and remove the returned result from our data.
    WorkingSetID idToTest = top->id;
    _mergingData.erase(top);

    // Don't return a flagged result: free it and report that we need more time. This is
    // not an advance, so it is counted as needTime.
    if (_ws->isFlagged(idToTest)) {
        _ws->free(idToTest);
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    // Return the min.
    *out = idToTest;
    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
/**
 * Produces a stats snapshot for the limit stage and its child.
 *
 * Refreshes the cached EOF flag, builds a PlanStageStats tagged STAGE_LIMIT, and appends
 * the child's own stats (obtained via _child->getStats()) to its children list.
 *
 * @return the new stats object as a raw pointer, released from the local auto_ptr.
 */
PlanStageStats* LimitStage::getStats() {
    _commonStats.isEOF = isEOF();

    auto_ptr<PlanStageStats> stats(new PlanStageStats(_commonStats, STAGE_LIMIT));

    // Attach the child's stats beneath ours.
    PlanStageStats* childStats = _child->getStats();
    stats->children.push_back(childStats);

    return stats.release();
}
/**
 * Performs one unit of work for the hash-based AND stage.
 *
 * An AND is either reading the first child into the hash table, probing against the hash
 * table with subsequent children, or checking the last child's results to see if they're
 * in the hash table. The first two are delegated to readFirstChild() and
 * hashOtherChildren(); the last is handled here.
 *
 * Both paths on which this function itself discards a result from the last child (the
 * member lost its DiskLoc, or its DiskLoc is not in the table) count as needTime.
 *
 * @param out  receives the id of the merged (older) member on ADVANCED; otherwise
 *             whatever the child wrote.
 * @return IS_EOF when finished; the child's status if it did not advance; NEED_TIME when
 *         a result was discarded; ADVANCED when a result matched every child and passed
 *         the filter.
 */
PlanStage::StageState AndHashStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    if (isEOF()) { return PlanStage::IS_EOF; }

    // We read the first child into our hash table.
    if (_hashingChildren) {
        if (0 == _currentChild) {
            return readFirstChild(out);
        }
        else if (_currentChild < _children.size() - 1) {
            return hashOtherChildren(out);
        }
        else {
            _hashingChildren = false;
            // We don't hash our last child. Instead, we probe the table created from the
            // previous children, returning results in the order of the last child.
            // Fall through to below.
        }
    }

    // Returning results. We read from the last child and return the results that are in
    // our hash map.

    // We should be EOF if we're not hashing results and the dataMap is empty.
    verify(!_dataMap.empty());

    // We probe _dataMap with the last child.
    verify(_currentChild == _children.size() - 1);

    // Work the last child.
    StageState childStatus = _children[_children.size() - 1]->work(out);
    if (PlanStage::ADVANCED != childStatus) {
        return childStatus;
    }

    // We know that we've ADVANCED. See if the WSM is in our table.
    WorkingSetMember* member = _ws->get(*out);

    // Maybe the child had an invalidation. We intersect DiskLoc(s) so we can't do anything
    // with this WSM.
    if (!member->hasLoc()) {
        _ws->flagForReview(*out);
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    DataMap::iterator it = _dataMap.find(member->loc);
    if (_dataMap.end() == it) {
        // Child's output wasn't in every previous child. Throw it out.
        _ws->free(*out);
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }
    else {
        // Child's output was in every previous child. Merge any key data in
        // the child's output and free the child's just-outputted WSM.
        WorkingSetID hashID = it->second;
        _dataMap.erase(it);

        WorkingSetMember* olderMember = _ws->get(hashID);
        AndCommon::mergeFrom(olderMember, *member);
        _ws->free(*out);

        // We should check for matching at the end so the matcher can use information in
        // the indices of all our children.
        if (Filter::passes(olderMember, _filter)) {
            *out = hashID;
            ++_commonStats.advanced;
            return PlanStage::ADVANCED;
        }
        else {
            _ws->free(hashID);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
    }
}
/**
 * Produces a stats snapshot for the 2d geo stage.
 *
 * Refreshes the cached EOF flag, then builds a PlanStageStats tagged STAGE_GEO_2D whose
 * 'specific' slot holds a TwoDStats constructed from this stage's specific stats.
 *
 * @return the new stats object as a raw pointer, released from the local auto_ptr.
 */
PlanStageStats* TwoD::getStats() {
    _commonStats.isEOF = isEOF();

    // Keep the result in an auto_ptr while the specific stats are being attached.
    auto_ptr<PlanStageStats> stats(new PlanStageStats(_commonStats, STAGE_GEO_2D));
    stats->specific.reset(new TwoDStats(_specificStats));

    return stats.release();
}
/**
 * Performs one unit of work for the collection scan.
 *
 * The first call (and, for tailable scans, the first call after hitting EOF) creates the
 * underlying iterator and returns NEED_TIME. Every other call peeks at the iterator's
 * current location, asks for a fetch if the collection reports one is needed, and
 * otherwise advances the iterator and returns the record through returnIfMatches().
 *
 * @param out  receives _wsidForFetch on NEED_FETCH; otherwise it is passed on to
 *             returnIfMatches() together with the newly allocated member.
 * @return DEAD if the scan is dead or cannot be resumed, NEED_TIME after initialization,
 *         NEED_FETCH when a fetch is requested, IS_EOF when there is nothing (more) to
 *         read, otherwise whatever returnIfMatches() decides.
 */
PlanStage::StageState CollectionScan::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (_isDead) { return PlanStage::DEAD; }

    // Lazily create the iterator.
    if (NULL == _iter) {
        if (NULL == _params.collection) {
            _isDead = true;
            return PlanStage::DEAD;
        }

        if (!_lastSeenLoc.isNull()) {
            // Resuming: only a tailable scan is allowed to get here.
            invariant(_params.tailable);

            _iter.reset(_params.collection->getIterator(_txn,
                                                        _lastSeenLoc,
                                                        _params.direction));

            // Step past the record we returned last time. If the iterator hands back
            // anything else, die rather than silently drop data from the stream.
            if (_iter->getNext() != _lastSeenLoc) {
                _isDead = true;
                return PlanStage::DEAD;
            }
        }
        else {
            // Fresh scan: begin at the configured start location.
            _iter.reset(_params.collection->getIterator(_txn,
                                                        _params.start,
                                                        _params.direction));
        }

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    // Is there any point in asking the iterator for more?
    if (isEOF()) { return PlanStage::IS_EOF; }

    // Peek at the location getNext() is about to return, without advancing, so that a
    // fetch can be requested before the record is touched.
    const DiskLoc peekLoc = _iter->curr();
    if (!peekLoc.isNull()) {
        std::auto_ptr<RecordFetcher> fetcher(
            _params.collection->documentNeedsFetch(_txn, peekLoc));

        if (NULL != fetcher.get()) {
            WorkingSetMember* fetchMember = _workingSet->get(_wsidForFetch);
            fetchMember->loc = peekLoc;
            // Pass the RecordFetcher off to the WSM.
            fetchMember->setFetcher(fetcher.release());
            *out = _wsidForFetch;
            _commonStats.needFetch++;
            return PlanStage::NEED_FETCH;
        }
    }

    // Advance the iterator; a null location means it has nothing new for us.
    const DiskLoc nextLoc = _iter->getNext();
    if (nextLoc.isNull()) {
        if (_params.tailable) {
            // Drop the iterator so the next call to work() picks up where we left off.
            _iter.reset();
        }
        return PlanStage::IS_EOF;
    }

    _lastSeenLoc = nextLoc;

    // Wrap the record in a new working-set member and let the filter decide its fate.
    WorkingSetID id = _workingSet->allocate();
    WorkingSetMember* member = _workingSet->get(id);
    member->loc = nextLoc;
    member->obj = _iter->dataFor(member->loc).releaseToBson();
    member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;

    return returnIfMatches(member, id, out);
}