PlanStage::StageState SortKeyGeneratorStage::doWork(WorkingSetID* out) {
    if (!_sortKeyGen) {
        _sortKeyGen = stdx::make_unique<SortKeyGenerator>(_sortSpec, _query);
        return PlanStage::NEED_TIME;
    }

    auto stageState = child()->work(out);
    if (stageState == PlanStage::ADVANCED) {
        WorkingSetMember* member = _ws->get(*out);

        BSONObj sortKey;
        Status sortKeyStatus = _sortKeyGen->getSortKey(*member, &sortKey);
        if (!sortKeyStatus.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, sortKeyStatus);
            return PlanStage::FAILURE;
        }

        // Add the sort key to the WSM as computed data.
        member->addComputed(new SortKeyComputedData(sortKey));

        return PlanStage::ADVANCED;
    }

    if (stageState == PlanStage::IS_EOF) {
        _commonStats.isEOF = true;
    }

    return stageState;
}
PlanStage::StageState TextStage::returnResults(WorkingSetID* out) {
    if (_scoreIterator == _scores.end()) {
        _internalState = DONE;
        return PlanStage::IS_EOF;
    }

    // Filter for phrases and negative terms, score and truncate.
    DiskLoc loc = _scoreIterator->first;
    double score = _scoreIterator->second;
    _scoreIterator++;

    // Ignore non-matched documents.
    if (score < 0) {
        return PlanStage::NEED_TIME;
    }

    // Filter for phrases and negated terms.
    if (_params.query.hasNonTermPieces()) {
        if (!_ftsMatcher.matchesNonTerm(loc.obj())) {
            return PlanStage::NEED_TIME;
        }
    }

    *out = _ws->allocate();
    WorkingSetMember* member = _ws->get(*out);
    member->loc = loc;
    member->obj = member->loc.obj();
    member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
    member->addComputed(new TextScoreComputedData(score));
    return PlanStage::ADVANCED;
}
PlanStage::StageState TextStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Fill out our result queue.
    if (!_filledOutResults) {
        PlanStage::StageState ss = fillOutResults();
        if (ss == PlanStage::IS_EOF || ss == PlanStage::FAILURE) {
            return ss;
        }
        verify(ss == PlanStage::NEED_TIME);
    }

    // Having cached all our results, return them one at a time.

    // Fill out a WSM.
    WorkingSetID id = _ws->allocate();
    WorkingSetMember* member = _ws->get(id);
    member->loc = _results[_curResult].loc;
    member->obj = member->loc.obj();
    member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
    // TODO: Planner can tell us whether or not to do this depending on whether or not we
    // have a $textScore projection.
    member->addComputed(new TextScoreComputedData(_results[_curResult].score));

    // Advance to next result.
    ++_curResult;
    *out = id;
    return PlanStage::ADVANCED;
}
PlanStage::StageState TextStage::returnResults(WorkingSetID* out) {
    if (_scoreIterator == _scores.end()) {
        _internalState = DONE;
        return PlanStage::IS_EOF;
    }

    // Filter for phrases and negative terms, score and truncate.
    TextRecordData textRecordData = _scoreIterator->second;

    // Ignore non-matched documents.
    if (textRecordData.score < 0) {
        _scoreIterator++;
        invariant(textRecordData.wsid == WorkingSet::INVALID_ID);
        return PlanStage::NEED_TIME;
    }

    WorkingSetMember* wsm = _ws->get(textRecordData.wsid);
    try {
        if (!WorkingSetCommon::fetchIfUnfetched(_txn, wsm, _params.index->getCollection())) {
            _scoreIterator++;
            _ws->free(textRecordData.wsid);
            _commonStats.needTime++;
            return NEED_TIME;
        }
    } catch (const WriteConflictException& wce) {
        // Do this record again next time around.
        *out = WorkingSet::INVALID_ID;
        _commonStats.needYield++;
        return NEED_YIELD;
    }

    _scoreIterator++;

    // Filter for phrases and negated terms.
    if (!_ftsMatcher.matches(wsm->obj.value())) {
        _ws->free(textRecordData.wsid);
        return PlanStage::NEED_TIME;
    }

    // Populate the working set member with the text score and return it.
    wsm->addComputed(new TextScoreComputedData(textRecordData.score));
    *out = textRecordData.wsid;
    return PlanStage::ADVANCED;
}
PlanStage::StageState TextStage::returnResults(WorkingSetID* out) {
    if (_scoreIterator == _curScoreMap->end()) {
        _startedNegativeScans = false;
        _internalState = DONE;
        return PlanStage::IS_EOF;
    }

    // Filter for phrases and negative terms, score and truncate.
    DiskLoc loc = _scoreIterator->first;
    const BSONObj obj = _params.index->getCollection()->docFor(loc);
    double score = _scoreIterator->second;
    _scoreIterator++;

    // If negated terms were present but we opted not to scan (due to threshold
    // constraint), do a manual scan for all negative terms and all phrases.
    if (!_startedNegativeScans && _params.query.getNegatedTerms().size() > 0) {
        if (_params.query.hasNonTermPieces()) {
            if (!_ftsMatcher.matchesNonTerm(obj)) {
                return PlanStage::NEED_TIME;
            }
        }
    }
    // Otherwise, just scan for phrases.
    else if (_params.query.getPhr().size() > 0 || _params.query.getNegatedPhr().size() > 0) {
        if (!_ftsMatcher.phrasesMatch(obj)) {
            return PlanStage::NEED_TIME;
        }
    }

    // Ignore non-matched documents.
    if (score < 0) {
        return PlanStage::NEED_TIME;
    }

    *out = _ws->allocate();
    WorkingSetMember* member = _ws->get(*out);
    member->loc = loc;
    member->obj = obj;
    member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
    member->addComputed(new TextScoreComputedData(score));
    return PlanStage::ADVANCED;
}
PlanStage::StageState SortKeyGeneratorStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (!_sortKeyGen) {
        _sortKeyGen = stdx::make_unique<SortKeyGenerator>(_collection, _sortSpec, _query);
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    auto stageState = child()->work(out);
    if (stageState == PlanStage::ADVANCED) {
        WorkingSetMember* member = _ws->get(*out);

        BSONObj sortKey;
        Status sortKeyStatus = _sortKeyGen->getSortKey(*member, &sortKey);
        if (!sortKeyStatus.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, sortKeyStatus);
            return PlanStage::FAILURE;
        }

        // Add the sort key to the WSM as computed data.
        member->addComputed(new SortKeyComputedData(sortKey));

        return PlanStage::ADVANCED;
    }

    if (stageState == PlanStage::IS_EOF) {
        _commonStats.isEOF = true;
    } else if (stageState == PlanStage::NEED_TIME) {
        ++_commonStats.needTime;
    } else if (stageState == PlanStage::NEED_YIELD) {
        ++_commonStats.needYield;
    }

    return stageState;
}
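// A minimal, self-contained sketch of the lazy-init pattern both SortKeyGeneratorStage
// versions above use. Everything here (StageState, ExpensiveHelper, LazyStage) is a
// hypothetical stand-in, not MongoDB's API: the first work() call only constructs the
// expensive helper and reports NEED_TIME, so the caller simply calls work() again.
#include <memory>

namespace sketch {

enum class StageState { ADVANCED, NEED_TIME, IS_EOF };

struct ExpensiveHelper {};

class LazyStage {
public:
    StageState work() {
        if (!_helper) {
            // First call: do the (possibly heavy) setup, return without a result.
            _helper = std::make_unique<ExpensiveHelper>();
            return StageState::NEED_TIME;
        }
        // Later calls would delegate to the child stage and decorate its output.
        return StageState::IS_EOF;
    }

private:
    std::unique_ptr<ExpensiveHelper> _helper;
};

}  // namespace sketch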
PlanStage::StageState TextOrStage::returnResults(WorkingSetID* out) {
    if (_scoreIterator == _scores.end()) {
        _internalState = State::kDone;
        return PlanStage::IS_EOF;
    }

    // Retrieve the record that contains the text score.
    TextRecordData textRecordData = _scoreIterator->second;
    ++_scoreIterator;

    // Ignore non-matched documents.
    if (textRecordData.score < 0) {
        invariant(textRecordData.wsid == WorkingSet::INVALID_ID);
        return PlanStage::NEED_TIME;
    }

    WorkingSetMember* wsm = _ws->get(textRecordData.wsid);

    // Populate the working set member with the text score and return it.
    wsm->addComputed(new TextScoreComputedData(textRecordData.score));
    *out = textRecordData.wsid;
    return PlanStage::ADVANCED;
}
PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) {
    PlanStage::StageState state = _child->work(out);

    // All done reading from _child.
    if (PlanStage::IS_EOF == state) {
        _child.reset();
        _keyGeoFilter.reset();

        // Adjust the annulus size depending on how many results we got.
        if (_results.empty()) {
            _radiusIncrement *= 2;
        } else if (_results.size() < 300) {
            _radiusIncrement *= 2;
        } else if (_results.size() > 600) {
            _radiusIncrement /= 2;
        }

        // Make a new ixscan next time.
        return PlanStage::NEED_TIME;
    }

    // Nothing to do unless we advance.
    if (PlanStage::ADVANCED != state) {
        return state;
    }

    WorkingSetMember* member = _ws->get(*out);
    // Must have an object in order to get geometry out of it.
    verify(member->hasObj());

    // The scans we use don't dedup so we must dedup them ourselves. We only put locs into
    // here if we know for sure whether or not we'll return them in this annulus.
    if (member->hasLoc()) {
        if (_seenInScan.end() != _seenInScan.find(member->loc)) {
            return PlanStage::NEED_TIME;
        }
    }

    // Get all the fields with that name from the document.
    BSONElementSet geom;
    member->obj.getFieldsDotted(_params.nearQuery.field, geom, false);
    if (geom.empty()) {
        return PlanStage::NEED_TIME;
    }

    // Some value that any distance we can calculate will be less than.
    double minDistance = numeric_limits<double>::max();
    BSONObj minDistanceObj;
    for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) {
        if (!git->isABSONObj()) {
            mongoutils::str::stream ss;
            ss << "s2near stage read invalid geometry element " << *git << " from child";
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            return PlanStage::FAILURE;
        }
        BSONObj obj = git->Obj();

        double distToObj;
        if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) {
            if (distToObj < minDistance) {
                minDistance = distToObj;
                minDistanceObj = obj;
            }
        } else {
            warning() << "unknown geometry: " << obj.toString();
        }
    }

    // If we're here we'll either include the doc in this annulus or reject it. It's safe to
    // ignore it if it pops up again in this annulus.
    if (member->hasLoc()) {
        _seenInScan.insert(member->loc);
    }

    // If the distance to the doc satisfies our distance criteria, add it to our buffered
    // results.
    if (minDistance >= _innerRadius &&
        (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) {
        _results.push(Result(*out, minDistance));
        if (_params.addDistMeta) {
            if (FLAT == _params.nearQuery.centroid.crs) {
                // FLAT queries expect the output distance in radians; the computed distance
                // is in meters, so divide by the earth's radius to convert.
                member->addComputed(
                    new GeoDistanceComputedData(minDistance / kRadiusOfEarthInMeters));
            } else {
                member->addComputed(new GeoDistanceComputedData(minDistance));
            }
        }
        if (_params.addPointMeta) {
            member->addComputed(new GeoNearPointComputedData(minDistanceObj));
        }
        if (member->hasLoc()) {
            _invalidationMap[member->loc] = *out;
        }
    }

    return PlanStage::NEED_TIME;
}
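// A minimal sketch of the annulus-growth heuristic used by both S2NearStage versions
// (the 300/600 thresholds and the factor of 2 come from the code above; the free
// function is a hypothetical extraction, not MongoDB's API): a sparse ring means the
// next search ring should be wider, a dense ring means it should be narrower.
#include <cstddef>

inline double nextRadiusIncrement(double current, std::size_t resultsInAnnulus) {
    if (resultsInAnnulus < 300) {
        return current * 2;  // empty or sparse annulus: widen the search faster
    }
    if (resultsInAnnulus > 600) {
        return current / 2;  // dense annulus: take smaller steps
    }
    return current;  // in between: keep the current step size
}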
PlanStage::StageState IndexScan::work(WorkingSetID* out) {
    ++_commonStats.works;

    if (NULL == _indexCursor.get()) {
        // First call to work(). Perform possibly heavy init.
        initIndexScan();
        checkEnd();
    } else if (_yieldMovedCursor) {
        _yieldMovedCursor = false;
        // Note that we're not calling next() here. We got the next thing when we recovered
        // from yielding.
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Grab the next (key, value) from the index.
    BSONObj keyObj = _indexCursor->getKey();
    DiskLoc loc = _indexCursor->getValue();

    // Move to the next result.
    // The underlying IndexCursor points at the *next* thing we want to return. We do this so
    // that if we're scanning an index looking for docs to delete we don't continually clobber
    // the thing we're pointing at.
    _indexCursor->next();
    checkEnd();

    if (_shouldDedup) {
        ++_specificStats.dupsTested;
        if (_returned.end() != _returned.find(loc)) {
            ++_specificStats.dupsDropped;
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        } else {
            _returned.insert(loc);
        }
    }

    if (Filter::passes(keyObj, _keyPattern, _filter)) {
        if (NULL != _filter) {
            ++_specificStats.matchTested;
        }

        // We must make a copy of the on-disk data since it can mutate during the execution of
        // this query.
        BSONObj ownedKeyObj = keyObj.getOwned();

        // Fill out the WSM.
        WorkingSetID id = _workingSet->allocate();
        WorkingSetMember* member = _workingSet->get(id);
        member->loc = loc;
        member->keyData.push_back(IndexKeyDatum(_keyPattern, ownedKeyObj));
        member->state = WorkingSetMember::LOC_AND_IDX;

        if (_params.addKeyMetadata) {
            BSONObjBuilder bob;
            bob.appendKeys(_keyPattern, ownedKeyObj);
            member->addComputed(new IndexKeyComputedData(bob.obj()));
        }

        *out = id;
        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }

    ++_commonStats.needTime;
    return PlanStage::NEED_TIME;
}
PlanStage::StageState TwoDNear::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (!_initted) {
        _initted = true;

        if (!_params.collection)
            return PlanStage::IS_EOF;

        IndexCatalog* indexCatalog = _params.collection->getIndexCatalog();

        IndexDescriptor* desc = indexCatalog->findIndexByKeyPattern(_params.indexKeyPattern);
        if (desc == NULL)
            return PlanStage::IS_EOF;
        TwoDAccessMethod* am = static_cast<TwoDAccessMethod*>(indexCatalog->getIndex(desc));

        auto_ptr<twod_exec::GeoSearch> search;
        search.reset(new twod_exec::GeoSearch(_params.collection,
                                              am,
                                              _params.nearQuery.centroid.oldPoint,
                                              _params.numWanted,
                                              _params.filter,
                                              _params.nearQuery.maxDistance,
                                              _params.nearQuery.isNearSphere
                                                  ? twod_exec::GEO_SPHERE
                                                  : twod_exec::GEO_PLANE));

        // This is where all the work is done. :(
        search->exec();

        _specificStats.objectsLoaded = search->_objectsLoaded;
        _specificStats.nscanned = search->_lookedAt;

        for (twod_exec::GeoHopper::Holder::iterator it = search->_points.begin();
             it != search->_points.end();
             it++) {
            WorkingSetID id = _workingSet->allocate();
            WorkingSetMember* member = _workingSet->get(id);
            member->loc = it->_loc;
            member->obj = _params.collection->docFor(member->loc);
            member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
            if (_params.addDistMeta) {
                member->addComputed(new GeoDistanceComputedData(it->_distance));
            }
            if (_params.addPointMeta) {
                member->addComputed(new GeoNearPointComputedData(it->_pt));
            }
            _results.push(Result(id, it->_distance));
            _invalidationMap.insert(pair<DiskLoc, WorkingSetID>(it->_loc, id));
        }
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    Result result = _results.top();
    _results.pop();
    *out = result.id;

    // Remove from invalidation map.
    WorkingSetMember* member = _workingSet->get(*out);

    // The WSM may have been mutated or deleted so it may not have a loc.
    if (member->hasLoc()) {
        typedef multimap<DiskLoc, WorkingSetID>::iterator MMIT;
        pair<MMIT, MMIT> range = _invalidationMap.equal_range(member->loc);
        for (MMIT it = range.first; it != range.second; ++it) {
            if (it->second == *out) {
                _invalidationMap.erase(it);
                break;
            }
        }
    }

    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) {
    PlanStage::StageState state = _child->work(out);

    // All done reading from _child.
    if (PlanStage::IS_EOF == state) {
        _child.reset();

        // Adjust the annulus size depending on how many results we got.
        if (_results.empty()) {
            _radiusIncrement *= 2;
        } else if (_results.size() < 300) {
            _radiusIncrement *= 2;
        } else if (_results.size() > 600) {
            _radiusIncrement /= 2;
        }

        // Make a new ixscan next time.
        return PlanStage::NEED_TIME;
    }

    // Nothing to do unless we advance.
    if (PlanStage::ADVANCED != state) {
        return state;
    }

    // TODO Speed improvements:
    //
    // 0. Modify fetch to preserve key data and test for intersection w/annulus.
    //
    // 1. keep track of what we've seen in this scan and possibly ignore it.
    //
    // 2. keep track of results we've returned before and ignore them.

    WorkingSetMember* member = _ws->get(*out);
    // Must have an object in order to get geometry out of it.
    verify(member->hasObj());

    // Get all the fields with that name from the document.
    BSONElementSet geom;
    member->obj.getFieldsDotted(_params.nearQuery.field, geom, false);
    if (geom.empty()) {
        return PlanStage::NEED_TIME;
    }

    // Some value that any distance we can calculate will be less than.
    double minDistance = numeric_limits<double>::max();
    BSONObj minDistanceObj;
    for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) {
        if (!git->isABSONObj()) {
            mongoutils::str::stream ss;
            ss << "s2near stage read invalid geometry element " << *git << " from child";
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            return PlanStage::FAILURE;
        }
        BSONObj obj = git->Obj();

        double distToObj;
        if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) {
            if (distToObj < minDistance) {
                minDistance = distToObj;
                minDistanceObj = obj;
            }
        } else {
            warning() << "unknown geometry: " << obj.toString();
        }
    }

    // If the distance to the doc satisfies our distance criteria, add it to our buffered
    // results.
    if (minDistance >= _innerRadius &&
        (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) {
        _results.push(Result(*out, minDistance));
        if (_params.addDistMeta) {
            member->addComputed(new GeoDistanceComputedData(minDistance));
        }
        if (_params.addPointMeta) {
            member->addComputed(new GeoNearPointComputedData(minDistanceObj));
        }
        if (member->hasLoc()) {
            _invalidationMap[member->loc] = *out;
        }
    }

    return PlanStage::NEED_TIME;
}
PlanStage::StageState IndexScan::work(WorkingSetID* out) {
    ++_commonStats.works;

    if (NULL == _indexCursor.get()) {
        // First call to work(). Perform cursor init.
        CursorOptions cursorOptions;

        // The limit is *required* for 2d $near, which is the only index that pays attention
        // to it anyway.
        cursorOptions.numWanted = _params.limit;
        if (1 == _params.direction) {
            cursorOptions.direction = CursorOptions::INCREASING;
        } else {
            cursorOptions.direction = CursorOptions::DECREASING;
        }

        IndexCursor* cursor;
        Status s = _iam->newCursor(&cursor);
        verify(s.isOK());
        _indexCursor.reset(cursor);
        _indexCursor->setOptions(cursorOptions);

        if (_params.bounds.isSimpleRange) {
            // Start at one key, end at another.
            Status status = _indexCursor->seek(_params.bounds.startKey);
            if (!status.isOK()) {
                warning() << "Seek failed: " << status.toString();
                _hitEnd = true;
                return PlanStage::FAILURE;
            }
            if (!isEOF()) {
                _specificStats.keysExamined = 1;
            }
        } else {
            // "Fast" Btree-specific navigation.
            _btreeCursor = static_cast<BtreeIndexCursor*>(_indexCursor.get());
            _checker.reset(new IndexBoundsChecker(&_params.bounds,
                                                  _descriptor->keyPattern(),
                                                  _params.direction));

            int nFields = _descriptor->keyPattern().nFields();
            vector<const BSONElement*> key;
            vector<bool> inc;
            key.resize(nFields);
            inc.resize(nFields);
            if (_checker->getStartKey(&key, &inc)) {
                _btreeCursor->seek(key, inc);
                _keyElts.resize(nFields);
                _keyEltsInc.resize(nFields);
            } else {
                _hitEnd = true;
            }
        }

        checkEnd();
    } else if (_yieldMovedCursor) {
        _yieldMovedCursor = false;
        // Note that we're not calling next() here.
    } else {
        // You're allowed to call work() even if the stage is EOF, but we can't call
        // _indexCursor->next() if we're EOF.
        if (!isEOF()) {
            _indexCursor->next();
            checkEnd();
        }
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    DiskLoc loc = _indexCursor->getValue();

    if (_shouldDedup) {
        ++_specificStats.dupsTested;
        if (_returned.end() != _returned.find(loc)) {
            ++_specificStats.dupsDropped;
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        } else {
            _returned.insert(loc);
        }
    }

    BSONObj ownedKeyObj = _indexCursor->getKey().getOwned();

    WorkingSetID id = _workingSet->allocate();
    WorkingSetMember* member = _workingSet->get(id);
    member->loc = loc;
    member->keyData.push_back(IndexKeyDatum(_descriptor->keyPattern(), ownedKeyObj));
    member->state = WorkingSetMember::LOC_AND_IDX;

    if (Filter::passes(member, _filter)) {
        if (NULL != _filter) {
            ++_specificStats.matchTested;
        }

        if (_params.addKeyMetadata) {
            BSONObjBuilder bob;
            bob.appendKeys(_descriptor->keyPattern(), ownedKeyObj);
            member->addComputed(new IndexKeyComputedData(bob.obj()));
        }

        *out = id;
        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }

    _workingSet->free(id);
    ++_commonStats.needTime;
    return PlanStage::NEED_TIME;
}
PlanStage::StageState IndexScan::doWork(WorkingSetID* out) {
    // Get the next kv pair from the index, if any.
    boost::optional<IndexKeyEntry> kv;
    try {
        switch (_scanState) {
            case INITIALIZING:
                kv = initIndexScan();
                break;
            case GETTING_NEXT:
                kv = _indexCursor->next();
                break;
            case NEED_SEEK:
                ++_specificStats.seeks;
                kv = _indexCursor->seek(_seekPoint);
                break;
            case HIT_END:
                return PlanStage::IS_EOF;
        }
    } catch (const WriteConflictException& wce) {
        *out = WorkingSet::INVALID_ID;
        return PlanStage::NEED_YIELD;
    }

    if (kv) {
        // In debug mode, check that the cursor isn't lying to us.
        if (kDebugBuild && !_endKey.isEmpty()) {
            int cmp = kv->key.woCompare(_endKey,
                                        Ordering::make(_params.descriptor->keyPattern()),
                                        /*compareFieldNames*/ false);
            if (cmp == 0)
                dassert(_endKeyInclusive);
            dassert(_forward ? cmp <= 0 : cmp >= 0);
        }

        ++_specificStats.keysExamined;
        if (_params.maxScan && _specificStats.keysExamined >= _params.maxScan) {
            kv = boost::none;
        }
    }

    if (kv && _checker) {
        switch (_checker->checkKey(kv->key, &_seekPoint)) {
            case IndexBoundsChecker::VALID:
                break;

            case IndexBoundsChecker::DONE:
                kv = boost::none;
                break;

            case IndexBoundsChecker::MUST_ADVANCE:
                _scanState = NEED_SEEK;
                return PlanStage::NEED_TIME;
        }
    }

    if (!kv) {
        _scanState = HIT_END;
        _commonStats.isEOF = true;
        _indexCursor.reset();
        return PlanStage::IS_EOF;
    }

    _scanState = GETTING_NEXT;

    if (_shouldDedup) {
        ++_specificStats.dupsTested;
        if (!_returned.insert(kv->loc).second) {
            // We've seen this RecordId before. Skip it this time.
            ++_specificStats.dupsDropped;
            return PlanStage::NEED_TIME;
        }
    }

    if (_filter) {
        if (!Filter::passes(kv->key, _keyPattern, _filter)) {
            return PlanStage::NEED_TIME;
        }
    }

    if (!kv->key.isOwned())
        kv->key = kv->key.getOwned();

    // We found something to return, so fill out the WSM.
    WorkingSetID id = _workingSet->allocate();
    WorkingSetMember* member = _workingSet->get(id);
    member->recordId = kv->loc;
    member->keyData.push_back(IndexKeyDatum(_keyPattern, kv->key, _iam));
    _workingSet->transitionToRecordIdAndIdx(id);

    if (_params.addKeyMetadata) {
        BSONObjBuilder bob;
        bob.appendKeys(_keyPattern, kv->key);
        member->addComputed(new IndexKeyComputedData(bob.obj()));
    }

    *out = id;
    return PlanStage::ADVANCED;
}
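// A minimal, self-contained sketch of how a caller drives a stage through the states
// returned above. This is NOT MongoDB's PlanExecutor; the enum, DummyStage, and drive()
// are hypothetical stand-ins illustrating the protocol: call work() repeatedly, consume
// ADVANCED results, treat NEED_TIME/NEED_YIELD as "call again", and stop on IS_EOF or
// FAILURE.
#include <cstdint>
#include <iostream>

namespace sketch {

using WorkingSetID = std::uint64_t;
enum class StageState { ADVANCED, NEED_TIME, NEED_YIELD, IS_EOF, FAILURE };

// Hypothetical stage that produces the ids 0, 1, 2, one per ADVANCED.
class DummyStage {
public:
    StageState work(WorkingSetID* out) {
        if (_next >= 3) {
            return StageState::IS_EOF;
        }
        *out = _next++;
        return StageState::ADVANCED;
    }

private:
    WorkingSetID _next = 0;
};

inline void drive(DummyStage& stage) {
    for (;;) {
        WorkingSetID id = 0;
        switch (stage.work(&id)) {
            case StageState::ADVANCED:
                std::cout << "got result " << id << "\n";
                break;
            case StageState::NEED_TIME:
            case StageState::NEED_YIELD:
                break;  // the stage made progress (or wants to yield); call again
            case StageState::IS_EOF:
                return;  // stream exhausted
            case StageState::FAILURE:
                std::cerr << "stage failed\n";
                return;
        }
    }
}

}  // namespace sketch

int main() {
    sketch::DummyStage stage;
    sketch::drive(stage);
    return 0;
}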
PlanStage::StageState TwoDNear::work(WorkingSetID* out) {
    ++_commonStats.works;

    if (!_initted) {
        _initted = true;

        Database* db = cc().database();
        if (!db)
            return PlanStage::IS_EOF;
        Collection* collection = db->getCollection(_params.ns);
        if (!collection)
            return PlanStage::IS_EOF;

        int idxNo = collection->details()->findIndexByKeyPattern(_params.indexKeyPattern);
        if (-1 == idxNo)
            return PlanStage::IS_EOF;

        IndexDescriptor* desc = collection->getIndexCatalog()->getDescriptor(idxNo);
        TwoDAccessMethod* am =
            static_cast<TwoDAccessMethod*>(collection->getIndexCatalog()->getIndex(desc));

        auto_ptr<twod_exec::GeoSearch> search;
        search.reset(new twod_exec::GeoSearch(am,
                                              _params.nearQuery.centroid.oldPoint,
                                              _params.numWanted,
                                              _params.filter,
                                              _params.nearQuery.maxDistance,
                                              _params.nearQuery.isNearSphere
                                                  ? twod_exec::GEO_SPHERE
                                                  : twod_exec::GEO_PLANE,
                                              _params.nearQuery.uniqueDocs,
                                              false));

        // This is where all the work is done. :(
        search->exec();

        _specificStats.objectsLoaded = search->_objectsLoaded;
        _specificStats.nscanned = search->_nscanned;

        for (twod_exec::GeoHopper::Holder::iterator it = search->_points.begin();
             it != search->_points.end();
             it++) {
            WorkingSetID id = _workingSet->allocate();
            WorkingSetMember* member = _workingSet->get(id);
            member->loc = it->_loc;
            member->obj = member->loc.obj();
            member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
            if (_params.addDistMeta) {
                member->addComputed(new GeoDistanceComputedData(it->_distance));
            }
            if (_params.addPointMeta) {
                member->addComputed(new GeoNearPointComputedData(it->_pt));
            }
            _results.push(Result(id, it->_distance));
            _invalidationMap.insert(pair<DiskLoc, WorkingSetID>(it->_loc, id));
        }
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    Result result = _results.top();
    _results.pop();
    *out = result.id;

    // Remove from invalidation map.
    WorkingSetMember* member = _workingSet->get(*out);

    // XXX make sure this is ok
    if (member->hasLoc()) {
        typedef multimap<DiskLoc, WorkingSetID>::iterator MMIT;
        pair<MMIT, MMIT> range = _invalidationMap.equal_range(member->loc);
        for (MMIT it = range.first; it != range.second; ++it) {
            if (it->second == *out) {
                _invalidationMap.erase(it);
                break;
            }
        }
    }

    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
PlanStage::StageState IndexScan::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (INITIALIZING == _scanState) {
        invariant(NULL == _indexCursor.get());
        initIndexScan();
    }

    if (CHECKING_END == _scanState) {
        checkEnd();
    }

    if (isEOF()) {
        _commonStats.isEOF = true;
        return PlanStage::IS_EOF;
    }

    if (GETTING_NEXT == _scanState) {
        // Grab the next (key, value) from the index.
        BSONObj keyObj = _indexCursor->getKey();
        RecordId loc = _indexCursor->getValue();

        bool filterPasses = Filter::passes(keyObj, _keyPattern, _filter);
        if (filterPasses) {
            // We must make a copy of the on-disk data since it can mutate during the
            // execution of this query.
            keyObj = keyObj.getOwned();
        }

        // Move to the next result.
        // The underlying IndexCursor points at the *next* thing we want to return. We do
        // this so that if we're scanning an index looking for docs to delete we don't
        // continually clobber the thing we're pointing at.
        _indexCursor->next();
        _scanState = CHECKING_END;

        if (_shouldDedup) {
            ++_specificStats.dupsTested;
            if (_returned.end() != _returned.find(loc)) {
                ++_specificStats.dupsDropped;
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            } else {
                _returned.insert(loc);
            }
        }

        if (filterPasses) {
            if (NULL != _filter) {
                ++_specificStats.matchTested;
            }

            // Fill out the WSM.
            WorkingSetID id = _workingSet->allocate();
            WorkingSetMember* member = _workingSet->get(id);
            member->loc = loc;
            member->keyData.push_back(IndexKeyDatum(_keyPattern, keyObj));
            member->state = WorkingSetMember::LOC_AND_IDX;

            if (_params.addKeyMetadata) {
                BSONObjBuilder bob;
                bob.appendKeys(_keyPattern, keyObj);
                member->addComputed(new IndexKeyComputedData(bob.obj()));
            }

            *out = id;
            ++_commonStats.advanced;
            return PlanStage::ADVANCED;
        }
    }

    ++_commonStats.needTime;
    return PlanStage::NEED_TIME;
}
PlanStage::StageState TextStage::fillOutResults() {
    Database* db = cc().database();
    Collection* collection = db->getCollection(_params.ns);
    if (NULL == collection) {
        warning() << "TextStage params namespace error";
        return PlanStage::FAILURE;
    }
    vector<IndexDescriptor*> idxMatches;
    collection->getIndexCatalog()->findIndexByType("text", idxMatches);
    if (1 != idxMatches.size()) {
        warning() << "Expected exactly one text index";
        return PlanStage::FAILURE;
    }

    // Get all the index scans for each term in our query.
    OwnedPointerVector<PlanStage> scanners;
    for (size_t i = 0; i < _params.query.getTerms().size(); i++) {
        const string& term = _params.query.getTerms()[i];
        IndexScanParams params;
        params.bounds.startKey = FTSIndexFormat::getIndexKey(MAX_WEIGHT, term,
                                                             _params.indexPrefix);
        params.bounds.endKey = FTSIndexFormat::getIndexKey(0, term, _params.indexPrefix);
        params.bounds.endKeyInclusive = true;
        params.bounds.isSimpleRange = true;
        params.descriptor = idxMatches[0];
        params.direction = -1;
        IndexScan* ixscan = new IndexScan(params, _ws, NULL);
        scanners.mutableVector().push_back(ixscan);
    }

    // Map: diskloc -> aggregate score for doc.
    typedef unordered_map<DiskLoc, double, DiskLoc::Hasher> ScoreMap;
    ScoreMap scores;

    // For each index scan, read all results and store scores.
    size_t currentIndexScanner = 0;
    while (currentIndexScanner < scanners.size()) {
        BSONObj keyObj;
        DiskLoc loc;

        WorkingSetID id;
        PlanStage::StageState state = scanners.vector()[currentIndexScanner]->work(&id);

        if (PlanStage::ADVANCED == state) {
            WorkingSetMember* wsm = _ws->get(id);
            IndexKeyDatum& keyDatum = wsm->keyData.back();
            filterAndScore(keyDatum.keyData, wsm->loc, &scores[wsm->loc]);
            _ws->free(id);
        } else if (PlanStage::IS_EOF == state) {
            // Done with this scan.
            ++currentIndexScanner;
        } else if (PlanStage::NEED_FETCH == state) {
            // We're calling work() on ixscans and they have no way to return a fetch.
            verify(false);
        } else if (PlanStage::NEED_TIME == state) {
            // We are a blocking stage, so ignore the scanner's request for more time.
        } else {
            verify(PlanStage::FAILURE == state);
            warning() << "error from index scan during text stage: invalid FAILURE state";
            return PlanStage::FAILURE;
        }
    }

    // Filter for phrases and negative terms, score and truncate.
    for (ScoreMap::iterator i = scores.begin(); i != scores.end(); ++i) {
        DiskLoc loc = i->first;
        double score = i->second;

        // Ignore non-matched documents.
        if (score < 0) {
            continue;
        }

        // Filter for phrases and negated terms.
        if (_params.query.hasNonTermPieces()) {
            if (!_ftsMatcher.matchesNonTerm(loc.obj())) {
                continue;
            }
        }

        // Add results to the working set as LOC_AND_UNOWNED_OBJ initially.
        // On invalidation, we copy the object and change the state to OWNED_OBJ.
        // Fill out a WSM.
        WorkingSetID id = _ws->allocate();
        WorkingSetMember* member = _ws->get(id);
        member->loc = loc;
        member->obj = member->loc.obj();
        member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
        member->addComputed(new TextScoreComputedData(score));

        _results.push_back(id);
        _wsidByDiskLoc[member->loc] = id;
    }

    _filledOutResults = true;

    if (_results.size() == 0) {
        return PlanStage::IS_EOF;
    }
    return PlanStage::NEED_TIME;
}
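// A minimal sketch of the score-aggregation pattern fillOutResults() implements. The
// types are hypothetical stand-ins (the real map is keyed on DiskLoc and the weights
// come from FTS index keys via filterAndScore()): each term scan adds its weight into
// a per-document accumulator, and a rejected document is marked with a negative score
// so every later pass skips it.
#include <unordered_map>

namespace sketch {

using RecordIdT = long long;  // stand-in for DiskLoc

class ScoreMap {
public:
    // Accumulate one term's contribution for one document.
    void addTermScore(RecordIdT rid, double weight) {
        double& s = _scores[rid];  // value-initialized to 0.0 on first access
        if (s >= 0) {
            s += weight;
        }
    }

    // Mark a document rejected (e.g. a negated term matched); negative means "ignore".
    void reject(RecordIdT rid) {
        _scores[rid] = -1;
    }

    const std::unordered_map<RecordIdT, double>& scores() const {
        return _scores;
    }

private:
    std::unordered_map<RecordIdT, double> _scores;
};

}  // namespace sketch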