// Drains every per-term index scan, aggregates scores per document, filters
// for phrases/negated terms, then sorts and truncates the final result list.
// Returns NEED_TIME when results were produced, IS_EOF when none matched,
// and FAILURE on setup or child-stage errors.
PlanStage::StageState TextStage::fillOutResults() {
    Database* db = cc().database();
    Collection* collection = db->getCollection( _params.ns );
    if (NULL == collection) {
        // Namespace vanished (e.g. dropped) between planning and execution.
        warning() << "TextStage params namespace error";
        return PlanStage::FAILURE;
    }
    vector<int> idxMatches;
    collection->details()->findIndexByType("text", idxMatches);
    if (1 != idxMatches.size()) {
        // The plan was built against a single text index; anything else is unusable.
        warning() << "Expected exactly one text index";
        return PlanStage::FAILURE;
    }
    // Get all the index scans for each term in our query.
    // NOTE(review): scanners are raw-owned and freed via two duplicated delete
    // loops below; an exception between here and those loops would leak them.
    vector<IndexScan*> scanners;
    for (size_t i = 0; i < _params.query.getTerms().size(); i++) {
        const string& term = _params.query.getTerms()[i];
        IndexScanParams params;
        // Scan descending (direction -1) from the MAX_WEIGHT key down to the
        // weight-0 key for this term, inclusive at the low end.
        params.bounds.startKey = FTSIndexFormat::getIndexKey(MAX_WEIGHT, term, _params.indexPrefix);
        params.bounds.endKey = FTSIndexFormat::getIndexKey(0, term, _params.indexPrefix);
        params.bounds.endKeyInclusive = true;
        params.bounds.isSimpleRange = true;
        params.descriptor = collection->getIndexCatalog()->getDescriptor(idxMatches[0]);
        params.forceBtreeAccessMethod = true;
        params.direction = -1;
        // No filter expression (third arg NULL): all keys in range are consumed.
        IndexScan* ixscan = new IndexScan(params, _ws, NULL);
        scanners.push_back(ixscan);
    }
    // For each index scan, read all results and store scores.
    size_t currentIndexScanner = 0;
    while (currentIndexScanner < scanners.size()) {
        BSONObj keyObj;
        DiskLoc loc;
        WorkingSetID id;
        PlanStage::StageState state = scanners[currentIndexScanner]->work(&id);
        if (PlanStage::ADVANCED == state) {
            WorkingSetMember* wsm = _ws->get(id);
            IndexKeyDatum& keyDatum = wsm->keyData.back();
            // Accumulate this key's contribution into the member _scores map,
            // then release the working-set slot — only the score survives.
            filterAndScore(keyDatum.keyData, wsm->loc);
            _ws->free(id);
        }
        else if (PlanStage::IS_EOF == state) {
            // Done with this scan.
            ++currentIndexScanner;
        }
        else if (PlanStage::NEED_FETCH == state) {
            // We're calling work() on ixscans and they have no way to return a fetch.
            verify(false);
        }
        else if (PlanStage::NEED_TIME == state) {
            // We are a blocking stage, so ignore scanner's request for more time.
        }
        else {
            verify(PlanStage::FAILURE == state);
            warning() << "error from index scan during text stage: invalid FAILURE state";
            // Clean up all scanners before bailing out on child failure.
            for (size_t i=0; i<scanners.size(); ++i) {
                delete scanners[i];
            }
            return PlanStage::FAILURE;
        }
    }
    // Normal-path cleanup: all scans exhausted, scanners no longer needed.
    for (size_t i=0; i<scanners.size(); ++i) {
        delete scanners[i];
    }
    // Filter for phrases and negative terms, score and truncate.
    for (ScoreMap::iterator i = _scores.begin(); i != _scores.end(); ++i) {
        DiskLoc loc = i->first;
        double score = i->second;
        // Ignore non-matched documents.
        // (Negative score presumably marks docs rejected by filterAndScore —
        // TODO confirm against filterAndScore's contract.)
        if (score < 0) {
            continue;
        }
        // Filter for phrases and negated terms
        if (_params.query.hasNonTermPieces()) {
            // Requires the full document, so fetch the record for this loc.
            Record* rec_p = loc.rec();
            if (!_ftsMatcher.matchesNonTerm(BSONObj::make(rec_p))) {
                continue;
            }
        }
        _results.push_back(ScoredLocation(loc, score));
    }
    // Sort results by score (not always in correct order, especially w.r.t. multiterm).
    sort(_results.begin(), _results.end());
    // Enforce the query's limit after sorting so the best-ranked results are kept.
    if (_results.size() > _params.limit) {
        _results.resize(_params.limit);
    }
    _filledOutResults = true;
    if (_results.size() == 0) {
        return PlanStage::IS_EOF;
    }
    // Results are buffered; signal the caller to keep working us to emit them.
    return PlanStage::NEED_TIME;
}
// Newer variant: drains every per-term index scan, aggregates scores in a
// function-local map, filters for phrases/negated terms, and materializes
// each surviving document as a WorkingSetMember (LOC_AND_UNOWNED_OBJ) with a
// computed text score. Returns NEED_TIME when results were produced, IS_EOF
// when none matched, and FAILURE on setup or child-stage errors.
PlanStage::StageState TextStage::fillOutResults() {
    Database* db = cc().database();
    Collection* collection = db->getCollection( _params.ns );
    if (NULL == collection) {
        // Namespace vanished (e.g. dropped) between planning and execution.
        warning() << "TextStage params namespace error";
        return PlanStage::FAILURE;
    }
    vector<IndexDescriptor*> idxMatches;
    collection->getIndexCatalog()->findIndexByType("text", idxMatches);
    if (1 != idxMatches.size()) {
        // The plan was built against a single text index; anything else is unusable.
        warning() << "Expected exactly one text index";
        return PlanStage::FAILURE;
    }
    // Get all the index scans for each term in our query.
    // OwnedPointerVector deletes the scanners on every exit path (RAII),
    // unlike the older manual delete-loop approach.
    OwnedPointerVector<PlanStage> scanners;
    for (size_t i = 0; i < _params.query.getTerms().size(); i++) {
        const string& term = _params.query.getTerms()[i];
        IndexScanParams params;
        // Scan descending (direction -1) from the MAX_WEIGHT key down to the
        // weight-0 key for this term, inclusive at the low end.
        params.bounds.startKey = FTSIndexFormat::getIndexKey(MAX_WEIGHT, term, _params.indexPrefix);
        params.bounds.endKey = FTSIndexFormat::getIndexKey(0, term, _params.indexPrefix);
        params.bounds.endKeyInclusive = true;
        params.bounds.isSimpleRange = true;
        params.descriptor = idxMatches[0];
        params.direction = -1;
        // No filter expression (third arg NULL): all keys in range are consumed.
        IndexScan* ixscan = new IndexScan(params, _ws, NULL);
        scanners.mutableVector().push_back(ixscan);
    }
    // Map: diskloc -> aggregate score for doc.
    typedef unordered_map<DiskLoc, double, DiskLoc::Hasher> ScoreMap;
    ScoreMap scores;
    // For each index scan, read all results and store scores.
    size_t currentIndexScanner = 0;
    while (currentIndexScanner < scanners.size()) {
        BSONObj keyObj;
        DiskLoc loc;
        WorkingSetID id;
        PlanStage::StageState state = scanners.vector()[currentIndexScanner]->work(&id);
        if (PlanStage::ADVANCED == state) {
            WorkingSetMember* wsm = _ws->get(id);
            IndexKeyDatum& keyDatum = wsm->keyData.back();
            // operator[] default-inserts 0.0 for a first-seen loc; the key's
            // contribution is folded into that slot, then the working-set
            // entry is released — only the score survives.
            filterAndScore(keyDatum.keyData, wsm->loc, &scores[wsm->loc]);
            _ws->free(id);
        }
        else if (PlanStage::IS_EOF == state) {
            // Done with this scan.
            ++currentIndexScanner;
        }
        else if (PlanStage::NEED_FETCH == state) {
            // We're calling work() on ixscans and they have no way to return a fetch.
            verify(false);
        }
        else if (PlanStage::NEED_TIME == state) {
            // We are a blocking stage, so ignore scanner's request for more time.
        }
        else {
            verify(PlanStage::FAILURE == state);
            warning() << "error from index scan during text stage: invalid FAILURE state";
            // Scanners are freed automatically by OwnedPointerVector here.
            return PlanStage::FAILURE;
        }
    }
    // Filter for phrases and negative terms, score and truncate.
    for (ScoreMap::iterator i = scores.begin(); i != scores.end(); ++i) {
        DiskLoc loc = i->first;
        double score = i->second;
        // Ignore non-matched documents.
        // (Negative score presumably marks docs rejected by filterAndScore —
        // TODO confirm against filterAndScore's contract.)
        if (score < 0) {
            continue;
        }
        // Filter for phrases and negated terms
        if (_params.query.hasNonTermPieces()) {
            if (!_ftsMatcher.matchesNonTerm(loc.obj())) {
                continue;
            }
        }
        // Add results to working set as LOC_AND_UNOWNED_OBJ initially.
        // On invalidation, we copy the object and change the state to
        // OWNED_OBJ.
        // Fill out a WSM.
        WorkingSetID id = _ws->allocate();
        WorkingSetMember* member = _ws->get(id);
        member->loc = loc;
        member->obj = member->loc.obj();
        member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
        // Attach the aggregate text score so downstream stages can read it.
        member->addComputed(new TextScoreComputedData(score));
        _results.push_back(id);
        // Track loc -> WSID so invalidate() can find and own-ify this member.
        _wsidByDiskLoc[member->loc] = id;
    }
    _filledOutResults = true;
    if (_results.size() == 0) {
        return PlanStage::IS_EOF;
    }
    // Results are buffered; signal the caller to keep working us to emit them.
    return PlanStage::NEED_TIME;
}