Ejemplo n.º 1
0
    PlanStage::StageState TextStage::fillOutResults() {
        Database* db = cc().database();
        Collection* collection = db->getCollection( _params.ns );
        if (NULL == collection) {
            warning() << "TextStage params namespace error";
            return PlanStage::FAILURE;
        }
        vector<int> idxMatches;
        collection->details()->findIndexByType("text", idxMatches);
        if (1 != idxMatches.size()) {
            warning() << "Expected exactly one text index";
            return PlanStage::FAILURE;
        }

        // Get all the index scans for each term in our query.
        vector<IndexScan*> scanners;
        for (size_t i = 0; i < _params.query.getTerms().size(); i++) {
            const string& term = _params.query.getTerms()[i];
            IndexScanParams params;
            params.bounds.startKey = FTSIndexFormat::getIndexKey(MAX_WEIGHT, term,
                                                                 _params.indexPrefix);
            params.bounds.endKey = FTSIndexFormat::getIndexKey(0, term, _params.indexPrefix);
            params.bounds.endKeyInclusive = true;
            params.bounds.isSimpleRange = true;
            params.descriptor = collection->getIndexCatalog()->getDescriptor(idxMatches[0]);
            params.forceBtreeAccessMethod = true;
            params.direction = -1;
            IndexScan* ixscan = new IndexScan(params, _ws, NULL);
            scanners.push_back(ixscan);
        }

        // For each index scan, read all results and store scores.
        size_t currentIndexScanner = 0;
        while (currentIndexScanner < scanners.size()) {
            BSONObj keyObj;
            DiskLoc loc;

            WorkingSetID id;
            PlanStage::StageState state = scanners[currentIndexScanner]->work(&id);

            if (PlanStage::ADVANCED == state) {
                WorkingSetMember* wsm = _ws->get(id);
                IndexKeyDatum& keyDatum = wsm->keyData.back();
                filterAndScore(keyDatum.keyData, wsm->loc);
                _ws->free(id);
            }
            else if (PlanStage::IS_EOF == state) {
                // Done with this scan.
                ++currentIndexScanner;
            }
            else if (PlanStage::NEED_FETCH == state) {
                // We're calling work() on ixscans and they have no way to return a fetch.
                verify(false);
            }
            else if (PlanStage::NEED_TIME == state) {
                // We are a blocking stage, so ignore scanner's request for more time.
            }
            else {
                verify(PlanStage::FAILURE == state);
                warning() << "error from index scan during text stage: invalid FAILURE state";
                for (size_t i=0; i<scanners.size(); ++i) { delete scanners[i]; }
                return PlanStage::FAILURE;
            }
        }

        for (size_t i=0; i<scanners.size(); ++i) { delete scanners[i]; }

        // Filter for phrases and negative terms, score and truncate.
        for (ScoreMap::iterator i = _scores.begin(); i != _scores.end(); ++i) {
            DiskLoc loc = i->first;
            double score = i->second;

            // Ignore non-matched documents.
            if (score < 0) {
                continue;
            }

            // Filter for phrases and negated terms
            if (_params.query.hasNonTermPieces()) {
                Record* rec_p = loc.rec();
                if (!_ftsMatcher.matchesNonTerm(BSONObj::make(rec_p))) {
                    continue;
                }
            }
            _results.push_back(ScoredLocation(loc, score));
        }

        // Sort results by score (not always in correct order, especially w.r.t. multiterm).
        sort(_results.begin(), _results.end());

        if (_results.size() > _params.limit) {
            _results.resize(_params.limit);
        }

        _filledOutResults = true;

        if (_results.size() == 0) {
            return PlanStage::IS_EOF;
        }
        return PlanStage::NEED_TIME;
    }
Ejemplo n.º 2
0
    PlanStage::StageState TextStage::fillOutResults() {
        Database* db = cc().database();
        Collection* collection = db->getCollection( _params.ns );
        if (NULL == collection) {
            warning() << "TextStage params namespace error";
            return PlanStage::FAILURE;
        }
        vector<IndexDescriptor*> idxMatches;
        collection->getIndexCatalog()->findIndexByType("text", idxMatches);
        if (1 != idxMatches.size()) {
            warning() << "Expected exactly one text index";
            return PlanStage::FAILURE;
        }

        // Get all the index scans for each term in our query.
        OwnedPointerVector<PlanStage> scanners;
        for (size_t i = 0; i < _params.query.getTerms().size(); i++) {
            const string& term = _params.query.getTerms()[i];
            IndexScanParams params;
            params.bounds.startKey = FTSIndexFormat::getIndexKey(MAX_WEIGHT, term,
                                                                 _params.indexPrefix);
            params.bounds.endKey = FTSIndexFormat::getIndexKey(0, term, _params.indexPrefix);
            params.bounds.endKeyInclusive = true;
            params.bounds.isSimpleRange = true;
            params.descriptor = idxMatches[0];
            params.direction = -1;
            IndexScan* ixscan = new IndexScan(params, _ws, NULL);
            scanners.mutableVector().push_back(ixscan);
        }

        // Map: diskloc -> aggregate score for doc.
        typedef unordered_map<DiskLoc, double, DiskLoc::Hasher> ScoreMap;
        ScoreMap scores;

        // For each index scan, read all results and store scores.
        size_t currentIndexScanner = 0;
        while (currentIndexScanner < scanners.size()) {
            BSONObj keyObj;
            DiskLoc loc;

            WorkingSetID id;
            PlanStage::StageState state = scanners.vector()[currentIndexScanner]->work(&id);

            if (PlanStage::ADVANCED == state) {
                WorkingSetMember* wsm = _ws->get(id);
                IndexKeyDatum& keyDatum = wsm->keyData.back();
                filterAndScore(keyDatum.keyData, wsm->loc, &scores[wsm->loc]);
                _ws->free(id);
            }
            else if (PlanStage::IS_EOF == state) {
                // Done with this scan.
                ++currentIndexScanner;
            }
            else if (PlanStage::NEED_FETCH == state) {
                // We're calling work() on ixscans and they have no way to return a fetch.
                verify(false);
            }
            else if (PlanStage::NEED_TIME == state) {
                // We are a blocking stage, so ignore scanner's request for more time.
            }
            else {
                verify(PlanStage::FAILURE == state);
                warning() << "error from index scan during text stage: invalid FAILURE state";
                return PlanStage::FAILURE;
            }
        }

        // Filter for phrases and negative terms, score and truncate.
        for (ScoreMap::iterator i = scores.begin(); i != scores.end(); ++i) {
            DiskLoc loc = i->first;
            double score = i->second;

            // Ignore non-matched documents.
            if (score < 0) {
                continue;
            }

            // Filter for phrases and negated terms
            if (_params.query.hasNonTermPieces()) {
                if (!_ftsMatcher.matchesNonTerm(loc.obj())) {
                    continue;
                }
            }

            // Add results to working set as LOC_AND_UNOWNED_OBJ initially.
            // On invalidation, we copy the object and change the state to
            // OWNED_OBJ.
            // Fill out a WSM.
            WorkingSetID id = _ws->allocate();
            WorkingSetMember* member = _ws->get(id);
            member->loc = loc;
            member->obj = member->loc.obj();
            member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
            member->addComputed(new TextScoreComputedData(score));

            _results.push_back(id);
            _wsidByDiskLoc[member->loc] = id;
        }

        _filledOutResults = true;

        if (_results.size() == 0) {
            return PlanStage::IS_EOF;
        }
        return PlanStage::NEED_TIME;
    }