Beispiel #1
0
        /*
         * GO: opens one tree cursor per term in the query, processes the terms by advancing
         * their cursors and storing the partial results, and lastly calculates the top
         * results.
         * @param results, the priority queue that receives the top results
         * @param limit, number of results to keep in the priority queue
         */
        void FTSSearch::go(Results* results, unsigned limit ) {
            // One cursor per query term. Each cursor is bounded by the index keys built for
            // that term from MAX_WEIGHT and from 0. The trailing `true, -1` arguments are
            // presumably "end key inclusive" and "reverse direction" -- confirm against
            // BtreeCursor::make.
            vector< shared_ptr<BtreeCursor> > cursors;

            for ( unsigned i = 0; i < _query.getTerms().size(); i++ ) {
                const string& term = _query.getTerms()[i];
                BSONObj min = FTSIndexFormat::getIndexKey( MAX_WEIGHT, term, _indexPrefix );
                BSONObj max = FTSIndexFormat::getIndexKey( 0, term, _indexPrefix );
                shared_ptr<BtreeCursor> c( BtreeCursor::make( _ns, _id, min, max, true, -1 ) );
                cursors.push_back( c );
            }

            // Step every non-exhausted cursor once per pass, handing its current position
            // to _process(). Stop when a full pass finds all cursors at eof, or when the
            // server is shutting down.
            while ( !inShutdown() ) {
                bool gotAny = false;
                for ( unsigned i = 0; i < cursors.size(); i++ ) {
                    if ( cursors[i]->eof() )
                        continue;
                    gotAny = true;
                    _process( cursors[i].get() );
                    cursors[i]->advance();
                }

                if ( !gotAny )
                    break;

                // Occasionally give the operation a chance to be interrupted.
                RARELY killCurrentOp.checkForInterrupt();
            }


            // Select the best entries from _scores. results->top() is treated as the
            // lowest-scored entry currently held, so it is the one evicted when a better
            // candidate shows up.
            for ( Scores::iterator i = _scores.begin(); i != _scores.end(); ++i ) {

                // Entries with a negative score are never returned.
                if ( i->second < 0 )
                    continue;

                // priority queue
                if ( results->size() < limit ) { // case a: queue unfilled

                    if ( !_ok( i->first ) )
                        continue;

                    results->push( ScoredLocation( i->first, i->second ) );

                }
                else if ( results->size() > 0 &&
                          i->second > results->top().score ) { // case b: queue filled

                    // The size() guard matters when limit == 0 and the queue is empty:
                    // calling top() on an empty queue is undefined behaviour.

                    if ( !_ok( i->first ) )
                        continue;

                    results->pop();
                    results->push( ScoredLocation( i->first, i->second ) );
                }
                else {
                    // else do nothing (case c)
                }

            }

        }
Beispiel #2
0
    /**
     * Runs the whole text query up front and fills _results.
     *
     * Steps: look up the collection and its single "text" index, run one index scan per
     * query term and feed every returned key to filterAndScore(), then turn _scores into
     * _results (dropping negative scores and, when the query has non-term pieces,
     * documents rejected by _ftsMatcher), sort, and truncate to _params.limit.
     *
     * @return PlanStage::FAILURE if the collection is missing, the collection does not
     *         have exactly one text index, or an index scan reports failure;
     *         PlanStage::IS_EOF if there are no results; PlanStage::NEED_TIME otherwise.
     */
    PlanStage::StageState TextStage::fillOutResults() {
        Database* db = cc().database();
        Collection* collection = db->getCollection( _params.ns );
        if (NULL == collection) {
            warning() << "TextStage params namespace error";
            return PlanStage::FAILURE;
        }
        vector<int> idxMatches;
        collection->details()->findIndexByType("text", idxMatches);
        if (1 != idxMatches.size()) {
            warning() << "Expected exactly one text index";
            return PlanStage::FAILURE;
        }

        {
            // Owns the per-term index scans and deletes them when this scope is left by
            // any path: normal completion, the early FAILURE return below, or an
            // exception propagating out of work()/filterAndScore() (or out of verify(),
            // if it throws).
            struct ScannerOwner {
                vector<IndexScan*> scans;
                ~ScannerOwner() {
                    for (size_t i = 0; i < scans.size(); ++i) {
                        delete scans[i];
                    }
                }
            } owner;
            vector<IndexScan*>& scanners = owner.scans;

            // Reserve first so that push_back below cannot reallocate (and therefore
            // cannot throw) after a scan has been allocated.
            scanners.reserve(_params.query.getTerms().size());

            // Get all the index scans for each term in our query.
            for (size_t i = 0; i < _params.query.getTerms().size(); i++) {
                const string& term = _params.query.getTerms()[i];
                IndexScanParams params;
                params.bounds.startKey = FTSIndexFormat::getIndexKey(MAX_WEIGHT, term,
                                                                     _params.indexPrefix);
                params.bounds.endKey = FTSIndexFormat::getIndexKey(0, term,
                                                                   _params.indexPrefix);
                params.bounds.endKeyInclusive = true;
                params.bounds.isSimpleRange = true;
                params.descriptor = collection->getIndexCatalog()->getDescriptor(idxMatches[0]);
                params.forceBtreeAccessMethod = true;
                params.direction = -1;
                scanners.push_back(new IndexScan(params, _ws, NULL));
            }

            // For each index scan, read all results and store scores.
            size_t currentIndexScanner = 0;
            while (currentIndexScanner < scanners.size()) {
                WorkingSetID id;
                PlanStage::StageState state = scanners[currentIndexScanner]->work(&id);

                if (PlanStage::ADVANCED == state) {
                    // Score the most recently added key of the member, then release the
                    // working-set slot since only the score is kept.
                    WorkingSetMember* wsm = _ws->get(id);
                    IndexKeyDatum& keyDatum = wsm->keyData.back();
                    filterAndScore(keyDatum.keyData, wsm->loc);
                    _ws->free(id);
                }
                else if (PlanStage::IS_EOF == state) {
                    // Done with this scan.
                    ++currentIndexScanner;
                }
                else if (PlanStage::NEED_FETCH == state) {
                    // We're calling work() on ixscans and they have no way to return a fetch.
                    verify(false);
                }
                else if (PlanStage::NEED_TIME == state) {
                    // We are a blocking stage, so ignore scanner's request for more time.
                }
                else {
                    verify(PlanStage::FAILURE == state);
                    warning() << "error from index scan during text stage: invalid FAILURE state";
                    return PlanStage::FAILURE;
                }
            }
        }   // All index scans are deleted here, before the filtering phase.

        // Filter for phrases and negative terms, score and truncate.
        for (ScoreMap::iterator i = _scores.begin(); i != _scores.end(); ++i) {
            DiskLoc loc = i->first;
            double score = i->second;

            // Ignore non-matched documents.
            if (score < 0) {
                continue;
            }

            // Filter for phrases and negated terms
            if (_params.query.hasNonTermPieces()) {
                Record* rec_p = loc.rec();
                if (!_ftsMatcher.matchesNonTerm(BSONObj::make(rec_p))) {
                    continue;
                }
            }
            _results.push_back(ScoredLocation(loc, score));
        }

        // Sort results by score (not always in correct order, especially w.r.t. multiterm).
        sort(_results.begin(), _results.end());

        if (_results.size() > _params.limit) {
            _results.resize(_params.limit);
        }

        _filledOutResults = true;

        if (_results.size() == 0) {
            return PlanStage::IS_EOF;
        }
        return PlanStage::NEED_TIME;
    }