void SearchDataI::splitPY(const string& pinyin,unsigned int start,int num,vector<string> subStr,CollectorPtr collector) { ostringstream osstmp; for (unsigned int k = 0; k < subStr.size(); k++) { osstmp << subStr[k] << " "; } MCE_DEBUG("splitPY: pingyin:" << pinyin << " start:" << start << " num:" << num << " subStr:" << osstmp.str()); if(start>=pinyin.size()) { ostringstream oss; for(int i=0;i<num;++i){ oss << subStr[i] << " "; } MCE_DEBUG("split res: "<<oss.str()); ScorerPtr sp = pyIndex_.getScorer(subStr,num); if(sp) sp->score(collector); return; }else { for(unsigned int end=start+1;end<=pinyin.size();++end) { string temp = pinyin.substr(start,end-start); if(Dictionary::instance().checkSyntax(temp)){ subStr[num]=temp; splitPY(pinyin,end,num+1,subStr,collector); } } } }
/// Records a hit for the given document and checks the test expectations.
/// The incoming doc id is shifted by `base` before it is stored and checked.
virtual void collect(int32_t doc) {
    const double score = scorer->score();
    doc += base;
    docs.add(newLucene<TestHit>(doc, score));
    // Every collected hit must have a positive score and be document 0 or 5.
    BOOST_CHECK(score > 0);
    BOOST_CHECK(doc == 0 || doc == 5);
}
void WorkplaceSearchDataI::splitPY(const string& pinyin,unsigned int start,int num,vector<string> subStr,CollectorPtr collector) { TimeCost tc = TimeCost::create("WorkplaceSearchDataI::splitPY",1); vector<string> resultStr; getSubStr(pinyin, start, num, subStr, resultStr); ostringstream osstmp; for (unsigned int k = 0; k < resultStr.size(); k++) { osstmp << resultStr[k] << " "; MCE_DEBUG("resultStr[" << k << "]:" << resultStr[k]); } tc.step(osstmp.str()); MCE_DEBUG("WorkplaceSearchDataI::splitPY: pingyin:" << pinyin << " start:" << start << " num:" << num << " resultStr:" << osstmp.str() << " resultStr.size:" << resultStr.size()); //ScorerPtr sp = pyIndex_.getScorer(resultStr,resultStr.size()); ScorerPtr sp = getScorer(resultStr,resultStr.size()); if(sp) sp->score(collector); tc.step(pinyin); }
/**
 * Runs the query: repeatedly advances to the next batch of matched documents,
 * scores the batch and hands it to the collector until advance() reports no
 * more matches.
 *
 * @param pCollector Receives each scored batch via collect().
 * @param pScorer    Scores each batch; bracketed by beginQuery()/endQuery().
 * @param pTracer    Tracer attached to the matched-document set.
 */
void QueryExecutor::execute(HitCollectorPtr& pCollector, ScorerPtr& pScorer, const QueryTracerPtr& pTracer)
{
    // TODO: allocate from mempool
    MatchedDocSet matchedDocs(m_pPool, DEFAULT_MATCHED_DOC_BUFFER_SIZE, size());
    matchedDocs.setTracer(pTracer);

    QueryFeature queryFeature;
    queryFeature.reserve(size());
    extractFeature(queryFeature);

    pScorer->beginQuery(m_pFeatureProvider, queryFeature);

    // The batch buffer is reset after every processed batch.
    for (; advance(matchedDocs) > 0; matchedDocs.reset())
    {
        FX_TRACE("Matched doc count: [%u]", static_cast<uint32_t>(matchedDocs.size()));
        pScorer->score(matchedDocs);
        pCollector->collect(matchedDocs);
    }

    pScorer->endQuery();
}
/**
 * Searches for the query and returns one page of the collected results.
 *
 * The query is split into words. If StrUtil::mayPinyin() accepts it, the pinyin
 * index is searched through splitPY(); the hzIndex_ scorer is always run as
 * well. Both feed the same collector, whose topDocs() ids are then paged.
 *
 * @param query Search string.
 * @param begin Zero-based index of the first result to return.
 * @param limit Maximum number of results to return.
 * @return stores_ entries for results [begin, begin + limit), clipped to the
 *         number of results available. Empty when begin is negative, limit is
 *         not positive, or begin is past the last result.
 */
Str2StrMapSeq SearchDataI::search(const string& query, int begin, int limit) {
  TimeCost tc = TimeCost::create("SearchDataI::Search",1);

  vector<string> words;
  splitWord(query,words);

  CollectorPtr collector = new Collector(100);

  if(StrUtil::mayPinyin(query,words)){
    MCE_DEBUG("start py search: " << query);
    // One slot per input character: a segmentation can never have more pieces.
    vector<string> subStr(query.size());
    splitPY(query,0,0,subStr,collector);
    MCE_DEBUG("end py search");
  }

  ScorerPtr sp = hzIndex_.getScorer(words,words.size());
  if(sp == 0){
    MCE_DEBUG("zero");
  }else{
    MCE_DEBUG("get "<<sp.get());
    sp->score(collector);
  }

  vector<int> ids = collector->topDocs();
  const int size = static_cast<int>(ids.size());

  Str2StrMapSeq res;

  // Reject windows that select nothing. A negative begin previously slipped
  // through the "size <= begin" test and indexed ids[] with a negative offset.
  if (begin < 0 || limit <= 0 || begin >= size) {
    return res;
  }

  // Clip against the remaining count instead of computing begin + limit first,
  // which could overflow int for large arguments.
  const int remaining = size - begin;
  const int count = (limit < remaining) ? limit : remaining;
  const int total = begin + count;

  for(int i = begin; i < total; ++i){
    res.push_back(stores_[ids[i]]);
  }
  return res;
}
/// Wraps the given scorer. The base Scorer is constructed with the wrapped
/// scorer's Similarity; curDoc starts at -1 and curScore at 0.0.
/// The scorer is dereferenced in the base initializer, so it must not be null.
ScoreCachingWrappingScorer::ScoreCachingWrappingScorer(ScorerPtr scorer)
    : Scorer(scorer->getSimilarity()) {
    _scorer = scorer;
    curDoc = -1;
    curScore = 0.0;
}