// Rebuilds outList with synonym lookups applied to every term.
//
// For each position in outList, windows of 1..n consecutive terms starting at
// that position are tried in increasing length. Each window is looked up via
// getSynonym(), first as the term texts joined by single spaces (only for
// windows longer than one term), then as the term texts concatenated directly.
// The first window that getSynonym() accepts ends the search for that position.
//
// outList - in/out term list; replaced by the rebuilt list on return.
// n       - maximum number of consecutive terms combined into one lookup key.
//
// NOTE(review): getSynonym() is defined elsewhere; presumably it appends the
// synonym terms to the list it is given and reports extra nesting depth through
// its last argument - confirm against its definition.
void CommonLanguageAnalyzer::analyzeSynonym(TermList& outList, size_t n)
{
    static UString SPACE(" ", izenelib::util::UString::UTF_8);

    const size_t termCount = outList.size();
    TermList expanded;

    for (size_t head = 0; head < termCount; ++head)
    {
        // The window may neither exceed n terms nor run past the end of the list.
        const size_t remaining = termCount - head;
        const size_t maxSpan = (n < remaining) ? n : remaining;

        for (size_t span = 1; span <= maxSpan; ++span)
        {
            unsigned int subLevel = 0;
            bool found = false;
            UString phrase;

            // First attempt: space-separated phrase (meaningless for a single term).
            if (span > 1)
            {
                for (size_t k = 0; k < span; ++k)
                {
                    if (k != 0)
                        phrase.append(SPACE);
                    phrase.append(outList[head + k].text_);
                }
                found = getSynonym(phrase, outList[head].wordOffset_, Term::OR,
                                   outList[head].getLevel(), expanded, subLevel);
            }

            // Second attempt: the same terms glued together without separators.
            if (!found)
            {
                phrase.clear();
                for (size_t k = 0; k < span; ++k)
                    phrase.append(outList[head + k].text_);
                found = getSynonym(phrase, outList[head].wordOffset_, Term::OR,
                                   outList[head].getLevel(), expanded, subLevel);
            }

            if (!found)
                continue;   // nothing for this window; try a longer one

            // Match: raise the head term's level by subLevel (and/or bit kept as is).
            outList[head].setStats(outList[head].getAndOrBit(),
                                   outList[head].getLevel() + subLevel);

            // The other terms of the window take over the head's word offset and
            // its (already raised) level; each keeps its own and/or bit.
            for (size_t k = 1; k < span; ++k)
            {
                outList[head + k].wordOffset_ = outList[head].wordOffset_;
                outList[head + k].setStats(outList[head + k].getAndOrBit(),
                                           outList[head].getLevel());
            }
            break;
        }

        // The original term is emitted after anything getSynonym() added for it.
        // Terms covered by a multi-term match are still visited as heads later on.
        expanded.push_back(outList[head]);
    }

    outList.swap(expanded);
}