/**
 * Adds a word to the spell checker's ignore list.
 *
 * The word is lower-cased before it is compared and stored. If the
 * lower-cased form is not already in mIgnoreList, it is appended to the list,
 * written to the ignore dictionary file for the current dictionary
 * (user dictionary path + mDictFile + DICT_IGNORE_SUFFIX + ".dic"), and the
 * settings-change signal is fired. If it is already present, nothing happens.
 *
 * @param word  The word to ignore; any letter case is accepted.
 */
void LLSpellChecker::addToIgnoreList(const std::string& word)
{
    std::string word_lower(word);
    LLStringUtil::toLower(word_lower);

    // std::find returns end() when the word is NOT in the list. Only then is
    // the word new and worth adding. (The previous `!=` test was inverted: it
    // skipped new words and re-added words that were already present.)
    if (mIgnoreList.end() == std::find(mIgnoreList.begin(), mIgnoreList.end(), word_lower))
    {
        mIgnoreList.push_back(word_lower);
        addToDictFile(getDictionaryUserPath() + mDictFile + DICT_IGNORE_SUFFIX + ".dic", word_lower);
        sSettingsChangeSignal();
    }
}
/**
 * Reports whether a word should be treated as acceptable (not flagged).
 *
 * Returns true immediately when no Hunspell instance is available, when the
 * word is shorter than three characters, or when mHunspell->spell() returns
 * a nonzero value for it. Otherwise the lower-cased word is looked up in
 * mIgnoreList and the result of that lookup is returned; an empty ignore
 * list yields false.
 *
 * @param word  The word to check.
 * @return true if the word is accepted by one of the rules above, else false.
 */
bool LLSpellChecker::checkSpelling(const std::string& word) const
{
    // Guard clauses, evaluated in the same order as the original
    // short-circuit expression so spell() is never called without a checker
    // or for short words.
    if (!mHunspell)
    {
        return true;
    }
    if (word.length() < 3)
    {
        return true;
    }
    if (mHunspell->spell(word.c_str()) != 0)
    {
        return true;
    }

    // The checker rejected the word; only the ignore list can still accept it.
    if (mIgnoreList.size() == 0)
    {
        return false;
    }

    // Ignore-list entries are compared against the lower-cased word.
    std::string lowered(word);
    LLStringUtil::toLower(lowered);
    const bool is_ignored =
        std::find(mIgnoreList.begin(), mIgnoreList.end(), lowered) != mIgnoreList.end();
    return is_ignored;
}
void CoreferenceDictionary::ReadGenderNumberStatistics() { CoreferenceOptions *options = static_cast<CoreferenceOptions*>(pipe_->GetOptions()); word_alphabet_.AllowGrowth(); word_lower_alphabet_.AllowGrowth(); gender_number_statistics_.Clear(); if (options->use_gender_number_statistics() && options->file_gender_number_statistics() != "") { LOG(INFO) << "Loading gender/number statistics file " << options->file_gender_number_statistics() << "..."; std::ifstream is; std::string line; // Read the gender/number statistics, one per line. is.open(options->file_gender_number_statistics().c_str(), ifstream::in); CHECK(is.good()) << "Could not open " << options->file_gender_number_statistics() << "."; if (is.is_open()) { while (!is.eof()) { getline(is, line); if (line == "") continue; // Ignore blank lines. std::vector<std::string> fields; StringSplit(line, "\t", &fields, true); // Break on tabs. CHECK_EQ(fields.size(), 2); const std::string &phrase = fields[0]; const std::string &statistics = fields[1]; std::vector<std::string> words; StringSplit(phrase, " ", &words, true); // Break on spaces. std::vector<int> phrase_ids; for (int i = 0; i < words.size(); ++i) { const std::string &word = words[i]; std::string word_lower(word); transform(word_lower.begin(), word_lower.end(), word_lower.begin(), ::tolower); int word_id = word_alphabet_.Insert(word); // Add lower-case form to the alphabet. // TODO(atm): "sanitize" words, by escaping digit sequences: // word = re.sub('[\d]+', '#', word.lower()) int word_lower_id = word_lower_alphabet_.Insert(word_lower); phrase_ids.push_back(word_lower_id); } std::vector<std::string> subfields; StringSplit(statistics, " ", &subfields, true); // Break on spaces. 
CHECK_EQ(subfields.size(), 4); std::vector<int> counts; for (int i = 0; i < subfields.size(); ++i) { std::stringstream ss(subfields[i]); int count; ss >> count; counts.push_back(count); } if (!gender_number_statistics_.AddPhrase(phrase_ids, counts)) { LOG(INFO) << "Repeated phrase: " << phrase; } } } is.close(); }