Ejemplo n.º 1
0
// Adds a word to the ignore list unless its lower-cased form is already present.
//
// The word is lower-cased before both the lookup and the insertion, so entries
// in mIgnoreList are always stored in lower case. When the word is new it is
// appended to mIgnoreList, handed to addToDictFile() together with the path
// built from the user dictionary directory, the current dictionary file name
// and the ignore suffix, and sSettingsChangeSignal is fired. When the word is
// already listed the call does nothing.
//
// @param word  The word to ignore (any letter case).
void LLSpellChecker::addToIgnoreList(const std::string& word)
{
	std::string word_lower(word);
	LLStringUtil::toLower(word_lower);
	// std::find() returning end() means "not found": only then is the word added.
	// (The comparison used to be '!=', which skipped new words and duplicated
	// words that were already in the list.)
	if (mIgnoreList.end() == std::find(mIgnoreList.begin(), mIgnoreList.end(), word_lower))
	{
		mIgnoreList.push_back(word_lower);
		addToDictFile(getDictionaryUserPath() + mDictFile + DICT_IGNORE_SUFFIX + ".dic", word_lower);
		sSettingsChangeSignal();
	}
}
Ejemplo n.º 2
0
// Reports whether a word should be treated as correctly spelled.
//
// Returns true when no Hunspell instance is set, when the word is shorter than
// three characters, or when mHunspell->spell() returns a non-zero value for it.
// Otherwise the lower-cased word is looked up in mIgnoreList and the result of
// that lookup is returned; with an empty ignore list the result is false.
//
// @param word  The word to check.
// @return true if the word is accepted, false if it is considered misspelled.
bool LLSpellChecker::checkSpelling(const std::string& word) const
{
	// Without a spell checker instance every word is accepted.
	if (!mHunspell)
	{
		return true;
	}

	// Words of fewer than three characters are never flagged.
	if (word.length() < 3)
	{
		return true;
	}

	// A non-zero result from spell() means the word is accepted.
	if (mHunspell->spell(word.c_str()) != 0)
	{
		return true;
	}

	// Not accepted by the checker: the ignore list is the last resort.
	if (0 == mIgnoreList.size())
	{
		return false;
	}

	// Ignore-list lookups are done on the lower-cased word.
	std::string lowered(word);
	LLStringUtil::toLower(lowered);
	return std::find(mIgnoreList.begin(), mIgnoreList.end(), lowered) != mIgnoreList.end();
}
void CoreferenceDictionary::ReadGenderNumberStatistics() {
  CoreferenceOptions *options =
    static_cast<CoreferenceOptions*>(pipe_->GetOptions());

  word_alphabet_.AllowGrowth();
  word_lower_alphabet_.AllowGrowth();

  gender_number_statistics_.Clear();

  if (options->use_gender_number_statistics() &&
      options->file_gender_number_statistics() != "") {
    LOG(INFO) << "Loading gender/number statistics file "
      << options->file_gender_number_statistics() << "...";
    std::ifstream is;
    std::string line;

    // Read the gender/number statistics, one per line.
    is.open(options->file_gender_number_statistics().c_str(), ifstream::in);
    CHECK(is.good()) << "Could not open "
      << options->file_gender_number_statistics() << ".";
    if (is.is_open()) {
      while (!is.eof()) {
        getline(is, line);
        if (line == "") continue; // Ignore blank lines.
        std::vector<std::string> fields;
        StringSplit(line, "\t", &fields, true); // Break on tabs.
        CHECK_EQ(fields.size(), 2);
        const std::string &phrase = fields[0];
        const std::string &statistics = fields[1];
        std::vector<std::string> words;
        StringSplit(phrase, " ", &words, true); // Break on spaces.
        std::vector<int> phrase_ids;
        for (int i = 0; i < words.size(); ++i) {
          const std::string &word = words[i];
          std::string word_lower(word);
          transform(word_lower.begin(), word_lower.end(), word_lower.begin(),
                    ::tolower);

          int word_id = word_alphabet_.Insert(word);

          // Add lower-case form to the alphabet.
          // TODO(atm): "sanitize" words, by escaping digit sequences:
          // word = re.sub('[\d]+', '#', word.lower())
          int word_lower_id = word_lower_alphabet_.Insert(word_lower);
          phrase_ids.push_back(word_lower_id);
        }

        std::vector<std::string> subfields;
        StringSplit(statistics, " ", &subfields, true); // Break on spaces.
        CHECK_EQ(subfields.size(), 4);
        std::vector<int> counts;
        for (int i = 0; i < subfields.size(); ++i) {
          std::stringstream ss(subfields[i]);
          int count;
          ss >> count;
          counts.push_back(count);
        }

        if (!gender_number_statistics_.AddPhrase(phrase_ids, counts)) {
          LOG(INFO) << "Repeated phrase: " << phrase;
        }
      }
    }
    is.close();
  }