Example #1
0
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1,
                                       const WERD_CHOICE &word2) {
  const UNICHARSET *uchset = word1.unicharset();
  if (word2.unicharset() != uchset) return false;
  int w1start, w1end;
  word1.punct_stripped(&w1start, &w1end);
  int w2start, w2end;
  word2.punct_stripped(&w2start, &w2end);
  if (w1end - w1start != w2end - w2start) return false;
  for (int i = 0; i < w1end - w1start; i++) {
    if (uchset->to_lower(word1.unichar_id(w1start + i)) !=
        uchset->to_lower(word2.unichar_id(w2start + i))) {
        return false;
    }
  }
  return true;
}
Example #2
0
 inT16 Tesseract::count_alphas(const WERD_CHOICE &word) {
     int count = 0;
     for (int i = 0; i < word.length(); ++i) {
         if (word.unicharset()->get_isalpha(word.unichar_id(i)))
             count++;
     }
     return count;
 }
Example #3
0
// Update hyphen_word_, and copy the given DawgPositionVectors into
// hyphen_active_dawgs_.
void Dict::set_hyphen_word(const WERD_CHOICE &word,
                           const DawgPositionVector &active_dawgs) {
  if (hyphen_word_ == NULL) {
    hyphen_word_ = new WERD_CHOICE(word.unicharset());
    hyphen_word_->make_bad();
  }
  if (hyphen_word_->rating() > word.rating()) {
    *hyphen_word_ = word;
    // Remove the last unichar id as it is a hyphen, and remove
    // any unichar_string/lengths that are present.
    hyphen_word_->remove_last_unichar_id();
    hyphen_active_dawgs_ = active_dawgs;
  }
  if (hyphen_debug_level) {
    hyphen_word_->print("set_hyphen_word: ");
  }
}