Beispiel #1
0
array<string>
tt_analyze (string family) {
  array<string> r;
  font fn= tt_font (family, 10, 1200);
  font_metric fnm= tt_font_metric (family, 10, 1200);
  //cout << "Analyzing " << family << "\n";

  get_glyph_fatal= false;
  analyze_range (fn, fnm, r, family);
  analyze_special (fn, fnm, r);
  analyze_major (fn, fnm, r);
  //analyze_trace (fn, fnm, r);
  get_glyph_fatal= true;

  //cout << "  -> " << r << "\n";
  cout << r << " " << family << "\n";
  return r;
}
Beispiel #2
0
int generic_morpho::analyze(string_piece form, guesser_mode guesser, vector<tagged_lemma>& lemmas) const {
  lemmas.clear();

  if (form.len) {
    // Generate all casing variants if needed (they are different than given form).
    string form_uclc; // first uppercase, rest lowercase
    string form_lc;   // all lowercase
    generate_casing_variants(form, form_uclc, form_lc);

    // Start by analysing using the dictionary and all casing variants.
    dictionary.analyze(form, lemmas);
    if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
    if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
    if (!lemmas.empty()) return NO_GUESSER;

    // Then call analyze_special to handle numbers, punctuation and symbols.
    analyze_special(form, lemmas);
    if (!lemmas.empty()) return NO_GUESSER;

    // For the statistical guesser, use all casing variants.
    if (guesser == GUESSER && statistical_guesser) {
      if (form_uclc.empty() && form_lc.empty())
        statistical_guesser->analyze(form, lemmas, nullptr);
      else {
        morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3);
        statistical_guesser->analyze(form, lemmas, &used_rules);
        if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules);
        if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules);
      }
    }
    if (!lemmas.empty()) return GUESSER;
  }

  lemmas.emplace_back(string(form.str, form.len), unknown_tag);
  return -1;
}