Esempio n. 1
0
// Initialize for potentially a set of languages defined by the language
// string and recursively any additional languages required by any language
// traineddata file (via tessedit_load_sublangs in its config) that is loaded.
// See init_tesseract_internal for args.
int Tesseract::init_tesseract(
    const char *arg0, const char *textbase, const char *language,
    OcrEngineMode oem, char **configs, int configs_size,
    const GenericVector<STRING> *vars_vec,
    const GenericVector<STRING> *vars_values,
    bool set_only_non_debug_params) {
  GenericVector<STRING> langs_to_load;
  GenericVector<STRING> langs_not_to_load;
  ParseLanguageString(language, &langs_to_load, &langs_not_to_load);

  sub_langs_.delete_data_pointers();
  sub_langs_.clear();
  // Find the first loadable lang and load into this.
  // Add any languages that this language requires
  bool loaded_primary = false;
  // Load the rest into sub_langs_.
  for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) {
    if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) {
      const char *lang_str = langs_to_load[lang_index].string();
      Tesseract *tess_to_init;
      if (!loaded_primary) {
        tess_to_init = this;
      } else {
        tess_to_init = new Tesseract;
      }

      int result = tess_to_init->init_tesseract_internal(
          arg0, textbase, lang_str, oem, configs, configs_size,
          vars_vec, vars_values, set_only_non_debug_params);

      if (!loaded_primary) {
        if (result < 0) {
          tprintf("Failed loading language '%s'\n", lang_str);
        } else {
          if (tessdata_manager_debug_level)
            tprintf("Loaded language '%s' as main language\n", lang_str);
          ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
                              &langs_to_load, &langs_not_to_load);
          loaded_primary = true;
        }
      } else {
        if (result < 0) {
          tprintf("Failed loading language '%s'\n", lang_str);
          delete tess_to_init;
        } else {
          if (tessdata_manager_debug_level)
            tprintf("Loaded language '%s' as secondary language\n", lang_str);
          sub_langs_.push_back(tess_to_init);
          // Add any languages that this language requires
          ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
                              &langs_to_load, &langs_not_to_load);
        }
      }
    }
  }
  if (!loaded_primary) {
    tprintf("Tesseract couldn't load any languages!\n");
    return -1;  // Couldn't load any language!
  }
  if (!sub_langs_.empty()) {
    // In multilingual mode word ratings have to be directly comparable,
    // so use the same language model weights for all languages:
    // use the primary language's params model if
    // tessedit_use_primary_params_model is set,
    // otherwise use default language model weights.
    if (tessedit_use_primary_params_model) {
      for (int s = 0; s < sub_langs_.size(); ++s) {
        sub_langs_[s]->language_model_->getParamsModel().Copy(
            this->language_model_->getParamsModel());
      }
      tprintf("Using params model of the primary language\n");
      if (tessdata_manager_debug_level)  {
        this->language_model_->getParamsModel().Print();
      }
    } else {
      this->language_model_->getParamsModel().Clear();
      for (int s = 0; s < sub_langs_.size(); ++s) {
        sub_langs_[s]->language_model_->getParamsModel().Clear();
      }
      tprintf("Using default language params\n");
    }
  }

  SetupUniversalFontIds();
  return 0;
}
Esempio n. 2
0
// Initialize for potentially a set of languages defined by the language
// string and recursively any additional languages required by any language
// traineddata file (via tessedit_load_sublangs in its config) that is loaded.
// See init_tesseract_internal for args.
int Tesseract::init_tesseract(
    const char *arg0, const char *textbase, const char *language,
    OcrEngineMode oem, char **configs, int configs_size,
    const GenericVector<STRING> *vars_vec,
    const GenericVector<STRING> *vars_values,
    bool set_only_non_debug_params) {
  GenericVector<STRING> langs_to_load;
  GenericVector<STRING> langs_not_to_load;
  ParseLanguageString(language, &langs_to_load, &langs_not_to_load);

  sub_langs_.delete_data_pointers();
  sub_langs_.clear();
  // Find the first loadable lang and load into this.
  // Add any languages that this language requires
  bool loaded_primary = false;
  // Load the rest into sub_langs_.
  for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) {
    if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) {
      const char *lang_str = langs_to_load[lang_index].string();
      Tesseract *tess_to_init;
      if (!loaded_primary) {
        tess_to_init = this;
      } else {
        tess_to_init = new Tesseract;
      }

      int result = tess_to_init->init_tesseract_internal(
          arg0, textbase, lang_str, oem, configs, configs_size,
          vars_vec, vars_values, set_only_non_debug_params);

      if (!loaded_primary) {
        if (result < 0) {
          tprintf("Failed loading language '%s'\n", lang_str);
        } else {
          if (tessdata_manager_debug_level)
            tprintf("Loaded language '%s' as main language\n", lang_str);
          ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
                              &langs_to_load, &langs_not_to_load);
          loaded_primary = true;
        }
      } else {
        if (result < 0) {
          tprintf("Failed loading language '%s'\n", lang_str);
          delete tess_to_init;
        } else {
          if (tessdata_manager_debug_level)
            tprintf("Loaded language '%s' as secondary language\n", lang_str);
          sub_langs_.push_back(tess_to_init);
          // Add any languages that this language requires
          ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
                              &langs_to_load, &langs_not_to_load);
        }
      }
    }
  }
  if (!loaded_primary) {
    tprintf("Tesseract couldn't load any languages!\n");
    return -1;  // Couldn't load any language!
  }
  SetupUniversalFontIds();
  return 0;
}