예제 #1
0
// Shared initialization path for one language.
//
// arg0:        datapath for the tessdata directory. This is either the path
//              of the tessdata directory itself (no trailing /) or, when
//              tessdata lives next to the executable, the path of the
//              executable (hence the name arg0).
// textbase:    optional output file basename (used only for training).
// language:    language code to load.
// oem:         selects which engine(s) will operate on the image.
// configs:     (argv) array of config filenames to load variables from.
//              May be NULL.
// configs_size: (argc) number of entries in configs.
// vars_vec:    optional vector of variable names to set.
// vars_values: optional vector of values matching vars_vec.
// set_only_non_debug_params: forwarded unchanged to
//              init_tesseract_lang_data. The previous comment described it
//              (under the stale name set_only_init_params) as limiting the
//              set to initialization variables only.
//              NOTE(review): confirm the exact filtering against
//              init_tesseract_lang_data.
//
// Returns -1 if init_tesseract_lang_data fails, otherwise 0.
int Tesseract::init_tesseract_internal(
    const char *arg0, const char *textbase, const char *language,
    OcrEngineMode oem, char **configs, int configs_size,
    const GenericVector<STRING> *vars_vec,
    const GenericVector<STRING> *vars_values,
    bool set_only_non_debug_params) {
  const bool lang_data_ok = init_tesseract_lang_data(
      arg0, textbase, language, oem, configs, configs_size, vars_vec,
      vars_values, set_only_non_debug_params);
  if (!lang_data_ok) {
    return -1;
  }

  // Config-only initialization: release the tessdata manager and stop here.
  if (tessedit_init_config_only) {
    tessdata_manager.End();
    return 0;
  }

  // The Tesseract classifier's pre-trained templates are only needed when
  // Tesseract runs alone or combined with Cube; a Cube-only run skips them.
  const bool load_classifier =
      tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY ||
      tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED;

  // The permuter and Tesseract Dawgs are skipped only for a Cube-only run
  // that also has its own unicharset. The seek is attempted solely in the
  // Cube-only case (short-circuit evaluation).
  const bool load_dict =
      tessedit_ocr_engine_mode != OEM_CUBE_ONLY ||
      !tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET);

  program_editup(textbase, load_classifier, load_dict);
  tessdata_manager.End();
  return 0;  // Normal exit
}
예제 #2
0
// Initializes only the language-model (LM) component.
// arg0, textbase and language are forwarded to init_tesseract_lang_data;
// no config files are supplied (NULL, 0) and the final flag is false.
// Returns 0 on success, or -1 if init_tesseract_lang_data reports failure.
int Tesseract::init_tesseract_lm(const char *arg0,
                   const char *textbase,
                   const char *language) {
  // Do not set up the permuter on top of language data that failed to load;
  // report the failure to the caller instead of silently returning 0.
  if (!init_tesseract_lang_data(arg0, textbase, language, NULL, 0, false)) {
    return -1;
  }
  getDict().init_permute();
  tessdata_manager.End();
  return 0;
}
예제 #3
0
// Initializes only the language-model (LM) component.
// Language data is initialized with OEM_TESSERACT_ONLY, no config files and
// no variable overrides; the dictionary is then loaded with the global dawg
// cache and the tessdata manager is ended.
// Returns 0 on success, or -1 if init_tesseract_lang_data fails.
int Tesseract::init_tesseract_lm(const char *arg0,
                   const char *textbase,
                   const char *language) {
  const bool lang_data_ok = init_tesseract_lang_data(
      arg0, textbase, language, OEM_TESSERACT_ONLY, NULL, 0, NULL, NULL,
      false);
  if (!lang_data_ok) {
    return -1;
  }

  getDict().Load(Dict::GlobalDawgCache());
  tessdata_manager.End();
  return 0;
}
예제 #4
0
// Initializes only the language-model (LM) component, reading through the
// caller-supplied TessdataManager.
// Language data is initialized with OEM_TESSERACT_ONLY, no config files and
// no variable overrides. The dictionary is then loaded in three steps:
// SetupForLoad with the global dawg cache, Load(lang, mgr), and FinishLoad.
// Returns 0 on success, or -1 if init_tesseract_lang_data fails.
int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase,
                                 const char *language, TessdataManager *mgr) {
  const bool lang_data_ok = init_tesseract_lang_data(
      arg0, textbase, language, OEM_TESSERACT_ONLY, NULL, 0, NULL, NULL,
      false, mgr);
  if (!lang_data_ok) {
    return -1;
  }

  getDict().SetupForLoad(Dict::GlobalDawgCache());
  getDict().Load(lang, mgr);
  getDict().FinishLoad();
  return 0;
}
예제 #5
0
// Full initialization: loads the language data, calls start_recog with
// textbase, then ends the tessdata manager.
// configs / configs_size / configs_global_only are forwarded unchanged to
// init_tesseract_lang_data.
// Returns 0 on success, or -1 if init_tesseract_lang_data fails.
int Tesseract::init_tesseract(
    const char *arg0, const char *textbase, const char *language,
    char **configs, int configs_size, bool configs_global_only) {
  const bool lang_data_ok = init_tesseract_lang_data(
      arg0, textbase, language, configs, configs_size, configs_global_only);
  if (!lang_data_ok) {
    return -1;
  }

  start_recog(textbase);
  tessdata_manager.End();
  return 0;  // Normal exit
}
예제 #6
0
// Initializes everything except the language model.
// configs / configs_size / configs_global_only are forwarded unchanged to
// init_tesseract_lang_data.
// Returns 0 on success, or -1 if init_tesseract_lang_data fails.
int Tesseract::init_tesseract_classifier(
    const char *arg0, const char *textbase, const char *language,
    char **configs, int configs_size, bool configs_global_only) {
  const bool lang_data_ok = init_tesseract_lang_data(
      arg0, textbase, language, configs, configs_size, configs_global_only);
  if (!lang_data_ok) {
    return -1;
  }

  // Passing false keeps program_editup from initializing the permuter.
  program_editup(textbase, false);
  tessdata_manager.End();
  return 0;
}
예제 #7
0
// Initializes only the language-model (LM) component.
// All six arguments are forwarded unchanged to init_tesseract_lang_data,
// after which the permuter is initialized. Always returns 0.
int init_tesseract_lm(const char *arg0, const char *textbase,
                      const char *language, const char *configfile,
                      int configc, const char *const *configv) {
  init_tesseract_lang_data(arg0, textbase, language, configfile, configc,
                           configv);
  init_permute();
  return 0;  // Normal exit
}
예제 #8
0
// Full initialization entry point.
// Forwards all six arguments to init_tesseract_lang_data, then calls
// start_recog(configfile, textbase) and set_tess_tweak_vars(). When
// tessedit_use_nn is set, init_net() is called as well. Always returns 0.
int init_tesseract(const char *arg0, const char *textbase,
                   const char *language, const char *configfile,
                   int configc, const char *const *configv) {
  init_tesseract_lang_data(arg0, textbase, language, configfile, configc,
                           configv);
  start_recog(configfile, textbase);
  set_tess_tweak_vars();

  // Optional neural-net setup ("phils nn stuff" in the original source).
  if (tessedit_use_nn) {
    init_net();
  }
  return 0;  // Normal exit
}
예제 #9
0
// Common initialization for a single language.
// arg0 is the datapath for the tessdata directory, which could be the
// path of the tessdata directory with no trailing /, or (if tessdata
// lives in the same directory as the executable, the path of the executable,
// hence the name arg0.
// textbase is an optional output file basename (used only for training)
// language is the language code to load.
// oem controls which engine(s) will operate on the image
// configs (argv) is an array of config filenames to load variables from.
// May be NULL.
// configs_size (argc) is the number of elements in configs.
// vars_vec is an optional vector of variables to set.
// vars_values is an optional corresponding vector of values for the variables
// in vars_vec.
// If set_only_init_params is true, then only the initialization variables
// will be set.
int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase,
                                       const char *language, OcrEngineMode oem,
                                       char **configs, int configs_size,
                                       const GenericVector<STRING> *vars_vec,
                                       const GenericVector<STRING> *vars_values,
                                       bool set_only_non_debug_params,
                                       TessdataManager *mgr) {
  if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
                                configs_size, vars_vec, vars_values,
                                set_only_non_debug_params, mgr)) {
    return -1;
  }
  if (tessedit_init_config_only) {
    return 0;
  }
  // If only LSTM will be used, skip loading Tesseract classifier's
  // pre-trained templates and dictionary.
  bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY;
  program_editup(textbase, init_tesseract ? mgr : nullptr,
                 init_tesseract ? mgr : nullptr);
  return 0;                      //Normal exit
}