예제 #1
0
// Common initialization for a single language.
// arg0 is the datapath for the tessdata directory, which could be the
// path of the tessdata directory with no trailing /, or (if tessdata
// lives in the same directory as the executable, the path of the executable,
// hence the name arg0.
// textbase is an optional output file basename (used only for training)
// language is the language code to load.
// oem controls which engine(s) will operate on the image
// configs (argv) is an array of config filenames to load variables from.
// May be NULL.
// configs_size (argc) is the number of elements in configs.
// vars_vec is an optional vector of variables to set.
// vars_values is an optional corresponding vector of values for the variables
// in vars_vec.
// If set_only_init_params is true, then only the initialization variables
// will be set.
int Tesseract::init_tesseract_internal(
    const char *arg0, const char *textbase, const char *language,
    OcrEngineMode oem, char **configs, int configs_size,
    const GenericVector<STRING> *vars_vec,
    const GenericVector<STRING> *vars_values,
    bool set_only_non_debug_params) {
  if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
                                configs_size, vars_vec, vars_values,
                                set_only_non_debug_params)) {
    return -1;
  }
  if (tessedit_init_config_only) {
    tessdata_manager.End();
    return 0;
  }
  // If only Cube will be used, skip loading Tesseract classifier's
  // pre-trained templates.
  bool init_tesseract_classifier =
    (tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY ||
     tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED);
  // If only Cube will be used and if it has its own Unicharset,
  // skip initializing permuter and loading Tesseract Dawgs.
  bool init_dict =
    !(tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
      tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET));
  program_editup(textbase, init_tesseract_classifier, init_dict);
  tessdata_manager.End();
  return 0;                      //Normal exit
}
예제 #2
0
// Init everything except the language model
int Tesseract::init_tesseract_classifier(
    const char *arg0, const char *textbase, const char *language,
    char **configs, int configs_size, bool configs_global_only) {
  if (!init_tesseract_lang_data (arg0, textbase, language, configs,
                                 configs_size, configs_global_only)) {
    return -1;
  }
  // Dont initialize the permuter.
  program_editup(textbase, false);
  tessdata_manager.End();
  return 0;
}
예제 #3
0
// Common initialization for a single language.
// arg0 is the datapath for the tessdata directory, which could be the
// path of the tessdata directory with no trailing /, or (if tessdata
// lives in the same directory as the executable, the path of the executable,
// hence the name arg0.
// textbase is an optional output file basename (used only for training)
// language is the language code to load.
// oem controls which engine(s) will operate on the image
// configs (argv) is an array of config filenames to load variables from.
// May be NULL.
// configs_size (argc) is the number of elements in configs.
// vars_vec is an optional vector of variables to set.
// vars_values is an optional corresponding vector of values for the variables
// in vars_vec.
// If set_only_init_params is true, then only the initialization variables
// will be set.
int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase,
                                       const char *language, OcrEngineMode oem,
                                       char **configs, int configs_size,
                                       const GenericVector<STRING> *vars_vec,
                                       const GenericVector<STRING> *vars_values,
                                       bool set_only_non_debug_params,
                                       TessdataManager *mgr) {
  if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
                                configs_size, vars_vec, vars_values,
                                set_only_non_debug_params, mgr)) {
    return -1;
  }
  if (tessedit_init_config_only) {
    return 0;
  }
  // If only LSTM will be used, skip loading Tesseract classifier's
  // pre-trained templates and dictionary.
  bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY;
  program_editup(textbase, init_tesseract ? mgr : nullptr,
                 init_tesseract ? mgr : nullptr);
  return 0;                      //Normal exit
}
예제 #4
0
/**
 * @name start_recog
 *
 * Startup recog program ready to recognize words.
 */
int Wordrec::start_recog(const char *textbase) {

  program_editup(textbase, true);
  return (0);
}