// Common initialization for a single language. // arg0 is the datapath for the tessdata directory, which could be the // path of the tessdata directory with no trailing /, or (if tessdata // lives in the same directory as the executable, the path of the executable, // hence the name arg0. // textbase is an optional output file basename (used only for training) // language is the language code to load. // oem controls which engine(s) will operate on the image // configs (argv) is an array of config filenames to load variables from. // May be NULL. // configs_size (argc) is the number of elements in configs. // vars_vec is an optional vector of variables to set. // vars_values is an optional corresponding vector of values for the variables // in vars_vec. // If set_only_init_params is true, then only the initialization variables // will be set. int Tesseract::init_tesseract_internal( const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_values, bool set_only_non_debug_params) { if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params)) { return -1; } if (tessedit_init_config_only) { tessdata_manager.End(); return 0; } // If only Cube will be used, skip loading Tesseract classifier's // pre-trained templates. bool init_tesseract_classifier = (tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY || tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED); // If only Cube will be used and if it has its own Unicharset, // skip initializing permuter and loading Tesseract Dawgs. bool init_dict = !(tessedit_ocr_engine_mode == OEM_CUBE_ONLY && tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET)); program_editup(textbase, init_tesseract_classifier, init_dict); tessdata_manager.End(); return 0; //Normal exit }
// init the LM component int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase, const char *language) { init_tesseract_lang_data(arg0, textbase, language, NULL, 0, false); getDict().init_permute(); tessdata_manager.End(); return 0; }
// init the LM component int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase, const char *language) { if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY, NULL, 0, NULL, NULL, false)) return -1; getDict().Load(Dict::GlobalDawgCache()); tessdata_manager.End(); return 0; }
// init the LM component int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase, const char *language, TessdataManager *mgr) { if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY, NULL, 0, NULL, NULL, false, mgr)) return -1; getDict().SetupForLoad(Dict::GlobalDawgCache()); getDict().Load(lang, mgr); getDict().FinishLoad(); return 0; }
// Full initialization: loads the language data, then starts recognition
// setup via start_recog(textbase) and ends the tessdata manager.
//
//   configs / configs_size - config file names and their count, forwarded to
//                            init_tesseract_lang_data.
//   configs_global_only    - forwarded to init_tesseract_lang_data.
//
// Returns 0 on success, -1 if init_tesseract_lang_data reports failure.
int Tesseract::init_tesseract(const char *arg0,
                              const char *textbase,
                              const char *language,
                              char **configs,
                              int configs_size,
                              bool configs_global_only) {
  if (init_tesseract_lang_data(arg0, textbase, language,
                               configs, configs_size,
                               configs_global_only)) {
    start_recog(textbase);
    tessdata_manager.End();
    return 0;  // Normal exit
  }
  return -1;
}
// Init everything except the language model int Tesseract::init_tesseract_classifier( const char *arg0, const char *textbase, const char *language, char **configs, int configs_size, bool configs_global_only) { if (!init_tesseract_lang_data (arg0, textbase, language, configs, configs_size, configs_global_only)) { return -1; } // Dont initialize the permuter. program_editup(textbase, false); tessdata_manager.End(); return 0; }
// init the LM component int init_tesseract_lm(const char *arg0, const char *textbase, const char *language, const char *configfile, int configc, const char *const *configv) { init_tesseract_lang_data (arg0, textbase, language, configfile, configc, configv); init_permute(); return 0; //Normal exit }
int init_tesseract(const char *arg0, const char *textbase, const char *language, const char *configfile, int configc, const char *const *configv) { init_tesseract_lang_data (arg0, textbase, language, configfile, configc, configv); start_recog(configfile, textbase); set_tess_tweak_vars(); if (tessedit_use_nn) //phils nn stuff init_net(); return 0; //Normal exit }
// Common initialization for a single language. // arg0 is the datapath for the tessdata directory, which could be the // path of the tessdata directory with no trailing /, or (if tessdata // lives in the same directory as the executable, the path of the executable, // hence the name arg0. // textbase is an optional output file basename (used only for training) // language is the language code to load. // oem controls which engine(s) will operate on the image // configs (argv) is an array of config filenames to load variables from. // May be NULL. // configs_size (argc) is the number of elements in configs. // vars_vec is an optional vector of variables to set. // vars_values is an optional corresponding vector of values for the variables // in vars_vec. // If set_only_init_params is true, then only the initialization variables // will be set. int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_values, bool set_only_non_debug_params, TessdataManager *mgr) { if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params, mgr)) { return -1; } if (tessedit_init_config_only) { return 0; } // If only LSTM will be used, skip loading Tesseract classifier's // pre-trained templates and dictionary. bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY; program_editup(textbase, init_tesseract ? mgr : nullptr, init_tesseract ? mgr : nullptr); return 0; //Normal exit }