// Initialize for potentially a set of languages defined by the language // string and recursively any additional languages required by any language // traineddata file (via tessedit_load_sublangs in its config) that is loaded. // See init_tesseract_internal for args. int Tesseract::init_tesseract( const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_values, bool set_only_non_debug_params) { GenericVector<STRING> langs_to_load; GenericVector<STRING> langs_not_to_load; ParseLanguageString(language, &langs_to_load, &langs_not_to_load); sub_langs_.delete_data_pointers(); sub_langs_.clear(); // Find the first loadable lang and load into this. // Add any languages that this language requires bool loaded_primary = false; // Load the rest into sub_langs_. for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) { if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) { const char *lang_str = langs_to_load[lang_index].string(); Tesseract *tess_to_init; if (!loaded_primary) { tess_to_init = this; } else { tess_to_init = new Tesseract; } int result = tess_to_init->init_tesseract_internal( arg0, textbase, lang_str, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params); if (!loaded_primary) { if (result < 0) { tprintf("Failed loading language '%s'\n", lang_str); } else { if (tessdata_manager_debug_level) tprintf("Loaded language '%s' as main language\n", lang_str); ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), &langs_to_load, &langs_not_to_load); loaded_primary = true; } } else { if (result < 0) { tprintf("Failed loading language '%s'\n", lang_str); delete tess_to_init; } else { if (tessdata_manager_debug_level) tprintf("Loaded language '%s' as secondary language\n", lang_str); sub_langs_.push_back(tess_to_init); // Add any languages that this language requires ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), &langs_to_load, &langs_not_to_load); } } } } if (!loaded_primary) { tprintf("Tesseract couldn't load any languages!\n"); return -1; // Couldn't load any language! } if (!sub_langs_.empty()) { // In multilingual mode word ratings have to be directly comparable, // so use the same language model weights for all languages: // use the primary language's params model if // tessedit_use_primary_params_model is set, // otherwise use default language model weights. if (tessedit_use_primary_params_model) { for (int s = 0; s < sub_langs_.size(); ++s) { sub_langs_[s]->language_model_->getParamsModel().Copy( this->language_model_->getParamsModel()); } tprintf("Using params model of the primary language\n"); if (tessdata_manager_debug_level) { this->language_model_->getParamsModel().Print(); } } else { this->language_model_->getParamsModel().Clear(); for (int s = 0; s < sub_langs_.size(); ++s) { sub_langs_[s]->language_model_->getParamsModel().Clear(); } tprintf("Using default language params\n"); } } SetupUniversalFontIds(); return 0; }
// Initialize for potentially a set of languages defined by the language // string and recursively any additional languages required by any language // traineddata file (via tessedit_load_sublangs in its config) that is loaded. // See init_tesseract_internal for args. int Tesseract::init_tesseract( const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_values, bool set_only_non_debug_params) { GenericVector<STRING> langs_to_load; GenericVector<STRING> langs_not_to_load; ParseLanguageString(language, &langs_to_load, &langs_not_to_load); sub_langs_.delete_data_pointers(); sub_langs_.clear(); // Find the first loadable lang and load into this. // Add any languages that this language requires bool loaded_primary = false; // Load the rest into sub_langs_. for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) { if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) { const char *lang_str = langs_to_load[lang_index].string(); Tesseract *tess_to_init; if (!loaded_primary) { tess_to_init = this; } else { tess_to_init = new Tesseract; } int result = tess_to_init->init_tesseract_internal( arg0, textbase, lang_str, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params); if (!loaded_primary) { if (result < 0) { tprintf("Failed loading language '%s'\n", lang_str); } else { if (tessdata_manager_debug_level) tprintf("Loaded language '%s' as main language\n", lang_str); ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), &langs_to_load, &langs_not_to_load); loaded_primary = true; } } else { if (result < 0) { tprintf("Failed loading language '%s'\n", lang_str); delete tess_to_init; } else { if (tessdata_manager_debug_level) tprintf("Loaded language '%s' as secondary language\n", lang_str); sub_langs_.push_back(tess_to_init); // Add any languages that this language requires ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), &langs_to_load, &langs_not_to_load); } } } } if (!loaded_primary) { tprintf("Tesseract couldn't load any languages!\n"); return -1; // Couldn't load any language! } SetupUniversalFontIds(); return 0; }