/*
bool ReadSetting(LPTSTR szSettingName,int* Value,int DefaultValue,LPTSTR szString,LPTSTR szDefString)
{
	TCHAR szFileName[256],szPath[256];
	GetModuleFileName(0,szFileName,1023);
	ExtractFilePath(szFileName,szPath);
	wsprintf(szFileName,_T("%simgupload.ini"),szPath);
	TCHAR szBuffer1[128],szBuffer2[128];
	lstrcpy(szBuffer2,GetFileExt(szSettingName));
	GetOnlyFileName(szSettingName,szBuffer1);
	if(!szString)
		*Value = GetPrivateProfileInt(szBuffer1,szBuffer2, DefaultValue, szFileName);
	else
		GetPrivateProfileString(szBuffer1,szBuffer2, szDefString,szString,256, szFileName);
	return true;
}

bool WriteSetting(LPCTSTR szSettingName,int Value,LPCTSTR szString)
{
	TCHAR szFileName[256],szPath[256];
	GetModuleFileName(0,szFileName,1023);
	ExtractFilePath(szFileName,szPath);
	wsprintf(szFileName,_T("%simgupload.ini"),szPath);
	TCHAR szBuffer1[128],szBuffer2[128];
	TCHAR szBuffer3[256];
	lstrcpy(szBuffer2,GetFileExt(szSettingName));
	GetOnlyFileName(szSettingName,szBuffer1);
	if(!szString)
	{
		wsprintf(szBuffer3,_T("%d"),Value);
		WritePrivateProfileString(szBuffer1,szBuffer2, szBuffer3, szFileName);
	}
	else
		WritePrivateProfileString(szBuffer1,szBuffer2, szString, szFileName);
	return true;
}
*/

// Maps a file name to a numeric saving-format code based on its extension.
//
// szFileName: file name whose extension (as returned by GetFileExt) is
//             examined; may be null.
//
// Returns -1 when szFileName is null, 1 when the extension matches "png",
// 2 when it matches "gif", and 0 otherwise (this includes "jpg"/"jpeg" as
// well as every unrecognised extension).
int GetSavingFormat(LPCTSTR szFileName)
{
	if (!szFileName)
	{
		return -1;
	}

	LPCTSTR extension = GetFileExt(szFileName);

	// 0 doubles as the fallback code for anything that is not png or gif.
	int formatCode = 0;
	if (IsStrInList(extension, _T("jpg\0jpeg\0\0")))
	{
		formatCode = 0;
	}
	else if (IsStrInList(extension, _T("png\0\0")))
	{
		formatCode = 1;
	}
	else if (IsStrInList(extension, _T("gif\0\0")))
	{
		formatCode = 2;
	}
	return formatCode;
}
// Parse a string of the form [~]<lang>[+[~]<lang>]*. // Langs with no prefix get appended to to_load, provided they // are not in there already. // Langs with ~ prefix get appended to not_to_load, provided they are not in // there already. void Tesseract::ParseLanguageString(const char* lang_str, GenericVector<STRING>* to_load, GenericVector<STRING>* not_to_load) { STRING remains(lang_str); while (remains.length() > 0) { // Find the start of the lang code and which vector to add to. const char* start = remains.string(); while (*start == '+') ++start; GenericVector<STRING>* target = to_load; if (*start == '~') { target = not_to_load; ++start; } // Find the index of the end of the lang code in string start. int end = strlen(start); const char* plus = strchr(start, '+'); if (plus != NULL && plus - start < end) end = plus - start; STRING lang_code(start); lang_code.truncate_at(end); STRING next(start + end); remains = next; // Check whether lang_code is already in the target vector and add. if (!IsStrInList(lang_code, *target)) { if (tessdata_manager_debug_level) tprintf("Adding language '%s' to list\n", lang_code.string()); target->push_back(lang_code); } } }
// Reports whether a file name carries one of the image extensions listed
// below (jpg, jpeg, png, bmp, gif, tif, tiff).
//
// szFileName: file name handed to GetFileExt to obtain the extension.
//
// Returns false when the extension is empty; otherwise returns the result of
// looking the extension up in the extension list via IsStrInList.
bool IsImage(LPCTSTR szFileName)
{
	LPCTSTR extension = GetFileExt(szFileName);

	// A name without an extension can never be classified as an image.
	const bool hasExtension = lstrlen(extension) >= 1;
	return hasExtension
		&& IsStrInList(extension, _T("jpg\0jpeg\0png\0bmp\0gif\0tif\0tiff\0\0"));
}
// Initialize for potentially a set of languages defined by the language // string and recursively any additional languages required by any language // traineddata file (via tessedit_load_sublangs in its config) that is loaded. // See init_tesseract_internal for args. int Tesseract::init_tesseract( const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_values, bool set_only_non_debug_params) { GenericVector<STRING> langs_to_load; GenericVector<STRING> langs_not_to_load; ParseLanguageString(language, &langs_to_load, &langs_not_to_load); sub_langs_.delete_data_pointers(); sub_langs_.clear(); // Find the first loadable lang and load into this. // Add any languages that this language requires bool loaded_primary = false; // Load the rest into sub_langs_. for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) { if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) { const char *lang_str = langs_to_load[lang_index].string(); Tesseract *tess_to_init; if (!loaded_primary) { tess_to_init = this; } else { tess_to_init = new Tesseract; } int result = tess_to_init->init_tesseract_internal( arg0, textbase, lang_str, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params); if (!loaded_primary) { if (result < 0) { tprintf("Failed loading language '%s'\n", lang_str); } else { if (tessdata_manager_debug_level) tprintf("Loaded language '%s' as main language\n", lang_str); ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), &langs_to_load, &langs_not_to_load); loaded_primary = true; } } else { if (result < 0) { tprintf("Failed loading language '%s'\n", lang_str); delete tess_to_init; } else { if (tessdata_manager_debug_level) tprintf("Loaded language '%s' as secondary language\n", lang_str); sub_langs_.push_back(tess_to_init); // Add any languages that this language requires 
ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), &langs_to_load, &langs_not_to_load); } } } } if (!loaded_primary) { tprintf("Tesseract couldn't load any languages!\n"); return -1; // Couldn't load any language! } if (!sub_langs_.empty()) { // In multilingual mode word ratings have to be directly comparable, // so use the same language model weights for all languages: // use the primary language's params model if // tessedit_use_primary_params_model is set, // otherwise use default language model weights. if (tessedit_use_primary_params_model) { for (int s = 0; s < sub_langs_.size(); ++s) { sub_langs_[s]->language_model_->getParamsModel().Copy( this->language_model_->getParamsModel()); } tprintf("Using params model of the primary language\n"); if (tessdata_manager_debug_level) { this->language_model_->getParamsModel().Print(); } } else { this->language_model_->getParamsModel().Clear(); for (int s = 0; s < sub_langs_.size(); ++s) { sub_langs_[s]->language_model_->getParamsModel().Clear(); } tprintf("Using default language params\n"); } } SetupUniversalFontIds(); return 0; }
// Initialize for potentially a set of languages defined by the language // string and recursively any additional languages required by any language // traineddata file (via tessedit_load_sublangs in its config) that is loaded. // See init_tesseract_internal for args. int Tesseract::init_tesseract( const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector<STRING> *vars_vec, const GenericVector<STRING> *vars_values, bool set_only_non_debug_params) { GenericVector<STRING> langs_to_load; GenericVector<STRING> langs_not_to_load; ParseLanguageString(language, &langs_to_load, &langs_not_to_load); sub_langs_.delete_data_pointers(); sub_langs_.clear(); // Find the first loadable lang and load into this. // Add any languages that this language requires bool loaded_primary = false; // Load the rest into sub_langs_. for (int lang_index = 0; lang_index < langs_to_load.size(); ++lang_index) { if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) { const char *lang_str = langs_to_load[lang_index].string(); Tesseract *tess_to_init; if (!loaded_primary) { tess_to_init = this; } else { tess_to_init = new Tesseract; } int result = tess_to_init->init_tesseract_internal( arg0, textbase, lang_str, oem, configs, configs_size, vars_vec, vars_values, set_only_non_debug_params); if (!loaded_primary) { if (result < 0) { tprintf("Failed loading language '%s'\n", lang_str); } else { if (tessdata_manager_debug_level) tprintf("Loaded language '%s' as main language\n", lang_str); ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), &langs_to_load, &langs_not_to_load); loaded_primary = true; } } else { if (result < 0) { tprintf("Failed loading language '%s'\n", lang_str); delete tess_to_init; } else { if (tessdata_manager_debug_level) tprintf("Loaded language '%s' as secondary language\n", lang_str); sub_langs_.push_back(tess_to_init); // Add any languages that this language requires 
ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(), &langs_to_load, &langs_not_to_load); } } } } if (!loaded_primary) { tprintf("Tesseract couldn't load any languages!\n"); return -1; // Couldn't load any language! } SetupUniversalFontIds(); return 0; }
// Returns true only when IsStrInList reports both s1 and s2 as present in
// list. s2 is not looked up when the lookup of s1 already fails.
// Note: list is taken by value, so the vector is copied on every call.
bool IsBothStrInList(const CString& s1, const CString& s2, std::vector<CString> list)
{
	if (!IsStrInList(s1, list))
	{
		return false;
	}
	return IsStrInList(s2, list);
}