bool TessdataManager::TessdataTypeFromFileName( const char *filename, TessdataType *type, bool *text_file) { // Get the file suffix (extension) const char *suffix = strrchr(filename, '.'); if (suffix == NULL || *(++suffix) == '\0') return false; return TessdataTypeFromFileSuffix(suffix, type, text_file); }
bool TessdataManager::CombineDataFiles( const char *language_data_path_prefix, const char *output_filename) { // Load individual tessdata components from files. for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { TessdataType type; ASSERT_HOST(TessdataTypeFromFileSuffix(kTessdataFileSuffixes[i], &type)); STRING filename = language_data_path_prefix; filename += kTessdataFileSuffixes[i]; FILE *fp = fopen(filename.string(), "rb"); if (fp != nullptr) { fclose(fp); if (!LoadDataFromFile(filename, &entries_[type])) { tprintf("Load of file %s failed!\n", filename.string()); return false; } } } is_loaded_ = true; // Make sure that the required components are present. if (!IsBaseAvailable() && !IsLSTMAvailable()) { tprintf( "Error: traineddata file must contain at least (a unicharset file" "and inttemp) OR an lstm file.\n"); return false; } // Write updated data to the output traineddata file. return SaveFile(output_filename, nullptr); }
bool TessdataManager::CombineDataFiles( const char *language_data_path_prefix, const char *output_filename) { int i; inT64 offset_table[TESSDATA_NUM_ENTRIES]; for (i = 0; i < TESSDATA_NUM_ENTRIES; ++i) offset_table[i] = -1; FILE *output_file = fopen(output_filename, "wb"); if (output_file == NULL) { tprintf("Error opening %s for writing\n", output_filename); return false; } // Leave some space for recording the offset_table. if (fseek(output_file, sizeof(inT32) + sizeof(inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET)) { tprintf("Error seeking %s\n", output_filename); fclose(output_file); return false; } TessdataType type = TESSDATA_NUM_ENTRIES; bool text_file = false; FILE *file_ptr[TESSDATA_NUM_ENTRIES]; // Load individual tessdata components from files. for (i = 0; i < TESSDATA_NUM_ENTRIES; ++i) { ASSERT_HOST(TessdataTypeFromFileSuffix( kTessdataFileSuffixes[i], &type, &text_file)); STRING filename = language_data_path_prefix; filename += kTessdataFileSuffixes[i]; file_ptr[i] = fopen(filename.string(), "rb"); if (file_ptr[i] != NULL) { offset_table[type] = ftell(output_file); CopyFile(file_ptr[i], output_file, text_file, -1); fclose(file_ptr[i]); } } // Make sure that the required components are present. if (file_ptr[TESSDATA_UNICHARSET] == NULL) { tprintf("Error opening %sunicharset file\n", language_data_path_prefix); fclose(output_file); return false; } if (file_ptr[TESSDATA_INTTEMP] != NULL && (file_ptr[TESSDATA_PFFMTABLE] == NULL || file_ptr[TESSDATA_NORMPROTO] == NULL)) { tprintf("Error opening %spffmtable and/or %snormproto files" " while %sinttemp file was present\n", language_data_path_prefix, language_data_path_prefix, language_data_path_prefix); fclose(output_file); return false; } return WriteMetadata(offset_table, language_data_path_prefix, output_file); }