// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool TrainingSample::DeSerialize(bool swap, FILE* fp) { if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false; if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false; if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false; if (!bounding_box_.DeSerialize(swap, fp)) return false; if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) return false; if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1) return false; if (swap) { ReverseN(&class_id_, sizeof(class_id_)); ReverseN(&num_features_, sizeof(num_features_)); ReverseN(&num_micro_features_, sizeof(num_micro_features_)); ReverseN(&outline_length_, sizeof(outline_length_)); } // Arbitrarily limit the number of elements to protect against bad data. if (num_features_ > UINT16_MAX) return false; if (num_micro_features_ > UINT16_MAX) return false; delete [] features_; features_ = new INT_FEATURE_STRUCT[num_features_]; if (fread(features_, sizeof(*features_), num_features_, fp) != num_features_) return false; delete [] micro_features_; micro_features_ = new MicroFeature[num_micro_features_]; if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_, fp) != num_micro_features_) return false; if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != kNumCNParams) return false; if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) return false; return true; }
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap) { inT32 vec_size, kern_size; if (fread(&vec_size, sizeof(vec_size), 1, f) != 1) return false; if (swap) Reverse32(&vec_size); ASSERT_HOST(vec_size >= 0); if (vec_size == 0) return true; fi->init_spacing(vec_size); for (int i = 0; i < vec_size; ++i) { FontSpacingInfo *fs = new FontSpacingInfo(); if (fread(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, f) != 1 || fread(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, f) != 1 || fread(&kern_size, sizeof(kern_size), 1, f) != 1) { delete fs; return false; } if (swap) { ReverseN(&(fs->x_gap_before), sizeof(fs->x_gap_before)); ReverseN(&(fs->x_gap_after), sizeof(fs->x_gap_after)); Reverse32(&kern_size); } if (kern_size < 0) { // indication of a NULL entry in fi->spacing_vec delete fs; continue; } if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(swap, f) || !fs->kerned_x_gaps.DeSerialize(swap, f))) { delete fs; return false; } fi->add_spacing(i, fs); } return true; }
bool TessdataManager::Init(const char *data_file_name, int debug_level) { int i; debug_level_ = debug_level; data_file_name_ = data_file_name; data_file_ = fopen(data_file_name, "rb"); if (data_file_ == NULL) { tprintf("Error opening data file %s\n", data_file_name); tprintf("Please make sure the TESSDATA_PREFIX environment variable is set " "to the parent directory of your \"tessdata\" directory.\n"); return false; } fread(&actual_tessdata_num_entries_, sizeof(inT32), 1, data_file_); swap_ = (actual_tessdata_num_entries_ > kMaxNumTessdataEntries); if (swap_) { ReverseN(&actual_tessdata_num_entries_, sizeof(actual_tessdata_num_entries_)); } ASSERT_HOST(actual_tessdata_num_entries_ <= TESSDATA_NUM_ENTRIES); fread(offset_table_, sizeof(inT64), actual_tessdata_num_entries_, data_file_); if (swap_) { for (i = 0 ; i < actual_tessdata_num_entries_; ++i) { ReverseN(&offset_table_[i], sizeof(offset_table_[i])); } } if (debug_level_) { tprintf("TessdataManager loaded %d types of tesseract data files.\n", actual_tessdata_num_entries_); for (i = 0; i < actual_tessdata_num_entries_; ++i) { tprintf("Offset for type %d is %lld\n", i, offset_table_[i]); } } return true; }
// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool TrainingSample::DeSerialize(bool swap, FILE* fp) { if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false; if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false; if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false; if (!bounding_box_.DeSerialize(swap, fp)) return false; if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) return false; if (swap) { ReverseN(&class_id_, sizeof(class_id_)); ReverseN(&num_features_, sizeof(num_features_)); ReverseN(&num_micro_features_, sizeof(num_micro_features_)); } delete [] features_; features_ = new INT_FEATURE_STRUCT[num_features_]; if (fread(features_, sizeof(*features_), num_features_, fp) != num_features_) return false; delete [] micro_features_; micro_features_ = new MicroFeature[num_micro_features_]; if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_, fp) != num_micro_features_) return false; if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != kNumCNParams) return false; if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) return false; return true; }
// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. // Should be overridden by subclasses, but NOT called by their DeSerialize. bool Network::DeSerialize(bool swap, TFile* fp) { inT8 data = 0; if (fp->FRead(&data, sizeof(data), 1) != 1) return false; if (data == NT_NONE) { STRING type_name; if (!type_name.DeSerialize(swap, fp)) return false; for (data = 0; data < NT_COUNT && type_name != kTypeNames[data]; ++data) { } if (data == NT_COUNT) { tprintf("Invalid network layer type:%s\n", type_name.string()); return false; } } type_ = static_cast<NetworkType>(data); if (fp->FRead(&data, sizeof(data), 1) != 1) return false; training_ = data == TS_ENABLED ? TS_ENABLED : TS_DISABLED; if (fp->FRead(&data, sizeof(data), 1) != 1) return false; needs_to_backprop_ = data != 0; if (fp->FRead(&network_flags_, sizeof(network_flags_), 1) != 1) return false; if (fp->FRead(&ni_, sizeof(ni_), 1) != 1) return false; if (fp->FRead(&no_, sizeof(no_), 1) != 1) return false; if (fp->FRead(&num_weights_, sizeof(num_weights_), 1) != 1) return false; if (!name_.DeSerialize(swap, fp)) return false; if (swap) { ReverseN(&network_flags_, sizeof(network_flags_)); ReverseN(&ni_, sizeof(ni_)); ReverseN(&no_, sizeof(no_)); ReverseN(&num_weights_, sizeof(num_weights_)); } return true; }
// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool ICOORD::DeSerialize(bool swap, FILE* fp) { if (fread(&xcoord, sizeof(xcoord), 1, fp) != 1) return false; if (fread(&ycoord, sizeof(ycoord), 1, fp) != 1) return false; if (swap) { ReverseN(&xcoord, sizeof(xcoord)); ReverseN(&ycoord, sizeof(ycoord)); } return true; }
// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool TrainingSampleSet::FontClassInfo::DeSerialize(bool swap, FILE *fp) { if (fread(&num_raw_samples, sizeof(num_raw_samples), 1, fp) != 1) return false; if (fread(&canonical_sample, sizeof(canonical_sample), 1, fp) != 1) return false; if (fread(&canonical_dist, sizeof(canonical_dist), 1, fp) != 1) return false; if (!samples.DeSerialize(swap, fp)) return false; if (swap) { ReverseN(&num_raw_samples, sizeof(num_raw_samples)); ReverseN(&canonical_sample, sizeof(canonical_sample)); ReverseN(&canonical_dist, sizeof(canonical_dist)); } return true; }
// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool BitVector::DeSerialize(bool swap, FILE* fp) { uinT32 new_bit_size; if (fread(&new_bit_size, sizeof(new_bit_size), 1, fp) != 1) return false; if (swap) { ReverseN(&new_bit_size, sizeof(new_bit_size)); } Alloc(new_bit_size); int wordlen = WordLength(); if (fread(array_, sizeof(*array_), wordlen, fp) != wordlen) return false; if (swap) { for (int i = 0; i < wordlen; ++i) ReverseN(&array_[i], sizeof(array_[i])); } return true; }
// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool WordFeature::DeSerialize(bool swap, FILE* fp) { if (fread(&x_, sizeof(x_), 1, fp) != 1) return false; if (swap) ReverseN(&x_, sizeof(x_)); if (fread(&y_, sizeof(y_), 1, fp) != 1) return false; if (fread(&dir_, sizeof(dir_), 1, fp) != 1) return false; return true; }
// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool UnicharAndFonts::DeSerialize(bool swap, FILE* fp) { if (fread(&unichar_id, sizeof(unichar_id), 1, fp) != 1) return false; if (swap) ReverseN(&unichar_id, sizeof(unichar_id)); if (!font_ids.DeSerialize(swap, fp)) return false; return true; }
// Loads from the given memory buffer as if a file. bool TessdataManager::LoadMemBuffer(const char *name, const char *data, int size) { Clear(); data_file_name_ = name; TFile fp; fp.Open(data, size); uint32_t num_entries; if (!fp.DeSerialize(&num_entries)) return false; swap_ = num_entries > kMaxNumTessdataEntries; fp.set_swap(swap_); if (swap_) ReverseN(&num_entries, sizeof(num_entries)); if (num_entries > kMaxNumTessdataEntries) return false; GenericVector<int64_t> offset_table; offset_table.resize_no_init(num_entries); if (!fp.DeSerialize(&offset_table[0], num_entries)) return false; for (int i = 0; i < num_entries && i < TESSDATA_NUM_ENTRIES; ++i) { if (offset_table[i] >= 0) { int64_t entry_size = size - offset_table[i]; int j = i + 1; while (j < num_entries && offset_table[j] == -1) ++j; if (j < num_entries) entry_size = offset_table[j] - offset_table[i]; entries_[i].resize_no_init(entry_size); if (!fp.DeSerialize(&entries_[i][0], entry_size)) return false; } } if (entries_[TESSDATA_VERSION].empty()) { SetVersionString("Pre-4.0.0"); } is_loaded_ = true; return true; }
// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool STRING::DeSerialize(bool swap, FILE* fp) { inT32 len; if (fread(&len, sizeof(len), 1, fp) != 1) return false; if (swap) ReverseN(&len, sizeof(len)); truncate_at(len); if (fread(GetCStr(), 1, len, fp) != len) return false; return true; }
// Reads up to count elements of the given size into buffer via FRead and,
// when swap_ is set, byte-reverses each element that was read.
// Returns the value returned by FRead.
int TFile::FReadEndian(void* buffer, size_t size, int count) {
  const int num_read = FRead(buffer, size, count);
  if (!swap_) return num_read;
  char* const bytes = static_cast<char*>(buffer);
  for (int element = 0; element < num_read; ++element) {
    ReverseN(bytes + element * size, size);
  }
  return num_read;
}
// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool ImageData::DeSerialize(bool swap, TFile* fp) { if (!imagefilename_.DeSerialize(swap, fp)) return false; if (fp->FRead(&page_number_, sizeof(page_number_), 1) != 1) return false; if (swap) ReverseN(&page_number_, sizeof(page_number_)); if (!image_data_.DeSerialize(swap, fp)) return false; if (!transcription_.DeSerialize(swap, fp)) return false; // WARNING: Will not work across different endian machines. if (!boxes_.DeSerialize(swap, fp)) return false; if (!box_texts_.DeSerializeClasses(swap, fp)) return false; inT8 vertical = 0; if (fp->FRead(&vertical, sizeof(vertical), 1) != 1) return false; vertical_text_ = vertical != 0; return true; }
// Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool ImageData::DeSerialize(bool swap, FILE* fp) { if (!imagefilename_.DeSerialize(swap, fp)) return false; if (fread(&page_number_, sizeof(page_number_), 1, fp) != 1) return false; if (swap) ReverseN(&page_number_, sizeof(page_number_)); if (!image_data_.DeSerialize(swap, fp)) return false; if (!transcription_.DeSerialize(swap, fp)) return false; // WARNING: Will not work across different endian machines. if (!boxes_.DeSerialize(swap, fp)) return false; if (!box_texts_.DeSerializeClasses(swap, fp)) return false; if (!features_.DeSerializeClasses(swap, fp)) return false; if (!side_data_.DeSerialize(swap, fp)) return false; STRING box_str; for (int i = 0; i < box_texts_.size(); ++i) { box_str += box_texts_[i]; } partial_boxes_ = !box_texts_.empty() && transcription_ != box_str; return true; }