/**
 * Tries to detect the language of a book from the first bytes of its stream,
 * restricting the detector to the given encoding.
 *
 * If the book already has a language and `force` is false, nothing is read
 * and the function returns true immediately. Otherwise, when language
 * auto-detection is enabled in the plugin collection and the stream can be
 * opened, up to 64 KiB are read and handed to ZLLanguageDetector. The
 * book's language is always (re)written before returning: with the detected
 * language when one was found, otherwise with its previous value.
 *
 * @param book     book whose language is read and updated
 * @param stream   source of the text sample; opened and closed here
 * @param encoding encoding passed to the detector
 * @param force    when true, run detection even if a language is already set
 * @return true if the language was already known (and `force` is false) or
 *         the detector returned a result; false otherwise
 */
bool FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force) {
	std::string language = book.language();
	if (!force && !language.empty()) {
		return true;
	}

	bool detected = false;

	PluginCollection &collection = PluginCollection::Instance();
	if (collection.isLanguageAutoDetectEnabled() && stream.open()) {
		static const int BUFSIZE = 65536;
		// The sample buffer is owned by a std::string so that it is released
		// on every exit path, including an exception thrown by the detector.
		std::string buffer(BUFSIZE, '\0');
		char *const data = &buffer[0];
		const std::size_t size = stream.read(data, BUFSIZE);
		stream.close();

		// NOTE(review): -20000 is forwarded as the detector's last argument;
		// presumably a matching threshold -- confirm against ZLLanguageDetector.
		shared_ptr<ZLLanguageDetector::LanguageInfo> info =
			ZLLanguageDetector().findInfoForEncoding(encoding, data, size, -20000);
		if (!info.isNull()) {
			detected = true;
			if (!info->Language.empty()) {
				language = info->Language;
			}
		}
	}

	book.setLanguage(language);
	return detected;
}
/**
 * Fills in the language of a book that does not have one yet.
 *
 * Returns immediately when the book already has a language. Otherwise the
 * language starts out as the collection's default language; if language
 * auto-detection is enabled and the stream can be opened, up to 64 KiB are
 * read and passed to ZLLanguageDetector, and a non-empty detected language
 * replaces the default. The resulting language is stored in the book.
 *
 * @param book   book whose language is read and updated
 * @param stream source of the text sample; opened and closed here
 */
void FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream) {
	if (!book.language().empty()) {
		return;
	}

	PluginCollection &collection = PluginCollection::Instance();
	// The book has no language at this point, so start from the default.
	std::string language = collection.DefaultLanguageOption.value();

	if (collection.LanguageAutoDetectOption.value() && stream.open()) {
		static const int BUFSIZE = 65536;
		// The sample buffer is owned by a std::string so that it is released
		// on every exit path, including an exception thrown by the detector.
		std::string buffer(BUFSIZE, '\0');
		char *const data = &buffer[0];
		const size_t size = stream.read(data, BUFSIZE);
		stream.close();

		shared_ptr<ZLLanguageDetector::LanguageInfo> info =
			ZLLanguageDetector().findInfo(data, size);
		if (!info.isNull() && !info->Language.empty()) {
			language = info->Language;
		}
	}

	book.setLanguage(language);
}
/**
 * Fills in the encoding and/or language of a book when at least one of them
 * is missing.
 *
 * Returns immediately when the book already has both an encoding and a
 * language. Otherwise each missing value starts out as the corresponding
 * default from the plugin collection. If language auto-detection is enabled
 * and the stream can be opened, up to 64 KiB are read and passed to
 * ZLLanguageDetector. On a successful detection:
 *   - a non-empty detected language replaces the current language;
 *   - the detected encoding always replaces the current encoding (even one
 *     that was already set on the book), with "US-ASCII" and "ISO-8859-1"
 *     mapped to "windows-1252".
 * The resulting encoding and language are stored in the book.
 *
 * @param book   book whose encoding and language are read and updated
 * @param stream source of the text sample; opened and closed here
 */
void FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream) {
	std::string language = book.language();
	std::string encoding = book.encoding();
	if (!encoding.empty() && !language.empty()) {
		return;
	}

	PluginCollection &collection = PluginCollection::Instance();
	if (language.empty()) {
		language = collection.DefaultLanguageOption.value();
	}
	if (encoding.empty()) {
		encoding = collection.DefaultEncodingOption.value();
	}

	if (collection.LanguageAutoDetectOption.value() && stream.open()) {
		static const int BUFSIZE = 65536;
		// The sample buffer is owned by a std::string so that it is released
		// on every exit path, including an exception thrown by the detector.
		std::string buffer(BUFSIZE, '\0');
		char *const data = &buffer[0];
		const size_t size = stream.read(data, BUFSIZE);
		stream.close();

		shared_ptr<ZLLanguageDetector::LanguageInfo> info =
			ZLLanguageDetector().findInfo(data, size);
		if (!info.isNull()) {
			if (!info->Language.empty()) {
				language = info->Language;
			}
			encoding = info->Encoding;
			// These two detected encodings are stored as windows-1252 instead.
			if ((encoding == "US-ASCII") || (encoding == "ISO-8859-1")) {
				encoding = "windows-1252";
			}
		}
	}

	book.setEncoding(encoding);
	book.setLanguage(language);
}
/**
 * Tries to detect the encoding and language of a book from the first bytes
 * of its stream.
 *
 * If the book already has an encoding and `force` is false, nothing is read
 * and the function returns true immediately. Otherwise an empty encoding
 * starts out as ZLEncodingConverter::UTF8. When language auto-detection is
 * enabled in the plugin collection and the stream can be opened, up to
 * 64 KiB are read and passed to ZLLanguageDetector. On a successful
 * detection:
 *   - a non-empty detected language replaces the current language;
 *   - the detected encoding replaces the current encoding, with
 *     ZLEncodingConverter::ASCII and "iso-8859-1" mapped to "windows-1252".
 * The resulting encoding and language are always written back to the book.
 *
 * @param book   book whose encoding and language are read and updated
 * @param stream source of the text sample; opened and closed here
 * @param force  when true, run detection even if an encoding is already set
 * @return true if the encoding was already known (and `force` is false) or
 *         the detector returned a result; false otherwise
 */
bool FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force) {
	std::string language = book.language();
	std::string encoding = book.encoding();
	if (!force && !encoding.empty()) {
		return true;
	}

	bool detected = false;

	PluginCollection &collection = PluginCollection::Instance();
	if (encoding.empty()) {
		encoding = ZLEncodingConverter::UTF8;
	}

	if (collection.isLanguageAutoDetectEnabled() && stream.open()) {
		static const int BUFSIZE = 65536;
		// The sample buffer is owned by a std::string so that it is released
		// on every exit path, including an exception thrown by the detector.
		std::string buffer(BUFSIZE, '\0');
		char *const data = &buffer[0];
		const std::size_t size = stream.read(data, BUFSIZE);
		stream.close();

		shared_ptr<ZLLanguageDetector::LanguageInfo> info =
			ZLLanguageDetector().findInfo(data, size);
		if (!info.isNull()) {
			detected = true;
			if (!info->Language.empty()) {
				language = info->Language;
			}
			encoding = info->Encoding;
			// These two detected encodings are stored as windows-1252 instead.
			if (encoding == ZLEncodingConverter::ASCII || encoding == "iso-8859-1") {
				encoding = "windows-1252";
			}
		}
	}

	book.setEncoding(encoding);
	book.setLanguage(language);
	return detected;
}