// Fills in the language of a book that does not have one yet.
//
// The collection's default language is used as the starting value. When
// automatic detection is enabled and the stream can be opened, up to 64 KiB
// of the stream are handed to ZLLanguageDetector, and a non-empty detected
// language replaces the default. A book that already has a language is left
// untouched.
void FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream) {
    if (!book.language().empty()) {
        return;
    }

    PluginCollection &plugins = PluginCollection::Instance();
    // The book has no language at this point, so the default always applies.
    std::string chosen = plugins.DefaultLanguageOption.value();

    if (plugins.LanguageAutoDetectOption.value() && stream.open()) {
        static const int SAMPLE_CAPACITY = 65536;
        char *sample = new char[SAMPLE_CAPACITY];
        const size_t sampleLength = stream.read(sample, SAMPLE_CAPACITY);
        stream.close();

        shared_ptr<ZLLanguageDetector::LanguageInfo> guess =
            ZLLanguageDetector().findInfo(sample, sampleLength);
        delete[] sample;

        if (!guess.isNull() && !guess->Language.empty()) {
            chosen = guess->Language;
        }
    }

    book.setLanguage(chosen);
}
void EReaderPlugin::readDocumentInternal(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const { if (!stream.open()) { //TODO maybe anything else opens stream return; } BookReader bookReader(model); PmlBookReader pmlBookReader(bookReader, format, encoding); bookReader.setMainTextModel(); pmlBookReader.readDocument(stream); EReaderStream &estream = (EReaderStream&)stream; const std::map<std::string, EReaderStream::ImageInfo>& imageIds = estream.images(); for(std::map<std::string, EReaderStream::ImageInfo>::const_iterator it = imageIds.begin(); it != imageIds.end(); ++it) { const std::string id = it->first; bookReader.addImage(id, new ZLFileImage(ZLFile(file.path(), it->second.Type), it->second.Offset, it->second.Size)); } const std::map<std::string, unsigned short>& footnoteIds = estream.footnotes(); for(std::map<std::string, unsigned short>::const_iterator it = footnoteIds.begin(); it != footnoteIds.end(); ++it) { const std::string id = it->first; if (estream.switchStreamDestination(EReaderStream::FOOTNOTE, id)) { bookReader.setFootnoteTextModel(id); bookReader.addHyperlinkLabel(id); pmlBookReader.readDocument(estream); } } stream.close(); }
// Detects the language of `book` for text in the given `encoding`.
//
// Returns true immediately when the book already has a language and `force`
// is false. Otherwise, if auto-detection is enabled and the stream opens, up
// to 64 KiB are passed to ZLLanguageDetector::findInfoForEncoding. The book's
// language is always (re)written; the return value tells whether the detector
// produced a result.
bool FormatPlugin::detectLanguage(Book &book, ZLInputStream &stream, const std::string &encoding, bool force) {
    std::string chosen = book.language();
    if (!(force || chosen.empty())) {
        return true;
    }

    PluginCollection &plugins = PluginCollection::Instance();
    if (!plugins.isLanguageAutoDetectEnabled() || !stream.open()) {
        // Nothing to analyse: keep whatever language the book had.
        book.setLanguage(chosen);
        return false;
    }

    static const int SAMPLE_CAPACITY = 65536;
    char *sample = new char[SAMPLE_CAPACITY];
    const std::size_t sampleLength = stream.read(sample, SAMPLE_CAPACITY);
    stream.close();

    shared_ptr<ZLLanguageDetector::LanguageInfo> guess =
        ZLLanguageDetector().findInfoForEncoding(encoding, sample, sampleLength, -20000);
    delete[] sample;

    const bool found = !guess.isNull();
    if (found && !guess->Language.empty()) {
        chosen = guess->Language;
    }

    book.setLanguage(chosen);
    return found;
}
// Builds the entry index for the zip container `containerName` by walking
// the headers found in `containerStream`.
//
// The container's last-modified time is recorded first. For each local-file
// header whose name can be read completely, the name is passed through
// AndroidUtil::convertNonUtfString and an Info record (data offset,
// compression method, sizes) is stored under it. After the entry has been
// skipped, the uncompressed size is refreshed from the header, because
// skipEntry may update it. The cache stays empty if the stream cannot be opened.
ZLZipEntryCache::ZLZipEntryCache(const std::string &containerName, ZLInputStream &containerStream) : myContainerName(containerName) {
    //ZLLogger::Instance().println("ZipEntryCache", "creating cache for " + containerName);
    myLastModifiedTime = ZLFile(containerName).lastModified();
    if (!containerStream.open()) {
        return;
    }

    for (ZLZipHeader header; header.readFrom(containerStream); ) {
        Info *entry = 0;
        if (header.Signature == (unsigned long)ZLZipHeader::SignatureLocalFile) {
            std::string entryName(header.NameLength, '\0');
            const unsigned int nameBytesRead =
                (unsigned int)containerStream.read((char*)entryName.data(), header.NameLength);
            if (nameBytesRead == header.NameLength) {
                entryName = AndroidUtil::convertNonUtfString(entryName);
                entry = &myInfoMap[entryName];
                entry->Offset = containerStream.offset() + header.ExtraLength;
                entry->CompressionMethod = header.CompressionMethod;
                entry->CompressedSize = header.CompressedSize;
                entry->UncompressedSize = header.UncompressedSize;
            }
        }

        ZLZipHeader::skipEntry(containerStream, header);

        if (entry != 0) {
            entry->UncompressedSize = header.UncompressedSize;
        }
    }

    containerStream.close();
}
// Builds the entry index by walking the zip headers found in `baseStream`.
//
// Each local-file header whose name can be read completely gets an Info
// record (data offset, compression method, sizes) keyed by the raw entry
// name. The uncompressed size is copied again after the entry has been
// skipped, since the header is passed to skipEntry by reference and may be
// updated there. The cache stays empty if the stream cannot be opened.
ZLZipEntryCache::ZLZipEntryCache(ZLInputStream &baseStream) {
    if (!baseStream.open()) {
        return;
    }

    ZLZipHeader header;
    while (header.readFrom(baseStream)) {
        Info *current = 0;

        const bool isLocalFile = header.Signature == ZLZipHeader::SignatureLocalFile;
        if (isLocalFile) {
            std::string name(header.NameLength, '\0');
            const unsigned int got = (unsigned int)baseStream.read((char*)name.data(), header.NameLength);
            if (got == header.NameLength) {
                current = &myInfoMap[name];
                current->Offset = baseStream.offset() + header.ExtraLength;
                current->CompressionMethod = header.CompressionMethod;
                current->CompressedSize = header.CompressedSize;
                current->UncompressedSize = header.UncompressedSize;
            }
        }

        ZLZipHeader::skipEntry(baseStream, header);

        if (current != 0) {
            current->UncompressedSize = header.UncompressedSize;
        }
    }

    baseStream.close();
}
// Fills in the encoding and/or language of a book that lacks either one.
//
// Missing values start out as the collection defaults. When auto-detection
// is enabled and the stream opens, up to 64 KiB are analysed by
// ZLLanguageDetector: a non-empty detected language overrides the language,
// and the detected encoding always overrides the encoding, with "US-ASCII"
// and "ISO-8859-1" mapped to "windows-1252". Nothing is changed when the
// book already has both values.
void FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream) {
    std::string bookLanguage = book.language();
    std::string bookEncoding = book.encoding();
    if (!(bookEncoding.empty() || bookLanguage.empty())) {
        return;
    }

    PluginCollection &plugins = PluginCollection::Instance();
    if (bookLanguage.empty()) {
        bookLanguage = plugins.DefaultLanguageOption.value();
    }
    if (bookEncoding.empty()) {
        bookEncoding = plugins.DefaultEncodingOption.value();
    }

    if (plugins.LanguageAutoDetectOption.value() && stream.open()) {
        static const int SAMPLE_CAPACITY = 65536;
        char *sample = new char[SAMPLE_CAPACITY];
        const size_t sampleLength = stream.read(sample, SAMPLE_CAPACITY);
        stream.close();

        shared_ptr<ZLLanguageDetector::LanguageInfo> guess =
            ZLLanguageDetector().findInfo(sample, sampleLength);
        delete[] sample;

        if (!guess.isNull()) {
            if (!guess->Language.empty()) {
                bookLanguage = guess->Language;
            }
            bookEncoding = guess->Encoding;
            const bool isPlainLatin = (bookEncoding == "US-ASCII") || (bookEncoding == "ISO-8859-1");
            if (isPlainLatin) {
                bookEncoding = "windows-1252";
            }
        }
    }

    book.setEncoding(bookEncoding);
    book.setLanguage(bookLanguage);
}
// Reads one zip record header from the current stream position.
//
// Two record kinds are understood:
//  - local file header: all fields are read; for a stored entry
//    (CompressionMethod == 0) whose two sizes disagree, a message is logged
//    and the uncompressed size is used for both. Succeeds when exactly
//    30 bytes were consumed and the name is not empty.
//  - data record (SignatureData): CRC and both sizes are read, name and extra
//    lengths are reset to 0. Succeeds when exactly 16 bytes were consumed.
// Any other signature yields false.
bool ZLZipHeader::readFrom(ZLInputStream &stream) {
    const size_t origin = stream.offset();
    Signature = readLong(stream);

    switch (Signature) {
        case SignatureLocalFile:
        {
            Version = readShort(stream);
            Flags = readShort(stream);
            CompressionMethod = readShort(stream);
            ModificationTime = readShort(stream);
            ModificationDate = readShort(stream);
            CRC32 = readLong(stream);
            CompressedSize = readLong(stream);
            UncompressedSize = readLong(stream);
            if (CompressionMethod == 0 && CompressedSize != UncompressedSize) {
                ZLLogger::Instance().println("zip", "Different compressed & uncompressed size for stored entry; the uncompressed one will be used.");
                CompressedSize = UncompressedSize;
            }
            NameLength = readShort(stream);
            ExtraLength = readShort(stream);

            const bool wholeHeaderRead = stream.offset() == origin + 30;
            return wholeHeaderRead && NameLength != 0;
        }
        case SignatureData:
        {
            CRC32 = readLong(stream);
            CompressedSize = readLong(stream);
            UncompressedSize = readLong(stream);
            NameLength = 0;
            ExtraLength = 0;
            return stream.offset() == origin + 16;
        }
        default:
            return false;
    }
}
// Parses the whole stream as a text document.
//
// The start/end document handlers bracket the actual parsing, which is
// delegated to myCore. Nothing happens if the stream cannot be opened.
void TxtReader::readDocument(ZLInputStream &stream) {
    if (stream.open()) {
        startDocumentHandler();
        myCore->readDocument(stream);
        endDocumentHandler();
        stream.close();
    }
}
void PalmDocPlugin::readDocumentInternal(const ZLFile &file, BookModel &model, const PlainTextFormat &format, const std::string &encoding, ZLInputStream &stream) const { stream.open(); bool readAsPalmDoc = ((PalmDocStream&)stream).hasExtraSections(); stream.close(); if (readAsPalmDoc) { MobipocketHtmlBookReader(file, model, format, encoding).readDocument(stream); } else { SimplePdbPlugin::readDocumentInternal(file, model, format, encoding, stream); } }
// Reads one zip record from the current stream position and reports whether
// the expected number of bytes was consumed.
//
// The record kind is chosen by the 4-byte signature:
//  - SignatureCentralDirectory: fields are read, then the rest of the record
//    (12 fixed bytes plus name, extra field and the third length) is skipped.
//  - SignatureLocalFile: fields are read; true requires exactly 30 bytes
//    consumed and a non-empty name.
//  - SignatureEndOfCentralDirectory: 16 bytes and a trailing variable-length
//    part are skipped; UncompressedSize is reset to 0.
//  - SignatureData: CRC and both sizes are read; name/extra lengths become 0.
//  - anything else: true only if the 4 signature bytes could be read.
bool ZLZipHeader::readFrom(ZLInputStream &stream) {
    std::size_t startOffset = stream.offset();
    Signature = readLong(stream);
    switch (Signature) {
        default:
            // Unknown record: succeed only if the signature itself was fully read.
            return stream.offset() == startOffset + 4;
        case SignatureCentralDirectory:
        {
            // Note: Version is read as a 4-byte value here, unlike the local-file case.
            Version = readLong(stream);
            Flags = readShort(stream);
            CompressionMethod = readShort(stream);
            ModificationTime = readShort(stream);
            ModificationDate = readShort(stream);
            CRC32 = readLong(stream);
            CompressedSize = readLong(stream);
            UncompressedSize = readLong(stream);
            // A stored (method 0) entry must have equal sizes; trust the uncompressed one.
            if (CompressionMethod == 0 && CompressedSize != UncompressedSize) {
                ZLLogger::Instance().println("zip", "Different compressed & uncompressed size for stored entry; the uncompressed one will be used.");
                CompressedSize = UncompressedSize;
            }
            NameLength = readShort(stream);
            ExtraLength = readShort(stream);
            const unsigned short toSkip = readShort(stream);
            stream.seek(12 + NameLength + ExtraLength + toSkip, false);
            // NOTE(review): assuming readShort consumes 2 bytes and seek(n, false)
            // advances by n, the reads above total 46 + NameLength + ExtraLength + toSkip
            // bytes, so this comparison against 42 would always be false — confirm
            // whether stopping at the central directory is intentional.
            return stream.offset() == startOffset + 42 + NameLength + ExtraLength + toSkip;
        }
        case SignatureLocalFile:
            Version = readShort(stream);
            Flags = readShort(stream);
            CompressionMethod = readShort(stream);
            ModificationTime = readShort(stream);
            ModificationDate = readShort(stream);
            CRC32 = readLong(stream);
            CompressedSize = readLong(stream);
            UncompressedSize = readLong(stream);
            // A stored (method 0) entry must have equal sizes; trust the uncompressed one.
            if (CompressionMethod == 0 && CompressedSize != UncompressedSize) {
                ZLLogger::Instance().println("zip", "Different compressed & uncompressed size for stored entry; the uncompressed one will be used.");
                CompressedSize = UncompressedSize;
            }
            NameLength = readShort(stream);
            ExtraLength = readShort(stream);
            // The name and extra field themselves are left unread for the caller.
            return stream.offset() == startOffset + 30 && NameLength != 0;
        case SignatureEndOfCentralDirectory:
        {
            stream.seek(16, false);
            const unsigned short toSkip = readShort(stream);
            stream.seek(toSkip, false);
            UncompressedSize = 0;
            // NOTE(review): under the same assumptions as above, 4 + 16 + 2 + toSkip
            // bytes were consumed (22 + toSkip), not 18 + toSkip — confirm.
            return stream.offset() == startOffset + 18 + toSkip;
        }
        case SignatureData:
            CRC32 = readLong(stream);
            CompressedSize = readLong(stream);
            UncompressedSize = readLong(stream);
            NameLength = 0;
            ExtraLength = 0;
            return stream.offset() == startOffset + 16;
    }
}
// Moves the stream past the entry described by `header`.
//
// When bit 0x08 of the flags is clear, the extra field and the compressed
// data are skipped in one seek using the size from the header. When the bit
// is set, the extra field is skipped, the data is inflated (and discarded) in
// 2048-byte steps until a short result marks the end, and a further 16 bytes
// following the data are skipped.
void ZLZipHeader::skipEntry(ZLInputStream &stream, const ZLZipHeader &header) {
    const bool sizeKnownUpFront = (header.Flags & 0x08) == 0;
    if (sizeKnownUpFront) {
        stream.seek(header.ExtraLength + header.CompressedSize);
        return;
    }

    stream.seek(header.ExtraLength);

    ZLZDecompressor inflater((size_t)-1);
    size_t produced;
    do {
        // A null target buffer means the output is thrown away.
        produced = inflater.decompress(stream, 0, 2048);
    } while (produced == 2048);

    stream.seek(16);
}
// Reads the stream in 2048-byte chunks and feeds it to the document handlers.
//
// Each run of text is converted with myConverter and passed to
// characterDataHandler(). Every line break ("\n", "\r" or "\r\n") triggers
// newLineHandler(), and the text passed on includes the terminating newline
// character when the line is not empty. Whitespace other than tab and line
// breaks is replaced by a plain space. Reading stops after the first short
// read. Nothing happens if the stream cannot be opened.
//
// A "\r\n" pair split across two chunks is treated as a single line break:
// the "\n" that opens a chunk is dropped when the previous chunk ended with
// "\r" (it used to be reported as a second, empty line).
void TxtReader::readDocument(ZLInputStream &stream) {
    if (!stream.open()) {
        return;
    }

    startDocumentHandler();

    const size_t BUFSIZE = 2048;
    char *buffer = new char[BUFSIZE];
    std::string str;
    size_t length;
    // True when the previous chunk ended with a '\r' that has already been
    // reported as a line break.
    bool previousChunkEndedWithCR = false;
    do {
        length = stream.read(buffer, BUFSIZE);
        char *start = buffer;
        const char *end = buffer + length;

        // Second half of a CRLF pair split by the chunk boundary: skip it.
        if (previousChunkEndedWithCR && length > 0 && *start == '\n') {
            ++start;
        }
        // The last byte is never rewritten to or from '\r' below, so it is
        // safe to inspect it before processing the chunk.
        previousChunkEndedWithCR = length > 0 && *(end - 1) == '\r';

        for (char *ptr = start; ptr != end; ++ptr) {
            if (*ptr == '\n' || *ptr == '\r') {
                bool skipNewLine = false;
                if (*ptr == '\r' && (ptr + 1) != end && *(ptr + 1) == '\n') {
                    // CRLF inside one chunk: report it as a single '\n'.
                    skipNewLine = true;
                    *ptr = '\n';
                }
                if (start != ptr) {
                    str.erase();
                    myConverter->convert(str, start, ptr + 1);
                    characterDataHandler(str);
                }
                if (skipNewLine) {
                    ++ptr;
                }
                start = ptr + 1;
                newLineHandler();
            } else if (isspace((unsigned char)*ptr)) {
                if (*ptr != '\t') {
                    *ptr = ' ';
                }
            }
        }
        // Text after the last line break of this chunk.
        if (start != end) {
            str.erase();
            myConverter->convert(str, start, end);
            characterDataHandler(str);
        }
    } while (length == BUFSIZE);
    delete[] buffer;

    endDocumentHandler();

    stream.close();
}
// Parses a complete style sheet from `stream`.
//
// The stream is read in 1024-byte pieces and each piece is forwarded to
// parse(const char*, int) until a read yields zero bytes. Nothing happens if
// the stream cannot be opened.
void StyleSheetParser::parse(ZLInputStream &stream) {
    if (!stream.open()) {
        return;
    }

    char *chunk = new char[1024];
    for (int got = stream.read(chunk, 1024); got != 0; got = stream.read(chunk, 1024)) {
        parse(chunk, got);
    }
    delete[] chunk;

    stream.close();
}
// Indexes the regular files of a tar archive.
//
// Headers are read one after another. Regular files are stored in
// myHeaderMap under their name, and the file data is skipped with its size
// rounded up to a multiple of 512 bytes. The header object is erased before
// the next read. The cache stays empty if the stream cannot be opened.
ZLTarHeaderCache::ZLTarHeaderCache(ZLInputStream &baseStream) {
    if (baseStream.open()) {
        ZLTarHeader header;
        for (; header.read(baseStream); header.erase()) {
            if (header.IsRegularFile) {
                myHeaderMap[header.Name] = header;
            }
            // Round the data size up to the next 0x200 boundary.
            baseStream.seek((header.Size + 0x1ff) & -0x200, false);
        }
        baseStream.close();
    }
}
// Reads a local file header from the current stream position.
//
// All fields are read unconditionally. The result is true only when the
// signature is 0x04034B50, exactly 30 bytes were consumed, and the entry
// name is not empty.
bool ZLZipHeader::readFrom(ZLInputStream &stream) {
    const size_t origin = stream.offset();

    Signature = readLong(stream);
    Version = readShort(stream);
    Flags = readShort(stream);
    CompressionMethod = readShort(stream);
    ModificationTime = readShort(stream);
    ModificationDate = readShort(stream);
    CRC32 = readLong(stream);
    CompressedSize = readLong(stream);
    UncompressedSize = readLong(stream);
    NameLength = readShort(stream);
    ExtraLength = readShort(stream);

    if (Signature != 0x04034B50) {
        return false;
    }
    if (stream.offset() != origin + 30) {
        return false;
    }
    return NameLength != 0;
}
// Decompresses one record of `compressedSize` bytes from `stream` into
// `targetBuffer` (capacity `maxUncompressedSize`) and returns the number of
// bytes written.
//
// Returns 0 without touching the stream when `compressedSize` is 0, when an
// earlier call flagged the file as corrupted, or when `targetBuffer` is null
// (in that case the target pointers are reset). If the trailing entries
// reported by sizeOfTrailingEntries() cover the whole record, the file is
// flagged as corrupted. If the read comes up short, nothing is decoded.
size_t HuffDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, size_t compressedSize, size_t maxUncompressedSize) {
    if (compressedSize == 0 || myErrorCode == ERROR_CORRUPTED_FILE) {
        return 0;
    }

    if (targetBuffer == 0) {
        myTargetBuffer = 0;
        myTargetBufferEnd = 0;
        myTargetBufferPtr = 0;
        return myTargetBufferPtr - myTargetBuffer;
    }

    unsigned char *packed = new unsigned char[compressedSize];
    myTargetBuffer = targetBuffer;
    myTargetBufferEnd = targetBuffer + maxUncompressedSize;
    myTargetBufferPtr = targetBuffer;

    const bool recordComplete = stream.read((char*)packed, compressedSize) == compressedSize;
    if (recordComplete) {
        const size_t trailSize = sizeOfTrailingEntries(packed, compressedSize);
        if (trailSize >= compressedSize) {
            myErrorCode = ERROR_CORRUPTED_FILE;
        } else {
            bitsDecompress(BitReader(packed, compressedSize - trailSize));
        }
    }
    delete[] packed;

    return myTargetBufferPtr - myTargetBuffer;
}
// Reads `compressedSize` bytes from `stream`, decodes them into
// `targetBuffer` (capacity `maxUncompressedSize`) and returns the number of
// bytes written.
//
// Each source byte ("token") is classified through the TOKEN_CODE table
// (defined outside this block) into one of four cases handled below.
// Decoding stops when either buffer is exhausted or when a token would read
// or write past the end of a buffer. If the record cannot be read in full,
// nothing is decoded and 0 is returned.
size_t DocDecompressor::decompress(ZLInputStream &stream, char *targetBuffer, size_t compressedSize, size_t maxUncompressedSize) {
    const unsigned char *sourceBuffer = new unsigned char[compressedSize];
    const unsigned char *sourceBufferEnd = sourceBuffer + compressedSize;
    const unsigned char *sourcePtr = sourceBuffer;

    unsigned char *targetBufferEnd = (unsigned char*)targetBuffer + maxUncompressedSize;
    unsigned char *targetPtr = (unsigned char*)targetBuffer;

    if (stream.read((char*)sourceBuffer, compressedSize) == compressedSize) {
        unsigned char token;
        unsigned short copyLength, N, shift;
        unsigned char *shifted;

        while ((sourcePtr < sourceBufferEnd) && (targetPtr < targetBufferEnd)) {
            token = *(sourcePtr++);
            switch (TOKEN_CODE[token]) {
                case 0:
                    // The token is copied to the output as is.
                    *(targetPtr++) = token;
                    break;
                case 1:
                    // The token is a count: that many source bytes are copied verbatim.
                    if ((sourcePtr + token > sourceBufferEnd) || (targetPtr + token > targetBufferEnd)) {
                        goto endOfLoop;
                    }
                    memcpy(targetPtr, sourcePtr, token);
                    sourcePtr += token;
                    targetPtr += token;
                    break;
                case 2:
                    // Expands to a space followed by the token with its top bit flipped.
                    if (targetPtr + 2 > targetBufferEnd) {
                        goto endOfLoop;
                    }
                    *(targetPtr++) = ' ';
                    *(targetPtr++) = token ^ 0x80;
                    break;
                case 3:
                    // Two-byte back-reference: the token and the next byte form N.
                    if (sourcePtr + 1 > sourceBufferEnd) {
                        goto endOfLoop;
                    }
                    N = 256 * token + *(sourcePtr++);
                    // Low 3 bits give the length (3..10 bytes) ...
                    copyLength = (N & 7) + 3;
                    if (targetPtr + copyLength > targetBufferEnd) {
                        goto endOfLoop;
                    }
                    // ... and the next 11 bits the distance back into the output.
                    shift = (N & 0x3fff) / 8;
                    shifted = targetPtr - shift;
                    // A reference pointing before the start of the output is ignored.
                    if ((char*)shifted >= targetBuffer) {
                        // Byte-by-byte copy: source and destination ranges may overlap.
                        for (short i = 0; i < copyLength; i++) {
                            *(targetPtr++) = *(shifted++);
                        }
                    }
                    break;
            }
        }
    }
endOfLoop:

    delete[] sourceBuffer;
    return targetPtr - (unsigned char*)targetBuffer;
}
// Reads a 16-bit little-endian unsigned value from `stream`.
//
// The buffer is zero-initialised so that a short or failed read decodes the
// bytes that were not read as 0 instead of reading uninitialised memory.
static unsigned short readUnsignedWord(ZLInputStream &stream) {
    unsigned char bytes[2] = { 0, 0 };
    stream.read(reinterpret_cast<char*>(bytes), 2);
    unsigned short result = bytes[1];
    result = result << 8;
    result += bytes[0];
    return result;
}
// Reads a 32-bit little-endian unsigned value from `stream` into `N`.
//
// The buffer is zero-initialised so that a short or failed read decodes the
// bytes that were not read as 0 instead of reading uninitialised memory.
void PdbUtil::readUnsignedLongLE(ZLInputStream &stream, unsigned long &N) {
    unsigned char data[4] = { 0, 0, 0, 0 };
    stream.read(reinterpret_cast<char*>(data), 4);
    N = (((unsigned long)data[3]) << 24) +
        (((unsigned long)data[2]) << 16) +
        (((unsigned long)data[1]) << 8) +
        (unsigned long)data[0];
}
// Reads a 16-bit big-endian unsigned value from `stream` into `N`.
//
// The buffer is zero-initialised so that a short or failed read decodes the
// bytes that were not read as 0 instead of reading uninitialised memory.
void PdbUtil::readUnsignedShort(ZLInputStream &stream, unsigned short &N) {
    unsigned char data[2] = { 0, 0 };
    stream.read(reinterpret_cast<char*>(data), 2);
    N = (((unsigned short)data[0]) << 8) + data[1];
}
// Reads a PPL document from `stream` into the book model.
//
// After the 5-byte "PPL\r\n" signature is skipped, the stream is read in
// BUFFER_SIZE chunks. Text up to each '\n' is appended (converted) to the
// current paragraph and the paragraph is flushed with addParagraph(). Text
// after the last '\n' of a chunk is carried over to the next chunk. Returns
// false if the stream cannot be opened, true otherwise.
//
// Line ends are located with a length-bounded memchr() rather than strchr(),
// so a NUL byte inside the data no longer hides the newlines that follow it
// in the same chunk.
bool PPLBookReader::readDocument(ZLInputStream &stream) {
    std::cout<<"PPLBookReader::readDocument\n";
    if (!stream.open()) {
        return false;
    }

    myModelReader.setMainTextModel();
    myModelReader.pushKind(REGULAR);
    myCurrentParagraph.erase();
    myEmptyLineCounter = 0;

    // "PPL\r\n"
    stream.seek(5);

    size_t size;
    do {
        size = stream.read(myBuffer, BUFFER_SIZE);
        // Kept from the original: the chunk stays NUL-terminated.
        // NOTE(review): this writes myBuffer[BUFFER_SIZE] on a full read, so it
        // relies on myBuffer holding at least BUFFER_SIZE + 1 bytes — confirm.
        myBuffer[size] = '\0';

        const char *start = myBuffer;
        const char *end = myBuffer + size;
        const char *eol;
        do {
            eol = static_cast<const char*>(memchr(start, '\n', end - start));
            if (eol != 0) {
                if (start < eol) {
                    myConverter->convert(myCurrentParagraph, start, eol);
                }
                addParagraph();
                start = eol + 1;
            } else {
                // No further line end in this chunk: keep the tail for later.
                if (start < end) {
                    myConverter->convert(myCurrentParagraph, start, end);
                }
            }
        } while (eol != 0);
    } while (size == BUFFER_SIZE);

    // Flush whatever followed the last line end.
    addParagraph();

    stream.close();

    return true;
}
// Reads a 32-bit little-endian unsigned value from `stream`.
//
// The buffer is zero-initialised so that a short or failed read decodes the
// bytes that were not read as 0 instead of reading uninitialised memory.
// Callers that need to detect truncation compare stream offsets afterwards.
unsigned long ZLZipHeader::readLong(ZLInputStream &stream) {
    unsigned char bytes[4] = { 0, 0, 0, 0 };
    stream.read(reinterpret_cast<char*>(bytes), 4);

    return
        (((unsigned long)bytes[3]) << 24) +
        (((unsigned long)bytes[2]) << 16) +
        (((unsigned long)bytes[1]) << 8) +
        (unsigned long)bytes[0];
}
// Reads a variable-length integer from `stream`.
//
// The value is stored 7 bits per byte, most significant group first. A set
// top bit means another byte follows. Decoding also stops when the stream
// runs out of data, returning what has been accumulated so far. Previously a
// failed read left `part` uninitialised, which is undefined behaviour and
// could keep the loop running forever at end of stream.
static unsigned long long readEncodedInteger(ZLInputStream &stream) {
    unsigned long long result = 0;
    char part = 0;
    do {
        if (stream.read(&part, 1) != 1) {
            break;
        }
        result = result << 7;
        result += part & 0x7F;
    } while (part & -0x80);
    return result;
}
// Moves the stream past the data of the entry described by `header`.
//
// Only local-file records are handled; any other signature is a no-op.
// When flag bit 0x08 is set and the entry is not stored (method != 0), the
// extra field is skipped and the data is inflated (and discarded) in
// 2048-byte steps until a short result marks the end. Every step's output
// size is added to header.UncompressedSize. Otherwise the extra field and
// the compressed data are skipped in one seek using header.CompressedSize.
void ZLZipHeader::skipEntry(ZLInputStream &stream, ZLZipHeader &header) {
    if (header.Signature != SignatureLocalFile) {
        return;
    }

    const bool mustInflateToFindEnd =
        (header.Flags & 0x08) == 0x08 && header.CompressionMethod != 0;
    if (!mustInflateToFindEnd) {
        stream.seek(header.ExtraLength + header.CompressedSize, false);
        return;
    }

    stream.seek(header.ExtraLength, false);
    ZLZDecompressor inflater((std::size_t)-1);
    for (;;) {
        // A null target buffer means the output is thrown away.
        const std::size_t produced = inflater.decompress(stream, 0, 2048);
        header.UncompressedSize += produced;
        if (produced != 2048) {
            break;
        }
    }
    //stream.seek(16, false);
}
// Reads bytes from `stream` up to (and consuming) the first NUL byte or the
// end of the stream, and returns the collected text resolved against "/" by
// CHMReferenceCollection::fullReference.
static std::string readNTString(ZLInputStream &stream) {
    std::string text;
    for (char ch; stream.read(&ch, 1) == 1 && ch != '\0'; ) {
        text += ch;
    }
    return CHMReferenceCollection::fullReference("/", text);
}
// Produces up to `maxSize` bytes of inflated data and returns how many bytes
// were produced.
//
// Inflated output accumulates in myBuffer. Compressed input is pulled from
// `stream` in pieces of at most IN_BUFFER_SIZE bytes while myBuffer holds
// fewer than `maxSize` bytes and myAvailableSize (the remaining compressed
// byte budget) is not exhausted. The first `maxSize` bytes of myBuffer are
// then copied to `buffer` and removed from myBuffer. If `buffer` is null the
// bytes are discarded instead of copied.
size_t ZLZDecompressor::decompress(ZLInputStream &stream, char *buffer, size_t maxSize) {
    while ((myBuffer.length() < maxSize) && (myAvailableSize > 0)) {
        size_t size = std::min(myAvailableSize, (size_t)IN_BUFFER_SIZE);

        myZStream->next_in = (Bytef*)myInBuffer;
        myZStream->avail_in = stream.read(myInBuffer, size);
        if (myZStream->avail_in == size) {
            myAvailableSize -= size;
        } else {
            // Short read: the stream has no more data, so stop after this round.
            myAvailableSize = 0;
        }
        // NOTE(review): this loop breaks immediately and has no effect.
        while (myZStream->avail_in == 0) {
            break;
        }
        // Inflate until all of the input just read has been consumed.
        while (myZStream->avail_in > 0) {
            myZStream->avail_out = OUT_BUFFER_SIZE;
            myZStream->next_out = (Bytef*)myOutBuffer;
            int code = ::inflate(myZStream, Z_SYNC_FLUSH);
            if ((code != Z_OK) && (code != Z_STREAM_END)) {
                // inflate() reported an error: give up on this input piece.
                break;
            }
            if (OUT_BUFFER_SIZE == myZStream->avail_out) {
                // No output was produced in this step.
                break;
            }
            myBuffer.append(myOutBuffer, OUT_BUFFER_SIZE - myZStream->avail_out);
            if (code == Z_STREAM_END) {
                // End of the compressed data: stop reading further input and
                // give back the bytes that were read but not consumed.
                // NOTE(review): `0 - avail_in` is computed on an unsigned value;
                // presumably seek() takes a signed offset so this rewinds — confirm.
                myAvailableSize = 0;
                stream.seek(0 - myZStream->avail_in, false);
                break;
            }
        }
    }

    size_t realSize = std::min(maxSize, myBuffer.length());
    if (buffer != 0) {
        memcpy(buffer, myBuffer.data(), realSize);
    }
    myBuffer.erase(0, realSize);
    return realSize;
}
// Reads the cross-reference table at `xrefOffset` and every earlier table
// reachable through the "Prev" entry of the trailer dictionaries.
//
// For each in-use entry, myObjectLocationMap maps (object number, generation)
// to the object's offset. The first trailer encountered is kept in myTrailer.
// Returns true once a trailer without "Prev" has been processed, and false on
// any structural problem (missing "xref" keyword, malformed entry line,
// trailer that is not a dictionary, non-integer "Prev").
bool PdfBookReader::readReferenceTable(ZLInputStream &stream, int xrefOffset) {
    while (true) {
        stream.seek(xrefOffset, true);
        readLine(stream, myBuffer);
        stripBuffer(myBuffer);
        if (myBuffer != "xref") {
            return false;
        }

        // Subsections: a "<first object> <count>" line followed by <count> entries.
        // NOTE(review): the result of readLine is not checked here; if the stream
        // ends before "trailer" this loop may not terminate — confirm readLine's
        // behaviour at end of stream.
        while (true) {
            readLine(stream, myBuffer);
            stripBuffer(myBuffer);
            if (myBuffer == "trailer") {
                break;
            }
            // NOTE(review): if the line has no space, find() returns npos; with
            // index == -1 the count below would be parsed from the start of the line.
            const int index = myBuffer.find(' ');
            const int start = atoi(myBuffer.c_str());
            const int len = atoi(myBuffer.c_str() + index + 1);
            for (int i = 0; i < len; ++i) {
                readLine(stream, myBuffer);
                stripBuffer(myBuffer);
                // An entry is expected to be exactly 18 characters after stripping.
                if (myBuffer.length() != 18) {
                    return false;
                }
                // Offset at position 0, generation at position 11, flag at position 17.
                const int objectOffset = atoi(myBuffer.c_str());
                const int objectGeneration = atoi(myBuffer.c_str() + 11);
                const bool objectInUse = myBuffer[17] == 'n';
                if (objectInUse) {
                    myObjectLocationMap[std::pair<int,int>(start + i, objectGeneration)] = objectOffset;
                }
            }
        }
        char ch = 0;
        shared_ptr<PdfObject> trailer = PdfObject::readObject(stream, ch);
        if (trailer.isNull() || (trailer->type() != PdfObject::DICTIONARY)) {
            return false;
        }
        // Only the first trailer read (the one for the initial offset) is kept.
        if (myTrailer.isNull()) {
            myTrailer = trailer;
        }
        PdfDictionaryObject &trailerDictionary = (PdfDictionaryObject&)*trailer;
        shared_ptr<PdfObject> previous = trailerDictionary["Prev"];
        if (previous.isNull()) {
            return true;
        }
        if (previous->type() != PdfObject::INTEGER_NUMBER) {
            return false;
        }
        // Continue with the previous cross-reference table.
        xrefOffset = ((PdfIntegerObject&)*previous).value();
    }
}
// Reads a 32-bit big-endian unsigned value from `stream` into `N`.
//
// The buffer is zero-initialised so that a short or failed read decodes the
// bytes that were not read as 0 instead of reading uninitialised memory.
void PdbUtil::readUnsignedLong(ZLInputStream &stream, unsigned long &N) {
    unsigned char data[4] = { 0, 0, 0, 0 };
    stream.read(reinterpret_cast<char*>(data), 4);
    N = (((unsigned long)data[0]) << 24) +
        (((unsigned long)data[1]) << 16) +
        (((unsigned long)data[2]) << 8) +
        (unsigned long)data[3];
}
// Moves the stream past the data of the entry described by `header`.
//
// Only local-file records are handled; any other signature is a no-op.
// When flag bit 0x08 is set and the entry is actually compressed, the extra
// field is skipped and the data is inflated (and discarded) in BUFFER_SIZE
// steps until a short result marks the end. Every step's output size is
// added to header.UncompressedSize. Otherwise the extra field and the
// compressed data are skipped in one seek using header.CompressedSize.
//
// Changes from the previous revision:
//  - Stored entries (CompressionMethod == 0) are no longer run through the
//    inflater when bit 0x08 is set; their data is not a deflate stream, so
//    they are skipped by size instead.
//  - The AppLog arguments are cast to int to match the "%d" conversions in
//    the unchanged format strings. They were passed as size_t / the type of
//    UncompressedSize before.
void ZLZipHeader::skipEntry(ZLInputStream &stream, ZLZipHeader &header) {
    switch (header.Signature) {
        default:
            break;
        case SignatureLocalFile:
            if ((header.Flags & 0x08) == 0x08 && header.CompressionMethod != 0) {
                stream.seek(header.ExtraLength, false);
                const size_t unlimited = (size_t)-1;
                AppLog("ZLZDecompressor decompressor %d", static_cast<int>(unlimited));
                ZLZDecompressor decompressor(unlimited);
                size_t size;
                do {
                    // A null target buffer means the output is thrown away.
                    size = decompressor.decompress(stream, 0, BUFFER_SIZE);
                    header.UncompressedSize += size;
                } while (size == BUFFER_SIZE);
                AppLog("header.UncompressedSize %d", static_cast<int>(header.UncompressedSize));
            } else {
                stream.seek(header.ExtraLength + header.CompressedSize, false);
            }
            break;
    }
}
// Detects the encoding and language of `book` from the start of `stream`.
//
// Returns true immediately when the book already has an encoding and `force`
// is false. Otherwise an empty encoding defaults to UTF-8 and, if
// auto-detection is enabled and the stream opens, up to 64 KiB are analysed
// by ZLLanguageDetector: a non-empty detected language overrides the
// language, and the detected encoding always overrides the encoding, with
// ASCII and "iso-8859-1" mapped to "windows-1252". Encoding and language are
// always written back; the return value tells whether the detector produced
// a result.
bool FormatPlugin::detectEncodingAndLanguage(Book &book, ZLInputStream &stream, bool force) {
    std::string bookLanguage = book.language();
    std::string bookEncoding = book.encoding();
    if (!(force || bookEncoding.empty())) {
        return true;
    }

    bool found = false;
    PluginCollection &plugins = PluginCollection::Instance();
    if (bookEncoding.empty()) {
        bookEncoding = ZLEncodingConverter::UTF8;
    }

    if (plugins.isLanguageAutoDetectEnabled() && stream.open()) {
        static const int SAMPLE_CAPACITY = 65536;
        char *sample = new char[SAMPLE_CAPACITY];
        const std::size_t sampleLength = stream.read(sample, SAMPLE_CAPACITY);
        stream.close();

        shared_ptr<ZLLanguageDetector::LanguageInfo> guess =
            ZLLanguageDetector().findInfo(sample, sampleLength);
        delete[] sample;

        found = !guess.isNull();
        if (found) {
            if (!guess->Language.empty()) {
                bookLanguage = guess->Language;
            }
            bookEncoding = guess->Encoding;
            if (bookEncoding == ZLEncodingConverter::ASCII || bookEncoding == "iso-8859-1") {
                bookEncoding = "windows-1252";
            }
        }
    }

    book.setEncoding(bookEncoding);
    book.setLanguage(bookLanguage);
    return found;
}