void DjVuSource::ReadAnnotations(GP<ByteStream> pInclStream, set<GUTF8String>& processed, GP<ByteStream> pAnnoStream) { // Look for shared annotations GUTF8String strInclude; char buf[1024]; int nLength; while ((nLength = pInclStream->read(buf, 1024))) strInclude += GUTF8String(buf, nLength); // Eat '\n' in the beginning and at the end while (strInclude.length() > 0 && strInclude[0] == '\n') strInclude = strInclude.substr(1, static_cast<unsigned int>(-1)); while (strInclude.length() > 0 && strInclude[static_cast<int>(strInclude.length()) - 1] == '\n') strInclude.setat(strInclude.length() - 1, 0); if (strInclude.length() > 0 && processed.find(strInclude) == processed.end()) { processed.insert(strInclude); GURL urlInclude = m_pDjVuDoc->id_to_url(strInclude); GP<DataPool> pool = m_pDjVuDoc->request_data(NULL, urlInclude); GP<ByteStream> stream = pool->get_stream(); GP<IFFByteStream> iff(IFFByteStream::create(stream)); // Check file format GUTF8String chkid; if (!iff->get_chunk(chkid) || (chkid != "FORM:DJVI" && chkid != "FORM:DJVU" && chkid != "FORM:PM44" && chkid != "FORM:BM44")) { return; } // Find chunk with page info while (iff->get_chunk(chkid) != 0) { GP<ByteStream> chunk_stream = iff->get_bytestream(); if (chkid == "INCL") { ReadAnnotations(pInclStream, processed, pAnnoStream); } else if (chkid == "FORM:ANNO") { pAnnoStream->copy(*chunk_stream); } else if (chkid == "ANTa" || chkid == "ANTz") { const GP<IFFByteStream> iffout = IFFByteStream::create(pAnnoStream); iffout->put_chunk(chkid); iffout->copy(*chunk_stream); iffout->close_chunk(); } iff->seek_close_chunk(); } } }
void create_bg44_chunk(IFFByteStream &iff, const char *ckid, GUTF8String filespec) { static GP<IFFByteStream> bg44iff; if (! bg44iff) { if (flag_contains_bg) DjVuPrintErrorUTF8("%s","djvumake: Duplicate BGxx chunk\n"); int i=filespec.rsearch(':'); for (int j=i+1; i>0 && j<(int)filespec.length(); j++) if (filespec[j] < '0' || filespec[j] > '9') i = -1; if (!i) G_THROW("djvumake: no filename specified in first BG44 specification"); GUTF8String filename=(i<0)?filespec:GUTF8String(filespec, i); const GURL::Filename::UTF8 url(filename); const GP<ByteStream> gbs(ByteStream::create(url,"rb")); if(!gbs) { G_THROW("djvumake: no such file as"+filename); } bg44iff = IFFByteStream::create(gbs); GUTF8String chkid; bg44iff->get_chunk(chkid); if (chkid != "FORM:PM44" && chkid != "FORM:BM44") G_THROW("djvumake: BG44 file has incorrect format (wrong IFF header)"); if (i>=0) filespec = i+1+(const char *)filespec; else filespec = "99"; } else { if (filespec.length() && filespec[0]!=':') G_THROW("djvumake: filename specified in BG44 refinement"); filespec = 1+(const char *)filespec; } const char *s=filespec; int nchunks = strtol((char *)s, (char **)&s, 10); if (nchunks<1 || nchunks>99) G_THROW("djvumake: invalid number of chunks in BG44 specification"); if (*s) G_THROW("djvumake: invalid BG44 specification (syntax error)"); int flag = (nchunks>=99); GUTF8String chkid; while (nchunks-->0 && bg44iff->get_chunk(chkid)) { if (chkid!="PM44" && chkid!="BM44") { DjVuPrintErrorUTF8("%s","djvumake: BG44 file contains unrecognized chunks (fixed)\n"); nchunks += 1; bg44iff->close_chunk(); continue; } GP<ByteStream> gmbs=ByteStream::create(); ByteStream &mbs=*gmbs; mbs.copy(*(bg44iff->get_bytestream())); bg44iff->close_chunk(); mbs.seek(0); if (mbs.readall((void*)&primary, sizeof(primary)) != sizeof(primary)) G_THROW("djvumake: BG44 file is corrupted (cannot read primary header)\n"); if (primary.serial == 0) { if (mbs.readall((void*)&secondary, sizeof(secondary)) != sizeof(secondary)) 
G_THROW("djvumake: BG44 file is corrupted (cannot read secondary header)\n"); int iw = (secondary.xhi<<8) + secondary.xlo; int ih = (secondary.yhi<<8) + secondary.ylo; int red; for (red=1; red<=12; red++) if (iw==(w+red-1)/red && ih==(h+red-1)/red) break; flag_contains_bg = red; if (red>12) DjVuPrintErrorUTF8("%s","djvumake: BG44 subsampling is not in [1..12] range\n"); } mbs.seek(0); iff.put_chunk(ckid); iff.copy(mbs); iff.close_chunk(); flag = 1; } if (!flag) DjVuPrintErrorUTF8("%s","djvumake: no more chunks in BG44 file\n"); }
// Builds a PageInfo for page nPage by scanning the raw chunks of the page.
//
// nPage     - page index; asserted to lie in [0, m_nPageCount).
// bNeedText - when true, "TXTa"/"TXTz" chunks are collected and handed to
//             PageInfo::DecodeText.
// bNeedAnno - when true, "FORM:ANNO", "ANTa"/"ANTz" and "INCL" chunks are
//             collected and handed to PageInfo::DecodeAnno.
//
// The result starts out as 100 x 100 at 100 dpi with bDecoded set; these
// values remain when the page does not begin with an accepted FORM id.
// A GException is swallowed; any other exception is reported through
// pApplication (when it is set). In both cases whatever was filled in so far
// is returned.
PageInfo DjVuSource::ReadPageInfo(int nPage, bool bNeedText, bool bNeedAnno)
{
    ASSERT(nPage >= 0 && nPage < m_nPageCount);

    PageInfo pageInfo;
    pageInfo.szPage.cx = 100;
    pageInfo.szPage.cy = 100;
    pageInfo.nDPI = 100;
    pageInfo.bDecoded = true;

    // Scratch streams exist only for the kinds of data the caller asked for.
    GP<ByteStream> pAnnoStream;
    if (bNeedAnno)
    {
        pAnnoStream = ByteStream::create();
    }

    GP<ByteStream> pTextStream;
    if (bNeedText)
    {
        pTextStream = ByteStream::create();
    }

    try
    {
        // Work on the raw page data and decode just the chunks of interest.
        // DjVuFile is deliberately avoided: that way we never wait for a lock
        // to be released, and the UI thread is not blocked.
        GURL urlPage = m_pDjVuDoc->page_to_url(nPage);
        GP<DataPool> pDataPool = m_pDjVuDoc->request_data(NULL, urlPage);
        GP<ByteStream> pPageStream = pDataPool->get_stream();
        GP<IFFByteStream> pIff(IFFByteStream::create(pPageStream));

        // The top-level chunk must be one of the accepted FORM ids.
        GUTF8String strId;
        if (!pIff->get_chunk(strId))
        {
            return pageInfo;
        }
        const bool bAcceptedForm =
            strId == "FORM:DJVI" || strId == "FORM:DJVU" ||
            strId == "FORM:PM44" || strId == "FORM:BM44";
        if (!bAcceptedForm)
        {
            return pageInfo;
        }

        // Only the first PM44/BM44 chunk is inspected for the image size.
        bool bSeenIW44 = false;

        while (pIff->get_chunk(strId) != 0)
        {
            GP<ByteStream> pChunk = pIff->get_bytestream();

            const bool bIsIW44 = (strId == "PM44" || strId == "BM44");
            const bool bIsText = (strId == "TXTa" || strId == "TXTz");
            const bool bIsAnt = (strId == "ANTa" || strId == "ANTz");

            if (strId == "INFO")
            {
                // Page size, rotation and resolution come from the INFO chunk.
                GP<DjVuInfo> pDjVuInfo = DjVuInfo::create();
                pDjVuInfo->decode(*pChunk);

                // Negative values are clamped to 0.
                pageInfo.szPage.cx = max(pDjVuInfo->width, 0);
                pageInfo.szPage.cy = max(pDjVuInfo->height, 0);
                pageInfo.nInitialRotate = pDjVuInfo->orientation;
                pageInfo.nDPI = max(pDjVuInfo->dpi, 0);

                // An odd orientation value exchanges width and height.
                if ((pDjVuInfo->orientation & 1) != 0)
                {
                    swap(pageInfo.szPage.cx, pageInfo.szPage.cy);
                }
            }
            else if (!bSeenIW44 && bIsIW44)
            {
                bSeenIW44 = true;

                // Image size is taken from the header of the bitmap chunk.
                // The first four bytes (serial, slices, major, minor) are
                // read only to advance the stream.
                for (int nSkipped = 0; nSkipped < 4; ++nSkipped)
                {
                    pChunk->read8();
                }

                // Width and height follow, each as high byte then low byte.
                const UINT nWidthHi = pChunk->read8();
                const UINT nWidthLo = pChunk->read8();
                const UINT nHeightHi = pChunk->read8();
                const UINT nHeightLo = pChunk->read8();

                pageInfo.szPage.cx = (nWidthHi << 8) | nWidthLo;
                pageInfo.szPage.cy = (nHeightHi << 8) | nHeightLo;
                pageInfo.nDPI = 100;
            }
            else if (bIsText)
            {
                // The presence of text is recorded even when it is not needed.
                pageInfo.bHasText = true;

                if (bNeedText)
                {
                    const GP<IFFByteStream> pTextOut = IFFByteStream::create(pTextStream);
                    pTextOut->put_chunk(strId);
                    pTextOut->copy(*pChunk);
                    pTextOut->close_chunk();
                }
            }
            else if (bNeedAnno && strId == "FORM:ANNO")
            {
                pAnnoStream->copy(*pChunk);
            }
            else if (bNeedAnno && bIsAnt)
            {
                const GP<IFFByteStream> pAnnoOut = IFFByteStream::create(pAnnoStream);
                pAnnoOut->put_chunk(strId);
                pAnnoOut->copy(*pChunk);
                pAnnoOut->close_chunk();
            }
            else if (bNeedAnno && strId == "INCL")
            {
                // Each INCL chunk starts with its own, empty set of visited ids.
                set<GUTF8String> visited;
                ReadAnnotations(pChunk, visited, pAnnoStream);
            }

            pIff->seek_close_chunk();
        }

        // Decode only when something was actually written to the stream.
        if (bNeedText && pTextStream->tell() != 0)
        {
            pageInfo.DecodeText(pTextStream);
        }

        if (bNeedAnno && pAnnoStream->tell() != 0)
        {
            pageInfo.DecodeAnno(pAnnoStream);
        }
    }
    catch (GException&)
    {
        // Ignored on purpose: return what has been gathered so far.
    }
    catch (...)
    {
        if (pApplication != NULL)
        {
            pApplication->ReportFatalError();
        }
    }

    return pageInfo;
}