WordWithRanges( const lString16 & w, const lString8 & enc, const ldomWord & range ) : word( w ), encoded( enc ) { wordLower = w; wordLower.lowercase(); ranges.add( range ); }
void match( const lString8& prefix, LVArray<WordWithRanges *> & result ) { crtrace dumpstr; dumpstr << "match with " << prefix; for( int i=0; i<_words.length(); i++ ) { if( _words[i]->matchEncoded( prefix ) ) { result.add( _words[i] ); //dumpstr << " " << i << " " << encoded_words_[i]; }; }; }
void AddFootnoteFragmentToList() { if ( footstart==NULL ) return; // no data if ( footend==NULL ) footend = footstart; //CRLog::trace("AddFootnoteFragmentToList(%d, %d)", footstart->start, footend->end ); int h = footend->getEnd() - footstart->getStart(); // currentFootnoteHeight(); if ( h>0 && h<page_h ) { footheight += h; #ifdef DEBUG_FOOTNOTES //CRLog::trace("AddFootnoteFragmentToList(%d, %d)", footstart->getStart(), h); #endif footnotes.add( LVPageFootNoteInfo( footstart->getStart(), h ) ); } footstart = footend = NULL; }
/// Parses `key` as a sequence of hex digits and stores the resulting bytes in
/// `_fontManglingKey` (which is cleared first).
///
/// A leading "urn:uuid:" prefix is stripped. Characters for which hexDigit()
/// returns a negative value are skipped; every two accepted digits form one
/// byte (first digit is the high nibble). An unpaired trailing digit is dropped.
///
/// @param key  identifier text to parse
/// @return true when exactly 16 bytes were collected
bool setManglingKey(lString16 key)
{
    if (key.startsWith(lString16(L"urn:uuid:")))
        key = key.substr(9);

    _fontManglingKey.clear();
    _fontManglingKey.reserve(16);

    lUInt8 pendingByte = 0;
    bool haveHighNibble = false;
    const int keyLength = key.length();
    for (int pos = 0; pos < keyLength; pos++) {
        const int nibble = hexDigit(key[pos]);
        if (nibble < 0)
            continue; // not a hex digit: ignore

        pendingByte = (pendingByte << 4) | nibble;
        if (!haveHighNibble) {
            // first digit of the pair: wait for the second one
            haveHighNibble = true;
            continue;
        }

        // second digit of the pair: the byte is complete
        _fontManglingKey.add(pendingByte);
        pendingByte = 0;
        haveHighNibble = false;
    }

    return _fontManglingKey.length() == 16;
}
virtual void OnText( const lChar16 * text, int len, lUInt32 flags) { int fmt = m_stack.getInt(pi_imgfmt); if (!fmt) return; _fmt = fmt; for (int i=0; i<len;) { int d = -1; do { d = i<len ? hexDigit(text[i]) : -1; i++; } while (d<0 && i<len); if (_lastDigit>=0 && d>=0) { _buf.add((lUInt8)((_lastDigit<<4) | d)); _lastDigit = -1; } else { if (d>=0) _lastDigit = d; } } }
bool ImportEpubDocument( LVStreamRef stream, ldomDocument * m_doc, LVDocViewCallback * progressCallback, CacheLoadingCallback * formatCallback ) { LVContainerRef arc = LVOpenArchieve( stream ); if ( arc.isNull() ) return false; // not a ZIP archive // check root media type lString16 rootfilePath = EpubGetRootFilePath(arc); if ( rootfilePath.empty() ) return false; EncryptedDataContainer * decryptor = new EncryptedDataContainer(arc); if (decryptor->open()) { CRLog::debug("EPUB: encrypted items detected"); } LVContainerRef m_arc = LVContainerRef(decryptor); if (decryptor->hasUnsupportedEncryption()) { // DRM!!! createEncryptedEpubWarningDocument(m_doc); return true; } m_doc->setContainer(m_arc); // read content.opf EpubItems epubItems; //EpubItem * epubToc = NULL; //TODO LVArray<EpubItem*> spineItems; lString16 codeBase; //lString16 css; // { codeBase=LVExtractPath(rootfilePath, false); CRLog::trace("codeBase=%s", LCSTR(codeBase)); } LVStreamRef content_stream = m_arc->OpenStream(rootfilePath.c_str(), LVOM_READ); if ( content_stream.isNull() ) return false; lString16 ncxHref; lString16 coverId; LVEmbeddedFontList fontList; EmbeddedFontStyleParser styleParser(fontList); // reading content stream { ldomDocument * doc = LVParseXMLStream( content_stream ); if ( !doc ) return false; CRPropRef m_doc_props = m_doc->getProps(); lString16 author = doc->textFromXPath( lString16(L"package/metadata/creator")); lString16 title = doc->textFromXPath( lString16(L"package/metadata/title")); m_doc_props->setString(DOC_PROP_TITLE, title); m_doc_props->setString(DOC_PROP_AUTHORS, author ); for ( int i=1; i<50; i++ ) { ldomNode * item = doc->nodeFromXPath( lString16(L"package/metadata/identifier[") + lString16::itoa(i) + L"]" ); if (!item) break; lString16 key = item->getText(); if (decryptor->setManglingKey(key)) { CRLog::debug("Using font mangling key %s", LCSTR(key)); break; } } CRLog::info("Author: %s Title: %s", LCSTR(author), LCSTR(title)); for ( int i=1; i<20; i++ ) { ldomNode * 
item = doc->nodeFromXPath( lString16(L"package/metadata/meta[") + lString16::itoa(i) + L"]" ); if ( !item ) break; lString16 name = item->getAttributeValue(L"name"); lString16 content = item->getAttributeValue(L"content"); if ( name == L"cover" ) coverId = content; else if ( name==L"calibre:series" ) m_doc_props->setString(DOC_PROP_SERIES_NAME, content ); else if ( name==L"calibre:series_index" ) m_doc_props->setInt(DOC_PROP_SERIES_NUMBER, content.atoi() ); } // items for ( int i=1; i<50000; i++ ) { ldomNode * item = doc->nodeFromXPath( lString16(L"package/manifest/item[") + lString16::itoa(i) + L"]" ); if ( !item ) break; lString16 href = item->getAttributeValue(L"href"); lString16 mediaType = item->getAttributeValue(L"media-type"); lString16 id = item->getAttributeValue(L"id"); if ( !href.empty() && !id.empty() ) { if ( id==coverId ) { // coverpage file lString16 coverFileName = codeBase + href; CRLog::info("EPUB coverpage file: %s", LCSTR(coverFileName)); LVStreamRef stream = m_arc->OpenStream(coverFileName.c_str(), LVOM_READ); if ( !stream.isNull() ) { LVImageSourceRef img = LVCreateStreamImageSource(stream); if ( !img.isNull() ) { CRLog::info("EPUB coverpage image is correct: %d x %d", img->GetWidth(), img->GetHeight() ); m_doc_props->setString(DOC_PROP_COVER_FILE, coverFileName); } } } EpubItem * epubItem = new EpubItem; epubItem->href = href; epubItem->id = id; epubItem->mediaType = mediaType; epubItems.add( epubItem ); // // register embedded document fonts // if (mediaType == L"application/vnd.ms-opentype" // || mediaType == L"application/x-font-otf" // || mediaType == L"application/x-font-ttf") { // TODO: more media types? 
// // TODO: // fontList.add(codeBase + href); // } } if ( mediaType==L"text/css" ) { lString16 name = LVCombinePaths(codeBase, href); LVStreamRef cssStream = m_arc->OpenStream(name.c_str(), LVOM_READ); if (!cssStream.isNull()) { lString8 cssFile = UnicodeToUtf8(LVReadTextFile(cssStream)); lString16 base = name; LVExtractLastPathElement(base); CRLog::trace("style: %s", cssFile.c_str()); styleParser.parse(base, cssFile); } } } // spine == itemrefs if ( epubItems.length()>0 ) { ldomNode * spine = doc->nodeFromXPath( lString16(L"package/spine") ); if ( spine ) { EpubItem * ncx = epubItems.findById( spine->getAttributeValue(L"toc") ); //TODO //EpubItem * ncx = epubItems.findById(lString16("ncx")); if ( ncx!=NULL ) ncxHref = codeBase + ncx->href; for ( int i=1; i<50000; i++ ) { ldomNode * item = doc->nodeFromXPath( lString16(L"package/spine/itemref[") + lString16::itoa(i) + L"]" ); if ( !item ) break; EpubItem * epubItem = epubItems.findById( item->getAttributeValue(L"idref") ); if ( epubItem ) { // TODO: add to document spineItems.add( epubItem ); } } } } delete doc; } if ( spineItems.length()==0 ) return false; #if BUILD_LITE!=1 if ( m_doc->openFromCache(formatCallback) ) { if ( progressCallback ) { progressCallback->OnLoadFileEnd( ); } return true; } #endif lUInt32 saveFlags = m_doc->getDocFlags(); m_doc->setDocFlags( saveFlags ); m_doc->setContainer( m_arc ); ldomDocumentWriter writer(m_doc); #if 0 m_doc->setNodeTypes( fb2_elem_table ); m_doc->setAttributeTypes( fb2_attr_table ); m_doc->setNameSpaceTypes( fb2_ns_table ); #endif //m_doc->setCodeBase( codeBase ); ldomDocumentFragmentWriter appender(&writer, lString16(L"body"), lString16(L"DocFragment"), lString16::empty_str ); writer.OnStart(NULL); writer.OnTagOpenNoAttr(L"", L"body"); int fragmentCount = 0; for ( int i=0; i<spineItems.length(); i++ ) { if ( spineItems[i]->mediaType==L"application/xhtml+xml" ) { lString16 name = codeBase + spineItems[i]->href; appender.addPathSubstitution( name, 
lString16(L"_doc_fragment_") + lString16::itoa(i) ); } } for ( int i=0; i<spineItems.length(); i++ ) { if ( spineItems[i]->mediaType==L"application/xhtml+xml" ) { lString16 name = codeBase + spineItems[i]->href; { CRLog::debug("Checking fragment: %s", LCSTR(name)); LVStreamRef stream = m_arc->OpenStream(name.c_str(), LVOM_READ); if ( !stream.isNull() ) { appender.setCodeBase( name ); lString16 base = name; LVExtractLastPathElement(base); //CRLog::trace("base: %s", LCSTR(base)); //LVXMLParser LVHTMLParser parser(stream, &appender); if ( parser.CheckFormat() && parser.Parse() ) { // valid fragmentCount++; lString8 headCss = appender.getHeadStyleText(); //CRLog::trace("style: %s", headCss.c_str()); styleParser.parse(base, headCss); } else { CRLog::error("Document type is not XML/XHTML for fragment %s", LCSTR(name)); } } } } } ldomDocument * ncxdoc = NULL; if ( !ncxHref.empty() ) { LVStreamRef stream = m_arc->OpenStream(ncxHref.c_str(), LVOM_READ); lString16 codeBase = LVExtractPath( ncxHref ); if ( codeBase.length()>0 && codeBase.lastChar()!='/' ) codeBase.append(1, L'/'); appender.setCodeBase(codeBase); if ( !stream.isNull() ) { ldomDocument * ncxdoc = LVParseXMLStream( stream ); if ( ncxdoc!=NULL ) { ldomNode * navMap = ncxdoc->nodeFromXPath( lString16(L"ncx/navMap")); if ( navMap!=NULL ) ReadEpubToc( m_doc, navMap, m_doc->getToc(), appender ); delete ncxdoc; } } } writer.OnTagClose(L"", L"body"); writer.OnStop(); CRLog::debug("EPUB: %d documents merged", fragmentCount); if (!fontList.empty()) { // set document font list, and register fonts m_doc->getEmbeddedFontList().set(fontList); m_doc->registerEmbeddedFonts(); m_doc->forceReinitStyles(); } if ( fragmentCount==0 ) return false; #if 0 // set stylesheet //m_doc->getStyleSheet()->clear(); m_doc->setStyleSheet( NULL, true ); //m_doc->getStyleSheet()->parse(m_stylesheet.c_str()); if ( !css.empty() && m_doc->getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES) ) { m_doc->setStyleSheet( "p.p { text-align: justify }\n" "svg { 
text-align: center }\n" "i { display: inline; font-style: italic }\n" "b { display: inline; font-weight: bold }\n" "abbr { display: inline }\n" "acronym { display: inline }\n" "address { display: inline }\n" "p.title-p { hyphenate: none }\n" //abbr, acronym, address, blockquote, br, cite, code, dfn, div, em, h1, h2, h3, h4, h5, h6, kbd, p, pre, q, samp, span, strong, var , false); m_doc->setStyleSheet( UnicodeToUtf8(css).c_str(), false ); //m_doc->getStyleSheet()->parse(UnicodeToUtf8(css).c_str()); } else { //m_doc->getStyleSheet()->parse(m_stylesheet.c_str()); //m_doc->setStyleSheet( m_stylesheet.c_str(), false ); } #endif #if 0 LVStreamRef out = LVOpenFileStream( L"c:\\doc.xml" , LVOM_WRITE ); if ( !out.isNull() ) m_doc->saveToStream( out, "utf-8" ); #endif // DONE! if ( progressCallback ) { progressCallback->OnLoadFileEnd( ); m_doc->compact(); m_doc->dumpStatistics(); } return true; }
void add( const ldomWord & range ) { ranges.add( range ); }