예제 #1
0
 WordWithRanges( const lString16 & w, const lString8 & enc, const ldomWord & range )
     : word( w ), encoded( enc )
 {
     wordLower = w;
     wordLower.lowercase();
     ranges.add( range );
 }
예제 #2
0
 void match( const lString8& prefix, LVArray<WordWithRanges *> & result )
 {
     crtrace dumpstr;
     dumpstr << "match with " << prefix;
     for( int i=0; i<_words.length(); i++ ) {
         if( _words[i]->matchEncoded( prefix ) ) {
             result.add( _words[i] );
             //dumpstr << " " << i << " " << encoded_words_[i];
         };
     };
 }
예제 #3
0
    void AddFootnoteFragmentToList()
    {
        if ( footstart==NULL )
            return; // no data
        if ( footend==NULL )
            footend = footstart;
        //CRLog::trace("AddFootnoteFragmentToList(%d, %d)", footstart->start, footend->end );
        int h = footend->getEnd() - footstart->getStart(); // currentFootnoteHeight();
        if ( h>0 && h<page_h ) {
            footheight += h;
#ifdef DEBUG_FOOTNOTES
            //CRLog::trace("AddFootnoteFragmentToList(%d, %d)", footstart->getStart(), h);
#endif
            footnotes.add( LVPageFootNoteInfo( footstart->getStart(), h ) );
        }
        footstart = footend = NULL;
    }
예제 #4
0
 bool setManglingKey(lString16 key) {
     if (key.startsWith(lString16(L"urn:uuid:")))
         key = key.substr(9);
     _fontManglingKey.clear();
     _fontManglingKey.reserve(16);
     lUInt8 b = 0;
     int n = 0;
     for (int i=0; i<key.length(); i++) {
         int d = hexDigit(key[i]);
         if (d>=0) {
             b = (b << 4) | d;
             if (++n > 1) {
                 _fontManglingKey.add(b);
                 n = 0;
                 b = 0;
             }
         }
     }
     return _fontManglingKey.length() == 16;
 }
예제 #5
0
 virtual void OnText( const lChar16 * text, int len, lUInt32 flags)
 {
     int fmt = m_stack.getInt(pi_imgfmt);
     if (!fmt)
         return;
     _fmt = fmt;
     for (int i=0; i<len;) {
         int d = -1;
         do {
             d = i<len ? hexDigit(text[i]) : -1;
             i++;
         } while (d<0 && i<len);
         if (_lastDigit>=0 && d>=0) {
             _buf.add((lUInt8)((_lastDigit<<4) | d));
             _lastDigit = -1;
         } else {
             if (d>=0)
                 _lastDigit = d;
         }
     }
 }
예제 #6
0
bool ImportEpubDocument( LVStreamRef stream, ldomDocument * m_doc, LVDocViewCallback * progressCallback, CacheLoadingCallback * formatCallback )
{
    LVContainerRef arc = LVOpenArchieve( stream );
    if ( arc.isNull() )
        return false; // not a ZIP archive

    // check root media type
    lString16 rootfilePath = EpubGetRootFilePath(arc);
    if ( rootfilePath.empty() )
    	return false;

    EncryptedDataContainer * decryptor = new EncryptedDataContainer(arc);
    if (decryptor->open()) {
        CRLog::debug("EPUB: encrypted items detected");
    }

    LVContainerRef m_arc = LVContainerRef(decryptor);

    if (decryptor->hasUnsupportedEncryption()) {
        // DRM!!!
        createEncryptedEpubWarningDocument(m_doc);
        return true;
    }

    m_doc->setContainer(m_arc);

    // read content.opf
    EpubItems epubItems;
    //EpubItem * epubToc = NULL; //TODO
    LVArray<EpubItem*> spineItems;
    lString16 codeBase;
    //lString16 css;

    //
    {
        codeBase=LVExtractPath(rootfilePath, false);
        CRLog::trace("codeBase=%s", LCSTR(codeBase));
    }

    LVStreamRef content_stream = m_arc->OpenStream(rootfilePath.c_str(), LVOM_READ);
    if ( content_stream.isNull() )
        return false;


    lString16 ncxHref;
    lString16 coverId;

    LVEmbeddedFontList fontList;
    EmbeddedFontStyleParser styleParser(fontList);

    // reading content stream
    {
        ldomDocument * doc = LVParseXMLStream( content_stream );
        if ( !doc )
            return false;

        CRPropRef m_doc_props = m_doc->getProps();
        lString16 author = doc->textFromXPath( lString16(L"package/metadata/creator"));
        lString16 title = doc->textFromXPath( lString16(L"package/metadata/title"));
        m_doc_props->setString(DOC_PROP_TITLE, title);
        m_doc_props->setString(DOC_PROP_AUTHORS, author );

        for ( int i=1; i<50; i++ ) {
            ldomNode * item = doc->nodeFromXPath( lString16(L"package/metadata/identifier[") + lString16::itoa(i) + L"]" );
            if (!item)
                break;
            lString16 key = item->getText();
            if (decryptor->setManglingKey(key)) {
                CRLog::debug("Using font mangling key %s", LCSTR(key));
                break;
            }
        }

        CRLog::info("Author: %s Title: %s", LCSTR(author), LCSTR(title));
        for ( int i=1; i<20; i++ ) {
            ldomNode * item = doc->nodeFromXPath( lString16(L"package/metadata/meta[") + lString16::itoa(i) + L"]" );
            if ( !item )
                break;
            lString16 name = item->getAttributeValue(L"name");
            lString16 content = item->getAttributeValue(L"content");
            if ( name == L"cover" )
                coverId = content;
            else if ( name==L"calibre:series" )
                m_doc_props->setString(DOC_PROP_SERIES_NAME, content );
            else if ( name==L"calibre:series_index" )
                m_doc_props->setInt(DOC_PROP_SERIES_NUMBER, content.atoi() );
        }

        // items
        for ( int i=1; i<50000; i++ ) {
            ldomNode * item = doc->nodeFromXPath( lString16(L"package/manifest/item[") + lString16::itoa(i) + L"]" );
            if ( !item )
                break;
            lString16 href = item->getAttributeValue(L"href");
            lString16 mediaType = item->getAttributeValue(L"media-type");
            lString16 id = item->getAttributeValue(L"id");
            if ( !href.empty() && !id.empty() ) {
                if ( id==coverId ) {
                    // coverpage file
                    lString16 coverFileName = codeBase + href;
                    CRLog::info("EPUB coverpage file: %s", LCSTR(coverFileName));
                    LVStreamRef stream = m_arc->OpenStream(coverFileName.c_str(), LVOM_READ);
                    if ( !stream.isNull() ) {
                        LVImageSourceRef img = LVCreateStreamImageSource(stream);
                        if ( !img.isNull() ) {
                            CRLog::info("EPUB coverpage image is correct: %d x %d", img->GetWidth(), img->GetHeight() );
                            m_doc_props->setString(DOC_PROP_COVER_FILE, coverFileName);
                        }
                    }
                }
                EpubItem * epubItem = new EpubItem;
                epubItem->href = href;
                epubItem->id = id;
                epubItem->mediaType = mediaType;
                epubItems.add( epubItem );

//                // register embedded document fonts
//                if (mediaType == L"application/vnd.ms-opentype"
//                        || mediaType == L"application/x-font-otf"
//                        || mediaType == L"application/x-font-ttf") { // TODO: more media types?
//                    // TODO:
//                    fontList.add(codeBase + href);
//                }
            }
            if ( mediaType==L"text/css" ) {
                lString16 name = LVCombinePaths(codeBase, href);
                LVStreamRef cssStream = m_arc->OpenStream(name.c_str(), LVOM_READ);
                if (!cssStream.isNull()) {
                    lString8 cssFile = UnicodeToUtf8(LVReadTextFile(cssStream));
                    lString16 base = name;
                    LVExtractLastPathElement(base);
                    CRLog::trace("style: %s", cssFile.c_str());
                    styleParser.parse(base, cssFile);
                }
            }
        }

        // spine == itemrefs
        if ( epubItems.length()>0 ) {
            ldomNode * spine = doc->nodeFromXPath( lString16(L"package/spine") );
            if ( spine ) {

                EpubItem * ncx = epubItems.findById( spine->getAttributeValue(L"toc") ); //TODO
                //EpubItem * ncx = epubItems.findById(lString16("ncx"));
                if ( ncx!=NULL )
                    ncxHref = codeBase + ncx->href;

                for ( int i=1; i<50000; i++ ) {
                    ldomNode * item = doc->nodeFromXPath( lString16(L"package/spine/itemref[") + lString16::itoa(i) + L"]" );
                    if ( !item )
                        break;
                    EpubItem * epubItem = epubItems.findById( item->getAttributeValue(L"idref") );
                    if ( epubItem ) {
                        // TODO: add to document
                        spineItems.add( epubItem );
                    }
                }
            }
        }
        delete doc;
    }

    if ( spineItems.length()==0 )
        return false;


#if BUILD_LITE!=1
    if ( m_doc->openFromCache(formatCallback) ) {
        if ( progressCallback ) {
            progressCallback->OnLoadFileEnd( );
        }
        return true;
    }
#endif

    lUInt32 saveFlags = m_doc->getDocFlags();
    m_doc->setDocFlags( saveFlags );
    m_doc->setContainer( m_arc );

    ldomDocumentWriter writer(m_doc);
#if 0
    m_doc->setNodeTypes( fb2_elem_table );
    m_doc->setAttributeTypes( fb2_attr_table );
    m_doc->setNameSpaceTypes( fb2_ns_table );
#endif
    //m_doc->setCodeBase( codeBase );

    ldomDocumentFragmentWriter appender(&writer, lString16(L"body"), lString16(L"DocFragment"), lString16::empty_str );
    writer.OnStart(NULL);
    writer.OnTagOpenNoAttr(L"", L"body");
    int fragmentCount = 0;
    for ( int i=0; i<spineItems.length(); i++ ) {
        if ( spineItems[i]->mediaType==L"application/xhtml+xml" ) {
            lString16 name = codeBase + spineItems[i]->href;
            appender.addPathSubstitution( name, lString16(L"_doc_fragment_") + lString16::itoa(i) );
        }
    }
    for ( int i=0; i<spineItems.length(); i++ ) {
        if ( spineItems[i]->mediaType==L"application/xhtml+xml" ) {
            lString16 name = codeBase + spineItems[i]->href;
            {
                CRLog::debug("Checking fragment: %s", LCSTR(name));
                LVStreamRef stream = m_arc->OpenStream(name.c_str(), LVOM_READ);
                if ( !stream.isNull() ) {
                    appender.setCodeBase( name );
                    lString16 base = name;
                    LVExtractLastPathElement(base);
                    //CRLog::trace("base: %s", LCSTR(base));
                    //LVXMLParser
                    LVHTMLParser parser(stream, &appender);
                    if ( parser.CheckFormat() && parser.Parse() ) {
                        // valid
                        fragmentCount++;
                        lString8 headCss = appender.getHeadStyleText();
                        //CRLog::trace("style: %s", headCss.c_str());
                        styleParser.parse(base, headCss);
                    } else {
                        CRLog::error("Document type is not XML/XHTML for fragment %s", LCSTR(name));
                    }
                }
            }
        }
    }

    ldomDocument * ncxdoc = NULL;
    if ( !ncxHref.empty() ) {
        LVStreamRef stream = m_arc->OpenStream(ncxHref.c_str(), LVOM_READ);
        lString16 codeBase = LVExtractPath( ncxHref );
        if ( codeBase.length()>0 && codeBase.lastChar()!='/' )
            codeBase.append(1, L'/');
        appender.setCodeBase(codeBase);
        if ( !stream.isNull() ) {
            ldomDocument * ncxdoc = LVParseXMLStream( stream );
            if ( ncxdoc!=NULL ) {
                ldomNode * navMap = ncxdoc->nodeFromXPath( lString16(L"ncx/navMap"));
                if ( navMap!=NULL )
                    ReadEpubToc( m_doc, navMap, m_doc->getToc(), appender );
                delete ncxdoc;
            }
        }
    }

    writer.OnTagClose(L"", L"body");
    writer.OnStop();
    CRLog::debug("EPUB: %d documents merged", fragmentCount);

    if (!fontList.empty()) {
        // set document font list, and register fonts
        m_doc->getEmbeddedFontList().set(fontList);
        m_doc->registerEmbeddedFonts();
        m_doc->forceReinitStyles();
    }

    if ( fragmentCount==0 )
        return false;

#if 0
    // set stylesheet
    //m_doc->getStyleSheet()->clear();
    m_doc->setStyleSheet( NULL, true );
    //m_doc->getStyleSheet()->parse(m_stylesheet.c_str());
    if ( !css.empty() && m_doc->getDocFlag(DOC_FLAG_ENABLE_INTERNAL_STYLES) ) {

        m_doc->setStyleSheet( "p.p { text-align: justify }\n"
            "svg { text-align: center }\n"
            "i { display: inline; font-style: italic }\n"
            "b { display: inline; font-weight: bold }\n"
            "abbr { display: inline }\n"
            "acronym { display: inline }\n"
            "address { display: inline }\n"
            "p.title-p { hyphenate: none }\n"
//abbr, acronym, address, blockquote, br, cite, code, dfn, div, em, h1, h2, h3, h4, h5, h6, kbd, p, pre, q, samp, span, strong, var
        , false);
        m_doc->setStyleSheet( UnicodeToUtf8(css).c_str(), false );
        //m_doc->getStyleSheet()->parse(UnicodeToUtf8(css).c_str());
    } else {
        //m_doc->getStyleSheet()->parse(m_stylesheet.c_str());
        //m_doc->setStyleSheet( m_stylesheet.c_str(), false );
    }
#endif
#if 0
    LVStreamRef out = LVOpenFileStream( L"c:\\doc.xml" , LVOM_WRITE );
    if ( !out.isNull() )
        m_doc->saveToStream( out, "utf-8" );
#endif

    // DONE!
    if ( progressCallback ) {
        progressCallback->OnLoadFileEnd( );
        m_doc->compact();
        m_doc->dumpStatistics();
    }
    return true;

}
예제 #7
0
 void add( const ldomWord & range )
 {
     ranges.add( range );
 }