Example #1
0
bool Package::Unpack()
{
    PackagePtr sharedMe = shared_from_this();
    
    // very basic sanity check
    xmlNodePtr root = xmlDocGetRootElement(_opf);
    string rootName(reinterpret_cast<const char*>(root->name));
    rootName.tolower();
    
    if ( rootName != "package" )
    {
        HandleError(EPUBError::OPFInvalidPackageDocument);
        return false;       // not an OPF file, innit?
    }
    if ( _getProp(root, "version").empty() )
    {
        HandleError(EPUBError::OPFPackageHasNoVersion);
    }
    
    InstallPrefixesFromAttributeValue(_getProp(root, "prefix", ePub3NamespaceURI));
    
    // go through children to determine the CFI index of the <spine> tag
    static const xmlChar* kSpineName = BAD_CAST "spine";
    static const xmlChar* kManifestName = BAD_CAST "manifest";
    static const xmlChar* kMetadataName = BAD_CAST "metadata";
    _spineCFIIndex = 0;
    uint32_t idx = 0;
    xmlNodePtr child = xmlFirstElementChild(root);
    while ( child != nullptr )
    {
        idx += 2;
        if ( xmlStrEqual(child->name, kSpineName) )
        {
            _spineCFIIndex = idx;
            if ( _spineCFIIndex != 6 )
                HandleError(EPUBError::OPFSpineOutOfOrder);
        }
        else if ( xmlStrEqual(child->name, kManifestName) && idx != 4 )
        {
            HandleError(EPUBError::OPFManifestOutOfOrder);
        }
        else if ( xmlStrEqual(child->name, kMetadataName) && idx != 2 )
        {
            HandleError(EPUBError::OPFMetadataOutOfOrder);
        }
        
        child = xmlNextElementSibling(child);
    }
    
    if ( _spineCFIIndex == 0 )
    {
        HandleError(EPUBError::OPFNoSpine);
        return false;       // spineless!
    }
    
#if EPUB_COMPILER_SUPPORTS(CXX_INITIALIZER_LISTS)
    XPathWrangler xpath(_opf, {{"opf", OPFNamespace}, {"dc", DCNamespace}});
#else
    XPathWrangler::NamespaceList __m;
    __m["opf"] = OPFNamespace;
    __m["dc"] = DCNamespace;
    XPathWrangler xpath(_opf, __m);
#endif
    
    // simple things: manifest and spine items
    xmlNodeSetPtr manifestNodes = nullptr;
    xmlNodeSetPtr spineNodes = nullptr;
    
    try
    {
        manifestNodes = xpath.Nodes("/opf:package/opf:manifest/opf:item");
        spineNodes = xpath.Nodes("/opf:package/opf:spine/opf:itemref");
        
        if ( manifestNodes == nullptr )
        {
            HandleError(EPUBError::OPFNoManifestItems);
        }
        
        if ( spineNodes == nullptr )
        {
            HandleError(EPUBError::OPFNoSpineItems);
        }
        
        for ( int i = 0; i < manifestNodes->nodeNr; i++ )
        {
            auto p = std::make_shared<ManifestItem>(sharedMe);
            if ( p->ParseXML(p, manifestNodes->nodeTab[i]) )
            {
#if EPUB_HAVE(CXX_MAP_EMPLACE)
                _manifest.emplace(p->Identifier(), p);
#else
                _manifest[p->Identifier()] = p;
#endif
                StoreXMLIdentifiable(p);
            }
            else
            {
                // TODO: Need an error here
            }
        }
        
        // check fallback chains
        typedef std::map<string, bool> IdentSet;
        IdentSet idents;
        for ( auto &pair : _manifest )
        {
            ManifestItemPtr item = pair.second;
            if ( item->FallbackID().empty() )
                continue;
            
            idents[item->XMLIdentifier()] = true;
            while ( !item->FallbackID().empty() )
            {
                if ( idents[item->FallbackID()] )
                {
                    HandleError(EPUBError::OPFFallbackChainCircularReference);
                    break;
                }
                
                item = item->Fallback();
            }
            
            idents.clear();
        }
        
        SpineItemPtr cur;
        for ( int i = 0; i < spineNodes->nodeNr; i++ )
        {
            auto next = std::make_shared<SpineItem>(sharedMe);
            if ( next->ParseXML(next, spineNodes->nodeTab[i]) == false )
            {
                // TODO: need an error code here
                continue;
            }
            
            // validation of idref
            auto manifestFound = _manifest.find(next->Idref());
            if ( manifestFound == _manifest.end() )
            {
                HandleError(EPUBError::OPFInvalidSpineIdref, _Str(next->Idref(), " does not correspond to a manifest item"));
                continue;
            }
            
            // validation of spine resource type w/fallbacks
            ManifestItemPtr manifestItem = next->ManifestItem();
            bool isContentDoc = false;
            do
            {
                if ( manifestItem->MediaType() == "application/xhtml+xml" ||
                     manifestItem->MediaType() == "image/svg" )
                {
                    isContentDoc = true;
                    break;
                }
                
            } while ( (manifestItem = manifestItem->Fallback()) );
            
            if ( !isContentDoc )
                HandleError(EPUBError::OPFFallbackChainHasNoContentDocument);
            
            StoreXMLIdentifiable(next);
            
            if ( cur != nullptr )
            {
                cur->SetNextItem(next);
            }
            else
            {
                _spine = next;
            }
            
            cur = next;
        }
    }
    catch (const std::system_error& exc)
    {
        if ( manifestNodes != nullptr )
            xmlXPathFreeNodeSet(manifestNodes);
        if ( spineNodes != nullptr )
            xmlXPathFreeNodeSet(spineNodes);
        if ( exc.code().category() == epub_spec_category() )
            throw;
        return false;
    }
    catch (...)
    {
        if ( manifestNodes != nullptr )
            xmlXPathFreeNodeSet(manifestNodes);
        if ( spineNodes != nullptr )
            xmlXPathFreeNodeSet(spineNodes);
        return false;
    }
    
    xmlXPathFreeNodeSet(manifestNodes);
    xmlXPathFreeNodeSet(spineNodes);
    
    // now the metadata, which is slightly more involved due to extensions
    xmlNodeSetPtr metadataNodes = nullptr;
    xmlNodeSetPtr refineNodes = xmlXPathNodeSetCreate(nullptr);
    
    try
    {
        shared_ptr<PropertyHolder> holderPtr = std::dynamic_pointer_cast<PropertyHolder>(sharedMe);
        metadataNodes = xpath.Nodes("/opf:package/opf:metadata/*");
        if ( metadataNodes == nullptr )
            HandleError(EPUBError::OPFNoMetadata);
        
        bool foundIdentifier = false, foundTitle = false, foundLanguage = false, foundModDate = false;
        string uniqueIDRef = _getProp(root, "unique-identifier");
        if ( uniqueIDRef.empty() )
            HandleError(EPUBError::OPFPackageUniqueIDInvalid);
        
        for ( int i = 0; i < metadataNodes->nodeNr; i++ )
        {
            xmlNodePtr node = metadataNodes->nodeTab[i];
            PropertyPtr p;
            
            if ( node->ns != nullptr && xmlStrcmp(node->ns->href, BAD_CAST DCNamespace) == 0 )
            {
                // definitely a main node
                p = std::make_shared<Property>(holderPtr);
            }
            else if ( _getProp(node, "name").size() > 0 )
            {
                // it's an ePub2 item-- ignore it
                continue;
            }
            else if ( _getProp(node, "refines").empty() )
            {
                // not refining anything, so it's a main node
                p = std::make_shared<Property>(holderPtr);
            }
            else
            {
                // by elimination it's refining something-- we'll process it later when we know we've got all the main nodes in there
                xmlXPathNodeSetAdd(refineNodes, node);
            }
            
            if ( p && p->ParseMetaElement(node) )
            {
                switch ( p->Type() )
                {
                    case DCType::Identifier:
                    {
                        foundIdentifier = true;
                        if ( !uniqueIDRef.empty() && uniqueIDRef != p->XMLIdentifier() )
                            HandleError(EPUBError::OPFPackageUniqueIDInvalid);
                        break;
                    }
                    case DCType::Title:
                    {
                        foundTitle = true;
                        break;
                    }
                    case DCType::Language:
                    {
                        foundLanguage = true;
                        break;
                    }
                    case DCType::Custom:
                    {
                        if ( p->PropertyIdentifier() == MakePropertyIRI("modified", "dcterms") )
                            foundModDate = true;
                        break;
                    }
                        
                    default:
                        break;
                }
                
                AddProperty(p);
                StoreXMLIdentifiable(p);
            }
        }
        
        if ( !foundIdentifier )
            HandleError(EPUBError::OPFMissingIdentifierMetadata);
        if ( !foundTitle )
            HandleError(EPUBError::OPFMissingTitleMetadata);
        if ( !foundLanguage )
            HandleError(EPUBError::OPFMissingLanguageMetadata);
        if ( !foundModDate )
            HandleError(EPUBError::OPFMissingModificationDateMetadata);
        
        for ( int i = 0; i < refineNodes->nodeNr; i++ )
        {
            xmlNodePtr node = refineNodes->nodeTab[i];
            string ident = _getProp(node, "refines");
            if ( ident.empty() )
            {
                HandleError(EPUBError::OPFInvalidRefinementAttribute, "Empty IRI for 'refines' attribute");
                continue;
            }
            
            if ( ident[0] == '#' )
            {
                ident = ident.substr(1);
            }
            else
            {
                // validation only right now
                IRI iri(ident);
                if ( iri.IsEmpty() )
                {
                    HandleError(EPUBError::OPFInvalidRefinementAttribute, _Str("#", ident, " is not a valid IRI"));
                }
                else if ( iri.IsRelative() == false )
                {
                    HandleError(EPUBError::OPFInvalidRefinementAttribute, _Str(iri.IRIString(), " is not a relative IRI"));
                }
                continue;
            }
            
            auto found = _xmlIDLookup.find(ident);
            if ( found == _xmlIDLookup.end() )
            {
                HandleError(EPUBError::OPFInvalidRefinementTarget, _Str("#", ident, " does not reference an item in this document"));
                continue;
            }
            
            PropertyPtr prop = std::dynamic_pointer_cast<Property>(found->second);
            if ( prop )
            {
                // it's a property, so this is an extension
                PropertyExtensionPtr extPtr = std::make_shared<PropertyExtension>(prop);
                if ( extPtr->ParseMetaElement(node) )
                    prop->AddExtension(extPtr);
            }
            else
            {
                // not a property, so treat this as a plain property
                shared_ptr<PropertyHolder> ptr = std::dynamic_pointer_cast<PropertyHolder>(found->second);
                if ( ptr )
                {
                    prop = std::make_shared<Property>(ptr);
                    if ( prop->ParseMetaElement(node) )
                        ptr->AddProperty(prop);
                }
            }
        }
        
        // now look at the <spine> element for properties
        xmlNodePtr spineNode = xmlFirstElementChild(root);
        for ( uint32_t i = 2; i < _spineCFIIndex; i += 2 )
            spineNode = xmlNextElementSibling(spineNode);
        
        string value = _getProp(spineNode, "page-progression-direction");
        if ( !value.empty() )
        {
            PropertyPtr prop = std::make_shared<Property>(holderPtr);
            prop->SetPropertyIdentifier(MakePropertyIRI("page-progression-direction"));
            prop->SetValue(value);
            AddProperty(prop);
        }
    }
    catch (std::system_error& exc)
    {
        if ( metadataNodes != nullptr )
            xmlXPathFreeNodeSet(metadataNodes);
        if ( refineNodes != nullptr )
            xmlXPathFreeNodeSet(refineNodes);
        if ( exc.code().category() == epub_spec_category() )
            throw;
        return false;
    }
    catch (...)
    {
        if ( metadataNodes != nullptr )
            xmlXPathFreeNodeSet(metadataNodes);
        if ( refineNodes != nullptr )
            xmlXPathFreeNodeSet(refineNodes);
        return false;
    }
    
    xmlXPathFreeNodeSet(metadataNodes);
    xmlXPathFreeNodeSet(refineNodes);
    
    // now any content type bindings
    xmlNodeSetPtr bindingNodes = nullptr;
    
    try
    {
        bindingNodes = xpath.Nodes("/opf:package/opf:bindings/*");
        if ( bindingNodes != nullptr )
        {
            for ( int i = 0; i < bindingNodes->nodeNr; i++ )
            {
                xmlNodePtr node = bindingNodes->nodeTab[i];
                if ( xmlStrcasecmp(node->name, MediaTypeElementName) != 0 )
                    continue;
                
                ////////////////////////////////////////////////////////////
                // ePub Publications 3.0 §3.4.16: The `mediaType` Element
                
                // The media-type attribute is required.
                string mediaType = _getProp(node, "media-type");
                if ( mediaType.empty() )
                {
                    HandleError(EPUBError::OPFBindingHandlerNoMediaType);
                    throw false;
                }
                
                // Each child mediaType of a bindings element must define a unique
                // content type in its media-type attribute, and the media type
                // specified must not be a Core Media Type.
                if ( _contentHandlers[mediaType].empty() == false )
                {
                    // user shouldn't have added manual things yet, but for safety we'll look anyway
                    for ( auto ptr : _contentHandlers[mediaType] )
                    {
                        if ( typeid(*ptr) == typeid(MediaHandler) )
                        {
                            HandleError(EPUBError::OPFMultipleBindingsForMediaType);
                        }
                    }
                }
                if ( CoreMediaTypes.find(mediaType) != CoreMediaTypes.end() )
                {
                    HandleError(EPUBError::OPFCoreMediaTypeBindingEncountered);
                }
                
                // The handler attribute is required
                string handlerID = _getProp(node, "handler");
                if ( handlerID.empty() )
                {
                    HandleError(EPUBError::OPFBindingHandlerNotFound);
                }
                
                // The required handler attribute must reference the ID [XML] of an
                // item in the manifest of the default implementation for this media
                // type. The referenced item must be an XHTML Content Document.
                ManifestItemPtr handlerItem = ManifestItemWithID(handlerID);
                if ( !handlerItem )
                {
                    HandleError(EPUBError::OPFBindingHandlerNotFound);
                }
                if ( handlerItem->MediaType() != "application/xhtml+xml" )
                {
                    
                    HandleError(EPUBError::OPFBindingHandlerInvalidType, _Str("Media handlers must be XHTML content documents, but referenced item has type '", handlerItem->MediaType(), "'."));
                }
                
                // All XHTML Content Documents designated as handlers must have the
                // `scripted` property set in their manifest item's `properties`
                // attribute.
                if ( handlerItem->HasProperty(ItemProperties::HasScriptedContent) == false )
                {
                    HandleError(EPUBError::OPFBindingHandlerNotScripted);
                }
                
                // all good-- install it now
                _contentHandlers[mediaType].push_back(std::make_shared<MediaHandler>(sharedMe, mediaType, handlerItem->AbsolutePath()));
            }
        }
    }
    catch (std::exception& exc)
    {
        std::cerr << "Exception processing OPF file: " << exc.what() << std::endl;
        if ( bindingNodes != nullptr )
            xmlXPathFreeNodeSet(bindingNodes);
        throw;
    }
    catch (...)
    {
        if ( bindingNodes != nullptr )
            xmlXPathFreeNodeSet(bindingNodes);
        return false;
    }
    
    xmlXPathFreeNodeSet(bindingNodes);
    
    // now the navigation tables
    for ( auto item : _manifest )
    {
        if ( !item.second->HasProperty(ItemProperties::Navigation) )
            continue;
        
        NavigationList tables = NavTablesFromManifestItem(sharedMe, item.second);
        for ( auto table : tables )
        {
            // have to dynamic_cast these guys to get the right pointer type
            shared_ptr<class NavigationTable> navTable = std::dynamic_pointer_cast<class NavigationTable>(table);
#if EPUB_HAVE(CXX_MAP_EMPLACE)
            _navigation.emplace(navTable->Type(), navTable);
#else
            _navigation[navTable->Type()] = navTable;
#endif
        }
    }
    
    // lastly, let's set the media support information
    InitMediaSupport();
    
    return true;
}
Example #2
0
bool Package::Unpack()
{
    // very basic sanity check
    xmlNodePtr root = xmlDocGetRootElement(_opf);
    string rootName(reinterpret_cast<const char*>(root->name));
    rootName.tolower();
    
    if ( rootName != "package" )
        return false;       // not an OPF file, innit?
    
    InstallPrefixesFromAttributeValue(_getProp(root, "prefix", ePub3NamespaceURI));
    
    // go through children to determine the CFI index of the <spine> tag
    static const xmlChar* kSpineName = BAD_CAST "spine";
    _spineCFIIndex = 0;
    xmlNodePtr child = root->children;
    while ( child != nullptr )
    {
        if ( child->type == XML_ELEMENT_NODE )
        {
            _spineCFIIndex += 2;
            if ( xmlStrEqual(child->name, kSpineName) )
                break;
        }
        
        child = child->next;
    }
    
    if ( _spineCFIIndex == 0 )
        return false;       // spineless!
    
    XPathWrangler xpath(_opf, {{"opf", OPFNamespace}, {"dc", DCNamespace}});
    
    // simple things: manifest and spine items
    xmlNodeSetPtr manifestNodes = nullptr;
    xmlNodeSetPtr spineNodes = nullptr;
    
    try
    {
        manifestNodes = xpath.Nodes("/opf:package/opf:manifest/opf:item");
        spineNodes = xpath.Nodes("/opf:package/opf:spine/opf:itemref");
        
        if ( manifestNodes == nullptr || spineNodes == nullptr )
            throw false;   // looks invalid, or at least unusable, to me
        
        for ( int i = 0; i < manifestNodes->nodeNr; i++ )
        {
            ManifestItem *p = new ManifestItem(manifestNodes->nodeTab[i], this);
            _manifest.emplace(p->Identifier(), p);
        }
        
        SpineItem* cur = nullptr;
        for ( int i = 0; i < spineNodes->nodeNr; i++ )
        {
            SpineItem* next = new SpineItem(spineNodes->nodeTab[i], this);
            if ( cur != nullptr )
            {
                cur->SetNextItem(next);
            }
            else
            {
                _spine.reset(next);
            }
            
            cur = next;
        }
    }
    catch (...)
    {
        if ( manifestNodes != nullptr )
            xmlXPathFreeNodeSet(manifestNodes);
        if ( spineNodes != nullptr )
            xmlXPathFreeNodeSet(spineNodes);
        return false;
    }
    
    xmlXPathFreeNodeSet(manifestNodes);
    xmlXPathFreeNodeSet(spineNodes);
    
    // now the metadata, which is slightly more involved due to extensions
    xmlNodeSetPtr metadataNodes = nullptr;
    xmlNodeSetPtr refineNodes = xmlXPathNodeSetCreate(nullptr);
    
    try
    {
        metadataNodes = xpath.Nodes("/opf:package/opf:metadata/*");
        if ( metadataNodes == nullptr )
            throw false;
        
        std::map<string, class Metadata*> metadataByID;
        
        for ( int i = 0; i < metadataNodes->nodeNr; i++ )
        {
            xmlNodePtr node = metadataNodes->nodeTab[i];
            class Metadata* p = nullptr;
            
            if ( node->ns != nullptr && xmlStrcmp(node->ns->href, BAD_CAST DCNamespace) == 0 )
            {
                // definitely a main node
                p = new class Metadata(node, this);
            }
            else if ( _getProp(node, "name").size() > 0 )
            {
                // it's an ePub2 item-- ignore it
                continue;
            }
            else if ( _getProp(node, "refines").empty() )
            {
                // not refining anything, so it's a main node
                p = new class Metadata(node, this);
            }
            else
            {
                // by elimination it's refining something-- we'll process it later when we know we've got all the main nodes in there
                xmlXPathNodeSetAdd(refineNodes, node);
            }
            
            if ( p != nullptr )
            {
                _metadata.push_back(p);
                if ( !p->Identifier().empty() )
                    metadataByID[p->Identifier()] = p;
            }
        }
        
        for ( int i = 0; i < refineNodes->nodeNr; i++ )
        {
            xmlNodePtr node = refineNodes->nodeTab[i];
            string ident = _getProp(node, "refines");
            if ( ident.empty() )
                continue;
            
            if ( ident[0] == '#' )
                ident = ident.substr(1);
            
            auto found = metadataByID.find(ident);
            if ( found == metadataByID.end() )
                continue;
            
            found->second->AddExtension(node, this);
        }
    }
    catch (...)
    {
        if ( metadataNodes != nullptr )
            xmlXPathFreeNodeSet(metadataNodes);
        if ( refineNodes != nullptr )
            xmlXPathFreeNodeSet(refineNodes);
        return false;
    }
    
    xmlXPathFreeNodeSet(metadataNodes);
    xmlXPathFreeNodeSet(refineNodes);
    
    // now any content type bindings
    xmlNodeSetPtr bindingNodes = nullptr;
    
    try
    {
        bindingNodes = xpath.Nodes("/opf:package/opf:bindings/*");
        if ( bindingNodes != nullptr )
        {
            for ( int i = 0; i < bindingNodes->nodeNr; i++ )
            {
                xmlNodePtr node = bindingNodes->nodeTab[i];
                if ( xmlStrcasecmp(node->name, MediaTypeElementName) != 0 )
                    continue;
                
                ////////////////////////////////////////////////////////////
                // ePub Publications 3.0 §3.4.16: The `mediaType` Element
                
                // The media-type attribute is required.
                string mediaType = _getProp(node, "media-type");
                if ( mediaType.empty() )
                {
                    throw std::invalid_argument("mediaType element has missing or empty media-type attribute.");
                }
                
                // Each child mediaType of a bindings element must define a unique
                // content type in its media-type attribute, and the media type
                // specified must not be a Core Media Type.
                if ( _contentHandlers[mediaType].empty() == false )
                {
                    // user shouldn't have added manual things yet, but for safety we'll look anyway
                    for ( auto ptr : _contentHandlers[mediaType] )
                    {
                        if ( typeid(*ptr) == typeid(MediaHandler) )
                        {
                            throw std::invalid_argument(_Str("Duplicate media handler found for type '", mediaType, "'."));
                        }
                    }
                }
                if ( CoreMediaTypes.find(mediaType) != CoreMediaTypes.end() )
                {
                    throw std::invalid_argument("mediaType element specifies an EPUB Core Media Type.");
                }
                
                // The handler attribute is required
                string handlerID = _getProp(node, "handler");
                if ( handlerID.empty() )
                {
                    throw std::invalid_argument("mediaType element has missing or empty handler attribute.");
                }
                
                // The required handler attribute must reference the ID [XML] of an
                // item in the manifest of the default implementation for this media
                // type. The referenced item must be an XHTML Content Document.
                const ManifestItem* handlerItem = ManifestItemWithID(handlerID);
                if ( handlerItem == nullptr )
                {
                    throw std::invalid_argument(_Str("mediaType element references non-existent handler with ID '", handlerID, "'."));
                }
                if ( handlerItem->MediaType() != "application/xhtml+xml" )
                {
                    throw std::invalid_argument(_Str("Media handlers must be XHTML content documents, but referenced item has type '", handlerItem->MediaType(), "'."));
                }
                
                // All XHTML Content Documents designated as handlers must have the
                // `scripted` property set in their manifest item's `properties`
                // attribute.
                if ( handlerItem->HasProperty(ItemProperties::HasScriptedContent) == false )
                {
                    throw std::invalid_argument("Media handlers must have the `scripted` property.");
                }
                
                // all good-- install it now
                _contentHandlers[mediaType].push_back(new MediaHandler(this, mediaType, handlerItem->AbsolutePath()));
            }
        }
    }
    catch (std::exception& exc)
    {
        std::cerr << "Exception processing OPF file: " << exc.what() << std::endl;
        if ( bindingNodes != nullptr )
            xmlXPathFreeNodeSet(bindingNodes);
        return false;
    }
    catch (...)
    {
        if ( bindingNodes != nullptr )
            xmlXPathFreeNodeSet(bindingNodes);
        return false;
    }
    
    xmlXPathFreeNodeSet(bindingNodes);
    
    // now the navigation tables
    for ( auto item : _manifest )
    {
        if ( !item.second->HasProperty(ItemProperties::Navigation) )
            continue;
        
        NavigationList tables = NavTablesFromManifestItem(item.second);
        for ( auto table : tables )
        {
            // have to dynamic_cast these guys to get the right pointer type
            class NavigationTable* navTable = dynamic_cast<class NavigationTable*>(table);
            _navigation.emplace(navTable->Type(), navTable);
        }
    }
    
    return true;
}