void PropertyHolder::RemoveProperty(const string& reference, const string& prefix) { IRI iri = MakePropertyIRI(reference, prefix); if ( iri.IsEmpty() ) return; RemoveProperty(iri); }
/**
 * Returns the package title.
 *
 * A property refined with a `title-type` extension whose value is "main" is
 * preferred; failing that, the first dc:title property is used.
 *
 * @param localized When true, return LocalizedValue() instead of Value().
 * @return The selected title, or string::EmptyString when there is no title.
 */
const string& Package::Title(bool localized) const
{
    // http://idpf.org/epub/vocab/package/#title-type
    IRI titleTypeIRI(MakePropertyIRI("title-type"));

    // first choice: the property explicitly flagged as the main title
    for ( auto& candidate : PropertiesMatching(titleTypeIRI) )
    {
        PropertyExtensionPtr refinement = candidate->ExtensionWithIdentifier(titleTypeIRI);
        if ( refinement != nullptr && refinement->Value() == "main" )
            return (localized? candidate->LocalizedValue() : candidate->Value());
    }

    // fallback: whatever dc:title comes first
    auto titles = PropertiesMatching(DCType::Title);
    if ( titles.empty() )
        return string::EmptyString;
    if ( !localized )
        return titles[0]->Value();
    return titles[0]->LocalizedValue();
}
/**
 * Returns the value of the first metadata item carrying the
 * `dcterms:modified` property.
 *
 * @return The item's value, or string::EmptyString when no such item exists.
 */
const string Package::ModificationDate() const
{
    MetadataMap modified = MetadataItemsWithProperty(MakePropertyIRI("modified", "dcterms"));
    if ( !modified.empty() )
        return modified[0]->Value();
    return string::EmptyString;
}
bool PropertyHolder::ContainsProperty(const string& reference, const string& prefix, bool lookupParents) const { IRI iri = MakePropertyIRI(reference, prefix); if ( iri.IsEmpty() ) return false; return ContainsProperty(iri, lookupParents); }
/**
 * Convenience overload: resolves a reference/prefix pair to an IRI with
 * MakePropertyIRI() and forwards to the IRI overload of PropertyMatching().
 *
 * @param reference     The property reference.
 * @param prefix        The vocabulary prefix used to resolve the reference.
 * @param lookupParents Passed through unchanged to the IRI overload.
 * @return A null PropertyPtr when the pair resolves to an empty IRI;
 *         otherwise the result of the IRI overload.
 */
PropertyPtr PropertyHolder::PropertyMatching(const string& reference, const string& prefix, bool lookupParents) const
{
    IRI iri = MakePropertyIRI(reference, prefix);
    if ( iri.IsEmpty() )
        return nullptr;     // was `return false`, which is not a null pointer constant in C++11 and later
    return PropertyMatching(iri, lookupParents);
}
/**
 * Returns the value of the first property matching `dcterms:modified`.
 *
 * @return A reference to that property's value, or string::EmptyString when
 *         no such property exists.
 */
const string& Package::ModificationDate() const
{
    auto matches = PropertiesMatching(MakePropertyIRI("modified", "dcterms"));
    if ( !matches.empty() )
        return matches[0]->Value();
    return string::EmptyString;
}
/**
 * Collects the values of every property matching `dcterms:contributor`.
 *
 * @param localized When true, LocalizedValue() is collected instead of Value().
 * @return One entry per matching property, in the order PropertiesMatching()
 *         yields them.
 */
const Package::AttributionList Package::ContributorNames(bool localized) const
{
    AttributionList names;
    for ( auto contributor : PropertiesMatching(MakePropertyIRI("contributor", "dcterms")) )
    {
        names.emplace_back((localized? contributor->LocalizedValue() : contributor->Value()));
    }
    return names;
}
/**
 * Splits an attribute value of the form `[prefix:]reference` and resolves it
 * to an IRI through MakePropertyIRI().
 *
 * @param attrValue The raw attribute text.
 * @return The IRI produced by MakePropertyIRI(reference, prefix).
 * @throws std::invalid_argument when the text does not match the pattern.
 */
IRI PropertyHolder::PropertyIRIFromString(const string &attrValue) const
{
    // capture 1: optional prefix (without the colon); capture 2: the reference
    static REGEX_NS::regex propertyPattern("^(?:(.+?):)?(.+)$");

    REGEX_NS::smatch captures;
    const bool matched = REGEX_NS::regex_match(attrValue.stl_str(), captures, propertyPattern);
    if ( !matched )
        throw std::invalid_argument(_Str("Attribute '", attrValue, "' doesn't look like a property name to me"));

    return MakePropertyIRI(captures.str(2), captures.str(1));
}
/**
 * Builds a property IRI by looking the prefix up in this holder's vocabulary
 * table and appending the reference to the mapped value.
 *
 * When the prefix is not known locally the request is delegated to the parent
 * holder, if it is still alive.
 *
 * @param reference The property reference to append.
 * @param prefix    The vocabulary prefix to look up.
 * @return The resolved IRI, or a default-constructed IRI when neither this
 *         holder nor any reachable parent knows the prefix.
 */
IRI PropertyHolder::MakePropertyIRI(const string &reference, const string& prefix) const
{
    auto entry = _vocabularyLookup.find(prefix);
    if ( entry != _vocabularyLookup.end() )
        return IRI(entry->second + reference);

    // unknown here: defer to the parent, if there is one
    auto owner = _parent.lock();
    if ( owner )
        return owner->MakePropertyIRI(reference, prefix);

    return IRI();
}
/**
 * Intended to return the package's ISBN.
 *
 * The lookup is not implemented yet: the loop only visits dc:identifier
 * properties that carry an `identifier-type` extension and does nothing with
 * them, so the function currently always returns string::EmptyString.
 */
const string Package::ISBN() const
{
    for ( auto identifier : PropertiesMatching(DCType::Identifier) )
    {
        if ( identifier->ExtensionWithIdentifier(MakePropertyIRI("identifier-type")) != nullptr )
        {
            // this will be complicated...
            // TODO: Implementation of ISBN lookup
        }
    }

    return string::EmptyString;
}
/**
 * Collects one name per dc:creator metadata item.
 *
 * For each item, the value of its `file-as` extension is used when present;
 * otherwise the item's own value is used.
 *
 * @return The collected names, in the order the items are yielded.
 */
const Package::AttributionList Package::AttributionNames() const
{
    IRI fileAsIRI(MakePropertyIRI("file-as"));

    AttributionList names;
    for ( auto creator : MetadataItemsWithDCType(Metadata::DCType::Creator) )
    {
        const Metadata::Extension* fileAs = creator->ExtensionWithProperty(fileAsIRI);
        if ( fileAs == nullptr )
            names.emplace_back(creator->Value());
        else
            names.emplace_back(fileAs->Value());
    }
    return names;
}
/**
 * Collects one name per dc:creator property.
 *
 * For each property, the value of its `file-as` extension is used when
 * present; otherwise the property's own (optionally localized) value is used.
 *
 * @param localized When true, LocalizedValue() is used for properties that
 *                  have no `file-as` extension.
 * @return The collected names, in the order the properties are yielded.
 */
const Package::AttributionList Package::AttributionNames(bool localized) const
{
    IRI fileAsIRI(MakePropertyIRI("file-as"));

    AttributionList names;
    for ( auto creator : PropertiesMatching(DCType::Creator) )
    {
        auto fileAs = creator->ExtensionWithIdentifier(fileAsIRI);
        if ( fileAs )
        {
            names.emplace_back(fileAs->Value());
            continue;
        }
        names.emplace_back((localized? creator->LocalizedValue() : creator->Value()));
    }
    return names;
}
/**
 * Builds the full title of the package.
 *
 * Resolution order:
 *  1. the result of ExpandedTitle(), when non-empty;
 *  2. the only dc:title property, when there is exactly one;
 *  3. otherwise all titles joined as "first: second, third, ...". Titles are
 *     placed by their 1-based `display-seq` extension when any property
 *     carries one, else taken in document order.
 *
 * @param localized When true, LocalizedValue() is used instead of Value().
 * @return The composed title, or string::EmptyString when the package has no
 *         dc:title property at all.
 */
const string Package::FullTitle(bool localized) const
{
    string expanded = ExpandedTitle(localized);
    if ( !expanded.empty() )
        return expanded;

    auto items = PropertiesMatching(DCType::Title);
    if ( items.empty() )
        return string::EmptyString;     // nothing to join; previously dereferenced end()
    if ( items.size() == 1 )
        return (localized ? items[0]->LocalizedValue() : items[0]->Value());

    IRI displaySeqIRI(MakePropertyIRI("display-seq"));  // http://idpf.org/epub/vocab/package/#display-seq
    std::vector<string> titles(items.size());

    auto sequencedItems = PropertiesMatching(displaySeqIRI);
    if ( !sequencedItems.empty() )
    {
        // all these have a 1-based sequence number
        for ( auto item : sequencedItems )
        {
            PropertyExtensionPtr extension = item->ExtensionWithIdentifier(displaySeqIRI);
            if ( extension == nullptr )
                continue;

            // strtoul yields 0 for unparsable text; 0 and anything past the
            // number of titles cannot be used as a slot index
            unsigned long seq = strtoul(extension->Value().c_str(), nullptr, 10);
            if ( seq == 0 || seq > titles.size() )
                continue;

            titles[seq - 1] = (localized ? item->LocalizedValue() : item->Value());
        }
    }
    else
    {
        titles.clear();

        // insert any non-sequenced items at the head of the list, in order
        for ( auto item : items )
        {
            titles.emplace_back((localized ? item->LocalizedValue() : item->Value()));
        }
    }

    // put them all together now
    // TODO: this ought to be localized based on the value of Language().
    std::stringstream ss;
    auto pos = titles.begin();
    if ( pos != titles.end() )
    {
        ss << *pos;
        ++pos;
    }
    if ( pos != titles.end() )
    {
        // one increment per statement: the streamed order must not depend on
        // how the compiler sequences the operands
        ss << ": " << *pos;
        ++pos;
    }
    for ( ; pos != titles.end(); ++pos )
    {
        ss << ", " << *pos;
    }

    return string(ss.str());
}
/**
 * Returns the value of the first metadata item whose `title-type` extension
 * has the value "subtitle".
 *
 * @return That item's value, or string::EmptyString when none is found.
 */
const string Package::Subtitle() const
{
    // http://idpf.org/epub/vocab/package/#title-type
    IRI titleTypeIRI(MakePropertyIRI("title-type"));

    for ( auto candidate : MetadataItemsWithProperty(titleTypeIRI) )
    {
        const Metadata::Extension* refinement = candidate->ExtensionWithProperty(titleTypeIRI);
        if ( refinement != nullptr && refinement->Value() == "subtitle" )
            return candidate->Value();
    }

    // nothing is tagged as a subtitle
    return string::EmptyString;
}
/**
 * Builds the full title of the package from its dc:title metadata items.
 *
 * A single title is returned as-is. Several titles are joined as
 * "first: second, third, ...", placed by their 1-based `display-seq`
 * extension when any item carries one, else taken in document order.
 *
 * @return The composed title, or string::EmptyString when the package has no
 *         dc:title metadata item at all.
 */
const string Package::FullTitle() const
{
    MetadataMap items = MetadataItemsWithDCType(Metadata::DCType::Title);
    if ( items.empty() )
        return string::EmptyString;     // nothing to join; previously dereferenced end()
    if ( items.size() == 1 )
        return items[0]->Value();

    IRI displaySeqIRI(MakePropertyIRI("display-seq"));  // http://idpf.org/epub/vocab/package/#display-seq
    std::vector<string> titles(items.size());

    MetadataMap sequencedItems = MetadataItemsWithProperty(displaySeqIRI);
    if ( !sequencedItems.empty() )
    {
        // all these have a 1-based sequence number
        for ( auto item : sequencedItems )
        {
            const Metadata::Extension* extension = item->ExtensionWithProperty(displaySeqIRI);
            if ( extension == nullptr )
                continue;

            // strtoul yields 0 for unparsable text; 0 and anything past the
            // number of titles cannot be used as a slot index
            unsigned long seq = strtoul(extension->Value().c_str(), nullptr, 10);
            if ( seq == 0 || seq > titles.size() )
                continue;

            titles[seq - 1] = item->Value();
        }
    }
    else
    {
        titles.clear();

        // insert any non-sequenced items at the head of the list, in order
        for ( auto item : items )
        {
            titles.emplace_back(item->Value());
        }
    }

    // put them all together now
    // TODO: this ought to be localized based on the value of Language().
    std::stringstream ss;
    auto pos = titles.begin();
    if ( pos != titles.end() )
    {
        ss << *pos;
        ++pos;
    }
    if ( pos != titles.end() )
    {
        // one increment per statement: the streamed order must not depend on
        // how the compiler sequences the operands
        ss << ": " << *pos;
        ++pos;
    }
    for ( ; pos != titles.end(); ++pos )
    {
        ss << ", " << *pos;
    }

    return string(ss.str());
}
/**
 * Collects author names from dc:creator properties, falling back to
 * `dcterms:creator` properties when no dc:creator yields a name.
 *
 * @param localized When true, LocalizedValue() is collected instead of Value().
 * @return The collected names; empty when neither lookup matches anything.
 */
const Package::AttributionList Package::AuthorNames(bool localized) const
{
    AttributionList names;

    for ( auto creator : PropertiesMatching(DCType::Creator) )
    {
        names.emplace_back((localized? creator->LocalizedValue() : creator->Value()));
    }
    if ( !names.empty() )
        return names;

    // maybe they're using dcterms:creator instead?
    for ( auto creator : PropertiesMatching(MakePropertyIRI("creator", "dcterms")) )
    {
        names.emplace_back((localized? creator->LocalizedValue() : creator->Value()));
    }
    return names;
}
/**
 * Returns the package title.
 *
 * A metadata item refined with a `title-type` extension whose value is "main"
 * is preferred; failing that, the first dc:title item is used.
 *
 * @return The selected title, or string::EmptyString when there is no title.
 */
const string Package::Title() const
{
    // http://idpf.org/epub/vocab/package/#title-type
    IRI titleTypeIRI(MakePropertyIRI("title-type"));

    // first choice: the item explicitly flagged as the main title
    for ( auto candidate : MetadataItemsWithProperty(titleTypeIRI) )
    {
        const Metadata::Extension* refinement = candidate->ExtensionWithProperty(titleTypeIRI);
        if ( refinement != nullptr && refinement->Value() == "main" )
            return candidate->Value();
    }

    // fallback: whatever dc:title comes first
    const MetadataMap titles = MetadataItemsWithDCType(Metadata::DCType::Title);
    if ( !titles.empty() )
        return titles[0]->Value();
    return string::EmptyString;
}
/**
 * Returns the value of the first property whose `title-type` extension has
 * the value "short".
 *
 * @param localized When true, return LocalizedValue() instead of Value().
 * @return A reference to the matching value, or string::EmptyString when no
 *         property is tagged as a short title.
 */
const string& Package::ShortTitle(bool localized) const
{
    // http://idpf.org/epub/vocab/package/#title-type
    IRI titleTypeIRI(MakePropertyIRI("title-type"));

    for ( auto candidate : PropertiesMatching(titleTypeIRI) )
    {
        PropertyExtensionPtr refinement = candidate->ExtensionWithIdentifier(titleTypeIRI);
        if ( refinement == nullptr || !(refinement->Value() == "short") )
            continue;

        if ( localized )
            return candidate->LocalizedValue();
        return candidate->Value();
    }

    // nothing is tagged as a short title
    return string::EmptyString;
}
bool Package::Unpack() { PackagePtr sharedMe = shared_from_this(); // very basic sanity check xmlNodePtr root = xmlDocGetRootElement(_opf); string rootName(reinterpret_cast<const char*>(root->name)); rootName.tolower(); if ( rootName != "package" ) { HandleError(EPUBError::OPFInvalidPackageDocument); return false; // not an OPF file, innit? } if ( _getProp(root, "version").empty() ) { HandleError(EPUBError::OPFPackageHasNoVersion); } InstallPrefixesFromAttributeValue(_getProp(root, "prefix", ePub3NamespaceURI)); // go through children to determine the CFI index of the <spine> tag static const xmlChar* kSpineName = BAD_CAST "spine"; static const xmlChar* kManifestName = BAD_CAST "manifest"; static const xmlChar* kMetadataName = BAD_CAST "metadata"; _spineCFIIndex = 0; uint32_t idx = 0; xmlNodePtr child = xmlFirstElementChild(root); while ( child != nullptr ) { idx += 2; if ( xmlStrEqual(child->name, kSpineName) ) { _spineCFIIndex = idx; if ( _spineCFIIndex != 6 ) HandleError(EPUBError::OPFSpineOutOfOrder); } else if ( xmlStrEqual(child->name, kManifestName) && idx != 4 ) { HandleError(EPUBError::OPFManifestOutOfOrder); } else if ( xmlStrEqual(child->name, kMetadataName) && idx != 2 ) { HandleError(EPUBError::OPFMetadataOutOfOrder); } child = xmlNextElementSibling(child); } if ( _spineCFIIndex == 0 ) { HandleError(EPUBError::OPFNoSpine); return false; // spineless! 
} #if EPUB_COMPILER_SUPPORTS(CXX_INITIALIZER_LISTS) XPathWrangler xpath(_opf, {{"opf", OPFNamespace}, {"dc", DCNamespace}}); #else XPathWrangler::NamespaceList __m; __m["opf"] = OPFNamespace; __m["dc"] = DCNamespace; XPathWrangler xpath(_opf, __m); #endif // simple things: manifest and spine items xmlNodeSetPtr manifestNodes = nullptr; xmlNodeSetPtr spineNodes = nullptr; try { manifestNodes = xpath.Nodes("/opf:package/opf:manifest/opf:item"); spineNodes = xpath.Nodes("/opf:package/opf:spine/opf:itemref"); if ( manifestNodes == nullptr ) { HandleError(EPUBError::OPFNoManifestItems); } if ( spineNodes == nullptr ) { HandleError(EPUBError::OPFNoSpineItems); } for ( int i = 0; i < manifestNodes->nodeNr; i++ ) { auto p = std::make_shared<ManifestItem>(sharedMe); if ( p->ParseXML(p, manifestNodes->nodeTab[i]) ) { #if EPUB_HAVE(CXX_MAP_EMPLACE) _manifest.emplace(p->Identifier(), p); #else _manifest[p->Identifier()] = p; #endif StoreXMLIdentifiable(p); } else { // TODO: Need an error here } } // check fallback chains typedef std::map<string, bool> IdentSet; IdentSet idents; for ( auto &pair : _manifest ) { ManifestItemPtr item = pair.second; if ( item->FallbackID().empty() ) continue; idents[item->XMLIdentifier()] = true; while ( !item->FallbackID().empty() ) { if ( idents[item->FallbackID()] ) { HandleError(EPUBError::OPFFallbackChainCircularReference); break; } item = item->Fallback(); } idents.clear(); } SpineItemPtr cur; for ( int i = 0; i < spineNodes->nodeNr; i++ ) { auto next = std::make_shared<SpineItem>(sharedMe); if ( next->ParseXML(next, spineNodes->nodeTab[i]) == false ) { // TODO: need an error code here continue; } // validation of idref auto manifestFound = _manifest.find(next->Idref()); if ( manifestFound == _manifest.end() ) { HandleError(EPUBError::OPFInvalidSpineIdref, _Str(next->Idref(), " does not correspond to a manifest item")); continue; } // validation of spine resource type w/fallbacks ManifestItemPtr manifestItem = next->ManifestItem(); bool 
isContentDoc = false; do { if ( manifestItem->MediaType() == "application/xhtml+xml" || manifestItem->MediaType() == "image/svg" ) { isContentDoc = true; break; } } while ( (manifestItem = manifestItem->Fallback()) ); if ( !isContentDoc ) HandleError(EPUBError::OPFFallbackChainHasNoContentDocument); StoreXMLIdentifiable(next); if ( cur != nullptr ) { cur->SetNextItem(next); } else { _spine = next; } cur = next; } } catch (const std::system_error& exc) { if ( manifestNodes != nullptr ) xmlXPathFreeNodeSet(manifestNodes); if ( spineNodes != nullptr ) xmlXPathFreeNodeSet(spineNodes); if ( exc.code().category() == epub_spec_category() ) throw; return false; } catch (...) { if ( manifestNodes != nullptr ) xmlXPathFreeNodeSet(manifestNodes); if ( spineNodes != nullptr ) xmlXPathFreeNodeSet(spineNodes); return false; } xmlXPathFreeNodeSet(manifestNodes); xmlXPathFreeNodeSet(spineNodes); // now the metadata, which is slightly more involved due to extensions xmlNodeSetPtr metadataNodes = nullptr; xmlNodeSetPtr refineNodes = xmlXPathNodeSetCreate(nullptr); try { shared_ptr<PropertyHolder> holderPtr = std::dynamic_pointer_cast<PropertyHolder>(sharedMe); metadataNodes = xpath.Nodes("/opf:package/opf:metadata/*"); if ( metadataNodes == nullptr ) HandleError(EPUBError::OPFNoMetadata); bool foundIdentifier = false, foundTitle = false, foundLanguage = false, foundModDate = false; string uniqueIDRef = _getProp(root, "unique-identifier"); if ( uniqueIDRef.empty() ) HandleError(EPUBError::OPFPackageUniqueIDInvalid); for ( int i = 0; i < metadataNodes->nodeNr; i++ ) { xmlNodePtr node = metadataNodes->nodeTab[i]; PropertyPtr p; if ( node->ns != nullptr && xmlStrcmp(node->ns->href, BAD_CAST DCNamespace) == 0 ) { // definitely a main node p = std::make_shared<Property>(holderPtr); } else if ( _getProp(node, "name").size() > 0 ) { // it's an ePub2 item-- ignore it continue; } else if ( _getProp(node, "refines").empty() ) { // not refining anything, so it's a main node p = 
std::make_shared<Property>(holderPtr); } else { // by elimination it's refining something-- we'll process it later when we know we've got all the main nodes in there xmlXPathNodeSetAdd(refineNodes, node); } if ( p && p->ParseMetaElement(node) ) { switch ( p->Type() ) { case DCType::Identifier: { foundIdentifier = true; if ( !uniqueIDRef.empty() && uniqueIDRef != p->XMLIdentifier() ) HandleError(EPUBError::OPFPackageUniqueIDInvalid); break; } case DCType::Title: { foundTitle = true; break; } case DCType::Language: { foundLanguage = true; break; } case DCType::Custom: { if ( p->PropertyIdentifier() == MakePropertyIRI("modified", "dcterms") ) foundModDate = true; break; } default: break; } AddProperty(p); StoreXMLIdentifiable(p); } } if ( !foundIdentifier ) HandleError(EPUBError::OPFMissingIdentifierMetadata); if ( !foundTitle ) HandleError(EPUBError::OPFMissingTitleMetadata); if ( !foundLanguage ) HandleError(EPUBError::OPFMissingLanguageMetadata); if ( !foundModDate ) HandleError(EPUBError::OPFMissingModificationDateMetadata); for ( int i = 0; i < refineNodes->nodeNr; i++ ) { xmlNodePtr node = refineNodes->nodeTab[i]; string ident = _getProp(node, "refines"); if ( ident.empty() ) { HandleError(EPUBError::OPFInvalidRefinementAttribute, "Empty IRI for 'refines' attribute"); continue; } if ( ident[0] == '#' ) { ident = ident.substr(1); } else { // validation only right now IRI iri(ident); if ( iri.IsEmpty() ) { HandleError(EPUBError::OPFInvalidRefinementAttribute, _Str("#", ident, " is not a valid IRI")); } else if ( iri.IsRelative() == false ) { HandleError(EPUBError::OPFInvalidRefinementAttribute, _Str(iri.IRIString(), " is not a relative IRI")); } continue; } auto found = _xmlIDLookup.find(ident); if ( found == _xmlIDLookup.end() ) { HandleError(EPUBError::OPFInvalidRefinementTarget, _Str("#", ident, " does not reference an item in this document")); continue; } PropertyPtr prop = std::dynamic_pointer_cast<Property>(found->second); if ( prop ) { // it's a property, 
so this is an extension PropertyExtensionPtr extPtr = std::make_shared<PropertyExtension>(prop); if ( extPtr->ParseMetaElement(node) ) prop->AddExtension(extPtr); } else { // not a property, so treat this as a plain property shared_ptr<PropertyHolder> ptr = std::dynamic_pointer_cast<PropertyHolder>(found->second); if ( ptr ) { prop = std::make_shared<Property>(ptr); if ( prop->ParseMetaElement(node) ) ptr->AddProperty(prop); } } } // now look at the <spine> element for properties xmlNodePtr spineNode = xmlFirstElementChild(root); for ( uint32_t i = 2; i < _spineCFIIndex; i += 2 ) spineNode = xmlNextElementSibling(spineNode); string value = _getProp(spineNode, "page-progression-direction"); if ( !value.empty() ) { PropertyPtr prop = std::make_shared<Property>(holderPtr); prop->SetPropertyIdentifier(MakePropertyIRI("page-progression-direction")); prop->SetValue(value); AddProperty(prop); } } catch (std::system_error& exc) { if ( metadataNodes != nullptr ) xmlXPathFreeNodeSet(metadataNodes); if ( refineNodes != nullptr ) xmlXPathFreeNodeSet(refineNodes); if ( exc.code().category() == epub_spec_category() ) throw; return false; } catch (...) { if ( metadataNodes != nullptr ) xmlXPathFreeNodeSet(metadataNodes); if ( refineNodes != nullptr ) xmlXPathFreeNodeSet(refineNodes); return false; } xmlXPathFreeNodeSet(metadataNodes); xmlXPathFreeNodeSet(refineNodes); // now any content type bindings xmlNodeSetPtr bindingNodes = nullptr; try { bindingNodes = xpath.Nodes("/opf:package/opf:bindings/*"); if ( bindingNodes != nullptr ) { for ( int i = 0; i < bindingNodes->nodeNr; i++ ) { xmlNodePtr node = bindingNodes->nodeTab[i]; if ( xmlStrcasecmp(node->name, MediaTypeElementName) != 0 ) continue; //////////////////////////////////////////////////////////// // ePub Publications 3.0 §3.4.16: The `mediaType` Element // The media-type attribute is required. 
string mediaType = _getProp(node, "media-type"); if ( mediaType.empty() ) { HandleError(EPUBError::OPFBindingHandlerNoMediaType); throw false; } // Each child mediaType of a bindings element must define a unique // content type in its media-type attribute, and the media type // specified must not be a Core Media Type. if ( _contentHandlers[mediaType].empty() == false ) { // user shouldn't have added manual things yet, but for safety we'll look anyway for ( auto ptr : _contentHandlers[mediaType] ) { if ( typeid(*ptr) == typeid(MediaHandler) ) { HandleError(EPUBError::OPFMultipleBindingsForMediaType); } } } if ( CoreMediaTypes.find(mediaType) != CoreMediaTypes.end() ) { HandleError(EPUBError::OPFCoreMediaTypeBindingEncountered); } // The handler attribute is required string handlerID = _getProp(node, "handler"); if ( handlerID.empty() ) { HandleError(EPUBError::OPFBindingHandlerNotFound); } // The required handler attribute must reference the ID [XML] of an // item in the manifest of the default implementation for this media // type. The referenced item must be an XHTML Content Document. ManifestItemPtr handlerItem = ManifestItemWithID(handlerID); if ( !handlerItem ) { HandleError(EPUBError::OPFBindingHandlerNotFound); } if ( handlerItem->MediaType() != "application/xhtml+xml" ) { HandleError(EPUBError::OPFBindingHandlerInvalidType, _Str("Media handlers must be XHTML content documents, but referenced item has type '", handlerItem->MediaType(), "'.")); } // All XHTML Content Documents designated as handlers must have the // `scripted` property set in their manifest item's `properties` // attribute. 
if ( handlerItem->HasProperty(ItemProperties::HasScriptedContent) == false ) { HandleError(EPUBError::OPFBindingHandlerNotScripted); } // all good-- install it now _contentHandlers[mediaType].push_back(std::make_shared<MediaHandler>(sharedMe, mediaType, handlerItem->AbsolutePath())); } } } catch (std::exception& exc) { std::cerr << "Exception processing OPF file: " << exc.what() << std::endl; if ( bindingNodes != nullptr ) xmlXPathFreeNodeSet(bindingNodes); throw; } catch (...) { if ( bindingNodes != nullptr ) xmlXPathFreeNodeSet(bindingNodes); return false; } xmlXPathFreeNodeSet(bindingNodes); // now the navigation tables for ( auto item : _manifest ) { if ( !item.second->HasProperty(ItemProperties::Navigation) ) continue; NavigationList tables = NavTablesFromManifestItem(sharedMe, item.second); for ( auto table : tables ) { // have to dynamic_cast these guys to get the right pointer type shared_ptr<class NavigationTable> navTable = std::dynamic_pointer_cast<class NavigationTable>(table); #if EPUB_HAVE(CXX_MAP_EMPLACE) _navigation.emplace(navTable->Type(), navTable); #else _navigation[navTable->Type()] = navTable; #endif } } // lastly, let's set the media support information InitMediaSupport(); return true; }