/**
 * Builds the CFI sub-path "<spine-step>/<item-step>[<idref>]!" addressing the
 * spine item that references the manifest item with the given identifier.
 *
 * @param ident The manifest item identifier (i.e. the spine item's idref).
 * @return The sub-path string; note that it carries no leading '/'.
 * @throws std::invalid_argument if no spine item references `ident`.
 */
string PackageBase::CFISubpathForManifestItemWithID(const string &ident) const
{
    const size_t spineIndex = IndexOfSpineItemWithIDRef(ident);
    if ( spineIndex == size_t(-1) )
        throw std::invalid_argument(_Str("Identifier '", ident, "' was not found in the spine."));

    // CFI element steps are even and start at 2, so spine position N maps to
    // step (N+1)*2 -- the same mapping Package::CFIForSpineItem() and
    // Package::CFIForManifestItem() use. Emitting N*2 produced step 0 for the
    // first spine item.
    return _Str(_spineCFIIndex, "/", (spineIndex+1)*2, "[", ident, "]!");
}
bool Container::Open(const string& path) { _archive = Archive::Open(path.stl_str()); if (_archive == nullptr) throw std::invalid_argument(_Str("Path does not point to a recognised archive file: '", path, "'")); _path = path; // TODO: Initialize lazily? Doing so would make initialization faster, but require // PackageLocations() to become non-const, like Packages(). ArchiveXmlReader reader(_archive->ReaderAtPath(gContainerFilePath)); if (!reader) { throw std::invalid_argument(_Str("Path does not point to a recognised archive file: '", path, "'")); } #if EPUB_USE(LIBXML2) _ocf = reader.xmlReadDocument(gContainerFilePath, nullptr, XML_PARSE_RECOVER|XML_PARSE_NOENT|XML_PARSE_DTDATTR); #else decltype(_ocf) __tmp(reader.ReadDocument(gContainerFilePath, nullptr, /*RESOLVE_EXTERNALS*/ 1)); _ocf = __tmp; #endif if (!((bool)_ocf)) return false; #if EPUB_COMPILER_SUPPORTS(CXX_INITIALIZER_LISTS) XPathWrangler xpath(_ocf, { { "ocf", "urn:oasis:names:tc:opendocument:xmlns:container" } }); #else XPathWrangler::NamespaceList __ns; __ns["ocf"] = OCFNamespaceURI; XPathWrangler xpath(_ocf, __ns); #endif xml::NodeSet nodes = xpath.Nodes(gRootfilesXPath); if (nodes.empty()) return false; LoadEncryption(); for (auto n : nodes) { string type = _getProp(n, "media-type"); string path = _getProp(n, "full-path"); if (path.empty()) continue; auto pkg = Package::New(Ptr(), type); if (pkg->Open(path)) _packages.push_back(pkg); } auto fm = FilterManager::Instance(); for (auto& pkg : _packages) { auto fc = fm->BuildFilterChainForPackage(pkg); pkg->SetFilterChain(fc); } return true; }
/**
 * Resolves the package-level part of a CFI (spine step + spine item
 * indirector) to the manifest item it designates.
 *
 * @param cfi            The CFI to resolve; its first two components are read.
 * @param pRemainingCFI  Optional. On success receives the components of `cfi`
 *                       from index 2 onwards.
 * @return The manifest item referenced by the addressed spine item, or
 *         `nullptr` when the CFI is rejected (after reporting via HandleError).
 * @throws CFI::InvalidCFI if the spine item step is odd.
 */
shared_ptr<ManifestItem> Package::ManifestItemForCFI(ePub3::CFI &cfi, CFI* pRemainingCFI) const
{
    ManifestItemPtr result;

    // NB: Package is a friend of CFI, so it can access the components directly
    if ( cfi._components.size() < 2 )
    {
        HandleError(EPUBError::CFITooShort, "CFI contains less than 2 nodes, so is invalid for package-based lookups.");
        // As in the other error branches below: if HandleError returns we must
        // stop, because the components indexed next do not exist.
        return nullptr;
    }

    // first item directs us to the Spine: check the index against the one we know
    auto component = cfi._components[0];
    if ( component.nodeIndex != _spineCFIIndex )
    {
        HandleError(EPUBError::CFIInvalidSpineLocation, _Str("CFI first node index (spine) is ", component.nodeIndex, " but should be ", _spineCFIIndex));
        // fix it ?
        //component.nodeIndex = _spineCFIIndex;
        return nullptr;
    }

    // second component is the particular spine item
    component = cfi._components[1];
    if ( !component.IsIndirector() )
    {
        HandleError(EPUBError::CFIUnexpectedComponent, "Package-based CFI's second item must be an indirector");
        return nullptr;
    }

    try
    {
        if ( (component.nodeIndex % 2) == 1 )
            throw CFI::InvalidCFI("CFI spine item index is odd, which makes no sense for always-empty spine nodes.");

        // step 2 is the first spine item, step 4 the second, ...
        SpineItemPtr item = _spine->at((component.nodeIndex/2) - 1);

        // check and correct any qualifiers
        item = ConfirmOrCorrectSpineItemQualifier(item, &component);
        if ( item == nullptr )
        {
            HandleError(EPUBError::CFIIndirectionTargetMissing, "CFI spine node qualifier doesn't match any spine item idref");
            return nullptr;
        }

        // we know it's not null, because SpineItem::at() throws an exception if out of range
        result = ManifestItemWithID(item->Idref());

        if ( pRemainingCFI != nullptr )
            pRemainingCFI->Assign(cfi, 2);
    }
    catch (const std::out_of_range& e)
    {
        HandleError(EPUBError::CFIStepOutOfBounds, _Str("CFI references out-of-range spine item: ", e.what()));
    }

    return result;
}
/**
 * Replaces this IRI with the one described by `str`.
 *
 * A string starting with "urn:" that splits on ':' into exactly three pieces
 * is treated as a URN; anything else must parse as a valid GURL.
 *
 * @param str The IRI or URN string.
 * @return A reference to this object.
 * @throws std::invalid_argument if `str` is neither such a URN nor a valid
 *         URL. The object is left unmodified in that case.
 */
IRI& IRI::operator=(const string& str)
{
    // Support for URN strings. The components are collected locally so that
    // (a) nothing is modified if we end up throwing, and (b) components from a
    // previous URN value are replaced instead of being appended to.
    bool isURN = false;
    decltype(_urnComponents) urnComponents;
    if ( str.find("urn:", 0, 4) == 0 )
    {
        REGEX_NS::regex reg(":");
        auto components = str.split(reg);
        if ( components.size() == 3 )
        {
            urnComponents.push_back(gURNScheme);
            urnComponents.push_back(components[1]);
            urnComponents.push_back(components[2]);
            isURN = true;
        }
    }

    auto newURL = make_unique<GURL>(str.stl_str());
    if ( !newURL->is_valid() && !isURN )
        throw std::invalid_argument(_Str("IRI: '", str, "' is not a valid URL string."));

    _url = std::move(newURL);
    _pureIRI = str;
    // empty unless `str` was recognised as a URN
    _urnComponents = std::move(urnComponents);

    return *this;
}
/**
 * Builds the two-component package-level CFI for a spine item: the spine step
 * followed by an indirecting item step qualified with the item's idref.
 *
 * @param item The spine item to address; it is dereferenced without a null check.
 * @return The CFI "<spine-step>/<item-step>[<idref>]!" as a CFI object.
 */
const CFI Package::CFIForSpineItem(shared_ptr<SpineItem> item) const
{
    // Index()+1, doubled, gives the even step number
    // (presumably Index() is zero-based -- confirm against SpineItem)
    const auto itemStep = (item->Index()+1)*2;

    CFI cfi;
    cfi._components.emplace_back(_spineCFIIndex);
    cfi._components.emplace_back(_Str(itemStep, "[", item->Idref(), "]!"));
    return cfi;
}
/**
 * Renders the components from `index` to the end as a CFI string.
 *
 * @param index Position of the first component to include.
 * @return The stringified tail of the component list.
 * @throws std::range_error if `index` is not below TotalComponents().
 */
string CFI::SubCFIFromIndex(size_t index) const
{
    if ( index < TotalComponents() )
        return Stringify(_components.begin()+index, _components.end());

    throw std::range_error(_Str("Index ", index, " is out of bounds."));
}
/**
 * Expands a prefixed property reference into a full IRI by prepending the
 * vocabulary string registered for `prefix`.
 *
 * @param reference The property name without its prefix.
 * @param prefix    The vocabulary prefix to look up.
 * @return An IRI built from the registered vocabulary string plus `reference`.
 * @throws UnknownPrefix if `prefix` is not present in the vocabulary lookup.
 */
IRI PackageBase::MakePropertyIRI(const string &reference, const string& prefix) const
{
    auto vocabulary = _vocabularyLookup.find(prefix);
    if ( vocabulary != _vocabularyLookup.end() )
        return IRI(vocabulary->second + reference);

    throw UnknownPrefix(_Str("Unknown prefix '", prefix, "'"));
}
/**
 * Builds the two-component package-level CFI for the spine item that
 * references the given manifest item.
 *
 * @param item The manifest item to address; it is dereferenced without a null check.
 * @return The CFI "<spine-step>/<item-step>[<identifier>]!" as a CFI object.
 *
 * NOTE(review): the lookup result is not checked. If IndexOfSpineItemWithIDRef()
 * reports "not found" as size_t(-1), the item step computed here wraps to 0.
 */
const CFI Package::CFIForManifestItem(shared_ptr<ManifestItem> item) const
{
    // spine position + 1, doubled, gives the even step number
    const auto itemStep = (IndexOfSpineItemWithIDRef(item->Identifier())+1)*2;

    CFI cfi;
    cfi._components.emplace_back(_spineCFIIndex);
    cfi._components.emplace_back(_Str(itemStep, "[", item->Identifier(), "]!"));
    return cfi;
}
/**
 * Returns the spine item `idx` positions away from this one, following
 * Next() for positive offsets and Previous() for negative ones.
 *
 * @param idx Relative offset; 0 yields this item.
 * @return The item at that offset; never null.
 * @throws std::out_of_range if the chain ends before the offset is reached.
 */
shared_ptr<SpineItem> SpineItem::at(ssize_t idx) const
{
    shared_ptr<SpineItem> result = std::const_pointer_cast<SpineItem>(enable_shared_from_this<SpineItem>::shared_from_this());

    // at most one of these loops runs, depending on the sign of idx
    ssize_t remaining = idx;
    while ( result != nullptr && remaining > 0 )
    {
        result = result->Next();
        --remaining;
    }
    while ( result != nullptr && remaining < 0 )
    {
        result = result->Previous();
        ++remaining;
    }

    // Q: maybe just return nullptr?
    if ( result == nullptr )
    {
        // report the offset the caller asked for, not the leftover step count
        throw std::out_of_range(_Str("Index ", idx, " is out of range"));
    }

    return result;
}
/**
 * Builds an IRI of the form scheme://host/path[?query][#fragment].
 *
 * @param scheme   URL scheme, written before "://".
 * @param host     Host component.
 * @param path     Path; a leading separator is added if missing, and an empty
 *                 path becomes "/".
 * @param query    Query string without '?'; omitted when empty.
 * @param fragment Fragment without '#'; omitted when empty.
 */
IRI::IRI(const string& scheme, const string& host, const string& path, const string& query, const string& fragment) : _urnComponents(), _url(nullptr)
{
    _pureIRI = _Str(scheme, "://", host);

    if ( path.empty() )
        _pureIRI += '/';
    else if ( path.find(gPathSeparator) != 0 )
        _pureIRI += ("/" + path);
    else
        _pureIRI += path;

    if ( !query.empty() )
        _pureIRI += _Str("?", query);
    if ( !fragment.empty() )
        _pureIRI += _Str("#", fragment);

    // _url is created with make_unique in the other IRI constructor and in
    // operator=; do the same here instead of assigning a raw `new` result.
    _url = make_unique<GURL>(_pureIRI.stl_str());
}
/**
 * Splits a CFI string into its component strings at each occurrence of
 * `delimiter`, treating bracketed qualifiers ("[...]") as opaque so that a
 * delimiter inside a qualifier does not split the component.
 *
 * @param cfi       The CFI text to split.
 * @param delimiter The component separator.
 * @return The non-empty components, in order of appearance.
 *
 * An unterminated qualifier is reported through HandleError; if that call
 * returns, the unterminated tail is kept as part of the last component.
 */
CFI::StringList CFI::CFIComponentStrings(const string &cfi, const string& delimiter)
{
    CFI::StringList components;
    string breaks = delimiter + "[";
    string tmp;
    // a matched delimiter consumes its whole length (never less than one character)
    const string::size_type delimiterLength = (delimiter.size() > 0 ? delimiter.size() : 1);
    string::size_type pos = 0, loc = 0;

    while ( pos < cfi.size() )
    {
        loc = cfi.find_first_of(breaks, pos);
        if ( loc == string::npos )
        {
            // no further breaks: the remainder belongs to the current component
            tmp.append(cfi, pos, cfi.size()-pos);
            break;
        }

        if ( loc > pos )
            tmp.append(cfi, pos, loc-pos);      // text preceding the break
        pos = loc;

        if ( cfi[loc] == '[' )
        {
            loc = cfi.find_first_of(']', loc);
            if ( loc == string::npos )
            {
                HandleError(EPUBError::CFIParseFailed, _Str("CFI '", cfi, "' has an unterminated qualifier"));
                // Previously `++loc` wrapped npos round to 0 and restarted the
                // scan from the beginning, which never terminated. Keep the
                // tail and stop instead.
                tmp.append(cfi, pos, cfi.size()-pos);
                break;
            }

            ++loc;                               // include the closing ']'
            tmp.append(cfi, pos, loc-pos);
        }
        else if ( cfi.find(delimiter, loc) == loc )
        {
            // delimiter found, push the current string
            if ( !tmp.empty() )
                components.push_back(tmp);
            tmp.clear();
            loc += delimiterLength;
        }
        else
        {
            // a character that occurs in the delimiter but does not start a
            // complete delimiter here: ordinary text (without this branch the
            // scan would stall on it)
            tmp.append(cfi, loc, 1);
            ++loc;
        }

        pos = loc;
    }

    if ( !tmp.empty() )
        components.push_back(tmp);

    return components;
}
CFI::CFI(const string& str) : #if EPUB_PLATFORM(WINRT) NativeBridge(), #endif _components(), _rangeStart(), _rangeEnd(), _options(0) { if ( CompileCFI(str) == false ) HandleError(EPUBError::CFIParseFailed, _Str("Invalid CFI string: ", str.stl_str())); }
/**
 * Converts an attribute value of the form "[prefix:]reference" into a
 * property IRI via MakePropertyIRI().
 *
 * @param attrValue The raw attribute value.
 * @return The IRI for the named property.
 * @throws std::invalid_argument if the value does not match the expected shape.
 */
IRI PropertyHolder::PropertyIRIFromString(const string &attrValue) const
{
    static REGEX_NS::regex re("^(?:(.+?):)?(.+)$");

    // The match results hold iterators into the subject string, so the subject
    // must outlive `pieces`. Binding to a const reference keeps it alive
    // whether stl_str() returns a reference or a temporary.
    const std::string& subject = attrValue.stl_str();

    REGEX_NS::smatch pieces;
    if ( !REGEX_NS::regex_match(subject, pieces, re) )
        throw std::invalid_argument(_Str("Attribute '", attrValue, "' doesn't look like a property name to me"));

    // there are two captures, at indices 1 (prefix, possibly empty) and 2 (reference)
    return MakePropertyIRI(pieces.str(2), pieces.str(1));
}
/**
 * Collects this property's identifier/value pair followed by one pair per
 * extension, for debugging output.
 *
 * @return The list of (IRI string, value) pairs; extension identifiers are
 *         prefixed with a single space.
 */
const Property::ValueMap Property::DebugValues() const
{
    ValueMap pairs;
    pairs.emplace_back(_identifier.IRIString(), _value);

    for ( const auto& ext : _extensions )
    {
        pairs.emplace_back(_Str(" ", ext->PropertyIdentifier().IRIString()), ext->Value());
    }

    return pairs;
}
/**
 * Returns the package's unique identifier: "<package-id>@<modification-date>",
 * or just the package ID when there is no modification date.
 *
 * @return The unique ID, or the empty string when the package has no ID.
 */
string Package::UniqueID() const
{
    string packageID = PackageID();
    if ( !packageID.empty() )
    {
        string modDate = ModificationDate();
        if ( !modDate.empty() )
            return _Str(packageID, "@", modDate);

        return packageID;
    }

    return string::EmptyString;
}
/**
 * Constructs a URN-style IRI of the form "urn:<nameID>:<namespacedString>".
 *
 * The three URN components (gURNScheme, nameID, namespacedString) are stored
 * in _urnComponents, either through the initializer list or, on compilers
 * without initializer-list support, by pushing them in the constructor body.
 *
 * @param nameID           The URN namespace identifier.
 * @param namespacedString The namespace-specific string.
 */
IRI::IRI(const string& nameID, const string& namespacedString) :
#if EPUB_COMPILER_SUPPORTS(CXX_INITIALIZER_LISTS)
_urnComponents({gURNScheme, nameID, namespacedString}),
#endif
// NOTE(review): _url is built from _pureIRI, so this relies on _pureIRI being
// declared before _url in the class -- confirm against the header.
_pureIRI(_Str("urn:", nameID, ":", namespacedString)), _url(make_unique<GURL>(_pureIRI.stl_str()))
{
#if !EPUB_COMPILER_SUPPORTS(CXX_INITIALIZER_LISTS)
    // fallback for compilers that cannot brace-initialize the component list
    _urnComponents.push_back(gURNScheme);
    _urnComponents.push_back(nameID);
    _urnComponents.push_back(namespacedString);
#endif
}
/**
 * Builds the IRI used to invoke this handler for a given source: a copy of
 * the handler IRI whose query is "src=<src>" followed by "&key=value" for
 * each parameter.
 *
 * @param src        The source to hand to the handler; written into the query as-is.
 * @param parameters Additional key/value pairs; both parts are passed through
 *                   IRI::URLEncodeComponent().
 * @return The handler IRI with the assembled query set.
 */
IRI MediaHandler::Target(const string& src, const ParameterList& parameters) const
{
    std::stringstream query;
    query << "src=" << src;

    for ( const auto& param : parameters )
    {
        const string piece = _Str('&', IRI::URLEncodeComponent(param.first), '=', IRI::URLEncodeComponent(param.second));
        query << piece;
    }

    // the handler IRI will already include any fragment, we just have to add the query
    IRI target(_handlerIRI);
    target.SetQuery(query.str());
    return target;
}
bool Container::Open(const string& path) { ContainerPtr sharedThis(shared_from_this()); _archive = std::move(Archive::Open(path.stl_str())); if ( _archive == nullptr ) throw std::invalid_argument(_Str("Path does not point to a recognised archive file: '", path, "'")); // TODO: Initialize lazily? Doing so would make initialization faster, but require // PackageLocations() to become non-const, like Packages(). ArchiveXmlReader reader(_archive->ReaderAtPath(gContainerFilePath)); _ocf = reader.xmlReadDocument(gContainerFilePath, nullptr, XML_PARSE_RECOVER|XML_PARSE_NOENT|XML_PARSE_DTDATTR); if ( _ocf == nullptr ) return false; #if EPUB_COMPILER_SUPPORTS(CXX_INITIALIZER_LISTS) XPathWrangler xpath(_ocf, {{"ocf", "urn:oasis:names:tc:opendocument:xmlns:container"}}); #else XPathWrangler::NamespaceList __ns; __ns["ocf"] = OCFNamespaceURI; XPathWrangler xpath(_ocf, __ns); #endif xmlNodeSetPtr nodes = xpath.Nodes(reinterpret_cast<const xmlChar*>(gRootfilesXPath)); if ( nodes == nullptr || nodes->nodeNr == 0 ) return false; for ( int i = 0; i < nodes->nodeNr; i++ ) { xmlNodePtr n = nodes->nodeTab[i]; const xmlChar * _type = xmlGetProp(n, reinterpret_cast<const xmlChar*>("media-type")); std::string type((_type == nullptr ? "" : reinterpret_cast<const char*>(_type))); const xmlChar * _path = xmlGetProp(n, reinterpret_cast<const xmlChar*>("full-path")); if ( _path == nullptr ) continue; auto pkg = std::make_shared<Package>(sharedThis, type); if ( pkg->Open(_path) ) _packages.push_back(pkg); } LoadEncryption(); return true; }
void Package::FireLoadEvent(const IRI &url) const { if ( _loadEventHandler == nullptr ) throw std::runtime_error(_Str("No load event handler installed to load '", url.URIString(), "'")); if ( url.Path().find(_pathBase) == 0 ) { _loadEventHandler(url); return; } IRI fixed(IRI::gEPUBScheme, UniqueID(), _pathBase, url.Query(), url.Fragment()); fixed.AddPathComponent(url.Path()); IRI::IRICredentials creds(url.Credentials()); fixed.SetCredentials(creds.first, creds.second); _loadEventHandler(fixed); }
/**
 * Resolves the package-level part of a CFI (spine step + spine item
 * indirector) to the manifest item it designates.
 *
 * @param cfi            The CFI to resolve; its first two components are read.
 * @param pRemainingCFI  Optional. On success receives the components of `cfi`
 *                       from index 2 onwards.
 * @return The manifest item referenced by the addressed spine item.
 * @throws CFI::InvalidCFI if the CFI is too short, does not start at the
 *         spine, lacks the indirector, has an odd or out-of-range spine item
 *         step, or carries a qualifier that matches no spine item.
 */
const ManifestItem* Package::ManifestItemForCFI(ePub3::CFI &cfi, CFI* pRemainingCFI) const
{
    const ManifestItem* result = nullptr;

    // NB: Package is a friend of CFI, so it can access the components directly
    if ( cfi._components.size() < 2 )
        throw CFI::InvalidCFI("CFI contains less than 2 nodes, so is invalid for package-based lookups.");

    // first item directs us to the Spine: check the index against the one we know
    auto component = cfi._components[0];
    if ( component.nodeIndex != _spineCFIIndex )
    {
        throw CFI::InvalidCFI(_Str("CFI first node index (spine) is ", component.nodeIndex, " but should be ", _spineCFIIndex));
    }

    // second component is the particular spine item
    component = cfi._components[1];
    if ( !component.IsIndirector() )
        throw CFI::InvalidCFI("Package-based CFI's second item must be an indirector");

    try
    {
        if ( (component.nodeIndex % 2) == 1 )
            throw CFI::InvalidCFI("CFI spine item index is odd, which makes no sense for always-empty spine nodes.");
        // step 0 addresses nothing; reject it before the subtraction below
        if ( component.nodeIndex == 0 )
            throw CFI::InvalidCFI("CFI references out-of-range spine item");

        // Element steps are even and start at 2, so step 2 is the FIRST spine
        // item, i.e. offset 0 from the head of the spine (this is the inverse
        // of the (index+1)*2 mapping used when building CFIs).
        const SpineItem* item = _spine->at((component.nodeIndex/2) - 1);

        // check and correct any qualifiers
        item = ConfirmOrCorrectSpineItemQualifier(item, &component);
        if ( item == nullptr )
            throw CFI::InvalidCFI("CFI spine node qualifier doesn't match any spine item idref");

        // we know it's not null, because SpineItem::at() throws an exception if out of range
        result = ManifestItemWithID(item->Idref());

        if ( pRemainingCFI != nullptr )
            pRemainingCFI->Assign(cfi, 2);
    }
    catch (const std::out_of_range&)
    {
        throw CFI::InvalidCFI("CFI references out-of-range spine item");
    }

    return result;
}
/**
 * Returns the author names joined into a single human-readable string:
 * "A", "A and B", or "A, B, and C".
 *
 * @return The joined names, or the empty string when there are no authors.
 */
const string Package::Authors() const
{
    // TODO: handle localization
    AttributionList authors = AuthorNames();

    // Without this guard an empty list fell through to the general case below,
    // which computed begin()-1 and dereferenced it. Contributors() already
    // handles the empty case this way.
    if ( authors.empty() )
        return string::EmptyString;
    if ( authors.size() == 1 )
        return authors[0];
    else if ( authors.size() == 2 )
        return _Str(authors[0], " and ", authors[1]);

    // three or more: comma-separate all but the last, then "and <last>"
    std::stringstream ss;
    auto pos = authors.begin();
    auto last = pos + (authors.size() - 1);
    while ( pos != last )
    {
        ss << *(pos++) << ", ";
    }

    ss << "and " << *last;
    return string(ss.str());
}
/**
 * Replaces this CFI's components with those of `o` starting at `fromIndex`,
 * and takes over (or drops) range information to match `o`.
 *
 * @param o         The CFI to copy from; may be this object itself.
 * @param fromIndex Index of the first component of `o` to keep.
 * @return A reference to this object.
 * @throws std::out_of_range if `fromIndex` is not a valid component index of `o`.
 */
CFI& CFI::Assign(const ePub3::CFI &o, size_t fromIndex)
{
    // the bound that was violated is the SOURCE's component count, so report that one
    if ( fromIndex >= o._components.size() )
        throw std::out_of_range(_Str("Component index ", fromIndex, " out of range [0..", o._components.size(), "]"));

    if ( &o == this )
    {
        // assign() must not be given iterators into the container it modifies;
        // dropping the leading components has the same effect. The range state
        // is already our own, so there is nothing else to copy.
        _components.erase(_components.begin(), _components.begin()+fromIndex);
        return *this;
    }

    _components.assign(o._components.begin()+fromIndex, o._components.end());

    if ( o.IsRangeTriplet() )
    {
        _rangeStart = o._rangeStart;
        _rangeEnd = o._rangeEnd;
        _options |= RangeTriplet;
    }
    else if ( IsRangeTriplet() )
    {
        // the source has no range, so ours must not survive the assignment
        _rangeStart.clear();
        _rangeEnd.clear();
        _options &= ~RangeTriplet;
    }

    return *this;
}
/**
 * Returns the contributor names joined into a single human-readable string:
 * "A", "A and B", or "A, B, and C".
 *
 * @param localized Passed through to ContributorNames().
 * @return The joined names, or the empty string when there are no contributors.
 */
const string Package::Contributors(bool localized) const
{
    // TODO: handle localization of the word 'and'
    AttributionList names = ContributorNames(localized);
    const auto count = names.size();

    if ( names.empty() )
        return string::EmptyString;
    if ( count == 1 )
        return names[0];
    if ( count == 2 )
        return _Str(names[0], " and ", names[1]);

    // three or more: every name but the last is followed by a comma
    std::stringstream joined;
    for ( auto i = count - count; i < count - 1; ++i )
    {
        joined << names[i] << ", ";
    }
    joined << "and " << names[count - 1];

    return string(joined.str());
}
bool PackageBase::Open(const string& path) { ArchiveXmlReader reader(_archive->ReaderAtPath(path.stl_str())); _opf = reader.xmlReadDocument(path.c_str(), nullptr, XML_PARSE_RECOVER|XML_PARSE_NOENT|XML_PARSE_DTDATTR); if ( _opf == nullptr ) { HandleError(EPUBError::OCFInvalidRootfileURL, _Str(__PRETTY_FUNCTION__, ": No OPF file at ", path.stl_str())); return false; } size_t loc = path.rfind("/"); if ( loc == std::string::npos ) { _pathBase = '/'; } else { _pathBase = path.substr(0, loc+1); } return true; }
/**
 * Returns a variant of the unique ID suitable for use in URLs:
 * "<package-id>_<date>", where <date> is the first ten characters of the
 * modification date and the package ID is shortened if needed so that the
 * whole string stays within 255 characters.
 *
 * @return The URL-safe ID; just the package ID when there is no modification
 *         date; the empty string when the package has no ID.
 */
string Package::URLSafeUniqueID() const
{
    string packageID = PackageID();
    if ( packageID.empty() )
        return string::EmptyString;

    string modDate = ModificationDate();
    if ( modDate.empty() )
        return packageID;

    // only include the first ten characters of the modification date (the date part)
    modDate = modDate.substr(0, 10);

    // trim the uniqueID if necessary to get the whole thing below 256 characters in length
    const string::size_type maxLen = 255;
    const string::size_type totalLen = packageID.size() + 1 + modDate.size();
    if ( totalLen > maxLen )
    {
        const string::size_type excess = totalLen - maxLen;
        packageID = packageID.substr(0, packageID.size() - excess);
    }

    return _Str(packageID, '_', modDate);
}
void CFI::Component::Parse(const string &str) { if ( str.empty() ) throw std::invalid_argument("Empty string supplied to CFI::Component"); std::string utf8 = str.stl_str(); std::istringstream iss(utf8); // read an integer iss >> nodeIndex; if ( nodeIndex == 0 && iss.fail() ) throw std::invalid_argument(_Str("No node value at start of CFI::Component string '", str, "'")); while ( !iss.eof() ) { char next = 0; iss >> next; switch ( next ) { case '[': { size_t pos = static_cast<size_t>(iss.tellg()); iss.ignore(std::numeric_limits<std::streamsize>::max(), ']'); size_t end = ((size_t)iss.tellg()) - 1; if ( iss.eof() ) throw std::invalid_argument(_Str("Invalid string supplied to CFI::Component: ", str)); if ( characterOffset != 0 ) { // this is a text qualifier textQualifier = utf8.substr(pos, end-pos); flags |= TextQualifier; } else { // it's a position qualifier qualifier = utf8.substr(pos, end-pos); flags |= Qualifier; } break; } case '~': { // character offsets and spatial/temporal offsets are mutually exclusive if ( HasCharacterOffset() ) break; // read a numeral iss >> temporalOffset; flags |= TemporalOffset; break; } case '@': { // character offsets and spatial/temporal offsets are mutually exclusive if ( HasCharacterOffset() ) break; // two floats, separated by a colon float x, y; // read x iss >> x; // check for and skip delimiter if ( iss.peek() != ':' ) break; iss.ignore(1); // read y iss >> y; spatialOffset.x = x; spatialOffset.y = y; flags |= SpatialOffset; break; } case ':': { // character offsets and spatial/temporal offsets are mutually exclusive if ( HasSpatialTemporalOffset() ) break; iss >> characterOffset; flags |= CharacterOffset; break; } case '!': { // must be the last character, and no offsets if ( ((int)iss.peek()) != -1 || HasSpatialTemporalOffset() || HasCharacterOffset() ) break; flags |= Indirector; break; } default: break; } } }
bool Package::Unpack() { // very basic sanity check xmlNodePtr root = xmlDocGetRootElement(_opf); string rootName(reinterpret_cast<const char*>(root->name)); rootName.tolower(); if ( rootName != "package" ) return false; // not an OPF file, innit? InstallPrefixesFromAttributeValue(_getProp(root, "prefix", ePub3NamespaceURI)); // go through children to determine the CFI index of the <spine> tag static const xmlChar* kSpineName = BAD_CAST "spine"; _spineCFIIndex = 0; xmlNodePtr child = root->children; while ( child != nullptr ) { if ( child->type == XML_ELEMENT_NODE ) { _spineCFIIndex += 2; if ( xmlStrEqual(child->name, kSpineName) ) break; } child = child->next; } if ( _spineCFIIndex == 0 ) return false; // spineless! XPathWrangler xpath(_opf, {{"opf", OPFNamespace}, {"dc", DCNamespace}}); // simple things: manifest and spine items xmlNodeSetPtr manifestNodes = nullptr; xmlNodeSetPtr spineNodes = nullptr; try { manifestNodes = xpath.Nodes("/opf:package/opf:manifest/opf:item"); spineNodes = xpath.Nodes("/opf:package/opf:spine/opf:itemref"); if ( manifestNodes == nullptr || spineNodes == nullptr ) throw false; // looks invalid, or at least unusable, to me for ( int i = 0; i < manifestNodes->nodeNr; i++ ) { ManifestItem *p = new ManifestItem(manifestNodes->nodeTab[i], this); _manifest.emplace(p->Identifier(), p); } SpineItem* cur = nullptr; for ( int i = 0; i < spineNodes->nodeNr; i++ ) { SpineItem* next = new SpineItem(spineNodes->nodeTab[i], this); if ( cur != nullptr ) { cur->SetNextItem(next); } else { _spine.reset(next); } cur = next; } } catch (...) 
{ if ( manifestNodes != nullptr ) xmlXPathFreeNodeSet(manifestNodes); if ( spineNodes != nullptr ) xmlXPathFreeNodeSet(spineNodes); return false; } xmlXPathFreeNodeSet(manifestNodes); xmlXPathFreeNodeSet(spineNodes); // now the metadata, which is slightly more involved due to extensions xmlNodeSetPtr metadataNodes = nullptr; xmlNodeSetPtr refineNodes = xmlXPathNodeSetCreate(nullptr); try { metadataNodes = xpath.Nodes("/opf:package/opf:metadata/*"); if ( metadataNodes == nullptr ) throw false; std::map<string, class Metadata*> metadataByID; for ( int i = 0; i < metadataNodes->nodeNr; i++ ) { xmlNodePtr node = metadataNodes->nodeTab[i]; class Metadata* p = nullptr; if ( node->ns != nullptr && xmlStrcmp(node->ns->href, BAD_CAST DCNamespace) == 0 ) { // definitely a main node p = new class Metadata(node, this); } else if ( _getProp(node, "name").size() > 0 ) { // it's an ePub2 item-- ignore it continue; } else if ( _getProp(node, "refines").empty() ) { // not refining anything, so it's a main node p = new class Metadata(node, this); } else { // by elimination it's refining something-- we'll process it later when we know we've got all the main nodes in there xmlXPathNodeSetAdd(refineNodes, node); } if ( p != nullptr ) { _metadata.push_back(p); if ( !p->Identifier().empty() ) metadataByID[p->Identifier()] = p; } } for ( int i = 0; i < refineNodes->nodeNr; i++ ) { xmlNodePtr node = refineNodes->nodeTab[i]; string ident = _getProp(node, "refines"); if ( ident.empty() ) continue; if ( ident[0] == '#' ) ident = ident.substr(1); auto found = metadataByID.find(ident); if ( found == metadataByID.end() ) continue; found->second->AddExtension(node, this); } } catch (...) 
{ if ( metadataNodes != nullptr ) xmlXPathFreeNodeSet(metadataNodes); if ( refineNodes != nullptr ) xmlXPathFreeNodeSet(refineNodes); return false; } xmlXPathFreeNodeSet(metadataNodes); xmlXPathFreeNodeSet(refineNodes); // now any content type bindings xmlNodeSetPtr bindingNodes = nullptr; try { bindingNodes = xpath.Nodes("/opf:package/opf:bindings/*"); if ( bindingNodes != nullptr ) { for ( int i = 0; i < bindingNodes->nodeNr; i++ ) { xmlNodePtr node = bindingNodes->nodeTab[i]; if ( xmlStrcasecmp(node->name, MediaTypeElementName) != 0 ) continue; //////////////////////////////////////////////////////////// // ePub Publications 3.0 §3.4.16: The `mediaType` Element // The media-type attribute is required. string mediaType = _getProp(node, "media-type"); if ( mediaType.empty() ) { throw std::invalid_argument("mediaType element has missing or empty media-type attribute."); } // Each child mediaType of a bindings element must define a unique // content type in its media-type attribute, and the media type // specified must not be a Core Media Type. if ( _contentHandlers[mediaType].empty() == false ) { // user shouldn't have added manual things yet, but for safety we'll look anyway for ( auto ptr : _contentHandlers[mediaType] ) { if ( typeid(*ptr) == typeid(MediaHandler) ) { throw std::invalid_argument(_Str("Duplicate media handler found for type '", mediaType, "'.")); } } } if ( CoreMediaTypes.find(mediaType) != CoreMediaTypes.end() ) { throw std::invalid_argument("mediaType element specifies an EPUB Core Media Type."); } // The handler attribute is required string handlerID = _getProp(node, "handler"); if ( handlerID.empty() ) { throw std::invalid_argument("mediaType element has missing or empty handler attribute."); } // The required handler attribute must reference the ID [XML] of an // item in the manifest of the default implementation for this media // type. The referenced item must be an XHTML Content Document. 
const ManifestItem* handlerItem = ManifestItemWithID(handlerID); if ( handlerItem == nullptr ) { throw std::invalid_argument(_Str("mediaType element references non-existent handler with ID '", handlerID, "'.")); } if ( handlerItem->MediaType() != "application/xhtml+xml" ) { throw std::invalid_argument(_Str("Media handlers must be XHTML content documents, but referenced item has type '", handlerItem->MediaType(), "'.")); } // All XHTML Content Documents designated as handlers must have the // `scripted` property set in their manifest item's `properties` // attribute. if ( handlerItem->HasProperty(ItemProperties::HasScriptedContent) == false ) { throw std::invalid_argument("Media handlers must have the `scripted` property."); } // all good-- install it now _contentHandlers[mediaType].push_back(new MediaHandler(this, mediaType, handlerItem->AbsolutePath())); } } } catch (std::exception& exc) { std::cerr << "Exception processing OPF file: " << exc.what() << std::endl; if ( bindingNodes != nullptr ) xmlXPathFreeNodeSet(bindingNodes); return false; } catch (...) { if ( bindingNodes != nullptr ) xmlXPathFreeNodeSet(bindingNodes); return false; } xmlXPathFreeNodeSet(bindingNodes); // now the navigation tables for ( auto item : _manifest ) { if ( !item.second->HasProperty(ItemProperties::Navigation) ) continue; NavigationList tables = NavTablesFromManifestItem(item.second); for ( auto table : tables ) { // have to dynamic_cast these guys to get the right pointer type class NavigationTable* navTable = dynamic_cast<class NavigationTable*>(table); _navigation.emplace(navTable->Type(), navTable); } } return true; }
void CFI::Component::Parse(const string &str) { if ( str.empty() ) { HandleError(EPUBError::CFIParseFailed, "Empty string supplied to CFI::Component"); return; } std::string utf8 = str.stl_str(); std::istringstream iss(utf8); // read an integer iss >> nodeIndex; if ( nodeIndex == 0 && iss.fail() ) { HandleError(EPUBError::CFIParseFailed, _Str("No node value at start of CFI::Component string '", str, "'")); return; } while ( !iss.eof() ) { char next = 0; iss >> next; switch ( next ) { case '[': { size_t pos = static_cast<size_t>(iss.tellg()); iss.ignore(std::numeric_limits<std::streamsize>::max(), ']'); size_t end = ((size_t)iss.tellg()) - 1; if ( iss.eof() ) { HandleError(EPUBError::CFIParseFailed); return; } if ( characterOffset != 0 ) { // this is a text qualifier flags |= TextQualifier; std::string sub = utf8.substr(pos, end-pos); // is there a side-bias? auto biasPos = sub.find(";s="); if ( biasPos == std::string::npos ) { textQualifier = std::move(sub); } else { textQualifier = sub.substr(0, biasPos); if ( sub.size() > biasPos + 3 ) { switch ( sub[biasPos+3] ) { case 'b': sideBias = SideBias::Before; break; case 'a': sideBias = SideBias::After; break; default: sideBias = SideBias::Unspecified; break; } } } } else { // it's a position qualifier qualifier = utf8.substr(pos, end-pos); flags |= Qualifier; } break; } case '~': { // character offsets and spatial/temporal offsets are mutually exclusive if ( HasCharacterOffset() ) break; // read a numeral iss >> temporalOffset; flags |= TemporalOffset; break; } case '@': { // character offsets and spatial/temporal offsets are mutually exclusive if ( HasCharacterOffset() ) break; // two floats, separated by a colon float x, y; // read x iss >> x; // check for and skip delimiter if ( iss.peek() != ':' ) break; iss.ignore(1); // read y iss >> y; spatialOffset.x = x; spatialOffset.y = y; flags |= SpatialOffset; break; } case ':': { // character offsets and spatial/temporal offsets are mutually exclusive if ( 
HasSpatialTemporalOffset() ) break; iss >> characterOffset; flags |= CharacterOffset; break; } case '!': { // must be the last character, and no offsets if ( ((int)iss.peek()) != -1 || HasSpatialTemporalOffset() || HasCharacterOffset() ) break; flags |= Indirector; break; } default: break; } } }
bool CFI::CompileCFI(const string &str) { // strip the 'epubcfi(...)' wrapping string cfi(str); if ( str.find("epubcfi(") == 0 ) { cfi = cfi.substr(8, (str.size()-1)-8); } else if ( str.size() == 0 ) { HandleError(EPUBError::CFIParseFailed, "Empty CFI string"); return false; } else if ( str[0] != '/' ) { HandleError(EPUBError::CFINonSlashStartCharacter); } StringList rangePieces = RangedCFIComponents(cfi); if ( rangePieces.size() != 1 && rangePieces.size() != 3 ) { HandleError(EPUBError::CFIRangeComponentCountInvalid, _Str("Expected 1 or 3 range components, got ", rangePieces.size())); if ( rangePieces.size() == 0 ) return false; } if ( CompileComponentsToList(CFIComponentStrings(rangePieces[0]), &_components) == false ) return false; if ( rangePieces.size() >= 3 ) { if ( CompileComponentsToList(CFIComponentStrings(rangePieces[1]), &_rangeStart) == false ) return false; if ( CompileComponentsToList(CFIComponentStrings(rangePieces[2]), &_rangeEnd) == false ) return false; // now sanity-check the range delimiters: // neither should be empty if ( _rangeStart.empty() || _rangeEnd.empty() ) { HandleError(EPUBError::CFIRangeInvalid, "One of the supplied range components was empty."); return false; } // check the offsets at the end of each— they should be the same type if ( (_rangeStart.back().flags & Component::OffsetsMask) != (_rangeEnd.back().flags & Component::OffsetsMask) ) { HandleError(EPUBError::CFIRangeInvalid, "Offsets at the end of range components are of different types."); return false; } // ensure that there are no side-bias values if ( (_rangeStart.back().sideBias != SideBias::Unspecified) || (_rangeEnd.back().sideBias != SideBias::Unspecified) ) { HandleError(EPUBError::CFIRangeContainsSideBias); // can safely ignore this one } // where the delimiters' component ranges overlap, start must be <= end auto maxsz = std::max(_rangeStart.size(), _rangeEnd.size()); bool inequalNodeIndexFound = false; for ( decltype(maxsz) i = 0; i < maxsz; i++ ) { if ( 
_rangeStart[i].nodeIndex > _rangeEnd[i].nodeIndex ) { HandleError(EPUBError::CFIRangeInvalid, "Range components appear to be out of order."); } else if ( !inequalNodeIndexFound && _rangeStart[i].nodeIndex < _rangeEnd[i].nodeIndex ) { inequalNodeIndexFound = true; } } // if the two ranges are equal aside from their offsets, the end offset must be > the start offset if ( !inequalNodeIndexFound && _rangeStart.size() == _rangeEnd.size() ) { Component &s = _rangeStart.back(), &e = _rangeEnd.back(); if ( s.HasCharacterOffset() && s.characterOffset > e.characterOffset ) { HandleError(EPUBError::CFIRangeInvalid, "Range components appear to be out of order."); } else { if ( s.HasTemporalOffset() && s.temporalOffset > e.temporalOffset ) HandleError(EPUBError::CFIRangeInvalid, "Range components appear to be out of order."); if ( s.HasSpatialOffset() && s.spatialOffset > e.spatialOffset ) HandleError(EPUBError::CFIRangeInvalid, "Range components appear to be out of order."); } } _options |= RangeTriplet; } return true; }
bool Package::Unpack() { PackagePtr sharedMe = shared_from_this(); // very basic sanity check xmlNodePtr root = xmlDocGetRootElement(_opf); string rootName(reinterpret_cast<const char*>(root->name)); rootName.tolower(); if ( rootName != "package" ) { HandleError(EPUBError::OPFInvalidPackageDocument); return false; // not an OPF file, innit? } if ( _getProp(root, "version").empty() ) { HandleError(EPUBError::OPFPackageHasNoVersion); } InstallPrefixesFromAttributeValue(_getProp(root, "prefix", ePub3NamespaceURI)); // go through children to determine the CFI index of the <spine> tag static const xmlChar* kSpineName = BAD_CAST "spine"; static const xmlChar* kManifestName = BAD_CAST "manifest"; static const xmlChar* kMetadataName = BAD_CAST "metadata"; _spineCFIIndex = 0; uint32_t idx = 0; xmlNodePtr child = xmlFirstElementChild(root); while ( child != nullptr ) { idx += 2; if ( xmlStrEqual(child->name, kSpineName) ) { _spineCFIIndex = idx; if ( _spineCFIIndex != 6 ) HandleError(EPUBError::OPFSpineOutOfOrder); } else if ( xmlStrEqual(child->name, kManifestName) && idx != 4 ) { HandleError(EPUBError::OPFManifestOutOfOrder); } else if ( xmlStrEqual(child->name, kMetadataName) && idx != 2 ) { HandleError(EPUBError::OPFMetadataOutOfOrder); } child = xmlNextElementSibling(child); } if ( _spineCFIIndex == 0 ) { HandleError(EPUBError::OPFNoSpine); return false; // spineless! 
} #if EPUB_COMPILER_SUPPORTS(CXX_INITIALIZER_LISTS) XPathWrangler xpath(_opf, {{"opf", OPFNamespace}, {"dc", DCNamespace}}); #else XPathWrangler::NamespaceList __m; __m["opf"] = OPFNamespace; __m["dc"] = DCNamespace; XPathWrangler xpath(_opf, __m); #endif // simple things: manifest and spine items xmlNodeSetPtr manifestNodes = nullptr; xmlNodeSetPtr spineNodes = nullptr; try { manifestNodes = xpath.Nodes("/opf:package/opf:manifest/opf:item"); spineNodes = xpath.Nodes("/opf:package/opf:spine/opf:itemref"); if ( manifestNodes == nullptr ) { HandleError(EPUBError::OPFNoManifestItems); } if ( spineNodes == nullptr ) { HandleError(EPUBError::OPFNoSpineItems); } for ( int i = 0; i < manifestNodes->nodeNr; i++ ) { auto p = std::make_shared<ManifestItem>(sharedMe); if ( p->ParseXML(p, manifestNodes->nodeTab[i]) ) { #if EPUB_HAVE(CXX_MAP_EMPLACE) _manifest.emplace(p->Identifier(), p); #else _manifest[p->Identifier()] = p; #endif StoreXMLIdentifiable(p); } else { // TODO: Need an error here } } // check fallback chains typedef std::map<string, bool> IdentSet; IdentSet idents; for ( auto &pair : _manifest ) { ManifestItemPtr item = pair.second; if ( item->FallbackID().empty() ) continue; idents[item->XMLIdentifier()] = true; while ( !item->FallbackID().empty() ) { if ( idents[item->FallbackID()] ) { HandleError(EPUBError::OPFFallbackChainCircularReference); break; } item = item->Fallback(); } idents.clear(); } SpineItemPtr cur; for ( int i = 0; i < spineNodes->nodeNr; i++ ) { auto next = std::make_shared<SpineItem>(sharedMe); if ( next->ParseXML(next, spineNodes->nodeTab[i]) == false ) { // TODO: need an error code here continue; } // validation of idref auto manifestFound = _manifest.find(next->Idref()); if ( manifestFound == _manifest.end() ) { HandleError(EPUBError::OPFInvalidSpineIdref, _Str(next->Idref(), " does not correspond to a manifest item")); continue; } // validation of spine resource type w/fallbacks ManifestItemPtr manifestItem = next->ManifestItem(); bool 
isContentDoc = false; do { if ( manifestItem->MediaType() == "application/xhtml+xml" || manifestItem->MediaType() == "image/svg" ) { isContentDoc = true; break; } } while ( (manifestItem = manifestItem->Fallback()) ); if ( !isContentDoc ) HandleError(EPUBError::OPFFallbackChainHasNoContentDocument); StoreXMLIdentifiable(next); if ( cur != nullptr ) { cur->SetNextItem(next); } else { _spine = next; } cur = next; } } catch (const std::system_error& exc) { if ( manifestNodes != nullptr ) xmlXPathFreeNodeSet(manifestNodes); if ( spineNodes != nullptr ) xmlXPathFreeNodeSet(spineNodes); if ( exc.code().category() == epub_spec_category() ) throw; return false; } catch (...) { if ( manifestNodes != nullptr ) xmlXPathFreeNodeSet(manifestNodes); if ( spineNodes != nullptr ) xmlXPathFreeNodeSet(spineNodes); return false; } xmlXPathFreeNodeSet(manifestNodes); xmlXPathFreeNodeSet(spineNodes); // now the metadata, which is slightly more involved due to extensions xmlNodeSetPtr metadataNodes = nullptr; xmlNodeSetPtr refineNodes = xmlXPathNodeSetCreate(nullptr); try { shared_ptr<PropertyHolder> holderPtr = std::dynamic_pointer_cast<PropertyHolder>(sharedMe); metadataNodes = xpath.Nodes("/opf:package/opf:metadata/*"); if ( metadataNodes == nullptr ) HandleError(EPUBError::OPFNoMetadata); bool foundIdentifier = false, foundTitle = false, foundLanguage = false, foundModDate = false; string uniqueIDRef = _getProp(root, "unique-identifier"); if ( uniqueIDRef.empty() ) HandleError(EPUBError::OPFPackageUniqueIDInvalid); for ( int i = 0; i < metadataNodes->nodeNr; i++ ) { xmlNodePtr node = metadataNodes->nodeTab[i]; PropertyPtr p; if ( node->ns != nullptr && xmlStrcmp(node->ns->href, BAD_CAST DCNamespace) == 0 ) { // definitely a main node p = std::make_shared<Property>(holderPtr); } else if ( _getProp(node, "name").size() > 0 ) { // it's an ePub2 item-- ignore it continue; } else if ( _getProp(node, "refines").empty() ) { // not refining anything, so it's a main node p = 
std::make_shared<Property>(holderPtr); } else { // by elimination it's refining something-- we'll process it later when we know we've got all the main nodes in there xmlXPathNodeSetAdd(refineNodes, node); } if ( p && p->ParseMetaElement(node) ) { switch ( p->Type() ) { case DCType::Identifier: { foundIdentifier = true; if ( !uniqueIDRef.empty() && uniqueIDRef != p->XMLIdentifier() ) HandleError(EPUBError::OPFPackageUniqueIDInvalid); break; } case DCType::Title: { foundTitle = true; break; } case DCType::Language: { foundLanguage = true; break; } case DCType::Custom: { if ( p->PropertyIdentifier() == MakePropertyIRI("modified", "dcterms") ) foundModDate = true; break; } default: break; } AddProperty(p); StoreXMLIdentifiable(p); } } if ( !foundIdentifier ) HandleError(EPUBError::OPFMissingIdentifierMetadata); if ( !foundTitle ) HandleError(EPUBError::OPFMissingTitleMetadata); if ( !foundLanguage ) HandleError(EPUBError::OPFMissingLanguageMetadata); if ( !foundModDate ) HandleError(EPUBError::OPFMissingModificationDateMetadata); for ( int i = 0; i < refineNodes->nodeNr; i++ ) { xmlNodePtr node = refineNodes->nodeTab[i]; string ident = _getProp(node, "refines"); if ( ident.empty() ) { HandleError(EPUBError::OPFInvalidRefinementAttribute, "Empty IRI for 'refines' attribute"); continue; } if ( ident[0] == '#' ) { ident = ident.substr(1); } else { // validation only right now IRI iri(ident); if ( iri.IsEmpty() ) { HandleError(EPUBError::OPFInvalidRefinementAttribute, _Str("#", ident, " is not a valid IRI")); } else if ( iri.IsRelative() == false ) { HandleError(EPUBError::OPFInvalidRefinementAttribute, _Str(iri.IRIString(), " is not a relative IRI")); } continue; } auto found = _xmlIDLookup.find(ident); if ( found == _xmlIDLookup.end() ) { HandleError(EPUBError::OPFInvalidRefinementTarget, _Str("#", ident, " does not reference an item in this document")); continue; } PropertyPtr prop = std::dynamic_pointer_cast<Property>(found->second); if ( prop ) { // it's a property, 
so this is an extension PropertyExtensionPtr extPtr = std::make_shared<PropertyExtension>(prop); if ( extPtr->ParseMetaElement(node) ) prop->AddExtension(extPtr); } else { // not a property, so treat this as a plain property shared_ptr<PropertyHolder> ptr = std::dynamic_pointer_cast<PropertyHolder>(found->second); if ( ptr ) { prop = std::make_shared<Property>(ptr); if ( prop->ParseMetaElement(node) ) ptr->AddProperty(prop); } } } // now look at the <spine> element for properties xmlNodePtr spineNode = xmlFirstElementChild(root); for ( uint32_t i = 2; i < _spineCFIIndex; i += 2 ) spineNode = xmlNextElementSibling(spineNode); string value = _getProp(spineNode, "page-progression-direction"); if ( !value.empty() ) { PropertyPtr prop = std::make_shared<Property>(holderPtr); prop->SetPropertyIdentifier(MakePropertyIRI("page-progression-direction")); prop->SetValue(value); AddProperty(prop); } } catch (std::system_error& exc) { if ( metadataNodes != nullptr ) xmlXPathFreeNodeSet(metadataNodes); if ( refineNodes != nullptr ) xmlXPathFreeNodeSet(refineNodes); if ( exc.code().category() == epub_spec_category() ) throw; return false; } catch (...) { if ( metadataNodes != nullptr ) xmlXPathFreeNodeSet(metadataNodes); if ( refineNodes != nullptr ) xmlXPathFreeNodeSet(refineNodes); return false; } xmlXPathFreeNodeSet(metadataNodes); xmlXPathFreeNodeSet(refineNodes); // now any content type bindings xmlNodeSetPtr bindingNodes = nullptr; try { bindingNodes = xpath.Nodes("/opf:package/opf:bindings/*"); if ( bindingNodes != nullptr ) { for ( int i = 0; i < bindingNodes->nodeNr; i++ ) { xmlNodePtr node = bindingNodes->nodeTab[i]; if ( xmlStrcasecmp(node->name, MediaTypeElementName) != 0 ) continue; //////////////////////////////////////////////////////////// // ePub Publications 3.0 §3.4.16: The `mediaType` Element // The media-type attribute is required. 
string mediaType = _getProp(node, "media-type"); if ( mediaType.empty() ) { HandleError(EPUBError::OPFBindingHandlerNoMediaType); throw false; } // Each child mediaType of a bindings element must define a unique // content type in its media-type attribute, and the media type // specified must not be a Core Media Type. if ( _contentHandlers[mediaType].empty() == false ) { // user shouldn't have added manual things yet, but for safety we'll look anyway for ( auto ptr : _contentHandlers[mediaType] ) { if ( typeid(*ptr) == typeid(MediaHandler) ) { HandleError(EPUBError::OPFMultipleBindingsForMediaType); } } } if ( CoreMediaTypes.find(mediaType) != CoreMediaTypes.end() ) { HandleError(EPUBError::OPFCoreMediaTypeBindingEncountered); } // The handler attribute is required string handlerID = _getProp(node, "handler"); if ( handlerID.empty() ) { HandleError(EPUBError::OPFBindingHandlerNotFound); } // The required handler attribute must reference the ID [XML] of an // item in the manifest of the default implementation for this media // type. The referenced item must be an XHTML Content Document. ManifestItemPtr handlerItem = ManifestItemWithID(handlerID); if ( !handlerItem ) { HandleError(EPUBError::OPFBindingHandlerNotFound); } if ( handlerItem->MediaType() != "application/xhtml+xml" ) { HandleError(EPUBError::OPFBindingHandlerInvalidType, _Str("Media handlers must be XHTML content documents, but referenced item has type '", handlerItem->MediaType(), "'.")); } // All XHTML Content Documents designated as handlers must have the // `scripted` property set in their manifest item's `properties` // attribute. 
if ( handlerItem->HasProperty(ItemProperties::HasScriptedContent) == false ) { HandleError(EPUBError::OPFBindingHandlerNotScripted); } // all good-- install it now _contentHandlers[mediaType].push_back(std::make_shared<MediaHandler>(sharedMe, mediaType, handlerItem->AbsolutePath())); } } } catch (std::exception& exc) { std::cerr << "Exception processing OPF file: " << exc.what() << std::endl; if ( bindingNodes != nullptr ) xmlXPathFreeNodeSet(bindingNodes); throw; } catch (...) { if ( bindingNodes != nullptr ) xmlXPathFreeNodeSet(bindingNodes); return false; } xmlXPathFreeNodeSet(bindingNodes); // now the navigation tables for ( auto item : _manifest ) { if ( !item.second->HasProperty(ItemProperties::Navigation) ) continue; NavigationList tables = NavTablesFromManifestItem(sharedMe, item.second); for ( auto table : tables ) { // have to dynamic_cast these guys to get the right pointer type shared_ptr<class NavigationTable> navTable = std::dynamic_pointer_cast<class NavigationTable>(table); #if EPUB_HAVE(CXX_MAP_EMPLACE) _navigation.emplace(navTable->Type(), navTable); #else _navigation[navTable->Type()] = navTable; #endif } } // lastly, let's set the media support information InitMediaSupport(); return true; }