static void epub_parse_header(fz_context *ctx, epub_document *doc) { fz_archive *zip = doc->zip; fz_buffer *buf; fz_xml *container_xml, *content_opf; fz_xml *container, *rootfiles, *rootfile; fz_xml *package, *manifest, *spine, *itemref, *metadata; char base_uri[2048]; const char *full_path; const char *version; char ncx[2048], s[2048]; epub_chapter **tailp; size_t len; unsigned char *data; if (fz_has_archive_entry(ctx, zip, "META-INF/rights.xml")) fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM"); if (fz_has_archive_entry(ctx, zip, "META-INF/encryption.xml")) fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM"); /* parse META-INF/container.xml to find OPF */ buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml"); fz_write_buffer_byte(ctx, buf, 0); len = fz_buffer_storage(ctx, buf, &data); container_xml = fz_parse_xml(ctx, data, len, 0); fz_drop_buffer(ctx, buf); container = fz_xml_find(container_xml, "container"); rootfiles = fz_xml_find_down(container, "rootfiles"); rootfile = fz_xml_find_down(rootfiles, "rootfile"); full_path = fz_xml_att(rootfile, "full-path"); if (!full_path) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB"); fz_dirname(base_uri, full_path, sizeof base_uri); /* parse OPF to find NCX and spine */ buf = fz_read_archive_entry(ctx, zip, full_path); fz_write_buffer_byte(ctx, buf, 0); len = fz_buffer_storage(ctx, buf, &data); content_opf = fz_parse_xml(ctx, data, len, 0); fz_drop_buffer(ctx, buf); package = fz_xml_find(content_opf, "package"); version = fz_xml_att(package, "version"); if (!version || strcmp(version, "2.0")) fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>"); metadata = fz_xml_find_down(package, "metadata"); if (metadata) { doc->dc_title = find_metadata(ctx, metadata, "title"); doc->dc_creator = find_metadata(ctx, metadata, "creator"); } manifest = fz_xml_find_down(package, "manifest"); spine = fz_xml_find_down(package, "spine"); if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx)) { epub_parse_ncx(ctx, doc, ncx); } doc->spine = NULL; tailp = &doc->spine; itemref = fz_xml_find_down(spine, "itemref"); while (itemref) { if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s)) { *tailp = epub_parse_chapter(ctx, doc, s); tailp = &(*tailp)->next; } itemref = fz_xml_find_next(itemref, "itemref"); } fz_drop_xml(ctx, container_xml); fz_drop_xml(ctx, content_opf); }
static void epub_parse_header(fz_context *ctx, epub_document *doc) { fz_archive *zip = doc->zip; fz_buffer *buf; fz_xml *container_xml, *content_opf; fz_xml *container, *rootfiles, *rootfile; fz_xml *package, *manifest, *spine, *itemref; char base_uri[2048]; const char *full_path; const char *version; char ncx[2048], s[2048]; epub_chapter *head, *tail; if (fz_has_archive_entry(ctx, zip, "META-INF/rights.xml")) fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM"); if (fz_has_archive_entry(ctx, zip, "META-INF/encryption.xml")) fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM"); /* parse META-INF/container.xml to find OPF */ buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml"); fz_write_buffer_byte(ctx, buf, 0); container_xml = fz_parse_xml(ctx, buf->data, buf->len, 0); fz_drop_buffer(ctx, buf); container = fz_xml_find(container_xml, "container"); rootfiles = fz_xml_find_down(container, "rootfiles"); rootfile = fz_xml_find_down(rootfiles, "rootfile"); full_path = fz_xml_att(rootfile, "full-path"); if (!full_path) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB"); printf("epub: found root: %s\n", full_path); fz_dirname(base_uri, full_path, sizeof base_uri); /* parse OPF to find NCX and spine */ buf = fz_read_archive_entry(ctx, zip, full_path); fz_write_buffer_byte(ctx, buf, 0); content_opf = fz_parse_xml(ctx, buf->data, buf->len, 0); fz_drop_buffer(ctx, buf); package = fz_xml_find(content_opf, "package"); version = fz_xml_att(package, "version"); if (!version || strcmp(version, "2.0")) fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>"); manifest = fz_xml_find_down(package, "manifest"); spine = fz_xml_find_down(package, "spine"); if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx)) { /* TODO: parse NCX to create fz_outline */ printf("epub: found outline: %s\n", ncx); } head = tail = NULL; itemref = fz_xml_find_down(spine, "itemref"); while (itemref) { if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s)) { printf("epub: found spine %s\n", s); if (!head) head = tail = epub_parse_chapter(ctx, doc, s); else tail = tail->next = epub_parse_chapter(ctx, doc, s); } itemref = fz_xml_find_next(itemref, "itemref"); } doc->spine = head; printf("epub: done.\n"); fz_drop_xml(ctx, container_xml); fz_drop_xml(ctx, content_opf); }