static epub_chapter * epub_parse_chapter(fz_context *ctx, epub_document *doc, const char *path) { fz_archive *zip = doc->zip; fz_buffer *buf = NULL; epub_chapter *ch; char base_uri[2048]; fz_dirname(base_uri, path, sizeof base_uri); ch = fz_malloc_struct(ctx, epub_chapter); ch->path = NULL; ch->html = NULL; ch->next = NULL; fz_var(buf); fz_try(ctx) { buf = fz_read_archive_entry(ctx, zip, path); ch->path = fz_strdup(ctx, path); ch->html = fz_parse_html(ctx, doc->set, zip, base_uri, buf, fz_user_css(ctx)); } fz_always(ctx) fz_drop_buffer(ctx, buf); fz_catch(ctx) { fz_drop_html(ctx, ch->html); fz_free(ctx, ch->path); fz_free(ctx, ch); fz_rethrow(ctx); } return ch; }
static fz_document * htdoc_open_document(fz_context *ctx, const char *filename) { char dirname[2048]; fz_buffer *buf; html_document *doc; fz_dirname(dirname, filename, sizeof dirname); doc = fz_malloc_struct(ctx, html_document); doc->super.close = htdoc_close_document; doc->super.layout = htdoc_layout; doc->super.count_pages = htdoc_count_pages; doc->super.load_page = htdoc_load_page; doc->zip = fz_open_directory(ctx, dirname); doc->set = fz_new_html_font_set(ctx); buf = fz_read_file(ctx, filename); fz_write_buffer_byte(ctx, buf, 0); doc->box = fz_parse_html(ctx, doc->set, doc->zip, ".", buf, fz_user_css(ctx)); fz_drop_buffer(ctx, buf); return (fz_document*)doc; }
static epub_chapter * epub_parse_chapter(fz_context *ctx, epub_document *doc, const char *path) { fz_archive *zip = doc->zip; fz_buffer *buf; epub_chapter *ch; char base_uri[2048]; fz_dirname(base_uri, path, sizeof base_uri); buf = fz_read_archive_entry(ctx, zip, path); fz_write_buffer_byte(ctx, buf, 0); ch = fz_malloc_struct(ctx, epub_chapter); ch->box = fz_parse_html(ctx, doc->set, zip, base_uri, buf, fz_user_css(ctx)); ch->next = NULL; fz_drop_buffer(ctx, buf); return ch; }
static void epub_parse_ncx(fz_context *ctx, epub_document *doc, const char *path) { fz_archive *zip = doc->zip; fz_buffer *buf; fz_xml *ncx; char base_uri[2048]; unsigned char *data; size_t len; fz_dirname(base_uri, path, sizeof base_uri); buf = fz_read_archive_entry(ctx, zip, path); fz_write_buffer_byte(ctx, buf, 0); len = fz_buffer_storage(ctx, buf, &data); ncx = fz_parse_xml(ctx, data, len, 0); fz_drop_buffer(ctx, buf); doc->outline = epub_parse_ncx_imp(ctx, doc, fz_xml_find_down(ncx, "navMap"), base_uri); fz_drop_xml(ctx, ncx); }
static void epub_parse_header(fz_context *ctx, epub_document *doc) { fz_archive *zip = doc->zip; fz_buffer *buf; fz_xml *container_xml, *content_opf; fz_xml *container, *rootfiles, *rootfile; fz_xml *package, *manifest, *spine, *itemref, *metadata; char base_uri[2048]; const char *full_path; const char *version; char ncx[2048], s[2048]; epub_chapter **tailp; size_t len; unsigned char *data; if (fz_has_archive_entry(ctx, zip, "META-INF/rights.xml")) fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM"); if (fz_has_archive_entry(ctx, zip, "META-INF/encryption.xml")) fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM"); /* parse META-INF/container.xml to find OPF */ buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml"); fz_write_buffer_byte(ctx, buf, 0); len = fz_buffer_storage(ctx, buf, &data); container_xml = fz_parse_xml(ctx, data, len, 0); fz_drop_buffer(ctx, buf); container = fz_xml_find(container_xml, "container"); rootfiles = fz_xml_find_down(container, "rootfiles"); rootfile = fz_xml_find_down(rootfiles, "rootfile"); full_path = fz_xml_att(rootfile, "full-path"); if (!full_path) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB"); fz_dirname(base_uri, full_path, sizeof base_uri); /* parse OPF to find NCX and spine */ buf = fz_read_archive_entry(ctx, zip, full_path); fz_write_buffer_byte(ctx, buf, 0); len = fz_buffer_storage(ctx, buf, &data); content_opf = fz_parse_xml(ctx, data, len, 0); fz_drop_buffer(ctx, buf); package = fz_xml_find(content_opf, "package"); version = fz_xml_att(package, "version"); if (!version || strcmp(version, "2.0")) fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>"); metadata = fz_xml_find_down(package, "metadata"); if (metadata) { doc->dc_title = find_metadata(ctx, metadata, "title"); doc->dc_creator = find_metadata(ctx, metadata, "creator"); } manifest = fz_xml_find_down(package, "manifest"); spine = fz_xml_find_down(package, "spine"); if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx)) { epub_parse_ncx(ctx, doc, ncx); } doc->spine = NULL; tailp = &doc->spine; itemref = fz_xml_find_down(spine, "itemref"); while (itemref) { if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s)) { *tailp = epub_parse_chapter(ctx, doc, s); tailp = &(*tailp)->next; } itemref = fz_xml_find_next(itemref, "itemref"); } fz_drop_xml(ctx, container_xml); fz_drop_xml(ctx, content_opf); }
static void epub_parse_header(fz_context *ctx, epub_document *doc) { fz_archive *zip = doc->zip; fz_buffer *buf; fz_xml *container_xml, *content_opf; fz_xml *container, *rootfiles, *rootfile; fz_xml *package, *manifest, *spine, *itemref; char base_uri[2048]; const char *full_path; const char *version; char ncx[2048], s[2048]; epub_chapter *head, *tail; if (fz_has_archive_entry(ctx, zip, "META-INF/rights.xml")) fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM"); if (fz_has_archive_entry(ctx, zip, "META-INF/encryption.xml")) fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM"); /* parse META-INF/container.xml to find OPF */ buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml"); fz_write_buffer_byte(ctx, buf, 0); container_xml = fz_parse_xml(ctx, buf->data, buf->len, 0); fz_drop_buffer(ctx, buf); container = fz_xml_find(container_xml, "container"); rootfiles = fz_xml_find_down(container, "rootfiles"); rootfile = fz_xml_find_down(rootfiles, "rootfile"); full_path = fz_xml_att(rootfile, "full-path"); if (!full_path) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB"); printf("epub: found root: %s\n", full_path); fz_dirname(base_uri, full_path, sizeof base_uri); /* parse OPF to find NCX and spine */ buf = fz_read_archive_entry(ctx, zip, full_path); fz_write_buffer_byte(ctx, buf, 0); content_opf = fz_parse_xml(ctx, buf->data, buf->len, 0); fz_drop_buffer(ctx, buf); package = fz_xml_find(content_opf, "package"); version = fz_xml_att(package, "version"); if (!version || strcmp(version, "2.0")) fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>"); manifest = fz_xml_find_down(package, "manifest"); spine = fz_xml_find_down(package, "spine"); if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx)) { /* TODO: parse NCX to create fz_outline */ printf("epub: found outline: %s\n", ncx); } head = tail = NULL; itemref = fz_xml_find_down(spine, "itemref"); while (itemref) { if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s)) { printf("epub: found spine %s\n", s); if (!head) head = tail = epub_parse_chapter(ctx, doc, s); else tail = tail->next = epub_parse_chapter(ctx, doc, s); } itemref = fz_xml_find_next(itemref, "itemref"); } doc->spine = head; printf("epub: done.\n"); fz_drop_xml(ctx, container_xml); fz_drop_xml(ctx, content_opf); }