/*
 * Read and interleave split parts from a ZIP file.
 *
 * A part is stored either as one archive entry named after the part, or as
 * a run of entries "<name>/[0].piece", "<name>/[1].piece", ... terminated
 * by "<name>/[N].last.piece". A single leading '/' in partname is skipped
 * when forming archive entry names.
 *
 * Returns whatever xps_new_part() builds from the assembled buffer.
 * Throws FZ_ERROR_GENERIC when neither the next ".piece" nor a
 * ".last.piece" entry exists. Any exception raised while assembling is
 * rethrown after the partially built buffers have been dropped.
 */
xps_part *
xps_read_part(fz_context *ctx, xps_document *doc, char *partname)
{
	fz_archive *zip = doc->zip;
	fz_buffer *buf = NULL;
	fz_buffer *tmp = NULL;
	char path[2048];
	int count;
	char *name;
	int seen_last;

	/* Both buffers are assigned inside fz_try and read in the handlers. */
	fz_var(buf);
	fz_var(tmp);

	name = partname;
	if (name[0] == '/')
		name++;

	fz_try(ctx)
	{
		/* All in one piece */
		if (fz_has_archive_entry(ctx, zip, name))
		{
			buf = fz_read_archive_entry(ctx, zip, name);
		}
		/* Assemble all the pieces */
		else
		{
			buf = fz_new_buffer(ctx, 512);
			seen_last = 0;
			for (count = 0; !seen_last; ++count)
			{
				fz_snprintf(path, sizeof path, "%s/[%d].piece", name, count);
				if (!fz_has_archive_entry(ctx, zip, path))
				{
					fz_snprintf(path, sizeof path, "%s/[%d].last.piece", name, count);
					if (!fz_has_archive_entry(ctx, zip, path))
						fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find all pieces for part '%s'", partname);
					seen_last = 1;
				}

				tmp = fz_read_archive_entry(ctx, zip, path);
				fz_append_buffer(ctx, buf, tmp);
				fz_drop_buffer(ctx, tmp);
				/* Cleared so the fz_always handler does not drop it twice. */
				tmp = NULL;
			}
		}
	}
	fz_always(ctx)
	{
		/* Non-NULL only when fz_append_buffer threw mid-iteration. */
		fz_drop_buffer(ctx, tmp);
	}
	fz_catch(ctx)
	{
		fz_drop_buffer(ctx, buf);
		fz_rethrow(ctx);
	}

	return xps_new_part(ctx, doc, partname, buf);
}
/*
 * Load the image referenced by src (resolved against base_uri inside zip)
 * and append it to the nearest enclosing BOX_FLOW box of 'box'.
 *
 * If the image cannot be read or decoded, a warning is emitted and the
 * literal text "[image]" is appended to the flow instead; the error is not
 * propagated to the caller.
 */
static void
generate_image(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_html *box, const char *src)
{
	fz_image *img;
	fz_buffer *buf = NULL;
	char path[2048];
	fz_html *flow = box;

	/* buf is assigned inside fz_try and read in fz_always. */
	fz_var(buf);

	/* Walk up to the flow box that will own the new content. */
	while (flow->type != BOX_FLOW)
		flow = flow->up;

	fz_strlcpy(path, base_uri, sizeof path);
	fz_strlcat(path, "/", sizeof path);
	fz_strlcat(path, src, sizeof path);
	fz_cleanname(path);

	fz_try(ctx)
	{
		buf = fz_read_archive_entry(ctx, zip, path);
		img = fz_new_image_from_buffer(ctx, buf);
		/* NOTE(review): img is handed to add_flow_image and not dropped
		 * here; confirm add_flow_image takes ownership of the reference. */
		add_flow_image(ctx, flow, &box->style, img);
	}
	fz_always(ctx)
	{
		/* Release our buffer reference on success and on failure alike;
		 * previously it leaked when image creation threw. */
		fz_drop_buffer(ctx, buf);
	}
	fz_catch(ctx)
	{
		const char *alt = "[image]";
		fz_warn(ctx, "html: cannot add image src='%s'", src);
		/* alt + 7 is one past the last character of "[image]". */
		add_flow_word(ctx, flow, &box->style, alt, alt + 7);
	}
}
/*
 * Load page 'number' of a comic book archive.
 *
 * Returns NULL when number is outside [0, doc->page_count). Otherwise
 * reads the archive entry for that page, decodes it as an image and returns
 * a newly allocated cbz_page. If allocation or decoding throws, the page is
 * released with cbz_free_page and the exception is rethrown.
 */
cbz_page *
cbz_load_page(cbz_document *doc, int number)
{
	fz_context *ctx = doc->ctx;
	cbz_page *page = NULL;
	fz_buffer *buf;

	if (number < 0 || number >= doc->page_count)
		return NULL;

	/* page is assigned inside fz_try and read in fz_catch. */
	fz_var(page);

	/* Read outside the try block: if this throws, nothing is owned yet. */
	buf = fz_read_archive_entry(ctx, doc->zip, doc->page[number]);
	fz_try(ctx)
	{
		page = fz_malloc_struct(ctx, cbz_page);
		page->image = fz_new_image_from_buffer(ctx, buf);
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, buf);
	}
	fz_catch(ctx)
	{
		cbz_free_page(doc, page);
		fz_rethrow(ctx);
	}

	return page;
}
/*
 * Build an epub_chapter for the archive entry at 'path'.
 *
 * The directory part of path is used as the base URI when parsing the
 * HTML. On success the returned chapter owns a copy of path and the parsed
 * HTML. On failure everything allocated here is released and the exception
 * is rethrown.
 */
static epub_chapter *
epub_parse_chapter(fz_context *ctx, epub_document *doc, const char *path)
{
	fz_archive *archive = doc->zip;
	char base_uri[2048];
	epub_chapter *chapter;
	fz_buffer *contents = NULL;

	fz_dirname(base_uri, path, sizeof base_uri);

	/* Start from a fully cleared chapter so the error path can free it. */
	chapter = fz_malloc_struct(ctx, epub_chapter);
	chapter->path = NULL;
	chapter->html = NULL;
	chapter->next = NULL;

	/* contents is assigned inside fz_try and read in fz_always. */
	fz_var(contents);

	fz_try(ctx)
	{
		contents = fz_read_archive_entry(ctx, archive, path);
		chapter->path = fz_strdup(ctx, path);
		chapter->html = fz_parse_html(ctx, doc->set, archive, base_uri, contents, fz_user_css(ctx));
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, contents);
	}
	fz_catch(ctx)
	{
		fz_drop_html(ctx, chapter->html);
		fz_free(ctx, chapter->path);
		fz_free(ctx, chapter);
		fz_rethrow(ctx);
	}

	return chapter;
}
/*
 * Load page 'number' of a comic book archive.
 *
 * Returns NULL when number is outside [0, doc->page_count). Otherwise
 * reads the archive entry for that page, creates a page object with the
 * cbz bound/run/drop callbacks installed, and decodes the entry as the
 * page image. If page creation or decoding throws, cbz_drop_page_imp is
 * called on the page and the exception is rethrown.
 */
static cbz_page *
cbz_load_page(fz_context *ctx, cbz_document *doc, int number)
{
	cbz_page *page = NULL;
	fz_buffer *buf;

	if (number < 0 || number >= doc->page_count)
		return NULL;

	/* page is assigned inside fz_try and read in fz_catch. */
	fz_var(page);

	/* Read outside the try block: if this throws, nothing is owned yet. */
	buf = fz_read_archive_entry(ctx, doc->zip, doc->page[number]);
	fz_try(ctx)
	{
		page = fz_new_page(ctx, sizeof *page);
		page->super.bound_page = (fz_page_bound_page_fn *)cbz_bound_page;
		page->super.run_page_contents = (fz_page_run_page_contents_fn *)cbz_run_page;
		page->super.drop_page_imp = (fz_page_drop_page_imp_fn *)cbz_drop_page_imp;
		page->image = fz_new_image_from_buffer(ctx, buf);
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, buf);
	}
	fz_catch(ctx)
	{
		/* NOTE(review): page is still NULL here if fz_new_page threw;
		 * confirm cbz_drop_page_imp tolerates a NULL page. */
		cbz_drop_page_imp(ctx, page);
		fz_rethrow(ctx);
	}

	return page;
}
/*
 * Collect CSS rules from an XML tree, depth first.
 *
 * For every node in the sibling list starting at root:
 *   - a <link rel="stylesheet"> whose type is absent or "text/css" and
 *     that has an href: the referenced archive entry (resolved against
 *     base_uri) is read and parsed;
 *   - a <style> element: its concatenated text is parsed;
 *   - any node with children: the children are processed recursively.
 *
 * Parsed rules are chained onto 'css' via fz_parse_css, and the resulting
 * rule list is returned. Exceptions from reading or parsing propagate to
 * the caller; temporary buffers and strings are released first.
 */
static fz_css_rule *
html_load_css(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_css_rule *css, fz_xml *root)
{
	fz_xml *node;
	fz_buffer *buf = NULL;
	char path[2048];

	/* buf is assigned inside fz_try and read in fz_always. */
	fz_var(buf);

	for (node = root; node; node = fz_xml_next(node))
	{
		const char *tag = fz_xml_tag(node);

		if (tag && !strcmp(tag, "link"))
		{
			char *rel = fz_xml_att(node, "rel");
			if (rel && !fz_strcasecmp(rel, "stylesheet"))
			{
				char *type = fz_xml_att(node, "type");
				if ((type && !strcmp(type, "text/css")) || !type)
				{
					char *href = fz_xml_att(node, "href");
					if (href)
					{
						fz_strlcpy(path, base_uri, sizeof path);
						fz_strlcat(path, "/", sizeof path);
						fz_strlcat(path, href, sizeof path);
						fz_cleanname(path);

						buf = NULL;
						fz_try(ctx)
						{
							buf = fz_read_archive_entry(ctx, zip, path);
							/* Zero-terminate so the data can be parsed as a C string. */
							fz_write_buffer_byte(ctx, buf, 0);
							css = fz_parse_css(ctx, css, (char*)buf->data, path);
						}
						fz_always(ctx)
						{
							fz_drop_buffer(ctx, buf);
						}
						fz_catch(ctx)
						{
							fz_rethrow(ctx);
						}
					}
				}
			}
		}

		if (tag && !strcmp(tag, "style"))
		{
			char *s = concat_text(ctx, node);
			fz_try(ctx)
			{
				css = fz_parse_css(ctx, css, s, "<style>");
			}
			fz_always(ctx)
			{
				/* Free the text even when parsing throws. */
				fz_free(ctx, s);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
		}

		if (fz_xml_down(node))
			css = html_load_css(ctx, zip, base_uri, css, fz_xml_down(node));
	}

	return css;
}
static fz_css_rule * html_load_css(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_css_rule *css, fz_xml *root) { fz_xml *html, *head, *node; fz_buffer *buf; char path[2048]; fz_var(buf); html = fz_xml_find(root, "html"); head = fz_xml_find_down(html, "head"); for (node = fz_xml_down(head); node; node = fz_xml_next(node)) { if (fz_xml_is_tag(node, "link")) { char *rel = fz_xml_att(node, "rel"); if (rel && !fz_strcasecmp(rel, "stylesheet")) { char *type = fz_xml_att(node, "type"); if ((type && !strcmp(type, "text/css")) || !type) { char *href = fz_xml_att(node, "href"); if (href) { fz_strlcpy(path, base_uri, sizeof path); fz_strlcat(path, "/", sizeof path); fz_strlcat(path, href, sizeof path); fz_urldecode(path); fz_cleanname(path); buf = NULL; fz_try(ctx) { buf = fz_read_archive_entry(ctx, zip, path); fz_write_buffer_byte(ctx, buf, 0); css = fz_parse_css(ctx, css, (char*)buf->data, path); } fz_always(ctx) fz_drop_buffer(ctx, buf); fz_catch(ctx) fz_warn(ctx, "ignoring stylesheet %s", path); } } }
/*
 * Build an epub_chapter for the archive entry at 'path'.
 *
 * The entry is read, zero-terminated and parsed as HTML, with the
 * directory part of path as base URI. The parse result is stored in
 * ch->box. On failure the buffer and the partially built chapter are
 * released and the exception is rethrown.
 */
static epub_chapter *
epub_parse_chapter(fz_context *ctx, epub_document *doc, const char *path)
{
	fz_archive *zip = doc->zip;
	fz_buffer *buf = NULL;
	epub_chapter *ch = NULL;
	char base_uri[2048];

	fz_dirname(base_uri, path, sizeof base_uri);

	/* Both are assigned inside fz_try and read in the handlers. */
	fz_var(buf);
	fz_var(ch);

	fz_try(ctx)
	{
		buf = fz_read_archive_entry(ctx, zip, path);
		fz_write_buffer_byte(ctx, buf, 0);

		ch = fz_malloc_struct(ctx, epub_chapter);
		ch->box = fz_parse_html(ctx, doc->set, zip, base_uri, buf, fz_user_css(ctx));
		ch->next = NULL;
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, buf);
	}
	fz_catch(ctx)
	{
		/* ch->box is never set when we get here, so only the struct
		 * itself needs freeing. */
		fz_free(ctx, ch);
		fz_rethrow(ctx);
	}

	return ch;
}
/*
 * Parse the NCX table of contents stored at 'path' in the document archive
 * and store the resulting outline in doc->outline.
 *
 * The directory part of path is passed to epub_parse_ncx_imp as the base
 * URI, together with the "navMap" child of the parsed XML. Exceptions
 * propagate to the caller after the buffer and XML tree have been released.
 */
static void
epub_parse_ncx(fz_context *ctx, epub_document *doc, const char *path)
{
	fz_archive *zip = doc->zip;
	fz_buffer *buf = NULL;
	fz_xml *ncx = NULL;
	char base_uri[2048];
	unsigned char *data;
	size_t len;

	fz_dirname(base_uri, path, sizeof base_uri);

	/* Both are assigned inside fz_try and read in fz_always. */
	fz_var(buf);
	fz_var(ncx);

	fz_try(ctx)
	{
		buf = fz_read_archive_entry(ctx, zip, path);
		fz_write_buffer_byte(ctx, buf, 0);
		len = fz_buffer_storage(ctx, buf, &data);
		ncx = fz_parse_xml(ctx, data, len, 0);

		/* Release the raw bytes before building the outline. */
		fz_drop_buffer(ctx, buf);
		buf = NULL;

		doc->outline = epub_parse_ncx_imp(ctx, doc, fz_xml_find_down(ncx, "navMap"), base_uri);
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, buf);
		if (ncx)
			fz_drop_xml(ctx, ncx);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
/*
 * Document callback: load page 'number' of a comic book archive.
 *
 * Throws FZ_ERROR_GENERIC when number is out of range, or when no page
 * data could be obtained (no archive attached, or the read returned NULL).
 * Otherwise returns a new cbz_page, cast to fz_page, whose image is decoded
 * from the archive entry. If page setup throws, the page is dropped and the
 * exception is rethrown.
 */
static fz_page *
cbz_load_page(fz_context *ctx, fz_document *doc_, int number)
{
	cbz_document *cbz = (cbz_document *)doc_;
	fz_buffer *bytes = NULL;
	cbz_page *result = NULL;

	if (!(number >= 0 && number < cbz->page_count))
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load page %d", number);

	/* result is assigned inside fz_try and read in fz_catch. */
	fz_var(result);

	if (cbz->arch != NULL)
		bytes = fz_read_archive_entry(ctx, cbz->arch, cbz->page[number]);
	if (bytes == NULL)
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load cbz page");

	fz_try(ctx)
	{
		result = fz_new_derived_page(ctx, cbz_page);
		result->super.bound_page = cbz_bound_page;
		result->super.run_page_contents = cbz_run_page;
		result->super.drop_page = cbz_drop_page;
		result->image = fz_new_image_from_buffer(ctx, bytes);
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, bytes);
	}
	fz_catch(ctx)
	{
		fz_drop_page(ctx, (fz_page *)result);
		fz_rethrow(ctx);
	}

	return (fz_page *)result;
}
/*
 * Read and interleave split parts from a ZIP file.
 *
 * A part is stored either as one archive entry named after the part, or as
 * a run of entries "<name>/[0].piece", "<name>/[1].piece", ... terminated
 * by "<name>/[N].last.piece". A single leading '/' in partname is skipped
 * when forming archive entry names.
 *
 * The assembled data is zero-terminated and handed to xps_new_part()
 * together with its size (terminator not counted). Throws FZ_ERROR_GENERIC
 * when neither the next ".piece" nor a ".last.piece" entry exists. Any
 * exception raised while assembling is rethrown after the partially built
 * buffers have been dropped.
 */
xps_part *
xps_read_part(fz_context *ctx, xps_document *doc, char *partname)
{
	fz_archive *zip = doc->zip;
	fz_buffer *buf = NULL;
	fz_buffer *tmp = NULL;
	char path[2048];
	unsigned char *data;
	int size;
	int count;
	char *name;
	int seen_last;

	/* Both buffers are assigned inside fz_try and read in the handlers. */
	fz_var(buf);
	fz_var(tmp);

	name = partname;
	if (name[0] == '/')
		name++;

	fz_try(ctx)
	{
		/* All in one piece */
		if (fz_has_archive_entry(ctx, zip, name))
		{
			buf = fz_read_archive_entry(ctx, zip, name);
		}
		/* Assemble all the pieces */
		else
		{
			buf = fz_new_buffer(ctx, 512);
			seen_last = 0;
			for (count = 0; !seen_last; ++count)
			{
				/* Bounded formatting: the part name comes from the
				 * document and may be longer than path[]. A truncated
				 * path will not match an entry and ends in the throw
				 * below. */
				snprintf(path, sizeof path, "%s/[%d].piece", name, count);
				if (!fz_has_archive_entry(ctx, zip, path))
				{
					snprintf(path, sizeof path, "%s/[%d].last.piece", name, count);
					if (!fz_has_archive_entry(ctx, zip, path))
						fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find all pieces for part '%s'", partname);
					seen_last = 1;
				}

				tmp = fz_read_archive_entry(ctx, zip, path);
				fz_append_buffer(ctx, buf, tmp);
				fz_drop_buffer(ctx, tmp);
				/* Cleared so the fz_always handler does not drop it twice. */
				tmp = NULL;
			}
		}

		fz_write_buffer_byte(ctx, buf, 0); /* zero-terminate */
	}
	fz_always(ctx)
	{
		/* Non-NULL only when fz_append_buffer threw mid-iteration. */
		fz_drop_buffer(ctx, tmp);
	}
	fz_catch(ctx)
	{
		fz_drop_buffer(ctx, buf);
		fz_rethrow(ctx);
	}

	/* take over the data: only the buffer header is freed, the storage
	 * itself is passed on to the new part */
	data = buf->data;
	/* size doesn't include the added zero-terminator */
	size = buf->len - 1;
	fz_free(ctx, buf);

	return xps_new_part(ctx, doc, partname, data, size);
}
/*
 * Register the font described by a CSS @font-face declaration list.
 *
 * Reads the font-family, font-weight, font-style and src properties
 * (defaults: "serif", "normal", "normal", none). Does nothing when src is
 * missing, or when a custom face with the same resolved path, family, bold
 * and italic flags is already present in 'set'. Otherwise loads the font
 * data and adds it to the set. Loading failures are reported with a
 * warning and are not propagated.
 */
void
fz_add_css_font_face(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css_property *declaration)
{
	const char *family = "serif";
	const char *weight = "normal";
	const char *style = "normal";
	const char *src = NULL;
	fz_css_property *decl;
	fz_html_font_face *face;
	fz_buffer *data = NULL;
	fz_font *loaded = NULL;
	char path[2048];
	int is_bold, is_italic;

	/* Pick out the properties we care about; the last occurrence wins. */
	for (decl = declaration; decl; decl = decl->next)
	{
		if (!strcmp(decl->name, "font-family"))
			family = decl->value->data;
		else if (!strcmp(decl->name, "font-weight"))
			weight = decl->value->data;
		else if (!strcmp(decl->name, "font-style"))
			style = decl->value->data;
		else if (!strcmp(decl->name, "src"))
			src = decl->value->data;
	}

	if (src == NULL)
		return;

	is_bold = is_bold_from_font_weight(weight);
	is_italic = is_italic_from_font_style(style);

	/* Resolve src against the base URI to get the archive entry name. */
	fz_strlcpy(path, base_uri, sizeof path);
	fz_strlcat(path, "/", sizeof path);
	fz_strlcat(path, src, sizeof path);
	fz_urldecode(path);
	fz_cleanname(path);

	for (face = set->custom; face; face = face->next)
	{
		if (strcmp(face->src, path) != 0)
			continue;
		if (strcmp(face->family, family) != 0)
			continue;
		if (face->is_bold == is_bold && face->is_italic == is_italic)
			return; /* already loaded */
	}

	printf("epub: @font-face: family='%s' b=%d i=%d src=%s\n", family, is_bold, is_italic, src);

	/* Both are assigned inside fz_try and read in fz_always. */
	fz_var(data);
	fz_var(loaded);

	fz_try(ctx)
	{
		/* NOTE(review): when the archive has no such entry, the raw src
		 * string from the stylesheet is opened as a file path; confirm
		 * this fallback is intended for untrusted documents. */
		if (fz_has_archive_entry(ctx, zip, path))
			data = fz_read_archive_entry(ctx, zip, path);
		else
			data = fz_read_file(ctx, src);

		loaded = fz_new_font_from_buffer(ctx, src, data, 0, 0);
		fz_add_html_font_face(ctx, set, family, is_bold, is_italic, path, loaded);
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, data);
		fz_drop_font(ctx, loaded);
	}
	fz_catch(ctx)
	{
		fz_warn(ctx, "cannot load font-face: %s", src);
	}
}
/*
 * Read the EPUB package metadata and build the document's chapter list.
 *
 * Steps:
 *   1. Refuse archives containing META-INF/rights.xml or
 *      META-INF/encryption.xml (treated as DRM-locked).
 *   2. Parse META-INF/container.xml to find the OPF "full-path".
 *   3. Parse the OPF: warn unless version is "2.0", read title and creator
 *      from <metadata>, and locate <manifest> and <spine>.
 *   4. If the spine's "toc" idref resolves, parse that NCX for the outline.
 *   5. Parse each spine <itemref> that resolves into a chapter and append
 *      it to doc->spine in order.
 *
 * Throws FZ_ERROR_GENERIC for DRM-locked archives or a missing root file.
 * The temporary buffer and both XML trees are released on every exit path
 * before any exception is rethrown.
 */
static void
epub_parse_header(fz_context *ctx, epub_document *doc)
{
	fz_archive *zip = doc->zip;
	fz_buffer *buf = NULL;
	fz_xml *container_xml = NULL, *content_opf = NULL;
	fz_xml *container, *rootfiles, *rootfile;
	fz_xml *package, *manifest, *spine, *itemref, *metadata;
	char base_uri[2048];
	const char *full_path;
	const char *version;
	char ncx[2048], s[2048];
	epub_chapter **tailp;
	size_t len;
	unsigned char *data;

	if (fz_has_archive_entry(ctx, zip, "META-INF/rights.xml"))
		fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM");
	if (fz_has_archive_entry(ctx, zip, "META-INF/encryption.xml"))
		fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM");

	/* All three are assigned inside fz_try and read in fz_always. */
	fz_var(buf);
	fz_var(container_xml);
	fz_var(content_opf);

	fz_try(ctx)
	{
		/* parse META-INF/container.xml to find OPF */

		buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml");
		fz_write_buffer_byte(ctx, buf, 0);
		len = fz_buffer_storage(ctx, buf, &data);
		container_xml = fz_parse_xml(ctx, data, len, 0);
		fz_drop_buffer(ctx, buf);
		buf = NULL; /* avoid a second drop in fz_always */

		container = fz_xml_find(container_xml, "container");
		rootfiles = fz_xml_find_down(container, "rootfiles");
		rootfile = fz_xml_find_down(rootfiles, "rootfile");
		full_path = fz_xml_att(rootfile, "full-path");
		if (!full_path)
			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB");

		fz_dirname(base_uri, full_path, sizeof base_uri);

		/* parse OPF to find NCX and spine */

		buf = fz_read_archive_entry(ctx, zip, full_path);
		fz_write_buffer_byte(ctx, buf, 0);
		len = fz_buffer_storage(ctx, buf, &data);
		content_opf = fz_parse_xml(ctx, data, len, 0);
		fz_drop_buffer(ctx, buf);
		buf = NULL; /* avoid a second drop in fz_always */

		package = fz_xml_find(content_opf, "package");
		version = fz_xml_att(package, "version");
		if (!version || strcmp(version, "2.0"))
			fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>");

		metadata = fz_xml_find_down(package, "metadata");
		if (metadata)
		{
			doc->dc_title = find_metadata(ctx, metadata, "title");
			doc->dc_creator = find_metadata(ctx, metadata, "creator");
		}

		manifest = fz_xml_find_down(package, "manifest");
		spine = fz_xml_find_down(package, "spine");

		if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx))
		{
			epub_parse_ncx(ctx, doc, ncx);
		}

		/* Append chapters through a tail pointer so spine order is kept. */
		doc->spine = NULL;
		tailp = &doc->spine;
		itemref = fz_xml_find_down(spine, "itemref");
		while (itemref)
		{
			if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s))
			{
				*tailp = epub_parse_chapter(ctx, doc, s);
				tailp = &(*tailp)->next;
			}
			itemref = fz_xml_find_next(itemref, "itemref");
		}
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, buf);
		if (container_xml)
			fz_drop_xml(ctx, container_xml);
		if (content_opf)
			fz_drop_xml(ctx, content_opf);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
/*
 * Read the EPUB package metadata and build the document's chapter list.
 *
 * Steps:
 *   1. Refuse archives containing META-INF/rights.xml or
 *      META-INF/encryption.xml (treated as DRM-locked).
 *   2. Parse META-INF/container.xml to find the OPF "full-path".
 *   3. Parse the OPF, warn unless version is "2.0", and locate <manifest>
 *      and <spine>.
 *   4. Report (but do not yet parse) the NCX named by the spine's "toc".
 *   5. Parse each spine <itemref> that resolves into a chapter, in order,
 *      and store the list in doc->spine.
 *
 * Progress is traced to stdout with printf. Throws FZ_ERROR_GENERIC for
 * DRM-locked archives or a missing root file. The temporary buffer and
 * both XML trees are released on every exit path before any exception is
 * rethrown.
 */
static void
epub_parse_header(fz_context *ctx, epub_document *doc)
{
	fz_archive *zip = doc->zip;
	fz_buffer *buf = NULL;
	fz_xml *container_xml = NULL, *content_opf = NULL;
	fz_xml *container, *rootfiles, *rootfile;
	fz_xml *package, *manifest, *spine, *itemref;
	char base_uri[2048];
	const char *full_path;
	const char *version;
	char ncx[2048], s[2048];
	epub_chapter *head, *tail;

	if (fz_has_archive_entry(ctx, zip, "META-INF/rights.xml"))
		fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM");
	if (fz_has_archive_entry(ctx, zip, "META-INF/encryption.xml"))
		fz_throw(ctx, FZ_ERROR_GENERIC, "EPUB is locked by DRM");

	/* All three are assigned inside fz_try and read in fz_always. */
	fz_var(buf);
	fz_var(container_xml);
	fz_var(content_opf);

	fz_try(ctx)
	{
		/* parse META-INF/container.xml to find OPF */

		buf = fz_read_archive_entry(ctx, zip, "META-INF/container.xml");
		fz_write_buffer_byte(ctx, buf, 0);
		container_xml = fz_parse_xml(ctx, buf->data, buf->len, 0);
		fz_drop_buffer(ctx, buf);
		buf = NULL; /* avoid a second drop in fz_always */

		container = fz_xml_find(container_xml, "container");
		rootfiles = fz_xml_find_down(container, "rootfiles");
		rootfile = fz_xml_find_down(rootfiles, "rootfile");
		full_path = fz_xml_att(rootfile, "full-path");
		if (!full_path)
			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find root file in EPUB");

		printf("epub: found root: %s\n", full_path);

		fz_dirname(base_uri, full_path, sizeof base_uri);

		/* parse OPF to find NCX and spine */

		buf = fz_read_archive_entry(ctx, zip, full_path);
		fz_write_buffer_byte(ctx, buf, 0);
		content_opf = fz_parse_xml(ctx, buf->data, buf->len, 0);
		fz_drop_buffer(ctx, buf);
		buf = NULL; /* avoid a second drop in fz_always */

		package = fz_xml_find(content_opf, "package");
		version = fz_xml_att(package, "version");
		if (!version || strcmp(version, "2.0"))
			fz_warn(ctx, "unknown epub version: %s", version ? version : "<none>");

		manifest = fz_xml_find_down(package, "manifest");
		spine = fz_xml_find_down(package, "spine");

		if (path_from_idref(ncx, manifest, base_uri, fz_xml_att(spine, "toc"), sizeof ncx))
		{
			/* TODO: parse NCX to create fz_outline */
			printf("epub: found outline: %s\n", ncx);
		}

		/* NOTE(review): if a chapter fails to parse, the chapters already
		 * linked from head are not released here; confirm whether they
		 * should be freed or published to doc->spine on that path. */
		head = tail = NULL;
		itemref = fz_xml_find_down(spine, "itemref");
		while (itemref)
		{
			if (path_from_idref(s, manifest, base_uri, fz_xml_att(itemref, "idref"), sizeof s))
			{
				printf("epub: found spine %s\n", s);
				if (!head)
					head = tail = epub_parse_chapter(ctx, doc, s);
				else
					tail = tail->next = epub_parse_chapter(ctx, doc, s);
			}
			itemref = fz_xml_find_next(itemref, "itemref");
		}

		doc->spine = head;

		printf("epub: done.\n");
	}
	fz_always(ctx)
	{
		fz_drop_buffer(ctx, buf);
		if (container_xml)
			fz_drop_xml(ctx, container_xml);
		if (content_opf)
			fz_drop_xml(ctx, content_opf);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}