// Handle a "picture" paragraph: read its 16-bit storage type, hand it to the
// matching extractor, and, when an HTML document is being produced, emit a
// placeholder paragraph that names the output file ID(s) the picture received.
static void do_picture(deark *c, lctx *d, struct para_info *pinfo)
{
	unsigned int storage_type;
	int first_id;
	i64 pos = pinfo->thisparapos;

	// The storage type field alone needs 2 bytes.
	if(pinfo->thisparalen<2) return;

	storage_type = (unsigned int)de_getu16le(pos);
	de_dbg(c, "picture storage type: 0x%04x (%s)", storage_type,
		get_picture_storage_type_name(storage_type));

	// Snapshot the file ID counter before any extraction takes place.
	first_id = get_next_output_file_id(c);

	if(storage_type==0x88) {
		do_picture_metafile(c, d, pinfo);
	}
	else if(storage_type==0xe3) {
		do_picture_bitmap(c, d, pinfo);
	}
	else if(storage_type==0xe4) {
		do_picture_ole(c, d, pinfo);
	}
	else {
		de_err(c, "Picture storage type 0x%04x not supported", storage_type);
	}

	if(d->html_outf) {
		char label[24];
		int next_id;

		// The HTML document should mention the image file ID number(s), so the
		// user can tell which image belongs where. The high level functions
		// that create output files (e.g. de_convert_and_write_image_bilevel)
		// have no way to return the ID of the file they created, and adding
		// such a mechanism would be a lot of trouble. So, as an admitted hack,
		// the ID is deduced by watching the global file ID counter.
		do_emit_raw_sz(c, d, pinfo, "<p class=r>picture");

		next_id = get_next_output_file_id(c);
		switch(next_id - first_id) {
		case 1:
			// Exactly one file was created.
			de_snprintf(label, sizeof(label), " %d", first_id);
			break;
		case 0:
			de_strlcpy(label, " (not extracted)", sizeof(label));
			break;
		default:
			// Multiple files: the leading "s" turns "picture" into "pictures".
			de_snprintf(label, sizeof(label), "s %d-%d", first_id, next_id-1);
			break;
		}

		do_emit_raw_sz(c, d, pinfo, label);
		do_emit_raw_sz(c, d, pinfo, "</p>\n");
	}
}
// Chunk handler that extracts an embedded ICC profile to an ".icc" file.
//
// Chunk data layout, as read here:
//  - profile name: Latin-1 string, NUL-terminated, searched for in at most
//    80 bytes
//  - compression type: 1 byte (only type 0 is handled)
//  - deflate-compressed profile data, running to the end of the chunk
//
// hp->dpos is the absolute offset of the chunk data in c->infile, and
// hp->dlen is its length in bytes.
static void handler_iccp(deark *c, lctx *d, struct handler_params *hp)
{
	u8 cmpr_type;
	dbuf *f = NULL;
	struct de_stringreaderdata *prof_name_srd = NULL;
	de_finfo *fi = NULL;
	char prof_name2[100];
	size_t prof_name2_strlen;
	i64 cmpr_len;
	i64 pos = hp->dpos;

	prof_name_srd = dbuf_read_string(c->infile, pos, 80, 80, DE_CONVFLAG_STOP_AT_NUL,
		DE_ENCODING_LATIN1);
	if(!prof_name_srd->found_nul) goto done;
	de_dbg(c, "profile name: \"%s\"", ucstring_getpsz_d(prof_name_srd->str));
	pos += prof_name_srd->bytes_consumed;

	// Our working copy, to use as part of the filename.
	de_strlcpy(prof_name2, prof_name_srd->sz, sizeof(prof_name2));
	if(!de_strcasecmp(prof_name2, "icc") ||
		!de_strcasecmp(prof_name2, "icc profile"))
	{
		prof_name2[0] = '\0'; // Ignore generic name.
	}

	prof_name2_strlen = de_strlen(prof_name2);
	if(prof_name2_strlen>=5) {
		if(de_sz_has_ext(prof_name2, "icc")) {
			// If the name already ends in ".icc", chop it off so that we don't end
			// up with a double ".icc.icc" file extension.
			prof_name2[prof_name2_strlen-4] = '\0';
		}
	}

	cmpr_type = de_getbyte_p(&pos);
	// Unsupported compression type. Leave via the cleanup path, so that
	// prof_name_srd is released.
	if(cmpr_type!=0) goto done;

	// 'pos' is an absolute file offset, so the number of compressed bytes is
	// the distance from 'pos' to the end of the chunk data.
	cmpr_len = hp->dpos + hp->dlen - pos;
	// A negative length means the name and compression-type fields ran past
	// the end of the chunk (malformed file); there is nothing to decompress.
	if(cmpr_len<0) goto done;

	fi = de_finfo_create(c);
	if(c->filenames_from_file && prof_name2[0])
		de_finfo_set_name_from_sz(c, fi, prof_name2, 0, DE_ENCODING_LATIN1);
	f = dbuf_create_output_file(c, "icc", fi, DE_CREATEFLAG_IS_AUX);
	// The zlib-wrapper flag is passed unless d->is_CgBI is set.
	de_decompress_deflate(c->infile, pos, cmpr_len, f, 0, NULL,
		d->is_CgBI ? 0 : DE_DEFLATEFLAG_ISZLIB);

done:
	dbuf_close(f);
	de_finfo_destroy(c, fi);
	de_destroy_stringreaderdata(c, prof_name_srd);
}
// Write a printable description of the hdr_type bit flags into buf (of size
// buflen), and return buf. A zero hdr_type yields an empty string; otherwise
// the result is the list of set flags, wrapped as " (...)".
static char *get_hdrtype_descr(deark *c, char *buf, size_t buflen, u8 hdr_type)
{
	static const struct {
		u8 mask;
		const char *label;
	} flag_tbl[] = {
		{ 0x01, "continuation page" },
		{ 0x02, "first page" },
		{ 0x04, "last page" }
	};
	de_ucstring *flags_str;
	size_t k;

	if(hdr_type==0) {
		de_strlcpy(buf, "", buflen);
		return buf;
	}

	flags_str = ucstring_create(c);
	// Flags are appended in ascending bit order.
	for(k=0; k<sizeof(flag_tbl)/sizeof(flag_tbl[0]); k++) {
		if(hdr_type & flag_tbl[k].mask) {
			ucstring_append_flags_item(flags_str, flag_tbl[k].label);
		}
	}
	de_snprintf(buf, buflen, " (%s)", ucstring_getpsz(flags_str));
	ucstring_destroy(flags_str);
	return buf;
}
// Copy a human-readable name for a BMP compression code into s (a buffer of
// s_len bytes). Codes 3 and 4 are named differently when is_os2v2 is nonzero.
// Codes not listed here produce "?".
void de_fmtutil_get_bmp_compression_name(u32 code, char *s, size_t s_len,
	int is_os2v2)
{
	// Names used when is_os2v2 is zero, indexed by compression code.
	static const char *const std_names[] = {
		"BI_RGB, uncompressed",
		"BI_RLE8",
		"BI_RLE4",
		"BI_BITFIELDS, uncompressed",
		"BI_JPEG",
		"BI_PNG"
	};
	const char *descr;

	if(code >= sizeof(std_names)/sizeof(std_names[0])) {
		descr = "?";
	}
	else if(is_os2v2 && code==3) {
		descr = "Huffman 1D";
	}
	else if(is_os2v2 && code==4) {
		descr = "RLE24";
	}
	else {
		descr = std_names[code];
	}

	de_strlcpy(s, descr, s_len);
}
// Process one icon "page": set pg->filename_token, validate the image data
// size where possible, and dispatch to the decoder that matches the image
// type and bit depth.
static void do_icon(deark *c, lctx *d, struct page_ctx *pg)
{
	de_int64 nbytes_expected;

	if(!pg->type_info) return; // Shouldn't happen.

	de_strlcpy(pg->filename_token, "", sizeof(pg->filename_token));

	if(pg->type_info->image_type==IMGTYPE_MASK) {
		// A mask is not extracted here, only reported.
		de_dbg(c, "transparency mask\n");
		return;
	}

	if(pg->type_info->image_type==IMGTYPE_EMBEDDED_FILE) {
		de_snprintf(pg->filename_token, sizeof(pg->filename_token), "%dx%d",
			(int)pg->type_info->width, (int)pg->type_info->height);
		do_extract_png_or_jp2(c, d, pg);
		return;
	}

	if(!(pg->type_info->image_type==IMGTYPE_IMAGE ||
		pg->type_info->image_type==IMGTYPE_IMAGE_AND_MASK))
	{
		return;
	}

	// From here on, this is a regular image (or an image+mask).

	// Note - This pg->rowspan is arguably incorrect for 24-bit images, since
	// rows aren't stored contiguously.
	pg->rowspan = ((pg->type_info->bpp * pg->type_info->width)+7)/8;

	nbytes_expected = pg->rowspan * pg->type_info->height;
	if(pg->type_info->image_type==IMGTYPE_IMAGE_AND_MASK) {
		nbytes_expected *= 2;
	}

	// 24-bit images are treated as compressed, so their stored size can't be
	// predicted; only the other depths get a size check.
	if(pg->type_info->bpp!=24) {
		if(pg->image_len < nbytes_expected) {
			de_err(c, "(Image #%d) Premature end of image (expected %d bytes, found %d)\n",
				pg->image_num, (int)nbytes_expected, (int)pg->image_len);
			return;
		}
		else if(pg->image_len > nbytes_expected) {
			de_warn(c, "(Image #%d) Extra image data found (expected %d bytes, found %d)\n",
				pg->image_num, (int)nbytes_expected, (int)pg->image_len);
		}
	}

	find_mask(c, d, pg);

	de_snprintf(pg->filename_token, sizeof(pg->filename_token), "%dx%dx%d",
		(int)pg->type_info->width, (int)pg->type_info->height,
		(int)pg->type_info->bpp);

	de_dbg(c, "image dimensions: %dx%d, bpp: %d\n",
		pg->type_info->width, pg->type_info->height, pg->type_info->bpp);

	switch(pg->type_info->bpp) {
	case 1:
	case 4:
	case 8:
		do_decode_1_4_8bit(c, d, pg);
		return;
	case 24:
		do_decode_24bit(c, d, pg);
		return;
	default:
		break;
	}

	// No decoder for this bit depth.
	de_warn(c, "(Image #%d) Image type '%s' is not supported\n",
		pg->image_num, pg->code4cc.id_printable);
}
// Module entry point: identify the PNG-family format, then walk the chunk
// list that starts at offset 8. For each chunk, a debug line is printed, the
// registered handler (if any) is called, and the stored CRC is reported.
static void de_run_png(deark *c, de_module_params *mparams)
{
	lctx *d = NULL;
	i64 pos;
	i32 last_chunk_id = 0;
	int idat_dbg_muted = 0;

	d = de_malloc(c, sizeof(lctx));

	de_dbg(c, "signature at %d", 0);
	de_dbg_indent(c, 1);
	d->fmt = do_identify_png_internal(c);
	if(d->fmt==DE_PNGFMT_PNG) {
		d->fmt_name = "PNG";
	}
	else if(d->fmt==DE_PNGFMT_JNG) {
		d->fmt_name = "JNG";
	}
	else if(d->fmt==DE_PNGFMT_MNG) {
		d->fmt_name = "MNG";
	}
	else {
		d->fmt_name = "?";
	}
	de_dbg(c, "format: %s", d->fmt_name);
	if(d->fmt>0) {
		de_declare_fmt(c, d->fmt_name);
	}
	de_dbg_indent(c, -1);

	pos = 8;
	while(pos < c->infile->len) {
		struct de_fourcc chunk4cc;
		struct handler_params hp;
		u32 crc;
		char nbuf[80];
		int is_idat;

		de_zeromem(&hp, sizeof(struct handler_params));

		hp.dlen = de_getu32be(pos);
		// A chunk is: 4-byte length, 4-byte type, data, 4-byte CRC. Stop if
		// it doesn't fit in the file.
		if(pos + 8 + hp.dlen + 4 > c->infile->len) break;
		dbuf_read_fourcc(c->infile, pos+4, &chunk4cc, 4, 0x0);

		hp.cti = get_chunk_type_info(chunk4cc.id);
		is_idat = (chunk4cc.id==CODE_IDAT);

		// At debug levels below 2, a run of consecutive IDAT chunks gets one
		// full line, then one "more follow" note, then silence.
		if(!(is_idat && idat_dbg_muted)) {
			if(is_idat && last_chunk_id==CODE_IDAT && c->debug_level<2) {
				de_dbg(c, "(more IDAT chunks follow)");
				idat_dbg_muted = 1;
			}
			else {
				if(!hp.cti) {
					de_strlcpy(nbuf, " (?)", sizeof(nbuf));
				}
				else if(!hp.cti->name) {
					de_strlcpy(nbuf, "", sizeof(nbuf));
				}
				else {
					de_snprintf(nbuf, sizeof(nbuf), " (%s)", hp.cti->name);
				}

				de_dbg(c, "chunk '%s'%s at %d dpos=%d dlen=%d",
					chunk4cc.id_dbgstr, nbuf,
					(int)pos, (int)(pos+8), (int)hp.dlen);
				if(!is_idat) idat_dbg_muted = 0;
			}
		}

		pos += 8;

		de_dbg_indent(c, 1);

		hp.dpos = pos;
		hp.chunk4cc = &chunk4cc;
		if(hp.cti && hp.cti->handler_fn) {
			hp.cti->handler_fn(c, d, &hp);
		}
		else if(!hp.cti && c->debug_level>=2) {
			// Unrecognized chunk type: hex dump it at high debug levels.
			handler_hexdump(c, d, &hp);
		}
		pos += hp.dlen;

		crc = (u32)de_getu32be(pos);
		de_dbg2(c, "crc32 (reported): 0x%08x", (unsigned int)crc);
		pos += 4;

		de_dbg_indent(c, -1);

		last_chunk_id = chunk4cc.id;
	}

	de_free(c, d);
}
// Create a dbuf representing a new output file, and decide where (or
// whether) its contents will actually be written.
//
// Parameters:
//  c           - The global context. Several of its counters are updated here
//                (file_count, num_files_extracted).
//  ext         - Filename extension/suffix to use. May be NULL.
//  fi          - Optional (may be NULL) metadata about the file: name,
//                directory flag, timestamps, etc. A copy is stored in the
//                returned dbuf, so the caller's object need not stay valid.
//  createflags - Bit flags. The only one tested here is DE_CREATEFLAG_IS_AUX.
//
// Returns the newly allocated dbuf. When the file is skipped for any reason
// (directory entry, extraction policy, outside the requested file range, list
// mode, or failure to open), a dbuf is still returned, with its btype set to
// DBUF_TYPE_NULL.
dbuf *dbuf_create_output_file(deark *c, const char *ext, de_finfo *fi,
	unsigned int createflags)
{
	char nbuf[500];
	char msgbuf[200];
	dbuf *f;
	const char *basefn;
	int file_index;
	u8 is_directory = 0;
	char *name_from_finfo = NULL;
	i64 name_from_finfo_len = 0;

	// Supplying an extension along with a finfo marked as having an "original
	// filename" is treated as a caller error, but only reported as a debug
	// message.
	if(ext && fi && fi->original_filename_flag) {
		de_dbg(c, "[internal warning: Incorrect use of create_output_file]");
	}

	f = de_malloc(c, sizeof(dbuf));
	f->c = c;
	f->max_len_hard = c->max_output_file_size;
	f->is_managed = 1;

	if(fi && fi->is_directory) {
		is_directory = 1;
	}

	// Note that the skip paths in this first group leave before a file index
	// is assigned, so they do not consume a file ID.
	if(is_directory && !c->keep_dir_entries) {
		de_dbg(c, "skipping 'directory' file");
		f->btype = DBUF_TYPE_NULL;
		goto done;
	}

	if(c->extract_policy==DE_EXTRACTPOLICY_MAINONLY) {
		if(createflags&DE_CREATEFLAG_IS_AUX) {
			de_dbg(c, "skipping 'auxiliary' file");
			f->btype = DBUF_TYPE_NULL;
			goto done;
		}
	}
	else if(c->extract_policy==DE_EXTRACTPOLICY_AUXONLY) {
		if(!(createflags&DE_CREATEFLAG_IS_AUX)) {
			de_dbg(c, "skipping 'main' file");
			f->btype = DBUF_TYPE_NULL;
			goto done;
		}
	}

	// Assign this file the next sequential index.
	file_index = c->file_count;
	c->file_count++;

	basefn = c->base_output_filename ? c->base_output_filename : "output";

	// If the finfo carries a name, make a UTF-8 copy of it (the extra byte
	// is for the terminating NUL). It is freed at 'done'.
	if(fi && ucstring_isnonempty(fi->file_name_internal)) {
		name_from_finfo_len = 1 + ucstring_count_utf8_bytes(fi->file_name_internal);
		name_from_finfo = de_malloc(c, name_from_finfo_len);
		ucstring_to_sz(fi->file_name_internal, name_from_finfo, (size_t)name_from_finfo_len, 0, DE_ENCODING_UTF8);
	}

	// Construct the output filename in nbuf. There are three cases:
	// a root directory in archive mode (named "."), an original filename
	// used as-is in archive mode, and the default "<base>.<index>.<suffix>".
	if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && !c->base_output_filename &&
		fi && fi->is_directory &&
		(fi->is_root_dir || (fi->detect_root_dot_dir && fi->orig_name_was_dot)))
	{
		de_strlcpy(nbuf, ".", sizeof(nbuf));
	}
	else if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && !c->base_output_filename &&
		fi && fi->original_filename_flag && name_from_finfo)
	{
		// TODO: This is a "temporary" hack to allow us to, when both reading from
		// and writing to an archive format, use some semblance of the correct
		// filename (instead of "output.xxx.yyy").
		// There are some things that we don't handle optimally, such as
		// subdirectories.
		// A major redesign of the file naming logic would be good.
		de_strlcpy(nbuf, name_from_finfo, sizeof(nbuf));
	}
	else {
		char fn_suffix[256];

		// Choose the suffix from whichever of (name, ext, directory flag) are
		// available, falling back to "bin".
		if(ext && name_from_finfo) {
			de_snprintf(fn_suffix, sizeof(fn_suffix), "%s.%s", name_from_finfo, ext);
		}
		else if(ext) {
			de_strlcpy(fn_suffix, ext, sizeof(fn_suffix));
		}
		else if(is_directory && name_from_finfo) {
			de_snprintf(fn_suffix, sizeof(fn_suffix), "%s.dir", name_from_finfo);
		}
		else if(name_from_finfo) {
			de_strlcpy(fn_suffix, name_from_finfo, sizeof(fn_suffix));
		}
		else if(is_directory) {
			de_strlcpy(fn_suffix, "dir", sizeof(fn_suffix));
		}
		else {
			de_strlcpy(fn_suffix, "bin", sizeof(fn_suffix));
		}

		de_snprintf(nbuf, sizeof(nbuf), "%s.%03d.%s", basefn, file_index, fn_suffix);
	}

	f->name = de_strdup(c, nbuf);

	if(fi) {
		// The finfo object passed to us at file creation is not required to
		// remain valid, so make a copy of anything in it that we might need
		// later.
		f->fi_copy = de_finfo_create(c);
		finfo_shallow_copy(c, fi, f->fi_copy);

		// Here's where we respect the -intz option, by using it to convert to
		// UTC in some cases.
		if(f->fi_copy->mod_time.is_valid && f->fi_copy->mod_time.tzcode==DE_TZCODE_LOCAL &&
			c->input_tz_offs_seconds!=0)
		{
			de_timestamp_cvt_to_utc(&f->fi_copy->mod_time, -c->input_tz_offs_seconds);
		}

		if(f->fi_copy->image_mod_time.is_valid && f->fi_copy->image_mod_time.tzcode==DE_TZCODE_LOCAL &&
			c->input_tz_offs_seconds!=0)
		{
			de_timestamp_cvt_to_utc(&f->fi_copy->image_mod_time, -c->input_tz_offs_seconds);
		}
	}

	// Files outside the requested index range are discarded. Unlike the
	// earlier skip paths, these files have already been assigned an index.
	if(file_index < c->first_output_file) {
		f->btype = DBUF_TYPE_NULL;
		goto done;
	}

	// A negative max_output_files disables this upper limit.
	if(c->max_output_files>=0 &&
		file_index >= c->first_output_file + c->max_output_files)
	{
		f->btype = DBUF_TYPE_NULL;
		goto done;
	}

	// The file is counted as extracted, and added to the extraction list,
	// before the list-mode check and before any attempt to open it.
	c->num_files_extracted++;

	if(c->extrlist_dbuf) {
		dbuf_printf(c->extrlist_dbuf, "%s\n", f->name);
		dbuf_flush(c->extrlist_dbuf);
	}

	// In list mode, only print the name (optionally prefixed by the file
	// index); nothing is written.
	if(c->list_mode) {
		f->btype = DBUF_TYPE_NULL;
		if(c->list_mode_include_file_id) {
			de_msg(c, "%d:%s", file_index, f->name);
		}
		else {
			de_msg(c, "%s", f->name);
		}
		goto done;
	}

	// Select the output backend, based on the output style.
	if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && c->archive_fmt==DE_ARCHIVEFMT_TAR) {
		de_info(c, "Adding %s to TAR file", f->name);
		f->btype = DBUF_TYPE_ODBUF;
		// A dummy max_len_hard value. The parent will do the checking.
		f->max_len_hard = DE_DUMMY_MAX_FILE_SIZE;
		f->writing_to_tar_archive = 1;
		de_tar_start_member_file(c, f);
	}
	else if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE) { // ZIP
		i64 initial_alloc;
		de_info(c, "Adding %s to ZIP file", f->name);
		// The member file is accumulated in a memory buffer.
		f->btype = DBUF_TYPE_MEMBUF;
		f->max_len_hard = DE_MAX_MEMBUF_SIZE;
		if(is_directory) {
			// A directory entry is not expected to have any data associated
			// with it (besides the files it contains).
			initial_alloc = 16;
		}
		else {
			initial_alloc = 65536;
		}
		f->membuf_buf = de_malloc(c, initial_alloc);
		f->membuf_alloc = initial_alloc;
		f->write_memfile_to_zip_archive = 1;
	}
	else if(c->output_style==DE_OUTPUTSTYLE_STDOUT) {
		de_info(c, "Writing %s to [stdout]", f->name);
		f->btype = DBUF_TYPE_STDOUT;
		// TODO: Should we increase f->max_len_hard?
		f->fp = stdout;
	}
	else {
		de_info(c, "Writing %s", f->name);
		f->btype = DBUF_TYPE_OFILE;
		f->fp = de_fopen_for_write(c, f->name, msgbuf, sizeof(msgbuf),
			c->overwrite_mode, 0);

		// On failure, report the message left in msgbuf, and return a
		// DBUF_TYPE_NULL dbuf instead.
		if(!f->fp) {
			de_err(c, "Failed to write %s: %s", f->name, msgbuf);
			f->btype = DBUF_TYPE_NULL;
		}
	}

done:
	// name_from_finfo is still NULL if we got here before it was allocated.
	de_free(c, name_from_finfo);
	return f;
}
// Parse one EBML element located at pos1, with at most nbytes_avail bytes
// available to it: read its ID and data length, print debug info, and decode
// its data according to the element's registered type and handler.
//
// On success, returns 1 and sets *bytes_used to the total size of the element
// (ID + length field + data). On failure, returns 0, and *bytes_used is not
// modified.
static int do_element(deark *c, lctx *d, i64 pos1, i64 nbytes_avail, i64 *bytes_used)
{
	i64 ele_id;
	i64 ele_dlen;
	i64 pos = pos1;
	int retval = 0;
	const struct ele_id_info *einfo;
	const char *ele_name;
	int saved_indent_level;
	unsigned int dtype;
	int decode_it = 0;
	int show_skip_msg = 0;
	int call_at_start;
	int call_at_end;
	int len_ret;
	struct handler_params hp;
	char tmpbuf[80];

	de_dbg_indent_save(c, &saved_indent_level);

	de_dbg(c, "element at %"I64_FMT", max_len=%"I64_FMT, pos1, nbytes_avail);
	de_dbg_indent(c, 1);

	if(1!=get_var_size_int(c->infile, &ele_id, &pos, nbytes_avail)) {
		de_err(c, "Failed to read ID of element at %"I64_FMT, pos1);
		goto done;
	}

	einfo = find_ele_id_info(ele_id);
	ele_name = (einfo && einfo->name) ? einfo->name : "?";
	// The low 8 bits of the flags field hold the data type.
	dtype = einfo ? (einfo->flags & 0xff) : 0;

	de_dbg(c, "id: 0x%"U64_FMTx" (%s)", (u64)ele_id, ele_name);
	if(d->show_encoded_id) {
		print_encoded_id(c, d, pos1, pos-pos1);
	}

	len_ret = get_var_size_int(c->infile, &ele_dlen, &pos, pos1+nbytes_avail-pos);
	switch(len_ret) {
	case 1:
		de_snprintf(tmpbuf, sizeof(tmpbuf), "%"I64_FMT, ele_dlen);
		break;
	case 2:
		// Unknown length: provisionally, everything up to the end of file.
		ele_dlen = c->infile->len - pos;
		de_strlcpy(tmpbuf, "unknown", sizeof(tmpbuf));
		break;
	default:
		de_err(c, "Failed to read length of element at %"I64_FMT, pos1);
		goto done;
	}

	de_dbg(c, "data at %"I64_FMT", dlen=%s, type=%s", pos, tmpbuf, get_type_name(dtype));

	if(len_ret==2) {
		// EBML does not have any sort of end-of-master-element marker, which
		// presents a problem when a master element has an unknown length.
		//
		// EBML's "solution" is this:
		// "The end of an Unknown-Sized Element is determined by whichever
		// comes first: the end of the file or the beginning of the next EBML
		// Element, defined by this document or the corresponding EBML Schema,
		// that is not independently valid as Descendant Element of the
		// Unknown-Sized Element."
		//
		// This would appear to require a sophisticated, high-level algorithm
		// with 100% complete knowledge of the latest version of the specific
		// application format. We do not have such an algorithm.
		de_err(c, "EBML files with unknown-length elements are not supported");
		goto done;
	}

	if(pos + ele_dlen > c->infile->len) {
		de_err(c, "Element at %"I64_FMT" goes beyond end of file", pos1);
		goto done;
	}

	// Decide whether to decode this element. Unrecognized elements are never
	// decoded. Flag 0x0200 suppresses decoding silently; flag 0x0100
	// suppresses it, with a message, unless the debug level is at least 2.
	if(einfo) {
		if(einfo->flags & 0x0200) {
			decode_it = 0;
		}
		else if((einfo->flags & 0x0100) && c->debug_level<2) {
			decode_it = 0;
			show_skip_msg = 1;
		}
		else {
			decode_it = 1;
		}
	}

	// The handler, if there is one, runs before the default decoding, and
	// again afterward if flag 0x0800 is set.
	call_at_start = (decode_it && einfo && einfo->hfn) ? 1 : 0;
	call_at_end = (call_at_start && (einfo->flags & 0x0800)) ? 1 : 0;

	if(call_at_start) {
		de_zeromem(&hp, sizeof(struct handler_params));
		hp.dpos = pos;
		hp.dlen = ele_dlen;
		einfo->hfn(c, d, &hp);
	}

	if(!decode_it) {
		if(show_skip_msg) {
			de_dbg(c, "[not decoding this element]");
		}
	}
	else {
		switch(dtype) {
		case TY_m:
			do_element_sequence(c, d, pos, ele_dlen);
			break;
		case TY_u:
			decode_uint(c, d, einfo, pos, ele_dlen);
			break;
		case TY_f:
			decode_float(c, d, einfo, pos, ele_dlen);
			break;
		case TY_8:
			decode_string(c, d, einfo, pos, ele_dlen, DE_ENCODING_UTF8);
			break;
		case TY_s:
			decode_string(c, d, einfo, pos, ele_dlen, DE_ENCODING_PRINTABLEASCII);
			break;
		case TY_d:
			decode_date(c, d, einfo, pos, ele_dlen);
			break;
		}
	}

	if(call_at_end) {
		// Use a freshly zeroed parameter block for the second call.
		de_zeromem(&hp, sizeof(struct handler_params));
		hp.dpos = pos;
		hp.dlen = ele_dlen;
		hp.end_flag = 1;
		einfo->hfn(c, d, &hp);
	}

	pos += ele_dlen;
	*bytes_used = pos - pos1;
	retval = 1;

done:
	de_dbg_indent_restore(c, saved_indent_level);
	return retval;
}