// Sniffs the first bytes of the image at pg->image_pos and extracts the
// image_len bytes found there as either a ".jp2" or a ".png" file, named
// after pg->filename_token. Emits an error if neither signature matches.
static void do_extract_png_or_jp2(deark *c, lctx *d, struct page_ctx *pg)
{
	de_byte sig[8];
	const char *ext = NULL;
	de_finfo *fi;

	de_dbg(c, "Trying to extract file at %d\n", (int)pg->image_pos);

	// Read enough of the header to tell the two formats apart.
	de_read(sig, pg->image_pos, sizeof(sig));

	fi = de_finfo_create(c);
	de_finfo_set_name_from_sz(c, fi, pg->filename_token, DE_ENCODING_ASCII);

	if(sig[4]=='j' && sig[5]=='P') {
		ext = "jp2";
	}
	else if(sig[0]==0x89 && sig[1]==0x50) {
		ext = "png";
	}

	if(ext) {
		dbuf_create_file_from_slice(c->infile, pg->image_pos, pg->image_len, ext, fi, 0);
	}
	else {
		de_err(c, "(Image #%d) Unidentified file format\n", pg->image_num);
	}

	de_finfo_destroy(c, fi);
}
// Writes the pending attachment to an output file, provided an attachment
// context exists and its data position is nonzero. In all cases, finishes
// by calling destroy_attachment_data().
static void handler_attachedfile_end(deark *c, lctx *d)
{
	de_finfo *fi = NULL;

	if(d->attachmentctx && d->attachmentctx->data_pos!=0) {
		int use_stored_name;

		fi = de_finfo_create(c);

		// TODO: We could do a better job of constructing filenames in various
		// situations.
		use_stored_name = (d->attachmentctx->filename &&
			(d->attachmentctx->filename->len > 0) &&
			c->filenames_from_file);

		if(use_stored_name) {
			de_finfo_set_name_from_ucstring(c, fi, d->attachmentctx->filename, 0);
		}
		else {
			// No usable name: fall back to a generic one.
			de_finfo_set_name_from_sz(c, fi, "bin", 0, DE_ENCODING_UTF8);
		}

		dbuf_create_file_from_slice(c->infile, d->attachmentctx->data_pos,
			d->attachmentctx->data_len, NULL, fi, DE_CREATEFLAG_IS_AUX);
	}

	de_finfo_destroy(c, fi);
	destroy_attachment_data(c, d);
}
// Extracts a run of text to a ".txt" file. Nothing is written unless
// extract_level is at least 2, the text is non-empty, and the run ends
// at or before the end of the input file.
static void do_text_data(deark *c, lctx *d, de_finfo *fi,
	de_int64 text_pos, de_int64 text_len)
{
	if(c->extract_level>=2 &&
		text_len>=1 &&
		text_pos + text_len <= c->infile->len)
	{
		dbuf_create_file_from_slice(c->infile, text_pos, text_len, "txt", fi, 0);
	}
}
// Extracts the embedded profile described by d->profile_offset and
// d->profile_size to an auxiliary ".icc" file.
static void do_read_embedded_profile(deark *c, lctx *d)
{
	// Values are narrowed to int only for the debug message.
	const int dbg_offset = (int)d->profile_offset;
	const int dbg_size = (int)d->profile_size;

	de_dbg(c, "embedded profile at %d, size=%d\n", dbg_offset, dbg_size);

	de_dbg_indent(c, 1);
	dbuf_create_file_from_slice(c->infile, d->profile_offset, d->profile_size,
		"icc", NULL, DE_CREATEFLAG_IS_AUX);
	de_dbg_indent(c, -1);
}
// flags: // 0 = default behavior (currently: decode unless -opt extractplist was used) void de_fmtutil_handle_plist(deark *c, dbuf *f, i64 pos, i64 len, de_finfo *fi, unsigned int flags) { if(de_get_ext_option_bool(c, "extractplist", 0)) { dbuf_create_file_from_slice(f, pos, len, fi?NULL:"plist", fi, DE_CREATEFLAG_IS_AUX); return; } de_run_module_by_id_on_slice(c, "plist", NULL, f, pos, len); }
// Extracts one lump (dlen bytes at dpos) to a file named after srd->str.
// Lumps that are empty, or that do not lie entirely inside the input file,
// are silently skipped.
static void do_lump_extract(deark *c, lctx *d, i64 dpos, i64 dlen,
	struct de_stringreaderdata *srd)
{
	de_finfo *fi;
	int in_bounds;

	// 0-length lumps are assumed to be special "virtual" lumps.
	if(dlen<=0) return;

	in_bounds = (dpos>=0 &&
		dpos<c->infile->len &&
		dpos+dlen<=c->infile->len);
	if(!in_bounds) return;

	fi = de_finfo_create(c);
	de_finfo_set_name_from_ucstring(c, fi, srd->str, 0);
	fi->original_filename_flag = 1;

	dbuf_create_file_from_slice(c->infile, dpos, dlen, NULL, fi, 0);

	de_finfo_destroy(c, fi);
}
// If oparams is not NULL, if must be initialized by the caller. If the data is // decoded, oparams will be used by the submodule, and values may be returned in // it. // flags: // 0 = default behavior (currently: always decode) // 1 = always write to file // 2 = this came from our TIFF-encapsulated format void de_fmtutil_handle_photoshop_rsrc2(deark *c, dbuf *f, i64 pos, i64 len, unsigned int flags, struct de_module_out_params *oparams) { int should_decode; int should_extract; int extract_fmt = 1; // 0=raw, 1=TIFF-wrapped if(flags&0x1) { should_decode = 0; should_extract = 1; } else if(de_get_ext_option_bool(c, "extract8bim", 0)) { should_extract = 1; should_decode = 0; if(flags&0x2) { // Avoid "extracting" in a way that would just recreate the exact same file. extract_fmt = 0; } } else { should_decode = 1; should_extract = 0; } if(should_decode) { de_module_params *mparams = NULL; mparams = de_malloc(c, sizeof(de_module_params)); mparams->in_params.codes = "R"; if(oparams) { // Since mparams->out_params is an embedded struct, not a pointer, // we have to copy oparam's fields to and from it. mparams->out_params = *oparams; // struct copy } de_run_module_by_id_on_slice(c, "psd", mparams, f, pos, len); if(oparams) { *oparams = mparams->out_params; // struct copy } de_free(c, mparams); } if(should_extract && extract_fmt==0) { dbuf_create_file_from_slice(f, pos, len, "8bim", NULL, DE_CREATEFLAG_IS_AUX); } else if(should_extract && extract_fmt==1) { wrap_in_tiff(c, f, pos, len, "Deark extracted 8BIM", 34377, "8bimtiff", DE_CREATEFLAG_IS_AUX); } }
// Handle some box types that might be common to multiple formats. // This function should be called as needed by the client's box handler function. // TODO: A way to identify (name) the boxes that we handle here. int de_fmtutil_default_box_handler(deark *c, struct de_boxesctx *bctx) { struct de_boxdata *curbox = bctx->curbox; if(curbox->is_uuid) { if(!de_memcmp(curbox->uuid, "\xb1\x4b\xf8\xbd\x08\x3d\x4b\x43\xa5\xae\x8c\xd7\xd5\xa6\xce\x03", 16)) { de_dbg(c, "GeoTIFF data at %d, len=%d", (int)curbox->payload_pos, (int)curbox->payload_len); dbuf_create_file_from_slice(bctx->f, curbox->payload_pos, curbox->payload_len, "geo.tif", NULL, DE_CREATEFLAG_IS_AUX); } else if(!de_memcmp(curbox->uuid, "\xbe\x7a\xcf\xcb\x97\xa9\x42\xe8\x9c\x71\x99\x94\x91\xe3\xaf\xac", 16)) { de_dbg(c, "XMP data at %d, len=%d", (int)curbox->payload_pos, (int)curbox->payload_len); dbuf_create_file_from_slice(bctx->f, curbox->payload_pos, curbox->payload_len, "xmp", NULL, DE_CREATEFLAG_IS_AUX); } else if(!de_memcmp(curbox->uuid, "\x2c\x4c\x01\x00\x85\x04\x40\xb9\xa0\x3e\x56\x21\x48\xd6\xdf\xeb", 16)) { de_dbg(c, "Photoshop resources at %d, len=%d", (int)curbox->payload_pos, (int)curbox->payload_len); de_fmtutil_handle_photoshop_rsrc(c, bctx->f, curbox->payload_pos, curbox->payload_len, 0x0); } else if(!de_memcmp(curbox->uuid, "\x05\x37\xcd\xab\x9d\x0c\x44\x31\xa7\x2a\xfa\x56\x1f\x2a\x11\x3e", 16)) { de_dbg(c, "Exif data at %d, len=%d", (int)curbox->payload_pos, (int)curbox->payload_len); de_fmtutil_handle_exif(c, curbox->payload_pos, curbox->payload_len); } } return 1; }
// Extracts the metafile stored in a picture paragraph as a ".wmf" file.
// The header size (16-bit, at offset 30) and data size (32-bit, at offset 32)
// are read from the paragraph; nothing is written if together they exceed
// the paragraph length.
static void do_picture_metafile(deark *c, lctx *d, struct para_info *pinfo)
{
	const i64 para_start = pinfo->thisparapos;
	i64 hdr_size;
	i64 data_size;

	hdr_size = de_getu16le(para_start+30);
	de_dbg(c, "cbHeader: %d", (int)hdr_size);

	data_size = de_getu32le(para_start+32);
	de_dbg(c, "cbSize: %d", (int)data_size);

	if(hdr_size+data_size > pinfo->thisparalen) {
		return;
	}

	dbuf_create_file_from_slice(c->infile, para_start+hdr_size, data_size, "wmf", NULL, 0);
}
// Processes one embedded file. Files that look like PIC images are always
// handled (converted if d->convert_images is set, otherwise extracted as
// ".pic"); any other non-empty file is extracted as an auxiliary ".bin" file
// only when extract_level is at least 2.
static void handle_embedded_file(deark *c, lctx *d, de_int64 offset, de_int64 len)
{
	de_byte sig[16];
	int is_pic;
	int extract_this_file;

	de_dbg(c, "embedded file at %d, len=%d\n", (int)offset, (int)len);

	// As far as I can tell, there's no way to tell the type of an
	// embedded file, except by sniffing it.
	de_read(sig, offset, 16);

	// Looks like a PIC file?
	is_pic = (len>=8 && !de_memcmp(sig, "PIC\xdc\x30\x30", 6));

	extract_this_file = is_pic || (len>0 && c->extract_level>=2);
	if(!extract_this_file) {
		de_dbg(c, "(not extracting this file)\n");
		return;
	}

	if(is_pic && d->convert_images) {
		// Convert PIC to PNG.
		// For consistency, this option shouldn't exist. But I'm not sure that
		// PIC files embedded in APP files are really the same as PIC files on
		// their own. They might need special handling. Until I'm sure they don't,
		// I'll leave this option here.
		de_run_module_by_id_on_slice(c, "psionpic", NULL, c->infile, offset, len);
		return;
	}

	// Just extract the file
	dbuf_create_file_from_slice(c->infile, offset, len,
		is_pic ? "pic" : "bin", NULL,
		is_pic ? 0 : DE_CREATEFLAG_IS_AUX);
}
// Handles a "static" OLE rendition starting at pos1 (which points to the
// 0x00000501 field). The type name string selects the handling:
//   "DIB"          - decoded by the "dib" module
//   "METAFILEPICT" - extracted as a ".wmf" file
//   "BITMAP"       - passed to do_static_bitmap()
// Any other type produces a warning.
//
// NOTE(review): this function never writes *bytes_consumed and always
// returns 0; that is preserved from the original. Confirm that callers
// expect this.
static int do_picture_ole_static_rendition(deark *c, lctx *d, struct para_info *pinfo,
	int rendition_idx, i64 pos1, i64 *bytes_consumed)
{
	i64 pos = pos1;
	i64 stringlen;
	struct de_stringreaderdata *srd_typename = NULL;

	pos += 4; // 0x00000501
	pos += 4; // "type" (probably already read by caller)

	stringlen = de_getu32le_p(&pos);
	srd_typename = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL,
		DE_ENCODING_ASCII);
	de_dbg(c, "typename: \"%s\"", ucstring_getpsz(srd_typename->str));
	pos += stringlen;

	if(!de_strcmp(srd_typename->sz, "DIB")) {
		pos += 12;
		de_dbg_indent(c, 1);
		de_run_module_by_id_on_slice(c, "dib", NULL, c->infile, pos,
			pinfo->thisparapos+pinfo->thisparalen-pos);
		de_dbg_indent(c, -1);
	}
	else if(!de_strcmp(srd_typename->sz, "METAFILEPICT")) {
		i64 dlen;

		pos += 8; // ??
		dlen = de_getu32le_p(&pos);
		de_dbg(c, "metafile size: %d", (int)dlen); // Includes "mfp", apparently
		pos += 8; // "mfp" struct

		// dlen comes from the file and includes the 8-byte "mfp" struct.
		// Anything not larger than that leaves no metafile data, and would
		// otherwise yield a zero or negative slice length.
		if(dlen>8) {
			dbuf_create_file_from_slice(c->infile, pos, dlen-8, "wmf", NULL, 0);
		}
		else {
			de_warn(c, "Invalid metafile size (%d)", (int)dlen);
		}
	}
	else if(!de_strcmp(srd_typename->sz, "BITMAP")) {
		do_static_bitmap(c, d, pinfo, pos);
	}
	else {
		de_warn(c, "Static OLE picture type \"%s\" is not supported",
			ucstring_getpsz(srd_typename->str));
	}

	de_destroy_stringreaderdata(c, srd_typename);
	return 0;
}
// Extracts Exif if extract_level>=2, or "extractexif" option is set. // Otherwise decodes. void de_fmtutil_handle_exif2(deark *c, i64 pos, i64 len, u32 *returned_flags, u32 *orientation, u32 *exifversion) { int user_opt; de_module_params *mparams = NULL; if(returned_flags) { *returned_flags = 0; } user_opt = de_get_ext_option_bool(c, "extractexif", -1); if(user_opt==1 || (c->extract_level>=2 && user_opt!=0)) { // Writing raw Exif data isn't very useful, but do so if requested. dbuf_create_file_from_slice(c->infile, pos, len, "exif.tif", NULL, DE_CREATEFLAG_IS_AUX); // Caller will have to reprocess the Exif file to extract anything from it. return; } mparams = de_malloc(c, sizeof(de_module_params)); mparams->in_params.codes = "E"; de_run_module_by_id_on_slice(c, "tiff", mparams, c->infile, pos, len); if(returned_flags) { // FIXME: It's an unfortunate bug that returned_flags does not work if // extract_level>=2, but for now there's no reasonable way to fix it. // We have to process -- not extract -- the Exif chunk if we want to // know what's in it. *returned_flags = mparams->out_params.flags; if((mparams->out_params.flags & 0x20) && orientation) { *orientation = mparams->out_params.uint1; } if((mparams->out_params.flags & 0x40) && exifversion) { *exifversion = mparams->out_params.uint2; } } de_free(c, mparams); }
// Extracts an OLE object of unrecognized type to a ".bin" file. The output
// name is "oleobj", followed by "." and the object's type name when that
// type name is non-empty.
static void extract_unknown_ole_obj(deark *c, lctx *d, i64 pos, i64 len,
	struct de_stringreaderdata *srd_typename)
{
	de_finfo *fi = de_finfo_create(c);
	de_ucstring *name = ucstring_create(c);

	ucstring_append_sz(name, "oleobj", DE_ENCODING_LATIN1);
	if(ucstring_isnonempty(srd_typename->str)) {
		ucstring_append_sz(name, ".", DE_ENCODING_LATIN1);
		ucstring_append_ucstring(name, srd_typename->str);
	}
	de_finfo_set_name_from_ucstring(c, fi, name, 0);

	dbuf_create_file_from_slice(c->infile, pos, len, "bin", fi, 0);

	ucstring_destroy(name);
	de_finfo_destroy(c, fi);
}
// Caption/abstract static void handle_2_120(deark *c, lctx *d, const struct ds_info *dsi, de_int64 pos, de_int64 len) { de_ucstring *s = NULL; dbuf *outf = NULL; int encoding; const char *fntoken; if(c->extract_level<2) { handle_text(c, d, dsi, pos, len); goto done; } // FIXME: There is currently no way to extract IPTC captions to files, // except when reading a raw IPTC file. If IPTC is embedded in some other // file (as it usually is), then the -a option will extract the entire // IPTC data, and we will never get here. fntoken = "caption.txt"; encoding = get_ds_encoding(c, d, dsi->recnum); if(encoding==DE_ENCODING_UNKNOWN) { // If the encoding is unknown, copy the raw bytes. dbuf_create_file_from_slice(c->infile, pos, len, fntoken, NULL, DE_CREATEFLAG_IS_AUX); goto done; } // If the encoding is known, convert to UTF-8. s = ucstring_create(c); dbuf_read_to_ucstring(c->infile, pos, len, s, 0, encoding); outf = dbuf_create_output_file(c, fntoken, NULL, DE_CREATEFLAG_IS_AUX); ucstring_write_as_utf8(c, s, outf, 1); done: if(outf) dbuf_close(outf); if(s) ucstring_destroy(s); }
// Either extract the IPTC data to a file, or drill down into it. // flags: // 0 = default behavior (currently: depends on c->extract_level and options) // 2 = this came from our TIFF-encapsulated format void de_fmtutil_handle_iptc(deark *c, dbuf *f, i64 pos, i64 len, unsigned int flags) { int should_decode; int should_extract; int user_opt; int extract_fmt = 1; // 0=raw, 1=TIFF-wrapped if(len<1) return; user_opt = de_get_ext_option_bool(c, "extractiptc", -1); if(user_opt==1 || (c->extract_level>=2 && user_opt!=0)) { should_decode = 0; should_extract = 1; if(flags&0x2) { // Avoid "extracting" in a way that would just recreate the exact same file. extract_fmt = 0; } } else { should_decode = 1; should_extract = 0; } if(should_decode) { de_run_module_by_id_on_slice(c, "iptc", NULL, f, pos, len); } if(should_extract && extract_fmt==0) { dbuf_create_file_from_slice(f, pos, len, "iptc", NULL, DE_CREATEFLAG_IS_AUX); } else if(should_extract && extract_fmt==1) { wrap_in_tiff(c, f, pos, len, "Deark extracted IPTC", 33723, "iptctiff", DE_CREATEFLAG_IS_AUX); } }
// An internal function that does the main work of do_text_field(). // TODO: Clean up the text field processing code. It's gotten too messy. static int do_unc_text_field(deark *c, lctx *d, struct text_chunk_ctx *tcc, int which_field, dbuf *srcdbuf, i64 pos, i64 bytes_avail, int is_nul_terminated, int encoding, i64 *bytes_consumed) { const char *name; int retval = 0; struct de_stringreaderdata *srd = NULL; *bytes_consumed = 0; if(bytes_avail<0) return 0; if(which_field==FIELD_MAIN && tcc->is_xmp) { // The main field is never NUL terminated, so we can do this right away. dbuf_create_file_from_slice(srcdbuf, pos, bytes_avail, "xmp", NULL, DE_CREATEFLAG_IS_AUX); retval = 1; goto done; } if(is_nul_terminated) { srd = dbuf_read_string(srcdbuf, pos, bytes_avail, DE_DBG_MAX_STRLEN, DE_CONVFLAG_STOP_AT_NUL, encoding); if(!srd->found_nul) goto done; *bytes_consumed = srd->bytes_consumed - 1; } else { i64 bytes_to_scan; *bytes_consumed = bytes_avail; bytes_to_scan = bytes_avail; if(bytes_to_scan>DE_DBG_MAX_STRLEN) bytes_to_scan = DE_DBG_MAX_STRLEN; srd = dbuf_read_string(srcdbuf, pos, bytes_to_scan, bytes_to_scan, 0, encoding); } if(which_field==FIELD_KEYWORD) { if(!de_strcmp(srd->sz, "XML:com.adobe.xmp")) { tcc->is_xmp = 1; } } switch(which_field) { case FIELD_KEYWORD: name="keyword"; break; case FIELD_LANG: name="language"; break; case FIELD_XKEYWORD: name="translated keyword"; break; default: name="text"; } if(which_field==FIELD_MAIN && tcc->is_im_generic_profile) { de_dbg(c, "generic profile type: %s", tcc->im_generic_profile_type_name?tcc->im_generic_profile_type_name:"?"); } if(!(which_field==FIELD_MAIN && tcc->suppress_debugstr)) { de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz(srd->str)); } retval = 1; if(which_field==FIELD_KEYWORD) { if(!de_strncmp(srd->sz, "Raw profile type ", 17)) { on_im_generic_profile_keyword(c, d, tcc, srd); } } if(which_field==FIELD_MAIN && tcc->is_im_generic_profile) { de_dbg_indent(c, 1); on_im_generic_profile_main(c, d, tcc, srcdbuf, pos, 
bytes_avail); de_dbg_indent(c, -1); goto done; } done: de_destroy_stringreaderdata(c, srd); return retval; }
// Module entry point. Parses the lead section and the two header structures
// that follow it, then extracts everything after them to a single output
// file. The output extension is chosen mainly by sniffing the first bytes
// of that data. If a package name was found and filenames_from_file is set,
// the output is named "<name>-<version>.<release>".
static void de_run_rpm(deark *c, de_module_params *mparams)
{
	lctx *d = NULL;
	i64 pos;
	u8 buf[8];
	const char *ext;
	i64 section_size = 0;
	de_finfo *fi = NULL;
	char filename[128];

	d = de_malloc(c, sizeof(lctx));

	if(!do_lead_section(c, d)) {
		goto done;
	}

	pos = 96;
	if(!do_header_structure(c, d, 1, pos, &section_size)) {
		goto done;
	}
	pos += section_size;

	// Header structures are 8-byte aligned. The first one always starts at
	// offset 96, so we don't have to worry about it. But we need to make
	// sure the second one is aligned.
	pos = ((pos + 7)/8)*8;

	if(!do_header_structure(c, d, 0, pos, &section_size)) {
		goto done;
	}
	pos += section_size;

	de_dbg(c, "data pos: %d", (int)pos);
	if(pos > c->infile->len) goto done;

	// There is usually a tag that indicates the compression format, but we
	// primarily figure out the format by sniffing its magic number, on the
	// theory that that's more reliable.
	// TODO: I think it's also theoretically possible that it could use an archive
	// format other than cpio.
	de_read(buf, pos, 8);

	if(buf[0]==0x1f && buf[1]==0x8b) {
		ext = "cpio.gz";
	}
	else if(buf[0]==0x42 && buf[1]==0x5a && buf[2]==0x68) {
		ext = "cpio.bz2";
	}
	else if(buf[0]==0xfd && buf[1]==0x37 && buf[2]==0x7a) {
		ext = "cpio.xz";
	}
	else if(d->cmpr_type==DE_RPM_CMPR_LZMA || buf[0]==0x5d) {
		ext = "cpio.lzma";
	}
	else {
		de_warn(c, "Unidentified compression or archive format");
		ext = "cpio.bin";
	}

	if(d->name_srd && c->filenames_from_file) {
		// Missing version/release fields are replaced by a placeholder.
		const char *version2 = "x";
		const char *release2 = "x";

		if(d->version_srd) version2 = d->version_srd->sz;
		if(d->release_srd) release2 = d->release_srd->sz;

		fi = de_finfo_create(c);
		de_snprintf(filename, sizeof(filename), "%s-%s.%s",
			d->name_srd->sz, version2, release2);
		de_finfo_set_name_from_sz(c, fi, filename, 0, DE_ENCODING_ASCII);
	}

	dbuf_create_file_from_slice(c->infile, pos, c->infile->len - pos, ext, fi, 0);

done:
	de_finfo_destroy(c, fi);
	if(d) {
		de_destroy_stringreaderdata(c, d->name_srd);
		de_destroy_stringreaderdata(c, d->release_srd);
		de_destroy_stringreaderdata(c, d->version_srd);
		de_free(c, d);
	}
}
// pos1 points to the ole_id field (should be 0x00000501). // Caller must have looked ahead to check the type. static int do_picture_ole_embedded_rendition(deark *c, lctx *d, struct para_info *pinfo, int rendition_idx, i64 pos1, i64 *bytes_consumed) { i64 pos = pos1; i64 stringlen; i64 data_len; u8 buf[16]; struct de_stringreaderdata *srd_typename = NULL; struct de_stringreaderdata *srd_filename = NULL; struct de_stringreaderdata *srd_params = NULL; pos += 4; // 0x00000501 pos += 4; // "type" (probably already read by caller) stringlen = de_getu32le_p(&pos); srd_typename = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII); de_dbg(c, "typename: \"%s\"", ucstring_getpsz(srd_typename->str)); pos += stringlen; stringlen = de_getu32le_p(&pos); srd_filename = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII); de_dbg(c, "filename: \"%s\"", ucstring_getpsz(srd_filename->str)); pos += stringlen; stringlen = de_getu32le_p(&pos); srd_params = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII); de_dbg(c, "params: \"%s\"", ucstring_getpsz(srd_params->str)); pos += stringlen; data_len = de_getu32le_p(&pos); de_dbg(c, "embedded ole rendition data: pos=%d, len=%d", (int)pos, (int)data_len); // TODO: I don't know the extent to which it's better to sniff the data, or // rely on the typename. 
de_read(buf, pos, sizeof(buf)); if(!de_strcmp(srd_typename->sz, "CDraw") && !de_memcmp(&buf[0], (const void*)"RIFF", 4) && !de_memcmp(&buf[8], (const void*)"CDR", 3) ) { // Looks like CorelDRAW dbuf_create_file_from_slice(c->infile, pos, data_len, "cdr", NULL, 0); } else if(buf[0]=='B' && buf[1]=='M') { // TODO: Detect true length of data dbuf_create_file_from_slice(c->infile, pos, data_len, "bmp", NULL, 0); } else { if(d->extract_ole) { extract_unknown_ole_obj(c, d, pos, data_len, srd_typename); } else { de_warn(c, "Unknown/unsupported type of OLE object (\"%s\") at %d", ucstring_getpsz(srd_typename->str), (int)pos1); } } pos += data_len; *bytes_consumed = pos - pos1; de_destroy_stringreaderdata(c, srd_typename); de_destroy_stringreaderdata(c, srd_filename); de_destroy_stringreaderdata(c, srd_params); return 1; }