// Handles one "picture" paragraph.
// Reads the 16-bit storage type at the start of the paragraph and passes the
// paragraph to the matching decoder. If an HTML document is being written
// (d->html_outf is set), also emits a short paragraph that identifies the
// output file(s) created for this picture.
static void do_picture(deark *c, lctx *d, struct para_info *pinfo)
{
	unsigned int storage_type;
	int first_id;
	int next_id;
	char id_str[24];
	i64 pos = pinfo->thisparapos;

	// Need at least the 2-byte storage type field.
	if(pinfo->thisparalen<2) return;

	storage_type = (unsigned int)de_getu16le(pos);
	de_dbg(c, "picture storage type: 0x%04x (%s)", storage_type,
		get_picture_storage_type_name(storage_type));

	// Snapshot the ID that the next output file will receive, so we can
	// tell afterward how many files the decoder created.
	first_id = get_next_output_file_id(c);

	if(storage_type==0x88) {
		do_picture_metafile(c, d, pinfo);
	}
	else if(storage_type==0xe3) {
		do_picture_bitmap(c, d, pinfo);
	}
	else if(storage_type==0xe4) {
		do_picture_ole(c, d, pinfo);
	}
	else {
		de_err(c, "Picture storage type 0x%04x not supported", storage_type);
	}

	if(!d->html_outf) return;

	// The HTML document should mention the image file ID number(s), so the
	// user can tell which image belongs where. The high level functions that
	// create output files (e.g. de_convert_and_write_image_bilevel) have no
	// way to return the ID of the file they created, so as a workaround we
	// compare the global file ID counter before and after decoding.
	do_emit_raw_sz(c, d, pinfo, "<p class=r>picture");

	next_id = get_next_output_file_id(c);
	if(next_id == first_id) {
		// The counter did not move: nothing was written.
		de_strlcpy(id_str, " (not extracted)", sizeof(id_str));
	}
	else if(next_id == first_id+1) {
		// Exactly one file.
		de_snprintf(id_str, sizeof(id_str), " %d", first_id);
	}
	else {
		// A range of files ("pictures N-M").
		de_snprintf(id_str, sizeof(id_str), "s %d-%d", first_id, next_id-1);
	}

	do_emit_raw_sz(c, d, pinfo, id_str);
	do_emit_raw_sz(c, d, pinfo, "</p>\n");
}
// (xpos,ypos) is the lower-right corner // (or the bottom-center, if hcenter==1). static void draw_number(deark *c, de_bitmap *img, struct de_bitmap_font *dfont, i64 n, i64 xpos1, i64 ypos1, unsigned int flags) { char buf[32]; i64 len; i64 i; i64 xpos_start; i64 xpos, ypos; if(flags & DNFLAG_HEX) { if(flags & DNFLAG_LEADING_ZEROES) de_snprintf(buf, sizeof(buf), "%04X", (unsigned int)n); else de_snprintf(buf, sizeof(buf), "%X", (unsigned int)n); } else { de_snprintf(buf, sizeof(buf), "%u", (unsigned int)n); } len = (i64)de_strlen(buf); if(flags & DNFLAG_HCENTER) xpos_start = xpos1-(dfont->nominal_width*len)/2; else xpos_start = xpos1-dfont->nominal_width*len; // Make sure number doesn't go beyond the image if(xpos_start + dfont->nominal_width*len > img->width) { xpos_start = img->width - dfont->nominal_width*len; } for(i=len-1; i>=0; i--) { xpos = xpos_start + dfont->nominal_width*i; ypos = ypos1-dfont->nominal_height; de_font_paint_character_cp(c, img, dfont, buf[i], xpos, ypos, DE_MAKE_GRAY(255), 0, DE_PAINTFLAG_TRNSBKGD); } }
// Reads the palette that follows the fixed part of the header (at offset 22)
// into d->pal[].
//
// Each palette entry is three 16-bit big-endian samples (red, green, blue),
// i.e. 6 bytes, and each sample is scaled from a 0..1000 range to 0..255.
// The number of entries read is the smallest of: the number that fit in the
// header, 2^nplanes, and 256. Nothing is read if nplanes is greater than 8.
// A warning is issued (once) if any sample exceeds 1000.
static void read_palette_ximg(deark *c, lctx *d)
{
	i64 pal_entries_in_file;
	i64 pal_entries_to_read;
	i64 i;
	i64 cr1, cg1, cb1;
	u8 cr, cg, cb;
	int range_warned = 0;
	char tmps[64];

	// An entry occupies 6 bytes (three 16-bit values), so that is the
	// divisor to use when working from a header size measured in bytes.
	pal_entries_in_file = (d->header_size_in_bytes-22)/6;
	if(pal_entries_in_file<1) return;

	// The range check also keeps the shift count valid.
	if(d->nplanes>=0 && d->nplanes<=8)
		pal_entries_to_read = (i64)(1<<((unsigned int)d->nplanes));
	else
		pal_entries_to_read = 0;

	// Never read past the end of the header, or past the end of d->pal[].
	if(pal_entries_to_read>pal_entries_in_file)
		pal_entries_to_read = pal_entries_in_file;
	if(pal_entries_to_read>256)
		pal_entries_to_read = 256;

	de_dbg(c, "palette at %d", 22);
	de_dbg_indent(c, 1);
	for(i=0; i<pal_entries_to_read; i++) {
		cr1 = de_getu16be(22 + 6*i);
		cg1 = de_getu16be(22 + 6*i + 2);
		cb1 = de_getu16be(22 + 6*i + 4);

		cr = de_scale_1000_to_255(cr1);
		cg = de_scale_1000_to_255(cg1);
		cb = de_scale_1000_to_255(cb1);

		d->pal[i] = DE_MAKE_RGB(cr, cg, cb);

		// Show the original (unscaled) samples alongside the result.
		de_snprintf(tmps, sizeof(tmps), "(%4d,%4d,%4d) "DE_CHAR_RIGHTARROW" ",
			(int)cr1, (int)cg1, (int)cb1);
		de_dbg_pal_entry2(c, (int)i, d->pal[i], tmps, NULL, NULL);

		// TODO: Maybe some out-of-range colors have special meaning?
		if(!range_warned && (cr1>1000 || cg1>1000 || cb1>1000)) {
			de_warn(c, "Bad palette color #%d: is (%d,%d,%d), max=(1000,1000,1000).",
				(int)i, (int)cr1, (int)cg1, (int)cb1);
			range_warned=1;
		}
	}
	de_dbg_indent(c, -1);
}
// Writes a printable description of the header-type flag bits to buf
// (capacity buflen), and returns buf.
// If hdr_type is 0 the result is an empty string. Otherwise it is a space
// followed by a parenthesized list of the names of the recognized bits that
// are set.
static char *get_hdrtype_descr(deark *c, char *buf, size_t buflen, u8 hdr_type)
{
	static const struct {
		u8 mask;
		const char *label;
	} known_flags[] = {
		{ 0x01, "continuation page" },
		{ 0x02, "first page" },
		{ 0x04, "last page" }
	};
	de_ucstring *names;
	size_t k;

	if(hdr_type==0) {
		de_strlcpy(buf, "", buflen);
		return buf;
	}

	names = ucstring_create(c);
	for(k=0; k<sizeof(known_flags)/sizeof(known_flags[0]); k++) {
		if(hdr_type & known_flags[k].mask) {
			ucstring_append_flags_item(names, known_flags[k].label);
		}
	}
	de_snprintf(buf, buflen, " (%s)", ucstring_getpsz(names));
	ucstring_destroy(names);
	return buf;
}
// Builds an output filename of the form "<facename>-<pointsize>" and stores it
// in a newly created finfo object (d->fi).
// Does nothing if there is no face name offset (d->dfFace<1), or if filenames
// are not to be taken from the file.
static void read_face_name(deark *c, lctx *d)
{
	char facename[50];
	char name_with_size[50];

	if(d->dfFace<1 || !c->filenames_from_file) return;

	// The face name is a NUL-terminated string with no defined maximum
	// length, though Windows font face names are traditionally quite short.
	// We read only as much as fits in our buffer.
	dbuf_read_sz(c->infile, d->dfFace, facename, sizeof(facename));

	de_snprintf(name_with_size, sizeof(name_with_size), "%s-%d",
		facename, (int)d->dfPoints);

	d->fi = de_finfo_create(c);
	de_finfo_set_name_from_sz(c, d->fi, name_with_size, DE_ENCODING_ASCII);
}
// To be called when we encounter a page that is not the first page of // its bitstream (or at EOF). static void declare_ogg_format(deark *c, lctx *d) { char tmps[80]; const char *name = NULL; if(d->format_declared) return; d->format_declared = 1; // There's no nice way, that I know of, to characterize the contents of an Ogg // file. But I think it's worth trying. if(d->bitstream_count<1) { // If there are zero streams : "other" } else if(d->found_ogm) { // else if there's an OGM stream of any kind... name="OGM"; } else if(d->found_skeleton) { // else If there's a Skeleton stream... name="Skeleton"; } else if(d->first_stream_type_valid && d->first_stream_sti && !d->has_unknown_or_multiple_stream_types) { // else if all streams are the same known type: that stream type name = d->first_stream_sti->name; } else if(d->found_theora && d->found_vorbis && !d->has_non_vorbis_non_theora_stream) { // else if there are Theora and Vorbis streams and nothing else... name="Theora+Vorbis"; } else if(d->found_theora) { // else if there's a Theora stream... name="Theora+other"; } // (else "other") de_snprintf(tmps, sizeof(tmps), "Ogg %s", name?name:"(other)"); de_declare_fmt(c, tmps); }
// Processes one icon "page".
// Masks are only reported; embedded files are passed to
// do_extract_png_or_jp2(); regular images (with or without a mask) are
// size-checked and then passed to the decoder for their bit depth.
// Also sets pg->filename_token, and pg->rowspan for regular images.
static void do_icon(deark *c, lctx *d, struct page_ctx *pg)
{
	de_int64 expected_image_size;

	if(!pg->type_info) return; // Shouldn't happen.

	de_strlcpy(pg->filename_token, "", sizeof(pg->filename_token));

	if(pg->type_info->image_type==IMGTYPE_MASK) {
		de_dbg(c, "transparency mask\n");
		return;
	}

	if(pg->type_info->image_type==IMGTYPE_EMBEDDED_FILE) {
		de_snprintf(pg->filename_token, sizeof(pg->filename_token), "%dx%d",
			(int)pg->type_info->width, (int)pg->type_info->height);
		do_extract_png_or_jp2(c, d, pg);
		return;
	}

	if(pg->type_info->image_type!=IMGTYPE_IMAGE &&
		pg->type_info->image_type!=IMGTYPE_IMAGE_AND_MASK)
	{
		return;
	}

	// From here on, it's a regular image (or an image+mask).

	// Note - This pg->rowspan is arguably incorrect for 24-bit images, since
	// rows aren't stored contiguously.
	pg->rowspan = ((pg->type_info->bpp * pg->type_info->width)+7)/8;

	expected_image_size = pg->rowspan * pg->type_info->height;
	if(pg->type_info->image_type==IMGTYPE_IMAGE_AND_MASK) {
		expected_image_size *= 2;
	}

	// 24-bit images are treated as compressed, so their size isn't checked.
	if(pg->type_info->bpp!=24) {
		if(pg->image_len < expected_image_size) {
			de_err(c, "(Image #%d) Premature end of image (expected %d bytes, found %d)\n",
				pg->image_num, (int)expected_image_size, (int)pg->image_len);
			return;
		}
		if(pg->image_len > expected_image_size) {
			de_warn(c, "(Image #%d) Extra image data found (expected %d bytes, found %d)\n",
				pg->image_num, (int)expected_image_size, (int)pg->image_len);
		}
	}

	find_mask(c, d, pg);

	de_snprintf(pg->filename_token, sizeof(pg->filename_token), "%dx%dx%d",
		(int)pg->type_info->width, (int)pg->type_info->height, (int)pg->type_info->bpp);

	de_dbg(c, "image dimensions: %dx%d, bpp: %d\n",
		pg->type_info->width, pg->type_info->height, pg->type_info->bpp);

	switch(pg->type_info->bpp) {
	case 1:
	case 4:
	case 8:
		do_decode_1_4_8bit(c, d, pg);
		return;
	case 24:
		do_decode_24bit(c, d, pg);
		return;
	default:
		break;
	}

	de_warn(c, "(Image #%d) Image type '%s' is not supported\n",
		pg->image_num, pg->code4cc.id_printable);
}
void de_fmtutil_read_atari_palette(deark *c, dbuf *f, i64 pos, u32 *dstpal, i64 ncolors_to_read, i64 ncolors_used, unsigned int flags) { i64 i; unsigned int n; int pal_bits = 0; // 9, 12, or 15. 0 = not yet determined u8 cr, cg, cb; u8 cr1, cg1, cb1; char cbuf[32]; char tmps[64]; const char *s; s = de_get_ext_option(c, "atari:palbits"); if(s) { pal_bits = de_atoi(s); } if(pal_bits==0 && (flags&DE_FLAG_ATARI_15BIT_PAL)) { pal_bits = 15; } if(pal_bits==0) { // Pre-scan the palette, and try to guess whether Atari STE-style 12-bit // colors are used, instead of the usual 9-bit colors. // I don't know the best way to do this. Sometimes the 4th bit in each // nibble is used for extra color detail, and sometimes it just seems to // contain garbage. Maybe the logic should also depend on the file // format, or the number of colors. int bit_3_used = 0; int nibble_3_used = 0; for(i=0; i<ncolors_to_read; i++) { n = (unsigned int)dbuf_getu16be(f, pos + i*2); if(n&0xf000) { nibble_3_used = 1; } if(n&0x0888) { bit_3_used = 1; } } if(bit_3_used && !nibble_3_used) { de_dbg(c, "12-bit palette colors detected"); pal_bits = 12; } } if(pal_bits<12) { // Default to 9 if <12 pal_bits = 9; } else if(pal_bits<15) { pal_bits = 12; } else { pal_bits = 15; } for(i=0; i<ncolors_to_read; i++) { n = (unsigned int)dbuf_getu16be(f, pos + 2*i); if(pal_bits==15) { cr1 = (u8)((n>>6)&0x1c); if(n&0x0800) cr1+=2; if(n&0x8000) cr1++; cg1 = (u8)((n>>2)&0x1c); if(n&0x0080) cg1+=2; if(n&0x4000) cg1++; cb1 = (u8)((n<<2)&0x1c); if(n&0x0008) cb1+=2; if(n&0x2000) cb1++; cr = de_scale_n_to_255(31, cr1); cg = de_scale_n_to_255(31, cg1); cb = de_scale_n_to_255(31, cb1); de_snprintf(cbuf, sizeof(cbuf), "%2d,%2d,%2d", (int)cr1, (int)cg1, (int)cb1); } else if(pal_bits==12) {
// Parses the box that starts at pos, where len is the number of bytes
// available for it (i.e. the space remaining in its parent).
//
// A new de_boxdata object is made the "current box" (bctx->curbox) for the
// duration of the call; the previous one is restored before returning.
// The optional identify_box_fn callback and the handle_box_fn callback are
// invoked, and if the box is flagged as a superbox, its children are
// processed via do_box_sequence().
//
// Returns 1 on success, after setting *pbytes_consumed to the total size of
// the box. Returns 0 on failure, or if handle_box_fn returned 0.
static int do_box(deark *c, struct de_boxesctx *bctx, i64 pos, i64 len,
	int level, i64 *pbytes_consumed)
{
	i64 size32;
	i64 size64;
	i64 hdr_len; // Not including UUIDs
	i64 data_len; // Including UUIDs
	i64 box_len;
	struct de_fourcc box4cc;
	int ok = 0;
	struct de_boxdata *saved_box;
	struct de_boxdata *box;

	saved_box = bctx->curbox;
	box = de_malloc(c, sizeof(struct de_boxdata));
	bctx->curbox = box;
	box->parent = saved_box;

	if(len<8) {
		// Too small to hold even a basic box header.
		de_dbg(c, "(ignoring %d extra bytes at %"I64_FMT")", (int)len, pos);
		goto done;
	}

	size32 = dbuf_getu32be(bctx->f, pos);
	dbuf_read_fourcc(bctx->f, pos+4, &box4cc, 4, 0x0);
	box->boxtype = box4cc.id;

	if(size32==0) {
		// The box extends to the end of the available space.
		hdr_len = 8;
		data_len = len-8;
	}
	else if(size32==1) {
		// The real size is in a 64-bit field following the type.
		if(len<16) {
			de_dbg(c, "(ignoring %d extra bytes at %"I64_FMT")", (int)len, pos);
			goto done;
		}
		hdr_len = 16;
		size64 = dbuf_geti64be(bctx->f, pos+8);
		if(size64<16) goto done;
		data_len = size64-16;
	}
	else if(size32>=8) {
		hdr_len = 8;
		data_len = size32-8;
	}
	else {
		// Sizes 2 through 7 can't be valid.
		de_err(c, "Invalid or unsupported box format");
		goto done;
	}

	box_len = hdr_len + data_len;

	if(box->boxtype==DE_BOX_uuid && data_len>=16) {
		box->is_uuid = 1;
		dbuf_read(bctx->f, box->uuid, pos+hdr_len, 16);
	}

	box->level = level;
	box->box_pos = pos;
	box->box_len = box_len;
	box->payload_pos = pos+hdr_len;
	box->payload_len = data_len;
	if(box->is_uuid) {
		// The payload, as seen by the callbacks, starts after the UUID.
		box->payload_pos += 16;
		box->payload_len -= 16;
	}

	if(bctx->identify_box_fn) {
		bctx->identify_box_fn(c, bctx);
	}

	if(c->debug_level>0) {
		char name_str[80];

		if(box->box_name) {
			de_snprintf(name_str, sizeof(name_str), " (%s)", box->box_name);
		}
		else {
			name_str[0] = '\0';
		}

		if(box->is_uuid) {
			char uuid_string[50];

			de_fmtutil_render_uuid(c, box->uuid, uuid_string, sizeof(uuid_string));
			de_dbg(c, "box '%s'{%s}%s at %"I64_FMT", len=%"I64_FMT,
				box4cc.id_dbgstr, uuid_string, name_str, pos, box_len);
		}
		else {
			de_dbg(c, "box '%s'%s at %"I64_FMT", len=%"I64_FMT", dlen=%"I64_FMT,
				box4cc.id_dbgstr, name_str, pos, box_len, data_len);
		}
	}

	if(box_len > len) {
		de_err(c, "Invalid oversized box, or unexpected end of file "
			"(box at %"I64_FMT" ends at %"I64_FMT", "
			"parent ends at %"I64_FMT")",
			pos, pos+box_len, pos+len);
		goto done;
	}

	de_dbg_indent(c, 1);
	ok = bctx->handle_box_fn(c, bctx);
	de_dbg_indent(c, -1);
	if(!ok) goto done;
	ok = 0; // Not successful until everything below has been done

	if(box->is_superbox) {
		i64 children_pos;
		i64 children_len;
		i64 max_nchildren;

		de_dbg_indent(c, 1);
		children_pos = pos+hdr_len + box->extra_bytes_before_children;
		children_len = data_len - box->extra_bytes_before_children;
		// -1 = no limit on the number of children
		max_nchildren = box->num_children_is_known ? box->num_children : -1;
		do_box_sequence(c, bctx, children_pos, children_len, max_nchildren, level+1);
		de_dbg_indent(c, -1);
	}

	*pbytes_consumed = box_len;
	ok = 1;

done:
	de_free(c, bctx->curbox);
	bctx->curbox = saved_box; // Restore the curbox pointer
	return ok;
}
// Make a printable version of a UUID (or a big-endian GUID). // Caller supplies s. void de_fmtutil_render_uuid(deark *c, const u8 *uuid, char *s, size_t s_len) { de_snprintf(s, s_len, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7], uuid[8], uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]); }
// Gathers information about a DIB. // If DE_BMPINFO_HAS_FILEHEADER flag is set, pos points to the BITMAPFILEHEADER. // Otherwise, it points to the BITMAPINFOHEADER. // Caller allocates bi. // Returns 0 if BMP is invalid. int de_fmtutil_get_bmpinfo(deark *c, dbuf *f, struct de_bmpinfo *bi, i64 pos, i64 len, unsigned int flags) { i64 fhs; // file header size i64 bmih_pos; struct de_fourcc cmpr4cc; char cmprname_dbgstr[80]; de_zeromem(bi, sizeof(struct de_bmpinfo)); de_zeromem(&cmpr4cc, sizeof(struct de_fourcc)); fhs = (flags & DE_BMPINFO_HAS_FILEHEADER) ? 14 : 0; if(fhs+len < 16) return 0; if(fhs) { if(flags & DE_BMPINFO_HAS_HOTSPOT) { bi->hotspot_x = dbuf_getu16le(f, pos+6); bi->hotspot_y = dbuf_getu16le(f, pos+8); de_dbg(c, "hotspot: (%d,%d)", (int)bi->hotspot_x, (int)bi->hotspot_y); } bi->bitsoffset = dbuf_getu32le(f, pos+10); de_dbg(c, "bits offset: %d", (int)bi->bitsoffset); } bmih_pos = pos + fhs; bi->infohdrsize = dbuf_getu32le(f, bmih_pos); if(bi->infohdrsize==0x474e5089 && (flags & DE_BMPINFO_ICO_FORMAT)) { // We don't examine PNG-formatted icons, but we can identify them. 
bi->infohdrsize = 0; bi->file_format = DE_BMPINFO_FMT_PNG; return 1; } de_dbg(c, "info header size: %d", (int)bi->infohdrsize); if(bi->infohdrsize==12) { bi->bytes_per_pal_entry = 3; bi->width = dbuf_getu16le(f, bmih_pos+4); bi->height = dbuf_getu16le(f, bmih_pos+6); bi->bitcount = dbuf_getu16le(f, bmih_pos+10); } else if(bi->infohdrsize>=16 && bi->infohdrsize<=124) { bi->bytes_per_pal_entry = 4; bi->width = dbuf_getu32le(f, bmih_pos+4); bi->height = dbuf_geti32le(f, bmih_pos+8); if(bi->height<0) { bi->is_topdown = 1; bi->height = -bi->height; } bi->bitcount = dbuf_getu16le(f, bmih_pos+14); if(bi->infohdrsize>=20) { bi->compression_field = (u32)dbuf_getu32le(f, bmih_pos+16); if(flags & DE_BMPINFO_CMPR_IS_4CC) { dbuf_read_fourcc(f, bmih_pos+16, &cmpr4cc, 4, 0x0); } } if(bi->infohdrsize>=24) { bi->sizeImage_field = dbuf_getu32le(f, bmih_pos+20); } if(bi->infohdrsize>=36) { bi->pal_entries = dbuf_getu32le(f, bmih_pos+32); } } else { return 0; } if(flags & DE_BMPINFO_ICO_FORMAT) bi->height /= 2; if(bi->bitcount>=1 && bi->bitcount<=8) { if(bi->pal_entries==0) { bi->pal_entries = (i64)(1<<(unsigned int)bi->bitcount); } // I think the NumColors field (in icons) is supposed to be the maximum number of // colors implied by the bit depth, not the number of colors in the palette. bi->num_colors = (i64)(1<<(unsigned int)bi->bitcount); } else { // An arbitrary value. All that matters is that it's >=256. 
bi->num_colors = 16777216; } de_dbg_dimensions(c, bi->width, bi->height); de_dbg(c, "bit count: %d", (int)bi->bitcount); if((flags & DE_BMPINFO_CMPR_IS_4CC) && (bi->compression_field>0xffff)) { de_snprintf(cmprname_dbgstr, sizeof(cmprname_dbgstr), "'%s'", cmpr4cc.id_dbgstr); } else { de_fmtutil_get_bmp_compression_name(bi->compression_field, cmprname_dbgstr, sizeof(cmprname_dbgstr), 0); } de_dbg(c, "compression: %u (%s)", (unsigned int)bi->compression_field, cmprname_dbgstr); if(bi->sizeImage_field!=0) { de_dbg(c, "sizeImage: %u", (unsigned int)bi->sizeImage_field); } de_dbg(c, "palette entries: %u", (unsigned int)bi->pal_entries); if(bi->pal_entries>256 && bi->bitcount>8) { de_warn(c, "Ignoring bad palette size (%u entries)", (unsigned int)bi->pal_entries); bi->pal_entries = 0; } bi->pal_bytes = bi->bytes_per_pal_entry*bi->pal_entries; bi->size_of_headers_and_pal = fhs + bi->infohdrsize + bi->pal_bytes; // FIXME: cmpr type 3 doesn't always mean BITFIELDS if(bi->compression_field==3) { bi->size_of_headers_and_pal += 12; // BITFIELDS } bi->is_compressed = !((bi->compression_field==0) || (bi->compression_field==3 && bi->bitcount>1)); if(!de_good_image_dimensions(c, bi->width, bi->height)) { return 0; } // TODO: This needs work, to decide how to handle compressed images. // TODO: What about BI_BITFIELDS images? if(bi->compression_field==0) { // Try to figure out the true size of the resource, minus any padding. bi->rowspan = ((bi->bitcount*bi->width +31)/32)*4; bi->foreground_size = bi->rowspan * bi->height; de_dbg(c, "foreground size: %d", (int)bi->foreground_size); if(flags & DE_BMPINFO_ICO_FORMAT) { bi->mask_rowspan = ((bi->width +31)/32)*4; bi->mask_size = bi->mask_rowspan * bi->height; de_dbg(c, "mask size: %d", (int)bi->mask_size); } else { bi->mask_size = 0; } bi->total_size = bi->size_of_headers_and_pal + bi->foreground_size + bi->mask_size; } else { // Don't try to figure out the true size of compressed or other unusual images. 
bi->total_size = len; } return 1; }
// Main function of the PNG module (also handles JNG and MNG).
// Identifies the format from the signature, then walks the chunk list that
// starts at offset 8, printing debug information about each chunk and
// calling the chunk type's handler function if it has one.
static void de_run_png(deark *c, de_module_params *mparams)
{
	lctx *d;
	i64 pos;
	i32 prev_chunk_id = 0;
	int suppress_idat_dbg = 0;

	d = de_malloc(c, sizeof(lctx));

	de_dbg(c, "signature at %d", 0);
	de_dbg_indent(c, 1);
	d->fmt = do_identify_png_internal(c);
	if(d->fmt==DE_PNGFMT_PNG) {
		d->fmt_name = "PNG";
	}
	else if(d->fmt==DE_PNGFMT_JNG) {
		d->fmt_name = "JNG";
	}
	else if(d->fmt==DE_PNGFMT_MNG) {
		d->fmt_name = "MNG";
	}
	else {
		d->fmt_name = "?";
	}
	de_dbg(c, "format: %s", d->fmt_name);
	if(d->fmt>0) {
		de_declare_fmt(c, d->fmt_name);
	}
	de_dbg_indent(c, -1);

	for(pos = 8; pos < c->infile->len; ) {
		struct de_fourcc chunk4cc;
		struct handler_params hp;
		u32 crc;
		int is_idat;

		de_zeromem(&hp, sizeof(struct handler_params));

		// Chunk layout: length (4), type (4), data (dlen), CRC (4).
		hp.dlen = de_getu32be(pos);
		if(pos + 8 + hp.dlen + 4 > c->infile->len) break;
		dbuf_read_fourcc(c->infile, pos+4, &chunk4cc, 4, 0x0);

		hp.cti = get_chunk_type_info(chunk4cc.id);
		is_idat = (chunk4cc.id==CODE_IDAT);

		if(is_idat && suppress_idat_dbg) {
			// We've already said that more IDAT chunks follow.
		}
		else if(is_idat && prev_chunk_id==CODE_IDAT && c->debug_level<2) {
			// At low debug levels, a run of IDAT chunks is summarized
			// instead of being listed one by one.
			de_dbg(c, "(more IDAT chunks follow)");
			suppress_idat_dbg = 1;
		}
		else {
			char nbuf[80];

			if(!hp.cti) {
				// Unrecognized chunk type
				de_strlcpy(nbuf, " (?)", sizeof(nbuf));
			}
			else if(hp.cti->name) {
				de_snprintf(nbuf, sizeof(nbuf), " (%s)", hp.cti->name);
			}
			else {
				de_strlcpy(nbuf, "", sizeof(nbuf));
			}

			de_dbg(c, "chunk '%s'%s at %d dpos=%d dlen=%d",
				chunk4cc.id_dbgstr, nbuf,
				(int)pos, (int)(pos+8), (int)hp.dlen);
			if(!is_idat) suppress_idat_dbg = 0;
		}

		pos += 8;

		de_dbg_indent(c, 1);

		hp.dpos = pos;
		hp.chunk4cc = &chunk4cc;

		if(hp.cti) {
			if(hp.cti->handler_fn) {
				hp.cti->handler_fn(c, d, &hp);
			}
		}
		else if(c->debug_level>=2) {
			// Unknown chunks are hex dumped, at high debug levels only.
			handler_hexdump(c, d, &hp);
		}

		pos += hp.dlen;

		crc = (u32)de_getu32be(pos);
		de_dbg2(c, "crc32 (reported): 0x%08x", (unsigned int)crc);
		pos += 4;

		de_dbg_indent(c, -1);

		prev_chunk_id = chunk4cc.id;
	}

	de_free(c, d);
}
// Creates a dbuf object for a new output file.
//
// ext: Filename extension, or NULL.
// fi: Optional information about the file (name, timestamps, directory
//   status, ...). May be NULL. Anything needed later is copied.
// createflags: DE_CREATEFLAG_* flags.
//
// The name is normally "<base>.<NNN>.<suffix>", where the suffix is derived
// from ext and/or the name in fi. Depending on the settings in c, the
// returned dbuf may write to a file, to stdout, to a TAR archive member, or
// to a memory buffer destined for a ZIP archive. A "null" dbuf
// (DBUF_TYPE_NULL) is returned if the file is to be skipped, if list mode is
// in effect, or if the file could not be opened.
// This function always returns a dbuf object.
dbuf *dbuf_create_output_file(deark *c, const char *ext, de_finfo *fi,
	unsigned int createflags)
{
	char nbuf[500];
	char msgbuf[200];
	dbuf *f;
	const char *basefn;
	int file_index;
	u8 is_directory;
	u8 is_aux;
	u8 keep_archive_names;
	char *name_from_finfo = NULL;
	i64 name_from_finfo_len = 0;

	if(ext && fi && fi->original_filename_flag) {
		de_dbg(c, "[internal warning: Incorrect use of create_output_file]");
	}

	f = de_malloc(c, sizeof(dbuf));
	f->c = c;
	f->max_len_hard = c->max_output_file_size;
	f->is_managed = 1;

	is_directory = (fi && fi->is_directory) ? 1 : 0;
	is_aux = (createflags&DE_CREATEFLAG_IS_AUX) ? 1 : 0;

	if(is_directory && !c->keep_dir_entries) {
		de_dbg(c, "skipping 'directory' file");
		f->btype = DBUF_TYPE_NULL;
		goto done;
	}

	if(c->extract_policy==DE_EXTRACTPOLICY_MAINONLY && is_aux) {
		de_dbg(c, "skipping 'auxiliary' file");
		f->btype = DBUF_TYPE_NULL;
		goto done;
	}
	else if(c->extract_policy==DE_EXTRACTPOLICY_AUXONLY && !is_aux) {
		de_dbg(c, "skipping 'main' file");
		f->btype = DBUF_TYPE_NULL;
		goto done;
	}

	// Files skipped above do not get an ID number. Everything from here on
	// does, even if it ends up not being written.
	file_index = c->file_count;
	c->file_count++;

	basefn = c->base_output_filename;
	if(!basefn) basefn = "output";

	if(fi && ucstring_isnonempty(fi->file_name_internal)) {
		// Make a UTF-8 copy of the name (+1 is for the NUL terminator).
		name_from_finfo_len = 1 + ucstring_count_utf8_bytes(fi->file_name_internal);
		name_from_finfo = de_malloc(c, name_from_finfo_len);
		ucstring_to_sz(fi->file_name_internal, name_from_finfo, (size_t)name_from_finfo_len, 0,
			DE_ENCODING_UTF8);
	}

	// True if we're writing to an archive, and the user didn't ask for a
	// particular base filename.
	keep_archive_names = (c->output_style==DE_OUTPUTSTYLE_ARCHIVE &&
		!c->base_output_filename) ? 1 : 0;

	if(keep_archive_names && is_directory &&
		(fi->is_root_dir || (fi->detect_root_dot_dir && fi->orig_name_was_dot)))
	{
		// (is_directory implies that fi is not NULL.)
		de_strlcpy(nbuf, ".", sizeof(nbuf));
	}
	else if(keep_archive_names && fi && fi->original_filename_flag && name_from_finfo) {
		// TODO: This is a "temporary" hack to allow us to, when both reading from
		// and writing to an archive format, use some semblance of the correct
		// filename (instead of "output.xxx.yyy").
		// There are some things that we don't handle optimally, such as
		// subdirectories.
		// A major redesign of the file naming logic would be good.
		de_strlcpy(nbuf, name_from_finfo, sizeof(nbuf));
	}
	else {
		char fn_suffix[256];

		if(ext) {
			if(name_from_finfo) {
				de_snprintf(fn_suffix, sizeof(fn_suffix), "%s.%s", name_from_finfo, ext);
			}
			else {
				de_strlcpy(fn_suffix, ext, sizeof(fn_suffix));
			}
		}
		else if(name_from_finfo) {
			if(is_directory) {
				de_snprintf(fn_suffix, sizeof(fn_suffix), "%s.dir", name_from_finfo);
			}
			else {
				de_strlcpy(fn_suffix, name_from_finfo, sizeof(fn_suffix));
			}
		}
		else {
			// Nothing to go on: use a generic suffix.
			de_strlcpy(fn_suffix, is_directory ? "dir" : "bin", sizeof(fn_suffix));
		}

		de_snprintf(nbuf, sizeof(nbuf), "%s.%03d.%s", basefn, file_index, fn_suffix);
	}

	f->name = de_strdup(c, nbuf);

	if(fi) {
		// The finfo object passed to us at file creation is not required to
		// remain valid, so make a copy of anything in it that we might need
		// later.
		f->fi_copy = de_finfo_create(c);
		finfo_shallow_copy(c, fi, f->fi_copy);

		// Here's where we respect the -intz option, by using it to convert to
		// UTC in some cases.
		if(c->input_tz_offs_seconds!=0) {
			if(f->fi_copy->mod_time.is_valid &&
				f->fi_copy->mod_time.tzcode==DE_TZCODE_LOCAL)
			{
				de_timestamp_cvt_to_utc(&f->fi_copy->mod_time, -c->input_tz_offs_seconds);
			}

			if(f->fi_copy->image_mod_time.is_valid &&
				f->fi_copy->image_mod_time.tzcode==DE_TZCODE_LOCAL)
			{
				de_timestamp_cvt_to_utc(&f->fi_copy->image_mod_time, -c->input_tz_offs_seconds);
			}
		}
	}

	// Skip files outside the range of IDs that the user asked for.
	if(file_index < c->first_output_file ||
		(c->max_output_files>=0 &&
		file_index >= c->first_output_file + c->max_output_files))
	{
		f->btype = DBUF_TYPE_NULL;
		goto done;
	}

	c->num_files_extracted++;

	if(c->extrlist_dbuf) {
		dbuf_printf(c->extrlist_dbuf, "%s\n", f->name);
		dbuf_flush(c->extrlist_dbuf);
	}

	if(c->list_mode) {
		// Just print the name; don't write anything.
		f->btype = DBUF_TYPE_NULL;
		if(c->list_mode_include_file_id) {
			de_msg(c, "%d:%s", file_index, f->name);
		}
		else {
			de_msg(c, "%s", f->name);
		}
		goto done;
	}

	if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && c->archive_fmt==DE_ARCHIVEFMT_TAR) {
		de_info(c, "Adding %s to TAR file", f->name);
		f->btype = DBUF_TYPE_ODBUF;
		// A dummy max_len_hard value. The parent will do the checking.
		f->max_len_hard = DE_DUMMY_MAX_FILE_SIZE;
		f->writing_to_tar_archive = 1;
		de_tar_start_member_file(c, f);
	}
	else if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE) { // ZIP
		i64 initial_alloc;

		de_info(c, "Adding %s to ZIP file", f->name);
		f->btype = DBUF_TYPE_MEMBUF;
		f->max_len_hard = DE_MAX_MEMBUF_SIZE;
		// A directory entry is not expected to have any data associated
		// with it (besides the files it contains), so it gets only a
		// small buffer.
		initial_alloc = is_directory ? 16 : 65536;
		f->membuf_buf = de_malloc(c, initial_alloc);
		f->membuf_alloc = initial_alloc;
		f->write_memfile_to_zip_archive = 1;
	}
	else if(c->output_style==DE_OUTPUTSTYLE_STDOUT) {
		de_info(c, "Writing %s to [stdout]", f->name);
		f->btype = DBUF_TYPE_STDOUT;
		// TODO: Should we increase f->max_len_hard?
		f->fp = stdout;
	}
	else {
		de_info(c, "Writing %s", f->name);
		f->btype = DBUF_TYPE_OFILE;
		f->fp = de_fopen_for_write(c, f->name, msgbuf, sizeof(msgbuf),
			c->overwrite_mode, 0);

		if(!f->fp) {
			de_err(c, "Failed to write %s: %s", f->name, msgbuf);
			f->btype = DBUF_TYPE_NULL;
		}
	}

done:
	de_free(c, name_from_finfo);
	return f;
}
// Main function of the RPM module.
// Processes the lead section and the two header structures that follow it,
// then extracts everything after them (the compressed archive) to a single
// output file. The output file's extension is chosen mainly by looking at
// the first bytes of the data. If a package name was found, and filenames
// from the file are wanted, the output file is named
// "<name>-<version>.<release>".
static void de_run_rpm(deark *c, de_module_params *mparams)
{
	lctx *d = NULL;
	i64 pos;
	u8 buf[8];
	const char *ext;
	i64 section_size = 0;
	de_finfo *fi = NULL;
	char filename[128];

	d = de_malloc(c, sizeof(lctx));

	if(!do_lead_section(c, d)) {
		goto done;
	}

	// The first header structure follows the 96-byte lead.
	pos = 96;
	if(!do_header_structure(c, d, 1, pos, &section_size)) {
		goto done;
	}
	pos += section_size;

	// Header structures are 8-byte aligned. The first one always starts at
	// offset 96, so we don't have to worry about it. But we need to make
	// sure the second one is aligned.
	pos = ((pos + 7)/8)*8;

	if(!do_header_structure(c, d, 0, pos, &section_size)) {
		goto done;
	}
	pos += section_size;

	de_dbg(c, "data pos: %d", (int)pos);
	if(pos > c->infile->len) goto done;

	// There is usually a tag that indicates the compression format, but we
	// primarily figure out the format by sniffing its magic number, on the
	// theory that that's more reliable.
	// TODO: I think it's also theoretically possible that it could use an archive
	// format other than cpio.
	de_read(buf, pos, 8);

	if(buf[0]==0x1f && buf[1]==0x8b) {
		ext = "cpio.gz";
	}
	else if(buf[0]==0x42 && buf[1]==0x5a && buf[2]==0x68) {
		ext = "cpio.bz2";
	}
	else if(buf[0]==0xfd && buf[1]==0x37 && buf[2]==0x7a) {
		ext = "cpio.xz";
	}
	else if(d->cmpr_type==DE_RPM_CMPR_LZMA || buf[0]==0x5d) {
		// No multi-byte signature is tested for this format, so the
		// compression tag is also taken into account.
		ext = "cpio.lzma";
	}
	else {
		de_warn(c, "Unidentified compression or archive format");
		ext = "cpio.bin";
	}

	if(d->name_srd && c->filenames_from_file) {
		// "x" is a placeholder for a missing version or release string.
		const char *version2 = "x";
		const char *release2 = "x";

		if(d->version_srd) version2 = d->version_srd->sz;
		if(d->release_srd) release2 = d->release_srd->sz;

		fi = de_finfo_create(c);
		de_snprintf(filename, sizeof(filename), "%s-%s.%s",
			d->name_srd->sz, version2, release2);
		de_finfo_set_name_from_sz(c, fi, filename, 0, DE_ENCODING_ASCII);
	}

	dbuf_create_file_from_slice(c->infile, pos, c->infile->len - pos, ext, fi, 0);

done:
	de_finfo_destroy(c, fi);
	if(d) {
		de_destroy_stringreaderdata(c, d->name_srd);
		de_destroy_stringreaderdata(c, d->release_srd);
		de_destroy_stringreaderdata(c, d->version_srd);
		de_free(c, d);
	}
}
// Processes the EBML element that starts at pos1, of which at most
// nbytes_avail bytes are available.
//
// Reads the element's ID and data length, then (depending on the flags in
// the element's ele_id_info entry, and the debug level) calls the element's
// handler function and/or decodes the data according to its data type.
// Master elements are processed recursively via do_element_sequence().
//
// Returns 1 on success, after setting *bytes_used to the total size of the
// element (header + data). Returns 0 on failure.
static int do_element(deark *c, lctx *d, i64 pos1, i64 nbytes_avail,
	i64 *bytes_used)
{
	i64 ele_id;
	i64 ele_dlen;
	i64 pos = pos1;
	int retval = 0;
	const struct ele_id_info *einfo;
	const char *ele_name = "?";
	unsigned int dtype = 0;
	int saved_indent_level;
	int decode_it = 0;
	int announce_skip = 0;
	int use_handler;
	int use_end_handler;
	int len_ret;
	char dlen_descr[80];
	struct handler_params hp;

	de_dbg_indent_save(c, &saved_indent_level);

	de_dbg(c, "element at %"I64_FMT", max_len=%"I64_FMT, pos1, nbytes_avail);
	de_dbg_indent(c, 1);

	if(1!=get_var_size_int(c->infile, &ele_id, &pos, nbytes_avail)) {
		de_err(c, "Failed to read ID of element at %"I64_FMT, pos1);
		goto done;
	}

	einfo = find_ele_id_info(ele_id);
	if(einfo) {
		if(einfo->name) ele_name = einfo->name;
		// The low byte of the flags field is the data type.
		dtype = einfo->flags & 0xff;
	}

	de_dbg(c, "id: 0x%"U64_FMTx" (%s)", (u64)ele_id, ele_name);
	if(d->show_encoded_id) {
		print_encoded_id(c, d, pos1, pos-pos1);
	}

	len_ret = get_var_size_int(c->infile, &ele_dlen, &pos, pos1+nbytes_avail-pos);
	switch(len_ret) {
	case 1:
		de_snprintf(dlen_descr, sizeof(dlen_descr), "%"I64_FMT, ele_dlen);
		break;
	case 2:
		// The length is "unknown".
		ele_dlen = c->infile->len - pos;
		de_strlcpy(dlen_descr, "unknown", sizeof(dlen_descr));
		break;
	default:
		de_err(c, "Failed to read length of element at %"I64_FMT, pos1);
		goto done;
	}

	de_dbg(c, "data at %"I64_FMT", dlen=%s, type=%s", pos, dlen_descr,
		get_type_name(dtype));

	if(len_ret==2) {
		// EBML has no end-of-master-element marker, which is a problem
		// when a master element has an unknown length.
		//
		// EBML's "solution" is this:
		// "The end of an Unknown-Sized Element is determined by whichever
		// comes first: the end of the file or the beginning of the next EBML
		// Element, defined by this document or the corresponding EBML Schema,
		// that is not independently valid as Descendant Element of the
		// Unknown-Sized Element."
		//
		// That appears to require a sophisticated, high-level algorithm
		// with complete knowledge of the latest version of the specific
		// application format. We have no such algorithm.
		de_err(c, "EBML files with unknown-length elements are not supported");
		goto done;
	}

	if(pos + ele_dlen > c->infile->len) {
		de_err(c, "Element at %"I64_FMT" goes beyond end of file", pos1);
		goto done;
	}

	// Decide whether to decode this element. Unrecognized elements are
	// never decoded.
	if(einfo) {
		if(einfo->flags & 0x0200) {
			// Elements with this flag are silently not decoded.
			decode_it = 0;
		}
		else if((einfo->flags & 0x0100) && c->debug_level<2) {
			// Elements with this flag are decoded only at debug level 2+.
			decode_it = 0;
			announce_skip = 1;
		}
		else {
			decode_it = 1;
		}
	}

	// The handler is called before the data is decoded, and, if flag 0x0800
	// is set, a second time afterward.
	use_handler = (decode_it && einfo && einfo->hfn) ? 1 : 0;
	use_end_handler = (use_handler && (einfo->flags & 0x0800)) ? 1 : 0;

	if(use_handler) {
		de_zeromem(&hp, sizeof(struct handler_params));
		hp.dpos = pos;
		hp.dlen = ele_dlen;
		einfo->hfn(c, d, &hp);
	}

	if(!decode_it) {
		if(announce_skip) {
			de_dbg(c, "[not decoding this element]");
		}
	}
	else if(dtype==TY_m) {
		do_element_sequence(c, d, pos, ele_dlen);
	}
	else if(dtype==TY_u) {
		decode_uint(c, d, einfo, pos, ele_dlen);
	}
	else if(dtype==TY_f) {
		decode_float(c, d, einfo, pos, ele_dlen);
	}
	else if(dtype==TY_8) {
		decode_string(c, d, einfo, pos, ele_dlen, DE_ENCODING_UTF8);
	}
	else if(dtype==TY_s) {
		decode_string(c, d, einfo, pos, ele_dlen, DE_ENCODING_PRINTABLEASCII);
	}
	else if(dtype==TY_d) {
		decode_date(c, d, einfo, pos, ele_dlen);
	}

	if(use_end_handler) {
		de_zeromem(&hp, sizeof(struct handler_params));
		hp.dpos = pos;
		hp.dlen = ele_dlen;
		hp.end_flag = 1;
		einfo->hfn(c, d, &hp);
	}

	pos += ele_dlen;

	*bytes_used = pos - pos1;
	retval = 1;

done:
	de_dbg_indent_restore(c, saved_indent_level);
	return retval;
}