// Read (up to) len bytes from f, translate them to characters, and append // them to s. void dbuf_read_to_ucstring(dbuf *f, i64 pos, i64 len, de_ucstring *s, unsigned int conv_flags, int encoding) { u8 *buf = NULL; deark *c = f->c; if(conv_flags & DE_CONVFLAG_STOP_AT_NUL) { i64 foundpos = 0; if(dbuf_search_byte(f, 0x00, pos, len, &foundpos)) { len = foundpos - pos; } } buf = de_malloc(c, len); dbuf_read(f, buf, pos, len); ucstring_append_bytes(s, buf, len, 0, encoding); de_free(c, buf); }
// Process the gzip member that starts at offset pos1 of c->infile: parse its
// header, decompress its deflate data into d->output_file (creating that file
// if it does not exist yet), then read the crc32 and size fields that follow
// the compressed data.
//
// Returns 1 on success, with *member_size set to the number of bytes the
// member occupies in the input file. Returns 0 on failure, with *member_size
// set to 0.
//
// File offsets and lengths are printed as 64-bit values, so that diagnostics
// stay correct for input files larger than 2GiB.
static int do_gzip_read_member(deark *c, lctx *d, i64 pos1, i64 *member_size)
{
	u8 b0, b1;
	i64 pos;
	i64 n;
	i64 foundpos = 0;
	i64 string_len;
	i64 cmpr_data_len = 0;
	i64 mod_time_unix;
	u32 crc_calculated;
	de_ucstring *member_name = NULL;
	int saved_indent_level;
	int ret;
	struct member_data *md = NULL;
	int retval = 0;

	md = de_malloc(c, sizeof(struct member_data));

	de_dbg_indent_save(c, &saved_indent_level);

	de_dbg(c, "gzip member at %" I64_FMT, pos1);
	de_dbg_indent(c, 1);
	pos = pos1;

	// Signature bytes
	b0 = de_getbyte(pos+0);
	b1 = de_getbyte(pos+1);
	if(b0!=0x1f || b1!=0x8b) {
		de_err(c, "Invalid gzip signature at %" I64_FMT ". This is not a valid gzip file.",
			pos1);
		goto done;
	}

	// 0x08 is the only compression code accepted here.
	md->cmpr_code = de_getbyte(pos+2);
	if(md->cmpr_code!=0x08) {
		de_err(c, "Unsupported compression type (%d)", (int)md->cmpr_code);
		goto done;
	}

	md->flags = de_getbyte(pos+3);
	de_dbg(c, "flags: 0x%02x", (unsigned int)md->flags);
	pos += 4;

	mod_time_unix = de_getu32le(pos);
	de_unix_time_to_timestamp(mod_time_unix, &md->mod_time_ts, 0x1);
	if(md->mod_time_ts.is_valid) {
		char timestamp_buf[64];
		de_timestamp_to_string(&md->mod_time_ts, timestamp_buf, sizeof(timestamp_buf), 0);
		de_dbg(c, "mod time: %" I64_FMT " (%s)", mod_time_unix, timestamp_buf);
	}
	pos += 4;

	b0 = de_getbyte(pos++);
	de_dbg(c, "extra flags: 0x%02x", (unsigned int)b0);

	b0 = de_getbyte(pos++);
	de_dbg(c, "OS or filesystem: %d (%s)", (int)b0, get_os_name(b0));

	if(md->flags & GZIPFLAG_FEXTRA) {
		n = de_getu16le(pos); // XLEN
		// TODO: It might be interesting to dissect these extra fields, but it's
		// hard to find even a single file that uses them.
		de_dbg(c, "[extra fields at %" I64_FMT ", dpos=%" I64_FMT ", dlen=%d]",
			pos, pos+2, (int)n);
		pos += 2;
		pos += n;
	}

	if(md->flags & GZIPFLAG_FNAME) {
		// The name runs up to the next NUL byte.
		ret = dbuf_search_byte(c->infile, 0x00, pos, c->infile->len - pos,
			&foundpos);
		if(!ret) {
			de_err(c, "Invalid NAME field");
			goto done;
		}

		string_len = foundpos - pos;

		member_name = ucstring_create(c);
#define DE_GZIP_MAX_FNLEN 300
		dbuf_read_to_ucstring_n(c->infile, pos, string_len, DE_GZIP_MAX_FNLEN,
			member_name, 0, DE_ENCODING_LATIN1);
		de_dbg(c, "file name at %" I64_FMT ", len=%" I64_FMT ": \"%s\"",
			pos, string_len, ucstring_getpsz_d(member_name));
		pos = foundpos + 1;
	}

	if(md->flags & GZIPFLAG_FCOMMENT) {
		// The comment is skipped; only its terminating NUL is located.
		ret = dbuf_search_byte(c->infile, 0x00, pos, c->infile->len - pos,
			&foundpos);
		if(!ret) {
			de_err(c, "Invalid COMMENT field");
			goto done;
		}
		pos = foundpos + 1;
	}

	if(md->flags & GZIPFLAG_FHCRC) {
		// The header CRC is only recorded and logged in this function.
		md->crc16_reported = (u32)de_getu16le(pos);
		de_dbg(c, "crc16 (reported): 0x%04x", (unsigned int)md->crc16_reported);
		pos += 2;
	}

	de_dbg(c, "compressed blocks at %" I64_FMT, pos);

	if(!d->output_file) {
		// Although any member can have a name and mod time, this metadata
		// is ignored for members after the first one.
		de_finfo *fi = NULL;

		fi = de_finfo_create(c);

		if(member_name && c->filenames_from_file) {
			de_finfo_set_name_from_ucstring(c, fi, member_name, 0);
			fi->original_filename_flag = 1;
		}

		if(md->mod_time_ts.is_valid) {
			fi->mod_time = md->mod_time_ts;
		}

		d->output_file = dbuf_create_output_file(c, member_name?NULL:"bin", fi, 0);

		de_finfo_destroy(c, fi);
	}

	// md is handed to the write callback only for the duration of the
	// decompression; the callback and its userdata are detached right after,
	// because md is freed before this function returns.
	d->output_file->writecallback_fn = our_writecallback;
	d->output_file->userdata = (void*)md;
	md->crco = d->crco;
	de_crcobj_reset(md->crco);

	ret = de_decompress_deflate(c->infile, pos, c->infile->len - pos,
		d->output_file, 0, &cmpr_data_len, 0);

	crc_calculated = de_crcobj_getval(md->crco);
	d->output_file->writecallback_fn = NULL;
	d->output_file->userdata = NULL;

	if(!ret) goto done;
	pos += cmpr_data_len;

	de_dbg(c, "crc32 (calculated): 0x%08x", (unsigned int)crc_calculated);

	md->crc32_reported = (u32)de_getu32le(pos);
	de_dbg(c, "crc32 (reported)  : 0x%08x", (unsigned int)md->crc32_reported);
	pos += 4;

	// A mismatch is only a warning; the member still counts as read.
	if(crc_calculated != md->crc32_reported) {
		de_warn(c, "CRC check failed: Expected 0x%08x, got 0x%08x",
			(unsigned int)md->crc32_reported, (unsigned int)crc_calculated);
	}

	md->isize = de_getu32le(pos);
	de_dbg(c, "uncompressed size (mod 2^32): %u", (unsigned int)md->isize);
	pos += 4;

	retval = 1;

done:
	if(retval)
		*member_size = pos - pos1;
	else
		*member_size = 0;
	ucstring_destroy(member_name);
	de_free(c, md);
	de_dbg_indent_restore(c, saved_indent_level);
	return retval;
}
// Process the gzip member that starts at offset pos1 of c->infile: parse its
// header, decompress its deflate data into d->output_file (creating that file
// if it does not exist yet), then read the crc32 and size fields that follow
// the compressed data.
//
// Returns 1 on success, with *member_size set to the number of bytes the
// member occupies in the input file. Returns 0 on failure, with *member_size
// set to 0.
//
// File offsets and lengths are printed as 64-bit values, so that diagnostics
// stay correct for input files larger than 2GiB.
static int do_gzip_read_member(deark *c, lctx *d, de_int64 pos1, de_int64 *member_size)
{
	de_byte b0, b1;
	de_int64 cmpr_code;
	de_int64 pos;
	de_int64 n;
	de_int64 foundpos = 0;
	de_int64 string_len;
	de_int64 cmpr_data_len = 0;
	de_int64 isize;
	de_int64 mod_time_unix;
	struct de_timestamp mod_time_ts;
	de_uint32 crc32_field;
	de_ucstring *member_name = NULL;
	de_finfo *fi = NULL;
	int saved_indent_level;
	int ret;
	int retval = 0;

	mod_time_ts.is_valid = 0;

	de_dbg_indent_save(c, &saved_indent_level);

	de_dbg(c, "gzip member at %" INT64_FMT "\n", pos1);
	de_dbg_indent(c, 1);
	pos = pos1;

	// Signature bytes
	b0 = de_getbyte(pos+0);
	b1 = de_getbyte(pos+1);
	if(b0!=0x1f || b1!=0x8b) {
		de_err(c, "Invalid gzip signature at %" INT64_FMT ". This is not a valid gzip file.\n",
			pos1);
		goto done;
	}

	// 0x08 is the only compression code accepted here.
	cmpr_code=de_getbyte(pos+2);
	if(cmpr_code!=0x08) {
		de_err(c, "Unsupported compression type (%d)\n", (int)cmpr_code);
		goto done;
	}

	d->flags = de_getbyte(pos+3);
	de_dbg(c, "flags: 0x%02x\n", (unsigned int)d->flags);
	pos += 4;

	mod_time_unix = de_getui32le(pos);
	de_unix_time_to_timestamp(mod_time_unix, &mod_time_ts);
	if(mod_time_ts.is_valid) {
		char timestamp_buf[64];
		de_timestamp_to_string(&mod_time_ts, timestamp_buf, sizeof(timestamp_buf), 1);
		de_dbg(c, "mod time: %" INT64_FMT " (%s)\n", mod_time_unix, timestamp_buf);
	}
	pos += 4;

	b0 = de_getbyte(pos++);
	de_dbg(c, "extra flags: 0x%02x\n", (unsigned int)b0);

	b0 = de_getbyte(pos++);
	de_dbg(c, "OS or filesystem: %d (%s)\n", (int)b0, get_os_name(b0));

	if(d->flags & GZIPFLAG_FEXTRA) {
		n = de_getui16le(pos); // XLEN
		// TODO: It might be interesting to dissect these extra fields, but it's
		// hard to find even a single file that uses them.
		de_dbg(c, "[extra fields at %" INT64_FMT ", dpos=%" INT64_FMT ", dlen=%d]\n",
			pos, pos+2, (int)n);
		pos += 2;
		pos += n;
	}

	if(d->flags & GZIPFLAG_FNAME) {
		// The name runs up to the next NUL byte.
		ret = dbuf_search_byte(c->infile, 0x00, pos, c->infile->len - pos,
			&foundpos);
		if(!ret) {
			de_err(c, "Invalid NAME field\n");
			goto done;
		}

		string_len = foundpos - pos;

		member_name = ucstring_create(c);
		// 300 is the limit passed to dbuf_read_to_ucstring_n() for the name.
		dbuf_read_to_ucstring_n(c->infile, pos, string_len, 300,
			member_name, 0, DE_ENCODING_LATIN1);
		de_dbg(c, "file name at %" INT64_FMT ", len=%" INT64_FMT ": \"%s\"\n",
			pos, string_len, ucstring_get_printable_sz(member_name));
		pos = foundpos + 1;
	}

	if(d->flags & GZIPFLAG_FCOMMENT) {
		// The comment is skipped; only its terminating NUL is located.
		ret = dbuf_search_byte(c->infile, 0x00, pos, c->infile->len - pos,
			&foundpos);
		if(!ret) {
			de_err(c, "Invalid COMMENT field\n");
			goto done;
		}
		pos = foundpos + 1;
	}

	if(d->flags & GZIPFLAG_FHCRC) {
		// The 2-byte header CRC is skipped without being examined.
		pos += 2;
	}

	de_dbg(c, "compressed blocks at %" INT64_FMT "\n", pos);

	if(!d->output_file) {
		// The output file is created once; the name and mod time of the
		// member being processed at that moment are attached to it.
		fi = de_finfo_create(c);

		if(member_name && c->filenames_from_file) {
			de_finfo_set_name_from_ucstring(c, fi, member_name);
			fi->original_filename_flag = 1;
		}

		if(mod_time_ts.is_valid) {
			fi->mod_time = mod_time_ts;
		}

		d->output_file = dbuf_create_output_file(c, member_name?NULL:"bin", fi, 0);
	}

	ret = de_uncompress_deflate(c->infile, pos, c->infile->len - pos,
		d->output_file, &cmpr_data_len);

	if(!ret) goto done;
	pos += cmpr_data_len;

	crc32_field = (de_uint32)de_getui32le(pos);
	de_dbg(c, "crc32: 0x%08x\n", (unsigned int)crc32_field);
	pos += 4;

	// TODO: Validate CRCs

	isize = de_getui32le(pos);
	de_dbg(c, "uncompressed size (mod 2^32): %u\n", (unsigned int)isize);
	pos += 4;

	retval = 1;

done:
	if(retval)
		*member_size = pos - pos1;
	else
		*member_size = 0;
	ucstring_destroy(member_name);
	de_finfo_destroy(c, fi);
	de_dbg_indent_restore(c, saved_indent_level);
	return retval;
}
// Walk the Unicode table that starts at d->unicode_table_pos and record the
// codepoints it assigns to each glyph of the font.
//
// Each glyph's entry is a run of bytes ending with an 0xff byte. The leading
// UTF-8 sequences of an entry are decoded one at a time: the first becomes the
// glyph's primary codepoint, any others are passed to do_extra_codepoint().
// At most sizeof(glyph_bytes) bytes of an entry are examined.
static void do_psf2_unicode_table(deark *c, lctx *d, struct de_bitmap_font *font)
{
	de_byte glyph_bytes[200];
	de_int64 glyph_idx;
	de_int64 entry_pos = d->unicode_table_pos;

	de_dbg(c, "Unicode table at %d\n", (int)d->unicode_table_pos);
	de_dbg_indent(c, 1);

	// Stop after the last glyph, or when the table runs off the end of the file.
	for(glyph_idx=0; glyph_idx<d->num_glyphs && entry_pos<c->infile->len; glyph_idx++) {
		de_int64 terminator_pos;
		de_int64 nbytes;
		de_int64 offset = 0;
		de_int64 num_codepoints = 0;

		// The entry ends at the next 0xff byte. No terminator means no more
		// usable entries.
		if(!dbuf_search_byte(c->infile, 0xff, entry_pos,
			c->infile->len - entry_pos, &terminator_pos))
		{
			break;
		}

		// Clamp the entry size to what fits in the local buffer.
		nbytes = terminator_pos - entry_pos;
		if(nbytes<0) {
			nbytes = 0;
		}
		else if(nbytes>(de_int64)sizeof(glyph_bytes)) {
			nbytes = (de_int64)sizeof(glyph_bytes);
		}

		de_read(glyph_bytes, entry_pos, nbytes);

		while(offset < nbytes) {
			de_int32 codepoint;
			de_int64 seq_len;

			if(!de_utf8_to_uchar(&glyph_bytes[offset], nbytes-offset,
				&codepoint, &seq_len))
			{
				// If there are any multi-codepoint aliases for this glyph, we
				// expect de_utf8_to_uchar() to fail when it hits the 0xfe byte.
				// So, this is not necessarily an error.
				break;
			}

			if(num_codepoints==0) {
				// The first codepoint is the glyph's primary one.
				de_dbg2(c, "char[%d] = U+%04x\n", (int)glyph_idx, (unsigned int)codepoint);
				font->char_array[glyph_idx].codepoint_unicode = codepoint;
			}
			else {
				do_extra_codepoint(c, d, font, glyph_idx, codepoint);
			}

			num_codepoints++;
			offset += seq_len;
		}

		if(num_codepoints==0) {
			de_warn(c, "Missing codepoint for char #%d\n", (int)glyph_idx);
		}

		// The next entry begins right after the terminator.
		entry_pos = terminator_pos+1;
	}

	font->has_unicode_codepoints = 1;
	font->prefer_unicode = 1;

	de_dbg_indent(c, -1);
}
// Generic (ImageMagick?) profile. Hex-encoded, with three header lines. static void on_im_generic_profile_main(deark *c, lctx *d, struct text_chunk_ctx *tcc, dbuf *inf, i64 pos1, i64 len) { int k; i64 pos = pos1; i64 dlen; int dump_to_file = 0; int decode_to_membuf = 0; const char *ext = NULL; // Skip the first three lines for(k=0; k<3; k++) { int ret; i64 foundpos = 0; ret = dbuf_search_byte(inf, 0x0a, pos, pos1+len-pos, &foundpos); if(!ret) goto done; pos = foundpos+1; } dlen = pos1+len-pos; if(tcc->im_generic_profile_type==PROFILETYPE_XMP) { dump_to_file = 1; ext = "xmp"; } else if(tcc->im_generic_profile_type==PROFILETYPE_8BIM) { decode_to_membuf = 1; } else if(tcc->im_generic_profile_type==PROFILETYPE_IPTC) { if(c->extract_level>=2) { dump_to_file = 1; ext = "iptc"; } else { decode_to_membuf = 1; } } else if(tcc->im_generic_profile_type==PROFILETYPE_ICC) { dump_to_file = 1; ext = "icc"; } else { if(c->extract_level>=2) { dump_to_file = 1; ext = "profile.bin"; } } if(dump_to_file) { dbuf *outf; outf = dbuf_create_output_file(c, ext?ext:"bin", NULL, DE_CREATEFLAG_IS_AUX); de_decode_base16(c, inf, pos, dlen, outf, 0); dbuf_close(outf); } if(decode_to_membuf) { dbuf *tmpf; tmpf = dbuf_create_membuf(c, 0, 0); de_decode_base16(c, inf, pos, dlen, tmpf, 0); if(tcc->im_generic_profile_type==PROFILETYPE_8BIM) { de_fmtutil_handle_photoshop_rsrc(c, tmpf, 0, tmpf->len, 0x0); } else if(tcc->im_generic_profile_type==PROFILETYPE_IPTC) { de_fmtutil_handle_iptc(c, tmpf, 0, tmpf->len, 0x0); } dbuf_close(tmpf); } done: ; }
// An advanced function for reading a string from a file. // The issue is that some strings are both human-readable and machine-readable. // In such a case, we'd like to read some data from a file into a nice printable // ucstring, while also making some or all of the raw bytes available, say for // byte-for-byte string comparisons. // Plus (for NUL-terminated/padded strings), we may need to know the actual length // of the string in the file, so that it can be skipped over, even if we don't // care about the whole string. // Caller is responsible for calling destroy_stringreader() on the returned value. // max_bytes_to_scan: The maximum number of bytes to read from the file. // max_bytes_to_keep: The maximum (or in some cases the exact) number of bytes, // not counting any NUL terminator, to return in ->sz. // The ->str field is a Unicode version of ->sz, so this also affects ->str. // If DE_CONVFLAG_STOP_AT_NUL is not set, it is assumed we are reading a string // of known length, that may have internal NUL bytes. The caller must set // max_bytes_to_scan and max_bytes_to_keep to the same value. The ->sz field will // always be allocated with this many bytes, plus one more for an artificial NUL // terminator. // If DE_CONVFLAG_WANT_UTF8 is set, then the ->sz_utf8 field will be set to a // UTF-8 version of ->str. This is mainly useful if the original string was // UTF-16. sz_utf8 is not "printable" -- use ucstring_get_printable_sz_n(str) for // that. 
// Recognized flags: // - DE_CONVFLAG_STOP_AT_NUL // - DE_CONVFLAG_WANT_UTF8 struct de_stringreaderdata *dbuf_read_string(dbuf *f, i64 pos, i64 max_bytes_to_scan, i64 max_bytes_to_keep, unsigned int flags, int encoding) { deark *c = f->c; struct de_stringreaderdata *srd; i64 foundpos = 0; int ret; i64 bytes_avail_to_read; i64 bytes_to_malloc; i64 x_strlen; srd = de_malloc(c, sizeof(struct de_stringreaderdata)); srd->str = ucstring_create(c); bytes_avail_to_read = max_bytes_to_scan; if(bytes_avail_to_read > f->len-pos) { bytes_avail_to_read = f->len-pos; } srd->bytes_consumed = bytes_avail_to_read; // default // From here on, we can safely bail out ("goto done"). The // de_stringreaderdata struct is sufficiently valid. if(!(flags&DE_CONVFLAG_STOP_AT_NUL) && (max_bytes_to_scan != max_bytes_to_keep)) { // To reduce possible confusion, we require that // max_bytes_to_scan==max_bytes_to_keep in this case. srd->sz = de_malloc(c, max_bytes_to_keep+1); goto done; } if(flags&DE_CONVFLAG_STOP_AT_NUL) { ret = dbuf_search_byte(f, 0x00, pos, bytes_avail_to_read, &foundpos); if(ret) { srd->found_nul = 1; } else { // No NUL byte found. Could be an error in some formats, but in // others NUL is used as separator or as padding, not a terminator. 
foundpos = pos+bytes_avail_to_read; } x_strlen = foundpos-pos; srd->bytes_consumed = x_strlen+1; } else { x_strlen = max_bytes_to_keep; srd->bytes_consumed = x_strlen; } bytes_to_malloc = x_strlen+1; if(bytes_to_malloc>(max_bytes_to_keep+1)) { bytes_to_malloc = max_bytes_to_keep+1; srd->was_truncated = 1; } srd->sz = de_malloc(c, bytes_to_malloc); dbuf_read(f, (u8*)srd->sz, pos, bytes_to_malloc-1); // The last byte remains NUL ucstring_append_bytes(srd->str, (const u8*)srd->sz, bytes_to_malloc-1, 0, encoding); if(flags&DE_CONVFLAG_WANT_UTF8) { srd->sz_utf8_strlen = (size_t)ucstring_count_utf8_bytes(srd->str); srd->sz_utf8 = de_malloc(c, srd->sz_utf8_strlen + 1); ucstring_to_sz(srd->str, srd->sz_utf8, srd->sz_utf8_strlen + 1, 0, DE_ENCODING_UTF8); } done: if(!srd->sz) { // Always return a valid sz, even on failure. srd->sz = de_malloc(c, 1); } if((flags&DE_CONVFLAG_WANT_UTF8) && !srd->sz_utf8) { // Always return a valid sz_utf8 if it was requested, even on failure. srd->sz_utf8 = de_malloc(c, 1); srd->sz_utf8_strlen = 0; } return srd; }