Exemple #1
0
// Read (up to) len bytes from f, translate them to characters, and append
// them to s.
void dbuf_read_to_ucstring(dbuf *f, i64 pos, i64 len,
	de_ucstring *s, unsigned int conv_flags, int encoding)
{
	u8 *buf = NULL;
	deark *c = f->c;

	if(conv_flags & DE_CONVFLAG_STOP_AT_NUL) {
		i64 foundpos = 0;
		if(dbuf_search_byte(f, 0x00, pos, len, &foundpos)) {
			len = foundpos - pos;
		}
	}

	buf = de_malloc(c, len);
	dbuf_read(f, buf, pos, len);
	ucstring_append_bytes(s, buf, len, 0, encoding);
	de_free(c, buf);
}
Exemple #2
0
static int do_gzip_read_member(deark *c, lctx *d, i64 pos1, i64 *member_size)
{
	u8 b0, b1;
	i64 pos;
	i64 n;
	i64 foundpos;
	i64 string_len;
	i64 cmpr_data_len;
	i64 mod_time_unix;
	u32 crc_calculated;
	de_ucstring *member_name = NULL;
	int saved_indent_level;
	int ret;
	struct member_data *md = NULL;
	int retval = 0;

	md = de_malloc(c, sizeof(struct member_data));

	de_dbg_indent_save(c, &saved_indent_level);

	de_dbg(c, "gzip member at %d", (int)pos1);
	de_dbg_indent(c, 1);
	pos = pos1;

	b0 = de_getbyte(pos+0);
	b1 = de_getbyte(pos+1);
	if(b0!=0x1f || b1!=0x8b) {
		de_err(c, "Invalid gzip signature at %d. This is not a valid gzip file.",
			(int)pos1);
		goto done;
	}

	md->cmpr_code = de_getbyte(pos+2);
	if(md->cmpr_code!=0x08) {
		de_err(c, "Unsupported compression type (%d)", (int)md->cmpr_code);
		goto done;
	}

	md->flags = de_getbyte(pos+3);
	de_dbg(c, "flags: 0x%02x", (unsigned int)md->flags);
	pos += 4;

	mod_time_unix = de_getu32le(pos);
	de_unix_time_to_timestamp(mod_time_unix, &md->mod_time_ts, 0x1);
	if(md->mod_time_ts.is_valid) {
		char timestamp_buf[64];
		de_timestamp_to_string(&md->mod_time_ts, timestamp_buf, sizeof(timestamp_buf), 0);
		de_dbg(c, "mod time: %" I64_FMT " (%s)", mod_time_unix, timestamp_buf);
	}
	pos += 4;

	b0 = de_getbyte(pos++);
	de_dbg(c, "extra flags: 0x%02x", (unsigned int)b0);

	b0 = de_getbyte(pos++);
	de_dbg(c, "OS or filesystem: %d (%s)", (int)b0, get_os_name(b0));

	if(md->flags & GZIPFLAG_FEXTRA) {
		n = de_getu16le(pos); // XLEN
		// TODO: It might be interesting to dissect these extra fields, but it's
		// hard to find even a single file that uses them.
		de_dbg(c, "[extra fields at %d, dpos=%d, dlen=%d]",
			(int)pos, (int)(pos+2), (int)n);
		pos += 2;
		pos += n;
	}

	if(md->flags & GZIPFLAG_FNAME) {
		ret =  dbuf_search_byte(c->infile, 0x00, pos, c->infile->len - pos,
			&foundpos);
		if(!ret) {
			de_err(c, "Invalid NAME field");
			goto done;
		}

		string_len = foundpos - pos;

		member_name = ucstring_create(c);
#define DE_GZIP_MAX_FNLEN 300
		dbuf_read_to_ucstring_n(c->infile, pos, string_len, DE_GZIP_MAX_FNLEN,
			member_name, 0, DE_ENCODING_LATIN1);
		de_dbg(c, "file name at %d, len=%d: \"%s\"", (int)pos, (int)string_len,
			ucstring_getpsz_d(member_name));
		pos = foundpos + 1;
	}

	if(md->flags & GZIPFLAG_FCOMMENT) {
		ret =  dbuf_search_byte(c->infile, 0x00, pos, c->infile->len - pos,
			&foundpos);
		if(!ret) {
			de_err(c, "Invalid COMMENT field");
			goto done;
		}
		pos = foundpos + 1;
	}

	if(md->flags & GZIPFLAG_FHCRC) {
		md->crc16_reported = (u32)de_getu16le(pos);
		de_dbg(c, "crc16 (reported): 0x%04x", (unsigned int)md->crc16_reported);
		pos += 2;
	}

	de_dbg(c, "compressed blocks at %d", (int)pos);

	if(!d->output_file) {
		// Although any member can have a name and mod time, this metadata
		// is ignored for members after the first one.
		de_finfo *fi = NULL;

		fi = de_finfo_create(c);

		if(member_name && c->filenames_from_file) {
			de_finfo_set_name_from_ucstring(c, fi, member_name, 0);
			fi->original_filename_flag = 1;
		}

		if(md->mod_time_ts.is_valid) {
			fi->mod_time = md->mod_time_ts;
		}

		d->output_file = dbuf_create_output_file(c, member_name?NULL:"bin", fi, 0);

		de_finfo_destroy(c, fi);
	}

	d->output_file->writecallback_fn = our_writecallback;
	d->output_file->userdata = (void*)md;
	md->crco = d->crco;
	de_crcobj_reset(md->crco);

	ret = de_decompress_deflate(c->infile, pos, c->infile->len - pos, d->output_file,
		0, &cmpr_data_len, 0);

	crc_calculated = de_crcobj_getval(md->crco);
	d->output_file->writecallback_fn = NULL;
	d->output_file->userdata = NULL;

	if(!ret) goto done;
	pos += cmpr_data_len;

	de_dbg(c, "crc32 (calculated): 0x%08x", (unsigned int)crc_calculated);

	md->crc32_reported = (u32)de_getu32le(pos);
	de_dbg(c, "crc32 (reported)  : 0x%08x", (unsigned int)md->crc32_reported);
	pos += 4;

	if(crc_calculated != md->crc32_reported) {
		de_warn(c, "CRC check failed: Expected 0x%08x, got 0x%08x",
			(unsigned int)md->crc32_reported, (unsigned int)crc_calculated);
	}

	md->isize = de_getu32le(pos);
	de_dbg(c, "uncompressed size (mod 2^32): %u", (unsigned int)md->isize);
	pos += 4;

	retval = 1;

done:
	if(retval)
		*member_size = pos - pos1;
	else
		*member_size = 0;
	ucstring_destroy(member_name);
	de_free(c, md);
	de_dbg_indent_restore(c, saved_indent_level);
	return retval;
}
Exemple #3
0
static int do_gzip_read_member(deark *c, lctx *d, de_int64 pos1, de_int64 *member_size)
{
	de_byte b0, b1;
	de_int64 cmpr_code;
	de_int64 pos;
	de_int64 n;
	de_int64 foundpos;
	de_int64 string_len;
	de_int64 cmpr_data_len;
	de_int64 isize;
	de_int64 mod_time_unix;
	struct de_timestamp mod_time_ts;
	de_uint32 crc32_field;
	de_ucstring *member_name = NULL;
	de_finfo *fi = NULL;
	int saved_indent_level;
	int ret;
	int retval = 0;

	mod_time_ts.is_valid = 0;

	de_dbg_indent_save(c, &saved_indent_level);

	de_dbg(c, "gzip member at %d\n", (int)pos1);
	de_dbg_indent(c, 1);
	pos = pos1;

	b0 = de_getbyte(pos+0);
	b1 = de_getbyte(pos+1);
	if(b0!=0x1f || b1!=0x8b) {
		de_err(c, "Invalid gzip signature at %d. This is not a valid gzip file.\n",
			(int)pos1);
		goto done;
	}

	cmpr_code=de_getbyte(pos+2);
	if(cmpr_code!=0x08) {
		de_err(c, "Unsupported compression type (%d)\n", (int)cmpr_code);
		goto done;
	}

	d->flags = de_getbyte(pos+3);
	de_dbg(c, "flags: 0x%02x\n", (unsigned int)d->flags);
	pos += 4;

	mod_time_unix = de_getui32le(pos);
	de_unix_time_to_timestamp(mod_time_unix, &mod_time_ts);
	if(mod_time_ts.is_valid) {
		char timestamp_buf[64];
		de_timestamp_to_string(&mod_time_ts, timestamp_buf, sizeof(timestamp_buf), 1);
		de_dbg(c, "mod time: %" INT64_FMT " (%s)\n", mod_time_unix, timestamp_buf);
	}
	pos += 4;

	b0 = de_getbyte(pos++);
	de_dbg(c, "extra flags: 0x%02x\n", (unsigned int)b0);

	b0 = de_getbyte(pos++);
	de_dbg(c, "OS or filesystem: %d (%s)\n", (int)b0, get_os_name(b0));

	if(d->flags & GZIPFLAG_FEXTRA) {
		n = de_getui16le(pos); // XLEN
		// TODO: It might be interesting to dissect these extra fields, but it's
		// hard to find even a single file that uses them.
		de_dbg(c, "[extra fields at %d, dpos=%d, dlen=%d]\n",
			(int)pos, (int)(pos+2), (int)n);
		pos += 2;
		pos += n;
	}

	if(d->flags & GZIPFLAG_FNAME) {
		ret =  dbuf_search_byte(c->infile, 0x00, pos, c->infile->len - pos,
			&foundpos);
		if(!ret) {
			de_err(c, "Invalid NAME field\n");
			goto done;
		}

		string_len = foundpos - pos;

		member_name = ucstring_create(c);
		dbuf_read_to_ucstring_n(c->infile, pos, string_len, 300, member_name, 0, DE_ENCODING_LATIN1);
		de_dbg(c, "file name at %d, len=%d: \"%s\"\n", (int)pos, (int)string_len,
			ucstring_get_printable_sz(member_name));
		pos = foundpos + 1;
	}

	if(d->flags & GZIPFLAG_FCOMMENT) {
		ret =  dbuf_search_byte(c->infile, 0x00, pos, c->infile->len - pos,
			&foundpos);
		if(!ret) {
			de_err(c, "Invalid COMMENT field\n");
			goto done;
		}
		pos = foundpos + 1;
	}

	if(d->flags & GZIPFLAG_FHCRC) {
		pos += 2;
	}

	de_dbg(c, "compressed blocks at %d\n", (int)pos);

	if(!d->output_file) {
		fi = de_finfo_create(c);

		if(member_name && c->filenames_from_file) {
			de_finfo_set_name_from_ucstring(c, fi, member_name);
			fi->original_filename_flag = 1;
		}

		if(mod_time_ts.is_valid) {
			fi->mod_time = mod_time_ts;
		}

		d->output_file = dbuf_create_output_file(c, member_name?NULL:"bin", fi, 0);
	}

	ret = de_uncompress_deflate(c->infile, pos, c->infile->len - pos, d->output_file, &cmpr_data_len);

	if(!ret) goto done;
	pos += cmpr_data_len;

	crc32_field = (de_uint32)de_getui32le(pos);
	de_dbg(c, "crc32: 0x%08x\n", (unsigned int)crc32_field);
	pos += 4;
	// TODO: Validate CRCs

	isize = de_getui32le(pos);
	de_dbg(c, "uncompressed size (mod 2^32): %u\n", (unsigned int)isize);
	pos += 4;

	retval = 1;

done:
	if(retval)
		*member_size = pos - pos1;
	else
		*member_size = 0;
	ucstring_destroy(member_name);
	de_finfo_destroy(c, fi);
	de_dbg_indent_restore(c, saved_indent_level);
	return retval;
}
Exemple #4
0
static void do_psf2_unicode_table(deark *c, lctx *d, struct de_bitmap_font *font)
{
	de_int64 cur_idx;
	de_int64 pos;
	int ret;
	de_int64 foundpos;
	de_int64 char_data_len;
	de_byte char_data_buf[200];
	de_int32 ch;
	de_int64 utf8len;

	de_dbg(c, "Unicode table at %d\n", (int)d->unicode_table_pos);
	de_dbg_indent(c, 1);

	pos = d->unicode_table_pos;
	cur_idx = 0;
	while(1) {
		de_int64 pos_in_char_data;
		de_int64 cp_idx;

		if(cur_idx >= d->num_glyphs) break;
		if(pos >= c->infile->len) break;

		// Figure out the size of the data for this glyph
		ret = dbuf_search_byte(c->infile, 0xff, pos,
			c->infile->len - pos, &foundpos);
		if(!ret) break;
		char_data_len = foundpos - pos;
		if(char_data_len<0) char_data_len=0;
		else if(char_data_len>(de_int64)sizeof(char_data_buf)) char_data_len=(de_int64)sizeof(char_data_buf);

		// Read all the data for this glyph
		de_read(char_data_buf, pos, char_data_len);

		// Read the codepoints for this glyph
		cp_idx = 0;
		pos_in_char_data = 0;
		while(1) {
			if(pos_in_char_data >= char_data_len) break;

			ret = de_utf8_to_uchar(&char_data_buf[pos_in_char_data], char_data_len-pos_in_char_data,
				&ch, &utf8len);
			if(!ret) {
				// If there are any multi-codepoint aliases for this glyph, we
				// expect de_utf8_to_uchar() to fail when it hits the 0xfe byte.
				// So, this is not necessarily an error.
				break;
			}

			if(cp_idx==0) {
				// This is the primary Unicode codepoint for this glyph
				de_dbg2(c, "char[%d] = U+%04x\n", (int)cur_idx, (unsigned int)ch);
				font->char_array[cur_idx].codepoint_unicode = ch;
			}
			else {
				do_extra_codepoint(c, d, font, cur_idx, ch);
			}

			cp_idx++;
			pos_in_char_data += utf8len;
		}

		if(cp_idx==0) {
			de_warn(c, "Missing codepoint for char #%d\n", (int)cur_idx);
		}

		// Advance to the next glyph
		pos = foundpos+1;
		cur_idx++;
	}

	font->has_unicode_codepoints = 1;
	font->prefer_unicode = 1;

	de_dbg_indent(c, -1);
}
Exemple #5
0
// Generic (ImageMagick?) profile. Hex-encoded, with three header lines.
static void on_im_generic_profile_main(deark *c, lctx *d,
	struct text_chunk_ctx *tcc, dbuf *inf, i64 pos1, i64 len)
{
	int k;
	i64 pos = pos1;
	i64 dlen;
	int dump_to_file = 0;
	int decode_to_membuf = 0;
	const char *ext = NULL;

	// Skip the first three lines
	for(k=0; k<3; k++) {
		int ret;
		i64 foundpos = 0;
		ret = dbuf_search_byte(inf, 0x0a, pos, pos1+len-pos, &foundpos);
		if(!ret) goto done;
		pos = foundpos+1;
	}
	dlen = pos1+len-pos;

	if(tcc->im_generic_profile_type==PROFILETYPE_XMP) {
		dump_to_file = 1;
		ext = "xmp";
	}
	else if(tcc->im_generic_profile_type==PROFILETYPE_8BIM) {
		decode_to_membuf = 1;
	}
	else if(tcc->im_generic_profile_type==PROFILETYPE_IPTC) {
		if(c->extract_level>=2) {
			dump_to_file = 1;
			ext = "iptc";
		}
		else {
			decode_to_membuf = 1;
		}
	}
	else if(tcc->im_generic_profile_type==PROFILETYPE_ICC) {
		dump_to_file = 1;
		ext = "icc";
	}
	else {
		if(c->extract_level>=2) {
			dump_to_file = 1;
			ext = "profile.bin";
		}
	}

	if(dump_to_file) {
		dbuf *outf;
		outf = dbuf_create_output_file(c, ext?ext:"bin", NULL, DE_CREATEFLAG_IS_AUX);
		de_decode_base16(c, inf, pos, dlen, outf, 0);
		dbuf_close(outf);
	}

	if(decode_to_membuf) {
		dbuf *tmpf;

		tmpf = dbuf_create_membuf(c, 0, 0);
		de_decode_base16(c, inf, pos, dlen, tmpf, 0);

		if(tcc->im_generic_profile_type==PROFILETYPE_8BIM) {
			de_fmtutil_handle_photoshop_rsrc(c, tmpf, 0, tmpf->len, 0x0);
		}
		else if(tcc->im_generic_profile_type==PROFILETYPE_IPTC) {
			de_fmtutil_handle_iptc(c, tmpf, 0, tmpf->len, 0x0);
		}

		dbuf_close(tmpf);
	}

done:
	;
}
Exemple #6
0
// An advanced function for reading a string from a file.
// The issue is that some strings are both human-readable and machine-readable.
// In such a case, we'd like to read some data from a file into a nice printable
// ucstring, while also making some or all of the raw bytes available, say for
// byte-for-byte string comparisons.
// Plus (for NUL-terminated/padded strings), we may need to know the actual length
// of the string in the file, so that it can be skipped over, even if we don't
// care about the whole string.
// Caller is responsible for calling destroy_stringreader() on the returned value.
//  max_bytes_to_scan: The maximum number of bytes to read from the file.
//  max_bytes_to_keep: The maximum (or in some cases the exact) number of bytes,
//   not counting any NUL terminator, to return in ->sz.
//   The ->str field is a Unicode version of ->sz, so this also affects ->str.
// If DE_CONVFLAG_STOP_AT_NUL is not set, it is assumed we are reading a string
// of known length, that may have internal NUL bytes. The caller must set
// max_bytes_to_scan and max_bytes_to_keep to the same value. The ->sz field will
// always be allocated with this many bytes, plus one more for an artificial NUL
// terminator.
// If DE_CONVFLAG_WANT_UTF8 is set, then the ->sz_utf8 field will be set to a
// UTF-8 version of ->str. This is mainly useful if the original string was
// UTF-16. sz_utf8 is not "printable" -- use ucstring_get_printable_sz_n(str) for
// that.
// Recognized flags:
//   - DE_CONVFLAG_STOP_AT_NUL
//   - DE_CONVFLAG_WANT_UTF8
struct de_stringreaderdata *dbuf_read_string(dbuf *f, i64 pos,
	i64 max_bytes_to_scan,
	i64 max_bytes_to_keep,
	unsigned int flags, int encoding)
{
	deark *c = f->c;
	struct de_stringreaderdata *srd;
	i64 foundpos = 0;
	int ret;
	i64 bytes_avail_to_read;
	i64 bytes_to_malloc;
	i64 x_strlen;

	srd = de_malloc(c, sizeof(struct de_stringreaderdata));
	srd->str = ucstring_create(c);

	bytes_avail_to_read = max_bytes_to_scan;
	if(bytes_avail_to_read > f->len-pos) {
		bytes_avail_to_read = f->len-pos;
	}

	srd->bytes_consumed = bytes_avail_to_read; // default

	// From here on, we can safely bail out ("goto done"). The
	// de_stringreaderdata struct is sufficiently valid.

	if(!(flags&DE_CONVFLAG_STOP_AT_NUL) &&
		(max_bytes_to_scan != max_bytes_to_keep))
	{
		// To reduce possible confusion, we require that
		// max_bytes_to_scan==max_bytes_to_keep in this case.
		srd->sz = de_malloc(c, max_bytes_to_keep+1);
		goto done;
	}

	if(flags&DE_CONVFLAG_STOP_AT_NUL) {
		ret = dbuf_search_byte(f, 0x00, pos, bytes_avail_to_read, &foundpos);
		if(ret) {
			srd->found_nul = 1;
		}
		else {
			// No NUL byte found. Could be an error in some formats, but in
			// others NUL is used as separator or as padding, not a terminator.
			foundpos = pos+bytes_avail_to_read;
		}

		x_strlen = foundpos-pos;
		srd->bytes_consumed = x_strlen+1;
	}
	else {
		x_strlen = max_bytes_to_keep;
		srd->bytes_consumed = x_strlen;
	}

	bytes_to_malloc = x_strlen+1;
	if(bytes_to_malloc>(max_bytes_to_keep+1)) {
		bytes_to_malloc = max_bytes_to_keep+1;
		srd->was_truncated = 1;
	}

	srd->sz = de_malloc(c, bytes_to_malloc);
	dbuf_read(f, (u8*)srd->sz, pos, bytes_to_malloc-1); // The last byte remains NUL

	ucstring_append_bytes(srd->str, (const u8*)srd->sz, bytes_to_malloc-1, 0, encoding);

	if(flags&DE_CONVFLAG_WANT_UTF8) {
		srd->sz_utf8_strlen = (size_t)ucstring_count_utf8_bytes(srd->str);
		srd->sz_utf8 = de_malloc(c, srd->sz_utf8_strlen + 1);
		ucstring_to_sz(srd->str, srd->sz_utf8, srd->sz_utf8_strlen + 1, 0, DE_ENCODING_UTF8);
	}

done:
	if(!srd->sz) {
		// Always return a valid sz, even on failure.
		srd->sz = de_malloc(c, 1);
	}
	if((flags&DE_CONVFLAG_WANT_UTF8) && !srd->sz_utf8) {
		// Always return a valid sz_utf8 if it was requested, even on failure.
		srd->sz_utf8 = de_malloc(c, 1);
		srd->sz_utf8_strlen = 0;
	}
	return srd;
}