Пример #1
0
// Writes a printable, UTF-8 encoded rendition of s into szbuf, whose capacity
// is szbuf_len.
// When has_max is nonzero, the working copy is first passed through
// ucstring_truncate() with max_chars, so that no more than max_chars Unicode
// characters from s are used. When has_max is zero, max_chars is ignored.
// All transformations are applied to the copy returned by ucstring_clone(),
// which is destroyed before returning.
static void ucstring_to_printable_sz_internal(de_ucstring *s, char *szbuf, size_t szbuf_len,
	int has_max, de_int64 max_chars)
{
	de_ucstring *tmpstr = ucstring_clone(s);

	if(has_max != 0) {
		// TODO: Maybe this should add an ellipsis, or something.
		ucstring_truncate(tmpstr, max_chars);
	}

	ucstring_make_printable(tmpstr);
	ucstring_to_sz(tmpstr, szbuf, szbuf_len, DE_ENCODING_UTF8);

	ucstring_destroy(tmpstr);
}
Пример #2
0
// Compares the ucstring s against the NUL-terminated byte string s2.
// s is first converted to a temporary NUL-terminated string using the given
// encoding, and that temporary string is then compared to s2 with de_strcmp().
//
// Returns:
//   0 if both s and s2 are NULL,
//   1 if exactly one of them is NULL,
//   otherwise the value returned by de_strcmp(converted s, s2).
int ucstring_strcmp(de_ucstring *s, const char *s2, int encoding)
{
	size_t s2len;
	char *tmpbuf;
	int ret;

	if(!s && !s2) return 0;
	if(!s || !s2) return 1;

	// The temporary buffer is sized from s2: room for s2len bytes plus the
	// NUL terminator.
	// NOTE(review): if ucstring_to_sz() silently truncates to fit the buffer,
	// an s that merely begins with s2 would compare as equal -- confirm
	// against ucstring_to_sz() and enlarge the buffer if so.
	s2len = de_strlen(s2);
	tmpbuf = de_malloc(s->c, s2len+1);
	ucstring_to_sz(s, tmpbuf, s2len+1, encoding);

	// Compare the converted string against s2 (not against itself, which
	// would always report equality).
	ret = de_strcmp(tmpbuf, s2);

	de_free(s->c, tmpbuf);
	return ret;
}
Пример #3
0
// Creates a dbuf representing a new output file, and returns it.
//
//  ext: Optional filename extension/suffix to use when constructing the name.
//  fi: Optional file info (name, directory flag, timestamps, ...). If given,
//   a copy of it is stored in the returned dbuf's ->fi_copy field (unless the
//   file was skipped before that point).
//  createflags: A combination of DE_CREATEFLAG_* flags. Only
//   DE_CREATEFLAG_IS_AUX is examined here.
//
// The returned dbuf's ->btype is set to DBUF_TYPE_NULL if the file is
// not going to be written: a directory entry when c->keep_dir_entries is
// not set, a file excluded by c->extract_policy, a file whose index is
// outside the range selected by c->first_output_file/c->max_output_files,
// list mode, or a failure to open the file for writing.
// Otherwise ->btype reflects the destination: a TAR member, an in-memory
// buffer destined for a ZIP archive, stdout, or a regular file.
//
// Side effects on c: ->file_count is incremented for every file that is not
// excluded by the directory or extract-policy checks; ->num_files_extracted
// is incremented for every file that is also within the selected index range.
dbuf *dbuf_create_output_file(deark *c, const char *ext, de_finfo *fi,
	unsigned int createflags)
{
	char nbuf[500]; // The constructed output filename
	char msgbuf[200]; // Receives an error message from de_fopen_for_write()
	dbuf *f;
	const char *basefn;
	int file_index;
	u8 is_directory = 0;
	char *name_from_finfo = NULL; // UTF-8 copy of fi->file_name_internal; freed at "done"
	i64 name_from_finfo_len = 0; // Size of name_from_finfo in bytes, including the NUL terminator

	if(ext && fi && fi->original_filename_flag) {
		de_dbg(c, "[internal warning: Incorrect use of create_output_file]");
	}

	f = de_malloc(c, sizeof(dbuf));
	f->c = c;
	f->max_len_hard = c->max_output_file_size;
	f->is_managed = 1;

	if(fi && fi->is_directory) {
		is_directory = 1;
	}

	// The next few checks can cause the file to be skipped without consuming
	// a file index (c->file_count is not incremented, and no name is set).

	if(is_directory && !c->keep_dir_entries) {
		de_dbg(c, "skipping 'directory' file");
		f->btype = DBUF_TYPE_NULL;
		goto done;
	}

	if(c->extract_policy==DE_EXTRACTPOLICY_MAINONLY) {
		if(createflags&DE_CREATEFLAG_IS_AUX) {
			de_dbg(c, "skipping 'auxiliary' file");
			f->btype = DBUF_TYPE_NULL;
			goto done;
		}
	}
	else if(c->extract_policy==DE_EXTRACTPOLICY_AUXONLY) {
		if(!(createflags&DE_CREATEFLAG_IS_AUX)) {
			de_dbg(c, "skipping 'main' file");
			f->btype = DBUF_TYPE_NULL;
			goto done;
		}
	}

	// Claim the next file index for this file.
	file_index = c->file_count;
	c->file_count++;

	basefn = c->base_output_filename ? c->base_output_filename : "output";

	if(fi && ucstring_isnonempty(fi->file_name_internal)) {
		// Convert the name to a UTF-8 string. The +1 is for the NUL terminator.
		name_from_finfo_len = 1 + ucstring_count_utf8_bytes(fi->file_name_internal);
		name_from_finfo = de_malloc(c, name_from_finfo_len);
		ucstring_to_sz(fi->file_name_internal, name_from_finfo, (size_t)name_from_finfo_len, 0,
			DE_ENCODING_UTF8);
	}

	// Construct the output filename, in nbuf.

	if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && !c->base_output_filename &&
		fi && fi->is_directory &&
		(fi->is_root_dir || (fi->detect_root_dot_dir && fi->orig_name_was_dot)))
	{
		// A root directory written to an archive is named ".".
		de_strlcpy(nbuf, ".", sizeof(nbuf));
	}
	else if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && !c->base_output_filename &&
		fi && fi->original_filename_flag && name_from_finfo)
	{
		// TODO: This is a "temporary" hack to allow us to, when both reading from
		// and writing to an archive format, use some semblance of the correct
		// filename (instead of "output.xxx.yyy").
		// There are some things that we don't handle optimally, such as
		// subdirectories.
		// A major redesign of the file naming logic would be good.
		de_strlcpy(nbuf, name_from_finfo, sizeof(nbuf));
	}
	else {
		// The normal case: "<basefn>.<index>.<suffix>", where the suffix is
		// derived from whichever of the finfo name and ext are available.
		char fn_suffix[256];

		if(ext && name_from_finfo) {
			de_snprintf(fn_suffix, sizeof(fn_suffix), "%s.%s", name_from_finfo, ext);
		}
		else if(ext) {
			de_strlcpy(fn_suffix, ext, sizeof(fn_suffix));
		}
		else if(is_directory && name_from_finfo) {
			de_snprintf(fn_suffix, sizeof(fn_suffix), "%s.dir", name_from_finfo);
		}
		else if(name_from_finfo) {
			de_strlcpy(fn_suffix, name_from_finfo, sizeof(fn_suffix));
		}
		else if(is_directory) {
			de_strlcpy(fn_suffix, "dir", sizeof(fn_suffix));
		}
		else {
			de_strlcpy(fn_suffix, "bin", sizeof(fn_suffix));
		}

		de_snprintf(nbuf, sizeof(nbuf), "%s.%03d.%s", basefn, file_index, fn_suffix);
	}

	f->name = de_strdup(c, nbuf);

	if(fi) {
		// The finfo object passed to us at file creation is not required to
		// remain valid, so make a copy of anything in it that we might need
		// later.
		f->fi_copy = de_finfo_create(c);
		finfo_shallow_copy(c, fi, f->fi_copy);

		// Here's where we respect the -intz option, by using it to convert to
		// UTC in some cases.
		if(f->fi_copy->mod_time.is_valid && f->fi_copy->mod_time.tzcode==DE_TZCODE_LOCAL &&
			c->input_tz_offs_seconds!=0)
		{
			de_timestamp_cvt_to_utc(&f->fi_copy->mod_time, -c->input_tz_offs_seconds);
		}

		if(f->fi_copy->image_mod_time.is_valid && f->fi_copy->image_mod_time.tzcode==DE_TZCODE_LOCAL &&
			c->input_tz_offs_seconds!=0)
		{
			de_timestamp_cvt_to_utc(&f->fi_copy->image_mod_time, -c->input_tz_offs_seconds);
		}
	}

	// Skip files whose index is before the first requested file...
	if(file_index < c->first_output_file) {
		f->btype = DBUF_TYPE_NULL;
		goto done;
	}

	// ... or past the last requested file. (A negative max_output_files
	// disables this check.)
	if(c->max_output_files>=0 &&
		file_index >= c->first_output_file + c->max_output_files)
	{
		f->btype = DBUF_TYPE_NULL;
		goto done;
	}

	c->num_files_extracted++;

	if(c->extrlist_dbuf) {
		// Record the filename in the list of extracted files.
		dbuf_printf(c->extrlist_dbuf, "%s\n", f->name);
		dbuf_flush(c->extrlist_dbuf);
	}

	if(c->list_mode) {
		// In list mode, just print the filename; nothing is written.
		f->btype = DBUF_TYPE_NULL;
		if(c->list_mode_include_file_id) {
			de_msg(c, "%d:%s", file_index, f->name);
		}
		else {
			de_msg(c, "%s", f->name);
		}
		goto done;
	}

	// Set up the dbuf according to the output destination.

	if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE && c->archive_fmt==DE_ARCHIVEFMT_TAR) {
		de_info(c, "Adding %s to TAR file", f->name);
		f->btype = DBUF_TYPE_ODBUF;
		// A dummy max_len_hard value. The parent will do the checking.
		f->max_len_hard = DE_DUMMY_MAX_FILE_SIZE;
		f->writing_to_tar_archive = 1;
		de_tar_start_member_file(c, f);
	}
	else if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE) { // ZIP
		i64 initial_alloc;
		de_info(c, "Adding %s to ZIP file", f->name);
		// The member file's data is accumulated in a memory buffer.
		f->btype = DBUF_TYPE_MEMBUF;
		f->max_len_hard = DE_MAX_MEMBUF_SIZE;
		if(is_directory) {
			// A directory entry is not expected to have any data associated
			// with it (besides the files it contains).
			initial_alloc = 16;
		}
		else {
			initial_alloc = 65536;
		}
		f->membuf_buf = de_malloc(c, initial_alloc);
		f->membuf_alloc = initial_alloc;
		f->write_memfile_to_zip_archive = 1;
	}
	else if(c->output_style==DE_OUTPUTSTYLE_STDOUT) {
		de_info(c, "Writing %s to [stdout]", f->name);
		f->btype = DBUF_TYPE_STDOUT;
		// TODO: Should we increase f->max_len_hard?
		f->fp = stdout;
	}
	else {
		// The default: write to a regular file.
		de_info(c, "Writing %s", f->name);
		f->btype = DBUF_TYPE_OFILE;
		f->fp = de_fopen_for_write(c, f->name, msgbuf, sizeof(msgbuf),
			c->overwrite_mode, 0);

		if(!f->fp) {
			// Report the failure, and fall back to a NULL-type dbuf.
			de_err(c, "Failed to write %s: %s", f->name, msgbuf);
			f->btype = DBUF_TYPE_NULL;
		}
	}

done:
	// name_from_finfo is still NULL here if it was never allocated.
	de_free(c, name_from_finfo);
	return f;
}
Пример #4
0
// An advanced function for reading a string from a file.
// The issue is that some strings are both human-readable and machine-readable.
// In such a case, we'd like to read some data from a file into a nice printable
// ucstring, while also making some or all of the raw bytes available, say for
// byte-for-byte string comparisons.
// Plus (for NUL-terminated/padded strings), we may need to know the actual length
// of the string in the file, so that it can be skipped over, even if we don't
// care about the whole string.
// Caller is responsible for calling destroy_stringreader() on the returned value.
//  f, pos: The file to read from, and the offset at which the string starts.
//  max_bytes_to_scan: The maximum number of bytes to read from the file.
//  max_bytes_to_keep: The maximum (or in some cases the exact) number of bytes,
//   not counting any NUL terminator, to return in ->sz.
//   The ->str field is a Unicode version of ->sz, so this also affects ->str.
//  encoding: The encoding used to convert the raw bytes to ->str.
// Negative values of max_bytes_to_scan and max_bytes_to_keep are treated as 0,
// and if pos is beyond the end of the file, no bytes are considered available.
// If DE_CONVFLAG_STOP_AT_NUL is not set, it is assumed we are reading a string
// of known length, that may have internal NUL bytes. The caller must set
// max_bytes_to_scan and max_bytes_to_keep to the same value. The ->sz field will
// always be allocated with this many bytes, plus one more for an artificial NUL
// terminator.
// If DE_CONVFLAG_WANT_UTF8 is set, then the ->sz_utf8 field will be set to a
// UTF-8 version of ->str. This is mainly useful if the original string was
// UTF-16. sz_utf8 is not "printable" -- use ucstring_get_printable_sz_n(str) for
// that.
// Recognized flags:
//   - DE_CONVFLAG_STOP_AT_NUL
//   - DE_CONVFLAG_WANT_UTF8
// Returns a newly allocated de_stringreaderdata. Its ->sz field (and ->sz_utf8,
// if requested) is always allocated, even on failure.
struct de_stringreaderdata *dbuf_read_string(dbuf *f, i64 pos,
	i64 max_bytes_to_scan,
	i64 max_bytes_to_keep,
	unsigned int flags, int encoding)
{
	deark *c = f->c;
	struct de_stringreaderdata *srd;
	i64 foundpos = 0;
	int ret;
	i64 bytes_avail_to_read;
	i64 bytes_to_malloc;
	i64 x_strlen;

	srd = de_malloc(c, sizeof(struct de_stringreaderdata));
	srd->str = ucstring_create(c);

	// Sanitize the requested lengths. They are often derived from untrusted
	// file contents, and a negative value would otherwise propagate into the
	// allocation sizes and read lengths computed below.
	if(max_bytes_to_scan<0) {
		max_bytes_to_scan = 0;
	}
	if(max_bytes_to_keep<0) {
		max_bytes_to_keep = 0;
	}

	bytes_avail_to_read = max_bytes_to_scan;
	if(bytes_avail_to_read > f->len-pos) {
		bytes_avail_to_read = f->len-pos;
	}
	if(bytes_avail_to_read<0) {
		// pos is beyond the end of the file.
		bytes_avail_to_read = 0;
	}

	srd->bytes_consumed = bytes_avail_to_read; // default

	// From here on, we can safely bail out ("goto done"). The
	// de_stringreaderdata struct is sufficiently valid.

	if(!(flags&DE_CONVFLAG_STOP_AT_NUL) &&
		(max_bytes_to_scan != max_bytes_to_keep))
	{
		// To reduce possible confusion, we require that
		// max_bytes_to_scan==max_bytes_to_keep in this case.
		srd->sz = de_malloc(c, max_bytes_to_keep+1);
		goto done;
	}

	if(flags&DE_CONVFLAG_STOP_AT_NUL) {
		ret = dbuf_search_byte(f, 0x00, pos, bytes_avail_to_read, &foundpos);
		if(ret) {
			srd->found_nul = 1;
		}
		else {
			// No NUL byte found. Could be an error in some formats, but in
			// others NUL is used as separator or as padding, not a terminator.
			foundpos = pos+bytes_avail_to_read;
		}

		x_strlen = foundpos-pos;
		// Count the NUL byte as consumed, in addition to the string itself.
		srd->bytes_consumed = x_strlen+1;
	}
	else {
		// Fixed-length string: The full requested size is always used.
		x_strlen = max_bytes_to_keep;
		srd->bytes_consumed = x_strlen;
	}

	// Keep no more than max_bytes_to_keep bytes, plus a NUL terminator.
	bytes_to_malloc = x_strlen+1;
	if(bytes_to_malloc>(max_bytes_to_keep+1)) {
		bytes_to_malloc = max_bytes_to_keep+1;
		srd->was_truncated = 1;
	}

	srd->sz = de_malloc(c, bytes_to_malloc);
	dbuf_read(f, (u8*)srd->sz, pos, bytes_to_malloc-1); // The last byte remains NUL

	ucstring_append_bytes(srd->str, (const u8*)srd->sz, bytes_to_malloc-1, 0, encoding);

	if(flags&DE_CONVFLAG_WANT_UTF8) {
		srd->sz_utf8_strlen = (size_t)ucstring_count_utf8_bytes(srd->str);
		srd->sz_utf8 = de_malloc(c, srd->sz_utf8_strlen + 1);
		ucstring_to_sz(srd->str, srd->sz_utf8, srd->sz_utf8_strlen + 1, 0, DE_ENCODING_UTF8);
	}

done:
	if(!srd->sz) {
		// Always return a valid sz, even on failure.
		srd->sz = de_malloc(c, 1);
	}
	if((flags&DE_CONVFLAG_WANT_UTF8) && !srd->sz_utf8) {
		// Always return a valid sz_utf8 if it was requested, even on failure.
		srd->sz_utf8 = de_malloc(c, 1);
		srd->sz_utf8_strlen = 0;
	}
	return srd;
}