// If has_max!=0, uses no more than max_chars Unicode characters from s to create the // printable string. static void ucstring_to_printable_sz_internal(de_ucstring *s, char *szbuf, size_t szbuf_len, int has_max, de_int64 max_chars) { de_ucstring *s2 = NULL; s2 = ucstring_clone(s); if(has_max) { // TODO: Maybe this should add an ellipsis, or something. ucstring_truncate(s2, max_chars); } ucstring_make_printable(s2); ucstring_to_sz(s2, szbuf, szbuf_len, DE_ENCODING_UTF8); ucstring_destroy(s2); }
// Compares the ucstring s, converted to the given encoding, with the
// NUL-terminated string s2.
// Returns 0 if both s and s2 are NULL, 1 if exactly one of them is NULL,
// and otherwise the result of de_strcmp() on the converted text and s2.
int ucstring_strcmp(de_ucstring *s, const char *s2, int encoding)
{
	size_t s2len;
	char *tmpbuf;
	int ret;

	if(!s && !s2) return 0;
	if(!s || !s2) return 1;

	// NOTE(review): tmpbuf has room for only s2len bytes plus a terminator.
	// If ucstring_to_sz() truncates to fit, a longer s that merely begins
	// with s2 would compare as equal -- confirm whether that is intended.
	s2len = de_strlen(s2);
	tmpbuf = de_malloc(s->c, s2len+1);
	ucstring_to_sz(s, tmpbuf, s2len+1, encoding);

	// Compare the converted string against s2. (Comparing tmpbuf with itself
	// would report "equal" for every non-NULL input.)
	ret = de_strcmp(tmpbuf, s2);

	de_free(s->c, tmpbuf);
	return ret;
}
// Creates the dbuf for one output file and returns it.
//
// ext: Optional filename extension/suffix. May be NULL.
// fi: Optional file metadata (name, directory flag, timestamps). May be NULL.
// createflags: DE_CREATEFLAG_* bits; only DE_CREATEFLAG_IS_AUX is examined here.
//
// A dbuf is always returned. If the file is not to be written (directory
// entries not kept, excluded by the extract policy, outside the requested
// range of file indices, list mode, or the output file could not be opened),
// the returned dbuf has btype DBUF_TYPE_NULL.
dbuf *dbuf_create_output_file(deark *c, const char *ext, de_finfo *fi,
	unsigned int createflags)
{
	char fname[500];
	char errmsg[200];
	dbuf *outf;
	const char *prefix;
	int file_index;
	int is_aux;
	int archive_naming;
	u8 is_directory;
	char *finfo_name = NULL;
	i64 finfo_name_size = 0;

	if(ext && fi && fi->original_filename_flag) {
		de_dbg(c, "[internal warning: Incorrect use of create_output_file]");
	}

	outf = de_malloc(c, sizeof(dbuf));
	outf->c = c;
	outf->max_len_hard = c->max_output_file_size;
	outf->is_managed = 1;

	is_directory = (fi && fi->is_directory) ? 1 : 0;
	is_aux = (createflags & DE_CREATEFLAG_IS_AUX) ? 1 : 0;

	// Files rejected in this section do not get a file index.
	if(is_directory && !c->keep_dir_entries) {
		de_dbg(c, "skipping 'directory' file");
		outf->btype = DBUF_TYPE_NULL;
		goto done;
	}

	if(c->extract_policy==DE_EXTRACTPOLICY_MAINONLY && is_aux) {
		de_dbg(c, "skipping 'auxiliary' file");
		outf->btype = DBUF_TYPE_NULL;
		goto done;
	}
	else if(c->extract_policy==DE_EXTRACTPOLICY_AUXONLY && !is_aux) {
		de_dbg(c, "skipping 'main' file");
		outf->btype = DBUF_TYPE_NULL;
		goto done;
	}

	file_index = c->file_count++;

	prefix = c->base_output_filename;
	if(!prefix) {
		prefix = "output";
	}

	// Get a UTF-8 copy of the name supplied via the finfo object, if any.
	if(fi && ucstring_isnonempty(fi->file_name_internal)) {
		finfo_name_size = 1 + ucstring_count_utf8_bytes(fi->file_name_internal);
		finfo_name = de_malloc(c, finfo_name_size);
		ucstring_to_sz(fi->file_name_internal, finfo_name, (size_t)finfo_name_size, 0,
			DE_ENCODING_UTF8);
	}

	// Conditions common to both of the special archive-mode naming cases.
	archive_naming = (c->output_style==DE_OUTPUTSTYLE_ARCHIVE &&
		!c->base_output_filename && fi) ? 1 : 0;

	if(archive_naming && fi->is_directory &&
		(fi->is_root_dir || (fi->detect_root_dot_dir && fi->orig_name_was_dot)))
	{
		de_strlcpy(fname, ".", sizeof(fname));
	}
	else if(archive_naming && fi->original_filename_flag && finfo_name) {
		// TODO: This is a "temporary" hack so that, when we are both reading
		// from and writing to an archive format, the member gets some
		// semblance of its correct filename (rather than "output.xxx.yyy").
		// Some things, such as subdirectories, are not handled optimally.
		// A major redesign of the file naming logic would be good.
		de_strlcpy(fname, finfo_name, sizeof(fname));
	}
	else {
		// The usual case: "<base>.<index>.<suffix>"
		char suffix[256];

		if(finfo_name) {
			if(ext) {
				de_snprintf(suffix, sizeof(suffix), "%s.%s", finfo_name, ext);
			}
			else if(is_directory) {
				de_snprintf(suffix, sizeof(suffix), "%s.dir", finfo_name);
			}
			else {
				de_strlcpy(suffix, finfo_name, sizeof(suffix));
			}
		}
		else {
			if(ext) {
				de_strlcpy(suffix, ext, sizeof(suffix));
			}
			else if(is_directory) {
				de_strlcpy(suffix, "dir", sizeof(suffix));
			}
			else {
				de_strlcpy(suffix, "bin", sizeof(suffix));
			}
		}

		de_snprintf(fname, sizeof(fname), "%s.%03d.%s", prefix, file_index, suffix);
	}

	outf->name = de_strdup(c, fname);

	if(fi) {
		// The caller's finfo object need not stay valid after we return, so
		// keep our own copy of whatever we might need from it later.
		outf->fi_copy = de_finfo_create(c);
		finfo_shallow_copy(c, fi, outf->fi_copy);

		// This is where the -intz option is respected: timestamps flagged as
		// local time are converted to UTC using the configured offset.
		if(c->input_tz_offs_seconds!=0) {
			if(outf->fi_copy->mod_time.is_valid &&
				outf->fi_copy->mod_time.tzcode==DE_TZCODE_LOCAL)
			{
				de_timestamp_cvt_to_utc(&outf->fi_copy->mod_time,
					-c->input_tz_offs_seconds);
			}
			if(outf->fi_copy->image_mod_time.is_valid &&
				outf->fi_copy->image_mod_time.tzcode==DE_TZCODE_LOCAL)
			{
				de_timestamp_cvt_to_utc(&outf->fi_copy->image_mod_time,
					-c->input_tz_offs_seconds);
			}
		}
	}

	// Files outside of the requested range of indices are discarded.
	if(file_index < c->first_output_file ||
		(c->max_output_files>=0 &&
		file_index >= c->first_output_file + c->max_output_files))
	{
		outf->btype = DBUF_TYPE_NULL;
		goto done;
	}

	c->num_files_extracted++;

	if(c->extrlist_dbuf) {
		dbuf_printf(c->extrlist_dbuf, "%s\n", outf->name);
		dbuf_flush(c->extrlist_dbuf);
	}

	if(c->list_mode) {
		// Only report the name; nothing will be written.
		outf->btype = DBUF_TYPE_NULL;
		if(c->list_mode_include_file_id) {
			de_msg(c, "%d:%s", file_index, outf->name);
		}
		else {
			de_msg(c, "%s", outf->name);
		}
		goto done;
	}

	if(c->output_style==DE_OUTPUTSTYLE_ARCHIVE) {
		if(c->archive_fmt==DE_ARCHIVEFMT_TAR) {
			de_info(c, "Adding %s to TAR file", outf->name);
			outf->btype = DBUF_TYPE_ODBUF;
			// This max_len_hard value is just a placeholder. The size limit
			// is enforced by the parent.
			outf->max_len_hard = DE_DUMMY_MAX_FILE_SIZE;
			outf->writing_to_tar_archive = 1;
			de_tar_start_member_file(c, outf);
		}
		else { // ZIP
			i64 initial_alloc;

			de_info(c, "Adding %s to ZIP file", outf->name);
			outf->btype = DBUF_TYPE_MEMBUF;
			outf->max_len_hard = DE_MAX_MEMBUF_SIZE;
			// A directory entry is not expected to have any data of its own
			// (apart from the files it contains), so start it off small.
			initial_alloc = is_directory ? 16 : 65536;
			outf->membuf_buf = de_malloc(c, initial_alloc);
			outf->membuf_alloc = initial_alloc;
			outf->write_memfile_to_zip_archive = 1;
		}
	}
	else if(c->output_style==DE_OUTPUTSTYLE_STDOUT) {
		de_info(c, "Writing %s to [stdout]", outf->name);
		outf->btype = DBUF_TYPE_STDOUT;
		// TODO: Should outf->max_len_hard be increased here?
		outf->fp = stdout;
	}
	else {
		de_info(c, "Writing %s", outf->name);
		outf->btype = DBUF_TYPE_OFILE;
		outf->fp = de_fopen_for_write(c, outf->name, errmsg, sizeof(errmsg),
			c->overwrite_mode, 0);
		if(!outf->fp) {
			de_err(c, "Failed to write %s: %s", outf->name, errmsg);
			outf->btype = DBUF_TYPE_NULL;
		}
	}

done:
	de_free(c, finfo_name);
	return outf;
}
// An advanced function for reading a string from a file. // The issue is that some strings are both human-readable and machine-readable. // In such a case, we'd like to read some data from a file into a nice printable // ucstring, while also making some or all of the raw bytes available, say for // byte-for-byte string comparisons. // Plus (for NUL-terminated/padded strings), we may need to know the actual length // of the string in the file, so that it can be skipped over, even if we don't // care about the whole string. // Caller is responsible for calling destroy_stringreader() on the returned value. // max_bytes_to_scan: The maximum number of bytes to read from the file. // max_bytes_to_keep: The maximum (or in some cases the exact) number of bytes, // not counting any NUL terminator, to return in ->sz. // The ->str field is a Unicode version of ->sz, so this also affects ->str. // If DE_CONVFLAG_STOP_AT_NUL is not set, it is assumed we are reading a string // of known length, that may have internal NUL bytes. The caller must set // max_bytes_to_scan and max_bytes_to_keep to the same value. The ->sz field will // always be allocated with this many bytes, plus one more for an artificial NUL // terminator. // If DE_CONVFLAG_WANT_UTF8 is set, then the ->sz_utf8 field will be set to a // UTF-8 version of ->str. This is mainly useful if the original string was // UTF-16. sz_utf8 is not "printable" -- use ucstring_get_printable_sz_n(str) for // that. 
// Recognized flags: // - DE_CONVFLAG_STOP_AT_NUL // - DE_CONVFLAG_WANT_UTF8 struct de_stringreaderdata *dbuf_read_string(dbuf *f, i64 pos, i64 max_bytes_to_scan, i64 max_bytes_to_keep, unsigned int flags, int encoding) { deark *c = f->c; struct de_stringreaderdata *srd; i64 foundpos = 0; int ret; i64 bytes_avail_to_read; i64 bytes_to_malloc; i64 x_strlen; srd = de_malloc(c, sizeof(struct de_stringreaderdata)); srd->str = ucstring_create(c); bytes_avail_to_read = max_bytes_to_scan; if(bytes_avail_to_read > f->len-pos) { bytes_avail_to_read = f->len-pos; } srd->bytes_consumed = bytes_avail_to_read; // default // From here on, we can safely bail out ("goto done"). The // de_stringreaderdata struct is sufficiently valid. if(!(flags&DE_CONVFLAG_STOP_AT_NUL) && (max_bytes_to_scan != max_bytes_to_keep)) { // To reduce possible confusion, we require that // max_bytes_to_scan==max_bytes_to_keep in this case. srd->sz = de_malloc(c, max_bytes_to_keep+1); goto done; } if(flags&DE_CONVFLAG_STOP_AT_NUL) { ret = dbuf_search_byte(f, 0x00, pos, bytes_avail_to_read, &foundpos); if(ret) { srd->found_nul = 1; } else { // No NUL byte found. Could be an error in some formats, but in // others NUL is used as separator or as padding, not a terminator. 
foundpos = pos+bytes_avail_to_read; } x_strlen = foundpos-pos; srd->bytes_consumed = x_strlen+1; } else { x_strlen = max_bytes_to_keep; srd->bytes_consumed = x_strlen; } bytes_to_malloc = x_strlen+1; if(bytes_to_malloc>(max_bytes_to_keep+1)) { bytes_to_malloc = max_bytes_to_keep+1; srd->was_truncated = 1; } srd->sz = de_malloc(c, bytes_to_malloc); dbuf_read(f, (u8*)srd->sz, pos, bytes_to_malloc-1); // The last byte remains NUL ucstring_append_bytes(srd->str, (const u8*)srd->sz, bytes_to_malloc-1, 0, encoding); if(flags&DE_CONVFLAG_WANT_UTF8) { srd->sz_utf8_strlen = (size_t)ucstring_count_utf8_bytes(srd->str); srd->sz_utf8 = de_malloc(c, srd->sz_utf8_strlen + 1); ucstring_to_sz(srd->str, srd->sz_utf8, srd->sz_utf8_strlen + 1, 0, DE_ENCODING_UTF8); } done: if(!srd->sz) { // Always return a valid sz, even on failure. srd->sz = de_malloc(c, 1); } if((flags&DE_CONVFLAG_WANT_UTF8) && !srd->sz_utf8) { // Always return a valid sz_utf8 if it was requested, even on failure. srd->sz_utf8 = de_malloc(c, 1); srd->sz_utf8_strlen = 0; } return srd; }