// Read (up to) len bytes from f, translate them to characters, and append // them to s. void dbuf_read_to_ucstring(dbuf *f, i64 pos, i64 len, de_ucstring *s, unsigned int conv_flags, int encoding) { u8 *buf = NULL; deark *c = f->c; if(conv_flags & DE_CONVFLAG_STOP_AT_NUL) { i64 foundpos = 0; if(dbuf_search_byte(f, 0x00, pos, len, &foundpos)) { len = foundpos - pos; } } buf = de_malloc(c, len); dbuf_read(f, buf, pos, len); ucstring_append_bytes(s, buf, len, 0, encoding); de_free(c, buf); }
// Appends the NUL-terminated byte string sz to s, interpreting its bytes
// according to the given encoding. The terminating NUL is not appended.
void ucstring_append_sz(de_ucstring *s, const char *sz, int encoding)
{
	const de_int64 nbytes = (de_int64)de_strlen(sz);

	ucstring_append_bytes(s, (const de_byte*)sz, nbytes, 0, encoding);
}
// An advanced function for reading a string from a file. // The issue is that some strings are both human-readable and machine-readable. // In such a case, we'd like to read some data from a file into a nice printable // ucstring, while also making some or all of the raw bytes available, say for // byte-for-byte string comparisons. // Plus (for NUL-terminated/padded strings), we may need to know the actual length // of the string in the file, so that it can be skipped over, even if we don't // care about the whole string. // Caller is responsible for calling destroy_stringreader() on the returned value. // max_bytes_to_scan: The maximum number of bytes to read from the file. // max_bytes_to_keep: The maximum (or in some cases the exact) number of bytes, // not counting any NUL terminator, to return in ->sz. // The ->str field is a Unicode version of ->sz, so this also affects ->str. // If DE_CONVFLAG_STOP_AT_NUL is not set, it is assumed we are reading a string // of known length, that may have internal NUL bytes. The caller must set // max_bytes_to_scan and max_bytes_to_keep to the same value. The ->sz field will // always be allocated with this many bytes, plus one more for an artificial NUL // terminator. // If DE_CONVFLAG_WANT_UTF8 is set, then the ->sz_utf8 field will be set to a // UTF-8 version of ->str. This is mainly useful if the original string was // UTF-16. sz_utf8 is not "printable" -- use ucstring_get_printable_sz_n(str) for // that. 
// Recognized flags: // - DE_CONVFLAG_STOP_AT_NUL // - DE_CONVFLAG_WANT_UTF8 struct de_stringreaderdata *dbuf_read_string(dbuf *f, i64 pos, i64 max_bytes_to_scan, i64 max_bytes_to_keep, unsigned int flags, int encoding) { deark *c = f->c; struct de_stringreaderdata *srd; i64 foundpos = 0; int ret; i64 bytes_avail_to_read; i64 bytes_to_malloc; i64 x_strlen; srd = de_malloc(c, sizeof(struct de_stringreaderdata)); srd->str = ucstring_create(c); bytes_avail_to_read = max_bytes_to_scan; if(bytes_avail_to_read > f->len-pos) { bytes_avail_to_read = f->len-pos; } srd->bytes_consumed = bytes_avail_to_read; // default // From here on, we can safely bail out ("goto done"). The // de_stringreaderdata struct is sufficiently valid. if(!(flags&DE_CONVFLAG_STOP_AT_NUL) && (max_bytes_to_scan != max_bytes_to_keep)) { // To reduce possible confusion, we require that // max_bytes_to_scan==max_bytes_to_keep in this case. srd->sz = de_malloc(c, max_bytes_to_keep+1); goto done; } if(flags&DE_CONVFLAG_STOP_AT_NUL) { ret = dbuf_search_byte(f, 0x00, pos, bytes_avail_to_read, &foundpos); if(ret) { srd->found_nul = 1; } else { // No NUL byte found. Could be an error in some formats, but in // others NUL is used as separator or as padding, not a terminator. 
foundpos = pos+bytes_avail_to_read; } x_strlen = foundpos-pos; srd->bytes_consumed = x_strlen+1; } else { x_strlen = max_bytes_to_keep; srd->bytes_consumed = x_strlen; } bytes_to_malloc = x_strlen+1; if(bytes_to_malloc>(max_bytes_to_keep+1)) { bytes_to_malloc = max_bytes_to_keep+1; srd->was_truncated = 1; } srd->sz = de_malloc(c, bytes_to_malloc); dbuf_read(f, (u8*)srd->sz, pos, bytes_to_malloc-1); // The last byte remains NUL ucstring_append_bytes(srd->str, (const u8*)srd->sz, bytes_to_malloc-1, 0, encoding); if(flags&DE_CONVFLAG_WANT_UTF8) { srd->sz_utf8_strlen = (size_t)ucstring_count_utf8_bytes(srd->str); srd->sz_utf8 = de_malloc(c, srd->sz_utf8_strlen + 1); ucstring_to_sz(srd->str, srd->sz_utf8, srd->sz_utf8_strlen + 1, 0, DE_ENCODING_UTF8); } done: if(!srd->sz) { // Always return a valid sz, even on failure. srd->sz = de_malloc(c, 1); } if((flags&DE_CONVFLAG_WANT_UTF8) && !srd->sz_utf8) { // Always return a valid sz_utf8 if it was requested, even on failure. srd->sz_utf8 = de_malloc(c, 1); srd->sz_utf8_strlen = 0; } return srd; }