// Decode the bytes in buf[0..buflen-1] according to 'encoding', and append the
// resulting characters to the string s.
//
// encoding:
//  - DE_ENCODING_UTF8, DE_ENCODING_UTF16LE, DE_ENCODING_UTF16BE are decoded as
//    variable-length sequences.
//  - Any other value is treated as a single-byte encoding, with each byte
//    converted by de_char_to_unicode().
//
// Bytes that cannot be decoded are replaced by '_'. After a failed decode,
// 1 byte (UTF-8) or 2 bytes (UTF-16) of input are skipped.
//
// conv_flags:
//  - DE_CONVFLAG_STOP_AT_NUL: Only the data before the first NUL character is
//    used. For UTF-16, "NUL" means a 16-bit code unit of zero at an even
//    offset; for all other encodings it means a zero byte.
//
// Does nothing if buf is NULL or buflen is not positive.
void ucstring_append_bytes(de_ucstring *s, const de_byte *buf, de_int64 buflen,
	unsigned int conv_flags, int encoding)
{
	int ret;
	int is_utf16;
	de_int64 pos = 0;
	de_int32 ch;
	de_int64 code_len;

	// A negative length must never reach the (size_t) cast below, where it
	// would turn into a huge scan length.
	if(!buf || buflen<=0) return;

	is_utf16 = (encoding==DE_ENCODING_UTF16LE || encoding==DE_ENCODING_UTF16BE);

	// Adjust buflen if necessary.
	if(conv_flags & DE_CONVFLAG_STOP_AT_NUL) {
		if(is_utf16) {
			// A bytewise search would stop inside nearly every ASCII-range
			// character (one of its two bytes is zero), so look for a whole
			// zero code unit instead.
			de_int64 k;

			for(k=0; k+1<buflen; k+=2) {
				if(buf[k]==0 && buf[k+1]==0) {
					buflen = k;
					break;
				}
			}
		}
		else {
			const de_byte *nulp;

			nulp = (const de_byte*)de_memchr(buf, 0, (size_t)buflen);
			if(nulp) {
				buflen = (de_int64)(nulp - buf);
			}
		}
	}

	while(pos<buflen) {
		if(encoding==DE_ENCODING_UTF8) {
			ret = de_utf8_to_uchar(&buf[pos], buflen-pos, &ch, &code_len);
			if(!ret) {
				ch = '_';
				code_len = 1;
			}
		}
		else if(encoding==DE_ENCODING_UTF16LE) {
			ret = de_utf16x_to_uchar(&buf[pos], buflen-pos, &ch, &code_len, 1);
			if(!ret) {
				ch = '_';
				code_len = 2;
			}
		}
		else if(encoding==DE_ENCODING_UTF16BE) {
			ret = de_utf16x_to_uchar(&buf[pos], buflen-pos, &ch, &code_len, 0);
			if(!ret) {
				ch = '_';
				code_len = 2;
			}
		}
		else {
			// Single-byte encoding: always consumes exactly one byte.
			ch = de_char_to_unicode(s->c, buf[pos], encoding);
			if(ch==DE_INVALID_CODEPOINT) {
				ch = '_';
			}
			code_len = 1;
		}

		ucstring_append_char(s, ch);
		pos += code_len;
	}
}
// Read the PSF2 Unicode table, which starts at d->unicode_table_pos, and use
// it to assign Unicode codepoints to the glyphs in 'font'.
//
// The table is read as a series of per-glyph entries, each ended by a 0xff
// byte. An entry is decoded as a run of UTF-8 codepoints: the first one is
// stored as the glyph's codepoint_unicode, and each later one is handed to
// do_extra_codepoint(). Decoding of an entry stops at the first byte sequence
// that is not valid UTF-8.
//
// Processing stops early if the end of the file is reached, or if no 0xff
// terminator can be found. In all cases the font is then flagged as having
// (and preferring) Unicode codepoints.
static void do_psf2_unicode_table(deark *c, lctx *d, struct de_bitmap_font *font)
{
	de_byte entry_buf[200];
	de_int64 glyph_idx;
	de_int64 pos;

	de_dbg(c, "Unicode table at %d\n", (int)d->unicode_table_pos);
	de_dbg_indent(c, 1);

	pos = d->unicode_table_pos;

	for(glyph_idx=0; glyph_idx<d->num_glyphs; glyph_idx++) {
		de_int64 terminator_pos;
		de_int64 entry_len;
		de_int64 offset;
		de_int64 num_codepoints;

		if(pos >= c->infile->len) break;

		// The entry for this glyph extends up to the next 0xff byte.
		if(!dbuf_search_byte(c->infile, 0xff, pos, c->infile->len - pos,
			&terminator_pos))
		{
			break;
		}

		// Only as much of the entry as fits in entry_buf is examined.
		entry_len = terminator_pos - pos;
		if(entry_len<0) {
			entry_len = 0;
		}
		else if(entry_len>(de_int64)sizeof(entry_buf)) {
			entry_len = (de_int64)sizeof(entry_buf);
		}

		de_read(entry_buf, pos, entry_len);

		// Walk through the codepoints listed for this glyph.
		num_codepoints = 0;
		offset = 0;
		while(offset < entry_len) {
			de_int32 codepoint;
			de_int64 seq_len;

			if(!de_utf8_to_uchar(&entry_buf[offset], entry_len-offset,
				&codepoint, &seq_len))
			{
				// Not necessarily an error: a 0xfe byte introduces
				// multi-codepoint aliases, and the decoder is expected
				// to fail when it reaches it.
				break;
			}

			if(num_codepoints!=0) {
				do_extra_codepoint(c, d, font, glyph_idx, codepoint);
			}
			else {
				// The first codepoint listed is the glyph's primary one.
				de_dbg2(c, "char[%d] = U+%04x\n", (int)glyph_idx, (unsigned int)codepoint);
				font->char_array[glyph_idx].codepoint_unicode = codepoint;
			}

			num_codepoints++;
			offset += seq_len;
		}

		if(num_codepoints==0) {
			de_warn(c, "Missing codepoint for char #%d\n", (int)glyph_idx);
		}

		// The next entry begins just past the terminator.
		pos = terminator_pos+1;
	}

	font->has_unicode_codepoints = 1;
	font->prefer_unicode = 1;

	de_dbg_indent(c, -1);
}