Пример #1
0
// Decode the bytes in buf[0..buflen-1] from the given encoding, and append
// the resulting code points to s.
//
// conv_flags:
//   DE_CONVFLAG_STOP_AT_NUL - Only the bytes before the first NUL are
//     converted. For the UTF-16 encodings, "NUL" means an aligned 16-bit
//     0x0000 code unit; for everything else it means a single 0x00 byte.
//
// Anything that cannot be decoded is replaced by '_'. After a failed decode
// we skip 1 byte (UTF-8) or 2 bytes (UTF-16) and keep going, so one bad
// sequence does not discard the rest of the string.
void ucstring_append_bytes(de_ucstring *s, const de_byte *buf, de_int64 buflen,
	unsigned int conv_flags, int encoding)
{
	int ret;
	de_int64 pos = 0;
	de_int32 ch;
	de_int64 code_len;

	// Nothing to do. This also keeps a negative length from being cast to a
	// huge size_t in the NUL search below.
	if(buflen<=0) return;

	// Adjust buflen if necessary.
	if(conv_flags & DE_CONVFLAG_STOP_AT_NUL) {
		if(encoding==DE_ENCODING_UTF16LE || encoding==DE_ENCODING_UTF16BE) {
			de_int64 i;

			// In UTF-16, a lone zero byte is usually just half of an ordinary
			// character (e.g. 'A' is 41 00 or 00 41), so a byte-wise search
			// would cut the string short. Look for a whole 0x0000 code unit.
			for(i=0; i+1<buflen; i+=2) {
				if(buf[i]==0 && buf[i+1]==0) {
					buflen = i;
					break;
				}
			}
		}
		else {
			char *tmpp;
			tmpp = de_memchr(buf, 0, (size_t)buflen);
			if(tmpp) {
				buflen = (const de_byte*)tmpp - buf;
			}
		}
	}

	while(pos<buflen) {
		if(encoding==DE_ENCODING_UTF8) {
			ret = de_utf8_to_uchar(&buf[pos], buflen-pos, &ch, &code_len);
			if(!ret) {
				ch = '_';
				code_len = 1;
			}
		}
		else if(encoding==DE_ENCODING_UTF16LE) {
			ret = de_utf16x_to_uchar(&buf[pos], buflen-pos, &ch, &code_len, 1);
			if(!ret) {
				ch = '_';
				code_len = 2;
			}
		}
		else if(encoding==DE_ENCODING_UTF16BE) {
			ret = de_utf16x_to_uchar(&buf[pos], buflen-pos, &ch, &code_len, 0);
			if(!ret) {
				ch = '_';
				code_len = 2;
			}
		}
		else {
			// Every other encoding is handled as one byte per character.
			ch = de_char_to_unicode(s->c, buf[pos], encoding);
			if(ch==DE_INVALID_CODEPOINT) {
				ch = '_';
			}
			code_len = 1;
		}
		ucstring_append_char(s, ch);
		pos += code_len;
	}
}
Пример #2
0
// Walk the Unicode table that starts at d->unicode_table_pos, and record the
// code point(s) associated with each glyph.
//
// Each glyph's entry is the run of bytes up to the next 0xff byte. The entry
// is decoded as a series of UTF-8 code points: the first one becomes the
// glyph's codepoint_unicode, and any others are passed to
// do_extra_codepoint(). Processing stops after d->num_glyphs entries, at the
// end of the input file, or when no further 0xff byte can be found.
static void do_psf2_unicode_table(deark *c, lctx *d, struct de_bitmap_font *font)
{
	de_byte entry_buf[200];
	de_int64 glyph_idx;
	de_int64 entry_pos;

	de_dbg(c, "Unicode table at %d\n", (int)d->unicode_table_pos);
	de_dbg_indent(c, 1);

	entry_pos = d->unicode_table_pos;

	for(glyph_idx=0; glyph_idx<d->num_glyphs && entry_pos<c->infile->len; glyph_idx++) {
		de_int64 terminator_pos;
		de_int64 entry_len;
		de_int64 offset = 0;
		de_int64 num_decoded = 0;

		// The entry extends to (but does not include) the next 0xff byte.
		if(!dbuf_search_byte(c->infile, 0xff, entry_pos,
			c->infile->len - entry_pos, &terminator_pos))
		{
			break;
		}

		// Clamp the entry size to what our local buffer can hold.
		entry_len = terminator_pos - entry_pos;
		if(entry_len<0) {
			entry_len = 0;
		}
		else if(entry_len>(de_int64)sizeof(entry_buf)) {
			entry_len = (de_int64)sizeof(entry_buf);
		}

		// Load the whole entry, then decode it one code point at a time.
		de_read(entry_buf, entry_pos, entry_len);

		while(offset<entry_len) {
			de_int32 codepoint;
			de_int64 seq_len;

			// A decode failure is not necessarily an error: if the glyph has
			// multi-codepoint aliases, we expect de_utf8_to_uchar() to fail
			// when it reaches the 0xfe byte.
			if(!de_utf8_to_uchar(&entry_buf[offset], entry_len-offset,
				&codepoint, &seq_len))
			{
				break;
			}

			if(num_decoded!=0) {
				// An additional code point mapped to the same glyph
				do_extra_codepoint(c, d, font, glyph_idx, codepoint);
			}
			else {
				// The first code point is the glyph's primary one.
				de_dbg2(c, "char[%d] = U+%04x\n", (int)glyph_idx, (unsigned int)codepoint);
				font->char_array[glyph_idx].codepoint_unicode = codepoint;
			}

			offset += seq_len;
			num_decoded++;
		}

		if(num_decoded==0) {
			de_warn(c, "Missing codepoint for char #%d\n", (int)glyph_idx);
		}

		// The next entry begins just past the terminator.
		entry_pos = terminator_pos+1;
	}

	font->has_unicode_codepoints = 1;
	font->prefer_unicode = 1;

	de_dbg_indent(c, -1);
}