/*
 * utf8_encode -- write the UTF-8 style byte sequence for one character code.
 *
 *   value  character code; 0x0 through 0x7FFFFFFF are accepted
 *   out    six byte output buffer
 *
 * Returns the length of the sequence in bytes (1 to 6), or 0 when value
 * is 0x80000000 or larger and therefore has no encoding here.
 *
 * out[1] through out[5] are always preset to 0x80 (the 10xxxxxx
 * continuation pattern), including on the error path.  out[0] receives
 * the lead-byte pattern only when a form is selected; the rest of the
 * work is delegated to utf8_build(), which is defined elsewhere.
 * NOTE(review): utf8_build() presumably merges the payload bits of value
 * into out[] -- confirm against its definition.
 */
int utf8_encode( unsigned int value, unsigned char out[6] )
{
    /*
     * One row per sequence length, shortest first.  The last two fields
     * are handed to utf8_build() untouched; they look like
     * "32 minus payload bits" and "marker bits in the lead byte"
     * (TODO confirm against utf8_build).
     */
    static const struct {
        unsigned int  limit;       /* exclusive upper bound on value */
        unsigned char lead;        /* lead-byte pattern stored in out[0] */
        int           build_arg3;  /* third argument to utf8_build() */
        int           build_arg4;  /* fourth argument to utf8_build() */
    } forms[] = {
        { 0x80,                      0x0,  25, 1 },  /* 0xxxxxxx */
        { 0x800,                     0xC0, 21, 3 },  /* 110xxxxx */
        { 0x10000,                   0xE0, 16, 4 },  /* 1110xxxx */
        { 0x200000,                  0xF0, 11, 5 },  /* 11110xxx */
        { 0x4000000,                 0xF8,  6, 6 },  /* 111110xx */
        { (unsigned int) 0x80000000, 0xFC,  1, 7 }   /* 1111110x */
    };
    const int form_count = (int) ( sizeof forms / sizeof forms[0] );
    int pos;
    int which;

    /* Preset every trailing slot to the continuation pattern 10xxxxxx. */
    for ( pos = 5; pos >= 1; --pos )
        out[pos] = 0x80;

    /* The first row whose bound exceeds value gives the sequence length. */
    for ( which = 0; which < form_count; ++which )
    {
        if ( value < forms[which].limit )
        {
            out[0] = forms[which].lead;
            utf8_build( value, out,
                        forms[which].build_arg3, forms[which].build_arg4 );
            return which + 1;
        }
    }

    /* value needs more than 31 bits: not encodable, out[0] is left alone. */
    return 0;
}
/*
 * Look up the width stored for a character code in the width tree.
 *
 * Walks the tree rooted at utf8_width_root, where each entry covers the
 * inclusive range [first, last]: codes below the range continue in the
 * left child, codes above it in the right child.  Returns the width of
 * the entry whose range contains uc, or 1 if no entry covers it.
 *
 * If the root is still NULL on entry, utf8_build() is called first.
 * NOTE(review): utf8_build() presumably populates utf8_width_root --
 * confirm against its definition.
 */
u_int
utf8_width(u_int uc)
{
	struct utf8_width_entry	*node;

	if (utf8_width_root == NULL)
		utf8_build();

	for (node = utf8_width_root; node != NULL; ) {
		/* Inside this entry's range: done. */
		if (uc >= node->first && uc <= node->last)
			return (node->width);

		/* Otherwise descend towards the side uc falls on. */
		node = (uc < node->first) ? node->left : node->right;
	}

	/* Not covered by any entry. */
	return (1);
}