/**
 * Returns a pointer to the position that follows the character starting at
 * `str`.
 *
 * The number of bytes to skip is taken from utf8_char_len(str) (defined
 * elsewhere). The walk never moves beyond a NUL terminator: if the string ends
 * before the reported number of bytes has been skipped, the address of the
 * terminator is returned instead.
 *
 * @param str  NUL-terminated string; must not be a null pointer.
 * @return     Pointer to the next character, or to the terminating NUL when
 *             the end of the string is reached first. If utf8_char_len()
 *             reports a length <= 0, `str` is returned unchanged.
 */
static inline const char* utf8_next(const char *str)
{
    // Already at the end of the string: there is nothing to skip. Without
    // this guard the loop below would increment first and step over the
    // terminator before looking at it.
    if (*str == '\0')
        return str;

    int n = utf8_char_len(str);
    while (n > 0) {
        str++;
        // Truncated sequence: stop on the terminator rather than past it.
        if (*str == '\0')
            return str;
        n--;
    }
    return str;
}
/**
 * Searches p_string for the first occurrence of p_substring.
 *
 * Both lengths are first passed through strlen_max() (defined elsewhere;
 * presumably it caps the length at the NUL terminator - confirm). Candidate
 * positions are compared bytewise with memcmp(); between candidates the
 * position advances by whatever utf8_char_len() reports for the current
 * position, so comparisons are attempted only at offsets reached that way.
 *
 * @param p_string         Text to search in.
 * @param p_string_len     Length limit for p_string.
 * @param p_substring      Text to search for.
 * @param p_substring_len  Length limit for p_substring.
 * @return Byte offset of the first match, or ~0 (converted to t_size) when
 *         there is no match or utf8_char_len() returns 0 before one is found.
 */
t_size strstr_ex(const char * p_string,t_size p_string_len,const char * p_substring,t_size p_substring_len) throw()
{
	const t_size haystack_len = strlen_max(p_string, p_string_len);
	const t_size needle_len = strlen_max(p_substring, p_substring_len);

	// Only positions where the whole needle still fits are candidates.
	for (t_size pos = 0; pos + needle_len <= haystack_len; )
	{
		if (memcmp(p_string + pos, p_substring, needle_len) == 0)
			return pos;

		const t_size step = utf8_char_len(p_string + pos, haystack_len - pos);
		if (step == 0)
			break; // no progress possible; give up and report "not found"

		pos += step;
	}

	return ~0;
}
// utf8 to LENDIAN unicode static status_t _utf8_to_lendian_unicode( const char *src, size_t *srcLen, char *dst, size_t *dstLen) { size_t srcLimit = *srcLen; size_t dstLimit = *dstLen - 1; size_t srcCount = 0; size_t dstCount = 0; status_t status = B_ERROR; while ((srcCount < srcLimit) && (dstCount < dstLimit)) { uint16 unicode; uint16 *UNICODE = &unicode; uchar *UTF8 = (uchar *)src + srcCount; int err_flag; if ((srcCount + utf8_char_len(src[srcCount])) > srcLimit) break; utf8_to_u_hostendian(UTF8, UNICODE, err_flag); if (err_flag == 1) return EINVAL; unicode = B_HOST_TO_LENDIAN_INT16(unicode); if ((dstCount + 1) > dstLimit) { status = B_BUFFER_OVERFLOW; break; } dst[dstCount++] = unicode & 0xFF; dst[dstCount++] = unicode >> 8; srcCount += UTF8 - ((uchar *)(src + srcCount)); status = B_OK; } *srcLen = srcCount; *dstLen = dstCount; return status; }
/*
 * Reads "./utf8-example" through the project's File object, prints the whole
 * content in square brackets, then prints each character on its own line,
 * again in square brackets. Character boundaries come from utf8_char_len().
 *
 * init(), new(), File, File_read() and Object_inspect() are defined elsewhere;
 * Object_inspect() is presumably returning the NUL-terminated file content -
 * confirm against its definition.
 */
int main(void)
{
    init();

    void * input = new(File, "./utf8-example");
    void * content = File_read(input);
    uint8_t * string = Object_inspect(content);

    printf("[%s]\n", (const char *)string);

    uint8_t buffer[30];
    buffer[0] = '\0';

    size_t len = strlen((const char *)string);
    size_t n;

    /* size_t index: avoids comparing a signed int against the size_t length. */
    for (size_t i = 0; i < len; i += n) {
        n = utf8_char_len(string + i);

        /*
         * A length of 0 would never advance i (endless loop); a length that
         * does not fit the buffer plus its terminator (including a negative
         * value converted to size_t) would overrun `buffer`. Stop in both
         * cases.
         */
        if (n == 0 || n >= sizeof buffer)
            break;

        strncpy((char *)buffer, (const char *)(string + i), n);
        buffer[n] = '\0';
        printf("[%s]\n", (const char *)buffer);
    }

    return 0;
}
// utf8 to LENDIAN unicode static status_t _utf8_to_lendian_unicode( const char *src, int32 *srcLen, char *dst, uint32 *dstLen) { int32 srcLimit = *srcLen; int32 dstLimit = *dstLen - 1; int32 srcCount = 0; int32 dstCount = 0; while ((srcCount < srcLimit) && (dstCount < dstLimit)) { uint16 unicode; uint16 *UNICODE = &unicode; uchar *UTF8 = (uchar *)src + srcCount; int err_flag; if ((srcCount + utf8_char_len(src[srcCount])) > srcLimit) break; utf8_to_u_hostendian(UTF8, UNICODE, err_flag); if(err_flag == 1) return EINVAL; unicode = B_HOST_TO_LENDIAN_INT16(unicode); dst[dstCount++] = unicode & 0xFF; dst[dstCount++] = unicode >> 8; srcCount += UTF8 - ((uchar *)(src + srcCount)); } *srcLen = srcCount; *dstLen = dstCount; return ((dstCount > 0) ? B_NO_ERROR : B_ERROR); }
/**
 * Pre-increment: moves the wrapped position `base_it` forward by the value
 * utf8_char_len() returns for the element it currently refers to (presumably
 * the byte length of the character starting there - confirm against
 * utf8_char_len's definition).
 *
 * @return Reference to this iterator after it has been advanced.
 */
utf8string::iterator::self& utf8string::iterator::operator++()
{
    this->base_it += utf8_char_len(*this->base_it);
    return *this;
}