static int jl_id_start_char(uint32_t wc) { if ((wc >= 'A' && wc <= 'Z') || (wc >= 'a' && wc <= 'z') || wc == '_') return 1; if (wc < 0xA1 || wc > 0x10ffff) return 0; const utf8proc_property_t *prop = utf8proc_get_property(wc); return is_wc_cat_id_start(wc, prop->category); }
/*
 * Call utf8proc_get_property() for every value from 0x0000 through 0x10FFFF
 * (inclusive) and assert that each call returns a non-NULL pointer.
 */
static void test_utf8proc_get_property(void)
{
    int32_t code_point = 0x0000;

    while (code_point <= 0x10FFFF) {
        const utf8proc_property_t *properties =
            utf8proc_get_property(code_point);

        rtems_test_assert ( NULL != properties );
        ++code_point;
    }
}
static int jl_id_char(uint32_t wc) { if ((wc >= 'A' && wc <= 'Z') || (wc >= 'a' && wc <= 'z') || wc == '_' || (wc >= '0' && wc <= '9') || wc == '!') return 1; if (wc < 0xA1 || wc > 0x10ffff) return 0; const utf8proc_property_t *prop = utf8proc_get_property(wc); utf8proc_propval_t cat = prop->category; if (is_wc_cat_id_start(wc, cat)) return 1; if (cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_MC || cat == UTF8PROC_CATEGORY_ND || cat == UTF8PROC_CATEGORY_PC || cat == UTF8PROC_CATEGORY_SK || cat == UTF8PROC_CATEGORY_ME || cat == UTF8PROC_CATEGORY_NO || // primes (wc >= 0x2032 && wc <= 0x2034) || // Other_ID_Continue wc == 0x0387 || wc == 0x19da || (wc >= 0x1369 && wc <= 0x1371)) return 1; return 0; }
unsigned int utf8_glypheme_length(const char *p) { int32_t cp; int len = utf8proc_iterate((uint8_t *) p, -1, &cp); if (len == UTF8PROC_ERROR_INVALIDUTF8) return 1; // Check for combining characters const char *p2 = p + len; while (*p2) { int comb_len = utf8proc_iterate((uint8_t *) p2, -1, &cp); if (comb_len == UTF8PROC_ERROR_INVALIDUTF8) return 1; const utf8proc_property_t* prop = utf8proc_get_property(cp); if (!prop->combining_class) break; len += comb_len; p2 += comb_len; } return len; }