/*
 * Classify a single character by running it through unicode_tab_lookup()
 * against this file's index, range and class tables.
 *
 * ch: the character to classify.
 *
 * Returns whatever unicode_tab_lookup() yields, converted to int.
 *
 * UNICODE_LB_AL is handed to the lookup as its last argument; the original
 * note ties this to "XX, LB1".
 * NOTE(review): presumably this is the class reported for characters the
 * tables do not cover -- confirm against unicode_tab_lookup().
 */
int unicode_lb_lookup(unicode_char ch)
{
	int lb_class;

	lb_class = unicode_tab_lookup(ch,
				      unicode_indextab,
				      sizeof unicode_indextab
				      / sizeof *unicode_indextab,
				      unicode_rangetab,
				      unicode_classtab,
				      UNICODE_LB_AL /* XX, LB1 */);

	return lb_class;
}
Example #2
0
/*
 * Decide whether a grapheme cluster boundary falls between two adjacent
 * characters.
 *
 * a: the character before the candidate boundary.
 * b: the character after the candidate boundary.
 *
 * Both characters are classified with unicode_tab_lookup() against this
 * file's tables, passing UNICODE_GRAPHEMEBREAK_ANY as the last argument.
 * The pair of classes is then checked against rules GB3 through GB10, in
 * order; the first rule that matches decides the result.
 *
 * Returns 1 when a break rule applies (GB4, GB5, GB10) and 0 when a
 * no-break rule applies (GB3, GB6, GB7, GB8, GB9, GB9a, GB9b).
 */
int unicode_grapheme_break(unicode_char a, unicode_char b)
{
	uint8_t before = unicode_tab_lookup(a, unicode_indextab,
			 sizeof unicode_indextab / sizeof *unicode_indextab,
			 unicode_rangetab,
			 unicode_classtab,
			 UNICODE_GRAPHEMEBREAK_ANY);
	uint8_t after = unicode_tab_lookup(b, unicode_indextab,
			 sizeof unicode_indextab / sizeof *unicode_indextab,
			 unicode_rangetab,
			 unicode_classtab,
			 UNICODE_GRAPHEMEBREAK_ANY);

	/* GB1 and GB2 are implied */

	/* GB3: CR followed by LF stays together. */
	if (before == UNICODE_GRAPHEMEBREAK_CR &&
	    after == UNICODE_GRAPHEMEBREAK_LF) {
		return 0;
	}

	/*
	 * GB4 (left side) and GB5 (right side): a CR, LF or Control on
	 * either side forces a break. Both rules give the same answer,
	 * so they are tested together.
	 */
	if (before == UNICODE_GRAPHEMEBREAK_CR ||
	    before == UNICODE_GRAPHEMEBREAK_LF ||
	    before == UNICODE_GRAPHEMEBREAK_Control ||
	    after == UNICODE_GRAPHEMEBREAK_CR ||
	    after == UNICODE_GRAPHEMEBREAK_LF ||
	    after == UNICODE_GRAPHEMEBREAK_Control) {
		return 1;
	}

	/* GB6: L followed by L, V, LV or LVT. */
	if (before == UNICODE_GRAPHEMEBREAK_L &&
	    (after == UNICODE_GRAPHEMEBREAK_L ||
	     after == UNICODE_GRAPHEMEBREAK_V ||
	     after == UNICODE_GRAPHEMEBREAK_LV ||
	     after == UNICODE_GRAPHEMEBREAK_LVT)) {
		return 0;
	}

	/* GB7: LV or V followed by V or T. */
	if (after == UNICODE_GRAPHEMEBREAK_V ||
	    after == UNICODE_GRAPHEMEBREAK_T) {
		if (before == UNICODE_GRAPHEMEBREAK_LV ||
		    before == UNICODE_GRAPHEMEBREAK_V) {
			return 0;
		}
	}

	/* GB8: LVT or T followed by T. */
	if (after == UNICODE_GRAPHEMEBREAK_T &&
	    (before == UNICODE_GRAPHEMEBREAK_LVT ||
	     before == UNICODE_GRAPHEMEBREAK_T)) {
		return 0;
	}

	/*
	 * GB9 (Extend on the right), GB9a (SpacingMark on the right) and
	 * GB9b (Prepend on the left) all suppress the break.
	 */
	if (after == UNICODE_GRAPHEMEBREAK_Extend ||
	    after == UNICODE_GRAPHEMEBREAK_SpacingMark ||
	    before == UNICODE_GRAPHEMEBREAK_Prepend) {
		return 0;
	}

	/* GB10: everything else breaks. */
	return 1;
}