/*
 * Additional rendering rule for the "ml" unicode scheme.
 *
 * Looks at the current token together with the token before it and, when one
 * of the special cases below applies, appends the replacement text to
 * `output`.
 *
 * Returns:
 *   - the non-zero code from vst_get_virama() if that lookup fails,
 *   - VARNAM_SUCCESS when this function appended the rendering itself,
 *   - VARNAM_PARTIAL_RENDERING when no special case applied and nothing was
 *     appended here.
 *
 * Every special case needs a previous token, so a NULL `previous` always
 * yields VARNAM_PARTIAL_RENDERING.
 */
int
ml_unicode_renderer(varnam *handle, vtoken *previous, vtoken *current, strbuf *output)
{
    vtoken *virama;
    bool stripped;
    int status;

    /* The virama itself is not used below; only the lookup status matters. */
    status = vst_get_virama (handle, &virama);
    if (status)
        return status;

    if (previous == NULL)
        return VARNAM_PARTIAL_RENDERING;

    /* A vowel typed as "r" that follows another token renders as value3. */
    if (current->type == VARNAM_TOKEN_VOWEL && strcmp (current->pattern, "r") == 0)
    {
        strbuf_add (output, current->value3);
        return VARNAM_SUCCESS;
    }

    /* Tokens carrying RENDER_VALUE2_TAG render as value2 when not first. */
    if (strcmp (current->tag, RENDER_VALUE2_TAG) == 0)
    {
#ifdef _VARNAM_VERBOSE
        varnam_debug (handle, "ml-unicode-renderer - Found %s tag", RENDER_VALUE2_TAG);
#endif
        strbuf_add (output, current->value2);
        return VARNAM_SUCCESS;
    }

    /* A vowel after a CHIL_TAG token: take back whatever was emitted for the
       previous token (value1, or failing that value2) and emit the previous
       token's value3 followed by this vowel's value2 instead. */
    if (current->type == VARNAM_TOKEN_VOWEL && strcmp (previous->tag, CHIL_TAG) == 0)
    {
        stripped = strbuf_remove_from_last (output, previous->value1)
                   || strbuf_remove_from_last (output, previous->value2);
        if (stripped)
        {
            strbuf_add (output, previous->value3);
            strbuf_add (output, current->value2);
            return VARNAM_SUCCESS;
        }
    }

    return VARNAM_PARTIAL_RENDERING;
}
/*
 * Returns a pooled string holding `word` with its leading and trailing runs
 * of special characters removed, language specific sanitization applied, and
 * any leading/trailing ZWNJ and leading ZWJ stripped.
 *
 * Both the returned buffer and the scratch buffer used internally come from
 * get_pooled_string().
 */
static strbuf*
sanitize_word (varnam *handle, const char *word)
{
    size_t i, trailing;
    strbuf *string, *to_remove;

    string = get_pooled_string (handle);
    to_remove = get_pooled_string (handle);

    strbuf_add (string, word);

    /* Collect the run of special characters at the front of the word. */
    for (i = 0; i < string->length; i++)
    {
        if (!is_special_character (string->buffer[i]))
            break;
        strbuf_addc (to_remove, string->buffer[i]);
    }

    strbuf_remove_from_first (string, strbuf_to_s (to_remove));

    strbuf_clear (to_remove);

    /* Count the special characters at the end of the word. The index is
       unsigned, so the scan is bounded by the length instead of counting an
       index down to "below zero"; this keeps it in bounds when the string is
       empty or consists solely of special characters. */
    trailing = 0;
    while (trailing < string->length
           && is_special_character (string->buffer[string->length - trailing - 1]))
    {
        trailing++;
    }

    /* Copy that suffix in its original order so that it can be matched
       against the end of the string (assumes strbuf_remove_from_last()
       compares its argument with the literal suffix). */
    for (i = string->length - trailing; i < string->length; i++)
        strbuf_addc (to_remove, string->buffer[i]);

    strbuf_remove_from_last (string, strbuf_to_s (to_remove));

    language_specific_sanitization (string);

    /* Remove leading and trailing ZWNJ, and leading ZWJ */
    strbuf_remove_from_first (string, ZWNJ());
    strbuf_remove_from_last (string, ZWNJ());
    strbuf_remove_from_first (string, ZWJ());

    return string;
}
/*
 * Appends the rendering of `match` to `string`.
 *
 * Order of evaluation:
 *   1. If an additional rendering rule is registered and its render callback
 *      reports VARNAM_SUCCESS, that rule has handled the token.
 *   2. A token whose value1 equals the configured virama is emitted as
 *      virama + ZWNJ (only the ZWNJ when the output already ends in virama).
 *   3. A vowel replaces a trailing virama with its value2 (if non-empty);
 *      otherwise it is emitted as value2 when a previous token is available
 *      and as value1 when it is not.
 *   4. Anything else is emitted as value1.
 *
 * TODO: the virama is read straight from handle->internal->virama; lazily
 * filling it when empty (via fill_general_values) is still pending.
 */
static void
resolve_token(varnam *handle, struct token *match, struct strbuf *string)
{
    /* UTF-8 encoding of U+200C ZERO WIDTH NON-JOINER */
    char zwnj[] = "\xe2\x80\x8c";
    const char *virama = NULL;
    struct varnam_token_rendering *rule;

    assert(handle);
    assert(match);
    assert(string);

    virama = handle->internal->virama;

    rule = get_additional_rendering_rule(handle);
    if(rule != NULL && rule->render(handle, match, string) == VARNAM_SUCCESS) {
        return;
    }

    if(strcmp(match->value1, virama) == 0) {
        /* Resolving a virama: make sure the output ends with exactly one
           virama, then add a ZWNJ so that the following character will not
           be combined with it. */
        if(!strbuf_endswith(string, virama)) {
            strbuf_add(string, virama);
        }
        strbuf_add(string, zwnj);
        return;
    }

    if(match->type != VARNAM_TOKEN_VOWEL) {
        strbuf_add(string, match->value1);
        return;
    }

    if(strbuf_endswith(string, virama)) {
        /* The vowel takes the place of the virama: drop the virama and add
           the dependent vowel value, when there is one. */
        strbuf_remove_from_last(string, virama);
        if(match->value2[0] != '\0') {
            strbuf_add(string, match->value2);
        }
        return;
    }

    strbuf_add(string, handle->internal->last_token_available ? match->value2
                                                              : match->value1);
}