static cst_val* word_to_phones(const cst_item *word) { cst_val*phones=NULL; const char *name=item_feat_string(word, "name"); ustring32_t letters=ustring32_alloc(0); if(letters==NULL) return NULL; ustring32_assign8(letters,(const uint8_t*)name); if(ustring32_empty(letters)) { ustring32_free(letters); return NULL; } unsigned int flags=classify_characters(ustring32_str(letters),ustring32_length(letters)); int variant=item_feat_int(item_parent(item_as(word,"Token")),"variant"); if((flags&cs_lc)&&cst_streq(ffeature_string(word,"gpos"),"content")) { if(variant==variant_pseudo_english) phones=ustring32_lts_apply(letters,&en_consonants_lts); else phones=ustring32_lts_apply(letters,&ru_consonants_lts); item_set_int(word,"no_vr",1); } else if((variant==variant_pseudo_english)&&(flags&cs_en)) { cst_val *en_phones=lex_lookup(en_lex,name,(cst_streq(name,"a")?"n":NULL)); if(en_phones) { phones=ru_lts_apply(en_phones,&ru_en_lts); delete_val(en_phones); } item_set_int(word,"no_pl",1); } else { const ru_dict_entry *e=bsearch(name,ru_dict,ru_dict_size,sizeof(ru_dict_entry),compare_entries); if(e!=NULL) { if(e->stress > 0) ustring32_set(letters,e->stress-1,1105); else item_set_int(word,"stressed_syl_num",e->stress); } phones=ustring32_lts_apply(letters,&ru_lts); } ustring32_free(letters); return phones; }
/*
 * Allocates a buffer holding one embedding level for every character of
 * the given text and fills it in.
 *
 * If *baseDir is neither FZ_BIDI_LTR nor FZ_BIDI_RTL on entry, the base
 * level is derived from the text and the corresponding direction is
 * written back to *baseDir; otherwise *baseDir itself supplies the base
 * level.
 *
 * The returned buffer has len entries and is obtained from fz_malloc.  The
 * temporary character-type buffer is always freed.  If an error is thrown
 * inside the try block, the levels buffer is freed too and the error is
 * rethrown.
 */
static fz_bidi_level *
create_levels(fz_context *ctx,
	const uint32_t *text,
	size_t len,
	fz_bidi_direction *baseDir,
	int resolveWhiteSpace,
	int flags)
{
	fz_bidi_level *out;
	fz_bidi_chartype *classes = NULL;
	fz_bidi_level base;

	out = fz_malloc(ctx, len * sizeof(*out));

	fz_var(classes);

	fz_try(ctx)
	{
		fz_bidi_chartype tab_class;
		size_t pos;

		classes = fz_malloc(ctx, len * sizeof(*classes));

		classify_characters(text, classes, len, flags);

		if (*baseDir == FZ_BIDI_LTR || *baseDir == FZ_BIDI_RTL)
		{
			/* Caller supplied a valid direction: use it as the base level. */
			base = (fz_bidi_level)*baseDir;
		}
		else
		{
			/* Derive the base level from the text and report the
			 * resulting direction back through *baseDir. */
			base = base_level_from_text(classes, len);
			if (ODD(base) == 1)
				*baseDir = FZ_BIDI_RTL;
			else
				*baseDir = FZ_BIDI_LTR;
		}

		/* Give every tab the base direction: 'strong right' for a
		 * right-to-left base, 'strong left' otherwise.  This lets
		 * layout implicitly treat tabs as 'segment separators'. */
		tab_class = (*baseDir == FZ_BIDI_RTL) ? BDI_R : BDI_L;
		for (pos = 0u; pos < len; pos++)
		{
			if (text[pos] == '\t')
			{
				classes[pos] = tab_class;
			}
		}

		/* Quotation marks are classified as RLE or LRE, or left
		 * alone, depending on what follows them. */
		classify_quoted_blocks(text, classes, len);

		/* Resolve levels and character types, pass by pass. */
		(void)fz_bidi_resolve_explicit(base, BDI_N, classes, out, len, 0);
		fz_bidi_resolve_weak(ctx, base, classes, out, len);
		fz_bidi_resolve_neutrals(base, classes, out, len);
		fz_bidi_resolve_implicit(classes, out, len);

		/* Re-classify with the whitespace flag before the optional
		 * whitespace resolution pass. */
		classify_characters(text, classes, len, BIDI_CLASSIFY_WHITE_SPACE);

		if (resolveWhiteSpace)
		{
			fz_bidi_resolve_whitespace(base, classes, out, len);
		}

		/* The levels buffer now holds odd numbers for rtl characters
		 * and even numbers for ltr characters. */
#ifdef DEBUG_BIDI_VERBOSE
		fprintf(stderr, "Levels: ");
		for (pos = 0; pos < len; pos++)
		{
			fprintf(stderr, "%d", out[pos]>9?0:out[pos]);
		}
		fprintf(stderr, "\n");
#endif
	}
	fz_always(ctx)
	{
		fz_free(ctx, classes);
	}
	fz_catch(ctx)
	{
		fz_free(ctx, out);
		fz_rethrow(ctx);
	}
	return out;
}