Exemplo n.º 1
0
static cst_val* word_to_phones(const cst_item *word)
{
  cst_val*phones=NULL;
  const char *name=item_feat_string(word, "name");
  ustring32_t letters=ustring32_alloc(0);
  if(letters==NULL) return NULL;
  ustring32_assign8(letters,(const uint8_t*)name);
  if(ustring32_empty(letters))
    {
      ustring32_free(letters);
      return NULL;
    }
  unsigned int flags=classify_characters(ustring32_str(letters),ustring32_length(letters));
  int variant=item_feat_int(item_parent(item_as(word,"Token")),"variant");
  if((flags&cs_lc)&&cst_streq(ffeature_string(word,"gpos"),"content"))
    {
      if(variant==variant_pseudo_english)
        phones=ustring32_lts_apply(letters,&en_consonants_lts);
      else phones=ustring32_lts_apply(letters,&ru_consonants_lts);
      item_set_int(word,"no_vr",1);
    }
  else if((variant==variant_pseudo_english)&&(flags&cs_en))
    {
      cst_val *en_phones=lex_lookup(en_lex,name,(cst_streq(name,"a")?"n":NULL));
      if(en_phones)
        {
          phones=ru_lts_apply(en_phones,&ru_en_lts);
          delete_val(en_phones);
        }
      item_set_int(word,"no_pl",1);
    }
  else
    {
      const ru_dict_entry *e=bsearch(name,ru_dict,ru_dict_size,sizeof(ru_dict_entry),compare_entries);
      if(e!=NULL)
        {
          if(e->stress > 0)
            ustring32_set(letters,e->stress-1,1105);
          else
            item_set_int(word,"stressed_syl_num",e->stress);
        }
      phones=ustring32_lts_apply(letters,&ru_lts);
    }
  ustring32_free(letters);
  return phones;
}
Exemplo n.º 2
0
Arquivo: bidi.c Projeto: Enzime/mupdf
/* Build an array holding one bidi embedding level per character of
 * text[0..len).
 *
 * If *baseDir is neither FZ_BIDI_LTR nor FZ_BIDI_RTL on entry, the base
 * level is derived from the text and *baseDir is overwritten with the
 * matching direction; otherwise *baseDir itself is used as the base level.
 *
 * The returned buffer is obtained from fz_malloc and is not freed here.
 * If anything inside the fz_try section throws, that buffer is freed and
 * the error is rethrown. The scratch array of character types is freed on
 * both the success and the error path.
 */
static fz_bidi_level *
create_levels(fz_context *ctx,
		const uint32_t *text,
		size_t len,
		fz_bidi_direction *baseDir,
		int resolveWhiteSpace,
		int flags)
{
	fz_bidi_chartype *char_types = NULL;
	fz_bidi_level *level_buf;
	fz_bidi_level base_level;

	level_buf = fz_malloc(ctx, len * sizeof(*level_buf));

	/* char_types is assigned inside fz_try and read in fz_always. */
	fz_var(char_types);

	fz_try(ctx)
	{
		fz_bidi_chartype tab_type;
		size_t pos;

		char_types = fz_malloc(ctx, len * sizeof(*char_types));

		classify_characters(text, char_types, len, flags);

		if (*baseDir == FZ_BIDI_LTR || *baseDir == FZ_BIDI_RTL)
		{
			/* Caller supplied a valid direction: use it directly. */
			base_level = (fz_bidi_level)*baseDir;
		}
		else
		{
			/* No valid direction given: infer the base level from the
			 * text and report the direction back through *baseDir.
			 */
			base_level = base_level_from_text(char_types, len);
			if (ODD(base_level) == 1)
				*baseDir = FZ_BIDI_RTL;
			else
				*baseDir = FZ_BIDI_LTR;
		}

		/* Give every tab the strong type of the base direction
		 * ('strong right' for right-to-left, 'strong left' otherwise).
		 * This allows Layout to implicitly treat tabs as
		 * 'segment separators'.
		 */
		tab_type = (*baseDir == FZ_BIDI_RTL) ? BDI_R : BDI_L;
		for (pos = 0; pos < len; ++pos)
		{
			if (text[pos] == '\t')
				char_types[pos] = tab_type;
		}

		/* Quotation marks are classified as RLE or LRE, or left
		 * untouched, depending on what follows them.
		 */
		classify_quoted_blocks(text, char_types, len);

		/* Resolution passes, in order: explicit, weak, neutral, implicit. */
		(void)fz_bidi_resolve_explicit(base_level, BDI_N, char_types, level_buf, len, 0);
		fz_bidi_resolve_weak(ctx, base_level, char_types, level_buf, len);
		fz_bidi_resolve_neutrals(base_level, char_types, level_buf, len);
		fz_bidi_resolve_implicit(char_types, level_buf, len);

		/* Classify again with the white-space flag ahead of the optional
		 * whitespace pass below.
		 */
		classify_characters(text, char_types, len, BIDI_CLASSIFY_WHITE_SPACE);

		if (resolveWhiteSpace)
			fz_bidi_resolve_whitespace(base_level, char_types, level_buf, len);

		/* At this point odd levels mark rtl characters and even levels
		 * mark ltr characters.
		 */
#ifdef DEBUG_BIDI_VERBOSE
		fprintf(stderr, "Levels: ");
		for (pos = 0; pos < len; ++pos)
		{
			/* Levels above 9 are printed as 0 so each stays one digit. */
			fprintf(stderr, "%d", level_buf[pos]>9?0:level_buf[pos]);
		}
		fprintf(stderr, "\n");
#endif
	}
	fz_always(ctx)
	{
		fz_free(ctx, char_types);
	}
	fz_catch(ctx)
	{
		fz_free(ctx, level_buf);
		fz_rethrow(ctx);
	}
	return level_buf;
}