Exemplo n.º 1
0
static int WordSylSeg(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl;
    cst_val *phones;
    const cst_val *p;
    cst_item *ssword,*segitem;
    
    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");
    
    for (word=relation_head(utt_relation(u,"Word")); 
	 word; word=item_next(word))
    {
	printf("word: %s\n",item_feat_string(word,"name"));
	ssword = relation_append(sylstructure,word);
	phones = lex_lookup((cst_lexicon *)&cmu_lex,item_feat_string(word,"name"),0);
	for (p=phones; p; p=val_cdr(p))
	{
	    segitem = relation_append(seg,NULL);
	    item_set(segitem,"name",val_car(p));
	    printf("seg: %s\n",item_feat_string(segitem,"name"));
	    item_add_daughter(ssword,segitem);
	}
	delete_val_list(phones);
    }

    return TRUE;

}
Exemplo n.º 2
0
static cst_val* word_to_phones(const cst_item *word)
{
  cst_val*phones=NULL;
  const char *name=item_feat_string(word, "name");
  ustring32_t letters=ustring32_alloc(0);
  if(letters==NULL) return NULL;
  ustring32_assign8(letters,(const uint8_t*)name);
  if(ustring32_empty(letters))
    {
      ustring32_free(letters);
      return NULL;
    }
  unsigned int flags=classify_characters(ustring32_str(letters),ustring32_length(letters));
  int variant=item_feat_int(item_parent(item_as(word,"Token")),"variant");
  if((flags&cs_lc)&&cst_streq(ffeature_string(word,"gpos"),"content"))
    {
      if(variant==variant_pseudo_english)
        phones=ustring32_lts_apply(letters,&en_consonants_lts);
      else phones=ustring32_lts_apply(letters,&ru_consonants_lts);
      item_set_int(word,"no_vr",1);
    }
  else if((variant==variant_pseudo_english)&&(flags&cs_en))
    {
      cst_val *en_phones=lex_lookup(en_lex,name,(cst_streq(name,"a")?"n":NULL));
      if(en_phones)
        {
          phones=ru_lts_apply(en_phones,&ru_en_lts);
          delete_val(en_phones);
        }
      item_set_int(word,"no_pl",1);
    }
  else
    {
      const ru_dict_entry *e=bsearch(name,ru_dict,ru_dict_size,sizeof(ru_dict_entry),compare_entries);
      if(e!=NULL)
        {
          if(e->stress > 0)
            ustring32_set(letters,e->stress-1,1105);
          else
            item_set_int(word,"stressed_syl_num",e->stress);
        }
      phones=ustring32_lts_apply(letters,&ru_lts);
    }
  ustring32_free(letters);
  return phones;
}
Exemplo n.º 3
0
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex;
    const cst_val *lex_addenda = NULL;
    const cst_val *p, *wp = NULL;
    char *phone_name;
    char *stress = "0";
    const char *pos;
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    if (lex->lex_addenda)
	lex_addenda = lex->lex_addenda;

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word")); 
	 word; word=item_next(word))
    {
	ssword = relation_append(sylstructure,word);
        pos = ffeature_string(word,"pos");
	phones = NULL;
        wp = NULL;
        
        /*        printf("awb_debug word %s pos %s gpos %s\n",
               item_feat_string(word,"name"),
               pos,
               ffeature_string(word,"gpos")); */

	/* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
	if (item_feat_present(item_parent(item_as(word, "Token")), "phones"))
	    phones = (cst_val *) item_feat(item_parent(item_as(word, "Token")), "phones");
	else
	{
            wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda);
            if (wp)
                phones = (cst_val *)val_cdr(val_cdr(wp));
            else
		phones = lex_lookup(lex,item_feat_string(word,"name"),pos);
	}

	for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
	{
	    if (sylitem == NULL)
	    {
		sylitem = relation_append(syl,NULL);
		sssyl = item_add_daughter(ssword,sylitem);
		stress = "0";
	    }
	    segitem = relation_append(seg,NULL);
	    phone_name = cst_strdup(val_string(val_car(p)));
	    if (phone_name[cst_strlen(phone_name)-1] == '1')
	    {
		stress = "1";
		phone_name[cst_strlen(phone_name)-1] = '\0';
	    }
	    else if (phone_name[cst_strlen(phone_name)-1] == '0')
	    {
		stress = "0";
		phone_name[cst_strlen(phone_name)-1] = '\0';
	    }
	    item_set_string(segitem,"name",phone_name);
	    seg_in_syl = item_add_daughter(sssyl,segitem);
#if 0
            printf("awb_debug ph %s\n",phone_name);
#endif
	    if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
	    {
#if 0
                printf("awb_debug SYL\n");
#endif
		sylitem = NULL;
		if (sssyl)
		    item_set_string(sssyl,"stress",stress);
	    }
	    cst_free(phone_name);
	}
	if (!item_feat_present(item_parent(item_as(word, "Token")), "phones")
            && ! wp)
	    delete_val(phones);
    }

    return u;
}
Exemplo n.º 4
0
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex, *ulex = NULL;
    const cst_val *p;
    char *phone_name;
    char *stress = "0";
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;


    lex = val_lexicon(feat_val(u->features,"lexicon"));
    if (feat_present(u->features, "user_lexicon"))
	ulex = val_lexicon(feat_val(u->features, "user_lexicon"));

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word")); 
	 word; word=item_next(word))
    {
	ssword = relation_append(sylstructure,word);
	phones = NULL;

	/* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
	if (item_feat_present(item_parent(item_as(word, "Token")), "phones"))
	    phones = (cst_val *) item_feat(item_parent(item_as(word, "Token")), "phones");
	else
	{
	    if (ulex)
		phones = lex_lookup(ulex,item_feat_string(word, "name"),0);
	    if (phones == NULL)
		phones = lex_lookup(lex,item_feat_string(word,"name"),0);
	}

	for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
	{
	    if (sylitem == NULL)
	    {
		sylitem = relation_append(syl,NULL);
		sssyl = item_add_daughter(ssword,sylitem);
		stress = "0";
	    }
	    segitem = relation_append(seg,NULL);
	    phone_name = cst_strdup(val_string(val_car(p)));
	    if (phone_name[strlen(phone_name)-1] == '1')
	    {
		stress = "1";
		phone_name[strlen(phone_name)-1] = '\0';
	    }
	    else if (phone_name[strlen(phone_name)-1] == '0')
	    {
		stress = "0";
		phone_name[strlen(phone_name)-1] = '\0';
	    }
	    item_set_string(segitem,"name",phone_name);
	    seg_in_syl = item_add_daughter(sssyl,segitem);
	    if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
	    {
		sylitem = NULL;
		if (sssyl)
		    item_set_string(sssyl,"stress",stress);
	    }
	    cst_free(phone_name);
	}
	if (!item_feat_present(item_parent(item_as(word, "Token")), "phones"))
	    delete_val(phones);
    }

    return u;
}