Exemple #1
0
/*
 * Post-lexical renaming of segments reachable through the
 * "Transcription" relation of u.
 *
 * Pass 1 walks the words head to tail and skips any word that carries
 * a "no_pl" or "no_vr" feature.  For each daughter segment:
 *   - if its name is a member of unstressed_vowels, the
 *     ru_vowel_reduction_cart tree is interpreted on the segment and,
 *     unless the answer is "N", the segment is renamed to the answer;
 *   - otherwise a segment named "ii" whose "R:Segment.p.ph_csoft"
 *     feature is "-" is renamed to "yy", except when the word is "и"
 *     with gpos "content".
 *
 * Pass 2 walks the words tail to head (segments last to first) and
 * skips words carrying "no_pl".  For every segment for which
 * russian_vpair() returns a non-NULL name, ru_vpair_cart is
 * interpreted and the segment is renamed to that name when the answer
 * is "Y".
 *
 * Returns u.
 */
cst_utterance *russian_postlex_function(cst_utterance *u)
{
  const cst_item *w,*ph;
  const char *verdict,*ph_name,*counterpart;

  /* Pass 1: forwards over words and their segments. */
  for(w=relation_head(utt_relation(u,"Transcription"));w;w=item_next(w))
    {
      if(!item_feat_present(w,"no_pl")&&!item_feat_present(w,"no_vr"))
        {
          for(ph=item_daughter(w);ph;ph=item_next(ph))
            {
              ph_name=item_feat_string(ph,"name");
              if(cst_member_string(ph_name,unstressed_vowels))
                {
                  verdict=val_string(cart_interpret(item_as(ph,"Segment"),&ru_vowel_reduction_cart));
                  if(!cst_streq(verdict,"N"))
                    item_set_string(ph,"name",verdict);
                  continue;
                }
              /* Not in the unstressed vowel list: only "ii" is of interest. */
              if(!cst_streq(ph_name,"ii"))
                continue;
              if(!cst_streq(ffeature_string(ph,"R:Segment.p.ph_csoft"),"-"))
                continue;
              /* The word "и" tagged as content is left untouched. */
              if(cst_streq(item_feat_string(w,"name"),"и")&&
                 cst_streq(ffeature_string(w,"gpos"),"content"))
                continue;
              item_set_string(ph,"name","yy");
            }
        }
    }

  /* Pass 2: backwards over words and their segments. */
  for(w=relation_tail(utt_relation(u,"Transcription"));w;w=item_prev(w))
    {
      if(item_feat_present(w,"no_pl"))
        continue;
      for(ph=item_last_daughter(w);ph;ph=item_prev(ph))
        {
          counterpart=russian_vpair(item_feat_string(ph,"name"));
          if(counterpart==NULL)
            continue;
          verdict=val_string(cart_interpret(item_as(ph,"Segment"),&ru_vpair_cart));
          if(cst_streq(verdict,"Y"))
            item_set_string(ph,"name",counterpart);
        }
    }
  return u;
}
/*
 * Feature function: is seg the last item among its siblings in the
 * "SylStructure" relation?  Returns VAL_STRING_0 only when seg is in
 * that relation and has a following item; otherwise (no following
 * item, or not in the relation at all) returns VAL_STRING_1.
 */
static const cst_val *syl_final(const cst_item *seg)
{
    const cst_item *in_syl = item_as(seg,"SylStructure");

    if (in_syl && (item_next(in_syl) != NULL))
        return VAL_STRING_0;

    return VAL_STRING_1;
}
/*
 * Feature function: counts the daughters of word in the
 * "SylStructure" relation and returns the count via val_int_n().
 * The count is 0 when there is no daughter.
 */
static const cst_val *word_numsyls(const cst_item *word)
{
    cst_item *syl = item_daughter(item_as(word,"SylStructure"));
    int count = 0;

    while (syl)
    {
        count++;
        syl = item_next(syl);
    }

    return val_int_n(count);
}
/*
 * Feature function: number of items that precede seg among its
 * siblings in the "SylStructure" relation, returned via
 * val_string_n().  The counter starts at -1, so -1 is passed to
 * val_string_n() when item_as() yields no item.
 */
static const cst_val *pos_in_syl(const cst_item *seg)
{
    const cst_item *cur = item_as(seg,"SylStructure");
    int index = -1;

    /* One step per item from seg back to the first sibling, inclusive. */
    while (cur)
    {
        cur = item_prev(cur);
        index++;
    }

    return val_string_n(index);
}
Exemple #5
0
/*
 * Adjusts the segments of the clitic words "'s", "'ve", "'ll" and "'d".
 *
 * Walks the SEGMENT relation starting at its second item and looks at
 * the name of the word owning each segment:
 *   - "'s": when the previous phone's "ctype" starts with 'f' or 'a'
 *     and its "cplace" does not start with 'd', 'b' or 'g', a new
 *     segment named "ax" is inserted before the current one (in both
 *     the segment and SYLSTRUCTURE relations); otherwise, when the
 *     previous phone's "cvox" is "-", the current segment is renamed
 *     to "s".
 *   - "'ve", "'ll", "'d": when the "p."PH_VC feature of the segment
 *     is "-", an "ax" segment is inserted in the same way.
 */
static void apostrophe_s(cst_utterance *u)
{
    const cst_phoneset *phoneset = u->vox->phoneset;
    cst_item *seg;
    cst_item *ax;
    const char *prev_name, *owner;

    for (seg = item_next(UTT_REL_HEAD(u,SEGMENT)); seg; seg = item_next(seg))
    {
        owner = val_string(ffeature(seg, "R:"SYLSTRUCTURE".P.P.name"));

        if (cst_streq("'s", owner))
        {
            prev_name = item_feat_string(item_prev(seg),"name");

            if ((strchr("fa",*phone_feature_string(phoneset,prev_name,"ctype")) == NULL)
                || (strchr("dbg",
                           *phone_feature_string(phoneset,prev_name,"cplace")) != NULL))
            {
                /* no schwa needed; devoice after a voiceless phone */
                if (cst_streq("-",phone_feature_string(phoneset,prev_name,"cvox")))
                    item_set_string(seg,"name","s");
                continue;
            }

            /* needs a schwa */
            ax = item_prepend(seg,NULL);
            item_set_string(ax,"name","ax");
            item_prepend(item_as(seg,SYLSTRUCTURE),ax);
            continue;
        }

        if ((cst_streq("'ve", owner)
             || cst_streq("'ll", owner)
             || cst_streq("'d", owner))
            && cst_streq("-",ffeature_string(seg,"p."PH_VC)))
        {
            ax = item_prepend(seg,NULL);
            item_set_string(ax,"name","ax");
            item_prepend(item_as(seg,SYLSTRUCTURE),ax);
        }
    }
}
/*
 * Feature function: returns val_string_onset when any item following
 * seg among its "SylStructure" siblings has phone feature "vc" equal
 * to "+", and val_string_coda otherwise.  seg itself is not tested.
 */
static const cst_val *seg_onsetcoda(const cst_item *seg)
{
    const cst_phoneset *ps = item_phoneset(seg);
    const cst_item *later = item_next(item_as(seg,"SylStructure"));

    while (later)
    {
        const char *vc = phone_feature_string(ps,
                                              item_feat_string(later,"name"),
                                              "vc");
        if (cst_streq("+",vc))
            return (cst_val *)&val_string_onset;
        later = item_next(later);
    }

    return (cst_val *)&val_string_coda;
}
/*
 * Feature function: difference between the "end" feature of seg and
 * the "end" feature of the previous item in the "Segment" relation.
 *
 * Returns VAL_STRING_0 when seg is not in the relation, and the "end"
 * feature value itself when there is no previous item.
 */
static const cst_val *segment_duration(const cst_item *seg)
{
    const cst_item *cur = item_as(seg,"Segment");
    const cst_item *before;

    if (!cur)
        return VAL_STRING_0;

    before = item_prev(cur);
    if (before == NULL)
        return item_feat(cur,"end");

    /* A fresh value is built here.  Per the original author's note
       this should be okay, as it gets dereferenced when the CART
       interpreter frees its feature cache. */
    return float_val(item_feat_float(cur,"end")
                     - item_feat_float(before,"end"));
}
/*
 * Feature function: number of steps back from syl (in the "Syllable"
 * relation) to the nearest item for which accented() is non-zero,
 * syl included.  When no such item is found, the number of items
 * walked is returned instead.  Result is built with val_string_n().
 */
static const cst_val *last_accent(const cst_item *syl)
{
    const cst_item *cur = item_as(syl,"Syllable");
    int distance = 0;

    while (cur && (distance < CST_CONST_INT_MAX))
    {
        if (val_int(accented(cur)))
            break;
        cur = item_prev(cur);
        distance++;
    }

    return val_string_n(distance);
}
/*
 * Feature function: number of daughters of syl (in "SylStructure")
 * that come before the first daughter whose ph_vc() value is "+".
 * When no daughter qualifies, this is the total number of daughters.
 * Result is built with val_string_n().
 */
static const cst_val *syl_onsetsize(const cst_item *syl)
{
    cst_item *ph = item_daughter(item_as(syl,"SylStructure"));
    int size = 0;

    while (ph && !cst_streq("+",val_string(ph_vc(ph))))
    {
        ph = item_next(ph);
        size++;
    }

    return val_string_n(size);
}
/*
 * Feature function: break level after this syllable.
 *
 * Returns VAL_STRING_1 when syl is not in "SylStructure" or has no
 * parent there, VAL_STRING_0 when another item follows it (word
 * internal), and otherwise whatever word_break() returns for the
 * parent item.
 */
static const cst_val *syl_break(const cst_item *syl)
{
    cst_item *in_word = item_as(syl,"SylStructure");
    cst_item *owner;

    if (in_word == NULL)
        return VAL_STRING_1;  /* hmm, no sylstructure */

    if (item_next(in_word) != NULL)
        return VAL_STRING_0;  /* word internal */

    owner = item_parent(in_word);
    if (owner == NULL)
        return VAL_STRING_1;  /* no parent */

    return word_break(owner);
}
/*
 * Feature function: number of syllables since the last major break.
 *
 * Counts the steps back from syl in the "Syllable" relation until the
 * item located by the SylStructure/Phrase path below is reached (or
 * the relation runs out).  Result is built with val_string_n().
 */
static const cst_val *syl_in(const cst_item *syl)
{
    const cst_item *cur = item_as(syl,"Syllable");
    const cst_item *first = path_to_item(syl,"R:SylStructure.parent.R:Phrase.parent.daughter.R:SylStructure.daughter");
    int count = 0;

    while (cur && (count < CST_CONST_INT_MAX) && !item_equal(cur,first))
    {
        cur = item_prev(cur);
        count++;
    }

    return val_string_n(count);
}
Exemple #12
0
/*
 * Builds the phone list for word from its "name" feature.
 *
 * Three strategies, tried in this order:
 *   1. character class has cs_lc set and gpos is "content": apply the
 *      consonant letter-to-sound rules (English ones for the
 *      pseudo-English variant, Russian ones otherwise) and set the
 *      "no_vr" feature on the word;
 *   2. pseudo-English variant with cs_en set: look the word up in
 *      en_lex, map the result through ru_en_lts and set "no_pl";
 *   3. otherwise: look the word up in ru_dict; a positive stress value
 *      replaces the letter at index stress-1, any other value is
 *      stored in the "stressed_syl_num" feature; then apply ru_lts.
 *
 * Returns the phone list, or NULL when the letter buffer cannot be
 * allocated, the name is empty, or the chosen strategy yields nothing.
 * NOTE(review): callers in this file release the result with
 * delete_val() - confirm that this holds for every caller.
 */
static cst_val* word_to_phones(const cst_item *word)
{
  const char *spelling=item_feat_string(word, "name");
  ustring32_t chars=ustring32_alloc(0);
  cst_val *result=NULL;
  unsigned int classes;
  int token_variant;

  if(chars==NULL)
    return NULL;
  ustring32_assign8(chars,(const uint8_t*)spelling);
  if(ustring32_empty(chars))
    {
      ustring32_free(chars);
      return NULL;
    }

  classes=classify_characters(ustring32_str(chars),ustring32_length(chars));
  token_variant=item_feat_int(item_parent(item_as(word,"Token")),"variant");

  if((classes&cs_lc)&&cst_streq(ffeature_string(word,"gpos"),"content"))
    {
      if(token_variant==variant_pseudo_english)
        result=ustring32_lts_apply(chars,&en_consonants_lts);
      else
        result=ustring32_lts_apply(chars,&ru_consonants_lts);
      item_set_int(word,"no_vr",1);
    }
  else if((token_variant==variant_pseudo_english)&&(classes&cs_en))
    {
      cst_val *english=lex_lookup(en_lex,spelling,(cst_streq(spelling,"a")?"n":NULL));
      if(english)
        {
          result=ru_lts_apply(english,&ru_en_lts);
          delete_val(english);
        }
      item_set_int(word,"no_pl",1);
    }
  else
    {
      const ru_dict_entry *entry=bsearch(spelling,ru_dict,ru_dict_size,sizeof(ru_dict_entry),compare_entries);
      if(entry!=NULL)
        {
          if(entry->stress > 0)
            /* 1105 == 0x451; presumably the code point of Cyrillic
               "ё", assuming the buffer holds Unicode code points. */
            ustring32_set(chars,entry->stress-1,1105);
          else
            item_set_int(word,"stressed_syl_num",entry->stress);
        }
      result=ustring32_lts_apply(chars,&ru_lts);
    }

  ustring32_free(chars);
  return result;
}
/*
 * Feature function: position of syl among its "SylStructure"
 * siblings.
 *
 *   not in the relation, or no neighbour on either side -> single
 *   no following item                                   -> final
 *   no preceding item                                   -> initial
 *   neighbours on both sides                            -> mid
 */
static const cst_val *position_type(const cst_item *syl)
{
    const cst_item *s = item_as(syl,"SylStructure");
    int is_last, is_first;

    if (s == 0)
        return (cst_val *)&val_string_single;

    is_last = (item_next(s) == 0);
    is_first = (item_prev(s) == 0);

    if (is_last && is_first)
        return (cst_val *)&val_string_single;
    if (is_last)
        return (cst_val *)&val_string_final;
    if (is_first)
        return (cst_val *)&val_string_initial;

    return (cst_val *)&val_string_mid;
}
/*
 * Feature function: punctuation attached to word.
 *
 * A word that is followed by another item in the "Token" relation
 * yields val_string_empty.  Otherwise the "punc" feature of the
 * parent item in "Token" is returned; this branch is also taken when
 * word is not in the "Token" relation at all.
 */
static const cst_val *word_punc(const cst_item *word)
{
    cst_item *in_token = item_as(word,"Token");

    if ((in_token == NULL) || (item_next(in_token) == 0))
        return ffeature(item_parent(in_token),"punc");

    return &val_string_empty;
}
static const cst_val *seg_onset_ctype(const cst_item *seg, const char *ctype)
{
    const cst_item *s;
    const cst_phoneset *ps = item_phoneset(seg);
    
    for (s=item_daughter(item_parent(item_as(seg,"SylStructure")));
	 s;
	 s=item_next(s))
    {
	if (cst_streq("+",phone_feature_string(ps,item_feat_string(s,"name"),
					       "vc")))
	    return VAL_STRING_0;
	if (cst_streq(ctype,phone_feature_string(ps,item_feat_string(s,"name"),
						 "ctype")))
	    return VAL_STRING_1;
    }

    return VAL_STRING_0;
}
/*
 * Feature function: break level after word.
 *
 * Returns VAL_STRING_1 when word is not in the "Phrase" relation or is
 * followed by another item there.  For the last word under its parent
 * the parent's "name" decides: "BB" -> VAL_STRING_4,
 * "B" -> VAL_STRING_3, anything else -> VAL_STRING_1.
 */
static const cst_val *word_break(const cst_item *word)
{
    cst_item *in_phrase = item_as(word,"Phrase");
    const char *phrase_name;

    if (in_phrase == NULL)
        return VAL_STRING_1;
    if (item_next(in_phrase) != 0)
        return VAL_STRING_1;

    phrase_name = val_string(item_feat(item_parent(in_phrase),"name"));

    if (cst_streq("BB",phrase_name))
        return VAL_STRING_4;
    if (cst_streq("B",phrase_name))
        return VAL_STRING_3;

    return VAL_STRING_1;
}
/*
 * Feature function: number of stressed syllables since the last major
 * break.
 *
 * Starting at the item before syl in the "Syllable" relation, walks
 * backwards until the item located by the path below is reached, and
 * counts the items whose "stress" feature is "1".  Neither syl nor
 * the item the walk stops at is counted.
 */
static const cst_val *ssyl_in(const cst_item *syl)
{
    const cst_item *start = item_as(syl,"Syllable");
    const cst_item *first = path_to_item(syl,"R:SylStructure.parent.R:Phrase.parent.daughter.R:SylStructure.daughter");
    const cst_item *cur = item_prev(start);
    int stressed = 0;

    /* This should actually include the first syllable, but Festival's
       doesn't. */
    while (cur && (!item_equal(cur,first)) && (stressed < CST_CONST_INT_MAX))
    {
        if (cst_streq("1",item_feat_string(cur,"stress")))
            stressed++;
        cur = item_prev(cur);
    }

    return val_string_n(stressed);  /* its used randomly as int and float */
}
/*
 * Feature function: number of stressed syllables after syl, up to the
 * end of the phrase.
 *
 * Starting at the item after syl in the "Syllable" relation, walks
 * forwards and counts the items whose "stress" feature is "1".  The
 * walk stops after the item located by the path below, so that item
 * is included in the count.
 */
static const cst_val *ssyl_out(const cst_item *syl)
{
    const cst_item *start = item_as(syl,"Syllable");
    const cst_item *last = path_to_item(syl,"R:SylStructure.parent.R:Phrase.parent.daughtern.R:SylStructure.daughtern");
    const cst_item *cur = item_next(start);
    int stressed = 0;

    while (cur && (stressed < CST_CONST_INT_MAX))
    {
        if (cst_streq("1",item_feat_string(cur,"stress")))
            stressed++;
        if (item_equal(cur,last))
            break;
        cur = item_next(cur);
    }

    return val_string_n(stressed);  /* its used randomly as int and float */
}
Exemple #19
0
/*
 * Builds the "Syllable", "SylStructure" and "Segment" relations of u
 * from the items of its "Word" relation.
 *
 * For every word the phone list is taken, in order of preference,
 * from:
 *   1. the "phones" feature of the word's parent token;
 *   2. the lexicon addenda (entry found by word name);
 *   3. lex_lookup() in the utterance's lexicon, using the "pos" feature.
 * Only a list obtained from lex_lookup() is released with delete_val()
 * afterwards; the other two are borrowed.
 *
 * Each phone becomes a segment.  A trailing '1' or '0' on the phone
 * name is stripped and remembered as the stress mark; the "stress"
 * feature is written on the syllable when the lexicon's syl_boundary
 * callback reports a boundary after the segment.
 *
 * Returns u.
 */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word, *token;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex;
    const cst_val *lex_addenda;
    const cst_val *p, *wp;
    char *phone_name, *last;
    const char *stress = "0";
    const char *pos;
    cst_val *phones;
    int from_lexicon;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    lex_addenda = lex->lex_addenda;

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        pos = ffeature_string(word,"pos");
        token = item_parent(item_as(word, "Token"));
        phones = NULL;
        wp = NULL;
        from_lexicon = 0;

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        if (item_feat_present(token, "phones"))
            phones = (cst_val *) item_feat(token, "phones");
        else
        {
            wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda);
            if (wp)
                phones = (cst_val *)val_cdr(val_cdr(wp));
            else
            {
                phones = lex_lookup(lex,item_feat_string(word,"name"),pos);
                from_lexicon = 1;  /* we own this list */
            }
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                /* first phone of a new syllable */
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);
            phone_name = cst_strdup(val_string(val_car(p)));

            /* Strip a trailing stress digit.  An empty phone name has
               no last character, so it must not be indexed at
               length-1. */
            last = phone_name + cst_strlen(phone_name);
            if (last != phone_name)
            {
                last--;
                if (*last == '1')
                {
                    stress = "1";
                    *last = '\0';
                }
                else if (*last == '0')
                {
                    stress = "0";
                    *last = '\0';
                }
            }

            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);
            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {
                /* syllable complete: the next phone opens a new one */
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }

        if (from_lexicon)
            delete_val(phones);
    }

    return u;
}
Exemple #20
0
/*
 * Builds the "Syllable", "SylStructure", "Segment", "SylVowel" and
 * "Transcription" relations of u from its "Word" relation.
 *
 * Each word is transcribed with word_to_phones().  Words without a
 * transcription are skipped, and the loop stops altogether once
 * adding a word would push the running segment total past
 * max_num_segs.  Every phone becomes a segment attached to the
 * current syllable (SylStructure) and to the word (Transcription);
 * for vowels the syllable is also appended to SylVowel with the
 * segment as its daughter.  Syllables are closed where
 * ru_syl_boundary() says so and start with "stress" set to "0";
 * assign_stress() is then run on the word.
 *
 * Afterwards every word that did not make it into "Transcription" is
 * deleted together with its "Token" and "Phrase" counterparts, and
 * "Phrase" items left without daughters are deleted as well.
 *
 * Returns u.
 */
cst_utterance *russian_lexical_insertion(cst_utterance *u)
{
  cst_relation *syl_rel,*sylstruct_rel,*seg_rel,*sylvowel_rel,*trans_rel;
  cst_item *w,*following;
  cst_item *w_in_sylstruct,*w_in_trans;
  cst_item *syl_in_word,*cur_syl,*new_seg,*seg_in_syl,*syl_in_sylvowel;
  const cst_val *ph;
  const char *ph_name;
  cst_val *pron;
  int pron_len;
  int segs_so_far=0;

  syl_rel = utt_relation_create(u,"Syllable");
  sylstruct_rel = utt_relation_create(u,"SylStructure");
  seg_rel = utt_relation_create(u,"Segment");
  sylvowel_rel = utt_relation_create(u,"SylVowel");
  trans_rel = utt_relation_create(u,"Transcription");

  for (w=relation_head(utt_relation(u,"Word"));w;w=item_next(w))
    {
      pron=word_to_phones(w);
      if(pron==NULL)
        continue;
      pron_len=val_length(pron);
      if((segs_so_far+pron_len)>max_num_segs)
        {
          /* Segment budget exhausted: this word and all later ones
             stay untranscribed and are pruned below. */
          delete_val(pron);
          break;
        }
      w_in_sylstruct = relation_append(sylstruct_rel,w);
      w_in_trans = relation_append(trans_rel,w);
      syl_in_word=NULL;
      cur_syl=NULL;
      for (ph=pron; ph; ph=val_cdr(ph))
        {
          if (cur_syl == NULL)
            {
              /* first phone of a new syllable */
              cur_syl = relation_append(syl_rel,NULL);
              syl_in_word = item_add_daughter(w_in_sylstruct,cur_syl);
            }
          new_seg = relation_append(seg_rel,NULL);
          ph_name = val_string(val_car(ph));
          item_set_string(new_seg,"name",ph_name);
          seg_in_syl = item_add_daughter(syl_in_word,new_seg);
          item_add_daughter(w_in_trans,new_seg);
          if(is_vowel(ph_name))
            {
              syl_in_sylvowel=relation_append(sylvowel_rel,cur_syl);
              item_add_daughter(syl_in_sylvowel,new_seg);
            }
          if (ru_syl_boundary(seg_in_syl,val_cdr(ph)))
            {
              cur_syl = NULL;
              if (syl_in_word)
                item_set_string(syl_in_word,"stress","0");
            }
        }
      assign_stress(w);
      delete_val(pron);
      segs_so_far+=pron_len;
    }

  /* Remove words that received no transcription.  The successor is
     fetched before the item is deleted. */
  for(w=relation_head(utt_relation(u,"Word"));w;w=following)
    {
      following=item_next(w);
      if(item_as(w,"Transcription")!=NULL)
        continue;
      delete_item(item_as(w,"Token"));
      delete_item(item_as(w,"Phrase"));
      delete_item(w);
    }

  /* Remove phrases that have no daughters. */
  for(w=relation_head(utt_relation(u,"Phrase"));w;w=following)
    {
      following=item_next(w);
      if(item_daughter(w)==NULL)
        delete_item(w);
    }
  return u;
}
Exemple #21
0
/*
 * Builds the "Syllable", "SylStructure" and "Segment" relations of u
 * from the items of its "Word" relation.
 *
 * For every word the phone list is taken from the "phones" feature of
 * the word's parent token when present.  Otherwise the word is looked
 * up in the "user_lexicon" (if the utterance has one) and, when that
 * yields nothing, in the main "lexicon".  A list obtained by lookup
 * is released with delete_val() afterwards; the token's list is
 * borrowed.
 *
 * Each phone becomes a segment.  A trailing '1' or '0' on the phone
 * name is stripped and remembered as the stress mark; the "stress"
 * feature is written on the syllable when the main lexicon's
 * syl_boundary callback reports a boundary after the segment.
 *
 * Returns u.
 */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word, *token;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex, *ulex = NULL;
    const cst_val *p;
    char *phone_name, *last;
    const char *stress = "0";
    cst_val *phones;
    int from_lexicon;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    if (feat_present(u->features, "user_lexicon"))
        ulex = val_lexicon(feat_val(u->features, "user_lexicon"));

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        token = item_parent(item_as(word, "Token"));
        phones = NULL;
        from_lexicon = 0;

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        if (item_feat_present(token, "phones"))
            phones = (cst_val *) item_feat(token, "phones");
        else
        {
            /* user lexicon first, main lexicon as fallback */
            if (ulex)
                phones = lex_lookup(ulex,item_feat_string(word, "name"),0);
            if (phones == NULL)
                phones = lex_lookup(lex,item_feat_string(word,"name"),0);
            from_lexicon = 1;  /* we own this list */
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                /* first phone of a new syllable */
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);
            phone_name = cst_strdup(val_string(val_car(p)));

            /* Strip a trailing stress digit.  An empty phone name has
               no last character, so it must not be indexed at
               length-1. */
            last = phone_name + strlen(phone_name);
            if (last != phone_name)
            {
                last--;
                if (*last == '1')
                {
                    stress = "1";
                    *last = '\0';
                }
                else if (*last == '0')
                {
                    stress = "0";
                    *last = '\0';
                }
            }

            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);
            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {
                /* syllable complete: the next phone opens a new one */
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }

        if (from_lexicon)
            delete_val(phones);
    }

    return u;
}
Exemple #22
0
/*
 * Decides which syllable(s) of word get the "stress" feature set to
 * "1".  Syllables are the daughters of word in "SylStructure".
 *
 * Order of decisions (each special case returns immediately):
 *   1. fixed word combinations handled by name;
 *   2. words with gpos "enc" / "proc" that have a neighbour in the
 *      phrase are left without stress;
 *   3. words for which vowel_seg_between() reports nothing over the
 *      transcription are left without stress;
 *   4. words with exactly one syllable stress that syllable;
 *   5. every syllable whose SylVowel daughter is a stressed vowel is
 *      marked;
 *   6. finally the syllable at index numsyls+n is marked, where n is
 *      the "stressed_syl_num" feature or, when that is 0/absent and
 *      step 5 marked nothing, the answer of ru_stress_cart.
 */
static void assign_stress(cst_item *word)
{
  int numsyls=ffeature_int(word,"word_numsyls");
  const char *gpos=ffeature_string(word,"gpos");
  const cst_item *word_in_phrase=item_as(word,"Phrase");
  const cst_item *syls=item_as(word,"SylStructure");
  const cst_item *syl=item_daughter(syls);
  const cst_item *transcription=item_as(word,"Transcription");
  int stressed=FALSE;
  /* n is added to numsyls to index the stressed syllable; 0 means
     "not specified by a feature". */
  int n=item_feat_present(word,"stressed_syl_num")?item_feat_int(word,"stressed_syl_num"):0;
  const char *name=item_name(word);
  const char *pname=ffeature_string(word,"R:Phrase.p.name");
  const char *nname=ffeature_string(word,"R:Phrase.n.name");
  /* "моему"/"своему"/"твоему" after the token "по": stress the first syllable. */
  if(cst_streq(ffeature_string(word,"R:Token.p.name"),"по")&&
     (cst_streq(name,"моему")||cst_streq(name,"своему")||cst_streq(name,"твоему")))
    {
      item_set_string(syl,"stress","1");
      return;
    }
  /* "не"/"ни" directly before "был"/"были"/"было": the particle takes the stress... */
  else if((cst_streq(name,"не")||cst_streq(name,"ни"))&&
     (cst_streq(nname,"был")||cst_streq(nname,"были")||cst_streq(nname,"было")))
    {
      item_set_string(syl,"stress","1");
      return;
    }
  /* ...and the verb form that follows it is left without stress. */
  else if((cst_streq(name,"был")||cst_streq(name,"были")||cst_streq(name,"было"))&&
     (cst_streq(pname,"не")||cst_streq(pname,"ни")))
    return;
  /* gpos "enc" with a preceding word in the phrase: no stress. */
  else if(cst_streq(gpos,"enc")&&item_prev(word_in_phrase))
    return;
  /* gpos "proc" with a following word in the phrase: no stress. */
  else if(cst_streq(gpos,"proc")&&item_next(word_in_phrase))
    return;
  /* Nothing to stress when vowel_seg_between() finds nothing between
     the first and last segment of the transcription. */
  if(!vowel_seg_between(item_daughter(transcription),item_last_daughter(transcription)))
    return;
  if(numsyls==1)
    {
      item_set_string(syl,"stress","1");
      return;
    }
  /* Mark every syllable whose SylVowel daughter is a stressed vowel. */
  for(;syl;syl=item_next(syl))
    {
      if(is_stressed_vowel(item_feat_string(item_daughter(item_as(syl,"SylVowel")),"name")))
        {
          item_set_string(syl,"stress","1");
          stressed=TRUE;
        }
    }
  if(n==0)
    {
      /* No explicit syllable number: done if a stressed vowel was
         found, otherwise ask the stress CART. */
      if(stressed)
        return;
      n=val_int(cart_interpret(word,&ru_stress_cart));
      /* The CART answer points before the first syllable: fall back to
         an offset chosen by word length. */
      if((numsyls+n) < 0)
        {
          if(numsyls <= 4)
            n=-2;
          else
            if(numsyls <= 6)
              n=-3;
            else
              n=-4;
        }
    }
  /* NOTE(review): when n comes from "stressed_syl_num" the index
     numsyls+n is not range-checked here - confirm that
     item_nth_daughter()/item_set_string() tolerate an out-of-range
     index. */
  item_set_string(item_nth_daughter(syls,(numsyls+n)),"stress","1");
}