Пример #1
0
/*
 * Write relation r as text, one line per item.
 *
 * Each line holds the item's "end" feature printed with "%f " (0.00 when
 * the feature is absent), then its "name" feature printed with "%s "
 * ("_" when absent), then a newline.
 *
 * A filename of "-" selects stdout, which is left open on return; any
 * other filename is opened with CST_OPEN_WRITE and closed before returning.
 *
 * Returns CST_OK_FORMAT on success, or CST_ERROR_FORMAT (after reporting
 * through cst_errmsg) when the file cannot be opened.
 */
int relation_save(cst_relation *r, const char *filename)
{
    cst_file out;
    cst_item *cur;
    const char *label;

    if (cst_streq(filename, "-"))
    {
        out = stdout;
    }
    else
    {
        out = cst_fopen(filename, CST_OPEN_WRITE);
        if (out == 0)
        {
            cst_errmsg("relation_save: can't open file \"%s\" for writing\n",
                       filename);
            return CST_ERROR_FORMAT;
        }
    }

    cur = relation_head(r);
    while (cur)
    {
        /* End time column, defaulting to zero. */
        cst_fprintf(out, "%f ",
                    item_feat_present(cur, "end")
                        ? item_feat_float(cur, "end")
                        : 0.00);

        /* Name column, defaulting to an underscore placeholder. */
        label = item_feat_present(cur, "name")
                    ? item_feat_string(cur, "name")
                    : "_";
        cst_fprintf(out, "%s ", label);

        cst_fprintf(out, "\n");
        cur = item_next(cur);
    }

    /* Never close the process-wide stdout stream. */
    if (out != stdout)
        cst_fclose(out);

    return CST_OK_FORMAT;
}
/*
 * Return the list of words that token/name expands to.
 *
 * The result is a one-element list holding name when the token has a
 * "phones" feature or when name is non-empty; otherwise it is NULL.
 */
static cst_val *cmu_LANGNAME_tokentowords(cst_item *token, const char *name)
{
    /* printf("token_name %s name %s\n",item_name(token),name); */

    /* A token with a "phones" feature is passed through unchanged. */
    if (item_feat_present(token,"phones"))
    {
        return cons_val(string_val(name),NULL);
    }

#if 0
    if (item_feat_present(token,"nsw"))
	nsw = item_feat_string(token,"nsw");

    utt = item_utt(token);
    lex = val_lexicon(feat_val(utt->features,"lexicon"));
#endif

    /* An empty name yields no words at all. */
    return (cst_strlen(name) > 0) ? cons_val(string_val(name),0) : NULL;
}
Пример #3
0
/*
 * Return VAL_STRING_1 when syl has an "accent" or an "endtone" feature,
 * VAL_STRING_0 otherwise.
 */
static const cst_val *accented(const cst_item *syl)
{
    int marked = item_feat_present(syl,"accent") ||
                 item_feat_present(syl,"endtone");

    return marked ? VAL_STRING_1 : VAL_STRING_0;
}
Пример #4
0
/*
 * Post-lexical renaming of segments in the "Transcription" relation of u.
 *
 * Pass 1 (words and segments walked front to back; words carrying a
 * "no_pl" or "no_vr" feature are skipped):
 *   - a segment whose name is in unstressed_vowels is renamed to the
 *     answer of ru_vowel_reduction_cart, unless that answer is "N";
 *   - any other segment named "ii" becomes "yy" when the preceding
 *     segment's ph_csoft feature is "-", except in the word "и" whose
 *     gpos is "content".
 *
 * Pass 2 (words and segments walked back to front; words carrying a
 * "no_pl" feature are skipped):
 *   - a segment for which russian_vpair() returns a counterpart is renamed
 *     to that counterpart when ru_vpair_cart answers "Y".
 *
 * Returns u.
 */
cst_utterance *russian_postlex_function(cst_utterance *u)
{
  const cst_item *w,*s;
  const char *verdict,*phone,*partner;

  for(w=relation_head(utt_relation(u,"Transcription"));w;w=item_next(w))
    {
      if(!item_feat_present(w,"no_pl")&&!item_feat_present(w,"no_vr"))
        {
          for(s=item_daughter(w);s;s=item_next(s))
            {
              phone=item_feat_string(s,"name");
              if(cst_member_string(phone,unstressed_vowels))
                {
                  verdict=val_string(cart_interpret(item_as(s,"Segment"),&ru_vowel_reduction_cart));
                  if(!cst_streq(verdict,"N"))
                    item_set_string(s,"name",verdict);
                  continue;
                }
              /* Only "ii" after a segment whose ph_csoft is "-" is a candidate. */
              if(!cst_streq(phone,"ii"))
                continue;
              if(!cst_streq(ffeature_string(s,"R:Segment.p.ph_csoft"),"-"))
                continue;
              /* The word "и" with gpos "content" keeps its "ii". */
              if(cst_streq(item_feat_string(w,"name"),"и")&&
                 cst_streq(ffeature_string(w,"gpos"),"content"))
                continue;
              item_set_string(s,"name","yy");
            }
        }
    }

  for(w=relation_tail(utt_relation(u,"Transcription"));w;w=item_prev(w))
    {
      if(!item_feat_present(w,"no_pl"))
        {
          for(s=item_last_daughter(w);s;s=item_prev(s))
            {
              phone=item_feat_string(s,"name");
              partner=russian_vpair(phone);
              if(partner==NULL)
                continue;
              verdict=val_string(cart_interpret(item_as(s,"Segment"),&ru_vpair_cart));
              if(cst_streq(verdict,"Y"))
                item_set_string(s,"name",partner);
            }
        }
    }

  return u;
}
Пример #5
0
/*
 * Create the "Syllable", "SylStructure" and "Segment" relations of u from
 * the items of its "Word" relation.
 *
 * For every word the phone list is taken, in order of preference, from:
 *   1. the "phones" feature of the word's parent in the "Token" relation;
 *   2. the lexicon addenda entry for the word's name (the phones are the
 *      entry with its first two elements dropped);
 *   3. lex_lookup() on the utterance lexicon with the word's "pos".
 * Only a list obtained from lex_lookup() is released with delete_val()
 * once the word is done; the other two are not freed here.
 *
 * Each phone becomes a new Segment item attached under the current
 * syllable. A trailing '1' or '0' on the phone name is stripped and
 * recorded as the stress value; lex->syl_boundary decides when the current
 * syllable ends, at which point its "stress" feature is written.
 *
 * An empty phone string is tolerated: it is stored as is and leaves the
 * stress value untouched (the stress-digit check is skipped, since there
 * is no last character to inspect).
 *
 * Returns u.
 */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex;
    const cst_val *lex_addenda = NULL;
    const cst_val *p, *wp = NULL;
    const cst_item *token;
    char *phone_name;
    const char *stress = "0";
    const char *pos;
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;
    int owns_phones;

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    if (lex->lex_addenda)
        lex_addenda = lex->lex_addenda;

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        pos = ffeature_string(word,"pos");
        phones = NULL;
        wp = NULL;
        owns_phones = 0;

        /* The token is not modified below, so look it up only once. */
        token = item_parent(item_as(word, "Token"));

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        if (item_feat_present(token, "phones"))
            phones = (cst_val *) item_feat(token, "phones");
        else
        {
            wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda);
            if (wp)
                phones = (cst_val *)val_cdr(val_cdr(wp));
            else
            {
                phones = lex_lookup(lex,item_feat_string(word,"name"),pos);
                owns_phones = 1;
            }
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                /* Start a new syllable with default stress. */
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);
            phone_name = cst_strdup(val_string(val_car(p)));

            /* Guard against an empty phone string before looking at its
               last character; indexing at length-1 would be out of bounds. */
            if (phone_name[0] != '\0')
            {
                char *last = phone_name + cst_strlen(phone_name) - 1;

                if (*last == '1')
                {
                    stress = "1";
                    *last = '\0';
                }
                else if (*last == '0')
                {
                    stress = "0";
                    *last = '\0';
                }
            }

            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);
            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {
                /* Syllable finished: record its stress and force a new
                   syllable for the next phone. */
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }

        /* Release only the list that lex_lookup() produced for this word. */
        if (owns_phones)
            delete_val(phones);
    }

    return u;
}
Пример #6
0
/*
 * Create the "Syllable", "SylStructure" and "Segment" relations of u from
 * the items of its "Word" relation.
 *
 * For every word the phone list is taken, in order of preference, from:
 *   1. the "phones" feature of the word's parent in the "Token" relation;
 *   2. lex_lookup() on the utterance's "user_lexicon", when that feature
 *      is present and the lookup returns a non-NULL list;
 *   3. lex_lookup() on the utterance's "lexicon".
 * A list obtained from either lex_lookup() call is released with
 * delete_val() once the word is done; a list taken from the token's
 * "phones" feature is not freed here.
 *
 * Each phone becomes a new Segment item attached under the current
 * syllable. A trailing '1' or '0' on the phone name is stripped and
 * recorded as the stress value; the main lexicon's syl_boundary callback
 * decides when the current syllable ends, at which point its "stress"
 * feature is written.
 *
 * An empty phone string is tolerated: it is stored as is and leaves the
 * stress value untouched (the stress-digit check is skipped, since there
 * is no last character to inspect).
 *
 * Returns u.
 */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex, *ulex = NULL;
    const cst_val *p;
    const cst_item *token;
    char *phone_name;
    const char *stress = "0";
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;
    int token_has_phones;

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    if (feat_present(u->features, "user_lexicon"))
        ulex = val_lexicon(feat_val(u->features, "user_lexicon"));

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        phones = NULL;

        /* The token is not modified below, so look it up only once. */
        token = item_parent(item_as(word, "Token"));
        token_has_phones = item_feat_present(token, "phones");

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        if (token_has_phones)
            phones = (cst_val *) item_feat(token, "phones");
        else
        {
            /* The user lexicon takes precedence over the main one. */
            if (ulex)
                phones = lex_lookup(ulex,item_feat_string(word, "name"),0);
            if (phones == NULL)
                phones = lex_lookup(lex,item_feat_string(word,"name"),0);
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                /* Start a new syllable with default stress. */
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);
            phone_name = cst_strdup(val_string(val_car(p)));

            /* Guard against an empty phone string before looking at its
               last character; indexing at length-1 would be out of bounds. */
            if (phone_name[0] != '\0')
            {
                char *last = phone_name + strlen(phone_name) - 1;

                if (*last == '1')
                {
                    stress = "1";
                    *last = '\0';
                }
                else if (*last == '0')
                {
                    stress = "0";
                    *last = '\0';
                }
            }

            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);
            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {
                /* Syllable finished: record its stress and force a new
                   syllable for the next phone. */
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }

        /* Lists that came from a lexicon lookup are released here. */
        if (!token_has_phones)
            delete_val(phones);
    }

    return u;
}
Пример #7
0
/*
 * Set the "stress" feature to "1" on the appropriate syllable(s) of word.
 *
 * Decision order, as implemented below:
 *   1. The first syllable is stressed and the function returns when the
 *      word is "моему"/"своему"/"твоему" and the previous token is "по",
 *      or when the word is "не"/"ни" and the next word in the phrase is
 *      "был"/"были"/"было".
 *   2. Nothing is stressed when the word is "был"/"были"/"было" preceded
 *      in the phrase by "не"/"ни", when gpos is "enc" and the word has a
 *      predecessor in the phrase, or when gpos is "proc" and the word has
 *      a successor in the phrase.
 *   3. Nothing is stressed when vowel_seg_between() finds no vowel segment
 *      in the word's transcription.
 *   4. A one-syllable word gets its only syllable stressed.
 *   5. Every syllable whose SylVowel daughter is_stressed_vowel() is
 *      stressed.
 *   6. If the word has no non-zero "stressed_syl_num" feature: stop when
 *      step 5 stressed something; otherwise take the offset from
 *      ru_stress_cart, replacing it with -2/-3/-4 (by syllable count) when
 *      it would point before the first syllable.
 *   7. The syllable at index (syllable count + offset) is stressed.
 */
static void assign_stress(cst_item *word)
{
  int syl_count=ffeature_int(word,"word_numsyls");
  const char *gpos=ffeature_string(word,"gpos");
  const cst_item *phrase_word=item_as(word,"Phrase");
  const cst_item *syl_root=item_as(word,"SylStructure");
  const cst_item *cur=item_daughter(syl_root);
  const cst_item *trans=item_as(word,"Transcription");
  int found_stress=FALSE;
  int offset=0;
  const char *name;
  const char *prev_name;
  const char *next_name;

  if(item_feat_present(word,"stressed_syl_num"))
    offset=item_feat_int(word,"stressed_syl_num");
  name=item_name(word);
  prev_name=ffeature_string(word,"R:Phrase.p.name");
  next_name=ffeature_string(word,"R:Phrase.n.name");

  /* Fixed expressions whose first syllable always carries the stress. */
  if((cst_streq(ffeature_string(word,"R:Token.p.name"),"по")&&
      (cst_streq(name,"моему")||cst_streq(name,"своему")||cst_streq(name,"твоему")))||
     ((cst_streq(name,"не")||cst_streq(name,"ни"))&&
      (cst_streq(next_name,"был")||cst_streq(next_name,"были")||cst_streq(next_name,"было"))))
    {
      item_set_string(cur,"stress","1");
      return;
    }

  /* Words that are left without any stress. */
  if(((cst_streq(name,"был")||cst_streq(name,"были")||cst_streq(name,"было"))&&
      (cst_streq(prev_name,"не")||cst_streq(prev_name,"ни")))||
     (cst_streq(gpos,"enc")&&item_prev(phrase_word))||
     (cst_streq(gpos,"proc")&&item_next(phrase_word)))
    {
      return;
    }

  if(!vowel_seg_between(item_daughter(trans),item_last_daughter(trans)))
    {
      return;
    }

  if(syl_count==1)
    {
      item_set_string(cur,"stress","1");
      return;
    }

  /* Honour stress that is already encoded in the vowel names. */
  while(cur)
    {
      if(is_stressed_vowel(item_feat_string(item_daughter(item_as(cur,"SylVowel")),"name")))
        {
          item_set_string(cur,"stress","1");
          found_stress=TRUE;
        }
      cur=item_next(cur);
    }

  if(offset==0)
    {
      if(found_stress)
        {
          return;
        }
      offset=val_int(cart_interpret(word,&ru_stress_cart));
      if((syl_count+offset) < 0)
        {
          /* The predicted offset points before the first syllable:
             fall back to a fixed offset chosen by syllable count. */
          offset=(syl_count <= 4)?-2:((syl_count <= 6)?-3:-4);
        }
    }

  item_set_string(item_nth_daughter(syl_root,(syl_count+offset)),"stress","1");
}