/*
 * Write one text line per item of relation r to filename.
 *
 * Each line is "<end> <name> " followed by a newline, where <end> is the
 * item's "end" feature printed with %f (0.00 when the feature is absent) and
 * <name> is the item's "name" feature ("_" when absent).
 *
 * A filename of "-" selects stdout, which is left open on return.  Any other
 * file is opened for writing and closed before returning.
 *
 * Returns CST_OK_FORMAT on success, or CST_ERROR_FORMAT (after reporting via
 * cst_errmsg) when the file cannot be opened.
 */
int relation_save(cst_relation *r, const char *filename)
{
    cst_file out;
    cst_item *it;

    if (cst_streq(filename, "-"))
    {
        out = stdout;
    }
    else
    {
        out = cst_fopen(filename, CST_OPEN_WRITE);
        if (out == 0)
        {
            cst_errmsg("relation_save: can't open file \"%s\" for writing\n",
                       filename);
            return CST_ERROR_FORMAT;
        }
    }

    it = relation_head(r);
    while (it)
    {
        /* End value first, then the label, then the line terminator. */
        cst_fprintf(out, "%f ",
                    item_feat_present(it, "end") ? item_feat_float(it, "end")
                                                 : 0.00);
        cst_fprintf(out, "%s ",
                    item_feat_present(it, "name") ? item_feat_string(it, "name")
                                                  : "_");
        cst_fprintf(out, "\n");

        it = item_next(it);
    }

    if (out != stdout)
        cst_fclose(out);

    return CST_OK_FORMAT;
}
/*
 * Return the list of words that token/name expands to.
 *
 * - If the token carries a "phones" feature, the result is a one-element
 *   list holding name.
 * - Otherwise the result is a one-element list holding name when name is
 *   non-empty, and NULL when name is the empty string.
 */
static cst_val *cmu_LANGNAME_tokentowords(cst_item *token, const char *name)
{
    if (item_feat_present(token, "phones"))
        return cons_val(string_val(name), NULL);

    /* Disabled code kept from the original; nsw, utt and lex are not
       declared in this function, so it cannot be enabled as-is. */
#if 0
    if (item_feat_present(token,"nsw"))
        nsw = item_feat_string(token,"nsw");

    utt = item_utt(token);
    lex = val_lexicon(feat_val(utt->features,"lexicon"));
#endif

    return (cst_strlen(name) > 0) ? cons_val(string_val(name), 0) : NULL;
}
/*
 * Feature function: VAL_STRING_1 when the syllable item has an "accent" or
 * an "endtone" feature, VAL_STRING_0 when it has neither.
 */
static const cst_val *accented(const cst_item *syl)
{
    /* Neither marker present -> not accented. */
    if (!item_feat_present(syl, "accent") &&
        !item_feat_present(syl, "endtone"))
        return VAL_STRING_0;

    return VAL_STRING_1;
}
/*
 * Post-lexical pass over the "Transcription" relation of utterance u.
 * Rewrites segment "name" features in place and returns u.
 *
 * Pass 1 walks words head to tail, skipping any word that has a "no_pl" or
 * "no_vr" feature.  For each daughter segment:
 *   - if its name is in unstressed_vowels, ru_vowel_reduction_cart is
 *     consulted on the segment's "Segment" view and, unless the answer is
 *     "N", the answer becomes the new name;
 *   - otherwise, a segment named "ii" whose R:Segment.p.ph_csoft is "-" is
 *     renamed to "yy", except when the word's name is "и" and its gpos is
 *     "content".
 *
 * Pass 2 walks words tail to head, skipping words with "no_pl".  Daughters
 * are visited last to first; when russian_vpair() gives a counterpart for a
 * segment name and ru_vpair_cart answers "Y", the segment takes that
 * counterpart as its name.
 */
cst_utterance *russian_postlex_function(cst_utterance *u)
{
    const cst_item *w, *s;
    const char *seg_name, *verdict, *paired;

    /* Pass 1: forward over words and segments. */
    for (w = relation_head(utt_relation(u, "Transcription")); w; w = item_next(w))
    {
        if (item_feat_present(w, "no_pl"))
            continue;
        if (item_feat_present(w, "no_vr"))
            continue;

        for (s = item_daughter(w); s; s = item_next(s))
        {
            seg_name = item_feat_string(s, "name");

            if (cst_member_string(seg_name, unstressed_vowels))
            {
                verdict = val_string(cart_interpret(item_as(s, "Segment"),
                                                    &ru_vowel_reduction_cart));
                if (!cst_streq(verdict, "N"))
                    item_set_string(s, "name", verdict);
                continue;
            }

            /* The checks below run in the same order as the original
               short-circuit expression. */
            if (!cst_streq(seg_name, "ii"))
                continue;
            if (!cst_streq(ffeature_string(s, "R:Segment.p.ph_csoft"), "-"))
                continue;
            if (cst_streq(item_feat_string(w, "name"), "и") &&
                cst_streq(ffeature_string(w, "gpos"), "content"))
                continue;

            item_set_string(s, "name", "yy");
        }
    }

    /* Pass 2: backward over words and segments. */
    for (w = relation_tail(utt_relation(u, "Transcription")); w; w = item_prev(w))
    {
        if (item_feat_present(w, "no_pl"))
            continue;

        for (s = item_last_daughter(w); s; s = item_prev(s))
        {
            seg_name = item_feat_string(s, "name");
            paired = russian_vpair(seg_name);
            if (paired == NULL)
                continue;

            verdict = val_string(cart_interpret(item_as(s, "Segment"),
                                                &ru_vpair_cart));
            if (cst_streq(verdict, "Y"))
                item_set_string(s, "name", paired);
        }
    }

    return u;
}
/*
 * Build the "Syllable", "SylStructure" and "Segment" relations of utterance
 * u from its "Word" relation, and return u.
 *
 * For every word the phone list is taken from, in order of preference:
 *   1. the "phones" feature of the parent of the word's "Token" item;
 *   2. the matching entry in the lexicon's addenda (lex->lex_addenda);
 *   3. lex_lookup() on the word name and its "pos" feature.
 * Only a list obtained from lex_lookup() is released with delete_val() once
 * the word has been processed; the other two are borrowed.
 *
 * Each phone becomes a new Segment item attached under the current syllable
 * in SylStructure.  A trailing '1' or '0' on a phone string is stripped and
 * recorded as the pending stress value; when lex->syl_boundary() reports a
 * boundary after a segment, that value is stored as the syllable's "stress"
 * feature and the next phone opens a new syllable.
 *
 * An empty phone string is passed through unchanged: the stress-digit test
 * is skipped instead of indexing position -1 of the string.
 */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure, *seg, *syl;
    cst_lexicon *lex;
    const cst_val *lex_addenda = NULL;
    const cst_val *p, *wp;
    char *phone_name;
    const char *stress = "0";
    const char *pos;
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;
    const cst_item *token;
    int phones_owned;   /* non-zero when phones came from lex_lookup() */
    int len;

    lex = val_lexicon(feat_val(u->features, "lexicon"));
    if (lex->lex_addenda)
        lex_addenda = lex->lex_addenda;

    syl = utt_relation_create(u, "Syllable");
    sylstructure = utt_relation_create(u, "SylStructure");
    seg = utt_relation_create(u, "Segment");

    for (word = relation_head(utt_relation(u, "Word"));
         word;
         word = item_next(word))
    {
        ssword = relation_append(sylstructure, word);
        pos = ffeature_string(word, "pos");
        phones = NULL;
        wp = NULL;
        phones_owned = 0;

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        token = item_parent(item_as(word, "Token"));
        if (item_feat_present(token, "phones"))
        {
            phones = (cst_val *)item_feat(token, "phones");
        }
        else
        {
            wp = val_assoc_string(item_feat_string(word, "name"), lex_addenda);
            if (wp)
            {
                phones = (cst_val *)val_cdr(val_cdr(wp));
            }
            else
            {
                phones = lex_lookup(lex, item_feat_string(word, "name"), pos);
                phones_owned = 1;
            }
        }

        for (sssyl = NULL, sylitem = NULL, p = phones; p; p = val_cdr(p))
        {
            if (sylitem == NULL)
            {
                /* Open a new syllable for this phone. */
                sylitem = relation_append(syl, NULL);
                sssyl = item_add_daughter(ssword, sylitem);
                stress = "0";
            }
            segitem = relation_append(seg, NULL);

            phone_name = cst_strdup(val_string(val_car(p)));
            len = (int)cst_strlen(phone_name);
            if (len > 0)
            {
                /* A trailing digit carries the stress mark; strip it. */
                char last = phone_name[len - 1];
                if (last == '1')
                {
                    stress = "1";
                    phone_name[len - 1] = '\0';
                }
                else if (last == '0')
                {
                    stress = "0";
                    phone_name[len - 1] = '\0';
                }
            }
            item_set_string(segitem, "name", phone_name);
            seg_in_syl = item_add_daughter(sssyl, segitem);

            if ((lex->syl_boundary)(seg_in_syl, val_cdr(p)))
            {
                /* Syllable complete: the next phone starts a new one. */
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl, "stress", stress);
            }
            cst_free(phone_name);
        }

        if (phones_owned)
            delete_val(phones);
    }

    return u;
}
/*
 * Build the "Syllable", "SylStructure" and "Segment" relations of utterance
 * u from its "Word" relation, and return u.
 *
 * For every word the phone list is taken from, in order of preference:
 *   1. the "phones" feature of the parent of the word's "Token" item;
 *   2. lex_lookup() in the utterance's "user_lexicon", when that feature
 *      is present and the lookup returns a non-NULL list;
 *   3. lex_lookup() in the utterance's "lexicon".
 * A list that did not come from the token is released with delete_val()
 * once the word has been processed.
 *
 * Each phone becomes a new Segment item attached under the current syllable
 * in SylStructure.  A trailing '1' or '0' on a phone string is stripped and
 * recorded as the pending stress value; when lex->syl_boundary() (always the
 * main lexicon's) reports a boundary after a segment, that value is stored
 * as the syllable's "stress" feature and the next phone opens a new syllable.
 *
 * An empty phone string is passed through unchanged: the stress-digit test
 * is skipped instead of indexing position -1 of the string.
 */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure, *seg, *syl;
    cst_lexicon *lex, *ulex = NULL;
    const cst_val *p;
    char *phone_name;
    const char *stress = "0";
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;
    const cst_item *token;
    int token_has_phones;
    size_t len;

    lex = val_lexicon(feat_val(u->features, "lexicon"));
    if (feat_present(u->features, "user_lexicon"))
        ulex = val_lexicon(feat_val(u->features, "user_lexicon"));

    syl = utt_relation_create(u, "Syllable");
    sylstructure = utt_relation_create(u, "SylStructure");
    seg = utt_relation_create(u, "Segment");

    for (word = relation_head(utt_relation(u, "Word"));
         word;
         word = item_next(word))
    {
        ssword = relation_append(sylstructure, word);
        phones = NULL;

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        token = item_parent(item_as(word, "Token"));
        token_has_phones = item_feat_present(token, "phones");
        if (token_has_phones)
        {
            phones = (cst_val *)item_feat(token, "phones");
        }
        else
        {
            if (ulex)
                phones = lex_lookup(ulex, item_feat_string(word, "name"), 0);
            if (phones == NULL)
                phones = lex_lookup(lex, item_feat_string(word, "name"), 0);
        }

        for (sssyl = NULL, sylitem = NULL, p = phones; p; p = val_cdr(p))
        {
            if (sylitem == NULL)
            {
                /* Open a new syllable for this phone. */
                sylitem = relation_append(syl, NULL);
                sssyl = item_add_daughter(ssword, sylitem);
                stress = "0";
            }
            segitem = relation_append(seg, NULL);

            phone_name = cst_strdup(val_string(val_car(p)));
            len = strlen(phone_name);
            if (len > 0)
            {
                /* A trailing digit carries the stress mark; strip it. */
                char last = phone_name[len - 1];
                if (last == '1')
                {
                    stress = "1";
                    phone_name[len - 1] = '\0';
                }
                else if (last == '0')
                {
                    stress = "0";
                    phone_name[len - 1] = '\0';
                }
            }
            item_set_string(segitem, "name", phone_name);
            seg_in_syl = item_add_daughter(sssyl, segitem);

            if ((lex->syl_boundary)(seg_in_syl, val_cdr(p)))
            {
                /* Syllable complete: the next phone starts a new one. */
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl, "stress", stress);
            }
            cst_free(phone_name);
        }

        /* Token-supplied phones are borrowed; looked-up lists are ours. */
        if (!token_has_phones)
            delete_val(phones);
    }

    return u;
}
/*
 * Set the "stress" feature to "1" on the syllable(s) of `word` that should
 * carry stress.
 *
 * Order of decisions, as coded below:
 *   1. A few word/neighbour combinations (compared against literal strings)
 *      either stress `syl` -- the item_daughter() of the word's SylStructure
 *      item -- or leave the word without any stress.
 *   2. Words whose gpos is "enc" with a previous item in Phrase, or "proc"
 *      with a next item in Phrase, are left without stress.
 *   3. If vowel_seg_between() is false for the first/last Transcription
 *      daughters, nothing is done.
 *   4. A word with exactly one syllable gets that syllable stressed.
 *   5. Otherwise every syllable whose SylVowel daughter name satisfies
 *      is_stressed_vowel() is stressed; then, if a "stressed_syl_num" feature
 *      supplied a non-zero n, or no syllable was marked and ru_stress_cart
 *      supplies one, daughter number (numsyls+n) of SylStructure is stressed.
 */
static void assign_stress(cst_item *word) {
  int numsyls=ffeature_int(word,"word_numsyls");
  const char *gpos=ffeature_string(word,"gpos");
  const cst_item *word_in_phrase=item_as(word,"Phrase");
  const cst_item *syls=item_as(word,"SylStructure");
  const cst_item *syl=item_daughter(syls);
  const cst_item *transcription=item_as(word,"Transcription");
  int stressed=FALSE;
  /* Explicit stress position from the word's features; 0 means "not given". */
  int n=item_feat_present(word,"stressed_syl_num")?item_feat_int(word,"stressed_syl_num"):0;
  const char *name=item_name(word);
  /* Names of the previous and next words in the Phrase relation. */
  const char *pname=ffeature_string(word,"R:Phrase.p.name");
  const char *nname=ffeature_string(word,"R:Phrase.n.name");
  /* Previous token is "по" and this word is one of the three listed forms:
     stress syl and stop. */
  if(cst_streq(ffeature_string(word,"R:Token.p.name"),"по")&&
     (cst_streq(name,"моему")||cst_streq(name,"своему")||cst_streq(name,"твоему")))
    {
      item_set_string(syl,"stress","1");
      return;
    }
  /* "не"/"ни" followed in the phrase by "был"/"были"/"было": the particle
     itself receives the stress. */
  else if((cst_streq(name,"не")||cst_streq(name,"ни"))&&
          (cst_streq(nname,"был")||cst_streq(nname,"были")||cst_streq(nname,"было")))
    {
      item_set_string(syl,"stress","1");
      return;
    }
  /* The mirror case: "был"/"были"/"было" preceded by "не"/"ни" is left
     without stress. */
  else if((cst_streq(name,"был")||cst_streq(name,"были")||cst_streq(name,"было"))&&
          (cst_streq(pname,"не")||cst_streq(pname,"ни")))
    return;
  /* gpos "enc" with a preceding phrase item, or "proc" with a following one:
     no stress assigned (presumably enclitics/proclitics -- confirm against
     the gpos definitions). */
  else if(cst_streq(gpos,"enc")&&item_prev(word_in_phrase))
    return;
  else if(cst_streq(gpos,"proc")&&item_next(word_in_phrase))
    return;
  /* Judging by the helper's name, this bails out when the transcription
     contains no vowel segment -- verify against vowel_seg_between(). */
  if(!vowel_seg_between(item_daughter(transcription),item_last_daughter(transcription)))
    return;
  if(numsyls==1)
    {
      item_set_string(syl,"stress","1");
      return;
    }
  /* Mark every syllable whose SylVowel daughter is named as a stressed
     vowel according to is_stressed_vowel(). */
  for(;syl;syl=item_next(syl))
    {
      if(is_stressed_vowel(item_feat_string(item_daughter(item_as(syl,"SylVowel")),"name")))
        {
          item_set_string(syl,"stress","1");
          stressed=TRUE;
        }
    }
  if(n==0)
    {
      /* No explicit position: done if the loop above already marked a
         syllable, otherwise take the position from the CART. */
      if(stressed)
        return;
      n=val_int(cart_interpret(word,&ru_stress_cart));
      /* n is added to numsyls to form the daughter index, so a negative n
         counts back from the end.  If the CART answer would land before the
         first syllable, substitute a default that depends on word length. */
      if((numsyls+n) < 0)
        {
          if(numsyls <= 4)
            n=-2;
          else if(numsyls <= 6)
            n=-3;
          else
            n=-4;
        }
    }
  /* Reached with an explicit n even when the loop above already marked a
     syllable.
     NOTE(review): only the lower bound of (numsyls+n) is checked, and only
     on the CART path; an index >= numsyls is passed straight through to
     item_nth_daughter()/item_set_string() -- confirm how those handle an
     out-of-range index. */
  item_set_string(item_nth_daughter(syls,(numsyls+n)),"stress","1");
}