static const cst_val *syl_break(const cst_item *syl) { /* Break level after this syllable */ cst_item *ss; ss = item_as(syl,"SylStructure"); if (ss == NULL) return VAL_STRING_1; /* hmm, no sylstructure */ else if (item_next(ss) != NULL) return VAL_STRING_0; /* word internal */ else if (item_parent(ss) == NULL) /* no parent */ return VAL_STRING_1; else return word_break(item_parent(ss)); }
static cst_val* word_to_phones(const cst_item *word) { cst_val*phones=NULL; const char *name=item_feat_string(word, "name"); ustring32_t letters=ustring32_alloc(0); if(letters==NULL) return NULL; ustring32_assign8(letters,(const uint8_t*)name); if(ustring32_empty(letters)) { ustring32_free(letters); return NULL; } unsigned int flags=classify_characters(ustring32_str(letters),ustring32_length(letters)); int variant=item_feat_int(item_parent(item_as(word,"Token")),"variant"); if((flags&cs_lc)&&cst_streq(ffeature_string(word,"gpos"),"content")) { if(variant==variant_pseudo_english) phones=ustring32_lts_apply(letters,&en_consonants_lts); else phones=ustring32_lts_apply(letters,&ru_consonants_lts); item_set_int(word,"no_vr",1); } else if((variant==variant_pseudo_english)&&(flags&cs_en)) { cst_val *en_phones=lex_lookup(en_lex,name,(cst_streq(name,"a")?"n":NULL)); if(en_phones) { phones=ru_lts_apply(en_phones,&ru_en_lts); delete_val(en_phones); } item_set_int(word,"no_pl",1); } else { const ru_dict_entry *e=bsearch(name,ru_dict,ru_dict_size,sizeof(ru_dict_entry),compare_entries); if(e!=NULL) { if(e->stress > 0) ustring32_set(letters,e->stress-1,1105); else item_set_int(word,"stressed_syl_num",e->stress); } phones=ustring32_lts_apply(letters,&ru_lts); } ustring32_free(letters); return phones; }
static const cst_val *word_punc(const cst_item *word) { cst_item *ww; const cst_val *v; ww = item_as(word,"Token"); if ((ww != NULL) && (item_next(ww) != 0)) v = &val_string_empty; else v = ffeature(item_parent(ww),"punc"); /* printf("word_punc word %s punc %s\n", item_feat_string(ww,"name"), val_string(v)); */ return v; }
static const cst_val *seg_onset_ctype(const cst_item *seg, const char *ctype) { const cst_item *s; const cst_phoneset *ps = item_phoneset(seg); for (s=item_daughter(item_parent(item_as(seg,"SylStructure"))); s; s=item_next(s)) { if (cst_streq("+",phone_feature_string(ps,item_feat_string(s,"name"), "vc"))) return VAL_STRING_0; if (cst_streq(ctype,phone_feature_string(ps,item_feat_string(s,"name"), "ctype"))) return VAL_STRING_1; } return VAL_STRING_0; }
static const cst_val *word_break(const cst_item *word) { cst_item *ww,*pp; const char *pname; ww = item_as(word,"Phrase"); if ((ww == NULL) || (item_next(ww) != 0)) return VAL_STRING_1; else { pp = item_parent(ww); pname = val_string(item_feat(pp,"name")); if (cst_streq("BB",pname)) return VAL_STRING_4; else if (cst_streq("B",pname)) return VAL_STRING_3; else return VAL_STRING_1; } }
cst_utterance *default_lexical_insertion(cst_utterance *u) { cst_item *word; cst_relation *sylstructure,*seg,*syl; cst_lexicon *lex; const cst_val *lex_addenda = NULL; const cst_val *p, *wp = NULL; char *phone_name; char *stress = "0"; const char *pos; cst_val *phones; cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl; lex = val_lexicon(feat_val(u->features,"lexicon")); if (lex->lex_addenda) lex_addenda = lex->lex_addenda; syl = utt_relation_create(u,"Syllable"); sylstructure = utt_relation_create(u,"SylStructure"); seg = utt_relation_create(u,"Segment"); for (word=relation_head(utt_relation(u,"Word")); word; word=item_next(word)) { ssword = relation_append(sylstructure,word); pos = ffeature_string(word,"pos"); phones = NULL; wp = NULL; /* printf("awb_debug word %s pos %s gpos %s\n", item_feat_string(word,"name"), pos, ffeature_string(word,"gpos")); */ /* FIXME: need to make sure that textanalysis won't split tokens with explicit pronunciation (or that it will propagate such to words, then we can remove the path here) */ if (item_feat_present(item_parent(item_as(word, "Token")), "phones")) phones = (cst_val *) item_feat(item_parent(item_as(word, "Token")), "phones"); else { wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda); if (wp) phones = (cst_val *)val_cdr(val_cdr(wp)); else phones = lex_lookup(lex,item_feat_string(word,"name"),pos); } for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p)) { if (sylitem == NULL) { sylitem = relation_append(syl,NULL); sssyl = item_add_daughter(ssword,sylitem); stress = "0"; } segitem = relation_append(seg,NULL); phone_name = cst_strdup(val_string(val_car(p))); if (phone_name[cst_strlen(phone_name)-1] == '1') { stress = "1"; phone_name[cst_strlen(phone_name)-1] = '\0'; } else if (phone_name[cst_strlen(phone_name)-1] == '0') { stress = "0"; phone_name[cst_strlen(phone_name)-1] = '\0'; } item_set_string(segitem,"name",phone_name); seg_in_syl = item_add_daughter(sssyl,segitem); #if 0 printf("awb_debug ph %s\n",phone_name); #endif if ((lex->syl_boundary)(seg_in_syl,val_cdr(p))) { #if 0 printf("awb_debug SYL\n"); #endif sylitem = NULL; if (sssyl) item_set_string(sssyl,"stress",stress); } cst_free(phone_name); } if (!item_feat_present(item_parent(item_as(word, "Token")), "phones") && ! wp) delete_val(phones); } return u; }
cst_utterance *default_lexical_insertion(cst_utterance *u) { cst_item *word; cst_relation *sylstructure,*seg,*syl; cst_lexicon *lex, *ulex = NULL; const cst_val *p; char *phone_name; char *stress = "0"; cst_val *phones; cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl; lex = val_lexicon(feat_val(u->features,"lexicon")); if (feat_present(u->features, "user_lexicon")) ulex = val_lexicon(feat_val(u->features, "user_lexicon")); syl = utt_relation_create(u,"Syllable"); sylstructure = utt_relation_create(u,"SylStructure"); seg = utt_relation_create(u,"Segment"); for (word=relation_head(utt_relation(u,"Word")); word; word=item_next(word)) { ssword = relation_append(sylstructure,word); phones = NULL; /* FIXME: need to make sure that textanalysis won't split tokens with explicit pronunciation (or that it will propagate such to words, then we can remove the path here) */ if (item_feat_present(item_parent(item_as(word, "Token")), "phones")) phones = (cst_val *) item_feat(item_parent(item_as(word, "Token")), "phones"); else { if (ulex) phones = lex_lookup(ulex,item_feat_string(word, "name"),0); if (phones == NULL) phones = lex_lookup(lex,item_feat_string(word,"name"),0); } for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p)) { if (sylitem == NULL) { sylitem = relation_append(syl,NULL); sssyl = item_add_daughter(ssword,sylitem); stress = "0"; } segitem = relation_append(seg,NULL); phone_name = cst_strdup(val_string(val_car(p))); if (phone_name[strlen(phone_name)-1] == '1') { stress = "1"; phone_name[strlen(phone_name)-1] = '\0'; } else if (phone_name[strlen(phone_name)-1] == '0') { stress = "0"; phone_name[strlen(phone_name)-1] = '\0'; } item_set_string(segitem,"name",phone_name); seg_in_syl = item_add_daughter(sssyl,segitem); if ((lex->syl_boundary)(seg_in_syl,val_cdr(p))) { sylitem = NULL; if (sssyl) item_set_string(sssyl,"stress",stress); } cst_free(phone_name); } if (!item_feat_present(item_parent(item_as(word, "Token")), "phones")) delete_val(phones); } return u; }