int default_utt_break(cst_tokenstream *ts, const char *token, cst_relation *tokens) { /* This is the default utt break functions, languages may override this */ /* This will be ok for some latin based languages */ const char *postpunct = item_feat_string(relation_tail(tokens), "punc"); const char *ltoken = item_name(relation_tail(tokens)); if (cst_strchr(ts->whitespace,'\n') != cst_strrchr(ts->whitespace,'\n')) /* contains two new lines */ return TRUE; else if (strchr(postpunct,':') || strchr(postpunct,'?') || strchr(postpunct,'!')) return TRUE; else if (strchr(postpunct,'.') && (cst_strlen(ts->whitespace) > 1) && strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ",token[0])) return TRUE; else if (strchr(postpunct,'.') && /* next word starts with a capital */ strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ",token[0]) && /* last word isn't an abbreviation */ !(strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ",ltoken[cst_strlen(ltoken)-1])|| ((cst_strlen(ltoken) < 4) && strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ",ltoken[0])))) return TRUE; else return FALSE; }
/* Dummy F0 modelling for phones, copied directly from us_f0_model.c */ cst_utterance *flat_prosody(cst_utterance *u) { /* F0 target model */ cst_item *s,*t; cst_relation *targ_rel; float mean, stddev; targ_rel = utt_relation_create(u,"Target"); mean = get_param_float(u->features,"target_f0_mean", 100.0); mean *= get_param_float(u->features,"f0_shift", 1.0); stddev = get_param_float(u->features,"target_f0_stddev", 12.0); s=relation_head(utt_relation(u,"Segment")); t = relation_append(targ_rel,NULL); item_set_float(t,"pos",0.0); item_set_float(t,"f0",mean+stddev); s=relation_tail(utt_relation(u,"Segment")); t = relation_append(targ_rel,NULL); item_set_float(t,"pos",item_feat_float(s,"end")); item_set_float(t,"f0",mean-stddev); return u; }
cst_utterance *russian_postlex_function(cst_utterance *u) { const cst_item *word,*seg; const char *answer,*name,*pair; for(word=relation_head(utt_relation(u,"Transcription"));word;word=item_next(word)) { if(item_feat_present(word,"no_pl")||item_feat_present(word,"no_vr")) continue; for(seg=item_daughter(word);seg;seg=item_next(seg)) { name=item_feat_string(seg,"name"); if(cst_member_string(name,unstressed_vowels)) { answer=val_string(cart_interpret(item_as(seg,"Segment"),&ru_vowel_reduction_cart)); if(!cst_streq(answer,"N")) item_set_string(seg,"name",answer); } else { if(cst_streq(name,"ii")&& cst_streq(ffeature_string(seg,"R:Segment.p.ph_csoft"),"-")&& !(cst_streq(item_feat_string(word,"name"),"и")&& cst_streq(ffeature_string(word,"gpos"),"content"))) { item_set_string(seg,"name","yy"); } } } } for(word=relation_tail(utt_relation(u,"Transcription"));word;word=item_prev(word)) { if(item_feat_present(word,"no_pl")) continue; for(seg=item_last_daughter(word);seg;seg=item_prev(seg)) { name=item_feat_string(seg,"name"); pair=russian_vpair(name); if(pair!=NULL) { answer=val_string(cart_interpret(item_as(seg,"Segment"),&ru_vpair_cart)); if(cst_streq(answer,"Y")) item_set_string(seg,"name",pair); } } } return u; }