/*
 * Sonority-based syllable boundary test (phone set dependent).
 *
 *   i  item whose "name" feature is the phone just before the candidate
 *      boundary; it and its item_prev() predecessors are searched for a
 *      vowel (presumably these are the phones already placed in the
 *      current syllable -- confirm against the caller).
 *   v  list of the phone names that remain, or NULL when none are left.
 *
 * Returns TRUE if a boundary should be placed before the first phone in
 * v, FALSE otherwise.
 */
int cmu_syl_boundary(const cst_item *i,const cst_val *v)
{
    const char *next_phone;
    int son_prev, son_next, son_after;

    /* Nothing left: the syllable necessarily ends here. */
    if (v == NULL)
        return TRUE;

    next_phone = val_string(val_car(v));

    /* Silence always closes the syllable. */
    if (cmu_is_silence(next_phone))
        return TRUE;

    /* No vowel remains, so everything left belongs to the coda. */
    if (!cmu_has_vowel_in_list(v))
        return FALSE;

    /* The current syllable has no vowel yet; it cannot end here. */
    if (!cmu_has_vowel_in_syl(i))
        return FALSE;

    /* A vowel comes next and we already have one: break before it. */
    if (cmu_is_vowel(next_phone))
        return TRUE;

    /* Only a single phone remains. */
    if (val_cdr(v) == NULL)
        return FALSE;

    /*
     * A vowel follows somewhere and at least two phones remain: break
     * only if sonority does not fall across previous -> next -> after.
     */
    son_prev  = cmu_sonority(item_feat_string(i,"name"));
    son_next  = cmu_sonority(next_phone);
    son_after = cmu_sonority(val_string(val_car(val_cdr(v))));

    return ((son_prev <= son_next) && (son_next <= son_after)) ? TRUE : FALSE;
}
/*
 * Returns TRUE if item i, or any item reached from it by repeatedly
 * following item_prev(), has a "name" feature that cmu_is_vowel()
 * accepts; returns FALSE once the chain runs out (or if i is NULL).
 */
static int cmu_has_vowel_in_syl(const cst_item *i)
{
    const cst_item *cur = i;

    while (cur != NULL)
    {
        if (cmu_is_vowel(item_feat_string(cur,"name")))
            return TRUE;
        cur = item_prev(cur);
    }

    return FALSE;
}
/*
 * Returns TRUE if any phone name in the list v is a vowel according to
 * cmu_is_vowel(), FALSE otherwise (including when v is NULL).
 */
static int cmu_has_vowel_in_list(const cst_val *v)
{
    const cst_val *t;

    for (t = v; t != NULL; t = val_cdr(t))
    {
        /* Test the element under the cursor t, not the list head v:
           checking v on every pass would only ever look at the first
           phone and miss vowels further down the list. */
        if (cmu_is_vowel(val_string(val_car(t))))
            return TRUE;
    }

    return FALSE;
}
/*
 * Counts how many leading phones in rest come before the first vowel.
 * Returns 0 when rest is NULL or starts with a vowel; if the list holds
 * no vowel at all, the result is the length of the list.
 */
static int cmulex_dist_to_vowel(const cst_val *rest)
{
    const cst_val *cur = rest;
    int dist = 0;

    /* Running off the end of the list is not expected by callers, but it
       is tolerated and simply stops the count. */
    while (cur != NULL && !cmu_is_vowel(val_string(val_car(cur))))
    {
        dist++;
        cur = val_cdr(cur);
    }

    return dist;
}
/*
 * Rough sonority rank (1..5) for phone name p, a set of hacks for the US
 * English phone set.  After the vowel/silence test only the first
 * character of p is examined:
 *
 *   5  vowel or silence
 *   4  glides/liquids     (first char in "wylr")
 *   3  nasals             (first char in "nm")
 *   2  voiced obstruents  (first char in "bdgjlmnnnrvwyz")
 *   1  anything else
 *
 * The tests run in that order, so the glide/liquid and nasal letters
 * that also appear in the last set never reach it.
 */
static int cmu_sonority(const char *p)
{
    char first;

    if (cmu_is_vowel(p) || (cmu_is_silence(p)))
        return 5;

    first = p[0];

    if (strchr("wylr",first) != NULL)
        return 4;   /* glides/liquids */

    if (strchr("nm",first) != NULL)
        return 3;   /* nasals */

    if (strchr("bdgjlmnnnrvwyz",first) != NULL)
        return 2;   /* voiced obstruents */

    return 1;
}
int cmu_syl_boundary_mo(const cst_item *i,const cst_val *rest) { /* syl boundary maximal onset */ int d2v; if (rest == NULL) return TRUE; else if (cmu_is_silence(val_string(val_car(rest)))) return TRUE; else if (!cmu_has_vowel_in_list(rest)) /* no more vowels so rest *all* coda */ return FALSE; else if (!cmu_has_vowel_in_syl(i)) /* need a vowel */ /* no vowel yet in syl so keep copying */ return FALSE; else if (cmu_is_vowel(val_string(val_car(rest)))) /* next is a vowel, syl has vowel, so this is a break */ return TRUE; else if (cst_streq("ng",val_string(val_car(rest)))) /* next is "ng" which can't start a word internal syl */ return FALSE; else { /* want to know if from rest to the next vowel is a valid onset */ d2v = cmulex_dist_to_vowel(rest); if (d2v < 2) return TRUE; else if (d2v > 3) return FALSE; else if (d2v == 2) return cmulex_onset_bigram(rest); else /* if (d2v == 3) */ return cmulex_onset_trigram(rest); return TRUE; } }