/*
 * Expand a real-number string into a list of word values.
 *
 * A leading '-' or '+' becomes "minus" / "plus" followed by the expansion
 * of the rest.  A string containing 'e' or 'E' is split there: the part
 * before is expanded recursively, then the word "e", then the expansion of
 * the exponent.  A string containing '.' becomes en_exp_number() of the
 * integer part, the word "point", and en_exp_digits() of the fraction.
 * Anything else is handed to en_exp_number() unchanged.
 *
 * Returns NULL (the empty list) when numstring is NULL; previously only
 * the sign tests were guarded and a NULL reached strchr().
 */
cst_val *en_exp_real(const char *numstring)
{
    char *aaa;
    const char *p;
    cst_val *r;

    if (numstring == NULL)
        return NULL;

    if (numstring[0] == '-')
        r = cons_val(string_val("minus"),
                     en_exp_real(&numstring[1]));
    else if (numstring[0] == '+')
        r = cons_val(string_val("plus"),
                     en_exp_real(&numstring[1]));
    else if (((p=strchr(numstring,'e')) != 0) ||
             ((p=strchr(numstring,'E')) != 0))
    {
        /* Cut a private copy at the exponent marker */
        aaa = cst_strdup(numstring);
        aaa[p - numstring] = '\0';
        r = val_append(en_exp_real(aaa),
                       cons_val(string_val("e"),
                                en_exp_real(p+1)));
        cst_free(aaa);
    }
    else if ((p=strchr(numstring,'.')) != 0)
    {
        /* Cut a private copy at the decimal point */
        aaa = cst_strdup(numstring);
        aaa[p - numstring] = '\0';
        r = val_append(en_exp_number(aaa),
                       cons_val(string_val("point"),
                                en_exp_digits(p+1)));
        cst_free(aaa);
    }
    else
        r = en_exp_number(numstring);  /* no sign, exponent or point */

    return r;
}
/*
 * Shorten name to exactly `size` characters by replacing its middle
 * with "...".
 *
 * - If name already fits in `size` characters a plain copy is returned.
 * - If `size` is below 6 the fixed string "****" is returned instead.
 * - Otherwise the result is <head>...<tail>, where head and tail are the
 *   first and last characters of name and head+3+tail == size.
 *
 * The previous version used the same count for head and tail, so for even
 * sizes its last copied character was name's terminating NUL and the
 * result came out one character short.
 *
 * The result is obtained from cst_strdup()/cst_alloc().
 */
char *bard_token_shorten(const char *name,int size)
{
    char *short_string;
    int sl;
    int head, tail;
    int i;

    sl = cst_strlen(name);
    if (sl <= size)
        return cst_strdup(name);
    if (size < 6)
        return cst_strdup("****");

    short_string = cst_alloc(char,size+1);

    head = (size-3)/2;
    tail = size-3-head;          /* one more than head when size is even */

    for (i=0; i<head; i++)
        short_string[i] = name[i];

    short_string[head]   = '.';
    short_string[head+1] = '.';
    short_string[head+2] = '.';

    for (i=0; i<tail; i++)
        short_string[head+3+i] = name[(sl-tail)+i];

    short_string[size] = '\0';

    return short_string;
}
/*
 * Return a cst_strdup()'d unit name for item s.
 *
 * With the switch below left at 1 this is just the item's "name" feature.
 * Set it to 0 if you have more complex unit names: the name is then
 * extended with the next-voicing value, and additionally with the parent
 * syllable's stress when the "ph_vc" feature is "+".
 */
static char *__VOICENAME___unit_name(cst_item *s)
{
    const char *name = flite_ffeature_string(s,"name");

#if 1
    return cst_strdup(name);
#else
    {
        /* This *is* long enough as long as you don't change external things */
        char cname[30];

        if (cst_streq("+",flite_ffeature_string(s,"ph_vc")))
        {
            cst_sprintf(cname,"%s_%s_%s",name,
                        flite_ffeature_string(s,"R:SylStructure.parent.stress"),
                        __VOICENAME___nextvoicing(s));
        }
        else
        {
            cst_sprintf(cname,"%s_%s",name,
                        __VOICENAME___nextvoicing(s));
        }

        return cst_strdup(cname);
    }
#endif
}
/*
 * Create the "help" window for reader br and draw its first page.
 *
 * The window is inset by a fixed border on every side of the screen,
 * takes font and colours from br->config (with built-in defaults), and
 * shows bard_help_string with the project version, state and date
 * substituted in.  Returns the new window.
 */
bard_window *bard_make_help_window(bard_reader *br)
{
    const int border = 10;
    bard_window *help;
    char *help_text;
    int base_font_size;

    help = bard_window_new("help",
                           br->display->screen_width-(2*border),
                           br->display->screen_height-(2*border),
                           br->display->screen->format);
    help->x_offset = border;
    help->y_offset = border;

    /* Font: -help_font_size falls back to -font_size, then the default */
    help->font_name =
        cst_strdup(get_param_string(br->config,"-font",BARD_DEFAULT_FONT));
    base_font_size = get_param_int(br->config,"-font_size",
                                   BARD_DEFAULT_FONT_SIZE);
    help->font_size = get_param_int(br->config,"-help_font_size",
                                    base_font_size);
    /* The display window opened this font during initialization, so it */
    /* is assumed to open here as well; the result is not checked       */
    help->font = bard_window_open_font(help->font_name,help->font_size);

    /* Colours */
    help->background_color =
        bard_color_get(br->colors,
                       get_param_string(br->config,
                                        "-help_background_color","cornsilk"));
    help->foreground_color =
        bard_color_get(br->colors,
                       get_param_string(br->config,
                                        "-help_foreground_color","steelblue"));
    help->highlight_color =
        bard_color_get(br->colors,
                       get_param_string(br->config,
                                        "-help_highlight_color","blue"));

    /* Margins */
    help->tm = 10;
    help->bm = 10;
    help->tlm = 10;
    help->trm = 10;

    /* Build the help text; the buffer is sized from the format string */
    /* plus the three substituted strings                               */
    help_text = cst_alloc(char,
                          cst_strlen(bard_help_string)+
                          cst_strlen(BARD_PROJECT_VERSION)+
                          cst_strlen(BARD_PROJECT_STATE)+
                          cst_strlen(BARD_PROJECT_DATE)+1);
    cst_sprintf(help_text,bard_help_string,
                BARD_PROJECT_VERSION,
                BARD_PROJECT_STATE,
                BARD_PROJECT_DATE);

    help->ts = ts_open_string(help_text,
                              cst_ts_default_whitespacesymbols,
                              "",
                              cst_ts_default_prepunctuationsymbols,
                              cst_ts_default_postpunctuationsymbols);
    help->ts_mode = "literal";
    /* NOTE(review): freed immediately, so ts_open_string is presumed to */
    /* keep its own copy of the text -- confirm                          */
    cst_free(help_text);

    help->update = bard_window_help_update;

    /* Put something on the screen */
    bard_window_display_from_pos(help,0);

    return help;
}
/*
 * Set one feature in f from a "NAME=VALUE" string.
 *
 * type may be "int" or "float" to force the value type; any other
 * non-null type stores the value as a string.  When type is null the
 * type is guessed by matching the value against cst_rx_int and then
 * cst_rx_double.  A string without '=' is reported on stderr and ignored.
 */
static void ef_set(cst_features *f,const char *fv,const char *type)
{
    const char *eq;
    const char *value;
    char *name;
    int is_int, is_float;

    eq = strchr(fv,'=');
    if (eq == 0)
    {
        fprintf(stderr,
                "flite: can't find '=' in featval \"%s\", ignoring it\n",
                fv);
        return;
    }

    /* Copy fv and cut it at the '=' to get the feature name */
    name = cst_strdup(fv);
    name[eq - fv] = '\0';
    value = eq + 1;

    if (type != 0)
    {
        is_int = cst_streq("int",type);
        is_float = !is_int && cst_streq("float",type);
    }
    else
    {
        is_int = cst_regex_match(cst_rx_int,value);
        is_float = !is_int && cst_regex_match(cst_rx_double,value);
    }

    if (is_int)
        feat_set_int(f,name,atoi(value));
    else if (is_float)
        feat_set_float(f,name,atof(value));
    else
        feat_set_string(f,name,value);

    /* name is deliberately not freed: feats think featnames are const, */
    /* which is true except in this particular case                     */
}
/*
 * Parse "NAME=VALUE" from fv and store it as a feature in f.
 *
 * An explicit type of "int" or "float" selects the setter; with a null
 * type the value is tested against cst_rx_int, then cst_rx_double, and
 * otherwise stored as a string.  Input lacking '=' is reported on stderr
 * and skipped.
 */
static void ef_set(cst_features *f,const char *fv,const char *type)
{
    enum { AS_STRING, AS_INT, AS_FLOAT } kind = AS_STRING;
    const char *sep;
    const char *text;
    char *key;

    sep = strchr(fv,'=');
    if (sep == 0)
    {
        fprintf(stderr,
                "flite: can't find '=' in featval \"%s\", ignoring it\n",
                fv);
        return;
    }

    /* Feature name is everything before the '=' */
    key = cst_strdup(fv);
    key[sep - fv] = '\0';
    text = sep + 1;

    if ((type && cst_streq("int",type)) ||
        (!type && cst_regex_match(cst_rx_int,text)))
        kind = AS_INT;
    else if ((type && cst_streq("float",type)) ||
             (!type && cst_regex_match(cst_rx_double,text)))
        kind = AS_FLOAT;

    switch (kind)
    {
    case AS_INT:
        feat_set_int(f,key,atoi(text));
        break;
    case AS_FLOAT:
        feat_set_float(f,key,atof(text));
        break;
    default:
        feat_set_string(f,key,text);
        break;
    }

    /* NOTE(review): key is released right after being used as the   */
    /* feature name; this presumes feat_set_* keep their own copy of */
    /* the name -- confirm against the features implementation       */
    cst_free(key);
}
/*
 * Scan a usage description and record its options in f.
 *
 * A token counts as an option when it starts with '-' and the whitespace
 * in front of it contained a newline.  The token that follows is stored
 * as the option's value when it begins with '<'; otherwise the option is
 * recorded with the value "<binary>".
 */
static void parse_description(const char *description, cst_features *f)
{
    cst_tokenstream *stream;
    char *tok;
    const char *key;
    const char *next;
    int is_option;

    stream = ts_open_string(description,
                            " \t\r\n",   /* whitespace */
                            "{}[]|",     /* singlecharsymbols */
                            "",          /* prepunctuation */
                            "");         /* postpunctuation */

    while (!ts_eof(stream))
    {
        /* Private copy: the token is still needed after the next ts_get */
        tok = cst_strdup(ts_get(stream));

        is_option = (tok[0] == '-') &&
                    (cst_strchr(stream->whitespace,'\n') != 0);
        if (is_option)
        {
            /* feat_own_string supplies the name string used as the key */
            key = feat_own_string(f,tok);
            next = ts_get(stream);
            feat_set_string(f,key,(next[0] == '<') ? next : "<binary>");
        }

        cst_free(tok);
    }

    ts_close(stream);
}
/*
 * Parse the usage description into something more usable: every option
 * found is recorded as a feature in f.
 *
 * A token is an option when it starts with '-' and the whitespace before
 * it contained a newline.  The following token becomes the option's value
 * if it starts with '<', otherwise the value is "<binary>".
 *
 * Each token is duplicated so it survives the next ts_get().  The copy is
 * kept when it is used as a feature name (the name is presumably
 * referenced, not copied, by feat_set_string -- TODO confirm) and is now
 * freed for every other token; previously those copies were leaked.
 */
static void parse_description(const char *description, cst_features *f)
{
    cst_tokenstream *ts;
    const char *arg;
    char *op;

    ts = ts_open_string(description);
    ts->whitespacesymbols = " \t\r\n";
    ts->singlecharsymbols = "{}[]|";
    ts->prepunctuationsymbols = "";
    ts->postpunctuationsymbols = "";

    while (!ts_eof(ts))
    {
        op = cst_strdup(ts_get(ts));
        if ((op[0] == '-') && (strchr(ts->whitespace,'\n') != 0))
        {   /* got an option */
            arg = ts_get(ts);
            if (arg[0] == '<')
                feat_set_string(f,op,arg);
            else
                feat_set_string(f,op,"<binary>");
            /* op is not freed here: it is now in use as the feature name */
        }
        else
            cst_free(op);   /* not an option, nothing refers to the copy */
    }

    ts_close(ts);
}
/*
 * Allocate a bard_token whose token field is a cst_strdup() copy of name.
 */
bard_token *bard_token_new(const char *name)
{
    bard_token *t;

    t = cst_alloc(bard_token,1);
    t->token = cst_strdup(name);

    return t;
}
/*
 * Make a new string-typed cst_val holding a cst_strdup() copy of s.
 */
cst_val *string_val(const char *s)
{
    cst_val *sv;

    sv = new_val();
    CST_VAL_TYPE(sv) = CST_VAL_TYPE_STRING;
    /* The value always stores its own copy; it would be nice to note */
    /* whether the string is deletable or not                         */
    CST_VAL_STRING_LVAL(sv) = cst_strdup(s);

    return sv;
}
/*
 * Return a newly duplicated copy of the part of s that precedes the first
 * occurrence of c, or NULL when c does not occur in s.
 */
char *cst_string_before(const char *s,const char *c)
{
    const char *hit;
    char *prefix;

    hit = (char *)cst_strstr(s,c);
    if (hit == NULL)
        return NULL;

    /* Duplicate all of s, then terminate the copy where the match starts */
    prefix = (char *)cst_strdup((unsigned char *)s);
    prefix[hit - s] = '\0';

    return prefix;
}
/*
 * Return a newly duplicated copy of str with every upper-case character
 * (as classified by isupper()) replaced by its tolower() equivalent.
 *
 * Characters are converted to unsigned char before being handed to the
 * <ctype.h> functions: passing a negative char value there is undefined
 * behaviour.
 */
char *cst_downcase(const char *str)
{
    char *dc;
    int i;

    dc = cst_strdup(str);
    for (i=0; str[i] != '\0'; i++)
    {
        unsigned char ch = (unsigned char)str[i];

        if (isupper(ch))
            dc[i] = (char)tolower(ch);
    }

    return dc;
}
/*
 * Return a newly duplicated copy of str with every lower-case character
 * (as classified by islower()) replaced by its toupper() equivalent.
 *
 * Characters are converted to unsigned char before being handed to the
 * <ctype.h> functions: passing a negative char value there is undefined
 * behaviour.
 */
char *cst_upcase(const char *str)
{
    char *uc;
    int i;

    uc = cst_strdup(str);
    for (i=0; str[i] != '\0'; i++)
    {
        unsigned char ch = (unsigned char)str[i];

        if (islower(ch))
            uc[i] = (char)toupper(ch);
    }

    return uc;
}
cst_val *en_exp_ordinal(const char *rawnumstring) { /* return ordinal for digit string */ cst_val *card, *o; const cst_val *t; const char *l; const char *ord; char *numstring; int i,j; numstring = cst_strdup(rawnumstring); for (j=i=0; i < cst_strlen(rawnumstring); i++) if (rawnumstring[i] != ',') { numstring[j] = rawnumstring[i]; j++; } numstring[j] = '\0'; card = val_reverse(en_exp_number(numstring)); cst_free(numstring); l = val_string(val_car(card)); ord = 0; for (i=0; i<10; i++) if (cst_streq(l,digit2num[i])) ord = ord2num[i]; if (!ord) for (i=0; i<10; i++) if (cst_streq(l,digit2teen[i])) ord = ord2teen[i]; if (!ord) for (i=0; i<10; i++) if (cst_streq(l,digit2enty[i])) ord = ord2enty[i]; if (cst_streq(l,"hundred")) ord = "hundredth"; if (cst_streq(l,"thousand")) ord = "thousandth"; if (cst_streq(l,"billion")) ord = "billtionth"; if (!ord) /* dunno, so don't convert anything */ return card; o = cons_val(string_val(ord),0); for (t=val_cdr(card); t; t=val_cdr(t)) o = cons_val(val_car(t),o); delete_val(card); return o; }
/*
 * Open a token stream over an in-memory string.
 *
 * The stream is created with the given whitespace, single-character,
 * pre-punctuation and post-punctuation sets, and is given its own
 * cst_strdup() copy of string as its buffer.
 */
cst_tokenstream *ts_open_string(const cst_string *string,
                                const cst_string *whitespace,
                                const cst_string *singlechars,
                                const cst_string *prepunct,
                                const cst_string *postpunct)
{
    cst_tokenstream *stream;

    stream = new_tokenstream(whitespace,
                             singlechars,
                             prepunct,
                             postpunct);
    stream->string_buffer = cst_strdup(string);

    /* One ts_getc up front -- presumably primes the current character */
    ts_getc(stream);

    return stream;
}
/*
 * Build Segment, Syllable, Word and SylStructure relations directly from
 * the Token relation, treating each token name as a phone.
 *
 * All syllables hang off a single word item named "phonestring".  A token
 * name ending in '1' or '0' sets the current syllable's "stress" feature
 * and has that digit removed.  The name "-" ends the current syllable.
 * Any other name must be in the utterance's phoneset; if it is not, an
 * error is reported and cst_error() is raised.
 *
 * Fixes relative to the earlier version: an empty token name no longer
 * indexes name[-1], and the working copy of the name is released before
 * cst_error() is raised.
 *
 * Returns u.
 */
static cst_utterance *tokentosegs(cst_utterance *u)
{
    cst_item *t;
    cst_relation *seg, *syl, *sylstructure, *word;
    cst_item *sylitem, *sylstructureitem, *worditem, *sssyl;
    cst_phoneset *ps;

    ps = val_phoneset(utt_feat_val(u, "phoneset"));
    /* Just copy tokens into the Segment relation */
    seg = utt_relation_create(u, "Segment");
    syl = utt_relation_create(u, "Syllable");
    word = utt_relation_create(u, "Word");
    sylstructure = utt_relation_create(u, "SylStructure");
    sssyl = sylitem = worditem = sylstructureitem = 0;

    for (t = relation_head(utt_relation(u, "Token")); t; t = item_next(t))
    {
        cst_item *segitem = relation_append(seg, NULL);
        char const *pname = item_feat_string(t, "name");
        char *name = cst_strdup(pname);
        int len = (int)cst_strlen(name);
        char last = (len > 0) ? name[len-1] : '\0';

        if (worditem == 0)
        {
            worditem = relation_append(word,NULL);
            item_set_string(worditem, "name", "phonestring");
            sylstructureitem = relation_append(sylstructure,worditem);
        }
        if (sylitem == 0)
        {
            sylitem = relation_append(syl,NULL);
            sssyl = item_add_daughter(sylstructureitem,sylitem);
        }

        /* A trailing stress digit marks the syllable and is stripped */
        if (last == '1')
        {
            item_set_string(sssyl,"stress","1");
            name[len-1] = '\0';
        }
        else if (last == '0')
        {
            item_set_string(sssyl,"stress","0");
            name[len-1] = '\0';
        }

        if (cst_streq(name,"-"))
        {
            sylitem = 0;  /* syllable break */
        }
        else if (phone_id(ps, name) == -1)
        {
            cst_errmsg("Phone `%s' not in phoneset\n", pname);
            /* Release the copy first in case cst_error() does not return */
            cst_free(name);
            name = NULL;
            cst_error();
        }
        else
        {
            item_add_daughter(sssyl,segitem);
            item_set_string(segitem, "name", name);
        }

        cst_free(name);
    }

    return u;
}
/*
 * Create the Syllable, SylStructure and Segment relations for u from its
 * Word relation.
 *
 * For each word the phone list comes from, in order of preference: the
 * "phones" feature of the word's parent Token item, the lexicon's
 * addenda, or lex_lookup() with the word's "pos" feature.  Each phone
 * becomes a segment under the current syllable; a phone name ending in
 * '1' or '0' sets the pending stress and has the digit removed.  When the
 * lexicon's syl_boundary function reports a boundary, the pending stress
 * is written to the syllable and a new syllable is started for the next
 * phone.
 *
 * Fix relative to the earlier version: an empty phone string no longer
 * indexes phone_name[-1].
 *
 * Returns u.
 */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word, *token;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex;
    const cst_val *lex_addenda;
    const cst_val *p, *wp;
    char *phone_name;
    const char *stress = "0";
    const char *pos;
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;
    int explicit_phones;
    int len;

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    lex_addenda = lex->lex_addenda;

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        pos = ffeature_string(word,"pos");
        phones = NULL;
        wp = NULL;

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        token = item_parent(item_as(word, "Token"));
        explicit_phones = item_feat_present(token, "phones");
        if (explicit_phones)
            phones = (cst_val *) item_feat(token, "phones");
        else
        {
            wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda);
            if (wp)
                phones = (cst_val *)val_cdr(val_cdr(wp));
            else
                phones = lex_lookup(lex,item_feat_string(word,"name"),pos);
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);

            /* Work on a copy so a trailing stress digit can be removed */
            phone_name = cst_strdup(val_string(val_car(p)));
            len = (int)cst_strlen(phone_name);
            if (len > 0)
            {
                if (phone_name[len-1] == '1')
                {
                    stress = "1";
                    phone_name[len-1] = '\0';
                }
                else if (phone_name[len-1] == '0')
                {
                    stress = "0";
                    phone_name[len-1] = '\0';
                }
            }
            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);

            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }

        /* Only a list obtained from lex_lookup() is deleted here */
        if (!explicit_phones && !wp)
            delete_val(phones);
    }

    return u;
}
cst_val *cst_lex_make_entry(const cst_lexicon *lex, const cst_string *entry) { /* if replace then replace entry in addenda of lex with entry */ /* else append entry to addenda of lex */ cst_tokenstream *e; cst_val *phones = NULL; cst_val *ventry; const cst_string *w, *p; cst_string *word; cst_string *pos; int i; e = ts_open_string(entry, cst_ts_default_whitespacesymbols, "","",""); w = ts_get(e); if (w[0] == '"') /* it was a quoted entry */ { /* so reparse it */ ts_close(e); e = ts_open_string(entry, cst_ts_default_whitespacesymbols, "","",""); w = ts_get_quoted_token(e,'"','\\'); } word = cst_strdup(w); p = ts_get(e); if (!cst_streq(":",p)) /* there is a real pos */ { pos = cst_strdup(p); p = ts_get(e); if (!cst_streq(":",p)) /* there is a real pos */ { cst_fprintf(stdout,"add_addenda: lex %s: expected \":\" in %s\n", lex->name, word); cst_free(word); cst_free(pos); ts_close(e); return NULL; } } else pos = cst_strdup("nil"); while (!ts_eof(e)) { p = ts_get(e); /* Check its a legal phone */ for (i=0; lex->phone_table[i]; i++) { if (cst_streq(p,lex->phone_table[i])) break; } if (cst_streq("#",p)) /* comment to end of line */ break; else if (cst_streq("",p)) /* trailing ws at eoln causes this */ break; else if (lex->phone_table[i]) /* Only add it if its a valid phone */ phones = cons_val(string_val(p),phones); else { cst_fprintf(stdout,"add_addenda: lex: %s word %s phone %s not in lexicon phoneset\n", lex->name, word, p); } } ventry = cons_val(string_val(word),cons_val(string_val(pos), val_reverse(phones))); cst_free(word); cst_free(pos); ts_close(e); #if 0 printf("entry: "); val_print(stdout,ventry); printf("\n"); #endif return ventry; }
cst_file cst_url_open(const char *url) { /* Always opens it for reading */ cst_tokenstream *urlts; const cst_string *protocol; int port; cst_string *host; int fd; char *url_request; char *path; cst_file ofd; int state,n; char c; urlts = ts_open_string(url, "", ":/", "", ""); protocol = ts_get(urlts); if (cst_streq(protocol,"http")) { #ifdef CST_NO_SOCKETS ts_close(urlts); return NULL; #else if (!cst_streq(ts_get(urlts),":") || !cst_streq(ts_get(urlts),"/") || !cst_streq(ts_get(urlts),"/")) { ts_close(urlts); return NULL; } host = cst_strdup(ts_get(urlts)); if (cst_streq(ts_get(urlts),":")) port = (int)cst_atof(ts_get(urlts)); else port = 80; /* Open port to web server */ fd = cst_socket_open(host,port); if (fd < 0) { cst_free(host); ts_close(urlts); return NULL; } url_request = cst_alloc(char,cst_strlen(url)+17); cst_sprintf(url_request,"GET %s HTTP/1.2\n\n",url); n = write(fd,url_request,cst_strlen(url_request)); cst_free(url_request); /* Skip http header -- until \n\n */ state=0; while (state != 4) { n=read(fd,&c,1); if (n == 0) { /* eof or link gone down */ cst_free(host); ts_close(urlts); return NULL; } if ((state == 0) && (c == '\r')) state=1; else if ((state == 1) && (c == '\n')) state=2; else if ((state == 2) && (c == '\r')) state=3; else if ((state == 3) && (c == '\n')) state=4; /* Not sure you can get no CRs in the stream */ else if ((state == 0) && (c == '\n')) state=2; else if ((state == 2) && (c == '\n')) state=4; else state = 0; } ofd = fdopen(fd,"rb"); ts_close(urlts); cst_free(host); return ofd; #endif }
/*
 * Create the Syllable, SylStructure and Segment relations for u from its
 * Word relation.
 *
 * For each word the phone list comes from the "phones" feature of the
 * word's parent Token item when present; otherwise from the utterance's
 * "user_lexicon" (if one is set) and, failing that, from its "lexicon".
 * Each phone becomes a segment under the current syllable; a phone name
 * ending in '1' or '0' sets the pending stress and has the digit removed.
 * When the main lexicon's syl_boundary function reports a boundary, the
 * pending stress is written to the syllable and a new syllable is started
 * for the next phone.
 *
 * Fix relative to the earlier version: an empty phone string no longer
 * indexes phone_name[-1].
 *
 * Returns u.
 */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word, *token;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex, *ulex = NULL;
    const cst_val *p;
    char *phone_name;
    const char *stress = "0";
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;
    int explicit_phones;
    int len;

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    if (feat_present(u->features, "user_lexicon"))
        ulex = val_lexicon(feat_val(u->features, "user_lexicon"));

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        phones = NULL;

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        token = item_parent(item_as(word, "Token"));
        explicit_phones = item_feat_present(token, "phones");
        if (explicit_phones)
            phones = (cst_val *) item_feat(token, "phones");
        else
        {
            if (ulex)
                phones = lex_lookup(ulex,item_feat_string(word, "name"),0);
            if (phones == NULL)
                phones = lex_lookup(lex,item_feat_string(word,"name"),0);
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);

            /* Work on a copy so a trailing stress digit can be removed */
            phone_name = cst_strdup(val_string(val_car(p)));
            len = (int)strlen(phone_name);
            if (len > 0)
            {
                if (phone_name[len-1] == '1')
                {
                    stress = "1";
                    phone_name[len-1] = '\0';
                }
                else if (phone_name[len-1] == '0')
                {
                    stress = "0";
                    phone_name[len-1] = '\0';
                }
            }
            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);

            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }

        /* Only a list obtained from lex_lookup() is deleted here */
        if (!explicit_phones)
            delete_val(phones);
    }

    return u;
}