/*
 * Load a label file into relation r.
 *
 * The file is tokenised with ";" as the single-character symbol set.
 * Tokens are skipped until a "#" token (end-of-header marker) is read.
 * After that every record is read as three tokens: the first becomes the
 * float feature "end" of a newly appended item, the second is read and
 * discarded, and the third becomes the string feature "name".
 *
 * Returns CST_OK_FORMAT on success, CST_ERROR_FORMAT when the file cannot
 * be opened or no "#" marker is found.
 */
int relation_load(cst_relation *r, const char *filename)
{
    cst_tokenstream *fd;
    cst_item *item;
    const char *token = 0;

    if ((fd = ts_open(filename,NULL,";","","")) == 0)
    {
        cst_errmsg("relation_load: can't open file \"%s\" for reading\n",
                   filename);
        return CST_ERROR_FORMAT;
    }

    /* Skip the header, stopping on the "#" marker */
    while (!ts_eof(fd))
    {
        token = ts_get(fd);
        if (cst_streq("#",token))
            break;
    }

    /* token is still 0 when the stream was already at EOF, so it must be
       tested before being compared; this also makes the check safe for
       static analysis without any preprocessor toggles. */
    if ((token == 0) || !cst_streq("#",token))
    {
        cst_errmsg("relation_load: no end of header marker in \"%s\"\n",
                   filename);
        ts_close(fd);
        return CST_ERROR_FORMAT;
    }

    while (!ts_eof(fd))
    {
        token = ts_get(fd);
        if (cst_streq(token,""))
            continue;
        item = relation_append(r,NULL);
        item_set_float(item,"end",(float)cst_atof(token));
        (void)ts_get(fd);            /* middle field is read and ignored */
        token = ts_get(fd);
        item_set_string(item,"name",token);
    }

    ts_close(fd);
    return CST_OK_FORMAT;
}
/*
 * Load a label file into relation r.
 *
 * The file is tokenised with ";" as the single-character symbol set.
 * Tokens are skipped until a "#" token (end-of-header marker) is read.
 * After that every record is read as three tokens: the first becomes the
 * float feature "end" of a newly appended item, the second is read and
 * discarded, and the third becomes the string feature "name".
 *
 * Returns CST_OK_FORMAT on success, CST_ERROR_FORMAT when the file cannot
 * be opened or no "#" marker is found.
 */
int relation_load(cst_relation *r, const char *filename)
{
    cst_tokenstream *fd;
    cst_item *item;
    const char *token = 0;

    if ((fd = ts_open(filename,NULL,";","","")) == 0)
    {
        cst_errmsg("relation_load: can't open file \"%s\" for reading\n",
                   filename);
        return CST_ERROR_FORMAT;
    }

    /* Skip the header, stopping on the "#" marker */
    while (!ts_eof(fd))
    {
        token = ts_get(fd);
        if (cst_streq("#",token))
            break;
    }

    /* token is still 0 when the stream was already at EOF (the loop above
       never ran), so test it before handing it to the string compare. */
    if ((token == 0) || !cst_streq("#",token))
    {
        cst_errmsg("relation_load: no end of header marker in \"%s\"\n",
                   filename);
        ts_close(fd);
        return CST_ERROR_FORMAT;
    }

    while (!ts_eof(fd))
    {
        token = ts_get(fd);
        if (cst_streq(token,""))
            continue;
        item = relation_append(r,NULL);
        item_set_float(item,"end",(float)cst_atof(token));
        (void)ts_get(fd);            /* middle field is read and ignored */
        token = ts_get(fd);
        item_set_string(item,"name",token);
    }

    ts_close(fd);
    return CST_OK_FORMAT;
}
/*
 * Split the utterance's input text into tokens and store them in a newly
 * created "Token" relation.  The character classes used for splitting are
 * looked up in the utterance features.  Each non-empty token becomes one
 * item carrying its name, surrounding whitespace/punctuation and the
 * stream's position information.  Returns the utterance passed in.
 */
cst_utterance *default_tokenization(cst_utterance *u)
{
    cst_relation *tok_rel;
    cst_tokenstream *stream;
    cst_item *tok_item;
    const char *input;
    const char *word;

    input = utt_input_text(u);
    tok_rel = utt_relation_create(u,"Token");
    stream = ts_open_string(input,
        get_param_string(u->features,"text_whitespace",NULL),
        get_param_string(u->features,"text_singlecharsymbols",NULL),
        get_param_string(u->features,"text_prepunctuation",NULL),
        get_param_string(u->features,"text_postpunctuation",NULL));

    for ( ; !ts_eof(stream); )
    {
        word = ts_get(stream);
        if (!(cst_strlen(word) > 0))
            continue;                /* empty tokens are not recorded */

        tok_item = relation_append(tok_rel,NULL);
        item_set_string(tok_item,"name",word);
        item_set_string(tok_item,"whitespace",stream->whitespace);
        item_set_string(tok_item,"prepunctuation",stream->prepunctuation);
        item_set_string(tok_item,"punc",stream->postpunctuation);
        item_set_int(tok_item,"file_pos",stream->file_pos);
        item_set_int(tok_item,"line_number",stream->line_number);
    }

    ts_close(stream);
    return u;
}
/*
 * Tokenise raw_data and add the tokens to a newly created "Token" relation
 * of utterance u.  The character classes are taken from the utterance
 * features.  The token stream is closed before returning.
 */
static void add_raw_data(cst_utterance *u, const char *raw_data,
                         cst_features *attributes)
{
    cst_tokenstream *ts;
    cst_relation *r;
    cst_item *t;
    const char *token;

    r = utt_relation_create(u,"Token");
    ts = ts_open_string(raw_data,
        get_param_string(u->features,"text_whitespace",NULL),
        get_param_string(u->features,"text_singlecharsymbols",NULL),
        get_param_string(u->features,"text_prepunctuation",NULL),
        /* key spelled consistently with the other tokenisers */
        get_param_string(u->features,"text_postpunctuation",NULL));

    while (!(ts_eof(ts)))
    {
        /* NOTE(review): an item is appended here for the attribute copy and
           a second, separate item is appended below for the token itself.
           That looks unintended, but the argument direction of
           feat_copy_into is not visible here, so it is left as is. */
        t = relation_append(r,NULL);
        feat_copy_into(item_feats(t),attributes);
        token = ts_get(ts);
        if (cst_strlen(token) > 0)
        {
            t = relation_append(r,NULL);
            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",ts->whitespace);
            item_set_string(t,"prepunctuation",ts->prepunctuation);
            item_set_string(t,"punc",ts->postpunctuation);
        }
    }

    /* Release the stream opened above */
    ts_close(ts);
}
/*
 * Scan a usage description and record every option it mentions in f.
 * A token counts as an option when it starts with '-' and the whitespace
 * preceding it contained a newline.  The option is stored with the token
 * that follows it as its value when that token starts with '<', and with
 * the value "<binary>" otherwise.
 */
static void parse_description(const char *description, cst_features *f)
{
    cst_tokenstream *stream;
    char *word;
    const char *key;
    const char *value;

    stream = ts_open_string(description,
                            " \t\r\n",   /* whitespace */
                            "{}[]|",     /* singlecharsymbols */
                            "",          /* prepunctuation */
                            "");         /* postpunctuation */

    for ( ; !ts_eof(stream); cst_free(word))
    {
        /* work on a private copy as the stream buffer is reused below */
        word = cst_strdup(ts_get(stream));

        if (word[0] != '-')
            continue;
        if (cst_strchr(stream->whitespace,'\n') == 0)
            continue;

        /* got an option */
        key = feat_own_string(f,word);
        value = ts_get(stream);
        feat_set_string(f,key,(value[0] == '<') ? value : "<binary>");
    }

    ts_close(stream);
}
/*
 * Read every token of the named file into relation r, one item per
 * non-empty token, recording the token text, its surrounding whitespace and
 * punctuation and the stream position.  Returns 1 on success and 0 when the
 * file could not be opened.
 */
static int bbb_relation_load(cst_relation *r,const char *filename)
{
    cst_tokenstream *stream = ts_open(filename);
    const char *word;
    cst_item *entry;

    if (stream == 0)
        return 0;

    for ( ; !ts_eof(stream); )
    {
        word = ts_get(stream);
        if (!cst_streq(word,""))
        {
            entry = relation_append(r,NULL);
            item_set_string(entry,"name",word);
            item_set_string(entry,"whitespace",stream->whitespace);
            item_set_string(entry,"prepunctuation",stream->prepunctuation);
            item_set_string(entry,"punc",stream->postpunctuation);
            item_set_int(entry,"file_pos",stream->file_pos);
            item_set_int(entry,"line_number",stream->line_number);
        }
    }

    ts_close(stream);
    return 1;
}
/*
 * Scan a usage description and record every option it mentions in f.
 * A token counts as an option when it starts with '-' and the whitespace
 * preceding it contained a newline.  The option is stored with the token
 * that follows it as its value when that token starts with '<', and with
 * the value "<binary>" otherwise.
 */
static void parse_description(const char *description, cst_features *f)
{
    cst_tokenstream *ts;
    const char *arg;
    char *op;

    ts = ts_open_string(description);
    ts->whitespacesymbols = " \t\r\n";
    ts->singlecharsymbols = "{}[]|";
    ts->prepunctuationsymbols = "";
    ts->postpunctuationsymbols = "";

    while (!ts_eof(ts))
    {
        op = cst_strdup(ts_get(ts));
        if ((op[0] == '-') && (strchr(ts->whitespace,'\n') != 0))
        {
            /* got an option */
            /* op is handed to feat_set_string as the feature name and is
               deliberately not freed here -- presumably the feature set
               keeps the pointer; TODO confirm ownership. */
            arg = ts_get(ts);
            if (arg[0] == '<')
                feat_set_string(f,op,arg);
            else
                feat_set_string(f,op,"<binary>");
        }
        else
        {
            /* Not an option: the copy was never stored anywhere, so release
               it instead of leaking one allocation per ordinary token. */
            cst_free(op);
        }
    }

    ts_close(ts);
}
/*
 * Read frame i of track t from an ascii token stream: one token for the
 * frame time, one token that is skipped, then num_channels tokens for the
 * channel values.  Returns -1 when the stream is exhausted although more
 * frames are expected, 0 otherwise.
 */
static int load_frame_ascii(cst_track *t, int i, cst_tokenstream *ts)
{
    int chan = 0;

    t->times[i] = cst_atof(ts_get(ts));

    /* second field is read and dropped (original note: it can only be 1) */
    ts_get(ts);

    while (chan < t->num_channels)
    {
        t->frames[i][chan] = cst_atof(ts_get(ts));
        chan++;
    }

    /* EOF only matters when this was not the last frame */
    return ((i + 1 < t->num_frames) && (ts_eof(ts))) ? -1 : 0;
}
/*
 * Read the remainder of an SSML tag, up to and including the closing ">",
 * and return its contents as a new feature set.
 *
 * While the tag is being read the stream's single-character symbols are
 * switched to ssml_singlecharsymbols_inattr; they are switched back to
 * ssml_singlecharsymbols_general before a successful return.
 *
 * A "/" token sets "_type" to "startend".  Any other token sets "_type" to
 * "start" and "_name0" to the token; when it is followed by "=", the quoted
 * remainder is stored as "_val0".
 *
 * Returns 0 (after deleting the feature set and printing a message on
 * stderr) when the stream ends before ">" is seen.
 */
static cst_features *ssml_get_attributes(cst_tokenstream *ts)
{
    cst_features *attrs = new_features();
    const char *attr_name;
    const char *attr_val;

    set_charclasses(ts,
                    ts->p_whitespacesymbols,
                    ssml_singlecharsymbols_inattr,
                    ts->p_prepunctuationsymbols,
                    ts->p_postpunctuationsymbols);

    for (attr_name = ts_get(ts);
         !cst_streq(">",attr_name);
         attr_name = ts_get(ts))
    {
        if (!cst_streq(attr_name,"/"))
        {
            feat_set_string(attrs,"_type","start");
            feat_set_string(attrs,"_name0",attr_name);
            if (cst_streq("=",ts_get(ts)))
            {
                attr_val = ts_get_quoted_remainder(ts);
                feat_set_string(attrs,"_val0",attr_val);
            }
        }
        else
            feat_set_string(attrs,"_type","startend");

        if (ts_eof(ts))
        {
            fprintf(stderr,"ssml: unexpected EOF\n");
            delete_features(attrs);
            return 0;
        }
    }

    set_charclasses(ts,
                    ts->p_whitespacesymbols,
                    ssml_singlecharsymbols_general,
                    ts->p_prepunctuationsymbols,
                    ts->p_postpunctuationsymbols);

    return attrs;
}
/*
 * Read addenda entries from lexfile, one entry per line.  Lines starting
 * with '#' and lines consisting only of spaces are ignored.  Every other
 * line is passed to cst_lex_make_entry; the non-NULL results are returned
 * as a list in file order.  Returns NULL when the file cannot be opened.
 */
cst_val *cst_lex_load_addenda(const cst_lexicon *lex, const char *lexfile)
{
    cst_tokenstream *stream;
    const cst_string *text;
    cst_val *made = NULL;
    cst_val *collected = NULL;
    int pos;

    stream = ts_open(lexfile,"\n","","","");
    if (stream == NULL)
    {
        cst_errmsg("lex_add_addenda: cannot open lexicon file\n");
        return NULL;
    }

    for ( ; !ts_eof(stream); )
    {
        text = ts_get(stream);
        if (text[0] == '#')
            continue;                /* a comment */

        /* find the first character that is not a space */
        pos = 0;
        while (text[pos] == ' ')
            pos++;
        if (!text[pos])
            continue;                /* a blank line */

        made = cst_lex_make_entry(lex,text);
        if (made)
            collected = cons_val(made,collected);
    }

    ts_close(stream);
    /* entries were pushed on the front, so restore file order */
    return val_reverse(collected);
}
/*
 * Synthesize the text in the named file with the given voice, one
 * utterance at a time, and return the accumulated duration reported by
 * flite_tokens_to_speech.
 *
 * Tokens are collected into a "Token" relation until an empty token is
 * read, more than 500 tokens have been collected, or the break function
 * signals an utterance boundary; the utterance is then synthesized and a
 * new one is started.  Unless outtype is "play" or "none", an empty wave
 * is first saved to outtype.
 *
 * Returns 1 when the file cannot be opened.
 */
float flite_file_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_breakfunc breakfunc = default_utt_break;
    cst_tokenstream *ts;
    cst_utterance *utt;
    cst_relation *tokrel;
    cst_item *tok_item;
    const char *word;
    float total = 0;
    float piece;
    int count;
    int at_boundary;

    ts = ts_open(filename,
        get_param_string(voice->features,"text_whitespace",NULL),
        get_param_string(voice->features,"text_singlecharsymbols",NULL),
        get_param_string(voice->features,"text_prepunctuation",NULL),
        get_param_string(voice->features,"text_postpunctuation",NULL));
    if (ts == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",
                   filename);
        return 1;
    }

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    /* Output goes to a file that is appended to incrementally, so start */
    /* it off as an empty wave                                          */
    if (!(cst_streq(outtype,"play") || cst_streq(outtype,"none")))
    {
        cst_wave *blank = new_wave();
        cst_wave_resize(blank,0,1);
        cst_wave_set_sample_rate(blank,16000);
        cst_wave_save_riff(blank,outtype);
        delete_wave(blank);
    }

    count = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");

    while (!ts_eof(ts) || count > 0)
    {
        word = ts_get(ts);

        at_boundary = (strlen(word) == 0) ||
                      (count > 500) ||        /* need an upper bound */
                      (relation_head(tokrel) && breakfunc(ts,word,tokrel));
        if (at_boundary)
        {
            /* An end of utt */
            piece = flite_tokens_to_speech(utt,voice,outtype);
            utt = NULL;
            if (piece < 0)
                break;
            total += piece;
            if (ts_eof(ts))
                break;
            utt = new_utterance();
            tokrel = utt_relation_create(utt, "Token");
            count = 0;
        }

        count++;
        tok_item = relation_append(tokrel, NULL);
        item_set_string(tok_item,"name",word);
        item_set_string(tok_item,"whitespace",ts->whitespace);
        item_set_string(tok_item,"prepunctuation",ts->prepunctuation);
        item_set_string(tok_item,"punc",ts->postpunctuation);
        item_set_int(tok_item,"file_pos",ts->file_pos);
        item_set_int(tok_item,"line_number",ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);
    return total;
}
cst_val *cst_lex_make_entry(const cst_lexicon *lex, const cst_string *entry) { /* if replace then replace entry in addenda of lex with entry */ /* else append entry to addenda of lex */ cst_tokenstream *e; cst_val *phones = NULL; cst_val *ventry; const cst_string *w, *p; cst_string *word; cst_string *pos; int i; e = ts_open_string(entry, cst_ts_default_whitespacesymbols, "","",""); w = ts_get(e); if (w[0] == '"') /* it was a quoted entry */ { /* so reparse it */ ts_close(e); e = ts_open_string(entry, cst_ts_default_whitespacesymbols, "","",""); w = ts_get_quoted_token(e,'"','\\'); } word = cst_strdup(w); p = ts_get(e); if (!cst_streq(":",p)) /* there is a real pos */ { pos = cst_strdup(p); p = ts_get(e); if (!cst_streq(":",p)) /* there is a real pos */ { cst_fprintf(stdout,"add_addenda: lex %s: expected \":\" in %s\n", lex->name, word); cst_free(word); cst_free(pos); ts_close(e); return NULL; } } else pos = cst_strdup("nil"); while (!ts_eof(e)) { p = ts_get(e); /* Check its a legal phone */ for (i=0; lex->phone_table[i]; i++) { if (cst_streq(p,lex->phone_table[i])) break; } if (cst_streq("#",p)) /* comment to end of line */ break; else if (cst_streq("",p)) /* trailing ws at eoln causes this */ break; else if (lex->phone_table[i]) /* Only add it if its a valid phone */ phones = cons_val(string_val(p),phones); else { cst_fprintf(stdout,"add_addenda: lex: %s word %s phone %s not in lexicon phoneset\n", lex->name, word, p); } } ventry = cons_val(string_val(word),cons_val(string_val(pos), val_reverse(phones))); cst_free(word); cst_free(pos); ts_close(e); #if 0 printf("entry: "); val_print(stdout,ventry); printf("\n"); #endif return ventry; }
/*
 * Synthesize SSML text read from ts with the given voice and return the
 * accumulated duration reported by flite_process_output.
 *
 * "<" starts a tag: the tag name is upcased, its attributes are read with
 * ssml_get_attributes and both are handed to ssml_apply_tag.  "&" tokens
 * are currently ignored.  All other tokens are collected into a "Token"
 * relation until an empty token is read, more than 500 tokens have been
 * collected, or the break function signals an utterance boundary; the
 * utterance is then passed through the optional user callback and
 * synthesized.  The loop stops when the callback returns no utterance or
 * the stream is exhausted.
 *
 * The stream is not closed here.
 */
static float flite_ssml_to_speech_ts(cst_tokenstream *ts,
                                     cst_voice *voice,
                                     const char *outtype)
{
    cst_features *ssml_word_feats;
    cst_features *attributes;
    const char *token;
    char *tag;
    cst_utterance *utt;
    cst_relation *tokrel;
    int num_tokens;
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    float durs = 0.0;
    cst_item *t;

    /* NOTE(review): ssml_word_feats is never deleted in this function;
       whether ssml_apply_tag keeps references to it is not visible here,
       so it is left alone -- confirm ownership before freeing. */
    ssml_word_feats = new_features();
    set_charclasses(ts,
                    " \t\n\r",
                    ssml_singlecharsymbols_general,
                    get_param_string(voice->features,"text_prepunctuation",""),
                    get_param_string(voice->features,"text_postpunctuation","")
                    );

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    num_tokens = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");
    while (!ts_eof(ts) || num_tokens > 0)
    {
        token = ts_get(ts);
        if (cst_streq("<",token))
        {   /* A tag */
            tag = cst_upcase(ts_get(ts));
            if (cst_streq("/",tag)) /* an end tag */
            {
                /* release the "/" copy before it is overwritten */
                cst_free(tag);
                tag = cst_upcase(ts_get(ts));
                attributes = ssml_get_attributes(ts);
                /* ssml_get_attributes returns 0 on unexpected EOF */
                if (attributes)
                    feat_set_string(attributes,"_type","end");
            }
            else
                attributes = ssml_get_attributes(ts);
            utt = ssml_apply_tag(tag,attributes,utt,ssml_word_feats);
            cst_free(tag);
        }
        else if (cst_streq("&",token))
        {   /* an escape sequence */
            /* skip to ; and insert value in rawdata */
        }
        else
        {
            if ((cst_strlen(token) == 0) ||
                (num_tokens > 500) ||  /* need an upper bound */
                (relation_head(tokrel) &&
                 breakfunc(ts,token,tokrel)))
            {
                /* An end of utt, so synthesize it */
                if (utt_user_callback)
                    utt = (utt_user_callback)(utt);

                if (utt)
                {
                    utt = flite_do_synth(utt,voice,utt_synth_tokens);
                    durs += flite_process_output(utt,outtype,TRUE);
                    delete_utterance(utt);
                    utt = NULL;
                }
                else
                    break;

                if (ts_eof(ts))
                    break;

                utt = new_utterance();
                tokrel = utt_relation_create(utt, "Token");
                num_tokens = 0;
            }

            num_tokens++;

            t = relation_append(tokrel, NULL);
            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",ts->whitespace);
            item_set_string(t,"prepunctuation",ts->prepunctuation);
            item_set_string(t,"punc",ts->postpunctuation);
            /* Mark it at the beginning of the token */
            item_set_int(t,"file_pos",
                         ts->file_pos-(1+ /* as we are already on the next char */
                                       cst_strlen(token)+
                                       cst_strlen(ts->prepunctuation)+
                                       cst_strlen(ts->postpunctuation)));
            item_set_int(t,"line_number",ts->line_number);
        }
    }

    delete_utterance(utt);
    return durs;
}
/*
 * Synthesize the text in the named file with the given voice, one
 * utterance at a time, and return the accumulated duration reported by
 * flite_process_output.
 *
 * Reading starts at the voice feature "file_start_position" when that is
 * greater than zero.  Tokens are collected into a "Token" relation until
 * an empty token is read, more than 500 tokens have been collected, or the
 * break function signals an utterance boundary; the utterance is then
 * passed through the optional user callback and synthesized.  The loop
 * stops when the callback returns no utterance or the file is exhausted.
 * Unless outtype is "play", "none" or "stream", an empty wave is first
 * saved to outtype.
 *
 * Returns 1 when the file cannot be opened.
 */
float flite_file_to_speech(const char *filename,
                           cst_voice *voice,
                           const char *outtype)
{
    cst_breakfunc breakfunc = default_utt_break;
    cst_uttfunc utt_user_callback = 0;
    cst_tokenstream *ts;
    cst_utterance *utt;
    cst_relation *tokrel;
    cst_item *tok_item;
    cst_wave *blank;
    const char *word;
    float total = 0;
    int count;
    int start_pos;
    int at_boundary;

    ts = ts_open(filename,
        get_param_string(voice->features,"text_whitespace",NULL),
        get_param_string(voice->features,"text_singlecharsymbols",NULL),
        get_param_string(voice->features,"text_prepunctuation",NULL),
        get_param_string(voice->features,"text_postpunctuation",NULL));
    if (ts == NULL)
    {
        cst_errmsg("failed to open file \"%s\" for reading\n",
                   filename);
        return 1;
    }

    start_pos = get_param_int(voice->features,"file_start_position",0);
    if (start_pos > 0)
        ts_set_stream_pos(ts,start_pos);

    if (feat_present(voice->features,"utt_break"))
        breakfunc = val_breakfunc(feat_val(voice->features,"utt_break"));

    if (feat_present(voice->features,"utt_user_callback"))
        utt_user_callback = val_uttfunc(feat_val(voice->features,"utt_user_callback"));

    /* Output goes to a file that is appended to incrementally, so start */
    /* it off as an empty wave                                          */
    if (!(cst_streq(outtype,"play") ||
          cst_streq(outtype,"none") ||
          cst_streq(outtype,"stream")))
    {
        blank = new_wave();
        cst_wave_resize(blank,0,1);
        cst_wave_set_sample_rate(blank,16000);
        cst_wave_save_riff(blank,outtype);
        delete_wave(blank);
    }

    count = 0;
    utt = new_utterance();
    tokrel = utt_relation_create(utt, "Token");

    while (!ts_eof(ts) || count > 0)
    {
        word = ts_get(ts);

        at_boundary = (cst_strlen(word) == 0) ||
                      (count > 500) ||        /* need an upper bound */
                      (relation_head(tokrel) && breakfunc(ts,word,tokrel));
        if (at_boundary)
        {
            /* An end of utt, so synthesize it */
            if (utt_user_callback)
                utt = (utt_user_callback)(utt);

            if (!utt)
                break;

            utt = flite_do_synth(utt,voice,utt_synth_tokens);
            total += flite_process_output(utt,outtype,TRUE);
            delete_utterance(utt);
            utt = NULL;

            if (ts_eof(ts))
                break;

            utt = new_utterance();
            tokrel = utt_relation_create(utt, "Token");
            count = 0;
        }

        count++;
        tok_item = relation_append(tokrel, NULL);
        item_set_string(tok_item,"name",word);
        item_set_string(tok_item,"whitespace",ts->whitespace);
        item_set_string(tok_item,"prepunctuation",ts->prepunctuation);
        item_set_string(tok_item,"punc",ts->postpunctuation);
        /* Record where the token started; the stream is already one */
        /* character past the end of it                              */
        item_set_int(tok_item,"file_pos",
                     ts->file_pos-(1+
                                   cst_strlen(word)+
                                   cst_strlen(ts->prepunctuation)+
                                   cst_strlen(ts->postpunctuation)));
        item_set_int(tok_item,"line_number",ts->line_number);
    }

    delete_utterance(utt);
    ts_close(ts);
    return total;
}
/*
 * Read the next token from ts, treating it as a quoted string when it
 * starts with the quote character.
 *
 * In the quoted case the token runs up to the next quote character; a
 * quote (or any other character) may be included by preceding it with the
 * escape character.  When the next character is not the quote character
 * the token is read through the unquoted path below, including pre- and
 * post-punctuation.
 *
 * Both quote and escape are permanently added to the stream's QUOTE
 * character class.  Returns ts->token.
 */
const unsigned char *ts_get_quoted_token(cst_tokenstream *ts,
                                         char quote,
                                         char escape)
{
    int l;

    /* Hmm can't change quotes within a ts */
    /* Index through unsigned char: a plain char may be negative, and
       converting that straight to unsigned int gives an index far outside
       the table. */
    ts->charclass[(unsigned char)quote] |= TS_CHARCLASS_QUOTE;
    ts->charclass[(unsigned char)escape] |= TS_CHARCLASS_QUOTE;

    /* skipping whitespace */
    get_token_sub_part(ts,TS_CHARCLASS_WHITESPACE,
                       &ts->whitespace,
                       &ts->ws_max);
    ts->token_pos = ts->file_pos - 1;

    if (ts->current_char == quote)
    {   /* go until quote */
        ts_getc(ts);
        l=0;
        while (!ts_eof(ts))
        {
            get_token_sub_part_2(ts,TS_CHARCLASS_QUOTE,
                                 &ts->token,&ts->token_max);
            if (ts->current_char == escape)
            {
                /* append the character after the escape verbatim */
                ts_getc(ts);
                l = cst_strlen(ts->token);
                if (l+1 >= ts->token_max)
                    extend_buffer(&ts->token,&ts->token_max);
                ts->token[l] = ts->current_char;
                ts->token[l+1] = '\0';
                ts_getc(ts);
            }
            else
                break;
        }
        ts_getc(ts);    /* step over the closing quote */
    }
    else /* its not quoted, so be careful */
    {    /* treat it as a standard token */
        /* Get prepunctuation */
        get_token_sub_part(ts,TS_CHARCLASS_PREPUNCT,
                           &ts->prepunctuation,
                           &ts->prep_max);
        /* Get the symbol itself */
        /* NOTE(review): this takes the one-character path when the current
           character is NOT in the SINGLECHAR class, which looks inverted;
           behaviour is left unchanged -- confirm against ts_get. */
        if (!ts_charclass(ts->current_char,TS_CHARCLASS_SINGLECHAR,ts))
        {
            if (2 >= ts->token_max)
                extend_buffer(&ts->token,&ts->token_max);
            ts->token[0] = ts->current_char;
            ts->token[1] = '\0';
            ts_getc(ts);
        }
        else
            get_token_sub_part_2(ts,
                                 TS_CHARCLASS_WHITESPACE, /* end class1 */
                                 &ts->token,
                                 &ts->token_max);
        /* This'll have token *plus* post punctuation in ts->token */
        /* Get postpunctuation */
        get_token_postpunctuation(ts);
    }

    return ts->token;
}
/*
 * Load an EST track file into t.
 *
 * The file must start with the tokens "EST_File" and "Track".  Header
 * fields are then read as keyword/value pairs until "EST_Header_End":
 * DataType (ascii or binary), ByteOrder, NumFrames and NumChannels are
 * interpreted, any other keyword's value is skipped.  The track is resized
 * to the frame and channel counts found and the frames are read with
 * load_frame_ascii or load_frame_binary.  The stream must be at EOF after
 * one more token has been read.
 *
 * Returns 0 on success.  Returns -1 for an unopenable file, a bad magic
 * token, an unknown data type, EOF inside the header or trailing data, and
 * the frame loader's negative result when a frame cannot be read.
 */
int cst_track_load_est(cst_track *t, const char *filename)
{
    cst_tokenstream *ts;
    const char *tok;
    int num_frames = 0;
    int num_channels = 0;
    int ascii = 1;
    int swap = 0;
    int frame;
    int rv;

    ts = ts_open(filename, NULL, NULL, NULL, NULL);
    if (ts == NULL)
    {
        cst_errmsg("cst_track_load: can't open file \"%s\"\n", filename);
        return -1;
    }

    if (!cst_streq(ts_get(ts), "EST_File"))
    {
        cst_errmsg("cst_track_load: not an EST file \"%s\"\n", filename);
        ts_close(ts);
        return -1;
    }
    if (!cst_streq(ts_get(ts), "Track"))
    {
        cst_errmsg("cst_track_load: not an track file \"%s\"\n", filename);
        ts_close(ts);
        return -1;
    }

    /* Header: keyword/value pairs up to EST_Header_End */
    for (tok = ts_get(ts);
         !cst_streq("EST_Header_End", tok);
         tok = ts_get(ts))
    {
        if (cst_streq("DataType", tok))
        {
            tok = ts_get(ts);
            if (cst_streq("ascii", tok))
                ascii = 1;
            else if (cst_streq("binary", tok))
                ascii = 0;
            else
            {
                cst_errmsg("cst_track_load: don't know how to deal "
                           "with type \"%s\"\n", tok);
                ts_close(ts);
                return -1;
            }
        }
        else if (cst_streq("ByteOrder", tok))
        {
            /* swap when the file's byte order differs from the host's */
            tok = ts_get(ts);
            swap = (cst_streq(tok, BYTE_ORDER_BIG) && CST_LITTLE_ENDIAN) ||
                   (cst_streq(tok, BYTE_ORDER_LITTLE) && CST_BIG_ENDIAN);
        }
        else if (cst_streq("NumFrames", tok))
            num_frames = atoi(ts_get(ts));
        else if (cst_streq("NumChannels", tok))
            num_channels = atoi(ts_get(ts));
        else
            ts_get(ts);              /* value of a keyword we do not use */

        if (ts_eof(ts))
        {
            cst_errmsg("cst_track_load: EOF in header \"%s\"\n", filename);
            ts_close(ts);
            return -1;
        }
    }

    cst_track_resize(t, num_frames, num_channels);

    /* Data: one frame at a time in the format the header announced */
    frame = 0;
    while (frame < t->num_frames)
    {
        rv = ascii ? load_frame_ascii(t, frame, ts)
                   : load_frame_binary(t, frame, ts, swap);
        if (rv < 0)
        {
            ts_close(ts);
            cst_errmsg("cst_track_load: EOF in data \"%s\"\n", filename);
            return rv;
        }
        frame++;
    }

    /* One more read should take the stream to EOF */
    ts_get(ts);
    if (!ts_eof(ts))
    {
        cst_errmsg("cst_track_load: not EOF when expected \"%s\"\n",
                   filename);
        ts_close(ts);
        return -1;
    }

    ts_close(ts);
    return 0;
}