/*
 * Tokenize raw_data and append one item per non-empty token to a newly
 * created "Token" relation on u.
 *
 * The tokenizer character classes are looked up in u->features with
 * get_param_string under "text_whitespace", "text_singlecharsymbols",
 * "text_prepunctuation" and "text_pospunctuation".
 *
 * Every token item first receives the features in `attributes` (skipped
 * when attributes is NULL) and is then given "name", "whitespace",
 * "prepunctuation" and "punc" from the token stream, so the per-token
 * values win if an attribute uses one of those names.
 */
static void add_raw_data(cst_utterance *u, const char *raw_data,
                         cst_features *attributes)
{
    cst_tokenstream *ts;
    cst_relation *r;
    cst_item *t;
    const char *token;

    r = utt_relation_create(u,"Token");
    ts = ts_open_string(raw_data,
                        get_param_string(u->features,"text_whitespace",NULL),
                        get_param_string(u->features,"text_singlecharsymbols",NULL),
                        get_param_string(u->features,"text_prepunctuation",NULL),
                        get_param_string(u->features,"text_pospunctuation",NULL));

    while (!ts_eof(ts))
    {
        token = ts_get(ts);
        if (cst_strlen(token) > 0)
        {
            /* Exactly one item per real token: appending an extra item
               before reading the token would leave nameless entries in
               the relation. */
            t = relation_append(r,NULL);

            /* feat_copy_into is called as (source, destination), the
               same order default_textanalysis uses, so the attributes
               end up on the token item rather than the other way round. */
            if (attributes)
                feat_copy_into(attributes,item_feats(t));

            item_set_string(t,"name",token);
            item_set_string(t,"whitespace",ts->whitespace);
            item_set_string(t,"prepunctuation",ts->prepunctuation);
            item_set_string(t,"punc",ts->postpunctuation);
        }
    }

    /* Release the token stream opened above. */
    ts_close(ts);
}
/*
 * Populate a new "Word" relation on u from the items of its "Token"
 * relation and return u.
 *
 * For each token a list of words is obtained either from the item
 * function stored in the utterance feature "tokentowords_func" (when that
 * feature is set) or from default_tokentowords().  Each list entry becomes
 * a daughter of the token and is appended to the "Word" relation.  An
 * entry that is itself a cons carries the word name in its car and extra
 * features in its cdr; any other entry is used directly as the name.
 */
cst_utterance *default_textanalysis(cst_utterance *u)
{
    cst_relation *word_rel = utt_relation_create(u,"Word");
    const cst_val *ttwv = feat_val(u->features, "tokentowords_func");
    cst_item *t = relation_head(utt_relation(u,"Token"));

    while (t)
    {
        cst_val *words;
        const cst_val *w;

        if (!ttwv)
        {
            words = default_tokentowords(t);
        }
        else
        {
            /* Utterance-supplied token-to-words function. */
            words = (cst_val *)(*val_itemfunc(ttwv))(t);
        }

        for (w = words; w; w = val_cdr(w))
        {
            cst_item *word = item_add_daughter(t,NULL);
            const cst_val *entry = val_car(w);

            if (!cst_val_consp(entry))
            {
                /* Plain entry: the value itself is the word name. */
                item_set_string(word,"name",val_string(entry));
            }
            else
            {
                /* Has extra features: (name . features) */
                item_set_string(word,"name",val_string(val_car(entry)));
                feat_copy_into(val_features(val_cdr(entry)),
                               item_feats(word));
            }

            relation_append(word_rel,word);
        }

        /* The word list is deleted once all its entries are processed. */
        delete_val(words);

        t = item_next(t);
    }

    return u;
}
/*
 * Set feature `name` to the string `val` on item i.
 * Forwards to feat_set_string() on the feature set returned by
 * item_feats(i).
 */
void item_set_string(const cst_item *i, const char *name, const char *val)
{
    feat_set_string(item_feats(i), name, val);
}
/*
 * Set feature `name` to the float `val` on item i.
 * Forwards to feat_set_float() on the feature set returned by
 * item_feats(i).
 */
void item_set_float(const cst_item *i, const char *name, float val)
{
    feat_set_float(item_feats(i), name, val);
}
/*
 * Set feature `name` to the value `val` on item i.
 * Forwards to feat_set() on the feature set returned by item_feats(i).
 */
void item_set(const cst_item *i, const char *name, const cst_val *val)
{
    feat_set(item_feats(i), name, val);
}
/*
 * Return feature `name` of item i as a string.
 * The result is whatever feat_string() returns for the feature set
 * obtained from item_feats(i).
 */
const char *item_feat_string(const cst_item *i, const char *name)
{
    return feat_string(item_feats(i), name);
}
/*
 * Return feature `name` of item i as a float.
 * The result is whatever feat_float() returns for the feature set
 * obtained from item_feats(i).
 */
float item_feat_float(const cst_item *i, const char *name)
{
    return feat_float(item_feats(i), name);
}
/*
 * Return the value stored under feature `name` on item i.
 * The result is whatever feat_val() returns for the feature set
 * obtained from item_feats(i).
 */
const cst_val *item_feat(const cst_item *i, const char *name)
{
    return feat_val(item_feats(i), name);
}
/*
 * Remove feature `name` from item i.
 * Returns the result of feat_remove() on the feature set obtained from
 * item_feats(i).
 */
int item_feat_remove(const cst_item *i, const char *name)
{
    return feat_remove(item_feats(i), name);
}
/*
 * Report whether item i has a feature called `name`.
 * Returns the result of feat_present() on the feature set obtained from
 * item_feats(i).
 */
int item_feat_present(const cst_item *i, const char *name)
{
    return feat_present(item_feats(i), name);
}