Exemplo n.º 1
0
static void add_raw_data(cst_utterance *u, const char *raw_data,
			 cst_features *attributes)
{
    /* Add all tokens in raw _data to u */
    cst_tokenstream *ts;
    cst_relation *r;
    cst_item *t;
    const char *token;

    r = utt_relation_create(u,"Token");
    ts = 
     ts_open_string(raw_data,
                    get_param_string(u->features,"text_whitespace",NULL),
                    get_param_string(u->features,"text_singlecharsymbols",NULL),
                    get_param_string(u->features,"text_prepunctuation",NULL),
                    get_param_string(u->features,"text_pospunctuation",NULL));
    while (!(ts_eof(ts)))
    {
	t = relation_append(r,NULL);
	feat_copy_into(item_feats(t),attributes);
	token = ts_get(ts);
	if (cst_strlen(token) > 0)
	{
	    t = relation_append(r,NULL);
	    item_set_string(t,"name",token);
	    item_set_string(t,"whitespace",ts->whitespace);
	    item_set_string(t,"prepunctuation",ts->prepunctuation);
	    item_set_string(t,"punc",ts->postpunctuation);
	}
    }

}
Exemplo n.º 2
0
cst_utterance *default_tokenization(cst_utterance *u)
{
    const char *text,*token;
    cst_tokenstream *fd;
    cst_item *t;
    cst_relation *r;

    text = utt_input_text(u);
    r = utt_relation_create(u,"Token");
    fd = ts_open_string(text,
	get_param_string(u->features,"text_whitespace",NULL),
	get_param_string(u->features,"text_singlecharsymbols",NULL),
	get_param_string(u->features,"text_prepunctuation",NULL),
        get_param_string(u->features,"text_postpunctuation",NULL));
    
    while(!ts_eof(fd))
    {
	token = ts_get(fd);
	if (cst_strlen(token) > 0)
	{
	    t = relation_append(r,NULL);
	    item_set_string(t,"name",token);
	    item_set_string(t,"whitespace",fd->whitespace);
	    item_set_string(t,"prepunctuation",fd->prepunctuation);
	    item_set_string(t,"punc",fd->postpunctuation);
	    item_set_int(t,"file_pos",fd->file_pos);
	    item_set_int(t,"line_number",fd->line_number);
	}
    }

    ts_close(fd);
    
    return u;
}
Exemplo n.º 3
0
static void parse_description(const char *description, cst_features *f)
{
    /* parse the description into something more usable */
    cst_tokenstream *ts;
    const char *arg;
    char *op;
    const char *xop;

    ts = ts_open_string(description,
			" \t\r\n", /* whitespace */
			"{}[]|",   /* singlecharsymbols */
			"",        /* prepunctuation */
			"");       /* postpunctuation */
    while (!ts_eof(ts))
    {
	op = cst_strdup(ts_get(ts));
	if ((op[0] == '-') && (cst_strchr(ts->whitespace,'\n') != 0))
	{   /* got an option */
            xop = feat_own_string(f,op);
	    arg = ts_get(ts);
	    if (arg[0] == '<')
		feat_set_string(f,xop,arg);
	    else
		feat_set_string(f,xop,"<binary>");
        }
        cst_free(op);
    }

    ts_close(ts);

}
Exemplo n.º 4
0
bard_window *bard_make_help_window(bard_reader *br)
{
    bard_window *hw;
    int indent = 10;
    char *bhs;

    hw = bard_window_new("help",
                         br->display->screen_width-(2*indent),
                         br->display->screen_height-(2*indent),
                         br->display->screen->format);
    hw->x_offset = indent;
    hw->y_offset = indent;
    hw->font_name = 
        cst_strdup(get_param_string(br->config,"-font",BARD_DEFAULT_FONT));
    hw->font_size =
        get_param_int(br->config,"-help_font_size",
                      get_param_int(br->config,"-font_size",
                                    BARD_DEFAULT_FONT_SIZE));
    /* We assume we can open this font, as if we couldn't when initializing */
    /* the display window, we'd have failed by now */
    hw->font = bard_window_open_font(hw->font_name,hw->font_size);

    hw->background_color = 
        bard_color_get(br->colors,
         get_param_string(br->config,"-help_background_color","cornsilk"));
    hw->foreground_color = 
        bard_color_get(br->colors,
         get_param_string(br->config,"-help_foreground_color","steelblue"));
    hw->highlight_color = 
        bard_color_get(br->colors,
         get_param_string(br->config,"-help_highlight_color","blue"));

    hw->tm=10;
    hw->bm=10;
    hw->tlm=10;
    hw->trm=10;

    bhs = cst_alloc(char,cst_strlen(bard_help_string)+
                    cst_strlen(BARD_PROJECT_VERSION)+
                    cst_strlen(BARD_PROJECT_STATE)+
                    cst_strlen(BARD_PROJECT_DATE)+1);
    cst_sprintf(bhs,bard_help_string,BARD_PROJECT_VERSION,
                BARD_PROJECT_STATE,
                BARD_PROJECT_DATE);

    hw->ts = ts_open_string(bhs,
                     cst_ts_default_whitespacesymbols,
                     "",
                     cst_ts_default_prepunctuationsymbols,
                     cst_ts_default_postpunctuationsymbols);
    hw->ts_mode = "literal";
    cst_free(bhs);

    hw->update = bard_window_help_update;

    /* Put something on the screen */
    bard_window_display_from_pos(hw,0);

    return hw;
}
Exemplo n.º 5
0
static void parse_description(const char *description, cst_features *f)
{
    /* parse the description into something more usable */
    cst_tokenstream *ts;
    const char *arg;
    char *op;

    ts = ts_open_string(description);
    ts->whitespacesymbols = " \t\r\n";
    ts->singlecharsymbols = "{}[]|";
    ts->prepunctuationsymbols = "";
    ts->postpunctuationsymbols = "";

    while (!ts_eof(ts))
    {
	op = cst_strdup(ts_get(ts));
	if ((op[0] == '-') && (strchr(ts->whitespace,'\n') != 0))
	{    /* got an option */
	    arg = ts_get(ts);
	    if (arg[0] == '<')
		feat_set_string(f,op,arg);
	    else
		feat_set_string(f,op,"<binary>");
	}
    }

    ts_close(ts);

}
Exemplo n.º 6
0
cst_val *cst_lex_make_entry(const cst_lexicon *lex, const cst_string *entry)
{   /* if replace then replace entry in addenda of lex with entry */
    /* else append entry to addenda of lex                        */
    cst_tokenstream *e;
    cst_val *phones = NULL;
    cst_val *ventry;
    const cst_string *w, *p;
    cst_string *word;
    cst_string *pos;
    int i;

    e = ts_open_string(entry,
                       cst_ts_default_whitespacesymbols,
                       "","","");

    w = ts_get(e);
    if (w[0] == '"') /* it was a quoted entry */
    {   /* so reparse it */
        ts_close(e);
        e = ts_open_string(entry,
                           cst_ts_default_whitespacesymbols,
                           "","","");
        w = ts_get_quoted_token(e,'"','\\');
    }

    word = cst_strdup(w);
    p = ts_get(e);
    if (!cst_streq(":",p)) /* there is a real pos */
    {
        pos = cst_strdup(p);
        p = ts_get(e);
        if (!cst_streq(":",p)) /* there is a real pos */
        {
            cst_fprintf(stdout,"add_addenda: lex %s: expected \":\" in %s\n",
                        lex->name,
                        word);
            cst_free(word);
            cst_free(pos);
            ts_close(e);
            return NULL;
        }
    }
    else
        pos = cst_strdup("nil");

    while (!ts_eof(e))
    {
        p = ts_get(e);
        /* Check its a legal phone */
        for (i=0; lex->phone_table[i]; i++)
        {
            if (cst_streq(p,lex->phone_table[i]))
                break;
        }
        if (cst_streq("#",p)) /* comment to end of line */
            break;
        else if (cst_streq("",p)) /* trailing ws at eoln causes this */
            break;
        else if (lex->phone_table[i])
            /* Only add it if its a valid phone */
            phones = cons_val(string_val(p),phones);
        else
        {
            cst_fprintf(stdout,"add_addenda: lex: %s word %s phone %s not in lexicon phoneset\n",
                        lex->name,
                        word,
                        p);
        }
    }

    ventry = cons_val(string_val(word),cons_val(string_val(pos),
                      val_reverse(phones)));
    cst_free(word);
    cst_free(pos);
    ts_close(e);
#if 0
    printf("entry: ");
    val_print(stdout,ventry);
    printf("\n");
#endif

    return ventry;
}
Exemplo n.º 7
0
cst_file cst_url_open(const char *url)
{
    /* Always opens it for reading */
    cst_tokenstream *urlts;
    const cst_string *protocol;
    int port;
    cst_string *host;
    int fd;
    char *url_request;
    char *path;
    cst_file ofd;
    int state,n;
    char c;

    urlts = ts_open_string(url, "", ":/", "", "");

    protocol = ts_get(urlts);
    if (cst_streq(protocol,"http"))
    {
#ifdef CST_NO_SOCKETS
        ts_close(urlts);
        return NULL;
#else
        if (!cst_streq(ts_get(urlts),":") ||
            !cst_streq(ts_get(urlts),"/") ||
            !cst_streq(ts_get(urlts),"/"))
        {
            ts_close(urlts);
            return NULL;
        }
        host = cst_strdup(ts_get(urlts));
        if (cst_streq(ts_get(urlts),":"))
            port = (int)cst_atof(ts_get(urlts));
        else
            port = 80;

        /* Open port to web server */
        fd = cst_socket_open(host,port);
        if (fd < 0)
        {
            cst_free(host);
            ts_close(urlts);
            return NULL;
        }

        url_request = cst_alloc(char,cst_strlen(url)+17);
        cst_sprintf(url_request,"GET %s HTTP/1.2\n\n",url);
        n = write(fd,url_request,cst_strlen(url_request));
        cst_free(url_request);

        /* Skip http header -- until \n\n */
        state=0;
        while (state != 4)
        {
            n=read(fd,&c,1);
            if (n == 0)
            {   /* eof or link gone down */
                cst_free(host);
                ts_close(urlts);
                return NULL;
            }
            if ((state == 0) && (c == '\r'))
                state=1;
            else if ((state == 1) && (c == '\n'))
                state=2;
            else if ((state == 2) && (c == '\r'))
                state=3;
            else if ((state == 3) && (c == '\n'))
                state=4;
            /* Not sure you can get no CRs in the stream */
            else if ((state == 0) && (c == '\n'))
                state=2;
            else if ((state == 2) && (c == '\n'))
                state=4;
            else
                state = 0;
        }

        ofd = fdopen(fd,"rb");

        ts_close(urlts);
        cst_free(host);

        return ofd;
#endif
    }