Пример #1
0
/*
 * Read an unsigned decimal number. The first character, ch, has already been
 * taken from the input by the caller and is stored without being checked.
 * Further digits are read with lex_getc(); the character that ends the number
 * (or EOF) is handed back with lex_ungetc(). Problems are reported through
 * yyerror(): "number too long" once the buffer is nearly full, and
 * "number is ..." when strtonum() rejects the text.
 */
long long
read_number(int ch)
{
	char		 digits[32];
	const char	*err;
	long long	 value;
	size_t		 used = 0;

	digits[used++] = ch;
	for (;;) {
		ch = lex_getc();
		if (ch == EOF || !isdigit((u_char) ch))
			break;
		digits[used++] = ch;
		/* Always keep one byte free for the terminating NUL. */
		if (used == (sizeof digits) - 1)
			yyerror("number too long");
	}
	digits[used] = '\0';
	lex_ungetc(ch);

	value = strtonum(digits, 0, LLONG_MAX, &err);
	if (err != NULL)
		yyerror("number is %s", err);
	return (value);
}
Пример #2
0
/*-------------------------------------------------------------------------
 * Function:    lex_special
 *
 * Purpose:     Alternate scanner for the next token.  A run of letters,
 *              digits and the punctuation characters !@$%^&*-_=+,.?/;:~
 *              is collected into a single TOK_STR token, so that an
 *              unquoted file name such as `file.pdb' comes back as one
 *              string instead of (SYM DOT SYM).
 *
 *              If SKIPNL is non-zero any token already held in F is
 *              discarded first and line-feeds count as leading white
 *              space.  The function must be called while no token is
 *              pending (enforced with an assertion).
 *
 *              Characters that do not fit in the lexeme buffer are
 *              consumed and dropped.  The character that ends the scan is
 *              pushed back; nothing is pushed back on EOF during the
 *              leading white space.
 *
 * Return:      void
 *
 * Programmer:  Robb Matzke
 *              [email protected]
 *              Feb  7 1997
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
void
lex_special(lex_t *f, int skipnl)
{
    static const char *extra = "!@$%^&*-_=+,.?/;:~";
    int          ch, len;

    /* Throw away a pending token when the caller asks to skip new-lines. */
    if (skipnl && f->tok) {
        f->tok = 0;
        f->lexeme[0] = 0;
    }

    assert(0==f->tok);          /*too late for special lexical analysis*/

    /* Advance to the first significant character. */
    f->prompt = skipnl ? LEX_PROMPT2 : LEX_PROMPT;
    for (;;) {
        ch = lex_getc(f);
        if (EOF==ch) return;
        if (!isspace(ch)) break;
        if (!skipnl && '\n'==ch) break;
    }

    if (isalpha(ch) || isdigit(ch) || strchr(extra, ch)) {
        f->lexeme[0] = ch;
        f->lexeme[1] = '\0';
        len = 1;
        for (;;) {
            ch = lex_getc(f);
            if (EOF==ch) break;
            if (!isalpha(ch) && !isdigit(ch) && !strchr(extra, ch)) break;
            if (len+1>=sizeof(f->lexeme)) continue; /*no room: drop it*/
            f->lexeme[len++] = ch;
            f->lexeme[len] = '\0';
        }
        f->tok = TOK_STR;
    }

    /* Give back whatever character stopped the scan. */
    lex_ungetc (f, ch);
}
Пример #3
0
/*
 * Read a macro name and return it as a newly allocated string (xstrdup).
 *
 * type is the sigil character and becomes the first character of the
 * returned name. ch is the character following the sigil: either '{', which
 * starts a bracketed name that must be closed by '}', or the first character
 * of a bare name. Name characters are alphanumerics, '-' and '_'.
 *
 * For a bare name the terminating character is pushed back with
 * lex_ungetc(); for a bracketed name the closing '}' is consumed. Errors are
 * reported through yyerror().
 */
char *
read_macro(int type, int ch)
{
	char	buf[MAXNAMESIZE];
	size_t	used = 0;
	int	braced = 0;

	if (ch == '{') {
		/* Step over the bracket; a name must follow immediately. */
		ch = lex_getc();
		if (!isalnum((u_char) ch))
			yyerror("invalid macro name");
		braced = 1;
	}

	buf[used++] = type;
	buf[used++] = ch;
	for (;;) {
		ch = lex_getc();
		if (ch == EOF)
			break;
		if (ch != '-' && ch != '_' && !isalnum((u_char) ch))
			break;
		buf[used++] = ch;
		/* Always keep one byte free for the terminating NUL. */
		if (used == (sizeof buf) - 1)
			yyerror("macro name too long");
	}
	buf[used] = '\0';

	if (braced) {
		if (ch != '}')
			yyerror("missing }");
	} else
		lex_ungetc(ch);

	if (buf[0] == '\0')
		yyerror("empty macro name");

	return (xstrdup(buf));
}
Пример #4
0
/*-------------------------------------------------------------------------
 * Function:    lex_token
 *
 * Purpose:     Figures out what token is next on the input stream.  If
 *              skipnl is non-zero then the new-line token is skipped.
 *
 *              The token is cached in F (f->tok, f->lexeme), so calling
 *              this function again without clearing f->tok returns the
 *              same token.  Characters of a symbol, number or string that
 *              do not fit in f->lexeme are consumed and silently dropped.
 *
 * Return:      Success:        Token number, optional lexeme returned
 *                              through the LEXEME argument.
 *
 *              Failure:        TOK_INVALID
 *
 *              At end of input the token number is EOF and the lexeme is
 *              the empty string.
 *
 * Programmer:  Robb Matzke
 *              [email protected]
 *              Dec  4 1996
 *
 * Modifications:
 *
 *      Robb Matzke, 3 Feb 1997
 *      Cleaned up error messages.
 *
 *      Robb Matzke, 7 Feb 1997
 *      Added the `=' token.
 *
 *      Robb Matzke, 7 Feb 1997
 *      The `*' and `?'characters are now legal as part of a symbol name
 *      so we can give those pattern matching characters to the `ls'
 *      command.
 *
 *      Robb Matzke, 12 Mar 1997
 *      Since we don't have mathematical expressions yet, a numeric
 *      constant is allowed to begin with a `-'.
 *
 *      Robb Matzke, 2000-06-06
 *      Symbol names may include `-'. Something that starts with a `-' is
 *      only a number if it's followed by a digit.
 *
 *      Mark C. Miller, Mon Nov  9 18:08:05 PST 2009
 *      Added logic to support parsing of '#nnnnnn' dataset names,
 *      but only when in '/.silo' dir.
 *-------------------------------------------------------------------------
 */
int
lex_token(lex_t *f, char **lexeme, int skipnl)
{
    int          c, at, quote, inDotSiloDir=0;
    static const char *symcharsA = "_$/*?";     /*normal symbol characters*/
    static const char *symcharsB = "_$/*?#";    /*same, plus `#' for /.silo*/
    const char *symchars = symcharsA;

    /* Return the current token if appropriate. */    
    if (f->tok && (!skipnl || TOK_EOL!=f->tok)) {
        if (lexeme) *lexeme = f->lexeme;
        return f->tok;
    }

    /* Skip leading space. */    
    /* A line-feed is not skipped here: it is a token (or is handled by the
     * skipnl recursion below). */
    f->prompt = skipnl ? LEX_PROMPT2 : LEX_PROMPT;
    while (EOF!=(c=lex_getc(f)) && '\n'!=c && isspace(c)) /*void*/;

    /* handle special case of leading '#' and see if we're in .silo dir */
    if ('#'==c) {
        obj_t   f1, val;
        DBfile *file;
        char cwd[1024];

        /* Look up the value bound to the symbol `$1' and, if it yields a
         * file whose current directory name starts with "/.silo", treat
         * `#' as a symbol character instead of a comment introducer. */
        f1 = obj_new (C_SYM, "$1");
        val = sym_vboundp (f1);
        f1 = obj_dest (f1);
        if (NULL!=(file=file_file(val)) && 
            DBGetDir(file, cwd)>=0 &&
            !strncmp(cwd,"/.silo",6)) {
            inDotSiloDir = 1;
            symchars = symcharsB;
        }
    }

    /* Store the next token. */    
    if (EOF==c) {
        f->lexeme[0] = '\0';
        f->tok = EOF;

    } else if ('\n'==c) {
        if (skipnl) {
            /* Ignore the line-feed and scan again for a real token. */
            f->tok = lex_token(f, NULL, true);
        } else {
            f->lexeme[0] = '\n';
            f->lexeme[1] = '\0';
            f->tok = TOK_EOL;
        }

    } else if ('#'==c && !inDotSiloDir) {
        /* Comment: discard up to (not including) the line-feed or EOF,
         * push that terminator back, and scan again. */
        while (EOF!=(c=lex_getc(f)) && '\n'!=c) /*void*/;
        lex_ungetc(f, c);
        return lex_token(f, lexeme, skipnl);

    } else if ('>'==c) {
        /* Either `>' or `>>'. */
        c = lex_getc(f);
        if ('>'==c) {
            strcpy(f->lexeme, ">>");
            f->tok = TOK_RTRT;
        } else {
            lex_ungetc(f, c);
            strcpy(f->lexeme, ">");
            f->tok = TOK_RT;
        }

    } else if (strchr("|.()[]{}:,=", c)) {
        /* Single-character token: the token number is the character.
         * NOTE(review): strchr() also matches c=='\0' (the terminator of
         * the literal), which would yield token 0 -- confirm whether a NUL
         * byte can reach this point. */
        f->lexeme[0] = c;
        f->lexeme[1] = '\0';
        f->tok = c;

    } else if (isalpha(c) || strchr(symchars,c)) {
        /* A symbol. */        
        f->lexeme[0] = c;
        f->lexeme[1] = '\0';
        at = 1;
        while (EOF!=(c=lex_getc(f)) &&
               (isalpha(c) || isdigit(c) || strchr(symchars, c))) {
            if (at+1<sizeof(f->lexeme)) {
                f->lexeme[at++] = c;
                f->lexeme[at] = '\0';
            }
        }
        lex_ungetc(f, c);
        f->tok = TOK_SYM;
      
    } else if ('-'==c) {
        /* Could be a number or a symbol */
        f->lexeme[0] = c;
        f->lexeme[1] = '\0';
        if (EOF!=(c=lex_getc(f)) && ('.'==c || isdigit(c))) {
            /* `-' followed by a digit or `.': a negative number. */
            f->lexeme[1] = c;
            f->lexeme[2] = '\0';
            at = 2;
            while (EOF!=(c=lex_getc(f)) &&
                   (isdigit(c) || strchr("+-.eE", c))) {
                if (at+1<sizeof(f->lexeme)) {
                    f->lexeme[at++] = c;
                    f->lexeme[at] = '\0';
                }
            }
            lex_ungetc(f, c);
            f->tok = TOK_NUM;
        } else {
            /* Otherwise a symbol that starts with `-'.  C already holds
             * the character after the `-', so it is tested before the
             * next one is read. */
            at=1;
            while (EOF!=c &&
                   (isalpha(c) || isdigit(c) || strchr("_$/*?-", c))) {
                if (at+1<sizeof(f->lexeme)) {
                    f->lexeme[at++] = c;
                    f->lexeme[at] = '\0';
                }
                c = lex_getc(f);
            }
            lex_ungetc(f, c);
            f->tok = TOK_SYM;
        }
       
    } else if ('-'==c || isdigit(c)) {
        /* A number */        
        /* NOTE(review): the `-' test here cannot succeed because the
         * previous branch already handles every `-'. */
        f->lexeme[0] = c;
        f->lexeme[1] = '\0';
        at = 1;
        while (EOF!=(c=lex_getc(f)) &&
               (isdigit(c) || strchr("+-.eE", c))) {
            if (at+1<sizeof(f->lexeme)) {
                f->lexeme[at++] = c;
                f->lexeme[at] = '\0';
            }
        }
        lex_ungetc(f, c);
        f->tok = TOK_NUM;

    } else if ('"'==c || '\''==c) {
        /* A string */        
        /* The string runs to the matching quote character.  An unescaped
         * line-feed or EOF ends it early with an error message. */
        quote = c;
        at = 0;
        f->lexeme[0] = '\0';
        while (EOF!=(c=lex_getc(f)) && quote!=c && '\n'!=c) {
            if ('\\'==c) {
                /* Escape sequences: \b \n \r \t, one to three octal
                 * digits, or any other character taken literally. */
                switch ((c=lex_getc(f))) {
                case 'b':
                    c = '\b';
                    break;
                case 'n':
                    c = '\n';
                    break;
                case 'r':
                    c = '\r';
                    break;
                case 't':
                    c = '\t';
                    break;
                case EOF:
                    /* Backslash at end of input: keep the backslash. */
                    c = '\\';
                    break;
                default:
                    if (c>='0' && c<='7') {
                        int c2 = lex_getc(f);
                        if (c2>='0' && c2<='7') {
                            int c3 = lex_getc(f);
                            if (c3>='0' && c3<='7') {
                                c = ((c-'0')*8+c2-'0')*8+c3-'0';
                            } else {
                                lex_ungetc(f, c3);
                                c = (c-'0')*8+c2-'0';
                            }
                        } else {
                            lex_ungetc(f, c2);
                            c -= '0';
                        }
                    }
                    break;
                }
            }
            if (at+1<sizeof(f->lexeme)) {
                f->lexeme[at++] = c;
                f->lexeme[at] = '\0';
            }
        }
        if ('\n'==c) {
            out_errorn("linefeed inside string constant (truncated at EOL)");
            lex_ungetc(f, c);
        } else if (c<0) {
            out_errorn("EOF inside string constant (truncated at EOF)");
        }
        f->tok = TOK_STR;

    } else {
        /* Invalid character.  Don't print an error message since a
         * syntax error will result in the parser anyway. */
        f->lexeme[0] = c;
        f->lexeme[1] = '\0';
        f->tok = TOK_INVALID;
    }

    if (lexeme) *lexeme = f->lexeme;
    return f->tok;
}
Пример #5
0
/*-------------------------------------------------------------------------
 * Function:    lex_getc
 *
 * Purpose:     Similar to getc(3) except uses the GNU readline library
 *              and issues prompts as necessary.
 *
 *              Input is taken, in order of preference, from the pending
 *              in-memory line F->s, from the terminal or stream F->f, or
 *              from the top of the stack of nested input streams.  A
 *              nested stream is closed and popped when it reaches EOF.
 *
 *              Like getc(3), characters are returned as `unsigned char'
 *              values converted to int, so a byte >= 0x80 is never
 *              negative and never compares equal to EOF.
 *
 *              If the memory for an interactive line cannot be allocated
 *              the line is discarded and EOF is returned.
 *
 * Return:      Success:        Next character
 *
 *              Failure:        EOF
 *
 * Programmer:  Robb Matzke
 *              [email protected]
 *              Dec 10 1996
 *
 * Modifications:
 *      Robb Matzke, 29 Jul 1997
 *      If the line-feed is escaped with a backslash, then the backslash
 *      and line-feed are both ignored.
 *
 *      Jeremy Meredith, Thu Aug 26 09:59:44 PDT 1999
 *      Changed use of strdup() to safe_strdup().
 *
 *      Robb Matzke, 2000-07-10
 *      Modified to work with stacked input streams.
 *-------------------------------------------------------------------------
 */
int
lex_getc(lex_t *f)
{
    int          c=EOF;
#ifdef HAVE_READLINE_HISTORY
    static char  buf[1024];     /*most recent line added to the history*/
#endif

    if (f->s) {
        /* Pending line in memory.  Go through `unsigned char' so that the
         * result matches what getc() would return for the same byte. */
        c = (unsigned char)f->s[f->at++];
        if (!f->s[f->at]) {
            /* That was the last character: release the line. */
            free(f->s);
            f->s = NULL;
            f->at = 0;
        }

    } else if (f->f && isatty(fileno(f->f))) {
        /* Input is from the standard input stream.  Use readline() to
         * get it and add it to the history if different than the
         * previous line. */
#if defined(HAVE_READLINE_READLINE_H) && defined(HAVE_LIBREADLINE)
        char *temp = readline(f->prompt);
        if (temp) {
            /* readline() strips the line-feed; put it back so the rest of
             * the lexer sees an end-of-line. */
            size_t len = strlen(temp);
            f->s = malloc(len+2);
            if (f->s) {
                memcpy(f->s, temp, len);
                f->s[len] = '\n';
                f->s[len+1] = '\0';
            }
            /* The line returned by readline() belongs to the caller. */
            free(temp);
        }
#else
        char temp[4096];
        fputs(f->prompt, stdout);
        if (fgets(temp, sizeof(temp), f->f)) {
            f->s = safe_strdup(temp);
        } else {
            f->s = NULL;
        }
#endif
        f->at = 0;
#if defined(HAVE_READLINE_READLINE_H) && defined(HAVE_READLINE_HISTORY)
        /* BUF is only ever compared with strncmp() bounded by its size, so
         * it does not need to be NUL-terminated. */
        if (f->s && f->s[0] && strncmp(buf, f->s, sizeof(buf))) {
            add_history(f->s);
            strncpy(buf, f->s, sizeof(buf));
        }
#endif
        c = (f->s ? lex_getc(f) : EOF);

    } else if (f->f) {
        /* Input is from a non-interactive stream. */
        c = getc(f->f);

    } else if (f->nstack) {
        /* Read from the innermost nested stream, discarding streams that
         * are exhausted.  Backslash handling was already done by the
         * recursive call. */
        while (f->nstack && EOF==(c=lex_getc(f->stack[f->nstack-1]))) {
            lex_close(f->stack[f->nstack-1]);
            f->stack[--f->nstack] = NULL;
        }
        return c;

    } else {
        return EOF;
    }

    /* If this character is a backslash and the following character
     * is a line-feed, then ignore both of them and return the following
     * character instead.  This allows us to always continue a line by
     * escaping the line-feed. */
    if ('\\'==c) {
        int peek = lex_getc(f);
        if ('\n'!=peek) lex_ungetc(f, peek);
        else c = lex_getc(f);
    }

    return c;
}
Пример #6
0
/*
 * Read a string up to, and consuming, the terminating character endch. The
 * opening quote has already been consumed by the caller. The result is a
 * NUL-terminated buffer obtained from xmalloc().
 *
 * When esc is non-zero:
 *  - a backslash escapes the next character; \r, \n and \t become the
 *    corresponding control characters and anything else is taken as-is;
 *  - ${name} and %{name} are replaced by the value of the named macro, or
 *    by nothing if find_macro() does not know it. A '$' or '%' that is not
 *    followed by '{' is an ordinary character.
 *
 * When esc is zero every character is copied unchanged. Reaching EOF before
 * endch is reported through yyerror().
 */
char *
read_string(char endch, int esc)
{
	struct macro	*mp;
	char		*buf, *mname, *text;
	size_t		 len, pos, tlen;
	int		 ch, sigil, numeric;

	len = 24;
	buf = xmalloc(len + 1);
	pos = 0;

	for (;;) {
		ch = lex_getc();
		if (ch == endch)
			break;

		switch (ch) {
		case EOF:
			yyerror("missing %c", endch);
		case '\\':
			if (!esc)
				break;
			ch = lex_getc();
			switch (ch) {
			case EOF:
				yyerror("missing %c", endch);
			case 'r':
				ch = '\r';
				break;
			case 'n':
				ch = '\n';
				break;
			case 't':
				ch = '\t';
				break;
			}
			break;
		case '$':
		case '%':
			if (!esc)
				break;
			sigil = ch;

			ch = lex_getc();
			if (ch == EOF)
				yyerror("missing %c", endch);
			if (ch != '{') {
				/* Not a macro: keep the sigil itself. */
				lex_ungetc(ch);
				ch = sigil;
				break;
			}

			mname = read_macro(sigil, '{');
			mp = find_macro(mname);
			xfree(mname);
			if (mp == NULL)
				continue;

			/* Numbers are formatted into a temporary string. */
			numeric = (mp->type == MACRO_NUMBER);
			if (numeric)
				xasprintf(&text, "%lld", mp->value.num);
			else
				text = mp->value.str;
			tlen = strlen(text);

			ENSURE_FOR(buf, len, pos, tlen + 1);
			memcpy(buf + pos, text, tlen);
			pos += tlen;

			if (numeric)
				xfree(text);
			continue;
		}

		buf[pos++] = ch;
		ENSURE_SIZE(buf, len, pos);
	}

	buf[pos] = '\0';
	return (buf);
}
Пример #7
0
int
read_token(int ch)
{
	int		 ch2;
	char		 token[128], *name;
	size_t		 tlen;
	struct token	*ptr;
	struct macro	*macro;

	tlen = 0;
	token[tlen++] = ch;
	while ((ch = lex_getc()) != EOF) {
		if (!isalnum((u_char) ch) && ch != '-' && ch != '_')
			break;
		token[tlen++] = ch;
		if (tlen == (sizeof token) - 1)
			yyerror("token too long");
	}
	token[tlen] = '\0';
	lex_ungetc(ch);

	/*
	 * ifdef/ifndef/endif is special-cased here since it is really really
	 * hard to make work with yacc.
	 */
	if (strcmp(token, "ifdef") == 0 || strcmp(token, "ifndef") == 0) {
		while ((ch = lex_getc()) != EOF && isspace((u_char) ch))
			;

		if (ch != '$' && ch != '%')
			yyerror("syntax error");
		ch2 = lex_getc();
		if (ch2 != '{' && !isalnum((u_char) ch2))
			yyerror("invalid macro name");

		name = read_macro(ch, ch2);
		macro = find_macro(name);
		xfree(name);

		if (token[2] == 'n' && macro != NULL)
			lex_skip = 1;
		if (token[2] != 'n' && macro == NULL)
			lex_skip = 1;
		lex_ifdef++;
		return (NONE);
	}
	if (strcmp(token, "endif") == 0) {
		if (lex_ifdef == 0)
			yyerror("spurious endif");
		lex_ifdef--;
		if (lex_ifdef == 0)
			lex_skip = 0;
		return (NONE);
	}

	if (strcmp(token, "include") == 0) {
		/*
		 * This is a bit strange.
		 *
		 * yacc may have symbols buffered and be waiting for more to
		 * decide which production to match, so we can't just switch
		 * file now. So, we set a flag that tells yylex to switch files
		 * next time it's called and return the NONE symbol. This is a
		 * placeholder not used in any real productions, so it should
		 * cause yacc to match using whatever it has (assuming it
		 * can). If we don't do this, there are problems with things
		 * like:
		 *
		 * 	$file = "abc"
		 * 	include "${file}"
		 *
		 * The include token is seen before yacc has matched the
		 * previous line, so the macro doesn't exist when we try to
		 * build the include file path.
		 */
		lex_include = 1;
		return (NONE);
	}

	ptr = bsearch(token, tokens,
	    (sizeof tokens)/(sizeof tokens[0]), sizeof tokens[0], cmp_token);
        if (ptr == NULL)
		yyerror("unknown token: %s", token);
	return (ptr->value);
}
Пример #8
0
/*
 * Return the next token from the input, storing any associated value in
 * yylval (string for STRING, STRCOMMAND, STRMACRO, NUMCOMMAND and NUMMACRO;
 * number for NUMBER). Single-character operators are returned as their own
 * character value.
 *
 * While lex_skip is set, tokens are read and then discarded. When the
 * current input is exhausted include_finish() is consulted; EOF is returned
 * only once it reports non-zero, and a non-zero lex_ifdef at that point is
 * reported as "missing endif".
 */
int
yylex(void)
{
	int	 	 ch, value;
	char		*path;
	struct replpath  rp;

	/*
	 * Switch to new file. See the comment on the include token in
	 * read_token, which sets lex_include.
	 */
	if (lex_include) {
		/* Skip white space before the quoted file name. */
		while ((ch = lex_getc()) != EOF && isspace((u_char) ch))
			;

		if (ch != '"' && ch != '\'')
			yyerror("syntax error");
		/* Escapes and macros are expanded only in double quotes. */
		if (ch == '"')
			rp.str = read_string('"', 1);
		else
			rp.str = read_string('\'', 0);
		path = replacepath(&rp, parse_tags, NULL, NULL, conf.user_home);
		xfree(rp.str);
		/* NOTE(review): path is not freed here; presumably
		 * include_start takes ownership -- confirm. */
		include_start(path);
		lex_include = 0;
	}

restart:
	while ((ch = lex_getc()) != EOF) {
		switch (ch) {
		case '#':
			/* Comment: discard until EOL. */
			while ((ch = lex_getc()) != '\n' && ch != EOF)
				;
			/* The line count is also bumped if EOF ended the comment. */
			parse_file->line++;
			break;
		case '\'':
			/* Single-quoted string: taken literally. */
			yylval.string = read_string('\'', 0);
			value = STRING;
			goto out;
		case '"':
			/* Double-quoted string: escapes and macros expanded. */
			yylval.string = read_string('"', 1);
			value = STRING;
			goto out;
		case '$':
			/* $(...) is a command, ${name} or $name a macro. */
			ch = lex_getc();
			if (ch == '(') {
				yylval.string = read_command();
				value = STRCOMMAND;
				goto out;
			}
			if (ch == '{' || isalnum((u_char) ch)) {
				yylval.string = read_macro('$', ch);
				value = STRMACRO;
				goto out;
			}
			/*
			 * NOTE(review): there is no break after this call, so
			 * control would fall into the '%' case if yyerror
			 * returned; presumably it does not -- confirm.
			 */
			yyerror("invalid macro name");
		case '%':
			/* %(...) is a command, %{name} or %name a macro. */
			ch = lex_getc();
			if (ch == '(') {
				yylval.string = read_command();
				value = NUMCOMMAND;
				goto out;
			}
			if (ch == '{' || isalnum((u_char) ch)) {
				yylval.string = read_macro('%', ch);
				value = NUMMACRO;
				goto out;
			}
			/* NOTE(review): same fall-through caveat as for '$'. */
			yyerror("invalid macro name");
		case '=':
			/* Either "==" (TOKEQ) or a plain '='. */
			ch = lex_getc();
			if (ch == '=') {
				value = TOKEQ;
				goto out;
			}
			lex_ungetc(ch);
			value = '=';
			goto out;
		case '!':
			/* Either "!=" (TOKNE) or a plain '!'. */
			ch = lex_getc();
			if (ch == '=') {
				value = TOKNE;
				goto out;
			}
			lex_ungetc(ch);
			value = '!';
			goto out;
		case '~':
		case '+':
		case '(':
		case ')':
		case ',':
		case '<':
		case '>':
		case '{':
		case '}':
		case '*':
			/* These characters are their own token value. */
			value = ch;
			goto out;
		case '\n':
			parse_file->line++;
			break;
		case ' ':
		case '\t':
			break;
		default:
			if (ch != '_' && ch != '-' && !isalnum((u_char) ch))
				yyerror("unexpected character: %c", ch);

			/* A leading digit starts a number, anything else a keyword. */
			if (isdigit((u_char) ch)) {
				yylval.number = read_number(ch);
				value = NUMBER;
				goto out;
			}

			value = read_token(ch);
			goto out;
		}
	}

	/*
	 * Input exhausted. A zero return from include_finish() restarts
	 * scanning -- presumably an enclosing file was resumed; confirm
	 * against include_finish.
	 */
	if (!include_finish())
		goto restart;
	if (lex_ifdef != 0)
		yyerror("missing endif");
	return (EOF);

out:
	/* Inside a skipped ifdef/ifndef section: drop the token, read on. */
	if (lex_skip)
		goto restart;
	return (value);
}
Пример #9
0
/*
 * Scan and return the next token from l.
 *
 *  - Spaces and tabs are skipped; a line-feed is skipped and increments
 *    l->lineno.
 *  - A run of decimal digits yields NUMBER with its value in l->val.
 *  - A letter followed by letters and digits yields IDENT. The name is
 *    stored NUL-terminated in l->lexbuf, looked up in l->sym (and inserted
 *    as IDENT if absent), and the symbol index is stored in l->val.
 *  - End of input yields END.
 *  - Any other character is returned as itself with l->val set to NONE.
 *
 * At most l->lexbufsz - 1 characters of an identifier are stored so that the
 * terminating NUL always fits; the scan of that identifier stops there and
 * the remaining characters are left in the input for the next call.
 * l->lexbufsz is assumed to be at least 2.
 */
Token lex_scan(lex *l)
{
  int t;

  for (;;)
  {
    t = lex_getc(l);

    if (t == ' ' || t == '\t')
    {
      ; /* strip whitespace */
    }
    else if (t == '\n')
    {
      l->lineno++;
    }
    else if (isdigit(t))
    {
      int v;

      /* NOTE(review): v is not protected against overflow on very long
         digit strings. */
      v = 0;
      do
      {
        v = 10 * v + (t - '0');
        t = lex_getc(l);
      }
      while (isdigit(t));

      /* Give back the character that ended the number. */
      if (t != EOF)
        lex_ungetc(l, t);

      l->val = v;

      return NUMBER;
    }
    else if (isalpha(t))
    {
      char *end;
      char *p;
      int   sym;

      /* Stop one byte short of the buffer's end: that byte is reserved
         for the terminating NUL written after the loop. */
      end = l->lexbuf + l->lexbufsz - 1;
      p   = l->lexbuf;
      do
      {
        *p++ = t;
        t = lex_getc(l);
      }
      while (p < end && isalnum(t));
      *p = '\0';

      /* Give back the character that ended the identifier. */
      if (t != EOF)
        lex_ungetc(l, t);

      sym = sym_lookup(l->sym, l->lexbuf);
      if (!sym)
        sym = sym_insert(l->sym, l->lexbuf, IDENT);

      l->val = sym;

      return IDENT;
    }
    else if (t == EOF)
    {
      return END;
    }
    else
    {
      l->val = NONE;
      return t;
    }
  }
}