Exemplo n.º 1
0
Arquivo: lex.c Projeto: AMDmi3/zsh
static enum lextok
gettokstr(int c, int sub)
{
    int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
    int intpos = 1, in_brace_param = 0;
    int inquote, unmatched = 0;
    enum lextok peek;
#ifdef DEBUG
    int ocmdsp = cmdsp;
#endif

    peek = STRING;
    if (!sub) {
	lexbuf.len = 0;
	lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
    }
    for (;;) {
	int act;
	int e;
	int inbl = inblank(c);
	
	if (fdpar && !inbl && c != ')')
	    fdpar = 0;

	if (inbl && !in_brace_param && !pct)
	    act = LX2_BREAK;
	else {
	    act = lexact2[STOUC(c)];
	    c = lextok2[STOUC(c)];
	}
	switch (act) {
	case LX2_BREAK:
	    if (!in_brace_param && !sub)
		goto brk;
	    break;
	case LX2_META:
	    c = hgetc();
#ifdef DEBUG
	    if (lexstop) {
		fputs("BUG: input terminated by Meta\n", stderr);
		fflush(stderr);
		goto brk;
	    }
#endif
	    add(Meta);
	    break;
	case LX2_OUTPAR:
	    if (fdpar) {
		/* this is a single word `(   )', treat as INOUTPAR */
		add(c);
		*lexbuf.ptr = '\0';
		return INOUTPAR;
	    }
	    if ((sub || in_brace_param) && isset(SHGLOB))
		break;
	    if (!in_brace_param && !pct--) {
		if (sub) {
		    pct = 0;
		    break;
		} else
		    goto brk;
	    }
	    c = Outpar;
	    break;
	case LX2_BAR:
	    if (!pct && !in_brace_param) {
		if (sub)
		    break;
		else
		    goto brk;
	    }
	    if (unset(SHGLOB) || (!sub && !in_brace_param))
		c = Bar;
	    break;
	case LX2_STRING:
	    e = hgetc();
	    if (e == '[') {
		cmdpush(CS_MATHSUBST);
		add(String);
		add(Inbrack);
		c = dquote_parse(']', sub);
		cmdpop();
		if (c) {
		    peek = LEXERR;
		    goto brk;
		}
		c = Outbrack;
	    } else if (e == '(') {
		add(String);
		switch (cmd_or_math_sub()) {
		case CMD_OR_MATH_CMD:
		    c = Outpar;
		    break;

		case CMD_OR_MATH_MATH:
		    c = Outparmath;
		    break;

		default:
		    peek = LEXERR;
		    goto brk;
		}
	    } else {
		if (e == '{') {
		    add(c);
		    c = Inbrace;
		    ++bct;
		    cmdpush(CS_BRACEPAR);
		    if (!in_brace_param) {
			if ((in_brace_param = bct))
			    seen_brct = 0;
		    }
		} else {
		    hungetc(e);
		    lexstop = 0;
		}
	    }
	    break;
	case LX2_INBRACK:
	    if (!in_brace_param) {
		brct++;
		seen_brct = 1;
	    }
	    c = Inbrack;
	    break;
	case LX2_OUTBRACK:
	    if (!in_brace_param)
		brct--;
	    if (brct < 0)
		brct = 0;
	    c = Outbrack;
	    break;
	case LX2_INPAR:
	    if (isset(SHGLOB)) {
		if (sub || in_brace_param)
		    break;
		if (incasepat && !lexbuf.len)
		    return INPAR;
		if (!isset(KSHGLOB) && lexbuf.len)
		    goto brk;
	    }
	    if (!in_brace_param) {
		if (!sub) {
		    e = hgetc();
		    hungetc(e);
		    lexstop = 0;
		    /* For command words, parentheses are only
		     * special at the start.  But now we're tokenising
		     * the remaining string.  So I don't see what
		     * the old incmdpos test here is for.
		     *   pws 1999/6/8
		     *
		     * Oh, no.
		     *  func1(   )
		     * is a valid function definition in [k]sh.  The best
		     * thing we can do, without really nasty lookahead tricks,
		     * is break if we find a blank after a parenthesis.  At
		     * least this can't happen inside braces or brackets.  We
		     * only allow this with SHGLOB (set for both sh and ksh).
		     *
		     * Things like `print @( |foo)' should still
		     * work, because [k]sh don't allow multiple words
		     * in a function definition, so we only do this
		     * in command position.
		     *   pws 1999/6/14
		     */
		    if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
				     !brct && !intpos && incmdpos)) {
			/*
			 * Either a () token, or a command word with
			 * something suspiciously like a ksh function
			 * definition.
			 * The current word isn't spellcheckable.
			 */
			nocorrect |= 2;
			goto brk;
		    }
		}
		/*
		 * This also handles the [k]sh `foo( )' function definition.
		 * Maintain a variable fdpar, set as long as a single set of
		 * parentheses contains only space.  Then if we get to the
		 * closing parenthesis and it is still set, we can assume we
		 * have a function definition.  Only do this at the start of
		 * the word, since the (...) must be a separate token.
		 */
		if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
		    fdpar = 1;
	    }
	    c = Inpar;
	    break;
	case LX2_INBRACE:
	    if (isset(IGNOREBRACES) || sub)
		c = '{';
	    else {
		if (!lexbuf.len && incmdpos) {
		    add('{');
		    *lexbuf.ptr = '\0';
		    return STRING;
		}
		if (in_brace_param) {
		    cmdpush(CS_BRACE);
		}
		bct++;
	    }
	    break;
	case LX2_OUTBRACE:
	    if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
		break;
	    if (!bct)
		break;
	    if (in_brace_param) {
		cmdpop();
	    }
	    if (bct-- == in_brace_param)
		in_brace_param = 0;
	    c = Outbrace;
	    break;
	case LX2_COMMA:
	    if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
		c = Comma;
	    break;
	case LX2_OUTANG:
	    if (in_brace_param || sub)
		break;
	    e = hgetc();
	    if (e != '(') {
		hungetc(e);
		lexstop = 0;
		goto brk;
	    }
	    add(OutangProc);
	    if (skipcomm()) {
		peek = LEXERR;
		goto brk;
	    }
	    c = Outpar;
	    break;
	case LX2_INANG:
	    if (isset(SHGLOB) && sub)
		break;
	    e = hgetc();
	    if (!(in_brace_param || sub) && e == '(') {
		add(Inang);
		if (skipcomm()) {
		    peek = LEXERR;
		    goto brk;
		}
		c = Outpar;
		break;
	    }
	    hungetc(e);
	    if(isnumglob()) {
		add(Inang);
		while ((c = hgetc()) != '>')
		    add(c);
		c = Outang;
		break;
	    }
	    lexstop = 0;
	    if (in_brace_param || sub)
		break;
	    goto brk;
	case LX2_EQUALS:
	    if (!sub) {
		if (intpos) {
		    e = hgetc();
		    if (e != '(') {
			hungetc(e);
			lexstop = 0;
			c = Equals;
		    } else {
			add(Equals);
			if (skipcomm()) {
			    peek = LEXERR;
			    goto brk;
			}
			c = Outpar;
		    }
		} else if (peek != ENVSTRING &&
			   (incmdpos || intypeset) && !bct && !brct) {
		    char *t = tokstr;
		    if (idigit(*t))
			while (++t < lexbuf.ptr && idigit(*t));
		    else {
			int sav = *lexbuf.ptr;
			*lexbuf.ptr = '\0';
			t = itype_end(t, IIDENT, 0);
			if (t < lexbuf.ptr) {
			    skipparens(Inbrack, Outbrack, &t);
			} else {
			    *lexbuf.ptr = sav;
			}
		    }
		    if (*t == '+')
			t++;
		    if (t == lexbuf.ptr) {
			e = hgetc();
			if (e == '(') {
			    *lexbuf.ptr = '\0';
			    return ENVARRAY;
			}
			hungetc(e);
			lexstop = 0;
			peek = ENVSTRING;
			intpos = 2;
		    } else
			c = Equals;
		} else
		    c = Equals;
	    }
	    break;
	case LX2_BKSLASH:
	    c = hgetc();
	    if (c == '\n') {
		c = hgetc();
		if (!lexstop)
		    continue;
	    } else {
		add(Bnull);
		if (c == STOUC(Meta)) {
		    c = hgetc();
#ifdef DEBUG
		    if (lexstop) {
			fputs("BUG: input terminated by Meta\n", stderr);
			fflush(stderr);
			goto brk;
		    }
#endif
		    add(Meta);
		}
	    }
	    if (lexstop)
		goto brk;
	    break;
	case LX2_QUOTE: {
	    int strquote = (lexbuf.len && lexbuf.ptr[-1] == String);

	    add(Snull);
	    cmdpush(CS_QUOTE);
	    for (;;) {
		STOPHIST
		while ((c = hgetc()) != '\'' && !lexstop) {
		    if (strquote && c == '\\') {
			c = hgetc();
			if (lexstop)
			    break;
			/*
			 * Mostly we don't need to do anything special
			 * with escape backslashes or closing quotes
			 * inside $'...'; however in completion we
			 * need to be able to strip multiple backslashes
			 * neatly.
			 */
			if (c == '\\' || c == '\'')
			    add(Bnull);
			else
			    add('\\');
		    } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
			if (lexbuf.ptr[-1] == '\\')
			    lexbuf.ptr--, lexbuf.len--;
			else
			    break;
		    }
		    add(c);
		}
		ALLOWHIST
		if (c != '\'') {
		    unmatched = '\'';
		    peek = LEXERR;
		    cmdpop();
		    goto brk;
		}
		e = hgetc();
		if (e != '\'' || unset(RCQUOTES) || strquote)
		    break;
		add(c);
	    }
	    cmdpop();
	    hungetc(e);
	    lexstop = 0;
	    c = Snull;
	    break;
	}
	case LX2_DQUOTE:
	    add(Dnull);
	    cmdpush(CS_DQUOTE);
	    c = dquote_parse('"', sub);
	    cmdpop();
	    if (c) {
		unmatched = '"';
		peek = LEXERR;
		goto brk;
	    }
	    c = Dnull;
	    break;
	case LX2_BQUOTE:
	    add(Tick);
	    cmdpush(CS_BQUOTE);
	    SETPARBEGIN
	    inquote = 0;
	    while ((c = hgetc()) != '`' && !lexstop) {
		if (c == '\\') {
		    c = hgetc();
		    if (c != '\n') {
			add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
			add(c);
		    }
		    else if (!sub && isset(CSHJUNKIEQUOTES))
			add(c);
		} else {
		    if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
			break;
		    }
		    add(c);
		    if (c == '\'') {
			if ((inquote = !inquote))
			    STOPHIST
			else
			    ALLOWHIST
		    }
		}
	    }
	    if (inquote)
		ALLOWHIST
	    cmdpop();
	    if (c != '`') {
		unmatched = '`';
		peek = LEXERR;
		goto brk;
	    }
	    c = Tick;
	    SETPAREND
	    break;
	case LX2_DASH:
	    /*
	     * - shouldn't be treated as a special character unless
	     * we're in a pattern.  Howeve,simply  counting "[" doesn't
	     * work as []a-z] is a valid expression and we don't know
	     * down here what this "[" is for as $foo[stuff] is valid
	     * in zsh.  So just detect an opening [, which is enough
	     * to turn this into a pattern; the Dash will be harmlessly
	     * untokenised if not wanted.
	     */
	    if (seen_brct)
		c = Dash;
           else
               c = '-';
           break;
       case LX2_BANG:
           /*
            * Same logic as Dash, for ! to perform negation in range.
            */
           if (seen_brct)
               c = Bang;
           else
               c = '!';
       }
       add(c);
       c = hgetc();
	if (intpos)
	    intpos--;
	if (lexstop)
	    break;
    }
  brk:
    if (errflag) {
	if (in_brace_param) {
	    while(bct-- >= in_brace_param)
		cmdpop();
	}
	return LEXERR;
    }
    hungetc(c);
    if (unmatched)
	zerr("unmatched %c", unmatched);
    if (in_brace_param) {
	while(bct-- >= in_brace_param)
	    cmdpop();
	zerr("closing brace expected");
    } else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
	       peek == STRING && lexbuf.ptr[-1] == '}' &&
	       lexbuf.ptr[-2] != Bnull) {
	/* hack to get {foo} command syntax work */
	lexbuf.ptr--;
	lexbuf.len--;
	lexstop = 0;
	hungetc('}');
    }
    *lexbuf.ptr = '\0';
    DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
    return peek;
}
Exemplo n.º 2
0
static int
gettokstr(int c, int sub)
{
    int bct = 0, pct = 0, brct = 0, fdpar = 0;
    int intpos = 1, in_brace_param = 0;
    int peek, inquote, unmatched = 0;
    char endchar='"';
#ifdef DEBUG
    int ocmdsp = cmdsp;
#endif

    peek = STRING;
    if (!sub) {
	len = 0;
	bptr = tokstr = (char *) hcalloc(bsiz = 32);
    }
    for (;;) {
	int act;
	int e;
	int inbl = inblank(c);
	
	if (fdpar && !inbl && c != ')')
	    fdpar = 0;

	if (inbl && !in_brace_param && !pct)
	    act = LX2_BREAK;
	else {
	    act = lexact2[STOUC(c)];
	    c = lextok2[STOUC(c)];
	}
	switch (act) {
	case LX2_BREAK:
	    if (!in_brace_param && !sub)
		goto brk;
	    break;
	case LX2_META:
	    c = hgetc();
#ifdef DEBUG
	    if (lexstop) {
		fputs("BUG: input terminated by Meta\n", stderr);
		fflush(stderr);
		goto brk;
	    }
#endif
	    add(Meta);
	    break;
	case LX2_OUTPAR:
	    if (fdpar) {
		/* this is a single word `(   )', treat as INOUTPAR */
		add(c);
		*bptr = '\0';
		return INOUTPAR;
	    }
	    if ((sub || in_brace_param) && isset(SHGLOB))
		break;
	    if (!in_brace_param && !pct--) {
		if (sub) {
		    pct = 0;
		    break;
		} else
		    goto brk;
	    }
	    c = Outpar;
	    break;
	case LX2_BAR:
	    if (!pct && !in_brace_param) {
		if (sub)
		    break;
		else
		    goto brk;
	    }
	    if (unset(SHGLOB) || (!sub && !in_brace_param))
		c = Bar;
	    break;
	case LX2_STRING:
	    e = hgetc();
	    if (e == '[') {
		cmdpush(CS_MATHSUBST);
		add(String);
		add(Inbrack);
		c = dquote_parse(']', sub);
		cmdpop();
		if (c) {
		    peek = LEXERR;
		    goto brk;
		}
		c = Outbrack;
	    } else if (e == '(') {
		add(String);
		c = cmd_or_math_sub();
		if (c) {
		    peek = LEXERR;
		    goto brk;
		}
		c = Outpar;
	    } else {
		if (e == '{') {
		    add(c);
		    c = Inbrace;
		    ++bct;
		    cmdpush(CS_BRACEPAR);
		    if (!in_brace_param)
			in_brace_param = bct;
		} else {
		    hungetc(e);
		    lexstop = 0;
		}
	    }
	    break;
	case LX2_INBRACK:
	    if (!in_brace_param)
		brct++;
	    c = Inbrack;
	    break;
	case LX2_OUTBRACK:
	    if (!in_brace_param)
		brct--;
	    if (brct < 0)
		brct = 0;
	    c = Outbrack;
	    break;
	case LX2_INPAR:
	    if (isset(SHGLOB)) {
		if (sub || in_brace_param)
		    break;
		if (incasepat && !len)
		    return INPAR;
	    }
	    if (!in_brace_param) {
		if (!sub) {
		    e = hgetc();
		    hungetc(e);
		    lexstop = 0;
		    /* For command words, parentheses are only
		     * special at the start.  But now we're tokenising
		     * the remaining string.  So I don't see what
		     * the old incmdpos test here is for.
		     *   pws 1999/6/8
		     *
		     * Oh, no.
		     *  func1(   )
		     * is a valid function definition in [k]sh.  The best
		     * thing we can do, without really nasty lookahead tricks,
		     * is break if we find a blank after a parenthesis.  At
		     * least this can't happen inside braces or brackets.  We
		     * only allow this with SHGLOB (set for both sh and ksh).
		     *
		     * Things like `print @( |foo)' should still
		     * work, because [k]sh don't allow multiple words
		     * in a function definition, so we only do this
		     * in command position.
		     *   pws 1999/6/14
		     */
		    if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
				     !brct && !intpos && incmdpos))
			goto brk;
		}
		/*
		 * This also handles the [k]sh `foo( )' function definition.
		 * Maintain a variable fdpar, set as long as a single set of
		 * parentheses contains only space.  Then if we get to the
		 * closing parenthesis and it is still set, we can assume we
		 * have a function definition.  Only do this at the start of
		 * the word, since the (...) must be a separate token.
		 */
		if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
		    fdpar = 1;
	    }
	    c = Inpar;
	    break;
	case LX2_INBRACE:
	    if (isset(IGNOREBRACES) || sub)
		c = '{';
	    else {
		if (!len && incmdpos) {
		    add('{');
		    *bptr = '\0';
		    return STRING;
		}
		if (in_brace_param) {
		    cmdpush(CS_BRACE);
		}
		bct++;
	    }
	    break;
	case LX2_OUTBRACE:
	    if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
		break;
	    if (!bct)
		break;
	    if (in_brace_param) {
		cmdpop();
	    }
	    if (bct-- == in_brace_param)
		in_brace_param = 0;
	    c = Outbrace;
	    break;
	case LX2_COMMA:
	    if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
		c = Comma;
	    break;
	case LX2_OUTANG:
	    if (!intpos) {
		if (in_brace_param || sub)
		    break;
		else
		    goto brk;
	    }
	    e = hgetc();
	    if (e != '(') {
		hungetc(e);
		lexstop = 0;
		goto brk;
	    }
	    add(Outang);
	    if (skipcomm()) {
		peek = LEXERR;
		goto brk;
	    }
	    c = Outpar;
	    break;
	case LX2_INANG:
	    if (isset(SHGLOB) && sub)
		break;
	    e = hgetc();
	    if(e == '(' && intpos) {
		add(Inang);
		if (skipcomm()) {
		    peek = LEXERR;
		    goto brk;
		}
		c = Outpar;
		break;
	    }
	    hungetc(e);
	    if(isnumglob()) {
		add(Inang);
		while ((c = hgetc()) != '>')
		    add(c);
		c = Outang;
		break;
	    }
	    lexstop = 0;
	    if (in_brace_param || sub)
		break;
	    goto brk;
	case LX2_EQUALS:
	    if (intpos) {
		e = hgetc();
		if (e != '(') {
		    hungetc(e);
		    lexstop = 0;
		    c = Equals;
		} else {
		    add(Equals);
		    if (skipcomm()) {
			peek = LEXERR;
			goto brk;
		    }
		    c = Outpar;
		}
	    } else if (!sub && peek != ENVSTRING &&
		       incmdpos && !bct && !brct) {
		char *t = tokstr;
		if (idigit(*t))
		    while (++t < bptr && idigit(*t));
		else {
		    while (iident(*t) && ++t < bptr);
		    if (t < bptr) {
			*bptr = '\0';
			skipparens(Inbrack, Outbrack, &t);
		    }
		}
		if (*t == '+')
                    t++;
		if (t == bptr) {
		    e = hgetc();
		    if (e == '(' && incmdpos) {
			*bptr = '\0';
			return ENVARRAY;
		    }
		    hungetc(e);
		    lexstop = 0;
		    peek = ENVSTRING;
		    intpos = 2;
		} else
		    c = Equals;
	    } else
		c = Equals;
	    break;
#ifndef __SYMBIAN32__	    
	case LX2_BKSLASH:
	    c = hgetc();
	    if (c == '\n') {
		c = hgetc();
		if (!lexstop)
		    continue;
	    } else
		add(Bnull);
	    if (lexstop)
		goto brk;
	    break;
#endif	    
	case LX2_QUOTE: {
	    int strquote = (len && bptr[-1] == String);

	    add(Snull);
	    cmdpush(CS_QUOTE);
	    for (;;) {
		STOPHIST
		while ((c = hgetc()) != '\'' && !lexstop) {
		    if (strquote && c == '\\') {
			add(c);
			c = hgetc();
			if (lexstop)
			    break;
		    } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
			if (bptr[-1] == '\\')
			    bptr--, len--;
			else
			    break;
		    }
		    add(c);
		}
		ALLOWHIST
		if (c != '\'') {
		    unmatched = '\'';
		    peek = LEXERR;
		    cmdpop();
		    goto brk;
		}
		e = hgetc();
		if (e != '\'' || unset(RCQUOTES) || strquote)
		    break;
		add(c);
	    }
	    cmdpop();
	    hungetc(e);
	    lexstop = 0;
	    c = Snull;
	    break;
	}
	case LX2_DQUOTE:
	    add(Dnull);
	    cmdpush(CS_DQUOTE);
	    c = dquote_parse('"', sub);
	    cmdpop();
	    if (c) {
		unmatched = '"';
		peek = LEXERR;
		goto brk;
	    }
	    c = Dnull;
	    break;
	case LX2_BQUOTE:
	    add(Tick);
	    cmdpush(CS_BQUOTE);
	    SETPARBEGIN
	    inquote = 0;
	    while ((c = hgetc()) != '`' && !lexstop) {
		if (c == '\\') {
		    c = hgetc();
		    if (c != '\n') {
			add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
			add(c);
		    }
		    else if (!sub && isset(CSHJUNKIEQUOTES))
			add(c);
		} else {
		    if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
			break;
		    }
		    add(c);
		    if (c == '\'') {
			if ((inquote = !inquote))
			    STOPHIST
			else
			    ALLOWHIST
		    }
		}
	    }
	    if (inquote)
		ALLOWHIST
	    cmdpop();
	    if (c != '`') {
		unmatched = '`';
		peek = LEXERR;
		goto brk;
	    }
	    c = Tick;
	    SETPAREND
	    break;
	}
	#ifdef __SYMBIAN32__	
	if(c=='\\') 
		{
	    c = hgetc();
	    if (c != '\n') {
		if (c == endchar)
		    add(Bnull);
		else {
		    /* lexstop is implicitly handled here */
		    add('\\');
		    
		}
	    } 
	}
	#endif
	add(c);
	c = hgetc();
	if (intpos)
	    intpos--;
	if (lexstop)
	    break;
    }
  brk:
    hungetc(c);
    if (unmatched)
	zerr("unmatched %c", NULL, unmatched);
    if (in_brace_param) {
	while(bct-- >= in_brace_param)
	    cmdpop();
	zerr("closing brace expected", NULL, 0);
    } else if (unset(IGNOREBRACES) && !sub && len > 1 &&
	       peek == STRING && bptr[-1] == '}' && bptr[-2] != Bnull) {
	/* hack to get {foo} command syntax work */
	bptr--;
	len--;
	lexstop = 0;
	hungetc('}');
    }
    *bptr = '\0';
    DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
    return peek;
}
Exemplo n.º 3
0
Arquivo: lex.c Projeto: AMDmi3/zsh
static enum lextok
gettok(void)
{
    int c, d;
    int peekfd = -1;
    enum lextok peek;

  beginning:
    tokstr = NULL;
    while (iblank(c = hgetc()) && !lexstop);
    toklineno = lineno;
    if (lexstop)
	return (errflag) ? LEXERR : ENDINPUT;
    isfirstln = 0;
    if ((lexflags & LEXFLAGS_ZLE))
	wordbeg = inbufct - (qbang && c == bangchar);
    hwbegin(-1-(qbang && c == bangchar));
    /* word includes the last character read and possibly \ before ! */
    if (dbparens) {
	lexbuf.len = 0;
	lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
	hungetc(c);
	cmdpush(CS_MATH);
	c = dquote_parse(infor ? ';' : ')', 0);
	cmdpop();
	*lexbuf.ptr = '\0';
	if (!c && infor) {
	    infor--;
	    return DINPAR;
	}
	if (c || (c = hgetc()) != ')') {
	    hungetc(c);
	    return LEXERR;
	}
	dbparens = 0;
	return DOUTPAR;
    } else if (idigit(c)) {	/* handle 1< foo */
	d = hgetc();
	if(d == '&') {
	    d = hgetc();
	    if(d == '>') {
		peekfd = c - '0';
		hungetc('>');
		c = '&';
	    } else {
		hungetc(d);
		lexstop = 0;
		hungetc('&');
	    }
	} else if (d == '>' || d == '<') {
	    peekfd = c - '0';
	    c = d;
	} else {
	    hungetc(d);
	    lexstop = 0;
	}
    }

    /* chars in initial position in word */

    /*
     * Handle comments.  There are some special cases when this
     * is not normal command input: lexflags implies we are examining
     * a line lexically without it being used for normal command input.
     */
    if (c == hashchar && !nocomments &&
	(isset(INTERACTIVECOMMENTS) ||
	 ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
	  (!interact || unset(SHINSTDIN) || strin)))) {
	/* History is handled here to prevent extra  *
	 * newlines being inserted into the history. */

	if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
	    lexbuf.len = 0;
	    lexbuf.ptr = tokstr =
		(char *)hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
	    add(c);
	}
	hwend();
	while ((c = ingetc()) != '\n' && !lexstop) {
	    hwaddc(c);
	    addtoline(c);
	    if (lexflags & LEXFLAGS_COMMENTS_KEEP)
		add(c);
	}

	if (errflag)
	    peek = LEXERR;
	else {
	    if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
		*lexbuf.ptr = '\0';
		if (!lexstop)
		    hungetc(c);
		peek = STRING;
	    } else {
		hwend();
		hwbegin(0);
		hwaddc('\n');
		addtoline('\n');
		/*
		 * If splitting a line and removing comments,
		 * we don't want a newline token since it's
		 * treated specially.
		 */
		if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
		    peek = ENDINPUT;
		else
		    peek = NEWLIN;
	    }
	}
	return peek;
    }
    switch (lexact1[STOUC(c)]) {
    case LX1_BKSLASH:
	d = hgetc();
	if (d == '\n')
	    goto beginning;
	hungetc(d);
	lexstop = 0;
	break;
    case LX1_NEWLIN:
	return NEWLIN;
    case LX1_SEMI:
	d = hgetc();
	if(d == ';')
	    return DSEMI;
	else if(d == '&')
	    return SEMIAMP;
	else if (d == '|')
	    return SEMIBAR;
	hungetc(d);
	lexstop = 0;
	return SEMI;
    case LX1_AMPER:
	d = hgetc();
	if (d == '&')
	    return DAMPER;
	else if (d == '!' || d == '|')
	    return AMPERBANG;
	else if (d == '>') {
	    tokfd = peekfd;
	    d = hgetc();
	    if (d == '!' || d == '|')
		return OUTANGAMPBANG;
	    else if (d == '>') {
		d = hgetc();
		if (d == '!' || d == '|')
		    return DOUTANGAMPBANG;
		hungetc(d);
		lexstop = 0;
		return DOUTANGAMP;
	    }
	    hungetc(d);
	    lexstop = 0;
	    return AMPOUTANG;
	}
	hungetc(d);
	lexstop = 0;
	return AMPER;
    case LX1_BAR:
	d = hgetc();
	if (d == '|')
	    return DBAR;
	else if (d == '&')
	    return BARAMP;
	hungetc(d);
	lexstop = 0;
	return BAR;
    case LX1_INPAR:
	d = hgetc();
	if (d == '(') {
	    if (infor) {
		dbparens = 1;
		return DINPAR;
	    }
	    if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
		lexbuf.len = 0;
		lexbuf.ptr = tokstr = (char *)
		    hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
		switch (cmd_or_math(CS_MATH)) {
		case CMD_OR_MATH_MATH:
		    return DINPAR;

		case CMD_OR_MATH_CMD:
		    /*
		     * Not math, so we don't return the contents
		     * as a string in this case.
		     */
		    tokstr = NULL;
		    return INPAR;
		    
		case CMD_OR_MATH_ERR:
		    /*
		     * LEXFLAGS_ACTIVE means we came from bufferwords(),
		     * so we treat as an incomplete math expression
		     */
		    if (lexflags & LEXFLAGS_ACTIVE)
			tokstr = dyncat("((", tokstr ? tokstr : "");
		    /* fall through */

		default:
		    return LEXERR;
		}
	    }
	} else if (d == ')')
	    return INOUTPAR;
	hungetc(d);
	lexstop = 0;
	if (!(isset(SHGLOB) || incond == 1 || incmdpos))
	    break;
	return INPAR;
    case LX1_OUTPAR:
	return OUTPAR;
    case LX1_INANG:
	d = hgetc();
	if (d == '(') {
	    hungetc(d);
	    lexstop = 0;
	    unpeekfd:
	    if(peekfd != -1) {
		hungetc(c);
		c = '0' + peekfd;
	    }
	    break;
	}
	if (d == '>') {
	    peek = INOUTANG;
	} else if (d == '<') {
	    int e = hgetc();

	    if (e == '(') {
		hungetc(e);
		hungetc(d);
		peek = INANG;
	    } else if (e == '<')
		peek = TRINANG;
	    else if (e == '-')
		peek = DINANGDASH;
	    else {
		hungetc(e);
		lexstop = 0;
		peek = DINANG;
	    }
	} else if (d == '&') {
	    peek = INANGAMP;
	} else {
	    hungetc(d);
	    if(isnumglob())
		goto unpeekfd;
	    peek = INANG;
	}
	tokfd = peekfd;
	return peek;
    case LX1_OUTANG:
	d = hgetc();
	if (d == '(') {
	    hungetc(d);
	    goto unpeekfd;
	} else if (d == '&') {
	    d = hgetc();
	    if (d == '!' || d == '|')
		peek = OUTANGAMPBANG;
	    else {
		hungetc(d);
		lexstop = 0;
		peek = OUTANGAMP;
	    }
	} else if (d == '!' || d == '|')
	    peek = OUTANGBANG;
	else if (d == '>') {
	    d = hgetc();
	    if (d == '&') {
		d = hgetc();
		if (d == '!' || d == '|')
		    peek = DOUTANGAMPBANG;
		else {
		    hungetc(d);
		    lexstop = 0;
		    peek = DOUTANGAMP;
		}
	    } else if (d == '!' || d == '|')
		peek = DOUTANGBANG;
	    else if (d == '(') {
		hungetc(d);
		hungetc('>');
		peek = OUTANG;
	    } else {
		hungetc(d);
		lexstop = 0;
		peek = DOUTANG;
		if (isset(HISTALLOWCLOBBER))
		    hwaddc('|');
	    }
	} else {
	    hungetc(d);
	    lexstop = 0;
	    peek = OUTANG;
	    if (!incond && isset(HISTALLOWCLOBBER))
		hwaddc('|');
	}
	tokfd = peekfd;
	return peek;
    }

    /* we've started a string, now get the *
     * rest of it, performing tokenization */
    return gettokstr(c, 0);
}
Exemplo n.º 4
0
int
gettok(void)
{
    int c, d;
    int peekfd = -1, peek;

  beginning:
    tokstr = NULL;
    while (iblank(c = hgetc()) && !lexstop);
    if (lexstop)
	return (errflag) ? LEXERR : ENDINPUT;
    isfirstln = 0;
    wordbeg = inbufct - (qbang && c == bangchar);
    hwbegin(-1-(qbang && c == bangchar));
    /* word includes the last character read and possibly \ before ! */
    if (dbparens) {
	len = 0;
	bptr = tokstr = (char *) hcalloc(bsiz = 32);
	hungetc(c);
	cmdpush(CS_MATH);
	c = dquote_parse(infor ? ';' : ')', 0);
	cmdpop();
	*bptr = '\0';
	if (!c && infor) {
	    infor--;
	    return DINPAR;
	}
	if (c || (c = hgetc()) != ')') {
	    hungetc(c);
	    return LEXERR;
	}
	dbparens = 0;
	return DOUTPAR;
    } else if (idigit(c)) {	/* handle 1< foo */
	d = hgetc();
	if(d == '&') {
	    d = hgetc();
	    if(d == '>') {
		peekfd = c - '0';
		hungetc('>');
		c = '&';
	    } else {
		hungetc(d);
		lexstop = 0;
		hungetc('&');
	    }
	} else if (d == '>' || d == '<') {
	    peekfd = c - '0';
	    c = d;
	} else {
	    hungetc(d);
	    lexstop = 0;
	}
    }

    /* chars in initial position in word */

    if (c == hashchar && !nocomments &&
	(isset(INTERACTIVECOMMENTS) ||
	 (!zleparse && !expanding &&
	  (!interact || unset(SHINSTDIN) || strin)))) {
	/* History is handled here to prevent extra  *
	 * newlines being inserted into the history. */

	while ((c = ingetc()) != '\n' && !lexstop) {
	    hwaddc(c);
	    addtoline(c);
	}

	if (errflag)
	    peek = LEXERR;
	else {
	    hwend();
	    hwbegin(0);
	    hwaddc('\n');
	    addtoline('\n');
	    peek = NEWLIN;
	}
	return peek;
    }
    switch (lexact1[STOUC(c)]) {
    case LX1_BKSLASH:
	d = hgetc();
	if (d == '\n')
	    goto beginning;
	hungetc(d);
	lexstop = 0;
	break;
    case LX1_NEWLIN:
	return NEWLIN;
    case LX1_SEMI:
	d = hgetc();
	if(d == ';')
	    return DSEMI;
	else if(d == '&')
	    return SEMIAMP;
	hungetc(d);
	lexstop = 0;
	return SEMI;
    case LX1_AMPER:
	d = hgetc();
	if (d == '&')
	    return DAMPER;
	else if (d == '!' || d == '|')
	    return AMPERBANG;
	else if (d == '>') {
	    tokfd = peekfd;
	    d = hgetc();
	    if (d == '!' || d == '|')
		return OUTANGAMPBANG;
	    else if (d == '>') {
		d = hgetc();
		if (d == '!' || d == '|')
		    return DOUTANGAMPBANG;
		hungetc(d);
		lexstop = 0;
		return DOUTANGAMP;
	    }
	    hungetc(d);
	    lexstop = 0;
	    return AMPOUTANG;
	}
	hungetc(d);
	lexstop = 0;
	return AMPER;
    case LX1_BAR:
	d = hgetc();
	if (d == '|')
	    return DBAR;
	else if (d == '&')
	    return BARAMP;
	hungetc(d);
	lexstop = 0;
	return BAR;
    case LX1_INPAR:
	d = hgetc();
	if (d == '(') {
	    if (infor) {
		dbparens = 1;
		return DINPAR;
	    }
	    if (incmdpos) {
		len = 0;
		bptr = tokstr = (char *) hcalloc(bsiz = 32);
		return cmd_or_math(CS_MATH) ? DINPAR : INPAR;
	    }
	} else if (d == ')')
	    return INOUTPAR;
	hungetc(d);
	lexstop = 0;
	if (!(incond == 1 || incmdpos))
	    break;
	return INPAR;
    case LX1_OUTPAR:
	return OUTPAR;
    case LX1_INANG:
	d = hgetc();
	if (!incmdpos && d == '(') {
	    hungetc(d);
	    lexstop = 0;
	    unpeekfd:
	    if(peekfd != -1) {
		hungetc(c);
		c = '0' + peekfd;
	    }
	    break;
	}
	if (d == '>') {
	    peek = INOUTANG;
	} else if (d == '<') {
	    int e = hgetc();

	    if (e == '(') {
		hungetc(e);
		hungetc(d);
		peek = INANG;
	    } else if (e == '<')
		peek = TRINANG;
	    else if (e == '-')
		peek = DINANGDASH;
	    else {
		hungetc(e);
		lexstop = 0;
		peek = DINANG;
	    }
	} else if (d == '&') {
	    peek = INANGAMP;
	} else {
	    hungetc(d);
	    if(isnumglob())
		goto unpeekfd;
	    peek = INANG;
	}
	tokfd = peekfd;
	return peek;
    case LX1_OUTANG:
	d = hgetc();
	if (d == '(') {
	    hungetc(d);
	    goto unpeekfd;
	} else if (d == '&') {
	    d = hgetc();
	    if (d == '!' || d == '|')
		peek = OUTANGAMPBANG;
	    else {
		hungetc(d);
		lexstop = 0;
		peek = OUTANGAMP;
	    }
	} else if (d == '!' || d == '|')
	    peek = OUTANGBANG;
	else if (d == '>') {
	    d = hgetc();
	    if (d == '&') {
		d = hgetc();
		if (d == '!' || d == '|')
		    peek = DOUTANGAMPBANG;
		else {
		    hungetc(d);
		    lexstop = 0;
		    peek = DOUTANGAMP;
		}
	    } else if (d == '!' || d == '|')
		peek = DOUTANGBANG;
	    else if (d == '(') {
		hungetc(d);
		hungetc('>');
		peek = OUTANG;
	    } else {
		hungetc(d);
		lexstop = 0;
		peek = DOUTANG;
		if (isset(HISTALLOWCLOBBER))
		    hwaddc('|');
	    }
	} else {
	    hungetc(d);
	    lexstop = 0;
	    peek = OUTANG;
	    if (!incond && isset(HISTALLOWCLOBBER))
		hwaddc('|');
	}
	tokfd = peekfd;
	return peek;
    }

    /* we've started a string, now get the *
     * rest of it, performing tokenization */
    return gettokstr(c, 0);
}